In [4]:
import pandas as pd
from bokeh.plotting import *
import numpy as np
from bokeh.layouts import row
from bokeh.models import LinearAxis, Range1d, LabelSet, Label
import scipy.stats
import math
In [9]:
#----------------------------------------------------------------------------------
def find_nearest(array, value):
array = np.asarray(array)
idx = (np.abs(array - value)).argmin()
return idx
#----------------------------------------------------------------------------------
def chunk(seq, size):
return (seq[pos:pos + size] for pos in range(0, len(seq), size))
In [6]:
df_rad = pd.read_csv('/Users/alihanks/pinewood_os_d3s.csv')
df_weather = pd.read_csv('/Users/alihanks/pinewood_os_weather.csv')
df_air = pd.read_csv('/Users/alihanks/pinewood_os_aq.csv')
In [10]:
df_rad_sub = df_rad.drop(df_rad.columns[[0,1,2,3,4]],axis=1)
In [11]:
#----------------------------------------------------------------------------------------
# This cell bins the data into groups of 10,000 minutes.
# This is accomplished using the chunck method defined above,
# which splits a dataframe into smaller dataframes of a given size
# Each chunk contains 2000 entries, which amounts to 10,000 minutes
#----------------------------------------------------------------------------------------
time = []
dft = df_rad[['deviceTime_unix']].copy()
for dft_chunk in chunk(dft,2000):
time.append(dft_chunk.iloc[0]['deviceTime_unix'])
In [ ]:
#-------------------------------------------------------------------------------------------
# It's a good idea to do a time sync in case things don't match up between temp and rad data
temp = []
df_weather = df_weather[['deviceTime_unix','temperature']].copy()
temptime = df_weather.as_matrix(columns=df_weather.columns[0:1]).ravel()
indx = find_nearest(temptime,time[0])
df_weather = df_weather.drop(df_weather.index[0:indx])
for dftemp_chunk in chunk(df_weather,2000):
temp.append(dftemp_chunk["temperature"].mean())
In [3]:
#-----------------------------------------------------------------------------------------------------------------------
# For Pinewood, it is less important to sync times up because the sensors were installed at the same time, but
# nevertheless, it still remains a good idea to keep the time sync around just in case things don't match up
temp = []
df2 = df2[['deviceTime_unix','temperature']].copy()
temptime = df2.as_matrix(columns=df2.columns[0:1]).ravel()
indx = find_nearest(temptime,time[0])
df2 = df2.drop(df2.index[0:indx])
for dftemp_chunk in chunk(df2,2000):
temp.append(dftemp_chunk["temperature"].mean())
#-----------------------------------------------------------------------------------------------------------------------
i = 0
channels = []
while i <=1023:
channels.append(i) # Create the array of channel labels
i= i+1
#-----------------------------------------------------------------------------------------------------------------------
m = 200
bins = []
while m<=2800:
bins.append(m) # Create array of bin edges
m =m+5
#-----------------------------------------------------------------------------------------------------------------------
total = [] # Bismuth Counts
totalk = [] # Potassium Counts
totalth = [] # Thallium Counts
totalt = [] # Overall Counts
#-----------------------------------------------------------------------------------------------------------------------
for df_chunk in chunk(df1, 2000):
buckets = [0] * 1500
arr = df_chunk.as_matrix(columns=df_chunk.columns[1:])
conv = df_chunk.as_matrix(columns=df_chunk.columns[0:1]).ravel()
k = 0
while k < len(conv):
kev = [i * conv[k] for i in channels]
indexes = np.digitize(kev, bins).flatten()
i = 0
while i < len(indexes):
buckets[indexes[i] - 1] = buckets[indexes[i] - 1] + arr[k][i]
i = i + 1
k = k + 1
# -----------------------------------------------------------------------------------------------------------------------
# In order to remove the background counts in the peaks, we create a line from the endpoints of the window, integrate under that line,
# and subtract that value from the integral under the peak
bismuthA = buckets[80:100]
sumBA = (np.trapz(bismuthA, bins[80:100]))
x1 = bins[80]
x2 = bins[100]
y1 = bismuthA[0]
y2 = bismuthA[len(bismuthA) - 1]
m = ((math.log(y2) - math.log(y1)) / (x2 - x1))
c = math.log(y1) - (m * x1)
A = math.exp(c)
xarino = np.arange(x1, x2)
y = (A * np.exp((m * xarino)))
bismuthbackground = (np.trapz(y, x=xarino))
# -----------------------------------------------------------------------------------------------------------------------
potassiumA = buckets[230:270]
sumKA = (np.trapz(potassiumA, bins[230:270]))
x1 = bins[230]
x2 = bins[270]
y1 = potassiumA[0]
y2 = potassiumA[len(potassiumA) - 1]
m = ((math.log(y2) - math.log(y1)) / (x2 - x1))
c = math.log(y1) - (m * x1)
A = math.exp(c)
xarino = np.arange(x1, x2)
y = (A * np.exp((m * xarino)))
potassiumbackground = (np.trapz(y, x=xarino))
# -----------------------------------------------------------------------------------------------------------------------
thoriumA = buckets[423:450]
sumTA = (np.trapz(thoriumA, bins[423:450]))
x1 = bins[423]
x2 = bins[450]
y1 = thoriumA[0]
y2 = thoriumA[len(thoriumA) - 1]
m = ((math.log(y2)-math.log(y1))/(x2-x1))
c = math.log(y1) - (m*x1)
A = math.exp(c)
xarino = np.arange(x1,x2)
y = (A*np.exp((m*xarino)))
thoriumbackground = (np.trapz(y,x=xarino))
# -----------------------------------------------------------------------------------------------------------------------
totalt.append(sum(buckets))
total.append(sumBA-bismuthbackground)
totalk.append(sumKA-potassiumbackground)
totalth.append(sumTA-thoriumbackground)
# -----------------------------------------------------------------------------------------------------------------------
print str(scipy.stats.pearsonr(total[0:len(temp)],temp)) + " This is bismuth"
print str(scipy.stats.pearsonr(totalk[0:len(temp)],temp)) + " This is potassium"
print str(scipy.stats.pearsonr(totalth[0:len(temp)],temp)) + " This is thallium"
# -----------------------------------------------------------------------------------------------------------------------
p = figure()
p.circle(total[0:len(temp)],temp)
citation1 = Label(x=70, y=70, x_units='screen', y_units='screen',
text=str(scipy.stats.pearsonr(total[0:len(temp)],temp)),
border_line_color='black', border_line_alpha=1.0,
background_fill_color='white', background_fill_alpha=1.0)
p.add_layout(citation1)
p2 = figure()
p2.circle(totalk[0:len(temp)],temp)
citation2 = Label(x=70, y=70, x_units='screen', y_units='screen',
text=str(scipy.stats.pearsonr(totalk[0:len(temp)],temp)),
border_line_color='black', border_line_alpha=1.0,
background_fill_color='white', background_fill_alpha=1.0)
p2.add_layout(citation2)
p3 = figure()
p3.circle(totalth[0:len(temp)],temp)
citation3 = Label(x=70, y=70, x_units='screen', y_units='screen',
text=str(scipy.stats.pearsonr(totalth[0:len(temp)],temp)), render_mode='css',
border_line_color='black', border_line_alpha=1.0,
background_fill_color='white', background_fill_alpha=1.0)
p3.add_layout(citation3)
show(row(p,p2,p3))
# -----------------------------------------------------------------------------------------------------------------------
p = figure()
p.circle(time,total,color = 'firebrick')
p.extra_y_ranges = {"foo": Range1d(start=0, end=35)}
p.add_layout(LinearAxis(y_range_name="foo"), 'right')
p.circle(time,temp, y_range_name='foo', color='blue')
# -----------------------------------------------------------------------------------------------------------------------
p3 = figure()
p3.circle(time,totalth,color = 'green')
p3.extra_y_ranges = {"foo": Range1d(start=0, end=35)}
p3.add_layout(LinearAxis(y_range_name="foo"), 'right')
p3.circle(time,temp, y_range_name='foo', color='blue')
# -----------------------------------------------------------------------------------------------------------------------
p2 = figure()
p2.circle(time,totalk,color = 'orange')
p2.extra_y_ranges = {"foo": Range1d(start=0, end=35)}
p2.add_layout(LinearAxis(y_range_name="foo"), 'right')
p2.circle(time,temp, y_range_name='foo', color='blue')
# -----------------------------------------------------------------------------------------------------------------------
show(row(p,p2,p3))
In [ ]: