In [4]:
import pandas as pd
from bokeh.plotting import *
import numpy as np
from bokeh.layouts import row
from bokeh.models import LinearAxis, Range1d, LabelSet, Label
import scipy.stats
import math

In [9]:
#----------------------------------------------------------------------------------

def find_nearest(array, value):

    array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    return idx

#----------------------------------------------------------------------------------

def chunk(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

In [6]:
df_rad = pd.read_csv('/Users/alihanks/pinewood_os_d3s.csv')
df_weather = pd.read_csv('/Users/alihanks/pinewood_os_weather.csv')
df_air = pd.read_csv('/Users/alihanks/pinewood_os_aq.csv')

In [10]:
df_rad_sub = df_rad.drop(df_rad.columns[[0,1,2,3,4]],axis=1)

In [11]:
#----------------------------------------------------------------------------------------
# This cell bins the data into groups of 10,000 minutes.
# This is accomplished using the chunck method defined above, 
#   which splits a dataframe into smaller dataframes of a given size
# Each chunk contains 2000 entries, which amounts to 10,000 minutes
#----------------------------------------------------------------------------------------

time = []

dft = df_rad[['deviceTime_unix']].copy()
for dft_chunk in chunk(dft,2000):
    time.append(dft_chunk.iloc[0]['deviceTime_unix'])

In [ ]:
#-------------------------------------------------------------------------------------------
# It's a good idea to do a time sync in case things don't match up between temp and rad data
temp = []

df_weather = df_weather[['deviceTime_unix','temperature']].copy()

temptime = df_weather.as_matrix(columns=df_weather.columns[0:1]).ravel()
indx = find_nearest(temptime,time[0])
df_weather = df_weather.drop(df_weather.index[0:indx])

for dftemp_chunk in chunk(df_weather,2000):
    temp.append(dftemp_chunk["temperature"].mean())

In [3]:
#-----------------------------------------------------------------------------------------------------------------------
# For Pinewood, it is less important to sync times up because the sensors were installed at the same time, but 
# nevertheless, it still remains a good idea to keep the time sync around just in case things don't match up

temp = []

df2 = df2[['deviceTime_unix','temperature']].copy()

temptime = df2.as_matrix(columns=df2.columns[0:1]).ravel()
indx = find_nearest(temptime,time[0])
df2 = df2.drop(df2.index[0:indx])

for dftemp_chunk in chunk(df2,2000):
    temp.append(dftemp_chunk["temperature"].mean())

#-----------------------------------------------------------------------------------------------------------------------

i = 0
channels = []

while i <=1023:
    channels.append(i) # Create the array of channel labels
    i= i+1

#-----------------------------------------------------------------------------------------------------------------------

m = 200
bins = []
while m<=2800:
    bins.append(m) # Create array of bin edges
    m =m+5

#-----------------------------------------------------------------------------------------------------------------------

total = [] # Bismuth Counts
totalk = [] # Potassium Counts
totalth = [] # Thallium Counts
totalt = [] # Overall Counts

#-----------------------------------------------------------------------------------------------------------------------

for df_chunk in chunk(df1, 2000):

    buckets = [0] * 1500

    arr = df_chunk.as_matrix(columns=df_chunk.columns[1:])
    conv = df_chunk.as_matrix(columns=df_chunk.columns[0:1]).ravel()

    k = 0

    while k < len(conv):

        kev = [i * conv[k] for i in channels]

        indexes = np.digitize(kev, bins).flatten()

        i = 0
        while i < len(indexes):
            buckets[indexes[i] - 1] = buckets[indexes[i] - 1] + arr[k][i]
            i = i + 1

        k = k + 1

    # -----------------------------------------------------------------------------------------------------------------------
    # In order to remove the background counts in the peaks, we create a line from the endpoints of the window, integrate under that line,
    # and subtract that value from the integral under the peak
    
    bismuthA = buckets[80:100]

    sumBA = (np.trapz(bismuthA, bins[80:100]))

    x1 = bins[80]
    x2 = bins[100]
    y1 = bismuthA[0]
    y2 = bismuthA[len(bismuthA) - 1]

    m = ((math.log(y2) - math.log(y1)) / (x2 - x1))
    c = math.log(y1) - (m * x1)
    A = math.exp(c)

    xarino = np.arange(x1, x2)

    y = (A * np.exp((m * xarino)))

    bismuthbackground = (np.trapz(y, x=xarino))

    # -----------------------------------------------------------------------------------------------------------------------

    potassiumA = buckets[230:270]
    sumKA = (np.trapz(potassiumA, bins[230:270]))

    x1 = bins[230]
    x2 = bins[270]
    y1 = potassiumA[0]
    y2 = potassiumA[len(potassiumA) - 1]

    m = ((math.log(y2) - math.log(y1)) / (x2 - x1))
    c = math.log(y1) - (m * x1)
    A = math.exp(c)

    xarino = np.arange(x1, x2)

    y = (A * np.exp((m * xarino)))

    potassiumbackground = (np.trapz(y, x=xarino))

    # -----------------------------------------------------------------------------------------------------------------------

    thoriumA = buckets[423:450]
    sumTA = (np.trapz(thoriumA, bins[423:450]))

    x1 = bins[423]
    x2 = bins[450]
    y1 = thoriumA[0]
    y2 = thoriumA[len(thoriumA) - 1]

    m = ((math.log(y2)-math.log(y1))/(x2-x1))
    c = math.log(y1) - (m*x1)
    A = math.exp(c)

    xarino = np.arange(x1,x2)

    y = (A*np.exp((m*xarino)))

    thoriumbackground = (np.trapz(y,x=xarino))

    # -----------------------------------------------------------------------------------------------------------------------

    totalt.append(sum(buckets))
    total.append(sumBA-bismuthbackground)
    totalk.append(sumKA-potassiumbackground)
    totalth.append(sumTA-thoriumbackground)

# -----------------------------------------------------------------------------------------------------------------------


print str(scipy.stats.pearsonr(total[0:len(temp)],temp)) + " This is bismuth"
print str(scipy.stats.pearsonr(totalk[0:len(temp)],temp)) + " This is potassium"
print str(scipy.stats.pearsonr(totalth[0:len(temp)],temp)) + " This is thallium"

# -----------------------------------------------------------------------------------------------------------------------

p = figure()
p.circle(total[0:len(temp)],temp)
citation1 = Label(x=70, y=70, x_units='screen', y_units='screen',
                 text=str(scipy.stats.pearsonr(total[0:len(temp)],temp)),
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0)
p.add_layout(citation1)



p2 = figure()
p2.circle(totalk[0:len(temp)],temp)

citation2 = Label(x=70, y=70, x_units='screen', y_units='screen',
                 text=str(scipy.stats.pearsonr(totalk[0:len(temp)],temp)),
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0)
p2.add_layout(citation2)


p3 = figure()
p3.circle(totalth[0:len(temp)],temp)

citation3 = Label(x=70, y=70, x_units='screen', y_units='screen',
                 text=str(scipy.stats.pearsonr(totalth[0:len(temp)],temp)), render_mode='css',
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0)
p3.add_layout(citation3)

show(row(p,p2,p3))

# -----------------------------------------------------------------------------------------------------------------------
p = figure()

p.circle(time,total,color = 'firebrick')

p.extra_y_ranges = {"foo": Range1d(start=0, end=35)}
p.add_layout(LinearAxis(y_range_name="foo"), 'right')

p.circle(time,temp, y_range_name='foo', color='blue')

# -----------------------------------------------------------------------------------------------------------------------

p3 = figure()
p3.circle(time,totalth,color = 'green')

p3.extra_y_ranges = {"foo": Range1d(start=0, end=35)}
p3.add_layout(LinearAxis(y_range_name="foo"), 'right')

p3.circle(time,temp, y_range_name='foo', color='blue')

# -----------------------------------------------------------------------------------------------------------------------

p2 = figure()
p2.circle(time,totalk,color = 'orange')

p2.extra_y_ranges = {"foo": Range1d(start=0, end=35)}
p2.add_layout(LinearAxis(y_range_name="foo"), 'right')

p2.circle(time,temp, y_range_name='foo', color='blue')

# -----------------------------------------------------------------------------------------------------------------------

show(row(p,p2,p3))


/Users/alihanks/anaconda3/envs/aerim/lib/python2.7/site-packages/ipykernel_launcher.py:48: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
/Users/alihanks/anaconda3/envs/aerim/lib/python2.7/site-packages/ipykernel_launcher.py:85: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
/Users/alihanks/anaconda3/envs/aerim/lib/python2.7/site-packages/ipykernel_launcher.py:86: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
/Users/alihanks/anaconda3/envs/aerim/lib/python2.7/site-packages/scipy/stats/stats.py:3010: RuntimeWarning: invalid value encountered in double_scalars
  r = r_num / r_den
(nan, nan) This is bismuth
(nan, nan) This is potassium
(nan, nan) This is thallium
/Users/alihanks/anaconda3/envs/aerim/lib/python2.7/site-packages/bokeh/models/sources.py:110: BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('x', 47), ('y', 1)
  "Current lengths: %s" % ", ".join(sorted(str((k, len(v))) for k, v in data.items())), BokehUserWarning))
/Users/alihanks/anaconda3/envs/aerim/lib/python2.7/site-packages/bokeh/models/sources.py:110: BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('x', 47), ('y', 1)
  "Current lengths: %s" % ", ".join(sorted(str((k, len(v))) for k, v in data.items())), BokehUserWarning))
/Users/alihanks/anaconda3/envs/aerim/lib/python2.7/site-packages/bokeh/models/sources.py:110: BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('x', 47), ('y', 1)
  "Current lengths: %s" % ", ".join(sorted(str((k, len(v))) for k, v in data.items())), BokehUserWarning))

In [ ]: