# In [ ]:  (Jupyter notebook cell exported as a script)
import pandas as pd
import scipy.stats
from bokeh.plotting import figure , ColumnDataSource
from bokeh.io import show
import numpy as np
from bokeh.models import HoverTool
# This script plots the strength of the correlation between cpm and temperature as a function of the width of the time bins the data is averaged over
#______________________________________________________________________________________________________________________________
def find_nearest(array, value):
    """Return the index of the element of ``array`` that is closest to ``value``.

    Args:
        array: Any sequence accepted by ``np.asarray`` (list, ndarray, ...).
        value: The number to search for.

    Returns:
        The integer position (as returned by ``ndarray.argmin``) of the
        element with the smallest absolute difference from ``value``. When
        several elements are equally close, the first one wins.

    Raises:
        ValueError: If ``array`` is empty (raised by ``argmin``).
    """
    array = np.asarray(array)
    return np.abs(array - value).argmin()
#______________________________________________________________________________________________________________________________
# Draws from the Etcheverry roof data, but can be changed
# ______________________________________________________________________________________________________________________________
# Load the radiation (D3S) and weather readings and keep only the timestamp
# column plus the one measurement each table contributes to the correlation.
df1 = pd.read_csv('etch_roof_d3s.csv')
df2 = pd.read_csv('etch_roof_weather.csv')

df1 = df1[['deviceTime_unix', 'cpm']].copy()
df2 = df2[['deviceTime_unix', 'temperature']].copy()

# 1-D arrays of the unix timestamps. `.values` is used instead of
# `DataFrame.as_matrix`, which no longer exists in pandas >= 1.0.
unix_timerad = df1['deviceTime_unix'].values
unix_timeweather = df2['deviceTime_unix'].values

# Line up the first rows of the two tables: whichever table has the later
# first timestamp loses its leading rows up to the row closest to the other
# table's first timestamp.
# NOTE(review): this only removes rows when the CSVs are ordered newest-first
# (so row 0 is the most recent reading) -- presumably the case; confirm.
if unix_timerad[0] < unix_timeweather[0]:
    df2 = df2.drop(df2.index[0:find_nearest(unix_timeweather, unix_timerad[0])])
else:
    df1 = df1.drop(df1.index[0:find_nearest(unix_timerad, unix_timeweather[0])])
#______________________________________________________________________________________________________________________________
# Deal with the NaNs
# Index both tables by timestamp so they can be resampled into time bins.
df1 = df1.set_index(['deviceTime_unix'])
df1.index = pd.to_datetime(df1.index, unit='s')
df2 = df2.set_index(['deviceTime_unix'])
df2.index = pd.to_datetime(df2.index, unit='s')

corr = []   # Pearson correlation coefficient for each bin width
count = []  # the bin widths themselves, in minutes

# Try bin widths of 1000, 3000, ..., 79000 minutes.
for i in range(1000, 80001, 2000):
    # "min" is the minute alias accepted by both old and new pandas
    # (the original "T" alias is deprecated in recent releases).
    bin_width = str(i) + "min"
    df_binned = df1.resample(bin_width, label='right').mean().reset_index()
    df2_binned = df2.resample(bin_width, label='right').mean().reset_index()

    # The two tables are paired row-by-row, so cut BOTH to the common length.
    # (Only the radiation table used to be truncated, which broke the NaN
    # removal and pearsonr whenever the weather table had more bins.)
    n_common = min(len(df_binned), len(df2_binned))
    df_binned = df_binned.iloc[:n_common]
    df2_binned = df2_binned.iloc[:n_common]
    print(df_binned)
    print(df2_binned)

    # Drop every bin in which either table has a missing value, keeping the
    # two tables aligned by applying the same mask to both.
    has_nan = (pd.isnull(df_binned).any(axis=1).values
               | pd.isnull(df2_binned).any(axis=1).values)
    df_binned = df_binned[~has_nan]
    df2_binned = df2_binned[~has_nan]
    print(df_binned)
    print(df2_binned)

    count.append(i)
    if len(df_binned) < 2:
        # pearsonr needs at least two points; record NaN so that `count`
        # and `corr` keep one entry per bin width.
        corr.append(np.nan)
    else:
        corr.append(scipy.stats.pearsonr(df_binned['cpm'].values,
                                         df2_binned['temperature'].values)[0])
#______________________________________________________________________________________________________________________________
# In order to create the mouseover plot functionality, one must first have a directory containing all the images that should pop up when the mouse rolls over the graph
# These images must be added to an array in preparation for usage with bokeh
# Location of the pre-rendered plots, one per bin width, named
# "<bin width in minutes>plot.png".
IMAGE_URL_PREFIX = 'file:/Users/albertqiang/PycharmProjects/DoseNet/images/'

# One image URL per bin width: 1000, 3000, ..., 79000 minutes.
imgs = [IMAGE_URL_PREFIX + str(k) + 'plot.png' for k in range(1000, 80001, 2000)]
#______________________________________________________________________________________________________________________________
# The images that pop up are contained in the imgs array
# They can be drawn from a local source, or from a url
# Data behind the line plot: bin width on x, correlation on y, and for every
# point the image shown in the hover tooltip. The entries of `imgs` may be
# local "file:" URLs or web URLs (e.g. 'http://bokeh.pydata.org/static/snake.jpg').
source = ColumnDataSource(
    data=dict(
        x=count,
        y=corr,
        imgs=imgs,
    )
)

# Custom HTML tooltip: "@imgs" is replaced by the value of the `imgs` column
# for the point under the mouse.
hover = HoverTool(
    tooltips="""
<div>
<div>
<img
src="@imgs" height="500" alt="@imgs" width="500"
style="float: center; margin: 0px 0px 0px 0px;"
border="0"
></img>
</div>
</div>
"""
)

# `width`/`height` replace `plot_width`/`plot_height`, which were removed in
# Bokeh 3.0.
p = figure(width=1000, height=1000, tools=[hover])
p.line('x', 'y', source=source)
show(p)