In [58]:
import os
import datetime
import pandas as pd
import numpy as np
from scipy.stats import linregress
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from pylab import rcParams
# Notebook-wide plotting defaults: 20 x 16 figures, then the 'ggplot' style sheet.
rcParams.update({'figure.figsize': (20, 16)})
plt.style.use('ggplot')
In [19]:
# Weather columns that the plotting cells iterate over.
metrics = ['cloudCover','dewPoint','humidity','pressure','temperature','visibility','windSpeed']
# Display names; position k labels the frame loaded from weather_files[k].
hive_names = ['Cohort 1', 'Cohort 2', 'Experiment 4', 'Normal Hive']
weather_files = ['../results/2_weather.csv', '../results/3_weather.csv', '../results/4_weather.csv', '../results/1_weather.csv']

# Load one DataFrame per weather file. '#' starts a comment in the CSVs and
# the first parsed row supplies the column names.
weather_df_list = [pd.read_csv(csv_path, comment='#', header=0) for csv_path in weather_files]

# Quick sanity check on the first file: column dtypes, then the leading rows.
print(weather_df_list[0].dtypes)
weather_df_list[0].head()
Out[19]:
In [21]:
# One figure per weather metric; each hive is drawn as its own line over the
# first 25 periods.
for metric in metrics:
    for hive_idx, weather_df in enumerate(weather_df_list):
        plt.plot(weather_df[metric][:25], marker='o', label='{}'.format(hive_names[hive_idx]))
    plt.title(metric)
    plt.xlabel('Night/Day')
    plt.xlim(-0.5, 24.5)
    plt.legend(loc='lower left')
    # Tint a unit-wide band behind every period: blue for 'night', yellow for
    # anything else. The first weather file's time_period column drives the
    # shading for the whole (shared) figure.
    for slot, period in enumerate(weather_df_list[0]['time_period'][:25]):
        band_color = 'b' if period == 'night' else 'y'
        plt.axvspan(slot - 0.5, slot + 0.5, facecolor=band_color, alpha=0.1, edgecolor='none')
    plt.show()
In [23]:
# One figure per (metric, hive) pair: a single hive's values for one weather
# metric over the first 25 periods.
for metric in metrics:
    for i, df in enumerate(weather_df_list):
        plt.plot(df[metric][0:25], marker='o', label='{}'.format(hive_names[i]))
        plt.title('{} {}'.format(metric, hive_names[i]))
        plt.xlabel('Night/Day')
        plt.xlim(-0.5, 24.5)
        plt.legend(loc='lower left')
        # Shade each period from the time_period column of the hive being
        # plotted. This used to read weather_df_list[0] for every hive, so
        # hives 2-4 were shaded with the first file's day/night sequence;
        # the other per-hive cells already use the hive's own frame.
        for j, time_period in enumerate(df['time_period'][0:25]):
            # Blue band for 'night', yellow for every other value.
            shade = 'b' if time_period == 'night' else 'y'
            plt.axvspan(j - 0.5, j + 0.5, facecolor=shade, alpha=0.1, edgecolor='none')
        plt.show()
In [4]:
experiment_files = ['../results/2_output.csv', '../results/3_output.csv', '../results/4_output.csv', '../results/1_output.csv']

# One DataFrame per experiment output file ('#' starts a comment; the first
# parsed row supplies the column names).
experiment_df_list = [pd.read_csv(csv_path, comment='#', header=0) for csv_path in experiment_files]

# Inspect the first file: column dtypes and the distinct result_type values.
print(experiment_df_list[0].dtypes)
print(experiment_df_list[0]['result_type'].unique())

# Keep only rows with result_type 'real' and tag_type 'All'. reset_index()
# renumbers the surviving rows from 0 and keeps the old index as a column.
real_result_df_list = []
for experiment_df in experiment_df_list:
    is_real = experiment_df['result_type'] == 'real'
    is_all_tags = experiment_df['tag_type'] == 'All'
    real_result_df_list.append(experiment_df[is_real & is_all_tags].reset_index())
In [35]:
# Preview the leading rows of the first filtered result frame.
real_result_df_list[0].head()
Out[35]:
In [5]:
# Result columns to chart: mean/median speeds and the spread measures.
extended_metrics = ['mean_all_tracked_speeds', 'mean_min_tracked_speeds', 'median_all_tracked_speeds', 'median_min_tracked_speeds', 'spread_all_tracked_all_xy', 'spread_all_tracked_individuals', 'spread_min_tracked_all_xy', 'spread_min_tracked_individuals']

# One figure per result metric; each hive is drawn as its own line over the
# first 25 periods.
for metric in extended_metrics:
    for hive_idx, result_df in enumerate(real_result_df_list):
        plt.plot(result_df[metric][:25], marker='o', label='{}'.format(hive_names[hive_idx]))
    plt.title(metric)
    plt.xlabel('Night/Day')
    plt.xlim(-0.5, 24.5)
    plt.legend(loc='lower left')
    # Day/night bands come from the first result frame's time_period column:
    # blue for 'night', yellow for anything else.
    for slot, period in enumerate(real_result_df_list[0]['time_period'][:25]):
        band_color = 'b' if period == 'night' else 'y'
        plt.axvspan(slot - 0.5, slot + 0.5, facecolor=band_color, alpha=0.1, edgecolor='none')
    plt.show()
In [40]:
# Single figure comparing one result column across all hives over the first
# 25 periods. Swap spread_metric for e.g. 'mean_all_tracked_speeds' to
# compare a different column.
spread_metric = 'spread_all_tracked_all_xy'
for i, df in enumerate(real_result_df_list):
    plt.plot(df[spread_metric][0:25], marker='o', label='{} {}'.format(hive_names[i], spread_metric))
# Title names the plotted column; it used to be the literal string 'metric'.
plt.title(spread_metric)
plt.xlabel('Night/Day')
plt.xlim(-0.5, 24.5)
plt.legend(loc='lower left')
# Faint day/night bands taken from the first result frame's time_period
# column: blue for 'night', yellow for anything else.
for j, time_period in enumerate(real_result_df_list[0]['time_period'][0:25]):
    shade = 'b' if time_period == 'night' else 'y'
    plt.axvspan(j - 0.5, j + 0.5, facecolor=shade, alpha=0.03, edgecolor='none')
plt.show()
In [6]:
# One figure per hive showing the four speed summaries over the first 25
# periods. (The spread_* columns are charted in a separate cell.)
speed_columns = ['mean_all_tracked_speeds', 'mean_min_tracked_speeds', 'median_all_tracked_speeds', 'median_min_tracked_speeds']
for hive_idx, result_df in enumerate(real_result_df_list):
    for column in speed_columns:
        plt.plot(result_df[column][:25], marker='o', label=column)
    plt.title(hive_names[hive_idx])
    plt.xlabel('Night/Day')
    plt.xlim(-0.5, 24.5)
    plt.legend(loc='upper right')
    # Day/night bands from this hive's weather frame: blue for 'night',
    # yellow for anything else.
    for slot, period in enumerate(weather_df_list[hive_idx]['time_period'][:25]):
        band_color = 'b' if period == 'night' else 'y'
        plt.axvspan(slot - 0.5, slot + 0.5, facecolor=band_color, alpha=0.1, edgecolor='none')
    plt.show()
In [7]:
# One figure per hive showing the four spread summaries over the first 25
# periods. (The speed columns are charted in a separate cell.)
spread_columns = ['spread_all_tracked_all_xy', 'spread_all_tracked_individuals', 'spread_min_tracked_all_xy', 'spread_min_tracked_individuals']
for hive_idx, result_df in enumerate(real_result_df_list):
    for column in spread_columns:
        plt.plot(result_df[column][:25], marker='o', label=column)
    plt.title(hive_names[hive_idx])
    plt.xlabel('Night/Day')
    plt.xlim(-0.5, 24.5)
    plt.legend(loc='upper right')
    # Day/night bands from this hive's weather frame: blue for 'night',
    # yellow for anything else.
    for slot, period in enumerate(weather_df_list[hive_idx]['time_period'][:25]):
        band_color = 'b' if period == 'night' else 'y'
        plt.axvspan(slot - 0.5, slot + 0.5, facecolor=band_color, alpha=0.1, edgecolor='none')
    plt.show()
In [8]:
# One figure per hive: mean tracked speed next to the overall xy spread for
# the first 25 periods.
for hive_idx, result_df in enumerate(real_result_df_list):
    for column in ('mean_all_tracked_speeds', 'spread_all_tracked_all_xy'):
        plt.plot(result_df[column][:25], marker='o', label=column)
    plt.title(hive_names[hive_idx])
    plt.xlabel('Night/Day')
    plt.xlim(-0.5, 24.5)
    plt.legend(loc='upper right')
    # Day/night bands from this hive's weather frame: blue for 'night',
    # yellow for anything else.
    for slot, period in enumerate(weather_df_list[hive_idx]['time_period'][:25]):
        band_color = 'b' if period == 'night' else 'y'
        plt.axvspan(slot - 0.5, slot + 0.5, facecolor=band_color, alpha=0.1, edgecolor='none')
    plt.show()
In [80]:
# One figure per hive: mean tracked speed and xy spread, overlaid with that
# hive's wind speed (multiplied by 2 before plotting).
# Overlays tried in this cell but currently switched off: temperature,
# humidity * 10, dewPoint and cloudCover * 10, plus linregress trend lines
# for temperature, mean speed and spread.
for hive_idx, result_df in enumerate(real_result_df_list):
    hive_weather_df = weather_df_list[hive_idx]
    for column in ('mean_all_tracked_speeds', 'spread_all_tracked_all_xy'):
        plt.plot(result_df[column][:25], marker='o', label=column)
    plt.plot(hive_weather_df['windSpeed'][:25] * 2, marker='o', label='windSpeed')
    plt.title(hive_names[hive_idx])
    plt.xlabel('Night/Day')
    plt.xlim(-0.5, 24.5)
    plt.legend(loc='upper right')
    # Faint day/night bands from this hive's weather frame: blue for 'night',
    # yellow for anything else.
    for slot, period in enumerate(hive_weather_df['time_period'][:25]):
        band_color = 'b' if period == 'night' else 'y'
        plt.axvspan(slot - 0.5, slot + 0.5, facecolor=band_color, alpha=0.03, edgecolor='none')
    plt.show()
In [30]:
# Worked example of scipy.stats.linregress on six sample points.
# (Two placeholder x/y lists that were overwritten before any use, and a
# commented-out linregress call on them, have been removed.)
x = np.array([1, 2, 5, 7, 10, 15])
y = np.array([2, 6, 7, 9, 14, 19])

# Fit the model.
slope, intercept, r_value, p_value, slope_std_error = linregress(x, y)

# Derived quantities: fitted values, residuals, and the residual standard
# error using len(x) - 2 degrees of freedom.
predict_y = intercept + slope * x
pred_error = y - predict_y
degrees_of_freedom = len(x) - 2
residual_std_error = np.sqrt(np.sum(pred_error**2) / degrees_of_freedom)

# Plot the sample points as markers and the fitted line in black.
plt.plot(x, y, 'o')
plt.plot(x, predict_y, 'k-')
plt.show()
In [68]:
# Show the first 25 rows of merged_df.
# NOTE(review): merged_df is not assigned in any cell visible in this notebook;
# on a fresh kernel this presumably raises NameError - confirm where it is
# built and restore that cell (or drop this one).
merged_df[:25]
Out[68]:
In [78]:
# One scatter figure per (hive, weather metric): weather value on the x-axis
# against mean tracked speed on the y-axis, for the first 25 periods.
#
# Changes from the earlier version of this cell:
#   * xlabel was 'Night/Day' although the x-axis carries weather values;
#     it now names the metric (and its scaling).
#   * xlim(-0.5, 24.5) and the per-index day/night axvspan bands were copied
#     from the time-series cells. On a scatter plot they clip any weather
#     value outside that range and shade by row position rather than by
#     anything on the axis, so both are removed.
#   * A trailing block of commented-out experiments was dropped.
speed_metric = 'mean_all_tracked_speeds'
unscaled_metrics = ['temperature', 'dewPoint']
for i, df in enumerate(real_result_df_list):
    # NOTE(review): every missing value is replaced by 1 before plotting,
    # which puts gaps on the chart as points at speed 1 - confirm this is
    # intended rather than letting matplotlib skip them.
    filled_df = df.fillna(1)
    for metric in ['temperature', 'humidity', 'dewPoint', 'windSpeed', 'cloudCover']:
        weather = weather_df_list[i][metric][0:25]
        if metric in unscaled_metrics:
            x_label = metric
        else:
            # Existing scaling kept: every metric except temperature and
            # dewPoint is multiplied by 5; the axis label says so.
            weather = weather * 5
            x_label = '{} x 5'.format(metric)
        plt.scatter(weather, filled_df[speed_metric][0:25], marker='o', label=metric)
        plt.title('{} {}'.format(hive_names[i], metric))
        plt.xlabel(x_label)
        plt.ylabel(speed_metric)
        plt.legend(loc='upper right')
        plt.show()
In [ ]: