In [1]:
import os
import datetime
import pandas as pd
import numpy as np
from scipy.stats import linregress
import seaborn as sns
import copy
from IPython.display import display
import matplotlib.pyplot as plt
%matplotlib inline
from pylab import rcParams
# Notebook-wide plot defaults: large figures, ggplot look.
rcParams['figure.figsize'] = (20, 16)
plt.style.use('ggplot')
In [2]:
# Result files, one per hive. The plotting cells pair these by position with
# hive_names (experiment_files[k] is labelled hive_names[k]).
experiment_files = [
    '../results/2_output.csv',
    '../results/3_output.csv',
    '../results/4_output.csv',
    '../results/1_output.csv',
]
hive_names = ['Cohort 1', 'Cohort 2', 'Normal Hive 1', 'Normal Hive 2']

# Lines starting with '#' in the CSVs are skipped; the first remaining line is the header.
experiment_df_list = [
    pd.read_csv(csv_path, comment='#', header=0)
    for csv_path in experiment_files
]

# Quick sanity check on the first experiment: column dtypes, the result types
# present, and the frame itself as the cell output.
first_experiment_df = experiment_df_list[0]
print(first_experiment_df.dtypes)
print(first_experiment_df['result_type'].unique())
first_experiment_df
Out[2]:
In [3]:
# For every experiment keep only the rows with tag_type 'All', split by
# result_type. reset_index() renumbers the rows from 0 and keeps the old
# index as an 'index' column.
all_tag_frames_by_type = {
    result_type: [
        frame[(frame['result_type'] == result_type) & (frame['tag_type'] == 'All')].reset_index()
        for frame in experiment_df_list
    ]
    for result_type in ('real', 'shuffled', 'bootstrapped')
}
real_result_df_list = all_tag_frames_by_type['real']
shuffled_result_df_list = all_tag_frames_by_type['shuffled']
bootstrapped_result_df_list = all_tag_frames_by_type['bootstrapped']

# Cell output: the real results of the first experiment. Swap in
# shuffled_result_df_list[0] or bootstrapped_result_df_list to inspect the others.
real_result_df_list[0]
Out[3]:
In [4]:
# Build, per experiment, {metric: [values of permutation 0 over the days,
# values of permutation 1 over the days, ...]} from the night rows of the
# shuffled results.
shuffled_metric_columns = [
    'diff_spread_all_tracked_all_xy',
    'diff_median_all_tracked_speeds',
    'diff_median_density',
    'diff_percent_idle_all_tracked',
]

shuffled_permutations = []
for shuffled_df in shuffled_result_df_list:
    # The permutation count is half the number of day-0 rows -- presumably one
    # 'day' and one 'night' row per permutation; TODO confirm against the CSVs.
    day_zero_rows = shuffled_df[shuffled_df['day_num'] == 0]
    num_perms = int(len(day_zero_rows) / 2)

    # One independent, initially empty list per permutation and metric.
    permutations = {
        column: [[] for _ in range(num_perms)]
        for column in shuffled_metric_columns
    }

    for day_num in sorted(shuffled_df['day_num'].unique()):
        rows_for_day = shuffled_df[shuffled_df['day_num'] == day_num]
        night_rows = rows_for_day[rows_for_day['time_period'] == 'night']
        for column, per_permutation_values in permutations.items():
            night_values = list(night_rows[column])
            # The k-th night row of a day is taken as permutation k.
            for perm_index in range(num_perms):
                per_permutation_values[perm_index].append(night_values[perm_index])

    shuffled_permutations.append(permutations)

print(shuffled_permutations[0]['diff_spread_all_tracked_all_xy'][0])
In [5]:
# Per metric: (plot title, (y-axis upper limit, y-axis lower limit)).
formatting = {
    'diff_spread_all_tracked_all_xy': ('Difference in Day and Night Spread for All Bees Tracked',(2,-3)),
    'diff_median_all_tracked_speeds': ('Difference in Day and Night Median Speed for All Bees Tracked',(7.5,-2)),
    'diff_median_density': ('Difference in Day and Night Median Density for All Bees Tracked',(0.15,-0.02)),
    'diff_percent_idle_all_tracked': ('Difference in Day and Night Percentage of Time Idle for All Bees Tracked',(0.1,-0.1)),
}

# One figure per hive and metric: every shuffled permutation in blue, the real
# night values in red, for the first 14 entries.
for i, df in enumerate(real_result_df_list):
    night_df = df[df['time_period']=='night']
    for metric in shuffled_permutations[0].keys():
        plt.figure()
        for shuffled_days in shuffled_permutations[i][metric]:
            plt.plot(shuffled_days[0:14], marker='o', color='b', linestyle="None")
        # Real values are drawn last so they sit on top of the shuffled points.
        plt.plot(list(night_df[metric])[0:14], marker='o', color='r', linestyle="None")
        metric_title, y_axis = formatting[metric]
        print(metric_title, y_axis)
        plt.title('{} {}'.format(hive_names[i], metric_title))
        plt.xlim(-0.5,13.5)
        plt.ylim(ymax=y_axis[0], ymin=y_axis[1])
        # A single show() per figure; the block of disabled plotting code that
        # used to sit here as a string literal, and the extra show() after it,
        # did nothing and have been removed.
        plt.show()
In [6]:
# Same layout as the shuffled results, built from the bootstrapped rows:
# per experiment, {metric: [night values of resample 0 over the days, ...]}.
bootstrapped_metric_columns = [
    'diff_spread_all_tracked_all_xy',
    'diff_median_all_tracked_speeds',
    'diff_median_density',
    'diff_percent_idle_all_tracked',
]

bootstrapped_permutations = []
for bootstrapped_df in bootstrapped_result_df_list:
    # Half the number of day-0 rows is used as the resample count -- presumably
    # one 'day' and one 'night' row per resample; TODO confirm against the CSVs.
    rows_on_day_zero = bootstrapped_df[bootstrapped_df['day_num'] == 0]
    num_perms = int(len(rows_on_day_zero) / 2)

    permutations = {}
    for column in bootstrapped_metric_columns:
        # Fresh inner lists per metric so the metrics never share storage.
        permutations[column] = [[] for _ in range(num_perms)]

    ordered_day_nums = sorted(bootstrapped_df['day_num'].unique())
    for day_num in ordered_day_nums:
        day_mask = bootstrapped_df['day_num'] == day_num
        night_rows = bootstrapped_df[day_mask]
        night_rows = night_rows[night_rows['time_period'] == 'night']
        for column in permutations:
            night_values = list(night_rows[column])
            # The k-th night row of a day is taken as resample k.
            for resample_index, collected in enumerate(permutations[column]):
                collected.append(night_values[resample_index])

    bootstrapped_permutations.append(permutations)
In [7]:
# Per metric: (plot title, (y-axis upper limit, y-axis lower limit)).
formatting = {
    'diff_spread_all_tracked_all_xy': (
        'Difference in Day and Night Spread for All Bees Tracked', (2, -3)),
    'diff_median_all_tracked_speeds': (
        'Difference in Day and Night Median Speed for All Bees Tracked', (7.5, -2)),
    'diff_median_density': (
        'Difference in Day and Night Median Density for All Bees Tracked', (0.01, -0.02)),
    'diff_percent_idle_all_tracked': (
        'Difference in Day and Night Percentage of Time Idle for All Bees Tracked', (0.1, -0.1)),
}

# One figure per hive and metric: bootstrapped resamples in blue, real night
# values in red, first 14 entries only.
for hive_index, real_df in enumerate(real_result_df_list):
    real_night_rows = real_df[real_df['time_period'] == 'night']
    for metric in bootstrapped_permutations[0]:
        plt.figure()
        for bootstrapped_days in bootstrapped_permutations[hive_index][metric]:
            plt.plot(bootstrapped_days[:14], linestyle="None", marker='o', color='b')
        real_night_values = list(real_night_rows[metric])
        plt.plot(real_night_values[:14], linestyle="None", marker='o', color='r')

        metric_title, y_axis = formatting[metric]
        print(metric_title, y_axis)
        y_upper, y_lower = y_axis
        plt.title('{} {}'.format(hive_names[hive_index], metric_title))
        plt.xlim(-0.5, 13.5)
        plt.ylim(ymin=y_lower, ymax=y_upper)
        plt.show()
In [28]:
# Per metric: (plot title, (y-axis upper limit, y-axis lower limit,
# y position at which the significance stars are written)).
formatting = {
    'diff_spread_all_tracked_all_xy': ('Difference in Day and Night Spread for All Bees Tracked',(2,-3, 1.8)),
    'diff_median_all_tracked_speeds': ('Difference in Day and Night Median Speed for All Bees Tracked',(9,-2.5, 8.5)),
    'diff_median_density': ('Difference in Day and Night Median Density for All Bees Tracked',(0.02,-0.025, 0.017)),
    'diff_percent_idle_all_tracked': ('Difference in Day and Night Percentage of Time Idle for All Bees Tracked',(0.1,-0.1, 0.09)),
}
num_days = 14  # number of leading night values tested and plotted per hive

# One figure per hive and metric: real night values with a bootstrap interval
# (red), the shuffled distribution as median with min/max whiskers (blue), and
# stars marking the days whose permutation-test p-value is below 0.05.
for i, df in enumerate(real_result_df_list):
    night_df = df[df['time_period']=='night']
    for metric in shuffled_permutations[0].keys():
        real_values = list(night_df[metric])[0:num_days]
        day_positions = range(len(real_values))

        # Two-sided permutation test per day on absolute values. Both counters
        # start at 1, so p = (1 + more-extreme shuffles) / (1 + shuffles) and
        # can never be 0.
        indicate_sig = []
        p_values = []
        for j, result in enumerate(real_values):
            num_diff = 1
            num = 1
            for shuffled_days in shuffled_permutations[i][metric]:
                num += 1
                if abs(shuffled_days[j]) > abs(result):
                    num_diff += 1
            p_value = num_diff / num
            p_values.append(p_value)
            if p_value >= 0.05:
                indicate_sig.append('')
            elif p_value < 0.001:
                indicate_sig.append('***')
            elif p_value < 0.01:
                indicate_sig.append('**')
            else:
                indicate_sig.append('*')

        # zip(*...) turns "one list per permutation" into "one tuple per day".
        confidence_upper_lower_list = [
            tuple(np.percentile(day_results, [2.5, 97.5]))
            for day_results in zip(*bootstrapped_permutations[i][metric])
        ]
        shuffled_min_median_max_list = [
            tuple(np.percentile(day_results, [0, 50, 100]))
            for day_results in zip(*shuffled_permutations[i][metric])
        ]

        # Error-bar lengths around the real value. errorbar() rejects negative
        # lengths, which would occur if the real value fell outside the
        # bootstrap interval, so they are clamped at 0.
        y_error_lower = []
        y_error_upper = []
        for j, real_value in enumerate(real_values):
            ci_low, ci_high = confidence_upper_lower_list[j]
            y_error_lower.append(max(real_value - ci_low, 0))
            y_error_upper.append(max(ci_high - real_value, 0))

        # Shuffled distribution: marker at the true median of the shuffled
        # values (previously the midpoint of min and max was used), whiskers
        # reaching to the minimum and maximum.
        y_shuffle_error_lower = []
        y_shuffle_error_upper = []
        shuffled_median_value = []
        for j in day_positions:
            shuffled_min, shuffled_median, shuffled_max = shuffled_min_median_max_list[j]
            shuffled_median_value.append(shuffled_median)
            y_shuffle_error_lower.append(shuffled_median - shuffled_min)
            y_shuffle_error_upper.append(shuffled_max - shuffled_median)

        metric_title, y_axis = formatting[metric]

        plt.figure()
        plt.plot(real_values, marker='o', color='r', linestyle="None")
        plt.title('{} {}'.format(hive_names[i], metric_title))
        plt.xlim(-0.5,13.5)
        plt.ylim(ymax=y_axis[0], ymin=y_axis[1])
        plt.xticks(day_positions, day_positions, size='large')
        plt.xlabel('Day', fontsize=18)
        plt.ylabel('Difference', fontsize=18)
        plt.errorbar(range(len(shuffled_median_value)), shuffled_median_value, yerr=(y_shuffle_error_lower, y_shuffle_error_upper), color='b', fmt="o", alpha=0.5)
        plt.errorbar(day_positions, real_values, yerr=(y_error_lower, y_error_upper), color='r', fmt="o", alpha=0.5)
        for j, txt in enumerate(indicate_sig):
            plt.annotate(txt, (j, y_axis[2]), size=30, horizontalalignment='center', color='k')
        plt.show()
In [78]:
# Scratch check of the permutation test on a single hive and metric:
# spread difference, first experiment, first 14 night values.
spread_column = 'diff_spread_all_tracked_all_xy'
first_hive_df = real_result_df_list[0]
night_df = first_hive_df[first_hive_df['time_period'] == 'night']
observed_spread = list(night_df[spread_column])[:14]
shuffled_spread = shuffled_permutations[0][spread_column]

indicate_sig = []
p_values = []
for day_index, observed in enumerate(observed_spread):
    # p = (1 + shuffles more extreme than observed) / (1 + number of shuffles)
    more_extreme = sum(
        1 for shuffled_days in shuffled_spread
        if abs(shuffled_days[day_index]) > abs(observed)
    )
    p_value = (1 + more_extreme) / (1 + len(shuffled_spread))
    p_values.append(p_value)

    # Star rating by threshold: <0.001 '***', <0.01 '**', <0.05 '*', otherwise none.
    if p_value < 0.001:
        stars = '***'
    elif p_value < 0.01:
        stars = '**'
    elif p_value < 0.05:
        stars = '*'
    else:
        stars = ''
    indicate_sig.append(stars)
print(list(zip(p_values, indicate_sig)))

plt.figure()
# Shuffled values sit behind everything else (very low zorder), real values on top.
for shuffled_days in shuffled_spread:
    plt.plot(shuffled_days[:14], linestyle="None", marker='o', color='b', zorder=-32)
plt.plot(observed_spread, linestyle="None", marker='o', color='r')
for day_index, stars in enumerate(indicate_sig):
    plt.annotate(stars, (day_index, 1.2), color='k', size=30, horizontalalignment='center')
plt.xlim(-0.5, 13.5)
plt.title('test spread')
plt.show()
In [93]:
# Columns to plot and, at the same position, the title used for each.
extended_metrics = ['median_all_tracked_speeds', 'mean_all_tracked_speeds', 'spread_all_tracked_all_xy', 'spread_min_tracked_individuals', 'percent_idle_all_tracked']
metrics_names = ['Median Speed All Bees Tracked', 'Mean Speed All Bees Tracked', 'Spread All Bees All Coordinates', 'Spread Filtered Bees Individuals', 'Percentage of Time Idle All Bees Tracked']

# One figure per metric with a line per hive over the first 25 rows; the
# background is shaded blue for 'night' rows and yellow for all others.
for k, metric in enumerate(extended_metrics):
    for i, df in enumerate(real_result_df_list):
        plt.plot(df[metric][0:25], marker='o', label='{}'.format(hive_names[i]))
    plt.title(metrics_names[k])
    plt.xlabel('Days')
    plt.xlim(-0.5,24.5)
    # Tick count follows the last hive plotted (df is the final loop value).
    x_len = len(df[metric][0:25])
    # Exactly one label per tick: blank on even positions, the day number on
    # odd ones ('', 1, '', 2, ...). Building the labels in pairs produced only
    # 24 labels for 25 ticks, a mismatch that matplotlib rejects.
    day_nums = ['' if pos % 2 == 0 else pos // 2 + 1 for pos in range(x_len)]
    plt.xticks(range(x_len), day_nums)
    plt.legend(loc='lower left')
    for j, time_period in enumerate(real_result_df_list[0]['time_period'][0:25]):
        if time_period == 'night':
            plt.axvspan(j - 0.5, j + 0.5, facecolor='b', alpha=0.1, edgecolor='none')
        else:
            plt.axvspan(j - 0.5, j + 0.5, facecolor='y', alpha=0.1, edgecolor='none')
    plt.show()
In [17]:
# Tick labels shared by the three figures: blank, 1, blank, 2, ... (24 entries).
x_len = 24
day_nums = []
for day in range(1, x_len // 2 + 1):
    day_nums += ['', day]

# (column, legend suffix, figure title) for each figure, drawn in this order.
network_plots = [
    ('median_node_degree', 'median node degree', 'Median Node Degree'),
    ('median_density', 'median density', 'Median Node Density'),
    ('median_clustering', 'median clustering', 'Median Node Clustering'),
]

for column, legend_suffix, figure_title in network_plots:
    # One line per hive over the first 25 rows.
    for hive_index, hive_df in enumerate(real_result_df_list):
        plt.plot(
            hive_df[column][0:25],
            marker='o',
            label='{} {}'.format(hive_names[hive_index], legend_suffix),
        )
    plt.title(figure_title)
    plt.xlabel('Days')
    plt.xticks(range(x_len), day_nums)
    plt.xlim(-0.5, 24.5)
    plt.legend(loc='upper right')
    # Background bands taken from the first hive's time periods:
    # blue for 'night', yellow for everything else.
    for position, time_period in enumerate(real_result_df_list[0]['time_period'][0:25]):
        band_colour = 'b' if time_period == 'night' else 'y'
        plt.axvspan(position - 0.5, position + 0.5, facecolor=band_colour, alpha=0.1, edgecolor='none')
    plt.show()
In [5]:
# Columns to plot and, at the same position, the legend label for each.
extended_metrics = [
    'mean_all_tracked_speeds',
    'mean_min_tracked_speeds',
    'median_all_tracked_speeds',
    'median_min_tracked_speeds',
    'spread_all_tracked_all_xy',
    'spread_all_tracked_individuals',
    'spread_min_tracked_all_xy',
    'spread_min_tracked_individuals',
]
metrics_names = [
    'Mean Speed All Bees Tracked',
    'Mean Speed Filtered Bees',
    'Median Speed All Bees Tracked',
    'Median Speed Filtered Bees',
    'Spread All Bees All Coordinates',
    'Spread All Bees Individuals',
    'Spread Filtered Bees All Coordinates',
    'Spread Filtered Bees Individuals',
]

# One figure per hive with every metric overlaid, first 25 rows each.
for hive_index, hive_df in enumerate(real_result_df_list):
    for column, legend_label in zip(extended_metrics, metrics_names):
        plt.plot(hive_df[column][0:25], marker='o', label=legend_label)

    # Tick labels: blank, 1, blank, 2, ... (24 entries).
    x_len = 24
    day_nums = []
    for day in range(1, x_len // 2 + 1):
        day_nums += ['', day]

    plt.title(hive_names[hive_index])
    plt.xlabel('Days')
    plt.xlim(-0.5, 24.5)
    plt.xticks(range(x_len), day_nums)
    plt.legend(loc='lower left')
    # Background bands from the first hive's time periods: blue at night, yellow otherwise.
    for position, time_period in enumerate(real_result_df_list[0]['time_period'][0:25]):
        band_colour = 'b' if time_period == 'night' else 'y'
        plt.axvspan(position - 0.5, position + 0.5, facecolor=band_colour, alpha=0.1, edgecolor='none')
    plt.show()
In [6]:
# Weather columns to plot and, at the same position, the title used for each.
weather_metrics = ['cloudCover', 'dewPoint', 'humidity', 'pressure', 'temperature', 'visibility', 'windSpeed']
weather_metrics_names = ['Cloud Cover', 'Dewpoint', 'Humidity', 'Pressure', 'Temperature', 'Visibility', 'Wind Speed']

# Weather files, listed in the same 2, 3, 4, 1 order as the experiment result files.
weather_files = [
    '../results/2_weather.csv',
    '../results/3_weather.csv',
    '../results/4_weather.csv',
    '../results/1_weather.csv',
]

# Lines starting with '#' are skipped; the first remaining line is the header.
weather_df_list = [
    pd.read_csv(csv_path, comment='#', header=0)
    for csv_path in weather_files
]

first_weather_df = weather_df_list[0]
print(first_weather_df.dtypes)
first_weather_df.head()
Out[6]:
In [7]:
# One figure per weather metric with a line per hive over the first 25 rows.
for metric_index, metric in enumerate(weather_metrics):
    # Tick labels: blank, 1, blank, 2, ... (24 entries).
    x_len = 24
    day_nums = []
    for day in range(1, x_len // 2 + 1):
        day_nums += ['', day]

    for hive_index, weather_df in enumerate(weather_df_list):
        plt.plot(weather_df[metric][0:25], marker='o', label='{}'.format(hive_names[hive_index]))

    plt.title(weather_metrics_names[metric_index])
    plt.xlabel('Days')
    plt.xticks(range(x_len), day_nums)
    plt.xlim(-0.5, 24.5)
    plt.legend(loc='lower left')
    # Background bands from the first weather file's time periods:
    # blue at night, yellow otherwise.
    for position, time_period in enumerate(weather_df_list[0]['time_period'][0:25]):
        band_colour = 'b' if time_period == 'night' else 'y'
        plt.axvspan(position - 0.5, position + 0.5, facecolor=band_colour, alpha=0.1, edgecolor='none')
    plt.show()