In [1]:
import os
import datetime
import pandas as pd
import numpy as np
from scipy.stats import linregress
import seaborn as sns
import copy
from IPython.display import display

import matplotlib.pyplot as plt
%matplotlib inline
from pylab import rcParams
# Notebook-wide plotting defaults: large figures and the ggplot style sheet.
rcParams.update({'figure.figsize': (20, 16)})
plt.style.use('ggplot')


/usr/local/lib/python3.5/site-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))

In [2]:
# One results CSV per hive. Later cells pair these with `hive_names` by position.
# NOTE(review): the files are listed as 2, 3, 4, 1 -- confirm that this order
# really corresponds to the hive names below.
experiment_files = [
    '../results/2_output.csv',
    '../results/3_output.csv',
    '../results/4_output.csv',
    '../results/1_output.csv',
]
hive_names = ['Cohort 1', 'Cohort 2', 'Normal Hive 1', 'Normal Hive 2']

# Lines starting with '#' in the CSVs are skipped; the first remaining line is the header.
experiment_df_list = [
    pd.read_csv(csv_path, comment='#', header=0)
    for csv_path in experiment_files
]

# Quick sanity check of the first experiment: column types and result types present.
first_experiment_df = experiment_df_list[0]
print(first_experiment_df.dtypes)
print(first_experiment_df['result_type'].unique())

first_experiment_df


day_num                                  int64
diff_mean_all_tracked_speeds           float64
diff_mean_clustering                   float64
diff_mean_density                      float64
diff_mean_min_tracked_speeds           float64
diff_mean_node_degree                  float64
diff_median_all_tracked_speeds         float64
diff_median_clustering                 float64
diff_median_density                    float64
diff_median_min_tracked_speeds         float64
diff_median_node_degree                float64
diff_percent_idle_all_tracked          float64
diff_percent_idle_min_tracked          float64
diff_spread_all_tracked_all_xy         float64
diff_spread_all_tracked_individuals    float64
diff_spread_min_tracked_all_xy         float64
diff_spread_min_tracked_individuals    float64
mean_all_tracked_speeds                float64
mean_clustering                        float64
mean_density                           float64
mean_min_tracked_speeds                float64
mean_node_degree                       float64
median_all_tracked_speeds              float64
median_clustering                      float64
median_density                         float64
median_min_tracked_speeds              float64
median_node_degree                     float64
percent_idle_all_tracked               float64
percent_idle_min_tracked               float64
result_type                             object
spread_all_tracked_all_xy              float64
spread_all_tracked_individuals         float64
spread_min_tracked_all_xy              float64
spread_min_tracked_individuals         float64
tag_type                                object
time_period                             object
dtype: object
['real' 'shuffled' 'bootstrapped']
Out[2]:
day_num diff_mean_all_tracked_speeds diff_mean_clustering diff_mean_density diff_mean_min_tracked_speeds diff_mean_node_degree diff_median_all_tracked_speeds diff_median_clustering diff_median_density diff_median_min_tracked_speeds ... median_node_degree percent_idle_all_tracked percent_idle_min_tracked result_type spread_all_tracked_all_xy spread_all_tracked_individuals spread_min_tracked_all_xy spread_min_tracked_individuals tag_type time_period
0 0 3.020365 0.138153 0.010347 3.062317 2.006401 3.269409 0.145162 0.009864 3.298449 ... 5.134129 0.943837 0.945175 real 10.944326 11.405257 10.910378 11.054341 0 night
1 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 2.848182 0.917139 0.917720 real 9.933599 10.153356 9.908420 9.772973 0 day
2 0 4.302984 0.138153 0.010347 4.302157 2.006401 4.320457 0.145162 0.009864 4.272902 ... 5.134129 0.959213 0.959444 real 9.590282 9.776511 9.589640 9.767503 1 night
3 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 2.848182 0.919611 0.919839 real 8.905367 8.915304 8.906722 8.923107 1 day
4 0 3.200401 0.138153 0.010347 3.183804 2.006401 3.611718 0.145162 0.009864 3.611718 ... 5.134129 0.932293 0.932539 real 10.641876 10.419949 10.643430 10.433664 2 night
5 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 2.848182 0.904360 0.904730 real 9.662102 9.422465 9.662725 9.401503 2 day
6 0 NaN 0.138153 0.010347 NaN 2.006401 NaN 0.145162 0.009864 NaN ... 5.134129 0.000000 0.000000 real NaN NaN NaN NaN 3 night
7 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 2.848182 0.893218 0.893218 real 6.621447 10.270627 6.621447 10.270627 3 day
8 0 3.727570 0.138153 0.010347 3.732812 2.006401 3.911097 0.145162 0.009864 3.911097 ... 5.134129 0.949009 0.949477 real 10.184953 10.469189 10.174605 10.311756 All night
9 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 2.848182 0.915367 0.915698 real 9.319706 9.397101 9.311868 9.256276 All day
10 0 -0.117888 0.003234 0.000200 -0.116042 0.084314 -0.110707 0.004721 -0.000138 -0.125966 ... 4.121429 0.927799 0.928706 shuffled 10.433374 10.608924 10.407933 10.209864 0 night
11 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 4.105072 0.929471 0.930413 shuffled 10.517005 10.665204 10.493190 10.311874 0 day
12 0 0.040428 0.003234 0.000200 0.039697 0.084314 0.023162 0.004721 -0.000138 0.023162 ... 4.121429 0.937716 0.937944 shuffled 9.101992 9.136624 9.102172 9.134611 1 night
13 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 4.105072 0.937306 0.937544 shuffled 9.329895 9.251775 9.330404 9.253349 1 day
14 0 0.058301 0.003234 0.000200 0.061659 0.084314 0.159171 0.004721 -0.000138 0.194647 ... 4.121429 0.916903 0.917205 shuffled 10.057955 9.762448 10.059269 9.759963 2 night
15 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 4.105072 0.918233 0.918555 shuffled 10.187894 9.866114 10.189082 9.858136 2 day
16 0 NaN 0.003234 0.000200 NaN 0.084314 NaN 0.004721 -0.000138 NaN ... 4.121429 0.000000 0.000000 shuffled NaN NaN NaN NaN 3 night
17 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 4.105072 0.893218 0.893218 shuffled 6.621447 10.270627 6.621447 10.270627 3 day
18 0 0.007821 0.003234 0.000200 0.008519 0.084314 0.000000 0.004721 -0.000138 0.039637 ... 4.121429 0.930382 0.930773 shuffled 9.631262 9.723704 9.622853 9.571361 All night
19 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 4.105072 0.930842 0.931252 shuffled 9.815050 9.834293 9.806595 9.687875 All day
20 0 2.851845 0.128921 0.010473 2.904591 1.913774 3.156308 0.135751 0.009908 3.191633 ... 5.100336 0.942522 0.943958 bootstrapped 11.045538 11.423545 11.008852 11.007540 0 night
21 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 2.922735 0.918514 0.919003 bootstrapped 9.902104 10.273422 9.870000 9.842186 0 day
22 0 4.296545 0.128921 0.010473 4.298063 1.913774 4.315086 0.135751 0.009908 4.315086 ... 5.100336 0.959649 0.959901 bootstrapped 9.367569 9.668718 9.366106 9.646146 1 night
23 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 2.922735 0.920330 0.920544 bootstrapped 9.049180 8.980535 9.050297 8.984355 1 day
24 0 3.552326 0.128921 0.010473 3.540803 1.913774 4.031129 0.135751 0.009908 4.031129 ... 5.100336 0.937821 0.938144 bootstrapped 10.828866 10.427326 10.830540 10.447823 2 night
25 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 2.922735 0.906842 0.907186 bootstrapped 10.033103 9.657617 10.034449 9.650147 2 day
26 0 NaN 0.128921 0.010473 NaN 1.913774 NaN 0.135751 0.009908 NaN ... 5.100336 0.000000 0.000000 bootstrapped NaN NaN NaN NaN 3 night
27 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 2.922735 0.923077 0.923077 bootstrapped 6.182185 10.714505 6.182185 10.714505 3 day
28 0 3.807345 0.128921 0.010473 3.817478 1.913774 3.981114 0.135751 0.009908 3.981114 ... 5.100336 0.950527 0.951043 bootstrapped 10.160382 10.433241 10.148419 10.243038 All night
29 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 2.922735 0.916450 0.916747 bootstrapped 9.499362 9.514587 9.491835 9.374044 All day
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
26100 12 1.334334 -0.241454 -0.001619 1.450051 -0.895109 1.157697 -0.302222 -0.002430 1.211485 ... 0.300000 0.950674 0.952648 bootstrapped 10.700082 11.367726 10.601685 10.594475 0 night
26101 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.309493 0.937984 0.938344 bootstrapped 9.850525 10.837459 9.700985 10.153979 0 day
26102 12 4.818055 -0.241454 -0.001619 4.823187 -0.895109 4.493087 -0.302222 -0.002430 4.462206 ... 0.300000 0.958728 0.959012 bootstrapped 11.029838 10.230476 11.029652 10.192290 1 night
26103 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.309493 0.905833 0.905825 bootstrapped 11.342809 11.155018 11.338339 11.127239 1 day
26104 12 -0.139286 -0.241454 -0.001619 -0.112368 -0.895109 0.800895 -0.302222 -0.002430 0.833830 ... 0.300000 0.954012 0.954458 bootstrapped 9.922268 8.526262 9.924266 8.515751 2 night
26105 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.309493 0.962797 0.962751 bootstrapped 10.415212 9.775775 10.416637 9.822381 2 day
26106 12 25.603128 -0.241454 -0.001619 25.603128 -0.895109 34.310349 -0.302222 -0.002430 34.310349 ... 0.300000 0.824992 0.824992 bootstrapped 7.388492 8.105724 7.388492 8.105724 3 night
26107 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.309493 0.304348 0.304348 bootstrapped 3.344322 3.271790 3.344322 3.271790 3 day
26108 12 2.154449 -0.241454 -0.001619 2.184887 -0.895109 2.293763 -0.302222 -0.002430 2.340943 ... 0.300000 0.955207 0.955764 bootstrapped 10.826116 9.978576 10.817469 9.692374 All night
26109 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.309493 0.936283 0.936301 bootstrapped 10.978678 11.289095 10.961825 11.029609 All day
26110 12 1.824125 -0.031081 -0.000441 1.868951 -0.076228 1.557028 -0.016176 0.000045 1.511779 ... 1.214286 0.949776 0.951908 shuffled 11.026323 11.326530 10.945150 10.491896 0 night
26111 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.264706 0.933762 0.934044 shuffled 9.420667 11.184974 9.151952 10.189263 0 day
26112 12 -0.872108 -0.031081 -0.000441 -0.855091 -0.076228 -0.654929 -0.016176 0.000045 -0.646191 ... 1.214286 0.954335 0.954648 shuffled 11.126129 10.300700 11.126955 10.273040 1 night
26113 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.264706 0.962759 0.962779 shuffled 9.441742 9.201356 9.440970 9.176669 1 day
26114 12 1.777568 -0.031081 -0.000441 1.746820 -0.076228 2.137904 -0.016176 0.000045 2.106434 ... 1.214286 0.953624 0.953977 shuffled 9.984000 8.779975 9.984911 8.766435 2 night
26115 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.264706 0.939100 0.939992 shuffled 8.120777 8.286566 8.132232 8.409153 2 day
26116 12 -5.956049 -0.031081 -0.000441 -5.956049 -0.076228 -0.118034 -0.016176 0.000045 -0.118034 ... 1.214286 0.854393 0.854393 shuffled 7.878152 7.983966 7.878152 7.983966 3 night
26117 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.264706 0.956445 0.956445 shuffled 7.770310 10.229486 7.770310 10.229486 3 day
26118 12 -0.244113 -0.031081 -0.000441 -0.240928 -0.076228 -0.313442 -0.016176 0.000045 -0.313442 ... 1.214286 0.952472 0.953026 shuffled 10.879993 10.096772 10.870719 9.817052 All night
26119 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.264706 0.954338 0.954700 shuffled 9.223177 9.450387 9.200480 9.150474 All day
26120 12 2.679168 -0.248998 -0.002698 2.962453 -0.945315 1.581139 -0.305564 -0.005265 1.664254 ... 0.272727 0.949243 0.951784 bootstrapped 11.116364 11.287857 11.067967 10.601303 0 night
26121 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.308167 0.904565 0.902174 bootstrapped 9.557230 11.099243 9.488019 11.246864 0 day
26122 12 5.223885 -0.248998 -0.002698 5.230086 -0.945315 4.609279 -0.305564 -0.005265 4.609279 ... 0.272727 0.956178 0.956429 bootstrapped 10.890327 10.288613 10.890950 10.270211 1 night
26123 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.308167 0.895453 0.895672 bootstrapped 11.894748 11.031881 11.886286 10.972977 1 day
26124 12 2.427910 -0.248998 -0.002698 2.453131 -0.945315 3.124969 -0.305564 -0.005265 3.151258 ... 0.272727 0.952819 0.953201 bootstrapped 9.813158 8.551876 9.814657 8.538122 2 night
26125 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.308167 0.939364 0.939485 bootstrapped 8.406747 9.256371 8.397433 9.217128 2 day
26126 12 NaN -0.248998 -0.002698 NaN -0.945315 NaN -0.305564 -0.005265 NaN ... 0.272727 0.914804 0.914804 bootstrapped 7.130447 8.717916 7.130447 8.717916 3 night
26127 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.308167 0.000000 0.000000 bootstrapped NaN NaN NaN NaN 3 day
26128 12 3.563506 -0.248998 -0.002698 3.614744 -0.945315 3.666723 -0.305564 -0.005265 3.684658 ... 0.272727 0.953279 0.953844 bootstrapped 10.683329 9.998309 10.675305 9.744998 All night
26129 12 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1.308167 0.920001 0.919942 bootstrapped 10.311362 11.185040 10.257271 10.840413 All day

26130 rows × 36 columns


In [3]:
def _all_tag_rows(frame, result_type):
    """Return the rows of `frame` with the given result_type and tag_type 'All'.

    The index is reset, so the original row positions are kept in a new
    'index' column.
    """
    wanted = (frame['result_type'] == result_type) & (frame['tag_type'] == 'All')
    return frame[wanted].reset_index()


# One list per result type, each holding one frame per experiment (same order
# as experiment_df_list).
real_result_df_list = [_all_tag_rows(frame, 'real') for frame in experiment_df_list]
shuffled_result_df_list = [_all_tag_rows(frame, 'shuffled') for frame in experiment_df_list]
bootstrapped_result_df_list = [_all_tag_rows(frame, 'bootstrapped') for frame in experiment_df_list]

# Show the real results of the first experiment; swap in shuffled_result_df_list[0]
# or bootstrapped_result_df_list to inspect the other result types.
real_result_df_list[0]


Out[3]:
index day_num diff_mean_all_tracked_speeds diff_mean_clustering diff_mean_density diff_mean_min_tracked_speeds diff_mean_node_degree diff_median_all_tracked_speeds diff_median_clustering diff_median_density ... median_node_degree percent_idle_all_tracked percent_idle_min_tracked result_type spread_all_tracked_all_xy spread_all_tracked_individuals spread_min_tracked_all_xy spread_min_tracked_individuals tag_type time_period
0 8 0 3.727570 0.138153 0.010347 3.732812 2.006401 3.911097 0.145162 0.009864 ... 5.134129 0.949009 0.949477 real 10.184953 10.469189 10.174605 10.311756 All night
1 9 0 NaN NaN NaN NaN NaN NaN NaN NaN ... 2.848182 0.915367 0.915698 real 9.319706 9.397101 9.311868 9.256276 All day
2 2018 1 0.031370 -0.021842 -0.006518 0.034699 -0.664056 -0.037521 -0.001897 -0.006333 ... 3.787198 0.920364 0.920637 real 10.127591 10.040519 10.124454 9.976552 All night
3 2019 1 NaN NaN NaN NaN NaN NaN NaN NaN ... 4.251588 0.918329 0.918520 real 11.104734 10.882603 11.096950 10.765507 All day
4 4028 2 1.294042 -0.035528 -0.002035 1.279400 -0.182699 0.685316 -0.009045 -0.001911 ... 3.114318 0.930949 0.931214 real 11.333139 11.174720 11.323622 11.065927 All night
5 4029 2 NaN NaN NaN NaN NaN NaN NaN NaN ... 3.164447 0.909746 0.910220 real 12.077301 11.381253 12.069657 11.265147 All day
6 6038 3 -0.100310 0.065866 -0.000525 -0.091905 0.442375 -0.222089 0.074876 -0.000560 ... 2.869897 0.939839 0.940156 real 10.820764 10.064677 10.820742 9.981054 All night
7 6039 3 NaN NaN NaN NaN NaN NaN NaN NaN ... 2.349022 0.939808 0.939992 real 11.075039 10.387023 11.068490 10.270358 All day
8 8048 4 1.418010 -0.016678 0.001137 1.382756 0.107075 0.575571 -0.001176 0.001206 ... 2.747464 0.952712 0.953000 real 11.135516 10.373976 11.135168 10.288457 All night
9 8049 4 NaN NaN NaN NaN NaN NaN NaN NaN ... 2.595357 0.930737 0.931540 real 11.021027 10.596527 11.006094 10.436244 All day
10 10058 5 2.131969 0.046892 0.001399 2.123229 0.348734 1.191967 0.057387 0.001207 ... 2.451087 0.951903 0.952437 real 11.210403 10.876123 11.200470 10.701018 All night
11 10059 5 NaN NaN NaN NaN NaN NaN NaN NaN ... 2.077003 0.919456 0.920091 real 10.971206 10.136349 10.958798 9.980513 All day
12 12068 6 2.390443 0.040072 0.000378 2.400184 0.231568 1.672634 0.037536 -0.000251 ... 2.100632 0.948416 0.949184 real 12.016367 11.404208 12.006137 11.178519 All night
13 12069 6 NaN NaN NaN NaN NaN NaN NaN NaN ... 1.861624 0.915131 0.915755 real 11.574726 9.872526 11.569398 9.704641 All day
14 14078 7 4.087537 0.080315 -0.000837 4.138952 0.380590 3.409079 0.073386 -0.001077 ... 1.890110 0.944823 0.946219 real 12.765470 11.816743 12.758971 11.598293 All night
15 14079 7 NaN NaN NaN NaN NaN NaN NaN NaN ... 1.549194 0.890114 0.890835 real 11.730942 9.878437 11.726003 9.722188 All day
16 16088 8 3.223703 0.097598 0.001455 3.282759 0.504733 2.463223 0.083031 0.001307 ... 1.660273 0.936591 0.938240 real 12.576696 11.456850 12.576951 11.234677 All night
17 16089 8 NaN NaN NaN NaN NaN NaN NaN NaN ... 1.202941 0.890162 0.890949 real 11.561709 9.774157 11.551448 9.542956 All day
18 18098 9 5.212056 0.082060 0.003053 5.243538 0.447041 4.670832 0.095476 0.003435 ... 1.869565 0.950302 0.951451 real 12.569939 10.812795 12.576334 10.611415 All night
19 18099 9 NaN NaN NaN NaN NaN NaN NaN NaN ... 1.303337 0.889455 0.890099 real 11.384044 9.608958 11.374145 9.387318 All day
20 20108 10 6.610898 0.150634 0.006240 6.634919 0.735584 6.307743 0.152455 0.006317 ... 1.771213 0.936706 0.937753 real 12.042819 10.243681 12.041855 9.984304 All night
21 20109 10 NaN NaN NaN NaN NaN NaN NaN NaN ... 1.037506 0.848373 0.849014 real 10.759027 9.270865 10.748867 9.070946 All day
22 22118 11 4.401962 0.135567 0.003151 4.427537 0.655859 3.816272 0.145063 0.003115 ... 1.556848 0.937552 0.938456 real 11.335028 10.312483 11.325518 10.016210 All night
23 22119 11 NaN NaN NaN NaN NaN NaN NaN NaN ... 0.978685 0.880893 0.881379 real 10.903631 9.110554 10.896411 8.888340 All day
24 24128 12 2.034245 -0.242169 -0.003239 2.050252 -0.926218 2.216991 -0.302776 -0.005474 ... 0.272727 0.953335 0.953884 real 10.798294 10.010503 10.789021 9.731230 All night
25 24129 12 NaN NaN NaN NaN NaN NaN NaN NaN ... 1.305556 0.934097 0.934470 real 10.369035 10.903431 10.344293 10.596636 All day

26 rows × 37 columns


In [4]:
# Day/night difference metrics that are compared against the shuffled results.
PERMUTATION_METRICS = [
    'diff_spread_all_tracked_all_xy',
    'diff_median_all_tracked_speeds',
    'diff_median_density',
    'diff_percent_idle_all_tracked',
]


def collect_night_permutations(result_df, metrics):
    """Regroup the night rows of `result_df` into one series per permutation.

    The k-th night row of each day is treated as belonging to permutation k,
    so rows must appear in the same permutation order on every day.

    Parameters
    ----------
    result_df : DataFrame with 'day_num', 'time_period' and the metric columns.
    metrics : iterable of metric column names.

    Returns
    -------
    dict mapping each metric to a list of lists: one inner list per
    permutation, holding that permutation's night value for every day in
    ascending day_num order.

    Raises
    ------
    ValueError
        If some day has fewer night rows than the first day (the original
        code failed with a bare IndexError in that case).

    Notes
    -----
    The number of permutations is the number of night rows on the first day.
    The original code used half of the day-0 row count, which is the same
    value whenever day 0 exists and has as many night rows as day rows.
    """
    day_nums = sorted(result_df['day_num'].unique())
    night_rows = result_df[result_df['time_period'] == 'night']
    night_rows_per_day = [night_rows[night_rows['day_num'] == day_num] for day_num in day_nums]
    num_perms = len(night_rows_per_day[0]) if night_rows_per_day else 0

    series_by_metric = {}
    for metric in metrics:
        series = [[] for _ in range(num_perms)]
        for day_num, day_rows in zip(day_nums, night_rows_per_day):
            night_values = list(day_rows[metric])
            if len(night_values) < num_perms:
                raise ValueError(
                    'day {} has {} night rows, expected at least {}'.format(
                        day_num, len(night_values), num_perms))
            # zip stops after num_perms values; surplus rows are ignored, as before.
            for permutation_series, value in zip(series, night_values):
                permutation_series.append(value)
        series_by_metric[metric] = series
    return series_by_metric


# One dict per experiment, in the same order as shuffled_result_df_list.
shuffled_permutations = [
    collect_night_permutations(shuffled_df, PERMUTATION_METRICS)
    for shuffled_df in shuffled_result_df_list
]

print(shuffled_permutations[0]['diff_spread_all_tracked_all_xy'][0])


[0.1837881265283112, -0.040117690770541621, -0.17633706936921278, 0.095144232074222401, -0.12788181637796292, 0.17385101392836333, 0.17605830151413038, 0.17619568833483967, 0.040446375936904033, 0.041797692274547771, -0.0046216161158518787, 0.2766907763992954, 0.93926691919052641]

In [5]:
# Per metric: figure title and (upper, lower) y-axis limits.
formatting = {
    'diff_spread_all_tracked_all_xy': ('Difference in Day and Night Spread for All Bees Tracked', (2, -3)),
    'diff_median_all_tracked_speeds': ('Difference in Day and Night Median Speed for All Bees Tracked', (7.5, -2)),
    'diff_median_density': ('Difference in Day and Night Median Density for All Bees Tracked', (0.15, -0.02)),
    'diff_percent_idle_all_tracked': ('Difference in Day and Night Percentage of Time Idle for All Bees Tracked', (0.1, -0.1)),
}

# One figure per hive and metric: every shuffled permutation in blue, the
# real night values in red on top.
for i, df in enumerate(real_result_df_list):
    night_df = df[df['time_period'] == 'night']

    for metric, shuffled_series in shuffled_permutations[i].items():
        metric_title, y_axis = formatting[metric]
        print(metric_title, y_axis)

        plt.figure()
        for shuffled_days in shuffled_series:
            plt.plot(shuffled_days[0:14], marker='o', color='b', linestyle="None")
        plt.plot(list(night_df[metric])[0:14], marker='o', color='r', linestyle="None")

        plt.title('{} {}'.format(hive_names[i], metric_title))
        # Axis labels match the significance figure drawn later in the notebook.
        plt.xlabel('Day')
        plt.ylabel('Difference')
        plt.xlim(-0.5, 13.5)
        # y_axis is (upper, lower); ylim takes (bottom, top).
        plt.ylim(y_axis[1], y_axis[0])
        plt.show()


Difference in Day and Night Percentage of Time Idle for All Bees Tracked (0.1, -0.1)
Difference in Day and Night Median Speed for All Bees Tracked (7.5, -2)
Difference in Day and Night Median Density for All Bees Tracked (0.15, -0.02)
Difference in Day and Night Spread for All Bees Tracked (2, -3)
Difference in Day and Night Percentage of Time Idle for All Bees Tracked (0.1, -0.1)
Difference in Day and Night Median Speed for All Bees Tracked (7.5, -2)
Difference in Day and Night Median Density for All Bees Tracked (0.15, -0.02)
Difference in Day and Night Spread for All Bees Tracked (2, -3)
Difference in Day and Night Percentage of Time Idle for All Bees Tracked (0.1, -0.1)
Difference in Day and Night Median Speed for All Bees Tracked (7.5, -2)
Difference in Day and Night Median Density for All Bees Tracked (0.15, -0.02)
Difference in Day and Night Spread for All Bees Tracked (2, -3)
Difference in Day and Night Percentage of Time Idle for All Bees Tracked (0.1, -0.1)
Difference in Day and Night Median Speed for All Bees Tracked (7.5, -2)
Difference in Day and Night Median Density for All Bees Tracked (0.15, -0.02)
Difference in Day and Night Spread for All Bees Tracked (2, -3)

In [6]:
# For every experiment, regroup the bootstrapped night rows so that each
# bootstrap replicate becomes one list of per-day values, keyed by metric.
bootstrapped_permutations = []
bootstrap_metrics = [
    'diff_spread_all_tracked_all_xy',
    'diff_median_all_tracked_speeds',
    'diff_median_density',
    'diff_percent_idle_all_tracked',
]

for bootstrapped_df in bootstrapped_result_df_list:
    # Day 0 holds one night and one day row per replicate, hence the halving.
    first_day_rows = bootstrapped_df[bootstrapped_df['day_num'] == 0]
    num_perms = int(len(first_day_rows) / 2)
    permutations = {
        metric: [[] for _ in range(num_perms)]
        for metric in bootstrap_metrics
    }

    for day_num in sorted(bootstrapped_df['day_num'].unique()):
        same_day = bootstrapped_df[bootstrapped_df['day_num'] == day_num]
        night_df = same_day[same_day['time_period'] == 'night']

        for metric, replicate_series in permutations.items():
            night_metric = list(night_df[metric])
            # The k-th night row of the day belongs to replicate k.
            for replicate_index in range(num_perms):
                replicate_series[replicate_index].append(night_metric[replicate_index])

    bootstrapped_permutations.append(permutations)

In [7]:
# Per metric: figure title and (upper, lower) y-axis limits.
formatting = {
    'diff_spread_all_tracked_all_xy': (
        'Difference in Day and Night Spread for All Bees Tracked', (2, -3)),
    'diff_median_all_tracked_speeds': (
        'Difference in Day and Night Median Speed for All Bees Tracked', (7.5, -2)),
    'diff_median_density': (
        'Difference in Day and Night Median Density for All Bees Tracked', (0.01, -0.02)),
    'diff_percent_idle_all_tracked': (
        'Difference in Day and Night Percentage of Time Idle for All Bees Tracked', (0.1, -0.1)),
}

# One figure per hive and metric: bootstrap replicates in blue, real night
# values in red.
for hive_index, hive_df in enumerate(real_result_df_list):
    is_night = hive_df['time_period'] == 'night'
    night_df = hive_df[is_night]

    for metric in bootstrapped_permutations[0].keys():
        plt.figure()

        replicate_series = bootstrapped_permutations[hive_index][metric]
        for bootstrapped_days in replicate_series:
            plt.plot(bootstrapped_days[0:14], marker='o', color='b', linestyle="None")

        real_nights = list(night_df[metric])[0:14]
        plt.plot(real_nights, marker='o', color='r', linestyle="None")

        metric_title, y_axis = formatting[metric]
        print(metric_title, y_axis)
        upper_limit, lower_limit = y_axis

        plt.title('{} {}'.format(hive_names[hive_index], metric_title))
        plt.xlim(-0.5, 13.5)
        plt.ylim(lower_limit, upper_limit)
        plt.show()


Difference in Day and Night Percentage of Time Idle for All Bees Tracked (0.1, -0.1)
Difference in Day and Night Median Speed for All Bees Tracked (7.5, -2)
Difference in Day and Night Median Density for All Bees Tracked (0.01, -0.02)
Difference in Day and Night Spread for All Bees Tracked (2, -3)
Difference in Day and Night Percentage of Time Idle for All Bees Tracked (0.1, -0.1)
Difference in Day and Night Median Speed for All Bees Tracked (7.5, -2)
Difference in Day and Night Median Density for All Bees Tracked (0.01, -0.02)
Difference in Day and Night Spread for All Bees Tracked (2, -3)
Difference in Day and Night Percentage of Time Idle for All Bees Tracked (0.1, -0.1)
Difference in Day and Night Median Speed for All Bees Tracked (7.5, -2)
Difference in Day and Night Median Density for All Bees Tracked (0.01, -0.02)
Difference in Day and Night Spread for All Bees Tracked (2, -3)
Difference in Day and Night Percentage of Time Idle for All Bees Tracked (0.1, -0.1)
Difference in Day and Night Median Speed for All Bees Tracked (7.5, -2)
Difference in Day and Night Median Density for All Bees Tracked (0.01, -0.02)
Difference in Day and Night Spread for All Bees Tracked (2, -3)

In [28]:
# Per metric: figure title and (y upper limit, y lower limit, y position of
# the significance stars).
formatting = {
    'diff_spread_all_tracked_all_xy': ('Difference in Day and Night Spread for All Bees Tracked', (2, -3, 1.8)),
    'diff_median_all_tracked_speeds': ('Difference in Day and Night Median Speed for All Bees Tracked', (9, -2.5, 8.5)),
    'diff_median_density': ('Difference in Day and Night Median Density for All Bees Tracked', (0.02, -0.025, 0.017)),
    'diff_percent_idle_all_tracked': ('Difference in Day and Night Percentage of Time Idle for All Bees Tracked', (0.1, -0.1, 0.09)),
}


def permutation_p_value(observed, permuted_values):
    """Return the two-sided permutation p-value of `observed`.

    Counts the permuted values whose absolute value is strictly greater than
    abs(observed), then adds 1 to both that count and the number of permuted
    values before dividing.

    NaN permuted values are ignored. A NaN `observed` returns NaN: comparing
    against NaN is always False, so it would otherwise get the smallest
    possible p-value and be flagged as significant.
    """
    if np.isnan(observed):
        return np.nan
    valid = [value for value in permuted_values if not np.isnan(value)]
    num_more_extreme = sum(1 for value in valid if abs(value) > abs(observed))
    return (num_more_extreme + 1) / (len(valid) + 1)


def significance_marker(p_value):
    """Map a p-value to '', '*', '**' or '***' (thresholds 0.05, 0.01, 0.001)."""
    if np.isnan(p_value) or p_value >= 0.05:
        return ''
    if p_value < 0.001:
        return '***'
    if p_value < 0.01:
        return '**'
    return '*'


for i, df in enumerate(real_result_df_list):
    night_df = df[df['time_period'] == 'night']

    for metric in shuffled_permutations[0].keys():
        metric_title, y_axis = formatting[metric]
        real_values = list(night_df[metric])[0:14]
        x_positions = list(range(len(real_values)))

        # Transpose "one series per permutation" into "one tuple per day".
        shuffled_by_day = list(zip(*shuffled_permutations[i][metric]))
        bootstrapped_by_day = list(zip(*bootstrapped_permutations[i][metric]))

        p_values = [
            permutation_p_value(real_value, shuffled_by_day[j])
            for j, real_value in enumerate(real_values)
        ]
        indicate_sig = [significance_marker(p_value) for p_value in p_values]

        # 95% percentile interval of the bootstrapped values for each day.
        bootstrap_bounds = [np.percentile(day_results, [2.5, 97.5]) for day_results in bootstrapped_by_day]
        ci_lower = np.array([bootstrap_bounds[j][0] for j in x_positions])
        ci_upper = np.array([bootstrap_bounds[j][1] for j in x_positions])

        # Full range (min, max) of the shuffled values for each day.
        shuffled_bounds = [np.percentile(day_results, [0, 100]) for day_results in shuffled_by_day]
        shuffled_low = np.array([shuffled_bounds[j][0] for j in x_positions])
        shuffled_high = np.array([shuffled_bounds[j][1] for j in x_positions])
        # NOTE(review): the blue marker sits at the midpoint of min and max (the
        # original took np.median of those two bounds), not at the median of
        # the shuffled distribution -- confirm this is intended.
        shuffled_midpoints = (shuffled_low + shuffled_high) / 2
        shuffled_half_range = (shuffled_high - shuffled_low) / 2

        plt.figure()
        plt.plot(real_values, marker='o', color='r', linestyle="None")

        plt.title('{} {}'.format(hive_names[i], metric_title))
        plt.xlim(-0.5, 13.5)
        # y_axis is (upper, lower, star height); ylim takes (bottom, top).
        plt.ylim(y_axis[1], y_axis[0])
        plt.xticks(x_positions, x_positions, size='large')
        plt.xlabel('Day', fontsize=18)
        plt.ylabel('Difference', fontsize=18)

        plt.errorbar(x_positions, shuffled_midpoints, yerr=shuffled_half_range, color='b', fmt="o", alpha=0.5)
        # Draw the bootstrap interval around its own centre, without a marker.
        # Anchoring it on the real value gives a negative error length whenever
        # the real value lies outside the interval. The bar still spans
        # ci_lower..ci_upper, and the real values are the red markers above.
        plt.errorbar(x_positions, (ci_lower + ci_upper) / 2, yerr=(ci_upper - ci_lower) / 2,
                     fmt='none', ecolor='r', alpha=0.5)

        for j, txt in enumerate(indicate_sig):
            if txt:
                plt.annotate(txt, (j, y_axis[2]), size=30, horizontalalignment='center', color='k')

        plt.show()



In [78]:
# Scratch check of the permutation test on a single hive and metric:
# prints (p-value, stars) per day and plots shuffled (blue) vs real (red).
test_metric = 'diff_spread_all_tracked_all_xy'
night_df = real_result_df_list[0][real_result_df_list[0]['time_period'] == 'night']
real_values = list(night_df[test_metric])[0:14]
shuffled_series = shuffled_permutations[0][test_metric]

indicate_sig = []
p_values = []

for i, result in enumerate(real_values):
    if np.isnan(result):
        # Comparisons with NaN are always False, which would otherwise give
        # the smallest possible p-value; report "undefined" instead.
        p_values.append(np.nan)
        indicate_sig.append('')
        continue

    # Shuffled values for this day, skipping NaNs so they do not count as
    # "not more extreme".
    day_values = [shuffled_days[i] for shuffled_days in shuffled_series]
    valid_values = [value for value in day_values if not np.isnan(value)]
    num_diff = 1 + sum(1 for value in valid_values if abs(value) > abs(result))
    num = 1 + len(valid_values)

    p_value = num_diff / num
    p_values.append(p_value)
    if p_value >= 0.05:
        indicate_sig.append('')
    elif p_value < 0.001:
        indicate_sig.append('***')
    elif p_value < 0.01:
        indicate_sig.append('**')
    else:
        indicate_sig.append('*')

print(list(zip(p_values, indicate_sig)))

plt.figure()
# A low zorder keeps the shuffled points behind the real values.
for shuffled_days in shuffled_series:
    plt.plot(shuffled_days[0:14], zorder=-32, marker='o', color='b', linestyle="None")

plt.plot(real_values, marker='o', color='r', linestyle="None")

for i, txt in enumerate(indicate_sig):
    plt.annotate(txt, (i, 1.2), size=30, horizontalalignment='center', color='k')

plt.xlim(-0.5, 13.5)

plt.title('test spread')
plt.show()


[(0.009900990099009901, '**'), (0.009900990099009901, '**'), (0.009900990099009901, '**'), (0.019801980198019802, '*'), (0.297029702970297, ''), (0.07920792079207921, ''), (0.009900990099009901, '**'), (0.009900990099009901, '**'), (0.009900990099009901, '**'), (0.009900990099009901, '**'), (0.009900990099009901, '**'), (0.0297029702970297, '*'), (0.4752475247524752, '')]

In [93]:
# Metric columns to plot and the matching figure titles (paired by position).
extended_metrics = ['median_all_tracked_speeds', 'mean_all_tracked_speeds', 'spread_all_tracked_all_xy', 'spread_min_tracked_individuals', 'percent_idle_all_tracked']
metrics_names = ['Median Speed All Bees Tracked', 'Mean Speed All Bees Tracked', 'Spread All Bees All Coordinates', 'Spread Filtered Bees Individuals', 'Percentage of Time Idle All Bees Tracked']

# One figure per metric, with one line per hive over the first 25 rows.
for metric, metric_name in zip(extended_metrics, metrics_names):

    for i, df in enumerate(real_result_df_list):
        plt.plot(df[metric][0:25], marker='o', label='{}'.format(hive_names[i]))
    plt.title(metric_name)
    plt.xlabel('Days')
    plt.xlim(-0.5, 24.5)

    # One tick per plotted position. Even positions get an empty label and odd
    # positions get the day number: '', 1, '', 2, ...
    # The label list must have exactly one entry per tick. The previous
    # pairwise construction produced 24 labels for 25 ticks.
    x_len = max(len(df[metric][0:25]) for df in real_result_df_list)
    day_nums = ['' if position % 2 == 0 else position // 2 + 1 for position in range(x_len)]
    plt.xticks(range(x_len), day_nums)

    plt.legend(loc='lower left')

    # Shade each position by the first hive's time_period: blue for night,
    # yellow otherwise.
    for j, time_period in enumerate(real_result_df_list[0]['time_period'][0:25]):
        shade = 'b' if time_period == 'night' else 'y'
        plt.axvspan(j - 0.5, j + 0.5, facecolor=shade, alpha=0.1, edgecolor='none')
    plt.show()



In [17]:
x_len = 24
# Tick labels for the x axis: a blank followed by the 1-based day number,
# repeated for x_len // 2 days ('', 1, '', 2, ..., '', 12).
day_nums = [label for day in range(1, x_len // 2 + 1) for label in ('', day)]


def plot_metric_by_hive(column, title, label_suffix):
    """Draw one figure comparing a single metric across all hives.

    Reads the notebook globals real_result_df_list, hive_names, x_len and
    day_nums.

    Parameters
    ----------
    column : str
        Column of each result frame to plot (first 25 rows only).
    title : str
        Figure title.
    label_suffix : str
        Text appended after the hive name in each legend entry.
    """
    for i, df in enumerate(real_result_df_list):
        plt.plot(df[column][0:25], marker='o', label='{} {}'.format(hive_names[i], label_suffix))

    # Axis decoration only needs to be applied once per figure, not once
    # per plotted hive.
    plt.title(title)
    plt.xlabel('Days')
    plt.xticks(range(x_len), day_nums)
    plt.xlim(-0.5,24.5)
    plt.legend(loc='upper right')

    # Shade every x position using the first hive's time_period column:
    # blue for 'night', yellow for anything else.
    for j, time_period in enumerate(real_result_df_list[0]['time_period'][0:25]):
        shade = 'b' if time_period == 'night' else 'y'
        plt.axvspan(j - 0.5, j + 0.5, facecolor=shade, alpha=0.1, edgecolor='none')
    plt.show()


plot_metric_by_hive('median_node_degree', 'Median Node Degree', 'median node degree')
plot_metric_by_hive('median_density', 'Median Node Density', 'median density')
plot_metric_by_hive('median_clustering', 'Median Node Clustering', 'median clustering')



In [5]:
# Columns drawn together on each hive's figure, with index-aligned legend labels.
extended_metrics = ['mean_all_tracked_speeds', 'mean_min_tracked_speeds', 'median_all_tracked_speeds', 'median_min_tracked_speeds', 'spread_all_tracked_all_xy', 'spread_all_tracked_individuals', 'spread_min_tracked_all_xy', 'spread_min_tracked_individuals']
metrics_names = ['Mean Speed All Bees Tracked', 'Mean Speed Filtered Bees', 'Median Speed All Bees Tracked', 'Median Speed Filtered Bees', 'Spread All Bees All Coordinates', 'Spread All Bees Individuals', 'Spread Filtered Bees All Coordinates', 'Spread Filtered Bees Individuals']

# The x-axis labelling is identical for every figure: a blank followed by the
# 1-based day number, repeated ('', 1, '', 2, ..., '', 12).
x_len = 24
day_nums = [tick for day in range(1, x_len // 2 + 1) for tick in ('', day)]

# One figure per hive, one line per metric.
for hive_idx, hive_df in enumerate(real_result_df_list):
    for metric, metric_label in zip(extended_metrics, metrics_names):
        plt.plot(hive_df[metric][0:25], marker='o', label='{}'.format(metric_label))

    plt.title(hive_names[hive_idx])
    plt.xlabel('Days')
    plt.xlim(-0.5, 24.5)
    plt.xticks(range(x_len), day_nums)

    plt.legend(loc='lower left')

    # Background bands follow the first hive's time_period column:
    # blue where it reads 'night', yellow otherwise.
    periods = real_result_df_list[0]['time_period'][0:25]
    for pos, period in enumerate(periods):
        band_colour = 'b' if period == 'night' else 'y'
        plt.axvspan(pos - 0.5, pos + 0.5, facecolor=band_colour, alpha=0.1, edgecolor='none')
    plt.show()



In [6]:
# Weather columns that get plotted, with index-aligned display names.
weather_metrics = ['cloudCover', 'dewPoint', 'humidity', 'pressure', 'temperature', 'visibility', 'windSpeed']
weather_metrics_names = ['Cloud Cover', 'Dewpoint', 'Humidity', 'Pressure', 'Temperature', 'Visibility', 'Wind Speed']

# One weather CSV per entry, in this order.
weather_files = [
    '../results/2_weather.csv',
    '../results/3_weather.csv',
    '../results/4_weather.csv',
    '../results/1_weather.csv',
]

# Read every file into its own DataFrame; lines starting with '#' are
# treated as comments and the first remaining row supplies the header.
weather_df_list = [pd.read_csv(path, comment='#', header=0) for path in weather_files]

print(weather_df_list[0].dtypes)
weather_df_list[0].head()


apparentTemperature    float64
cloudCover             float64
date                    object
dewPoint               float64
hour                   float64
humidity               float64
precipType              object
pressure               float64
temperature            float64
time_period             object
visibility             float64
windBearing            float64
windSpeed              float64
dtype: object
Out[6]:
apparentTemperature cloudCover date dewPoint hour humidity precipType pressure temperature time_period visibility windBearing windSpeed
0 19.400000 0.000000 2016-02-26 12.720000 NaN 0.664286 rain 1012.294286 19.400000 night 9.980 197.285714 1.692857
1 26.071429 0.310000 2016-02-26 9.255714 NaN 0.358571 rain 1014.447143 26.835714 day 11.362 220.000000 2.835714
2 17.311429 0.666667 2016-02-27 13.911429 NaN 0.805714 rain 1017.654286 17.311429 night 9.980 109.714286 2.495714
3 21.164286 0.916667 2016-02-27 13.742857 NaN 0.628571 rain 1019.222857 21.164286 day 9.980 67.857143 3.130000
4 16.374286 0.666667 2016-02-28 13.510000 NaN 0.834286 rain 1017.848571 16.374286 night 9.980 76.428571 1.985714

In [7]:
# The x-axis labelling does not depend on the metric: a blank followed by the
# 1-based day number, repeated ('', 1, '', 2, ..., '', 12).
x_len = 24
day_nums = [tick for day in range(1, x_len // 2 + 1) for tick in ('', day)]

# One figure per weather metric, one line per weather frame.
for metric, metric_title in zip(weather_metrics, weather_metrics_names):

    for idx, df in enumerate(weather_df_list):
        plt.plot(df[metric][0:25], marker='o', label='{}'.format(hive_names[idx]))
    plt.title(metric_title)
    plt.xlabel('Days')
    plt.xticks(range(x_len), day_nums)
    plt.xlim(-0.5, 24.5)
    plt.legend(loc='lower left')

    # Background bands follow the first weather frame's time_period column:
    # blue where it reads 'night', yellow otherwise.
    periods = weather_df_list[0]['time_period'][0:25]
    for pos, period in enumerate(periods):
        band_colour = 'b' if period == 'night' else 'y'
        plt.axvspan(pos - 0.5, pos + 0.5, facecolor=band_colour, alpha=0.1, edgecolor='none')
    plt.show()