This notebook calculates the Lagrangian rate of change of the log-scale $Chl_a$, using $\frac{D(\log_{e} Chl_a)}{Dt} = \frac{1}{Chl_a}\frac{D(Chl_a)}{Dt}$

  • A few remarks:
    • Unit $mg/(m^3 \cdot day)$
    • Natural logarithm added
    • All the rates on the same time frequency
    • Validate the rate of change of the log-scale Chl-a by finite differences (FD)
    • Monthly trends
      • aaa
      • aaa

In [8]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import xarray as xr
from datetime import datetime
import datetime

In [9]:
# Load the floats data from CSV
# *****************************
# See the readme file for the file-naming convention used in the experiments;
# for instance the "3" in the input name indicates that the distance column has
# been added to the dataset. This notebook derives the "4" file name from it.
# NOTE: the original author reported (unspecified) compatibility issues with
# the CSV format here.

# Drop any figures left over from a previous run. `plt.close('all')` alone is
# enough: calling `plt.cla()` / `plt.clf()` afterwards would implicitly create
# a new, empty figure, which `plt.show()` would then render.
plt.close('all')

# Sampling frequency encoded in the file name: `freq` periods of unit `suf`
# ('D' is presumably days -- TODO confirm against the readme).
freq = 2
suf = 'D'
in_filename = 'df_chl_out_' + str(freq) + suf + '_modisa_3.csv'
out_filename = 'df_chl_out_' + str(freq) + suf + '_modisa_4.csv'
folder = './data_collector_modisa_chla9km/'
direc = folder + in_filename

print('in_filename and path:', direc)
print('out_filename:', out_filename)

# The CSV stores the row labels in a column named 'index'.
df_chl_out_3 = pd.read_csv(direc, index_col='index')
df_chl_out_3


in_filename and path: ./data_collector_modisa_chla9km/df_chl_out_2D_modisa_3.csv
out_filename: df_chl_out_2D_modisa_4.csv
Out[9]:
id time lon ve var_lon var_tmp vn spd var_lat lat temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist
index
15828 34721 2002-11-03 67.570000 3.303000 0.000135 0.001823 2.865500 8.211375 0.000069 12.647250 29.435500 0.123307 -0.909012 -0.010569 NaN 514.4420
16081 34710 2002-11-05 63.160750 0.491500 0.000088 0.001596 10.219250 10.749875 0.000050 17.127000 28.991250 0.445077 -0.351565 0.033500 -1.474955 579.2250
16320 11089 2002-11-07 64.835375 -15.401500 0.000105 0.003391 2.263500 16.392000 0.000056 14.366875 28.939625 0.182671 -0.738330 0.024246 -1.615360 855.6750
16322 15707 2002-11-07 67.399125 -19.144750 0.000118 1000.000000 -21.695125 29.499000 0.000063 13.739500 NaN 0.161665 -0.791384 -0.004139 NaN 572.6910
16336 34315 2002-11-07 57.375375 -49.749875 0.000085 0.004589 4.788500 52.729375 0.000048 5.613000 29.647125 0.115356 -0.937960 -0.009359 NaN 824.3420
16340 34710 2002-11-07 63.170375 -0.728375 0.000148 0.001614 10.725375 11.110875 0.000075 17.297375 28.900250 0.376970 -0.423693 -0.068107 NaN 568.6320
16579 11089 2002-11-09 64.586375 -14.547875 0.000189 0.004779 -3.024000 15.786125 0.000090 14.356250 29.047125 0.194167 -0.711825 0.011496 -1.939453 878.5510
16581 15707 2002-11-09 67.213250 -5.045875 0.000183 1000.000000 -23.739500 24.623250 0.000089 13.350625 NaN 0.164932 -0.782695 0.003267 -2.485850 572.6400
16605 34721 2002-11-09 67.965125 13.566000 0.000215 0.002024 1.321875 15.994875 0.000099 12.587500 29.499125 0.143538 -0.843033 -0.006717 NaN 470.1010
16836 10206 2002-11-11 67.132250 0.870375 0.001112 1000.000000 0.028125 1.862250 0.000387 11.153750 NaN 0.125101 -0.902739 -0.010332 NaN 540.8280
16838 11089 2002-11-11 64.346000 -17.929000 0.000106 0.003450 -3.980500 18.692500 0.000056 14.287500 28.949875 0.192742 -0.715024 -0.001425 NaN 866.4410
16864 34721 2002-11-11 68.167500 10.140250 0.000095 0.001749 12.864500 17.991375 0.000052 12.704750 29.393250 0.116437 -0.933909 -0.027101 NaN 452.2190
17097 11089 2002-11-13 64.063625 -17.144125 0.000109 0.003639 -8.458625 20.047500 0.000058 14.201750 28.580125 0.310467 -0.507985 0.117725 -0.929131 848.3430
17376 34710 2002-11-15 62.952000 3.871500 0.000097 0.001616 42.243000 42.566500 0.000054 18.600125 27.833875 0.588544 -0.230221 0.096043 -1.017532 467.1530
17631 34315 2002-11-17 56.618250 27.657625 0.000099 0.003358 6.597500 28.794500 0.000053 8.295375 28.890875 0.135714 -0.867375 -0.023010 NaN 520.5600
17890 34315 2002-11-19 57.227875 40.891500 0.000082 0.003607 -16.705250 45.736625 0.000045 8.237625 28.946000 0.134862 -0.870110 -0.000852 NaN 560.4820
18412 34710 2002-11-23 61.505875 -28.221000 0.000107 0.001671 50.589125 58.013875 0.000058 21.247250 27.339625 0.461292 -0.336024 -0.053822 NaN 204.7940
18671 34710 2002-11-25 61.228750 3.868750 0.000083 0.001570 36.908250 40.428250 0.000045 21.894000 27.418125 0.472869 -0.325259 0.011577 -1.936404 149.9980
18930 34710 2002-11-27 61.789250 56.797500 0.000107 0.001730 59.141000 82.187625 0.000056 22.656125 27.353875 0.490433 -0.309420 0.017564 -1.755377 201.5180
19185 34315 2002-11-29 57.628250 -22.780000 0.000093 0.004921 -22.870625 32.527125 0.000050 6.674250 29.091000 0.144218 -0.840981 0.001275 -2.894491 732.5160
19189 34710 2002-11-29 62.757750 53.614125 0.000108 0.001707 -7.459750 60.761625 0.000057 23.191000 27.267125 0.538974 -0.268432 0.048541 -1.313891 215.2650
19430 15707 2002-12-01 64.433875 -11.834500 0.000121 1000.000000 -16.979625 21.206625 0.000064 11.775250 NaN 0.155115 -0.809346 -0.010187 NaN 839.5940
19444 34315 2002-12-01 57.136625 -43.623250 0.000097 0.004656 -13.519000 47.304750 0.000053 6.325875 29.152500 0.132107 -0.879074 -0.012111 NaN 741.0380
19448 34710 2002-12-01 63.540375 28.900750 0.000095 0.001725 -69.216625 78.193125 0.000052 22.506000 27.212750 2.014595 0.304188 1.475621 0.168975 291.2670
19454 34721 2002-12-01 68.144750 0.107500 0.000105 0.001705 8.952375 10.363500 0.000057 14.616000 28.928125 0.127701 -0.893806 -0.026448 NaN 550.8700
19703 34315 2002-12-03 56.320500 -60.573375 0.000100 0.003538 5.745375 61.158375 0.000053 6.289875 28.744000 0.141334 -0.849753 0.009227 -2.034940 698.6280
19707 34710 2002-12-03 63.536375 -16.860500 0.000196 0.001618 -72.597500 74.810750 0.000095 21.308000 27.071000 3.837143 0.584008 1.822548 0.260679 399.2320
19713 34721 2002-12-03 68.175500 0.083000 0.000107 0.001703 9.842750 12.343000 0.000058 14.761125 28.846000 0.129488 -0.887770 0.001787 -2.747874 558.2770
21002 34710 2002-12-13 64.403125 8.392625 0.000089 0.001617 11.196375 15.064125 0.000050 21.922750 26.346375 0.666574 -0.176152 0.180225 -0.744185 356.4060
21261 34710 2002-12-15 64.540375 6.939500 0.000188 0.001751 3.002125 8.841375 0.000092 22.029625 26.316750 0.586663 -0.231611 -0.079911 NaN 347.1890
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
639142 114917 2016-01-06 73.034500 3.679125 0.000030 0.001725 -11.626750 16.511875 0.000067 12.796250 29.599375 0.152041 -0.818039 0.002645 -2.577643 124.2240
640179 114945 2016-01-14 64.360375 -10.025125 0.000045 0.001819 -2.312875 10.748375 0.000103 11.534625 28.285875 0.165873 -0.780226 0.001062 -2.973958 842.6260
640437 114917 2016-01-16 72.883375 13.589000 0.000032 0.001995 18.156000 24.358875 0.000072 13.019500 29.498375 0.145469 -0.837230 -0.010957 NaN 147.4100
641474 114945 2016-01-24 63.422125 -15.508375 0.000052 0.001857 -2.947125 16.809250 0.000119 11.429875 27.825375 0.230810 -0.636745 0.027577 -1.559458 947.0290
641507 147127 2016-01-24 63.999375 -7.378750 0.000029 0.001707 -11.448125 14.115250 0.000062 17.388750 26.014875 0.723136 -0.140780 0.370582 -0.431116 632.1170
642251 114945 2016-01-30 62.758750 -21.994625 0.000081 0.001775 -1.689000 22.978750 0.000176 11.332000 27.811250 0.249393 -0.603116 -0.007247 NaN 902.8360
642284 147127 2016-01-30 63.762875 -11.290750 0.000033 0.001761 -21.071375 24.077750 0.000075 16.785125 25.873000 0.498726 -0.302138 -0.072032 NaN 654.2140
643061 147127 2016-02-05 63.346875 7.951375 0.000030 0.001599 -13.005125 18.579625 0.000068 16.240750 25.755250 0.557763 -0.253550 -0.001579 NaN 655.4240
643320 147127 2016-02-07 63.507125 3.390500 0.000018 0.002040 5.950500 12.145125 0.000041 16.145375 25.923750 1.839371 0.264669 1.281608 0.107755 676.9840
643579 147127 2016-02-09 63.413000 -14.578000 0.000025 0.001646 -3.235750 16.112750 0.000057 16.239500 25.853875 2.783544 0.444598 0.944173 -0.024948 664.6880
643838 147127 2016-02-11 63.122250 -18.242875 0.000015 0.001719 -4.686250 19.574125 0.000035 16.148625 25.919750 0.628723 -0.201541 -2.154821 NaN 645.3280
644064 114945 2016-02-13 61.092375 -17.913500 0.000190 0.001932 10.498250 21.127750 0.000369 11.686500 27.474500 0.323277 -0.490425 0.003536 -2.451548 719.9600
644097 147127 2016-02-13 62.951250 -9.086625 0.000021 0.001674 0.620625 13.320375 0.000050 16.119125 25.953375 0.690027 -0.161134 0.061304 -1.212510 629.2100
644107 60150420 2016-02-13 61.325500 17.384375 0.000005 0.001684 23.459000 29.365250 0.000003 8.555625 27.533875 0.181103 -0.742074 0.002238 -2.650210 865.5090
644323 114945 2016-02-15 60.796625 -17.572750 0.000057 0.001992 4.481250 18.944375 0.000130 11.797125 27.450125 0.304521 -0.516382 -0.018756 NaN 684.1940
644366 60150420 2016-02-15 61.501125 3.067375 0.000005 0.001684 19.457750 20.727875 0.000003 8.905125 27.459125 0.191219 -0.718470 0.010115 -1.995016 860.9300
646169 147127 2016-02-29 62.393375 -2.211750 0.000021 0.001621 -3.316000 4.783375 0.000047 16.148375 26.163125 1.065263 0.027457 0.489301 -0.310424 576.3870
646428 147127 2016-03-02 62.357000 -1.091125 0.000018 0.001607 -1.968500 4.452125 0.000040 16.115000 26.305625 0.748080 -0.126052 -0.317182 NaN 575.3420
646654 114945 2016-03-04 58.558000 -11.817250 0.000271 0.002018 2.511250 12.682625 0.000499 12.121625 27.794250 0.197348 -0.704766 0.012846 -1.891234 437.1820
646687 147127 2016-03-04 62.347500 -0.529750 0.000019 0.001620 -5.424875 8.089875 0.000046 16.075375 26.686250 0.581469 -0.235474 -0.166612 NaN 577.8640
646908 114873 2016-03-06 55.233625 -19.139750 0.000059 0.001751 -7.309125 20.571625 0.000130 8.470125 27.989000 0.153596 -0.813619 -0.004269 NaN 445.3440
646946 147127 2016-03-06 62.302000 -9.565750 0.000014 0.001622 -17.889250 21.602500 0.000031 15.891250 26.725375 0.461541 -0.335790 -0.119928 NaN 584.7310
647190 127429 2016-03-08 73.433000 -38.342250 0.000025 0.001743 2.449500 39.909500 0.000058 5.192875 29.816375 0.114650 -0.940624 -0.003402 NaN 17.5207
647205 147127 2016-03-08 62.019875 -22.442000 0.000022 0.001673 -4.468125 26.933625 0.000051 15.642250 26.796250 0.393819 -0.404703 -0.067722 NaN 577.7550
647464 147127 2016-03-10 61.795250 0.052125 0.000021 0.001705 12.220000 13.752125 0.000048 15.758375 27.590750 0.393192 -0.405396 -0.000627 NaN 552.5030
647474 60150420 2016-03-10 61.905750 -26.666750 0.000005 0.001684 -0.501250 26.782875 0.000003 10.351625 28.316750 0.125900 -0.899975 -0.018524 NaN 838.9730
647685 114873 2016-03-12 54.483500 -11.473250 0.000021 0.001851 -8.404250 14.929250 0.000048 8.216875 28.428125 0.128125 -0.892368 -0.009478 NaN 417.0810
647944 114873 2016-03-14 54.361125 -7.151000 0.000019 0.001713 -5.623625 9.931875 0.000044 8.080250 28.609750 0.125732 -0.900553 -0.002392 NaN 414.2320
649287 60150420 2016-03-24 58.873125 -38.982125 0.000005 0.001684 -11.351875 40.748625 0.000003 9.988250 28.701250 0.159817 -0.796378 -0.100210 NaN 549.9370
649536 147127 2016-03-26 61.967375 -0.919500 0.000018 0.001664 -15.201000 16.286000 0.000043 14.534125 28.357250 0.297591 -0.526380 0.019124 -1.718413 661.8910

805 rows × 16 columns


In [10]:
# Put the log-scale rate on a per-day basis:
#   D(ln Chl_a)/Dt = (1 / Chl_a) * D(Chl_a)/Dt
# `chl_rate` is divided by `chlor_a` and by the length of the sampling step.
# The step comes from `freq` (set in the loading cell) instead of a hard-coded
# 2.0, so the result stays correct if a file with another sampling frequency
# is loaded.
# NOTE(review): assumes `chl_rate` is the change over one sampling step of
# `freq` days -- confirm for frequency units other than 'D'.
step_days = float(freq)
check1 = df_chl_out_3.chl_rate / df_chl_out_3.chlor_a
check1 = check1 / step_days
check2 = df_chl_out_3.chl_rate.divide(step_days * df_chl_out_3.chlor_a, axis='index')
# Sanity check: sum of absolute differences between the two formulations.
np.sum(abs(check1 - check2))


Out[10]:
0.0

In [11]:
# Store the per-day log-scale rate as a new column of the dataframe.
# The values are handed over as a plain array, i.e. row by row in the
# dataframe's existing order.
df_chl_out_3['chlor_a_logE_rate'] = check2.values
df_chl_out_3.head()


Out[11]:
id time lon ve var_lon var_tmp vn spd var_lat lat temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
15828 34721 2002-11-03 67.570000 3.303000 0.000135 0.001823 2.865500 8.211375 0.000069 12.647250 29.435500 0.123307 -0.909012 -0.010569 NaN 514.442 -0.042856
16081 34710 2002-11-05 63.160750 0.491500 0.000088 0.001596 10.219250 10.749875 0.000050 17.127000 28.991250 0.445077 -0.351565 0.033500 -1.474955 579.225 0.037634
16320 11089 2002-11-07 64.835375 -15.401500 0.000105 0.003391 2.263500 16.392000 0.000056 14.366875 28.939625 0.182671 -0.738330 0.024246 -1.615360 855.675 0.066365
16322 15707 2002-11-07 67.399125 -19.144750 0.000118 1000.000000 -21.695125 29.499000 0.000063 13.739500 NaN 0.161665 -0.791384 -0.004139 NaN 572.691 -0.012801
16336 34315 2002-11-07 57.375375 -49.749875 0.000085 0.004589 4.788500 52.729375 0.000048 5.613000 29.647125 0.115356 -0.937960 -0.009359 NaN 824.342 -0.040566

In [12]:
# Summary statistics of the new column; the distribution is more scattered
# on the left-hand (negative) side.
df_chl_out_3['chlor_a_logE_rate'].describe()


Out[12]:
count    805.000000
mean      -0.190115
std        1.170185
min      -15.737468
25%       -0.095636
50%       -0.005724
75%        0.082394
max        0.491977
Name: chlor_a_logE_rate, dtype: float64

In [13]:
# Map of the rate of change of log(chl_a) around the Arabian Sea region.
# Colour limits run from (median - 0.5 * std) up to the maximum of the rate.
# NOTE(review): these limits are generally not symmetric about zero, so the
# neutral colour of the diverging 'RdBu_r' map need not mark a zero rate.
color_min = check2.median() - 0.5 * check2.std()
color_max = check2.max()
fig, ax = plt.subplots(figsize=(12, 10))
df_chl_out_3.plot(
    kind='scatter',
    x='lon',
    y='lat',
    c='chlor_a_logE_rate',
    cmap='RdBu_r',
    vmin=color_min,
    vmax=color_max,
    edgecolor='none',
    ax=ax,
    title='rate of change of the log-scale chl-a',
)


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x11846e438>

In [14]:
# Histogram of the raw (non-standardized) log-scale rate, restricted to the
# window [-1.5, 0.5]; a few very small values lie to the left of it.
logE_rate_valid = df_chl_out_3['chlor_a_logE_rate'].dropna()
axdf_chl = logE_rate_valid.hist(bins=100, range=[-1.5, 0.5])
axdf_chl.set_title('histogram of the rate of change of the log-scale chl-a')


Out[14]:
<matplotlib.text.Text at 0x118590f60>

In [15]:
# Histogram of the standardized series: subtract the mean, divide by the
# standard deviation, then plot the same window [-1.5, 0.5] as before.
tmp = df_chl_out_3['chlor_a_logE_rate'].dropna()
center, spread = tmp.mean(), tmp.std()
tmp = (tmp - center) / spread
axdf_chl_stdan = tmp.hist(bins=100, range=[-1.5, 0.5])  # a few very small values lie to the left
axdf_chl_stdan.set_title('histogram of the standardized rate of change of the log-scale chl-a')


Out[15]:
<matplotlib.text.Text at 0x118fa1860>

In [16]:
# Finite-difference check: difference of the natural logs of two consecutive
# chlor_a values (0.182671 -> 0.194167), halved for the 2-day step.
0.5 * (np.log(0.194167) - np.log(0.182671))


Out[16]:
0.030515946441101893

In [17]:
###########################
# On 2D-subsampling Dataset
###########################
# Validation for id 11089, step 2002-11-07 -> 2002-11-09
# (chlor_a goes from 0.182671 to 0.194167):
#   Val 1 (column formula):
#     (0.194167 - 0.182671) / (2 * 0.194167) == 0.02960338265
#   Val 2 (finite difference of the natural log):
#     (np.log(0.194167) - np.log(0.182671)) / 2 == 0.030515946441101893
# The two values are very close; compare them with chlor_a_logE_rate of the
# 2002-11-09 row in the table below.

ordered_rows = df_chl_out_3.sort_values(by=['id', 'time'])
ordered_rows.head()


Out[17]:
id time lon ve var_lon var_tmp vn spd var_lat lat temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
16836 10206 2002-11-11 67.132250 0.870375 0.001112 1000.000000 0.028125 1.862250 0.000387 11.153750 NaN 0.125101 -0.902739 -0.010332 NaN 540.828 -0.041295
23311 10206 2002-12-31 64.190250 -11.078750 0.001012 1000.000000 -3.439750 11.838875 0.000368 12.543625 NaN 0.171085 -0.766787 0.016540 -1.781477 874.995 0.048337
16320 11089 2002-11-07 64.835375 -15.401500 0.000105 0.003391 2.263500 16.392000 0.000056 14.366875 28.939625 0.182671 -0.738330 0.024246 -1.615360 855.675 0.066365
16579 11089 2002-11-09 64.586375 -14.547875 0.000189 0.004779 -3.024000 15.786125 0.000090 14.356250 29.047125 0.194167 -0.711825 0.011496 -1.939453 878.551 0.029603
16838 11089 2002-11-11 64.346000 -17.929000 0.000106 0.003450 -3.980500 18.692500 0.000056 14.287500 28.949875 0.192742 -0.715024 -0.001425 NaN 866.441 -0.003697

In [18]:
# Parse the 'time' column into pandas datetimes; the format is inferred
# (an explicit one, e.g. format='%m/%d/%y %I:%M%p', could be passed instead).
df_chl_out_3['time'] = pd.to_datetime(df_chl_out_3.time)
# a check: first rows ordered by id, then time
df_chl_out_3.sort_values(['id', 'time']).head()


Out[18]:
id time lon ve var_lon var_tmp vn spd var_lat lat temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
16836 10206 2002-11-11 67.132250 0.870375 0.001112 1000.000000 0.028125 1.862250 0.000387 11.153750 NaN 0.125101 -0.902739 -0.010332 NaN 540.828 -0.041295
23311 10206 2002-12-31 64.190250 -11.078750 0.001012 1000.000000 -3.439750 11.838875 0.000368 12.543625 NaN 0.171085 -0.766787 0.016540 -1.781477 874.995 0.048337
16320 11089 2002-11-07 64.835375 -15.401500 0.000105 0.003391 2.263500 16.392000 0.000056 14.366875 28.939625 0.182671 -0.738330 0.024246 -1.615360 855.675 0.066365
16579 11089 2002-11-09 64.586375 -14.547875 0.000189 0.004779 -3.024000 15.786125 0.000090 14.356250 29.047125 0.194167 -0.711825 0.011496 -1.939453 878.551 0.029603
16838 11089 2002-11-11 64.346000 -17.929000 0.000106 0.003450 -3.980500 18.692500 0.000056 14.287500 28.949875 0.192742 -0.715024 -0.001425 NaN 866.441 -0.003697

In [19]:
# Write the augmented table to CSV, then read it back as a check.
# File-suffix convention:
#   *_3.csv -- 3 features: {temp, chl_rate, dist}
#   *_4.csv -- 4 features: {temp, chl_rate, dist, chlor_a_logE_rate}
# NOTE(review): `out_filename` carries no folder prefix, so the file is written
# to the current working directory rather than to `folder` -- confirm intended.
print('out_filename:', out_filename)
df_chl_out_3.to_csv(out_filename, index_label='index', sep=',')

# reload the file that was just written
test = pd.read_csv(out_filename, index_col='index')

# a check: first rows ordered by id, then time
test.sort_values(['id', 'time']).head()


out_filename: df_chl_out_2D_modisa_4.csv
Out[19]:
id time lon ve var_lon var_tmp vn spd var_lat lat temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
16836 10206 2002-11-11 67.132250 0.870375 0.001112 1000.000000 0.028125 1.862250 0.000387 11.153750 NaN 0.125101 -0.902739 -0.010332 NaN 540.828 -0.041295
23311 10206 2002-12-31 64.190250 -11.078750 0.001012 1000.000000 -3.439750 11.838875 0.000368 12.543625 NaN 0.171085 -0.766787 0.016540 -1.781477 874.995 0.048337
16320 11089 2002-11-07 64.835375 -15.401500 0.000105 0.003391 2.263500 16.392000 0.000056 14.366875 28.939625 0.182671 -0.738330 0.024246 -1.615360 855.675 0.066365
16579 11089 2002-11-09 64.586375 -14.547875 0.000189 0.004779 -3.024000 15.786125 0.000090 14.356250 29.047125 0.194167 -0.711825 0.011496 -1.939453 878.551 0.029603
16838 11089 2002-11-11 64.346000 -17.929000 0.000106 0.003450 -3.980500 18.692500 0.000056 14.287500 28.949875 0.192742 -0.715024 -0.001425 NaN 866.441 -0.003697

In [20]:
def iso_week_number(year, month, day):
    """Return the ISO week number of the calendar date (year, month, day)."""
    return datetime.datetime(year, month, day).isocalendar()[1]


## Check which ISO weeks bound the season of interest (1 November to 31 March)
for year in range(2002, 2017):
    print(str(year)+'-11-01 is week', iso_week_number(year, 11, 1))  # 44 or 45

print('----')
for year in range(2002, 2017):
    print(str(year)+'-3-31 is week', iso_week_number(year, 3, 31))  # 13 or 14


2002-11-01 is week 44
2003-11-01 is week 44
2004-11-01 is week 45
2005-11-01 is week 44
2006-11-01 is week 44
2007-11-01 is week 44
2008-11-01 is week 44
2009-11-01 is week 44
2010-11-01 is week 44
2011-11-01 is week 44
2012-11-01 is week 44
2013-11-01 is week 44
2014-11-01 is week 44
2015-11-01 is week 44
2016-11-01 is week 44
----
2002-3-31 is week 13
2003-3-31 is week 14
2004-3-31 is week 14
2005-3-31 is week 13
2006-3-31 is week 13
2007-3-31 is week 13
2008-3-31 is week 14
2009-3-31 is week 14
2010-3-31 is week 13
2011-3-31 is week 13
2012-3-31 is week 13
2013-3-31 is week 13
2014-3-31 is week 14
2015-3-31 is week 14
2016-3-31 is week 13

In [159]:
##### Weekly plots of the Lagrangian rate of change of the chl-a

# Index the observations by time and tag every row with its week number.
df_timed = df_chl_out_3.set_index('time')
try:
    # Newer pandas: `DatetimeIndex.week` is deprecated and later removed;
    # `isocalendar()` is the replacement.
    df_timed['week'] = df_timed.index.isocalendar().week.astype('int64').to_numpy()
except AttributeError:
    # Older pandas has no `DatetimeIndex.isocalendar`.
    df_timed['week'] = df_timed.index.week

# Select the weeks of the season: week <= 14 or week >= 44.
mask_NovMar = (df_timed.week <= 14) | (df_timed.week >= 44)
# `.copy()` makes the subset an independent frame, so that adding a column to
# it below is not an assignment onto a slice of `df_timed`
# (SettingWithCopyWarning).
df_timed_NovMar = df_timed[mask_NovMar].copy()

print('the min and max of the week index is %d, %d :' % (df_timed_NovMar.week.min(), df_timed_NovMar.week.max()) )
# Rotate the week number so that week 44 becomes week 1:
# weeks 44..53 map to 1..10 and weeks 1..14 map to 11..24.
df_timed_NovMar['week_rotate'] = (df_timed_NovMar.week + 10) % 53

# --- line plot: weekly mean (solid) and quartiles (dashed) of chl_rate ---
weekly_chl_rate = df_timed_NovMar.groupby(['week_rotate'])['chl_rate']
axes1 = weekly_chl_rate.mean().plot(linestyle="-", color='b', linewidth=1)
weekly_chl_rate.quantile(.75).plot(linestyle="--", color='g', linewidth=0.35)
weekly_chl_rate.quantile(.50).plot(linestyle="--", color='r', linewidth=0.75)
weekly_chl_rate.quantile(.25).plot(linestyle="--", color='g', linewidth=0.35)
axes1.set_ylim(-3,2)
axes1.set_title("Line plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.yticks(np.arange(-3, 2, 0.5))
plt.xticks(np.arange(1, 25, 1))
plt.show()


# --- box plot of chl_rate per rotated week ---
# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
# http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chl_rate', by='week_rotate')
plt.suptitle("")  # remove the automatic "grouped by" super-title
axes2.set_ylim(-1.6,1.6)
axes2.set_title("Box plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.show()

# the rate of change is slower on the regular scale


the min and max of the week index is 1, 53 :

In [160]:
# Weekly plots of the Lagrangian rate of change of the log-scale chl-a,
# i.e. the relative rate (1 / Chl_a) * D(Chl_a)/Dt.
# The concentration unit cancels in that ratio, so the axis unit is per day
# and not mg/(m^3 day).

# --- line plot: weekly mean (solid) and quartiles (dashed) ---
weekly_logE_rate = df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate']
axes1 = weekly_logE_rate.mean().plot(linestyle="-", color='b', linewidth=1)
weekly_logE_rate.quantile(.75).plot(linestyle="--", color='g', linewidth=0.35)
weekly_logE_rate.quantile(.50).plot(linestyle="--", color='r', linewidth=0.75)
weekly_logE_rate.quantile(.25).plot(linestyle="--", color='g', linewidth=0.35)
axes1.set_ylim(-1,0.5)
axes1.set_title("Line plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $day^{-1}$', fontsize=10)
plt.yticks(np.arange(-1, 0.5, 0.25))
plt.xticks(np.arange(1, 25, 1))
plt.show()


# --- box plot per rotated week ---
# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
# http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chlor_a_logE_rate', by='week_rotate')
plt.suptitle("")  # remove the automatic "grouped by" super-title
axes2.set_ylim(-1,0.5)
axes2.set_title("Box plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $day^{-1}$', fontsize=10)
plt.show()


#plt.close('all')


Out[160]:
<matplotlib.text.Text at 0x11b311c88>

In [163]:
# Close every open figure. Nothing else is needed: `plt.cla()` / `plt.clf()`
# act on the *current* figure and create a new, empty one when none is open,
# which `plt.show()` would then render as a blank figure.
plt.close('all')


<matplotlib.figure.Figure at 0x11d79cc88>

In [179]:
# Spatial maps of chl_rate, one figure per month -- five months in total:
# November, December, January, February and March.
month_ind = np.array([11,12,1,2,3])
month_names = ['November', 'December','January','February', 'March']
for month_number, month_name in zip(month_ind, month_names):
    # rows whose timestamp falls in the current calendar month
    aa = df_timed_NovMar[df_timed_NovMar.index.month == month_number]
    fig, ax = plt.subplots(figsize=(8,6))
    print('\n\n summary of the Chl_rate \n', aa.chl_rate.describe())
    # fixed colour limits (-0.6, 0.6), identical for every month
    aa.plot(
        kind='scatter',
        x='lon',
        y='lat',
        c='chl_rate',
        cmap='RdBu_r',
        vmin=-0.6,
        vmax=0.6,
        edgecolor='none',
        ax=ax,
        title='Rate of change of the $Chl_a$ in %s' % (month_name),
    )
    plt.xticks(np.arange(45, 80, 2.5))
    plt.yticks(np.arange(0, 28, 2.5))
    plt.show()



 summary of the Chl_rate 
 count    119.000000
mean       0.021413
std        0.174320
min       -0.269799
25%       -0.026604
50%       -0.000852
75%        0.033310
max        1.684376
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    144.000000
mean       0.059732
std        0.598350
min       -3.367546
25%       -0.041924
50%        0.001460
75%        0.071871
max        4.861381
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    177.000000
mean       0.002020
std        1.359429
min      -12.726983
25%       -0.047666
50%        0.006902
75%        0.108919
max       11.152574
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    196.000000
mean       0.465178
std        8.483293
min      -39.506426
25%       -0.129237
50%       -0.002522
75%        0.099199
max       44.944229
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    169.000000
mean      -0.938410
std        6.009227
min      -26.998233
25%       -0.119928
50%       -0.008352
75%        0.015741
max       31.032281
Name: chl_rate, dtype: float64

In [158]:
# NOTE(review): scratch cell that only prints a fixed string; it looks like a
# leftover kernel check and can probably be deleted.
print("test")


test

In [ ]: