this is a file for calculating the Lagrangian rate of change of the log-scale $Chl_a$ using $\frac{D \ ( log_{e} Chl_a)}{Dt}:=\frac{1}{Chl_a}\frac{D \ ( Chl_a)}{Dt}$

  • A few remarks:
    • Unit $mg/(m^3 \cdot day)$
    • Natural logarithm added
    • All the rates on the same time frequency
    • validate the rate of change of the log-scale rate Chl-a by FD
    • Monthly trends
      • aaa
      • aaa

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import xarray as xr
from datetime import datetime
import datetime


/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)

In [2]:
# load the floats data 
# ********************
# *** CSV files ***
# ********************
# load the floats data, take the lon and lat as list out and calculate the distance

# load CSV output    
# some how the CSV Format has some compatibility issues here
# see readme file for the file convetion in the experiments,
# for instance "3" indicates distance is addd to the dataset 


plt.close('all')
plt.cla()  # axis
plt.clf()  # figure
plt.show()



# freqency
freq = 7
suf = 'D'
in_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_3.csv'
out_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_4.csv'
folder = './data_collector_modisa_chla9km/'
direc = folder + in_filename
direc

print('in_filename and path:', direc)
print('out_filename:', out_filename)

df_chl_out_3 = pd.read_csv(direc, index_col='index')
df_chl_out_3


<matplotlib.figure.Figure at 0x115437cc0>
in_filename and path: ./data_collector_modisa_chla9km/df_chl_out_7D_modisa_3.csv
out_filename: df_chl_out_7D_modisa_4.csv
Out[2]:
id time spd var_lat ve lon lat vn var_lon var_tmp temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist
index
4663 10206 2002-11-07 5.881464 0.000487 -2.351607 67.145571 11.112429 3.113143 0.001486 1000.000000 NaN 0.130267 -0.885166 -0.004264 NaN 540.9170
4665 11089 2002-11-07 17.183500 0.000067 -16.224571 64.522214 14.321929 -1.954857 0.000133 0.003821 28.931286 0.192224 -0.716192 0.067516 -1.170591 880.9380
4667 15707 2002-11-07 25.486857 0.000077 -9.886893 67.237571 13.279821 -21.813714 0.000155 1000.000000 NaN 0.164760 -0.783149 0.009444 -2.024855 569.8130
4685 34710 2002-11-07 16.909357 0.000073 -4.254286 63.074536 17.550536 15.411857 0.000146 0.001906 28.607679 0.392885 -0.405735 0.016794 -1.774846 542.5690
4691 34721 2002-11-07 16.744036 0.000066 9.964393 68.010643 12.662179 6.091821 0.000130 0.001844 29.422214 0.141941 -0.847893 -0.001058 NaN 467.9290
4922 10206 2002-11-14 3.310714 0.001253 -1.862964 67.061143 11.168893 0.388071 0.004840 1000.000000 NaN 0.145233 -0.837935 0.014966 -1.824894 549.5110
4924 11089 2002-11-14 31.006607 0.000061 -27.037464 63.432571 13.867750 -14.241071 0.000115 0.003591 28.311250 0.234748 -0.629398 0.042524 -1.371369 823.4870
4944 34710 2002-11-14 43.718750 0.000072 -12.182000 62.811500 19.120571 38.809643 0.000143 0.001775 27.771357 0.431795 -0.364722 0.038910 -1.409939 431.9610
4950 34721 2002-11-14 15.720571 0.000054 0.970036 68.344500 13.425500 14.954071 0.000098 0.001712 29.102000 0.156304 -0.806030 0.014363 -1.842747 463.4690
5181 10206 2002-11-21 4.650393 0.000887 -1.006821 66.993786 11.222714 2.276107 0.003094 1000.000000 NaN 0.131890 -0.879788 -0.013343 NaN 558.2280
5199 34315 2002-11-21 26.172500 0.000075 8.317429 57.932179 7.708929 -18.439250 0.000155 0.003484 28.561107 0.153773 -0.813120 0.014735 -1.831635 651.9540
5203 34710 2002-11-21 53.922607 0.000057 -7.815000 61.564929 21.375357 45.444714 0.000110 0.001673 27.401321 0.487654 -0.311888 0.055859 -1.252904 205.5000
5440 10206 2002-11-28 9.982857 0.000584 -9.219786 66.747107 11.368571 3.435071 0.001896 1000.000000 NaN 0.139830 -0.854400 0.007940 -2.100180 584.6810
5444 15707 2002-11-28 21.383500 0.000067 -14.033857 64.478036 11.849643 -14.967929 0.000128 1000.000000 NaN 0.165573 -0.781010 0.043876 -1.357773 835.8760
5458 34315 2002-11-28 45.224429 0.000051 -38.968750 57.152429 6.505107 -12.871571 0.000094 0.004536 28.990536 0.144399 -0.840437 -0.009374 NaN 724.8570
5461 34709 2002-11-28 23.102000 0.000064 -22.022400 74.560000 11.404000 -6.581400 0.000123 1000.000000 NaN 0.226762 -0.644430 -0.003435 NaN 93.6065
5462 34710 2002-11-28 73.454679 0.000066 27.711214 63.101786 22.416429 -34.115357 0.000129 0.001714 27.203000 0.555567 -0.255264 0.067913 -1.168050 305.1990
5701 11089 2002-12-05 39.818071 0.000052 7.996536 58.768000 15.050929 29.248643 0.000096 0.003712 27.217036 0.327284 -0.485075 -0.051179 NaN 374.3600
5721 34710 2002-12-05 35.425179 0.000067 15.452607 64.064607 20.894679 15.649964 0.000129 0.001806 26.703714 7.919321 0.898688 7.363754 0.867099 464.0160
5960 11089 2002-12-12 37.142464 0.000064 5.714429 59.882893 14.928107 -27.122714 0.000123 0.003629 27.114286 0.284417 -0.546044 -0.042867 NaN 473.2650
5980 34710 2002-12-12 12.120179 0.000066 5.550821 64.481964 21.968607 8.206964 0.000127 0.001703 26.409643 0.573134 -0.241744 -7.346187 NaN 351.6920
6217 10206 2002-12-19 9.386000 0.001232 -8.125214 64.930429 12.417786 4.508357 0.004582 1000.000000 NaN 0.155590 -0.808018 0.024163 -1.616840 791.3180
6219 11089 2002-12-19 23.566536 0.000062 -19.138571 59.142357 13.910857 -1.449250 0.000119 0.003462 27.090750 0.244831 -0.611134 -0.039586 NaN 497.8500
6239 34710 2002-12-19 10.117714 0.000059 1.596429 64.692500 22.029607 -5.205679 0.000111 0.001632 26.482429 0.534686 -0.271901 -0.038448 NaN 347.3690
6245 34721 2002-12-19 15.000750 0.000059 -6.390214 66.898393 15.011179 -2.284393 0.000110 0.001761 28.123500 0.156046 -0.806746 -0.085491 NaN 685.7980
6476 10206 2002-12-26 11.276643 0.000485 -10.488286 64.403250 12.569393 -0.180321 0.001438 1000.000000 NaN 0.161687 -0.791326 0.006097 -2.214908 849.9840
6478 11089 2002-12-26 20.869179 0.000064 0.788214 58.478500 14.595821 17.997464 0.000122 0.003560 26.712964 0.310186 -0.508378 0.065355 -1.184721 393.9530
6494 34315 2002-12-26 35.889214 0.000052 -31.255107 51.907643 9.167000 -4.632750 0.000099 0.003694 27.211286 0.304952 -0.515769 -0.026946 NaN 120.1410
6498 34710 2002-12-26 21.927036 0.000060 -7.163000 64.564214 21.250536 -20.276000 0.000113 0.001769 25.927607 0.390633 -0.408231 -0.144053 NaN 417.4360
6504 34721 2002-12-26 9.230107 0.000058 0.740107 66.836143 15.217429 8.719429 0.000108 0.001793 27.792714 0.176565 -0.753094 0.020519 -1.687840 705.1500
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
183302 114917 2016-01-21 21.374536 0.000113 -13.139714 72.454357 13.772286 5.974250 0.000050 0.001736 29.110107 0.146967 -0.832780 0.009182 -2.037070 207.8330
183303 114945 2016-01-21 14.945821 0.000135 -13.570250 63.468071 11.451286 -1.511357 0.000060 0.001818 27.844929 0.230810 -0.636745 0.067445 -1.171049 942.7340
183336 147127 2016-01-21 14.299143 0.000080 -8.474464 64.100393 17.402429 -7.015286 0.000037 0.001702 25.966536 0.481660 -0.317259 0.110470 -0.956756 640.2960
183561 114917 2016-01-28 21.277429 0.000101 -6.419071 71.818929 13.822607 -1.824643 0.000045 0.001870 29.082250 0.169643 -0.770464 0.022676 -1.644440 248.2880
183562 114945 2016-01-28 18.783571 0.000129 -17.327179 62.643214 11.375393 0.625464 0.000057 0.001895 27.786929 0.191321 -0.718237 -0.039489 NaN 893.5100
183595 147127 2016-01-28 17.761750 0.000048 -8.683536 63.740464 16.758714 -12.064214 0.000021 0.001701 25.906750 0.562320 -0.250017 0.080659 -1.093345 653.4980
183854 147127 2016-02-04 17.492286 0.000055 -3.826964 63.420964 16.241893 -5.800357 0.000024 0.001742 25.839714 1.611391 0.207201 1.049071 0.020805 661.6690
184113 147127 2016-02-11 15.215929 0.000044 -12.207357 62.870607 16.128607 -2.264786 0.000019 0.001707 25.847321 0.585368 -0.232571 -1.026022 NaN 619.6000
184123 60150420 2016-02-11 22.676607 0.000003 10.765821 61.326143 8.630464 18.819857 0.000005 0.001684 27.496929 0.168793 -0.772645 -0.017083 NaN 860.9450
184338 114917 2016-02-18 17.508607 0.000096 -6.792893 71.026036 13.717750 2.911286 0.000042 0.001798 28.717750 0.184201 -0.734708 -0.005852 NaN 264.0410
184339 114945 2016-02-18 16.975500 0.000177 -15.740393 60.104893 11.989393 3.155214 0.000082 0.001968 27.136179 0.336173 -0.473437 0.037838 -1.422070 607.9490
184372 147127 2016-02-18 14.439286 0.000065 -4.004857 62.378071 16.114929 1.416536 0.000028 0.001661 25.691500 0.672385 -0.172382 0.087017 -1.060395 578.8710
184382 60150420 2016-02-18 17.364607 0.000003 1.871321 61.527464 8.941214 -3.837714 0.000005 0.001684 27.388750 0.164424 -0.784036 -0.004370 NaN 862.7080
184593 114873 2016-02-25 47.681786 0.000094 -24.504929 57.069393 7.809643 36.035393 0.000042 0.001737 27.777143 0.167853 -0.775072 0.004007 -2.397157 590.3010
184598 114945 2016-02-25 16.985821 0.000178 -16.087000 59.255500 12.083071 -0.150857 0.000084 0.001988 27.327929 0.216736 -0.664070 -0.119437 NaN 515.5880
184631 147127 2016-02-25 4.899214 0.000041 0.056000 62.390143 16.152857 -0.554036 0.000018 0.001709 26.067750 0.844760 -0.073267 0.172375 -0.763526 576.3870
184852 114873 2016-03-03 25.772107 0.000100 -24.326857 55.385429 8.514857 -6.322321 0.000045 0.001743 27.909821 0.157784 -0.801938 -0.010069 NaN 446.1000
184857 114945 2016-03-03 11.442571 0.000396 -10.824000 58.453000 12.132357 1.469429 0.000221 0.001898 27.829357 0.197325 -0.704817 -0.019410 NaN 428.5260
184890 147127 2016-03-03 16.870929 0.000041 -9.578750 62.241750 15.903500 -8.266929 0.000018 0.001636 26.682036 0.542317 -0.265747 -0.302443 NaN 581.2960
185111 114873 2016-03-10 12.582857 0.000056 -9.354393 54.491107 8.183857 -5.210143 0.000024 0.001740 28.443857 0.135280 -0.868766 -0.022504 NaN 419.6270
185115 114917 2016-03-10 15.839250 0.000093 -12.232964 69.407179 13.777679 -7.084464 0.000041 0.001861 29.104893 0.198003 -0.703328 -0.009855 NaN 384.6920
185116 114945 2016-03-10 10.705179 0.000223 -7.728536 57.941536 11.978250 -5.198679 0.000103 0.001946 28.181321 0.195842 -0.708094 -0.001483 NaN 374.9290
185149 147127 2016-03-10 22.278679 0.000039 2.033429 61.980821 15.738429 -6.323500 0.000017 0.002426 27.831536 0.334466 -0.475648 -0.207851 NaN 568.7690
185159 60150420 2016-03-10 27.791964 0.000003 -22.587893 61.470821 10.137286 -7.758071 0.000005 0.001811 28.492821 0.146423 -0.834390 0.026231 -1.581191 800.4460
185370 114873 2016-03-17 11.174071 0.000066 -6.371893 54.215321 8.223036 7.358429 0.000030 0.001781 28.674929 0.119380 -0.923068 -0.015900 NaN 392.1640
185374 114917 2016-03-17 15.010286 0.000087 -5.992857 69.031107 13.331000 -7.445464 0.000038 0.001808 29.109000 0.150351 -0.822894 -0.047652 NaN 393.8130
185408 147127 2016-03-17 15.207071 0.000043 1.385607 61.872000 15.242107 -6.494893 0.000018 0.002131 28.531964 0.320894 -0.493638 -0.013572 NaN 595.0100
185418 60150420 2016-03-17 31.706714 0.000003 -30.421893 60.057929 10.040000 2.924964 0.000005 0.001742 28.580393 0.146215 -0.835008 -0.000208 NaN 663.7950
185667 147127 2016-03-24 16.203893 0.000055 -0.353071 61.987179 14.484500 -15.250821 0.000024 0.001725 28.516929 0.291352 -0.535583 -0.029543 NaN 665.2270
185926 147127 2016-03-31 9.666714 0.000044 6.411036 62.111179 14.133500 1.729036 0.000019 0.001756 29.063893 0.211449 -0.674794 -0.079902 NaN 703.9580

723 rows × 16 columns


In [3]:
# All the rates on the same time frequency
check1 = df_chl_out_3.chl_rate/ df_chl_out_3.chlor_a 
check1 = check1/ freq
check2 = df_chl_out_3.chl_rate.divide(freq *df_chl_out_3.chlor_a, axis = 'index')
# an check 
np.sum(abs(check1 - check2))


Out[3]:
2.1060423912623583e-15

In [4]:
# add the column to the dataframe and output the dataset
df_chl_out_3['chlor_a_logE_rate'] = pd.Series(np.array(check2), index=df_chl_out_3.index)
df_chl_out_3.head()


Out[4]:
id time spd var_lat ve lon lat vn var_lon var_tmp temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
4663 10206 2002-11-07 5.881464 0.000487 -2.351607 67.145571 11.112429 3.113143 0.001486 1000.000000 NaN 0.130267 -0.885166 -0.004264 NaN 540.917 -0.004676
4665 11089 2002-11-07 17.183500 0.000067 -16.224571 64.522214 14.321929 -1.954857 0.000133 0.003821 28.931286 0.192224 -0.716192 0.067516 -1.170591 880.938 0.050177
4667 15707 2002-11-07 25.486857 0.000077 -9.886893 67.237571 13.279821 -21.813714 0.000155 1000.000000 NaN 0.164760 -0.783149 0.009444 -2.024855 569.813 0.008188
4685 34710 2002-11-07 16.909357 0.000073 -4.254286 63.074536 17.550536 15.411857 0.000146 0.001906 28.607679 0.392885 -0.405735 0.016794 -1.774846 542.569 0.006106
4691 34721 2002-11-07 16.744036 0.000066 9.964393 68.010643 12.662179 6.091821 0.000130 0.001844 29.422214 0.141941 -0.847893 -0.001058 NaN 467.929 -0.001065

In [5]:
df_chl_out_3.chlor_a_logE_rate.describe()  # more scattered on the left hand side


Out[5]:
count    723.000000
mean      -0.051942
std        0.313747
min       -4.993384
25%       -0.033324
50%       -0.002568
75%        0.025332
max        0.138126
Name: chlor_a_logE_rate, dtype: float64

In [6]:
# visualize the ROC of log(chl_a) around the arabian sea region
fig, ax  = plt.subplots(figsize=(12,10))
df_chl_out_3.plot(kind='scatter', x='lon', y='lat', c='chlor_a_logE_rate', cmap='RdBu_r', vmin=check2.median()-0.5*check2.std(), vmax=check2.max(), edgecolor='none', ax=ax, title = 'rate of change of the log-scale chl-a')


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x1154378d0>

In [7]:
# histogram for non standarized data
axdf_chl = df_chl_out_3.chlor_a_logE_rate.dropna().hist(bins=100,range=[-1.5,0.5]) # there are very a few small values on the left
axdf_chl.set_title('histogram of the rate of change of the log-scale chl-a')


Out[7]:
<matplotlib.text.Text at 0x117d76278>

In [8]:
# standarized series
tmp = df_chl_out_3.chlor_a_logE_rate.dropna()
tmp = (tmp - tmp.mean())/tmp.std()
axdf_chl_stdan = tmp.hist(bins=100,range=[-1.5,0.5])   # there are very a few small values on the left
axdf_chl_stdan.set_title('histogram of the standardized rate of change of the log-scale chl-a')


Out[8]:
<matplotlib.text.Text at 0x118e1f588>

In [20]:
(np.log(0.131890)-np.log(0.145233))   / freq


Out[20]:
-0.013767301065377284

In [10]:
###########################
# On 2D-subsampling Dataset
###########################
# Val 1:
# id:10206, time:2002-11-21"
# (0.131890 - 0.145233) / (freq*0.131890)    == -0.014452519957107106
#########
# Val 2:
# id:10206, time:2002-11-21"
# (np.log(0.131890)-np.log(0.145233))   / freq == -0.013767301065377284 # very close to the value above

df_chl_out_3.sort_values(by=['id', 'time']).head()


Out[10]:
id time spd var_lat ve lon lat vn var_lon var_tmp temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
4663 10206 2002-11-07 5.881464 0.000487 -2.351607 67.145571 11.112429 3.113143 0.001486 1000.0 NaN 0.130267 -0.885166 -0.004264 NaN 540.917 -0.004676
4922 10206 2002-11-14 3.310714 0.001253 -1.862964 67.061143 11.168893 0.388071 0.004840 1000.0 NaN 0.145233 -0.837935 0.014966 -1.824894 549.511 0.014721
5181 10206 2002-11-21 4.650393 0.000887 -1.006821 66.993786 11.222714 2.276107 0.003094 1000.0 NaN 0.131890 -0.879788 -0.013343 NaN 558.228 -0.014453
5440 10206 2002-11-28 9.982857 0.000584 -9.219786 66.747107 11.368571 3.435071 0.001896 1000.0 NaN 0.139830 -0.854400 0.007940 -2.100180 584.681 0.008112
6217 10206 2002-12-19 9.386000 0.001232 -8.125214 64.930429 12.417786 4.508357 0.004582 1000.0 NaN 0.155590 -0.808018 0.024163 -1.616840 791.318 0.022186

In [11]:
# convert into datetime
df_chl_out_3['time'] = pd.to_datetime(df_chl_out_3['time']) # ,format='%m/%d/%y %I:%M%p'
df_chl_out_3.sort_values(by=['id', 'time']).head()  # a check


Out[11]:
id time spd var_lat ve lon lat vn var_lon var_tmp temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
4663 10206 2002-11-07 5.881464 0.000487 -2.351607 67.145571 11.112429 3.113143 0.001486 1000.0 NaN 0.130267 -0.885166 -0.004264 NaN 540.917 -0.004676
4922 10206 2002-11-14 3.310714 0.001253 -1.862964 67.061143 11.168893 0.388071 0.004840 1000.0 NaN 0.145233 -0.837935 0.014966 -1.824894 549.511 0.014721
5181 10206 2002-11-21 4.650393 0.000887 -1.006821 66.993786 11.222714 2.276107 0.003094 1000.0 NaN 0.131890 -0.879788 -0.013343 NaN 558.228 -0.014453
5440 10206 2002-11-28 9.982857 0.000584 -9.219786 66.747107 11.368571 3.435071 0.001896 1000.0 NaN 0.139830 -0.854400 0.007940 -2.100180 584.681 0.008112
6217 10206 2002-12-19 9.386000 0.001232 -8.125214 64.930429 12.417786 4.508357 0.004582 1000.0 NaN 0.155590 -0.808018 0.024163 -1.616840 791.318 0.022186

In [12]:
# CSV CSV CSV CSV with specfic index
# df_chl_out_3.csv --  {lat, lon, temp, chl_rate, dist}
# df_chl_out_3.csv --  {lat, lon, temp, chl_rate, dist, chlor_a_log10_rate}

# 3 represents 3 features: {temp, chl_rate, dist}
# 4 represents 4 features: {temp, chl_rate, dist, chlor_a_log10_rate }
print('out_filename:', out_filename)
df_chl_out_3.to_csv(out_filename, sep=',', index_label = 'index')

# load CSV output
test = pd.read_csv(out_filename, index_col='index')

# a check
test.sort_values(by=['id', 'time']).head()


out_filename: df_chl_out_7D_modisa_4.csv
Out[12]:
id time spd var_lat ve lon lat vn var_lon var_tmp temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
4663 10206 2002-11-07 5.881464 0.000487 -2.351607 67.145571 11.112429 3.113143 0.001486 1000.0 NaN 0.130267 -0.885166 -0.004264 NaN 540.917 -0.004676
4922 10206 2002-11-14 3.310714 0.001253 -1.862964 67.061143 11.168893 0.388071 0.004840 1000.0 NaN 0.145233 -0.837935 0.014966 -1.824894 549.511 0.014721
5181 10206 2002-11-21 4.650393 0.000887 -1.006821 66.993786 11.222714 2.276107 0.003094 1000.0 NaN 0.131890 -0.879788 -0.013343 NaN 558.228 -0.014453
5440 10206 2002-11-28 9.982857 0.000584 -9.219786 66.747107 11.368571 3.435071 0.001896 1000.0 NaN 0.139830 -0.854400 0.007940 -2.100180 584.681 0.008112
6217 10206 2002-12-19 9.386000 0.001232 -8.125214 64.930429 12.417786 4.508357 0.004582 1000.0 NaN 0.155590 -0.808018 0.024163 -1.616840 791.318 0.022186

In [13]:
## check the week numbers of the range from Nov-01-01 to Mar-01-01
for year in range(2002, 2017):
    print(str(year)+'-11-01 is week', datetime.datetime(year, 11, 1).isocalendar()[1])  # 44, 45, 

print('----')
for year in range(2002, 2017):
    print(str(year)+'-3-31 is week', datetime.datetime(year, 3, 31).isocalendar()[1])  # 13, 14

plt.show()


2002-11-01 is week 44
2003-11-01 is week 44
2004-11-01 is week 45
2005-11-01 is week 44
2006-11-01 is week 44
2007-11-01 is week 44
2008-11-01 is week 44
2009-11-01 is week 44
2010-11-01 is week 44
2011-11-01 is week 44
2012-11-01 is week 44
2013-11-01 is week 44
2014-11-01 is week 44
2015-11-01 is week 44
2016-11-01 is week 44
----
2002-3-31 is week 13
2003-3-31 is week 14
2004-3-31 is week 14
2005-3-31 is week 13
2006-3-31 is week 13
2007-3-31 is week 13
2008-3-31 is week 14
2009-3-31 is week 14
2010-3-31 is week 13
2011-3-31 is week 13
2012-3-31 is week 13
2013-3-31 is week 13
2014-3-31 is week 14
2015-3-31 is week 14
2016-3-31 is week 13

In [14]:
##### weekly plot on the Lagrangian rate of change of the chl-a
#sns.set(style="white")
#sns.set(color_codes=True)

### 
# Approach 1 depreciated
#grouped = df_timed.chl_rate.groupby(df_timed.index.week)
#grouped.plot.box()

### 
# Approach 2
# prepare data  a. use index or columns to group

###
# select the corresponding weeks, prepare the data
df_timed = df_chl_out_3.set_index('time')
df_timed['week'] = df_timed.index.week

mask_NovMar = (df_timed.week<=14) | (df_timed.week >=44)
df_timed_NovMar = df_timed[mask_NovMar]
#df_timed_NovMar.head()

# now rotate the index to make Nov-01-01 the first month
print('the min and max of the week index is %d, %d :' % (df_timed_NovMar.week.min(), df_timed_NovMar.week.max()) )
# make the 44th week the 1st week
df_timed_NovMar['week_rotate'] = (df_timed_NovMar.week + 10 ) % 53
df_timed_NovMar.week_rotate.describe() # now from 1 to 24


axes1=df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].mean().plot(linestyle="-",color='b', linewidth=1)
df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].quantile(.75).plot(linestyle="--",color='g', linewidth=0.35)
df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].quantile(.50).plot(linestyle="--",color='r', linewidth=0.75)
df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].quantile(.25).plot(linestyle="--",color='g', linewidth=0.35)
axes1.set_ylim(-3,2)
axes1.set_title("Line plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.yticks(np.arange(-3, 2, 0.5))
plt.xticks(np.arange(1, 25, 1))
plt.show()


# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
#http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chl_rate', by='week_rotate')
plt.suptitle("") # equivalent
axes2.set_ylim(-1.6,1.6)
axes2.set_title("Box plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.show()

# the rate of change is slower on the regular scale

#matplotlib.pyplot.close("all")


the min and max of the week index is 1, 53 :

In [15]:
# weekly plot on the Lagrangian rate of change of the log-scale chl-a
# This is the rate of change on the exponential scale
axes1=df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].mean().plot(linestyle="-",color='b', linewidth=1)
df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].quantile(.75).plot(linestyle="--",color='g', linewidth=0.35)
df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].quantile(.50).plot(linestyle="--",color='r', linewidth=0.75)
df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].quantile(.25).plot(linestyle="--",color='g', linewidth=0.35)
axes1.set_ylim(-1,0.5)
axes1.set_title("Line plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.yticks(np.arange(-1, 0.5, 0.25))
plt.xticks(np.arange(1, 25, 1))
#plt.show()


# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
#http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chlor_a_logE_rate', by='week_rotate')
plt.suptitle("") # equivalent
axes2.set_ylim(-1,0.5)
axes2.set_title("Box plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
#plt.show()


#plt.close('all')


Out[15]:
<matplotlib.text.Text at 0x11a61d630>

In [16]:
plt.close('all')
plt.cla()  # axis
plt.clf()  # figure
plt.show()


<matplotlib.figure.Figure at 0x10340acf8>

In [17]:
# spatial plot for different months --  totally five months 1, 2, 3, 11, 12, 
for i in range(0,5,1):
    month_ind = np.array([11,12,1,2,3])
    month_names = ['November', 'December','January','February', 'March']
    aa = df_timed_NovMar[df_timed_NovMar.index.month == month_ind[i]]
    fig, ax  = plt.subplots(figsize=(8,6))
    ##aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r', vmin=aa.chl_rate.median()-0.5*aa.chl_rate.std(), vmax=aa.chl_rate.median()-0.5*aa.chl_rate.std(), edgecolor='none', ax=ax, title = 'rate of change of the $Chl_a$')
    ##aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r', vmin=aa.chl_rate.mean()-0.5*aa.chl_rate.std(), vmax=aa.chl_rate.mean()+0.5*aa.chl_rate.std(), edgecolor='none', ax=ax, title = 'rate of change of the $Chl_a$')
    print('\n\n summary of the Chl_rate \n', aa.chl_rate.describe())
    aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r', vmin=-0.6, vmax=0.6, edgecolor='none', ax=ax, title = 'Rate of change of the $Chl_a$ in %s' % (month_names[i]))
    plt.xticks(np.arange(45, 80, 2.5))
    plt.yticks(np.arange(0, 28, 2.5))
    plt.show()



 summary of the Chl_rate 
 count    106.000000
mean      -0.009087
std        0.187237
min       -1.371494
25%       -0.029798
50%       -0.002513
75%        0.037431
max        0.745820
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    114.000000
mean       0.047336
std        0.998323
min       -7.346187
25%       -0.037127
50%        0.012848
75%        0.092641
max        7.363754
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    159.000000
mean       0.191225
std        1.476104
min       -4.918748
25%       -0.051795
50%        0.001460
75%        0.096752
max       14.397376
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    167.000000
mean      -0.334437
std        7.440621
min      -51.763635
25%       -0.075132
50%       -0.000485
75%        0.093482
max       49.713420
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    177.000000
mean      -0.213897
std        2.524451
min      -20.049905
25%       -0.129243
50%       -0.013572
75%        0.010955
max       10.810810
Name: chl_rate, dtype: float64

In [18]:
print("test")


test

In [ ]: