This notebook calculates the Lagrangian rate of change of the log-scale $Chl_a$, using $\frac{D \ ( \log_{e} Chl_a)}{Dt}:=\frac{1}{Chl_a}\frac{D \ ( Chl_a)}{Dt}$

  • A few remarks:
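    • Unit: $mg/(m^3 \cdot day)$ for the rate of change of $Chl_a$; the log-scale rate $\frac{1}{Chl_a}\frac{D \ ( Chl_a)}{Dt}$ has unit $1/day$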
    • Natural logarithm added
    • All the rates on the same time frequency
    • Validate the rate of change of the log-scale Chl-a by finite differences (FD)
    • Monthly trends
      • aaa
      • aaa

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import xarray as xr
from datetime import datetime
import datetime


/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)

In [2]:
# Load the floats data
# ********************
# *** CSV files ***
# ********************
# The input CSV already carries the `dist` column; this cell only reads it.
#
# Somehow the CSV format has some compatibility issues here.
# See the readme file for the file convention used in the experiments;
# for instance, the suffix "3" indicates that the distance has been added to the dataset.

# Start from a clean matplotlib state. `plt.close('all')` is sufficient on its own:
# calling `plt.cla()` / `plt.clf()` afterwards would implicitly create a brand-new
# empty figure, which `plt.show()` would then render as a stray blank output.
plt.close('all')

# sampling frequency of the dataset, used to build the file names
# (and later to convert rates to a per-day basis)
freq = 5
suf = 'D'  # presumably "days" -- TODO confirm against the readme
in_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_3.csv'
out_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_4.csv'
folder = './data_collector_modisa_chla9km/'
direc = folder + in_filename

print('in_filename and path:', direc)
print('out_filename:', out_filename)

# the CSV stores the original row labels in a column named 'index'
df_chl_out_3 = pd.read_csv(direc, index_col='index')
df_chl_out_3


<matplotlib.figure.Figure at 0x115437e80>
in_filename and path: ./data_collector_modisa_chla9km/df_chl_out_5D_modisa_3.csv
out_filename: df_chl_out_5D_modisa_4.csv
Out[2]:
id time lat spd var_lon vn var_lat lon ve temp var_tmp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist
index
6239 34710 2002-11-01 16.907900 13.195850 0.000120 12.28250 0.000063 63.130950 1.024050 28.998850 0.001754 0.386388 -0.412976 0.060749 -1.216459 592.46100
6476 10206 2002-11-06 11.049700 9.697750 0.001602 6.51645 0.000517 67.176750 -4.161400 NaN 1000.000000 0.133946 -0.873070 0.005581 -2.253287 536.67500
6498 34710 2002-11-06 17.339050 11.972150 0.000159 10.54180 0.000079 63.148450 -2.067400 28.832700 0.001884 0.379611 -0.420661 -0.006777 NaN 562.59100
6504 34721 2002-11-06 12.589150 15.204350 0.000143 0.91040 0.000071 67.828050 10.477750 29.497000 0.001856 0.148202 -0.829147 0.009522 -2.021272 487.07000
6735 10206 2002-11-11 11.160300 2.944400 0.001463 1.00360 0.000474 67.111450 -0.923050 NaN 1000.000000 0.125101 -0.902739 -0.008845 NaN 545.14200
6757 34710 2002-11-11 17.986600 27.674550 0.000102 26.75225 0.000055 62.959850 -2.543400 28.167400 0.001775 0.500674 -0.300445 0.121063 -0.916988 503.53300
6994 10206 2002-11-16 11.166650 3.065200 0.006093 -0.48170 0.001546 67.051800 -1.909650 NaN 1000.000000 0.140332 -0.852843 0.015231 -1.817272 549.51100
7016 34710 2002-11-16 19.448750 47.484750 0.000162 40.73835 0.000079 62.765100 -17.921000 27.723950 0.001810 0.569412 -0.244573 0.068738 -1.162806 416.43400
7257 15707 2002-11-21 12.418600 20.834500 0.000118 2.76235 0.000062 65.457300 -18.660150 NaN 1000.000000 0.162057 -0.790332 0.010989 -1.959042 735.57800
7275 34710 2002-11-21 21.038750 50.805600 0.000114 43.67610 0.000059 61.635900 -24.506700 27.402250 0.001703 0.449158 -0.347601 -0.120254 NaN 227.98300
7516 15707 2002-11-26 12.208550 20.374050 0.000162 -14.41925 0.000081 64.792800 -13.666750 NaN 1000.000000 0.135003 -0.869657 -0.027054 NaN 805.32400
7534 34710 2002-11-26 22.744150 66.321050 0.000104 28.91065 0.000055 62.066800 47.545300 27.330300 0.001701 0.541878 -0.266099 0.092720 -1.032829 231.22300
7771 10206 2002-12-01 11.424950 13.959800 0.003342 5.01475 0.000973 66.584900 -12.892850 NaN 1000.000000 0.130924 -0.882981 -0.006660 NaN 602.27400
7775 15707 2002-12-01 11.619250 22.353400 0.000116 -13.50155 0.000062 64.252100 -16.280450 NaN 1000.000000 0.158046 -0.801216 0.023043 -1.637461 856.04600
7789 34315 2002-12-01 6.330100 58.157650 0.000109 0.52415 0.000058 56.495950 -55.983300 28.835400 0.004026 0.119914 -0.921132 -0.027544 NaN 711.87200
7793 34710 2002-12-01 21.655450 69.408450 0.000146 -63.06905 0.000074 63.527500 9.568350 27.089100 0.001660 3.619618 0.558663 3.077740 0.488232 383.91300
7799 34721 2002-12-01 14.725750 11.062000 0.000100 9.16130 0.000055 68.159900 0.352700 28.865800 0.001697 0.139796 -0.854504 -0.010756 NaN 558.92400
8570 34710 2002-12-16 22.063000 6.143050 0.000122 0.69065 0.000063 64.611450 2.003650 26.491600 0.001814 0.444480 -0.352148 -0.172662 NaN 342.75400
8576 34721 2002-12-16 15.129600 15.943200 0.000099 -2.92125 0.000054 67.248500 -14.502400 28.288700 0.001807 0.125414 -0.901654 -0.013556 NaN 661.26500
8825 34315 2002-12-21 8.823800 13.802500 0.000096 8.97855 0.000051 52.963600 -8.062800 27.557800 0.003600 0.401161 -0.396681 0.127543 -0.894343 245.20900
8829 34710 2002-12-21 22.013750 11.916350 0.000117 -6.55820 0.000061 64.707850 2.118050 26.464250 0.001568 0.612960 -0.212568 0.168480 -0.773451 347.36900
8835 34721 2002-12-21 14.976800 13.454900 0.000117 -0.59270 0.000062 66.831300 -2.024600 28.068400 0.001751 0.151092 -0.820759 0.025678 -1.590439 690.68500
9066 10206 2002-12-26 12.579700 11.051750 0.001609 1.12345 0.000532 64.488450 -10.252100 NaN 1000.000000 0.163904 -0.785410 0.007255 -2.139362 841.41600
9068 11089 2002-12-26 14.441300 21.283550 0.000125 19.76885 0.000065 58.454900 -2.932550 26.813450 0.003647 0.241243 -0.617545 -0.061796 NaN 404.95000
9084 34315 2002-12-26 9.231000 35.503450 0.000099 3.20195 0.000052 52.172350 -33.056150 27.296800 0.003672 0.252959 -0.596950 -0.148202 NaN 149.08500
9088 34710 2002-12-26 21.419550 23.133500 0.000113 -22.07495 0.000060 64.631250 -6.203950 26.061850 0.001792 0.851984 -0.069569 0.239024 -0.621558 401.87300
9094 34721 2002-12-26 15.152400 7.583750 0.000108 7.00980 0.000058 66.831450 0.540150 27.862950 0.001797 0.153344 -0.814333 0.002252 -2.647429 700.25000
9325 10206 2002-12-31 12.498000 12.809400 0.000895 -4.28190 0.000329 64.051650 -11.660200 NaN 1000.000000 0.189273 -0.722911 0.025369 -1.595697 887.20100
9327 11089 2002-12-31 15.009900 15.872800 0.000110 3.21050 0.000060 58.673500 11.034750 26.357650 0.003798 0.322953 -0.490861 0.081710 -1.087725 368.89100
9343 34315 2002-12-31 8.585500 51.863400 0.000089 -38.94020 0.000049 50.876350 -33.313050 26.887500 0.003671 0.268110 -0.571686 0.015151 -1.819545 54.01980
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
256859 114945 2016-01-27 11.376800 17.881400 0.000057 -2.33095 0.000129 62.932650 -15.907350 27.844050 0.001795 0.256640 -0.590675 0.025830 -1.587876 923.69800
256892 147127 2016-01-27 16.989550 17.564300 0.000024 -14.72850 0.000056 63.876800 -7.991400 25.920500 0.001694 0.571200 -0.243212 0.018234 -1.739107 647.76800
257117 114917 2016-02-01 13.711650 18.565300 0.000038 -5.28340 0.000089 71.635400 -1.578900 29.013900 0.001843 0.175277 -0.756275 0.015317 -1.814832 239.79000
257151 147127 2016-02-01 16.459300 17.491550 0.000018 -9.91425 0.000040 63.503500 -8.650200 25.839850 0.001665 0.732946 -0.134928 0.161747 -0.791165 653.25100
257410 147127 2016-02-06 16.191400 15.680450 0.000025 -1.30390 0.000056 63.445250 -0.877050 25.858450 0.001797 1.611391 0.207201 0.878444 -0.056286 670.83500
257669 147127 2016-02-11 16.135050 17.410200 0.000017 -0.95920 0.000038 62.978300 -15.093000 25.891700 0.001701 0.534299 -0.272216 -1.077092 NaN 630.58300
257679 60150420 2016-02-11 8.473500 27.245550 0.000005 22.48145 0.000003 61.250300 14.878500 27.531300 0.001684 0.165648 -0.780815 -0.021582 NaN 862.66500
257894 114917 2016-02-16 13.686050 16.169250 0.000035 -0.05785 0.000081 71.127700 -4.842950 28.619300 0.001781 0.175518 -0.755678 -0.029977 NaN 258.05900
257928 147127 2016-02-16 16.118650 13.272050 0.000026 1.07575 0.000062 62.560950 -8.744950 25.691850 0.001654 0.643008 -0.191784 0.108709 -0.963735 596.65700
258153 114917 2016-02-21 13.742550 16.259400 0.000042 1.85835 0.000096 70.923350 -6.465100 28.815800 0.001852 0.170473 -0.768344 -0.005045 NaN 272.13500
258187 147127 2016-02-21 16.117850 11.748650 0.000026 -0.84125 0.000062 62.282500 1.579600 25.761850 0.001679 0.738294 -0.131771 0.095286 -1.020969 571.82300
258413 114945 2016-02-26 12.089650 17.098400 0.000100 1.29215 0.000208 59.254600 -16.203800 27.284600 0.002015 0.223546 -0.650633 -0.088383 NaN 515.58800
258446 147127 2016-02-26 16.158750 5.133350 0.000017 -0.91190 0.000038 62.400500 -0.319150 26.053250 0.001755 0.876987 -0.057007 0.138693 -0.857946 579.94000
258672 114945 2016-03-02 12.109450 14.505050 0.000139 1.26915 0.000270 58.627150 -13.569600 27.689400 0.001888 0.189231 -0.723007 -0.034315 NaN 446.33100
258705 147127 2016-03-02 16.071200 7.687300 0.000018 -5.59190 0.000041 62.348950 -0.817950 26.567500 0.001627 0.670635 -0.173514 -0.206352 NaN 577.86400
258930 114917 2016-03-07 13.937000 15.025750 0.000037 0.19960 0.000086 69.852350 -13.357400 29.105850 0.001868 0.175201 -0.756463 -0.144586 NaN 361.09000
258949 127429 2016-03-07 5.213615 35.608083 0.000028 0.11500 0.000065 73.535231 -34.391333 29.722308 0.001782 0.126091 -0.899317 0.014539 -1.837467 8.90195
258964 147127 2016-03-07 15.721700 22.244800 0.000020 -1.42040 0.000046 61.979700 -12.612650 27.074200 0.001664 0.368702 -0.433324 -0.301933 NaN 568.76900
259185 114873 2016-03-12 8.129800 10.994800 0.000022 -5.90670 0.000050 54.401600 -6.859250 28.540750 0.001762 0.127728 -0.893712 -0.015777 NaN 415.09100
259189 114917 2016-03-12 13.712650 16.031400 0.000044 -8.82190 0.000101 69.296650 -11.405850 29.059350 0.001792 0.161546 -0.791704 -0.013655 NaN 389.65100
259223 147127 2016-03-12 15.730450 25.689300 0.000015 -13.74090 0.000035 62.055050 2.825950 27.927850 0.002715 0.282156 -0.549510 -0.086546 NaN 575.36600
259233 60150420 2016-03-12 10.051550 28.195600 0.000005 -10.66080 0.000003 61.296850 -20.956350 28.563250 0.001862 0.147235 -0.831988 0.018845 -1.724796 787.12100
259444 114873 2016-03-17 8.149400 10.474950 0.000032 7.85775 0.000070 54.270200 -5.202400 28.650400 0.001786 0.118643 -0.925757 -0.009085 NaN 401.06500
259448 114917 2016-03-17 13.396450 15.275200 0.000032 -10.65000 0.000075 69.075300 -3.932150 29.078850 0.001813 0.107735 -0.967644 -0.053811 NaN 392.19700
259482 147127 2016-03-17 15.292400 13.540600 0.000017 -2.20955 0.000040 61.824650 -0.067350 28.508550 0.002278 0.329573 -0.482048 0.047417 -1.324067 588.84300
259703 114873 2016-03-22 8.576250 20.472650 0.000021 14.26275 0.000048 53.915250 -12.174350 28.666500 0.001717 0.127864 -0.893251 0.009221 -2.035229 340.13800
259707 114917 2016-03-22 13.067700 28.761300 0.000058 -13.65505 0.000128 68.630700 -14.469050 29.281400 0.001763 0.150761 -0.821711 0.043026 -1.366266 419.64800
259741 147127 2016-03-22 14.884600 19.190550 0.000027 -17.68160 0.000064 61.997500 0.957400 28.459150 0.001743 0.287153 -0.541886 -0.042420 NaN 632.47500
259962 114873 2016-03-27 9.299900 22.441900 0.000039 18.31615 0.000086 53.692850 2.621350 28.833000 0.001752 0.123338 -0.908902 -0.004526 NaN 280.67600
259966 114917 2016-03-27 12.342300 34.158400 0.000029 0.21915 0.000067 68.893400 22.483500 29.860500 0.002056 0.135423 -0.868308 -0.015338 NaN 366.69600

754 rows × 16 columns


In [3]:
# Bring all rates onto the same time frequency: divide the chl-a rate by the
# concentration and by `freq`, i.e. chl_rate / (freq * chlor_a).
# The quantity is computed in two equivalent ways as a sanity check.
check1 = (df_chl_out_3.chl_rate / df_chl_out_3.chlor_a) / freq
check2 = df_chl_out_3.chl_rate.divide(freq * df_chl_out_3.chlor_a, axis='index')
# total absolute discrepancy between the two computations (expected ~ round-off)
(check1 - check2).abs().sum()


Out[3]:
3.054956461412406e-15

In [4]:
# Store the log-scale rate as a new column (values taken positionally from check2,
# which was computed from this same frame) and preview the result.
df_chl_out_3['chlor_a_logE_rate'] = check2.values
df_chl_out_3.head()


Out[4]:
id time lat spd var_lon vn var_lat lon ve temp var_tmp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
6239 34710 2002-11-01 16.90790 13.19585 0.000120 12.28250 0.000063 63.13095 1.02405 28.99885 0.001754 0.386388 -0.412976 0.060749 -1.216459 592.461 0.031445
6476 10206 2002-11-06 11.04970 9.69775 0.001602 6.51645 0.000517 67.17675 -4.16140 NaN 1000.000000 0.133946 -0.873070 0.005581 -2.253287 536.675 0.008333
6498 34710 2002-11-06 17.33905 11.97215 0.000159 10.54180 0.000079 63.14845 -2.06740 28.83270 0.001884 0.379611 -0.420661 -0.006777 NaN 562.591 -0.003570
6504 34721 2002-11-06 12.58915 15.20435 0.000143 0.91040 0.000071 67.82805 10.47775 29.49700 0.001856 0.148202 -0.829147 0.009522 -2.021272 487.070 0.012850
6735 10206 2002-11-11 11.16030 2.94440 0.001463 1.00360 0.000474 67.11145 -0.92305 NaN 1000.000000 0.125101 -0.902739 -0.008845 NaN 545.142 -0.014141

In [5]:
# summary statistics of the log-scale rate; the values are more scattered on the left-hand (negative) side
df_chl_out_3['chlor_a_logE_rate'].describe()


Out[5]:
count    754.000000
mean      -0.059923
std        0.352221
min       -4.195474
25%       -0.046804
50%        0.001365
75%        0.036548
max        0.194120
Name: chlor_a_logE_rate, dtype: float64

In [6]:
# Visualize the rate of change (ROC) of log(chl_a) around the Arabian Sea region.
# Colour limits: lower bound = median - 0.5 * std, upper bound = maximum of the rate.
fig, ax = plt.subplots(figsize=(12, 10))
df_chl_out_3.plot(
    kind='scatter',
    x='lon',
    y='lat',
    c='chlor_a_logE_rate',
    cmap='RdBu_r',
    vmin=check2.median() - 0.5 * check2.std(),
    vmax=check2.max(),
    edgecolor='none',
    ax=ax,
    title='rate of change of the log-scale chl-a'
)


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x115437550>

In [7]:
# Histogram of the non-standardized log-scale rate, restricted to [-1.5, 0.5];
# there are a very few small values further out on the left.
axdf_chl = df_chl_out_3['chlor_a_logE_rate'].dropna().hist(bins=100, range=[-1.5, 0.5])
axdf_chl.set_title('histogram of the rate of change of the log-scale chl-a')


Out[7]:
<matplotlib.text.Text at 0x1179d84e0>

In [8]:
# Standardized series: subtract the mean and divide by the standard deviation.
tmp = df_chl_out_3.chlor_a_logE_rate.dropna()
tmp = tmp.sub(tmp.mean()).div(tmp.std())
# NOTE(review): the bin range is the same [-1.5, 0.5] used for the raw series, but it is
# now expressed in standard deviations -- confirm this window is the intended one.
axdf_chl_stdan = tmp.hist(bins=100, range=[-1.5, 0.5])  # there are a very few small values on the left
axdf_chl_stdan.set_title('histogram of the standardized rate of change of the log-scale chl-a')


Out[8]:
<matplotlib.text.Text at 0x118b88198>

In [13]:
# Finite-difference check on the natural-log scale: (ln(0.140332) - ln(0.125101)) / freq.
# The two literals are the chlor_a values of id 10206 at 2002-11-16 and 2002-11-11.
(np.log(0.140332)-np.log(0.125101))   / freq


Out[13]:
0.022977926547090809

In [10]:
###########################
# Hand validation on the sub-sampled dataset
# NOTE(review): this header originally read "2D-subsampling", but the numbers
# below only reproduce with freq = 5 -- confirm which dataset is meant.
###########################
# Val 1 (relative finite difference, matches the chlor_a_logE_rate column):
# id: 10206, time: 2002-11-16
# (0.140332 - 0.125101) / (freq*0.140332)    == 0.0217070946042243
#########
# Val 2 (finite difference of the natural logarithm):
# id: 10206, time: 2002-11-16
# (np.log(0.140332)-np.log(0.125101))   / freq == 0.022977926547090809 # very close to the value above

# rows ordered per float id and time, to read off consecutive chlor_a values
df_chl_out_3.sort_values(['id', 'time']).head()


Out[10]:
id time lat spd var_lon vn var_lat lon ve temp var_tmp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
6476 10206 2002-11-06 11.04970 9.69775 0.001602 6.51645 0.000517 67.17675 -4.16140 NaN 1000.0 0.133946 -0.873070 0.005581 -2.253287 536.675 0.008333
6735 10206 2002-11-11 11.16030 2.94440 0.001463 1.00360 0.000474 67.11145 -0.92305 NaN 1000.0 0.125101 -0.902739 -0.008845 NaN 545.142 -0.014141
6994 10206 2002-11-16 11.16665 3.06520 0.006093 -0.48170 0.001546 67.05180 -1.90965 NaN 1000.0 0.140332 -0.852843 0.015231 -1.817272 549.511 0.021707
7771 10206 2002-12-01 11.42495 13.95980 0.003342 5.01475 0.000973 66.58490 -12.89285 NaN 1000.0 0.130924 -0.882981 -0.006660 NaN 602.274 -0.010174
9066 10206 2002-12-26 12.57970 11.05175 0.001609 1.12345 0.000532 64.48845 -10.25210 NaN 1000.0 0.163904 -0.785410 0.007255 -2.139362 841.416 0.008853

In [14]:
# Parse the 'time' column (strings read from the CSV) into pandas datetimes;
# an explicit format such as '%m/%d/%y %I:%M%p' is not needed here.
df_chl_out_3['time'] = pd.to_datetime(df_chl_out_3.time)
# quick check: rows ordered per id and time
df_chl_out_3.sort_values(['id', 'time']).head()


Out[14]:
id time lat spd var_lon vn var_lat lon ve temp var_tmp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
6476 10206 2002-11-06 11.04970 9.69775 0.001602 6.51645 0.000517 67.17675 -4.16140 NaN 1000.0 0.133946 -0.873070 0.005581 -2.253287 536.675 0.008333
6735 10206 2002-11-11 11.16030 2.94440 0.001463 1.00360 0.000474 67.11145 -0.92305 NaN 1000.0 0.125101 -0.902739 -0.008845 NaN 545.142 -0.014141
6994 10206 2002-11-16 11.16665 3.06520 0.006093 -0.48170 0.001546 67.05180 -1.90965 NaN 1000.0 0.140332 -0.852843 0.015231 -1.817272 549.511 0.021707
7771 10206 2002-12-01 11.42495 13.95980 0.003342 5.01475 0.000973 66.58490 -12.89285 NaN 1000.0 0.130924 -0.882981 -0.006660 NaN 602.274 -0.010174
9066 10206 2002-12-26 12.57970 11.05175 0.001609 1.12345 0.000532 64.48845 -10.25210 NaN 1000.0 0.163904 -0.785410 0.007255 -2.139362 841.416 0.008853

In [15]:
# Write the augmented table to CSV, storing the row labels in a column named 'index'.
# File-name convention:
#   ..._3.csv --  {lat, lon, temp, chl_rate, dist}
#   ..._4.csv --  {lat, lon, temp, chl_rate, dist, chlor_a_logE_rate}
# "3" represents 3 features: {temp, chl_rate, dist}
# "4" represents 4 features: {temp, chl_rate, dist, chlor_a_logE_rate}
# (the original comments named the fourth feature chlor_a_log10_rate, but the
#  column actually added in this notebook is chlor_a_logE_rate)
print('out_filename:', out_filename)
df_chl_out_3.to_csv(out_filename, index_label='index')

# read the file back as a round-trip check
test = pd.read_csv(out_filename, index_col='index')

# a check
test.sort_values(['id', 'time']).head()


out_filename: df_chl_out_5D_modisa_4.csv
Out[15]:
id time lat spd var_lon vn var_lat lon ve temp var_tmp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
6476 10206 2002-11-06 11.04970 9.69775 0.001602 6.51645 0.000517 67.17675 -4.16140 NaN 1000.0 0.133946 -0.873070 0.005581 -2.253287 536.675 0.008333
6735 10206 2002-11-11 11.16030 2.94440 0.001463 1.00360 0.000474 67.11145 -0.92305 NaN 1000.0 0.125101 -0.902739 -0.008845 NaN 545.142 -0.014141
6994 10206 2002-11-16 11.16665 3.06520 0.006093 -0.48170 0.001546 67.05180 -1.90965 NaN 1000.0 0.140332 -0.852843 0.015231 -1.817272 549.511 0.021707
7771 10206 2002-12-01 11.42495 13.95980 0.003342 5.01475 0.000973 66.58490 -12.89285 NaN 1000.0 0.130924 -0.882981 -0.006660 NaN 602.274 -0.010174
9066 10206 2002-12-26 12.57970 11.05175 0.001609 1.12345 0.000532 64.48845 -10.25210 NaN 1000.0 0.163904 -0.785410 0.007255 -2.139362 841.416 0.008853

In [16]:
## Check the ISO week numbers at both ends of the Nov-01 .. Mar-31 season, per year
for year in range(2002, 2017):
    # first day of the season -> week 44 or 45
    print('%d-11-01 is week' % year, datetime.date(year, 11, 1).isocalendar()[1])

print('----')
for year in range(2002, 2017):
    # last day of the season -> week 13 or 14
    print('%d-3-31 is week' % year, datetime.date(year, 3, 31).isocalendar()[1])

plt.show()


2002-11-01 is week 44
2003-11-01 is week 44
2004-11-01 is week 45
2005-11-01 is week 44
2006-11-01 is week 44
2007-11-01 is week 44
2008-11-01 is week 44
2009-11-01 is week 44
2010-11-01 is week 44
2011-11-01 is week 44
2012-11-01 is week 44
2013-11-01 is week 44
2014-11-01 is week 44
2015-11-01 is week 44
2016-11-01 is week 44
----
2002-3-31 is week 13
2003-3-31 is week 14
2004-3-31 is week 14
2005-3-31 is week 13
2006-3-31 is week 13
2007-3-31 is week 13
2008-3-31 is week 14
2009-3-31 is week 14
2010-3-31 is week 13
2011-3-31 is week 13
2012-3-31 is week 13
2013-3-31 is week 13
2014-3-31 is week 14
2015-3-31 is week 14
2016-3-31 is week 13

In [17]:
##### weekly plot on the Lagrangian rate of change of the chl-a
# Restrict the data to the Nov-Mar season, renumber the weeks so that the season
# starts at week 1, then draw (1) weekly mean / quartile lines and (2) weekly box
# plots of `chl_rate`.
#sns.set(style="white")
#sns.set(color_codes=True)

###
# Approach 1 (deprecated): group directly on the index week
#grouped = df_timed.chl_rate.groupby(df_timed.index.week)
#grouped.plot.box()

###
# Approach 2: add explicit week columns and group on them

###
# select the corresponding weeks, prepare the data
df_timed = df_chl_out_3.set_index('time')
# NOTE(review): DatetimeIndex.week is deprecated in newer pandas in favour of
# .isocalendar().week; kept as-is for the pandas version this notebook was run on.
df_timed['week'] = df_timed.index.week

# Nov 1 falls in ISO week 44/45 and Mar 31 in week 13/14, hence the bounds
mask_NovMar = (df_timed.week<=14) | (df_timed.week >=44)
# .copy(): `week_rotate` is added below, and writing a new column onto a
# boolean-mask slice of df_timed triggers pandas' SettingWithCopyWarning
df_timed_NovMar = df_timed[mask_NovMar].copy()

# now rotate the week numbers so that the week of Nov-01 comes first
print('the min and max of the week index is %d, %d :' % (df_timed_NovMar.week.min(), df_timed_NovMar.week.max()) )
# make the 44th week the 1st week: 44..53 -> 1..10 and 1..14 -> 11..24
df_timed_NovMar['week_rotate'] = (df_timed_NovMar.week + 10 ) % 53

# NOTE(review): `chl_rate` looks like the raw difference between consecutive
# samples (the log-scale rate divides it by `freq` to get a per-day value), so the
# "per day" unit in the axis labels below may need to be confirmed.

# group once and reuse for the mean and the three quartile lines
weekly_chl_rate = df_timed_NovMar.groupby(['week_rotate'])['chl_rate']
axes1 = weekly_chl_rate.mean().plot(linestyle="-",color='b', linewidth=1)
weekly_chl_rate.quantile(.75).plot(linestyle="--",color='g', linewidth=0.35)
weekly_chl_rate.quantile(.50).plot(linestyle="--",color='r', linewidth=0.75)
weekly_chl_rate.quantile(.25).plot(linestyle="--",color='g', linewidth=0.35)
axes1.set_ylim(-3,2)
axes1.set_title("Line plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.yticks(np.arange(-3, 2, 0.5))
plt.xticks(np.arange(1, 25, 1))
plt.show()


# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
#http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chl_rate', by='week_rotate')
plt.suptitle("") # remove the automatic "grouped by" super-title
axes2.set_ylim(-1.6,1.6)
axes2.set_title("Box plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.show()

# the rate of change is slower on the regular scale

#matplotlib.pyplot.close("all")


the min and max of the week index is 1, 53 :

In [18]:
# Weekly plot of the Lagrangian rate of change of the log-scale chl-a,
# i.e. the relative rate (1/Chl_a) * D(Chl_a)/Dt stored in `chlor_a_logE_rate`.
# Dividing a concentration rate by a concentration leaves the unit 1/day, so the
# y-axis labels use $1/day$ rather than the concentration-rate unit.

# group once and reuse for the mean and the three quartile lines
weekly_log_rate = df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate']
axes1 = weekly_log_rate.mean().plot(linestyle="-",color='b', linewidth=1)
weekly_log_rate.quantile(.75).plot(linestyle="--",color='g', linewidth=0.35)
weekly_log_rate.quantile(.50).plot(linestyle="--",color='r', linewidth=0.75)
weekly_log_rate.quantile(.25).plot(linestyle="--",color='g', linewidth=0.35)
axes1.set_ylim(-1,0.5)
axes1.set_title("Line plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $1/day$', fontsize=10)
plt.yticks(np.arange(-1, 0.5, 0.25))
plt.xticks(np.arange(1, 25, 1))
#plt.show()


# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
#http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chlor_a_logE_rate', by='week_rotate')
plt.suptitle("") # remove the automatic "grouped by" super-title
axes2.set_ylim(-1,0.5)
axes2.set_title("Box plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $1/day$', fontsize=10)
#plt.show()


#plt.close('all')


Out[18]:
<matplotlib.text.Text at 0x11a1096a0>

In [19]:
# Close every open figure. Nothing else is needed: calling plt.cla() / plt.clf()
# after close('all') would implicitly create a new empty figure, and plt.show()
# would then display that blank figure as the cell output.
plt.close('all')


<matplotlib.figure.Figure at 0x11a4cbef0>

In [20]:
# Spatial plot of chl_rate for each of the five season months (11, 12, 1, 2, 3),
# one figure per month, all on the same fixed colour scale [-0.6, 0.6].
month_ind = np.array([11,12,1,2,3])
month_names = ['November', 'December','January','February', 'March']
for i, month in enumerate(month_ind):
    # rows whose timestamp falls in the current calendar month (all years pooled)
    aa = df_timed_NovMar[df_timed_NovMar.index.month == month]
    fig, ax  = plt.subplots(figsize=(8,6))
    print('\n\n summary of the Chl_rate \n', aa.chl_rate.describe())
    aa.plot(
        kind='scatter',
        x='lon',
        y='lat',
        c='chl_rate',
        cmap='RdBu_r',
        vmin=-0.6,
        vmax=0.6,
        edgecolor='none',
        ax=ax,
        title='Rate of change of the $Chl_a$ in %s' % month_names[i]
    )
    plt.xticks(np.arange(45, 80, 2.5))
    plt.yticks(np.arange(0, 28, 2.5))
    plt.show()



 summary of the Chl_rate 
 count    97.000000
mean      0.006104
std       0.111961
min      -0.404157
25%      -0.047131
50%       0.002679
75%       0.045035
max       0.460749
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    129.000000
mean       0.046758
std        0.349320
min       -1.179396
25%       -0.032362
50%        0.015151
75%        0.101449
max        3.077740
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    164.000000
mean       0.049640
std        2.162357
min      -17.974065
25%       -0.035945
50%        0.009778
75%        0.064007
max       18.673444
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    192.000000
mean       0.539977
std        6.774659
min      -30.045783
25%       -0.097550
50%        0.003890
75%        0.168379
max       66.830140
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    172.000000
mean       0.091771
std        3.731832
min      -15.048695
25%       -0.126783
50%       -0.015113
75%        0.013408
max       21.522390
Name: chl_rate, dtype: float64

In [21]:
# leftover smoke-test cell: only confirms that the kernel is still responsive
print('test')


test

In [ ]: