this is a file for calculating the Lagrangian rate of change of the log-scale $Chl_a$ using $\frac{D \ ( log_{e} Chl_a)}{Dt}:=\frac{1}{Chl_a}\frac{D \ ( Chl_a)}{Dt}$

  • A few remarks:
    • Unit $mg/(m^3 \cdot day)$
    • Natural logarithm added
    • All the rates on the same time frequency
    • validate the rate of change of the log-scale rate Chl-a by FD
    • Monthly trends
      • aaa
      • aaa

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import xarray as xr
from datetime import datetime
import datetime


/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)

In [2]:
# load the floats data 
# ********************
# *** CSV files ***
# ********************
# load the floats data, take the lon and lat as list out and calculate the distance

# load CSV output    
# some how the CSV Format has some compatibility issues here
# see readme file for the file convetion in the experiments,
# for instance "3" indicates distance is addd to the dataset 


plt.close('all')
plt.cla()  # axis
plt.clf()  # figure
plt.show()



# freqency
freq = 9
suf = 'D'
in_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_3.csv'
out_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_4.csv'
folder = './data_collector_modisa_chla9km/'
direc = folder + in_filename
direc

print('in_filename and path:', direc)
print('out_filename:', out_filename)

df_chl_out_3 = pd.read_csv(direc, index_col='index')
df_chl_out_3


<matplotlib.figure.Figure at 0x114c38e80>
in_filename and path: ./data_collector_modisa_chla9km/df_chl_out_9D_modisa_3.csv
out_filename: df_chl_out_9D_modisa_4.csv
Out[2]:
id time temp var_lat var_tmp ve var_lon vn lon spd lat chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist
index
3627 10206 2002-11-07 NaN 0.000494 1000.000000 -2.217083 0.001535 2.990778 67.132000 5.446583 11.126222 0.130267 -0.885166 -0.004264 NaN 540.82800
3629 11089 2002-11-07 28.829472 0.000064 0.003812 -16.412472 0.000123 -3.991722 64.391056 17.995028 14.279667 0.197237 -0.705012 0.074821 -1.125976 872.56400
3631 15707 2002-11-07 NaN 0.000074 1000.000000 -12.316611 0.000147 -18.253056 67.155306 24.656417 13.142667 0.152200 -0.817584 -0.004472 NaN 574.11000
3649 34710 2002-11-07 28.448167 0.000069 0.001857 -2.827667 0.000135 19.539861 63.041861 20.774778 17.717111 0.372568 -0.428795 0.018603 -1.730417 531.86600
3886 10206 2002-11-16 NaN 0.001033 1000.000000 -1.089083 0.003872 0.501111 67.029167 4.028889 11.179833 0.145233 -0.837935 0.014966 -1.824894 553.88000
3908 34710 2002-11-16 27.578250 0.000072 0.001784 -22.552528 0.000145 43.271833 62.309722 50.700222 20.075111 0.519753 -0.284203 0.147185 -0.832136 342.85300
3914 34721 2002-11-16 29.067417 0.000055 0.001740 -0.677889 0.000102 12.004250 68.328250 12.875833 13.725389 0.140869 -0.851185 -0.011473 NaN 479.64400
4145 10206 2002-11-25 NaN 0.000959 1000.000000 -6.485944 0.003371 2.996944 66.854361 7.461361 11.321750 0.139399 -0.855741 -0.005834 NaN 571.47300
4163 34315 2002-11-25 28.913944 0.000055 0.004285 -24.691694 0.000103 -20.388611 57.571889 37.391222 6.818222 0.144874 -0.839008 0.008411 -2.075127 714.86500
4166 34709 2002-11-25 NaN 0.000066 1000.000000 -11.325471 0.000130 -18.946647 74.747500 26.364529 11.582167 0.210925 -0.675872 -0.089378 NaN 68.76290
4167 34710 2002-11-25 27.294583 0.000057 0.001671 29.505194 0.000108 -4.809917 62.472944 67.575167 22.456139 0.398372 -0.399711 -0.121380 NaN 269.94600
4173 34721 2002-11-25 28.970556 0.000058 0.001745 -1.697028 0.000108 9.686417 68.205083 11.675056 14.417861 0.140769 -0.851495 -0.000101 NaN 530.85500
4406 11089 2002-12-04 27.255528 0.000054 0.003654 7.070667 0.000100 25.006417 58.875722 40.874583 14.981139 0.304682 -0.516153 -0.052500 NaN 386.70300
4422 34315 2002-12-04 27.929389 0.000054 0.003589 -27.117417 0.000103 43.040361 54.838861 63.164750 7.494583 0.155347 -0.808697 0.010473 -1.979949 487.88800
4426 34710 2002-12-04 26.714083 0.000070 0.001759 10.868222 0.000136 7.639333 64.023417 37.253306 21.000139 4.420269 0.645449 4.021897 0.604431 453.99000
4665 11089 2002-12-13 27.108417 0.000067 0.003622 -5.787778 0.000129 -23.836694 59.787083 32.146417 14.522306 0.367653 -0.434562 0.062971 -1.200859 491.64200
4681 34315 2002-12-13 27.631806 0.000051 0.003517 -19.507167 0.000094 -7.936583 53.295944 31.310972 9.080639 0.349429 -0.456641 0.194082 -0.712015 253.12700
4685 34710 2002-12-13 26.466472 0.000063 0.001718 3.940139 0.000119 4.079000 64.554722 8.914750 22.029972 0.512620 -0.290204 -3.907649 NaN 347.18900
4922 10206 2002-12-22 NaN 0.000940 1000.000000 -9.425833 0.003372 2.559389 64.642194 10.393528 12.534861 0.155556 -0.808113 0.028268 -1.548705 823.52900
4924 11089 2002-12-22 26.925667 0.000060 0.003566 -10.770861 0.000114 13.984000 58.652222 22.169611 14.199306 0.308563 -0.510657 -0.059090 NaN 442.69400
4944 34710 2002-12-22 26.205472 0.000061 0.001702 -2.111278 0.000116 -16.493444 64.675000 18.709194 21.675778 0.776345 -0.109945 0.263725 -0.578849 380.50100
4950 34721 2002-12-22 27.943028 0.000061 0.001769 -0.080611 0.000115 5.353056 66.828528 9.572583 15.074583 0.160823 -0.793651 -0.017707 NaN 695.42800
5181 10206 2002-12-31 NaN 0.000537 1000.000000 -11.699944 0.001649 -4.870667 63.871028 13.154361 12.416694 0.256233 -0.591366 0.100677 -0.997072 907.45100
5183 11089 2002-12-31 26.255944 0.000063 0.003681 10.180333 0.000117 -9.022861 58.863417 20.751222 14.838250 0.320671 -0.493940 0.012108 -1.916916 399.72800
5203 34710 2002-12-31 25.381583 0.000077 0.001765 -14.688917 0.000151 3.447361 63.919083 23.436889 20.721889 0.404953 -0.392596 -0.371392 NaN 454.61700
5209 34721 2002-12-31 27.297111 0.000067 0.001785 -1.018972 0.000125 10.029167 66.825194 12.550472 15.682056 0.249487 -0.602952 0.088664 -1.052254 686.77800
5442 11089 2003-01-09 25.990083 0.000070 0.003848 -3.652639 0.000133 -23.210667 59.086667 23.881250 13.191722 0.378669 -0.421741 0.057998 -1.236591 499.51200
5462 34710 2003-01-09 25.317000 0.000050 0.001599 NaN 0.000087 NaN 63.361000 NaN 21.215000 0.493560 -0.306660 0.088607 -1.052530 385.80100
5468 34721 2003-01-09 26.964722 0.000074 0.001775 1.856056 0.000144 1.840833 66.829889 5.696417 16.075250 0.646135 -0.189677 0.396648 -0.401595 653.41700
5701 11089 2003-01-18 26.031778 0.000083 0.003669 -9.268667 0.000166 -7.472333 58.635444 14.541833 12.272028 0.342228 -0.465684 -0.036440 NaN 444.61900
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
142640 114945 2016-01-22 27.843139 0.000124 0.001814 -13.741944 0.000054 -2.704889 63.260528 15.402556 11.422111 0.219022 -0.659512 0.062111 -1.206833 957.57300
142673 147127 2016-01-22 25.954889 0.000069 0.001705 -7.336500 0.000031 -11.049722 63.997917 15.157444 17.256222 3.088808 0.489791 2.726069 0.435537 639.64100
142898 114917 2016-01-31 28.979722 0.000102 0.001814 -2.406389 0.000045 -2.192139 71.658056 20.776306 13.715611 0.175277 -0.756275 0.025753 -1.589177 238.71900
142899 114945 2016-01-31 27.714000 0.000160 0.001914 -16.739361 0.000073 1.794194 62.102083 17.199944 11.420722 0.234702 -0.629483 0.015680 -1.804661 832.47000
142932 147127 2016-01-31 25.849222 0.000050 0.001742 -3.506917 0.000022 -7.741306 63.514806 17.526722 16.385750 0.595535 -0.225093 -2.493273 NaN 659.08600
142942 60150420 2016-01-31 27.573417 0.000003 0.001684 0.030083 0.000005 9.659333 60.278528 20.998750 7.080889 0.200215 -0.698503 0.051684 -1.286640 868.93600
143157 114917 2016-02-09 28.722611 0.000104 0.001769 -6.625111 0.000046 -2.034972 71.368361 16.640417 13.739194 0.196200 -0.707300 0.020924 -1.679365 251.86900
143158 114945 2016-02-09 27.537306 0.000221 0.001917 -14.010833 0.000108 6.212028 61.105806 16.097361 11.660444 0.335989 -0.473675 0.101287 -0.994447 720.59500
143191 147127 2016-02-09 25.848778 0.000047 0.001693 -12.734167 0.000020 -2.480556 62.991139 15.415222 16.153250 0.607908 -0.216162 0.012373 -1.907516 630.58300
143201 60150420 2016-02-09 27.525250 0.000003 0.001684 12.848611 0.000005 21.157944 61.206611 25.635167 8.445139 0.170557 -0.768130 -0.029658 NaN 858.93700
143416 114917 2016-02-18 28.743500 0.000092 0.001803 -4.386583 0.000040 1.403861 70.963500 16.312361 13.722472 0.164850 -0.782911 -0.031351 NaN 270.00600
143417 114945 2016-02-18 27.177694 0.000163 0.001962 -15.543556 0.000075 2.389806 59.988667 16.562306 12.004306 0.336058 -0.473586 0.000069 -4.158208 594.47600
143450 147127 2016-02-18 25.736972 0.000058 0.001649 -2.414944 0.000025 1.146861 62.379667 12.345361 16.124167 0.672385 -0.172382 0.064477 -1.190592 576.38700
143671 114873 2016-02-27 27.733111 0.000087 0.001732 -31.747722 0.000038 15.603167 56.352083 41.416500 8.339083 0.167905 -0.774938 0.007953 -2.099460 503.05700
143675 114917 2016-02-27 29.000333 0.000105 0.001901 -9.653722 0.000048 2.385556 70.516333 18.903778 13.910333 0.229563 -0.639098 0.064713 -1.189008 312.89400
143676 114945 2016-02-27 27.512694 0.000254 0.001959 -14.878222 0.000128 1.333417 58.875833 15.882194 12.104000 0.226818 -0.644322 -0.109240 NaN 472.29000
143709 147127 2016-02-27 26.362972 0.000042 0.001699 -1.087917 0.000018 -3.401194 62.371639 6.515333 16.109972 0.805254 -0.094067 0.132868 -0.876579 578.87100
143930 114873 2016-03-07 28.297861 0.000075 0.001758 -13.045139 0.000033 -5.814056 54.692750 15.114833 8.266639 0.134190 -0.872280 -0.033715 NaN 432.92600
143934 114917 2016-03-07 29.101528 0.000091 0.001837 -12.179806 0.000040 -4.419611 69.618139 15.530750 13.849306 0.171656 -0.765342 -0.057907 NaN 373.81500
143935 114945 2016-03-07 28.085083 0.000321 0.001949 -8.156028 0.000171 -3.477639 58.077583 9.930167 12.047889 0.216250 -0.665043 -0.010568 NaN 386.44600
143953 127429 2016-03-07 29.722308 0.000065 0.001782 -34.391333 0.000028 0.115000 73.535231 35.608083 5.213615 0.126091 -0.899317 0.014539 -1.837467 8.90195
143968 147127 2016-03-07 27.421306 0.000042 0.001958 -2.641417 0.000018 -6.008333 62.014639 22.855139 15.764444 0.334474 -0.475637 -0.470779 NaN 569.24400
144189 114873 2016-03-16 28.674056 0.000064 0.001759 -6.747194 0.000029 8.358806 54.190139 12.918111 8.238500 0.119380 -0.923068 -0.014810 NaN 388.67300
144193 114917 2016-03-16 29.113333 0.000092 0.001790 -10.657028 0.000040 -6.384528 69.000583 17.882028 13.343750 0.128194 -0.892132 -0.043462 NaN 393.81300
144194 114945 2016-03-16 28.637028 0.000259 0.002067 -7.436000 0.000126 -8.739917 57.571528 13.411083 11.686028 0.563416 -0.249171 0.347165 -0.459464 343.56000
144227 147127 2016-03-16 28.483083 0.000046 0.002343 -1.734833 0.000020 -9.869556 61.908167 18.064667 15.216472 0.315957 -0.500372 -0.018517 NaN 601.17700
144237 60150420 2016-03-16 28.608389 0.000003 0.001734 -31.028806 0.000005 1.361028 60.046306 32.312861 10.023722 0.152523 -0.816664 0.035222 -1.453183 663.79500
144448 114873 2016-03-25 28.814056 0.000072 0.001766 -1.985639 0.000033 18.662333 53.708333 22.435778 9.284889 0.123338 -0.908902 0.003958 -2.402510 280.67600
144452 114917 2016-03-25 29.748944 0.000099 0.001962 15.114722 0.000045 -4.109111 68.883944 32.779667 12.568778 0.136725 -0.864153 0.008531 -2.069013 373.33400
144453 114945 2016-03-25 28.933889 0.000407 0.001999 9.933694 0.000218 -15.121167 57.704833 22.957333 10.818694 0.126456 -0.898060 -0.436959 NaN 393.37100

672 rows × 16 columns


In [3]:
# All the rates on the same time frequency
check1 = df_chl_out_3.chl_rate/ df_chl_out_3.chlor_a 
check1 = check1/ freq
check2 = df_chl_out_3.chl_rate.divide(freq *df_chl_out_3.chlor_a, axis = 'index')
# an check 
np.sum(abs(check1 - check2))


Out[3]:
2.5532554586218947e-15

In [4]:
# add the column to the dataframe and output the dataset
df_chl_out_3['chlor_a_logE_rate'] = pd.Series(np.array(check2), index=df_chl_out_3.index)
df_chl_out_3.head()


Out[4]:
id time temp var_lat var_tmp ve var_lon vn lon spd lat chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
3627 10206 2002-11-07 NaN 0.000494 1000.000000 -2.217083 0.001535 2.990778 67.132000 5.446583 11.126222 0.130267 -0.885166 -0.004264 NaN 540.828 -0.003637
3629 11089 2002-11-07 28.829472 0.000064 0.003812 -16.412472 0.000123 -3.991722 64.391056 17.995028 14.279667 0.197237 -0.705012 0.074821 -1.125976 872.564 0.042150
3631 15707 2002-11-07 NaN 0.000074 1000.000000 -12.316611 0.000147 -18.253056 67.155306 24.656417 13.142667 0.152200 -0.817584 -0.004472 NaN 574.110 -0.003264
3649 34710 2002-11-07 28.448167 0.000069 0.001857 -2.827667 0.000135 19.539861 63.041861 20.774778 17.717111 0.372568 -0.428795 0.018603 -1.730417 531.866 0.005548
3886 10206 2002-11-16 NaN 0.001033 1000.000000 -1.089083 0.003872 0.501111 67.029167 4.028889 11.179833 0.145233 -0.837935 0.014966 -1.824894 553.880 0.011450

In [5]:
df_chl_out_3.chlor_a_logE_rate.describe()  # more scattered on the left hand side


Out[5]:
count    672.000000
mean      -0.047204
std        0.298702
min       -5.161217
25%       -0.027176
50%       -0.002103
75%        0.018240
max        0.106001
Name: chlor_a_logE_rate, dtype: float64

In [6]:
# visualize the ROC of log(chl_a) around the arabian sea region
fig, ax  = plt.subplots(figsize=(12,10))
df_chl_out_3.plot(kind='scatter', x='lon', y='lat', c='chlor_a_logE_rate', cmap='RdBu_r', vmin=check2.median()-0.5*check2.std(), vmax=check2.max(), edgecolor='none', ax=ax, title = 'rate of change of the log-scale chl-a')


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x114c38a90>

In [7]:
# histogram for non standarized data
axdf_chl = df_chl_out_3.chlor_a_logE_rate.dropna().hist(bins=100,range=[-1.5,0.5]) # there are very a few small values on the left
axdf_chl.set_title('histogram of the rate of change of the log-scale chl-a')


Out[7]:
<matplotlib.text.Text at 0x1047210f0>

In [8]:
# standarized series
tmp = df_chl_out_3.chlor_a_logE_rate.dropna()
tmp = (tmp - tmp.mean())/tmp.std()
axdf_chl_stdan = tmp.hist(bins=100,range=[-1.5,0.5])   # there are very a few small values on the left
axdf_chl_stdan.set_title('histogram of the standardized rate of change of the log-scale chl-a')


Out[8]:
<matplotlib.text.Text at 0x117a1cbe0>

In [20]:
(np.log(0.139399)-np.log(0.145233))   / freq


Out[20]:
-0.004555447177321037

In [10]:
###########################
# On 2D-subsampling Dataset
###########################
# Val 1:
# id:10206, time:2002-11-25"
# (0.139399 - 0.145233) / (freq*0.139399)    == -0.004650121035460963
#########
# Val 2:
# id:10206, time:2002-11-25"
# (np.log(0.139399)-np.log(0.145233))   / freq == -0.004555447177321037 # very close to the value above

df_chl_out_3.sort_values(by=['id', 'time']).head()


Out[10]:
id time temp var_lat var_tmp ve var_lon vn lon spd lat chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
3627 10206 2002-11-07 NaN 0.000494 1000.0 -2.217083 0.001535 2.990778 67.132000 5.446583 11.126222 0.130267 -0.885166 -0.004264 NaN 540.828 -0.003637
3886 10206 2002-11-16 NaN 0.001033 1000.0 -1.089083 0.003872 0.501111 67.029167 4.028889 11.179833 0.145233 -0.837935 0.014966 -1.824894 553.880 0.011450
4145 10206 2002-11-25 NaN 0.000959 1000.0 -6.485944 0.003371 2.996944 66.854361 7.461361 11.321750 0.139399 -0.855741 -0.005834 NaN 571.473 -0.004650
4922 10206 2002-12-22 NaN 0.000940 1000.0 -9.425833 0.003372 2.559389 64.642194 10.393528 12.534861 0.155556 -0.808113 0.028268 -1.548705 823.529 0.020191
5181 10206 2002-12-31 NaN 0.000537 1000.0 -11.699944 0.001649 -4.870667 63.871028 13.154361 12.416694 0.256233 -0.591366 0.100677 -0.997072 907.451 0.043657

In [11]:
# convert into datetime
df_chl_out_3['time'] = pd.to_datetime(df_chl_out_3['time']) # ,format='%m/%d/%y %I:%M%p'
df_chl_out_3.sort_values(by=['id', 'time']).head()  # a check


Out[11]:
id time temp var_lat var_tmp ve var_lon vn lon spd lat chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
3627 10206 2002-11-07 NaN 0.000494 1000.0 -2.217083 0.001535 2.990778 67.132000 5.446583 11.126222 0.130267 -0.885166 -0.004264 NaN 540.828 -0.003637
3886 10206 2002-11-16 NaN 0.001033 1000.0 -1.089083 0.003872 0.501111 67.029167 4.028889 11.179833 0.145233 -0.837935 0.014966 -1.824894 553.880 0.011450
4145 10206 2002-11-25 NaN 0.000959 1000.0 -6.485944 0.003371 2.996944 66.854361 7.461361 11.321750 0.139399 -0.855741 -0.005834 NaN 571.473 -0.004650
4922 10206 2002-12-22 NaN 0.000940 1000.0 -9.425833 0.003372 2.559389 64.642194 10.393528 12.534861 0.155556 -0.808113 0.028268 -1.548705 823.529 0.020191
5181 10206 2002-12-31 NaN 0.000537 1000.0 -11.699944 0.001649 -4.870667 63.871028 13.154361 12.416694 0.256233 -0.591366 0.100677 -0.997072 907.451 0.043657

In [12]:
# CSV CSV CSV CSV with specfic index
# df_chl_out_3.csv --  {lat, lon, temp, chl_rate, dist}
# df_chl_out_3.csv --  {lat, lon, temp, chl_rate, dist, chlor_a_log10_rate}

# 3 represents 3 features: {temp, chl_rate, dist}
# 4 represents 4 features: {temp, chl_rate, dist, chlor_a_log10_rate }
print('out_filename:', out_filename)
df_chl_out_3.to_csv(out_filename, sep=',', index_label = 'index')

# load CSV output
test = pd.read_csv(out_filename, index_col='index')

# a check
test.sort_values(by=['id', 'time']).head()


out_filename: df_chl_out_9D_modisa_4.csv
Out[12]:
id time temp var_lat var_tmp ve var_lon vn lon spd lat chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
3627 10206 2002-11-07 NaN 0.000494 1000.0 -2.217083 0.001535 2.990778 67.132000 5.446583 11.126222 0.130267 -0.885166 -0.004264 NaN 540.828 -0.003637
3886 10206 2002-11-16 NaN 0.001033 1000.0 -1.089083 0.003872 0.501111 67.029167 4.028889 11.179833 0.145233 -0.837935 0.014966 -1.824894 553.880 0.011450
4145 10206 2002-11-25 NaN 0.000959 1000.0 -6.485944 0.003371 2.996944 66.854361 7.461361 11.321750 0.139399 -0.855741 -0.005834 NaN 571.473 -0.004650
4922 10206 2002-12-22 NaN 0.000940 1000.0 -9.425833 0.003372 2.559389 64.642194 10.393528 12.534861 0.155556 -0.808113 0.028268 -1.548705 823.529 0.020191
5181 10206 2002-12-31 NaN 0.000537 1000.0 -11.699944 0.001649 -4.870667 63.871028 13.154361 12.416694 0.256233 -0.591366 0.100677 -0.997072 907.451 0.043657

In [13]:
## check the week numbers of the range from Nov-01-01 to Mar-01-01
for year in range(2002, 2017):
    print(str(year)+'-11-01 is week', datetime.datetime(year, 11, 1).isocalendar()[1])  # 44, 45, 

print('----')
for year in range(2002, 2017):
    print(str(year)+'-3-31 is week', datetime.datetime(year, 3, 31).isocalendar()[1])  # 13, 14

plt.show()


2002-11-01 is week 44
2003-11-01 is week 44
2004-11-01 is week 45
2005-11-01 is week 44
2006-11-01 is week 44
2007-11-01 is week 44
2008-11-01 is week 44
2009-11-01 is week 44
2010-11-01 is week 44
2011-11-01 is week 44
2012-11-01 is week 44
2013-11-01 is week 44
2014-11-01 is week 44
2015-11-01 is week 44
2016-11-01 is week 44
----
2002-3-31 is week 13
2003-3-31 is week 14
2004-3-31 is week 14
2005-3-31 is week 13
2006-3-31 is week 13
2007-3-31 is week 13
2008-3-31 is week 14
2009-3-31 is week 14
2010-3-31 is week 13
2011-3-31 is week 13
2012-3-31 is week 13
2013-3-31 is week 13
2014-3-31 is week 14
2015-3-31 is week 14
2016-3-31 is week 13

In [14]:
##### weekly plot on the Lagrangian rate of change of the chl-a
#sns.set(style="white")
#sns.set(color_codes=True)

### 
# Approach 1 depreciated
#grouped = df_timed.chl_rate.groupby(df_timed.index.week)
#grouped.plot.box()

### 
# Approach 2
# prepare data  a. use index or columns to group

###
# select the corresponding weeks, prepare the data
df_timed = df_chl_out_3.set_index('time')
df_timed['week'] = df_timed.index.week

mask_NovMar = (df_timed.week<=14) | (df_timed.week >=44)
df_timed_NovMar = df_timed[mask_NovMar]
#df_timed_NovMar.head()

# now rotate the index to make Nov-01-01 the first month
print('the min and max of the week index is %d, %d :' % (df_timed_NovMar.week.min(), df_timed_NovMar.week.max()) )
# make the 44th week the 1st week
df_timed_NovMar['week_rotate'] = (df_timed_NovMar.week + 10 ) % 53
df_timed_NovMar.week_rotate.describe() # now from 1 to 24


axes1=df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].mean().plot(linestyle="-",color='b', linewidth=1)
df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].quantile(.75).plot(linestyle="--",color='g', linewidth=0.35)
df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].quantile(.50).plot(linestyle="--",color='r', linewidth=0.75)
df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].quantile(.25).plot(linestyle="--",color='g', linewidth=0.35)
axes1.set_ylim(-3,2)
axes1.set_title("Line plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.yticks(np.arange(-3, 2, 0.5))
plt.xticks(np.arange(1, 25, 1))
plt.show()


# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
#http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chl_rate', by='week_rotate')
plt.suptitle("") # equivalent
axes2.set_ylim(-1.6,1.6)
axes2.set_title("Box plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.show()

# the rate of change is slower on the regular scale

#matplotlib.pyplot.close("all")


the min and max of the week index is 1, 53 :

In [15]:
# weekly plot on the Lagrangian rate of change of the log-scale chl-a
# This is the rate of change on the exponential scale
axes1=df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].mean().plot(linestyle="-",color='b', linewidth=1)
df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].quantile(.75).plot(linestyle="--",color='g', linewidth=0.35)
df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].quantile(.50).plot(linestyle="--",color='r', linewidth=0.75)
df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].quantile(.25).plot(linestyle="--",color='g', linewidth=0.35)
axes1.set_ylim(-1,0.5)
axes1.set_title("Line plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.yticks(np.arange(-1, 0.5, 0.25))
plt.xticks(np.arange(1, 25, 1))
#plt.show()


# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
#http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chlor_a_logE_rate', by='week_rotate')
plt.suptitle("") # equivalent
axes2.set_ylim(-1,0.5)
axes2.set_title("Box plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
#plt.show()


#plt.close('all')


Out[15]:
<matplotlib.text.Text at 0x119ddd588>

In [16]:
plt.close('all')
plt.cla()  # axis
plt.clf()  # figure
plt.show()


<matplotlib.figure.Figure at 0x11a566da0>

In [17]:
# spatial plot for different months --  totally five months 1, 2, 3, 11, 12, 
for i in range(0,5,1):
    month_ind = np.array([11,12,1,2,3])
    month_names = ['November', 'December','January','February', 'March']
    aa = df_timed_NovMar[df_timed_NovMar.index.month == month_ind[i]]
    fig, ax  = plt.subplots(figsize=(8,6))
    ##aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r', vmin=aa.chl_rate.median()-0.5*aa.chl_rate.std(), vmax=aa.chl_rate.median()-0.5*aa.chl_rate.std(), edgecolor='none', ax=ax, title = 'rate of change of the $Chl_a$')
    ##aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r', vmin=aa.chl_rate.mean()-0.5*aa.chl_rate.std(), vmax=aa.chl_rate.mean()+0.5*aa.chl_rate.std(), edgecolor='none', ax=ax, title = 'rate of change of the $Chl_a$')
    print('\n\n summary of the Chl_rate \n', aa.chl_rate.describe())
    aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r', vmin=-0.6, vmax=0.6, edgecolor='none', ax=ax, title = 'Rate of change of the $Chl_a$ in %s' % (month_names[i]))
    plt.xticks(np.arange(45, 80, 2.5))
    plt.yticks(np.arange(0, 28, 2.5))
    plt.show()



 summary of the Chl_rate 
 count    96.000000
mean      0.012455
std       0.429691
min      -2.486943
25%      -0.034751
50%      -0.001842
75%       0.037115
max       3.221807
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    107.000000
mean       0.043719
std        0.669715
min       -3.907649
25%       -0.039013
50%        0.006873
75%        0.091452
max        4.021897
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    138.000000
mean       0.112610
std        0.940781
min       -2.493273
25%       -0.035617
50%        0.003296
75%        0.071782
max        8.387535
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    163.000000
mean      -0.170131
std        4.628012
min      -36.659316
25%       -0.062838
50%        0.002575
75%        0.118639
max       29.752610
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    168.000000
mean      -0.260570
std        3.774943
min      -27.225871
25%       -0.139986
50%       -0.023220
75%        0.002949
max       26.473112
Name: chl_rate, dtype: float64

In [18]:
print("test")


test

In [ ]:


In [ ]: