this is a file for calculating the Lagrangian rate of change of the log-scale $Chl_a$ using $\frac{D \ ( log_{e} Chl_a)}{Dt}:=\frac{1}{Chl_a}\frac{D \ ( Chl_a)}{Dt}$

  • A few remarks:
    • Unit $mg/(m^3 \cdot day)$
    • Natural logarithm added
    • All the rates on the same time frequency
    • validate the rate of change of the log-scale rate Chl-a by FD
    • Monthly trends
      • aaa
      • aaa

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import xarray as xr
from datetime import datetime
import datetime


/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)

In [2]:
# load the floats data 
# ********************
# *** CSV files ***
# ********************
# load the floats data, take the lon and lat as list out and calculate the distance

# load CSV output    
# some how the CSV Format has some compatibility issues here
# see readme file for the file convetion in the experiments,
# for instance "3" indicates distance is addd to the dataset 


plt.close('all')
plt.cla()  # axis
plt.clf()  # figure
plt.show()



# freqency
freq = 15
suf = 'D'
in_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_3.csv'
out_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_4.csv'
folder = './data_collector_modisa_chla9km/'
direc = folder + in_filename
direc

print('in_filename and path:', direc)
print('out_filename:', out_filename)

df_chl_out_3 = pd.read_csv(direc, index_col='index')
df_chl_out_3


<matplotlib.figure.Figure at 0x115437e80>
in_filename and path: ./data_collector_modisa_chla9km/df_chl_out_15D_modisa_3.csv
out_filename: df_chl_out_15D_modisa_4.csv
Out[2]:
id time lon vn spd var_lon var_tmp ve temp var_lat lat chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist
index
2073 10206 2002-11-01 67.239550 4.393483 7.707300 0.001396 1000.000000 -4.056967 NaN 0.000461 11.003467 0.131253 -0.881891 -0.000230 NaN 532.47000
2077 15707 2002-11-01 67.516567 -13.316117 24.136500 0.000146 1000.000000 -15.863733 NaN 0.000074 13.536150 0.158023 -0.801279 0.002436 -2.613278 552.07500
2095 34710 2002-11-01 63.079750 16.525517 17.614183 0.000127 0.001804 -1.195583 28.666317 0.000066 17.411183 0.392572 -0.406080 -0.167135 NaN 550.51700
2101 34721 2002-11-01 67.881250 6.486583 14.737233 0.000121 0.001813 6.646367 29.393383 0.000063 12.702833 0.152538 -0.816623 0.010258 -1.988946 481.64700
2332 10206 2002-11-16 66.993317 1.400700 4.379483 0.003644 1000.000000 -2.148200 NaN 0.000991 11.224017 0.140250 -0.853096 0.008997 -2.045886 558.22800
2336 15707 2002-11-16 65.486917 -5.950300 21.719050 0.000135 1000.000000 -18.206700 NaN 0.000070 12.378467 0.154963 -0.809773 -0.003061 NaN 730.53000
2354 34710 2002-11-16 62.155933 37.775033 54.870467 0.000127 0.001738 1.705867 27.485500 0.000064 21.077217 0.552854 -0.257389 0.160282 -0.795115 273.45700
2360 34721 2002-11-16 68.290100 10.951950 12.429300 0.000105 0.001749 -1.591367 29.041267 0.000056 13.955783 0.152553 -0.816579 0.000015 -4.818009 494.47500
2591 10206 2002-12-01 66.049467 6.777450 14.383300 0.007490 1000.000000 -12.568283 NaN 0.001855 11.718717 0.141565 -0.849043 0.001315 -2.881075 660.64700
2595 15707 2002-12-01 63.289417 -12.818550 29.650033 0.000138 1000.000000 -25.566517 NaN 0.000071 11.114050 0.247319 -0.606743 0.092356 -1.034533 960.32400
2609 34315 2002-12-01 55.034450 24.306317 60.364000 0.000101 0.003703 -36.634200 28.110217 0.000054 7.658883 0.118150 -0.927566 -0.016725 NaN 499.86800
2613 34710 2002-12-01 64.015367 -8.248783 41.290150 0.000130 0.001717 10.778067 26.730583 0.000067 21.432967 2.915652 0.464736 2.362798 0.373427 409.74400
2619 34721 2002-12-01 67.992300 5.328283 12.531333 0.000108 0.001753 -5.548633 28.641567 0.000058 14.994667 0.168081 -0.774481 0.015528 -1.808885 588.90900
2850 10206 2002-12-16 64.844550 3.686400 10.204233 0.002841 1000.000000 -8.721100 NaN 0.000811 12.433233 0.159126 -0.798257 0.017561 -1.755447 799.90500
2868 34315 2002-12-16 52.744250 -1.367500 22.396050 0.000095 0.003585 -14.391850 27.481417 0.000051 8.985083 0.311255 -0.506884 0.193105 -0.714206 214.02900
2872 34710 2002-12-16 64.650183 -9.314167 13.730967 0.000117 0.001725 -0.694083 26.339233 0.000062 21.832100 0.505400 -0.296365 -2.410253 NaN 368.75300
2878 34721 2002-12-16 66.970417 1.165283 12.327283 0.000108 0.001785 -5.328950 28.073350 0.000058 15.086267 0.167227 -0.776694 -0.000854 NaN 683.47300
3109 10206 2002-12-31 63.592417 -4.998550 13.272817 0.001646 1000.000000 -11.821550 NaN 0.000543 12.299100 0.287051 -0.542041 0.127924 -0.893047 935.97400
3111 11089 2002-12-31 58.973533 -16.817750 24.118383 0.000128 0.003712 4.544133 26.146367 0.000068 14.276650 0.230067 -0.638146 -0.094273 NaN 455.66200
3127 34315 2002-12-31 50.213400 -61.955000 76.608457 0.000105 0.003617 -44.019714 26.740714 0.000056 7.638171 0.321669 -0.492591 0.010414 -1.982396 45.51470
3131 34710 2002-12-31 63.904000 3.447361 23.436889 0.000149 0.001761 -14.688917 25.379838 0.000077 20.735216 0.461597 -0.335737 -0.043803 NaN 454.61700
3137 34721 2002-12-31 66.822550 6.702033 9.685683 0.000138 0.001788 -0.076750 27.172083 0.000072 15.832983 0.309126 -0.509864 0.141899 -0.848020 675.61700
3368 10206 2003-01-15 62.423889 -9.938717 13.699698 0.003280 1000.000000 -9.087642 NaN 0.000960 11.433148 0.338916 -0.469909 0.051865 -1.285128 866.94700
3370 11089 2003-01-15 58.621383 -8.301500 14.038417 0.000151 0.003781 -7.165833 26.072067 0.000077 12.282733 0.330743 -0.480509 0.100676 -0.997073 444.28100
3372 15707 2003-01-15 54.002717 -5.573450 35.922683 0.000211 1000.000000 -29.855333 NaN 0.000100 9.042950 0.452780 -0.344113 -0.103847 NaN 323.92300
3396 34721 2003-01-15 67.187200 -3.750967 9.448683 0.000126 0.001818 6.877467 26.832100 0.000067 15.964767 0.429697 -0.366838 0.120571 -0.918757 640.05800
3629 11089 2003-01-30 56.892533 -11.493067 26.129067 0.000140 0.003798 -21.053300 26.369467 0.000073 11.105367 0.311911 -0.505969 -0.018832 NaN 302.30700
3631 15707 2003-01-30 50.730028 -31.378943 60.077371 0.000176 1000.000000 -49.389343 NaN 0.000088 7.349722 0.325951 -0.486847 -0.126829 NaN 110.20700
3633 27139 2003-01-30 59.643033 -3.858350 32.703850 0.000171 0.003149 4.412433 25.042300 0.000085 19.524433 2.149826 0.332403 1.344336 0.128508 122.71700
3655 34721 2003-01-30 68.127233 -3.380383 9.659933 0.000132 0.001796 6.035083 26.739717 0.000070 15.351367 0.430069 -0.366462 0.000372 -3.429461 569.24500
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
84624 114945 2015-11-23 69.103400 -3.080033 10.111817 0.000050 0.001794 -5.392933 29.541333 0.000023 11.812233 0.132759 -0.876937 0.013752 -1.861629 333.95100
84657 147127 2015-11-23 64.410950 -0.198300 5.674267 0.000022 0.001686 -2.502350 27.760583 0.000011 17.746750 0.421986 -0.374702 0.072332 -1.140670 651.16200
84916 147127 2015-12-08 64.586200 -6.130517 8.335783 0.000026 0.001700 4.455233 27.126767 0.000013 17.364950 0.439556 -0.356986 0.017570 -1.755222 685.84000
85141 114917 2015-12-23 73.546750 24.809383 29.790217 0.000036 0.001774 -9.046767 29.660383 0.000042 11.666533 0.137114 -0.862917 0.012313 -1.909648 61.48260
85175 147127 2015-12-23 65.026517 1.475750 10.476167 0.000036 0.001686 1.872150 26.635300 0.000041 16.753867 0.502628 -0.298754 0.063072 -1.200166 720.39900
85400 114917 2016-01-07 72.912583 6.765117 24.006750 0.000036 0.001806 -3.130950 29.478683 0.000083 13.135200 0.142076 -0.847478 0.004962 -2.304339 160.55200
85401 114945 2016-01-07 64.408783 -0.272433 12.286217 0.000055 0.001804 -11.338717 28.324867 0.000124 11.528683 0.158406 -0.800230 0.020901 -1.679829 838.26500
85434 147127 2016-01-07 64.567500 3.390800 19.837250 0.000026 0.001673 -6.353083 26.369583 0.000061 17.357367 0.506074 -0.295786 0.003446 -2.462673 688.14500
85659 114917 2016-01-22 72.022417 -0.198383 21.397750 0.000046 0.001795 -8.814633 29.079617 0.000104 13.799200 0.157949 -0.801483 0.015873 -1.799345 241.36400
85660 114945 2016-01-22 62.867517 -0.428717 16.728783 0.000063 0.001856 -15.509183 27.798883 0.000140 11.411450 0.238479 -0.622550 0.080073 -1.096512 914.39700
85693 147127 2016-01-22 63.814283 -11.271417 16.532050 0.000027 0.001689 -8.157433 25.910167 0.000060 16.953317 0.827979 -0.081980 0.321906 -0.492271 647.14800
85918 114917 2016-02-06 71.377317 0.699517 17.915100 0.000047 0.001769 -4.802550 28.732783 0.000106 13.714500 0.191272 -0.718349 0.033323 -1.477259 247.72400
85919 114945 2016-02-06 61.087967 4.119383 16.259283 0.000097 0.001949 -14.784750 27.473083 0.000201 11.672067 0.316227 -0.500001 0.077748 -1.109311 720.59500
85952 147127 2016-02-06 62.994833 -0.395783 15.454233 0.000023 0.001717 -8.238333 25.814000 0.000052 16.148367 0.715855 -0.145175 -0.112124 NaN 630.58300
85962 60150420 2016-02-06 61.104250 15.987800 25.048267 0.000005 0.001684 11.150033 27.474983 0.000003 8.301667 0.163241 -0.787171 0.019850 -1.702235 857.47800
86177 114917 2016-02-21 70.669450 2.267617 17.480650 0.000045 0.001870 -7.391683 28.922800 0.000101 13.841933 0.177402 -0.751042 -0.013870 NaN 299.38400
86178 114945 2016-02-21 59.244267 1.724067 15.698550 0.000103 0.001935 -14.654417 27.398933 0.000212 12.083433 0.212607 -0.672423 -0.103620 NaN 515.58800
86211 147127 2016-02-21 62.343983 -2.448350 8.189767 0.000020 0.001687 0.147500 26.127533 0.000047 16.115933 0.783109 -0.106178 0.067254 -1.172282 575.34200
86221 60150420 2016-02-21 62.182783 10.184050 24.827667 0.000005 0.001684 9.056983 27.800667 0.000003 9.183283 0.156414 -0.805724 -0.006827 NaN 913.97900
86432 114873 2016-03-07 54.526967 -0.967700 12.910667 0.000033 0.001762 -9.364400 28.437383 0.000073 8.213433 0.130061 -0.885854 -0.033089 NaN 420.70400
86436 114917 2016-03-07 69.408100 -6.424100 15.444117 0.000038 0.001824 -9.565133 29.081350 0.000087 13.682033 0.171049 -0.766879 -0.006353 NaN 379.25400
86437 114945 2016-03-07 57.916583 -4.305600 10.538867 0.000150 0.001986 -7.845583 28.263217 0.000292 11.944450 0.203694 -0.691022 -0.008913 NaN 371.42700
86455 127429 2016-03-07 73.535231 0.115000 35.608083 0.000028 0.001782 -34.391333 29.722308 0.000065 5.213615 0.126091 -0.899317 0.020894 -1.679981 8.90195
86470 147127 2016-03-07 61.953133 -5.790283 20.491567 0.000017 0.002219 -3.284683 27.836867 0.000040 15.581517 0.375084 -0.425872 -0.408025 NaN 577.08000
86480 60150420 2016-03-07 61.270167 0.453733 28.051300 0.000005 0.001743 -24.144600 28.460750 0.000003 10.110400 0.145719 -0.836485 -0.010695 NaN 781.44500
86691 114873 2016-03-22 53.778750 17.899267 22.187817 0.000028 0.001755 -1.859817 28.859700 0.000063 9.297917 0.123761 -0.907415 -0.006299 NaN 288.53600
86695 114917 2016-03-22 69.074850 -0.350483 31.555317 0.000047 0.001882 11.762883 29.666200 0.000104 12.778483 0.137596 -0.861394 -0.033453 NaN 363.88400
86696 114945 2016-03-22 57.717667 -16.846567 23.051567 0.000191 0.002080 7.100083 28.983633 0.000362 10.774250 0.126456 -0.898060 -0.077238 NaN 395.54800
86729 147127 2016-03-22 62.026050 -9.086600 13.278133 0.000022 0.001717 2.244650 28.722933 0.000050 14.423550 0.245923 -0.609201 -0.129161 NaN 674.74200
86739 60150420 2016-03-22 58.546267 -9.177317 32.592383 0.000005 0.001711 -6.714867 28.862133 0.000003 9.349183 0.160826 -0.793644 0.015107 -1.820812 562.28600

500 rows × 16 columns


In [3]:
# All the rates on the same time frequency
check1 = df_chl_out_3.chl_rate/ df_chl_out_3.chlor_a 
check1 = check1/ freq
check2 = df_chl_out_3.chl_rate.divide(freq *df_chl_out_3.chlor_a, axis = 'index')
# an check 
np.sum(abs(check1 - check2))


Out[3]:
8.656032975899036e-16

In [4]:
# add the column to the dataframe and output the dataset
df_chl_out_3['chlor_a_logE_rate'] = pd.Series(np.array(check2), index=df_chl_out_3.index)
df_chl_out_3.head()


Out[4]:
id time lon vn spd var_lon var_tmp ve temp var_lat lat chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
2073 10206 2002-11-01 67.239550 4.393483 7.707300 0.001396 1000.000000 -4.056967 NaN 0.000461 11.003467 0.131253 -0.881891 -0.000230 NaN 532.470 -0.000117
2077 15707 2002-11-01 67.516567 -13.316117 24.136500 0.000146 1000.000000 -15.863733 NaN 0.000074 13.536150 0.158023 -0.801279 0.002436 -2.613278 552.075 0.001028
2095 34710 2002-11-01 63.079750 16.525517 17.614183 0.000127 0.001804 -1.195583 28.666317 0.000066 17.411183 0.392572 -0.406080 -0.167135 NaN 550.517 -0.028383
2101 34721 2002-11-01 67.881250 6.486583 14.737233 0.000121 0.001813 6.646367 29.393383 0.000063 12.702833 0.152538 -0.816623 0.010258 -1.988946 481.647 0.004483
2332 10206 2002-11-16 66.993317 1.400700 4.379483 0.003644 1000.000000 -2.148200 NaN 0.000991 11.224017 0.140250 -0.853096 0.008997 -2.045886 558.228 0.004277

In [5]:
df_chl_out_3.chlor_a_logE_rate.describe()  # more scattered on the left hand side


Out[5]:
count    500.000000
mean      -0.030009
std        0.189782
min       -3.227182
25%       -0.018936
50%       -0.001345
75%        0.013129
max        0.062963
Name: chlor_a_logE_rate, dtype: float64

In [6]:
# visualize the ROC of log(chl_a) around the arabian sea region
fig, ax  = plt.subplots(figsize=(12,10))
df_chl_out_3.plot(kind='scatter', x='lon', y='lat', c='chlor_a_logE_rate', cmap='RdBu_r', vmin=check2.median()-0.5*check2.std(), vmax=check2.max(), edgecolor='none', ax=ax, title = 'rate of change of the log-scale chl-a')


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x115437b38>

In [7]:
# histogram for non standarized data
axdf_chl = df_chl_out_3.chlor_a_logE_rate.dropna().hist(bins=100,range=[-1.5,0.5]) # there are very a few small values on the left
axdf_chl.set_title('histogram of the rate of change of the log-scale chl-a')


Out[7]:
<matplotlib.text.Text at 0x117b1ada0>

In [8]:
# standarized series
tmp = df_chl_out_3.chlor_a_logE_rate.dropna()
tmp = (tmp - tmp.mean())/tmp.std()
axdf_chl_stdan = tmp.hist(bins=100,range=[-1.5,0.5])   # there are very a few small values on the left
axdf_chl_stdan.set_title('histogram of the standardized rate of change of the log-scale chl-a')


Out[8]:
<matplotlib.text.Text at 0x118ccfc88>

In [4]:
freq = 15
import numpy as np
(np.log(0.141565)-np.log(0.140250))   / freq


Out[4]:
0.00062216207751738162

In [10]:
###########################
# On 2D-subsampling Dataset
###########################
# Val 1:
# id:10206, time:2002-12-01"
# (0.141565 - 0.140250) / (freq*0.141565)    == 0.0006192679452312756
#########
# Val 2:
# id:10206, time:2002-12-01"
# (np.log(0.141565)-np.log(0.140250))   / freq == 0.00062216207751738162 # very close to the value above

df_chl_out_3.sort_values(by=['id', 'time']).head()


Out[10]:
id time lon vn spd var_lon var_tmp ve temp var_lat lat chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
2073 10206 2002-11-01 67.239550 4.393483 7.707300 0.001396 1000.0 -4.056967 NaN 0.000461 11.003467 0.131253 -0.881891 -0.000230 NaN 532.470 -0.000117
2332 10206 2002-11-16 66.993317 1.400700 4.379483 0.003644 1000.0 -2.148200 NaN 0.000991 11.224017 0.140250 -0.853096 0.008997 -2.045886 558.228 0.004277
2591 10206 2002-12-01 66.049467 6.777450 14.383300 0.007490 1000.0 -12.568283 NaN 0.001855 11.718717 0.141565 -0.849043 0.001315 -2.881075 660.647 0.000619
2850 10206 2002-12-16 64.844550 3.686400 10.204233 0.002841 1000.0 -8.721100 NaN 0.000811 12.433233 0.159126 -0.798257 0.017561 -1.755447 799.905 0.007357
3109 10206 2002-12-31 63.592417 -4.998550 13.272817 0.001646 1000.0 -11.821550 NaN 0.000543 12.299100 0.287051 -0.542041 0.127924 -0.893047 935.974 0.029710

In [11]:
# convert into datetime
df_chl_out_3['time'] = pd.to_datetime(df_chl_out_3['time']) # ,format='%m/%d/%y %I:%M%p'
df_chl_out_3.sort_values(by=['id', 'time']).head()  # a check


Out[11]:
id time lon vn spd var_lon var_tmp ve temp var_lat lat chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
2073 10206 2002-11-01 67.239550 4.393483 7.707300 0.001396 1000.0 -4.056967 NaN 0.000461 11.003467 0.131253 -0.881891 -0.000230 NaN 532.470 -0.000117
2332 10206 2002-11-16 66.993317 1.400700 4.379483 0.003644 1000.0 -2.148200 NaN 0.000991 11.224017 0.140250 -0.853096 0.008997 -2.045886 558.228 0.004277
2591 10206 2002-12-01 66.049467 6.777450 14.383300 0.007490 1000.0 -12.568283 NaN 0.001855 11.718717 0.141565 -0.849043 0.001315 -2.881075 660.647 0.000619
2850 10206 2002-12-16 64.844550 3.686400 10.204233 0.002841 1000.0 -8.721100 NaN 0.000811 12.433233 0.159126 -0.798257 0.017561 -1.755447 799.905 0.007357
3109 10206 2002-12-31 63.592417 -4.998550 13.272817 0.001646 1000.0 -11.821550 NaN 0.000543 12.299100 0.287051 -0.542041 0.127924 -0.893047 935.974 0.029710

In [12]:
# CSV CSV CSV CSV with specfic index
# df_chl_out_3.csv --  {lat, lon, temp, chl_rate, dist}
# df_chl_out_3.csv --  {lat, lon, temp, chl_rate, dist, chlor_a_log10_rate}

# 3 represents 3 features: {temp, chl_rate, dist}
# 4 represents 4 features: {temp, chl_rate, dist, chlor_a_log10_rate }
print('out_filename:', out_filename)
df_chl_out_3.to_csv(out_filename, sep=',', index_label = 'index')

# load CSV output
test = pd.read_csv(out_filename, index_col='index')

# a check
test.sort_values(by=['id', 'time']).head()


out_filename: df_chl_out_15D_modisa_4.csv
Out[12]:
id time lon vn spd var_lon var_tmp ve temp var_lat lat chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
2073 10206 2002-11-01 67.239550 4.393483 7.707300 0.001396 1000.0 -4.056967 NaN 0.000461 11.003467 0.131253 -0.881891 -0.000230 NaN 532.470 -0.000117
2332 10206 2002-11-16 66.993317 1.400700 4.379483 0.003644 1000.0 -2.148200 NaN 0.000991 11.224017 0.140250 -0.853096 0.008997 -2.045886 558.228 0.004277
2591 10206 2002-12-01 66.049467 6.777450 14.383300 0.007490 1000.0 -12.568283 NaN 0.001855 11.718717 0.141565 -0.849043 0.001315 -2.881075 660.647 0.000619
2850 10206 2002-12-16 64.844550 3.686400 10.204233 0.002841 1000.0 -8.721100 NaN 0.000811 12.433233 0.159126 -0.798257 0.017561 -1.755447 799.905 0.007357
3109 10206 2002-12-31 63.592417 -4.998550 13.272817 0.001646 1000.0 -11.821550 NaN 0.000543 12.299100 0.287051 -0.542041 0.127924 -0.893047 935.974 0.029710

In [13]:
## check the week numbers of the range from Nov-01-01 to Mar-01-01
for year in range(2002, 2017):
    print(str(year)+'-11-01 is week', datetime.datetime(year, 11, 1).isocalendar()[1])  # 44, 45, 

print('----')
for year in range(2002, 2017):
    print(str(year)+'-3-31 is week', datetime.datetime(year, 3, 31).isocalendar()[1])  # 13, 14

plt.show()


2002-11-01 is week 44
2003-11-01 is week 44
2004-11-01 is week 45
2005-11-01 is week 44
2006-11-01 is week 44
2007-11-01 is week 44
2008-11-01 is week 44
2009-11-01 is week 44
2010-11-01 is week 44
2011-11-01 is week 44
2012-11-01 is week 44
2013-11-01 is week 44
2014-11-01 is week 44
2015-11-01 is week 44
2016-11-01 is week 44
----
2002-3-31 is week 13
2003-3-31 is week 14
2004-3-31 is week 14
2005-3-31 is week 13
2006-3-31 is week 13
2007-3-31 is week 13
2008-3-31 is week 14
2009-3-31 is week 14
2010-3-31 is week 13
2011-3-31 is week 13
2012-3-31 is week 13
2013-3-31 is week 13
2014-3-31 is week 14
2015-3-31 is week 14
2016-3-31 is week 13

In [14]:
##### weekly plot on the Lagrangian rate of change of the chl-a
#sns.set(style="white")
#sns.set(color_codes=True)

### 
# Approach 1 depreciated
#grouped = df_timed.chl_rate.groupby(df_timed.index.week)
#grouped.plot.box()

### 
# Approach 2
# prepare data  a. use index or columns to group

###
# select the corresponding weeks, prepare the data
df_timed = df_chl_out_3.set_index('time')
df_timed['week'] = df_timed.index.week

mask_NovMar = (df_timed.week<=14) | (df_timed.week >=44)
df_timed_NovMar = df_timed[mask_NovMar]
#df_timed_NovMar.head()

# now rotate the index to make Nov-01-01 the first month
print('the min and max of the week index is %d, %d :' % (df_timed_NovMar.week.min(), df_timed_NovMar.week.max()) )
# make the 44th week the 1st week
df_timed_NovMar['week_rotate'] = (df_timed_NovMar.week + 10 ) % 53
df_timed_NovMar.week_rotate.describe() # now from 1 to 24


axes1=df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].mean().plot(linestyle="-",color='b', linewidth=1)
df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].quantile(.75).plot(linestyle="--",color='g', linewidth=0.35)
df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].quantile(.50).plot(linestyle="--",color='r', linewidth=0.75)
df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].quantile(.25).plot(linestyle="--",color='g', linewidth=0.35)
axes1.set_ylim(-3,2)
axes1.set_title("Line plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.yticks(np.arange(-3, 2, 0.5))
plt.xticks(np.arange(1, 25, 1))
plt.show()


# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
#http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chl_rate', by='week_rotate')
plt.suptitle("") # equivalent
axes2.set_ylim(-1.6,1.6)
axes2.set_title("Box plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.show()

# the rate of change is slower on the regular scale

#matplotlib.pyplot.close("all")


the min and max of the week index is 1, 52 :

In [15]:
# weekly plot on the Lagrangian rate of change of the log-scale chl-a
# This is the rate of change on the exponential scale
axes1=df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].mean().plot(linestyle="-",color='b', linewidth=1)
df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].quantile(.75).plot(linestyle="--",color='g', linewidth=0.35)
df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].quantile(.50).plot(linestyle="--",color='r', linewidth=0.75)
df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].quantile(.25).plot(linestyle="--",color='g', linewidth=0.35)
axes1.set_ylim(-1,0.5)
axes1.set_title("Line plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.yticks(np.arange(-1, 0.5, 0.25))
plt.xticks(np.arange(1, 25, 1))
#plt.show()


# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
#http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chlor_a_logE_rate', by='week_rotate')
plt.suptitle("") # equivalent
axes2.set_ylim(-1,0.5)
axes2.set_title("Box plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
#plt.show()


#plt.close('all')


Out[15]:
<matplotlib.text.Text at 0x11a516390>

In [16]:
plt.close('all')
plt.cla()  # axis
plt.clf()  # figure
plt.show()


<matplotlib.figure.Figure at 0x1194c1940>

In [17]:
# spatial plot for different months --  totally five months 1, 2, 3, 11, 12, 
for i in range(0,5,1):
    month_ind = np.array([11,12,1,2,3])
    month_names = ['November', 'December','January','February', 'March']
    aa = df_timed_NovMar[df_timed_NovMar.index.month == month_ind[i]]
    fig, ax  = plt.subplots(figsize=(8,6))
    ##aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r', vmin=aa.chl_rate.median()-0.5*aa.chl_rate.std(), vmax=aa.chl_rate.median()-0.5*aa.chl_rate.std(), edgecolor='none', ax=ax, title = 'rate of change of the $Chl_a$')
    ##aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r', vmin=aa.chl_rate.mean()-0.5*aa.chl_rate.std(), vmax=aa.chl_rate.mean()+0.5*aa.chl_rate.std(), edgecolor='none', ax=ax, title = 'rate of change of the $Chl_a$')
    print('\n\n summary of the Chl_rate \n', aa.chl_rate.describe())
    aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r', vmin=-0.6, vmax=0.6, edgecolor='none', ax=ax, title = 'Rate of change of the $Chl_a$ in %s' % (month_names[i]))
    plt.xticks(np.arange(45, 80, 2.5))
    plt.yticks(np.arange(0, 28, 2.5))
    plt.show()



 summary of the Chl_rate 
 count    86.000000
mean     -0.051126
std       0.607245
min      -5.463820
25%      -0.039610
50%       0.005852
75%       0.075876
max       0.447062
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    81.000000
mean      0.072963
std       0.683830
min      -2.964967
25%      -0.017820
50%       0.032429
75%       0.127924
max       3.162658
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    106.000000
mean       0.123953
std        0.582316
min       -1.129739
25%       -0.021475
50%        0.009342
75%        0.118613
max        4.258058
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    103.000000
mean       0.440261
std        4.819858
min      -29.010900
25%       -0.081373
50%       -0.005371
75%        0.088618
max       33.742726
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    124.000000
mean      -0.840258
std        2.714265
min      -16.581776
25%       -0.316079
50%       -0.030401
75%       -0.001247
max        5.960847
Name: chl_rate, dtype: float64

In [18]:
print("test")


test

In [ ]: