This notebook calculates the Lagrangian rate of change of the log-scale $Chl_a$, defined as $\frac{D \ ( \log_{e} Chl_a)}{Dt}:=\frac{1}{Chl_a}\frac{D \ ( Chl_a)}{Dt}$

  • A few remarks:
    • Unit $mg/(m^3 \cdot day)$
    • Natural logarithm added
    • All the rates on the same time frequency
    • Validate the rate of change of the log-scale Chl-a against a finite-difference (FD) estimate
    • Monthly trends
      • aaa
      • aaa

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import xarray as xr
from datetime import datetime
import datetime


/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)

In [2]:
# Load the floats data from CSV
# *****************************
# The table written by the previous processing step is read back here.
# See the readme file for the file-name convention used in the experiments;
# for instance the suffix "3" indicates that the distance column ('dist') has
# been added to the dataset, and the suffix "4" is used for this notebook's output.

# Discard figures left over from a previous run.  Only close('all') is used:
# following it with plt.cla()/plt.clf() would implicitly create a new, empty
# figure, which plt.show() would then render as a blank output.
plt.close('all')

# sampling frequency of the dataset: one record every `freq` units of `suf`
freq = 6
suf = 'D'
in_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_3.csv'
out_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_4.csv'
folder = './data_collector_modisa_chla9km/'
direc = folder + in_filename

print('in_filename and path:', direc)
print('out_filename:', out_filename)

# the CSV carries its row labels in a column named 'index'
df_chl_out_3 = pd.read_csv(direc, index_col='index')
df_chl_out_3


<matplotlib.figure.Figure at 0x115438c88>
in_filename and path: ./data_collector_modisa_chla9km/df_chl_out_6D_modisa_3.csv
out_filename: df_chl_out_6D_modisa_4.csv
Out[2]:
id time lon var_lon vn var_tmp spd var_lat temp lat ve chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist
index
5181 10206 2002-11-01 67.400875 0.001188 6.497542 1000.000000 11.098375 0.000411 NaN 10.819333 -6.816792 0.132351 -0.878273 -0.011445 NaN 512.041
5183 11089 2002-11-01 65.187083 0.000106 5.029292 0.003775 12.775208 0.000057 28.979875 14.236667 -9.695500 0.124708 -0.904106 -0.006008 NaN 813.115
5203 34710 2002-11-01 63.136583 0.000115 12.004000 0.001725 12.873292 0.000061 28.993542 16.952292 1.252542 0.404965 -0.392582 0.069651 -1.157071 589.642
5440 10206 2002-11-07 67.149208 0.001453 3.659208 1000.000000 6.336958 0.000476 NaN 11.107000 -2.266292 0.130267 -0.885166 -0.002084 NaN 540.917
5442 11089 2002-11-07 64.589250 0.000133 -1.580333 0.003873 16.956875 0.000068 28.978875 14.336875 -15.959458 0.188381 -0.724962 0.063673 -1.196042 878.551
5444 15707 2002-11-07 67.265333 0.000158 -22.637667 1000.000000 26.327625 0.000079 NaN 13.365542 -9.709250 0.164893 -0.782798 0.005629 -2.249550 570.016
5462 34710 2002-11-07 63.098417 0.000156 13.738750 0.001941 15.450125 0.000078 28.687667 17.482833 -4.776875 0.383276 -0.416488 -0.021689 NaN 548.641
5468 34721 2002-11-07 67.971208 0.000136 3.860583 0.001854 16.149333 0.000068 29.455000 12.624125 10.850542 0.144376 -0.840506 0.001377 -2.861171 471.103
5701 11089 2002-11-13 63.741208 0.000116 -11.590250 0.003512 25.847125 0.000061 28.424792 14.022917 -22.455833 0.317086 -0.498823 0.128705 -0.890406 836.575
5721 34710 2002-11-13 62.930792 0.000120 38.649542 0.001646 39.569625 0.000062 27.875125 18.657250 -3.164125 0.537426 -0.269681 0.154150 -0.812056 465.142
5727 34721 2002-11-13 68.327750 0.000093 17.081000 0.001696 17.822542 0.000051 29.132417 13.243375 3.133792 0.148550 -0.828127 0.004174 -2.379413 456.196
5960 11089 2002-11-19 62.282042 0.000112 -10.847542 0.003717 36.300458 0.000060 28.016125 13.385042 -33.685417 0.278075 -0.555838 -0.039011 NaN 783.166
5976 34315 2002-11-19 57.637667 0.000145 -13.475375 0.003516 31.643208 0.000070 28.645750 8.049917 25.526000 0.116233 -0.934671 -0.031591 NaN 599.180
5980 34710 2002-11-19 61.998167 0.000143 41.829250 0.001875 52.192750 0.000071 27.476833 20.548250 -30.129292 0.494256 -0.306048 -0.043170 NaN 286.977
5986 34721 2002-11-19 68.305583 0.000108 10.309583 0.001802 11.369833 0.000058 29.039417 13.866333 -1.515417 0.133991 -0.872924 -0.014559 NaN 489.967
6235 34315 2002-11-25 57.884625 0.000107 -26.031125 0.004311 30.744375 0.000057 28.847667 7.073750 -12.923167 0.148477 -0.828339 0.032244 -1.491544 709.084
6239 34710 2002-11-25 61.925250 0.000099 29.529833 0.001669 61.125833 0.000053 27.346375 22.580375 38.093458 0.537776 -0.269399 0.043520 -1.361315 217.007
6245 34721 2002-11-25 68.232875 0.000109 9.373500 0.001763 11.759500 0.000058 29.002042 14.301375 -2.961583 0.151308 -0.820138 0.017317 -1.761527 523.262
6476 10206 2002-12-01 66.529750 0.003438 5.781167 1000.000000 14.679250 0.001004 NaN 11.451333 -13.332292 0.127859 -0.893269 -0.009725 NaN 606.805
6480 15707 2002-12-01 64.173042 0.000121 -13.654667 1000.000000 23.319167 0.000064 NaN 11.570958 -17.589583 0.144128 -0.841252 -0.028850 NaN 864.585
6498 34710 2002-12-01 63.567500 0.000144 -53.187208 0.001642 64.668625 0.000073 27.061250 21.464958 14.707583 1.062734 0.026424 0.524958 -0.279876 398.667
6504 34721 2002-12-01 68.157458 0.000096 8.375042 0.001724 10.951500 0.000053 28.839208 14.755917 -1.228375 0.147040 -0.832565 -0.004268 NaN 558.924
6757 34710 2002-12-07 64.251792 0.000122 27.901500 0.001834 31.448250 0.000064 26.605375 21.139083 7.759083 2.091943 0.320550 1.029209 0.012504 442.456
7016 34710 2002-12-13 64.510625 0.000130 5.847417 0.001732 10.354000 0.000067 26.393542 22.007792 6.050750 0.500791 -0.300343 -1.591152 NaN 347.264
7275 34710 2002-12-19 64.684292 0.000109 -3.977958 0.001641 9.608667 0.000058 26.486625 22.059042 2.507167 0.534686 -0.271901 0.033895 -1.469868 342.943
7281 34721 2002-12-19 66.911833 0.000100 -3.673875 0.001766 16.385750 0.000055 28.143125 15.013875 -7.502417 0.156046 -0.806746 -0.085491 NaN 685.798
7530 34315 2002-12-25 52.277208 0.000094 4.943042 0.003680 32.462708 0.000050 27.336292 9.191500 -29.250125 0.259593 -0.585707 -0.059411 NaN 158.580
7534 34710 2002-12-25 64.649667 0.000114 -20.491125 0.001756 21.473250 0.000060 26.127750 21.491792 -5.814625 0.851984 -0.069569 0.317298 -0.498533 392.696
7540 34721 2002-12-25 66.829167 0.000119 6.850250 0.001786 7.434917 0.000062 27.886750 15.126167 0.497292 0.153344 -0.814333 -0.002702 NaN 700.250
7771 10206 2002-12-31 64.004000 0.000999 -4.584875 1000.000000 13.410292 0.000361 NaN 12.477292 -12.156875 0.189273 -0.722911 0.015523 -1.809038 890.850
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
213865 114945 2016-01-22 63.417875 0.000063 -2.689042 0.001827 15.184125 0.000141 27.833542 11.441208 -13.982833 0.230810 -0.636745 0.062168 -1.206435 947.098
214123 114917 2016-01-28 71.836542 0.000046 -2.579958 0.001891 22.193250 0.000104 29.090958 13.828583 -5.295625 0.165727 -0.780607 0.010033 -1.998563 248.288
214124 114945 2016-01-28 62.719750 0.000058 0.308750 0.001874 19.054500 0.000130 27.799333 11.368875 -17.474333 0.197836 -0.703694 -0.032974 NaN 897.821
214157 147127 2016-01-28 63.769125 0.000022 -14.036375 0.001694 18.233208 0.000049 25.915083 16.797667 -8.029667 0.527837 -0.277500 -0.025128 NaN 654.214
214416 147127 2016-02-03 63.448208 0.000023 -5.727042 0.001775 17.525583 0.000052 25.837833 16.289875 -1.706583 1.001906 0.000827 0.474069 -0.324158 661.822
214675 147127 2016-02-09 63.162167 0.000020 -2.433792 0.001680 16.335750 0.000047 25.909000 16.169083 -13.969167 0.603671 -0.219200 -0.398235 NaN 646.832
214685 60150420 2016-02-09 61.056708 0.000005 24.742958 0.001684 30.675167 0.000003 27.572208 8.188042 17.750667 0.178866 -0.747473 -0.022825 NaN 861.277
214900 114917 2016-02-15 71.135417 0.000036 -2.744750 0.001774 17.011167 0.000083 28.617083 13.691250 -4.880792 0.175518 -0.755678 -0.027480 NaN 258.059
214934 147127 2016-02-15 62.591542 0.000024 1.452333 0.001664 14.603708 0.000055 25.695250 16.122167 -10.755125 0.630473 -0.200334 0.026802 -1.571839 594.244
214944 60150420 2016-02-15 61.522083 0.000005 2.897667 0.001684 15.425167 0.000003 27.371667 8.963375 0.118625 0.185971 -0.730554 0.007106 -2.148391 860.634
215159 114917 2016-02-21 70.899125 0.000041 2.090708 0.001824 15.345958 0.000094 28.806500 13.739333 -3.998625 0.165865 -0.780244 -0.009653 NaN 274.316
215193 147127 2016-02-21 62.302500 0.000023 -1.019083 0.001669 10.701417 0.000054 25.774375 16.124875 2.000625 0.738294 -0.131771 0.107821 -0.967296 569.309
215418 114917 2016-02-27 70.617458 0.000044 3.585458 0.001924 16.964875 0.000099 28.918708 13.867208 -7.416667 0.227879 -0.642297 0.062013 -1.207516 301.805
215419 114945 2016-02-27 59.056625 0.000090 0.930833 0.001972 18.146667 0.000189 27.354750 12.090792 -16.973292 0.207439 -0.683111 -0.000975 NaN 493.935
215452 147127 2016-02-27 62.385667 0.000019 -1.098042 0.001728 4.850958 0.000043 26.173417 16.143958 -1.313958 0.844760 -0.073267 0.106466 -0.972788 576.387
215677 114917 2016-03-04 70.139500 0.000049 1.072833 0.001806 18.792625 0.000106 29.097083 13.965708 -13.428167 0.199471 -0.700121 -0.028408 NaN 343.911
215678 114945 2016-03-04 58.409333 0.000249 0.703875 0.001914 9.981083 0.000442 27.887750 12.141708 -9.471333 0.207299 -0.683403 -0.000140 NaN 424.199
215711 147127 2016-03-04 62.223125 0.000018 -9.260750 0.001638 18.875333 0.000043 26.735958 15.869625 -10.845833 0.542317 -0.265747 -0.302443 NaN 580.549
215932 114873 2016-03-10 54.519833 0.000023 -5.832208 0.001750 13.804708 0.000054 28.413167 8.205375 -11.405500 0.135988 -0.866501 -0.022222 NaN 417.081
215936 114917 2016-03-10 69.444750 0.000040 -7.709375 0.001877 15.894292 0.000089 29.137000 13.806542 -11.905458 0.206624 -0.684818 0.007154 -2.145471 384.103
215937 114945 2016-03-10 57.964167 0.000108 -4.851042 0.001976 10.590792 0.000232 28.154167 11.995333 -8.106750 0.195842 -0.708094 -0.011457 NaN 379.222
215970 147127 2016-03-10 61.970625 0.000018 -3.755500 0.002119 20.329417 0.000041 27.767042 15.798042 6.565792 0.324060 -0.489374 -0.218256 NaN 565.936
216191 114873 2016-03-16 54.278292 0.000032 6.301833 0.001769 9.604417 0.000070 28.646667 8.133625 -3.843292 0.118643 -0.925757 -0.017344 NaN 401.065
216195 114917 2016-03-16 69.093042 0.000035 -9.430833 0.001805 15.314167 0.000081 29.051083 13.431125 -5.643125 0.110149 -0.958019 -0.096475 NaN 390.674
216229 147127 2016-03-16 61.860875 0.000016 -5.463208 0.002610 16.946208 0.000038 28.460208 15.307125 -4.249583 0.324520 -0.488759 0.000459 -3.337727 591.908
216454 114917 2016-03-22 68.609875 0.000053 -18.238000 0.001855 32.937125 0.000117 29.311083 12.966625 -6.349792 0.151247 -0.820314 0.041098 -1.386183 416.387
216488 147127 2016-03-22 61.992625 0.000026 -17.679542 0.001731 18.964917 0.000059 28.448292 14.815292 0.711042 0.292403 -0.534018 -0.032117 NaN 638.915
216498 60150420 2016-03-22 58.874167 0.000005 -11.590417 0.001751 35.581917 0.000003 28.640250 9.951083 -32.246375 0.183525 -0.736305 0.032967 -1.481922 552.253
216713 114917 2016-03-28 69.123875 0.000039 11.928375 0.001966 27.741250 0.000088 29.931250 12.471042 18.679458 0.133134 -0.875711 -0.018113 NaN 344.596
216757 60150420 2016-03-28 58.094208 0.000005 -19.345542 0.001684 35.464792 0.000003 28.911458 8.987708 10.463417 0.192533 -0.715496 0.009008 -2.045375 553.504

745 rows × 16 columns


In [3]:
# Bring all the rates onto the same time frequency: the chl-a change is divided
# by the concentration (relative change) and by `freq`.
# The quantity is computed in two equivalent ways so they can be cross-checked.
relative_change = df_chl_out_3.chl_rate / df_chl_out_3.chlor_a
check1 = relative_change / freq
check2 = df_chl_out_3.chl_rate.divide(freq * df_chl_out_3.chlor_a, axis='index')
# sanity check: accumulated absolute discrepancy between the two formulations
(check1 - check2).abs().sum()


Out[3]:
4.393119440275484e-15

In [4]:
# Attach the log-scale rate to the dataframe as the column 'chlor_a_logE_rate'.
# The values go through a plain array and are re-labelled with the frame's own
# index before the assignment.
log_rate_values = np.array(check2)
log_rate_series = pd.Series(log_rate_values, index=df_chl_out_3.index)
df_chl_out_3['chlor_a_logE_rate'] = log_rate_series
df_chl_out_3.head()


Out[4]:
id time lon var_lon vn var_tmp spd var_lat temp lat ve chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
5181 10206 2002-11-01 67.400875 0.001188 6.497542 1000.000000 11.098375 0.000411 NaN 10.819333 -6.816792 0.132351 -0.878273 -0.011445 NaN 512.041 -0.014412
5183 11089 2002-11-01 65.187083 0.000106 5.029292 0.003775 12.775208 0.000057 28.979875 14.236667 -9.695500 0.124708 -0.904106 -0.006008 NaN 813.115 -0.008029
5203 34710 2002-11-01 63.136583 0.000115 12.004000 0.001725 12.873292 0.000061 28.993542 16.952292 1.252542 0.404965 -0.392582 0.069651 -1.157071 589.642 0.028666
5440 10206 2002-11-07 67.149208 0.001453 3.659208 1000.000000 6.336958 0.000476 NaN 11.107000 -2.266292 0.130267 -0.885166 -0.002084 NaN 540.917 -0.002666
5442 11089 2002-11-07 64.589250 0.000133 -1.580333 0.003873 16.956875 0.000068 28.978875 14.336875 -15.959458 0.188381 -0.724962 0.063673 -1.196042 878.551 0.056334

In [5]:
# summary statistics of the log-scale rate (more scattered on the left hand side)
df_chl_out_3['chlor_a_logE_rate'].describe()


Out[5]:
count    745.000000
mean      -0.068723
std        0.390312
min       -5.503224
25%       -0.046302
50%       -0.003275
75%        0.029939
max        0.160096
Name: chlor_a_logE_rate, dtype: float64

In [6]:
# Map of the rate of change (ROC) of log(chl_a) around the Arabian Sea region.
# Colour limits run from half a standard deviation below the median up to the maximum.
colour_floor = check2.median() - 0.5 * check2.std()
colour_ceiling = check2.max()
fig, ax = plt.subplots(figsize=(12, 10))
df_chl_out_3.plot(
    kind='scatter',
    x='lon',
    y='lat',
    c='chlor_a_logE_rate',
    cmap='RdBu_r',
    vmin=colour_floor,
    vmax=colour_ceiling,
    edgecolor='none',
    ax=ax,
    title='rate of change of the log-scale chl-a'
)


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x11546a668>

In [7]:
# Histogram of the non-standardized log-scale rate.
# The bins cover [-1.5, 0.5] only; there are a few very small values further to the left.
rate_without_nan = df_chl_out_3.chlor_a_logE_rate.dropna()
axdf_chl = rate_without_nan.hist(bins=100, range=[-1.5, 0.5])
axdf_chl.set_title('histogram of the rate of change of the log-scale chl-a')


Out[7]:
<matplotlib.text.Text at 0x117da30f0>

In [8]:
# Histogram of the standardized series: subtract the mean, divide by the standard deviation.
# The bins cover [-1.5, 0.5] only; there are a few very small values further to the left.
rate_valid = df_chl_out_3.chlor_a_logE_rate.dropna()
tmp = (rate_valid - rate_valid.mean()) / rate_valid.std()
axdf_chl_stdan = tmp.hist(bins=100, range=[-1.5, 0.5])
axdf_chl_stdan.set_title('histogram of the standardized rate of change of the log-scale chl-a')


Out[8]:
<matplotlib.text.Text at 0x118ccf9b0>

In [20]:
# Finite-difference estimate of the log-scale rate from two consecutive chl-a values
chl_before, chl_after = 0.132351, 0.130267
(np.log(chl_after) - np.log(chl_before)) / freq


Out[20]:
-0.0026452156695248399

In [10]:
###########################
# On 2D-subsampling Dataset
###########################
# Validation of the log-scale rate for id 10206 at time 2002-11-07
#
# Val 1 -- relative change divided by freq:
#   (0.130267 - 0.132351) / (freq*.130267)    == -0.0026663186634629937
#
# Val 2 -- finite difference of the natural logarithm:
#   (np.log(0.130267)-np.log(0.132351))   / freq == -0.0026452156695248399
#   (very close to Val 1)

# list the records float by float, in chronological order, to look the values up
by_float_and_time = df_chl_out_3.sort_values(by=['id', 'time'])
by_float_and_time.head()


Out[10]:
id time lon var_lon vn var_tmp spd var_lat temp lat ve chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
5181 10206 2002-11-01 67.400875 0.001188 6.497542 1000.0 11.098375 0.000411 NaN 10.819333 -6.816792 0.132351 -0.878273 -0.011445 NaN 512.041 -0.014412
5440 10206 2002-11-07 67.149208 0.001453 3.659208 1000.0 6.336958 0.000476 NaN 11.107000 -2.266292 0.130267 -0.885166 -0.002084 NaN 540.917 -0.002666
6476 10206 2002-12-01 66.529750 0.003438 5.781167 1000.0 14.679250 0.001004 NaN 11.451333 -13.332292 0.127859 -0.893269 -0.009725 NaN 606.805 -0.012677
7771 10206 2002-12-31 64.004000 0.000999 -4.584875 1000.0 13.410292 0.000361 NaN 12.477292 -12.156875 0.189273 -0.722911 0.015523 -1.809038 890.850 0.013669
8807 10206 2003-01-24 62.110722 0.002644 -8.204294 1000.0 13.179000 0.000795 NaN 11.109556 -10.128765 0.225299 -0.647241 -0.113617 NaN 838.923 -0.084049

In [11]:
# Parse the 'time' column into pandas datetimes
# (an explicit format such as '%m/%d/%y %I:%M%p' could be passed, but is not needed here)
parsed_time = pd.to_datetime(df_chl_out_3['time'])
df_chl_out_3['time'] = parsed_time
# a check: records ordered by float id and time
df_chl_out_3.sort_values(by=['id', 'time']).head()


Out[11]:
id time lon var_lon vn var_tmp spd var_lat temp lat ve chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
5181 10206 2002-11-01 67.400875 0.001188 6.497542 1000.0 11.098375 0.000411 NaN 10.819333 -6.816792 0.132351 -0.878273 -0.011445 NaN 512.041 -0.014412
5440 10206 2002-11-07 67.149208 0.001453 3.659208 1000.0 6.336958 0.000476 NaN 11.107000 -2.266292 0.130267 -0.885166 -0.002084 NaN 540.917 -0.002666
6476 10206 2002-12-01 66.529750 0.003438 5.781167 1000.0 14.679250 0.001004 NaN 11.451333 -13.332292 0.127859 -0.893269 -0.009725 NaN 606.805 -0.012677
7771 10206 2002-12-31 64.004000 0.000999 -4.584875 1000.0 13.410292 0.000361 NaN 12.477292 -12.156875 0.189273 -0.722911 0.015523 -1.809038 890.850 0.013669
8807 10206 2003-01-24 62.110722 0.002644 -8.204294 1000.0 13.179000 0.000795 NaN 11.109556 -10.128765 0.225299 -0.647241 -0.113617 NaN 838.923 -0.084049

In [12]:
# Write the augmented table to CSV, labelling the index column 'index'.
# File-name convention:
#   ..._3.csv --  {lat, lon, temp, chl_rate, dist}
#   ..._4.csv --  {lat, lon, temp, chl_rate, dist, chlor_a_logE_rate}
# "3" represents 3 features: {temp, chl_rate, dist}
# "4" represents 4 features: {temp, chl_rate, dist, chlor_a_logE_rate}
print('out_filename:', out_filename)
df_chl_out_3.to_csv(out_filename, index_label='index')

# read the file back as a round-trip check and show the first records,
# ordered by float id and time
test = pd.read_csv(out_filename, index_col='index')
test.sort_values(['id', 'time']).head()


out_filename: df_chl_out_6D_modisa_4.csv
Out[12]:
id time lon var_lon vn var_tmp spd var_lat temp lat ve chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
5181 10206 2002-11-01 67.400875 0.001188 6.497542 1000.0 11.098375 0.000411 NaN 10.819333 -6.816792 0.132351 -0.878273 -0.011445 NaN 512.041 -0.014412
5440 10206 2002-11-07 67.149208 0.001453 3.659208 1000.0 6.336958 0.000476 NaN 11.107000 -2.266292 0.130267 -0.885166 -0.002084 NaN 540.917 -0.002666
6476 10206 2002-12-01 66.529750 0.003438 5.781167 1000.0 14.679250 0.001004 NaN 11.451333 -13.332292 0.127859 -0.893269 -0.009725 NaN 606.805 -0.012677
7771 10206 2002-12-31 64.004000 0.000999 -4.584875 1000.0 13.410292 0.000361 NaN 12.477292 -12.156875 0.189273 -0.722911 0.015523 -1.809038 890.850 0.013669
8807 10206 2003-01-24 62.110722 0.002644 -8.204294 1000.0 13.179000 0.000795 NaN 11.109556 -10.128765 0.225299 -0.647241 -0.113617 NaN 838.923 -0.084049

In [13]:
## Check the ISO week numbers at both ends of the season, Nov-01 and Mar-31,
## for every year from 2002 to 2016.
## Nov-01 falls in week 44 or 45; Mar-31 falls in week 13 or 14.
season_bounds = [('-11-01 is week', 11, 1), ('-3-31 is week', 3, 31)]
for position, (label, month, day) in enumerate(season_bounds):
    if position:
        print('----')  # separator between the two listings
    for year in range(2002, 2017):
        iso_week = datetime.datetime(year, month, day).isocalendar()[1]
        print(str(year)+label, iso_week)

plt.show()


2002-11-01 is week 44
2003-11-01 is week 44
2004-11-01 is week 45
2005-11-01 is week 44
2006-11-01 is week 44
2007-11-01 is week 44
2008-11-01 is week 44
2009-11-01 is week 44
2010-11-01 is week 44
2011-11-01 is week 44
2012-11-01 is week 44
2013-11-01 is week 44
2014-11-01 is week 44
2015-11-01 is week 44
2016-11-01 is week 44
----
2002-3-31 is week 13
2003-3-31 is week 14
2004-3-31 is week 14
2005-3-31 is week 13
2006-3-31 is week 13
2007-3-31 is week 13
2008-3-31 is week 14
2009-3-31 is week 14
2010-3-31 is week 13
2011-3-31 is week 13
2012-3-31 is week 13
2013-3-31 is week 13
2014-3-31 is week 14
2015-3-31 is week 14
2016-3-31 is week 13

In [14]:
##### Weekly plots of the Lagrangian rate of change of the chl-a (November - March)
#sns.set(style="white")
#sns.set(color_codes=True)

# Index the records by time and tag each one with its week-of-year number.
# NOTE(review): DatetimeIndex.week is deprecated in newer pandas releases in
# favour of index.isocalendar().week -- revisit when upgrading pandas.
df_timed = df_chl_out_3.set_index('time')
df_timed['week'] = df_timed.index.week

# Keep the weeks of the season only (week >= 44 or week <= 14).
# .copy() turns the selection into an independent frame, so that adding the
# 'week_rotate' column below is not an assignment on a slice of df_timed
# (which raises pandas' SettingWithCopyWarning).
mask_NovMar = (df_timed.week<=14) | (df_timed.week >=44)
df_timed_NovMar = df_timed[mask_NovMar].copy()

print('the min and max of the week index is %d, %d :' % (df_timed_NovMar.week.min(), df_timed_NovMar.week.max()) )

# Rotate the week index so that the 44th week becomes the 1st week of the
# season: weeks 44..53 map to 1..10 and weeks 1..14 map to 11..24.
df_timed_NovMar['week_rotate'] = (df_timed_NovMar.week + 10 ) % 53

# NOTE(review): the labels below state a per-day unit for chl_rate -- confirm
# that chl_rate is a per-day rate and not the change over one sampling interval.

# --- line plot: weekly mean (blue), median (red) and quartiles (green) ---
# group once and reuse the grouping for all four curves
weekly_chl_rate = df_timed_NovMar.groupby(['week_rotate'])['chl_rate']
fig1, axes1 = plt.subplots()
weekly_chl_rate.mean().plot(ax=axes1, linestyle="-",color='b', linewidth=1)
weekly_chl_rate.quantile(.75).plot(ax=axes1, linestyle="--",color='g', linewidth=0.35)
weekly_chl_rate.quantile(.50).plot(ax=axes1, linestyle="--",color='r', linewidth=0.75)
weekly_chl_rate.quantile(.25).plot(ax=axes1, linestyle="--",color='g', linewidth=0.35)
axes1.set_ylim(-3,2)
axes1.set_title("Line plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
axes1.set_xlabel('week', fontsize=10)
axes1.set_ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
axes1.set_yticks(np.arange(-3, 2, 0.5))
axes1.set_xticks(np.arange(1, 25, 1))
plt.show()


# --- box plot of the same data, one box per rotated week ---
# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
# http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chl_rate', by='week_rotate')
axes2.get_figure().suptitle("")  # drop the super-title that boxplot(by=...) adds
axes2.set_ylim(-1.6,1.6)
axes2.set_title("Box plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
axes2.set_xlabel('week', fontsize=10)
axes2.set_ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.show()

# the rate of change is slower on the regular scale


the min and max of the week index is 1, 53 :

In [15]:
# Weekly plots of the Lagrangian rate of change of the log-scale chl-a
# (this is the rate of change on the exponential scale).
# The quantity is (1/Chl_a) * D(Chl_a)/Dt, so the concentration unit cancels
# and the axis unit is 1/day rather than mg/(m^3 day).

# --- line plot: weekly mean (blue), median (red) and quartiles (green) ---
# group once and reuse the grouping for all four curves
weekly_log_rate = df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate']
fig1, axes1 = plt.subplots()
weekly_log_rate.mean().plot(ax=axes1, linestyle="-",color='b', linewidth=1)
weekly_log_rate.quantile(.75).plot(ax=axes1, linestyle="--",color='g', linewidth=0.35)
weekly_log_rate.quantile(.50).plot(ax=axes1, linestyle="--",color='r', linewidth=0.75)
weekly_log_rate.quantile(.25).plot(ax=axes1, linestyle="--",color='g', linewidth=0.35)
axes1.set_ylim(-1,0.5)
axes1.set_title("Line plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
axes1.set_xlabel('week', fontsize=10)
axes1.set_ylabel('rate of change of the log-scale $Chl_a$ in $1/day$', fontsize=10)
axes1.set_yticks(np.arange(-1, 0.5, 0.25))
axes1.set_xticks(np.arange(1, 25, 1))


# --- box plot of the same data, one box per rotated week ---
# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
# http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chlor_a_logE_rate', by='week_rotate')
axes2.get_figure().suptitle("")  # drop the super-title that boxplot(by=...) adds
axes2.set_ylim(-1,0.5)
axes2.set_title("Box plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
axes2.set_xlabel('week', fontsize=10)
axes2.set_ylabel('rate of change of the log-scale $Chl_a$ in $1/day$', fontsize=10)

# render both figures; this also keeps the Text repr of the last call out of the cell output
plt.show()


Out[15]:
<matplotlib.text.Text at 0x11a310c88>

In [16]:
# Release every open figure.  close('all') alone is enough: following it with
# plt.cla()/plt.clf() would implicitly create a new, empty figure, and
# plt.show() would then render that blank figure as the cell output.
plt.close('all')


<matplotlib.figure.Figure at 0x11a59af98>

In [17]:
# Spatial plots of the chl-a rate of change, one figure per month of the
# season -- five months in total, in seasonal order: 11, 12, 1, 2, 3.
# The month tables are loop invariants, so they are built once, outside the loop.
month_ind = np.array([11,12,1,2,3])
month_names = ['November', 'December','January','February', 'March']

for month_number, month_name in zip(month_ind, month_names):
    # records of this calendar month, pooled over all years
    aa = df_timed_NovMar[df_timed_NovMar.index.month == month_number]
    print('\n\n summary of the Chl_rate \n', aa.chl_rate.describe())

    fig, ax  = plt.subplots(figsize=(8,6))
    # fixed, symmetric colour limits so that the five maps share one colour scale
    aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r', vmin=-0.6, vmax=0.6, edgecolor='none', ax=ax, title = 'Rate of change of the $Chl_a$ in %s' % (month_name))
    ax.set_xticks(np.arange(45, 80, 2.5))
    ax.set_yticks(np.arange(0, 28, 2.5))
    plt.show()
    plt.close(fig)  # do not keep one open figure per month around



 summary of the Chl_rate 
 count    107.000000
mean      -0.003057
std        0.127664
min       -0.314036
25%       -0.051853
50%       -0.005549
75%        0.038812
max        0.617254
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    115.000000
mean       0.013725
std        0.236352
min       -1.591152
25%       -0.033967
50%        0.011980
75%        0.068723
max        1.029209
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    173.000000
mean       0.081537
std        1.083109
min       -9.201785
25%       -0.058736
50%        0.004676
75%        0.086253
max        9.436026
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    185.000000
mean      -0.018115
std        8.207964
min      -61.745387
25%       -0.119316
50%       -0.009475
75%        0.128890
max       46.203613
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    165.000000
mean      -0.408649
std        4.984929
min      -32.863400
25%       -0.214682
50%       -0.017424
75%        0.012679
max       24.273974
Name: chl_rate, dtype: float64

In [18]:
# Scratch cell: prints the literal string "test" and does nothing else.
print("test")


test

In [ ]:


In [ ]: