This notebook calculates the Lagrangian rate of change of the log-scale $Chl_a$, using $\frac{D \, (\log_{e} Chl_a)}{Dt} := \frac{1}{Chl_a}\frac{D \, (Chl_a)}{Dt}$

  • A few remarks:
    • Unit $mg/(m^3 \cdot day)$
    • Natural logarithm added
    • All the rates on the same time frequency
    • Validate the rate of change of the log-scale Chl-a by finite differences (FD)
    • Monthly trends
      • aaa
      • aaa

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import xarray as xr
# NOTE: the plain `import datetime` that follows rebinds the name `datetime`
# to the module, so the class imported on the next line is shadowed.
# The week-number check further down calls `datetime.datetime(...)`, i.e. it
# relies on the module binding.
from datetime import datetime
import datetime


/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)

In [2]:
# Load the floats data
# ********************
# *** CSV files ***
# ********************
# Reads the version-3 CSV of the floats data into a DataFrame and defines the
# file names used by the rest of the notebook.
#
# Somehow the CSV format has some compatibility issues here.
# See the readme file for the file convention in the experiments;
# for instance "3" indicates that the distance has been added to the dataset.

# Start from a clean matplotlib state. Only close() is needed: calling
# plt.cla()/plt.clf() right after closing everything would implicitly create
# a brand-new empty figure, which plt.show() then displays as a blank output.
plt.close('all')

# frequency of the time series: `freq` is the number and `suf` the unit,
# which together form the '8D' tag in the file names
freq = 8
suf = 'D'
in_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_3.csv'
out_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_4.csv'
folder = './data_collector_modisa_chla9km/'
direc = folder + in_filename   # full relative path of the input file

print('in_filename and path:', direc)
print('out_filename:', out_filename)

# the CSV carries the original row labels in a column named 'index'
df_chl_out_3 = pd.read_csv(direc, index_col='index')
df_chl_out_3


<matplotlib.figure.Figure at 0x115328cc0>
in_filename and path: ./data_collector_modisa_chla9km/df_chl_out_8D_modisa_3.csv
out_filename: df_chl_out_8D_modisa_4.csv
Out[2]:
id time var_tmp lon spd ve var_lat lat var_lon temp vn chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist
index
3886 10206 2002-11-01 1000.000000 67.351188 11.188906 -6.823625 0.000352 10.873656 0.000996 NaN 6.509875 0.137771 -0.860842 -0.012681 NaN 520.4050
3888 11089 2002-11-01 0.003679 65.099156 13.679406 -11.122000 0.000057 14.269219 0.000106 28.969813 4.337844 0.152450 -0.816873 0.027142 -1.566358 822.7430
3908 34710 2002-11-01 0.001698 63.145031 12.432687 0.757312 0.000064 17.038563 0.000123 28.970219 11.684344 0.383868 -0.415819 0.059694 -1.224066 584.0640
4145 10206 2002-11-09 1000.000000 67.108219 3.428062 -0.786375 0.000984 11.155719 0.003551 NaN 1.562844 0.132682 -0.877188 -0.005089 NaN 545.1970
4147 11089 2002-11-09 0.003868 64.193281 19.677781 -17.539250 0.000065 14.220969 0.000126 28.742188 -6.951906 0.201879 -0.694909 0.049429 -1.306018 858.3870
4149 15707 2002-11-09 1000.000000 67.022625 23.783812 -12.393500 0.000075 12.926656 0.000150 NaN -15.661781 0.153961 -0.812589 -0.005174 NaN 580.7750
4167 34710 2002-11-09 0.001878 63.000156 26.598219 -2.723375 0.000065 17.952812 0.000126 28.255188 25.294281 0.442761 -0.353831 0.058893 -1.229935 513.0270
4173 34721 2002-11-09 0.001813 68.182750 18.274406 8.236687 0.000063 12.879281 0.000124 29.291313 12.886094 0.151290 -0.820190 0.003190 -2.496244 457.1560
4426 34710 2002-11-17 0.001815 62.227781 51.033437 -25.647250 0.000075 20.239094 0.000152 27.549469 42.687531 0.466490 -0.331158 0.023729 -1.624715 327.3360
4432 34721 2002-11-17 0.001754 68.325406 12.116437 -1.096344 0.000056 13.776062 0.000104 29.060656 11.203437 0.131582 -0.880802 -0.019708 NaN 481.8100
4681 34315 2002-11-25 0.004397 57.697625 34.884469 -20.598188 0.000056 6.886781 0.000104 28.923875 -22.903094 0.142909 -0.844939 0.005349 -2.271687 713.3930
4684 34709 2002-11-25 1000.000000 74.747500 26.364529 -11.325471 0.000066 11.582167 0.000130 NaN -18.946647 0.236595 -0.625994 -0.106141 NaN 68.7629
4685 34710 2002-11-25 0.001683 62.329031 65.392656 35.795281 0.000052 22.561781 0.000098 27.312969 4.843219 0.495943 -0.304569 0.029453 -1.530875 258.0500
4691 34721 2002-11-25 0.001749 68.210844 11.410500 -2.194313 0.000058 14.380031 0.000108 28.983563 9.268219 0.139785 -0.854541 0.008202 -2.086080 528.2990
4944 34710 2002-12-03 0.001760 63.903438 46.047312 9.094031 0.000076 20.918094 0.000150 26.786125 -8.093344 1.431313 0.155735 0.935370 -0.029016 448.3200
5203 34710 2002-12-11 0.001715 64.458906 14.257563 5.068844 0.000064 21.914281 0.000123 26.455813 10.818781 0.614099 -0.211762 -0.817214 NaN 360.6660
5440 10206 2002-12-19 1000.000000 64.896875 9.617437 -8.368125 0.001140 12.434812 0.004192 NaN 4.556469 0.156649 -0.805072 0.017498 -1.757012 795.6110
5458 34315 2002-12-19 0.003568 52.956562 14.123563 -7.807813 0.000050 8.837594 0.000094 27.552594 5.683906 0.281399 -0.550677 -0.063772 NaN 241.1240
5462 34710 2002-12-19 0.001660 64.695437 10.502969 1.270844 0.000057 21.996188 0.000107 26.456375 -6.181844 0.479058 -0.319612 -0.135041 NaN 351.7960
5468 34721 2002-12-19 0.001752 66.889000 13.939531 -5.765406 0.000060 15.015063 0.000113 28.101500 -1.223813 0.156046 -0.806746 0.014478 -1.839282 685.7980
5699 10206 2002-12-27 1000.000000 64.271031 12.251438 -11.493313 0.000417 12.549094 0.001212 NaN -1.765500 0.187383 -0.727269 0.030735 -1.512374 866.4100
5701 11089 2002-12-27 0.003657 58.528844 18.858156 4.662594 0.000065 14.770719 0.000123 26.580312 13.433844 0.268435 -0.571160 -0.039752 NaN 381.1380
5721 34710 2002-12-27 0.001744 64.445625 23.197031 -10.800563 0.000066 21.020625 0.000127 25.767500 -18.935125 0.332565 -0.478123 -0.146493 NaN 445.3570
5727 34721 2002-12-27 0.001784 66.840469 11.281406 0.916906 0.000058 15.328750 0.000108 27.666062 9.685406 0.186440 -0.729462 0.030393 -1.517224 703.3780
5960 11089 2003-01-04 0.003628 59.107094 28.521781 5.199344 0.000070 14.353250 0.000134 26.104250 -26.206813 0.241114 -0.617778 -0.027321 NaN 456.8570
5980 34710 2003-01-04 0.001798 63.616619 25.564700 -15.161450 0.000082 20.756429 0.000162 25.268333 17.029500 0.432840 -0.363673 0.100275 -0.998807 427.6810
5986 34721 2003-01-04 0.001784 66.806469 9.439625 -0.686937 0.000069 15.917500 0.000130 27.099719 6.296906 0.569022 -0.244871 0.382582 -0.417275 668.1950
6219 11089 2003-01-12 0.003933 58.996656 17.337188 -5.146563 0.000077 12.791187 0.000150 26.045250 -15.677625 0.430869 -0.365655 0.189755 -0.721807 482.7110
6245 34721 2003-01-12 0.001790 66.872000 5.829688 3.039469 0.000073 16.100937 0.000143 26.924969 0.587656 0.706841 -0.150678 0.137819 -0.860690 647.3570
6478 11089 2003-01-20 0.003659 58.521563 15.764500 -9.533906 0.000074 12.180875 0.000144 25.981625 -8.695594 0.337209 -0.472101 -0.093660 NaN 436.7260
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
160769 114917 2016-02-01 0.001819 71.655594 19.631719 -0.602406 0.000104 13.694594 0.000047 28.963156 0.287500 0.175277 -0.756275 0.039308 -1.405514 238.7190
160803 147127 2016-02-01 0.001752 63.489719 16.704312 -2.310031 0.000046 16.346813 0.000021 25.847969 -6.202406 0.581770 -0.235249 -0.541896 NaN 662.0290
160813 60150420 2016-02-01 0.001684 60.247719 20.689094 2.720687 0.000003 7.097969 0.000005 27.543437 9.689937 0.201691 -0.695314 0.050676 -1.295198 868.9360
161028 114917 2016-02-09 0.001770 71.394656 16.747969 -6.097062 0.000110 13.744531 0.000049 28.736531 -1.324813 0.196200 -0.707300 0.020924 -1.679365 251.8690
161062 147127 2016-02-09 0.001683 63.043063 16.373969 -14.384594 0.000047 16.160937 0.000020 25.865125 -2.064625 0.542158 -0.265874 -0.039612 NaN 635.7060
161072 60150420 2016-02-09 0.001684 61.167812 28.188344 14.079844 0.000003 8.367313 0.000005 27.543937 23.421656 0.164384 -0.784139 -0.037306 NaN 859.9870
161287 114917 2016-02-17 0.001794 71.042531 17.292531 -7.299969 0.000090 13.715094 0.000039 28.704438 1.582844 0.184201 -0.734708 -0.012000 NaN 261.9900
161288 114945 2016-02-17 0.001968 60.167750 16.583125 -15.263531 0.000160 11.973937 0.000074 27.163656 3.474531 0.385036 -0.414498 0.036615 -1.436337 616.6080
161321 147127 2016-02-17 0.001675 62.402781 13.602531 -3.445594 0.000063 16.112031 0.000027 25.694813 0.513469 0.537049 -0.269986 -0.005109 NaN 582.4100
161331 60150420 2016-02-17 0.001684 61.526156 15.845250 2.012250 0.000003 8.957031 0.000005 27.387125 -2.976969 0.164424 -0.784036 0.000039 -4.407485 862.7080
161542 114873 2016-02-25 0.001738 56.937562 46.406563 -26.101250 0.000103 7.920000 0.000046 27.758813 31.911688 0.161817 -0.790975 -0.002742 NaN 576.1700
161546 114917 2016-02-25 0.001898 70.649250 15.755031 -4.553625 0.000093 13.835156 0.000041 28.897437 1.721063 0.227879 -0.642297 0.043678 -1.359740 295.7130
161547 114945 2016-02-25 0.001964 59.187937 17.389031 -16.443625 0.000170 12.082219 0.000080 27.346812 0.625844 0.222920 -0.651851 -0.162116 NaN 506.9260
161580 147127 2016-02-25 0.001698 62.385563 4.892375 -0.198031 0.000040 16.147094 0.000018 26.104094 -0.772781 0.844760 -0.073267 0.307711 -0.511857 576.3870
161801 114873 2016-03-04 0.001729 55.139125 22.003844 -20.524625 0.000084 8.443687 0.000037 28.017688 -6.779937 0.153205 -0.814728 -0.008613 NaN 443.0950
161805 114917 2016-03-04 0.001863 70.025500 17.934188 -13.646313 0.000098 13.959344 0.000045 29.127500 0.119406 0.185481 -0.731700 -0.042398 NaN 349.1780
161806 114945 2016-03-04 0.001882 58.335156 11.021281 -10.133969 0.000377 12.127812 0.000207 27.922812 -1.083656 0.207299 -0.683403 -0.015621 NaN 415.5460
161839 147127 2016-03-04 0.001655 62.116156 17.594531 -8.121344 0.000044 15.841812 0.000019 26.949656 -3.890563 0.446693 -0.349991 -0.398067 NaN 570.3880
162060 114873 2016-03-12 0.001779 54.368750 9.301438 -4.882906 0.000066 8.112656 0.000030 28.597687 -1.854719 0.124690 -0.904169 -0.028515 NaN 414.2320
162064 114917 2016-03-12 0.001815 69.226937 14.665219 -7.315469 0.000083 13.619938 0.000036 29.071594 -8.076125 0.158982 -0.798652 -0.026499 NaN 391.6030
162065 114945 2016-03-12 0.002030 57.813969 9.326250 -5.984000 0.000253 11.889969 0.000120 28.350969 -4.065969 0.189802 -0.721699 -0.017497 NaN 363.7230
162098 147127 2016-03-12 0.002710 61.957500 21.346375 -0.578969 0.000038 15.564375 0.000016 28.131781 -8.517750 0.302195 -0.519713 -0.144498 NaN 577.0800
162108 60150420 2016-03-12 0.001795 61.016375 27.715719 -22.813438 0.000003 10.009031 0.000005 28.580750 -5.131031 0.116803 -0.932544 -0.006667 NaN 760.1980
162319 114873 2016-03-20 0.001740 53.953719 20.981063 -9.490219 0.000044 8.552313 0.000019 28.649906 15.733344 0.133323 -0.875095 0.008633 -2.063830 346.2960
162323 114917 2016-03-20 0.001830 68.712938 29.604250 -7.033125 0.000114 13.048219 0.000051 29.248094 -17.772313 0.144908 -0.838907 -0.014074 NaN 411.6270
162357 147127 2016-03-20 0.001709 61.961781 17.396000 2.836375 0.000053 14.936375 0.000023 28.477188 -14.710938 0.326580 -0.486010 0.024385 -1.612870 629.2770
162367 60150420 2016-03-20 0.001734 59.155281 36.445312 -33.476469 0.000003 9.990875 0.000005 28.605438 -7.017594 0.201866 -0.694937 0.085062 -1.070262 576.3650
162578 114873 2016-03-28 0.001771 53.703469 21.149031 0.933969 0.000079 9.646187 0.000035 28.962937 18.252594 0.113328 -0.945661 -0.019995 NaN 265.0020
162582 114917 2016-03-28 0.001927 69.281031 29.713500 22.120531 0.000100 12.596750 0.000046 29.890781 12.361594 0.144557 -0.839961 -0.000351 NaN 331.8890
162626 60150420 2016-03-28 0.001684 58.262562 32.750687 10.682125 0.000003 8.938125 0.000005 28.979375 -9.834937 0.195704 -0.708399 -0.006162 NaN 569.0470

692 rows × 16 columns


In [3]:
# Put all the rates on the same time frequency: the log-scale rate is
# chl_rate / (freq * chlor_a), computed here in two equivalent ways.

# way 1: divide by the concentration first, then by the frequency
check1 = (df_chl_out_3['chl_rate'] / df_chl_out_3['chlor_a']) / freq

# way 2: a single division by (freq * concentration)
check2 = df_chl_out_3['chl_rate'].divide(freq * df_chl_out_3['chlor_a'], axis='index')

# a check: total absolute discrepancy between the two computations
(check1 - check2).abs().sum()


Out[3]:
0.0

In [4]:
# Attach the log-scale rate to the dataframe as a new column.
# The values are handed over as a plain array, i.e. positionally, row by row.
df_chl_out_3['chlor_a_logE_rate'] = np.array(check2)
df_chl_out_3.head()


Out[4]:
id time var_tmp lon spd ve var_lat lat var_lon temp vn chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
3886 10206 2002-11-01 1000.000000 67.351188 11.188906 -6.823625 0.000352 10.873656 0.000996 NaN 6.509875 0.137771 -0.860842 -0.012681 NaN 520.405 -0.011506
3888 11089 2002-11-01 0.003679 65.099156 13.679406 -11.122000 0.000057 14.269219 0.000106 28.969813 4.337844 0.152450 -0.816873 0.027142 -1.566358 822.743 0.022255
3908 34710 2002-11-01 0.001698 63.145031 12.432687 0.757312 0.000064 17.038563 0.000123 28.970219 11.684344 0.383868 -0.415819 0.059694 -1.224066 584.064 0.019439
4145 10206 2002-11-09 1000.000000 67.108219 3.428062 -0.786375 0.000984 11.155719 0.003551 NaN 1.562844 0.132682 -0.877188 -0.005089 NaN 545.197 -0.004794
4147 11089 2002-11-09 0.003868 64.193281 19.677781 -17.539250 0.000065 14.220969 0.000126 28.742188 -6.951906 0.201879 -0.694909 0.049429 -1.306018 858.387 0.030606

In [5]:
# summary statistics of the new column -- more scattered on the left hand side
df_chl_out_3['chlor_a_logE_rate'].describe()


Out[5]:
count    692.000000
mean      -0.024882
std        0.146499
min       -1.782442
25%       -0.028650
50%       -0.002615
75%        0.022206
max        0.116349
Name: chlor_a_logE_rate, dtype: float64

In [6]:
# Map of the rate of change (ROC) of log(chl_a) around the Arabian Sea region:
# one point per observation at (lon, lat), coloured by the log-scale rate.
fig, ax = plt.subplots(figsize=(12, 10))
df_chl_out_3.plot(
    kind='scatter',
    x='lon',
    y='lat',
    c='chlor_a_logE_rate',
    cmap='RdBu_r',
    # colour scale runs from (median - half a standard deviation) to the maximum
    vmin=check2.median() - 0.5 * check2.std(),
    vmax=check2.max(),
    edgecolor='none',
    ax=ax,
    title='rate of change of the log-scale chl-a',
)


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x11775eeb8>

In [7]:
# Histogram of the non-standardized log-scale rate (missing values dropped).
# Only [-1.5, 0.5] is binned; there are a very few small values further left.
axdf_chl = (
    df_chl_out_3['chlor_a_logE_rate']
    .dropna()
    .hist(bins=100, range=[-1.5, 0.5])
)
axdf_chl.set_title('histogram of the rate of change of the log-scale chl-a')


Out[7]:
<matplotlib.text.Text at 0x1179e12e8>

In [8]:
# Standardized series: subtract the mean and divide by the standard deviation
# of the non-missing log-scale rates, then histogram the result.
logE_rate = df_chl_out_3['chlor_a_logE_rate'].dropna()
logE_rate_standardized = (logE_rate - logE_rate.mean()) / logE_rate.std()

# only [-1.5, 0.5] is binned; there are a very few small values further left
axdf_chl_stdan = logE_rate_standardized.hist(bins=100, range=[-1.5, 0.5])
axdf_chl_stdan.set_title('histogram of the standardized rate of change of the log-scale chl-a')


Out[8]:
<matplotlib.text.Text at 0x117ba13c8>

In [20]:
# Finite-difference estimate of the log-scale rate for id 10206:
# (ln chlor_a on 2002-12-27  -  ln chlor_a on 2002-12-19) / freq,
# using the chlor_a values 0.187383 and 0.156649 shown in the table above.
(np.log(0.187383)-np.log(0.156649))   / freq


Out[20]:
0.022393377115180996

In [10]:
###########################
# On 2D-subsampling Dataset
###########################
# Validation of the log-scale rate for one sample (id: 10206, time: 2002-12-27).
#
# Val 1 -- the formula used for the 'chlor_a_logE_rate' column:
#   (0.187383 - 0.156649) / (freq*0.187383)      == 0.020502126660369394
#
# Val 2 -- finite difference of the natural logarithm:
#   (np.log(0.187383)-np.log(0.156649)) / freq   == 0.022393377115180996
#
# The two values are very close to each other.

# first rows ordered by float id and then by time, to look the sample up
df_chl_out_3.sort_values(['id', 'time']).head(5)


Out[10]:
id time var_tmp lon spd ve var_lat lat var_lon temp vn chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
3886 10206 2002-11-01 1000.000000 67.351188 11.188906 -6.823625 0.000352 10.873656 0.000996 NaN 6.509875 0.137771 -0.860842 -0.012681 NaN 520.405 -0.011506
4145 10206 2002-11-09 1000.000000 67.108219 3.428062 -0.786375 0.000984 11.155719 0.003551 NaN 1.562844 0.132682 -0.877188 -0.005089 NaN 545.197 -0.004794
5440 10206 2002-12-19 1000.000000 64.896875 9.617437 -8.368125 0.001140 12.434812 0.004192 NaN 4.556469 0.156649 -0.805072 0.017498 -1.757012 795.611 0.013963
5699 10206 2002-12-27 1000.000000 64.271031 12.251438 -11.493313 0.000417 12.549094 0.001212 NaN -1.765500 0.187383 -0.727269 0.030735 -1.512374 866.410 0.020502
3888 11089 2002-11-01 0.003679 65.099156 13.679406 -11.122000 0.000057 14.269219 0.000106 28.969813 4.337844 0.152450 -0.816873 0.027142 -1.566358 822.743 0.022255

In [11]:
# Convert the 'time' column into datetime values (replaces the column in place).
df_chl_out_3['time'] = pd.to_datetime(df_chl_out_3.time)

# a check: first rows ordered by float id and then by time
df_chl_out_3.sort_values(['id', 'time']).head()


Out[11]:
id time var_tmp lon spd ve var_lat lat var_lon temp vn chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
3886 10206 2002-11-01 1000.000000 67.351188 11.188906 -6.823625 0.000352 10.873656 0.000996 NaN 6.509875 0.137771 -0.860842 -0.012681 NaN 520.405 -0.011506
4145 10206 2002-11-09 1000.000000 67.108219 3.428062 -0.786375 0.000984 11.155719 0.003551 NaN 1.562844 0.132682 -0.877188 -0.005089 NaN 545.197 -0.004794
5440 10206 2002-12-19 1000.000000 64.896875 9.617437 -8.368125 0.001140 12.434812 0.004192 NaN 4.556469 0.156649 -0.805072 0.017498 -1.757012 795.611 0.013963
5699 10206 2002-12-27 1000.000000 64.271031 12.251438 -11.493313 0.000417 12.549094 0.001212 NaN -1.765500 0.187383 -0.727269 0.030735 -1.512374 866.410 0.020502
3888 11089 2002-11-01 0.003679 65.099156 13.679406 -11.122000 0.000057 14.269219 0.000106 28.969813 4.337844 0.152450 -0.816873 0.027142 -1.566358 822.743 0.022255

In [12]:
# Write the dataframe (now including 'chlor_a_logE_rate') to CSV, keeping the
# row labels in a column named 'index', then read the file back as a check.
#
# File convention:
#   ..._3.csv -- 3 features: {temp, chl_rate, dist}
#   ..._4.csv -- 4 features: {temp, chl_rate, dist, chlor_a_logE_rate}
#
# NOTE(review): the file is written to the current working directory, whereas
# the input was read from `folder` -- confirm this is intended.
print('out_filename:', out_filename)
df_chl_out_3.to_csv(out_filename, index_label='index')

# load the CSV output again
test = pd.read_csv(out_filename, index_col='index')

# a check: first rows ordered by float id and then by time
test.sort_values(['id', 'time']).head()


out_filename: df_chl_out_8D_modisa_4.csv
Out[12]:
id time var_tmp lon spd ve var_lat lat var_lon temp vn chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
3886 10206 2002-11-01 1000.000000 67.351188 11.188906 -6.823625 0.000352 10.873656 0.000996 NaN 6.509875 0.137771 -0.860842 -0.012681 NaN 520.405 -0.011506
4145 10206 2002-11-09 1000.000000 67.108219 3.428062 -0.786375 0.000984 11.155719 0.003551 NaN 1.562844 0.132682 -0.877188 -0.005089 NaN 545.197 -0.004794
5440 10206 2002-12-19 1000.000000 64.896875 9.617437 -8.368125 0.001140 12.434812 0.004192 NaN 4.556469 0.156649 -0.805072 0.017498 -1.757012 795.611 0.013963
5699 10206 2002-12-27 1000.000000 64.271031 12.251438 -11.493313 0.000417 12.549094 0.001212 NaN -1.765500 0.187383 -0.727269 0.030735 -1.512374 866.410 0.020502
3888 11089 2002-11-01 0.003679 65.099156 13.679406 -11.122000 0.000057 14.269219 0.000106 28.969813 4.337844 0.152450 -0.816873 0.027142 -1.566358 822.743 0.022255

In [13]:
## Check which ISO week numbers Nov-01 and Mar-31 fall into, for every year
## from 2002 to 2016; these bound the Nov-Mar season used below.
# `datetime` is the module here, hence datetime.datetime(...).
years = range(2002, 2017)

for year in years:
    nov_first = datetime.datetime(year, 11, 1)
    print(str(year)+'-11-01 is week', nov_first.isocalendar()[1])  # 44, 45

print('----')

for year in years:
    mar_last = datetime.datetime(year, 3, 31)
    print(str(year)+'-3-31 is week', mar_last.isocalendar()[1])  # 13, 14

plt.show()


2002-11-01 is week 44
2003-11-01 is week 44
2004-11-01 is week 45
2005-11-01 is week 44
2006-11-01 is week 44
2007-11-01 is week 44
2008-11-01 is week 44
2009-11-01 is week 44
2010-11-01 is week 44
2011-11-01 is week 44
2012-11-01 is week 44
2013-11-01 is week 44
2014-11-01 is week 44
2015-11-01 is week 44
2016-11-01 is week 44
----
2002-3-31 is week 13
2003-3-31 is week 14
2004-3-31 is week 14
2005-3-31 is week 13
2006-3-31 is week 13
2007-3-31 is week 13
2008-3-31 is week 14
2009-3-31 is week 14
2010-3-31 is week 13
2011-3-31 is week 13
2012-3-31 is week 13
2013-3-31 is week 13
2014-3-31 is week 14
2015-3-31 is week 14
2016-3-31 is week 13

In [14]:
##### weekly plot on the Lagrangian rate of change of the chl-a
# Builds the Nov-Mar subset indexed by time, numbers its weeks starting from
# the first week of November, and draws a line plot (mean and quartiles per
# week) and a box plot of 'chl_rate'.

###
# select the corresponding weeks, prepare the data
df_timed = df_chl_out_3.set_index('time')
# ISO week number of every timestamp. Computed per element with
# isocalendar() because the DatetimeIndex.week accessor is deprecated and
# removed in recent pandas; this form works on old and new versions alike.
df_timed['week'] = [stamp.isocalendar()[1] for stamp in df_timed.index]

# keep weeks 44..53 and 1..14 (see the week-number check of Nov-01 / Mar-31)
mask_NovMar = (df_timed.week<=14) | (df_timed.week >=44)
# explicit copy: a new column is added below, and assigning onto a filtered
# slice would raise pandas' SettingWithCopyWarning
df_timed_NovMar = df_timed[mask_NovMar].copy()

# now rotate the index to make the first week of November the first week
print('the min and max of the week index is %d, %d :' % (df_timed_NovMar.week.min(), df_timed_NovMar.week.max()) )
# make the 44th week the 1st week: 44..53 -> 1..10 and 1..14 -> 11..24
df_timed_NovMar['week_rotate'] = (df_timed_NovMar.week + 10 ) % 53
assert df_timed_NovMar['week_rotate'].between(1, 24).all()  # now from 1 to 24

# line plot: weekly mean (solid) with the 75% / 50% / 25% quantiles (dashed)
# NOTE(review): 'chlor_a_logE_rate' is built as chl_rate / (freq * chlor_a),
# which suggests chl_rate is a change per `freq`-day step -- confirm the
# "per day" unit used in the axis labels below.
weekly_chl_rate = df_timed_NovMar.groupby(['week_rotate'])['chl_rate']
axes1 = weekly_chl_rate.mean().plot(linestyle="-", color='b', linewidth=1)
for level, colour, width in ((.75, 'g', 0.35), (.50, 'r', 0.75), (.25, 'g', 0.35)):
    weekly_chl_rate.quantile(level).plot(linestyle="--", color=colour, linewidth=width)
axes1.set_ylim(-3,2)
axes1.set_title("Line plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.yticks(np.arange(-3, 2, 0.5))
plt.xticks(np.arange(1, 25, 1))
plt.show()


# box plot of the same data, one box per rotated week
# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
# http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chl_rate', by='week_rotate')
plt.suptitle("")  # drop the automatic "grouped by" super-title
axes2.set_ylim(-1.6,1.6)
axes2.set_title("Box plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.show()

# the rate of change is slower on the regular scale


the min and max of the week index is 1, 53 :

In [15]:
# weekly plot on the Lagrangian rate of change of the log-scale chl-a
# This is the rate of change on the exponential scale.
# The quantity plotted is (1/Chl_a) * D(Chl_a)/Dt, so the concentration unit
# cancels and the axis unit is 1/day (not mg/(m^3 day)).

# line plot: weekly mean (solid) with the 75% / 50% / 25% quantiles (dashed)
weekly_log_rate = df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate']
axes1 = weekly_log_rate.mean().plot(linestyle="-", color='b', linewidth=1)
for level, colour, width in ((.75, 'g', 0.35), (.50, 'r', 0.75), (.25, 'g', 0.35)):
    weekly_log_rate.quantile(level).plot(linestyle="--", color=colour, linewidth=width)
axes1.set_ylim(-1,0.5)
axes1.set_title("Line plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $day^{-1}$', fontsize=10)
plt.yticks(np.arange(-1, 0.5, 0.25))
plt.xticks(np.arange(1, 25, 1))


# box plot of the same data, one box per rotated week
# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
# http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chlor_a_logE_rate', by='week_rotate')
plt.suptitle("")  # drop the automatic "grouped by" super-title
axes2.set_ylim(-1,0.5)
axes2.set_title("Box plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $day^{-1}$', fontsize=10)


Out[15]:
<matplotlib.text.Text at 0x11a4415f8>

In [16]:
# Release every open figure before the per-month maps are drawn.
# close('all') is sufficient on its own: calling plt.cla()/plt.clf() afterwards
# would implicitly create a new empty figure, which plt.show() then displays
# as a blank output.
plt.close('all')


<matplotlib.figure.Figure at 0x11a7aadd8>

In [17]:
# Spatial plot for each month of the season -- five months in total:
# November, December, January, February, March.
# For every month: print the summary statistics of chl_rate, then draw a
# (lon, lat) scatter map coloured by chl_rate. The same fixed, symmetric
# colour limits (-0.6, 0.6) and axis ticks are used for every month.
month_ind = np.array([11,12,1,2,3])
month_names = ['November', 'December','January','February', 'March']

for month_number, month_name in zip(month_ind, month_names):
    # rows of the Nov-Mar subset whose timestamp falls in this month
    aa = df_timed_NovMar[df_timed_NovMar.index.month == month_number]
    fig, ax  = plt.subplots(figsize=(8,6))
    print('\n\n summary of the Chl_rate \n', aa.chl_rate.describe())
    aa.plot(
        kind='scatter',
        x='lon',
        y='lat',
        c='chl_rate',
        cmap='RdBu_r',
        vmin=-0.6,
        vmax=0.6,
        edgecolor='none',
        ax=ax,
        title='Rate of change of the $Chl_a$ in %s' % (month_name),
    )
    plt.xticks(np.arange(45, 80, 2.5))
    plt.yticks(np.arange(0, 28, 2.5))
    plt.show()



 summary of the Chl_rate 
 count    109.000000
mean      -0.008042
std        0.160383
min       -0.617286
25%       -0.043352
50%       -0.003292
75%        0.029215
max        0.941259
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    103.000000
mean       0.057808
std        0.318363
min       -0.832372
25%       -0.044591
50%        0.022036
75%        0.087131
max        2.120178
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    157.000000
mean       0.182644
std        1.107132
min       -0.925911
25%       -0.040639
50%        0.011206
75%        0.100275
max       12.120585
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    157.000000
mean      -0.064656
std        4.148605
min      -30.828177
25%       -0.093045
50%       -0.004723
75%        0.075044
max       27.363718
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    166.000000
mean      -0.209397
std        2.927867
min      -14.264926
25%       -0.146459
50%       -0.022587
75%        0.002438
max       14.138201
Name: chl_rate, dtype: float64

In [18]:
# Leftover scratch cell: prints the literal string "test" and nothing else.
print("test")


test

In [ ]:


In [ ]: