This notebook calculates the Lagrangian rate of change of the log-scale $Chl_a$, using $\frac{D \, (\log_{e} Chl_a)}{Dt} := \frac{1}{Chl_a}\frac{D \, (Chl_a)}{Dt}$.

  • A few remarks:
    • Units: $mg/(m^3 \cdot day)$ for the rate of change of $Chl_a$; dividing by $Chl_a$ gives the log-scale rate in $day^{-1}$
    • Natural logarithm added
    • All the rates on the same time frequency
    • Validate the rate of change of the log-scale Chl-a with a finite difference (FD) of the logarithms
    • Monthly trends
      • aaa
      • aaa

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import xarray as xr
from datetime import datetime
import datetime


/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)

In [3]:
# Load the floats data
# ********************
# *** CSV files ***
# ********************
# The frame read here carries lon/lat plus the distance column computed in an
# earlier step (the "_3" data set).
#
# File naming convention (see the readme of the experiments): the trailing
# number names the processing stage; for instance "3" means the distance has
# been added to the data set. This notebook writes stage "4".

# Close figures left over from earlier runs. Calling plt.cla()/plt.clf()/
# plt.show() afterwards would only create and display a new, empty figure,
# so those calls are intentionally not made here.
plt.close('all')

# Time step between consecutive samples; the rows of the table are 8 days
# apart and the rates below are divided by this value to get per-day rates.
freq = 8
suf = 'DOC'
in_filename = 'df_chl_out_' + str(freq) + suf + '_modisa_3.csv'
out_filename = 'df_chl_out_' + str(freq) + suf + '_modisa_4.csv'
folder = './data_collector_modisa_chla9km/'
direc = folder + in_filename

print('in_filename and path:', direc)
print('out_filename:', out_filename)

# The CSV stores the original row labels in a column called 'index'.
df_chl_out_3 = pd.read_csv(direc, index_col='index')
df_chl_out_3


<matplotlib.figure.Figure at 0x1153422b0>
in_filename and path: ./data_collector_modisa_chla9km/df_chl_out_8DOC_modisa_3.csv
out_filename: df_chl_out_8DOC_modisa_4.csv
Out[3]:
id time spd vn var_lon lon lat var_lat ve var_tmp temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist
index
3886 10206 2002-11-01 11.188906 6.509875 0.000996 67.351188 10.873656 0.000352 -6.823625 1000.000000 NaN 0.132783 -0.876858 -0.017698 NaN 520.4050
3888 11089 2002-11-01 13.679406 4.337844 0.000106 65.099156 14.269219 0.000057 -11.122000 0.003679 28.969813 0.150789 -0.821630 0.025481 -1.593784 822.7430
3908 34710 2002-11-01 12.432687 11.684344 0.000123 63.145031 17.038563 0.000064 0.757312 0.001698 28.970219 0.388257 -0.410881 0.064084 -1.193250 584.0640
4145 10206 2002-11-09 3.428062 1.562844 0.003551 67.108219 11.155719 0.000984 -0.786375 1000.000000 NaN 0.135089 -0.869380 0.002306 -2.637141 545.1970
4147 11089 2002-11-09 19.677781 -6.951906 0.000126 64.193281 14.220969 0.000065 -17.539250 0.003868 28.742188 0.201879 -0.694909 0.051090 -1.291664 858.3870
4149 15707 2002-11-09 23.783812 -15.661781 0.000150 67.022625 12.926656 0.000075 -12.393500 1000.000000 NaN 0.153961 -0.812589 -0.004697 NaN 580.7750
4167 34710 2002-11-09 26.598219 25.294281 0.000126 63.000156 17.952812 0.000065 -2.723375 0.001878 28.255188 0.501054 -0.300115 0.112797 -0.947702 513.0270
4173 34721 2002-11-09 18.274406 12.886094 0.000124 68.182750 12.879281 0.000063 8.236687 0.001813 29.291313 0.150119 -0.823564 0.002713 -2.566549 457.1560
4426 34710 2002-11-17 51.033437 42.687531 0.000152 62.227781 20.239094 0.000075 -25.647250 0.001815 27.549469 0.480306 -0.318482 -0.020748 NaN 327.3360
4432 34721 2002-11-17 12.116437 11.203437 0.000104 68.325406 13.776062 0.000056 -1.096344 0.001754 29.060656 0.133791 -0.873573 -0.016328 NaN 481.8100
4681 34315 2002-11-25 34.884469 -22.903094 0.000104 57.697625 6.886781 0.000056 -20.598188 0.004397 28.923875 0.142745 -0.845439 0.004513 -2.345534 713.3930
4684 34709 2002-11-25 26.364529 -18.946647 0.000130 74.747500 11.582167 0.000066 -11.325471 1000.000000 NaN 0.236595 -0.625994 -0.070532 NaN 68.7629
4685 34710 2002-11-25 65.392656 4.843219 0.000098 62.329031 22.561781 0.000052 35.795281 0.001683 27.312969 0.482353 -0.316635 0.002047 -2.688882 258.0500
4691 34721 2002-11-25 11.410500 9.268219 0.000108 68.210844 14.380031 0.000058 -2.194313 0.001749 28.983563 0.142531 -0.846091 0.008740 -2.058489 528.2990
4944 34710 2002-12-03 46.047312 -8.093344 0.000150 63.903438 20.918094 0.000076 9.094031 0.001760 26.786125 1.431313 0.155735 0.948960 -0.022752 448.3200
5203 34710 2002-12-11 14.257563 10.818781 0.000123 64.458906 21.914281 0.000064 5.068844 0.001715 26.455813 0.592263 -0.227485 -0.839050 NaN 360.6660
5440 10206 2002-12-19 9.617437 4.556469 0.004192 64.896875 12.434812 0.001140 -8.368125 1000.000000 NaN 0.156649 -0.805072 0.019815 -1.703006 795.6110
5458 34315 2002-12-19 14.123563 5.683906 0.000094 52.956562 8.837594 0.000050 -7.807813 0.003568 27.552594 0.270939 -0.567128 -0.074232 NaN 241.1240
5462 34710 2002-12-19 10.502969 -6.181844 0.000107 64.695437 21.996188 0.000057 1.270844 0.001660 26.456375 0.484379 -0.314815 -0.107884 NaN 351.7960
5468 34721 2002-12-19 13.939531 -1.223813 0.000113 66.889000 15.015063 0.000060 -5.765406 0.001752 28.101500 0.157701 -0.802166 0.016133 -1.792285 685.7980
5699 10206 2002-12-27 12.251438 -1.765500 0.001212 64.271031 12.549094 0.000417 -11.493313 1000.000000 NaN 0.163432 -0.786663 0.006783 -2.168578 866.4100
5701 11089 2002-12-27 18.858156 13.433844 0.000123 58.528844 14.770719 0.000065 4.662594 0.003657 26.580312 0.227655 -0.642723 -0.080533 NaN 381.1380
5727 34721 2002-12-27 11.281406 9.685406 0.000108 66.840469 15.328750 0.000058 0.916906 0.001784 27.666062 0.179984 -0.744766 0.022283 -1.652026 703.3780
5958 10206 2003-01-04 12.856875 -5.715375 0.002190 63.550156 12.280437 0.000691 -11.053437 1000.000000 NaN 0.293834 -0.531898 0.130402 -0.884716 940.2960
5960 11089 2003-01-04 28.521781 -26.206813 0.000134 59.107094 14.353250 0.000070 5.199344 0.003628 26.104250 0.241114 -0.617778 0.013459 -1.870987 456.8570
5986 34721 2003-01-04 9.439625 6.296906 0.000130 66.806469 15.917500 0.000069 -0.686937 0.001784 27.099719 0.252779 -0.597259 0.072795 -1.137898 668.1950
6245 34721 2003-01-12 5.829688 0.587656 0.000143 66.872000 16.100937 0.000073 3.039469 0.001790 26.924969 0.784225 -0.105559 0.531446 -0.274541 647.3570
6480 15707 2003-01-20 29.502000 -2.061375 0.000165 53.534156 9.159719 0.000084 -23.901281 1000.000000 NaN 0.567945 -0.245694 0.138772 -0.857698 273.2370
6482 27139 2003-01-20 39.732875 -11.357875 0.000154 60.161406 21.526563 0.000077 -30.396656 0.003372 25.101438 0.889541 -0.050834 0.324802 -0.488381 70.0870
6504 34721 2003-01-20 11.056531 -6.067625 0.000134 67.260125 15.943687 0.000070 8.365000 0.001864 26.808875 0.450702 -0.346111 -0.333523 NaN 639.3160
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
160511 114945 2016-01-24 16.426625 -2.015563 0.000059 63.093344 11.397188 0.000130 -14.821188 0.001803 27.841750 0.261716 -0.582170 0.038290 -1.416910 940.9580
160544 147127 2016-01-24 16.039875 -13.398062 0.000025 63.918406 17.120500 0.000056 -6.248188 0.001685 25.938562 1.535089 0.186134 1.208838 0.082368 640.4780
160554 60150420 2016-01-24 29.255969 24.009875 0.000005 60.651625 6.411062 0.000003 -4.516375 0.001684 27.916875 0.151033 -0.820928 -0.028541 NaN 953.3690
160769 114917 2016-02-01 19.631719 0.287500 0.000047 71.655594 13.694594 0.000104 -0.602406 0.001819 28.963156 0.175277 -0.756275 0.038361 -1.416112 238.7190
160803 147127 2016-02-01 16.704312 -6.202406 0.000021 63.489719 16.346813 0.000046 -2.310031 0.001752 25.847969 0.637804 -0.195313 -0.897285 NaN 662.0290
160813 60150420 2016-02-01 20.689094 9.689937 0.000005 60.247719 7.097969 0.000003 2.720687 0.001684 27.543437 0.201691 -0.695314 0.050658 -1.295353 868.9360
161028 114917 2016-02-09 16.747969 -1.324813 0.000049 71.394656 13.744531 0.000110 -6.097062 0.001770 28.736531 0.196238 -0.707216 0.020962 -1.678577 251.8690
161062 147127 2016-02-09 16.373969 -2.064625 0.000020 63.043063 16.160937 0.000047 -14.384594 0.001683 25.865125 0.679547 -0.167780 0.041743 -1.379415 635.7060
161072 60150420 2016-02-09 28.188344 23.421656 0.000005 61.167812 8.367313 0.000003 14.079844 0.001684 27.543937 0.165788 -0.780447 -0.035903 NaN 859.9870
161287 114917 2016-02-17 17.292531 1.582844 0.000039 71.042531 13.715094 0.000090 -7.299969 0.001794 28.704438 0.185869 -0.730792 -0.010369 NaN 261.9900
161288 114945 2016-02-17 16.583125 3.474531 0.000074 60.167750 11.973937 0.000160 -15.263531 0.001968 27.163656 0.385036 -0.414498 0.039529 -1.403086 616.6080
161321 147127 2016-02-17 13.602531 0.513469 0.000027 62.402781 16.112031 0.000063 -3.445594 0.001675 25.694813 0.672385 -0.172382 -0.007162 NaN 582.4100
161331 60150420 2016-02-17 15.845250 -2.976969 0.000005 61.526156 8.957031 0.000003 2.012250 0.001684 27.387125 0.164424 -0.784036 -0.001364 NaN 862.7080
161542 114873 2016-02-25 46.406563 31.911688 0.000046 56.937562 7.920000 0.000103 -26.101250 0.001738 27.758813 0.161817 -0.790975 -0.002742 NaN 576.1700
161546 114917 2016-02-25 15.755031 1.721063 0.000041 70.649250 13.835156 0.000093 -4.553625 0.001898 28.897437 0.227879 -0.642297 0.042009 -1.376657 295.7130
161547 114945 2016-02-25 17.389031 0.625844 0.000080 59.187937 12.082219 0.000170 -16.443625 0.001964 27.346812 0.209156 -0.679530 -0.175880 NaN 506.9260
161580 147127 2016-02-25 4.892375 -0.772781 0.000018 62.385563 16.147094 0.000040 -0.198031 0.001698 26.104094 0.774010 -0.111254 0.101624 -0.993003 576.3870
161801 114873 2016-03-04 22.003844 -6.779937 0.000037 55.139125 8.443687 0.000084 -20.524625 0.001729 28.017688 0.151360 -0.819989 -0.010457 NaN 443.0950
161805 114917 2016-03-04 17.934188 0.119406 0.000045 70.025500 13.959344 0.000098 -13.646313 0.001863 29.127500 0.162528 -0.789072 -0.065351 NaN 349.1780
161806 114945 2016-03-04 11.021281 -1.083656 0.000207 58.335156 12.127812 0.000377 -10.133969 0.001882 27.922812 0.227034 -0.643909 0.017878 -1.747684 415.5460
161839 147127 2016-03-04 17.594531 -3.890563 0.000019 62.116156 15.841812 0.000044 -8.121344 0.001655 26.949656 0.376218 -0.424560 -0.397791 NaN 570.3880
162060 114873 2016-03-12 9.301438 -1.854719 0.000030 54.368750 8.112656 0.000066 -4.882906 0.001779 28.597687 0.122877 -0.910531 -0.028483 NaN 414.2320
162064 114917 2016-03-12 14.665219 -8.076125 0.000036 69.226937 13.619938 0.000083 -7.315469 0.001815 29.071594 0.158982 -0.798652 -0.003546 NaN 391.6030
162065 114945 2016-03-12 9.326250 -4.065969 0.000120 57.813969 11.889969 0.000253 -5.984000 0.002030 28.350969 0.169983 -0.769593 -0.057051 NaN 363.7230
162098 147127 2016-03-12 21.346375 -8.517750 0.000016 61.957500 15.564375 0.000038 -0.578969 0.002710 28.131781 0.342775 -0.464991 -0.033443 NaN 577.0800
162108 60150420 2016-03-12 27.715719 -5.131031 0.000005 61.016375 10.009031 0.000003 -22.813438 0.001795 28.580750 0.125524 -0.901275 0.002054 -2.687491 760.1980
162323 114917 2016-03-20 29.604250 -17.772313 0.000051 68.712938 13.048219 0.000114 -7.033125 0.001830 29.248094 0.144908 -0.838907 -0.014074 NaN 411.6270
162367 60150420 2016-03-20 36.445312 -7.017594 0.000005 59.155281 9.990875 0.000003 -33.476469 0.001734 28.605438 0.216656 -0.664229 0.091133 -1.040326 576.3650
162582 114917 2016-03-28 29.713500 12.361594 0.000046 69.281031 12.596750 0.000100 22.120531 0.001927 29.890781 0.144557 -0.839961 -0.000351 NaN 331.8890
162626 60150420 2016-03-28 32.750687 -9.834937 0.000005 58.262562 8.938125 0.000003 10.682125 0.001684 28.979375 0.195704 -0.708399 -0.020952 NaN 569.0470

683 rows × 16 columns


In [4]:
# Bring the rate onto a per-day basis and divide by the concentration:
#   (1 / chlor_a) * chl_rate / freq
# The quantity is computed in two equivalent ways as a cross-check.
ratio_per_step = df_chl_out_3['chl_rate'] / df_chl_out_3['chlor_a']
check1 = ratio_per_step / freq
check2 = df_chl_out_3['chl_rate'].divide(
    freq * df_chl_out_3['chlor_a'], axis='index')
# Sum of absolute differences between the two computations (expected: 0.0).
np.abs(check1 - check2).sum()


Out[4]:
0.0

In [5]:
# Attach the per-day log-scale rate to the frame under 'chlor_a_logE_rate';
# this frame is what gets written to the output CSV further down.
log_rate_values = check2.values.copy()
df_chl_out_3['chlor_a_logE_rate'] = pd.Series(
    log_rate_values, index=df_chl_out_3.index)
df_chl_out_3.head()


Out[5]:
id time spd vn var_lon lon lat var_lat ve var_tmp temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
3886 10206 2002-11-01 11.188906 6.509875 0.000996 67.351188 10.873656 0.000352 -6.823625 1000.000000 NaN 0.132783 -0.876858 -0.017698 NaN 520.405 -0.016661
3888 11089 2002-11-01 13.679406 4.337844 0.000106 65.099156 14.269219 0.000057 -11.122000 0.003679 28.969813 0.150789 -0.821630 0.025481 -1.593784 822.743 0.021123
3908 34710 2002-11-01 12.432687 11.684344 0.000123 63.145031 17.038563 0.000064 0.757312 0.001698 28.970219 0.388257 -0.410881 0.064084 -1.193250 584.064 0.020632
4145 10206 2002-11-09 3.428062 1.562844 0.003551 67.108219 11.155719 0.000984 -0.786375 1000.000000 NaN 0.135089 -0.869380 0.002306 -2.637141 545.197 0.002134
4147 11089 2002-11-09 19.677781 -6.951906 0.000126 64.193281 14.220969 0.000065 -17.539250 0.003868 28.742188 0.201879 -0.694909 0.051090 -1.291664 858.387 0.031634

In [6]:
# Summary statistics of the new column; the distribution has a longer tail on
# the negative (left-hand) side.
df_chl_out_3['chlor_a_logE_rate'].describe()


Out[6]:
count    683.000000
mean      -0.032549
std        0.166130
min       -1.971716
25%       -0.033506
50%       -0.005183
75%        0.021785
max        0.120403
Name: chlor_a_logE_rate, dtype: float64

In [7]:
# Scatter map (lon/lat) of the rate of change of log(chl-a), coloured by value.
# NOTE(review): the colour limits are not symmetric around zero, so the neutral
# colour of the diverging 'RdBu_r' map does not sit at a zero rate -- confirm
# this is intended.
fig, ax = plt.subplots(figsize=(12, 10))
color_floor = check2.median() - 0.5 * check2.std()
color_ceiling = check2.max()
df_chl_out_3.plot(kind='scatter', x='lon', y='lat', c='chlor_a_logE_rate',
                  cmap='RdBu_r', vmin=color_floor, vmax=color_ceiling,
                  edgecolor='none', ax=ax,
                  title='rate of change of the log-scale chl-a')


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x117748860>

In [8]:
# Histogram of the raw (non-standardized) log-scale rate.
# Only a few values fall far out on the left; bins are limited to [-1.5, 0.5].
log_rate_no_nan = df_chl_out_3['chlor_a_logE_rate'].dropna()
axdf_chl = log_rate_no_nan.hist(bins=100, range=[-1.5, 0.5])
axdf_chl.set_title('histogram of the rate of change of the log-scale chl-a')


Out[8]:
<matplotlib.text.Text at 0x117ab8438>

In [9]:
# Histogram of the standardized series: subtract the mean, divide by the
# standard deviation.
# NOTE(review): the bin range [-1.5, 0.5] is the same one used for the raw
# histogram; in standardized units values above 0.5 are left out -- confirm
# that this is intended.
tmp = df_chl_out_3['chlor_a_logE_rate'].dropna()
tmp = tmp.sub(tmp.mean()).div(tmp.std())
axdf_chl_stdan = tmp.hist(bins=100, range=[-1.5, 0.5])
axdf_chl_stdan.set_title(
    'histogram of the standardized rate of change of the log-scale chl-a')


Out[9]:
<matplotlib.text.Text at 0x118b32400>

In [22]:
# Finite-difference check for float id 10206 at 2002-11-09: difference of the
# natural logs of two consecutive chlor_a values (0.132783 -> 0.135089),
# divided by freq. Compare with the stored chlor_a_logE_rate of 0.002134.
(np.log(0.135089)-np.log(0.132783))   / freq


Out[22]:
0.0021522004602332889

In [11]:
#############################################
# Spot check on the 2D-subsampled data set
#############################################
# Row checked: id 10206, time 2002-11-09
#   Val 1 (formula used for the stored column):
#     (0.135089 - 0.132783) / (freq * 0.135089)      == 0.0021337784719703077
#   Val 2 (finite difference of the natural logs):
#     (np.log(0.135089) - np.log(0.132783)) / freq   == 0.0021522004602332889
#   The two values are very close.

# Sort by float id, then time, so consecutive samples of one float are adjacent.
df_chl_out_3.sort_values(['id', 'time']).head()


Out[11]:
id time spd vn var_lon lon lat var_lat ve var_tmp temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
3886 10206 2002-11-01 11.188906 6.509875 0.000996 67.351188 10.873656 0.000352 -6.823625 1000.0 NaN 0.132783 -0.876858 -0.017698 NaN 520.405 -0.016661
4145 10206 2002-11-09 3.428062 1.562844 0.003551 67.108219 11.155719 0.000984 -0.786375 1000.0 NaN 0.135089 -0.869380 0.002306 -2.637141 545.197 0.002134
5440 10206 2002-12-19 9.617437 4.556469 0.004192 64.896875 12.434812 0.001140 -8.368125 1000.0 NaN 0.156649 -0.805072 0.019815 -1.703006 795.611 0.015812
5699 10206 2002-12-27 12.251438 -1.765500 0.001212 64.271031 12.549094 0.000417 -11.493313 1000.0 NaN 0.163432 -0.786663 0.006783 -2.168578 866.410 0.005188
5958 10206 2003-01-04 12.856875 -5.715375 0.002190 63.550156 12.280437 0.000691 -11.053437 1000.0 NaN 0.293834 -0.531898 0.130402 -0.884716 940.296 0.055474

In [12]:
# Parse the 'time' column into datetime values; no explicit format string is
# passed (a '%m/%d/%y %I:%M%p' format was considered but is not used).
df_chl_out_3['time'] = pd.to_datetime(df_chl_out_3.time)
# Quick look at the first rows, ordered by float id and time.
df_chl_out_3.sort_values(['id', 'time']).head()


Out[12]:
id time spd vn var_lon lon lat var_lat ve var_tmp temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
3886 10206 2002-11-01 11.188906 6.509875 0.000996 67.351188 10.873656 0.000352 -6.823625 1000.0 NaN 0.132783 -0.876858 -0.017698 NaN 520.405 -0.016661
4145 10206 2002-11-09 3.428062 1.562844 0.003551 67.108219 11.155719 0.000984 -0.786375 1000.0 NaN 0.135089 -0.869380 0.002306 -2.637141 545.197 0.002134
5440 10206 2002-12-19 9.617437 4.556469 0.004192 64.896875 12.434812 0.001140 -8.368125 1000.0 NaN 0.156649 -0.805072 0.019815 -1.703006 795.611 0.015812
5699 10206 2002-12-27 12.251438 -1.765500 0.001212 64.271031 12.549094 0.000417 -11.493313 1000.0 NaN 0.163432 -0.786663 0.006783 -2.168578 866.410 0.005188
5958 10206 2003-01-04 12.856875 -5.715375 0.002190 63.550156 12.280437 0.000691 -11.053437 1000.0 NaN 0.293834 -0.531898 0.130402 -0.884716 940.296 0.055474

In [13]:
# Write the augmented frame to CSV, keeping the row labels in a column named
# 'index' so that the file can be read back with index_col='index'.
#
# Stage numbers in the file names:
#   3 -- three features: {temp, chl_rate, dist}
#   4 -- four features:  {temp, chl_rate, dist, chlor_a_logE_rate}
print('out_filename:', out_filename)
df_chl_out_3.to_csv(out_filename, index_label='index')

# Read the file back as a round-trip check and inspect the first rows.
test = pd.read_csv(out_filename, index_col='index')
test.sort_values(['id', 'time']).head()


out_filename: df_chl_out_8DOC_modisa_4.csv
Out[13]:
id time spd vn var_lon lon lat var_lat ve var_tmp temp chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
3886 10206 2002-11-01 11.188906 6.509875 0.000996 67.351188 10.873656 0.000352 -6.823625 1000.0 NaN 0.132783 -0.876858 -0.017698 NaN 520.405 -0.016661
4145 10206 2002-11-09 3.428062 1.562844 0.003551 67.108219 11.155719 0.000984 -0.786375 1000.0 NaN 0.135089 -0.869380 0.002306 -2.637141 545.197 0.002134
5440 10206 2002-12-19 9.617437 4.556469 0.004192 64.896875 12.434812 0.001140 -8.368125 1000.0 NaN 0.156649 -0.805072 0.019815 -1.703006 795.611 0.015812
5699 10206 2002-12-27 12.251438 -1.765500 0.001212 64.271031 12.549094 0.000417 -11.493313 1000.0 NaN 0.163432 -0.786663 0.006783 -2.168578 866.410 0.005188
5958 10206 2003-01-04 12.856875 -5.715375 0.002190 63.550156 12.280437 0.000691 -11.053437 1000.0 NaN 0.293834 -0.531898 0.130402 -0.884716 940.296 0.055474

In [14]:
## Print the ISO week number of 1 November and of 31 March for 2002-2016,
## to see which week numbers bound the November-March season.
# Relies on the module-level `import datetime` (which comes after, and so
# overrides, the earlier `from datetime import datetime`).
years = range(2002, 2017)

for year in years:
    nov_first = datetime.datetime(year, 11, 1)
    print(str(year)+'-11-01 is week', nov_first.isocalendar()[1])  # 44 or 45

print('----')
for year in years:
    mar_last = datetime.datetime(year, 3, 31)
    print(str(year)+'-3-31 is week', mar_last.isocalendar()[1])  # 13 or 14

plt.show()


2002-11-01 is week 44
2003-11-01 is week 44
2004-11-01 is week 45
2005-11-01 is week 44
2006-11-01 is week 44
2007-11-01 is week 44
2008-11-01 is week 44
2009-11-01 is week 44
2010-11-01 is week 44
2011-11-01 is week 44
2012-11-01 is week 44
2013-11-01 is week 44
2014-11-01 is week 44
2015-11-01 is week 44
2016-11-01 is week 44
----
2002-3-31 is week 13
2003-3-31 is week 14
2004-3-31 is week 14
2005-3-31 is week 13
2006-3-31 is week 13
2007-3-31 is week 13
2008-3-31 is week 14
2009-3-31 is week 14
2010-3-31 is week 13
2011-3-31 is week 13
2012-3-31 is week 13
2013-3-31 is week 13
2014-3-31 is week 14
2015-3-31 is week 14
2016-3-31 is week 13

In [15]:
##### Weekly plots of the Lagrangian rate of change of chl-a (November-March)
# (An earlier approach that grouped directly on the week of the index was
# abandoned in favour of the explicit 'week' / 'week_rotate' columns below.)

# --- prepare the data --------------------------------------------------------
# Index by time and tag every row with its week-of-year number.
# NOTE(review): DatetimeIndex.week is deprecated in newer pandas releases in
# favour of .isocalendar().week -- revisit when the environment is upgraded.
df_timed = df_chl_out_3.set_index('time')
df_timed['week'] = df_timed.index.week

# Keep weeks 44..53 and 1..14 (1 November falls in week 44/45 and 31 March in
# week 13/14, see the week-number check above).
mask_NovMar = (df_timed.week <= 14) | (df_timed.week >= 44)
# .copy(): a column is added below, so work on an independent frame instead of
# a slice of df_timed (otherwise pandas raises SettingWithCopyWarning).
df_timed_NovMar = df_timed[mask_NovMar].copy()

print('the min and max of the week index is %d, %d :' % (df_timed_NovMar.week.min(), df_timed_NovMar.week.max()) )

# Rotate the week numbers so the season starts at 1: week 44 -> 1, ...,
# week 53 -> 10, week 1 -> 11, ..., week 14 -> 24.
df_timed_NovMar['week_rotate'] = (df_timed_NovMar.week + 10) % 53

# chl_rate is the change of chl-a over one time step of `freq` days (it is
# divided by freq elsewhere to obtain per-day rates), so label it that way.
chl_rate_label = 'rate of change of the $Chl_a$ in $mg/m^3$ per %d days' % freq

# --- line plot: weekly mean and quartiles ------------------------------------
# Group once and reuse the grouping for all four curves.
weekly_chl_rate = df_timed_NovMar.groupby(['week_rotate'])['chl_rate']
fig1, axes1 = plt.subplots()
weekly_chl_rate.mean().plot(ax=axes1, linestyle="-", color='b', linewidth=1, label='mean')
weekly_chl_rate.quantile(.75).plot(ax=axes1, linestyle="--", color='g', linewidth=0.35, label='75% quantile')
weekly_chl_rate.quantile(.50).plot(ax=axes1, linestyle="--", color='r', linewidth=0.75, label='median')
weekly_chl_rate.quantile(.25).plot(ax=axes1, linestyle="--", color='g', linewidth=0.35, label='25% quantile')
axes1.set_ylim(-3, 2)
axes1.set_title("Line plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
axes1.set_xlabel('week', fontsize=10)
axes1.set_ylabel(chl_rate_label, fontsize=10)
axes1.set_yticks(np.arange(-3, 2, 0.5))
axes1.set_xticks(np.arange(1, 25, 1))
axes1.legend(loc='best', fontsize=8)
plt.show()

# --- box plot per rotated week -----------------------------------------------
# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
# http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chl_rate', by='week_rotate')
plt.suptitle("")  # drop the automatic "grouped by" super-title
axes2.set_ylim(-1.6, 1.6)
axes2.set_title("Box plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
axes2.set_xlabel('week', fontsize=10)
axes2.set_ylabel(chl_rate_label, fontsize=10)
plt.show()

# the rate of change is slower on the regular scale


the min and max of the week index is 1, 53 :

In [16]:
# Weekly plots of the Lagrangian rate of change of the log-scale chl-a,
# i.e. the column 'chlor_a_logE_rate' = chl_rate / (freq * chlor_a).
# Uses df_timed_NovMar (with its 'week_rotate' column) from the previous cell.

# chl_rate / chlor_a is dimensionless and freq is in days, so this rate is in
# 1/day -- not mg/(m^3 day) as for the concentration itself.
log_rate_label = 'rate of change of the log-scale $Chl_a$ in $day^{-1}$'

# --- line plot: weekly mean and quartiles ------------------------------------
# Group once and reuse the grouping for all four curves.
weekly_log_rate = df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate']
fig1, axes1 = plt.subplots()
weekly_log_rate.mean().plot(ax=axes1, linestyle="-", color='b', linewidth=1, label='mean')
weekly_log_rate.quantile(.75).plot(ax=axes1, linestyle="--", color='g', linewidth=0.35, label='75% quantile')
weekly_log_rate.quantile(.50).plot(ax=axes1, linestyle="--", color='r', linewidth=0.75, label='median')
weekly_log_rate.quantile(.25).plot(ax=axes1, linestyle="--", color='g', linewidth=0.35, label='25% quantile')
axes1.set_ylim(-1, 0.5)
axes1.set_title("Line plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
axes1.set_xlabel('week', fontsize=10)
axes1.set_ylabel(log_rate_label, fontsize=10)
axes1.set_yticks(np.arange(-1, 0.5, 0.25))
axes1.set_xticks(np.arange(1, 25, 1))
axes1.legend(loc='best', fontsize=8)

# --- box plot per rotated week -----------------------------------------------
# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
# http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chlor_a_logE_rate', by='week_rotate')
plt.suptitle("")  # drop the automatic "grouped by" super-title
axes2.set_ylim(-1, 0.5)
axes2.set_title("Box plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
axes2.set_xlabel('week', fontsize=10)
axes2.set_ylabel(log_rate_label, fontsize=10)

# Render both figures here so the cell does not end on a Text object repr.
plt.show()


#plt.close('all')


Out[16]:
<matplotlib.text.Text at 0x11a01df60>

In [17]:
# Release every open figure before the per-month maps are drawn.
# plt.cla()/plt.clf() after close('all') would implicitly create a brand-new
# empty figure, and plt.show() would then display it, so only close() is used.
plt.close('all')


<matplotlib.figure.Figure at 0x118457e48>

In [18]:
# Spatial (lon/lat) scatter maps of chl_rate, one figure per month, for the
# five months of the season in the order November, December, January,
# February, March. The same fixed colour limits (-0.6, 0.6) are used in every
# figure; limits derived from each month's median/mean and std were tried
# earlier and are not used.
month_ind = np.array([11, 12, 1, 2, 3])
month_names = ['November', 'December', 'January', 'February', 'March']

for i, month_number in enumerate(month_ind):
    # rows of the current calendar month
    aa = df_timed_NovMar[df_timed_NovMar.index.month == month_number]
    fig, ax = plt.subplots(figsize=(8, 6))
    print('\n\n summary of the Chl_rate \n', aa.chl_rate.describe())
    plot_title = 'Rate of change of the $Chl_a$ in %s' % (month_names[i])
    aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r',
            vmin=-0.6, vmax=0.6, edgecolor='none', ax=ax, title=plot_title)
    plt.xticks(np.arange(45, 80, 2.5))
    plt.yticks(np.arange(0, 28, 2.5))
    plt.show()



 summary of the Chl_rate 
 count    108.000000
mean      -0.012186
std        0.115592
min       -0.372265
25%       -0.038421
50%       -0.000576
75%        0.041588
max        0.345365
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    109.000000
mean       0.028671
std        0.323727
min       -1.444104
25%       -0.044968
50%        0.005496
75%        0.062134
max        1.688675
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    145.000000
mean       0.130195
std        0.598494
min       -0.535257
25%       -0.044103
50%        0.006622
75%        0.132662
max        4.694123
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    158.000000
mean      -0.165618
std        4.219963
min      -28.683290
25%       -0.118069
50%       -0.005606
75%        0.063794
max       35.780121
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    163.000000
mean      -0.255519
std        3.027861
min      -17.483995
25%       -0.202936
50%       -0.025407
75%        0.001883
max       16.557178
Name: chl_rate, dtype: float64

In [19]:
# Smoke test: prints a fixed marker to show that the kernel is responsive.
print('test')


test

In [ ]:


In [ ]: