This notebook calculates the Lagrangian rate of change of the log-scale $Chl_a$, using $\frac{D \ (\log_{e} Chl_a)}{Dt} := \frac{1}{Chl_a}\frac{D \ (Chl_a)}{Dt}$

  • A few remarks:
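    • Unit of the $Chl_a$ rate: $mg/(m^3 \cdot day)$; the log-scale rate $\frac{1}{Chl_a}\frac{D \ (Chl_a)}{Dt}$ has unit $day^{-1}$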
    • Natural logarithm added
    • All the rates on the same time frequency
    • Validate the rate of change of the log-scale Chl-a by finite differences (FD)
    • Monthly trends
      • aaa
      • aaa

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import xarray as xr
from datetime import datetime
import datetime


/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)

In [2]:
# Load the floats data from CSV.
# ********************
# *** CSV files ***
# ********************
# File convention (see the readme file of the experiments): the trailing
# number in the file name marks the processing stage; for instance "3"
# indicates that the distance (`dist`) has been added to the data set.
# This cell reads the stage-3 file; the stage-4 name is used for the output.
# NOTE (original author): the CSV format has some compatibility issues here.

# Reset matplotlib.  close('all') is sufficient on its own: a following
# plt.cla()/plt.clf() would implicitly create a fresh, empty figure, and
# plt.show() would then render it as a blank output.
plt.close('all')

# sub-sampling frequency used in the file names: freq=4, suf='D' -> "4D"
freq = 4
suf = 'D'
in_filename = 'df_chl_out_' + str(freq) + suf + '_modisa_3.csv'
out_filename = 'df_chl_out_' + str(freq) + suf + '_modisa_4.csv'
folder = './data_collector_modisa_chla9km/'
direc = folder + in_filename  # relative path of the input file

print('in_filename and path:', direc)
print('out_filename:', out_filename)

# the row labels are read from the CSV column named 'index'
df_chl_out_3 = pd.read_csv(direc, index_col='index')
df_chl_out_3


<matplotlib.figure.Figure at 0x114c3ac18>
in_filename and path: ./data_collector_modisa_chla9km/df_chl_out_4D_modisa_3.csv
out_filename: df_chl_out_4D_modisa_4.csv
Out[2]:
id time ve vn lat spd var_tmp temp var_lon var_lat lon chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist
index
7793 34710 2002-11-01 1.633063 12.896375 16.864937 13.935000 0.001790 28.994688 0.000128 0.000066 63.124500 0.385674 -0.413780 0.060035 -1.221596 595.3000
8030 10206 2002-11-05 -7.127375 6.176937 10.969438 11.645312 1000.000000 NaN 0.001244 0.000420 67.246562 0.142620 -0.845818 0.014256 -1.846018 528.3030
8034 15707 2002-11-05 -19.271875 -17.786375 13.879687 26.887063 1000.000000 NaN 0.000134 0.000069 67.560500 0.154235 -0.811817 -0.025134 NaN 558.6690
8052 34710 2002-11-05 -0.118437 10.472312 17.212188 10.930375 0.001605 28.945750 0.000118 0.000062 63.165562 0.407654 -0.389708 0.021980 -1.657972 573.8850
8058 34721 2002-11-05 6.933938 -2.230437 12.594938 14.224375 0.001764 29.537625 0.000098 0.000054 67.715438 0.154256 -0.811758 0.015577 -1.807530 499.8170
8289 10206 2002-11-09 0.022625 2.215375 11.142188 3.775187 1000.000000 NaN 0.001970 0.000627 67.122750 0.130267 -0.885166 -0.012353 NaN 540.8280
8291 11089 2002-11-09 -16.238438 -3.502250 14.321875 17.239313 0.004114 28.998500 0.000147 0.000073 64.466188 0.196834 -0.705899 0.026991 -1.568773 874.0360
8293 15707 2002-11-09 -4.991500 -23.108937 13.178562 24.741937 1000.000000 NaN 0.000178 0.000086 67.198438 0.167595 -0.775739 0.013360 -1.874194 571.2510
8317 34721 2002-11-09 11.853125 7.093188 12.646125 16.993125 0.001886 29.446188 0.000155 0.000075 68.066313 0.138921 -0.857231 -0.015335 NaN 463.7140
8550 11089 2002-11-13 -18.840062 -10.401563 14.120062 22.116250 0.003621 28.485875 0.000104 0.000056 63.920375 0.360733 -0.442814 0.163899 -0.785425 844.0840
8576 34721 2002-11-13 4.620250 18.679000 13.112438 19.555688 0.001739 29.136438 0.000094 0.000052 68.299188 0.149230 -0.826144 0.010309 -1.986797 453.5490
8825 34315 2002-11-17 34.274563 -5.053875 8.266500 37.265563 0.003482 28.918438 0.000090 0.000049 56.923063 0.130157 -0.885534 -0.015109 NaN 541.1860
8829 34710 2002-11-17 -22.952562 38.936375 19.620125 47.347313 0.001878 27.702813 0.000180 0.000086 62.715063 0.453457 -0.343464 -0.088070 NaN 402.9730
9088 34710 2002-11-21 -28.341938 46.438688 20.858063 54.719563 0.001752 27.396125 0.000124 0.000064 61.740500 0.508751 -0.293495 0.055294 -1.257322 247.2940
9346 34709 2002-11-25 -10.652937 -20.010063 11.607062 26.626625 1000.000000 NaN 0.000128 0.000065 74.781250 0.176726 -0.752700 -0.106943 NaN 62.5549
9347 34710 2002-11-25 30.333125 48.024625 22.275063 61.307938 0.001650 27.386000 0.000095 0.000050 61.509000 0.500707 -0.300416 -0.008044 NaN 172.1370
9584 10206 2002-11-29 -9.458500 2.887438 11.352438 10.056125 1000.000000 NaN 0.001644 0.000518 66.800250 0.141035 -0.850673 0.007791 -2.108406 575.8390
9588 15707 2002-11-29 -13.088625 -16.449750 11.903937 21.767312 1000.000000 NaN 0.000138 0.000071 64.532375 0.159588 -0.797000 0.018989 -1.721498 827.5410
9602 34315 2002-11-29 -33.201625 -18.194813 6.500062 39.915937 0.004788 29.121750 0.000095 0.000052 57.382438 0.138117 -0.859753 -0.012091 NaN 736.1100
9605 34709 2002-11-29 -22.086000 -1.932000 11.383000 22.171000 1000.000000 NaN 0.000145 0.000073 74.477500 0.222216 -0.653225 0.045490 -1.342084 103.7390
9606 34710 2002-11-29 41.257438 -38.338187 22.848500 69.477375 0.001716 27.239937 0.000102 0.000055 63.149062 1.618081 0.209000 1.117373 0.048198 259.2780
9865 34710 2002-12-03 7.611000 -45.172500 20.944437 57.906375 0.001600 26.985500 0.000169 0.000084 63.581063 2.082722 0.318631 0.464641 -0.332882 415.5490
9871 34721 2002-12-03 -1.896313 8.086375 14.825875 11.245500 0.001734 28.794750 0.000092 0.000051 68.163813 0.140905 -0.851074 0.001204 -2.919377 561.0450
10363 11089 2002-12-11 31.379250 -16.874688 15.459500 39.350187 0.003519 27.154250 0.000109 0.000058 59.619437 0.290047 -0.537532 -0.166750 NaN 415.4240
10642 34710 2002-12-15 4.879813 3.172938 22.050313 7.998937 0.001790 26.417125 0.000150 0.000075 64.564375 0.483880 -0.315262 -0.118284 NaN 342.7330
10901 34710 2002-12-19 1.952562 0.353062 22.081437 6.712688 0.001651 26.609875 0.000097 0.000053 64.647125 0.513772 -0.289230 0.029892 -1.524450 338.3960
10907 34721 2002-12-19 -10.201625 -4.116187 15.029062 18.585187 0.001776 28.196688 0.000101 0.000055 66.966938 0.147002 -0.832677 -0.060027 NaN 678.5840
11166 34721 2002-12-23 -1.329187 1.668563 15.001062 9.293875 0.001727 28.006312 0.000125 0.000065 66.811063 0.171676 -0.765290 0.024674 -1.607760 693.0460
11397 10206 2002-12-27 -10.298000 0.181000 12.586125 11.005313 1000.000000 NaN 0.001646 0.000541 64.445063 0.155624 -0.807923 -0.002679 NaN 845.7000
11399 11089 2002-12-27 -1.291313 20.622937 14.526750 21.825688 0.003616 26.773250 0.000136 0.000070 58.429000 0.251913 -0.598749 -0.064323 NaN 395.1920
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
320831 114917 2016-01-24 -16.917063 6.929375 13.889750 27.633563 0.001736 29.074875 0.000040 0.000092 72.346625 0.163614 -0.786180 0.027315 -1.563591 212.7340
320865 147127 2016-01-24 -2.512063 -9.555813 17.303313 11.850188 0.001671 25.943688 0.000024 0.000053 63.990250 3.403951 0.531983 3.039213 0.482761 637.1120
321124 147127 2016-01-28 -9.984312 -17.240312 16.937687 20.229563 0.001699 25.933438 0.000026 0.000059 63.846563 0.571200 -0.243212 -2.832751 NaN 650.4940
321383 147127 2016-02-01 -10.291000 -8.877500 16.500563 18.046250 0.001684 25.856437 0.000017 0.000038 63.552438 0.695658 -0.157604 0.124458 -0.904976 653.6040
321642 147127 2016-02-05 5.670938 -3.527313 16.193062 15.362375 0.001820 25.839500 0.000024 0.000055 63.427000 1.188966 0.075169 0.493307 -0.306882 667.7210
321901 147127 2016-02-09 -16.410437 -3.961000 16.194062 17.843438 0.001682 25.886813 0.000020 0.000046 63.267625 0.610285 -0.214467 -0.578681 NaN 654.2810
321911 60150420 2016-02-09 17.933813 25.384937 8.004250 31.330125 0.001684 27.591375 0.000005 0.000003 60.922312 0.207339 -0.683320 0.011986 -1.921312 860.7080
322127 114945 2016-02-13 -17.743125 7.489750 11.741812 20.036063 0.001962 27.462312 0.000123 0.000249 60.944500 0.336234 -0.473359 0.050242 -1.298937 702.0730
322160 147127 2016-02-13 -12.358750 -0.168250 16.127813 14.904500 0.001684 25.843438 0.000021 0.000048 62.818500 0.587664 -0.230871 -0.022621 NaN 615.9530
322170 60150420 2016-02-13 10.225875 21.458375 8.730375 25.046563 0.001684 27.496500 0.000005 0.000003 61.413313 0.187874 -0.726134 -0.019465 NaN 861.8530
322419 147127 2016-02-17 -8.317250 2.657062 16.115000 13.661250 0.001649 25.676125 0.000026 0.000060 62.544438 0.643008 -0.191784 0.055344 -1.256931 593.0820
322644 114917 2016-02-21 -8.015687 5.072125 13.739500 16.956188 0.001825 28.792938 0.000045 0.000103 70.976375 0.170473 -0.768344 -0.005045 NaN 270.0060
322678 147127 2016-02-21 1.426063 -1.630125 16.109063 13.543812 0.001701 25.713500 0.000028 0.000066 62.261125 0.738294 -0.131771 0.095286 -1.020969 568.3130
322937 147127 2016-02-25 1.255375 1.096688 16.162500 5.167000 0.001782 25.973813 0.000016 0.000037 62.395938 0.500435 -0.300653 -0.237859 NaN 573.9260
322947 60150420 2016-02-25 26.987875 6.735063 8.777312 37.642125 0.001684 27.858687 0.000005 0.000003 62.151625 0.143249 -0.843909 -0.021175 NaN 929.3830
323196 147127 2016-02-29 -1.651437 -2.642250 16.131688 4.617750 0.001614 26.234375 0.000019 0.000043 62.375188 0.959535 -0.017939 0.459101 -0.338092 576.3870
323422 114945 2016-03-04 -9.769750 1.725000 12.135625 10.421313 0.001897 27.850438 0.000163 0.000311 58.478313 0.197325 -0.704817 0.000749 -3.125359 428.5260
323455 147127 2016-03-04 -5.047750 -11.657063 15.983312 14.846188 0.001621 26.705813 0.000017 0.000039 62.324750 0.529893 -0.275812 -0.429642 NaN 582.9760
323676 114873 2016-03-08 -16.128750 -4.735750 8.340313 17.440125 0.001719 28.143500 0.000031 0.000073 54.846500 0.143506 -0.843130 -0.013996 NaN 443.2800
323699 127429 2016-03-08 -38.342250 2.449500 5.201111 39.909500 0.001826 29.785111 0.000025 0.000057 73.400333 0.130367 -0.884832 0.014651 -1.834139 13.3174
323714 147127 2016-03-08 -11.194938 3.875938 15.700313 20.342875 0.001689 27.193500 0.000022 0.000049 61.907562 0.362270 -0.440967 -0.167623 NaN 565.1070
323935 114873 2016-03-12 -9.312125 -7.013938 8.148563 12.430563 0.001782 28.518937 0.000020 0.000046 54.422312 0.128119 -0.892387 -0.015387 NaN 415.0910
323973 147127 2016-03-12 9.822625 -11.743250 15.817875 23.618063 0.002326 27.855188 0.000016 0.000037 62.058312 0.241103 -0.617797 -0.121167 NaN 569.7840
324232 147127 2016-03-16 -10.980562 -5.292250 15.310875 19.074687 0.003095 28.408375 0.000017 0.000040 61.856687 0.312578 -0.505041 0.071475 -1.145846 591.9080
324491 147127 2016-03-20 7.115187 -11.506687 15.208000 16.031250 0.001703 28.577188 0.000018 0.000042 61.929812 0.346970 -0.459708 0.034392 -1.463542 604.2410
324501 60150420 2016-03-20 -33.806875 2.054750 10.116375 35.299813 0.001785 28.558250 0.000005 0.000003 59.689688 0.658022 -0.181760 0.510525 -0.291983 624.3980
324712 114873 2016-03-24 -9.021375 22.263375 8.777375 27.266875 0.001719 28.638125 0.000017 0.000040 53.766125 0.129282 -0.888461 0.011951 -1.922596 316.6820
324750 147127 2016-03-24 -1.442438 -17.915188 14.664750 18.760750 0.001714 28.377188 0.000027 0.000064 61.993750 0.288169 -0.540352 -0.058801 NaN 651.9630
324760 60150420 2016-03-24 -33.146062 -16.089938 9.865375 37.590812 0.001684 28.652625 0.000005 0.000003 58.620875 0.223613 -0.650503 -0.434409 NaN 534.9490
324971 114873 2016-03-28 1.727937 15.405188 9.364250 19.915375 0.001734 28.868125 0.000045 0.000100 53.704937 0.120811 -0.917895 -0.008471 NaN 276.7750

774 rows × 16 columns


In [3]:
# Bring the rate onto a common time basis: chl_rate / (freq * chlor_a).
# The quantity is computed in two equivalent ways and compared.
# NOTE(review): dividing by `freq` presumably converts a per-sample change
# into a per-day rate -- confirm how chl_rate was built upstream.
relative_change = df_chl_out_3['chl_rate'] / df_chl_out_3['chlor_a']
check1 = relative_change / freq
check2 = df_chl_out_3['chl_rate'].divide(freq * df_chl_out_3['chlor_a'], axis='index')

# consistency check: total absolute difference between the two variants
np.sum(np.abs(check1 - check2))


Out[3]:
0.0

In [4]:
# Store the log-scale rate (check2) as a new column; the raw values are
# re-wrapped with the frame's own index so they are placed row by row.
log_rate_values = check2.values
df_chl_out_3['chlor_a_logE_rate'] = pd.Series(log_rate_values, index=df_chl_out_3.index)
df_chl_out_3.head(5)


Out[4]:
id time ve vn lat spd var_tmp temp var_lon var_lat lon chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
7793 34710 2002-11-01 1.633063 12.896375 16.864937 13.935000 0.001790 28.994688 0.000128 0.000066 63.124500 0.385674 -0.413780 0.060035 -1.221596 595.300 0.038916
8030 10206 2002-11-05 -7.127375 6.176937 10.969438 11.645312 1000.000000 NaN 0.001244 0.000420 67.246562 0.142620 -0.845818 0.014256 -1.846018 528.303 0.024989
8034 15707 2002-11-05 -19.271875 -17.786375 13.879687 26.887063 1000.000000 NaN 0.000134 0.000069 67.560500 0.154235 -0.811817 -0.025134 NaN 558.669 -0.040740
8052 34710 2002-11-05 -0.118437 10.472312 17.212188 10.930375 0.001605 28.945750 0.000118 0.000062 63.165562 0.407654 -0.389708 0.021980 -1.657972 573.885 0.013480
8058 34721 2002-11-05 6.933938 -2.230437 12.594938 14.224375 0.001764 29.537625 0.000098 0.000054 67.715438 0.154256 -0.811758 0.015577 -1.807530 499.817 0.025245

In [5]:
# summary statistics of the log-scale rate; more scattered on the left hand side
df_chl_out_3['chlor_a_logE_rate'].describe()


Out[5]:
count    774.000000
mean      -0.104321
std        0.637559
min       -8.727518
25%       -0.068108
50%       -0.000827
75%        0.044028
max        0.245494
Name: chlor_a_logE_rate, dtype: float64

In [6]:
# Map of the rate of change (ROC) of log(chl_a) around the Arabian Sea region.
# Colour scale: lower bound = median - 0.5*std of check2, upper bound = max of check2.
fig, ax = plt.subplots(figsize=(12, 10))
color_lo = check2.median() - 0.5 * check2.std()
color_hi = check2.max()
scatter_kwargs = dict(
    kind='scatter', x='lon', y='lat', c='chlor_a_logE_rate',
    cmap='RdBu_r', vmin=color_lo, vmax=color_hi, edgecolor='none',
    ax=ax, title='rate of change of the log-scale chl-a'
)
df_chl_out_3.plot(**scatter_kwargs)


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x114c3a668>

In [7]:
# Histogram of the raw (non-standardized) log-scale rate.
# The plotting range is limited to [-1.5, 0.5]; there are a few very small
# values further to the left.
log_rate_raw = df_chl_out_3['chlor_a_logE_rate'].dropna()
axdf_chl = log_rate_raw.hist(bins=100, range=[-1.5, 0.5])
axdf_chl.set_title('histogram of the rate of change of the log-scale chl-a')


Out[7]:
<matplotlib.text.Text at 0x11729d278>

In [8]:
# Histogram of the standardized series: subtract the mean, divide by the
# standard deviation.  There are a few very small values on the left,
# outside the plotted range.
log_rate_clean = df_chl_out_3['chlor_a_logE_rate'].dropna()
centered = log_rate_clean - log_rate_clean.mean()
standardized = centered / log_rate_clean.std()
axdf_chl_stdan = standardized.hist(bins=100, range=[-1.5, 0.5])
axdf_chl_stdan.set_title('histogram of the standardized rate of change of the log-scale chl-a')


Out[8]:
<matplotlib.text.Text at 0x1183409b0>

In [21]:
# Finite-difference check of the natural-log rate for float id 10206:
# chlor_a is 0.142620 on 2002-11-05 and 0.130267 on 2002-11-09.
chl_before = 0.142620
chl_after = 0.130267
(np.log(chl_after) - np.log(chl_before)) / freq


Out[21]:
-0.022649390062320029

In [10]:
###########################
# On the 4D-subsampled data set (freq = 4)
###########################
# Hand check for float id 10206, between 2002-11-05 (chlor_a = 0.142620)
# and 2002-11-09 (chlor_a = 0.130267):
#########
# Val 1 (relative finite difference, the form stored in chlor_a_logE_rate):
# (0.130267 - 0.142620) / (freq*(0.130267) )   == -0.023707078538693614
#########
# Val 2 (finite difference of the natural logarithm):
# (np.log(0.130267)-np.log(0.142620))   / freq == -0.022649390062320029 # very close to the value above

# order the rows per float and then chronologically to read off the values
df_chl_out_3.sort_values(['id', 'time']).head()


Out[10]:
id time ve vn lat spd var_tmp temp var_lon var_lat lon chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
8030 10206 2002-11-05 -7.127375 6.176937 10.969438 11.645312 1000.0 NaN 0.001244 0.000420 67.246562 0.142620 -0.845818 0.014256 -1.846018 528.303 0.024989
8289 10206 2002-11-09 0.022625 2.215375 11.142188 3.775187 1000.0 NaN 0.001970 0.000627 67.122750 0.130267 -0.885166 -0.012353 NaN 540.828 -0.023708
9584 10206 2002-11-29 -9.458500 2.887438 11.352438 10.056125 1000.0 NaN 0.001644 0.000518 66.800250 0.141035 -0.850673 0.007791 -2.108406 575.839 0.013810
11397 10206 2002-12-27 -10.298000 0.181000 12.586125 11.005313 1000.0 NaN 0.001646 0.000541 64.445063 0.155624 -0.807923 -0.002679 NaN 845.700 -0.004304
11656 10206 2002-12-31 -12.688625 -3.712000 12.512063 13.497563 1000.0 NaN 0.000777 0.000294 64.097000 0.175074 -0.756778 0.019450 -1.711073 882.902 0.027774

In [11]:
# Parse the 'time' column into pandas datetimes (format is inferred;
# an explicit one such as '%m/%d/%y %I:%M%p' is not needed here).
parsed_time = pd.to_datetime(df_chl_out_3.time)
df_chl_out_3['time'] = parsed_time
# quick visual check of the converted frame
df_chl_out_3.sort_values(['id', 'time']).head()


Out[11]:
id time ve vn lat spd var_tmp temp var_lon var_lat lon chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
8030 10206 2002-11-05 -7.127375 6.176937 10.969438 11.645312 1000.0 NaN 0.001244 0.000420 67.246562 0.142620 -0.845818 0.014256 -1.846018 528.303 0.024989
8289 10206 2002-11-09 0.022625 2.215375 11.142188 3.775187 1000.0 NaN 0.001970 0.000627 67.122750 0.130267 -0.885166 -0.012353 NaN 540.828 -0.023708
9584 10206 2002-11-29 -9.458500 2.887438 11.352438 10.056125 1000.0 NaN 0.001644 0.000518 66.800250 0.141035 -0.850673 0.007791 -2.108406 575.839 0.013810
11397 10206 2002-12-27 -10.298000 0.181000 12.586125 11.005313 1000.0 NaN 0.001646 0.000541 64.445063 0.155624 -0.807923 -0.002679 NaN 845.700 -0.004304
11656 10206 2002-12-31 -12.688625 -3.712000 12.512063 13.497563 1000.0 NaN 0.000777 0.000294 64.097000 0.175074 -0.756778 0.019450 -1.711073 882.902 0.027774

In [12]:
# Write the augmented frame to CSV, keeping the row labels in an 'index' column.
# Stage "_3" file -- {lat, lon, temp, chl_rate, dist}
# Stage "_4" file -- {lat, lon, temp, chl_rate, dist, chlor_a_logE_rate}
#
# 3 represents 3 features: {temp, chl_rate, dist}
# 4 represents 4 features: {temp, chl_rate, dist, chlor_a_logE_rate}
print('out_filename:', out_filename)
csv_options = dict(sep=',', index_label='index')
df_chl_out_3.to_csv(out_filename, **csv_options)

# read the file back as a round-trip check
reloaded = pd.read_csv(out_filename, index_col='index')
reloaded.sort_values(['id', 'time']).head()


out_filename: df_chl_out_4D_modisa_4.csv
Out[12]:
id time ve vn lat spd var_tmp temp var_lon var_lat lon chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
8030 10206 2002-11-05 -7.127375 6.176937 10.969438 11.645312 1000.0 NaN 0.001244 0.000420 67.246562 0.142620 -0.845818 0.014256 -1.846018 528.303 0.024989
8289 10206 2002-11-09 0.022625 2.215375 11.142188 3.775187 1000.0 NaN 0.001970 0.000627 67.122750 0.130267 -0.885166 -0.012353 NaN 540.828 -0.023708
9584 10206 2002-11-29 -9.458500 2.887438 11.352438 10.056125 1000.0 NaN 0.001644 0.000518 66.800250 0.141035 -0.850673 0.007791 -2.108406 575.839 0.013810
11397 10206 2002-12-27 -10.298000 0.181000 12.586125 11.005313 1000.0 NaN 0.001646 0.000541 64.445063 0.155624 -0.807923 -0.002679 NaN 845.700 -0.004304
11656 10206 2002-12-31 -12.688625 -3.712000 12.512063 13.497563 1000.0 NaN 0.000777 0.000294 64.097000 0.175074 -0.756778 0.019450 -1.711073 882.902 0.027774

In [13]:
## ISO week numbers of Nov 1 and Mar 31 for every year from 2002 to 2016
season_years = range(2002, 2017)

for yr in season_years:
    nov_first = datetime.datetime(yr, 11, 1)
    print(str(yr)+'-11-01 is week', nov_first.isocalendar()[1])  # 44, 45,

print('----')
for yr in season_years:
    mar_last = datetime.datetime(yr, 3, 31)
    print(str(yr)+'-3-31 is week', mar_last.isocalendar()[1])  # 13, 14

plt.show()


2002-11-01 is week 44
2003-11-01 is week 44
2004-11-01 is week 45
2005-11-01 is week 44
2006-11-01 is week 44
2007-11-01 is week 44
2008-11-01 is week 44
2009-11-01 is week 44
2010-11-01 is week 44
2011-11-01 is week 44
2012-11-01 is week 44
2013-11-01 is week 44
2014-11-01 is week 44
2015-11-01 is week 44
2016-11-01 is week 44
----
2002-3-31 is week 13
2003-3-31 is week 14
2004-3-31 is week 14
2005-3-31 is week 13
2006-3-31 is week 13
2007-3-31 is week 13
2008-3-31 is week 14
2009-3-31 is week 14
2010-3-31 is week 13
2011-3-31 is week 13
2012-3-31 is week 13
2013-3-31 is week 13
2014-3-31 is week 14
2015-3-31 is week 14
2016-3-31 is week 13

In [14]:
##### Weekly plots of the Lagrangian rate of change of the chl-a
# (An earlier approach -- grouping chl_rate by df_timed.index.week and calling
#  grouped.plot.box() -- was abandoned in favour of the one below.)

###
# Select the weeks of the Nov-Mar season and prepare the data.
# Index by time and tag every row with its ISO week number.
# Timestamp.isocalendar() is used rather than DatetimeIndex.week, which is
# deprecated in newer pandas releases; both yield the ISO week.
df_timed = df_chl_out_3.set_index('time')
df_timed['week'] = [ts.isocalendar()[1] for ts in df_timed.index]

# keep ISO weeks 44..53 and 1..14
mask_NovMar = (df_timed['week'] <= 14) | (df_timed['week'] >= 44)
# .copy(): the column added below must go into an independent frame, not into
# a slice of df_timed (otherwise pandas emits SettingWithCopyWarning).
df_timed_NovMar = df_timed[mask_NovMar].copy()

# Rotate the week index so that the season starts at 1:
# week 44 -> 1, ..., week 53 -> 10, week 1 -> 11, ..., week 14 -> 24.
print('the min and max of the week index is %d, %d :' % (df_timed_NovMar.week.min(), df_timed_NovMar.week.max()) )
df_timed_NovMar['week_rotate'] = (df_timed_NovMar['week'] + 10) % 53

# --- line plot: weekly mean (solid) and quartiles (dashed) of chl_rate ---
weekly_chl_rate = df_timed_NovMar.groupby(['week_rotate'])['chl_rate']
fig1, axes1 = plt.subplots()
weekly_chl_rate.mean().plot(ax=axes1, linestyle="-", color='b', linewidth=1)
weekly_chl_rate.quantile(.75).plot(ax=axes1, linestyle="--", color='g', linewidth=0.35)
weekly_chl_rate.quantile(.50).plot(ax=axes1, linestyle="--", color='r', linewidth=0.75)
weekly_chl_rate.quantile(.25).plot(ax=axes1, linestyle="--", color='g', linewidth=0.35)
axes1.set_ylim(-3, 2)
axes1.set_title("Line plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
axes1.set_xlabel('week', fontsize=10)
axes1.set_ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
axes1.set_yticks(np.arange(-3, 2, 0.5))
axes1.set_xticks(np.arange(1, 25, 1))
plt.show()


# --- box plot of chl_rate per rotated week ---
# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
# http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chl_rate', by='week_rotate')
plt.suptitle("")  # drop the automatic "grouped by" super-title
axes2.set_ylim(-1.6, 1.6)
axes2.set_title("Box plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
axes2.set_xlabel('week', fontsize=10)
axes2.set_ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.show()

# the rate of change is slower on the regular scale


the min and max of the week index is 1, 53 :

In [15]:
# Weekly plots of the Lagrangian rate of change of the log-scale chl-a,
# i.e. the rate of change on the exponential scale.
# The plotted quantity is chl_rate / (freq * chlor_a): a concentration change
# divided by a concentration and a number of days, so its unit is 1/day
# (not mg/(m^3 day), which is the unit of the concentration rate).

# --- line plot: weekly mean (solid) and quartiles (dashed) ---
weekly_log_rate = df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate']
fig1, axes1 = plt.subplots()
weekly_log_rate.mean().plot(ax=axes1, linestyle="-", color='b', linewidth=1)
weekly_log_rate.quantile(.75).plot(ax=axes1, linestyle="--", color='g', linewidth=0.35)
weekly_log_rate.quantile(.50).plot(ax=axes1, linestyle="--", color='r', linewidth=0.75)
weekly_log_rate.quantile(.25).plot(ax=axes1, linestyle="--", color='g', linewidth=0.35)
axes1.set_ylim(-1, 0.5)
axes1.set_title("Line plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
axes1.set_xlabel('week', fontsize=10)
axes1.set_ylabel('rate of change of the log-scale $Chl_a$ in $day^{-1}$', fontsize=10)
axes1.set_yticks(np.arange(-1, 0.5, 0.25))
axes1.set_xticks(np.arange(1, 25, 1))


# --- box plot per rotated week (pandas opens a new figure for it) ---
# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
# http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chlor_a_logE_rate', by='week_rotate')
plt.suptitle("")  # drop the automatic "grouped by" super-title
axes2.set_ylim(-1, 0.5)
axes2.set_title("Box plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
axes2.set_xlabel('week', fontsize=10)
axes2.set_ylabel('rate of change of the log-scale $Chl_a$ in $day^{-1}$', fontsize=10)
plt.show()


Out[15]:
<matplotlib.text.Text at 0x119e04278>

In [16]:
# Close every open figure.  No plt.cla()/plt.clf()/plt.show() afterwards:
# with nothing open, those calls would implicitly create a new empty figure
# and display it as a blank output.
plt.close('all')


<matplotlib.figure.Figure at 0x11aa0bb70>

In [17]:
# Spatial plot of chl_rate for each of the five months 11, 12, 1, 2, 3,
# one figure per month, preceded by the summary statistics of that month.
SEASON_MONTHS = [(11, 'November'), (12, 'December'), (1, 'January'), (2, 'February'), (3, 'March')]
RATE_COLOR_LIMIT = 0.6  # symmetric colour range, identical for all months

for month_no, month_name in SEASON_MONTHS:
    # rows whose time index falls in the current calendar month
    aa = df_timed_NovMar[df_timed_NovMar.index.month == month_no]
    fig, ax = plt.subplots(figsize=(8, 6))
    print('\n\n summary of the Chl_rate \n', aa.chl_rate.describe())
    aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r',
            vmin=-RATE_COLOR_LIMIT, vmax=RATE_COLOR_LIMIT, edgecolor='none', ax=ax,
            title='Rate of change of the $Chl_a$ in %s' % (month_name))
    ax.set_xticks(np.arange(45, 80, 2.5))
    ax.set_yticks(np.arange(0, 28, 2.5))
    plt.show()
    plt.close(fig)  # release the figure before the next iteration



 summary of the Chl_rate 
 count    102.000000
mean       0.020365
std        0.163387
min       -0.335095
25%       -0.023952
50%        0.010107
75%        0.036200
max        1.117373
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    121.000000
mean       0.034917
std        0.189265
min       -0.316076
25%       -0.041403
50%        0.004956
75%        0.068338
max        1.424472
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    171.000000
mean       0.194033
std        3.028976
min      -13.251391
25%       -0.108855
50%        0.002778
75%        0.099613
max       29.425299
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    190.000000
mean      -0.586618
std        7.516451
min      -59.546418
25%       -0.099804
50%        0.003100
75%        0.158382
max       34.378983
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    190.000000
mean      -0.118652
std        5.788752
min      -37.328540
25%       -0.124904
50%       -0.015206
75%        0.015472
max       40.412247
Name: chl_rate, dtype: float64

In [18]:
# Leftover check cell: prints a fixed string and does not touch the analysis data.
print("test")


test

In [ ]: