this is a file for calculating the Lagrangian rate of change of the log-scale $Chl_a$ using $\frac{D \ ( log_{e} Chl_a)}{Dt}:=\frac{1}{Chl_a}\frac{D \ ( Chl_a)}{Dt}$

  • A few remarks:
    • Unit $mg/(m^3 \cdot day)$
    • Natural logarithm added
    • All the rates on the same time frequency
    • validate the rate of change of the log-scale rate Chl-a by FD
    • Monthly trends
      • aaa
      • aaa

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import xarray as xr
from datetime import datetime
import datetime


/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)

In [2]:
# load the floats data 
# ********************
# *** CSV files ***
# ********************
# load the floats data, take the lon and lat as list out and calculate the distance

# load CSV output    
# some how the CSV Format has some compatibility issues here
# see readme file for the file convetion in the experiments,
# for instance "3" indicates distance is addd to the dataset 


plt.close('all')
plt.cla()  # axis
plt.clf()  # figure
plt.show()



# freqency
freq = 3
suf = 'D'
in_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_3.csv'
out_filename = 'df_chl_out_'+str(freq)+ suf +'_modisa_4.csv'
folder = './data_collector_modisa_chla9km/'
direc = folder + in_filename
direc

print('in_filename and path:', direc)
print('out_filename:', out_filename)

df_chl_out_3 = pd.read_csv(direc, index_col='index')
df_chl_out_3


<matplotlib.figure.Figure at 0x114c3acf8>
in_filename and path: ./data_collector_modisa_chla9km/df_chl_out_3D_modisa_3.csv
out_filename: df_chl_out_3D_modisa_4.csv
Out[2]:
id time lat var_tmp var_lon ve spd vn var_lat temp lon chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist
index
10620 10206 2002-11-04 10.885583 1000.000000 0.001747 -6.069667 11.224333 6.904000 0.000579 NaN 67.315250 0.145567 -0.836937 0.017202 -1.764421 524.45200
10648 34721 2002-11-04 12.628833 0.001778 0.000122 6.291000 13.099250 -0.428083 0.000064 29.590750 67.626250 0.129693 -0.887083 -0.024359 NaN 509.24500
10879 10206 2002-11-07 11.064250 1000.000000 0.000558 -5.759333 10.497583 6.697417 0.000221 NaN 67.174083 0.129001 -0.889407 -0.016566 NaN 536.67500
10881 11089 2002-11-07 14.365167 0.003795 0.000151 -15.957833 16.718083 1.865000 0.000075 28.995083 64.770000 0.192121 -0.716425 0.033696 -1.472422 859.72600
10883 15707 2002-11-07 13.640333 1000.000000 0.000132 -15.104667 29.831500 -24.346083 0.000068 NaN 67.346250 0.158005 -0.801329 -0.008466 NaN 573.15900
10897 34315 2002-11-07 5.700083 0.004677 0.000116 -46.239333 53.404667 15.849750 0.000059 29.688667 57.189833 0.118153 -0.927555 -0.021739 NaN 807.01100
10901 34710 2002-11-07 17.340000 0.001818 0.000190 -2.268917 12.218750 11.470250 0.000091 28.845500 63.155917 0.375238 -0.425694 -0.069839 NaN 562.59100
10907 34721 2002-11-07 12.584833 0.001786 0.000096 11.166083 15.920500 -2.891250 0.000052 29.468167 67.824667 0.148202 -0.829147 0.018509 -1.732629 487.07000
11138 10206 2002-11-10 11.149750 1000.000000 0.002349 1.226750 2.176333 0.621000 0.000731 NaN 67.124333 0.125101 -0.902739 -0.003900 NaN 540.82800
11140 11089 2002-11-10 14.308583 0.003952 0.000116 -15.961083 17.195667 -5.025667 0.000060 28.962667 64.408500 0.192742 -0.715024 0.000621 -3.206904 873.27900
11142 15707 2002-11-10 13.090750 1000.000000 0.000184 -4.313833 22.823750 -20.929250 0.000089 NaN 67.184417 0.170973 -0.767072 0.012968 -1.887127 568.69300
11399 11089 2002-11-13 14.165250 0.003688 0.000103 -17.318500 20.071333 -8.814500 0.000056 28.530667 63.994667 0.360733 -0.442814 0.167991 -0.774714 844.52600
11678 34710 2002-11-16 19.128833 0.001602 0.000148 -7.399000 47.715167 46.157000 0.000074 27.781083 62.932833 0.605980 -0.217542 -0.034225 NaN 443.75400
11937 34710 2002-11-19 20.054583 0.002103 0.000187 -32.605500 46.955167 33.501500 0.000088 27.591500 62.375333 0.507668 -0.294420 -0.098311 NaN 350.34500
12196 34710 2002-11-22 21.041917 0.001646 0.000100 -27.653083 57.430333 50.157000 0.000055 27.362167 61.621000 0.441319 -0.355247 -0.066349 NaN 226.03400
12455 34710 2002-11-25 22.065083 0.001567 0.000091 19.554250 52.860000 44.119417 0.000048 27.408250 61.330833 0.390903 -0.407932 -0.050416 NaN 158.17400
12692 10206 2002-11-28 11.322667 1000.000000 0.000845 -5.209000 5.937833 2.495417 0.000307 NaN 66.894750 0.132228 -0.878677 -0.001016 NaN 567.10800
12696 15707 2002-11-28 12.095000 1000.000000 0.000149 -13.231000 20.828083 -15.165333 0.000076 NaN 64.683500 0.153653 -0.813459 -0.006318 NaN 812.36000
12710 34315 2002-11-28 6.768083 0.005121 0.000087 -21.462667 33.214917 -24.851250 0.000048 29.046917 57.717583 0.153439 -0.814063 0.001311 -2.882561 725.04800
12714 34710 2002-11-28 23.095667 0.001770 0.000107 56.632667 69.391667 14.940250 0.000057 27.284500 62.519667 0.588584 -0.230192 0.197682 -0.704034 221.37900
12951 10206 2002-12-01 11.384667 1000.000000 0.003508 -11.984083 12.573333 3.490667 0.001023 NaN 66.684000 0.139830 -0.854400 0.007602 -2.119073 589.04500
12955 15707 2002-12-01 11.721250 1000.000000 0.000121 -12.597750 20.215917 -15.040583 0.000064 NaN 64.379583 0.161023 -0.793112 0.007370 -2.132532 843.66700
12969 34315 2002-12-01 6.307167 0.004235 0.000096 -48.228750 50.684917 -9.103583 0.000052 29.046500 56.946417 0.134822 -0.870239 -0.018618 NaN 735.60800
12973 34710 2002-12-01 22.207667 0.001675 0.000125 12.328667 80.473833 -73.489417 0.000065 27.191000 63.568333 0.788932 -0.102960 0.200348 -0.698215 321.77200
12979 34721 2002-12-01 14.650833 0.001708 0.000106 0.832083 11.506167 10.312250 0.000057 28.907583 68.149500 0.135054 -0.869493 -0.019095 NaN 553.53200
13212 11089 2002-12-04 14.188917 0.003586 0.000110 -24.566083 48.281167 39.895250 0.000059 27.466250 58.787417 0.298087 -0.525657 -0.108770 NaN 449.99100
13232 34710 2002-12-04 20.722250 0.001609 0.000164 17.086500 48.863417 -32.885000 0.000082 26.931500 63.566667 10.239810 1.010292 9.450878 0.975472 423.85600
13238 34721 2002-12-04 14.861000 0.001740 0.000087 -3.288833 10.396833 6.437833 0.000048 28.770833 68.165417 0.161038 -0.793072 0.025984 -1.585294 563.83300
13471 11089 2002-12-07 15.168333 0.004033 0.000087 11.451250 38.454750 34.846500 0.000048 27.085583 58.635333 0.440695 -0.355862 0.142608 -0.845856 352.90400
14009 34710 2002-12-13 21.956750 0.001636 0.000119 8.957000 14.217000 9.327500 0.000063 26.319667 64.438250 0.651648 -0.185987 0.162169 -0.790031 356.40600
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
427021 114917 2016-01-16 13.104167 0.001911 0.000035 8.435250 23.692167 19.455167 0.000080 29.429417 72.908667 0.133496 -0.874533 -0.017362 NaN 156.16900
427022 114945 2016-01-16 11.498917 0.001832 0.000047 -7.396750 8.324333 -1.833250 0.000108 28.283250 64.183500 0.155065 -0.809487 -0.012161 NaN 864.29700
427280 114917 2016-01-19 13.500250 0.001723 0.000036 -13.451833 18.388500 10.154000 0.000084 29.226667 72.785917 0.158714 -0.799384 0.025219 -1.598276 195.00100
427832 147127 2016-01-25 17.261083 0.001672 0.000018 0.013833 11.027917 -9.357917 0.000040 25.934833 63.975750 3.403951 0.531983 2.960363 0.471345 639.64100
428091 147127 2016-01-28 17.017833 0.001711 0.000022 -8.951750 18.937417 -16.302917 0.000052 25.958167 63.890250 0.620610 -0.207181 -2.783341 NaN 648.44400
428101 60150420 2016-01-28 6.736917 0.001684 0.000005 -7.343250 22.604583 18.776417 0.000003 27.948250 60.653250 0.146449 -0.834314 -0.003338 NaN 928.38400
428868 147127 2016-02-06 16.159333 0.001897 0.000024 8.256917 15.392250 -0.016000 0.000056 25.861500 63.466750 1.093246 0.038718 0.606639 -0.217070 673.87100
429127 147127 2016-02-09 16.211583 0.001616 0.000023 -16.721167 18.352250 -3.910667 0.000052 25.857833 63.342667 2.783544 0.444598 1.690298 0.227963 658.47000
429137 60150420 2016-02-09 7.906500 0.001684 0.000005 18.460167 32.677083 26.584500 0.000003 27.593583 60.857833 0.219196 -0.659167 0.025982 -1.585328 861.52300
429353 114945 2016-02-12 11.660667 0.001849 0.000139 -14.905250 18.051750 8.568083 0.000274 27.525833 61.152250 0.367707 -0.434498 0.041080 -1.386371 724.91000
429386 147127 2016-02-12 16.126583 0.001743 0.000018 -11.217167 14.319250 -0.956917 0.000043 25.960167 62.981667 0.588187 -0.230485 -2.195357 NaN 630.58300
429612 114945 2016-02-15 11.820000 0.001986 0.000044 -15.690333 17.241750 4.890750 0.000100 27.418750 60.733667 0.337130 -0.472203 -0.030577 NaN 679.29300
429645 147127 2016-02-15 16.121583 0.001720 0.000020 -10.264167 13.574167 -2.574083 0.000046 25.728333 62.649083 0.630473 -0.200334 0.042286 -1.373803 601.44700
430129 114917 2016-02-21 13.700667 0.001780 0.000052 -2.854917 13.991417 8.911750 0.000119 28.740000 71.024417 0.190552 -0.719987 0.015034 -1.822930 264.04100
430907 114945 2016-03-01 12.083750 0.001952 0.000050 -16.573917 17.664000 1.229500 0.000114 27.430417 58.856917 0.226657 -0.644630 0.003111 -2.507076 472.29000
430940 147127 2016-03-01 16.121000 0.001618 0.000017 -2.044167 4.499333 -1.960750 0.000040 26.234000 62.365000 0.849309 -0.070934 -0.114502 NaN 576.38700
431166 114945 2016-03-04 12.130417 0.001934 0.000204 -10.688083 11.353250 2.138583 0.000385 27.828583 58.514250 0.197348 -0.704766 -0.029309 NaN 432.85400
431199 147127 2016-03-04 16.042000 0.001640 0.000018 -0.635833 9.844083 -8.007500 0.000041 26.742083 62.343583 0.581469 -0.235474 -0.267840 NaN 577.86400
431424 114917 2016-03-07 13.934833 0.001758 0.000041 -12.728500 14.803667 2.159917 0.000095 29.030583 69.964917 0.138490 -0.858581 -0.122725 NaN 352.11300
431443 127429 2016-03-07 5.209167 0.001723 0.000028 -34.391333 35.608083 0.115000 0.000066 29.737917 73.568250 0.126091 -0.899317 0.014539 -1.837467 8.64527
431458 147127 2016-03-07 15.697250 0.001637 0.000019 -21.055833 27.906583 -10.514000 0.000044 26.729833 62.102667 0.391455 -0.407318 -0.190014 NaN 581.48900
431717 147127 2016-03-10 15.807083 0.001874 0.000019 5.180667 15.739250 11.880500 0.000042 27.598250 61.822917 0.393192 -0.405396 0.001737 -2.760320 549.95600
431727 60150420 2016-03-10 10.341500 0.001684 0.000005 -25.197000 25.479083 -1.877250 0.000003 28.377583 61.807833 0.127561 -0.894282 0.002002 -2.698500 830.62300
431938 114873 2016-03-13 8.114833 0.001796 0.000018 -7.164500 10.921250 -7.296917 0.000042 28.570167 54.387667 0.125060 -0.902881 -0.011190 NaN 414.23200
431943 114945 2016-03-13 11.927750 0.002166 0.000103 -5.810583 8.381667 -3.117917 0.000223 28.244167 57.861250 0.205448 -0.687297 -0.048991 NaN 367.14500
431976 147127 2016-03-13 15.789000 0.002364 0.000017 7.950917 24.919583 -19.391500 0.000039 27.935833 62.118333 0.232429 -0.633710 -0.160763 NaN 575.89700
432235 147127 2016-03-16 15.310000 0.003590 0.000017 -16.990417 22.820333 -8.108083 0.000041 28.341333 61.887167 0.312578 -0.505041 0.080149 -1.096101 594.98900
432494 147127 2016-03-19 15.304250 0.001631 0.000015 8.491250 11.072083 -2.818333 0.000035 28.579083 61.834583 0.336461 -0.473065 0.023883 -1.621906 588.84300
432719 114917 2016-03-22 13.169000 0.001759 0.000049 -20.684833 23.017750 -0.291917 0.000113 29.237833 68.815667 0.132253 -0.878594 0.024518 -1.610508 405.10400
433233 114873 2016-03-28 9.289583 0.001750 0.000055 0.297500 22.486333 17.305583 0.000122 28.824333 53.694500 0.123338 -0.908902 -0.003175 NaN 280.67600

739 rows × 16 columns


In [3]:
# All the rates on the same time frequency
check1 = df_chl_out_3.chl_rate/ df_chl_out_3.chlor_a 
check1 = check1/ freq
check2 = df_chl_out_3.chl_rate.divide(freq *df_chl_out_3.chlor_a, axis = 'index')
# an check 
np.sum(abs(check1 - check2))


Out[3]:
6.860844900015428e-15

In [4]:
# add the column to the dataframe and output the dataset
df_chl_out_3['chlor_a_logE_rate'] = pd.Series(np.array(check2), index=df_chl_out_3.index)
df_chl_out_3.head()


Out[4]:
id time lat var_tmp var_lon ve spd vn var_lat temp lon chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
10620 10206 2002-11-04 10.885583 1000.000000 0.001747 -6.069667 11.224333 6.904000 0.000579 NaN 67.315250 0.145567 -0.836937 0.017202 -1.764421 524.452 0.039391
10648 34721 2002-11-04 12.628833 0.001778 0.000122 6.291000 13.099250 -0.428083 0.000064 29.590750 67.626250 0.129693 -0.887083 -0.024359 NaN 509.245 -0.062607
10879 10206 2002-11-07 11.064250 1000.000000 0.000558 -5.759333 10.497583 6.697417 0.000221 NaN 67.174083 0.129001 -0.889407 -0.016566 NaN 536.675 -0.042806
10881 11089 2002-11-07 14.365167 0.003795 0.000151 -15.957833 16.718083 1.865000 0.000075 28.995083 64.770000 0.192121 -0.716425 0.033696 -1.472422 859.726 0.058463
10883 15707 2002-11-07 13.640333 1000.000000 0.000132 -15.104667 29.831500 -24.346083 0.000068 NaN 67.346250 0.158005 -0.801329 -0.008466 NaN 573.159 -0.017860

In [5]:
df_chl_out_3.chlor_a_logE_rate.describe()  # more scattered on the left hand side


Out[5]:
count    739.000000
mean      -0.121453
std        0.758143
min      -10.428087
25%       -0.077504
50%       -0.004309
75%        0.059677
max        0.321857
Name: chlor_a_logE_rate, dtype: float64

In [6]:
# visualize the ROC of log(chl_a) around the arabian sea region
fig, ax  = plt.subplots(figsize=(12,10))
df_chl_out_3.plot(kind='scatter', x='lon', y='lat', c='chlor_a_logE_rate', cmap='RdBu_r', vmin=check2.median()-0.5*check2.std(), vmax=check2.max(), edgecolor='none', ax=ax, title = 'rate of change of the log-scale chl-a')


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x114c3a908>

In [7]:
# histogram for non standarized data
axdf_chl = df_chl_out_3.chlor_a_logE_rate.dropna().hist(bins=100,range=[-1.5,0.5]) # there are very a few small values on the left
axdf_chl.set_title('histogram of the rate of change of the log-scale chl-a')


Out[7]:
<matplotlib.text.Text at 0x117325048>

In [8]:
# standarized series
tmp = df_chl_out_3.chlor_a_logE_rate.dropna()
tmp = (tmp - tmp.mean())/tmp.std()
axdf_chl_stdan = tmp.hist(bins=100,range=[-1.5,0.5])   # there are very a few small values on the left
axdf_chl_stdan.set_title('histogram of the standardized rate of change of the log-scale chl-a')


Out[8]:
<matplotlib.text.Text at 0x1184d18d0>

In [12]:
(0.129001 - 0.145567) / (freq*0.129001)


Out[12]:
-0.042805869721940136

In [10]:
###########################
# On 2D-subsampling Dataset
###########################
# Val 1:
# id:10206, time:2002-11-07"
# (0.129001 - 0.145567) / (freq*0.129001)    == -0.042805869721940136
#########
# Val 2:
# id:10206, time:2002-11-07"
# (np.log(0.129001)-np.log(0.145567))   / freq == -0.040272101817569204 # very close to the value above

df_chl_out_3.sort_values(by=['id', 'time']).head()


Out[10]:
id time lat var_tmp var_lon ve spd vn var_lat temp lon chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
10620 10206 2002-11-04 10.885583 1000.0 0.001747 -6.069667 11.224333 6.904000 0.000579 NaN 67.315250 0.145567 -0.836937 0.017202 -1.764421 524.452 0.039391
10879 10206 2002-11-07 11.064250 1000.0 0.000558 -5.759333 10.497583 6.697417 0.000221 NaN 67.174083 0.129001 -0.889407 -0.016566 NaN 536.675 -0.042806
11138 10206 2002-11-10 11.149750 1000.0 0.002349 1.226750 2.176333 0.621000 0.000731 NaN 67.124333 0.125101 -0.902739 -0.003900 NaN 540.828 -0.010392
12692 10206 2002-11-28 11.322667 1000.0 0.000845 -5.209000 5.937833 2.495417 0.000307 NaN 66.894750 0.132228 -0.878677 -0.001016 NaN 567.108 -0.002561
12951 10206 2002-12-01 11.384667 1000.0 0.003508 -11.984083 12.573333 3.490667 0.001023 NaN 66.684000 0.139830 -0.854400 0.007602 -2.119073 589.045 0.018122

In [13]:
# convert into datetime
df_chl_out_3['time'] = pd.to_datetime(df_chl_out_3['time']) # ,format='%m/%d/%y %I:%M%p'
df_chl_out_3.sort_values(by=['id', 'time']).head()  # a check


Out[13]:
id time lat var_tmp var_lon ve spd vn var_lat temp lon chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
10620 10206 2002-11-04 10.885583 1000.0 0.001747 -6.069667 11.224333 6.904000 0.000579 NaN 67.315250 0.145567 -0.836937 0.017202 -1.764421 524.452 0.039391
10879 10206 2002-11-07 11.064250 1000.0 0.000558 -5.759333 10.497583 6.697417 0.000221 NaN 67.174083 0.129001 -0.889407 -0.016566 NaN 536.675 -0.042806
11138 10206 2002-11-10 11.149750 1000.0 0.002349 1.226750 2.176333 0.621000 0.000731 NaN 67.124333 0.125101 -0.902739 -0.003900 NaN 540.828 -0.010392
12692 10206 2002-11-28 11.322667 1000.0 0.000845 -5.209000 5.937833 2.495417 0.000307 NaN 66.894750 0.132228 -0.878677 -0.001016 NaN 567.108 -0.002561
12951 10206 2002-12-01 11.384667 1000.0 0.003508 -11.984083 12.573333 3.490667 0.001023 NaN 66.684000 0.139830 -0.854400 0.007602 -2.119073 589.045 0.018122

In [14]:
# CSV CSV CSV CSV with specfic index
# df_chl_out_3.csv --  {lat, lon, temp, chl_rate, dist}
# df_chl_out_3.csv --  {lat, lon, temp, chl_rate, dist, chlor_a_log10_rate}

# 3 represents 3 features: {temp, chl_rate, dist}
# 4 represents 4 features: {temp, chl_rate, dist, chlor_a_log10_rate }
print('out_filename:', out_filename)
df_chl_out_3.to_csv(out_filename, sep=',', index_label = 'index')

# load CSV output
test = pd.read_csv(out_filename, index_col='index')

# a check
test.sort_values(by=['id', 'time']).head()


out_filename: df_chl_out_3D_modisa_4.csv
Out[14]:
id time lat var_tmp var_lon ve spd vn var_lat temp lon chlor_a chlor_a_log10 chl_rate chl_rate_log10 dist chlor_a_logE_rate
index
10620 10206 2002-11-04 10.885583 1000.0 0.001747 -6.069667 11.224333 6.904000 0.000579 NaN 67.315250 0.145567 -0.836937 0.017202 -1.764421 524.452 0.039391
10879 10206 2002-11-07 11.064250 1000.0 0.000558 -5.759333 10.497583 6.697417 0.000221 NaN 67.174083 0.129001 -0.889407 -0.016566 NaN 536.675 -0.042806
11138 10206 2002-11-10 11.149750 1000.0 0.002349 1.226750 2.176333 0.621000 0.000731 NaN 67.124333 0.125101 -0.902739 -0.003900 NaN 540.828 -0.010392
12692 10206 2002-11-28 11.322667 1000.0 0.000845 -5.209000 5.937833 2.495417 0.000307 NaN 66.894750 0.132228 -0.878677 -0.001016 NaN 567.108 -0.002561
12951 10206 2002-12-01 11.384667 1000.0 0.003508 -11.984083 12.573333 3.490667 0.001023 NaN 66.684000 0.139830 -0.854400 0.007602 -2.119073 589.045 0.018122

In [15]:
## check the week numbers of the range from Nov-01-01 to Mar-01-01
for year in range(2002, 2017):
    print(str(year)+'-11-01 is week', datetime.datetime(year, 11, 1).isocalendar()[1])  # 44, 45, 

print('----')
for year in range(2002, 2017):
    print(str(year)+'-3-31 is week', datetime.datetime(year, 3, 31).isocalendar()[1])  # 13, 14

plt.show()


2002-11-01 is week 44
2003-11-01 is week 44
2004-11-01 is week 45
2005-11-01 is week 44
2006-11-01 is week 44
2007-11-01 is week 44
2008-11-01 is week 44
2009-11-01 is week 44
2010-11-01 is week 44
2011-11-01 is week 44
2012-11-01 is week 44
2013-11-01 is week 44
2014-11-01 is week 44
2015-11-01 is week 44
2016-11-01 is week 44
----
2002-3-31 is week 13
2003-3-31 is week 14
2004-3-31 is week 14
2005-3-31 is week 13
2006-3-31 is week 13
2007-3-31 is week 13
2008-3-31 is week 14
2009-3-31 is week 14
2010-3-31 is week 13
2011-3-31 is week 13
2012-3-31 is week 13
2013-3-31 is week 13
2014-3-31 is week 14
2015-3-31 is week 14
2016-3-31 is week 13

In [16]:
##### weekly plot on the Lagrangian rate of change of the chl-a
#sns.set(style="white")
#sns.set(color_codes=True)

### 
# Approach 1 depreciated
#grouped = df_timed.chl_rate.groupby(df_timed.index.week)
#grouped.plot.box()

### 
# Approach 2
# prepare data  a. use index or columns to group

###
# select the corresponding weeks, prepare the data
df_timed = df_chl_out_3.set_index('time')
df_timed['week'] = df_timed.index.week

mask_NovMar = (df_timed.week<=14) | (df_timed.week >=44)
df_timed_NovMar = df_timed[mask_NovMar]
#df_timed_NovMar.head()

# now rotate the index to make Nov-01-01 the first month
print('the min and max of the week index is %d, %d :' % (df_timed_NovMar.week.min(), df_timed_NovMar.week.max()) )
# make the 44th week the 1st week
df_timed_NovMar['week_rotate'] = (df_timed_NovMar.week + 10 ) % 53
df_timed_NovMar.week_rotate.describe() # now from 1 to 24


axes1=df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].mean().plot(linestyle="-",color='b', linewidth=1)
df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].quantile(.75).plot(linestyle="--",color='g', linewidth=0.35)
df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].quantile(.50).plot(linestyle="--",color='r', linewidth=0.75)
df_timed_NovMar.groupby(['week_rotate'])['chl_rate'].quantile(.25).plot(linestyle="--",color='g', linewidth=0.35)
axes1.set_ylim(-3,2)
axes1.set_title("Line plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.yticks(np.arange(-3, 2, 0.5))
plt.xticks(np.arange(1, 25, 1))
plt.show()


# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
#http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chl_rate', by='week_rotate')
plt.suptitle("") # equivalent
axes2.set_ylim(-1.6,1.6)
axes2.set_title("Box plot of the weekly data on the rate of change of the $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.show()

# the rate of change is slower on the regular scale

#matplotlib.pyplot.close("all")


the min and max of the week index is 1, 53 :

In [17]:
# weekly plot on the Lagrangian rate of change of the log-scale chl-a
# This is the rate of change on the exponential scale
axes1=df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].mean().plot(linestyle="-",color='b', linewidth=1)
df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].quantile(.75).plot(linestyle="--",color='g', linewidth=0.35)
df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].quantile(.50).plot(linestyle="--",color='r', linewidth=0.75)
df_timed_NovMar.groupby(['week_rotate'])['chlor_a_logE_rate'].quantile(.25).plot(linestyle="--",color='g', linewidth=0.35)
axes1.set_ylim(-1,0.5)
axes1.set_title("Line plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
plt.yticks(np.arange(-1, 0.5, 0.25))
plt.xticks(np.arange(1, 25, 1))
#plt.show()


# http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
#http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
axes2 = df_timed_NovMar.boxplot(column='chlor_a_logE_rate', by='week_rotate')
plt.suptitle("") # equivalent
axes2.set_ylim(-1,0.5)
axes2.set_title("Box plot of the weekly data on the rate of change of the log-scale $Chl_a$ Concentration",  fontsize=10)
plt.xlabel('week', fontsize=10)
plt.ylabel('rate of change of the log-scale $Chl_a$ in $mg/(m^3 day)$', fontsize=10)
#plt.show()


#plt.close('all')


Out[17]:
<matplotlib.text.Text at 0x1199aeac8>

In [18]:
plt.close('all')
plt.cla()  # axis
plt.clf()  # figure
plt.show()


<matplotlib.figure.Figure at 0x1033b2128>

In [19]:
# spatial plot for different months --  totally five months 1, 2, 3, 11, 12, 
for i in range(0,5,1):
    month_ind = np.array([11,12,1,2,3])
    month_names = ['November', 'December','January','February', 'March']
    aa = df_timed_NovMar[df_timed_NovMar.index.month == month_ind[i]]
    fig, ax  = plt.subplots(figsize=(8,6))
    ##aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r', vmin=aa.chl_rate.median()-0.5*aa.chl_rate.std(), vmax=aa.chl_rate.median()-0.5*aa.chl_rate.std(), edgecolor='none', ax=ax, title = 'rate of change of the $Chl_a$')
    ##aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r', vmin=aa.chl_rate.mean()-0.5*aa.chl_rate.std(), vmax=aa.chl_rate.mean()+0.5*aa.chl_rate.std(), edgecolor='none', ax=ax, title = 'rate of change of the $Chl_a$')
    print('\n\n summary of the Chl_rate \n', aa.chl_rate.describe())
    aa.plot(kind='scatter', x='lon', y='lat', c='chl_rate', cmap='RdBu_r', vmin=-0.6, vmax=0.6, edgecolor='none', ax=ax, title = 'Rate of change of the $Chl_a$ in %s' % (month_names[i]))
    plt.xticks(np.arange(45, 80, 2.5))
    plt.yticks(np.arange(0, 28, 2.5))
    plt.show()



 summary of the Chl_rate 
 count    101.000000
mean       0.000854
std        0.070760
min       -0.178500
25%       -0.029405
50%       -0.004043
75%        0.026300
max        0.257682
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    126.000000
mean       0.103577
std        0.870768
min       -1.212929
25%       -0.054818
50%        0.007898
75%        0.083648
max        9.450878
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    178.000000
mean      -0.092638
std        1.452385
min      -18.524991
25%       -0.061779
50%        0.005437
75%        0.090876
max        2.960363
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    178.000000
mean       0.353711
std        6.403601
min      -42.178406
25%       -0.101225
50%       -0.004184
75%        0.313860
max       38.302670
Name: chl_rate, dtype: float64

 summary of the Chl_rate 
 count    156.000000
mean      -0.474700
std        5.669407
min      -30.210748
25%       -0.219846
50%       -0.015016
75%        0.023531
max       40.143596
Name: chl_rate, dtype: float64

In [20]:
print("test")


test

In [ ]: