In [1]:
import xarray as xr
import numpy as np
import pandas as pd
%matplotlib inline
from matplotlib import pyplot as plt
from dask.diagnostics import ProgressBar
import seaborn as sns
from matplotlib.colors import LogNorm
In [2]:
# Resampling frequency, expressed in days; it is embedded in the name of the
# input CSV that is loaded further down.
freq = 15
In [3]:
# Load the preprocessed float data.
# The CSV file name is the concatenation of the variable tags below (in the
# order var1, vardist, var2 ... var7) followed by "_<freq>d.csv".
var7 = "chlrateweekno"
var6 = "sst4"
var5 = "par"
var4 = "t865"
var3 = "kd490"
var2 = "cdm"
var1 = "chl"
vardist = "dist"
indir_prefix = "../../data_collector_globcolour/output.data.interpolate/2017GDPfloat/" + "df_Globcolor_"
indir = (
    indir_prefix
    + "".join([var1, vardist, var2, var3, var4, var5, var6, var7])
    + "_" + str(freq) + "d.csv"
)
floatDF_tmp = pd.read_csv(indir, index_col=0)
print(floatDF_tmp)

# Plot chl_rate against time for the single float that will be fit by LDS.
# The id is defined once so the row filter and the title cannot drift apart.
lds_float_id = 135776

# DataFrame.plot opens its own figure unless an Axes is passed in, so the
# sized Axes is created explicitly and handed over; otherwise an empty extra
# figure is shown and the requested figsize is never applied.
fig, ax = plt.subplots(figsize=(8, 6))
floatDF_tmp[floatDF_tmp.id == lds_float_id].plot(
    x='time', y='chl_rate', title=('id - %d' % lds_float_id), ax=ax
)
plt.show()
plt.close("all")
In [ ]:
In [ ]:
In [4]:
# Make the project-level `tools` package importable from this notebook.
# References:
#   https://stackoverflow.com/questions/16780014/import-file-from-parent-directory
#   https://stackoverflow.com/questions/16771894/python-nameerror-global-name-file-is-not-defined
import os
import sys
import importlib

# "__file__" is deliberately a *string literal*: abspath() resolves it against
# the current working directory, so the first dirname() yields the working
# directory and the next two climb two levels above it.
placeholder_path = os.path.abspath("__file__")
working_dir = os.path.dirname(placeholder_path)
two_levels_up = os.path.dirname(os.path.dirname(working_dir))
sys.path.append(two_levels_up)

from tools import chl_rates  # del(chl_rates)

# Reload so that edits made to tools/chl_rates.py are picked up without
# restarting the kernel.
importlib.reload(chl_rates)
Out[4]:
In [5]:
# Step 1: subset the float data with chl_rates.reduce_to_NovMar
# (presumably keeps only Nov-01 .. Mar-31, as the banner says -- confirm in tools/chl_rates.py).
print("\n ******* Reduce the Dataset to 'Nov-01 to Mar-31' ******* \n")
floatsDF_NovMar = chl_rates.reduce_to_NovMar(floatDF_tmp)

# Step 2: let the helper module draw its weekly / spatial plots of the reduced data.
print("\n ******* weekly plots and spatial plots on various rates of chl-a and log-scale chl-a ******* \n")
chl_rates.spatial_plots_chl_rate_weekly(floatsDF_NovMar)
Out[5]:
In [ ]:
In [ ]:
In [6]:
# Index the reduced frame by timestamp and add a 'year' column.
#
# The guard makes the cell safe to re-run: once 'time' has been moved into the
# index it is no longer a column, and an unguarded second run would raise
# KeyError: 'time'.
# .assign() builds a new frame instead of writing into the object returned by
# chl_rates.reduce_to_NovMar, which avoids chained-assignment warnings.
if 'time' in floatsDF_NovMar.columns:
    floatsDF_NovMar = (
        floatsDF_NovMar
        .assign(time=pd.to_datetime(floatsDF_NovMar['time']))  # ,format='%m/%d/%y %I:%M%p'
        .set_index('time')
    )

# Calendar year of every row, taken from the datetime index.
floatsDF_NovMar = floatsDF_NovMar.assign(year=floatsDF_NovMar.index.year)

# First and last calendar year present in the data; the plotting cells below
# iterate over this range.
year_max = floatsDF_NovMar.index.year.max()
year_min = floatsDF_NovMar.index.year.min()
print('year_max', year_max)
print('year_min', year_min)
In [7]:
### Part 1: weekly mean of chl_rate_stand
### Three figures are drawn; each overlays five consecutive years, one line per year.
start = year_min
for fig_idx in range(3):
    plt.figure(figsize=(8, 6))
    plt.title("mean of the weekly-binned standardized Lagrangian rate of change per day of Chl-a", fontsize=10)

    # One line per year in the window [start, start + 5).
    for current_year in range(start, start + 5):
        df_yearly = floatsDF_NovMar.loc[floatsDF_NovMar['year'] == current_year]
        weekly_mean = df_yearly.groupby(['week_rotate'])['chl_rate_stand'].mean()
        # Series.plot draws on the current Axes and returns it.
        axes1 = weekly_mean.plot(linestyle="-", linewidth=0.8, label='%d' % current_year)

    # Advance to the next, non-overlapping five-year window.
    start += 5

    axes1.set_ylim(-1.5, 1)
    plt.xlabel('week', fontsize=10)
    # Raw string: same characters at runtime, but no invalid "\c" escape.
    plt.ylabel(r'rate of change of the $Chl_a$ in $mg/(m^3 \cdot days)$', fontsize=10)
    plt.yticks(np.arange(-1.5, 1, 0.25))
    plt.xticks(np.arange(1, 25, 1))
    # Anchor the legend just outside the upper-right corner of the Axes.
    axes1.legend(bbox_to_anchor=(1.15, 1.05))
    #plt.savefig(str(freq)+"D_weekly_mean_" + str(fig_idx) +".pdf")
    plt.show()
    plt.close()
In [8]:
### Part 4: 85% quantile of chl_rate_stand per week
### Three figures are drawn; each overlays `years_per_figure` consecutive years.
years_per_figure = 5  # used for BOTH the window length and the stride
start = year_min
for fig_idx in range(3):
    plt.figure(figsize=(8, 6))
    plt.title("85% quantile of the weekly-binned standardized Lagrangian rate of change per day of Chl-a", fontsize=10)

    # One line per year in the window [start, start + years_per_figure).
    for current_year in range(start, start + years_per_figure):
        df_yearly = floatsDF_NovMar[floatsDF_NovMar.year == current_year]
        weekly_q85 = df_yearly.groupby(['week_rotate'])['chl_rate_stand'].quantile(.85)
        # Series.plot draws on the current Axes and returns it.
        axes1 = weekly_q85.plot(linestyle="-", linewidth=0.8, label='%d' % current_year)

    # The stride must equal the window length. Advancing by 4 while plotting
    # 5 years repeats the boundary year in the next figure and leaves the
    # final years of a 15-year span unplotted.
    start = start + years_per_figure

    axes1.set_ylim(-1, 1)
    plt.xlabel('week', fontsize=10)
    # Raw string avoids the invalid "\c" escape; the rendered label is unchanged.
    plt.ylabel(r'rate of change of the $Chl_a$ in $mg/(m^3 \cdot days)$', fontsize=10)
    plt.yticks(np.arange(-1, 1, 0.25))
    plt.xticks(np.arange(1, 25, 1))
    # Anchor the legend just outside the upper-right corner of the Axes.
    axes1.legend(bbox_to_anchor=(1.15, 1.05))
    #plt.savefig(str(freq)+"D_weekly_quantile(85)_" + str(fig_idx) +".pdf")
    plt.show()
    plt.close()
In [9]:
### Part 2: median of chl_rate_stand per week
### Three figures are drawn; each overlays `years_per_figure` consecutive years.
years_per_figure = 5  # used for BOTH the window length and the stride
start = year_min
for fig_idx in range(3):
    plt.figure(figsize=(8, 6))
    plt.title("median of the weekly-binned standardized Lagrangian rate of change per day of Chl-a", fontsize=10)

    # One line per year in the window [start, start + years_per_figure).
    for current_year in range(start, start + years_per_figure):
        df_yearly = floatsDF_NovMar[floatsDF_NovMar.year == current_year]
        weekly_median = df_yearly.groupby(['week_rotate'])['chl_rate_stand'].quantile(.50)
        # Series.plot draws on the current Axes and returns it.
        axes1 = weekly_median.plot(linestyle="-", linewidth=0.8, label='%d' % current_year)

    # The stride must equal the window length. Advancing by 4 while plotting
    # 5 years repeats the boundary year in the next figure and leaves the
    # final years of a 15-year span unplotted.
    start = start + years_per_figure

    axes1.set_ylim(-1, 1)
    plt.xlabel('week', fontsize=10)
    # Raw string avoids the invalid "\c" escape; the rendered label is unchanged.
    plt.ylabel(r'rate of change of the $Chl_a$ in $mg/(m^3 \cdot days)$', fontsize=10)
    plt.yticks(np.arange(-1, 1, 0.25))
    plt.xticks(np.arange(1, 25, 1))
    # Anchor the legend just outside the upper-right corner of the Axes.
    axes1.legend(bbox_to_anchor=(1.15, 1.05))
    #plt.savefig(str(freq)+"D_weekly_median_" + str(fig_idx) +".pdf")
    plt.show()
    plt.close()
In [10]:
### Part 3: 15% quantile of chl_rate_stand per week
### Three figures are drawn; each overlays `years_per_figure` consecutive years.
years_per_figure = 5  # used for BOTH the window length and the stride
start = year_min
for fig_idx in range(3):
    plt.figure(figsize=(8, 6))
    plt.title("15% quantile of the weekly-binned standardized Lagrangian rate of change per day of Chl-a", fontsize=10)

    # One line per year in the window [start, start + years_per_figure).
    for current_year in range(start, start + years_per_figure):
        df_yearly = floatsDF_NovMar[floatsDF_NovMar.year == current_year]
        weekly_q15 = df_yearly.groupby(['week_rotate'])['chl_rate_stand'].quantile(.15)
        # Series.plot draws on the current Axes and returns it.
        axes1 = weekly_q15.plot(linestyle="-", linewidth=0.8, label='%d' % current_year)

    # The stride must equal the window length. Advancing by 4 while plotting
    # 5 years repeats the boundary year in the next figure and leaves the
    # final years of a 15-year span unplotted.
    start = start + years_per_figure

    axes1.set_ylim(-2, 1)
    plt.xlabel('week', fontsize=10)
    # Raw string avoids the invalid "\c" escape; the rendered label is unchanged.
    plt.ylabel(r'rate of change of the $Chl_a$ in $mg/(m^3 \cdot days)$', fontsize=10)
    plt.yticks(np.arange(-2, 1, 0.25))
    plt.xticks(np.arange(1, 25, 1))
    # Anchor the legend just outside the upper-right corner of the Axes.
    axes1.legend(bbox_to_anchor=(1.15, 1.05))
    #plt.savefig(str(freq)+"D_weekly_quantile(15)_" + str(fig_idx) +".pdf")
    plt.show()
    plt.close()
In [11]:
# Release every figure that is still open.
# Nothing else is needed here: calling plt.cla() / plt.clf() / plt.show() once
# all figures are closed makes pyplot create a brand-new figure and display it
# blank.
plt.close('all')
In [ ]:
In [ ]:
In [12]:
# Recompute the first / last calendar year covered by the datetime index so
# the per-year loop below has its bounds.
index_years = floatsDF_NovMar.index.year
year_max, year_min = index_years.max(), index_years.min()
for bound_name, bound_value in (('year_max', year_max), ('year_min', year_min)):
    print(bound_name, bound_value)
In [13]:
# For every calendar year in [year_min, year_max]:
#   1. print summary statistics of chl_rate_stand,
#   2. draw the weekly mean (blue), median (red dashed) and the 15% / 85%
#      quantiles (green dashed) of chl_rate_stand,
#   3. draw a box plot of chl_rate_stand grouped by week_rotate.
for current_year in range(year_min, year_max + 1):
    df_yearly = floatsDF_NovMar[floatsDF_NovMar.year == current_year]
    print('\n summary statistics of chl_rate_stand for the year %d \n' % (current_year))
    print(df_yearly.chl_rate_stand.describe())

    # Group once and reuse the grouped object for all four weekly statistics.
    weekly_rate = df_yearly.groupby(['week_rotate'])['chl_rate_stand']

    # --- line plot -------------------------------------------------------
    fig, axes1 = plt.subplots(figsize=(8, 6))
    weekly_rate.mean().plot(ax=axes1, linestyle="-", color='b', linewidth=1)
    weekly_rate.quantile(.85).plot(ax=axes1, linestyle="--", color='g', linewidth=0.35)
    weekly_rate.quantile(.50).plot(ax=axes1, linestyle="--", color='r', linewidth=0.75)
    weekly_rate.quantile(.15).plot(ax=axes1, linestyle="--", color='g', linewidth=0.35)
    axes1.set_ylim(-2, 2)
    axes1.set_title("Year " + str(current_year) + ": Line plot of the weekly data on the standardized rate of change per day of the $Chl_a$ Concentration", fontsize=10)
    axes1.set_xlabel('week', fontsize=10)
    # Raw string avoids the invalid "\c" escape; the rendered label is unchanged.
    axes1.set_ylabel(r'rate of change of the $Chl_a$ in $mg/(m^3 \cdot day)$', fontsize=10)
    axes1.set_yticks(np.arange(-2, 2, 0.25))
    axes1.set_xticks(np.arange(1, 25, 1))
    plt.show()
    plt.close(fig)

    # --- box plot --------------------------------------------------------
    # http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
    # http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
    # DataFrame.boxplot opens its own figure unless an Axes is supplied, so
    # the sized Axes is passed in; a bare plt.figure() beforehand would only
    # leave an empty extra figure behind.
    fig, axes2 = plt.subplots(figsize=(8, 6))
    df_yearly.boxplot(column='chl_rate_stand', by='week_rotate', ax=axes2)
    axes2.set_ylim(-2, 2)
    axes2.set_title("Year " + str(current_year) + ": Box plot of the weekly data \n on the standardized rate of change per day of the $Chl_a$ Concentration", fontsize=10)
    fig.suptitle("")  # remove the automatic "grouped by" super-title
    axes2.set_xlabel('week', fontsize=10)
    axes2.set_ylabel(r'rate of change of the $Chl_a$ in $mg/(m^3 \cdot day)$', fontsize=10)
    axes2.set_yticks(np.arange(-2, 2, 0.25))
    axes2.set_xticks(np.arange(1, 25, 1))
    plt.show()
    plt.close(fig)

# the rate of change is slower on the regular scale

# Release anything still open. No cla()/clf()/show() afterwards: with no
# figure open they would create and display a new blank one.
plt.close('all')
In [ ]:
In [ ]: