In [1]:
import xarray as xr
import numpy as np
import pandas as pd
%matplotlib inline
from matplotlib import pyplot as plt
from dask.diagnostics import ProgressBar
import seaborn as sns
from matplotlib.colors import LogNorm
In [2]:
# resampling frequency in number of days
#freq=2
In [3]:
from tools.load_GlobColor_dataset import load_dataset
import importlib
# Re-import so edits to the loader are picked up without restarting the kernel.
# NOTE(review): reload() only works because `load_dataset` appears to be a
# submodule (it is used as `load_dataset.load_chl1()` below) — TODO confirm;
# `%autoreload 2` would be the more idiomatic notebook solution.
importlib.reload(load_dataset)
Out[3]:
In [5]:
# --- CHL1 product: load the daily chlorophyll-a dataset, then plot one day
# as a quick sanity check that the load worked.
ds_daily = load_dataset.load_chl1()
first_day = ds_daily.chlor_a.sel(time='2002-04-28')
first_day.plot()
Out[5]:
In [6]:
# 8-day composites, matching the standard ocean-color product interval.
freq_resample = '8D'
# Modern xarray resample API: the old positional form
# ds_daily.resample(freq_resample, dim='time') was removed in xarray 0.12.
# Its default reduction was the mean, so apply .mean() explicitly here.
ds_8day = ds_daily.resample(time=freq_resample).mean()
ds_8day
Out[6]:
In [7]:
# Sanity-check the data volume: size of each dataset in megabytes.
both_datasets = [ds_8day, ds_daily]
sizes_mb = [ds.nbytes / 1e6 for ds in both_datasets]
print(sizes_mb)
In [8]:
def fix_bad_data(ds):
    """Replace negative chlorophyll values with NaN, in place.

    On some granules the cloud / land mask appears to be inverted, which
    shows up as physically impossible chlorophyll concentrations below zero.
    For each time step whose minimum over the remaining dims is negative,
    mask the negative pixels to NaN.

    Parameters
    ----------
    ds : dataset with a ``chlor_a`` variable (time as leading dimension)
        Modified in place; nothing is returned.
    """
    # One flag per time step: does this slice contain any negative value?
    bad_data = ds.chlor_a.groupby('time').min() < 0
    # Only rewrite the offending time slices; good slices stay untouched.
    for n in np.nonzero(bad_data.values)[0]:
        data = ds.chlor_a[n].values
        # np.where is a simpler equivalent of the original
        # np.ma.masked_less(data, 0).filled(np.nan) round-trip.
        ds.chlor_a.values[n] = np.where(data < 0, np.nan, data)
In [9]:
[fix_bad_data(ds) for ds in both_datasets]
Out[9]:
In [10]:
# Count the number of ocean data points
(ds_8day.chlor_a>0).sum(dim='time').plot()
Out[10]:
In [11]:
# find a mask for the land
ocean_mask = (ds_8day.chlor_a>0).sum(dim='time')>0
num_ocean_points = ocean_mask.sum().values
ocean_mask.plot()
plt.title('%g total ocean points' % num_ocean_points)
Out[11]:
In [12]:
# Log-scaled map of the daily product for (the day nearest) 2002-11-18.
plt.figure(figsize=(8, 6))
one_day = ds_daily.chlor_a.sel(time='2002-11-18', method='nearest')
one_day.plot(norm=LogNorm())
Out[12]:
In [13]:
ds_daily.groupby('time').count() # information from original data
Out[13]:
In [14]:
ds_daily.chlor_a.groupby('time').count()/float(num_ocean_points)
Out[14]:
In [15]:
# Fraction of ocean points with valid data at each time step, for both the
# 8-day composite and the daily product, plotted on one time axis.
count_8day = ds_8day.chlor_a.groupby('time').count() / float(num_ocean_points)
count_daily = ds_daily.chlor_a.groupby('time').count() / float(num_ocean_points)
plt.figure(figsize=(12, 4))
count_8day.plot(color='k')
count_daily.plot(color='r')
plt.legend(['8 day', 'daily'])
Out[15]:
In [16]:
count_8day_clim, coundt_daily_clim = [count.groupby('time.month').mean() # monthly data
for count in (count_8day, count_daily)]
In [17]:
# mean value of the monthly data on the count of nonzeros
plt.figure(figsize=(12,4))
count_8day_clim.plot(color='k')
coundt_daily_clim.plot(color='r')
plt.legend(['8 day', 'daily'])
Out[17]:
From the above figure, we see that data coverage is highest in the winter (especially February) and lowest in summer.
In [18]:
# Maps of individual days
target_date = '2003-02-15'
plt.figure(figsize=(8,6))
ds_8day.chlor_a.sel(time=target_date, method='nearest').plot(norm=LogNorm())
Out[18]:
In [19]:
plt.figure(figsize=(8,6))
ds_daily.chlor_a.sel(time=target_date, method='nearest').plot(norm=LogNorm())
Out[19]:
In [20]:
freq_resample = 'M' # resampling using month end
print("resampling frequency is ", freq_resample)
# Modern xarray resample API: the old positional form
# ds_daily.resample(freq_resample, dim='time') was removed in xarray 0.12;
# its default reduction was the mean, applied explicitly here.
ds_resample = ds_daily.resample(time=freq_resample).mean()
ds_resample
Out[20]:
In [21]:
# http://xarray.pydata.org/en/stable/time-series.html
# key: xarray
# NOTE(review): the triple-quoted string below is commented-out scratch code
# kept for reference; the cell's only output is the string itself.
'''
print(ds_resample['time.month'])
print(ds_resample['time.year'])
print(ds_resample['time.year']==2002)
'''
Out[21]:
In [22]:
### Approach 1
### gather the data, and plot
### norm=LogNorm() removed
### use robust=True (outliers clipped from the color range)
for year in range(2003, 2017):
    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(10, 6))
    for month, ax in zip(range(1, 4), axes):
        # Zero-padded month keeps the date string valid for any month 1-12,
        # not just single-digit months as with the original '-0' + str(month).
        time_target = '{}-{:02d}'.format(year, month)
        # Draw into the axes already created by subplots() instead of calling
        # plt.subplot(130 + month) again (xarray's .plot() uses the current axes).
        plt.sca(ax)
        ds_resample.chlor_a.sel(time=time_target, method='nearest').plot(robust=True, cmap="jet")
    plt.show()
    plt.close(fig)  # free the figure so 14 years of maps don't pile up in memory
In [23]:
##### some tests ####
# NOTE(review): `year` here is leftover state from the loop in the previous
# cell (its final value, 2016). This is fragile under Restart & Run All;
# consider assigning `year` explicitly in this cell.
time_target = [np.datetime64(str(year) + '-02'), np.datetime64(str(year) + '-03'), np.datetime64(str(year) + '-04')]
print(time_target)
ds_resample.chlor_a.sel(time= list(time_target), method='nearest').plot(x='lon', y='lat', col='time', cmap="jet", robust=True, col_wrap=3)
Out[23]:
In [24]:
### Approach 2
### use xarray faceting: http://xarray.pydata.org/en/stable/plotting.html
### norm=LogNorm() removed; robust=True clips outliers from the color range
for year in range(2003, 2017):
    # One datetime64 per target month: February, March, April of this year.
    time_target = [np.datetime64(str(year) + '-0' + str(m)) for m in (2, 3, 4)]
    ds_resample.chlor_a.sel(time=time_target, method='nearest').plot(
        x='lon', y='lat', col='time', cmap="jet", robust=True, col_wrap=3)
In [25]:
### Approach 3
### use xarray faceting: http://xarray.pydata.org/en/stable/plotting.html
### focus on February across a hand-picked set of years
### norm=LogNorm() removed; robust=True clips outliers from the color range
selected_years = [2003, 2004, 2005, 2006, 2010, 2016]
time_target2 = [np.datetime64(str(y) + '-02') for y in selected_years]
ds_resample.chlor_a.sel(time=time_target2, method='nearest').plot(
    x='lon', y='lat', col='time', cmap="jet", robust=True, col_wrap=3)
Out[25]:
In [ ]: