In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import xray
from netCDF4 import num2date
import matplotlib.pyplot as plt
In [3]:
print("numpy version : ", np.__version__)
print("pandas version : ", pd.__version__)
print("xray version : ", xray.version.version)
In [4]:
dpm = {'noleap': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
'365_day': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
'standard': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
'gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
'proleptic_gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
'all_leap': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
'366_day': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
'360_day': [0, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]}
In [7]:
def leap_year(year, calendar='standard'):
"""Determine if year is a leap year"""
leap = False
if ((calendar in ['standard', 'gregorian',
'proleptic_gregorian', 'julian']) and
(year % 4 == 0)):
leap = True
if ((calendar == 'proleptic_gregorian') and
(year % 100 == 0) and
(year % 400 != 0)):
leap = False
elif ((calendar in ['standard', 'gregorian']) and
(year % 100 == 0) and (year % 400 != 0) and
(year < 1583)):
leap = False
return leap
def get_dpm(time, calendar='standard'):
"""
return a array of days per month corresponding to the months provided in `months`
"""
month_length = np.zeros(len(time), dtype=np.int)
cal_days = dpm[calendar]
for i, (month, year) in enumerate(zip(time.month, time.year)):
month_length[i] = cal_days[month]
if leap_year(year, calendar=calendar):
month_length[i] += 1
return month_length
In [5]:
monthly_mean_file = 'RASM_example_data.nc'
ds = xray.open_dataset(monthly_mean_file, decode_coords=False)
print(ds)
In [8]:
month_length = xray.DataArray(get_dpm(ds.time.to_index(),
calendar='noleap'),
coords=[ds.time], name='month_length')
In [9]:
weights = month_length.groupby('time.season') / month_length.astype(float).groupby('time.season').sum()
In [10]:
np.testing.assert_allclose(weights.groupby('time.season').sum().values, np.ones(4))
In [11]:
ds_weighted = (ds * weights).groupby('time.season').sum(dim='time')
In [12]:
ds_weighted
Out[12]:
In [24]:
ds.groupby('time.season').mean('time').plot()
In [ ]: