In [2]:
%matplotlib inline
Here is an example of how to easily manipulate a toy weather dataset using xarray and other recommended Python libraries:
Shared setup:
In [3]:
import xarray as xr
import numpy as np
import pandas as pd
import seaborn as sns # pandas aware plotting library
# Seed NumPy's global RNG so the synthetic data below is identical on every run.
np.random.seed(123)

# Daily timestamps covering 2000-01-01 through 2001-12-31.
times = pd.date_range('2000-01-01', '2001-12-31', name='time')

# Sinusoidal seasonal signal with a one-year period. `.values` converts the
# pandas Index returned by `dayofyear` into a plain ndarray, so `annual_cycle`
# is an ndarray and supports the `.reshape` call below (a pandas Index does not).
annual_cycle = np.sin(2 * np.pi * (times.dayofyear.values / 365.25 - 0.28))

# Column vector of shape (n_days, 1) so it broadcasts across the three
# location columns generated below.
base = 10 + 15 * annual_cycle.reshape(-1, 1)

# Independent Gaussian noise (scaled by 3) per day and location; tmax is the
# same seasonal base shifted up by 10.
tmin_values = base + 3 * np.random.randn(annual_cycle.size, 3)
tmax_values = base + 10 + 3 * np.random.randn(annual_cycle.size, 3)
# Bundle both temperature arrays into a single Dataset whose dimensions are
# (time, location), labelled by the daily timestamps and three location codes.
ds = xr.Dataset(
    data_vars={
        'tmin': (('time', 'location'), tmin_values),
        'tmax': (('time', 'location'), tmax_values),
    },
    coords={'time': times, 'location': ['IA', 'IN', 'IL']},
)
In [40]:
# Display the Dataset's summary (dimensions, coordinates, data variables).
ds
Out[40]:
In [5]:
# Convert the Dataset to a pandas DataFrame for tabular inspection and plotting.
df = ds.to_dataframe()
In [6]:
# Preview the first rows of the converted table.
df.head()
Out[6]:
In [7]:
# Summary statistics for each column of the table.
df.describe()
Out[7]:
In [8]:
# Average over the three locations, then plot the resulting time series.
(
    ds
    .mean(dim='location')
    .to_dataframe()
    .plot()
)
Out[8]:
In [9]:
# Pairwise relationship grid restricted to the Dataset's data variables;
# reset_index() first turns the index levels into ordinary columns.
sns.pairplot(df.reset_index(), vars=ds.data_vars)
Out[9]:
In [10]:
# Boolean mask of days with tmin <= 0, averaged over time within each calendar
# month: the mean of a boolean series is the fraction of days that satisfy it.
freeze = (ds['tmin'] <= 0).groupby('time.month').mean('time')
In [11]:
# Display the per-month result computed in the previous cell.
freeze
Out[11]:
In [12]:
# Convert to a pandas object and plot it.
freeze.to_pandas().plot()
Out[12]:
In [13]:
# Resample to monthly means; '1MS' labels each bin by the first day of its month.
# Uses the keyword form `resample(time=...)` followed by an explicit reduction,
# which replaces the legacy `resample(freq, dim=..., how=...)` signature that
# current xarray releases no longer accept.
monthly_avg = ds.resample(time='1MS').mean()
In [14]:
# Plot the monthly averages for the 'IA' location, using square markers
# joined by lines.
(
    monthly_avg
    .sel(location='IA')
    .to_dataframe()
    .plot(style='s-')
)
Out[14]:
Note that `MS` here refers to Month-Start; `M` labels Month-End (the last day of the month).
In [15]:
# Mean over time for each calendar month (one value per month, per location
# and variable).
climatology = ds.groupby('time.month').mean('time')
In [16]:
# Group the daily data by calendar month and subtract the matching monthly
# climatology from each group.
anomalies = ds.groupby('time.month') - climatology
In [17]:
# Average the anomalies over locations and plot only the tmin/tmax columns.
(
    anomalies
    .mean('location')
    .to_dataframe()[['tmin', 'tmax']]
    .plot()
)
Out[17]:
In [18]:
# Discard the first half of every month: keep only days after the 15th, then
# reindex against the full dataset so the original time axis is restored.
some_missing = (
    ds.tmin
    .sel(time=ds['time.day'] > 15)
    .reindex_like(ds)
)
In [19]:
# Fill the gaps month by month using the climatological tmin for that month.
filled = some_missing.groupby('time.month').fillna(climatology.tmin)
In [20]:
# Put the gappy series and its filled counterpart side by side in one Dataset
# so they can be compared.
both = xr.Dataset(
    data_vars={
        'some_missing': some_missing,
        'filled': filled,
    },
)
In [21]:
# Display the combined Dataset.
both
Out[21]:
In [22]:
# Select the year 2000, average over locations, drop the remaining non-index
# coordinates and convert to a DataFrame.
# NOTE: this rebinds `df`, which earlier held `ds.to_dataframe()`; re-running
# the earlier `df` cells after this one will operate on this table instead.
df = (
    both
    .sel(time='2000')
    .mean('location')
    .reset_coords(drop=True)
    .to_dataframe()
)
In [23]:
# Plot the filled series together with the series that still has gaps.
df[['filled', 'some_missing']].plot()
Out[23]:
In [ ]: