In [1]:
%matplotlib inline
import pandas as pd
In [2]:
# Load the Berkeley Earth Land & Ocean dataset.
# The file is whitespace-delimited; anything after a '%' on a line is treated
# as a comment and ignored. Only the first three columns are read and they are
# given explicit names because the data rows carry no header line of their own.
url = 'http://berkeleyearth.lbl.gov/auto/Global/Land_and_Ocean_complete.txt'
# Offline alternative: point `url` at a local copy of the same file.
# url = 'Land_and_Ocean_complete.txt'
data = pd.read_csv(
    url,
    # Raw string: '\s' inside a plain literal is an invalid escape sequence
    # and triggers a warning on current Python versions.
    sep=r'\s+',
    comment='%',
    usecols=(0, 1, 2),
    names=('year', 'month', 'anomaly'),
)
In [3]:
# Display the freshly loaded frame as a quick visual check of the parse.
data
Out[3]:
In [4]:
# The dataset holds two different temperature estimations, so the same
# (year, month) pair presumably appears more than once. Keep rows from 1980
# onwards and, for every repeated pair, retain only its last occurrence.
data = (
    data.loc[data['year'] >= 1980]
    .drop_duplicates(subset=('year', 'month'), keep='last')
)
In [5]:
# Replace the index with one timestamp per row (first day of the row's month),
# built from the year/month columns, then drop those two columns.
#
# `pd.datetime` was deprecated in pandas 1.0 and removed in pandas 2.0, so the
# previous row-wise `apply` + `strptime` raises AttributeError on current
# pandas. `pd.to_datetime` assembles the timestamps from year/month/day columns
# in one vectorised call instead.
#
# NOTE: this cell consumes the year/month columns, so it cannot be re-run
# without first re-running the cells that create `data`.
data.index = pd.DatetimeIndex(
    pd.to_datetime(
        # Cast to int so the date parts are whole numbers whatever dtype the
        # parser inferred; day=1 matches what strptime('%Y %m') produced.
        data[['year', 'month']].astype(int).assign(day=1)
    ).values
)
data = data.drop(['year', 'month'], axis=1)
In [6]:
# Peek at the five most recent rows to confirm the new index.
data.iloc[-5:]
Out[6]:
In [7]:
# Plot the monthly anomaly series; the title lets the figure stand on its own
# when the notebook is skimmed.
data.anomaly.plot(title='Monthly temperature anomaly')
Out[7]:
In [8]:
# Rolling view over 12 consecutive rows, i.e. one year of monthly samples.
yearly = data.rolling(12)
In [9]:
# Mean of the anomaly over the 12-row rolling window; titled so it can be told
# apart from the other rolling-statistic figures.
yearly.anomaly.mean().plot(title='12-month rolling mean of temperature anomaly')
Out[9]:
In [10]:
# Spread of the anomaly inside each 12-row window (window max minus window
# min); titled so it can be told apart from the other rolling-statistic figures.
(yearly.anomaly.max() - yearly.anomaly.min()).plot(
    title='12-month rolling range (max - min) of temperature anomaly')
Out[10]:
In [11]:
# Standard deviation of the anomaly inside each 12-row window; titled so it
# can be told apart from the other rolling-statistic figures.
yearly.anomaly.std().plot(
    title='12-month rolling standard deviation of temperature anomaly')
Out[11]:
In [12]:
# Rolling view over 60 consecutive rows, i.e. five years of monthly samples.
lustrum = data.rolling(60)
In [13]:
# Mean of the anomaly over the 60-row rolling window; titled so it is not
# confused with the 12-month mean figure.
lustrum.anomaly.mean().plot(title='60-month rolling mean of temperature anomaly')
Out[13]: