In [1]:
%matplotlib inline
import pandas as pd

In [2]:
# Load the Berkeley Earth Land & Ocean dataset.
# The file is whitespace-delimited text whose header/commentary lines start
# with '%'; only the first three columns (year, month, anomaly) are read.
url = 'http://berkeleyearth.lbl.gov/auto/Global/Land_and_Ocean_complete.txt'
# Offline alternative: point at a previously downloaded copy instead.
# url = 'Land_and_Ocean_complete.txt'
# The separator is a regex, so it is written as a raw string: a plain '\s+'
# is an invalid escape sequence and triggers a warning on modern Python.
data = pd.read_csv(url, sep=r'\s+', comment='%',
            usecols=(0, 1, 2),
            names=('year', 'month', 'anomaly'))

In [3]:
# Show the loaded frame; the rendered output is truncated to its first and
# last rows.
data


Out[3]:
year month anomaly
0 1850 1 -0.730
1 1850 2 -0.073
2 1850 3 -0.340
3 1850 4 -0.627
4 1850 5 -0.735
5 1850 6 -0.418
6 1850 7 -0.272
7 1850 8 -0.241
8 1850 9 -0.515
9 1850 10 -0.658
10 1850 11 -0.552
11 1850 12 -0.405
12 1851 1 -0.216
13 1851 2 -0.380
14 1851 3 -0.626
15 1851 4 -0.709
16 1851 5 -0.335
17 1851 6 -0.279
18 1851 7 -0.197
19 1851 8 -0.264
20 1851 9 -0.349
21 1851 10 -0.124
22 1851 11 -0.354
23 1851 12 -0.061
24 1852 1 -0.116
25 1852 2 -0.526
26 1852 3 -0.765
27 1852 4 -0.529
28 1852 5 -0.184
29 1852 6 -0.176
... ... ... ...
3984 2014 10 0.637
3985 2014 11 0.489
3986 2014 12 0.662
3987 2015 1 0.711
3988 2015 2 0.752
3989 2015 3 0.748
3990 2015 4 0.672
3991 2015 5 0.681
3992 2015 6 0.700
3993 2015 7 0.629
3994 2015 8 0.694
3995 2015 9 0.721
3996 2015 10 0.889
3997 2015 11 0.835
3998 2015 12 1.007
3999 2016 1 0.971
4000 2016 2 1.091
4001 2016 3 1.110
4002 2016 4 0.975
4003 2016 5 0.804
4004 2016 6 0.701
4005 2016 7 0.716
4006 2016 8 0.831
4007 2016 9 0.690
4008 2016 10 0.692
4009 2016 11 0.664
4010 2016 12 0.727
4011 2017 1 0.853
4012 2017 2 0.932
4013 2017 3 0.973

4014 rows × 3 columns


In [4]:
# The file carries two different temperature estimations, so (year, month)
# pairs repeat. Restrict to 1980 onwards and, for each repeated pair, keep
# only the row that appears last in the file.
data = (
    data[data['year'] >= 1980]
    .drop_duplicates(subset=['year', 'month'], keep='last')
)

In [5]:
# Replace the separate year/month columns with a datetime index stamped at
# the first day of each month.
# pd.to_datetime assembles timestamps from columns named year/month/day in a
# single vectorized call. The previous row-wise pd.datetime.strptime version
# relied on the pd.datetime alias, which pandas deprecated in 1.0 and removed
# in 2.0, so it no longer runs on current pandas.
data.index = pd.DatetimeIndex(
    pd.to_datetime(data[['year', 'month']].assign(day=1)))
# year/month are now encoded in the index; keep only the anomaly column.
data = data.drop(['year', 'month'], axis=1)

In [6]:
# Sanity check: the last five rows of the re-indexed frame.
data.tail(n=5)


Out[6]:
anomaly
2016-11-01 0.664
2016-12-01 0.727
2017-01-01 0.853
2017-02-01 0.932
2017-03-01 0.973

In [7]:
# Monthly anomaly series over the retained period.
data['anomaly'].plot()


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x1180a5da0>

In [8]:
# Rolling window of 12 consecutive rows (one year of monthly values).
yearly = data.rolling(12)

In [9]:
# Mean anomaly over each 12-row window.
yearly['anomaly'].mean().plot()


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x10f56ffd0>

In [10]:
# Spread (maximum minus minimum) of the anomaly within each 12-row window.
yearly['anomaly'].max().sub(yearly['anomaly'].min()).plot()


Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x118e62a20>

In [11]:
# Standard deviation of the anomaly within each 12-row window.
yearly['anomaly'].std().plot()


Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x118e294a8>

In [12]:
# Rolling window of 60 consecutive rows (five years of monthly values).
lustrum = data.rolling(60)

In [13]:
# Mean anomaly over each 60-row window.
lustrum['anomaly'].mean().plot()


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x11c52cf60>