notebook.community

Edit and run



In [1]:

    
%matplotlib inline
import pandas as pd



In [2]:

    
# Load the Berkeley Earth Land & Ocean dataset (monthly global temperature
# anomalies). Lines starting with '%' in the file are comments and are skipped;
# only the first three whitespace-separated columns are kept.
url = 'http://berkeleyearth.lbl.gov/auto/Global/Land_and_Ocean_complete.txt'
# Alternative: point `url` at a local copy of the file instead.
# url = 'Land_and_Ocean_complete.txt'
# The separator is a regex, so it is written as a raw string: a plain '\s+'
# is an invalid escape sequence and triggers a warning on modern Python.
data = pd.read_csv(url, sep=r'\s+', comment='%',
                   usecols=(0, 1, 2),
                   names=('year', 'month', 'anomaly'))



In [3]:

    
# Inspect the raw frame; pandas truncates the display to the first and last rows.
data









    Out[3]:






  
    
      
      year
      month
      anomaly
    
  
  
    
      0
      1850
      1
      -0.730
    
    
      1
      1850
      2
      -0.073
    
    
      2
      1850
      3
      -0.340
    
    
      3
      1850
      4
      -0.627
    
    
      4
      1850
      5
      -0.735
    
    
      5
      1850
      6
      -0.418
    
    
      6
      1850
      7
      -0.272
    
    
      7
      1850
      8
      -0.241
    
    
      8
      1850
      9
      -0.515
    
    
      9
      1850
      10
      -0.658
    
    
      10
      1850
      11
      -0.552
    
    
      11
      1850
      12
      -0.405
    
    
      12
      1851
      1
      -0.216
    
    
      13
      1851
      2
      -0.380
    
    
      14
      1851
      3
      -0.626
    
    
      15
      1851
      4
      -0.709
    
    
      16
      1851
      5
      -0.335
    
    
      17
      1851
      6
      -0.279
    
    
      18
      1851
      7
      -0.197
    
    
      19
      1851
      8
      -0.264
    
    
      20
      1851
      9
      -0.349
    
    
      21
      1851
      10
      -0.124
    
    
      22
      1851
      11
      -0.354
    
    
      23
      1851
      12
      -0.061
    
    
      24
      1852
      1
      -0.116
    
    
      25
      1852
      2
      -0.526
    
    
      26
      1852
      3
      -0.765
    
    
      27
      1852
      4
      -0.529
    
    
      28
      1852
      5
      -0.184
    
    
      29
      1852
      6
      -0.176
    
    
      ...
      ...
      ...
      ...
    
    
      3984
      2014
      10
      0.637
    
    
      3985
      2014
      11
      0.489
    
    
      3986
      2014
      12
      0.662
    
    
      3987
      2015
      1
      0.711
    
    
      3988
      2015
      2
      0.752
    
    
      3989
      2015
      3
      0.748
    
    
      3990
      2015
      4
      0.672
    
    
      3991
      2015
      5
      0.681
    
    
      3992
      2015
      6
      0.700
    
    
      3993
      2015
      7
      0.629
    
    
      3994
      2015
      8
      0.694
    
    
      3995
      2015
      9
      0.721
    
    
      3996
      2015
      10
      0.889
    
    
      3997
      2015
      11
      0.835
    
    
      3998
      2015
      12
      1.007
    
    
      3999
      2016
      1
      0.971
    
    
      4000
      2016
      2
      1.091
    
    
      4001
      2016
      3
      1.110
    
    
      4002
      2016
      4
      0.975
    
    
      4003
      2016
      5
      0.804
    
    
      4004
      2016
      6
      0.701
    
    
      4005
      2016
      7
      0.716
    
    
      4006
      2016
      8
      0.831
    
    
      4007
      2016
      9
      0.690
    
    
      4008
      2016
      10
      0.692
    
    
      4009
      2016
      11
      0.664
    
    
      4010
      2016
      12
      0.727
    
    
      4011
      2017
      1
      0.853
    
    
      4012
      2017
      2
      0.932
    
    
      4013
      2017
      3
      0.973
    
  

4014 rows × 3 columns



In [4]:

    
# The raw frame holds each (year, month) pair twice (4014 rows = 2 x 2007
# months): the file carries two different temperature estimates back to back.
# Keep rows from 1980 onwards and, for every month, only the last occurrence
# (presumably the second estimate in the file -- confirm against its header).
data = (
    data.loc[data['year'] >= 1980]
        .drop_duplicates(subset=['year', 'month'], keep='last')
)



In [5]:

    
# Replace the integer index with a DatetimeIndex built from the year/month
# columns (each timestamp is the first day of its month), then drop the
# now-redundant columns.
# pd.to_datetime assembles timestamps from year/month/day columns in a single
# vectorized call. The earlier row-wise pd.datetime.strptime approach relied on
# the pd.datetime alias, which was deprecated in pandas 1.0 and removed in 2.0.
data.index = pd.DatetimeIndex(
    pd.to_datetime(data[['year', 'month']].assign(day=1)))
data = data.drop(['year', 'month'], axis=1)



In [6]:

    
# Sanity check: the last five months, now indexed by date with only `anomaly` left.
data.tail(5)



In [7]:

    
# Monthly temperature anomaly since 1980.
data['anomaly'].plot()









    Out[7]:





<matplotlib.axes._subplots.AxesSubplot at 0x1180a5da0>



In [8]:

    
# Rolling window over 12 consecutive rows; the rows are monthly, so each
# window spans one year.
yearly = data.rolling(window=12)



In [9]:

    
# 12-month rolling mean of the anomaly.
yearly['anomaly'].mean().plot()









    Out[9]:





<matplotlib.axes._subplots.AxesSubplot at 0x10f56ffd0>



In [10]:

    
# Spread (max minus min) of the anomaly inside each 12-month window.
yearly['anomaly'].max().sub(yearly['anomaly'].min()).plot()









    Out[10]:





<matplotlib.axes._subplots.AxesSubplot at 0x118e62a20>



In [11]:

    
# Standard deviation of the anomaly inside each 12-month window.
yearly['anomaly'].std().plot()









    Out[11]:





<matplotlib.axes._subplots.AxesSubplot at 0x118e294a8>



In [12]:

    
# Rolling window over 60 consecutive monthly rows, i.e. five years (a lustrum).
lustrum = data.rolling(window=60)



In [13]:

    
# 60-month (five-year) rolling mean of the anomaly.
lustrum['anomaly'].mean().plot()









    Out[13]:





<matplotlib.axes._subplots.AxesSubplot at 0x11c52cf60>

	year	month	anomaly
0	1850	1	-0.730
1	1850	2	-0.073
2	1850	3	-0.340
3	1850	4	-0.627
4	1850	5	-0.735
5	1850	6	-0.418
6	1850	7	-0.272
7	1850	8	-0.241
8	1850	9	-0.515
9	1850	10	-0.658
10	1850	11	-0.552
11	1850	12	-0.405
12	1851	1	-0.216
13	1851	2	-0.380
14	1851	3	-0.626
15	1851	4	-0.709
16	1851	5	-0.335
17	1851	6	-0.279
18	1851	7	-0.197
19	1851	8	-0.264
20	1851	9	-0.349
21	1851	10	-0.124
22	1851	11	-0.354
23	1851	12	-0.061
24	1852	1	-0.116
25	1852	2	-0.526
26	1852	3	-0.765
27	1852	4	-0.529
28	1852	5	-0.184
29	1852	6	-0.176
...	...	...	...
3984	2014	10	0.637
3985	2014	11	0.489
3986	2014	12	0.662
3987	2015	1	0.711
3988	2015	2	0.752
3989	2015	3	0.748
3990	2015	4	0.672
3991	2015	5	0.681
3992	2015	6	0.700
3993	2015	7	0.629
3994	2015	8	0.694
3995	2015	9	0.721
3996	2015	10	0.889
3997	2015	11	0.835
3998	2015	12	1.007
3999	2016	1	0.971
4000	2016	2	1.091
4001	2016	3	1.110
4002	2016	4	0.975
4003	2016	5	0.804
4004	2016	6	0.701
4005	2016	7	0.716
4006	2016	8	0.831
4007	2016	9	0.690
4008	2016	10	0.692
4009	2016	11	0.664
4010	2016	12	0.727
4011	2017	1	0.853
4012	2017	2	0.932
4013	2017	3	0.973

	anomaly
2016-11-01	0.664
2016-12-01	0.727
2017-01-01	0.853
2017-02-01	0.932
2017-03-01	0.973