In [1]:
# show all plots inside Jupyter
%matplotlib inline

In [2]:
import pandas as pd

In [3]:
# background on the dataset: https://en.wikipedia.org/wiki/North_Atlantic_oscillation
NAO = pd.read_table('data/NAO.txt')

In [4]:
type(NAO)


Out[4]:
pandas.core.frame.DataFrame

In [5]:
NAO.head()


Out[5]:
year NAO
0 1864 -1.02
1 1865 -1.24
2 1866 0.54
3 1867 -1.38
4 1868 2.81

In [6]:
NAO.describe()


Out[6]:
year NAO
count 146.000000 146.000000
mean 1936.500000 0.193356
std 42.290661 1.907480
min 1864.000000 -4.890000
25% 1900.250000 -1.027500
50% 1936.500000 0.245000
75% 1972.750000 1.580000
max 2009.000000 5.080000

In [7]:
# get the first 5 values from column NAO
NAO.NAO[:5]


Out[7]:
0   -1.02
1   -1.24
2    0.54
3   -1.38
4    2.81
Name: NAO, dtype: float64

In [8]:
# get min / max value from a column
print min(NAO.year), max(NAO.year)


1864 2009

In [9]:
# Coerce data to a time series

dates = pd.to_datetime(NAO.year, format="%Y")
dates.head()


Out[9]:
0   1864-01-01 00:00:00
1   1865-01-01 00:00:00
2   1866-01-01 00:00:00
3   1867-01-01 00:00:00
4   1868-01-01 00:00:00
Name: year, dtype: datetime64[ns]

In [10]:
# NAO.NAO.index = dates

In [15]:
# pd.Series(NAO.NAO, index=dates)

In [26]:
pd.Series(NAO.NAO.values, index=dates).plot()


Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f9058432650>

In [30]:
NAO.NAO.reindex(dates)


Out[30]:
year
1864-01-01 00:00:00   NaN
1865-01-01 00:00:00   NaN
1866-01-01 00:00:00   NaN
1867-01-01 00:00:00   NaN
1868-01-01 00:00:00   NaN
1869-01-01 00:00:00   NaN
1870-01-01 00:00:00   NaN
1871-01-01 00:00:00   NaN
1872-01-01 00:00:00   NaN
1873-01-01 00:00:00   NaN
1874-01-01 00:00:00   NaN
1875-01-01 00:00:00   NaN
1876-01-01 00:00:00   NaN
1877-01-01 00:00:00   NaN
1878-01-01 00:00:00   NaN
1879-01-01 00:00:00   NaN
1880-01-01 00:00:00   NaN
1881-01-01 00:00:00   NaN
1882-01-01 00:00:00   NaN
1883-01-01 00:00:00   NaN
1884-01-01 00:00:00   NaN
1885-01-01 00:00:00   NaN
1886-01-01 00:00:00   NaN
1887-01-01 00:00:00   NaN
1888-01-01 00:00:00   NaN
1889-01-01 00:00:00   NaN
1890-01-01 00:00:00   NaN
1891-01-01 00:00:00   NaN
1892-01-01 00:00:00   NaN
1893-01-01 00:00:00   NaN
                       ..
1980-01-01            NaN
1981-01-01            NaN
1982-01-01            NaN
1983-01-01            NaN
1984-01-01            NaN
1985-01-01            NaN
1986-01-01            NaN
1987-01-01            NaN
1988-01-01            NaN
1989-01-01            NaN
1990-01-01            NaN
1991-01-01            NaN
1992-01-01            NaN
1993-01-01            NaN
1994-01-01            NaN
1995-01-01            NaN
1996-01-01            NaN
1997-01-01            NaN
1998-01-01            NaN
1999-01-01            NaN
2000-01-01            NaN
2001-01-01            NaN
2002-01-01            NaN
2003-01-01            NaN
2004-01-01            NaN
2005-01-01            NaN
2006-01-01            NaN
2007-01-01            NaN
2008-01-01            NaN
2009-01-01            NaN
Name: NAO, dtype: float64

In [31]:
NAO.NAO


Out[31]:
0     -1.02
1     -1.24
2      0.54
3     -1.38
4      2.81
5      1.70
6     -3.01
7     -1.01
8     -0.76
9     -0.50
10     2.32
11    -1.35
12     0.21
13     0.05
14     1.46
15    -2.22
16     0.89
17    -3.80
18     3.87
19    -0.23
20     1.44
21    -0.89
22    -1.12
23     0.45
24    -2.75
25    -0.01
26     1.78
27    -0.82
28    -2.02
29    -1.07
       ... 
116    0.56
117    2.05
118    0.80
119    3.42
120    1.60
121   -0.63
122    0.50
123   -0.75
124    0.72
125    5.08
126    3.96
127    1.03
128    3.28
129    2.67
130    3.03
131    3.96
132   -3.78
133   -0.17
134    0.72
135    1.70
136    2.80
137   -1.90
138    0.76
139    0.20
140   -0.07
141    0.12
142   -1.09
143    2.80
144    2.11
145   -0.40
Name: NAO, dtype: float64

In [ ]: