In [1]:
import numpy as np
import pandas as pd
In [2]:
s = pd.Series([1, 2, 3, 4, 5])
s
Out[2]:
0 1
1 2
2 3
3 4
4 5
dtype: int64
In [5]:
s.index
Out[5]:
Int64Index([0, 1, 2, 3, 4], dtype=int64)
In [6]:
s.values
Out[6]:
array([1, 2, 3, 4, 5])
In [7]:
s
Out[7]:
0 1
1 2
2 3
3 4
4 5
dtype: int64
In [10]:
s2 = pd.Series([1, 2, 3, 4, 5],
index=['one', 'two', 'three', 'four', 'five'])
s2
Out[10]:
one 1
two 2
three 3
four 4
five 5
dtype: int64
In [12]:
s2.index[0]
Out[12]:
'one'
In [13]:
s2
Out[13]:
one 1
two 2
three 3
four 4
five 5
dtype: int64
In [14]:
s2['three']
Out[14]:
3
In [15]:
s2[2]
Out[15]:
3
In [17]:
s2[['one', 'three', 'two']]
Out[17]:
one 1
three 3
two 2
dtype: int64
In [18]:
s2
Out[18]:
one 1
two 2
three 3
four 4
five 5
dtype: int64
In [20]:
s3 = s2[:3]
In [21]:
s2
Out[21]:
one 1
two 2
three 3
four 4
five 5
dtype: int64
In [22]:
s3
Out[22]:
one 1
two 2
three 3
dtype: int64
In [23]:
s3.to_dict()
Out[23]:
{'one': 1, 'three': 3, 'two': 2}
In [25]:
pd.Series(s3.to_dict())
Out[25]:
one 1
three 3
two 2
dtype: int64
In [49]:
df = pd.DataFrame({'A': s2, 'B': s3})
df
Out[49]:
A
B
five
5
NaN
four
4
NaN
one
1
1
three
3
3
two
2
2
In [50]:
df['A']
Out[50]:
five 5
four 4
one 1
three 3
two 2
Name: A, dtype: int64
In [51]:
df['B']
Out[51]:
five NaN
four NaN
one 1
three 3
two 2
Name: B, dtype: float64
In [52]:
df.ix['four']
Out[52]:
A 4
B NaN
Name: four, dtype: float64
In [53]:
df.columns
Out[53]:
Index([u'A', u'B'], dtype=object)
In [54]:
df.index
Out[54]:
Index([u'five', u'four', u'one', u'three', u'two'], dtype=object)
In [55]:
df.values
Out[55]:
array([[ 5., nan],
[ 4., nan],
[ 1., 1.],
[ 3., 3.],
[ 2., 2.]])
In [56]:
df
Out[56]:
A
B
five
5
NaN
four
4
NaN
one
1
1
three
3
3
two
2
2
In [57]:
df.values
Out[57]:
array([[ 5., nan],
[ 4., nan],
[ 1., 1.],
[ 3., 3.],
[ 2., 2.]])
In [58]:
df2 = pd.DataFrame(df.values, columns=['A', 'B'])
In [59]:
df2['A'] = 7.5
df2
Out[59]:
A
B
0
7.5
NaN
1
7.5
NaN
2
7.5
1
3
7.5
3
4
7.5
2
In [61]:
!head -n 10 fred_fx.csv
DATE,EXJPUS,INTGSBJPM193N,TB6MS
1990-01-01 00:00:00,144.9819,6.64,7.55
1990-02-01 00:00:00,145.6932,6.92,7.7
1990-03-01 00:00:00,153.3082,7.36,7.85
1990-04-01 00:00:00,158.4586,7.24,7.84
1990-05-01 00:00:00,154.0441,6.7,7.76
1990-06-01 00:00:00,153.6957,7.06,7.63
1990-07-01 00:00:00,149.0395,7.48,7.52
1990-08-01 00:00:00,147.4609,8.19,7.38
1990-09-01 00:00:00,138.4405,8.62,7.32
In [63]:
fred = pd.read_csv('fred_fx.csv')
fred
Out[63]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 270 entries, 0 to 269
Data columns (total 4 columns):
DATE 270 non-null values
EXJPUS 270 non-null values
INTGSBJPM193N 269 non-null values
TB6MS 270 non-null values
dtypes: float64(3), object(1)
In [65]:
fred.head(10)
Out[65]:
DATE
EXJPUS
INTGSBJPM193N
TB6MS
0
1990-01-01 00:00:00
144.9819
6.64
7.55
1
1990-02-01 00:00:00
145.6932
6.92
7.70
2
1990-03-01 00:00:00
153.3082
7.36
7.85
3
1990-04-01 00:00:00
158.4586
7.24
7.84
4
1990-05-01 00:00:00
154.0441
6.70
7.76
5
1990-06-01 00:00:00
153.6957
7.06
7.63
6
1990-07-01 00:00:00
149.0395
7.48
7.52
7
1990-08-01 00:00:00
147.4609
8.19
7.38
8
1990-09-01 00:00:00
138.4405
8.62
7.32
9
1990-10-01 00:00:00
129.5909
7.68
7.16
In [ ]:
fred['EXJPUS']
In [70]:
fred.EXJPUS
Out[70]:
0 144.9819
1 145.6932
2 153.3082
3 158.4586
4 154.0441
5 153.6957
6 149.0395
7 147.4609
8 138.4405
9 129.5909
10 129.2155
11 133.8890
12 133.6986
13 130.5358
14 137.3867
...
255 83.1771
256 81.1257
257 80.4259
258 79.2425
259 76.9657
260 76.7957
261 76.6430
262 77.5595
263 77.7967
264 76.9640
265 78.4700
266 82.4659
267 81.2524
268 79.6668
269 79.3152
Name: EXJPUS, Length: 270, dtype: float64
In [72]:
fred.EXJPUS.index[:10]
Out[72]:
Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64)
In [74]:
fred.DATE[:5]
Out[74]:
0 1990-01-01 00:00:00
1 1990-02-01 00:00:00
2 1990-03-01 00:00:00
3 1990-04-01 00:00:00
4 1990-05-01 00:00:00
Name: DATE, dtype: object
In [75]:
fred.DATE[0]
Out[75]:
'1990-01-01 00:00:00'
In [76]:
# Several ways to convert to date
from datetime import datetime
import dateutil.parser as parser
print parser.parse(fred.DATE[0])
Out[76]:
datetime.datetime(1990, 1, 1, 0, 0)
In [77]:
datetime.strptime(fred.DATE[0], '%Y-%m-%d %H:%M:%S')
Out[77]:
datetime.datetime(1990, 1, 1, 0, 0)
In [79]:
dates = [datetime.strptime(x, '%Y-%m-%d %H:%M:%S') for x in fred.DATE]
dates
Out[79]:
[datetime.datetime(1990, 1, 1, 0, 0),
datetime.datetime(1990, 2, 1, 0, 0),
datetime.datetime(1990, 3, 1, 0, 0),
datetime.datetime(1990, 4, 1, 0, 0),
datetime.datetime(1990, 5, 1, 0, 0),
datetime.datetime(1990, 6, 1, 0, 0),
datetime.datetime(1990, 7, 1, 0, 0),
datetime.datetime(1990, 8, 1, 0, 0),
datetime.datetime(1990, 9, 1, 0, 0),
datetime.datetime(1990, 10, 1, 0, 0),
datetime.datetime(1990, 11, 1, 0, 0),
datetime.datetime(1990, 12, 1, 0, 0),
datetime.datetime(1991, 1, 1, 0, 0),
datetime.datetime(1991, 2, 1, 0, 0),
datetime.datetime(1991, 3, 1, 0, 0),
datetime.datetime(1991, 4, 1, 0, 0),
datetime.datetime(1991, 5, 1, 0, 0),
datetime.datetime(1991, 6, 1, 0, 0),
datetime.datetime(1991, 7, 1, 0, 0),
datetime.datetime(1991, 8, 1, 0, 0),
datetime.datetime(1991, 9, 1, 0, 0),
datetime.datetime(1991, 10, 1, 0, 0),
datetime.datetime(1991, 11, 1, 0, 0),
datetime.datetime(1991, 12, 1, 0, 0),
datetime.datetime(1992, 1, 1, 0, 0),
datetime.datetime(1992, 2, 1, 0, 0),
datetime.datetime(1992, 3, 1, 0, 0),
datetime.datetime(1992, 4, 1, 0, 0),
datetime.datetime(1992, 5, 1, 0, 0),
datetime.datetime(1992, 6, 1, 0, 0),
datetime.datetime(1992, 7, 1, 0, 0),
datetime.datetime(1992, 8, 1, 0, 0),
datetime.datetime(1992, 9, 1, 0, 0),
datetime.datetime(1992, 10, 1, 0, 0),
datetime.datetime(1992, 11, 1, 0, 0),
datetime.datetime(1992, 12, 1, 0, 0),
datetime.datetime(1993, 1, 1, 0, 0),
datetime.datetime(1993, 2, 1, 0, 0),
datetime.datetime(1993, 3, 1, 0, 0),
datetime.datetime(1993, 4, 1, 0, 0),
datetime.datetime(1993, 5, 1, 0, 0),
datetime.datetime(1993, 6, 1, 0, 0),
datetime.datetime(1993, 7, 1, 0, 0),
datetime.datetime(1993, 8, 1, 0, 0),
datetime.datetime(1993, 9, 1, 0, 0),
datetime.datetime(1993, 10, 1, 0, 0),
datetime.datetime(1993, 11, 1, 0, 0),
datetime.datetime(1993, 12, 1, 0, 0),
datetime.datetime(1994, 1, 1, 0, 0),
datetime.datetime(1994, 2, 1, 0, 0),
datetime.datetime(1994, 3, 1, 0, 0),
datetime.datetime(1994, 4, 1, 0, 0),
datetime.datetime(1994, 5, 1, 0, 0),
datetime.datetime(1994, 6, 1, 0, 0),
datetime.datetime(1994, 7, 1, 0, 0),
datetime.datetime(1994, 8, 1, 0, 0),
datetime.datetime(1994, 9, 1, 0, 0),
datetime.datetime(1994, 10, 1, 0, 0),
datetime.datetime(1994, 11, 1, 0, 0),
datetime.datetime(1994, 12, 1, 0, 0),
datetime.datetime(1995, 1, 1, 0, 0),
datetime.datetime(1995, 2, 1, 0, 0),
datetime.datetime(1995, 3, 1, 0, 0),
datetime.datetime(1995, 4, 1, 0, 0),
datetime.datetime(1995, 5, 1, 0, 0),
datetime.datetime(1995, 6, 1, 0, 0),
datetime.datetime(1995, 7, 1, 0, 0),
datetime.datetime(1995, 8, 1, 0, 0),
datetime.datetime(1995, 9, 1, 0, 0),
datetime.datetime(1995, 10, 1, 0, 0),
datetime.datetime(1995, 11, 1, 0, 0),
datetime.datetime(1995, 12, 1, 0, 0),
datetime.datetime(1996, 1, 1, 0, 0),
datetime.datetime(1996, 2, 1, 0, 0),
datetime.datetime(1996, 3, 1, 0, 0),
datetime.datetime(1996, 4, 1, 0, 0),
datetime.datetime(1996, 5, 1, 0, 0),
datetime.datetime(1996, 6, 1, 0, 0),
datetime.datetime(1996, 7, 1, 0, 0),
datetime.datetime(1996, 8, 1, 0, 0),
datetime.datetime(1996, 9, 1, 0, 0),
datetime.datetime(1996, 10, 1, 0, 0),
datetime.datetime(1996, 11, 1, 0, 0),
datetime.datetime(1996, 12, 1, 0, 0),
datetime.datetime(1997, 1, 1, 0, 0),
datetime.datetime(1997, 2, 1, 0, 0),
datetime.datetime(1997, 3, 1, 0, 0),
datetime.datetime(1997, 4, 1, 0, 0),
datetime.datetime(1997, 5, 1, 0, 0),
datetime.datetime(1997, 6, 1, 0, 0),
datetime.datetime(1997, 7, 1, 0, 0),
datetime.datetime(1997, 8, 1, 0, 0),
datetime.datetime(1997, 9, 1, 0, 0),
datetime.datetime(1997, 10, 1, 0, 0),
datetime.datetime(1997, 11, 1, 0, 0),
datetime.datetime(1997, 12, 1, 0, 0),
datetime.datetime(1998, 1, 1, 0, 0),
datetime.datetime(1998, 2, 1, 0, 0),
datetime.datetime(1998, 3, 1, 0, 0),
datetime.datetime(1998, 4, 1, 0, 0),
datetime.datetime(1998, 5, 1, 0, 0),
datetime.datetime(1998, 6, 1, 0, 0),
datetime.datetime(1998, 7, 1, 0, 0),
datetime.datetime(1998, 8, 1, 0, 0),
datetime.datetime(1998, 9, 1, 0, 0),
datetime.datetime(1998, 10, 1, 0, 0),
datetime.datetime(1998, 11, 1, 0, 0),
datetime.datetime(1998, 12, 1, 0, 0),
datetime.datetime(1999, 1, 1, 0, 0),
datetime.datetime(1999, 2, 1, 0, 0),
datetime.datetime(1999, 3, 1, 0, 0),
datetime.datetime(1999, 4, 1, 0, 0),
datetime.datetime(1999, 5, 1, 0, 0),
datetime.datetime(1999, 6, 1, 0, 0),
datetime.datetime(1999, 7, 1, 0, 0),
datetime.datetime(1999, 8, 1, 0, 0),
datetime.datetime(1999, 9, 1, 0, 0),
datetime.datetime(1999, 10, 1, 0, 0),
datetime.datetime(1999, 11, 1, 0, 0),
datetime.datetime(1999, 12, 1, 0, 0),
datetime.datetime(2000, 1, 1, 0, 0),
datetime.datetime(2000, 2, 1, 0, 0),
datetime.datetime(2000, 3, 1, 0, 0),
datetime.datetime(2000, 4, 1, 0, 0),
datetime.datetime(2000, 5, 1, 0, 0),
datetime.datetime(2000, 6, 1, 0, 0),
datetime.datetime(2000, 7, 1, 0, 0),
datetime.datetime(2000, 8, 1, 0, 0),
datetime.datetime(2000, 9, 1, 0, 0),
datetime.datetime(2000, 10, 1, 0, 0),
datetime.datetime(2000, 11, 1, 0, 0),
datetime.datetime(2000, 12, 1, 0, 0),
datetime.datetime(2001, 1, 1, 0, 0),
datetime.datetime(2001, 2, 1, 0, 0),
datetime.datetime(2001, 3, 1, 0, 0),
datetime.datetime(2001, 4, 1, 0, 0),
datetime.datetime(2001, 5, 1, 0, 0),
datetime.datetime(2001, 6, 1, 0, 0),
datetime.datetime(2001, 7, 1, 0, 0),
datetime.datetime(2001, 8, 1, 0, 0),
datetime.datetime(2001, 9, 1, 0, 0),
datetime.datetime(2001, 10, 1, 0, 0),
datetime.datetime(2001, 11, 1, 0, 0),
datetime.datetime(2001, 12, 1, 0, 0),
datetime.datetime(2002, 1, 1, 0, 0),
datetime.datetime(2002, 2, 1, 0, 0),
datetime.datetime(2002, 3, 1, 0, 0),
datetime.datetime(2002, 4, 1, 0, 0),
datetime.datetime(2002, 5, 1, 0, 0),
datetime.datetime(2002, 6, 1, 0, 0),
datetime.datetime(2002, 7, 1, 0, 0),
datetime.datetime(2002, 8, 1, 0, 0),
datetime.datetime(2002, 9, 1, 0, 0),
datetime.datetime(2002, 10, 1, 0, 0),
datetime.datetime(2002, 11, 1, 0, 0),
datetime.datetime(2002, 12, 1, 0, 0),
datetime.datetime(2003, 1, 1, 0, 0),
datetime.datetime(2003, 2, 1, 0, 0),
datetime.datetime(2003, 3, 1, 0, 0),
datetime.datetime(2003, 4, 1, 0, 0),
datetime.datetime(2003, 5, 1, 0, 0),
datetime.datetime(2003, 6, 1, 0, 0),
datetime.datetime(2003, 7, 1, 0, 0),
datetime.datetime(2003, 8, 1, 0, 0),
datetime.datetime(2003, 9, 1, 0, 0),
datetime.datetime(2003, 10, 1, 0, 0),
datetime.datetime(2003, 11, 1, 0, 0),
datetime.datetime(2003, 12, 1, 0, 0),
datetime.datetime(2004, 1, 1, 0, 0),
datetime.datetime(2004, 2, 1, 0, 0),
datetime.datetime(2004, 3, 1, 0, 0),
datetime.datetime(2004, 4, 1, 0, 0),
datetime.datetime(2004, 5, 1, 0, 0),
datetime.datetime(2004, 6, 1, 0, 0),
datetime.datetime(2004, 7, 1, 0, 0),
datetime.datetime(2004, 8, 1, 0, 0),
datetime.datetime(2004, 9, 1, 0, 0),
datetime.datetime(2004, 10, 1, 0, 0),
datetime.datetime(2004, 11, 1, 0, 0),
datetime.datetime(2004, 12, 1, 0, 0),
datetime.datetime(2005, 1, 1, 0, 0),
datetime.datetime(2005, 2, 1, 0, 0),
datetime.datetime(2005, 3, 1, 0, 0),
datetime.datetime(2005, 4, 1, 0, 0),
datetime.datetime(2005, 5, 1, 0, 0),
datetime.datetime(2005, 6, 1, 0, 0),
datetime.datetime(2005, 7, 1, 0, 0),
datetime.datetime(2005, 8, 1, 0, 0),
datetime.datetime(2005, 9, 1, 0, 0),
datetime.datetime(2005, 10, 1, 0, 0),
datetime.datetime(2005, 11, 1, 0, 0),
datetime.datetime(2005, 12, 1, 0, 0),
datetime.datetime(2006, 1, 1, 0, 0),
datetime.datetime(2006, 2, 1, 0, 0),
datetime.datetime(2006, 3, 1, 0, 0),
datetime.datetime(2006, 4, 1, 0, 0),
datetime.datetime(2006, 5, 1, 0, 0),
datetime.datetime(2006, 6, 1, 0, 0),
datetime.datetime(2006, 7, 1, 0, 0),
datetime.datetime(2006, 8, 1, 0, 0),
datetime.datetime(2006, 9, 1, 0, 0),
datetime.datetime(2006, 10, 1, 0, 0),
datetime.datetime(2006, 11, 1, 0, 0),
datetime.datetime(2006, 12, 1, 0, 0),
datetime.datetime(2007, 1, 1, 0, 0),
datetime.datetime(2007, 2, 1, 0, 0),
datetime.datetime(2007, 3, 1, 0, 0),
datetime.datetime(2007, 4, 1, 0, 0),
datetime.datetime(2007, 5, 1, 0, 0),
datetime.datetime(2007, 6, 1, 0, 0),
datetime.datetime(2007, 7, 1, 0, 0),
datetime.datetime(2007, 8, 1, 0, 0),
datetime.datetime(2007, 9, 1, 0, 0),
datetime.datetime(2007, 10, 1, 0, 0),
datetime.datetime(2007, 11, 1, 0, 0),
datetime.datetime(2007, 12, 1, 0, 0),
datetime.datetime(2008, 1, 1, 0, 0),
datetime.datetime(2008, 2, 1, 0, 0),
datetime.datetime(2008, 3, 1, 0, 0),
datetime.datetime(2008, 4, 1, 0, 0),
datetime.datetime(2008, 5, 1, 0, 0),
datetime.datetime(2008, 6, 1, 0, 0),
datetime.datetime(2008, 7, 1, 0, 0),
datetime.datetime(2008, 8, 1, 0, 0),
datetime.datetime(2008, 9, 1, 0, 0),
datetime.datetime(2008, 10, 1, 0, 0),
datetime.datetime(2008, 11, 1, 0, 0),
datetime.datetime(2008, 12, 1, 0, 0),
datetime.datetime(2009, 1, 1, 0, 0),
datetime.datetime(2009, 2, 1, 0, 0),
datetime.datetime(2009, 3, 1, 0, 0),
datetime.datetime(2009, 4, 1, 0, 0),
datetime.datetime(2009, 5, 1, 0, 0),
datetime.datetime(2009, 6, 1, 0, 0),
datetime.datetime(2009, 7, 1, 0, 0),
datetime.datetime(2009, 8, 1, 0, 0),
datetime.datetime(2009, 9, 1, 0, 0),
datetime.datetime(2009, 10, 1, 0, 0),
datetime.datetime(2009, 11, 1, 0, 0),
datetime.datetime(2009, 12, 1, 0, 0),
datetime.datetime(2010, 1, 1, 0, 0),
datetime.datetime(2010, 2, 1, 0, 0),
datetime.datetime(2010, 3, 1, 0, 0),
datetime.datetime(2010, 4, 1, 0, 0),
datetime.datetime(2010, 5, 1, 0, 0),
datetime.datetime(2010, 6, 1, 0, 0),
datetime.datetime(2010, 7, 1, 0, 0),
datetime.datetime(2010, 8, 1, 0, 0),
datetime.datetime(2010, 9, 1, 0, 0),
datetime.datetime(2010, 10, 1, 0, 0),
datetime.datetime(2010, 11, 1, 0, 0),
datetime.datetime(2010, 12, 1, 0, 0),
datetime.datetime(2011, 1, 1, 0, 0),
datetime.datetime(2011, 2, 1, 0, 0),
datetime.datetime(2011, 3, 1, 0, 0),
datetime.datetime(2011, 4, 1, 0, 0),
datetime.datetime(2011, 5, 1, 0, 0),
datetime.datetime(2011, 6, 1, 0, 0),
datetime.datetime(2011, 7, 1, 0, 0),
datetime.datetime(2011, 8, 1, 0, 0),
datetime.datetime(2011, 9, 1, 0, 0),
datetime.datetime(2011, 10, 1, 0, 0),
datetime.datetime(2011, 11, 1, 0, 0),
datetime.datetime(2011, 12, 1, 0, 0),
datetime.datetime(2012, 1, 1, 0, 0),
datetime.datetime(2012, 2, 1, 0, 0),
datetime.datetime(2012, 3, 1, 0, 0),
datetime.datetime(2012, 4, 1, 0, 0),
datetime.datetime(2012, 5, 1, 0, 0),
datetime.datetime(2012, 6, 1, 0, 0)]
In [82]:
pd.DatetimeIndex(dates)
Out[82]:
<class 'pandas.tseries.index.DatetimeIndex'>
[1990-01-01 00:00:00, ..., 2012-06-01 00:00:00]
Length: 270, Freq: None, Timezone: None
In [83]:
# Series in, Series out
pd.to_datetime(fred.DATE)
Out[83]:
0 1990-01-01 00:00:00
1 1990-02-01 00:00:00
2 1990-03-01 00:00:00
3 1990-04-01 00:00:00
4 1990-05-01 00:00:00
5 1990-06-01 00:00:00
6 1990-07-01 00:00:00
7 1990-08-01 00:00:00
8 1990-09-01 00:00:00
9 1990-10-01 00:00:00
10 1990-11-01 00:00:00
11 1990-12-01 00:00:00
12 1991-01-01 00:00:00
13 1991-02-01 00:00:00
14 1991-03-01 00:00:00
...
255 2011-04-01 00:00:00
256 2011-05-01 00:00:00
257 2011-06-01 00:00:00
258 2011-07-01 00:00:00
259 2011-08-01 00:00:00
260 2011-09-01 00:00:00
261 2011-10-01 00:00:00
262 2011-11-01 00:00:00
263 2011-12-01 00:00:00
264 2012-01-01 00:00:00
265 2012-02-01 00:00:00
266 2012-03-01 00:00:00
267 2012-04-01 00:00:00
268 2012-05-01 00:00:00
269 2012-06-01 00:00:00
Name: DATE, Length: 270, dtype: datetime64[ns]
In [84]:
# NumPy array in, DatetimeIndex out
pd.to_datetime(fred.DATE.values)
Out[84]:
<class 'pandas.tseries.index.DatetimeIndex'>
[1990-01-01 00:00:00, ..., 2012-06-01 00:00:00]
Length: 270, Freq: None, Timezone: None
In [86]:
pd.to_datetime(fred.DATE, format='%Y-%m-%d %H:%M:%S')
Out[86]:
0 1990-01-01 00:00:00
1 1990-02-01 00:00:00
2 1990-03-01 00:00:00
3 1990-04-01 00:00:00
4 1990-05-01 00:00:00
5 1990-06-01 00:00:00
6 1990-07-01 00:00:00
7 1990-08-01 00:00:00
8 1990-09-01 00:00:00
9 1990-10-01 00:00:00
10 1990-11-01 00:00:00
11 1990-12-01 00:00:00
12 1991-01-01 00:00:00
13 1991-02-01 00:00:00
14 1991-03-01 00:00:00
...
255 2011-04-01 00:00:00
256 2011-05-01 00:00:00
257 2011-06-01 00:00:00
258 2011-07-01 00:00:00
259 2011-08-01 00:00:00
260 2011-09-01 00:00:00
261 2011-10-01 00:00:00
262 2011-11-01 00:00:00
263 2011-12-01 00:00:00
264 2012-01-01 00:00:00
265 2012-02-01 00:00:00
266 2012-03-01 00:00:00
267 2012-04-01 00:00:00
268 2012-05-01 00:00:00
269 2012-06-01 00:00:00
Name: DATE, Length: 270, dtype: datetime64[ns]
In [88]:
pd.to_datetime(['96/21/05'], format='%y/%d/%m')
Out[88]:
<class 'pandas.tseries.index.DatetimeIndex'>
[1996-05-21 00:00:00]
Length: 1, Freq: None, Timezone: None
In [89]:
fred
Out[89]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 270 entries, 0 to 269
Data columns (total 4 columns):
DATE 270 non-null values
EXJPUS 270 non-null values
INTGSBJPM193N 269 non-null values
TB6MS 270 non-null values
dtypes: float64(3), object(1)
In [90]:
print fred.to_string()
DATE EXJPUS INTGSBJPM193N TB6MS
0 1990-01-01 00:00:00 144.9819 6.640 7.55
1 1990-02-01 00:00:00 145.6932 6.920 7.70
2 1990-03-01 00:00:00 153.3082 7.360 7.85
3 1990-04-01 00:00:00 158.4586 7.240 7.84
4 1990-05-01 00:00:00 154.0441 6.700 7.76
5 1990-06-01 00:00:00 153.6957 7.060 7.63
6 1990-07-01 00:00:00 149.0395 7.480 7.52
7 1990-08-01 00:00:00 147.4609 8.190 7.38
8 1990-09-01 00:00:00 138.4405 8.620 7.32
9 1990-10-01 00:00:00 129.5909 7.680 7.16
10 1990-11-01 00:00:00 129.2155 7.400 7.03
11 1990-12-01 00:00:00 133.8890 7.060 6.70
12 1991-01-01 00:00:00 133.6986 6.830 6.28
13 1991-02-01 00:00:00 130.5358 6.660 5.93
14 1991-03-01 00:00:00 137.3867 6.890 5.92
15 1991-04-01 00:00:00 137.1127 7.000 5.71
16 1991-05-01 00:00:00 138.2218 6.870 5.61
17 1991-06-01 00:00:00 139.7475 7.120 5.75
18 1991-07-01 00:00:00 137.8300 6.800 5.70
19 1991-08-01 00:00:00 136.8164 6.550 5.39
20 1991-09-01 00:00:00 134.2995 6.110 5.25
21 1991-10-01 00:00:00 130.7723 6.020 5.04
22 1991-11-01 00:00:00 129.6321 5.930 4.61
23 1991-12-01 00:00:00 128.0395 5.530 4.10
24 1992-01-01 00:00:00 125.4614 5.530 3.87
25 1992-02-01 00:00:00 127.6989 5.410 3.93
26 1992-03-01 00:00:00 132.8627 5.330 4.18
27 1992-04-01 00:00:00 133.5395 5.480 3.87
28 1992-05-01 00:00:00 130.7710 5.410 3.75
29 1992-06-01 00:00:00 126.8355 5.140 3.77
30 1992-07-01 00:00:00 125.8817 4.740 3.28
31 1992-08-01 00:00:00 126.2310 4.620 3.21
32 1992-09-01 00:00:00 122.5967 4.490 2.96
33 1992-10-01 00:00:00 121.1652 4.410 3.04
34 1992-11-01 00:00:00 123.8800 4.400 3.34
35 1992-12-01 00:00:00 124.0409 4.370 3.36
36 1993-01-01 00:00:00 124.9932 4.310 3.14
37 1993-02-01 00:00:00 120.7595 3.760 3.07
38 1993-03-01 00:00:00 117.0174 4.170 3.05
39 1993-04-01 00:00:00 112.4114 4.260 2.97
40 1993-05-01 00:00:00 110.3430 4.350 3.07
41 1993-06-01 00:00:00 107.4118 4.090 3.20
42 1993-07-01 00:00:00 107.6914 3.900 3.16
43 1993-08-01 00:00:00 103.7650 3.690 3.14
44 1993-09-01 00:00:00 105.5748 3.360 3.06
45 1993-10-01 00:00:00 107.0200 3.110 3.12
46 1993-11-01 00:00:00 107.8765 2.730 3.26
47 1993-12-01 00:00:00 109.9130 2.580 3.23
48 1994-01-01 00:00:00 111.4415 3.290 3.15
49 1994-02-01 00:00:00 106.3011 3.510 3.43
50 1994-03-01 00:00:00 105.0974 3.460 3.78
51 1994-04-01 00:00:00 103.4843 3.510 4.09
52 1994-05-01 00:00:00 103.7533 3.290 4.60
53 1994-06-01 00:00:00 102.5264 3.670 4.55
54 1994-07-01 00:00:00 98.4450 3.800 4.75
55 1994-08-01 00:00:00 99.9404 4.140 4.88
56 1994-09-01 00:00:00 98.7743 3.920 5.04
57 1994-10-01 00:00:00 98.3530 4.080 5.39
58 1994-11-01 00:00:00 98.0440 3.990 5.72
59 1994-12-01 00:00:00 100.1824 3.910 6.21
60 1995-01-01 00:00:00 99.7660 3.950 6.21
61 1995-02-01 00:00:00 98.2368 3.740 6.03
62 1995-03-01 00:00:00 90.5196 3.010 5.89
63 1995-04-01 00:00:00 83.6895 2.790 5.77
64 1995-05-01 00:00:00 85.1127 2.160 5.67
65 1995-06-01 00:00:00 84.6355 2.140 5.42
66 1995-07-01 00:00:00 87.3970 2.170 5.37
67 1995-08-01 00:00:00 94.7383 2.410 5.41
68 1995-09-01 00:00:00 100.5455 1.950 5.30
69 1995-10-01 00:00:00 100.8390 2.010 5.32
70 1995-11-01 00:00:00 101.9400 1.880 5.27
71 1995-12-01 00:00:00 101.8495 2.170 5.13
72 1996-01-01 00:00:00 105.7514 2.320 4.92
73 1996-02-01 00:00:00 105.7880 2.580 4.77
74 1996-03-01 00:00:00 105.9400 2.300 4.96
75 1996-04-01 00:00:00 107.1995 2.550 5.06
76 1996-05-01 00:00:00 106.3423 2.350 5.12
77 1996-06-01 00:00:00 108.9600 2.400 5.25
78 1996-07-01 00:00:00 109.1909 2.530 5.30
79 1996-08-01 00:00:00 107.8659 2.140 5.13
80 1996-09-01 00:00:00 109.9310 2.050 5.24
81 1996-10-01 00:00:00 112.4123 1.800 5.11
82 1996-11-01 00:00:00 112.2958 1.750 5.07
83 1996-12-01 00:00:00 113.9810 1.930 5.04
84 1997-01-01 00:00:00 117.9124 1.770 5.10
85 1997-02-01 00:00:00 122.9621 1.780 5.06
86 1997-03-01 00:00:00 122.7738 1.750 5.26
87 1997-04-01 00:00:00 125.6377 1.850 5.37
88 1997-05-01 00:00:00 119.1924 2.030 5.30
89 1997-06-01 00:00:00 114.2857 1.920 5.13
90 1997-07-01 00:00:00 115.3759 1.700 5.12
91 1997-08-01 00:00:00 117.9295 1.600 5.19
92 1997-09-01 00:00:00 120.8900 1.540 5.09
93 1997-10-01 00:00:00 121.0605 1.350 5.09
94 1997-11-01 00:00:00 125.3817 1.430 5.17
95 1997-12-01 00:00:00 129.7341 1.530 5.24
96 1998-01-01 00:00:00 129.5475 1.420 5.03
97 1998-02-01 00:00:00 125.8516 1.400 5.07
98 1998-03-01 00:00:00 129.0823 1.280 5.04
99 1998-04-01 00:00:00 131.7536 1.080 5.06
100 1998-05-01 00:00:00 134.8960 1.080 5.14
101 1998-06-01 00:00:00 140.3305 1.210 5.12
102 1998-07-01 00:00:00 140.7874 1.150 5.03
103 1998-08-01 00:00:00 144.6800 0.980 4.95
104 1998-09-01 00:00:00 134.4805 0.660 4.63
105 1998-10-01 00:00:00 121.0486 0.690 4.05
106 1998-11-01 00:00:00 120.2895 0.850 4.42
107 1998-12-01 00:00:00 117.0709 1.360 4.40
108 1999-01-01 00:00:00 113.2900 2.100 4.33
109 1999-02-01 00:00:00 116.6684 1.995 4.44
110 1999-03-01 00:00:00 119.4730 1.745 4.47
111 1999-04-01 00:00:00 119.7723 1.405 4.37
112 1999-05-01 00:00:00 121.9995 1.480 4.56
113 1999-06-01 00:00:00 120.7245 1.835 4.82
114 1999-07-01 00:00:00 119.3305 1.785 4.58
115 1999-08-01 00:00:00 113.2268 1.910 4.87
116 1999-09-01 00:00:00 106.8752 1.710 4.88
117 1999-10-01 00:00:00 105.9650 1.810 4.98
118 1999-11-01 00:00:00 104.6485 1.830 5.20
119 1999-12-01 00:00:00 102.5843 1.645 5.44
120 2000-01-01 00:00:00 105.2960 1.710 5.50
121 2000-02-01 00:00:00 109.3885 1.835 5.72
122 2000-03-01 00:00:00 106.3074 1.770 5.85
123 2000-04-01 00:00:00 105.6270 1.760 5.81
124 2000-05-01 00:00:00 108.3205 1.655 6.10
125 2000-06-01 00:00:00 106.1255 1.760 5.97
126 2000-07-01 00:00:00 108.2115 1.675 6.00
127 2000-08-01 00:00:00 108.0804 1.895 6.07
128 2000-09-01 00:00:00 106.8375 1.840 5.98
129 2000-10-01 00:00:00 108.4429 1.815 6.04
130 2000-11-01 00:00:00 109.0095 1.615 6.06
131 2000-12-01 00:00:00 112.2090 1.640 5.68
132 2001-01-01 00:00:00 116.6719 1.500 4.95
133 2001-02-01 00:00:00 116.2337 1.300 4.71
134 2001-03-01 00:00:00 121.5050 1.270 4.28
135 2001-04-01 00:00:00 123.7710 1.290 3.85
136 2001-05-01 00:00:00 121.7682 1.240 3.62
137 2001-06-01 00:00:00 122.3510 1.210 3.45
138 2001-07-01 00:00:00 124.4981 1.330 3.45
139 2001-08-01 00:00:00 121.3670 1.375 3.29
140 2001-09-01 00:00:00 118.6117 1.420 2.63
141 2001-10-01 00:00:00 121.4536 1.300 2.12
142 2001-11-01 00:00:00 122.4055 1.355 1.88
143 2001-12-01 00:00:00 127.5945 1.365 1.78
144 2002-01-01 00:00:00 132.6833 1.480 1.73
145 2002-02-01 00:00:00 133.6426 1.525 1.82
146 2002-03-01 00:00:00 131.0610 1.390 2.01
147 2002-04-01 00:00:00 130.7718 1.370 1.93
148 2002-05-01 00:00:00 126.3750 1.385 1.86
149 2002-06-01 00:00:00 123.2905 1.310 1.79
150 2002-07-01 00:00:00 117.8991 1.310 1.70
151 2002-08-01 00:00:00 118.9927 1.175 1.60
152 2002-09-01 00:00:00 121.0780 1.175 1.60
153 2002-10-01 00:00:00 123.9077 0.985 1.56
154 2002-11-01 00:00:00 121.6079 0.995 1.27
155 2002-12-01 00:00:00 121.8929 0.900 1.24
156 2003-01-01 00:00:00 118.8133 0.800 1.20
157 2003-02-01 00:00:00 119.3379 0.780 1.18
158 2003-03-01 00:00:00 118.6871 0.700 1.13
159 2003-04-01 00:00:00 119.8950 0.605 1.14
160 2003-05-01 00:00:00 117.3681 0.530 1.08
161 2003-06-01 00:00:00 118.3290 0.835 0.92
162 2003-07-01 00:00:00 118.6959 0.935 0.95
163 2003-08-01 00:00:00 118.6624 1.455 1.03
164 2003-09-01 00:00:00 114.8000 1.385 1.01
165 2003-10-01 00:00:00 109.4955 1.460 1.00
166 2003-11-01 00:00:00 109.1778 1.295 1.02
167 2003-12-01 00:00:00 107.7377 1.360 0.99
168 2004-01-01 00:00:00 106.2685 1.310 0.97
169 2004-02-01 00:00:00 106.7079 1.225 0.99
170 2004-03-01 00:00:00 108.5157 1.440 0.99
171 2004-04-01 00:00:00 107.6564 1.530 1.09
172 2004-05-01 00:00:00 112.1960 1.520 1.31
173 2004-06-01 00:00:00 109.4336 1.775 1.60
174 2004-07-01 00:00:00 109.4871 1.850 1.66
175 2004-08-01 00:00:00 110.2336 1.535 1.72
176 2004-09-01 00:00:00 110.0914 1.440 1.87
177 2004-10-01 00:00:00 108.7835 1.490 2.00
178 2004-11-01 00:00:00 104.6990 1.445 2.27
179 2004-12-01 00:00:00 103.8104 1.430 2.43
180 2005-01-01 00:00:00 103.3410 1.320 2.61
181 2005-02-01 00:00:00 104.9442 1.465 2.77
182 2005-03-01 00:00:00 105.2543 1.320 3.00
183 2005-04-01 00:00:00 107.1938 1.235 3.05
184 2005-05-01 00:00:00 106.5952 1.240 3.08
185 2005-06-01 00:00:00 108.7473 1.165 3.13
186 2005-07-01 00:00:00 111.9535 1.305 3.42
187 2005-08-01 00:00:00 110.6065 1.345 3.66
188 2005-09-01 00:00:00 111.2390 1.475 3.67
189 2005-10-01 00:00:00 114.8695 1.545 3.99
190 2005-11-01 00:00:00 118.4540 1.430 4.15
191 2005-12-01 00:00:00 118.4624 1.470 4.18
192 2006-01-01 00:00:00 115.4765 1.560 4.31
193 2006-02-01 00:00:00 117.8605 1.580 4.52
194 2006-03-01 00:00:00 117.2778 1.765 4.62
195 2006-04-01 00:00:00 117.0695 1.920 4.72
196 2006-05-01 00:00:00 111.7305 1.810 4.82
197 2006-06-01 00:00:00 114.6250 1.915 4.97
198 2006-07-01 00:00:00 115.7670 1.920 5.06
199 2006-08-01 00:00:00 115.9243 1.610 4.97
200 2006-09-01 00:00:00 117.2145 1.665 4.89
201 2006-10-01 00:00:00 118.6090 1.710 4.92
202 2006-11-01 00:00:00 117.3205 1.645 4.95
203 2006-12-01 00:00:00 117.3220 1.675 4.88
204 2007-01-01 00:00:00 120.4471 1.695 4.95
205 2007-02-01 00:00:00 120.5047 1.625 4.96
206 2007-03-01 00:00:00 117.2600 1.650 4.89
207 2007-04-01 00:00:00 118.9324 1.620 4.86
208 2007-05-01 00:00:00 120.7732 1.745 4.78
209 2007-06-01 00:00:00 122.6886 1.870 4.76
210 2007-07-01 00:00:00 121.4148 1.790 4.83
211 2007-08-01 00:00:00 116.7335 1.595 4.38
212 2007-09-01 00:00:00 115.0435 1.675 4.05
213 2007-10-01 00:00:00 115.8661 1.600 4.01
214 2007-11-01 00:00:00 111.0729 1.470 3.46
215 2007-12-01 00:00:00 112.4490 1.500 3.23
216 2008-01-01 00:00:00 107.8181 1.430 2.75
217 2008-02-01 00:00:00 107.0300 1.355 2.04
218 2008-03-01 00:00:00 100.7562 1.275 1.48
219 2008-04-01 00:00:00 102.6777 1.575 1.55
220 2008-05-01 00:00:00 104.3595 1.750 1.82
221 2008-06-01 00:00:00 106.9152 1.590 2.13
222 2008-07-01 00:00:00 106.8518 1.530 1.93
223 2008-08-01 00:00:00 109.3624 1.405 1.92
224 2008-09-01 00:00:00 106.5748 1.460 1.61
225 2008-10-01 00:00:00 99.9659 1.465 1.20
226 2008-11-01 00:00:00 96.9656 1.390 0.73
227 2008-12-01 00:00:00 91.2750 1.165 0.26
228 2009-01-01 00:00:00 90.1205 1.290 0.30
229 2009-02-01 00:00:00 92.9158 1.270 0.45
230 2009-03-01 00:00:00 97.8550 1.345 0.42
231 2009-04-01 00:00:00 98.9200 1.420 0.35
232 2009-05-01 00:00:00 96.6445 1.480 0.30
233 2009-06-01 00:00:00 96.6145 1.350 0.31
234 2009-07-01 00:00:00 94.3670 1.410 0.27
235 2009-08-01 00:00:00 94.8971 1.305 0.26
236 2009-09-01 00:00:00 91.2748 1.290 0.21
237 2009-10-01 00:00:00 90.3671 1.405 0.16
238 2009-11-01 00:00:00 89.2674 1.260 0.15
239 2009-12-01 00:00:00 89.9509 1.285 0.17
240 2010-01-01 00:00:00 91.1011 1.315 0.15
241 2010-02-01 00:00:00 90.1395 1.295 0.18
242 2010-03-01 00:00:00 90.7161 1.390 0.22
243 2010-04-01 00:00:00 93.4527 1.280 0.24
244 2010-05-01 00:00:00 91.9730 1.255 0.22
245 2010-06-01 00:00:00 90.8059 1.080 0.19
246 2010-07-01 00:00:00 87.5005 1.065 0.20
247 2010-08-01 00:00:00 85.3727 0.960 0.19
248 2010-09-01 00:00:00 84.3571 0.930 0.19
249 2010-10-01 00:00:00 81.7285 0.930 0.18
250 2010-11-01 00:00:00 82.5180 1.190 0.18
251 2010-12-01 00:00:00 83.3376 1.120 0.19
252 2011-01-01 00:00:00 82.6250 1.210 0.18
253 2011-02-01 00:00:00 82.5368 1.255 0.17
254 2011-03-01 00:00:00 81.6470 1.250 0.16
255 2011-04-01 00:00:00 83.1771 1.200 0.12
256 2011-05-01 00:00:00 81.1257 1.150 0.09
257 2011-06-01 00:00:00 80.4259 1.130 0.10
258 2011-07-01 00:00:00 79.2425 1.075 0.08
259 2011-08-01 00:00:00 76.9657 1.030 0.06
260 2011-09-01 00:00:00 76.7957 1.025 0.04
261 2011-10-01 00:00:00 76.6430 1.045 0.05
262 2011-11-01 00:00:00 77.5595 1.070 0.05
263 2011-12-01 00:00:00 77.7967 0.980 0.05
264 2012-01-01 00:00:00 76.9640 0.960 0.07
265 2012-02-01 00:00:00 78.4700 0.955 0.12
266 2012-03-01 00:00:00 82.4659 0.985 0.14
267 2012-04-01 00:00:00 81.2524 0.885 0.14
268 2012-05-01 00:00:00 79.6668 0.825 0.15
269 2012-06-01 00:00:00 79.3152 NaN 0.15
In [93]:
from IPython.display import HTML
HTML(fred.to_html())
Out[93]:
DATE
EXJPUS
INTGSBJPM193N
TB6MS
0
1990-01-01 00:00:00
144.9819
6.640
7.55
1
1990-02-01 00:00:00
145.6932
6.920
7.70
2
1990-03-01 00:00:00
153.3082
7.360
7.85
3
1990-04-01 00:00:00
158.4586
7.240
7.84
4
1990-05-01 00:00:00
154.0441
6.700
7.76
5
1990-06-01 00:00:00
153.6957
7.060
7.63
6
1990-07-01 00:00:00
149.0395
7.480
7.52
7
1990-08-01 00:00:00
147.4609
8.190
7.38
8
1990-09-01 00:00:00
138.4405
8.620
7.32
9
1990-10-01 00:00:00
129.5909
7.680
7.16
10
1990-11-01 00:00:00
129.2155
7.400
7.03
11
1990-12-01 00:00:00
133.8890
7.060
6.70
12
1991-01-01 00:00:00
133.6986
6.830
6.28
13
1991-02-01 00:00:00
130.5358
6.660
5.93
14
1991-03-01 00:00:00
137.3867
6.890
5.92
15
1991-04-01 00:00:00
137.1127
7.000
5.71
16
1991-05-01 00:00:00
138.2218
6.870
5.61
17
1991-06-01 00:00:00
139.7475
7.120
5.75
18
1991-07-01 00:00:00
137.8300
6.800
5.70
19
1991-08-01 00:00:00
136.8164
6.550
5.39
20
1991-09-01 00:00:00
134.2995
6.110
5.25
21
1991-10-01 00:00:00
130.7723
6.020
5.04
22
1991-11-01 00:00:00
129.6321
5.930
4.61
23
1991-12-01 00:00:00
128.0395
5.530
4.10
24
1992-01-01 00:00:00
125.4614
5.530
3.87
25
1992-02-01 00:00:00
127.6989
5.410
3.93
26
1992-03-01 00:00:00
132.8627
5.330
4.18
27
1992-04-01 00:00:00
133.5395
5.480
3.87
28
1992-05-01 00:00:00
130.7710
5.410
3.75
29
1992-06-01 00:00:00
126.8355
5.140
3.77
30
1992-07-01 00:00:00
125.8817
4.740
3.28
31
1992-08-01 00:00:00
126.2310
4.620
3.21
32
1992-09-01 00:00:00
122.5967
4.490
2.96
33
1992-10-01 00:00:00
121.1652
4.410
3.04
34
1992-11-01 00:00:00
123.8800
4.400
3.34
35
1992-12-01 00:00:00
124.0409
4.370
3.36
36
1993-01-01 00:00:00
124.9932
4.310
3.14
37
1993-02-01 00:00:00
120.7595
3.760
3.07
38
1993-03-01 00:00:00
117.0174
4.170
3.05
39
1993-04-01 00:00:00
112.4114
4.260
2.97
40
1993-05-01 00:00:00
110.3430
4.350
3.07
41
1993-06-01 00:00:00
107.4118
4.090
3.20
42
1993-07-01 00:00:00
107.6914
3.900
3.16
43
1993-08-01 00:00:00
103.7650
3.690
3.14
44
1993-09-01 00:00:00
105.5748
3.360
3.06
45
1993-10-01 00:00:00
107.0200
3.110
3.12
46
1993-11-01 00:00:00
107.8765
2.730
3.26
47
1993-12-01 00:00:00
109.9130
2.580
3.23
48
1994-01-01 00:00:00
111.4415
3.290
3.15
49
1994-02-01 00:00:00
106.3011
3.510
3.43
50
1994-03-01 00:00:00
105.0974
3.460
3.78
51
1994-04-01 00:00:00
103.4843
3.510
4.09
52
1994-05-01 00:00:00
103.7533
3.290
4.60
53
1994-06-01 00:00:00
102.5264
3.670
4.55
54
1994-07-01 00:00:00
98.4450
3.800
4.75
55
1994-08-01 00:00:00
99.9404
4.140
4.88
56
1994-09-01 00:00:00
98.7743
3.920
5.04
57
1994-10-01 00:00:00
98.3530
4.080
5.39
58
1994-11-01 00:00:00
98.0440
3.990
5.72
59
1994-12-01 00:00:00
100.1824
3.910
6.21
60
1995-01-01 00:00:00
99.7660
3.950
6.21
61
1995-02-01 00:00:00
98.2368
3.740
6.03
62
1995-03-01 00:00:00
90.5196
3.010
5.89
63
1995-04-01 00:00:00
83.6895
2.790
5.77
64
1995-05-01 00:00:00
85.1127
2.160
5.67
65
1995-06-01 00:00:00
84.6355
2.140
5.42
66
1995-07-01 00:00:00
87.3970
2.170
5.37
67
1995-08-01 00:00:00
94.7383
2.410
5.41
68
1995-09-01 00:00:00
100.5455
1.950
5.30
69
1995-10-01 00:00:00
100.8390
2.010
5.32
70
1995-11-01 00:00:00
101.9400
1.880
5.27
71
1995-12-01 00:00:00
101.8495
2.170
5.13
72
1996-01-01 00:00:00
105.7514
2.320
4.92
73
1996-02-01 00:00:00
105.7880
2.580
4.77
74
1996-03-01 00:00:00
105.9400
2.300
4.96
75
1996-04-01 00:00:00
107.1995
2.550
5.06
76
1996-05-01 00:00:00
106.3423
2.350
5.12
77
1996-06-01 00:00:00
108.9600
2.400
5.25
78
1996-07-01 00:00:00
109.1909
2.530
5.30
79
1996-08-01 00:00:00
107.8659
2.140
5.13
80
1996-09-01 00:00:00
109.9310
2.050
5.24
81
1996-10-01 00:00:00
112.4123
1.800
5.11
82
1996-11-01 00:00:00
112.2958
1.750
5.07
83
1996-12-01 00:00:00
113.9810
1.930
5.04
84
1997-01-01 00:00:00
117.9124
1.770
5.10
85
1997-02-01 00:00:00
122.9621
1.780
5.06
86
1997-03-01 00:00:00
122.7738
1.750
5.26
87
1997-04-01 00:00:00
125.6377
1.850
5.37
88
1997-05-01 00:00:00
119.1924
2.030
5.30
89
1997-06-01 00:00:00
114.2857
1.920
5.13
90
1997-07-01 00:00:00
115.3759
1.700
5.12
91
1997-08-01 00:00:00
117.9295
1.600
5.19
92
1997-09-01 00:00:00
120.8900
1.540
5.09
93
1997-10-01 00:00:00
121.0605
1.350
5.09
94
1997-11-01 00:00:00
125.3817
1.430
5.17
95
1997-12-01 00:00:00
129.7341
1.530
5.24
96
1998-01-01 00:00:00
129.5475
1.420
5.03
97
1998-02-01 00:00:00
125.8516
1.400
5.07
98
1998-03-01 00:00:00
129.0823
1.280
5.04
99
1998-04-01 00:00:00
131.7536
1.080
5.06
100
1998-05-01 00:00:00
134.8960
1.080
5.14
101
1998-06-01 00:00:00
140.3305
1.210
5.12
102
1998-07-01 00:00:00
140.7874
1.150
5.03
103
1998-08-01 00:00:00
144.6800
0.980
4.95
104
1998-09-01 00:00:00
134.4805
0.660
4.63
105
1998-10-01 00:00:00
121.0486
0.690
4.05
106
1998-11-01 00:00:00
120.2895
0.850
4.42
107
1998-12-01 00:00:00
117.0709
1.360
4.40
108
1999-01-01 00:00:00
113.2900
2.100
4.33
109
1999-02-01 00:00:00
116.6684
1.995
4.44
110
1999-03-01 00:00:00
119.4730
1.745
4.47
111
1999-04-01 00:00:00
119.7723
1.405
4.37
112
1999-05-01 00:00:00
121.9995
1.480
4.56
113
1999-06-01 00:00:00
120.7245
1.835
4.82
114
1999-07-01 00:00:00
119.3305
1.785
4.58
115
1999-08-01 00:00:00
113.2268
1.910
4.87
116
1999-09-01 00:00:00
106.8752
1.710
4.88
117
1999-10-01 00:00:00
105.9650
1.810
4.98
118
1999-11-01 00:00:00
104.6485
1.830
5.20
119
1999-12-01 00:00:00
102.5843
1.645
5.44
120
2000-01-01 00:00:00
105.2960
1.710
5.50
121
2000-02-01 00:00:00
109.3885
1.835
5.72
122
2000-03-01 00:00:00
106.3074
1.770
5.85
123
2000-04-01 00:00:00
105.6270
1.760
5.81
124
2000-05-01 00:00:00
108.3205
1.655
6.10
125
2000-06-01 00:00:00
106.1255
1.760
5.97
126
2000-07-01 00:00:00
108.2115
1.675
6.00
127
2000-08-01 00:00:00
108.0804
1.895
6.07
128
2000-09-01 00:00:00
106.8375
1.840
5.98
129
2000-10-01 00:00:00
108.4429
1.815
6.04
130
2000-11-01 00:00:00
109.0095
1.615
6.06
131
2000-12-01 00:00:00
112.2090
1.640
5.68
132
2001-01-01 00:00:00
116.6719
1.500
4.95
133
2001-02-01 00:00:00
116.2337
1.300
4.71
134
2001-03-01 00:00:00
121.5050
1.270
4.28
135
2001-04-01 00:00:00
123.7710
1.290
3.85
136
2001-05-01 00:00:00
121.7682
1.240
3.62
137
2001-06-01 00:00:00
122.3510
1.210
3.45
138
2001-07-01 00:00:00
124.4981
1.330
3.45
139
2001-08-01 00:00:00
121.3670
1.375
3.29
140
2001-09-01 00:00:00
118.6117
1.420
2.63
141
2001-10-01 00:00:00
121.4536
1.300
2.12
142
2001-11-01 00:00:00
122.4055
1.355
1.88
143
2001-12-01 00:00:00
127.5945
1.365
1.78
144
2002-01-01 00:00:00
132.6833
1.480
1.73
145
2002-02-01 00:00:00
133.6426
1.525
1.82
146
2002-03-01 00:00:00
131.0610
1.390
2.01
147
2002-04-01 00:00:00
130.7718
1.370
1.93
148
2002-05-01 00:00:00
126.3750
1.385
1.86
149
2002-06-01 00:00:00
123.2905
1.310
1.79
150
2002-07-01 00:00:00
117.8991
1.310
1.70
151
2002-08-01 00:00:00
118.9927
1.175
1.60
152
2002-09-01 00:00:00
121.0780
1.175
1.60
153
2002-10-01 00:00:00
123.9077
0.985
1.56
154
2002-11-01 00:00:00
121.6079
0.995
1.27
155
2002-12-01 00:00:00
121.8929
0.900
1.24
156
2003-01-01 00:00:00
118.8133
0.800
1.20
157
2003-02-01 00:00:00
119.3379
0.780
1.18
158
2003-03-01 00:00:00
118.6871
0.700
1.13
159
2003-04-01 00:00:00
119.8950
0.605
1.14
160
2003-05-01 00:00:00
117.3681
0.530
1.08
161
2003-06-01 00:00:00
118.3290
0.835
0.92
162
2003-07-01 00:00:00
118.6959
0.935
0.95
163
2003-08-01 00:00:00
118.6624
1.455
1.03
164
2003-09-01 00:00:00
114.8000
1.385
1.01
165
2003-10-01 00:00:00
109.4955
1.460
1.00
166
2003-11-01 00:00:00
109.1778
1.295
1.02
167
2003-12-01 00:00:00
107.7377
1.360
0.99
168
2004-01-01 00:00:00
106.2685
1.310
0.97
169
2004-02-01 00:00:00
106.7079
1.225
0.99
170
2004-03-01 00:00:00
108.5157
1.440
0.99
171
2004-04-01 00:00:00
107.6564
1.530
1.09
172
2004-05-01 00:00:00
112.1960
1.520
1.31
173
2004-06-01 00:00:00
109.4336
1.775
1.60
174
2004-07-01 00:00:00
109.4871
1.850
1.66
175
2004-08-01 00:00:00
110.2336
1.535
1.72
176
2004-09-01 00:00:00
110.0914
1.440
1.87
177
2004-10-01 00:00:00
108.7835
1.490
2.00
178
2004-11-01 00:00:00
104.6990
1.445
2.27
179
2004-12-01 00:00:00
103.8104
1.430
2.43
180
2005-01-01 00:00:00
103.3410
1.320
2.61
181
2005-02-01 00:00:00
104.9442
1.465
2.77
182
2005-03-01 00:00:00
105.2543
1.320
3.00
183
2005-04-01 00:00:00
107.1938
1.235
3.05
184
2005-05-01 00:00:00
106.5952
1.240
3.08
185
2005-06-01 00:00:00
108.7473
1.165
3.13
186
2005-07-01 00:00:00
111.9535
1.305
3.42
187
2005-08-01 00:00:00
110.6065
1.345
3.66
188
2005-09-01 00:00:00
111.2390
1.475
3.67
189
2005-10-01 00:00:00
114.8695
1.545
3.99
190
2005-11-01 00:00:00
118.4540
1.430
4.15
191
2005-12-01 00:00:00
118.4624
1.470
4.18
192
2006-01-01 00:00:00
115.4765
1.560
4.31
193
2006-02-01 00:00:00
117.8605
1.580
4.52
194
2006-03-01 00:00:00
117.2778
1.765
4.62
195
2006-04-01 00:00:00
117.0695
1.920
4.72
196
2006-05-01 00:00:00
111.7305
1.810
4.82
197
2006-06-01 00:00:00
114.6250
1.915
4.97
198
2006-07-01 00:00:00
115.7670
1.920
5.06
199
2006-08-01 00:00:00
115.9243
1.610
4.97
200
2006-09-01 00:00:00
117.2145
1.665
4.89
201
2006-10-01 00:00:00
118.6090
1.710
4.92
202
2006-11-01 00:00:00
117.3205
1.645
4.95
203
2006-12-01 00:00:00
117.3220
1.675
4.88
204
2007-01-01 00:00:00
120.4471
1.695
4.95
205
2007-02-01 00:00:00
120.5047
1.625
4.96
206
2007-03-01 00:00:00
117.2600
1.650
4.89
207
2007-04-01 00:00:00
118.9324
1.620
4.86
208
2007-05-01 00:00:00
120.7732
1.745
4.78
209
2007-06-01 00:00:00
122.6886
1.870
4.76
210
2007-07-01 00:00:00
121.4148
1.790
4.83
211
2007-08-01 00:00:00
116.7335
1.595
4.38
212
2007-09-01 00:00:00
115.0435
1.675
4.05
213
2007-10-01 00:00:00
115.8661
1.600
4.01
214
2007-11-01 00:00:00
111.0729
1.470
3.46
215
2007-12-01 00:00:00
112.4490
1.500
3.23
216
2008-01-01 00:00:00
107.8181
1.430
2.75
217
2008-02-01 00:00:00
107.0300
1.355
2.04
218
2008-03-01 00:00:00
100.7562
1.275
1.48
219
2008-04-01 00:00:00
102.6777
1.575
1.55
220
2008-05-01 00:00:00
104.3595
1.750
1.82
221
2008-06-01 00:00:00
106.9152
1.590
2.13
222
2008-07-01 00:00:00
106.8518
1.530
1.93
223
2008-08-01 00:00:00
109.3624
1.405
1.92
224
2008-09-01 00:00:00
106.5748
1.460
1.61
225
2008-10-01 00:00:00
99.9659
1.465
1.20
226
2008-11-01 00:00:00
96.9656
1.390
0.73
227
2008-12-01 00:00:00
91.2750
1.165
0.26
228
2009-01-01 00:00:00
90.1205
1.290
0.30
229
2009-02-01 00:00:00
92.9158
1.270
0.45
230
2009-03-01 00:00:00
97.8550
1.345
0.42
231
2009-04-01 00:00:00
98.9200
1.420
0.35
232
2009-05-01 00:00:00
96.6445
1.480
0.30
233
2009-06-01 00:00:00
96.6145
1.350
0.31
234
2009-07-01 00:00:00
94.3670
1.410
0.27
235
2009-08-01 00:00:00
94.8971
1.305
0.26
236
2009-09-01 00:00:00
91.2748
1.290
0.21
237
2009-10-01 00:00:00
90.3671
1.405
0.16
238
2009-11-01 00:00:00
89.2674
1.260
0.15
239
2009-12-01 00:00:00
89.9509
1.285
0.17
240
2010-01-01 00:00:00
91.1011
1.315
0.15
241
2010-02-01 00:00:00
90.1395
1.295
0.18
242
2010-03-01 00:00:00
90.7161
1.390
0.22
243
2010-04-01 00:00:00
93.4527
1.280
0.24
244
2010-05-01 00:00:00
91.9730
1.255
0.22
245
2010-06-01 00:00:00
90.8059
1.080
0.19
246
2010-07-01 00:00:00
87.5005
1.065
0.20
247
2010-08-01 00:00:00
85.3727
0.960
0.19
248
2010-09-01 00:00:00
84.3571
0.930
0.19
249
2010-10-01 00:00:00
81.7285
0.930
0.18
250
2010-11-01 00:00:00
82.5180
1.190
0.18
251
2010-12-01 00:00:00
83.3376
1.120
0.19
252
2011-01-01 00:00:00
82.6250
1.210
0.18
253
2011-02-01 00:00:00
82.5368
1.255
0.17
254
2011-03-01 00:00:00
81.6470
1.250
0.16
255
2011-04-01 00:00:00
83.1771
1.200
0.12
256
2011-05-01 00:00:00
81.1257
1.150
0.09
257
2011-06-01 00:00:00
80.4259
1.130
0.10
258
2011-07-01 00:00:00
79.2425
1.075
0.08
259
2011-08-01 00:00:00
76.9657
1.030
0.06
260
2011-09-01 00:00:00
76.7957
1.025
0.04
261
2011-10-01 00:00:00
76.6430
1.045
0.05
262
2011-11-01 00:00:00
77.5595
1.070
0.05
263
2011-12-01 00:00:00
77.7967
0.980
0.05
264
2012-01-01 00:00:00
76.9640
0.960
0.07
265
2012-02-01 00:00:00
78.4700
0.955
0.12
266
2012-03-01 00:00:00
82.4659
0.985
0.14
267
2012-04-01 00:00:00
81.2524
0.885
0.14
268
2012-05-01 00:00:00
79.6668
0.825
0.15
269
2012-06-01 00:00:00
79.3152
NaN
0.15
In [95]:
# let's try this again
fred = pd.read_csv('fred_fx.csv')
fred.dtypes
Out[95]:
DATE object
EXJPUS float64
INTGSBJPM193N float64
TB6MS float64
dtype: object
In [96]:
fred['DATE'] = pd.to_datetime(fred['DATE'])
fred.dtypes
Out[96]:
DATE datetime64[ns]
EXJPUS float64
INTGSBJPM193N float64
TB6MS float64
dtype: object
In [98]:
fred.index[:50]
Out[98]:
Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], dtype=int64)
In [100]:
# Returns new object without inplace=True!
fred.set_index('DATE', inplace=True)
In [101]:
fred
Out[101]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 270 entries, 1990-01-01 00:00:00 to 2012-06-01 00:00:00
Data columns (total 3 columns):
EXJPUS 270 non-null values
INTGSBJPM193N 269 non-null values
TB6MS 270 non-null values
dtypes: float64(3)
In [102]:
fred.head()
Out[102]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-01-01
144.9819
6.64
7.55
1990-02-01
145.6932
6.92
7.70
1990-03-01
153.3082
7.36
7.85
1990-04-01
158.4586
7.24
7.84
1990-05-01
154.0441
6.70
7.76
In [104]:
fred.EXJPUS
Out[104]:
DATE
1990-01-01 144.9819
1990-02-01 145.6932
1990-03-01 153.3082
1990-04-01 158.4586
1990-05-01 154.0441
1990-06-01 153.6957
1990-07-01 149.0395
1990-08-01 147.4609
1990-09-01 138.4405
1990-10-01 129.5909
1990-11-01 129.2155
1990-12-01 133.8890
1991-01-01 133.6986
1991-02-01 130.5358
1991-03-01 137.3867
...
2011-04-01 83.1771
2011-05-01 81.1257
2011-06-01 80.4259
2011-07-01 79.2425
2011-08-01 76.9657
2011-09-01 76.7957
2011-10-01 76.6430
2011-11-01 77.5595
2011-12-01 77.7967
2012-01-01 76.9640
2012-02-01 78.4700
2012-03-01 82.4659
2012-04-01 81.2524
2012-05-01 79.6668
2012-06-01 79.3152
Name: EXJPUS, Length: 270, dtype: float64
In [106]:
type(fred.EXJPUS)
Out[106]:
[pandas.core.series.TimeSeries,
pandas.core.series.Series,
pandas.core.generic.PandasContainer,
pandas.core.base.PandasObject,
pandas.core.base.StringMixin,
numpy.ndarray,
object]
In [108]:
fred.EXJPUS
Out[108]:
DATE
1990-01-01 144.9819
1990-02-01 145.6932
1990-03-01 153.3082
1990-04-01 158.4586
1990-05-01 154.0441
1990-06-01 153.6957
1990-07-01 149.0395
1990-08-01 147.4609
1990-09-01 138.4405
1990-10-01 129.5909
1990-11-01 129.2155
1990-12-01 133.8890
1991-01-01 133.6986
1991-02-01 130.5358
1991-03-01 137.3867
...
2011-04-01 83.1771
2011-05-01 81.1257
2011-06-01 80.4259
2011-07-01 79.2425
2011-08-01 76.9657
2011-09-01 76.7957
2011-10-01 76.6430
2011-11-01 77.5595
2011-12-01 77.7967
2012-01-01 76.9640
2012-02-01 78.4700
2012-03-01 82.4659
2012-04-01 81.2524
2012-05-01 79.6668
2012-06-01 79.3152
Name: EXJPUS, Length: 270, dtype: float64
In [109]:
fred.index
Out[109]:
<class 'pandas.tseries.index.DatetimeIndex'>
[1990-01-01 00:00:00, ..., 2012-06-01 00:00:00]
Length: 270, Freq: None, Timezone: None
In [110]:
fred.index[5]
Out[110]:
Timestamp('1990-06-01 00:00:00', tz=None)
In [113]:
stamp = fred.index[5]
stamp
Out[113]:
Timestamp('1990-06-01 00:00:00', tz=None)
In [115]:
stamp.year, stamp.month, stamp.day
Out[115]:
(1990, 6, 1)
In [116]:
stamp.weekday()
Out[116]:
4
In [117]:
fred.index.year
Out[117]:
array([1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990,
1990, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991,
1991, 1991, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992,
1992, 1992, 1992, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993,
1993, 1993, 1993, 1993, 1994, 1994, 1994, 1994, 1994, 1994, 1994,
1994, 1994, 1994, 1994, 1994, 1995, 1995, 1995, 1995, 1995, 1995,
1995, 1995, 1995, 1995, 1995, 1995, 1996, 1996, 1996, 1996, 1996,
1996, 1996, 1996, 1996, 1996, 1996, 1996, 1997, 1997, 1997, 1997,
1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1999, 1999,
1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 2000,
2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,
2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001,
2001, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002,
2002, 2002, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003,
2003, 2003, 2003, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004,
2004, 2004, 2004, 2004, 2005, 2005, 2005, 2005, 2005, 2005, 2005,
2005, 2005, 2005, 2005, 2005, 2006, 2006, 2006, 2006, 2006, 2006,
2006, 2006, 2006, 2006, 2006, 2006, 2007, 2007, 2007, 2007, 2007,
2007, 2007, 2007, 2007, 2007, 2007, 2007, 2008, 2008, 2008, 2008,
2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2009, 2009, 2009,
2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2010, 2010,
2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2011,
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011,
2012, 2012, 2012, 2012, 2012, 2012], dtype=int32)
In [118]:
fred.index.weekday
Out[118]:
array([0, 3, 3, 6, 1, 4, 6, 2, 5, 0, 3, 5, 1, 4, 4, 0, 2, 5, 0, 3, 6, 1, 4,
6, 2, 5, 6, 2, 4, 0, 2, 5, 1, 3, 6, 1, 4, 0, 0, 3, 5, 1, 3, 6, 2, 4,
0, 2, 5, 1, 1, 4, 6, 2, 4, 0, 3, 5, 1, 3, 6, 2, 2, 5, 0, 3, 5, 1, 4,
6, 2, 4, 0, 3, 4, 0, 2, 5, 0, 3, 6, 1, 4, 6, 2, 5, 5, 1, 3, 6, 1, 4,
0, 2, 5, 0, 3, 6, 6, 2, 4, 0, 2, 5, 1, 3, 6, 1, 4, 0, 0, 3, 5, 1, 3,
6, 2, 4, 0, 2, 5, 1, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4, 0, 3, 3, 6, 1, 4,
6, 2, 5, 0, 3, 5, 1, 4, 4, 0, 2, 5, 0, 3, 6, 1, 4, 6, 2, 5, 5, 1, 3,
6, 1, 4, 0, 2, 5, 0, 3, 6, 0, 3, 5, 1, 3, 6, 2, 4, 0, 2, 5, 1, 1, 4,
6, 2, 4, 0, 3, 5, 1, 3, 6, 2, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4, 0, 3, 3,
6, 1, 4, 6, 2, 5, 0, 3, 5, 1, 4, 5, 1, 3, 6, 1, 4, 0, 2, 5, 0, 3, 6,
6, 2, 4, 0, 2, 5, 1, 3, 6, 1, 4, 0, 0, 3, 5, 1, 3, 6, 2, 4, 0, 2, 5,
1, 1, 4, 6, 2, 4, 0, 3, 5, 1, 3, 6, 2, 3, 6, 1, 4], dtype=int32)
In [119]:
fred
Out[119]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 270 entries, 1990-01-01 00:00:00 to 2012-06-01 00:00:00
Data columns (total 3 columns):
EXJPUS 270 non-null values
INTGSBJPM193N 269 non-null values
TB6MS 270 non-null values
dtypes: float64(3)
In [120]:
fred.EXJPUS
Out[120]:
DATE
1990-01-01 144.9819
1990-02-01 145.6932
1990-03-01 153.3082
1990-04-01 158.4586
1990-05-01 154.0441
1990-06-01 153.6957
1990-07-01 149.0395
1990-08-01 147.4609
1990-09-01 138.4405
1990-10-01 129.5909
1990-11-01 129.2155
1990-12-01 133.8890
1991-01-01 133.6986
1991-02-01 130.5358
1991-03-01 137.3867
...
2011-04-01 83.1771
2011-05-01 81.1257
2011-06-01 80.4259
2011-07-01 79.2425
2011-08-01 76.9657
2011-09-01 76.7957
2011-10-01 76.6430
2011-11-01 77.5595
2011-12-01 77.7967
2012-01-01 76.9640
2012-02-01 78.4700
2012-03-01 82.4659
2012-04-01 81.2524
2012-05-01 79.6668
2012-06-01 79.3152
Name: EXJPUS, Length: 270, dtype: float64
In [121]:
fred.EXJPUS.plot()
Out[121]:
<matplotlib.axes.AxesSubplot at 0x106af4890>
In [122]:
fred.EXJPUS['1999':'2007']
Out[122]:
DATE
1999-01-01 113.2900
1999-02-01 116.6684
1999-03-01 119.4730
1999-04-01 119.7723
1999-05-01 121.9995
1999-06-01 120.7245
1999-07-01 119.3305
1999-08-01 113.2268
1999-09-01 106.8752
1999-10-01 105.9650
1999-11-01 104.6485
1999-12-01 102.5843
2000-01-01 105.2960
2000-02-01 109.3885
2000-03-01 106.3074
...
2006-10-01 118.6090
2006-11-01 117.3205
2006-12-01 117.3220
2007-01-01 120.4471
2007-02-01 120.5047
2007-03-01 117.2600
2007-04-01 118.9324
2007-05-01 120.7732
2007-06-01 122.6886
2007-07-01 121.4148
2007-08-01 116.7335
2007-09-01 115.0435
2007-10-01 115.8661
2007-11-01 111.0729
2007-12-01 112.4490
Name: EXJPUS, Length: 108, dtype: float64
In [123]:
fred.EXJPUS['1999-05':'2001-09']
Out[123]:
DATE
1999-05-01 121.9995
1999-06-01 120.7245
1999-07-01 119.3305
1999-08-01 113.2268
1999-09-01 106.8752
1999-10-01 105.9650
1999-11-01 104.6485
1999-12-01 102.5843
2000-01-01 105.2960
2000-02-01 109.3885
2000-03-01 106.3074
2000-04-01 105.6270
2000-05-01 108.3205
2000-06-01 106.1255
2000-07-01 108.2115
2000-08-01 108.0804
2000-09-01 106.8375
2000-10-01 108.4429
2000-11-01 109.0095
2000-12-01 112.2090
2001-01-01 116.6719
2001-02-01 116.2337
2001-03-01 121.5050
2001-04-01 123.7710
2001-05-01 121.7682
2001-06-01 122.3510
2001-07-01 124.4981
2001-08-01 121.3670
2001-09-01 118.6117
Name: EXJPUS, dtype: float64
In [124]:
stamp
Out[124]:
Timestamp('1990-06-01 00:00:00', tz=None)
In [126]:
fred.EXJPUS[stamp]
Out[126]:
153.69569999999999
In [127]:
fred.ix[stamp]
Out[127]:
EXJPUS 153.6957
INTGSBJPM193N 7.0600
TB6MS 7.6300
Name: 1990-06-01 00:00:00, dtype: float64
In [128]:
fred
Out[128]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 270 entries, 1990-01-01 00:00:00 to 2012-06-01 00:00:00
Data columns (total 3 columns):
EXJPUS 270 non-null values
INTGSBJPM193N 269 non-null values
TB6MS 270 non-null values
dtypes: float64(3)
In [131]:
fred.head(20)
Out[131]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-01-01
144.9819
6.64
7.55
1990-02-01
145.6932
6.92
7.70
1990-03-01
153.3082
7.36
7.85
1990-04-01
158.4586
7.24
7.84
1990-05-01
154.0441
6.70
7.76
1990-06-01
153.6957
7.06
7.63
1990-07-01
149.0395
7.48
7.52
1990-08-01
147.4609
8.19
7.38
1990-09-01
138.4405
8.62
7.32
1990-10-01
129.5909
7.68
7.16
1990-11-01
129.2155
7.40
7.03
1990-12-01
133.8890
7.06
6.70
1991-01-01
133.6986
6.83
6.28
1991-02-01
130.5358
6.66
5.93
1991-03-01
137.3867
6.89
5.92
1991-04-01
137.1127
7.00
5.71
1991-05-01
138.2218
6.87
5.61
1991-06-01
139.7475
7.12
5.75
1991-07-01
137.8300
6.80
5.70
1991-08-01
136.8164
6.55
5.39
In [133]:
years = fred.index.year
years
Out[133]:
array([1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990,
1990, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991,
1991, 1991, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992,
1992, 1992, 1992, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993,
1993, 1993, 1993, 1993, 1994, 1994, 1994, 1994, 1994, 1994, 1994,
1994, 1994, 1994, 1994, 1994, 1995, 1995, 1995, 1995, 1995, 1995,
1995, 1995, 1995, 1995, 1995, 1995, 1996, 1996, 1996, 1996, 1996,
1996, 1996, 1996, 1996, 1996, 1996, 1996, 1997, 1997, 1997, 1997,
1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1998, 1998, 1998,
1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1999, 1999,
1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 2000,
2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,
2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001,
2001, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002,
2002, 2002, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003,
2003, 2003, 2003, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004,
2004, 2004, 2004, 2004, 2005, 2005, 2005, 2005, 2005, 2005, 2005,
2005, 2005, 2005, 2005, 2005, 2006, 2006, 2006, 2006, 2006, 2006,
2006, 2006, 2006, 2006, 2006, 2006, 2007, 2007, 2007, 2007, 2007,
2007, 2007, 2007, 2007, 2007, 2007, 2007, 2008, 2008, 2008, 2008,
2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2009, 2009, 2009,
2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2010, 2010,
2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2011,
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011,
2012, 2012, 2012, 2012, 2012, 2012], dtype=int32)
In [136]:
annual_min = fred.groupby(years).min()
annual_max = fred.groupby(years).max()
In [139]:
# Chapter 7
annual_min_and_max = pd.concat([annual_min, annual_max],
axis=1, keys=['min', 'max'])
annual_min_and_max
Out[139]:
min
max
EXJPUS
INTGSBJPM193N
TB6MS
EXJPUS
INTGSBJPM193N
TB6MS
1990
129.2155
6.640
6.70
158.4586
8.620
7.85
1991
128.0395
5.530
4.10
139.7475
7.120
6.28
1992
121.1652
4.370
2.96
133.5395
5.530
4.18
1993
103.7650
2.580
2.97
124.9932
4.350
3.26
1994
98.0440
3.290
3.15
111.4415
4.140
6.21
1995
83.6895
1.880
5.13
101.9400
3.950
6.21
1996
105.7514
1.750
4.77
113.9810
2.580
5.30
1997
114.2857
1.350
5.06
129.7341
2.030
5.37
1998
117.0709
0.660
4.05
144.6800
1.420
5.14
1999
102.5843
1.405
4.33
121.9995
2.100
5.44
2000
105.2960
1.615
5.50
112.2090
1.895
6.10
2001
116.2337
1.210
1.78
127.5945
1.500
4.95
2002
117.8991
0.900
1.24
133.6426
1.525
2.01
2003
107.7377
0.530
0.92
119.8950
1.460
1.20
2004
103.8104
1.225
0.97
112.1960
1.850
2.43
2005
103.3410
1.165
2.61
118.4624
1.545
4.18
2006
111.7305
1.560
4.31
118.6090
1.920
5.06
2007
111.0729
1.470
3.23
122.6886
1.870
4.96
2008
91.2750
1.165
0.26
109.3624
1.750
2.75
2009
89.2674
1.260
0.15
98.9200
1.480
0.45
2010
81.7285
0.930
0.15
93.4527
1.390
0.24
2011
76.6430
0.980
0.04
83.1771
1.255
0.18
2012
76.9640
0.825
0.07
82.4659
0.985
0.15
In [141]:
fred.EXJPUS.resample('A-DEC', how='min')
Out[141]:
DATE
1990-12-31 129.2155
1991-12-31 128.0395
1992-12-31 121.1652
1993-12-31 103.7650
1994-12-31 98.0440
1995-12-31 83.6895
1996-12-31 105.7514
1997-12-31 114.2857
1998-12-31 117.0709
1999-12-31 102.5843
2000-12-31 105.2960
2001-12-31 116.2337
2002-12-31 117.8991
2003-12-31 107.7377
2004-12-31 103.8104
2005-12-31 103.3410
2006-12-31 111.7305
2007-12-31 111.0729
2008-12-31 91.2750
2009-12-31 89.2674
2010-12-31 81.7285
2011-12-31 76.6430
2012-12-31 76.9640
Freq: A-DEC, dtype: float64
In [142]:
fred.EXJPUS.resample('A-DEC', how=['min', 'max'])
Out[142]:
min
max
DATE
1990-12-31
129.2155
158.4586
1991-12-31
128.0395
139.7475
1992-12-31
121.1652
133.5395
1993-12-31
103.7650
124.9932
1994-12-31
98.0440
111.4415
1995-12-31
83.6895
101.9400
1996-12-31
105.7514
113.9810
1997-12-31
114.2857
129.7341
1998-12-31
117.0709
144.6800
1999-12-31
102.5843
121.9995
2000-12-31
105.2960
112.2090
2001-12-31
116.2337
127.5945
2002-12-31
117.8991
133.6426
2003-12-31
107.7377
119.8950
2004-12-31
103.8104
112.1960
2005-12-31
103.3410
118.4624
2006-12-31
111.7305
118.6090
2007-12-31
111.0729
122.6886
2008-12-31
91.2750
109.3624
2009-12-31
89.2674
98.9200
2010-12-31
81.7285
93.4527
2011-12-31
76.6430
83.1771
2012-12-31
76.9640
82.4659
In [144]:
annual_minmax = fred.resample('A-DEC', how=['min', 'max'])
annual_minmax
Out[144]:
EXJPUS
INTGSBJPM193N
TB6MS
min
max
min
max
min
max
DATE
1990-12-31
129.2155
158.4586
6.640
8.620
6.70
7.85
1991-12-31
128.0395
139.7475
5.530
7.120
4.10
6.28
1992-12-31
121.1652
133.5395
4.370
5.530
2.96
4.18
1993-12-31
103.7650
124.9932
2.580
4.350
2.97
3.26
1994-12-31
98.0440
111.4415
3.290
4.140
3.15
6.21
1995-12-31
83.6895
101.9400
1.880
3.950
5.13
6.21
1996-12-31
105.7514
113.9810
1.750
2.580
4.77
5.30
1997-12-31
114.2857
129.7341
1.350
2.030
5.06
5.37
1998-12-31
117.0709
144.6800
0.660
1.420
4.05
5.14
1999-12-31
102.5843
121.9995
1.405
2.100
4.33
5.44
2000-12-31
105.2960
112.2090
1.615
1.895
5.50
6.10
2001-12-31
116.2337
127.5945
1.210
1.500
1.78
4.95
2002-12-31
117.8991
133.6426
0.900
1.525
1.24
2.01
2003-12-31
107.7377
119.8950
0.530
1.460
0.92
1.20
2004-12-31
103.8104
112.1960
1.225
1.850
0.97
2.43
2005-12-31
103.3410
118.4624
1.165
1.545
2.61
4.18
2006-12-31
111.7305
118.6090
1.560
1.920
4.31
5.06
2007-12-31
111.0729
122.6886
1.470
1.870
3.23
4.96
2008-12-31
91.2750
109.3624
1.165
1.750
0.26
2.75
2009-12-31
89.2674
98.9200
1.260
1.480
0.15
0.45
2010-12-31
81.7285
93.4527
0.930
1.390
0.15
0.24
2011-12-31
76.6430
83.1771
0.980
1.255
0.04
0.18
2012-12-31
76.9640
82.4659
0.825
0.985
0.07
0.15
In [145]:
annual_minmax.columns
Out[145]:
MultiIndex
[(u'EXJPUS', u'min'), (u'EXJPUS', u'max'), (u'INTGSBJPM193N', u'min'), (u'INTGSBJPM193N', u'max'), (u'TB6MS', u'min'), (u'TB6MS', u'max')]
In [146]:
annual_minmax['EXJPUS', 'max']
Out[146]:
DATE
1990-12-31 158.4586
1991-12-31 139.7475
1992-12-31 133.5395
1993-12-31 124.9932
1994-12-31 111.4415
1995-12-31 101.9400
1996-12-31 113.9810
1997-12-31 129.7341
1998-12-31 144.6800
1999-12-31 121.9995
2000-12-31 112.2090
2001-12-31 127.5945
2002-12-31 133.6426
2003-12-31 119.8950
2004-12-31 112.1960
2005-12-31 118.4624
2006-12-31 118.6090
2007-12-31 122.6886
2008-12-31 109.3624
2009-12-31 98.9200
2010-12-31 93.4527
2011-12-31 83.1771
2012-12-31 82.4659
Freq: A-DEC, Name: (EXJPUS, max), dtype: float64
In [148]:
annual_minmax[('EXJPUS', 'max')].index
Out[148]:
<class 'pandas.tseries.index.DatetimeIndex'>
[1990-12-31 00:00:00, ..., 2012-12-31 00:00:00]
Length: 23, Freq: A-DEC, Timezone: None
In [150]:
# Your own aggregation function
def mad(x):
return np.abs(x - x.mean()).mean()
fred.EXJPUS.resample('A-DEC', how=mad)
Out[150]:
DATE
1990-12-31 8.022794
1991-12-31 3.344775
1992-12-31 2.995399
1993-12-31 5.042639
1994-12-31 3.238742
1995-12-31 6.529103
1996-12-31 2.323658
1997-12-31 3.502692
1998-12-31 7.003133
1999-12-31 5.948200
2000-12-31 1.485524
2001-12-31 2.212117
2002-12-31 4.699887
2003-12-31 3.742594
2004-12-31 1.940404
2005-12-31 4.125758
2006-12-31 1.370915
2007-12-31 3.027983
2008-12-31 4.209392
2009-12-31 2.950133
2010-12-31 3.614492
2011-12-31 2.211200
2012-12-31 1.446733
Freq: A-DEC, dtype: float64
In [154]:
# Works with non-numeric data
import pandas.util.testing as tm
tm.rands(10)
Out[154]:
'ujx32j8s6C'
In [158]:
choices = np.array([rands(10) for i in xrange(50)])
draws = np.random.randint(0, 50, size=1000)
timestamps = pd.date_range('2013-10-17 13:10', periods=1000, freq='S')
ts = Series(choices.take(draws), index=timestamps)
ts[:5]
Out[158]:
2013-10-17 13:10:00 LiockWHKcs
2013-10-17 13:10:01 JCVdXVljNc
2013-10-17 13:10:02 qHT39t1dbZ
2013-10-17 13:10:03 dIzYNzp5x4
2013-10-17 13:10:04 vTX5A53FQq
Freq: S, dtype: object
In [ ]:
# Hint!
ts.unique()
'M' : Monthly how=['mean', 'count']
In [ ]:
# If you're having trouble displaying DataFrames in the console
pd.options.display.line_width = 1000
In [159]:
!head -n 10 SFHousing.csv
In [162]:
house = pd.read_csv('SFHousing.csv')
Out[162]:
0 $425,500
1 $1,160,000
2 $774,025
3 $660,000
4 $450,000
5 $680,000
6 $820,000
7 $227,000
8 $476,000
9 $640,000
10 $1,365,000
11 $470,000
12 $945,000
13 $475,000
14 $850,000
...
1152 $1,105,000
1153 $920,000
1154 $650,000
1155 $1,530,000
1156 $1,810,000
1157 $1,850,000
1158 $1,140,000
1159 $879,000
1160 $1,085,000
1161 $430,000
1162 $842,930
1163 $901,000
1164 $872,000
1165 $1,800,000
1166 $1,935,000
Name: Sale_Amount, Length: 1167, dtype: object
In [165]:
def parse_dollars(x):
return float(x.replace('$', '').replace(',', ''))
house.Sale_Amount.map(parse_dollars)
Out[165]:
0 425500
1 1160000
2 774025
3 660000
4 450000
5 680000
6 820000
7 227000
8 476000
9 640000
10 1365000
11 470000
12 945000
13 475000
14 850000
...
1152 1105000
1153 920000
1154 650000
1155 1530000
1156 1810000
1157 1850000
1158 1140000
1159 879000
1160 1085000
1161 430000
1162 842930
1163 901000
1164 872000
1165 1800000
1166 1935000
Name: Sale_Amount, Length: 1167, dtype: float64
In [164]:
house.Sale_Amount.str.replace('[$,]', '').map(float)
Out[164]:
0 425500
1 1160000
2 774025
3 660000
4 450000
5 680000
6 820000
7 227000
8 476000
9 640000
10 1365000
11 470000
12 945000
13 475000
14 850000
...
1152 1105000
1153 920000
1154 650000
1155 1530000
1156 1810000
1157 1850000
1158 1140000
1159 879000
1160 1085000
1161 430000
1162 842930
1163 901000
1164 872000
1165 1800000
1166 1935000
Name: Sale_Amount, Length: 1167, dtype: float64
In [167]:
house
Out[167]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1167 entries, 0 to 1166
Data columns (total 6 columns):
APN 1167 non-null values
ADDRESS 1167 non-null values
Date_Sale 1167 non-null values
Sale_Amount 1167 non-null values
Num_Bedrooms 1167 non-null values
BLKLOT 1167 non-null values
dtypes: float64(1), int64(1), object(4)
In [168]:
house['Sale_Amount'] = house.Sale_Amount.str.replace('[$,]', '').map(float)
house['Date_Sale'] = pd.to_datetime(house['Date_Sale'])
In [171]:
pd.read_csv?
In [172]:
house = pd.read_csv('SFHousing.csv', parse_dates='Date_Sale',
converters={'Sale_Amount': parse_dollars},
index_col='Date_Sale')
house
Out[172]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1167 entries, 2012-01-03 00:00:00 to 2012-12-11 00:00:00
Data columns (total 5 columns):
APN 1167 non-null values
ADDRESS 1167 non-null values
Sale_Amount 1167 non-null values
Num_Bedrooms 1167 non-null values
BLKLOT 1167 non-null values
dtypes: float64(1), int64(1), object(3)
In [179]:
results = house['Sale_Amount'].resample('M', how=['mean', 'count',
'min', 'max'])
results
Out[179]:
mean
count
min
max
Date_Sale
2012-01-31
786415.250000
68
1500
2004000
2012-02-29
743212.036585
82
92500
2750000
2012-03-31
852983.142857
105
10000
2515000
2012-04-30
873141.256000
125
29000
3800000
2012-05-31
1041000.985401
137
245000
4750000
2012-06-30
850681.054795
146
208000
1998000
2012-07-31
1073750.650000
100
208000
6000000
2012-08-31
2013070.721591
176
208000
16600000
2012-09-30
1084000.875000
104
119000
5100000
2012-10-31
963303.260274
73
208000
3500000
2012-11-30
982842.959184
49
430000
2200000
2012-12-31
1867500.000000
2
1800000
1935000
In [180]:
results['mean'].plot()
Out[180]:
<matplotlib.axes.AxesSubplot at 0x107588610>
In [181]:
mask = house['Sale_Amount'] > 100000
results2 = house[mask]['Sale_Amount'].resample('M', how=['mean', 'count',
'min', 'max'])
results['mean'].plot()
results2['mean'].plot()
Out[181]:
<matplotlib.axes.AxesSubplot at 0x1077fb810>
In [182]:
results = house['Sale_Amount'].resample('M', how=['mean', 'std'])
results
Out[182]:
mean
std
Date_Sale
2012-01-31
786415.250000
444604.955801
2012-02-29
743212.036585
419249.847713
2012-03-31
852983.142857
467924.650333
2012-04-30
873141.256000
487755.836551
2012-05-31
1041000.985401
707571.144498
2012-06-30
850681.054795
391403.908165
2012-07-31
1073750.650000
892045.137817
2012-08-31
2013070.721591
3979886.034985
2012-09-30
1084000.875000
682614.097054
2012-10-31
963303.260274
476037.983246
2012-11-30
982842.959184
366576.180111
2012-12-31
1867500.000000
95459.415460
In [188]:
sigma = 0.5
lower = results['mean'] - sigma * results['std']
upper = results['mean'] + sigma * results['std']
results['mean'].plot(style='k-')
lower.plot(style='k--')
upper.plot(style='k--')
Out[188]:
<matplotlib.axes.AxesSubplot at 0x107f784d0>
In [189]:
fred
Out[189]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 270 entries, 1990-01-01 00:00:00 to 2012-06-01 00:00:00
Data columns (total 3 columns):
EXJPUS 270 non-null values
INTGSBJPM193N 269 non-null values
TB6MS 270 non-null values
dtypes: float64(3)
In [191]:
fred.head(10)
Out[191]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-01-01
144.9819
6.64
7.55
1990-02-01
145.6932
6.92
7.70
1990-03-01
153.3082
7.36
7.85
1990-04-01
158.4586
7.24
7.84
1990-05-01
154.0441
6.70
7.76
1990-06-01
153.6957
7.06
7.63
1990-07-01
149.0395
7.48
7.52
1990-08-01
147.4609
8.19
7.38
1990-09-01
138.4405
8.62
7.32
1990-10-01
129.5909
7.68
7.16
In [192]:
fred.shift(3).head(10)
Out[192]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-01-01
NaN
NaN
NaN
1990-02-01
NaN
NaN
NaN
1990-03-01
NaN
NaN
NaN
1990-04-01
144.9819
6.64
7.55
1990-05-01
145.6932
6.92
7.70
1990-06-01
153.3082
7.36
7.85
1990-07-01
158.4586
7.24
7.84
1990-08-01
154.0441
6.70
7.76
1990-09-01
153.6957
7.06
7.63
1990-10-01
149.0395
7.48
7.52
In [194]:
fred_1diff = fred - fred.shift(1)
fred_1diff.head(10)
Out[194]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-01-01
NaN
NaN
NaN
1990-02-01
0.7113
0.28
0.15
1990-03-01
7.6150
0.44
0.15
1990-04-01
5.1504
-0.12
-0.01
1990-05-01
-4.4145
-0.54
-0.08
1990-06-01
-0.3484
0.36
-0.13
1990-07-01
-4.6562
0.42
-0.11
1990-08-01
-1.5786
0.71
-0.14
1990-09-01
-9.0204
0.43
-0.06
1990-10-01
-8.8496
-0.94
-0.16
In [195]:
fred_1diff.corr()
Out[195]:
EXJPUS
INTGSBJPM193N
TB6MS
EXJPUS
1.000000
0.114982
0.238944
INTGSBJPM193N
0.114982
1.000000
0.178863
TB6MS
0.238944
0.178863
1.000000
In [196]:
(fred - fred.shift(6)).corr()
Out[196]:
EXJPUS
INTGSBJPM193N
TB6MS
EXJPUS
1.000000
0.004250
0.091739
INTGSBJPM193N
0.004250
1.000000
0.326939
TB6MS
0.091739
0.326939
1.000000
In [199]:
s2
Out[199]:
one 1
two 2
three 3
four 4
five 5
dtype: int64
In [200]:
s3
Out[200]:
one 1
two 2
three 3
dtype: int64
In [201]:
s2 + s3
Out[201]:
five NaN
four NaN
one 2
three 6
two 4
dtype: float64
In [203]:
def lag_corr(table, periods):
# you may not care but...
return (table[periods:] - table.shift(periods)).corr()
def pctchg_corr(table, periods):
# you may not care but...
return (table[periods:] / table.shift(periods) - 1).corr()
all_lags = [lag_corr(fred, i) for i in xrange(1, 12)]
lags_onetable = pd.concat(all_lags, keys=range(1, 12))
lags_onetable
Out[203]:
EXJPUS
INTGSBJPM193N
TB6MS
1
EXJPUS
1.000000
0.114982
0.238944
INTGSBJPM193N
0.114982
1.000000
0.178863
TB6MS
0.238944
0.178863
1.000000
2
EXJPUS
1.000000
0.091502
0.229482
INTGSBJPM193N
0.091502
1.000000
0.250218
TB6MS
0.229482
0.250218
1.000000
3
EXJPUS
1.000000
0.045051
0.193890
INTGSBJPM193N
0.045051
1.000000
0.281884
TB6MS
0.193890
0.281884
1.000000
4
EXJPUS
1.000000
0.012804
0.162092
INTGSBJPM193N
0.012804
1.000000
0.303291
TB6MS
0.162092
0.303291
1.000000
5
EXJPUS
1.000000
0.005297
0.127369
INTGSBJPM193N
0.005297
1.000000
0.322181
TB6MS
0.127369
0.322181
1.000000
6
EXJPUS
1.000000
0.004250
0.091739
INTGSBJPM193N
0.004250
1.000000
0.326939
TB6MS
0.091739
0.326939
1.000000
7
EXJPUS
1.000000
-0.000657
0.063158
INTGSBJPM193N
-0.000657
1.000000
0.320909
TB6MS
0.063158
0.320909
1.000000
8
EXJPUS
1.000000
-0.007450
0.042993
INTGSBJPM193N
-0.007450
1.000000
0.320119
TB6MS
0.042993
0.320119
1.000000
9
EXJPUS
1.000000
-0.015944
0.032352
INTGSBJPM193N
-0.015944
1.000000
0.323683
TB6MS
0.032352
0.323683
1.000000
10
EXJPUS
1.000000
-0.016366
0.027600
INTGSBJPM193N
-0.016366
1.000000
0.327883
TB6MS
0.027600
0.327883
1.000000
11
EXJPUS
1.000000
-0.024651
0.026665
INTGSBJPM193N
-0.024651
1.000000
0.329450
TB6MS
0.026665
0.329450
1.000000
In [208]:
unstacked = lags_onetable.unstack(1)
unstacked
Out[208]:
EXJPUS
INTGSBJPM193N
TB6MS
EXJPUS
INTGSBJPM193N
TB6MS
EXJPUS
INTGSBJPM193N
TB6MS
EXJPUS
INTGSBJPM193N
TB6MS
1
1
0.114982
0.238944
0.114982
1
0.178863
0.238944
0.178863
1
2
1
0.091502
0.229482
0.091502
1
0.250218
0.229482
0.250218
1
3
1
0.045051
0.193890
0.045051
1
0.281884
0.193890
0.281884
1
4
1
0.012804
0.162092
0.012804
1
0.303291
0.162092
0.303291
1
5
1
0.005297
0.127369
0.005297
1
0.322181
0.127369
0.322181
1
6
1
0.004250
0.091739
0.004250
1
0.326939
0.091739
0.326939
1
7
1
-0.000657
0.063158
-0.000657
1
0.320909
0.063158
0.320909
1
8
1
-0.007450
0.042993
-0.007450
1
0.320119
0.042993
0.320119
1
9
1
-0.015944
0.032352
-0.015944
1
0.323683
0.032352
0.323683
1
10
1
-0.016366
0.027600
-0.016366
1
0.327883
0.027600
0.327883
1
11
1
-0.024651
0.026665
-0.024651
1
0.329450
0.026665
0.329450
1
In [209]:
unstacked['TB6MS', 'EXJPUS'].plot(label='TB-JP')
unstacked['TB6MS', 'INTGSBJPM193N'].plot(label='TB-INT')
Out[209]:
<matplotlib.axes.AxesSubplot at 0x1073f8590>
In [210]:
fred_1diff
Out[210]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 270 entries, 1990-01-01 00:00:00 to 2012-06-01 00:00:00
Data columns (total 3 columns):
EXJPUS 269 non-null values
INTGSBJPM193N 268 non-null values
TB6MS 269 non-null values
dtypes: float64(3)
In [213]:
fred_1diff.EXJPUS.corr(fred_1diff.EXJPUS.shift(1))
Out[213]:
0.28489041350111521
In [215]:
fred_1diff.shift(1).head()
Out[215]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-01-01
NaN
NaN
NaN
1990-02-01
NaN
NaN
NaN
1990-03-01
0.7113
0.28
0.15
1990-04-01
7.6150
0.44
0.15
1990-05-01
5.1504
-0.12
-0.01
In [216]:
fred_1diff.corrwith(fred_1diff.shift(1))
Out[216]:
EXJPUS 0.284890
INTGSBJPM193N 0.153984
TB6MS 0.511621
dtype: float64
In [218]:
pd.DataFrame({'Lag1': fred_1diff.corrwith(fred_1diff.shift(1)),
'Lag2': fred_1diff.corrwith(fred_1diff.shift(2))})
Out[218]:
Lag1
Lag2
EXJPUS
0.284890
0.058668
INTGSBJPM193N
0.153984
0.027434
TB6MS
0.511621
0.317941
In [221]:
lag_acorr_table = pd.DataFrame({'Lag%d' % i:
fred_1diff.corrwith(fred_1diff.shift(i))
for i in range(1, 7)})
lag_acorr_table
Out[221]:
Lag1
Lag2
Lag3
Lag4
Lag5
Lag6
EXJPUS
0.284890
0.058668
-0.004404
-0.114183
-0.239900
-0.210275
INTGSBJPM193N
0.153984
0.027434
-0.175531
-0.122494
0.024914
0.107726
TB6MS
0.511621
0.317941
0.295669
0.158632
0.146528
0.221403
In [222]:
lag_acorr_table.T
Out[222]:
EXJPUS
INTGSBJPM193N
TB6MS
Lag1
0.284890
0.153984
0.511621
Lag2
0.058668
0.027434
0.317941
Lag3
-0.004404
-0.175531
0.295669
Lag4
-0.114183
-0.122494
0.158632
Lag5
-0.239900
0.024914
0.146528
Lag6
-0.210275
0.107726
0.221403
In [224]:
fred.head(10)
Out[224]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-01-01
144.9819
6.64
7.55
1990-02-01
145.6932
6.92
7.70
1990-03-01
153.3082
7.36
7.85
1990-04-01
158.4586
7.24
7.84
1990-05-01
154.0441
6.70
7.76
1990-06-01
153.6957
7.06
7.63
1990-07-01
149.0395
7.48
7.52
1990-08-01
147.4609
8.19
7.38
1990-09-01
138.4405
8.62
7.32
1990-10-01
129.5909
7.68
7.16
In [228]:
fred.shift(10, freq='H').head(10)
Out[228]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-01-01 10:00:00
144.9819
6.64
7.55
1990-02-01 10:00:00
145.6932
6.92
7.70
1990-03-01 10:00:00
153.3082
7.36
7.85
1990-04-01 10:00:00
158.4586
7.24
7.84
1990-05-01 10:00:00
154.0441
6.70
7.76
1990-06-01 10:00:00
153.6957
7.06
7.63
1990-07-01 10:00:00
149.0395
7.48
7.52
1990-08-01 10:00:00
147.4609
8.19
7.38
1990-09-01 10:00:00
138.4405
8.62
7.32
1990-10-01 10:00:00
129.5909
7.68
7.16
In [226]:
fred.shift(2, freq='M').head(10)
Out[226]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-02-28
144.9819
6.64
7.55
1990-03-31
145.6932
6.92
7.70
1990-04-30
153.3082
7.36
7.85
1990-05-31
158.4586
7.24
7.84
1990-06-30
154.0441
6.70
7.76
1990-07-31
153.6957
7.06
7.63
1990-08-31
149.0395
7.48
7.52
1990-09-30
147.4609
8.19
7.38
1990-10-31
138.4405
8.62
7.32
1990-11-30
129.5909
7.68
7.16
In [233]:
pfred = fred.to_period('M')
pfred.head(10).index
Out[233]:
<class 'pandas.tseries.period.PeriodIndex'>
freq: M
[1990-01, ..., 1990-10]
length: 10
In [239]:
pfred.index[0]
Out[239]:
Period('1990-01', 'M')
In [240]:
pfred.index[0].asfreq('S', 'end')
Out[240]:
Period('1990-01-31 23:59:59', 'S')
In [241]:
pfred.index[0].asfreq('S', 'start')
Out[241]:
Period('1990-01-01 00:00:00', 'S')
In [244]:
pfred.index[0].asfreq('H', 'end') - 5
Out[244]:
Period('1990-01-31 18:00', 'H')
In [248]:
# 7th business day
(pfred.index[0].asfreq('B', 'start') + 6).to_timestamp()
Out[248]:
Timestamp('1990-01-09 00:00:00', tz=None)
In [250]:
fred.head()
Out[250]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-01-01
144.9819
6.64
7.55
1990-02-01
145.6932
6.92
7.70
1990-03-01
153.3082
7.36
7.85
1990-04-01
158.4586
7.24
7.84
1990-05-01
154.0441
6.70
7.76
In [252]:
fred.shift(4, freq='D').head()
Out[252]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-01-05
144.9819
6.64
7.55
1990-02-05
145.6932
6.92
7.70
1990-03-05
153.3082
7.36
7.85
1990-04-05
158.4586
7.24
7.84
1990-05-05
154.0441
6.70
7.76
In [257]:
fred.shift(4, freq='D').resample('D').head(50)
Out[257]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-01-05
144.9819
6.64
7.55
1990-01-06
NaN
NaN
NaN
1990-01-07
NaN
NaN
NaN
1990-01-08
NaN
NaN
NaN
1990-01-09
NaN
NaN
NaN
1990-01-10
NaN
NaN
NaN
1990-01-11
NaN
NaN
NaN
1990-01-12
NaN
NaN
NaN
1990-01-13
NaN
NaN
NaN
1990-01-14
NaN
NaN
NaN
1990-01-15
NaN
NaN
NaN
1990-01-16
NaN
NaN
NaN
1990-01-17
NaN
NaN
NaN
1990-01-18
NaN
NaN
NaN
1990-01-19
NaN
NaN
NaN
1990-01-20
NaN
NaN
NaN
1990-01-21
NaN
NaN
NaN
1990-01-22
NaN
NaN
NaN
1990-01-23
NaN
NaN
NaN
1990-01-24
NaN
NaN
NaN
1990-01-25
NaN
NaN
NaN
1990-01-26
NaN
NaN
NaN
1990-01-27
NaN
NaN
NaN
1990-01-28
NaN
NaN
NaN
1990-01-29
NaN
NaN
NaN
1990-01-30
NaN
NaN
NaN
1990-01-31
NaN
NaN
NaN
1990-02-01
NaN
NaN
NaN
1990-02-02
NaN
NaN
NaN
1990-02-03
NaN
NaN
NaN
1990-02-04
NaN
NaN
NaN
1990-02-05
145.6932
6.92
7.70
1990-02-06
NaN
NaN
NaN
1990-02-07
NaN
NaN
NaN
1990-02-08
NaN
NaN
NaN
1990-02-09
NaN
NaN
NaN
1990-02-10
NaN
NaN
NaN
1990-02-11
NaN
NaN
NaN
1990-02-12
NaN
NaN
NaN
1990-02-13
NaN
NaN
NaN
1990-02-14
NaN
NaN
NaN
1990-02-15
NaN
NaN
NaN
1990-02-16
NaN
NaN
NaN
1990-02-17
NaN
NaN
NaN
1990-02-18
NaN
NaN
NaN
1990-02-19
NaN
NaN
NaN
1990-02-20
NaN
NaN
NaN
1990-02-21
NaN
NaN
NaN
1990-02-22
NaN
NaN
NaN
1990-02-23
NaN
NaN
NaN
In [259]:
fred.shift(4, freq='D').resample('D').apply(pd.Series.interpolate).head(50)
Out[259]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-01-05
144.981900
6.640000
7.550000
1990-01-06
145.004845
6.649032
7.554839
1990-01-07
145.027790
6.658065
7.559677
1990-01-08
145.050735
6.667097
7.564516
1990-01-09
145.073681
6.676129
7.569355
1990-01-10
145.096626
6.685161
7.574194
1990-01-11
145.119571
6.694194
7.579032
1990-01-12
145.142516
6.703226
7.583871
1990-01-13
145.165461
6.712258
7.588710
1990-01-14
145.188406
6.721290
7.593548
1990-01-15
145.211352
6.730323
7.598387
1990-01-16
145.234297
6.739355
7.603226
1990-01-17
145.257242
6.748387
7.608065
1990-01-18
145.280187
6.757419
7.612903
1990-01-19
145.303132
6.766452
7.617742
1990-01-20
145.326077
6.775484
7.622581
1990-01-21
145.349023
6.784516
7.627419
1990-01-22
145.371968
6.793548
7.632258
1990-01-23
145.394913
6.802581
7.637097
1990-01-24
145.417858
6.811613
7.641935
1990-01-25
145.440803
6.820645
7.646774
1990-01-26
145.463748
6.829677
7.651613
1990-01-27
145.486694
6.838710
7.656452
1990-01-28
145.509639
6.847742
7.661290
1990-01-29
145.532584
6.856774
7.666129
1990-01-30
145.555529
6.865806
7.670968
1990-01-31
145.578474
6.874839
7.675806
1990-02-01
145.601419
6.883871
7.680645
1990-02-02
145.624365
6.892903
7.685484
1990-02-03
145.647310
6.901935
7.690323
1990-02-04
145.670255
6.910968
7.695161
1990-02-05
145.693200
6.920000
7.700000
1990-02-06
145.965164
6.935714
7.705357
1990-02-07
146.237129
6.951429
7.710714
1990-02-08
146.509093
6.967143
7.716071
1990-02-09
146.781057
6.982857
7.721429
1990-02-10
147.053021
6.998571
7.726786
1990-02-11
147.324986
7.014286
7.732143
1990-02-12
147.596950
7.030000
7.737500
1990-02-13
147.868914
7.045714
7.742857
1990-02-14
148.140879
7.061429
7.748214
1990-02-15
148.412843
7.077143
7.753571
1990-02-16
148.684807
7.092857
7.758929
1990-02-17
148.956771
7.108571
7.764286
1990-02-18
149.228736
7.124286
7.769643
1990-02-19
149.500700
7.140000
7.775000
1990-02-20
149.772664
7.155714
7.780357
1990-02-21
150.044629
7.171429
7.785714
1990-02-22
150.316593
7.187143
7.791071
1990-02-23
150.588557
7.202857
7.796429
In [255]:
fred.shift(4, freq='D').resample('D', fill_method='bfill').head(50)
Out[255]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-01-05
144.9819
6.64
7.55
1990-01-06
145.6932
6.92
7.70
1990-01-07
145.6932
6.92
7.70
1990-01-08
145.6932
6.92
7.70
1990-01-09
145.6932
6.92
7.70
1990-01-10
145.6932
6.92
7.70
1990-01-11
145.6932
6.92
7.70
1990-01-12
145.6932
6.92
7.70
1990-01-13
145.6932
6.92
7.70
1990-01-14
145.6932
6.92
7.70
1990-01-15
145.6932
6.92
7.70
1990-01-16
145.6932
6.92
7.70
1990-01-17
145.6932
6.92
7.70
1990-01-18
145.6932
6.92
7.70
1990-01-19
145.6932
6.92
7.70
1990-01-20
145.6932
6.92
7.70
1990-01-21
145.6932
6.92
7.70
1990-01-22
145.6932
6.92
7.70
1990-01-23
145.6932
6.92
7.70
1990-01-24
145.6932
6.92
7.70
1990-01-25
145.6932
6.92
7.70
1990-01-26
145.6932
6.92
7.70
1990-01-27
145.6932
6.92
7.70
1990-01-28
145.6932
6.92
7.70
1990-01-29
145.6932
6.92
7.70
1990-01-30
145.6932
6.92
7.70
1990-01-31
145.6932
6.92
7.70
1990-02-01
145.6932
6.92
7.70
1990-02-02
145.6932
6.92
7.70
1990-02-03
145.6932
6.92
7.70
1990-02-04
145.6932
6.92
7.70
1990-02-05
145.6932
6.92
7.70
1990-02-06
153.3082
7.36
7.85
1990-02-07
153.3082
7.36
7.85
1990-02-08
153.3082
7.36
7.85
1990-02-09
153.3082
7.36
7.85
1990-02-10
153.3082
7.36
7.85
1990-02-11
153.3082
7.36
7.85
1990-02-12
153.3082
7.36
7.85
1990-02-13
153.3082
7.36
7.85
1990-02-14
153.3082
7.36
7.85
1990-02-15
153.3082
7.36
7.85
1990-02-16
153.3082
7.36
7.85
1990-02-17
153.3082
7.36
7.85
1990-02-18
153.3082
7.36
7.85
1990-02-19
153.3082
7.36
7.85
1990-02-20
153.3082
7.36
7.85
1990-02-21
153.3082
7.36
7.85
1990-02-22
153.3082
7.36
7.85
1990-02-23
153.3082
7.36
7.85
In [262]:
stamp = pd.Timestamp(datetime.now())
stamp
Out[262]:
Timestamp('2013-10-17 15:23:57.495752', tz=None)
In [264]:
print stamp.tz
None
In [267]:
stamp.tz_localize('US/Pacific')
Out[267]:
Timestamp('2013-10-17 15:23:57.495752-0700', tz='US/Pacific')
In [268]:
stamp_pac = stamp.tz_localize('US/Pacific')
stamp_pac
Out[268]:
Timestamp('2013-10-17 15:23:57.495752-0700', tz='US/Pacific')
In [273]:
stamp_pac.tz_convert('Asia/Tokyo')
Out[273]:
18
In [274]:
stamp_pac.tz_convert('Asia/Tokyo').hour
Out[274]:
7
In [275]:
stamp_pac.tz_convert('Asia/Tokyo').day
Out[275]:
18
In [277]:
stamp_pac.tz_convert('Asia/Tokyo').value
Out[277]:
1382048637495752000
In [278]:
stamp_pac.tz_convert('Asia/Tokyo').tz_convert('utc').value
Out[278]:
1382048637495752000
In [285]:
fred_shifted = fred.shift(1, freq='4D9H30T')
fred_shang = fred_shifted\
.tz_localize('US/Eastern')\
.tz_convert('Asia/Shanghai')
fred_shang
Out[285]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 270 entries, 1990-01-05 22:30:00+08:00 to 2012-06-05 21:30:00+08:00
Data columns (total 3 columns):
EXJPUS 270 non-null values
INTGSBJPM193N 269 non-null values
TB6MS 270 non-null values
dtypes: float64(3)
In [287]:
fred_shang.index
Out[287]:
<class 'pandas.tseries.index.DatetimeIndex'>
[1990-01-05 22:30:00, ..., 2012-06-05 21:30:00]
Length: 270, Freq: None, Timezone: Asia/Shanghai
In [293]:
fred_shifted = fred.shift(1, freq='4D9H30T')
fred_shifted\
.tz_localize('US/Eastern')\
.tz_convert('US/Pacific').resample('A-DEC', how='mean')
Out[293]:
EXJPUS
INTGSBJPM193N
TB6MS
DATE
1990-12-31 00:00:00-08:00
144.818167
7.362500
7.453333
1991-12-31 00:00:00-08:00
134.507742
6.525833
5.440833
1992-12-31 00:00:00-08:00
126.747042
4.944167
3.546667
1993-12-31 00:00:00-08:00
111.231417
3.692500
3.122500
1994-12-31 00:00:00-08:00
102.195258
3.714167
4.632500
1995-12-31 00:00:00-08:00
94.105783
2.531667
5.565833
1996-12-31 00:00:00-08:00
108.804842
2.225000
5.080833
1997-12-31 00:00:00-08:00
121.094650
1.687500
5.176667
1998-12-31 00:00:00-08:00
130.818200
1.096667
4.828333
1999-12-31 00:00:00-08:00
113.713167
1.770833
4.745000
2000-12-31 00:00:00-08:00
107.821308
1.747500
5.898333
2001-12-31 00:00:00-08:00
121.519267
1.329583
3.334167
2002-12-31 00:00:00-08:00
125.266875
1.250000
1.675833
2003-12-31 00:00:00-08:00
115.916642
1.011667
1.054167
2004-12-31 00:00:00-08:00
108.156925
1.499167
1.575000
2005-12-31 00:00:00-08:00
110.138392
1.359583
3.392500
2006-12-31 00:00:00-08:00
116.349758
1.731250
4.802500
2007-12-31 00:00:00-08:00
117.765483
1.652917
4.430000
2008-12-31 00:00:00-08:00
103.379350
1.449167
1.618333
2009-12-31 00:00:00-08:00
93.599550
1.342500
0.279167
2010-12-31 00:00:00-08:00
87.750225
1.150833
0.194167
2011-12-31 00:00:00-08:00
79.711717
1.118333
0.095833
2012-12-31 00:00:00-08:00
79.689050
0.922000
0.128333
In [297]:
!cat homework.md
=======================
Python for Data Science
=======================
Homework for 2013-10-17 Lecture (Wes McKinney)
==============================================
Download a dump of data about closed GitHub issues for the pandas project here:
https://www.dropbox.com/s/pe6dqooznrfynii/closed.json
Use the built-in json library to read this file into memory. Each element in
the list contains information about a GitHub issue and all developer comments
that were made on it in the 'comments' field.
1) Make a DataFrame with one row per issue with the following columns extracted
from the issue data:
ntitle, created_at, labels, closed_at, user, id
Transform the user values to be simply the 'login' string, so that the user
column contains only string usernames.
2) Remove duplicate rows by id from the DataFrame you just created using the id
column's duplicated method.
4) Convert the created_at and closed_at columns from string to datetime type.
5) Now construct appropriate time series and pandas functions to make the
following plots:
- Number of issues created by month
- Number of distinct users creating issues each month (hint: you can pass a
function to resample's how argument, and there's nothing wrong with having
string values in a TimeSeries)
6) Make a table and an accompanying plot illustrating:
- The mean number of days it took for issues to be closed by the month they
were opened. In other words, for closed issues created in August 2012, how
long were they open on average? (hint: use the total_seconds function on the
timedelta objects computed when subtracting datetime objects). Also show the
number of issues in each month in the table.
7) Make a DataFrame containing all the comments for all of the issues. You will
want to add an 'id' attribute to each comment while doing so so that each row
contains a single comment and has the id of the issue it belongs to.
Convert the 'created' column to datetime format; note you will need to multiply
the values (appropriately converted to integers) by 1000000 to get them in
nanoseconds and pass to to_datetime.
8) For each month, compute a table summarizing the following for each month:
- Total number of issue comments
- The "chattiest" user (most number of comments)
- The percentage of total comments made by the chattiest users
- The number of distinct participants in the issue comments
9) Create a helper 'labels' table from the issues data with two columns: id and
label. If an issue has 3 elements in its 'labels' value, add 3 rows to the
table. If an issue does not have any labels, place a single row with None as
the label (hint: construct a list of tuples, then make the DataFrame).
10) Now, join the issues data with the labels helper table (pandas.merge). Add
a column to this table containing the number of days (as a floating point
number) it took to close each issue.
11) Compute a table containing the average time to close for each label
type. Now make a plot comparing mean time to close by month for Enhancement
versus Bug issue types.
Download a dump of data about closed GitHub issues for the pandas project here:
https://www.dropbox.com/s/pe6dqooznrfynii/closed.json
Use the built-in json library to read this file into memory. Each element in the list contains information about a GitHub issue and all developer comments that were made on it in the 'comments' field.
1) Make a DataFrame with one row per issue with the following columns extracted from the issue data:
ntitle, created_at, labels, closed_at, user, id
Transform the user values to be simply the 'login' string, so that the user column contains only string usernames.
2) Remove duplicate rows by id from the DataFrame you just created using the id column's duplicated method.
4) Convert the created_at and closed_at columns from string to datetime type.
5) Now construct appropriate time series and pandas functions to make the following plots:
Number of issues created by month
Number of distinct users creating issues each month (hint: you can pass a function to resample's how argument, and there's nothing wrong with having string values in a TimeSeries)
6) Make a table and an accompanying plot illustrating:
7) Make a DataFrame containing all the comments for all of the issues. You will want to add an 'id' attribute to each comment while doing so so that each row contains a single comment and has the id of the issue it belongs to.
Convert the 'created' column to datetime format; note you will need to multiply the values (appropriately converted to integers) by 1000000 to get them in nanoseconds and pass to to_datetime.
8) For each month, compute a table summarizing the following for each month:
9) Create a helper 'labels' table from the issues data with two columns: id and label. If an issue has 3 elements in its 'labels' value, add 3 rows to the table. If an issue does not have any labels, place a single row with None as the label (hint: construct a list of tuples, then make the DataFrame).
10) Now, join the issues data with the labels helper table (pandas.merge). Add a column to this table containing the number of days (as a floating point number) it took to close each issue.
11) Compute a table containing the average time to close for each label type. Now make a plot comparing mean time to close by month for Enhancement versus Bug issue types.
In [298]:
import json
closed_issues = json.load(open('closed.json'))
closed_issues[0]
Out[298]:
{u'assignee': None,
u'body': u're: pystatsmodels e-mail\r\n\r\n\r\nhi everyone,\r\n\r\njust getting started with pandas and i was wondering if someone could\r\nhelp me out. do pandas.DataMatrix objects support per item comparison\r\noperations?\r\n\r\ni have a two data matrices, and i want to do something like this:\r\n\r\ndiv[div > 0.5 * price] = 0\r\n\r\nthis would work if div and price were numpy.ndarray objects. any idea\r\nhow i would do something like this with pandas.DataMatrix objects?\r\n\r\nthanks,\r\nandy\r\n',
u'closed_at': u'2011-02-19T23:13:48Z',
u'comments': [{u'author': u'wesm',
u'created': u'1298157227000',
u'text': u'implemented in git HEAD',
u'updated': u'1298157227000'}],
u'comments_url': u'https://api.github.com/repos/pydata/pandas/issues/1/comments',
u'created_at': u'2010-09-29T00:45:31Z',
u'events_url': u'https://api.github.com/repos/pydata/pandas/issues/1/events',
u'html_url': u'https://github.com/pydata/pandas/issues/1',
u'id': 337721,
u'labels': [],
u'labels_url': u'https://api.github.com/repos/pydata/pandas/issues/1/labels{/name}',
u'milestone': None,
u'number': 1,
u'pull_request': {u'diff_url': None, u'html_url': None, u'patch_url': None},
u'state': u'closed',
u'title': u'Enable element-wise comparison operations in DataMatrix objects',
u'updated_at': u'2013-04-26T21:25:39Z',
u'url': u'https://api.github.com/repos/pydata/pandas/issues/1',
u'user': {u'avatar_url': u'https://secure.gravatar.com/avatar/2c08a3eed709a9d1a2654cea45aa466f?d=https://a248.e.akamai.net/assets.github.com%2Fimages%2Fgravatars%2Fgravatar-user-420.png',
u'events_url': u'https://api.github.com/users/wesm/events{/privacy}',
u'followers_url': u'https://api.github.com/users/wesm/followers',
u'following_url': u'https://api.github.com/users/wesm/following',
u'gists_url': u'https://api.github.com/users/wesm/gists{/gist_id}',
u'gravatar_id': u'2c08a3eed709a9d1a2654cea45aa466f',
u'html_url': u'https://github.com/wesm',
u'id': 329591,
u'login': u'wesm',
u'organizations_url': u'https://api.github.com/users/wesm/orgs',
u'received_events_url': u'https://api.github.com/users/wesm/received_events',
u'repos_url': u'https://api.github.com/users/wesm/repos',
u'starred_url': u'https://api.github.com/users/wesm/starred{/owner}{/repo}',
u'subscriptions_url': u'https://api.github.com/users/wesm/subscriptions',
u'type': u'User',
u'url': u'https://api.github.com/users/wesm'}}
In [ ]:
Content source: yigong/AY250
Similar notebooks: