In [1]:
import numpy as np
import pandas as pd

In [2]:
s = pd.Series([1, 2, 3, 4, 5])
s


Out[2]:
0    1
1    2
2    3
3    4
4    5
dtype: int64

In [5]:
s.index


Out[5]:
Int64Index([0, 1, 2, 3, 4], dtype=int64)

In [6]:
s.values


Out[6]:
array([1, 2, 3, 4, 5])

In [7]:
s


Out[7]:
0    1
1    2
2    3
3    4
4    5
dtype: int64

In [10]:
s2 = pd.Series([1, 2, 3, 4, 5],
              index=['one', 'two', 'three', 'four', 'five'])
s2


Out[10]:
one      1
two      2
three    3
four     4
five     5
dtype: int64

In [12]:
s2.index[0]


Out[12]:
'one'

In [13]:
s2


Out[13]:
one      1
two      2
three    3
four     4
five     5
dtype: int64

In [14]:
s2['three']


Out[14]:
3

In [15]:
s2[2]


Out[15]:
3

In [17]:
s2[['one', 'three', 'two']]


Out[17]:
one      1
three    3
two      2
dtype: int64

In [18]:
s2


Out[18]:
one      1
two      2
three    3
four     4
five     5
dtype: int64

In [20]:
s3 = s2[:3]

In [21]:
s2


Out[21]:
one      1
two      2
three    3
four     4
five     5
dtype: int64

In [22]:
s3


Out[22]:
one      1
two      2
three    3
dtype: int64

In [23]:
s3.to_dict()


Out[23]:
{'one': 1, 'three': 3, 'two': 2}

In [25]:
pd.Series(s3.to_dict())


Out[25]:
one      1
three    3
two      2
dtype: int64

In [49]:
df = pd.DataFrame({'A': s2, 'B': s3})
df


Out[49]:
A B
five 5 NaN
four 4 NaN
one 1 1
three 3 3
two 2 2

In [50]:
df['A']


Out[50]:
five     5
four     4
one      1
three    3
two      2
Name: A, dtype: int64

In [51]:
df['B']


Out[51]:
five    NaN
four    NaN
one       1
three     3
two       2
Name: B, dtype: float64

In [52]:
df.ix['four']


Out[52]:
A     4
B   NaN
Name: four, dtype: float64

In [53]:
df.columns


Out[53]:
Index([u'A', u'B'], dtype=object)

In [54]:
df.index


Out[54]:
Index([u'five', u'four', u'one', u'three', u'two'], dtype=object)

In [55]:
df.values


Out[55]:
array([[  5.,  nan],
       [  4.,  nan],
       [  1.,   1.],
       [  3.,   3.],
       [  2.,   2.]])

In [56]:
df


Out[56]:
A B
five 5 NaN
four 4 NaN
one 1 1
three 3 3
two 2 2

In [57]:
df.values


Out[57]:
array([[  5.,  nan],
       [  4.,  nan],
       [  1.,   1.],
       [  3.,   3.],
       [  2.,   2.]])

In [58]:
df2 = pd.DataFrame(df.values, columns=['A', 'B'])

In [59]:
df2['A'] = 7.5
df2


Out[59]:
A B
0 7.5 NaN
1 7.5 NaN
2 7.5 1
3 7.5 3
4 7.5 2

Time series basics

http://bit.ly/19UG34V


In [61]:
!head -n 10 fred_fx.csv


DATE,EXJPUS,INTGSBJPM193N,TB6MS
1990-01-01 00:00:00,144.9819,6.64,7.55
1990-02-01 00:00:00,145.6932,6.92,7.7
1990-03-01 00:00:00,153.3082,7.36,7.85
1990-04-01 00:00:00,158.4586,7.24,7.84
1990-05-01 00:00:00,154.0441,6.7,7.76
1990-06-01 00:00:00,153.6957,7.06,7.63
1990-07-01 00:00:00,149.0395,7.48,7.52
1990-08-01 00:00:00,147.4609,8.19,7.38
1990-09-01 00:00:00,138.4405,8.62,7.32

In [63]:
fred = pd.read_csv('fred_fx.csv')
fred


Out[63]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 270 entries, 0 to 269
Data columns (total 4 columns):
DATE             270  non-null values
EXJPUS           270  non-null values
INTGSBJPM193N    269  non-null values
TB6MS            270  non-null values
dtypes: float64(3), object(1)

In [65]:
fred.head(10)


Out[65]:
DATE EXJPUS INTGSBJPM193N TB6MS
0 1990-01-01 00:00:00 144.9819 6.64 7.55
1 1990-02-01 00:00:00 145.6932 6.92 7.70
2 1990-03-01 00:00:00 153.3082 7.36 7.85
3 1990-04-01 00:00:00 158.4586 7.24 7.84
4 1990-05-01 00:00:00 154.0441 6.70 7.76
5 1990-06-01 00:00:00 153.6957 7.06 7.63
6 1990-07-01 00:00:00 149.0395 7.48 7.52
7 1990-08-01 00:00:00 147.4609 8.19 7.38
8 1990-09-01 00:00:00 138.4405 8.62 7.32
9 1990-10-01 00:00:00 129.5909 7.68 7.16

In [ ]:
fred['EXJPUS']

In [70]:
fred.EXJPUS


Out[70]:
0     144.9819
1     145.6932
2     153.3082
3     158.4586
4     154.0441
5     153.6957
6     149.0395
7     147.4609
8     138.4405
9     129.5909
10    129.2155
11    133.8890
12    133.6986
13    130.5358
14    137.3867
...
255    83.1771
256    81.1257
257    80.4259
258    79.2425
259    76.9657
260    76.7957
261    76.6430
262    77.5595
263    77.7967
264    76.9640
265    78.4700
266    82.4659
267    81.2524
268    79.6668
269    79.3152
Name: EXJPUS, Length: 270, dtype: float64

In [72]:
fred.EXJPUS.index[:10]


Out[72]:
Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64)

In [74]:
fred.DATE[:5]


Out[74]:
0    1990-01-01 00:00:00
1    1990-02-01 00:00:00
2    1990-03-01 00:00:00
3    1990-04-01 00:00:00
4    1990-05-01 00:00:00
Name: DATE, dtype: object

In [75]:
fred.DATE[0]


Out[75]:
'1990-01-01 00:00:00'

In [76]:
# Several ways to convert to date
from datetime import datetime
import dateutil.parser as parser

print parser.parse(fred.DATE[0])


Out[76]:
datetime.datetime(1990, 1, 1, 0, 0)

In [77]:
datetime.strptime(fred.DATE[0], '%Y-%m-%d %H:%M:%S')


Out[77]:
datetime.datetime(1990, 1, 1, 0, 0)

In [79]:
dates = [datetime.strptime(x, '%Y-%m-%d %H:%M:%S') for x in fred.DATE]
dates


Out[79]:
[datetime.datetime(1990, 1, 1, 0, 0),
 datetime.datetime(1990, 2, 1, 0, 0),
 datetime.datetime(1990, 3, 1, 0, 0),
 datetime.datetime(1990, 4, 1, 0, 0),
 datetime.datetime(1990, 5, 1, 0, 0),
 datetime.datetime(1990, 6, 1, 0, 0),
 datetime.datetime(1990, 7, 1, 0, 0),
 datetime.datetime(1990, 8, 1, 0, 0),
 datetime.datetime(1990, 9, 1, 0, 0),
 datetime.datetime(1990, 10, 1, 0, 0),
 datetime.datetime(1990, 11, 1, 0, 0),
 datetime.datetime(1990, 12, 1, 0, 0),
 datetime.datetime(1991, 1, 1, 0, 0),
 datetime.datetime(1991, 2, 1, 0, 0),
 datetime.datetime(1991, 3, 1, 0, 0),
 datetime.datetime(1991, 4, 1, 0, 0),
 datetime.datetime(1991, 5, 1, 0, 0),
 datetime.datetime(1991, 6, 1, 0, 0),
 datetime.datetime(1991, 7, 1, 0, 0),
 datetime.datetime(1991, 8, 1, 0, 0),
 datetime.datetime(1991, 9, 1, 0, 0),
 datetime.datetime(1991, 10, 1, 0, 0),
 datetime.datetime(1991, 11, 1, 0, 0),
 datetime.datetime(1991, 12, 1, 0, 0),
 datetime.datetime(1992, 1, 1, 0, 0),
 datetime.datetime(1992, 2, 1, 0, 0),
 datetime.datetime(1992, 3, 1, 0, 0),
 datetime.datetime(1992, 4, 1, 0, 0),
 datetime.datetime(1992, 5, 1, 0, 0),
 datetime.datetime(1992, 6, 1, 0, 0),
 datetime.datetime(1992, 7, 1, 0, 0),
 datetime.datetime(1992, 8, 1, 0, 0),
 datetime.datetime(1992, 9, 1, 0, 0),
 datetime.datetime(1992, 10, 1, 0, 0),
 datetime.datetime(1992, 11, 1, 0, 0),
 datetime.datetime(1992, 12, 1, 0, 0),
 datetime.datetime(1993, 1, 1, 0, 0),
 datetime.datetime(1993, 2, 1, 0, 0),
 datetime.datetime(1993, 3, 1, 0, 0),
 datetime.datetime(1993, 4, 1, 0, 0),
 datetime.datetime(1993, 5, 1, 0, 0),
 datetime.datetime(1993, 6, 1, 0, 0),
 datetime.datetime(1993, 7, 1, 0, 0),
 datetime.datetime(1993, 8, 1, 0, 0),
 datetime.datetime(1993, 9, 1, 0, 0),
 datetime.datetime(1993, 10, 1, 0, 0),
 datetime.datetime(1993, 11, 1, 0, 0),
 datetime.datetime(1993, 12, 1, 0, 0),
 datetime.datetime(1994, 1, 1, 0, 0),
 datetime.datetime(1994, 2, 1, 0, 0),
 datetime.datetime(1994, 3, 1, 0, 0),
 datetime.datetime(1994, 4, 1, 0, 0),
 datetime.datetime(1994, 5, 1, 0, 0),
 datetime.datetime(1994, 6, 1, 0, 0),
 datetime.datetime(1994, 7, 1, 0, 0),
 datetime.datetime(1994, 8, 1, 0, 0),
 datetime.datetime(1994, 9, 1, 0, 0),
 datetime.datetime(1994, 10, 1, 0, 0),
 datetime.datetime(1994, 11, 1, 0, 0),
 datetime.datetime(1994, 12, 1, 0, 0),
 datetime.datetime(1995, 1, 1, 0, 0),
 datetime.datetime(1995, 2, 1, 0, 0),
 datetime.datetime(1995, 3, 1, 0, 0),
 datetime.datetime(1995, 4, 1, 0, 0),
 datetime.datetime(1995, 5, 1, 0, 0),
 datetime.datetime(1995, 6, 1, 0, 0),
 datetime.datetime(1995, 7, 1, 0, 0),
 datetime.datetime(1995, 8, 1, 0, 0),
 datetime.datetime(1995, 9, 1, 0, 0),
 datetime.datetime(1995, 10, 1, 0, 0),
 datetime.datetime(1995, 11, 1, 0, 0),
 datetime.datetime(1995, 12, 1, 0, 0),
 datetime.datetime(1996, 1, 1, 0, 0),
 datetime.datetime(1996, 2, 1, 0, 0),
 datetime.datetime(1996, 3, 1, 0, 0),
 datetime.datetime(1996, 4, 1, 0, 0),
 datetime.datetime(1996, 5, 1, 0, 0),
 datetime.datetime(1996, 6, 1, 0, 0),
 datetime.datetime(1996, 7, 1, 0, 0),
 datetime.datetime(1996, 8, 1, 0, 0),
 datetime.datetime(1996, 9, 1, 0, 0),
 datetime.datetime(1996, 10, 1, 0, 0),
 datetime.datetime(1996, 11, 1, 0, 0),
 datetime.datetime(1996, 12, 1, 0, 0),
 datetime.datetime(1997, 1, 1, 0, 0),
 datetime.datetime(1997, 2, 1, 0, 0),
 datetime.datetime(1997, 3, 1, 0, 0),
 datetime.datetime(1997, 4, 1, 0, 0),
 datetime.datetime(1997, 5, 1, 0, 0),
 datetime.datetime(1997, 6, 1, 0, 0),
 datetime.datetime(1997, 7, 1, 0, 0),
 datetime.datetime(1997, 8, 1, 0, 0),
 datetime.datetime(1997, 9, 1, 0, 0),
 datetime.datetime(1997, 10, 1, 0, 0),
 datetime.datetime(1997, 11, 1, 0, 0),
 datetime.datetime(1997, 12, 1, 0, 0),
 datetime.datetime(1998, 1, 1, 0, 0),
 datetime.datetime(1998, 2, 1, 0, 0),
 datetime.datetime(1998, 3, 1, 0, 0),
 datetime.datetime(1998, 4, 1, 0, 0),
 datetime.datetime(1998, 5, 1, 0, 0),
 datetime.datetime(1998, 6, 1, 0, 0),
 datetime.datetime(1998, 7, 1, 0, 0),
 datetime.datetime(1998, 8, 1, 0, 0),
 datetime.datetime(1998, 9, 1, 0, 0),
 datetime.datetime(1998, 10, 1, 0, 0),
 datetime.datetime(1998, 11, 1, 0, 0),
 datetime.datetime(1998, 12, 1, 0, 0),
 datetime.datetime(1999, 1, 1, 0, 0),
 datetime.datetime(1999, 2, 1, 0, 0),
 datetime.datetime(1999, 3, 1, 0, 0),
 datetime.datetime(1999, 4, 1, 0, 0),
 datetime.datetime(1999, 5, 1, 0, 0),
 datetime.datetime(1999, 6, 1, 0, 0),
 datetime.datetime(1999, 7, 1, 0, 0),
 datetime.datetime(1999, 8, 1, 0, 0),
 datetime.datetime(1999, 9, 1, 0, 0),
 datetime.datetime(1999, 10, 1, 0, 0),
 datetime.datetime(1999, 11, 1, 0, 0),
 datetime.datetime(1999, 12, 1, 0, 0),
 datetime.datetime(2000, 1, 1, 0, 0),
 datetime.datetime(2000, 2, 1, 0, 0),
 datetime.datetime(2000, 3, 1, 0, 0),
 datetime.datetime(2000, 4, 1, 0, 0),
 datetime.datetime(2000, 5, 1, 0, 0),
 datetime.datetime(2000, 6, 1, 0, 0),
 datetime.datetime(2000, 7, 1, 0, 0),
 datetime.datetime(2000, 8, 1, 0, 0),
 datetime.datetime(2000, 9, 1, 0, 0),
 datetime.datetime(2000, 10, 1, 0, 0),
 datetime.datetime(2000, 11, 1, 0, 0),
 datetime.datetime(2000, 12, 1, 0, 0),
 datetime.datetime(2001, 1, 1, 0, 0),
 datetime.datetime(2001, 2, 1, 0, 0),
 datetime.datetime(2001, 3, 1, 0, 0),
 datetime.datetime(2001, 4, 1, 0, 0),
 datetime.datetime(2001, 5, 1, 0, 0),
 datetime.datetime(2001, 6, 1, 0, 0),
 datetime.datetime(2001, 7, 1, 0, 0),
 datetime.datetime(2001, 8, 1, 0, 0),
 datetime.datetime(2001, 9, 1, 0, 0),
 datetime.datetime(2001, 10, 1, 0, 0),
 datetime.datetime(2001, 11, 1, 0, 0),
 datetime.datetime(2001, 12, 1, 0, 0),
 datetime.datetime(2002, 1, 1, 0, 0),
 datetime.datetime(2002, 2, 1, 0, 0),
 datetime.datetime(2002, 3, 1, 0, 0),
 datetime.datetime(2002, 4, 1, 0, 0),
 datetime.datetime(2002, 5, 1, 0, 0),
 datetime.datetime(2002, 6, 1, 0, 0),
 datetime.datetime(2002, 7, 1, 0, 0),
 datetime.datetime(2002, 8, 1, 0, 0),
 datetime.datetime(2002, 9, 1, 0, 0),
 datetime.datetime(2002, 10, 1, 0, 0),
 datetime.datetime(2002, 11, 1, 0, 0),
 datetime.datetime(2002, 12, 1, 0, 0),
 datetime.datetime(2003, 1, 1, 0, 0),
 datetime.datetime(2003, 2, 1, 0, 0),
 datetime.datetime(2003, 3, 1, 0, 0),
 datetime.datetime(2003, 4, 1, 0, 0),
 datetime.datetime(2003, 5, 1, 0, 0),
 datetime.datetime(2003, 6, 1, 0, 0),
 datetime.datetime(2003, 7, 1, 0, 0),
 datetime.datetime(2003, 8, 1, 0, 0),
 datetime.datetime(2003, 9, 1, 0, 0),
 datetime.datetime(2003, 10, 1, 0, 0),
 datetime.datetime(2003, 11, 1, 0, 0),
 datetime.datetime(2003, 12, 1, 0, 0),
 datetime.datetime(2004, 1, 1, 0, 0),
 datetime.datetime(2004, 2, 1, 0, 0),
 datetime.datetime(2004, 3, 1, 0, 0),
 datetime.datetime(2004, 4, 1, 0, 0),
 datetime.datetime(2004, 5, 1, 0, 0),
 datetime.datetime(2004, 6, 1, 0, 0),
 datetime.datetime(2004, 7, 1, 0, 0),
 datetime.datetime(2004, 8, 1, 0, 0),
 datetime.datetime(2004, 9, 1, 0, 0),
 datetime.datetime(2004, 10, 1, 0, 0),
 datetime.datetime(2004, 11, 1, 0, 0),
 datetime.datetime(2004, 12, 1, 0, 0),
 datetime.datetime(2005, 1, 1, 0, 0),
 datetime.datetime(2005, 2, 1, 0, 0),
 datetime.datetime(2005, 3, 1, 0, 0),
 datetime.datetime(2005, 4, 1, 0, 0),
 datetime.datetime(2005, 5, 1, 0, 0),
 datetime.datetime(2005, 6, 1, 0, 0),
 datetime.datetime(2005, 7, 1, 0, 0),
 datetime.datetime(2005, 8, 1, 0, 0),
 datetime.datetime(2005, 9, 1, 0, 0),
 datetime.datetime(2005, 10, 1, 0, 0),
 datetime.datetime(2005, 11, 1, 0, 0),
 datetime.datetime(2005, 12, 1, 0, 0),
 datetime.datetime(2006, 1, 1, 0, 0),
 datetime.datetime(2006, 2, 1, 0, 0),
 datetime.datetime(2006, 3, 1, 0, 0),
 datetime.datetime(2006, 4, 1, 0, 0),
 datetime.datetime(2006, 5, 1, 0, 0),
 datetime.datetime(2006, 6, 1, 0, 0),
 datetime.datetime(2006, 7, 1, 0, 0),
 datetime.datetime(2006, 8, 1, 0, 0),
 datetime.datetime(2006, 9, 1, 0, 0),
 datetime.datetime(2006, 10, 1, 0, 0),
 datetime.datetime(2006, 11, 1, 0, 0),
 datetime.datetime(2006, 12, 1, 0, 0),
 datetime.datetime(2007, 1, 1, 0, 0),
 datetime.datetime(2007, 2, 1, 0, 0),
 datetime.datetime(2007, 3, 1, 0, 0),
 datetime.datetime(2007, 4, 1, 0, 0),
 datetime.datetime(2007, 5, 1, 0, 0),
 datetime.datetime(2007, 6, 1, 0, 0),
 datetime.datetime(2007, 7, 1, 0, 0),
 datetime.datetime(2007, 8, 1, 0, 0),
 datetime.datetime(2007, 9, 1, 0, 0),
 datetime.datetime(2007, 10, 1, 0, 0),
 datetime.datetime(2007, 11, 1, 0, 0),
 datetime.datetime(2007, 12, 1, 0, 0),
 datetime.datetime(2008, 1, 1, 0, 0),
 datetime.datetime(2008, 2, 1, 0, 0),
 datetime.datetime(2008, 3, 1, 0, 0),
 datetime.datetime(2008, 4, 1, 0, 0),
 datetime.datetime(2008, 5, 1, 0, 0),
 datetime.datetime(2008, 6, 1, 0, 0),
 datetime.datetime(2008, 7, 1, 0, 0),
 datetime.datetime(2008, 8, 1, 0, 0),
 datetime.datetime(2008, 9, 1, 0, 0),
 datetime.datetime(2008, 10, 1, 0, 0),
 datetime.datetime(2008, 11, 1, 0, 0),
 datetime.datetime(2008, 12, 1, 0, 0),
 datetime.datetime(2009, 1, 1, 0, 0),
 datetime.datetime(2009, 2, 1, 0, 0),
 datetime.datetime(2009, 3, 1, 0, 0),
 datetime.datetime(2009, 4, 1, 0, 0),
 datetime.datetime(2009, 5, 1, 0, 0),
 datetime.datetime(2009, 6, 1, 0, 0),
 datetime.datetime(2009, 7, 1, 0, 0),
 datetime.datetime(2009, 8, 1, 0, 0),
 datetime.datetime(2009, 9, 1, 0, 0),
 datetime.datetime(2009, 10, 1, 0, 0),
 datetime.datetime(2009, 11, 1, 0, 0),
 datetime.datetime(2009, 12, 1, 0, 0),
 datetime.datetime(2010, 1, 1, 0, 0),
 datetime.datetime(2010, 2, 1, 0, 0),
 datetime.datetime(2010, 3, 1, 0, 0),
 datetime.datetime(2010, 4, 1, 0, 0),
 datetime.datetime(2010, 5, 1, 0, 0),
 datetime.datetime(2010, 6, 1, 0, 0),
 datetime.datetime(2010, 7, 1, 0, 0),
 datetime.datetime(2010, 8, 1, 0, 0),
 datetime.datetime(2010, 9, 1, 0, 0),
 datetime.datetime(2010, 10, 1, 0, 0),
 datetime.datetime(2010, 11, 1, 0, 0),
 datetime.datetime(2010, 12, 1, 0, 0),
 datetime.datetime(2011, 1, 1, 0, 0),
 datetime.datetime(2011, 2, 1, 0, 0),
 datetime.datetime(2011, 3, 1, 0, 0),
 datetime.datetime(2011, 4, 1, 0, 0),
 datetime.datetime(2011, 5, 1, 0, 0),
 datetime.datetime(2011, 6, 1, 0, 0),
 datetime.datetime(2011, 7, 1, 0, 0),
 datetime.datetime(2011, 8, 1, 0, 0),
 datetime.datetime(2011, 9, 1, 0, 0),
 datetime.datetime(2011, 10, 1, 0, 0),
 datetime.datetime(2011, 11, 1, 0, 0),
 datetime.datetime(2011, 12, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 3, 1, 0, 0),
 datetime.datetime(2012, 4, 1, 0, 0),
 datetime.datetime(2012, 5, 1, 0, 0),
 datetime.datetime(2012, 6, 1, 0, 0)]

In [82]:
pd.DatetimeIndex(dates)


Out[82]:
<class 'pandas.tseries.index.DatetimeIndex'>
[1990-01-01 00:00:00, ..., 2012-06-01 00:00:00]
Length: 270, Freq: None, Timezone: None

In [83]:
# Series in, Series out
pd.to_datetime(fred.DATE)


Out[83]:
0    1990-01-01 00:00:00
1    1990-02-01 00:00:00
2    1990-03-01 00:00:00
3    1990-04-01 00:00:00
4    1990-05-01 00:00:00
5    1990-06-01 00:00:00
6    1990-07-01 00:00:00
7    1990-08-01 00:00:00
8    1990-09-01 00:00:00
9    1990-10-01 00:00:00
10   1990-11-01 00:00:00
11   1990-12-01 00:00:00
12   1991-01-01 00:00:00
13   1991-02-01 00:00:00
14   1991-03-01 00:00:00
...
255   2011-04-01 00:00:00
256   2011-05-01 00:00:00
257   2011-06-01 00:00:00
258   2011-07-01 00:00:00
259   2011-08-01 00:00:00
260   2011-09-01 00:00:00
261   2011-10-01 00:00:00
262   2011-11-01 00:00:00
263   2011-12-01 00:00:00
264   2012-01-01 00:00:00
265   2012-02-01 00:00:00
266   2012-03-01 00:00:00
267   2012-04-01 00:00:00
268   2012-05-01 00:00:00
269   2012-06-01 00:00:00
Name: DATE, Length: 270, dtype: datetime64[ns]

In [84]:
# NumPy array in, DatetimeIndex out
pd.to_datetime(fred.DATE.values)


Out[84]:
<class 'pandas.tseries.index.DatetimeIndex'>
[1990-01-01 00:00:00, ..., 2012-06-01 00:00:00]
Length: 270, Freq: None, Timezone: None

In [86]:
pd.to_datetime(fred.DATE, format='%Y-%m-%d %H:%M:%S')


Out[86]:
0    1990-01-01 00:00:00
1    1990-02-01 00:00:00
2    1990-03-01 00:00:00
3    1990-04-01 00:00:00
4    1990-05-01 00:00:00
5    1990-06-01 00:00:00
6    1990-07-01 00:00:00
7    1990-08-01 00:00:00
8    1990-09-01 00:00:00
9    1990-10-01 00:00:00
10   1990-11-01 00:00:00
11   1990-12-01 00:00:00
12   1991-01-01 00:00:00
13   1991-02-01 00:00:00
14   1991-03-01 00:00:00
...
255   2011-04-01 00:00:00
256   2011-05-01 00:00:00
257   2011-06-01 00:00:00
258   2011-07-01 00:00:00
259   2011-08-01 00:00:00
260   2011-09-01 00:00:00
261   2011-10-01 00:00:00
262   2011-11-01 00:00:00
263   2011-12-01 00:00:00
264   2012-01-01 00:00:00
265   2012-02-01 00:00:00
266   2012-03-01 00:00:00
267   2012-04-01 00:00:00
268   2012-05-01 00:00:00
269   2012-06-01 00:00:00
Name: DATE, Length: 270, dtype: datetime64[ns]

In [88]:
pd.to_datetime(['96/21/05'], format='%y/%d/%m')


Out[88]:
<class 'pandas.tseries.index.DatetimeIndex'>
[1996-05-21 00:00:00]
Length: 1, Freq: None, Timezone: None

In [89]:
fred


Out[89]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 270 entries, 0 to 269
Data columns (total 4 columns):
DATE             270  non-null values
EXJPUS           270  non-null values
INTGSBJPM193N    269  non-null values
TB6MS            270  non-null values
dtypes: float64(3), object(1)

In [90]:
print fred.to_string()


                    DATE    EXJPUS  INTGSBJPM193N  TB6MS
0    1990-01-01 00:00:00  144.9819          6.640   7.55
1    1990-02-01 00:00:00  145.6932          6.920   7.70
2    1990-03-01 00:00:00  153.3082          7.360   7.85
3    1990-04-01 00:00:00  158.4586          7.240   7.84
4    1990-05-01 00:00:00  154.0441          6.700   7.76
5    1990-06-01 00:00:00  153.6957          7.060   7.63
6    1990-07-01 00:00:00  149.0395          7.480   7.52
7    1990-08-01 00:00:00  147.4609          8.190   7.38
8    1990-09-01 00:00:00  138.4405          8.620   7.32
9    1990-10-01 00:00:00  129.5909          7.680   7.16
10   1990-11-01 00:00:00  129.2155          7.400   7.03
11   1990-12-01 00:00:00  133.8890          7.060   6.70
12   1991-01-01 00:00:00  133.6986          6.830   6.28
13   1991-02-01 00:00:00  130.5358          6.660   5.93
14   1991-03-01 00:00:00  137.3867          6.890   5.92
15   1991-04-01 00:00:00  137.1127          7.000   5.71
16   1991-05-01 00:00:00  138.2218          6.870   5.61
17   1991-06-01 00:00:00  139.7475          7.120   5.75
18   1991-07-01 00:00:00  137.8300          6.800   5.70
19   1991-08-01 00:00:00  136.8164          6.550   5.39
20   1991-09-01 00:00:00  134.2995          6.110   5.25
21   1991-10-01 00:00:00  130.7723          6.020   5.04
22   1991-11-01 00:00:00  129.6321          5.930   4.61
23   1991-12-01 00:00:00  128.0395          5.530   4.10
24   1992-01-01 00:00:00  125.4614          5.530   3.87
25   1992-02-01 00:00:00  127.6989          5.410   3.93
26   1992-03-01 00:00:00  132.8627          5.330   4.18
27   1992-04-01 00:00:00  133.5395          5.480   3.87
28   1992-05-01 00:00:00  130.7710          5.410   3.75
29   1992-06-01 00:00:00  126.8355          5.140   3.77
30   1992-07-01 00:00:00  125.8817          4.740   3.28
31   1992-08-01 00:00:00  126.2310          4.620   3.21
32   1992-09-01 00:00:00  122.5967          4.490   2.96
33   1992-10-01 00:00:00  121.1652          4.410   3.04
34   1992-11-01 00:00:00  123.8800          4.400   3.34
35   1992-12-01 00:00:00  124.0409          4.370   3.36
36   1993-01-01 00:00:00  124.9932          4.310   3.14
37   1993-02-01 00:00:00  120.7595          3.760   3.07
38   1993-03-01 00:00:00  117.0174          4.170   3.05
39   1993-04-01 00:00:00  112.4114          4.260   2.97
40   1993-05-01 00:00:00  110.3430          4.350   3.07
41   1993-06-01 00:00:00  107.4118          4.090   3.20
42   1993-07-01 00:00:00  107.6914          3.900   3.16
43   1993-08-01 00:00:00  103.7650          3.690   3.14
44   1993-09-01 00:00:00  105.5748          3.360   3.06
45   1993-10-01 00:00:00  107.0200          3.110   3.12
46   1993-11-01 00:00:00  107.8765          2.730   3.26
47   1993-12-01 00:00:00  109.9130          2.580   3.23
48   1994-01-01 00:00:00  111.4415          3.290   3.15
49   1994-02-01 00:00:00  106.3011          3.510   3.43
50   1994-03-01 00:00:00  105.0974          3.460   3.78
51   1994-04-01 00:00:00  103.4843          3.510   4.09
52   1994-05-01 00:00:00  103.7533          3.290   4.60
53   1994-06-01 00:00:00  102.5264          3.670   4.55
54   1994-07-01 00:00:00   98.4450          3.800   4.75
55   1994-08-01 00:00:00   99.9404          4.140   4.88
56   1994-09-01 00:00:00   98.7743          3.920   5.04
57   1994-10-01 00:00:00   98.3530          4.080   5.39
58   1994-11-01 00:00:00   98.0440          3.990   5.72
59   1994-12-01 00:00:00  100.1824          3.910   6.21
60   1995-01-01 00:00:00   99.7660          3.950   6.21
61   1995-02-01 00:00:00   98.2368          3.740   6.03
62   1995-03-01 00:00:00   90.5196          3.010   5.89
63   1995-04-01 00:00:00   83.6895          2.790   5.77
64   1995-05-01 00:00:00   85.1127          2.160   5.67
65   1995-06-01 00:00:00   84.6355          2.140   5.42
66   1995-07-01 00:00:00   87.3970          2.170   5.37
67   1995-08-01 00:00:00   94.7383          2.410   5.41
68   1995-09-01 00:00:00  100.5455          1.950   5.30
69   1995-10-01 00:00:00  100.8390          2.010   5.32
70   1995-11-01 00:00:00  101.9400          1.880   5.27
71   1995-12-01 00:00:00  101.8495          2.170   5.13
72   1996-01-01 00:00:00  105.7514          2.320   4.92
73   1996-02-01 00:00:00  105.7880          2.580   4.77
74   1996-03-01 00:00:00  105.9400          2.300   4.96
75   1996-04-01 00:00:00  107.1995          2.550   5.06
76   1996-05-01 00:00:00  106.3423          2.350   5.12
77   1996-06-01 00:00:00  108.9600          2.400   5.25
78   1996-07-01 00:00:00  109.1909          2.530   5.30
79   1996-08-01 00:00:00  107.8659          2.140   5.13
80   1996-09-01 00:00:00  109.9310          2.050   5.24
81   1996-10-01 00:00:00  112.4123          1.800   5.11
82   1996-11-01 00:00:00  112.2958          1.750   5.07
83   1996-12-01 00:00:00  113.9810          1.930   5.04
84   1997-01-01 00:00:00  117.9124          1.770   5.10
85   1997-02-01 00:00:00  122.9621          1.780   5.06
86   1997-03-01 00:00:00  122.7738          1.750   5.26
87   1997-04-01 00:00:00  125.6377          1.850   5.37
88   1997-05-01 00:00:00  119.1924          2.030   5.30
89   1997-06-01 00:00:00  114.2857          1.920   5.13
90   1997-07-01 00:00:00  115.3759          1.700   5.12
91   1997-08-01 00:00:00  117.9295          1.600   5.19
92   1997-09-01 00:00:00  120.8900          1.540   5.09
93   1997-10-01 00:00:00  121.0605          1.350   5.09
94   1997-11-01 00:00:00  125.3817          1.430   5.17
95   1997-12-01 00:00:00  129.7341          1.530   5.24
96   1998-01-01 00:00:00  129.5475          1.420   5.03
97   1998-02-01 00:00:00  125.8516          1.400   5.07
98   1998-03-01 00:00:00  129.0823          1.280   5.04
99   1998-04-01 00:00:00  131.7536          1.080   5.06
100  1998-05-01 00:00:00  134.8960          1.080   5.14
101  1998-06-01 00:00:00  140.3305          1.210   5.12
102  1998-07-01 00:00:00  140.7874          1.150   5.03
103  1998-08-01 00:00:00  144.6800          0.980   4.95
104  1998-09-01 00:00:00  134.4805          0.660   4.63
105  1998-10-01 00:00:00  121.0486          0.690   4.05
106  1998-11-01 00:00:00  120.2895          0.850   4.42
107  1998-12-01 00:00:00  117.0709          1.360   4.40
108  1999-01-01 00:00:00  113.2900          2.100   4.33
109  1999-02-01 00:00:00  116.6684          1.995   4.44
110  1999-03-01 00:00:00  119.4730          1.745   4.47
111  1999-04-01 00:00:00  119.7723          1.405   4.37
112  1999-05-01 00:00:00  121.9995          1.480   4.56
113  1999-06-01 00:00:00  120.7245          1.835   4.82
114  1999-07-01 00:00:00  119.3305          1.785   4.58
115  1999-08-01 00:00:00  113.2268          1.910   4.87
116  1999-09-01 00:00:00  106.8752          1.710   4.88
117  1999-10-01 00:00:00  105.9650          1.810   4.98
118  1999-11-01 00:00:00  104.6485          1.830   5.20
119  1999-12-01 00:00:00  102.5843          1.645   5.44
120  2000-01-01 00:00:00  105.2960          1.710   5.50
121  2000-02-01 00:00:00  109.3885          1.835   5.72
122  2000-03-01 00:00:00  106.3074          1.770   5.85
123  2000-04-01 00:00:00  105.6270          1.760   5.81
124  2000-05-01 00:00:00  108.3205          1.655   6.10
125  2000-06-01 00:00:00  106.1255          1.760   5.97
126  2000-07-01 00:00:00  108.2115          1.675   6.00
127  2000-08-01 00:00:00  108.0804          1.895   6.07
128  2000-09-01 00:00:00  106.8375          1.840   5.98
129  2000-10-01 00:00:00  108.4429          1.815   6.04
130  2000-11-01 00:00:00  109.0095          1.615   6.06
131  2000-12-01 00:00:00  112.2090          1.640   5.68
132  2001-01-01 00:00:00  116.6719          1.500   4.95
133  2001-02-01 00:00:00  116.2337          1.300   4.71
134  2001-03-01 00:00:00  121.5050          1.270   4.28
135  2001-04-01 00:00:00  123.7710          1.290   3.85
136  2001-05-01 00:00:00  121.7682          1.240   3.62
137  2001-06-01 00:00:00  122.3510          1.210   3.45
138  2001-07-01 00:00:00  124.4981          1.330   3.45
139  2001-08-01 00:00:00  121.3670          1.375   3.29
140  2001-09-01 00:00:00  118.6117          1.420   2.63
141  2001-10-01 00:00:00  121.4536          1.300   2.12
142  2001-11-01 00:00:00  122.4055          1.355   1.88
143  2001-12-01 00:00:00  127.5945          1.365   1.78
144  2002-01-01 00:00:00  132.6833          1.480   1.73
145  2002-02-01 00:00:00  133.6426          1.525   1.82
146  2002-03-01 00:00:00  131.0610          1.390   2.01
147  2002-04-01 00:00:00  130.7718          1.370   1.93
148  2002-05-01 00:00:00  126.3750          1.385   1.86
149  2002-06-01 00:00:00  123.2905          1.310   1.79
150  2002-07-01 00:00:00  117.8991          1.310   1.70
151  2002-08-01 00:00:00  118.9927          1.175   1.60
152  2002-09-01 00:00:00  121.0780          1.175   1.60
153  2002-10-01 00:00:00  123.9077          0.985   1.56
154  2002-11-01 00:00:00  121.6079          0.995   1.27
155  2002-12-01 00:00:00  121.8929          0.900   1.24
156  2003-01-01 00:00:00  118.8133          0.800   1.20
157  2003-02-01 00:00:00  119.3379          0.780   1.18
158  2003-03-01 00:00:00  118.6871          0.700   1.13
159  2003-04-01 00:00:00  119.8950          0.605   1.14
160  2003-05-01 00:00:00  117.3681          0.530   1.08
161  2003-06-01 00:00:00  118.3290          0.835   0.92
162  2003-07-01 00:00:00  118.6959          0.935   0.95
163  2003-08-01 00:00:00  118.6624          1.455   1.03
164  2003-09-01 00:00:00  114.8000          1.385   1.01
165  2003-10-01 00:00:00  109.4955          1.460   1.00
166  2003-11-01 00:00:00  109.1778          1.295   1.02
167  2003-12-01 00:00:00  107.7377          1.360   0.99
168  2004-01-01 00:00:00  106.2685          1.310   0.97
169  2004-02-01 00:00:00  106.7079          1.225   0.99
170  2004-03-01 00:00:00  108.5157          1.440   0.99
171  2004-04-01 00:00:00  107.6564          1.530   1.09
172  2004-05-01 00:00:00  112.1960          1.520   1.31
173  2004-06-01 00:00:00  109.4336          1.775   1.60
174  2004-07-01 00:00:00  109.4871          1.850   1.66
175  2004-08-01 00:00:00  110.2336          1.535   1.72
176  2004-09-01 00:00:00  110.0914          1.440   1.87
177  2004-10-01 00:00:00  108.7835          1.490   2.00
178  2004-11-01 00:00:00  104.6990          1.445   2.27
179  2004-12-01 00:00:00  103.8104          1.430   2.43
180  2005-01-01 00:00:00  103.3410          1.320   2.61
181  2005-02-01 00:00:00  104.9442          1.465   2.77
182  2005-03-01 00:00:00  105.2543          1.320   3.00
183  2005-04-01 00:00:00  107.1938          1.235   3.05
184  2005-05-01 00:00:00  106.5952          1.240   3.08
185  2005-06-01 00:00:00  108.7473          1.165   3.13
186  2005-07-01 00:00:00  111.9535          1.305   3.42
187  2005-08-01 00:00:00  110.6065          1.345   3.66
188  2005-09-01 00:00:00  111.2390          1.475   3.67
189  2005-10-01 00:00:00  114.8695          1.545   3.99
190  2005-11-01 00:00:00  118.4540          1.430   4.15
191  2005-12-01 00:00:00  118.4624          1.470   4.18
192  2006-01-01 00:00:00  115.4765          1.560   4.31
193  2006-02-01 00:00:00  117.8605          1.580   4.52
194  2006-03-01 00:00:00  117.2778          1.765   4.62
195  2006-04-01 00:00:00  117.0695          1.920   4.72
196  2006-05-01 00:00:00  111.7305          1.810   4.82
197  2006-06-01 00:00:00  114.6250          1.915   4.97
198  2006-07-01 00:00:00  115.7670          1.920   5.06
199  2006-08-01 00:00:00  115.9243          1.610   4.97
200  2006-09-01 00:00:00  117.2145          1.665   4.89
201  2006-10-01 00:00:00  118.6090          1.710   4.92
202  2006-11-01 00:00:00  117.3205          1.645   4.95
203  2006-12-01 00:00:00  117.3220          1.675   4.88
204  2007-01-01 00:00:00  120.4471          1.695   4.95
205  2007-02-01 00:00:00  120.5047          1.625   4.96
206  2007-03-01 00:00:00  117.2600          1.650   4.89
207  2007-04-01 00:00:00  118.9324          1.620   4.86
208  2007-05-01 00:00:00  120.7732          1.745   4.78
209  2007-06-01 00:00:00  122.6886          1.870   4.76
210  2007-07-01 00:00:00  121.4148          1.790   4.83
211  2007-08-01 00:00:00  116.7335          1.595   4.38
212  2007-09-01 00:00:00  115.0435          1.675   4.05
213  2007-10-01 00:00:00  115.8661          1.600   4.01
214  2007-11-01 00:00:00  111.0729          1.470   3.46
215  2007-12-01 00:00:00  112.4490          1.500   3.23
216  2008-01-01 00:00:00  107.8181          1.430   2.75
217  2008-02-01 00:00:00  107.0300          1.355   2.04
218  2008-03-01 00:00:00  100.7562          1.275   1.48
219  2008-04-01 00:00:00  102.6777          1.575   1.55
220  2008-05-01 00:00:00  104.3595          1.750   1.82
221  2008-06-01 00:00:00  106.9152          1.590   2.13
222  2008-07-01 00:00:00  106.8518          1.530   1.93
223  2008-08-01 00:00:00  109.3624          1.405   1.92
224  2008-09-01 00:00:00  106.5748          1.460   1.61
225  2008-10-01 00:00:00   99.9659          1.465   1.20
226  2008-11-01 00:00:00   96.9656          1.390   0.73
227  2008-12-01 00:00:00   91.2750          1.165   0.26
228  2009-01-01 00:00:00   90.1205          1.290   0.30
229  2009-02-01 00:00:00   92.9158          1.270   0.45
230  2009-03-01 00:00:00   97.8550          1.345   0.42
231  2009-04-01 00:00:00   98.9200          1.420   0.35
232  2009-05-01 00:00:00   96.6445          1.480   0.30
233  2009-06-01 00:00:00   96.6145          1.350   0.31
234  2009-07-01 00:00:00   94.3670          1.410   0.27
235  2009-08-01 00:00:00   94.8971          1.305   0.26
236  2009-09-01 00:00:00   91.2748          1.290   0.21
237  2009-10-01 00:00:00   90.3671          1.405   0.16
238  2009-11-01 00:00:00   89.2674          1.260   0.15
239  2009-12-01 00:00:00   89.9509          1.285   0.17
240  2010-01-01 00:00:00   91.1011          1.315   0.15
241  2010-02-01 00:00:00   90.1395          1.295   0.18
242  2010-03-01 00:00:00   90.7161          1.390   0.22
243  2010-04-01 00:00:00   93.4527          1.280   0.24
244  2010-05-01 00:00:00   91.9730          1.255   0.22
245  2010-06-01 00:00:00   90.8059          1.080   0.19
246  2010-07-01 00:00:00   87.5005          1.065   0.20
247  2010-08-01 00:00:00   85.3727          0.960   0.19
248  2010-09-01 00:00:00   84.3571          0.930   0.19
249  2010-10-01 00:00:00   81.7285          0.930   0.18
250  2010-11-01 00:00:00   82.5180          1.190   0.18
251  2010-12-01 00:00:00   83.3376          1.120   0.19
252  2011-01-01 00:00:00   82.6250          1.210   0.18
253  2011-02-01 00:00:00   82.5368          1.255   0.17
254  2011-03-01 00:00:00   81.6470          1.250   0.16
255  2011-04-01 00:00:00   83.1771          1.200   0.12
256  2011-05-01 00:00:00   81.1257          1.150   0.09
257  2011-06-01 00:00:00   80.4259          1.130   0.10
258  2011-07-01 00:00:00   79.2425          1.075   0.08
259  2011-08-01 00:00:00   76.9657          1.030   0.06
260  2011-09-01 00:00:00   76.7957          1.025   0.04
261  2011-10-01 00:00:00   76.6430          1.045   0.05
262  2011-11-01 00:00:00   77.5595          1.070   0.05
263  2011-12-01 00:00:00   77.7967          0.980   0.05
264  2012-01-01 00:00:00   76.9640          0.960   0.07
265  2012-02-01 00:00:00   78.4700          0.955   0.12
266  2012-03-01 00:00:00   82.4659          0.985   0.14
267  2012-04-01 00:00:00   81.2524          0.885   0.14
268  2012-05-01 00:00:00   79.6668          0.825   0.15
269  2012-06-01 00:00:00   79.3152            NaN   0.15

In [93]:
from IPython.display import HTML
HTML(fred.to_html())


Out[93]:
DATE EXJPUS INTGSBJPM193N TB6MS
0 1990-01-01 00:00:00 144.9819 6.640 7.55
1 1990-02-01 00:00:00 145.6932 6.920 7.70
2 1990-03-01 00:00:00 153.3082 7.360 7.85
3 1990-04-01 00:00:00 158.4586 7.240 7.84
4 1990-05-01 00:00:00 154.0441 6.700 7.76
5 1990-06-01 00:00:00 153.6957 7.060 7.63
6 1990-07-01 00:00:00 149.0395 7.480 7.52
7 1990-08-01 00:00:00 147.4609 8.190 7.38
8 1990-09-01 00:00:00 138.4405 8.620 7.32
9 1990-10-01 00:00:00 129.5909 7.680 7.16
10 1990-11-01 00:00:00 129.2155 7.400 7.03
11 1990-12-01 00:00:00 133.8890 7.060 6.70
12 1991-01-01 00:00:00 133.6986 6.830 6.28
13 1991-02-01 00:00:00 130.5358 6.660 5.93
14 1991-03-01 00:00:00 137.3867 6.890 5.92
15 1991-04-01 00:00:00 137.1127 7.000 5.71
16 1991-05-01 00:00:00 138.2218 6.870 5.61
17 1991-06-01 00:00:00 139.7475 7.120 5.75
18 1991-07-01 00:00:00 137.8300 6.800 5.70
19 1991-08-01 00:00:00 136.8164 6.550 5.39
20 1991-09-01 00:00:00 134.2995 6.110 5.25
21 1991-10-01 00:00:00 130.7723 6.020 5.04
22 1991-11-01 00:00:00 129.6321 5.930 4.61
23 1991-12-01 00:00:00 128.0395 5.530 4.10
24 1992-01-01 00:00:00 125.4614 5.530 3.87
25 1992-02-01 00:00:00 127.6989 5.410 3.93
26 1992-03-01 00:00:00 132.8627 5.330 4.18
27 1992-04-01 00:00:00 133.5395 5.480 3.87
28 1992-05-01 00:00:00 130.7710 5.410 3.75
29 1992-06-01 00:00:00 126.8355 5.140 3.77
30 1992-07-01 00:00:00 125.8817 4.740 3.28
31 1992-08-01 00:00:00 126.2310 4.620 3.21
32 1992-09-01 00:00:00 122.5967 4.490 2.96
33 1992-10-01 00:00:00 121.1652 4.410 3.04
34 1992-11-01 00:00:00 123.8800 4.400 3.34
35 1992-12-01 00:00:00 124.0409 4.370 3.36
36 1993-01-01 00:00:00 124.9932 4.310 3.14
37 1993-02-01 00:00:00 120.7595 3.760 3.07
38 1993-03-01 00:00:00 117.0174 4.170 3.05
39 1993-04-01 00:00:00 112.4114 4.260 2.97
40 1993-05-01 00:00:00 110.3430 4.350 3.07
41 1993-06-01 00:00:00 107.4118 4.090 3.20
42 1993-07-01 00:00:00 107.6914 3.900 3.16
43 1993-08-01 00:00:00 103.7650 3.690 3.14
44 1993-09-01 00:00:00 105.5748 3.360 3.06
45 1993-10-01 00:00:00 107.0200 3.110 3.12
46 1993-11-01 00:00:00 107.8765 2.730 3.26
47 1993-12-01 00:00:00 109.9130 2.580 3.23
48 1994-01-01 00:00:00 111.4415 3.290 3.15
49 1994-02-01 00:00:00 106.3011 3.510 3.43
50 1994-03-01 00:00:00 105.0974 3.460 3.78
51 1994-04-01 00:00:00 103.4843 3.510 4.09
52 1994-05-01 00:00:00 103.7533 3.290 4.60
53 1994-06-01 00:00:00 102.5264 3.670 4.55
54 1994-07-01 00:00:00 98.4450 3.800 4.75
55 1994-08-01 00:00:00 99.9404 4.140 4.88
56 1994-09-01 00:00:00 98.7743 3.920 5.04
57 1994-10-01 00:00:00 98.3530 4.080 5.39
58 1994-11-01 00:00:00 98.0440 3.990 5.72
59 1994-12-01 00:00:00 100.1824 3.910 6.21
60 1995-01-01 00:00:00 99.7660 3.950 6.21
61 1995-02-01 00:00:00 98.2368 3.740 6.03
62 1995-03-01 00:00:00 90.5196 3.010 5.89
63 1995-04-01 00:00:00 83.6895 2.790 5.77
64 1995-05-01 00:00:00 85.1127 2.160 5.67
65 1995-06-01 00:00:00 84.6355 2.140 5.42
66 1995-07-01 00:00:00 87.3970 2.170 5.37
67 1995-08-01 00:00:00 94.7383 2.410 5.41
68 1995-09-01 00:00:00 100.5455 1.950 5.30
69 1995-10-01 00:00:00 100.8390 2.010 5.32
70 1995-11-01 00:00:00 101.9400 1.880 5.27
71 1995-12-01 00:00:00 101.8495 2.170 5.13
72 1996-01-01 00:00:00 105.7514 2.320 4.92
73 1996-02-01 00:00:00 105.7880 2.580 4.77
74 1996-03-01 00:00:00 105.9400 2.300 4.96
75 1996-04-01 00:00:00 107.1995 2.550 5.06
76 1996-05-01 00:00:00 106.3423 2.350 5.12
77 1996-06-01 00:00:00 108.9600 2.400 5.25
78 1996-07-01 00:00:00 109.1909 2.530 5.30
79 1996-08-01 00:00:00 107.8659 2.140 5.13
80 1996-09-01 00:00:00 109.9310 2.050 5.24
81 1996-10-01 00:00:00 112.4123 1.800 5.11
82 1996-11-01 00:00:00 112.2958 1.750 5.07
83 1996-12-01 00:00:00 113.9810 1.930 5.04
84 1997-01-01 00:00:00 117.9124 1.770 5.10
85 1997-02-01 00:00:00 122.9621 1.780 5.06
86 1997-03-01 00:00:00 122.7738 1.750 5.26
87 1997-04-01 00:00:00 125.6377 1.850 5.37
88 1997-05-01 00:00:00 119.1924 2.030 5.30
89 1997-06-01 00:00:00 114.2857 1.920 5.13
90 1997-07-01 00:00:00 115.3759 1.700 5.12
91 1997-08-01 00:00:00 117.9295 1.600 5.19
92 1997-09-01 00:00:00 120.8900 1.540 5.09
93 1997-10-01 00:00:00 121.0605 1.350 5.09
94 1997-11-01 00:00:00 125.3817 1.430 5.17
95 1997-12-01 00:00:00 129.7341 1.530 5.24
96 1998-01-01 00:00:00 129.5475 1.420 5.03
97 1998-02-01 00:00:00 125.8516 1.400 5.07
98 1998-03-01 00:00:00 129.0823 1.280 5.04
99 1998-04-01 00:00:00 131.7536 1.080 5.06
100 1998-05-01 00:00:00 134.8960 1.080 5.14
101 1998-06-01 00:00:00 140.3305 1.210 5.12
102 1998-07-01 00:00:00 140.7874 1.150 5.03
103 1998-08-01 00:00:00 144.6800 0.980 4.95
104 1998-09-01 00:00:00 134.4805 0.660 4.63
105 1998-10-01 00:00:00 121.0486 0.690 4.05
106 1998-11-01 00:00:00 120.2895 0.850 4.42
107 1998-12-01 00:00:00 117.0709 1.360 4.40
108 1999-01-01 00:00:00 113.2900 2.100 4.33
109 1999-02-01 00:00:00 116.6684 1.995 4.44
110 1999-03-01 00:00:00 119.4730 1.745 4.47
111 1999-04-01 00:00:00 119.7723 1.405 4.37
112 1999-05-01 00:00:00 121.9995 1.480 4.56
113 1999-06-01 00:00:00 120.7245 1.835 4.82
114 1999-07-01 00:00:00 119.3305 1.785 4.58
115 1999-08-01 00:00:00 113.2268 1.910 4.87
116 1999-09-01 00:00:00 106.8752 1.710 4.88
117 1999-10-01 00:00:00 105.9650 1.810 4.98
118 1999-11-01 00:00:00 104.6485 1.830 5.20
119 1999-12-01 00:00:00 102.5843 1.645 5.44
120 2000-01-01 00:00:00 105.2960 1.710 5.50
121 2000-02-01 00:00:00 109.3885 1.835 5.72
122 2000-03-01 00:00:00 106.3074 1.770 5.85
123 2000-04-01 00:00:00 105.6270 1.760 5.81
124 2000-05-01 00:00:00 108.3205 1.655 6.10
125 2000-06-01 00:00:00 106.1255 1.760 5.97
126 2000-07-01 00:00:00 108.2115 1.675 6.00
127 2000-08-01 00:00:00 108.0804 1.895 6.07
128 2000-09-01 00:00:00 106.8375 1.840 5.98
129 2000-10-01 00:00:00 108.4429 1.815 6.04
130 2000-11-01 00:00:00 109.0095 1.615 6.06
131 2000-12-01 00:00:00 112.2090 1.640 5.68
132 2001-01-01 00:00:00 116.6719 1.500 4.95
133 2001-02-01 00:00:00 116.2337 1.300 4.71
134 2001-03-01 00:00:00 121.5050 1.270 4.28
135 2001-04-01 00:00:00 123.7710 1.290 3.85
136 2001-05-01 00:00:00 121.7682 1.240 3.62
137 2001-06-01 00:00:00 122.3510 1.210 3.45
138 2001-07-01 00:00:00 124.4981 1.330 3.45
139 2001-08-01 00:00:00 121.3670 1.375 3.29
140 2001-09-01 00:00:00 118.6117 1.420 2.63
141 2001-10-01 00:00:00 121.4536 1.300 2.12
142 2001-11-01 00:00:00 122.4055 1.355 1.88
143 2001-12-01 00:00:00 127.5945 1.365 1.78
144 2002-01-01 00:00:00 132.6833 1.480 1.73
145 2002-02-01 00:00:00 133.6426 1.525 1.82
146 2002-03-01 00:00:00 131.0610 1.390 2.01
147 2002-04-01 00:00:00 130.7718 1.370 1.93
148 2002-05-01 00:00:00 126.3750 1.385 1.86
149 2002-06-01 00:00:00 123.2905 1.310 1.79
150 2002-07-01 00:00:00 117.8991 1.310 1.70
151 2002-08-01 00:00:00 118.9927 1.175 1.60
152 2002-09-01 00:00:00 121.0780 1.175 1.60
153 2002-10-01 00:00:00 123.9077 0.985 1.56
154 2002-11-01 00:00:00 121.6079 0.995 1.27
155 2002-12-01 00:00:00 121.8929 0.900 1.24
156 2003-01-01 00:00:00 118.8133 0.800 1.20
157 2003-02-01 00:00:00 119.3379 0.780 1.18
158 2003-03-01 00:00:00 118.6871 0.700 1.13
159 2003-04-01 00:00:00 119.8950 0.605 1.14
160 2003-05-01 00:00:00 117.3681 0.530 1.08
161 2003-06-01 00:00:00 118.3290 0.835 0.92
162 2003-07-01 00:00:00 118.6959 0.935 0.95
163 2003-08-01 00:00:00 118.6624 1.455 1.03
164 2003-09-01 00:00:00 114.8000 1.385 1.01
165 2003-10-01 00:00:00 109.4955 1.460 1.00
166 2003-11-01 00:00:00 109.1778 1.295 1.02
167 2003-12-01 00:00:00 107.7377 1.360 0.99
168 2004-01-01 00:00:00 106.2685 1.310 0.97
169 2004-02-01 00:00:00 106.7079 1.225 0.99
170 2004-03-01 00:00:00 108.5157 1.440 0.99
171 2004-04-01 00:00:00 107.6564 1.530 1.09
172 2004-05-01 00:00:00 112.1960 1.520 1.31
173 2004-06-01 00:00:00 109.4336 1.775 1.60
174 2004-07-01 00:00:00 109.4871 1.850 1.66
175 2004-08-01 00:00:00 110.2336 1.535 1.72
176 2004-09-01 00:00:00 110.0914 1.440 1.87
177 2004-10-01 00:00:00 108.7835 1.490 2.00
178 2004-11-01 00:00:00 104.6990 1.445 2.27
179 2004-12-01 00:00:00 103.8104 1.430 2.43
180 2005-01-01 00:00:00 103.3410 1.320 2.61
181 2005-02-01 00:00:00 104.9442 1.465 2.77
182 2005-03-01 00:00:00 105.2543 1.320 3.00
183 2005-04-01 00:00:00 107.1938 1.235 3.05
184 2005-05-01 00:00:00 106.5952 1.240 3.08
185 2005-06-01 00:00:00 108.7473 1.165 3.13
186 2005-07-01 00:00:00 111.9535 1.305 3.42
187 2005-08-01 00:00:00 110.6065 1.345 3.66
188 2005-09-01 00:00:00 111.2390 1.475 3.67
189 2005-10-01 00:00:00 114.8695 1.545 3.99
190 2005-11-01 00:00:00 118.4540 1.430 4.15
191 2005-12-01 00:00:00 118.4624 1.470 4.18
192 2006-01-01 00:00:00 115.4765 1.560 4.31
193 2006-02-01 00:00:00 117.8605 1.580 4.52
194 2006-03-01 00:00:00 117.2778 1.765 4.62
195 2006-04-01 00:00:00 117.0695 1.920 4.72
196 2006-05-01 00:00:00 111.7305 1.810 4.82
197 2006-06-01 00:00:00 114.6250 1.915 4.97
198 2006-07-01 00:00:00 115.7670 1.920 5.06
199 2006-08-01 00:00:00 115.9243 1.610 4.97
200 2006-09-01 00:00:00 117.2145 1.665 4.89
201 2006-10-01 00:00:00 118.6090 1.710 4.92
202 2006-11-01 00:00:00 117.3205 1.645 4.95
203 2006-12-01 00:00:00 117.3220 1.675 4.88
204 2007-01-01 00:00:00 120.4471 1.695 4.95
205 2007-02-01 00:00:00 120.5047 1.625 4.96
206 2007-03-01 00:00:00 117.2600 1.650 4.89
207 2007-04-01 00:00:00 118.9324 1.620 4.86
208 2007-05-01 00:00:00 120.7732 1.745 4.78
209 2007-06-01 00:00:00 122.6886 1.870 4.76
210 2007-07-01 00:00:00 121.4148 1.790 4.83
211 2007-08-01 00:00:00 116.7335 1.595 4.38
212 2007-09-01 00:00:00 115.0435 1.675 4.05
213 2007-10-01 00:00:00 115.8661 1.600 4.01
214 2007-11-01 00:00:00 111.0729 1.470 3.46
215 2007-12-01 00:00:00 112.4490 1.500 3.23
216 2008-01-01 00:00:00 107.8181 1.430 2.75
217 2008-02-01 00:00:00 107.0300 1.355 2.04
218 2008-03-01 00:00:00 100.7562 1.275 1.48
219 2008-04-01 00:00:00 102.6777 1.575 1.55
220 2008-05-01 00:00:00 104.3595 1.750 1.82
221 2008-06-01 00:00:00 106.9152 1.590 2.13
222 2008-07-01 00:00:00 106.8518 1.530 1.93
223 2008-08-01 00:00:00 109.3624 1.405 1.92
224 2008-09-01 00:00:00 106.5748 1.460 1.61
225 2008-10-01 00:00:00 99.9659 1.465 1.20
226 2008-11-01 00:00:00 96.9656 1.390 0.73
227 2008-12-01 00:00:00 91.2750 1.165 0.26
228 2009-01-01 00:00:00 90.1205 1.290 0.30
229 2009-02-01 00:00:00 92.9158 1.270 0.45
230 2009-03-01 00:00:00 97.8550 1.345 0.42
231 2009-04-01 00:00:00 98.9200 1.420 0.35
232 2009-05-01 00:00:00 96.6445 1.480 0.30
233 2009-06-01 00:00:00 96.6145 1.350 0.31
234 2009-07-01 00:00:00 94.3670 1.410 0.27
235 2009-08-01 00:00:00 94.8971 1.305 0.26
236 2009-09-01 00:00:00 91.2748 1.290 0.21
237 2009-10-01 00:00:00 90.3671 1.405 0.16
238 2009-11-01 00:00:00 89.2674 1.260 0.15
239 2009-12-01 00:00:00 89.9509 1.285 0.17
240 2010-01-01 00:00:00 91.1011 1.315 0.15
241 2010-02-01 00:00:00 90.1395 1.295 0.18
242 2010-03-01 00:00:00 90.7161 1.390 0.22
243 2010-04-01 00:00:00 93.4527 1.280 0.24
244 2010-05-01 00:00:00 91.9730 1.255 0.22
245 2010-06-01 00:00:00 90.8059 1.080 0.19
246 2010-07-01 00:00:00 87.5005 1.065 0.20
247 2010-08-01 00:00:00 85.3727 0.960 0.19
248 2010-09-01 00:00:00 84.3571 0.930 0.19
249 2010-10-01 00:00:00 81.7285 0.930 0.18
250 2010-11-01 00:00:00 82.5180 1.190 0.18
251 2010-12-01 00:00:00 83.3376 1.120 0.19
252 2011-01-01 00:00:00 82.6250 1.210 0.18
253 2011-02-01 00:00:00 82.5368 1.255 0.17
254 2011-03-01 00:00:00 81.6470 1.250 0.16
255 2011-04-01 00:00:00 83.1771 1.200 0.12
256 2011-05-01 00:00:00 81.1257 1.150 0.09
257 2011-06-01 00:00:00 80.4259 1.130 0.10
258 2011-07-01 00:00:00 79.2425 1.075 0.08
259 2011-08-01 00:00:00 76.9657 1.030 0.06
260 2011-09-01 00:00:00 76.7957 1.025 0.04
261 2011-10-01 00:00:00 76.6430 1.045 0.05
262 2011-11-01 00:00:00 77.5595 1.070 0.05
263 2011-12-01 00:00:00 77.7967 0.980 0.05
264 2012-01-01 00:00:00 76.9640 0.960 0.07
265 2012-02-01 00:00:00 78.4700 0.955 0.12
266 2012-03-01 00:00:00 82.4659 0.985 0.14
267 2012-04-01 00:00:00 81.2524 0.885 0.14
268 2012-05-01 00:00:00 79.6668 0.825 0.15
269 2012-06-01 00:00:00 79.3152 NaN 0.15

In [95]:
# let's try this again
fred = pd.read_csv('fred_fx.csv')
fred.dtypes


Out[95]:
DATE              object
EXJPUS           float64
INTGSBJPM193N    float64
TB6MS            float64
dtype: object

In [96]:
fred['DATE'] = pd.to_datetime(fred['DATE'])
fred.dtypes


Out[96]:
DATE             datetime64[ns]
EXJPUS                  float64
INTGSBJPM193N           float64
TB6MS                   float64
dtype: object

In [98]:
fred.index[:50]


Out[98]:
Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], dtype=int64)

In [100]:
# Returns new object without inplace=True!
fred.set_index('DATE', inplace=True)

In [101]:
fred


Out[101]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 270 entries, 1990-01-01 00:00:00 to 2012-06-01 00:00:00
Data columns (total 3 columns):
EXJPUS           270  non-null values
INTGSBJPM193N    269  non-null values
TB6MS            270  non-null values
dtypes: float64(3)

In [102]:
fred.head()


Out[102]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-01-01 144.9819 6.64 7.55
1990-02-01 145.6932 6.92 7.70
1990-03-01 153.3082 7.36 7.85
1990-04-01 158.4586 7.24 7.84
1990-05-01 154.0441 6.70 7.76

In [104]:
fred.EXJPUS


Out[104]:
DATE
1990-01-01    144.9819
1990-02-01    145.6932
1990-03-01    153.3082
1990-04-01    158.4586
1990-05-01    154.0441
1990-06-01    153.6957
1990-07-01    149.0395
1990-08-01    147.4609
1990-09-01    138.4405
1990-10-01    129.5909
1990-11-01    129.2155
1990-12-01    133.8890
1991-01-01    133.6986
1991-02-01    130.5358
1991-03-01    137.3867
...
2011-04-01    83.1771
2011-05-01    81.1257
2011-06-01    80.4259
2011-07-01    79.2425
2011-08-01    76.9657
2011-09-01    76.7957
2011-10-01    76.6430
2011-11-01    77.5595
2011-12-01    77.7967
2012-01-01    76.9640
2012-02-01    78.4700
2012-03-01    82.4659
2012-04-01    81.2524
2012-05-01    79.6668
2012-06-01    79.3152
Name: EXJPUS, Length: 270, dtype: float64

In [106]:
type(fred.EXJPUS)


Out[106]:
[pandas.core.series.TimeSeries,
 pandas.core.series.Series,
 pandas.core.generic.PandasContainer,
 pandas.core.base.PandasObject,
 pandas.core.base.StringMixin,
 numpy.ndarray,
 object]

In [108]:
fred.EXJPUS


Out[108]:
DATE
1990-01-01    144.9819
1990-02-01    145.6932
1990-03-01    153.3082
1990-04-01    158.4586
1990-05-01    154.0441
1990-06-01    153.6957
1990-07-01    149.0395
1990-08-01    147.4609
1990-09-01    138.4405
1990-10-01    129.5909
1990-11-01    129.2155
1990-12-01    133.8890
1991-01-01    133.6986
1991-02-01    130.5358
1991-03-01    137.3867
...
2011-04-01    83.1771
2011-05-01    81.1257
2011-06-01    80.4259
2011-07-01    79.2425
2011-08-01    76.9657
2011-09-01    76.7957
2011-10-01    76.6430
2011-11-01    77.5595
2011-12-01    77.7967
2012-01-01    76.9640
2012-02-01    78.4700
2012-03-01    82.4659
2012-04-01    81.2524
2012-05-01    79.6668
2012-06-01    79.3152
Name: EXJPUS, Length: 270, dtype: float64

In [109]:
fred.index


Out[109]:
<class 'pandas.tseries.index.DatetimeIndex'>
[1990-01-01 00:00:00, ..., 2012-06-01 00:00:00]
Length: 270, Freq: None, Timezone: None

In [110]:
fred.index[5]


Out[110]:
Timestamp('1990-06-01 00:00:00', tz=None)

In [113]:
stamp = fred.index[5]
stamp


Out[113]:
Timestamp('1990-06-01 00:00:00', tz=None)

In [115]:
stamp.year, stamp.month, stamp.day


Out[115]:
(1990, 6, 1)

In [116]:
stamp.weekday()


Out[116]:
4

In [117]:
fred.index.year


Out[117]:
array([1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990,
       1990, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991,
       1991, 1991, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992,
       1992, 1992, 1992, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993,
       1993, 1993, 1993, 1993, 1994, 1994, 1994, 1994, 1994, 1994, 1994,
       1994, 1994, 1994, 1994, 1994, 1995, 1995, 1995, 1995, 1995, 1995,
       1995, 1995, 1995, 1995, 1995, 1995, 1996, 1996, 1996, 1996, 1996,
       1996, 1996, 1996, 1996, 1996, 1996, 1996, 1997, 1997, 1997, 1997,
       1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1998, 1998, 1998,
       1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1999, 1999,
       1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 2000,
       2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,
       2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001,
       2001, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002,
       2002, 2002, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003,
       2003, 2003, 2003, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004,
       2004, 2004, 2004, 2004, 2005, 2005, 2005, 2005, 2005, 2005, 2005,
       2005, 2005, 2005, 2005, 2005, 2006, 2006, 2006, 2006, 2006, 2006,
       2006, 2006, 2006, 2006, 2006, 2006, 2007, 2007, 2007, 2007, 2007,
       2007, 2007, 2007, 2007, 2007, 2007, 2007, 2008, 2008, 2008, 2008,
       2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2009, 2009, 2009,
       2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2010, 2010,
       2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2011,
       2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011,
       2012, 2012, 2012, 2012, 2012, 2012], dtype=int32)

In [118]:
fred.index.weekday


Out[118]:
array([0, 3, 3, 6, 1, 4, 6, 2, 5, 0, 3, 5, 1, 4, 4, 0, 2, 5, 0, 3, 6, 1, 4,
       6, 2, 5, 6, 2, 4, 0, 2, 5, 1, 3, 6, 1, 4, 0, 0, 3, 5, 1, 3, 6, 2, 4,
       0, 2, 5, 1, 1, 4, 6, 2, 4, 0, 3, 5, 1, 3, 6, 2, 2, 5, 0, 3, 5, 1, 4,
       6, 2, 4, 0, 3, 4, 0, 2, 5, 0, 3, 6, 1, 4, 6, 2, 5, 5, 1, 3, 6, 1, 4,
       0, 2, 5, 0, 3, 6, 6, 2, 4, 0, 2, 5, 1, 3, 6, 1, 4, 0, 0, 3, 5, 1, 3,
       6, 2, 4, 0, 2, 5, 1, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4, 0, 3, 3, 6, 1, 4,
       6, 2, 5, 0, 3, 5, 1, 4, 4, 0, 2, 5, 0, 3, 6, 1, 4, 6, 2, 5, 5, 1, 3,
       6, 1, 4, 0, 2, 5, 0, 3, 6, 0, 3, 5, 1, 3, 6, 2, 4, 0, 2, 5, 1, 1, 4,
       6, 2, 4, 0, 3, 5, 1, 3, 6, 2, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4, 0, 3, 3,
       6, 1, 4, 6, 2, 5, 0, 3, 5, 1, 4, 5, 1, 3, 6, 1, 4, 0, 2, 5, 0, 3, 6,
       6, 2, 4, 0, 2, 5, 1, 3, 6, 1, 4, 0, 0, 3, 5, 1, 3, 6, 2, 4, 0, 2, 5,
       1, 1, 4, 6, 2, 4, 0, 3, 5, 1, 3, 6, 2, 3, 6, 1, 4], dtype=int32)

In [119]:
fred


Out[119]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 270 entries, 1990-01-01 00:00:00 to 2012-06-01 00:00:00
Data columns (total 3 columns):
EXJPUS           270  non-null values
INTGSBJPM193N    269  non-null values
TB6MS            270  non-null values
dtypes: float64(3)

In [120]:
fred.EXJPUS


Out[120]:
DATE
1990-01-01    144.9819
1990-02-01    145.6932
1990-03-01    153.3082
1990-04-01    158.4586
1990-05-01    154.0441
1990-06-01    153.6957
1990-07-01    149.0395
1990-08-01    147.4609
1990-09-01    138.4405
1990-10-01    129.5909
1990-11-01    129.2155
1990-12-01    133.8890
1991-01-01    133.6986
1991-02-01    130.5358
1991-03-01    137.3867
...
2011-04-01    83.1771
2011-05-01    81.1257
2011-06-01    80.4259
2011-07-01    79.2425
2011-08-01    76.9657
2011-09-01    76.7957
2011-10-01    76.6430
2011-11-01    77.5595
2011-12-01    77.7967
2012-01-01    76.9640
2012-02-01    78.4700
2012-03-01    82.4659
2012-04-01    81.2524
2012-05-01    79.6668
2012-06-01    79.3152
Name: EXJPUS, Length: 270, dtype: float64

In [121]:
fred.EXJPUS.plot()


Out[121]:
<matplotlib.axes.AxesSubplot at 0x106af4890>

In [122]:
fred.EXJPUS['1999':'2007']


Out[122]:
DATE
1999-01-01    113.2900
1999-02-01    116.6684
1999-03-01    119.4730
1999-04-01    119.7723
1999-05-01    121.9995
1999-06-01    120.7245
1999-07-01    119.3305
1999-08-01    113.2268
1999-09-01    106.8752
1999-10-01    105.9650
1999-11-01    104.6485
1999-12-01    102.5843
2000-01-01    105.2960
2000-02-01    109.3885
2000-03-01    106.3074
...
2006-10-01    118.6090
2006-11-01    117.3205
2006-12-01    117.3220
2007-01-01    120.4471
2007-02-01    120.5047
2007-03-01    117.2600
2007-04-01    118.9324
2007-05-01    120.7732
2007-06-01    122.6886
2007-07-01    121.4148
2007-08-01    116.7335
2007-09-01    115.0435
2007-10-01    115.8661
2007-11-01    111.0729
2007-12-01    112.4490
Name: EXJPUS, Length: 108, dtype: float64

In [123]:
fred.EXJPUS['1999-05':'2001-09']


Out[123]:
DATE
1999-05-01    121.9995
1999-06-01    120.7245
1999-07-01    119.3305
1999-08-01    113.2268
1999-09-01    106.8752
1999-10-01    105.9650
1999-11-01    104.6485
1999-12-01    102.5843
2000-01-01    105.2960
2000-02-01    109.3885
2000-03-01    106.3074
2000-04-01    105.6270
2000-05-01    108.3205
2000-06-01    106.1255
2000-07-01    108.2115
2000-08-01    108.0804
2000-09-01    106.8375
2000-10-01    108.4429
2000-11-01    109.0095
2000-12-01    112.2090
2001-01-01    116.6719
2001-02-01    116.2337
2001-03-01    121.5050
2001-04-01    123.7710
2001-05-01    121.7682
2001-06-01    122.3510
2001-07-01    124.4981
2001-08-01    121.3670
2001-09-01    118.6117
Name: EXJPUS, dtype: float64

In [124]:
stamp


Out[124]:
Timestamp('1990-06-01 00:00:00', tz=None)

In [126]:
fred.EXJPUS[stamp]


Out[126]:
153.69569999999999

In [127]:
fred.ix[stamp]


Out[127]:
EXJPUS           153.6957
INTGSBJPM193N      7.0600
TB6MS              7.6300
Name: 1990-06-01 00:00:00, dtype: float64

In [128]:
fred


Out[128]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 270 entries, 1990-01-01 00:00:00 to 2012-06-01 00:00:00
Data columns (total 3 columns):
EXJPUS           270  non-null values
INTGSBJPM193N    269  non-null values
TB6MS            270  non-null values
dtypes: float64(3)

In [131]:
fred.head(20)


Out[131]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-01-01 144.9819 6.64 7.55
1990-02-01 145.6932 6.92 7.70
1990-03-01 153.3082 7.36 7.85
1990-04-01 158.4586 7.24 7.84
1990-05-01 154.0441 6.70 7.76
1990-06-01 153.6957 7.06 7.63
1990-07-01 149.0395 7.48 7.52
1990-08-01 147.4609 8.19 7.38
1990-09-01 138.4405 8.62 7.32
1990-10-01 129.5909 7.68 7.16
1990-11-01 129.2155 7.40 7.03
1990-12-01 133.8890 7.06 6.70
1991-01-01 133.6986 6.83 6.28
1991-02-01 130.5358 6.66 5.93
1991-03-01 137.3867 6.89 5.92
1991-04-01 137.1127 7.00 5.71
1991-05-01 138.2218 6.87 5.61
1991-06-01 139.7475 7.12 5.75
1991-07-01 137.8300 6.80 5.70
1991-08-01 136.8164 6.55 5.39

In [133]:
years = fred.index.year
years


Out[133]:
array([1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990,
       1990, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991,
       1991, 1991, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992,
       1992, 1992, 1992, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993,
       1993, 1993, 1993, 1993, 1994, 1994, 1994, 1994, 1994, 1994, 1994,
       1994, 1994, 1994, 1994, 1994, 1995, 1995, 1995, 1995, 1995, 1995,
       1995, 1995, 1995, 1995, 1995, 1995, 1996, 1996, 1996, 1996, 1996,
       1996, 1996, 1996, 1996, 1996, 1996, 1996, 1997, 1997, 1997, 1997,
       1997, 1997, 1997, 1997, 1997, 1997, 1997, 1997, 1998, 1998, 1998,
       1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1999, 1999,
       1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 1999, 2000,
       2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,
       2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001,
       2001, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002,
       2002, 2002, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003,
       2003, 2003, 2003, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004,
       2004, 2004, 2004, 2004, 2005, 2005, 2005, 2005, 2005, 2005, 2005,
       2005, 2005, 2005, 2005, 2005, 2006, 2006, 2006, 2006, 2006, 2006,
       2006, 2006, 2006, 2006, 2006, 2006, 2007, 2007, 2007, 2007, 2007,
       2007, 2007, 2007, 2007, 2007, 2007, 2007, 2008, 2008, 2008, 2008,
       2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2009, 2009, 2009,
       2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2010, 2010,
       2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2011,
       2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011,
       2012, 2012, 2012, 2012, 2012, 2012], dtype=int32)

In [136]:
annual_min = fred.groupby(years).min()
annual_max = fred.groupby(years).max()

In [139]:
# Chapter 7
annual_min_and_max = pd.concat([annual_min, annual_max], 
                               axis=1, keys=['min', 'max'])
annual_min_and_max


Out[139]:
min max
EXJPUS INTGSBJPM193N TB6MS EXJPUS INTGSBJPM193N TB6MS
1990 129.2155 6.640 6.70 158.4586 8.620 7.85
1991 128.0395 5.530 4.10 139.7475 7.120 6.28
1992 121.1652 4.370 2.96 133.5395 5.530 4.18
1993 103.7650 2.580 2.97 124.9932 4.350 3.26
1994 98.0440 3.290 3.15 111.4415 4.140 6.21
1995 83.6895 1.880 5.13 101.9400 3.950 6.21
1996 105.7514 1.750 4.77 113.9810 2.580 5.30
1997 114.2857 1.350 5.06 129.7341 2.030 5.37
1998 117.0709 0.660 4.05 144.6800 1.420 5.14
1999 102.5843 1.405 4.33 121.9995 2.100 5.44
2000 105.2960 1.615 5.50 112.2090 1.895 6.10
2001 116.2337 1.210 1.78 127.5945 1.500 4.95
2002 117.8991 0.900 1.24 133.6426 1.525 2.01
2003 107.7377 0.530 0.92 119.8950 1.460 1.20
2004 103.8104 1.225 0.97 112.1960 1.850 2.43
2005 103.3410 1.165 2.61 118.4624 1.545 4.18
2006 111.7305 1.560 4.31 118.6090 1.920 5.06
2007 111.0729 1.470 3.23 122.6886 1.870 4.96
2008 91.2750 1.165 0.26 109.3624 1.750 2.75
2009 89.2674 1.260 0.15 98.9200 1.480 0.45
2010 81.7285 0.930 0.15 93.4527 1.390 0.24
2011 76.6430 0.980 0.04 83.1771 1.255 0.18
2012 76.9640 0.825 0.07 82.4659 0.985 0.15

In [141]:
fred.EXJPUS.resample('A-DEC', how='min')


Out[141]:
DATE
1990-12-31    129.2155
1991-12-31    128.0395
1992-12-31    121.1652
1993-12-31    103.7650
1994-12-31     98.0440
1995-12-31     83.6895
1996-12-31    105.7514
1997-12-31    114.2857
1998-12-31    117.0709
1999-12-31    102.5843
2000-12-31    105.2960
2001-12-31    116.2337
2002-12-31    117.8991
2003-12-31    107.7377
2004-12-31    103.8104
2005-12-31    103.3410
2006-12-31    111.7305
2007-12-31    111.0729
2008-12-31     91.2750
2009-12-31     89.2674
2010-12-31     81.7285
2011-12-31     76.6430
2012-12-31     76.9640
Freq: A-DEC, dtype: float64

In [142]:
fred.EXJPUS.resample('A-DEC', how=['min', 'max'])


Out[142]:
min max
DATE
1990-12-31 129.2155 158.4586
1991-12-31 128.0395 139.7475
1992-12-31 121.1652 133.5395
1993-12-31 103.7650 124.9932
1994-12-31 98.0440 111.4415
1995-12-31 83.6895 101.9400
1996-12-31 105.7514 113.9810
1997-12-31 114.2857 129.7341
1998-12-31 117.0709 144.6800
1999-12-31 102.5843 121.9995
2000-12-31 105.2960 112.2090
2001-12-31 116.2337 127.5945
2002-12-31 117.8991 133.6426
2003-12-31 107.7377 119.8950
2004-12-31 103.8104 112.1960
2005-12-31 103.3410 118.4624
2006-12-31 111.7305 118.6090
2007-12-31 111.0729 122.6886
2008-12-31 91.2750 109.3624
2009-12-31 89.2674 98.9200
2010-12-31 81.7285 93.4527
2011-12-31 76.6430 83.1771
2012-12-31 76.9640 82.4659

In [144]:
annual_minmax = fred.resample('A-DEC', how=['min', 'max'])
annual_minmax


Out[144]:
EXJPUS INTGSBJPM193N TB6MS
min max min max min max
DATE
1990-12-31 129.2155 158.4586 6.640 8.620 6.70 7.85
1991-12-31 128.0395 139.7475 5.530 7.120 4.10 6.28
1992-12-31 121.1652 133.5395 4.370 5.530 2.96 4.18
1993-12-31 103.7650 124.9932 2.580 4.350 2.97 3.26
1994-12-31 98.0440 111.4415 3.290 4.140 3.15 6.21
1995-12-31 83.6895 101.9400 1.880 3.950 5.13 6.21
1996-12-31 105.7514 113.9810 1.750 2.580 4.77 5.30
1997-12-31 114.2857 129.7341 1.350 2.030 5.06 5.37
1998-12-31 117.0709 144.6800 0.660 1.420 4.05 5.14
1999-12-31 102.5843 121.9995 1.405 2.100 4.33 5.44
2000-12-31 105.2960 112.2090 1.615 1.895 5.50 6.10
2001-12-31 116.2337 127.5945 1.210 1.500 1.78 4.95
2002-12-31 117.8991 133.6426 0.900 1.525 1.24 2.01
2003-12-31 107.7377 119.8950 0.530 1.460 0.92 1.20
2004-12-31 103.8104 112.1960 1.225 1.850 0.97 2.43
2005-12-31 103.3410 118.4624 1.165 1.545 2.61 4.18
2006-12-31 111.7305 118.6090 1.560 1.920 4.31 5.06
2007-12-31 111.0729 122.6886 1.470 1.870 3.23 4.96
2008-12-31 91.2750 109.3624 1.165 1.750 0.26 2.75
2009-12-31 89.2674 98.9200 1.260 1.480 0.15 0.45
2010-12-31 81.7285 93.4527 0.930 1.390 0.15 0.24
2011-12-31 76.6430 83.1771 0.980 1.255 0.04 0.18
2012-12-31 76.9640 82.4659 0.825 0.985 0.07 0.15

In [145]:
annual_minmax.columns


Out[145]:
MultiIndex
[(u'EXJPUS', u'min'), (u'EXJPUS', u'max'), (u'INTGSBJPM193N', u'min'), (u'INTGSBJPM193N', u'max'), (u'TB6MS', u'min'), (u'TB6MS', u'max')]

In [146]:
annual_minmax['EXJPUS', 'max']


Out[146]:
DATE
1990-12-31    158.4586
1991-12-31    139.7475
1992-12-31    133.5395
1993-12-31    124.9932
1994-12-31    111.4415
1995-12-31    101.9400
1996-12-31    113.9810
1997-12-31    129.7341
1998-12-31    144.6800
1999-12-31    121.9995
2000-12-31    112.2090
2001-12-31    127.5945
2002-12-31    133.6426
2003-12-31    119.8950
2004-12-31    112.1960
2005-12-31    118.4624
2006-12-31    118.6090
2007-12-31    122.6886
2008-12-31    109.3624
2009-12-31     98.9200
2010-12-31     93.4527
2011-12-31     83.1771
2012-12-31     82.4659
Freq: A-DEC, Name: (EXJPUS, max), dtype: float64

In [148]:
annual_minmax[('EXJPUS', 'max')].index


Out[148]:
<class 'pandas.tseries.index.DatetimeIndex'>
[1990-12-31 00:00:00, ..., 2012-12-31 00:00:00]
Length: 23, Freq: A-DEC, Timezone: None

In [150]:
# Your own aggregation function
def mad(x):
    return np.abs(x - x.mean()).mean()
fred.EXJPUS.resample('A-DEC', how=mad)


Out[150]:
DATE
1990-12-31    8.022794
1991-12-31    3.344775
1992-12-31    2.995399
1993-12-31    5.042639
1994-12-31    3.238742
1995-12-31    6.529103
1996-12-31    2.323658
1997-12-31    3.502692
1998-12-31    7.003133
1999-12-31    5.948200
2000-12-31    1.485524
2001-12-31    2.212117
2002-12-31    4.699887
2003-12-31    3.742594
2004-12-31    1.940404
2005-12-31    4.125758
2006-12-31    1.370915
2007-12-31    3.027983
2008-12-31    4.209392
2009-12-31    2.950133
2010-12-31    3.614492
2011-12-31    2.211200
2012-12-31    1.446733
Freq: A-DEC, dtype: float64

In [154]:
# Works with non-numeric data
import pandas.util.testing as tm
tm.rands(10)


Out[154]:
'ujx32j8s6C'

In [158]:
choices = np.array([rands(10) for i in xrange(50)])

draws = np.random.randint(0, 50, size=1000)
timestamps = pd.date_range('2013-10-17 13:10', periods=1000, freq='S')
ts = Series(choices.take(draws), index=timestamps)
ts[:5]


Out[158]:
2013-10-17 13:10:00    LiockWHKcs
2013-10-17 13:10:01    JCVdXVljNc
2013-10-17 13:10:02    qHT39t1dbZ
2013-10-17 13:10:03    dIzYNzp5x4
2013-10-17 13:10:04    vTX5A53FQq
Freq: S, dtype: object

In [ ]:
# Hint!
ts.unique()

Breakout session: SF Home sales

Also, Compute number of unique strings by minute in above example!

http://bit.ly/16e8TfA

'M' : Monthly how=['mean', 'count']


In [ ]:
# If you're having trouble displaying DataFrames in the console
pd.options.display.line_width = 1000

In [159]:
!head -n 10 SFHousing.csv












In [162]:
house = pd.read_csv('SFHousing.csv')


Out[162]:
0       $425,500 
1     $1,160,000 
2       $774,025 
3       $660,000 
4       $450,000 
5       $680,000 
6       $820,000 
7       $227,000 
8       $476,000 
9       $640,000 
10    $1,365,000 
11      $470,000 
12      $945,000 
13      $475,000 
14      $850,000 
...
1152    $1,105,000 
1153      $920,000 
1154      $650,000 
1155    $1,530,000 
1156    $1,810,000 
1157    $1,850,000 
1158    $1,140,000 
1159      $879,000 
1160    $1,085,000 
1161      $430,000 
1162      $842,930 
1163      $901,000 
1164      $872,000 
1165    $1,800,000 
1166    $1,935,000 
Name: Sale_Amount, Length: 1167, dtype: object

In [165]:
def parse_dollars(x):
    return float(x.replace('$', '').replace(',', ''))
house.Sale_Amount.map(parse_dollars)


Out[165]:
0      425500
1     1160000
2      774025
3      660000
4      450000
5      680000
6      820000
7      227000
8      476000
9      640000
10    1365000
11     470000
12     945000
13     475000
14     850000
...
1152    1105000
1153     920000
1154     650000
1155    1530000
1156    1810000
1157    1850000
1158    1140000
1159     879000
1160    1085000
1161     430000
1162     842930
1163     901000
1164     872000
1165    1800000
1166    1935000
Name: Sale_Amount, Length: 1167, dtype: float64

In [164]:
house.Sale_Amount.str.replace('[$,]', '').map(float)


Out[164]:
0      425500
1     1160000
2      774025
3      660000
4      450000
5      680000
6      820000
7      227000
8      476000
9      640000
10    1365000
11     470000
12     945000
13     475000
14     850000
...
1152    1105000
1153     920000
1154     650000
1155    1530000
1156    1810000
1157    1850000
1158    1140000
1159     879000
1160    1085000
1161     430000
1162     842930
1163     901000
1164     872000
1165    1800000
1166    1935000
Name: Sale_Amount, Length: 1167, dtype: float64

In [167]:
house


Out[167]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1167 entries, 0 to 1166
Data columns (total 6 columns):
APN             1167  non-null values
ADDRESS         1167  non-null values
Date_Sale       1167  non-null values
Sale_Amount     1167  non-null values
Num_Bedrooms    1167  non-null values
BLKLOT          1167  non-null values
dtypes: float64(1), int64(1), object(4)

In [168]:
house['Sale_Amount'] = house.Sale_Amount.str.replace('[$,]', '').map(float)
house['Date_Sale'] = pd.to_datetime(house['Date_Sale'])

In [171]:
pd.read_csv?

In [172]:
house = pd.read_csv('SFHousing.csv', parse_dates='Date_Sale', 
                    converters={'Sale_Amount': parse_dollars},
                    index_col='Date_Sale')
house


Out[172]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1167 entries, 2012-01-03 00:00:00 to 2012-12-11 00:00:00
Data columns (total 5 columns):
APN             1167  non-null values
ADDRESS         1167  non-null values
Sale_Amount     1167  non-null values
Num_Bedrooms    1167  non-null values
BLKLOT          1167  non-null values
dtypes: float64(1), int64(1), object(3)

In [179]:
results = house['Sale_Amount'].resample('M', how=['mean', 'count', 
                                                  'min', 'max'])
results


Out[179]:
mean count min max
Date_Sale
2012-01-31 786415.250000 68 1500 2004000
2012-02-29 743212.036585 82 92500 2750000
2012-03-31 852983.142857 105 10000 2515000
2012-04-30 873141.256000 125 29000 3800000
2012-05-31 1041000.985401 137 245000 4750000
2012-06-30 850681.054795 146 208000 1998000
2012-07-31 1073750.650000 100 208000 6000000
2012-08-31 2013070.721591 176 208000 16600000
2012-09-30 1084000.875000 104 119000 5100000
2012-10-31 963303.260274 73 208000 3500000
2012-11-30 982842.959184 49 430000 2200000
2012-12-31 1867500.000000 2 1800000 1935000

In [180]:
results['mean'].plot()


Out[180]:
<matplotlib.axes.AxesSubplot at 0x107588610>

In [181]:
mask = house['Sale_Amount'] > 100000
results2 = house[mask]['Sale_Amount'].resample('M', how=['mean', 'count', 
                                                  'min', 'max'])
results['mean'].plot()
results2['mean'].plot()


Out[181]:
<matplotlib.axes.AxesSubplot at 0x1077fb810>

In [182]:
results = house['Sale_Amount'].resample('M', how=['mean', 'std'])
results


Out[182]:
mean std
Date_Sale
2012-01-31 786415.250000 444604.955801
2012-02-29 743212.036585 419249.847713
2012-03-31 852983.142857 467924.650333
2012-04-30 873141.256000 487755.836551
2012-05-31 1041000.985401 707571.144498
2012-06-30 850681.054795 391403.908165
2012-07-31 1073750.650000 892045.137817
2012-08-31 2013070.721591 3979886.034985
2012-09-30 1084000.875000 682614.097054
2012-10-31 963303.260274 476037.983246
2012-11-30 982842.959184 366576.180111
2012-12-31 1867500.000000 95459.415460

In [188]:
sigma = 0.5
lower = results['mean'] - sigma * results['std']
upper = results['mean'] + sigma * results['std']

results['mean'].plot(style='k-')
lower.plot(style='k--')
upper.plot(style='k--')


Out[188]:
<matplotlib.axes.AxesSubplot at 0x107f784d0>

Shifting, correlation, date arithmetic


In [189]:
fred


Out[189]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 270 entries, 1990-01-01 00:00:00 to 2012-06-01 00:00:00
Data columns (total 3 columns):
EXJPUS           270  non-null values
INTGSBJPM193N    269  non-null values
TB6MS            270  non-null values
dtypes: float64(3)

In [191]:
fred.head(10)


Out[191]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-01-01 144.9819 6.64 7.55
1990-02-01 145.6932 6.92 7.70
1990-03-01 153.3082 7.36 7.85
1990-04-01 158.4586 7.24 7.84
1990-05-01 154.0441 6.70 7.76
1990-06-01 153.6957 7.06 7.63
1990-07-01 149.0395 7.48 7.52
1990-08-01 147.4609 8.19 7.38
1990-09-01 138.4405 8.62 7.32
1990-10-01 129.5909 7.68 7.16

In [192]:
fred.shift(3).head(10)


Out[192]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-01-01 NaN NaN NaN
1990-02-01 NaN NaN NaN
1990-03-01 NaN NaN NaN
1990-04-01 144.9819 6.64 7.55
1990-05-01 145.6932 6.92 7.70
1990-06-01 153.3082 7.36 7.85
1990-07-01 158.4586 7.24 7.84
1990-08-01 154.0441 6.70 7.76
1990-09-01 153.6957 7.06 7.63
1990-10-01 149.0395 7.48 7.52

In [194]:
fred_1diff = fred - fred.shift(1)
fred_1diff.head(10)


Out[194]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-01-01 NaN NaN NaN
1990-02-01 0.7113 0.28 0.15
1990-03-01 7.6150 0.44 0.15
1990-04-01 5.1504 -0.12 -0.01
1990-05-01 -4.4145 -0.54 -0.08
1990-06-01 -0.3484 0.36 -0.13
1990-07-01 -4.6562 0.42 -0.11
1990-08-01 -1.5786 0.71 -0.14
1990-09-01 -9.0204 0.43 -0.06
1990-10-01 -8.8496 -0.94 -0.16

In [195]:
fred_1diff.corr()


Out[195]:
EXJPUS INTGSBJPM193N TB6MS
EXJPUS 1.000000 0.114982 0.238944
INTGSBJPM193N 0.114982 1.000000 0.178863
TB6MS 0.238944 0.178863 1.000000

In [196]:
(fred - fred.shift(6)).corr()


Out[196]:
EXJPUS INTGSBJPM193N TB6MS
EXJPUS 1.000000 0.004250 0.091739
INTGSBJPM193N 0.004250 1.000000 0.326939
TB6MS 0.091739 0.326939 1.000000

In [199]:
s2


Out[199]:
one      1
two      2
three    3
four     4
five     5
dtype: int64

In [200]:
s3


Out[200]:
one      1
two      2
three    3
dtype: int64

In [201]:
s2 + s3


Out[201]:
five    NaN
four    NaN
one       2
three     6
two       4
dtype: float64

In [203]:
def lag_corr(table, periods):
    # you may not care but...
    return (table[periods:] - table.shift(periods)).corr()

def pctchg_corr(table, periods):
    # you may not care but...
    return (table[periods:] / table.shift(periods) - 1).corr()

all_lags = [lag_corr(fred, i) for i in xrange(1, 12)]

lags_onetable = pd.concat(all_lags, keys=range(1, 12))
lags_onetable


Out[203]:
EXJPUS INTGSBJPM193N TB6MS
1 EXJPUS 1.000000 0.114982 0.238944
INTGSBJPM193N 0.114982 1.000000 0.178863
TB6MS 0.238944 0.178863 1.000000
2 EXJPUS 1.000000 0.091502 0.229482
INTGSBJPM193N 0.091502 1.000000 0.250218
TB6MS 0.229482 0.250218 1.000000
3 EXJPUS 1.000000 0.045051 0.193890
INTGSBJPM193N 0.045051 1.000000 0.281884
TB6MS 0.193890 0.281884 1.000000
4 EXJPUS 1.000000 0.012804 0.162092
INTGSBJPM193N 0.012804 1.000000 0.303291
TB6MS 0.162092 0.303291 1.000000
5 EXJPUS 1.000000 0.005297 0.127369
INTGSBJPM193N 0.005297 1.000000 0.322181
TB6MS 0.127369 0.322181 1.000000
6 EXJPUS 1.000000 0.004250 0.091739
INTGSBJPM193N 0.004250 1.000000 0.326939
TB6MS 0.091739 0.326939 1.000000
7 EXJPUS 1.000000 -0.000657 0.063158
INTGSBJPM193N -0.000657 1.000000 0.320909
TB6MS 0.063158 0.320909 1.000000
8 EXJPUS 1.000000 -0.007450 0.042993
INTGSBJPM193N -0.007450 1.000000 0.320119
TB6MS 0.042993 0.320119 1.000000
9 EXJPUS 1.000000 -0.015944 0.032352
INTGSBJPM193N -0.015944 1.000000 0.323683
TB6MS 0.032352 0.323683 1.000000
10 EXJPUS 1.000000 -0.016366 0.027600
INTGSBJPM193N -0.016366 1.000000 0.327883
TB6MS 0.027600 0.327883 1.000000
11 EXJPUS 1.000000 -0.024651 0.026665
INTGSBJPM193N -0.024651 1.000000 0.329450
TB6MS 0.026665 0.329450 1.000000

In [208]:
unstacked = lags_onetable.unstack(1)
unstacked


Out[208]:
EXJPUS INTGSBJPM193N TB6MS
EXJPUS INTGSBJPM193N TB6MS EXJPUS INTGSBJPM193N TB6MS EXJPUS INTGSBJPM193N TB6MS
1 1 0.114982 0.238944 0.114982 1 0.178863 0.238944 0.178863 1
2 1 0.091502 0.229482 0.091502 1 0.250218 0.229482 0.250218 1
3 1 0.045051 0.193890 0.045051 1 0.281884 0.193890 0.281884 1
4 1 0.012804 0.162092 0.012804 1 0.303291 0.162092 0.303291 1
5 1 0.005297 0.127369 0.005297 1 0.322181 0.127369 0.322181 1
6 1 0.004250 0.091739 0.004250 1 0.326939 0.091739 0.326939 1
7 1 -0.000657 0.063158 -0.000657 1 0.320909 0.063158 0.320909 1
8 1 -0.007450 0.042993 -0.007450 1 0.320119 0.042993 0.320119 1
9 1 -0.015944 0.032352 -0.015944 1 0.323683 0.032352 0.323683 1
10 1 -0.016366 0.027600 -0.016366 1 0.327883 0.027600 0.327883 1
11 1 -0.024651 0.026665 -0.024651 1 0.329450 0.026665 0.329450 1

In [209]:
unstacked['TB6MS', 'EXJPUS'].plot(label='TB-JP')
unstacked['TB6MS', 'INTGSBJPM193N'].plot(label='TB-INT')


Out[209]:
<matplotlib.axes.AxesSubplot at 0x1073f8590>

In [210]:
fred_1diff


Out[210]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 270 entries, 1990-01-01 00:00:00 to 2012-06-01 00:00:00
Data columns (total 3 columns):
EXJPUS           269  non-null values
INTGSBJPM193N    268  non-null values
TB6MS            269  non-null values
dtypes: float64(3)

In [213]:
fred_1diff.EXJPUS.corr(fred_1diff.EXJPUS.shift(1))


Out[213]:
0.28489041350111521

In [215]:
fred_1diff.shift(1).head()


Out[215]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-01-01 NaN NaN NaN
1990-02-01 NaN NaN NaN
1990-03-01 0.7113 0.28 0.15
1990-04-01 7.6150 0.44 0.15
1990-05-01 5.1504 -0.12 -0.01

In [216]:
fred_1diff.corrwith(fred_1diff.shift(1))


Out[216]:
EXJPUS           0.284890
INTGSBJPM193N    0.153984
TB6MS            0.511621
dtype: float64

In [218]:
pd.DataFrame({'Lag1': fred_1diff.corrwith(fred_1diff.shift(1)),
     'Lag2': fred_1diff.corrwith(fred_1diff.shift(2))})


Out[218]:
Lag1 Lag2
EXJPUS 0.284890 0.058668
INTGSBJPM193N 0.153984 0.027434
TB6MS 0.511621 0.317941

In [221]:
lag_acorr_table = pd.DataFrame({'Lag%d' % i: 
                                fred_1diff.corrwith(fred_1diff.shift(i))
         for i in range(1, 7)})
lag_acorr_table


Out[221]:
Lag1 Lag2 Lag3 Lag4 Lag5 Lag6
EXJPUS 0.284890 0.058668 -0.004404 -0.114183 -0.239900 -0.210275
INTGSBJPM193N 0.153984 0.027434 -0.175531 -0.122494 0.024914 0.107726
TB6MS 0.511621 0.317941 0.295669 0.158632 0.146528 0.221403

In [222]:
lag_acorr_table.T


Out[222]:
EXJPUS INTGSBJPM193N TB6MS
Lag1 0.284890 0.153984 0.511621
Lag2 0.058668 0.027434 0.317941
Lag3 -0.004404 -0.175531 0.295669
Lag4 -0.114183 -0.122494 0.158632
Lag5 -0.239900 0.024914 0.146528
Lag6 -0.210275 0.107726 0.221403

Date arithmetic


In [224]:
fred.head(10)


Out[224]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-01-01 144.9819 6.64 7.55
1990-02-01 145.6932 6.92 7.70
1990-03-01 153.3082 7.36 7.85
1990-04-01 158.4586 7.24 7.84
1990-05-01 154.0441 6.70 7.76
1990-06-01 153.6957 7.06 7.63
1990-07-01 149.0395 7.48 7.52
1990-08-01 147.4609 8.19 7.38
1990-09-01 138.4405 8.62 7.32
1990-10-01 129.5909 7.68 7.16

In [228]:
fred.shift(10, freq='H').head(10)


Out[228]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-01-01 10:00:00 144.9819 6.64 7.55
1990-02-01 10:00:00 145.6932 6.92 7.70
1990-03-01 10:00:00 153.3082 7.36 7.85
1990-04-01 10:00:00 158.4586 7.24 7.84
1990-05-01 10:00:00 154.0441 6.70 7.76
1990-06-01 10:00:00 153.6957 7.06 7.63
1990-07-01 10:00:00 149.0395 7.48 7.52
1990-08-01 10:00:00 147.4609 8.19 7.38
1990-09-01 10:00:00 138.4405 8.62 7.32
1990-10-01 10:00:00 129.5909 7.68 7.16

In [226]:
fred.shift(2, freq='M').head(10)


Out[226]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-02-28 144.9819 6.64 7.55
1990-03-31 145.6932 6.92 7.70
1990-04-30 153.3082 7.36 7.85
1990-05-31 158.4586 7.24 7.84
1990-06-30 154.0441 6.70 7.76
1990-07-31 153.6957 7.06 7.63
1990-08-31 149.0395 7.48 7.52
1990-09-30 147.4609 8.19 7.38
1990-10-31 138.4405 8.62 7.32
1990-11-30 129.5909 7.68 7.16

In [233]:
pfred = fred.to_period('M')
pfred.head(10).index


Out[233]:
<class 'pandas.tseries.period.PeriodIndex'>
freq: M
[1990-01, ..., 1990-10]
length: 10

In [239]:
pfred.index[0]


Out[239]:
Period('1990-01', 'M')

In [240]:
pfred.index[0].asfreq('S', 'end')


Out[240]:
Period('1990-01-31 23:59:59', 'S')

In [241]:
pfred.index[0].asfreq('S', 'start')


Out[241]:
Period('1990-01-01 00:00:00', 'S')

In [244]:
pfred.index[0].asfreq('H', 'end') - 5


Out[244]:
Period('1990-01-31 18:00', 'H')

In [248]:
# 7th business day
(pfred.index[0].asfreq('B', 'start') + 6).to_timestamp()


Out[248]:
Timestamp('1990-01-09 00:00:00', tz=None)

In [250]:
fred.head()


Out[250]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-01-01 144.9819 6.64 7.55
1990-02-01 145.6932 6.92 7.70
1990-03-01 153.3082 7.36 7.85
1990-04-01 158.4586 7.24 7.84
1990-05-01 154.0441 6.70 7.76

In [252]:
fred.shift(4, freq='D').head()


Out[252]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-01-05 144.9819 6.64 7.55
1990-02-05 145.6932 6.92 7.70
1990-03-05 153.3082 7.36 7.85
1990-04-05 158.4586 7.24 7.84
1990-05-05 154.0441 6.70 7.76

In [257]:
fred.shift(4, freq='D').resample('D').head(50)


Out[257]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-01-05 144.9819 6.64 7.55
1990-01-06 NaN NaN NaN
1990-01-07 NaN NaN NaN
1990-01-08 NaN NaN NaN
1990-01-09 NaN NaN NaN
1990-01-10 NaN NaN NaN
1990-01-11 NaN NaN NaN
1990-01-12 NaN NaN NaN
1990-01-13 NaN NaN NaN
1990-01-14 NaN NaN NaN
1990-01-15 NaN NaN NaN
1990-01-16 NaN NaN NaN
1990-01-17 NaN NaN NaN
1990-01-18 NaN NaN NaN
1990-01-19 NaN NaN NaN
1990-01-20 NaN NaN NaN
1990-01-21 NaN NaN NaN
1990-01-22 NaN NaN NaN
1990-01-23 NaN NaN NaN
1990-01-24 NaN NaN NaN
1990-01-25 NaN NaN NaN
1990-01-26 NaN NaN NaN
1990-01-27 NaN NaN NaN
1990-01-28 NaN NaN NaN
1990-01-29 NaN NaN NaN
1990-01-30 NaN NaN NaN
1990-01-31 NaN NaN NaN
1990-02-01 NaN NaN NaN
1990-02-02 NaN NaN NaN
1990-02-03 NaN NaN NaN
1990-02-04 NaN NaN NaN
1990-02-05 145.6932 6.92 7.70
1990-02-06 NaN NaN NaN
1990-02-07 NaN NaN NaN
1990-02-08 NaN NaN NaN
1990-02-09 NaN NaN NaN
1990-02-10 NaN NaN NaN
1990-02-11 NaN NaN NaN
1990-02-12 NaN NaN NaN
1990-02-13 NaN NaN NaN
1990-02-14 NaN NaN NaN
1990-02-15 NaN NaN NaN
1990-02-16 NaN NaN NaN
1990-02-17 NaN NaN NaN
1990-02-18 NaN NaN NaN
1990-02-19 NaN NaN NaN
1990-02-20 NaN NaN NaN
1990-02-21 NaN NaN NaN
1990-02-22 NaN NaN NaN
1990-02-23 NaN NaN NaN

In [259]:
fred.shift(4, freq='D').resample('D').apply(pd.Series.interpolate).head(50)


Out[259]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-01-05 144.981900 6.640000 7.550000
1990-01-06 145.004845 6.649032 7.554839
1990-01-07 145.027790 6.658065 7.559677
1990-01-08 145.050735 6.667097 7.564516
1990-01-09 145.073681 6.676129 7.569355
1990-01-10 145.096626 6.685161 7.574194
1990-01-11 145.119571 6.694194 7.579032
1990-01-12 145.142516 6.703226 7.583871
1990-01-13 145.165461 6.712258 7.588710
1990-01-14 145.188406 6.721290 7.593548
1990-01-15 145.211352 6.730323 7.598387
1990-01-16 145.234297 6.739355 7.603226
1990-01-17 145.257242 6.748387 7.608065
1990-01-18 145.280187 6.757419 7.612903
1990-01-19 145.303132 6.766452 7.617742
1990-01-20 145.326077 6.775484 7.622581
1990-01-21 145.349023 6.784516 7.627419
1990-01-22 145.371968 6.793548 7.632258
1990-01-23 145.394913 6.802581 7.637097
1990-01-24 145.417858 6.811613 7.641935
1990-01-25 145.440803 6.820645 7.646774
1990-01-26 145.463748 6.829677 7.651613
1990-01-27 145.486694 6.838710 7.656452
1990-01-28 145.509639 6.847742 7.661290
1990-01-29 145.532584 6.856774 7.666129
1990-01-30 145.555529 6.865806 7.670968
1990-01-31 145.578474 6.874839 7.675806
1990-02-01 145.601419 6.883871 7.680645
1990-02-02 145.624365 6.892903 7.685484
1990-02-03 145.647310 6.901935 7.690323
1990-02-04 145.670255 6.910968 7.695161
1990-02-05 145.693200 6.920000 7.700000
1990-02-06 145.965164 6.935714 7.705357
1990-02-07 146.237129 6.951429 7.710714
1990-02-08 146.509093 6.967143 7.716071
1990-02-09 146.781057 6.982857 7.721429
1990-02-10 147.053021 6.998571 7.726786
1990-02-11 147.324986 7.014286 7.732143
1990-02-12 147.596950 7.030000 7.737500
1990-02-13 147.868914 7.045714 7.742857
1990-02-14 148.140879 7.061429 7.748214
1990-02-15 148.412843 7.077143 7.753571
1990-02-16 148.684807 7.092857 7.758929
1990-02-17 148.956771 7.108571 7.764286
1990-02-18 149.228736 7.124286 7.769643
1990-02-19 149.500700 7.140000 7.775000
1990-02-20 149.772664 7.155714 7.780357
1990-02-21 150.044629 7.171429 7.785714
1990-02-22 150.316593 7.187143 7.791071
1990-02-23 150.588557 7.202857 7.796429

In [255]:
fred.shift(4, freq='D').resample('D', fill_method='bfill').head(50)


Out[255]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-01-05 144.9819 6.64 7.55
1990-01-06 145.6932 6.92 7.70
1990-01-07 145.6932 6.92 7.70
1990-01-08 145.6932 6.92 7.70
1990-01-09 145.6932 6.92 7.70
1990-01-10 145.6932 6.92 7.70
1990-01-11 145.6932 6.92 7.70
1990-01-12 145.6932 6.92 7.70
1990-01-13 145.6932 6.92 7.70
1990-01-14 145.6932 6.92 7.70
1990-01-15 145.6932 6.92 7.70
1990-01-16 145.6932 6.92 7.70
1990-01-17 145.6932 6.92 7.70
1990-01-18 145.6932 6.92 7.70
1990-01-19 145.6932 6.92 7.70
1990-01-20 145.6932 6.92 7.70
1990-01-21 145.6932 6.92 7.70
1990-01-22 145.6932 6.92 7.70
1990-01-23 145.6932 6.92 7.70
1990-01-24 145.6932 6.92 7.70
1990-01-25 145.6932 6.92 7.70
1990-01-26 145.6932 6.92 7.70
1990-01-27 145.6932 6.92 7.70
1990-01-28 145.6932 6.92 7.70
1990-01-29 145.6932 6.92 7.70
1990-01-30 145.6932 6.92 7.70
1990-01-31 145.6932 6.92 7.70
1990-02-01 145.6932 6.92 7.70
1990-02-02 145.6932 6.92 7.70
1990-02-03 145.6932 6.92 7.70
1990-02-04 145.6932 6.92 7.70
1990-02-05 145.6932 6.92 7.70
1990-02-06 153.3082 7.36 7.85
1990-02-07 153.3082 7.36 7.85
1990-02-08 153.3082 7.36 7.85
1990-02-09 153.3082 7.36 7.85
1990-02-10 153.3082 7.36 7.85
1990-02-11 153.3082 7.36 7.85
1990-02-12 153.3082 7.36 7.85
1990-02-13 153.3082 7.36 7.85
1990-02-14 153.3082 7.36 7.85
1990-02-15 153.3082 7.36 7.85
1990-02-16 153.3082 7.36 7.85
1990-02-17 153.3082 7.36 7.85
1990-02-18 153.3082 7.36 7.85
1990-02-19 153.3082 7.36 7.85
1990-02-20 153.3082 7.36 7.85
1990-02-21 153.3082 7.36 7.85
1990-02-22 153.3082 7.36 7.85
1990-02-23 153.3082 7.36 7.85

Time zone handling


In [262]:
stamp = pd.Timestamp(datetime.now())
stamp


Out[262]:
Timestamp('2013-10-17 15:23:57.495752', tz=None)

In [264]:
print stamp.tz


None

In [267]:
stamp.tz_localize('US/Pacific')


Out[267]:
Timestamp('2013-10-17 15:23:57.495752-0700', tz='US/Pacific')

In [268]:
stamp_pac = stamp.tz_localize('US/Pacific')
stamp_pac


Out[268]:
Timestamp('2013-10-17 15:23:57.495752-0700', tz='US/Pacific')

In [273]:
stamp_pac.tz_convert('Asia/Tokyo')


Out[273]:
18

In [274]:
stamp_pac.tz_convert('Asia/Tokyo').hour


Out[274]:
7

In [275]:
stamp_pac.tz_convert('Asia/Tokyo').day


Out[275]:
18

In [277]:
stamp_pac.tz_convert('Asia/Tokyo').value


Out[277]:
1382048637495752000

In [278]:
stamp_pac.tz_convert('Asia/Tokyo').tz_convert('utc').value


Out[278]:
1382048637495752000

In [285]:
fred_shifted = fred.shift(1, freq='4D9H30T')
fred_shang = fred_shifted\
    .tz_localize('US/Eastern')\
    .tz_convert('Asia/Shanghai')
fred_shang


Out[285]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 270 entries, 1990-01-05 22:30:00+08:00 to 2012-06-05 21:30:00+08:00
Data columns (total 3 columns):
EXJPUS           270  non-null values
INTGSBJPM193N    269  non-null values
TB6MS            270  non-null values
dtypes: float64(3)

In [287]:
fred_shang.index


Out[287]:
<class 'pandas.tseries.index.DatetimeIndex'>
[1990-01-05 22:30:00, ..., 2012-06-05 21:30:00]
Length: 270, Freq: None, Timezone: Asia/Shanghai

In [293]:
fred_shifted = fred.shift(1, freq='4D9H30T')
fred_shifted\
    .tz_localize('US/Eastern')\
    .tz_convert('US/Pacific').resample('A-DEC', how='mean')


Out[293]:
EXJPUS INTGSBJPM193N TB6MS
DATE
1990-12-31 00:00:00-08:00 144.818167 7.362500 7.453333
1991-12-31 00:00:00-08:00 134.507742 6.525833 5.440833
1992-12-31 00:00:00-08:00 126.747042 4.944167 3.546667
1993-12-31 00:00:00-08:00 111.231417 3.692500 3.122500
1994-12-31 00:00:00-08:00 102.195258 3.714167 4.632500
1995-12-31 00:00:00-08:00 94.105783 2.531667 5.565833
1996-12-31 00:00:00-08:00 108.804842 2.225000 5.080833
1997-12-31 00:00:00-08:00 121.094650 1.687500 5.176667
1998-12-31 00:00:00-08:00 130.818200 1.096667 4.828333
1999-12-31 00:00:00-08:00 113.713167 1.770833 4.745000
2000-12-31 00:00:00-08:00 107.821308 1.747500 5.898333
2001-12-31 00:00:00-08:00 121.519267 1.329583 3.334167
2002-12-31 00:00:00-08:00 125.266875 1.250000 1.675833
2003-12-31 00:00:00-08:00 115.916642 1.011667 1.054167
2004-12-31 00:00:00-08:00 108.156925 1.499167 1.575000
2005-12-31 00:00:00-08:00 110.138392 1.359583 3.392500
2006-12-31 00:00:00-08:00 116.349758 1.731250 4.802500
2007-12-31 00:00:00-08:00 117.765483 1.652917 4.430000
2008-12-31 00:00:00-08:00 103.379350 1.449167 1.618333
2009-12-31 00:00:00-08:00 93.599550 1.342500 0.279167
2010-12-31 00:00:00-08:00 87.750225 1.150833 0.194167
2011-12-31 00:00:00-08:00 79.711717 1.118333 0.095833
2012-12-31 00:00:00-08:00 79.689050 0.922000 0.128333

Introduce homework


In [297]:
!cat homework.md


=======================
Python for Data Science
=======================
Homework for 2013-10-17 Lecture (Wes McKinney)
==============================================

Download a dump of data about closed GitHub issues for the pandas project here:

https://www.dropbox.com/s/pe6dqooznrfynii/closed.json

Use the built-in json library to read this file into memory. Each element in
the list contains information about a GitHub issue and all developer comments
that were made on it in the 'comments' field.

1) Make a DataFrame with one row per issue with the following columns extracted
from the issue data:

ntitle, created_at, labels, closed_at, user, id

Transform the user values to be simply the 'login' string, so that the user
column contains only string usernames.

2) Remove duplicate rows by id from the DataFrame you just created using the id
column's duplicated method.

4) Convert the created_at and closed_at columns from string to datetime type.

5) Now construct appropriate time series and pandas functions to make the
following plots:

- Number of issues created by month

- Number of distinct users creating issues each month (hint: you can pass a
  function to resample's how argument, and there's nothing wrong with having
  string values in a TimeSeries)

6) Make a table and an accompanying plot illustrating:

- The mean number of days it took for issues to be closed by the month they
  were opened. In other words, for closed issues created in August 2012, how
  long were they open on average? (hint: use the total_seconds function on the
  timedelta objects computed when subtracting datetime objects). Also show the
  number of issues in each month in the table.

7) Make a DataFrame containing all the comments for all of the issues. You will
want to add an 'id' attribute to each comment while doing so so that each row
contains a single comment and has the id of the issue it belongs to.

Convert the 'created' column to datetime format; note you will need to multiply
the values (appropriately converted to integers) by 1000000 to get them in
nanoseconds and pass to to_datetime.

8) For each month, compute a table summarizing the following for each month:

- Total number of issue comments
- The "chattiest" user (most number of comments)
- The percentage of total comments made by the chattiest users
- The number of distinct participants in the issue comments

9) Create a helper 'labels' table from the issues data with two columns: id and
label. If an issue has 3 elements in its 'labels' value, add 3 rows to the
table. If an issue does not have any labels, place a single row with None as
the label (hint: construct a list of tuples, then make the DataFrame).

10) Now, join the issues data with the labels helper table (pandas.merge). Add
a column to this table containing the number of days (as a floating point
number) it took to close each issue.

11) Compute a table containing the average time to close for each label
type. Now make a plot comparing mean time to close by month for Enhancement
versus Bug issue types.

Python for Data Science

Homework for 2013-10-17 Lecture (Wes McKinney)

Download a dump of data about closed GitHub issues for the pandas project here:

https://www.dropbox.com/s/pe6dqooznrfynii/closed.json

Use the built-in json library to read this file into memory. Each element in the list contains information about a GitHub issue and all developer comments that were made on it in the 'comments' field.

1) Make a DataFrame with one row per issue with the following columns extracted from the issue data:

ntitle, created_at, labels, closed_at, user, id

Transform the user values to be simply the 'login' string, so that the user column contains only string usernames.

2) Remove duplicate rows by id from the DataFrame you just created using the id column's duplicated method.

4) Convert the created_at and closed_at columns from string to datetime type.

5) Now construct appropriate time series and pandas functions to make the following plots:

  • Number of issues created by month

  • Number of distinct users creating issues each month (hint: you can pass a function to resample's how argument, and there's nothing wrong with having string values in a TimeSeries)

6) Make a table and an accompanying plot illustrating:

  • The mean number of days it took for issues to be closed by the month they were opened. In other words, for closed issues created in August 2012, how long were they open on average? (hint: use the total_seconds function on the timedelta objects computed when subtracting datetime objects). Also show the number of issues in each month in the table.

7) Make a DataFrame containing all the comments for all of the issues. You will want to add an 'id' attribute to each comment while doing so so that each row contains a single comment and has the id of the issue it belongs to.

Convert the 'created' column to datetime format; note you will need to multiply the values (appropriately converted to integers) by 1000000 to get them in nanoseconds and pass to to_datetime.

8) For each month, compute a table summarizing the following for each month:

  • Total number of issue comments
  • The "chattiest" user (most number of comments)
  • The percentage of total comments made by the chattiest users
  • The number of distinct participants in the issue comments

9) Create a helper 'labels' table from the issues data with two columns: id and label. If an issue has 3 elements in its 'labels' value, add 3 rows to the table. If an issue does not have any labels, place a single row with None as the label (hint: construct a list of tuples, then make the DataFrame).

10) Now, join the issues data with the labels helper table (pandas.merge). Add a column to this table containing the number of days (as a floating point number) it took to close each issue.

11) Compute a table containing the average time to close for each label type. Now make a plot comparing mean time to close by month for Enhancement versus Bug issue types.


In [298]:
import json
closed_issues = json.load(open('closed.json'))
closed_issues[0]


Out[298]:
{u'assignee': None,
 u'body': u're: pystatsmodels e-mail\r\n\r\n\r\nhi everyone,\r\n\r\njust getting started with pandas and i was wondering if someone could\r\nhelp me out.  do pandas.DataMatrix objects support per item comparison\r\noperations?\r\n\r\ni have a two data matrices, and i want to do something like this:\r\n\r\ndiv[div > 0.5 * price] = 0\r\n\r\nthis would work if div and price were numpy.ndarray objects.  any idea\r\nhow i would do something like this with pandas.DataMatrix objects?\r\n\r\nthanks,\r\nandy\r\n',
 u'closed_at': u'2011-02-19T23:13:48Z',
 u'comments': [{u'author': u'wesm',
   u'created': u'1298157227000',
   u'text': u'implemented in git HEAD',
   u'updated': u'1298157227000'}],
 u'comments_url': u'https://api.github.com/repos/pydata/pandas/issues/1/comments',
 u'created_at': u'2010-09-29T00:45:31Z',
 u'events_url': u'https://api.github.com/repos/pydata/pandas/issues/1/events',
 u'html_url': u'https://github.com/pydata/pandas/issues/1',
 u'id': 337721,
 u'labels': [],
 u'labels_url': u'https://api.github.com/repos/pydata/pandas/issues/1/labels{/name}',
 u'milestone': None,
 u'number': 1,
 u'pull_request': {u'diff_url': None, u'html_url': None, u'patch_url': None},
 u'state': u'closed',
 u'title': u'Enable element-wise comparison operations in DataMatrix objects',
 u'updated_at': u'2013-04-26T21:25:39Z',
 u'url': u'https://api.github.com/repos/pydata/pandas/issues/1',
 u'user': {u'avatar_url': u'https://secure.gravatar.com/avatar/2c08a3eed709a9d1a2654cea45aa466f?d=https://a248.e.akamai.net/assets.github.com%2Fimages%2Fgravatars%2Fgravatar-user-420.png',
  u'events_url': u'https://api.github.com/users/wesm/events{/privacy}',
  u'followers_url': u'https://api.github.com/users/wesm/followers',
  u'following_url': u'https://api.github.com/users/wesm/following',
  u'gists_url': u'https://api.github.com/users/wesm/gists{/gist_id}',
  u'gravatar_id': u'2c08a3eed709a9d1a2654cea45aa466f',
  u'html_url': u'https://github.com/wesm',
  u'id': 329591,
  u'login': u'wesm',
  u'organizations_url': u'https://api.github.com/users/wesm/orgs',
  u'received_events_url': u'https://api.github.com/users/wesm/received_events',
  u'repos_url': u'https://api.github.com/users/wesm/repos',
  u'starred_url': u'https://api.github.com/users/wesm/starred{/owner}{/repo}',
  u'subscriptions_url': u'https://api.github.com/users/wesm/subscriptions',
  u'type': u'User',
  u'url': u'https://api.github.com/users/wesm'}}

In [ ]: