In [1]:
import pandas as pd

# Years to scan: 1950 through 2012 (the range end, 2013, is exclusive).
years = range(1950, 2013)

# One entry per year: the summed `births` over every row whose name is
# 'Dmitri'. No filter on `sex` is applied, and a year with no matching row
# contributes 0 (the sum of an empty selection).
values = []
for year in years:
    # Column names are supplied explicitly, so pandas reads the first line of
    # each file as data rather than as a header.
    # NOTE: `names1986` is rebound on every pass, so after the loop it holds
    # the last file read, not specifically the 1986 data. Later cells read it.
    names1986 = pd.read_csv('data/yob%s.txt' % year, names=['name', 'sex', 'births'])
    dmitri_rows = names1986.name == 'Dmitri'
    values.append(names1986.births[dmitri_rows].sum())

In [2]:
# Quick look at the frame left in `names1986` by the loading loop.
# DataFrame.plot() draws the numeric column(s) against the row index.
names1986.plot()


Out[2]:
<matplotlib.axes.AxesSubplot at 0x10d4fa950>

In [3]:
# Label each yearly 'Dmitri' total with its year so the series can be
# plotted and inspected by year.
Dmitri = pd.Series(data=values, index=years)

In [4]:
# Line plot of the yearly 'Dmitri' totals, with the year on the x-axis.
Dmitri.plot()


Out[4]:
<matplotlib.axes.AxesSubplot at 0x10e58ad90>

In [50]:
# First ten entries of the series (the earliest ten years scanned).
Dmitri.head(10)


Out[50]:
1950     0
1951     0
1952     0
1953     0
1954     0
1955     8
1956    10
1957     0
1958     0
1959     9
dtype: int64

In [11]:
# Sanity check: selecting from the `name` column with a boolean mask
# yields a pandas Series.
type(names1986.name[names1986.name == 'Dmitri'])


Out[11]:
pandas.core.series.Series

In [23]:
# Peek at the first five rows (head() defaults to 5) of the frame in `names1986`.
names1986.head()


Out[23]:
name sex births
0 Jessica F 52657
1 Ashley F 49674
2 Amanda F 40519
3 Jennifer F 36173
4 Sarah F 28134

In [24]:
# Fetch the row whose index label is 10.
# `.ix` was deprecated and later removed from pandas; `.loc` is the
# label-based equivalent. The frame has the default integer index produced
# by read_csv, so label 10 is also the 11th row.
names1986.loc[10]


Out[24]:
name      Megan
sex           F
births    18592
Name: 10, dtype: object

In [26]:
# Read every yearly file from 1880 through 2010 (range end is exclusive),
# tag each row with the year it came from, and stack everything into one frame.
column_names = ['name', 'sex', 'births']
pieces = []
for year in range(1880, 2011):
    path = 'data/yob%s.txt' % year
    frame = pd.read_csv(path, names=column_names)
    # The year is not a column in the file; it is taken from the file name.
    frame['year'] = year
    pieces.append(frame)

# ignore_index=True discards the per-file row numbers and assigns a fresh
# 0..n-1 index across the combined frame.
names = pd.concat(pieces, ignore_index=True)

In [27]:
# Number of non-null entries in each column of the combined frame.
names.count()


Out[27]:
name      1691357
sex       1691357
births    1691357
year      1691357
dtype: int64

In [29]:
# All rows of the combined frame (every year, either sex) whose name is 'Dmitri'.
glory = names.loc[names['name'] == 'Dmitri']

In [32]:
# Total births per year (one row per year) split by sex (one column per sex).
# pivot_table's `rows=`/`cols=` keywords were renamed to `index=`/`columns=`
# and the old spellings are no longer accepted by pandas. The aggregation is
# passed by name ('sum') rather than as the Python builtin.
total_births = names.pivot_table(values='births', index='year', columns='sex', aggfunc='sum')

In [33]:
# Display the pivoted table: one row per year, one column per sex.
total_births


Out[33]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 131 entries, 1880 to 2010
Data columns (total 2 columns):
F    131  non-null values
M    131  non-null values
dtypes: int64(2)

In [34]:
# First five years of the per-sex birth totals.
total_births.head()


Out[34]:
sex F M
year
1880 90993 110491
1881 91955 100746
1882 107850 113687
1883 112322 104630
1884 129022 114445

In [47]:
# Small hand-built Series of place names labelled by year.
# Note that the label 1986 appears twice, so the index is not unique.
geo = pd.Series(
    data=['Uchaly', 'Ufa', 'Moskva', 'Vancouver', 'Ottawa'],
    index=[1986, 1986, 2003, 2005, 2014],
)

In [48]:
# Display the Series: index labels on the left, values on the right.
geo


Out[48]:
1986       Uchaly
1986          Ufa
2003       Moskva
2005    Vancouver
2014       Ottawa
dtype: object

In [43]:
# The Series' values as a NumPy array, without the index labels.
geo.values


Out[43]:
array(['Uchaly', 'Ufa', 'Moskva', 'Vancouver', 'Ottawa'], dtype=object)

In [44]:
# The Series' index labels.
# NOTE(review): the recorded output lists 1987 as the first label, while the
# cell that defines `geo` uses 1986 there. This cell's execution count (44)
# precedes the defining cell's (47), so the output presumably predates the
# current definition; re-run to refresh it.
geo.index


Out[44]:
Int64Index([1987, 1986, 2003, 2005, 2014], dtype=int64)

In [53]:
# Select by value with a boolean mask: keeps only the entries equal to 'Ufa',
# together with their index labels.
geo[geo == 'Ufa']


Out[53]:
1986    Ufa
dtype: object

In [ ]:
names.