Example: 2012 Federal Election Commission Database


In [3]:
# Setup cell: imports, RNG seed, plotting/printing defaults, working directory.
# The __future__ import has to stay the first statement; on Python 2 it makes
# `/` perform true division.
from __future__ import division
from numpy.random import randn
import numpy as np
import os
import matplotlib.pyplot as plt
# Seed NumPy's global random number generator.
np.random.seed(12345)
# Default size for matplotlib figures.
plt.rc('figure', figsize=(10, 6))
# NOTE(review): the star import exposes pandas' public names (e.g. read_csv)
# unqualified. Cells in this notebook may rely on that, so it is kept as-is.
from pandas import *
import pandas
# Print NumPy arrays with 4 digits of precision.
np.set_printoptions(precision=4)
# IPython magic: change into the data directory so that relative file names
# used afterwards resolve against it.
%cd book_scripts/fec


/home/phillip/Documents/code/py/pandas-book/rev_539000/book_scripts/fec

In [5]:
# Load the FEC contribution records from CSV. The file name is relative to the
# current working directory.
fec = pandas.read_csv('P00000001-ALL.csv')

In [6]:
# Bare expression as the last line of the cell: the notebook renders the frame
# (pandas elides the middle rows and columns in the display).
fec


Out[6]:
cmte_id cand_id cand_nm contbr_nm contbr_city ... receipt_desc memo_cd memo_text form_tp file_num
0 C00410118 P20002978 Bachmann, Michelle HARVEY, WILLIAM MOBILE ... NaN NaN NaN SA17A 736166
1 C00410118 P20002978 Bachmann, Michelle HARVEY, WILLIAM MOBILE ... NaN NaN NaN SA17A 736166
2 C00410118 P20002978 Bachmann, Michelle SMITH, LANIER LANETT ... NaN NaN NaN SA17A 749073
3 C00410118 P20002978 Bachmann, Michelle BLEVINS, DARONDA PIGGOTT ... NaN NaN NaN SA17A 749073
4 C00410118 P20002978 Bachmann, Michelle WARDENBURG, HAROLD HOT SPRINGS NATION ... NaN NaN NaN SA17A 736166
5 C00410118 P20002978 Bachmann, Michelle BECKMAN, JAMES SPRINGDALE ... NaN NaN NaN SA17A 736166
6 C00410118 P20002978 Bachmann, Michelle BLEVINS, DARONDA PIGGOTT ... NaN NaN NaN SA17A 736166
... ... ... ... ... ... ... ... ... ... ... ...
1001724 C00500587 P20003281 Perry, Rick HEFFERNAN, JILL PRINCE MRS. INFO REQUESTED ... NaN NaN NaN SA17A 751678
1001725 C00500587 P20003281 Perry, Rick ELWOOD, MIKE MR. INFO REQUESTED ... NaN NaN NaN SA17A 751678
1001726 C00500587 P20003281 Perry, Rick GORMAN, CHRIS D. MR. INFO REQUESTED ... REATTRIBUTION / REDESIGNATION REQUESTED (AUTOM... NaN REATTRIBUTION / REDESIGNATION REQUESTED (AUTOM... SA17A 751678
1001727 C00500587 P20003281 Perry, Rick DUFFY, DAVID A. MR. INFO REQUESTED ... NaN NaN NaN SA17A 751678
1001728 C00500587 P20003281 Perry, Rick GRANE, BRYAN F. MR. INFO REQUESTED ... NaN NaN NaN SA17A 751678
1001729 C00500587 P20003281 Perry, Rick TOLBERT, DARYL MR. INFO REQUESTED ... NaN NaN NaN SA17A 751678
1001730 C00500587 P20003281 Perry, Rick ANDERSON, MARILEE MRS. INFO REQUESTED ... NaN NaN NaN SA17A 751678

1001731 rows × 16 columns


In [7]:
# Inspect a single record by its index label.
# `.ix` is deprecated and was removed in pandas 1.0. The frame has an integer
# index, for which `.ix` was label-based, so `.loc` is the exact equivalent.
fec.loc[123456]


Out[7]:
cmte_id            C00431445
cand_id            P80003338
cand_nm        Obama, Barack
contbr_nm        ELLMAN, IRA
contbr_city            TEMPE
...
contb_receipt_dt    01-DEC-11
receipt_desc              NaN
memo_cd                   NaN
memo_text                 NaN
form_tp                 SA17A
file_num               772372
Name: 123456, Length: 16, dtype: object

In [8]:
# Distinct candidate names, in order of first appearance in the data.
unique_cands = fec['cand_nm'].unique()
# Only the last expression of a cell is displayed, so show one sample entry.
unique_cands[2]


Out[8]:
'Obama, Barack'

In [9]:
# Lookup table from candidate name (spelled exactly as in the `cand_nm`
# column) to party affiliation. Defined once; keys must match the data
# verbatim, including the embedded quotes in the Roemer entry.
parties = {'Bachmann, Michelle': 'Republican',
           'Cain, Herman': 'Republican',
           'Gingrich, Newt': 'Republican',
           'Huntsman, Jon': 'Republican',
           'Johnson, Gary Earl': 'Republican',
           'McCotter, Thaddeus G': 'Republican',
           'Obama, Barack': 'Democrat',
           'Paul, Ron': 'Republican',
           'Pawlenty, Timothy': 'Republican',
           'Perry, Rick': 'Republican',
           "Roemer, Charles E. 'Buddy' III": 'Republican',
           'Romney, Mitt': 'Republican',
           'Santorum, Rick': 'Republican'}

In [10]:
# Translate each contribution's candidate name into a party via the `parties`
# lookup and store the result as a new `party` column on the frame.
# A name missing from `parties` would map to NaN.
fec['party'] = fec['cand_nm'].map(parties)
# Number of contribution records per party (the cell's displayed result).
fec['party'].value_counts()


Out[10]:
Democrat      593746
Republican    407985
dtype: int64

In [11]:
# Tally how many records have a strictly positive contribution amount (True)
# versus those that do not (False).
fec['contb_receipt_amt'].gt(0).value_counts()


Out[11]:
True     991475
False     10256
dtype: int64

In [12]:
# Keep only the rows whose contribution amount is strictly positive.
# The name `fec` is rebound to the filtered frame.
fec = fec.loc[fec['contb_receipt_amt'] > 0]

In [15]:
# Subset of contributions made to either of the two named candidates.
fec_mrbo = fec.loc[
    fec['cand_nm'].isin(['Obama, Barack', 'Romney, Mitt'])
]