In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [3]:
%matplotlib inline
from pandas import DataFrame, Series
In [17]:
# Read the contributions CSV into a DataFrame bound to `fec`.
fec = pd.read_csv(
    'ch09/P00000001-ALL.csv',
)
Out[17]:
In [19]:
# Positional slice: the four rows that precede the final row.
fec.iloc[-5:-1]
Out[19]:
In [20]:
# Show the single record whose index label is 123456.
# `.loc` is the label-based accessor; the older `.ix` accessor no longer
# exists in pandas >= 1.0 and raises AttributeError there.
fec.loc[123456]
Out[20]:
In [21]:
# Distinct candidate names, in order of first appearance.
unique_cands = fec['cand_nm'].unique()
unique_cands
Out[21]:
In [22]:
# Peek at the third entry (index 2) of the unique candidate names.
unique_cands[2]
Out[22]:
In [23]:
# Candidate name -> party affiliation.
# The keys are looked up with `Series.map` against the `cand_nm` column, so
# each key must match the spelling used in the data exactly; a key that does
# not match leaves that candidate's rows with a missing (NaN) party.
parties = {
    'Bachmann, Michelle': 'Republican',
    'Cain, Herman': 'Republican',
    'Gingrich, Newt': 'Republican',
    'Huntsman, Jon': 'Republican',
    'Johnson, Gary Earl': 'Republican',
    'McCotter, Thaddeus G': 'Republican',
    'Obama, Barack': 'Democrat',
    'Paul, Ron': 'Republican',
    'Pawlenty, Timothy': 'Republican',
    'Perry, Rick': 'Republican',
    "Roemer, Charles E. 'Buddy' III": 'Republican',
    'Romney, Mitt': 'Republican',
    'Santorum, Rick': 'Republican',
}
In [24]:
# Five consecutive candidate names, selected by position.
fec['cand_nm'].iloc[123456:123461]
Out[24]:
In [25]:
# The same five names, translated to a party through the `parties` dict.
fec['cand_nm'].iloc[123456:123461].map(parties)
Out[25]:
In [26]:
# Store the mapped party as a new column, then count rows per party.
fec['party'] = fec['cand_nm'].map(parties)
fec.party.value_counts()
Out[26]:
In [27]:
# How many contribution amounts are strictly positive (True) versus not (False).
fec['contb_receipt_amt'].gt(0).value_counts()
Out[27]:
In [28]:
# Keep only rows with a strictly positive contribution amount.
# The explicit copy makes the result an independent frame, so later column
# assignments on `fec` do not raise SettingWithCopyWarning.
fec = fec[fec.contb_receipt_amt > 0].copy()
In [29]:
# Subset restricted to contributions for the two named candidates.
fec_mrbo = fec.loc[fec['cand_nm'].isin(['Obama, Barack', 'Romney, Mitt'])]
In [41]:
# Row count per candidate, largest first, limited to the top two.
(
    fec.groupby('cand_nm')['cand_nm']
    .count()
    .sort_values(ascending=False)
    .head(2)
)
Out[41]:
In [43]:
# IPython help syntax: a trailing `?` displays the docstring of Series.sort_values.
Series.sort_values?
In [44]:
# Ten most frequent occupation strings.
fec['contbr_occupation'].value_counts().head(10)
Out[44]:
In [45]:
# Two candidates with the most rows.
fec['cand_nm'].value_counts().head(2)
Out[45]:
In [50]:
# Canonical replacements for occupation strings: the three
# "information requested" spellings collapse to one label, and the dotted
# CEO spelling is normalised.
occ_mapping = {
    variant: 'NOT PROVIDED'
    for variant in (
        'INFORMATION REQUESTED PER BEST EFFORTS',
        'INFORMATION REQUESTED',
        'INFORMATION REQUESTED (BEST EFFORTS)',
    )
}
occ_mapping['C.E.O.'] = 'CEO'


def f(x):
    """Return the canonical form of `x`, or `x` itself when no mapping exists."""
    return occ_mapping.get(x, x)
# Rewrite the occupation column with its normalised values.
fec.loc[:, 'contbr_occupation'] = fec['contbr_occupation'].map(f)
In [55]:
# Ten most frequent occupation strings.
fec['contbr_occupation'].value_counts().head(10)
Out[55]:
In [58]:
# Total contribution amount per occupation (rows) and party (columns).
by_occupation = fec.pivot_table(
    values='contb_receipt_amt',
    index='contbr_occupation',
    columns='party',
    aggfunc='sum',
)
# Keep the occupations whose total across parties exceeds 2,000,000.
over_2mm = by_occupation[by_occupation.sum(axis=1) > 2000000]
over_2mm
Out[58]:
In [59]:
# Horizontal bar chart of the large-donor occupations, one bar per party.
over_2mm.plot.barh()
Out[59]:
In [61]:
# Per-candidate frequency of each contribution amount; first ten entries.
fec_mrbo.groupby('cand_nm')['contb_receipt_amt'].value_counts().head(10)
Out[61]:
In [66]:
def get_top_amounts(group, key, n=5):
    """Return the `n` largest contribution totals in `group`, keyed by `key`.

    Parameters
    ----------
    group : DataFrame
        Frame containing a 'contb_receipt_amt' column and the `key` column(s).
    key : str or list of str
        Column name(s) to group by before summing.
    n : int, default 5
        Number of top entries to return.

    Returns
    -------
    Series
        Summed 'contb_receipt_amt' per `key` value, sorted in descending
        order and truncated to the first `n` entries.
    """
    totals = group.groupby(key)['contb_receipt_amt'].sum()
    # Sort the totals in descending order and honour the caller's `n`.
    return totals.sort_values(ascending=False).iloc[:n]
In [67]:
# For each candidate, list the seven occupations with the largest totals.
grouped = fec_mrbo.groupby('cand_nm')
grouped.apply(get_top_amounts, key='contbr_occupation', n=7)
Out[67]:
In [68]:
# Bucket edges: 0, then successive powers of ten from 1 up to 10,000,000.
bins = np.array([0] + [10 ** exp for exp in range(8)])
# Assign every contribution amount to its bucket.
labels = pd.cut(fec_mrbo['contb_receipt_amt'], bins)
labels
Out[68]:
In [69]:
# Number of contributions per bucket, with one column per candidate.
grouped = fec_mrbo.groupby(['cand_nm', labels])
grouped.size().unstack(level=0)
Out[69]:
In [70]:
# Summed contribution amount per bucket, with one column per candidate.
bucket_sums = grouped['contb_receipt_amt'].sum().unstack(level=0)
bucket_sums
Out[70]:
In [71]:
# Divide each row by its row total so every bucket sums to 1 across candidates.
normed_sums = bucket_sums.div(bucket_sums.sum(axis='columns'), axis='index')
normed_sums
Out[71]:
In [72]:
# Stacked horizontal bars of the per-bucket shares, leaving out the last two buckets.
normed_sums.iloc[:-2].plot.barh(stacked=True)
Out[72]:
In [73]:
# Summed contributions per state (rows) and candidate (columns);
# missing state/candidate combinations are filled with 0.
grouped = fec_mrbo.groupby(['cand_nm', 'contbr_st'])
totals = grouped['contb_receipt_amt'].sum().unstack(level=0).fillna(0)
totals.head(10)
Out[73]:
In [74]:
# Each candidate's share of the state's combined total.
percent = totals.div(totals.sum(axis=1), axis='index')
percent.head(10)
Out[74]:
In [77]:
from mpl_toolkits.basemap import Basemap, cm
from matplotlib import rcParams
from matplotlib.collections import LineCollection
#from shapelib import ShapeFile
#import dbflib
In [78]:
# Per-state share for one candidate, taken from the `percent` table.
obama = percent['Obama, Barack']

# A 12x12 figure with one axes leaving a 10% margin on every side.
fig = plt.figure(figsize=(12, 12))
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])

# Lower-left / upper-right corners of the map window, in degrees.
lllat, urlat, lllon, urlon = 21, 53, -118, -62

# Stereographic projection centred on the midpoint of the window.
m = Basemap(
    ax=ax,
    projection='stere',
    lon_0=(urlon + lllon) / 2,
    lat_0=(urlat + lllat) / 2,
    llcrnrlat=lllat,
    urcrnrlat=urlat,
    llcrnrlon=lllon,
    urcrnrlon=urlon,
    resolution='l',
)
m.drawcoastlines()
m.drawcountries()
plt.show()
In [ ]:
In [ ]: