In [2]:
from pandas import DataFrame, read_csv
import matplotlib.pyplot as plt
import pandas as pd
# Widen the console display so wide frames are not wrapped. The option is
# named 'display.width'; the older 'display.line_width' key was deprecated and
# then removed from pandas, where it now raises OptionError.
pd.set_option('display.width', 5000)
# Show up to 60 columns before pandas truncates the column list.
pd.set_option('display.max_columns', 60)
%matplotlib inline
In [3]:
# Load the Form 460 Schedule A monetary-contributions CSV into a DataFrame.
# The path is relative, so it is resolved against the kernel's working directory.
Location = "data/Campaign_Finance_-_FPPC_Form_460_-_Schedule_A_-_Monetary_Contributions.csv"
df = pd.read_csv(Location)
In [233]:
# Remove all non-mayor race contributions.
mayor_ids = ['1357609', '1354678', '1362261', '1359017']
# Compare the IDs as strings so the filter still matches when read_csv has
# inferred an integer dtype for Filer_ID (string-vs-int isin matches nothing).
# .copy() yields an independent frame, so the column assignment below does not
# write into a slice of the unfiltered data (SettingWithCopyWarning).
df = df[df['Filer_ID'].astype(str).isin(mayor_ids)].copy()
# Convert dollars contributed to a float. Strip the currency symbol and any
# thousands separators instead of blindly dropping the first character, and
# skip the conversion when the column is already numeric so that re-running
# this cell does not fail on the .str accessor.
if not pd.api.types.is_numeric_dtype(df['Tran_Amt1']):
    df['Tran_Amt1'] = (
        df['Tran_Amt1']
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
    )
In [234]:
# Find the top donors overall (including donors to multiple campaigns).
# Contributions are totalled per (first name, last name) pair and ordered from
# the largest total to the smallest. numeric_only=True keeps the aggregation to
# numeric columns so text columns are not summed; sort_values replaces the
# removed DataFrame.sort method.
# NOTE(review): groupby drops rows whose key is NaN by default, so any
# contributor without a first name would be left out of this ranking --
# confirm whether that is intended.
unique_donors = (
    df.groupby(['Tran_NamF', 'Tran_NamL'])
    .sum(numeric_only=True)
    .sort_values('Tran_Amt1', ascending=False)
)
unique_donors['Tran_Amt1'].head(5)
Out[234]:
In [235]:
# Sum contributions from a single donor (or couple) to a single campaign
# into one line. The result is indexed by (Filer_NamL, Tran_NamF, Tran_NamL).
# numeric_only=True restricts the totals to numeric columns so that text
# columns are not summed.
top_totals = (
    df.groupby(['Filer_NamL', 'Tran_NamF', 'Tran_NamL'])
    .sum(numeric_only=True)
)
In [236]:
# Find the three largest donors to each campaign.
def sort_amt(x):
    """Return the rows of ``x`` ordered by Tran_Amt1, largest first."""
    # sort_values replaces the removed DataFrame.sort method.
    return x.sort_values('Tran_Amt1', ascending=False)


def top_three_rows(x):
    """Return the first three rows of ``x`` (fewer if ``x`` is shorter)."""
    return x.iloc[:3]


# Rank and trim each campaign's donors in a single pass. group_keys=False stops
# apply from prepending the group key to the index, so the result keeps the
# (Filer_NamL, Tran_NamF, Tran_NamL) index of top_totals without needing to
# drop duplicated levels afterwards.
candidate_max = top_totals.groupby(level=0, group_keys=False).apply(
    lambda donors: top_three_rows(sort_amt(donors))
)
candidate_max['Tran_Amt1']
Out[236]:
In [ ]: