Filings

Extract filings made by ballot measure committees


In [1]:
import os
import settings
import datetime
import pandas as pd

Pul in all distinct committees


In [2]:
committees = settings.open_csv("committees.csv")

In [3]:
committees.head()


Out[3]:
calaccess_committee_id committee_name prop_count
0 1219956 CONCERNED CITIZENS AGAINST PROPOSITION 22 1
1 1220380 COMMUNITY COLLEGE FACILITY COALITION ISSUES CO... 1
2 1220380 COMMUNITY COLLEGE FACILITY COALITION ISSUES CO... 1
3 1220380 COMMUNITY COLLEGE FACILITY COALITION ISSUES CO... 1
4 1220380 COMMUNITY COLLEGE FACILITY COALITION ISSUES CO... 1

Pull in all quarterly filings


In [4]:
form_460_filings = pd.read_sql_table("calaccess_processed_form460filing", settings.db_connection)

In [5]:
form_460_filings.head()


Out[5]:
date_filed filer_id filer_lastname filer_firstname election_date from_date thru_date monetary_contributions loans_received subtotal_cash_contributions ... begin_cash_balance cash_receipts miscellaneous_cash_increases cash_payments ending_cash_balance loan_guarantees_received cash_equivalents outstanding_debts filing_id amendment_count
0 2000-01-26 1069401 BURTON FOR STATE SENATE 2000 2000-03-07 2000-01-01 2000-01-22 57600.0 0.0 57600.0 ... 1595669.0 57600.0 0.0 9075.0 1644194.0 0.0 0.0 0.0 577732 0
1 2000-01-26 1077363 MARILYN LYON FOR SENATE 2000-03-07 2000-01-01 2000-01-22 975.0 0.0 975.0 ... 161101.0 975.0 0.0 48091.0 113986.0 0.0 0.0 10000.0 577733 0
2 2000-01-26 1070497 SVORINICH OFFICEHOLDER ACCOUNT II 2000-03-07 2000-01-01 2000-01-22 0.0 0.0 0.0 ... 3493.0 0.0 0.0 0.0 3493.0 0.0 0.0 0.0 577754 0
3 2000-01-26 1076525 JOHN HISSERICH FOR STATE ASSEMBLY 2000-03-07 2000-01-01 2000-01-22 7425.0 0.0 7425.0 ... 43726.0 7425.0 0.0 24066.0 27085.0 0.0 0.0 56500.0 577841 1
4 2000-01-27 1038947 CALIFORNIA FACULTY ASSOCIATION POLITICAL ACTIO... 2000-03-07 2000-01-01 2000-01-22 0.0 0.0 0.0 ... 75106.0 0.0 0.0 12751.0 62356.0 0.0 0.0 0.0 577947 0

5 rows × 28 columns


In [6]:
prop_filings = pd.merge(
    form_460_filings,
    committees,
    how="inner",
    left_on="filer_id",
    right_on="calaccess_committee_id"
)

In [7]:
trimmed_filings = prop_filings[[
    'calaccess_committee_id',
    'committee_name',
    'filing_id',
    'date_filed',
    'from_date', 
    'thru_date',
    'total_contributions',
    'total_expenditures_made',
]].sort_values([
    'committee_name',
    'thru_date'
]).rename(columns={
    "filing_id": "calaccess_filing_id",
})

In [8]:
trimmed_filings.total_expenditures_made.sum()


Out[8]:
3832355230.0

In [9]:
trimmed_filings.to_csv(
    os.path.join(settings.data_dir, 'filings.csv'),
    index=False
)

2016 committees


In [10]:
recent_committees = pd.read_csv(
    os.path.join(settings.data_dir, "committees-2016.csv")
)

In [11]:
recent_prop_filings = pd.merge(
    form_460_filings,
    recent_committees,
    how="inner",
    left_on="filer_id",
    right_on="calaccess_committee_id"
)

In [12]:
recent_prop_filings.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 531 entries, 0 to 530
Data columns (total 31 columns):
date_filed                      531 non-null datetime64[ns]
filer_id                        531 non-null int64
filer_lastname                  531 non-null object
filer_firstname                 531 non-null object
election_date                   191 non-null datetime64[ns]
from_date                       531 non-null datetime64[ns]
thru_date                       531 non-null datetime64[ns]
monetary_contributions          531 non-null float64
loans_received                  531 non-null float64
subtotal_cash_contributions     531 non-null float64
nonmonetary_contributions       531 non-null float64
total_contributions             531 non-null float64
payments_made                   531 non-null float64
loans_made                      531 non-null float64
subtotal_cash_payments          531 non-null float64
unpaid_bills                    531 non-null float64
nonmonetary_adjustment          531 non-null float64
total_expenditures_made         531 non-null float64
begin_cash_balance              531 non-null float64
cash_receipts                   531 non-null float64
miscellaneous_cash_increases    531 non-null float64
cash_payments                   531 non-null float64
ending_cash_balance             531 non-null float64
loan_guarantees_received        531 non-null float64
cash_equivalents                531 non-null float64
outstanding_debts               531 non-null float64
filing_id                       531 non-null int64
amendment_count                 531 non-null int64
calaccess_committee_id          531 non-null int64
committee_name                  531 non-null object
prop_count                      531 non-null int64
dtypes: datetime64[ns](4), float64(19), int64(5), object(3)
memory usage: 132.8+ KB

In [13]:
recent_trimmed_filings = recent_prop_filings[[
    'calaccess_committee_id',
    'committee_name',
    'filing_id',
    'date_filed',
    'from_date', 
    'thru_date',
    'payments_made',
    'loans_made',
    'unpaid_bills',
    'nonmonetary_adjustment',
    'subtotal_cash_payments',
    'total_expenditures_made',
]].sort_values([
    'committee_name',
    'thru_date'
]).rename(columns={
    "filing_id": "calaccess_filing_id",
})

In [14]:
recent_trimmed_filings = recent_trimmed_filings[recent_trimmed_filings.from_date >= datetime.date(2015, 1, 1)]

In [15]:
recent_trimmed_filings.total_expenditures_made.sum()


Out[15]:
527580010.0

In [16]:
recent_trimmed_filings.to_csv(
    os.path.join(settings.data_dir, 'filings-2016.csv'),
    index=False
)