In [1]:
import numpy as np
import pandas as pd
From 1993 - 2010, the U.S. Census Bureau published the Consolidated Federal Funds Report, a dataset that contained total federal spending and how money flowed to each state, county, and congressional district.
When they stopped, the country was left with no information about the federal dollars in the states.
source: Bureau of Economic Analysis, Annual State Personal Income and Employment, Personal Current Transfer Receipts (SA35)
Totals include the following LineCodes from the 2000 Current transfer receipts of individuals from governments series:
(list the codes here)
In [2]:
#money to individuals
indv_cols = ['year', 'state_id', 'federal', 'total']
indv = pd.read_csv(
'data/individualreceiptgovtstate.txt',
usecols=indv_cols,
index_col=['year','state_id']
)
indv = indv.rename(columns = {
'total' : 'total_indv_receipt',
'federal' : 'fed_indv_receipt'
})
indv.head()
Out[2]:
source: Census State and Local Government Finances
state revenue totals = general revenue (i.e., exclude liquor, utility, and insurance trust revenues)
In [3]:
#money to state and local governments
govt_cols = ['year', 'state_id', 'level', 'general_rev', 'fed_rev']
govt = pd.read_csv(
'data/revenuegeneralstate.txt',
usecols = govt_cols,
index_col=['year','state_id']
)
#we only want the subset of data where level = 1 (combination of state and local governments)
govt = govt[govt['level']==1][['general_rev', 'fed_rev']]
govt = govt.rename(columns = {
'general_rev' : 'total_rev'
})
govt.head()
Out[3]:
source: USASpending.gov
primary awards (no sub-contracts) by place of performance state
In [4]:
#federal contracts
contract_cols = ['year', 'state_id', 'total_amount', 'dod_amount']
contracts = pd.read_csv(
'data/federalcontractstate.txt',
usecols = contract_cols,
index_col=['year','state_id']
)
contracts = contracts.rename(columns= {
'total_amount' : 'fed_contracts',
'dod_amount' : 'dod_contracts'
})
contracts.head()
Out[4]:
In [5]:
#federal employee salaries/benefits
emp_cols = ['year', 'state_id', 'emp_fed_total', 'comp_fed_total', 'emp_total', 'comp_total']
emp = pd.read_csv(
'data/federaljobstate.txt',
usecols = emp_cols,
index_col=['year','state_id']
)
emp = emp.rename(columns = {
'emp_total' : 'total_emp',
'comp_total' : 'total_comp',
'emp_fed_total' : 'fed_emp',
'comp_fed_total' : 'fed_comp'
})
emp.head()
Out[5]:
In [6]:
#stick everything together
all = (indv
.join(govt)
.join(contracts)
.join(emp))
all.reset_index(inplace=True)
#get rid of years < 2004
#all = all[all['year']>2003]
all.head()
Out[6]:
In [7]:
#total federal dollars in state =
# federal receipts to individuals +
# federal money to state/local governments +
# contract $ +
# federal compensation
all['total'] = (
all['total_indv_receipt'] +
all['fed_rev'] +
all['fed_contracts'] +
all['fed_comp']
)
In [8]:
#merge in state names
states = pd.read_csv(
'data/state.txt')
all = pd.merge(all, states[['id', 'state_ansi', 'state_abbr', 'state_name']], left_on='state_id', right_on='id')
Adjust totals to FY 2012 dollars.
Deflator source, Office of Management and Budget:
10.1--Gross Domestic Product and Deflators Used in the Historical Tables: 1940-2019 from the FY 2015 President's Budget Request.
In [9]:
#grab the deflators published by OMB as part of the FY 2015 budget request
gdp = pd.read_excel('data/hist10z1.xls', 0, parse_cols=[0,1,2])
gdp.columns = ['year', 'gdp', 'gdp_price_index']
def clean_year(x):
try:
return int(x[:4])
except:
return np.nan
gdp['year'] = gdp['year'].map(lambda x: clean_year(x))
gdp = gdp.dropna(subset=['year'])
gdp['year'] = gdp['year'].astype(np.int64)
#adjust deflators to base year 2012
denom = gdp[gdp['year']==2012]['gdp_price_index'].values[0]
gdp['gdp_price_index'] = gdp['gdp_price_index']/denom
In [10]:
#merge deflators into main data and inflation-adjust
all = pd.merge(all, gdp[['year','gdp_price_index']], left_on='year', right_on='year')
all['total_adj'] = all['total']/all['gdp_price_index']
Get population estimates for per-capita numbers
source: Census Population Estimates, State Characteristics, Vintage 2013
In [11]:
#population
pop = pd.read_csv(
'data/populationstate.txt'
)
pop = pop.rename(columns = {'value' : 'pop'})
all = pd.merge(all, pop, left_on=['year','state_id'], right_on=['year','state_id'])
In [12]:
%matplotlib inline
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
#tweak dataset format for Seaborn time series plotting
viz2 = all
viz2 = viz2[['state_abbr', 'year', 'fed_indv_receipt', 'fed_contracts', 'fed_comp', 'fed_rev', 'total', 'gdp_price_index', 'pop']]
viz2['Individual Aid'] = viz2['fed_indv_receipt'] / viz2['gdp_price_index']
viz2['Per-Person Individual Aid'] = viz2['Individual Aid'] / viz2['pop']
viz2['Contracts'] = viz2['fed_contracts'] / viz2['gdp_price_index']
viz2['Per-Person Contracts'] = viz2['Contracts'] / viz2['pop']
viz2['State Revenue'] = viz2['fed_rev'] / viz2['gdp_price_index']
viz2['Per-Person State Revenue'] = viz2['State Revenue'] / viz2['pop']
viz2['Compensation'] = viz2['fed_comp'] / viz2['gdp_price_index']
viz2['Per-Person Compensation'] = viz2['Compensation'] / viz2['pop']
viz2['Total'] = viz2['total'] / viz2['gdp_price_index']
viz2['Per-Person Total'] = viz2['Total'] / viz2['pop']
viz2 = viz2.rename(columns = {'state_abbr' : 'State', 'year' : 'Year'})
spendtype = viz2[['State', 'Year', 'Individual Aid','Contracts', 'State Revenue', 'Compensation']]
spendtype_pc = viz2[['State', 'Year', 'Per-Person Individual Aid','Per-Person Contracts', 'Per-Person State Revenue', 'Per-Person Compensation']]
spendtype = pd.melt(spendtype, id_vars=['State', 'Year'], var_name = 'Fed $ Type', value_name = 'Amount')
spendtype_pc = pd.melt(spendtype_pc, id_vars=['State', 'Year'], var_name = 'Fed $ Type', value_name = 'Amount')
spendtype_pc.head()
Out[12]:
In [13]:
#tweak dataset format for Seaborn time series plotting
viz2 = all
viz2 = viz2[['state_abbr', 'year', 'fed_indv_receipt', 'fed_contracts', 'fed_comp', 'fed_rev', 'total', 'gdp_price_index', 'pop']]
viz2['Individual Aid'] = viz2['fed_indv_receipt'] / viz2['gdp_price_index']
viz2['Per-Person Individual Aid'] = viz2['Individual Aid'] / viz2['pop']
viz2['Contracts'] = viz2['fed_contracts'] / viz2['gdp_price_index']
viz2['Per-Person Contracts'] = viz2['Contracts'] / viz2['pop']
viz2['State Revenue'] = viz2['fed_rev'] / viz2['gdp_price_index']
viz2['Per-Person State Revenue'] = viz2['State Revenue'] / viz2['pop']
viz2['Compensation'] = viz2['fed_comp'] / viz2['gdp_price_index']
viz2['Per-Person Compensation'] = viz2['Compensation'] / viz2['pop']
viz2['Total'] = viz2['total'] / viz2['gdp_price_index']
viz2['Per-Person Total'] = viz2['Total'] / viz2['pop']
viz2 = viz2.rename(columns = {'state_abbr' : 'State', 'year' : 'Year'})
spendtype = viz2[['State', 'Year', 'Individual Aid','Contracts', 'State Revenue', 'Compensation']]
spendtype = pd.melt(spendtype, id_vars=['State', 'Year'], var_name = 'Fed $ Type', value_name = 'Amount')
spendtype_pc = viz2[['State', 'Year', 'Per-Person Individual Aid','Per-Person Contracts', 'Per-Person State Revenue', 'Per-Person Compensation']]
spendtype_pc = pd.melt(spendtype_pc, id_vars=['State', 'Year'], var_name = 'Fed $ Type', value_name = 'Amount')
spendtype_pc.head()
Out[13]:
In [21]:
sns.factorplot(
'Year', 'Amount', 'Fed $ Type',
spendtype_pc[spendtype_pc['State']=='US'],kind='point',size=5
)
Out[21]:
In [15]:
#plot per-capita federal spending (by type) in the U.S.
#exclude Washington, DC because its federal compensation numbers skew so high
sns.set(style="white")
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
sns.factorplot(
'Year', 'Amount', 'Fed $ Type',
spendtype_pc[spendtype_pc['State']<>'DC'],
col='State', col_wrap=4, kind='point',
palette=sns.color_palette(flatui)
)
Out[15]:
In [16]:
stateus_pc = viz2[['State', 'Year', 'Per-Person Total']]
#limit DataFrame to years where we have data for all 4 ways that federal $ go to states
#get rid of DC
stateus_pc = stateus_pc[(stateus_pc['Year'].isin([2007, 2008, 2009, 2010, 2011])) & (~stateus_pc['State'].isin(['DC']))]
us = stateus_pc[stateus_pc['State']=='US']
us = us.rename(columns={'Per-Person Total' : 'US Per-Person Total'})
stateus_pc = stateus_pc[stateus_pc['State']<>'US']
stateus_pc = pd.merge(
stateus_pc, us,
left_on=['Year'], right_on=['Year'],
suffixes=('', '_US'))
stateus_pc = stateus_pc[['State', 'Year', 'Per-Person Total', 'US Per-Person Total']]
stateus_pc = pd.melt(
stateus_pc,
id_vars=['Year', 'State'],
var_name = 'Type', value_name = 'Amount')
sns.set(style="white")
sns.factorplot(
'Year', 'Amount', 'Type',
stateus_pc,
col='State', col_wrap=4, kind='bar'
)
Out[16]:
In [17]:
contracts = pd.read_csv('data/fed_contract_totals.txt')
contracts.head()
Out[17]:
In [ ]:
contracts = contracts[~contracts['state_abbr'].isin(['DC', 'PR', 'US'])]
g = sns.FacetGrid(contracts, col='state_abbr', col_wrap=4)
g.map(sns.pointplot, 'year', 'total_adj')
g.set_axis_labels("Year", "Per-Capita Fed. Contract $")