In [105]:
import pandas as pd
from datetime import datetime, timedelta
import time
import requests
import numpy as np
import json
import urllib
from pandas.io.json import json_normalize
import re
import os.path
import zipfile
from glob import glob

In [2]:
url ="https://api.usaspending.gov/api/v1/awards/?limit=100"
r = requests.get(url, verify=False)
r.raise_for_status()
type(r)
data = r.json() 
meta = data['page_metadata']
data = data['results']
df_API_data = pd.io.json.json_normalize(data)


/Users/mbaumer/anaconda2/lib/python2.7/site-packages/urllib3/connectionpool.py:858: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
  InsecureRequestWarning)

In [3]:
df_API_data.col


Out[3]:
awarding_agency.id awarding_agency.office_agency awarding_agency.subtier_agency.abbreviation awarding_agency.subtier_agency.name awarding_agency.subtier_agency.subtier_code awarding_agency.toptier_agency.abbreviation awarding_agency.toptier_agency.cgac_code awarding_agency.toptier_agency.fpds_code awarding_agency.toptier_agency.name awarding_agency.toptier_flag ... recipient.small_business_description subaward_count total_loan_value total_obligation total_outlay total_subaward_amount total_subsidy_cost type type_description uri
0 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 2980.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
1 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1995.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
2 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 18283.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
3 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 6563.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
4 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 463.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
5 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1797.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
6 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 575.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
7 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 640.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
8 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 845.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
9 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 224.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
10 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1677.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
11 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 4310.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
12 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1547.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
13 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 2379.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
14 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 4015.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
15 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 4227.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
16 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 2464.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
17 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 895.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
18 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 191.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
19 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1600.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
20 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1191.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
21 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 596.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
22 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1677.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
23 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1459.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
24 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 2905.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
25 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1806.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
26 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 689.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
27 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 3357.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
28 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 927.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
29 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 3975.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
70 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 758.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
71 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 582.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
72 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 168.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
73 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 576.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
74 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 619.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
75 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 506.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
76 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1026.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
77 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 599.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
78 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 2131.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
79 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 661.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
80 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 859.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
81 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 658.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
82 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 4705.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
83 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 534.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
84 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 879.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
85 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1285.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
86 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1649.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
87 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 4602.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
88 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 4698.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
89 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 4472.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
90 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 4002.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
91 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 4790.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
92 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 753.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
93 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1940.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
94 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 243.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
95 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 375.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
96 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 2362.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
97 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 4705.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
98 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 4784.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None
99 153 None FSA Farm Service Agency 12D2 USDA 012 1200 Department of Agriculture False ... None 0 None 1587.00 None None None 06 DIRECT PAYMENT FOR SPECIFIED USE, AS A SUBSIDY... None

100 rows × 92 columns


In [33]:
base_url = "https://api.usaspending.gov"
endpt_trans = "/api/v2/search/spending_by_award/?limit=10"

params = {
    "filters": {
    "time_period": [
        {
            "start_date": "2016-10-01",
            "end_date": "2017-09-30"
        }
    ]
}

}

url = base_url + endpt_trans
r = requests.post(url, json=params)
print(r.status_code, r.reason)
r.raise_for_status()
r.headers
r.request.headers
data = r.json() 
meta = data['page_metadata']
data = data['results']
df_trans = pd.io.json.json_normalize(data)


(422, 'Unprocessable Entity')
---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
<ipython-input-33-8f1c182eab43> in <module>()
     17 r = requests.post(url, json=params)
     18 print(r.status_code, r.reason)
---> 19 r.raise_for_status()
     20 r.headers
     21 r.request.headers

/Users/mbaumer/anaconda2/lib/python2.7/site-packages/requests/models.pyc in raise_for_status(self)
    938 
    939         if http_error_msg:
--> 940             raise HTTPError(http_error_msg, response=self)
    941 
    942     def close(self):

HTTPError: 422 Client Error: Unprocessable Entity for url: https://api.usaspending.gov/api/v2/search/spending_by_award/?limit=10

In [99]:
currentFY = 2019
n_years_desired = 10

def download_latest_data(currentFY,n_years_desired):

    #find latest datestamp on usaspending files
    usaspending_base = 'https://files.usaspending.gov/award_data_archive/'
    save_path = '../new_data/'
    r = requests.get(usaspending_base, allow_redirects=True)
    r.raise_for_status()
    datestr = re.findall('_(\d{8}).zip',r.content)[0]

    for FY in np.arange(currentFY-n_years_desired+1,currentFY+1):
        doe_contracts_url = usaspending_base+str(FY)+'_089_Contracts_Full_' + datestr + '.zip'
        doe_grants_url = usaspending_base+str(FY)+'_089_Assistance_Full_' + datestr + '.zip'
        nsf_grants_url = usaspending_base+str(FY)+'_049_Assistance_Full_' + datestr + '.zip'
        doe_sc_url = 'https://science.energy.gov/~/media/_/excel/universities/DOE-SC_Grants_FY'+str(FY)+'.xlsx'

        for url in [doe_contracts_url,doe_grants_url,nsf_grants_url,doe_sc_url]:

            filename = url.split('/')[-1]
            if os.path.exists(save_path+filename): continue

            if url == doe_sc_url: 
                verify='doe_cert.pem'
            else:
                verify=True

            try: 
                r = requests.get(url, allow_redirects=True,verify=verify)
                r.raise_for_status()
            except:
                print 'could not find', url
                continue

            # DOE website stupidly returns a 200 HTTP code when displaying 404 page :/
            page_not_found_text = 'The page that you have requested was not found.'
            if page_not_found_text in r.content: 
                print 'could not find', url
                continue

            open(save_path+filename, 'wb+').write(r.content)
            zipper = zipfile.ZipFile(save_path+filename,'r')
            zipper.extractall(path='../new_data')
    print 'Data download complete'
    
def unzip_all():
    for unzip_this in glob('../new_data/*.zip'):
        zipper = zipfile.ZipFile(unzip_this,'r')
        zipper.extractall(path='../new_data')

In [129]:
print 'Generating DOE Contract data...'

contract_file_list = glob('../new_data/*089_Contracts*.csv')
contract_df_list = []
for contract_file in contract_file_list:
    contract_df_list.append(pd.read_csv(contract_file))
fulldata = pd.concat(contract_df_list,ignore_index=True)

print len(fulldata)

sc_awarding_offices = ['CHICAGO SERVICE CENTER (OFFICE OF SCIENCE)',
              'OAK RIDGE OFFICE (OFFICE OF SCIENCE)',
              'SC CHICAGO SERVICE CENTER',
              'SC OAK RIDGE OFFICE']

sc_funding_offices = ['CHICAGO SERVICE CENTER (OFFICE OF SCIENCE)',
                      'OAK RIDGE OFFICE (OFFICE OF SCIENCE)',
                      'SCIENCE',
                      'SC OAK RIDGE OFFICE',
                      'SC CHICAGO SERVICE CENTER'
                     ]

sc_contracts = fulldata[(fulldata['awarding_office_name'].isin(
    sc_awarding_offices)) | (fulldata['funding_office_name'].isin(sc_funding_offices))]

print len(sc_contracts)

#sc_contracts.to_pickle('../cleaned_data/sc_contracts.pkl')


Generating DOE Contract data...
89160
9626

In [137]:
print 'Generating NSF Grant data...'
grant_file_list = glob('../new_data/*049_Assistance*.csv')
grant_df_list = []
for grant_file in grant_file_list:
    grant_df_list.append(pd.read_csv(grant_file))
fulldata = pd.concat(grant_df_list,ignore_index=True)
len(fulldata)


Generating NSF Grant data...
/Users/mbaumer/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (21,23,53,59,62,63,64,68,70) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/mbaumer/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (33,45,65,66) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/mbaumer/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (21,23,53,59,62,63,64,65,68,70) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/mbaumer/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (34,48,65) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/mbaumer/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (1,2,21,23,33,34,43,45,53,55,59,62,63,64,65,66,68,70) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Out[137]:
167509

In [142]:
mps_grants = fulldata[fulldata['cfda_title'] == 'MATHEMATICAL AND PHYSICAL SCIENCES']
len(mps_grants)


Out[142]:
32320

In [147]:
mps_grants['recipient_congressional_district'].unique()


Out[147]:
array([ 0.,  1., nan,  7.,  5.,  9., 25., 34., 32., 13., 23.,  3., 14.,
       12., 24.,  2., 17., 10., 45.,  8., 16.,  4., 27., 11., 21., 49.,
        6., 31., 18., 15., 20., 22., 33., 26., 19., 30., 47., 39., 37.,
       44., 28., 35., 50., 52., 48., 53., 46., 29., 98., 41., 36., 90.,
       43., 40., 38.])

In [ ]:
mps_grants = mps_grants.dropna(subset=['principal_place_cd'])

    strlist = []
    for code in mps_grants['principal_place_cd'].values:
        if code == 'ZZ':
            code = '00'
        if len(str(int(code))) < 2:
            strlist.append('0' + str(int(code)))
        else:
            strlist.append(str(int(code)))

    mps_grants['cong_dist'] = mps_grants['principal_place_state_code'] + strlist
    pd.to_pickle(mps_grants, '../cleaned_data/nsf_mps_grants.pkl')