In [4]:
import pandas as pd
import numpy as np

In [6]:
data2014 = pd.read_excel('data/doe_sbir/FY14_PI_R2_Award_List.xlsx')
data2015 = pd.read_excel('data/doe_sbir/FY15_PI_R2_Award_List.xlsx')
data2016 = pd.read_excel('data/doe_sbir/FY16_PI_R2_Award_List.xlsx')

In [7]:
fulldata = pd.concat([data2014,data2015,data2016])

In [9]:
len(fulldata)


Out[9]:
431

In [10]:
fulldata.columns


Out[10]:
Index([u' Amount Requested', u'Address', u'Amount Requested', u'Award',
       u'City', u'Company', u'DOE Funding Program', u'PI', u'Project Summary',
       u'Project Title', u'SBIR/STTR', u'State', u'State ', u'Summary',
       u'Title', u'Topic Number', u'Topic Title', u'Topic Title ',
       u'Topic number ', u'Zip', u'Zip Code'],
      dtype='object')

In [12]:
hepgrants = fulldata[fulldata['DOE Funding Program'] == 'High Energy Physics']

In [22]:
hepgrants.groupby('')['Amount Requested'].sum()


Out[22]:
State 
 AZ     149967
 CA    1204536
 CO    1058683
 CT     150000
 IL     150000
 KY     149999
 MA     599673
 MD     149982
 MI     150000
 MN     149898
 NC     150000
 NM     150000
 OH     149856
 TX     149958
Name: Amount Requested, dtype: float64

In [ ]: