In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('SearchResults (1).csv')

In [3]:
len(data)


Out[3]:
26551

In [4]:
from scipy.stats import itemfreq
data.columns


Out[4]:
Index([u'PSC Type', u'Contract ID', u'Modification Number',
       u'Transaction Number', u'Award/IDV Type', u'Vendor Name',
       u'Contracting Agency ID', u'Contracting Agency', u'Date Signed',
       u'Action Obligation ($)', u'Reference IDV', u'Contracting Office Name',
       u'NAICS', u'NAICS Description', u'PSC', u'PSC Description',
       u'Vendor City', u'Vendor DUNS', u'Vendor State', u'Vendor ZIP Code',
       u'Global DUNS Number', u'Global Vendor Name', u'Unnamed: 22'],
      dtype='object')

In [ ]:
data2 = pd.read_csv('./datafeeds\\2017_DOE_Contracts_Full_20170215.csv')
data3 = pd.read_csv('./datafeeds\\2016_DOE_Contracts_Full_20170215 2.csv')
data4 = pd.read_csv('./datafeeds\\2015_DOE_Contracts_Full_20170215.csv')
data5 = pd.read_csv('./datafeeds\\2014_DOE_Contracts_Full_20170215.csv')
data6 = pd.read_csv('./datafeeds\\2013_DOE_Contracts_Full_20170215.csv')
data7 = pd.read_csv('./datafeeds\\2012_DOE_Contracts_Full_20170215.csv')
#data2.columns == data7.columns OK, good

In [84]:
fulldata = pd.concat([data2,data3,data4,data5,data6,data7])

In [109]:
offices = fulldata['contractingofficeid'].drop_duplicates()
offices[offices.str.contains('CA')]


Out[109]:
21              893031: EM-PORTSMOUTH/PADUCAH PROJECT OFC
28                      892430: SC CHICAGO SERVICE CENTER
46                                    893032: EM-CARLSBAD
0       00002: CHICAGO SERVICE CENTER (OFFICE OF SCIENCE)
493     00016: PORTSMOUTH/PADUCAH PROJECT OFFICE (EM CBC)
538                          00029: CARLSBAD FIELD OFFICE
4626                 892430: SE-SC CHICAGO SERVICE CENTER
4887                                  893032: MP-CARLSBAD
4968            893031: MP-PORTSMOUTH/PADUCAH PROJECT OFC
Name: contractingofficeid, dtype: object

In [108]:
offices = fulldata['fundingrequestingofficeid'].drop_duplicates()
# print following to search funding offices
offices[offices.str.contains('SC')]


Out[108]:
5                                          892401: SCIENCE
179                      892430: SC CHICAGO SERVICE CENTER
225                            892431: SC OAK RIDGE OFFICE
319                                        F2BDCW: ESC FDX
0        00002: CHICAGO SERVICE CENTER (OFFICE OF SCIENCE)
4              00005: OAK RIDGE OFFICE (OFFICE OF SCIENCE)
422                                        F4FDAG: ASC RAF
2138               SCDN: DOMESTIC NUCLEAR DETECTION OFFICE
2523          SCST: DIRECTORATE FOR SCIENCE AND TECHNOLOGY
4626                              892401: SE-SCIENCE FUNDS
9478                          HHA106: SOUTHEAST REGION RSC
12502              113164: EOP-OFC OF SCIENCE  TECH POLICY
489      SCNP: DIRECTORATE FOR NATIONAL PROTECTION AND ...
5987                    SCSEC: DHS OFFICE OF THE SECRETARY
6050                 F1ATA0: AF OFFICE SCIENTIFIC RESEARCH
9297                    NSSC0: NASA SHARED SERVICES CENTER
3854                                   F4FFWF: 635 SCOW FM
9504                           JSC00: JOHNSON SPACE CENTER
2497             SP4701: DES DSCP CONTRACTING SERVICES OFC
8049                        SCOH: OFFICE OF HEALTH AFFAIRS
236      AGS: DIVISION OF ATMOSPHERIC AND GEOSPACE SCIE...
1389              00012: SCHENECTADY NAVAL REACTORS OFFICE
2297               SC001: OFFICE OF PROCUREMENT OPERATIONS
3124           N0760A: CIVIL ENGINEER CORPS OFFICER SCHOOL
4514     SCRIIA: RIIA - OFFICE OF INTELLIGENCE AND ANAL...
5921                          SCSECO: DHS OFFICE OF POLICY
6213                     N62271: NAVAL POSTGRADUATE SCHOOL
13733                      EAR: DIVISION OF EARTH SCIENCES
Name: fundingrequestingofficeid, dtype: object

In [88]:
sc_offices = ['00002: CHICAGO SERVICE CENTER (OFFICE OF SCIENCE)',
              '00005: OAK RIDGE OFFICE (OFFICE OF SCIENCE)','892431: SC OAK RIDGE OFFICE','892401: SCIENCE',
             '892430: SC CHICAGO SERVICE CENTER', '892430: SE-SC CHICAGO SERVICE CENTER', '892401: SE-SCIENCE FUNDS']
sc_contracts = fulldata[(fulldata['fundingrequestingofficeid'].isin(sc_offices)) | (fulldata['contractingofficeid'].isin(sc_offices))]
byDistrict = sc_contracts.groupby('placeofperformancecongressionaldistrict')

In [100]:
sc_contracts.to_pickle('./sc_contracts_2012-2017')

In [93]:
def tell_me_about_district(distcode):
    try: 
        byDistrict.get_group(distcode)
    except KeyError: 
        print 'This district received no SC contracts from 2012-2016'
        return
    print 'In the past 5 years, this district has received: ', '${:,.2f}'.format(byDistrict.get_group(distcode)['dollarsobligated'].sum())
    print byDistrict.get_group(distcode).groupby(['vendorname','fiscal_year']).sum()['dollarsobligated']

In [99]:
tell_me_about_district('TX03')


This district received no SC contracts from 2012-2016

In [61]:
sc_contracts.groupby(['placeofperformancecongressionaldistrict','vendorname']).sum()['dollarsobligated']


Out[61]:
placeofperformancecongressionaldistrict  vendorname                                                               
CA13                                     OUTSOURCE CONSULTING SERVICES, INC.                                         -1.825200e+04
                                         THE REGENTS OF THE UNIVERSITY OF CALIFORNIA (1741)                           8.741514e+08
CA17                                     NEW TECH SOLUTIONS INCORPORATED                                              1.557622e+04
CA18                                     STANFORD UNIVERSITY                                                          5.660364e+08
CA44                                     HUMAN POTENTIAL CONSULTANTS, L.L.C.                                          1.349504e+05
CA52                                     BLUE TECH INC.                                                               6.388150e+03
DC00                                     ALVAREZ & ASSOCIATES, LLC                                                    5.766719e+05
                                         AMERICAN ASSOCIATION FOR THE ADVANCEMENT OF SCIENCE                          5.258000e+04
                                         AMERICAN SOCIETY FOR MICROBIOLOGY                                            5.274000e+03
                                         CAPITAL MEETING PLANNING INC                                                 3.000000e+05
                                         CHICKASAW ADVISORY SERVICES, LLC                                            -2.500000e+04
                                         CNI GLOBAL SOLUTIONS, LLC                                                    2.320000e+06
                                         NATIONAL ACADEMY OF SCIENCES                                                 6.000000e+05
                                         NEWSWISE                                                                     3.200000e+04
                                         OPTICAL SOCIETY OF AMERICA, INCORPORATED THE                                 1.229000e+04
                                         QUALITY SYSTEMS APPLICATIONS INC                                             2.500000e+04
                                         WINVALE GROUP, LLC (THE)                                                     2.181839e+04
FL22                                     TAYLOR & FRANCIS GROUP, LLC                                                  5.960000e+03
GA11                                     SYSCOM TECHNOLOGIES, INC.                                                    0.000000e+00
IA04                                     IOWA STATE UNIVERSITY EQUITIES CORPORATION                                   3.309250e+07
IL03                                     22ND CENTURY TECHNOLOGIES INC.                                               8.545000e+06
                                         AMERICAN ASSOCIATION FOR LAB ACCREDATION (INC)                              -5.400000e-01
                                         BLUE TECH INC.                                                               5.475000e+03
                                         CNI TECHNICAL SERVICES, LLC                                                  8.154092e+04
                                         D & B POWER ASSOCIATES, INC.                                                 5.188000e+03
                                         ERI ECONOMIC RESEARCH INSTITUTE                                              5.589000e+03
                                         FOUR POINTS TECHNOLOGY, L.L.C.                                              -3.041440e+03
                                         GRADUATE SCHOOL                                                             -1.337800e+04
                                         H A KING AND ASSOCIATES INCORPORATED (0334)                                  7.977483e+04
                                         IRON MOUNTAIN INFORMATION MANAGEMENT, LLC                                    7.000000e+03
                                                                                                                          ...     
TN03                                     THERMOCOPY OF TENNESSEE, INC.                                               -1.054580e+03
                                         THOMSON REUTERS (SCIENTIFIC) LLC                                             6.005324e+04
                                         THYSSENKRUPP ELEVATOR CORPORATION (1267)                                     5.708150e+04
                                         TITAN PROPANE LLC                                                           -2.994390e+03
                                         TOP-TIER TECHNOLOGIES, INC.                                                  7.380000e+03
                                         TOWER SERVICES, INC                                                          2.395000e+03
                                         TRIBALCO, LLC                                                                1.829376e+05
                                         UNION CARBIDE CORP                                                           0.000000e+00
                                         UNITED PARCEL SERVICE INCORPORATED (OH) (2075)                              -8.148000e+01
                                         URS                                                                          2.667273e+08
                                         UT BATTELLE LIMITED LIABILITY COMPANY                                        1.337169e+09
                                         VERISURF SOFTWARE INC                                                        0.000000e+00
                                         VIDEOLINK, INC.                                                              0.000000e+00
                                         W. W. GRAINGER, INC.                                                         0.000000e+00
                                         WASTREN ADVANTAGE, INC.                                                      8.000000e+06
                                         WEST PUBLISHING CORPORATION                                                  1.026486e+04
                                         WM. S. TRIMBLE CO. INC.                                                      8.420000e+03
                                         WOLTERS KLUWER FINANCIAL SERVICES, INC.                                      2.097600e+04
                                         XEROX CORPORATION                                                           -3.837600e+02
                                         ZIBIZ CORPORATION                                                            0.000000e+00
TX28                                     ALLIED ASSOCIATES COMMERCIAL FLOORS, INC                                     1.496316e+04
VA02                                     JEFFERSON SCIENCE ASSOCIATES LIMITED LIABILITY COMPANY A SURA/CSC COMPANY    1.236434e+08
VA08                                     BRUNDIDGE & STANGER PC                                                       0.000000e+00
                                         DELOITTE CONSULTING LLP                                                      5.000000e+03
                                         JDM ASSOCIATES, LLC                                                          2.416521e+04
                                         NATIONAL CAPITOL CONTRACTING, LLC                                            4.750000e+05
VA11                                     ORACLE AMERICA, INC.                                                         0.000000e+00
WA04                                     BATTELLE MEMORIAL INSTITUTE                                                  9.639466e+08
                                         CH2M HILL PLATEAU REMEDIATION COMPANY                                        3.125700e+04
WI02                                     AMERICAN SOCIETY OF AGRONOMY, INC.                                           3.990000e+03
Name: dollarsobligated, dtype: float64

In [6]:
np.unique(data['Contracting Office Name'])


/Users/mbaumer/anaconda2/lib/python2.7/site-packages/numpy/lib/arraysetops.py:200: FutureWarning: numpy not_equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.
  flag = np.concatenate(([True], aux[1:] != aux[:-1]))
Out[6]:
array([nan, '8900-14', 'ADVANCED RSRCH PROJ AGENCY ARPA-E',
       'ALASKA POWER ADMINISTRATION', 'ALBUQUERQUE OPERATIONS OFFICE',
       'BARTLESVILLE PROJECT OFFICE', 'BONNEVILLE POWER ADMINISTRATION',
       'CARLSBAD FIELD OFFICE',
       'CHICAGO SERVICE CENTER (OFFICE OF SCIENCE)',
       'CLINCH RIVER BREEDER REACTOR PROJECT', 'EM-CARLSBAD',
       'EM-ENVIRONMENTAL MGMT CON BUS CTR',
       'ENVIRONMENTAL MEASUREMENTS LAB',
       'FEDERAL ENERGY REGULATORY COMMISSION', 'GOLDEN FIELD OFFICE',
       'GRAND FORKS PROJECT OFFICE', 'HEADQUARTERS PROCUREMENT SERVICES',
       'IDAHO OPERATIONS OFFICE', 'LARAMIE PROJECT OFFICE',
       'LAS VEGAS SATELLITE OFFICE (LVSO)', 'LIVERMORE SITE OFFICE',
       'LOS ALAMOS SITE OFFICE', 'MORGANTOWN ENERGY TECHNOLOGY CENTER',
       'NATIONAL ENERGY TECHNOLOGY CENTER',
       'NATIONAL ENERGY TECHNOLOGY LAB',
       'NATIONAL NUCLEAR SECURITY ADMINISTRATION',
       'NATIONAL NUCLEAR SECURITY ADMINISTRATION HEADQUARTERS',
       'NATIONAL NUCLEAR SECURITY ADMN BUSINESS SVCS DIVISION',
       'NAVAL REACTORS LABORATORY FIELD OFFICE',
       'NEVADA OPERATIONS OFFICE', 'NNSA MO CONTRACTING',
       'NNSA NAVAL REACTORS LAB FLD OFFICE',
       'NNSA NON-MO CNTRCTNG OPS DIV',
       'OAK RIDGE OFFICE (OFFICE OF SCIENCE)',
       'OAK RIDGE SITE OFFICE (NNSA)', 'OAKLAND OPERATIONS OFFICE',
       'OFFICE OF CIVILIAN RADIOACTIVE WASTE MGMT',
       'OFFICE OF ENVIRONMENTAL MANAGEMENT CONSOLIDATED BUSINESS CENTER',
       'OFFICE OF RIVER PROTECTION', 'OHIO FIELD OFFICE (EM CBC)',
       'PANTEX SITE OFFICE', 'PITTSBURGH ENERGY TECHNOLOGY CENTER',
       'PORTSMOUTH/PADUCAH PROJECT OFFICE (EM CBC)',
       'RICHLAND OPERATIONS OFFICE', 'ROCKY FLATS OFFICE (EM CBC)',
       'SALT REPOSITORY PROJECT OFFICE', 'SAN FRANCISCO SUPPORT OFFICE',
       'SAVANNAH RIVER OPERATIONS OFFICE', 'SC CHICAGO SERVICE CENTER',
       'SC OAK RIDGE OFFICE', 'SCHENECTADY NAVAL REACTORS OFFICE',
       'SOUTHEASTERN POWER ADMINISTRATION',
       'SOUTHWESTERN POWER ADMINISTRATION', 'STRATEGIC PETROLEUM RESERVE',
       'STRATEGIC PETROLEUM RESERVE PROJECT MGMT',
       'WESTERN REGIONAL OFFICE', 'WESTERN-CORPORATE SERVICES OFFICE',
       'WESTERN-DESERT SOUTHWEST REGION', 'WESTERN-ROCKY MOUNTAIN REGION',
       'WESTERN-SIERRA NEVADA REGION', 'WESTERN-UPPER GREAT PLAINS REGION',
       'WESTERN-UPPPER GREAT PLAINS REGION'], dtype=object)

In [ ]: