An exploratory data analysis

Connection to MongoDB



In [1]:

    
import pymongo
from pymongo import MongoClient
import time
Client = MongoClient("mongodb://nbi-mongo.admin/")
db = Client.bridge
collection = db["SampleNbi2"]

Bridge Records in Database



In [2]:

    
startTime = time.time()
print("Bridges Records in DB: ", collection.count())
print("Seconds : ", (time.time() - startTime))









    



Bridges Records in DB:  17509885
Seconds :  0.008536100387573242

Total Number of Bridges (Years)



In [3]:

    
import pandas as pd

listStates = []
startTime = time.time()
for i in collection.distinct("stateCode"):
    listStates.append(i)
print("Time to create list of distinct states in Seconds: ", (time.time() - startTime))

countPerYear = {}
startTime = time.time()
for i in collection.distinct("year"):
    countPerYear[i] =  collection.find({"year":i}).count()
print("Seconds: ",(time.time()- startTime))
count_per_year = pd.DataFrame(list(countPerYear.items()), columns = ['year', 'Distinct count of Bridges'])
count_per_year.set_index("year", inplace = True)
count_per_year









    



Time to create list of distinct states in Seconds:  0.014206171035766602
Seconds:  5.496885061264038






    Out[3]:






  
    
      
      Distinct count of Bridges
    
    
      year
      
    
  
  
    
      1992
      707509
    
    
      1993
      700993
    
    
      1994
      703606
    
    
      1995
      713410
    
    
      1996
      711792
    
    
      1997
      715853
    
    
      1998
      716703
    
    
      1999
      721247
    
    
      2000
      724037
    
    
      2001
      727990
    
    
      2002
      730034
    
    
      2003
      732942
    
    
      2004
      736600
    
    
      2005
      740010
    
    
      2006
      740177
    
    
      2007
      748661
    
    
      2008
      751134
    
    
      2009
      746542
    
    
      2010
      632426
    
    
      2011
      632405
    
    
      2012
      634324
    
    
      2013
      634668
    
    
      2014
      637622
    
    
      2015
      638836
    
    
      2016
      642566

Trend of the count of records in the bridge



In [4]:

    
%matplotlib inline
print("Trend of the count of records in the bridge:")
count_per_year.plot(figsize=(10,8))
count_per_year.plot(kind = "bar", figsize=(10,8))









    



Trend of the count of records in the bridge:






    Out[4]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09bcd4ee48>

States with most bridges



In [5]:

    
countPerState = {}
startTime = time.time()
for i in listStates:
    countPerState[i] = collection.find({"year":2016, "stateCode":i}).count()
print("Seconds: ", (time.time() - startTime))
count_per_state = pd.DataFrame(list(countPerState.items()), columns = ['State Code', 'Count of Records'])
#count_per_state = count_per_state.set_index('State Code')









    



Seconds:  36.40173649787903



In [7]:

    
stateName = {'25':'MASSACHUSETTS',
             '04':'ARIZONA', 
             '08':'COLORADO',
             '38':'NORTH DAKOTA', 
             '09':'CONNECTICUT', 
             '19':'IOWA', 
             '26':'MICHIGAN', 
             '48':'TEXAS',
             '35':'NEW MEXICO',
             '17':'ILLINOIS', 
             '51':'VIRGINIA',
             '23':'MAINE',
             '16':'IDAHO',
             '36':'NEW YORK',
             '56':'WYOMING',
             '29':'MISSOURI',
             '39':'OHIO',
             '28':'MISSISSIPI', 
             '11':'DISTRICT OF COLOMBIA',
             '21':'KENTUCKY', 
             '18':'INDIANA',
             '06':'CALIFORNIA',
             '47':'TENNESSEE', 
             '12':'FLORIDA',
             '24':'MARYLAND',
             '34':'NEW JERSEY', 
             '46':'SOUTH DAKOTA',
             '13':'GEORGIA',
             '55':'WISCONSIN',
             '30':'MONTANA',
             '54':'WEST VIGINIA',
             '15':'HAWAII', 
             '32':'NEVADA', 
             '37':'NORTH CAROLINA',
             '10':'DELAWARE', 
             '33':'NEW HAMPSHIRE', 
             '44':'RHODE ISLAND',
             '50':'VERMONT', 
             '42':'PENNSYLVANIA', 
             '05':'ARKANSAS', 
             '20':'KANSAS', 
             '45':'SOUTH CAROLINA',
             '22':'LOUISIANA',
             '40':'OKLAHOMA', 
             '72':'PUERTO RICO', 
             '41':'OREGON',
             '21':'MINNESOTA', 
             '53':'WASHINGTON', 
             '01':'ALABAMA', 
             '31':'NEBRASKA',
             '02':'ALASKA', 
             '49':'UTAH'
               }

count_per_state['State Name'] =  count_per_state['State Code'].map(stateName)
count_per_state = count_per_state.set_index('State Name')
asc_c=count_per_state.sort_values(['Count of Records'],ascending = 0)
asc_c.head(n=10).sort_values(['Count of Records'], ascending = 0).plot(kind = 'bar', figsize=(10,8))









    Out[7]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09bcdbd898>

States with least bridges



In [8]:

    
asc_c.tail(n=10).sort_values(['Count of Records'], ascending = 1).plot(kind = 'bar', figsize=(10,8))









    Out[8]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09bcd3c160>

Year Built



In [9]:

    
pipeline = [{"$match":{"year":2016}},
            {'$group':{"_id":"$yearBuilt", "count":{"$sum":1}}}]
startTime = time.time()
yearBuiltResult = collection.aggregate(pipeline)
print("Seconds: ",(time.time() - startTime))
yearBuiltResult_df = pd.DataFrame(list(yearBuiltResult))









    



Seconds:  1.5601255893707275

Early bridges



In [10]:

    
yearBuiltResult_df = yearBuiltResult_df.rename(columns = {'_id': 'Year'})
sort_yearBuiltResult_df=yearBuiltResult_df.sort_values('count', ascending = 0)
sort_yearBuiltResult_df.set_index('Year').head(n=10).plot(kind = 'bar', figsize=(10,8))









    Out[10]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09bcb45c18>

Recently built bridges



In [11]:

    
sort_yearBuiltResult_rdf=yearBuiltResult_df.sort_values('Year', ascending = 0)
sort_yearBuiltResult_rdf.set_index('Year').head(n=10).plot(kind = "bar", figsize = (10,8))









    Out[11]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09b912c1d0>

Summary of NBI data



In [12]:

    
pipeline = [{"$match": {"year":2016}},
            {"$project":{"_id":0, "stateCode":1,"deckWidthOutToOut":1, "structureLength":1, "averageDailyTraffic":1 }}]
startTime = time.time()
pdc = collection.aggregate(pipeline)
print("Seconds : ", (time.time() - startTime))
nbi = pd.DataFrame(list(pdc))









    



Seconds :  0.07834887504577637



In [13]:

    
stateName = {'25':'MASSACHUSETTS',
             '04':'ARIZONA', 
             '08':'COLORADO',
             '38':'NORTH DAKOTA', 
             '09':'CONNECTICUT', 
             '19':'IOWA', 
             '26':'MICHIGAN', 
             '48':'TEXAS',
             '35':'NEW MEXICO',
             '17':'ILLINOIS', 
             '51':'VIRGINIA',
             '23':'MAINE',
             '16':'IDAHO',
             '36':'NEW YORK',
             '56':'WYOMING',
             '29':'MISSOURI',
             '39':'OHIO',
             '28':'MISSISSIPI', 
             '11':'DISTRICT OF COLOMBIA',
             '21':'KENTUCKY', 
             '18':'INDIANA',
             '06':'CALIFORNIA',
             '47':'TENNESSEE', 
             '12':'FLORIDA',
             '24':'MARYLAND',
             '34':'NEW JERSEY', 
             '46':'SOUTH DAKOTA',
             '13':'GEORGIA',
             '55':'WISCONSIN',
             '30':'MONTANA',
             '54':'WEST VIGINIA',
             '15':'HAWAII', 
             '32':'NEVADA', 
             '37':'NORTH CAROLINA',
             '10':'DELAWARE', 
             '33':'NEW HAMPSHIRE', 
             '44':'RHODE ISLAND',
             '50':'VERMONT', 
             '42':'PENNSYLVANIA', 
             '05':'ARKANSAS', 
             '20':'KANSAS', 
             '45':'SOUTH CAROLINA',
             '22':'LOUISIANA',
             '40':'OKLAHOMA', 
             '72':'PUERTO RICO', 
             '41':'OREGON',
             '21':'MINNESOTA', 
             '53':'WASHINGTON', 
             '01':'ALABAMA', 
             '31':'NEBRASKA',
             '02':'ALASKA', 
             '49':'UTAH'
               }

nbi['State Name'] =  nbi['stateCode'].map(stateName)



In [14]:

    
nbi['deckArea']= nbi['deckWidthOutToOut'] * nbi['structureLength']
nbi_summary = nbi[['State Name','deckWidthOutToOut','structureLength','deckArea','averageDailyTraffic']]
nbi_summary = nbi_summary.groupby(['State Name']).agg({'State Name':'count',
                                'deckArea':'sum',
                                'averageDailyTraffic':'sum'})
nbi_summary = nbi_summary.rename(columns={'State Name': 'Count', 'deckArea':'Sum of Deck Area','averageDailyTraffic':
                                         'Sum of ADT'})



In [15]:

    
nbi_summary









    Out[15]:






  
    
      
      Count
      Sum of Deck Area
      Sum of ADT
    
    
      State Name
      
      
      
    
  
  
    
      ALABAMA
      16098
      9238040.53
      76613580
    
    
      ALASKA
      1488
      710703.90
      3357512
    
    
      ARIZONA
      8154
      5055845.20
      97256999
    
    
      ARKANSAS
      12871
      6381829.40
      49636105
    
    
      CALIFORNIA
      25431
      29478067.98
      667205896
    
    
      COLORADO
      8680
      4926475.88
      68595652
    
    
      CONNECTICUT
      4214
      3265916.52
      78275293
    
    
      DELAWARE
      877
      978596.38
      11361551
    
    
      DISTRICT OF COLOMBIA
      245
      568827.33
      7660611
    
    
      FLORIDA
      12313
      16759415.81
      213337215
    
    
      GEORGIA
      14835
      9404058.27
      135068507
    
    
      HAWAII
      1132
      1319916.61
      27400179
    
    
      IDAHO
      4445
      1723198.53
      11566802
    
    
      ILLINOIS
      26704
      13004416.75
      133709726
    
    
      INDIANA
      19245
      7997458.47
      97813523
    
    
      IOWA
      24184
      8248470.40
      34260303
    
    
      KANSAS
      25013
      8192548.56
      46401837
    
    
      LOUISIANA
      12915
      16387705.86
      80282307
    
    
      MAINE
      2450
      1217855.97
      11420274
    
    
      MARYLAND
      5321
      5155225.32
      116163051
    
    
      MASSACHUSETTS
      5171
      4055694.07
      114458271
    
    
      MICHIGAN
      11156
      6357402.03
      93784086
    
    
      MINNESOTA
      14265
      6053639.35
      64332373
    
    
      MISSISSIPI
      17068
      9093623.83
      43837769
    
    
      MISSOURI
      24468
      10361960.26
      86300007
    
    
      MONTANA
      5276
      2000703.82
      10505792
    
    
      NEBRASKA
      15334
      3969666.40
      22562432
    
    
      NEVADA
      1933
      1595543.65
      32174713
    
    
      NEW HAMPSHIRE
      2486
      1120425.98
      16910083
    
    
      NEW JERSEY
      6730
      6878062.72
      160237725
    
    
      NEW MEXICO
      3973
      1728319.62
      26782628
    
    
      NEW YORK
      17462
      12869629.22
      169682377
    
    
      NORTH CAROLINA
      18099
      9221997.64
      112602177
    
    
      NORTH DAKOTA
      4400
      1232040.30
      4423736
    
    
      OHIO
      56568
      26635142.36
      354235716
    
    
      OKLAHOMA
      23053
      8350748.75
      70967585
    
    
      OREGON
      8118
      5017158.18
      56756580
    
    
      PENNSYLVANIA
      22791
      12377122.98
      163719521
    
    
      PUERTO RICO
      2205
      1973541.98
      38053215
    
    
      RHODE ISLAND
      772
      786182.40
      15845827
    
    
      SOUTH CAROLINA
      9358
      6800337.44
      46154561
    
    
      SOUTH DAKOTA
      5849
      1664936.80
      7112750
    
    
      TENNESSEE
      20123
      9659962.24
      156431834
    
    
      TEXAS
      53488
      47253738.73
      543042520
    
    
      UTAH
      3039
      1922455.87
      63527662
    
    
      VERMONT
      2766
      853119.53
      6841770
    
    
      VIRGINIA
      13892
      9620708.82
      122500324
    
    
      WASHINGTON
      8178
      6890269.73
      66350712
    
    
      WEST VIGINIA
      7217
      3805059.64
      24843795
    
    
      WISCONSIN
      14230
      6706707.15
      78666403
    
    
      WYOMING
      3128
      1269968.77
      7350946

Bridge Counts across states



In [16]:

    
nbi_summary['Count'].plot(kind = 'bar', figsize = (11,9))









    Out[16]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09b90175f8>

Deck Area Summary across states



In [17]:

    
nbi_summary['Sum of Deck Area'].plot(kind = 'bar', figsize = (11,9))









    Out[17]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09a0e249e8>

Summary of Average Daily Traffic across states



In [18]:

    
nbi_summary['Sum of ADT'].plot(kind = 'bar', figsize = (11,9))









    Out[18]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09b8be2160>

Breakdown of Agencies Responsible for the maintenance of U.S. Highway Bridges



In [33]:

    
pipeline = [{"$match": {"year":2016}},
            {"$project":{"_id":0, "maintenanceReponsibility":1,"deckWidthOutToOut":1, "structureLength":1, "averageDailyTraffic":1 }}]
startTime = time.time()
pdc1 = collection.aggregate(pipeline)
print("Seconds : ", (time.time() - startTime))
nbi_main = pd.DataFrame(list(pdc1))









    



Seconds :  0.07691693305969238



In [34]:

    
maintenanceReponsibility ={ -1 : 'NA',
                      1 : 'State Highway Agency',
                      21: 'Other State Agency',
                      4 : 'City or Municipal Highway Agency',
                      80: 'Unknown',
                      66: 'National Park Service',
                      2 : 'County Highway Agency',
                      60: 'Other Federal Agencies (not listest below)',
                      64: 'U.S forest Services' ,
                      68: 'Bureau of Land Management',
                      26: 'Private (other than railroad)',
                      62: 'Bureau of Indian Affairs', 
                      3 : 'Town or Township Highway Agency',
                      25: 'Other Local Agencies',
                      11: 'State Park, Forest or Reservation Agency',
                      63: 'Bureau of Fish and Wildlife',
                      27: 'Railroad',
                      74: 'Army',
                      70: 'Corps of Engineers (Civil)',
                      72: 'Air Force',
                      61: 'Indian Tribal Agency',
                      71: 'Corps of Engineers (Military)',
                      69: 'Bureau of Reclamation',
                      67: 'Tennesssee Valley Authority',
                      32: 'Local Toll Authority',
                      12: 'Local Park, Forest or Reservation Agency',
                      31: 'State Toll Authority',
                      73: 'Navy / Marines',
                      75: 'NASA',
                      76: 'Metropolitian Washington Airports Service'
}
  
nbi_main['Maintenance Reponsibility'] =  nbi_main['maintenanceReponsibility'].map(maintenanceReponsibility)
nbi_main['deckArea']= nbi_main['deckWidthOutToOut'] * nbi_main['structureLength']
nbi_main_maintenance=nbi_main.groupby(['Maintenance Reponsibility']).agg({'Maintenance Reponsibility':'count',
                                'deckArea':'sum',
                                'averageDailyTraffic':'sum'})
mant_valid_bridges = nbi_main_maintenance['Maintenance Reponsibility'].sum()
mant_sum_deck = nbi_main_maintenance['deckArea'].sum()
mant_sum_adt = nbi_main_maintenance['averageDailyTraffic'].sum()
mant_valid_bridges = nbi_main_maintenance['Maintenance Reponsibility'].sum()
mant_sum_deck = nbi_main_maintenance['deckArea'].sum()
mant_sum_adt = nbi_main_maintenance['averageDailyTraffic'].sum()



In [35]:

    
nbi_main_maintenance['Percent Valid Bridge']= (nbi_main_maintenance['Maintenance Reponsibility']/mant_valid_bridges)*100
nbi_main_maintenance['Percent Deck Area']= (nbi_main_maintenance['deckArea'] / mant_sum_deck)*100
nbi_main_maintenance['Percent ADT']= (nbi_main_maintenance['averageDailyTraffic'] / mant_sum_adt)*100
nbi_main_maintenance['deckArea'] = nbi_main_maintenance['deckArea'].apply(lambda x: '{:.2f}'.format(x))

nbi_main_maintenance = nbi_main_maintenance.rename(columns = {"Maintenance Reponsibilty":"Count",
                                                              "deckArea":"Sum of Deck Area",
                                                               "averageDailyTraffic":"Sum of ADT"})



In [36]:

    
nbi_main_maintenance









    Out[36]:






  
    
      
      Maintenance Reponsibility
      Sum of Deck Area
      Sum of ADT
      Percent Valid Bridge
      Percent Deck Area
      Percent ADT
    
    
      Maintenance Reponsibility
      
      
      
      
      
      
    
  
  
    
      Air Force
      287
      121469.57
      449042
      0.044665
      0.031584
      0.009352
    
    
      Army
      985
      219463.57
      711242
      0.153292
      0.057063
      0.014812
    
    
      Bureau of Fish and Wildlife
      304
      41538.49
      25695
      0.047310
      0.010801
      0.000535
    
    
      Bureau of Indian Affairs
      944
      248088.63
      552992
      0.146911
      0.064506
      0.011517
    
    
      Bureau of Land Management
      481
      79536.58
      20686
      0.074856
      0.020681
      0.000431
    
    
      Bureau of Reclamation
      146
      40043.25
      61985
      0.022721
      0.010412
      0.001291
    
    
      City or Municipal Highway Agency
      46975
      26482462.24
      310286133
      7.310533
      6.885786
      6.462026
    
    
      Corps of Engineers (Civil)
      245
      366675.07
      428039
      0.038128
      0.095340
      0.008914
    
    
      County Highway Agency
      244552
      49576983.27
      240400073
      38.058659
      12.890663
      5.006577
    
    
      Indian Tribal Agency
      39
      3607.57
      3880
      0.006069
      0.000938
      0.000081
    
    
      Local Park, Forest or Reservation Agency
      81
      18803.75
      187565
      0.012606
      0.004889
      0.003906
    
    
      Local Toll Authority
      1158
      4880722.87
      26862081
      0.180215
      1.269052
      0.559430
    
    
      Metropolitian Washington Airports Service
      51
      46870.62
      776200
      0.007937
      0.012187
      0.016165
    
    
      NA
      23
      13544.39
      91002
      0.003579
      0.003522
      0.001895
    
    
      NASA
      6
      20869.13
      20428
      0.000934
      0.005426
      0.000425
    
    
      National Park Service
      1313
      593206.15
      4156536
      0.204337
      0.154241
      0.086564
    
    
      Navy / Marines
      214
      147731.32
      309184
      0.033304
      0.038412
      0.006439
    
    
      Other Federal Agencies (not listest below)
      14
      14362.89
      63484
      0.002179
      0.003735
      0.001322
    
    
      Other Local Agencies
      1548
      1519572.29
      4464528
      0.240909
      0.395109
      0.092978
    
    
      Other State Agency
      778
      662541.42
      4277990
      0.121077
      0.172269
      0.089094
    
    
      Private (other than railroad)
      776
      1076508.62
      11389681
      0.120766
      0.279906
      0.237202
    
    
      Railroad
      530
      251812.41
      2086452
      0.082482
      0.065475
      0.043452
    
    
      State Highway Agency
      296467
      278558303.91
      3962699890
      46.137984
      72.428798
      82.527277
    
    
      State Park, Forest or Reservation Agency
      1180
      228839.46
      658890
      0.183639
      0.059501
      0.013722
    
    
      State Toll Authority
      8187
      14377100.30
      211637887
      1.274110
      3.738234
      4.407575
    
    
      Tennesssee Valley Authority
      36
      100292.60
      114529
      0.005603
      0.026077
      0.002385
    
    
      Town or Township Highway Agency
      30032
      4291058.24
      18653453
      4.673761
      1.115731
      0.388477
    
    
      U.S forest Services
      5194
      597138.73
      267702
      0.808322
      0.155264
      0.005575
    
    
      Unknown
      20
      16913.49
      27774
      0.003113
      0.004398
      0.000578



In [22]:

    
nbi_main_maintenance['Percent Valid Bridge'].plot.bar(figsize = (10,8))









    Out[22]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09b8ab2898>



In [23]:

    
nbi_main_maintenance['Percent ADT'].plot.bar(figsize = (10,8))









    Out[23]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09b830ec88>



In [24]:

    
nbi_main_maintenance['Percent Deck Area'].plot.bar(figsize = (10,8))









    Out[24]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09b8612048>

Structure Material and Type of Construction



In [25]:

    
pipelineSM = [{"$match":{"year":2016}},
               {"$group":{"_id":"$structureTypeMain", "count":{"$sum":1}}}]
startTime = time.time()
SM=collection.aggregate(pipelineSM)
print("Seconds: ",(time.time() - startTime))
SM_df = pd.DataFrame(list(SM))
structMat_Type= SM_df.sort_values('count',ascending = 0).head(20)
structMat_Type['kindOfMaterialDesign'] = structMat_Type._id.apply(lambda x: x.get('kindOfMaterialDesign'))
structMat_Type['typeOfDesignConstruction'] = structMat_Type._id.apply(lambda x: x.get('typeOfDesignConstruction'))









    



Seconds:  2.0784292221069336



In [26]:

    
Material ={ -1 : 'NA',
                    1 : 'Concrete',
                    2 : 'Concrete Continuous',
                    3 : 'Steel',
                    4 : 'Steel Continuous',
                    5 : 'Prestressed concrete',
                    6 : 'Prestressed concrete continuous',
                    7 : 'Wood or Timber',
                    8 : 'Masonry',
                    9 : 'Aluminium, Wrought, Iron, Cast Iron',
                    0 : 'Other'
}
typeOfDesign  = { -1 : 'NA',
                    1 : 'Slab',
                    2 : 'Stringer / Multi-beam or Girder',
                    3 : 'Gider and Floorbeam System',
                    4 : 'Tee Beam',
                    5 : 'Box Beam or Girder - Multiple',
                    6 : 'Box Beam or Girder - Single or Spread ',
                    7 : 'Frame',
                    8 : 'Orthotropic',
                    9 : 'Truss - Deck',
                   10 : 'Truss - Thru',
                   11 : 'Arch - Deck', 
                   13 : 'Suspension',
                   14 : 'Stayed Girder',
                   15 : 'Movable - Lift',
                   16 : 'Movable - Bascule',
                   17 : 'Movabale - swing',
                   18 : 'Tunnel',
                   19 : 'Culvert',
                   20 : 'Mixed Type',
                   21 : 'Segmental Box Girder',
                   22 : 'Channel Beam',
                   0 : 'Other'
                }
structMat_Type['Material'] = structMat_Type['kindOfMaterialDesign'].map(Material)
structMat_Type['Type of Construction'] = structMat_Type['typeOfDesignConstruction'].map(typeOfDesign)
cols = ['Material','Type of Construction','count']
structMat_Type[cols]









    Out[26]:






  
    
      
      Material
      Type of Construction
      count
    
  
  
    
      58
      Steel
      Stringer / Multi-beam or Girder
      102750
    
    
      54
      Concrete
      Culvert
      93248
    
    
      73
      Prestressed concrete
      Stringer / Multi-beam or Girder
      57734
    
    
      52
      Steel Continuous
      Stringer / Multi-beam or Girder
      53597
    
    
      82
      Prestressed concrete
      Box Beam or Girder - Multiple
      49386
    
    
      67
      Concrete Continuous
      Slab
      35238
    
    
      100
      Concrete
      Slab
      33991
    
    
      106
      Concrete Continuous
      Culvert
      29097
    
    
      97
      Concrete
      Tee Beam
      20432
    
    
      47
      Prestressed concrete continuous
      Stringer / Multi-beam or Girder
      17030
    
    
      88
      Wood or Timber
      Stringer / Multi-beam or Girder
      15330
    
    
      87
      Steel
      Culvert
      15325
    
    
      89
      Concrete
      Channel Beam
      12958
    
    
      98
      Prestressed concrete
      Slab
      12217
    
    
      80
      Concrete
      Stringer / Multi-beam or Girder
      9567
    
    
      66
      Steel
      Truss - Thru
      8993
    
    
      65
      Prestressed concrete
      Tee Beam
      8436
    
    
      70
      Concrete Continuous
      Tee Beam
      6501
    
    
      109
      Prestressed concrete
      Box Beam or Girder - Single or Spread
      5888
    
    
      122
      Concrete
      Frame
      5610



In [27]:

    
cols = ['Material','Type of Construction','count']
structMat_Type[cols]
structMat_Type['Material / Type of Construction'] = structMat_Type['Material'].astype(str)+" - "+structMat_Type['Type of Construction']



In [28]:

    
cols = ['Material / Type of Construction', 'count']
structMat_Type = structMat_Type[cols]
structMat_Type = structMat_Type.set_index('Material / Type of Construction')
structMat_Type.plot(kind = 'bar', figsize=(11,9))









    Out[28]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09b88bee80>

Scour Critical Bridges



In [29]:

    
pipeline = [{"$match": {"year":2016}},
            {"$project":{"_id":0, "stateCode":1,"scourCriticalBridges":1}}]
startTime = time.time()
scour = collection.aggregate(pipeline)
print("Seconds : ", (time.time() - startTime))
scour_df = pd.DataFrame(list(scour))









    



Seconds :  0.15120792388916016



In [30]:

    
scour_df['scourCriticalStatus'] = None
for index, row in scour_df.iterrows():
    if row.scourCriticalBridges in ["0","1","2","3"]:
        scour_df.loc[index, 'scourCriticalStatus'] = "True"
    else:
        scour_df.loc[index, 'scourCriticalStatus'] = "False"



In [37]:

    
scour_df.groupby(['scourCriticalStatus'])['scourCriticalStatus'].count().plot.bar()









    Out[37]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09b5e9dfd0>



In [ ]:

	Distinct count of Bridges
year
1992	707509
1993	700993
1994	703606
1995	713410
1996	711792
1997	715853
1998	716703
1999	721247
2000	724037
2001	727990
2002	730034
2003	732942
2004	736600
2005	740010
2006	740177
2007	748661
2008	751134
2009	746542
2010	632426
2011	632405
2012	634324
2013	634668
2014	637622
2015	638836
2016	642566

	Count	Sum of Deck Area	Sum of ADT
State Name
ALABAMA	16098	9238040.53	76613580
ALASKA	1488	710703.90	3357512
ARIZONA	8154	5055845.20	97256999
ARKANSAS	12871	6381829.40	49636105
CALIFORNIA	25431	29478067.98	667205896
COLORADO	8680	4926475.88	68595652
CONNECTICUT	4214	3265916.52	78275293
DELAWARE	877	978596.38	11361551
DISTRICT OF COLOMBIA	245	568827.33	7660611
FLORIDA	12313	16759415.81	213337215
GEORGIA	14835	9404058.27	135068507
HAWAII	1132	1319916.61	27400179
IDAHO	4445	1723198.53	11566802
ILLINOIS	26704	13004416.75	133709726
INDIANA	19245	7997458.47	97813523
IOWA	24184	8248470.40	34260303
KANSAS	25013	8192548.56	46401837
LOUISIANA	12915	16387705.86	80282307
MAINE	2450	1217855.97	11420274
MARYLAND	5321	5155225.32	116163051
MASSACHUSETTS	5171	4055694.07	114458271
MICHIGAN	11156	6357402.03	93784086
MINNESOTA	14265	6053639.35	64332373
MISSISSIPI	17068	9093623.83	43837769
MISSOURI	24468	10361960.26	86300007
MONTANA	5276	2000703.82	10505792
NEBRASKA	15334	3969666.40	22562432
NEVADA	1933	1595543.65	32174713
NEW HAMPSHIRE	2486	1120425.98	16910083
NEW JERSEY	6730	6878062.72	160237725
NEW MEXICO	3973	1728319.62	26782628
NEW YORK	17462	12869629.22	169682377
NORTH CAROLINA	18099	9221997.64	112602177
NORTH DAKOTA	4400	1232040.30	4423736
OHIO	56568	26635142.36	354235716
OKLAHOMA	23053	8350748.75	70967585
OREGON	8118	5017158.18	56756580
PENNSYLVANIA	22791	12377122.98	163719521
PUERTO RICO	2205	1973541.98	38053215
RHODE ISLAND	772	786182.40	15845827
SOUTH CAROLINA	9358	6800337.44	46154561
SOUTH DAKOTA	5849	1664936.80	7112750
TENNESSEE	20123	9659962.24	156431834
TEXAS	53488	47253738.73	543042520
UTAH	3039	1922455.87	63527662
VERMONT	2766	853119.53	6841770
VIRGINIA	13892	9620708.82	122500324
WASHINGTON	8178	6890269.73	66350712
WEST VIGINIA	7217	3805059.64	24843795
WISCONSIN	14230	6706707.15	78666403
WYOMING	3128	1269968.77	7350946

	Maintenance Reponsibility	Sum of Deck Area	Sum of ADT	Percent Valid Bridge	Percent Deck Area	Percent ADT
Maintenance Reponsibility
Air Force	287	121469.57	449042	0.044665	0.031584	0.009352
Army	985	219463.57	711242	0.153292	0.057063	0.014812
Bureau of Fish and Wildlife	304	41538.49	25695	0.047310	0.010801	0.000535
Bureau of Indian Affairs	944	248088.63	552992	0.146911	0.064506	0.011517
Bureau of Land Management	481	79536.58	20686	0.074856	0.020681	0.000431
Bureau of Reclamation	146	40043.25	61985	0.022721	0.010412	0.001291
City or Municipal Highway Agency	46975	26482462.24	310286133	7.310533	6.885786	6.462026
Corps of Engineers (Civil)	245	366675.07	428039	0.038128	0.095340	0.008914
County Highway Agency	244552	49576983.27	240400073	38.058659	12.890663	5.006577
Indian Tribal Agency	39	3607.57	3880	0.006069	0.000938	0.000081
Local Park, Forest or Reservation Agency	81	18803.75	187565	0.012606	0.004889	0.003906
Local Toll Authority	1158	4880722.87	26862081	0.180215	1.269052	0.559430
Metropolitian Washington Airports Service	51	46870.62	776200	0.007937	0.012187	0.016165
NA	23	13544.39	91002	0.003579	0.003522	0.001895
NASA	6	20869.13	20428	0.000934	0.005426	0.000425
National Park Service	1313	593206.15	4156536	0.204337	0.154241	0.086564
Navy / Marines	214	147731.32	309184	0.033304	0.038412	0.006439
Other Federal Agencies (not listest below)	14	14362.89	63484	0.002179	0.003735	0.001322
Other Local Agencies	1548	1519572.29	4464528	0.240909	0.395109	0.092978
Other State Agency	778	662541.42	4277990	0.121077	0.172269	0.089094
Private (other than railroad)	776	1076508.62	11389681	0.120766	0.279906	0.237202
Railroad	530	251812.41	2086452	0.082482	0.065475	0.043452
State Highway Agency	296467	278558303.91	3962699890	46.137984	72.428798	82.527277
State Park, Forest or Reservation Agency	1180	228839.46	658890	0.183639	0.059501	0.013722
State Toll Authority	8187	14377100.30	211637887	1.274110	3.738234	4.407575
Tennesssee Valley Authority	36	100292.60	114529	0.005603	0.026077	0.002385
Town or Township Highway Agency	30032	4291058.24	18653453	4.673761	1.115731	0.388477
U.S forest Services	5194	597138.73	267702	0.808322	0.155264	0.005575
Unknown	20	16913.49	27774	0.003113	0.004398	0.000578

	Material	Type of Construction	count
58	Steel	Stringer / Multi-beam or Girder	102750
54	Concrete	Culvert	93248
73	Prestressed concrete	Stringer / Multi-beam or Girder	57734
52	Steel Continuous	Stringer / Multi-beam or Girder	53597
82	Prestressed concrete	Box Beam or Girder - Multiple	49386
67	Concrete Continuous	Slab	35238
100	Concrete	Slab	33991
106	Concrete Continuous	Culvert	29097
97	Concrete	Tee Beam	20432
47	Prestressed concrete continuous	Stringer / Multi-beam or Girder	17030
88	Wood or Timber	Stringer / Multi-beam or Girder	15330
87	Steel	Culvert	15325
89	Concrete	Channel Beam	12958
98	Prestressed concrete	Slab	12217
80	Concrete	Stringer / Multi-beam or Girder	9567
66	Steel	Truss - Thru	8993
65	Prestressed concrete	Tee Beam	8436
70	Concrete Continuous	Tee Beam	6501
109	Prestressed concrete	Box Beam or Girder - Single or Spread	5888
122	Concrete	Frame	5610