In [1]:
import pymongo
from pymongo import MongoClient
import time
Client = MongoClient("mongodb://nbi-mongo.admin/")
db = Client.bridge
collection = db["SampleNbi2"]
In [2]:
startTime = time.time()
print("Bridges Records in DB: ", collection.count())
print("Seconds : ", (time.time() - startTime))
In [3]:
import pandas as pd
listStates = []
startTime = time.time()
for i in collection.distinct("stateCode"):
listStates.append(i)
print("Time to create list of distinct states in Seconds: ", (time.time() - startTime))
countPerYear = {}
startTime = time.time()
for i in collection.distinct("year"):
countPerYear[i] = collection.find({"year":i}).count()
print("Seconds: ",(time.time()- startTime))
count_per_year = pd.DataFrame(list(countPerYear.items()), columns = ['year', 'Distinct count of Bridges'])
count_per_year.set_index("year", inplace = True)
count_per_year
Out[3]:
In [4]:
%matplotlib inline
print("Trend of the count of records in the bridge:")
count_per_year.plot(figsize=(10,8))
count_per_year.plot(kind = "bar", figsize=(10,8))
Out[4]:
In [5]:
countPerState = {}
startTime = time.time()
for i in listStates:
countPerState[i] = collection.find({"year":2016, "stateCode":i}).count()
print("Seconds: ", (time.time() - startTime))
count_per_state = pd.DataFrame(list(countPerState.items()), columns = ['State Code', 'Count of Records'])
#count_per_state = count_per_state.set_index('State Code')
In [7]:
stateName = {'25':'MASSACHUSETTS',
'04':'ARIZONA',
'08':'COLORADO',
'38':'NORTH DAKOTA',
'09':'CONNECTICUT',
'19':'IOWA',
'26':'MICHIGAN',
'48':'TEXAS',
'35':'NEW MEXICO',
'17':'ILLINOIS',
'51':'VIRGINIA',
'23':'MAINE',
'16':'IDAHO',
'36':'NEW YORK',
'56':'WYOMING',
'29':'MISSOURI',
'39':'OHIO',
'28':'MISSISSIPI',
'11':'DISTRICT OF COLOMBIA',
'21':'KENTUCKY',
'18':'INDIANA',
'06':'CALIFORNIA',
'47':'TENNESSEE',
'12':'FLORIDA',
'24':'MARYLAND',
'34':'NEW JERSEY',
'46':'SOUTH DAKOTA',
'13':'GEORGIA',
'55':'WISCONSIN',
'30':'MONTANA',
'54':'WEST VIGINIA',
'15':'HAWAII',
'32':'NEVADA',
'37':'NORTH CAROLINA',
'10':'DELAWARE',
'33':'NEW HAMPSHIRE',
'44':'RHODE ISLAND',
'50':'VERMONT',
'42':'PENNSYLVANIA',
'05':'ARKANSAS',
'20':'KANSAS',
'45':'SOUTH CAROLINA',
'22':'LOUISIANA',
'40':'OKLAHOMA',
'72':'PUERTO RICO',
'41':'OREGON',
'21':'MINNESOTA',
'53':'WASHINGTON',
'01':'ALABAMA',
'31':'NEBRASKA',
'02':'ALASKA',
'49':'UTAH'
}
count_per_state['State Name'] = count_per_state['State Code'].map(stateName)
count_per_state = count_per_state.set_index('State Name')
asc_c=count_per_state.sort_values(['Count of Records'],ascending = 0)
asc_c.head(n=10).sort_values(['Count of Records'], ascending = 0).plot(kind = 'bar', figsize=(10,8))
Out[7]:
In [8]:
asc_c.tail(n=10).sort_values(['Count of Records'], ascending = 1).plot(kind = 'bar', figsize=(10,8))
Out[8]:
In [9]:
pipeline = [{"$match":{"year":2016}},
{'$group':{"_id":"$yearBuilt", "count":{"$sum":1}}}]
startTime = time.time()
yearBuiltResult = collection.aggregate(pipeline)
print("Seconds: ",(time.time() - startTime))
yearBuiltResult_df = pd.DataFrame(list(yearBuiltResult))
In [10]:
yearBuiltResult_df = yearBuiltResult_df.rename(columns = {'_id': 'Year'})
sort_yearBuiltResult_df=yearBuiltResult_df.sort_values('count', ascending = 0)
sort_yearBuiltResult_df.set_index('Year').head(n=10).plot(kind = 'bar', figsize=(10,8))
Out[10]:
In [11]:
sort_yearBuiltResult_rdf=yearBuiltResult_df.sort_values('Year', ascending = 0)
sort_yearBuiltResult_rdf.set_index('Year').head(n=10).plot(kind = "bar", figsize = (10,8))
Out[11]:
In [12]:
pipeline = [{"$match": {"year":2016}},
{"$project":{"_id":0, "stateCode":1,"deckWidthOutToOut":1, "structureLength":1, "averageDailyTraffic":1 }}]
startTime = time.time()
pdc = collection.aggregate(pipeline)
print("Seconds : ", (time.time() - startTime))
nbi = pd.DataFrame(list(pdc))
In [13]:
stateName = {'25':'MASSACHUSETTS',
'04':'ARIZONA',
'08':'COLORADO',
'38':'NORTH DAKOTA',
'09':'CONNECTICUT',
'19':'IOWA',
'26':'MICHIGAN',
'48':'TEXAS',
'35':'NEW MEXICO',
'17':'ILLINOIS',
'51':'VIRGINIA',
'23':'MAINE',
'16':'IDAHO',
'36':'NEW YORK',
'56':'WYOMING',
'29':'MISSOURI',
'39':'OHIO',
'28':'MISSISSIPI',
'11':'DISTRICT OF COLOMBIA',
'21':'KENTUCKY',
'18':'INDIANA',
'06':'CALIFORNIA',
'47':'TENNESSEE',
'12':'FLORIDA',
'24':'MARYLAND',
'34':'NEW JERSEY',
'46':'SOUTH DAKOTA',
'13':'GEORGIA',
'55':'WISCONSIN',
'30':'MONTANA',
'54':'WEST VIGINIA',
'15':'HAWAII',
'32':'NEVADA',
'37':'NORTH CAROLINA',
'10':'DELAWARE',
'33':'NEW HAMPSHIRE',
'44':'RHODE ISLAND',
'50':'VERMONT',
'42':'PENNSYLVANIA',
'05':'ARKANSAS',
'20':'KANSAS',
'45':'SOUTH CAROLINA',
'22':'LOUISIANA',
'40':'OKLAHOMA',
'72':'PUERTO RICO',
'41':'OREGON',
'21':'MINNESOTA',
'53':'WASHINGTON',
'01':'ALABAMA',
'31':'NEBRASKA',
'02':'ALASKA',
'49':'UTAH'
}
nbi['State Name'] = nbi['stateCode'].map(stateName)
In [14]:
nbi['deckArea']= nbi['deckWidthOutToOut'] * nbi['structureLength']
nbi_summary = nbi[['State Name','deckWidthOutToOut','structureLength','deckArea','averageDailyTraffic']]
nbi_summary = nbi_summary.groupby(['State Name']).agg({'State Name':'count',
'deckArea':'sum',
'averageDailyTraffic':'sum'})
nbi_summary = nbi_summary.rename(columns={'State Name': 'Count', 'deckArea':'Sum of Deck Area','averageDailyTraffic':
'Sum of ADT'})
In [15]:
nbi_summary
Out[15]:
In [16]:
nbi_summary['Count'].plot(kind = 'bar', figsize = (11,9))
Out[16]:
In [17]:
nbi_summary['Sum of Deck Area'].plot(kind = 'bar', figsize = (11,9))
Out[17]:
In [18]:
nbi_summary['Sum of ADT'].plot(kind = 'bar', figsize = (11,9))
Out[18]:
In [33]:
pipeline = [{"$match": {"year":2016}},
{"$project":{"_id":0, "maintenanceReponsibility":1,"deckWidthOutToOut":1, "structureLength":1, "averageDailyTraffic":1 }}]
startTime = time.time()
pdc1 = collection.aggregate(pipeline)
print("Seconds : ", (time.time() - startTime))
nbi_main = pd.DataFrame(list(pdc1))
In [34]:
maintenanceReponsibility ={ -1 : 'NA',
1 : 'State Highway Agency',
21: 'Other State Agency',
4 : 'City or Municipal Highway Agency',
80: 'Unknown',
66: 'National Park Service',
2 : 'County Highway Agency',
60: 'Other Federal Agencies (not listest below)',
64: 'U.S forest Services' ,
68: 'Bureau of Land Management',
26: 'Private (other than railroad)',
62: 'Bureau of Indian Affairs',
3 : 'Town or Township Highway Agency',
25: 'Other Local Agencies',
11: 'State Park, Forest or Reservation Agency',
63: 'Bureau of Fish and Wildlife',
27: 'Railroad',
74: 'Army',
70: 'Corps of Engineers (Civil)',
72: 'Air Force',
61: 'Indian Tribal Agency',
71: 'Corps of Engineers (Military)',
69: 'Bureau of Reclamation',
67: 'Tennesssee Valley Authority',
32: 'Local Toll Authority',
12: 'Local Park, Forest or Reservation Agency',
31: 'State Toll Authority',
73: 'Navy / Marines',
75: 'NASA',
76: 'Metropolitian Washington Airports Service'
}
nbi_main['Maintenance Reponsibility'] = nbi_main['maintenanceReponsibility'].map(maintenanceReponsibility)
nbi_main['deckArea']= nbi_main['deckWidthOutToOut'] * nbi_main['structureLength']
nbi_main_maintenance=nbi_main.groupby(['Maintenance Reponsibility']).agg({'Maintenance Reponsibility':'count',
'deckArea':'sum',
'averageDailyTraffic':'sum'})
mant_valid_bridges = nbi_main_maintenance['Maintenance Reponsibility'].sum()
mant_sum_deck = nbi_main_maintenance['deckArea'].sum()
mant_sum_adt = nbi_main_maintenance['averageDailyTraffic'].sum()
mant_valid_bridges = nbi_main_maintenance['Maintenance Reponsibility'].sum()
mant_sum_deck = nbi_main_maintenance['deckArea'].sum()
mant_sum_adt = nbi_main_maintenance['averageDailyTraffic'].sum()
In [35]:
nbi_main_maintenance['Percent Valid Bridge']= (nbi_main_maintenance['Maintenance Reponsibility']/mant_valid_bridges)*100
nbi_main_maintenance['Percent Deck Area']= (nbi_main_maintenance['deckArea'] / mant_sum_deck)*100
nbi_main_maintenance['Percent ADT']= (nbi_main_maintenance['averageDailyTraffic'] / mant_sum_adt)*100
nbi_main_maintenance['deckArea'] = nbi_main_maintenance['deckArea'].apply(lambda x: '{:.2f}'.format(x))
nbi_main_maintenance = nbi_main_maintenance.rename(columns = {"Maintenance Reponsibilty":"Count",
"deckArea":"Sum of Deck Area",
"averageDailyTraffic":"Sum of ADT"})
In [36]:
nbi_main_maintenance
Out[36]:
In [22]:
nbi_main_maintenance['Percent Valid Bridge'].plot.bar(figsize = (10,8))
Out[22]:
In [23]:
nbi_main_maintenance['Percent ADT'].plot.bar(figsize = (10,8))
Out[23]:
In [24]:
nbi_main_maintenance['Percent Deck Area'].plot.bar(figsize = (10,8))
Out[24]:
In [25]:
pipelineSM = [{"$match":{"year":2016}},
{"$group":{"_id":"$structureTypeMain", "count":{"$sum":1}}}]
startTime = time.time()
SM=collection.aggregate(pipelineSM)
print("Seconds: ",(time.time() - startTime))
SM_df = pd.DataFrame(list(SM))
structMat_Type= SM_df.sort_values('count',ascending = 0).head(20)
structMat_Type['kindOfMaterialDesign'] = structMat_Type._id.apply(lambda x: x.get('kindOfMaterialDesign'))
structMat_Type['typeOfDesignConstruction'] = structMat_Type._id.apply(lambda x: x.get('typeOfDesignConstruction'))
In [26]:
Material ={ -1 : 'NA',
1 : 'Concrete',
2 : 'Concrete Continuous',
3 : 'Steel',
4 : 'Steel Continuous',
5 : 'Prestressed concrete',
6 : 'Prestressed concrete continuous',
7 : 'Wood or Timber',
8 : 'Masonry',
9 : 'Aluminium, Wrought, Iron, Cast Iron',
0 : 'Other'
}
typeOfDesign = { -1 : 'NA',
1 : 'Slab',
2 : 'Stringer / Multi-beam or Girder',
3 : 'Gider and Floorbeam System',
4 : 'Tee Beam',
5 : 'Box Beam or Girder - Multiple',
6 : 'Box Beam or Girder - Single or Spread ',
7 : 'Frame',
8 : 'Orthotropic',
9 : 'Truss - Deck',
10 : 'Truss - Thru',
11 : 'Arch - Deck',
13 : 'Suspension',
14 : 'Stayed Girder',
15 : 'Movable - Lift',
16 : 'Movable - Bascule',
17 : 'Movabale - swing',
18 : 'Tunnel',
19 : 'Culvert',
20 : 'Mixed Type',
21 : 'Segmental Box Girder',
22 : 'Channel Beam',
0 : 'Other'
}
structMat_Type['Material'] = structMat_Type['kindOfMaterialDesign'].map(Material)
structMat_Type['Type of Construction'] = structMat_Type['typeOfDesignConstruction'].map(typeOfDesign)
cols = ['Material','Type of Construction','count']
structMat_Type[cols]
Out[26]:
In [27]:
cols = ['Material','Type of Construction','count']
structMat_Type[cols]
structMat_Type['Material / Type of Construction'] = structMat_Type['Material'].astype(str)+" - "+structMat_Type['Type of Construction']
In [28]:
cols = ['Material / Type of Construction', 'count']
structMat_Type = structMat_Type[cols]
structMat_Type = structMat_Type.set_index('Material / Type of Construction')
structMat_Type.plot(kind = 'bar', figsize=(11,9))
Out[28]:
In [29]:
pipeline = [{"$match": {"year":2016}},
{"$project":{"_id":0, "stateCode":1,"scourCriticalBridges":1}}]
startTime = time.time()
scour = collection.aggregate(pipeline)
print("Seconds : ", (time.time() - startTime))
scour_df = pd.DataFrame(list(scour))
In [30]:
scour_df['scourCriticalStatus'] = None
for index, row in scour_df.iterrows():
if row.scourCriticalBridges in ["0","1","2","3"]:
scour_df.loc[index, 'scourCriticalStatus'] = "True"
else:
scour_df.loc[index, 'scourCriticalStatus'] = "False"
In [37]:
scour_df.groupby(['scourCriticalStatus'])['scourCriticalStatus'].count().plot.bar()
Out[37]:
In [ ]: