In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [2]:
archive_cso_data = pd.read_csv('data/cso_events.csv')
updated_cso_data = pd.read_csv('data/updated_format_cso_events.csv')
print(archive_cso_data.dtypes)
print(updated_cso_data.dtypes)
In [3]:
archive_cso_data.head()
Out[3]:
In [4]:
updated_cso_data.head()
Out[4]:
In [5]:
# Mapping of segments pulled from original istheresewageinthechicagoriver.com app
# https://github.com/open-city/chicago-river-sewage/blob/c99480208fe884436c9bbdd649b992f5d7ce75e7/app.py
WATERWAY_SEGMENTS = [
{"segment": 1 , "waterway": "NSC Upper (NSWRP)", "riverway": "North Shore Channel", "description": "Lake Michigan to North Side Water Reclamation Plant"},
{"segment": 2 , "waterway": "NSC Lower (NSWRP)", "riverway": "North Shore Channel", "description": "North Side Water Reclamation Plant to the confluence with the North Branch of the Chicago River"},
{"segment": 3 , "waterway": "NBCR Lower (NSC Confluence)", "riverway": "North Branch of Chicago River", "description": "Confluence with the North Shore Channel to Wolf Point"},
{"segment": 4 , "waterway": "NBCR Upper (NSC Confluence)", "riverway": "North Branch of Chicago River", "description": "Beckwith Road and West Fork to confluence with the North Shore Channel"},
{"segment": 5 , "waterway": "Chicago R", "riverway": "Chicago River", "description": "Wolf Point to Chicago River Controlling Works"},
{"segment": 6 , "waterway": "SB Chicago R", "riverway": "South Branch of Chicago River", "description": "Wolf Point to Damen Avenue"},
{"segment": 7 , "waterway": "SF SB Chicago R", "riverway": "South Fork of SBCR (Bubbly Creek)", "description": ""},
{"segment": 8 , "waterway": "CSSC Upper (SWRP)", "riverway": "Chicago Sanitary and Ship Canal", "description": "Damen Avenue to the Stickney Water Reclamation Plant"},
{"segment": 9 , "waterway": "CSSC Lower (SWRP)", "riverway": "Chicago Sanitary and Ship Canal", "description": "Stickney Water Reclamation Plant to the confluence with the Calumet-Sag Channel"},
{"segment": 10, "waterway": "CSSC Lower (SWRP)", "riverway": "Chicago Sanitary and Ship Canal", "description": "From the confluence with the Calumet-Sag Channel to the Lemont Water Reclamation Plant"},
{"segment": 11, "waterway": "CSSC Lower (SWRP)", "riverway": "Chicago Sanitary and Ship Canal", "description": "Lemont Water Reclamation Plant to Lockport Lock & Dam"},
{"segment": 12, "waterway": "Weller Cr", "riverway": "Weller Creek", "description": ""},
{"segment": 13, "waterway": "DesPlaines Upper", "riverway": "Des Plaines River", "description": "Weller Creek to Willow-Higgins Creek"},
{"segment": 14, "waterway": "DesPlaines Middle", "riverway": "Des Plaines River", "description": "Willow-Higgins Creek to the confluence with Salt Creek"},
{"segment": 15, "waterway": "DesPlaines Lower", "riverway": "Des Plaines River", "description": "The confluence with Salt Creek to the confluence with the CSSC"},
{"segment": 16, "waterway": "Salt Cr", "riverway": "Salt Creek", "description": "From Addison Creek to the confluence with the Des Plaines River"},
{"segment": 17, "waterway": "Cal R", "riverway": "Calumet River", "description": "O'Brien Locks to Lake Michigan"},
{"segment": 18, "waterway": "Grand Cal R", "riverway": "Grand Calumet River", "description": "From confluence with the Little Calumet River to the Indiana state line"},
{"segment": 19, "waterway": "Little Cal R (North)", "riverway": "Little Calumet River", "description": "O'Brien Locks to the Calumet-Sag Channel"},
{"segment": 20, "waterway": "Little Cal R (South)", "riverway": "Little Calumet River", "description": "Indiana state line to the Calumet-Sag Channel"},
{"segment": 21, "waterway": "Cal Sag Ch", "riverway": "Calumet-Sag Channel", "description": ""},
{"segment": 22, "waterway": "Cal Union drainage Ditch", "riverway": "Calumet Union Drainage Ditch", "description": ""},
{"segment": 23, "waterway": "Addison Cr", "riverway": "Addison Creek", "description": ""},
{"segment": 30, "waterway": "", "riverway": "Wilmette", "description": "Discharge to Lake Michigan"},
{"segment": 31, "waterway": "", "riverway": "Chicago River Controlling Works", "description": "Discharge to Lake Michigan"},
{"segment": 32, "waterway": "", "riverway": "O'Brien", "description": "Discharge to Lake Michigan"},
]
In [6]:
import json
with open('data/mwrd_cso.geojson', 'r') as geoj:
location_geoj = json.load(geoj)
cso_locations = [g['properties'] for g in location_geoj['features']]
cso_location_df = pd.DataFrame(cso_locations)
print(cso_location_df.dtypes)
cso_location_df.head()
Out[6]:
In [7]:
updated_cso_data['Waterway Reach'].unique()
Out[7]:
In [8]:
updated_cso_data[updated_cso_data['Waterway Reach'].isnull()]
Out[8]:
In [9]:
updated_cso_data['Open date/time'] = pd.to_datetime(updated_cso_data['Open date/time'])
updated_cso_data['Close date/time'] = pd.to_datetime(updated_cso_data['Close date/time'])
updated_cso_data['Gate Open Period'] = updated_cso_data['Gate Open Period'].apply(lambda x: x[1:])
updated_cso_data['Gate Open Period'] = pd.to_timedelta(updated_cso_data['Gate Open Period'], unit='s')
print(updated_cso_data.dtypes)
updated_cso_data.head()
Out[9]:
In [10]:
# Make waterway segments into dict to convert segment index in archive data
waterway_dict = dict()
for w in WATERWAY_SEGMENTS:
waterway_dict[w['segment']] = w['waterway']
archive_cso_data['Waterway Reach'] = archive_cso_data['segment'].apply(lambda x: waterway_dict[x])
print(archive_cso_data.dtypes)
archive_cso_data.head()
Out[10]:
In [11]:
archive_cso_data['Duration'] = pd.to_timedelta(archive_cso_data['duration'], unit='m')
archive_cso_data.head()
Out[11]:
In [13]:
archive_cso_data['Open date/time'] = pd.to_datetime(archive_cso_data['date'] + ' ' + archive_cso_data['starttime'])
archive_cso_data['Close date/time'] = archive_cso_data['Open date/time'] + archive_cso_data['Duration']
print(archive_cso_data.dtypes)
archive_cso_data.head()
Out[13]:
In [14]:
archive_cso_match = archive_cso_data[['id', 'location', 'Waterway Reach', 'Open date/time', 'Close date/time', 'Duration']]
archive_cso_match.head()
Out[14]:
In [16]:
archive_cso_match = archive_cso_match.rename(columns={'Duration': 'Gate Open Period', 'location': 'Outfall Location'})
archive_cso_match['Outfall Structure'] = archive_cso_match['Outfall Location']
archive_cso_match['Tide Gate #'] = ''
archive_cso_match['Plant'] = ''
archive_cso_match = archive_cso_match[['Outfall Structure', 'Outfall Location', 'Tide Gate #', 'Waterway Reach', 'Plant', 'Open date/time', 'Close date/time', 'Gate Open Period']]
print(archive_cso_match.dtypes)
archive_cso_match.head()
Out[16]:
In [17]:
archive_cso_match.to_csv('data/archive_cso_events_clean.csv')
In [20]:
print(updated_cso_data.shape)
print(archive_cso_match.shape)
In [21]:
merged_cso_data = pd.concat([updated_cso_data, archive_cso_match])
print(merged_cso_data.dtypes)
print(merged_cso_data.shape)
merged_cso_data.head()
Out[21]:
In [22]:
merged_cso_data.to_csv('data/merged_cso_data.csv')
In [29]:
# Plotting the number of events per waterway
events_by_waterway = merged_cso_data.groupby(['Waterway Reach'])['Gate Open Period'].count().sort_values(ascending=False)
events_by_waterway.plot(kind='bar')
Out[29]:
In [34]:
merged_cso_data.dtypes
Out[34]:
In [43]:
# Plotting the total duration of events by waterway (have to convert type for plotting)
duration_by_waterway = merged_cso_data.groupby(['Waterway Reach'])['Gate Open Period'].sum().sort_values(ascending=False)
duration_by_waterway.astype('int64').plot(kind='bar')
Out[43]:
In [ ]: