In [1]:
from RomeaJam.traffik import db, Jam, Alert, Segment, SegmentStatus, RoadStatus, RoadAverage
from datetime import datetime, timedelta
from time import time
In [2]:
def get_status(dt, category=None):
"""returns road status given specific datetime"""
if category:
return db.session.query(RoadStatus).filter(RoadStatus.timestamp > dt.strftime('%s')).\
filter(RoadStatus.timestamp < (dt+timedelta(0,60)).strftime('%s')).\
filter(RoadStatus.category == category).all()
else:
return db.session.query(RoadStatus).filter(RoadStatus.timestamp > dt.strftime('%s')).\
filter(RoadStatus.timestamp < (dt+timedelta(0,60)).strftime('%s')).all()
def get_segments(dt, category=None):
"""prints segment statuses given a specific datetime"""
for status in db.session.query(SegmentStatus).filter(SegmentStatus.timestamp > dt.strftime('%s')).\
filter(SegmentStatus.timestamp < (dt+timedelta(0,60)).strftime('%s')):
if status.segment.category == category or category == None:
print status.segment.category, status.packing_index
def printall(dt, category=None):
print get_status(dt, category)
get_segments(dt, category)
In [3]:
printall(datetime(2016,8,16,18,40)) #accident
In [5]:
printall(datetime(2016,8,16,10,43), category='Arrive')
In [6]:
printall(datetime(2016,8,15,19,39), category='Leave')
In [7]:
printall(datetime(2016,8,12,19,20), category='Leave')
In [8]:
printall(datetime(2016,8,12,19,20), 'Leave')
In [9]:
printall(datetime(2016,8,13,21,15), 'Leave') #no traffic
In [10]:
printall(datetime(2016,8,13,9,47), 'Arrive') #slow down
In [11]:
printall(datetime(2016,8,11,12,38), 'Arrive') #no traffic
In [12]:
printall(datetime(2016,8,11,11,20), 'Arrive')
In [13]:
printall(datetime(2016,8,9,8,35), 'Arrive')
In [3]:
printall(datetime(2016,8,7,20,0)) #serious accident
In [12]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
SegmentStatus dataframe
In [8]:
qs = db.session.query(SegmentStatus).join(Segment).\
filter(SegmentStatus.timestamp > datetime(2016,8,4,0,0).strftime('%s')).\
filter(SegmentStatus.timestamp < datetime(2016,8,16,0,0).strftime('%s'))
ds = pd.read_sql(qs.statement, qs.session.bind)
ds.set_index('id', inplace=True)
ds['timestamp'] = pd.to_datetime(ds['timestamp'],unit='s')
ds.head()
Out[8]:
In [30]:
#Let's check how many segments can have a packing index <> 0 in the same timestamp/category
ds = ds[(ds['packing_index'] > 0)]
ds1 = pd.DataFrame(ds['packing_index'].groupby([ds.road_status_id]).count())
ds1 = pd.DataFrame(ds1['packing_index'].groupby([ds1.packing_index]).count())
ds1.head()
Out[30]:
It's likely that there is a sort of "snake effect" in the data, i.e. the traffic flows and there aren't jams at the same timestamp in all the segments of the road. Because of this, I will introduce a "snake_parameter" in the RoadStatus class, so the packing_index will be the result of the average of the first snake_parameter segments ordered by packing index desc
RoadStatus dataframe
In [18]:
qr = db.session.query(RoadStatus).filter(RoadStatus.timestamp > datetime(2016,8,4,0,0).strftime('%s')).\
filter(RoadStatus.timestamp < datetime(2016,8,16,0,0).strftime('%s'))
dr = pd.read_sql(qr.statement, qr.session.bind)
dr.set_index('id', inplace=True)
dr['timestamp'] = pd.to_datetime(dr['timestamp'],unit='s')
dr.sort([('packing_index')], ascending=False).head()
Out[18]:
In [36]:
dr.dtypes
Out[36]:
In [36]:
dr.describe()
Out[36]:
In [40]:
plt.figure(figsize=(18,5))
plt.subplot(1, 3, 1)
plt.xlabel('packing index')
plt.boxplot(dr[dr.packing_index>0].packing_index.reset_index()['packing_index'], showmeans=True, showfliers=True)
plt.show()
In [75]:
time = pd.DatetimeIndex(dr.timestamp)
dr_plt = dr.groupby([time.hour]).mean()
dr_plt.reset_index(inplace=True)
fig = plt.figure(figsize=(15,5))
ax = plt.gca()
dr_plt.plot(x='index', y='packing_index', ax=ax)
plt.title("Hourly average packing_index")
plt.ylabel('Packing index')
plt.xlabel('Hour')
ax.set_xticks(range(23))
plt.show()
In [16]:
time = pd.DatetimeIndex(dr.timestamp)
dr_plt = dr.groupby([time.weekday]).mean()
dr_plt.reset_index(inplace=True)
dayDict = {0:'Mon', 1:'Tue', 2:'Wed', 3:'Thu', 4:'Fri', 5:'Sat', 6:'Sun'}
def f(x):
daylabel = dayDict[x]
return daylabel
dr_plt['daylabel'] = dr_plt['index'].apply(f)
fig = plt.figure(figsize=(15,5))
ax = plt.gca()
dr_plt.plot(x='daylabel', y='packing_index', ax=ax)
plt.title("average packing_index per weekday")
plt.ylabel('Packing index')
plt.xlabel('Day')
ax.set_xticks(range(6))
plt.show()
In [74]:
time = pd.DatetimeIndex(dr.timestamp)
dr_plt = dr.groupby([time.day]).mean()
dr_plt.reset_index(inplace=True)
fig = plt.figure(figsize=(15,5))
ax = plt.gca()
dr_plt.plot(x='index', y='packing_index', ax=ax)
plt.title("average packing_index per day")
plt.ylabel('Packing index')
plt.xlabel('Day (August 2016)')
ax.set_xticks(range(18))
plt.show()
Jam dataframe
In [86]:
qj = db.session.query(Jam).filter(Jam.timestamp > datetime(2016,8,4,0,0).strftime('%s')).\
filter(Jam.timestamp < datetime(2016,8,16,0,0).strftime('%s'))
dj = pd.read_sql(qj.statement, qj.session.bind)
dj.set_index('id', inplace=True)
dj['timestamp'] = pd.to_datetime(dj['timestamp'],unit='s')
dj.head()
Out[86]:
In [104]:
time = pd.DatetimeIndex(dj.timestamp)
dj['day']=time.day
dj['hour']=time.hour
dj_time = dj.groupby([dj.day, dj.hour, dj.startLongitude, dj.endLongitude, dj.startLatitude, dj.endLatitude, dj.street, dj.severity, dj.color, dj.source, dj.direction]).count()
print 'Average traffic duration: %.2f min' % dj_time['timestamp'].mean()
In [128]:
dj_dur = pd.DataFrame(dj_time['timestamp'])
dj_dur.reset_index(inplace=True)
dj_dur = dj_dur[['hour', 'timestamp']]
dj_dur.columns=['hour', 'duration']
dj_dur = dj_dur.groupby([dj_dur.hour]).mean()
dj_dur.reset_index(inplace=True)
dj_dur.head()
Out[128]:
In [129]:
fig = plt.figure(figsize=(15,5))
ax = plt.gca()
dj_dur.plot(x='hour', y='duration', ax=ax)
plt.title("Average jam duration per hour")
plt.ylabel('Duration [min]')
plt.xlabel('Hour')
ax.set_xticks(range(23))
plt.show()
In [41]:
pd.scatter_matrix(dr, alpha=0.2, figsize=(18, 18), diagonal='kde')
Out[41]: