LIS590DV Final Project - Group Athena

Part 1 - Routes with Different Numbers of Trips

Part 1 - Shapes of Routes with Most/Least Number of Trips

Author: Hui Lyu


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
import matplotlib.patches as mpatches
import csv
from collections import Counter

In [2]:
import os

import plotly

# Plotly credentials are read from the environment instead of being committed
# with the notebook. The API key that used to be hard-coded in this cell has
# been exposed and should be revoked/regenerated in the Plotly account.
# Set PLOTLY_USERNAME and PLOTLY_API_KEY before starting the kernel; when they
# are absent the credentials file is left untouched.
_plotly_username = os.environ.get("PLOTLY_USERNAME")
_plotly_api_key = os.environ.get("PLOTLY_API_KEY")
if _plotly_username and _plotly_api_key:
    plotly.tools.set_credentials_file(username=_plotly_username, api_key=_plotly_api_key)

import plotly.plotly as py
from plotly.graph_objs import *

In [3]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [4]:
# Load the trips table; the path is relative to the notebook's working directory.
trips_df = pd.read_csv("GTFS Dataset/trips.csv")

In [5]:
# List the column names of the trips table.
trips_df.columns


Out[5]:
Index(['route_id', 'service_id', 'trip_id', 'trip_headsign', 'direction_id',
       'block_id', 'shape_id'],
      dtype='object')

In [6]:
# Preview the first rows of the trips table.
trips_df.head()


Out[6]:
route_id service_id trip_id trip_headsign direction_id block_id shape_id
0 TEAL T4 UIMF [@14.0.51708725@][4][1277756770140]/0__T4_UIMF WEST - ILLINOIS TERMINAL 1 T4 UIMF TEAL 26
1 TEAL T4 UIMF [@14.0.51708725@][4][1275505811421]/0__T4_UIMF WEST - ILLINOIS TERMINAL 1 T4 UIMF TEAL 23
2 TEAL T4 UIMF [@7.0.41893871@][3][1243541396687]/72__T4_UIMF EAST - ORCHARD DOWNS 0 T4 UIMF 12E TEAL 13
3 TEAL T4 UIMF [@7.0.41893871@][4][1243540851671]/4__T4_UIMF WEST - ILLINOIS TERMINAL 1 T4 UIMF 12W TEAL 12
4 TEAL T4 UIMF [@7.0.41893871@][3][1243541396687]/74__T4_UIMF EAST - ORCHARD DOWNS 0 T4 UIMF 12E TEAL 13

How many trips for each route?


In [7]:
# Number of distinct route_id values that appear in the trips table.
np.unique(trips_df["route_id"]).size


Out[7]:
100

In [8]:
# Tally how many rows of the trips table reference each route_id.
trips_count = Counter()
trips_count.update(trips_df["route_id"])
trips_count.values()


Out[8]:
dict_values([53, 33, 1, 129, 76, 250, 48, 52, 20, 29, 9, 6, 1, 4, 23, 112, 3, 61, 19, 89, 38, 135, 224, 33, 67, 71, 32, 333, 3, 138, 111, 36, 21, 21, 141, 25, 23, 6, 84, 16, 76, 49, 49, 2, 88, 68, 94, 4, 12, 38, 222, 39, 76, 45, 16, 27, 122, 21, 1, 69, 4, 48, 47, 32, 50, 1, 59, 29, 27, 4, 30, 92, 5, 9, 116, 56, 238, 40, 46, 8, 60, 37, 36, 34, 63, 69, 34, 1, 25, 54, 108, 12, 51, 5, 22, 72, 127, 21, 24, 8])

In [9]:
# Histogram: how many routes fall into each trips-per-route bucket.
BIN_WIDTH = 50

plt.rcParams["figure.figsize"] = (8, 8)
fig, ax = plt.subplots()

trips_per_route = list(trips_count.values())
# Derive the last bin edge from the data so that a route with more trips than
# a fixed limit is never silently left out of the histogram.
upper_edge = max(BIN_WIDTH, int(np.ceil(max(trips_per_route, default=0) / BIN_WIDTH)) * BIN_WIDTH)
n, bins, patches = ax.hist(
    trips_per_route,
    bins=np.arange(0, upper_edge + BIN_WIDTH, BIN_WIDTH),
    edgecolor='k',
)
bin_centers = 0.5 * (bins[:-1] + bins[1:])
# pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in matplotlib 3.9.
cm = plt.get_cmap('Purples')

# Scale the bin centers to the interval [0, 1] so each bar gets its own shade.
col = bin_centers - bin_centers.min()
if col.max() > 0:  # with a single bin there is nothing to scale (avoids 0/0)
    col = col / col.max()
for c, p in zip(col, patches):
    p.set_facecolor(cm(c))

ax.set_xlabel("Number of Trips for Each Route", fontsize=16)
ax.set_ylabel("Count of Routes", fontsize=16)
# The route total in the title is computed rather than typed in by hand.
ax.set_title(
    "Distribution of Routes with Different Numbers of Trips\n(Total: {} Routes which Exist Trip)".format(len(trips_count)),
    fontweight="bold",
    fontsize=18,
)
ax.set_xticks(bins)
# Draw the dashed horizontal grid lines behind the bars.
ax.set_axisbelow(True)
ax.yaxis.grid(color='gray', linestyle='dashed')
fig.savefig('Distribution of Routes with Trips.svg', bbox_inches='tight')
fig.savefig('Distribution of Routes with Trips.png', bbox_inches='tight')



In [10]:
# The ten routes with the highest trip counts, as (route_id, count) pairs.
trips_count.most_common(10)


Out[10]:
[('SILVER', 333),
 ('ILLINI', 250),
 ('TEAL', 238),
 ('ILLINI EVENING', 224),
 ('TEAL SATURDAY', 222),
 ('YELLOWHOPPER', 141),
 ('ILLINI LIMITED SATURDAY', 138),
 ('ILLINI EVENING SATURDAY', 135),
 ('GREEN SATURDAY', 129),
 ('YELLOW SATURDAY', 127)]

In [11]:
# For each of the ten busiest routes keep two lookups:
#   trip_count_dict: route_id -> number of trips
#   shape_id_dict:   route_id -> sorted array of the distinct shape_ids its trips use
trip_count_dict = dict(trips_count.most_common(10))
shape_id_dict = {
    route: np.unique(trips_df.loc[trips_df.route_id == route, "shape_id"])
    for route in trip_count_dict
}

In [12]:
# Two-column table of the top routes, ordered from most to fewest trips.
# Sorting with a plain key function removes the need for an `import operator`
# in the middle of the notebook.
trip_count_list = sorted(trip_count_dict.items(), key=lambda item: item[1], reverse=True)
trip_count_df = pd.DataFrame(trip_count_list, columns=['route_id', 'number of trips'])

In [13]:
# Display the table of top routes and their trip counts.
trip_count_df


Out[13]:
route_id number of trips
0 SILVER 333
1 ILLINI 250
2 TEAL 238
3 ILLINI EVENING 224
4 TEAL SATURDAY 222
5 YELLOWHOPPER 141
6 ILLINI LIMITED SATURDAY 138
7 ILLINI EVENING SATURDAY 135
8 GREEN SATURDAY 129
9 YELLOW SATURDAY 127

In [14]:
# Load the routes table; the path is relative to the notebook's working directory.
routes_df = pd.read_csv("GTFS Dataset/routes.csv")

In [15]:
# Preview the first rows of the routes table.
routes_df.head()


Out[15]:
route_id agency_id route_short_name route_long_name route_desc route_type route_url route_color route_text_color
0 GOLD ALT CUMTD 10 Gold 1 Alternate NaN 3 NaN c7994a 000000
1 RUBY SATURDAY CUMTD 110 Ruby Saturday NaN 3 NaN eb008b 000000
2 SILVER LIMITED SATURDAY CUMTD 130 Silver Limited Saturday NaN 3 NaN d1d3d4 000000
3 BROWN ALT PM CUMTD 9 Brown Alternate PM NaN 3 NaN 823822 ffffff
4 YELLOW LATE NIGHT SUNDAY CUMTD 100 Yellow Late Night Sunday NaN 3 NaN fcee1f 000000

In [16]:
# Pick the routes-table row of every top route, in the ranking order of
# trip_count_df and with the original routes_df index labels preserved.
# DataFrame.append was removed in pandas 2.0 (and re-copied the frame on every
# iteration), so the matching rows are collected and concatenated once.
df_routes = pd.concat(
    [routes_df[routes_df.route_id == route_id] for route_id in trip_count_df['route_id']]
)
df_routes


Out[16]:
route_id agency_id route_short_name route_long_name route_desc route_type route_url route_color route_text_color
40 SILVER CUMTD 13 Silver NaN 3 NaN cccccc 000000
45 ILLINI CUMTD 22 Illini NaN 3 NaN 5a1d5a ffffff
93 TEAL CUMTD 12 Teal NaN 3 NaN 006991 ffffff
91 ILLINI EVENING CUMTD 220 Illini Evening NaN 3 NaN 5a1d5a ffffff
18 TEAL SATURDAY CUMTD 120 Teal Saturday NaN 3 NaN 006991 ffffff
97 YELLOWHOPPER CUMTD 1 Yellowhopper NaN 3 NaN fcee1f 000000
100 ILLINI LIMITED SATURDAY CUMTD 220 Illini Limited Saturday NaN 3 NaN 5a1d5a ffffff
67 ILLINI EVENING SATURDAY CUMTD 220 Illini Evening Saturday NaN 3 NaN 5a1d5a ffffff
10 GREEN SATURDAY CUMTD 50 Green Saturday NaN 3 NaN 008063 ffffff
63 YELLOW SATURDAY CUMTD 100 Yellow Saturday NaN 3 NaN fcee1f 000000

In [17]:
# Show the colour code of each top route; the double brackets keep the result a one-column DataFrame.
df_routes[["route_color"]]


Out[17]:
route_color
40 cccccc
45 5a1d5a
93 006991
91 5a1d5a
18 006991
97 fcee1f
100 5a1d5a
67 5a1d5a
10 008063
63 fcee1f

In [18]:
# Horizontal bar chart of the top routes, busiest route at the top.
plt.rcParams["figure.figsize"] = (16, 16)
fig, ax = plt.subplots()

trip_counts = trip_count_df['number of trips']
# Row 0 (the busiest route) gets the highest y position so it is drawn on top.
y_positions = np.arange(len(trip_count_df))[::-1]
# One colour per bar: the route_color values displayed for df_routes, in the
# same order, prefixed with '#'.
colors = ['#cccccc','#5a1d5a','#006991','#5a1d5a','#006991','#fcee1f','#5a1d5a','#5a1d5a','#008063','#fcee1f']

# Positions and widths are passed positionally: the `bottom=` keyword of barh
# was replaced by `y` and raises a TypeError on current matplotlib releases.
ax.barh(y_positions, trip_counts, height=0.5, alpha=0.9, color=colors)
ax.set_xlabel("Number of Trips", fontsize=20)
ax.set_ylabel("Route ID", fontsize=20)
ax.set_yticks(y_positions)
ax.set_yticklabels(trip_count_df['route_id'], fontsize=18)
ax.set_title("Top 10 Routes with Most Number of Trips for Each Route", fontweight="bold", fontsize=24)
ax.xaxis.grid(color='gray', linestyle='dotted')

# Print each bar's value just past its right end, nudged down slightly.
for y, count in zip(y_positions, trip_counts):
    ax.text(count + 2, y - 0.05, str(count), color='k', fontsize=18, fontweight="bold")

fig.savefig('Top 10 Routes with Most Trips.svg', bbox_inches='tight')
fig.savefig('Top 10 Routes with Most Trips.png', bbox_inches='tight')



In [19]:
# Inspect the distinct shape_ids collected for each top route.
shape_id_dict


Out[19]:
{'GREEN SATURDAY': array(['50E->MNBDY', '50E->PC', '50E-WKND-HP->CF', 'GN1SATPO', 'GN1SUNPO',
        'GREEN WEEKEND 23', 'GREEN WEEKEND 44', '[@15.0.63188916@]15',
        '[@15.0.68513015@]219', '[@15.0.68513015@]220',
        '[@15.0.73006437@]18', '[@15.0.73006437@]37', '[@15.0.73006437@]38'], dtype=object),
 'ILLINI': array(['22N ILLINI 10', '22S ILLINI 20', '22S ILLINI 21', 'ILLINI 34',
        'ILLINI 46', 'ILLINI 47', '[@15.0.63192528@]43',
        '[@15.0.66063553@]12', '[@15.0.68513188@]4'], dtype=object),
 'ILLINI EVENING': array(['220N ILLINI 10', '220S ILLINI 20', 'ILLINI EV 25',
        '[@14.0.56288498@]24', '[@2.0.85634827@]37'], dtype=object),
 'ILLINI EVENING SATURDAY': array(['[@124.0.92260187@]220N ILLINI 10',
        '[@124.0.92260187@]220S ILLINI 20', '[@124.0.92260187@]24',
        '[@124.0.92260187@]ILLINI EV 25'], dtype=object),
 'ILLINI LIMITED SATURDAY': array(['22N ILLINI LIMITED WEEKEND', 'GR2PO', 'ILLINI LIMITED WEEKEND 845'], dtype=object),
 'SILVER': array(['SILVER 120', 'SILVER 2', 'SILVER 42', 'SILVER 43',
        '[@15.0.73007178@]121'], dtype=object),
 'TEAL': array(['12E TEAL 13', '12W TEAL 12', 'TEAL 23', 'TEAL 24', 'TEAL 25',
        'TEAL 26', 'TEAL 27', 'TEAL 34', 'TEAL 35', 'TEAL 45', 'TEAL 98',
        'TEAL 99', '[@15.0.79613563@]47'], dtype=object),
 'TEAL SATURDAY': array(['12E TEAL WEEKEND 13', '12W TEAL WEEKEND 12', 'TEAL WEEKEND 16',
        'TEAL WEEKEND 45'], dtype=object),
 'YELLOW SATURDAY': array(['[@14.0.57766396@]100N', '[@14.0.57766396@]100NGRNWRT->',
        '[@14.0.57766396@]100NY1', '[@14.0.57766396@]100S',
        '[@14.0.57766396@]100S->BRWSWDFLD', '[@14.0.57766396@]Y1SATPO',
        '[@14.0.57766396@]Y3SATPO', '[@15.0.63192124@]23'], dtype=object),
 'YELLOWHOPPER': array(['YELLOWHOPPER 17', 'YELLOWHOPPER 23', 'YELLOWHOPPER 25',
        'YELLOWHOPPER 26', '[@15.0.63189099@]29'], dtype=object)}

In [20]:
# The ten routes with the fewest trips, starting from the least frequent.
trips_count.most_common()[::-1][:10]


Out[20]:
[('10W GOLD ALT', 1),
 ('7W GREY ALT', 1),
 ('1N YELLOW ALT', 1),
 ('BROWN ALT1', 1),
 ('5W GREEN ALT 2', 1),
 ('1N YELLOW ALT PM', 2),
 ('5E GREEN EXPRESS 1 ALT', 3),
 ('GREEN EXPRESS ALT', 3),
 ('5W GREEN EXPRESS 2', 4),
 ('1S YELLOW ALT', 4)]

In [21]:
# Load the shapes table; the path is relative to the notebook's working directory.
shapes_df = pd.read_csv("GTFS Dataset/shapes.csv")

In [22]:
# Preview the first rows of the shapes table.
shapes_df.head()


Out[22]:
shape_id shape_pt_lat shape_pt_lon shape_pt_sequence shape_dist_traveled
0 [@2.0.86175868@]34 40.114158 -88.173105 0 0.000000
1 [@2.0.86175868@]34 40.114158 -88.173106 1 0.134184
2 [@2.0.86175868@]34 40.114171 -88.173107 2 1.560577
3 [@2.0.86175868@]34 40.114186 -88.173108 3 3.228456
4 [@2.0.86175868@]34 40.114200 -88.173109 4 4.787531

In [23]:
# Keys of shape_id_dict, i.e. the route ids of the ten busiest routes
# (despite the variable name these are routes, not shapes).
top_ten_shapes = list(shape_id_dict)
top_ten_shapes


Out[23]:
['ILLINI LIMITED SATURDAY',
 'TEAL',
 'GREEN SATURDAY',
 'SILVER',
 'ILLINI',
 'YELLOWHOPPER',
 'ILLINI EVENING SATURDAY',
 'TEAL SATURDAY',
 'YELLOW SATURDAY',
 'ILLINI EVENING']

In [24]:
# Gather the shape points of every shape used by the ten busiest routes.
# DataFrame.append was removed in pandas 2.0 (and re-copied the frame on every
# iteration), so the per-shape slices are collected and concatenated once.
# The slices are taken route by route, shape by shape, as before.
df_shapes = pd.concat(
    [
        shapes_df[shapes_df.shape_id == shape_id]
        for key in top_ten_shapes
        for shape_id in shape_id_dict[key]
    ]
)

# One group per shape_id, so each shape can be drawn as its own line.
df_shapes_group = df_shapes.groupby("shape_id")

In [25]:
# Routes that have exactly one trip (they appear with a count of 1 in the
# ten-least-common listing).
once_routes = ("BROWN ALT1","10W GOLD ALT","5W GREEN ALT 2","7W GREY ALT","1N YELLOW ALT")

# Distinct shape_ids for each of the ten routes with the fewest trips.
shape_id_dict_least = {}
for word, count in trips_count.most_common()[:-11:-1]:
    shape_id_dict_least[word] = np.unique(trips_df[trips_df.route_id == word][["shape_id"]])

# Gather the shape points of the single-trip routes only.
# DataFrame.append was removed in pandas 2.0 (and re-copied the frame on every
# iteration), so the per-shape slices are collected and concatenated once.
df_shapes_least = pd.concat(
    [
        shapes_df[shapes_df.shape_id == shape_id]
        for key in once_routes
        for shape_id in shape_id_dict_least[key]
    ]
)

# One group per shape_id, so each shape can be drawn as its own line.
df_shapes_least_group = df_shapes_least.groupby("shape_id")

In [26]:
# Map of the route shapes: busiest routes in black, single-trip routes in gray,
# with hand-placed ellipses and highlight boxes drawn on top.
plt.rcParams["figure.figsize"] = (14, 14)
ax = plt.gca()

# Draw every shape as a translucent line so overlapping segments look darker.
for shape_groups, line_color in ((df_shapes_group, "k"), (df_shapes_least_group, "gray")):
    for _, points in shape_groups:
        ax.plot(points['shape_pt_lon'], points['shape_pt_lat'],
                linestyle="solid", linewidth=3, alpha=0.3, c=line_color)

ax.set_xlabel("Longitude", fontsize=20)
ax.set_ylabel("Latitude", fontsize=20)
ax.set_title("Shapes of Routes with Most/Least Number of Trips", fontweight="bold", fontsize=22)
ax.grid(color='gray', linestyle='dotted')

# First legend: which line colour belongs to which set of routes.
route_handles = [mpatches.Patch(color='k'), mpatches.Patch(color='lightgray')]
route_labels = ["Top 10 Routes with Most Number of Trips", "5 Routes with Only One Trip per Route"]
first_legend = ax.legend(title="Routes", handles=route_handles, labels=route_labels,
                         prop={'size': 14}, loc=1)
# Register the first legend as a plain artist so the second legend created
# further down does not replace it.
ax.add_artist(first_legend)

ax.set_yticks(np.arange(40.05, 40.16, 0.01))
ax.set_xticks(np.arange(-88.32, -88.15, 0.01))

# Outlined ellipses: (centre, width, height).
# NOTE(review): the coordinates are hard-coded; their origin is not shown in this notebook.
ellipse_specs = [
    ((-88.242, 40.123), 0.044, 0.021),
    ((-88.201, 40.101), 0.023, 0.028),
]
for centre, ell_width, ell_height in ellipse_specs:
    ax.add_patch(Ellipse(xy=centre, width=ell_width, height=ell_height, alpha=0.9,
                         facecolor="None", edgecolor="hotpink", lw=2))

# Highlight boxes: (lower-left corner, height, fill colour); all are 0.01 wide.
# Consecutive pairs share a colour, one colour per legend entry below.
box_specs = [
    ((-88.26, 40.1266), 0.0067, "yellow"),
    ((-88.20, 40.0866), 0.0067, "yellow"),
    ((-88.21, 40.1066), 0.0067, "#adff2f"),
    ((-88.20, 40.0934), 0.0067, "#adff2f"),
    ((-88.25, 40.1134), 0.0134, "#7fff00"),
    ((-88.23, 40.1134), 0.0067, "#7fff00"),
]
boxes = [
    mpatches.Rectangle(corner, width=0.01, height=box_height, alpha=0.6,
                       facecolor=fill, edgecolor="None")
    for corner, box_height, fill in box_specs
]
for box in boxes:
    ax.add_patch(box)

# Second legend: one representative box per colour.
second_legend = ax.legend(title="Density of Bus Stops", prop={'size': 14},
                          handles=boxes[0::2], labels=["highest", "very high", "high"], loc=2)
ax.add_artist(second_legend)

plt.savefig('Shapes of Routes with Trips.svg', bbox_inches='tight')
plt.savefig('Shapes of Routes with Trips.png', bbox_inches='tight')



In [ ]: