In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
import matplotlib.patches as mpatches
import csv
from collections import Counter
In [2]:
import os

import plotly

# Plotly credentials come from the environment so that no secret lives in the
# notebook. Export PLOTLY_API_KEY (and optionally PLOTLY_USERNAME) before
# starting the kernel; when the key is absent the credentials file is left
# untouched and the rest of the notebook still runs.
# NOTE(review): an API key was previously committed in this cell -- it should
# be treated as leaked and regenerated.
_plotly_api_key = os.environ.get("PLOTLY_API_KEY")
if _plotly_api_key:
    plotly.tools.set_credentials_file(
        username=os.environ.get("PLOTLY_USERNAME", "huilyu2"),
        api_key=_plotly_api_key,
    )

# NOTE(review): plotly >= 4 moved `plotly.plotly` into the separate
# `chart_studio` package -- confirm the installed version still provides it.
import plotly.plotly as py
from plotly.graph_objs import *
In [3]:
%matplotlib inline
In [4]:
# Load the trips table from the local "GTFS Dataset" folder (path is relative
# to the notebook's working directory).
trips_df = pd.read_csv("GTFS Dataset/trips.csv")
In [5]:
# Show the column labels of the trips table.
trips_df.columns
Out[5]:
In [6]:
# Preview the first rows of the trips table.
trips_df.head()
Out[6]:
How many trips for each route?
In [7]:
# Number of distinct route_id values that appear in the trips table.
np.unique(trips_df["route_id"]).size
Out[7]:
In [8]:
# Tally how many rows of trips_df carry each route_id.
trips_count = Counter(trips_df["route_id"])
# Display the per-route tallies.
trips_count.values()
Out[8]:
In [9]:
plt.rcParams["figure.figsize"] = (8, 8)
fig, ax = plt.subplots()

# Histogram of trips-per-route using 50-wide bins with edges 0, 50, ..., 350.
# NOTE(review): tallies above 350 lie outside the bin edges and are left out
# of the plot -- confirm that no route exceeds that.
n, bins, patches = ax.hist(
    list(trips_count.values()),
    bins=np.arange(0, 400, 50),
    edgecolor='k',
)

# Shade each bar by the position of its bin centre, rescaled to [0, 1].
bin_centers = 0.5 * (bins[:-1] + bins[1:])
# pyplot.get_cmap is used because matplotlib.cm.get_cmap was deprecated and
# is no longer available in current Matplotlib releases.
cm = plt.get_cmap('Purples')
col = bin_centers - min(bin_centers)
col /= max(col)
for c, p in zip(col, patches):
    p.set_facecolor(cm(c))

ax.set_xlabel("Number of Trips for Each Route", fontsize=16)
ax.set_ylabel("Count of Routes", fontsize=16)
# NOTE(review): the route total in the title is hard-coded; confirm it still
# matches the number of distinct routes in the data.
ax.set_title(
    "Distribution of Routes with Different Numbers of Trips\n(Total: 100 Routes which Exist Trip)",
    fontweight="bold",
    fontsize=18,
)
ax.set_xticks(bins)

# Draw the dashed horizontal grid behind the bars.
ax.set_axisbelow(True)
ax.yaxis.grid(color='gray', linestyle='dashed')

fig.savefig('Distribution of Routes with Trips.svg', bbox_inches='tight')
fig.savefig('Distribution of Routes with Trips.png', bbox_inches='tight')
In [10]:
# The ten route_ids with the highest tallies, as (route_id, count) pairs.
trips_count.most_common(10)
Out[10]:
In [11]:
# Lookups for the ten routes with the highest trip tallies:
#   trip_count_dict: route_id -> number of trips
#   shape_id_dict:   route_id -> array of the distinct shape_ids on that route
top_routes = trips_count.most_common(10)
trip_count_dict = dict(top_routes)
shape_id_dict = {
    route_id: np.unique(trips_df.loc[trips_df.route_id == route_id, ["shape_id"]])
    for route_id, _count in top_routes
}
In [12]:
import operator

# Order the (route_id, trip count) pairs from most to fewest trips and put
# them in a two-column frame.
by_trip_count = operator.itemgetter(1)
trip_count_list = sorted(trip_count_dict.items(), key=by_trip_count, reverse=True)
trip_count_df = pd.DataFrame(
    trip_count_list,
    columns=['route_id', 'number of trips'],
)
In [13]:
# Display the route_id / number-of-trips table.
trip_count_df
Out[13]:
In [14]:
# Load the routes table from the local "GTFS Dataset" folder.
routes_df = pd.read_csv("GTFS Dataset/routes.csv")
In [15]:
# Preview the first rows of the routes table.
routes_df.head()
Out[15]:
In [16]:
# Pull the routes_df rows for each route listed in trip_count_df, keeping the
# trip_count_df order (most trips first).
# The per-route slices are gathered in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies it on every iteration.
df_routes = pd.concat(
    [routes_df[routes_df.route_id == route_id] for route_id in trip_count_df.route_id]
)
df_routes
Out[16]:
In [17]:
# Show only the route_color column of the selected routes.
df_routes[["route_color"]]
Out[17]:
In [18]:
plt.rcParams["figure.figsize"] = (16, 16)
fig, ax = plt.subplots()

# One colour per bar, listed in trip_count_df row order.
# NOTE(review): presumably transcribed by hand from df_routes["route_color"];
# re-check these values if the underlying data changes.
colors = ['#cccccc','#5a1d5a','#006991','#5a1d5a','#006991','#fcee1f','#5a1d5a','#5a1d5a','#008063','#fcee1f']

# Row 0 of trip_count_df gets the highest y position so it is drawn on top.
bar_positions = np.arange(10)[::-1]

# Positions and widths are passed positionally: the first parameter of barh
# is named `y` in current Matplotlib, so the old `bottom=` keyword is rejected.
ax.barh(
    bar_positions,
    trip_count_df['number of trips'],
    height=0.5,
    alpha=0.9,
    color=colors,
)

ax.set_xlabel("Number of Trips", fontsize=20)
ax.set_ylabel("Route ID", fontsize=20)
ax.set_yticks(bar_positions)
ax.set_yticklabels(trip_count_df['route_id'], fontsize=18)
ax.set_title(
    "Top 10 Routes with Most Number of Trips for Each Route",
    fontweight="bold",
    fontsize=24,
)
ax.xaxis.grid(color='gray', linestyle='dotted')

# Write each tally just past the end of its bar. The series is walked in
# reverse so that index i matches the bar drawn at y position i.
for i, v in enumerate(trip_count_df['number of trips'][::-1]):
    ax.text(v + 2, i - 0.05, str(v), color='k', fontsize=18, fontweight="bold")

fig.savefig('Top 10 Routes with Most Trips.svg', bbox_inches='tight')
fig.savefig('Top 10 Routes with Most Trips.png', bbox_inches='tight')
In [19]:
# Display the route_id -> shape_id array mapping.
shape_id_dict
Out[19]:
In [20]:
# The last ten entries of the descending tally list, read back to front:
# i.e. the ten routes with the lowest tallies, lowest first.
trips_count.most_common()[:-11:-1]
Out[20]:
In [21]:
# Load the shapes table from the local "GTFS Dataset" folder.
shapes_df = pd.read_csv("GTFS Dataset/shapes.csv")
In [22]:
# Preview the first rows of the shapes table.
shapes_df.head()
Out[22]:
In [23]:
# Keys of shape_id_dict as a list. Despite the variable's name these are
# route ids, not shape ids.
top_ten_shapes = list(shape_id_dict)
top_ten_shapes
Out[23]:
In [24]:
# Gather the shapes_df rows of every shape_id recorded for the routes in
# top_ten_shapes, in route order and then shape order.
# The slices are concatenated once: DataFrame.append was removed in
# pandas 2.0, and appending inside a nested loop re-copies the growing frame
# on every iteration.
df_shapes = pd.concat(
    [
        shapes_df[shapes_df.shape_id == shape_id]
        for key in top_ten_shapes
        for shape_id in shape_id_dict[key]
    ]
)
# One group per shape_id, so each shape can be handled separately later.
df_shapes_group = df_shapes.groupby("shape_id")
In [25]:
# Hand-picked route ids to highlight.
# NOTE(review): presumably the routes that have exactly one trip -- confirm
# against trips_count. Every id listed here must be among the ten
# lowest-tally routes, otherwise the lookup below raises KeyError.
once_routes = ("BROWN ALT1","10W GOLD ALT","5W GREEN ALT 2","7W GREY ALT","1N YELLOW ALT")

# route_id -> distinct shape_ids, for the ten routes with the lowest tallies.
shape_id_dict_least = {}
for word, count in trips_count.most_common()[:-11:-1]:
    shape_id_dict_least[word] = np.unique(trips_df[trips_df.route_id == word][["shape_id"]])

# Gather the shapes_df rows for every shape of the hand-picked routes.
# A single pd.concat replaces repeated DataFrame.append calls (removed in
# pandas 2.0, and quadratic when used inside a loop).
df_shapes_least = pd.concat(
    [
        shapes_df[shapes_df.shape_id == shape_id]
        for key in once_routes
        for shape_id in shape_id_dict_least[key]
    ]
)
# One group per shape_id, so each shape can be handled separately later.
df_shapes_least_group = df_shapes_least.groupby("shape_id")
In [26]:
plt.rcParams["figure.figsize"] = (14, 14)
ax = plt.gca()

# Draw every shape as a translucent line: black for the first group set,
# gray for the second.
for grouped_shapes, line_colour in ((df_shapes_group, "k"), (df_shapes_least_group, "gray")):
    for _shape_id, points in grouped_shapes:
        ax.plot(
            points['shape_pt_lon'],
            points['shape_pt_lat'],
            linestyle="solid",
            linewidth=3,
            alpha=0.3,
            c=line_colour,
        )

ax.set_xlabel("Longitude", fontsize=20)
ax.set_ylabel("Latitude", fontsize=20)
ax.set_title("Shapes of Routes with Most/Least Number of Trips", fontweight="bold", fontsize=22)
ax.grid(color='gray', linestyle='dotted')

# First legend: which line colour stands for which set of routes.
route_handles = [mpatches.Patch(color='k'), mpatches.Patch(color='lightgray')]
first_legend = ax.legend(
    title="Routes",
    handles=route_handles,
    labels=["Top 10 Routes with Most Number of Trips", "5 Routes with Only One Trip per Route"],
    prop={'size': 14},
    loc=1,
)
# Register the first legend as a plain artist so that it survives the second
# legend call further down, which would otherwise replace it.
ax.add_artist(first_legend)

ax.set_yticks(np.arange(40.05, 40.16, 0.01))
ax.set_xticks(np.arange(-88.32, -88.15, 0.01))

# Pink outlines around two regions of the map.
highlight_ellipses = [
    Ellipse(xy=(-88.242, 40.123), width=0.044, height=0.021, alpha=0.9,
            facecolor="None", edgecolor="hotpink", lw=2),
    Ellipse(xy=(-88.201, 40.101), width=0.023, height=0.028, alpha=0.9,
            facecolor="None", edgecolor="hotpink", lw=2),
]

# Filled cells, each 0.01 wide: (lower-left corner, height, fill colour).
density_cells = [
    ((-88.26, 40.1266), 0.0067, "yellow"),
    ((-88.20, 40.0866), 0.0067, "yellow"),
    ((-88.21, 40.1066), 0.0067, "#adff2f"),
    ((-88.20, 40.0934), 0.0067, "#adff2f"),
    ((-88.25, 40.1134), 0.0134, "#7fff00"),
    ((-88.23, 40.1134), 0.0067, "#7fff00"),
]
density_rectangles = [
    plt.Rectangle(corner, width=0.01, height=cell_height, alpha=0.6,
                  facecolor=fill, edgecolor="None")
    for corner, cell_height, fill in density_cells
]

# Ellipses are added before the rectangles.
for patch in highlight_ellipses + density_rectangles:
    ax.add_patch(patch)

# Second legend: one sample rectangle per fill colour.
second_legend = ax.legend(
    title="Density of Bus Stops",
    prop={'size': 14},
    handles=[density_rectangles[0], density_rectangles[2], density_rectangles[4]],
    labels=["highest", "very high", "high"],
    loc=2,
)
ax.add_artist(second_legend)

plt.savefig('Shapes of Routes with Trips.svg', bbox_inches='tight')
plt.savefig('Shapes of Routes with Trips.png', bbox_inches='tight')
In [ ]: