In [1]:
#enable plotting
%matplotlib inline
#import packages
import matplotlib.pyplot as plt
import numpy as np
import csv
import collections
from collections import Counter
#set the default figure size, given as (width, height), for every plot drawn below
plt.rcParams["figure.figsize"] = (20,10)
In [2]:
#read the files
def _read_columns(path):
    """Load a CSV file into a column-oriented dict.

    Parameters
    ----------
    path : str
        Path of the CSV file. Its first row is used as the header.

    Returns
    -------
    (list, dict)
        The header row, and a dict mapping every header name to the list of
        string values read from that column, in file order.

    Raises
    ------
    StopIteration
        If the file has no header row (it is empty).
    """
    # newline="" is what the csv module asks for, so that line breaks inside
    # quoted fields are parsed as part of the field instead of ending the row.
    with open(path, "r", newline="") as handle:
        rows = csv.reader(handle)
        column_names = next(rows)
        columns = {name: [] for name in column_names}
        for row in rows:
            # zip stops at the shorter of header/row: surplus cells are
            # ignored and a short row adds nothing to the trailing columns.
            for name, value in zip(column_names, row):
                columns[name].append(value)
    return column_names, columns


# One loader call per file; the fn*/header*/data* names are bound as before
# so the cells below keep working unchanged.
fn = "stop_times.csv"
header, data = _read_columns(fn)

fn1 = "stops.csv"
header1, data1 = _read_columns(fn1)

fn2 = "routes.csv"
header2, data2 = _read_columns(fn2)
In [3]:
#amount of stops per trip
# Tally how often each trip_id occurs in the stop_times data (the chart labels
# this "stops per trip"). Counter replaces the hand-written tally loop, whose
# loop variable `id` shadowed the builtin id() for every later cell.
a = Counter(data['trip_id'])
trip_count = dict(a)

# Ascending by count; sorted() is stable, so ties keep first-seen order.
sorted_a = sorted(a.items(), key=lambda item: item[1])

# One bar per trip; trip ids are not drawn as tick labels.
x_val = np.arange(len(sorted_a))
y_val = [count for _, count in sorted_a]

plt.bar(x_val, y_val, align='center', width=0.6, color='r')
plt.ylabel('Amount of stops', fontsize=15)
plt.xlabel('Trips', fontsize=15)
plt.title('Distribution of amount of stops per trip', fontsize=15)
plt.show()
In [18]:
# number of distinct trip_id values (sorted_a holds one (trip_id, count) pair per trip)
len(sorted_a)
Out[18]:
In [4]:
#amount of trips per stop
# Tally how often each stop_id occurs in the stop_times data (the chart labels
# this "trips per stop"). Counter replaces the hand-written tally loop, whose
# loop variable `id` shadowed the builtin id() for every later cell.
b = Counter(data['stop_id'])
stop_count = dict(b)

# Ascending by count; sorted() is stable, so ties keep first-seen order.
sorted_b = sorted(b.items(), key=lambda item: item[1])

# One bar per stop; stop ids are not drawn as tick labels.
x_val = np.arange(len(sorted_b))
y_val = [count for _, count in sorted_b]

plt.bar(x_val, y_val, align='center', width=0.6, color='g')
plt.ylabel('Amount of trips', fontsize=15)
plt.xlabel('Stops', fontsize=15)
plt.title('Distribution of amount of trips per stop', fontsize=15)
plt.show()
In [17]:
# number of distinct stop_id values (sorted_b holds one (stop_id, count) pair per stop)
len(sorted_b)
Out[17]:
In [10]:
# The 20 stop ids with the highest counts, re-sorted ascending so the bars
# rise from left to right (stable sort: ties keep most_common's order).
c = sorted(Counter(stop_count).most_common(20), key=lambda item: item[1])

# Unpack with comprehensions rather than list(zip(*c))[i]: that form raised
# IndexError when stop_count was empty and built the transposed list twice.
x_val = [stop for stop, _ in c]
y_val = [count for _, count in c]
x_pos = np.arange(len(x_val))

plt.bar(x_pos, y_val, align='center', width=0.6, color='b')
plt.xticks(x_pos, x_val, fontsize=8)
# The tick labels are stop ids, so name the axis like the per-stop chart does.
plt.xlabel('Stops', fontsize=15)
plt.ylabel('Amount of trips', fontsize=15)
plt.title('20 stops with most trip stops', fontsize=15)
plt.show()
In [12]:
# Tally the routes by their route_color value.
d = Counter(data2['route_color'])
color_count = dict(d)

# (color, count) pairs, ascending by count; ties keep first-seen order.
sorted_d = list(d.items())
sorted_d.sort(key=lambda pair: pair[1])

x_val = [label for label, _ in sorted_d]
y_val = [total for _, total in sorted_d]
x_pos = np.arange(len(x_val))

# One bar per color value, labelled with the color value itself.
plt.bar(x_pos, y_val, align='center', width=0.6, color='y')
plt.xticks(x_pos, x_val, fontsize=8)
plt.ylabel('Amount of routes', fontsize=15)
plt.title('Distribution of routes per color', fontsize=15)
plt.show()
In [15]:
# Tally how many rows of the stops data share each stop_code value.
e = Counter(data1['stop_code'])
location_count = dict(e)

# (stop_code, count) pairs, ascending by count; ties keep first-seen order.
sorted_e = list(e.items())
sorted_e.sort(key=lambda pair: pair[1])

x_val = [code for code, _ in sorted_e]
y_val = [total for _, total in sorted_e]
x_pos = np.arange(len(x_val))

# One bar per stop_code; the codes are not drawn as tick labels.
plt.bar(x_pos, y_val, align='center', width=0.5, color='c')
plt.ylabel('Amount of stops', fontsize=15)
plt.title('Stops per location', fontsize=15)
plt.show()
In [16]:
# Number of distinct stop_code values. Read from sorted_e rather than x_val:
# every plotting cell reassigns x_val, so len(x_val) depends on which cell ran
# last, while sorted_e is only set by the "Stops per location" cell.
len(sorted_e)
Out[16]:
In [ ]: