Yingjun Guan, Xiaoliang Jiang, Xinyu Zhang, Jialu Wang. The first task is based on data from the Champaign-Urbana Mass Transit District (CUMTD). The data and the corresponding documentation can be found at the data source (http://developer.cumtd.com/). The data comprises the agency information (agency.txt), the running schedule (calendar.txt), the schedule exceptions (calendar_dates.txt), the stops (stops.txt), the stop times (stop_times.txt), the routes of all traffic (routes.txt), the shapes of the routes - recorded as sequences of points along each route rather than as stops (shapes.txt), the daily time schedule (trips.txt), and the fare information (fare_rules.txt and fare_attributes.txt).
In [1]:
#1. Yingjun Guan
from pylab import *
# Pie chart of traffic running days, drawn on a square figure and axes.
figure(1, figsize=(6, 6))
ax = axes([0.1, 0.1, 0.8, 0.8])

# Wedges are laid out counter-clockwise in the order listed here.
labels = ('105 days', '51-100 days', '10-50days', '<10 days')
fracs = [count / 289 for count in (43, 67, 139, 40)]
# Pull only the first wedge slightly away from the centre.
explode = (0.05, 0, 0, 0)

# startangle=90 rotates everything counter-clockwise by 90 degrees, so the
# first wedge starts on the positive y-axis instead of the default x-axis.
pie(
    fracs,
    explode=explode,
    labels=labels,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
title(
    'Pie chart for traffic running days (out of 147)',
    bbox={'facecolor': '0.8', 'pad': 5},
)
show()
In [2]:
# 2. Jialu Wang
#enable plotting
%matplotlib inline
#import packages
import matplotlib.pyplot as plt
import numpy as np
import csv
import collections
from collections import Counter
# Set the default figure size (width, height) used by the plots created after this point.
plt.rcParams["figure.figsize"] = (20,10)
In [6]:
def _read_columns(path):
    """Read a CSV file into a column-oriented dictionary.

    Returns ``(header, columns)``: ``header`` is the list of column names
    taken from the first row, and ``columns`` maps each name to the list of
    string values found in that column, in file order.
    """
    # newline="" lets the csv module do its own line-ending handling, as the
    # csv documentation requires for correct parsing of quoted fields.
    with open(path, "r", newline="") as handle:
        reader = csv.reader(handle)
        header = next(reader)
        columns = {name: [] for name in header}
        for row in reader:
            # zip() stops at the shorter side, so extra trailing cells in a
            # row are ignored and short rows fill only the leading columns.
            for name, value in zip(header, row):
                columns[name].append(value)
    return header, columns


# The same reader used to be pasted three times; each table is now loaded
# through the helper.  stop_times -> data, stops -> data1, routes -> data2.
fn = "stop_times.txt"
header, data = _read_columns(fn)

fn1 = "stops.txt"
header1, data1 = _read_columns(fn1)

fn2 = "routes - routes.csv.csv"
header2, data2 = _read_columns(fn2)
In [7]:
# Tally how many rows of the stops table carry each stop_code.
location_count = dict(Counter(data1['stop_code']))
e = Counter(location_count)

# Ascending by count; sorted() is stable, so ties keep first-seen order.
sorted_e = sorted(e.items(), key=lambda pair: pair[1])
x_val = [stop_code for stop_code, _ in sorted_e]
y_val = [total for _, total in sorted_e]
x_pos = np.arange(len(x_val))

plt.bar(x_pos, y_val, align='center', width=0.5, color='c')
plt.ylabel('Amount of stops', fontsize=15)
plt.title('Stops per location', fontsize=15)
plt.show()
In [8]:
# Amount of stops per trip: count how many stop_times rows share each trip_id.
# Counter replaces the hand-written tally, whose loop variable was called
# `id` and therefore shadowed the builtin id() for the rest of the session.
trip_count = dict(Counter(data['trip_id']))
a = Counter(trip_count)

# Ascending by count so the bars rise from left to right.
sorted_a = sorted(a.items(), key=lambda item: item[1])
x_val = np.arange(len(sorted_a))
y_val = [stops for _, stops in sorted_a]

plt.bar(x_val, y_val, align='center', width=0.6, color='r')
plt.ylabel('Amount of stops', fontsize=15)
plt.xlabel('Trips', fontsize=15)
plt.title('Distribution of amount of stops per trip', fontsize=15)
plt.show()
In [5]:
len(sorted_a)
Out[5]:
In [9]:
# Amount of trips per stop: count how many stop_times rows share each stop_id.
# (The old header said "stops per stop"; the axis labels below show the intent.)
# Counter replaces the hand-written tally, whose loop variable was called
# `id` and therefore shadowed the builtin id() for the rest of the session.
stop_count = dict(Counter(data['stop_id']))
b = Counter(stop_count)

# Ascending by count so the bars rise from left to right.
sorted_b = sorted(b.items(), key=lambda item: item[1])
x_val = np.arange(len(sorted_b))
y_val = [trips for _, trips in sorted_b]

plt.bar(x_val, y_val, align='center', width=0.6, color='g')
plt.ylabel('Amount of trips', fontsize=15)
plt.xlabel('Stops', fontsize=15)
plt.title('Distribution of amount of trips per stop', fontsize=15)
plt.show()
In [7]:
len(sorted_b)
Out[7]:
In [106]:
# Keep the 20 stops with the highest counts, then reorder them ascending
# so the tallest bar ends up on the right.
c = Counter(stop_count).most_common(20)
c.sort(key=lambda entry: entry[1])

# Transpose the (stop_id, count) pairs into one tuple of ids and one of counts.
_unzipped = list(zip(*c))
x_val = _unzipped[0]
y_val = _unzipped[1]
x_pos = np.arange(len(x_val))

plt.bar(x_pos, y_val, align='center', width=0.6, color='#9BD3F0')
plt.xticks(x_pos, x_val, fontsize=8)
plt.ylabel('Amount of trips', fontsize=15)
plt.title('20 stops with most trips traveling by', fontsize=15)
plt.show()
In [11]:
import plotly
import plotly.plotly as py
from plotly.graph_objs import *
import pandas as pd
import math
from IPython.display import Image
import time
# Plotly credentials are read from the environment instead of being written
# into the notebook: an API key stored in source is readable by everyone who
# can see the file.  A missing variable raises KeyError naming it.
# NOTE(review): the key that used to be embedded here should be treated as
# leaked and regenerated in the Plotly account settings.
import os

plotly.tools.set_credentials_file(
    username=os.environ["PLOTLY_USERNAME"],
    api_key=os.environ["PLOTLY_API_KEY"],
)
In [14]:
# Load the routes and trips tables (read as ISO-8859-1 text).
dfroutes = pd.read_csv("routes.txt",encoding='iso-8859-1')
dftrips = pd.read_csv("trips.txt",encoding='iso-8859-1')
# Count the trips per route_id and flatten the result into a two-column frame,
# renaming the former index column to 'x'.
# NOTE(review): the code that follows reads route names from 'x' and the counts
# from 'route_id'; that column layout presumably matches pandas < 2.0 — confirm
# against the installed pandas version.
routeclean=dftrips["route_id"].value_counts().reset_index().rename(columns={'index': 'x'})
def Nameclean(dataset, a):
    """Collapse the values of column ``a`` of ``dataset`` to a colour word.

    Every value of ``dataset[a]`` that contains one of the known colour
    words as a substring is replaced, in place, by that word alone.  The
    words are tried in list order.  Missing values and values containing
    none of the words are left unchanged.  Returns ``None``.
    """
    wordlist = [
        "SILVER", "ILLINI", "TEAL", "YELLOW", "GREEN", "BROWN", "GREY",
        "GOLD", "LIME", "BLUE", "RED", "BRONZE", "ORANGE", "LAVENDER",
        "RUBY",
    ]
    for word in wordlist:
        # Plain substring test (no regex); na=False keeps missing values out.
        matches = dataset[a].str.contains(word, regex=False, na=False)
        # Assign through .loc on the frame itself: the former chained form
        # ``dataset[a][i] = word`` can write to a temporary copy and be lost.
        dataset.loc[matches, a] = word
# Collapse every route variant onto its colour word.
Nameclean(routeclean, "x")
sumroute = routeclean[:18]

# One row per cleaned route name.  Right after this statement the 'x' column
# holds how many raw route_ids collapsed onto each name.
cleanedroute = routeclean["x"].value_counts().reset_index().rename(columns={'index': 'name'})

# Overwrite 'x' with the summed trip counts of all variants of each name.
# A grouped sum plus a whole-column assignment replaces the nested loops and
# the chained assignment ``cleanedroute["x"][j] = rsum``, which can write to
# a temporary copy and leave the frame unchanged.
trips_per_name = routeclean.groupby("x")["route_id"].sum()
cleanedroute["x"] = cleanedroute["name"].map(trips_per_name)

# Bar colours: for each cleaned name take the route_color of the first
# routes row carrying that name; names without a match contribute no colour.
Nameclean(dfroutes, "route_id")
first_color = {}
for route_name, route_color in zip(dfroutes['route_id'], dfroutes['route_color']):
    first_color.setdefault(route_name, route_color)
colorbar0 = [
    "#%s" % first_color[name]
    for name in cleanedroute['name']
    if name in first_color
]

import plotly.plotly as py
import plotly.graph_objs as go

trace0 = go.Bar(
    x=cleanedroute["name"],
    y=cleanedroute["x"],
    marker=dict(color=colorbar0),
)
data = [trace0]
layout = go.Layout(
    title='Buses on each route',
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='color-bar')
#py.image.save_as(fig,'Whale-plot.png')
Out[14]:
In [15]:
# Reload the routes and trips tables (read as ISO-8859-1 text).
dfroutes = pd.read_csv("routes.txt",encoding='iso-8859-1')
dftrips = pd.read_csv("trips.txt",encoding='iso-8859-1')
# Count the trips per route_id and flatten the result into a two-column frame,
# renaming the former index column to 'x'.
# NOTE(review): the code that follows reads route names from 'x'; that column
# layout presumably matches pandas < 2.0 — confirm against the installed version.
routeclean=dftrips["route_id"].value_counts().reset_index().rename(columns={'index': 'x'})
def Nameclean(dataset, a):
    """Collapse the values of column ``a`` of ``dataset`` to a colour word.

    Every value of ``dataset[a]`` that contains one of the known colour
    words as a substring is replaced, in place, by that word alone.  The
    words are tried in list order.  Missing values and values containing
    none of the words are left unchanged.  Returns ``None``.
    """
    wordlist = [
        "SILVER", "ILLINI", "TEAL", "YELLOW", "GREEN", "BROWN", "GREY",
        "GOLD", "LIME", "BLUE", "RED", "BRONZE", "ORANGE", "LAVENDER",
        "RUBY",
    ]
    for word in wordlist:
        # Plain substring test (no regex); na=False keeps missing values out.
        matches = dataset[a].str.contains(word, regex=False, na=False)
        # Assign through .loc on the frame itself: the former chained form
        # ``dataset[a][i] = word`` can write to a temporary copy and be lost.
        dataset.loc[matches, a] = word
# Reduce every route variant to its colour word.
Nameclean(routeclean, "x")
sumroute = routeclean[:18]

# One row per cleaned name; 'x' holds how many raw route_ids map onto it.
variant_counts = routeclean["x"].value_counts()
cleanedroute = variant_counts.reset_index().rename(columns={'index': 'name'})

# Bars are coloured with the colorbar0 list that is already in scope.
bar_marker = {'color': colorbar0}
trace0 = go.Bar(x=cleanedroute["name"], y=cleanedroute["x"], marker=bar_marker)
data = [trace0]
layout = go.Layout(title='Distribution of each route by color')
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='color-bar')
#py.image.save_as(fig,'Whale-plot2.png')
Out[15]:
In [11]:
len(x_val)
Out[11]:
In [68]:
# 3. Xinyu Zhang
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import os
import csv
In [69]:
# Load the shapes table.
# NOTE(review): absolute, machine-specific path — it will not resolve on another
# computer; the other tables in this file are read by bare file name.
df = pd.read_csv("/Users/celine/Desktop/5DataVisual/google_transit/shapes.txt")
# Group the rows by shape_id so each shape can be iterated and drawn separately.
df2=df.groupby('shape_id')
In [76]:
# Draw every shape as a faint line, stepping through the jet colormap so
# each shape gets its own colour.
plt.rcParams["figure.figsize"] = (20, 20)
mycolor = plt.cm.jet

# The colour step is derived from the actual number of shapes instead of
# the previously hard-coded 677, so the full colormap is used whatever the
# size of the feed.
n_shapes = df2.ngroups
color_id = np.linspace(0, 1, n_shapes)

s = 0
for s, (name, group) in enumerate(df2, start=1):
    plt.plot(
        group['shape_pt_lon'],
        group['shape_pt_lat'],
        color=mycolor(s / n_shapes),
        alpha=0.1,
        linewidth=1.5,
    )
plt.show()
In [77]:
# 4. Xiaoliang Jiang
# Read stops.txt into `data`: a dict mapping each header name to the list of
# string values found in that column.
fn = "stops.txt"
with open(fn, "r") as f:
    reader = csv.reader(f)
    header = next(reader)
    data = {column: [] for column in header}
    for row in reader:
        # Cells beyond the header width are ignored; short rows only fill
        # the leading columns.
        for position, value in enumerate(row[:len(header)]):
            data[header[position]].append(value)
In [78]:
class Dataset:
    """Column-oriented table: a dict mapping column names to arrays.

    The filter, split, size and stats methods rely on NumPy array behaviour
    (boolean-mask indexing, ``.size``, ``.dtype``), so columns have to be
    turned into arrays with :meth:`convert` before those methods are used.
    The dictionary passed to the constructor is stored by reference, not
    copied.
    """

    def __init__(self, data):
        self.data = data

    def convert(self, column, dtype):
        """Replace ``column`` with a NumPy array of the given ``dtype``."""
        self.data[column] = np.array(self.data[column], dtype=dtype)

    def columns(self):
        """Return the column names (the keys view of the underlying dict)."""
        return self.data.keys()

    def _select(self, mask):
        """Return a new Dataset holding the rows where ``mask`` is true."""
        return Dataset({name: values[mask] for name, values in self.data.items()})

    def filter_eq(self, column, value):
        """Return the rows whose ``column`` equals ``value``."""
        return self._select(self.data[column] == value)

    def filter_lt(self, column, value):
        """Return the rows whose ``column`` is strictly less than ``value``."""
        return self._select(self.data[column] < value)

    def filter_gt(self, column, value):
        """Return the rows whose ``column`` is strictly greater than ``value``."""
        return self._select(self.data[column] > value)

    def filter_ne(self, column, value):
        """Return the rows whose ``column`` differs from ``value``."""
        return self._select(self.data[column] != value)

    def size(self):
        """Return the number of rows, taken from the first column.

        A Dataset without any column has 0 rows (previously this fell off
        the end of the loop and returned ``None``).
        """
        for values in self.data.values():
            return values.size
        return 0

    def split(self, column):
        """Return ``{value: Dataset}`` with one entry per distinct value of ``column``."""
        new_datasets = {}
        for split_value in np.unique(self.data[column]):
            new_datasets[split_value] = self.filter_eq(column, split_value)
        return new_datasets

    def stats(self):
        """Return ``{column: (min, max, std, mean)}`` for the numeric columns.

        Integer and floating-point columns of any width are included; the
        former comparison against the strings "float" and "int" matched only
        the platform default width of each.
        """
        statistics = {}
        for key, values in self.data.items():
            # dtype.kind: 'i' signed int, 'u' unsigned int, 'f' float.
            if values.dtype.kind not in "iuf":
                continue
            statistics[key] = (values.min(), values.max(), values.std(), values.mean())
        return statistics

    def compare(self, other):
        """Print this dataset's statistics next to those of ``other``.

        Only columns that have statistics in both datasets are printed.
        """
        stats1 = self.stats()
        stats2 = other.stats()
        for column in self.columns():
            # Skip non-numeric columns and columns `other` cannot match,
            # instead of failing with KeyError on the latter.
            if column not in stats1 or column not in stats2:
                continue
            print("Column '{0:25s}'".format(column))
            for s1, s2 in zip(stats1[column], stats2[column]):
                print("    {0} vs {1}".format(s1, s2))

    def plot(self, x_column, y_column):
        """Scatter ``y_column`` against ``x_column`` as dots on the current axes."""
        plt.plot(self.data[x_column], self.data[y_column], '.')
In [79]:
header
Out[79]:
In [80]:
# Wrap the raw stops columns.  Dataset keeps a reference to `data`, so the
# conversions below also replace the lists inside `data` with NumPy arrays.
stopsdata = Dataset(data)

# Target dtype per column; any column not listed here is converted to str.
# The first key used to read 'stop_ids', a typo that never matched a column
# (the lookup silently fell back to the "str" default).
value_types = {
    'stop_id': 'str',
    'stop_code': 'str',
    'stop_name': 'str',
    'stop_desc': 'str',
    'stop_lat': 'float',
    'stop_lon': 'float',
    'zone_id': 'float',
    'stop_url': 'str',
    'location_type': 'str',
    'parent_station': 'str',
}
for v in stopsdata.columns():
    stopsdata.convert(v, value_types.get(v, "str"))
In [81]:
# Four panels of the same stop scatter; only the marker size and colour
# change, standing in for the area reachable within the stated walking time.
# The figure size is set before the first subplot creates the figure, because
# rcParams only affect figures created afterwards.
plt.rcParams["figure.figsize"] = (20, 20)

# (subplot position, title, marker colour, marker size)
walk_panels = (
    (221, "Walkable areas in 1 minutes for each stop", '#00ff80', 7),
    (222, "Walkable areas in 2 minutes for each stop", '#80ff00', 15),
    (223, "Walkable areas in 5 minutes for each stop", '#ffff00', 32),
    (224, "Walkable areas in 10 minutes for each stop", '#ff0000', 65),
)
for position, panel_title, panel_color, marker_size in walk_panels:
    plt.subplot(position)
    plt.grid()
    plt.xlabel("Longitude", fontsize=15)
    plt.ylabel("Latitude", fontsize=15)
    plt.title(panel_title, fontsize=15)
    # Markers only (ls=''), no marker edge, nearly transparent so that
    # overlapping stops build up colour.
    plt.plot(
        data["stop_lon"],
        data["stop_lat"],
        c=panel_color,
        marker='o',
        markersize=marker_size,
        mec='none',
        ls='',
        alpha=0.05,
    )
Out[81]:
In [83]:
# 16 x 16 grid of stop counts over the bounding box of all stops, shown with
# four different colormaps.
plt.rcParams["figure.figsize"] = (20, 15)

# stats() yields (min, max, std, mean) per numeric column.
stats = stopsdata.stats()
lon_min, lon_max = stats["stop_lon"][0], stats["stop_lon"][1]
lat_min, lat_max = stats["stop_lat"][0], stats["stop_lat"][1]

num_bins = 16
# num_bins + 1 evenly spaced bin edges; linspace hits both end points exactly.
lon = np.linspace(lon_min, lon_max, num_bins + 1)
lat = np.linspace(lat_min, lat_max, num_bins + 1)

# Rows index latitude bins, columns longitude bins.  histogram2d counts
# every stop exactly once; the former strict ">" / "<" filters dropped all
# stops lying on a bin edge, including the extreme stops that define the box.
tree_count, _, _ = np.histogram2d(
    stopsdata.data["stop_lat"],
    stopsdata.data["stop_lon"],
    bins=[lat, lon],
)

#plt.xlim(lon_min,lon_max)
#plt.ylim(lat_min,lat_max)
heatmap_panels = (
    (221, plt.cm.gray_r),
    (222, plt.cm.Blues),
    (223, plt.cm.afmhot),
    (224, plt.cm.BuGn),
)
for position, panel_cmap in heatmap_panels:
    plt.subplot(position)
    plt.imshow(
        tree_count,
        extent=(lon_min, lon_max, lat_min, lat_max),
        origin="lower",
        cmap=panel_cmap,
        interpolation='none',
    )
    plt.xlabel("Longitude", fontsize=15)
    plt.ylabel("Latitude", fontsize=15)
    plt.title("The distribution of stops", fontsize=25)
    color_bar = plt.colorbar()
    color_bar.set_label("Count")
In [84]:
import plotly
import plotly.plotly as py
from plotly.graph_objs import *
import pandas as pd
import math
from IPython.display import Image
import time
# Plotly credentials are read from the environment instead of being written
# into the notebook: an API key stored in source is readable by everyone who
# can see the file.  A missing variable raises KeyError naming it.
# NOTE(review): the key that used to be embedded here should be treated as
# leaked and regenerated in the Plotly account settings.
import os

plotly.tools.set_credentials_file(
    username=os.environ["PLOTLY_USERNAME"],
    api_key=os.environ["PLOTLY_API_KEY"],
)
In [87]:
# Load the trips and shapes tables (read as ISO-8859-1 text).
dftrips = pd.read_csv("trips.txt",encoding='iso-8859-1')
# NOTE(review): absolute, machine-specific path — it will not resolve on another
# computer; the trips table above is read by bare file name.
dfshapes = pd.read_csv("/Users/celine/Desktop/5DataVisual/google_transit/shapes.txt",encoding='iso-8859-1')
In [88]:
# Reload the routes and trips tables (read as ISO-8859-1 text).
dfroutes = pd.read_csv("routes.txt",encoding='iso-8859-1')
dftrips = pd.read_csv("trips.txt",encoding='iso-8859-1')
# Count the trips per route_id and flatten the result into a two-column frame,
# renaming the former index column to 'x'.
# NOTE(review): the code that follows reads route names from 'x' and the counts
# from 'route_id'; that column layout presumably matches pandas < 2.0 — confirm
# against the installed pandas version.
routeclean=dftrips["route_id"].value_counts().reset_index().rename(columns={'index': 'x'})
def Nameclean(dataset, a):
    """Collapse the values of column ``a`` of ``dataset`` to a colour word.

    Every value of ``dataset[a]`` that contains one of the known colour
    words as a substring is replaced, in place, by that word alone.  The
    words are tried in list order.  Missing values and values containing
    none of the words are left unchanged.  Returns ``None``.
    """
    wordlist = [
        "SILVER", "ILLINI", "TEAL", "YELLOW", "GREEN", "BROWN", "GREY",
        "GOLD", "LIME", "BLUE", "RED", "BRONZE", "ORANGE", "LAVENDER",
        "RUBY",
    ]
    for word in wordlist:
        # Plain substring test (no regex); na=False keeps missing values out.
        matches = dataset[a].str.contains(word, regex=False, na=False)
        # Assign through .loc on the frame itself: the former chained form
        # ``dataset[a][i] = word`` can write to a temporary copy and be lost.
        dataset.loc[matches, a] = word
# Collapse every route variant onto its colour word.
Nameclean(routeclean, "x")
sumroute = routeclean[:18]

# One row per cleaned route name.  Right after this statement the 'x' column
# holds how many raw route_ids collapsed onto each name.
cleanedroute = routeclean["x"].value_counts().reset_index().rename(columns={'index': 'name'})

# Overwrite 'x' with the summed trip counts of all variants of each name.
# A grouped sum plus a whole-column assignment replaces the nested loops and
# the chained assignment ``cleanedroute["x"][j] = rsum``, which can write to
# a temporary copy and leave the frame unchanged.
trips_per_name = routeclean.groupby("x")["route_id"].sum()
cleanedroute["x"] = cleanedroute["name"].map(trips_per_name)

# Colours: for each cleaned name take the route_color of the first routes
# row carrying that name; names without a match contribute no colour.
Nameclean(dfroutes, "route_id")
first_color = {}
for route_name, route_color in zip(dfroutes['route_id'], dfroutes['route_color']):
    first_color.setdefault(route_name, route_color)
colorbar0 = [
    "#%s" % first_color[name]
    for name in cleanedroute['name']
    if name in first_color
]
In [105]:
#Xiaoliang Jiang
# Line colour per route name; the two lists are parallel.
colors = ['#008063', '#fcee1f', '#d1d3d4', '#5a1d5a', '#808285', '#006991', '#a78bc0', '#eb008b', '#b2d235', '#823822', '#c7994a', '#f99f2a', '#9e8966', '#ed1c24', '#355caa', '#2b3088', '#000000', '#ffbfff']
names = ['GREEN', 'YELLOW', 'SILVER', 'ILLINI', 'GREY', 'TEAL', 'LAVENDER', 'RUBY', 'LIME', 'BROWN', 'GOLD', 'ORANGE', 'BRONZE', 'RED', 'BLUE', 'NAVY', 'RAVEN', 'PINK']

plt.rcParams["figure.figsize"] = (10, 5)

# stats() yields (min, max, std, mean) per numeric column.
stats = stopsdata.stats()
lon_min, lon_max = stats["stop_lon"][0], stats["stop_lon"][1]
lat_min, lat_max = stats["stop_lat"][0], stats["stop_lat"][1]

num_bins = 16
# num_bins + 1 evenly spaced bin edges; linspace hits both end points exactly.
lon = np.linspace(lon_min, lon_max, num_bins + 1)
lat = np.linspace(lat_min, lat_max, num_bins + 1)

# Rows index latitude bins, columns longitude bins.  histogram2d counts
# every stop exactly once; the former strict ">" / "<" filters dropped all
# stops lying on a bin edge, including the extreme stops that define the box.
tree_count, _, _ = np.histogram2d(
    stopsdata.data["stop_lat"],
    stopsdata.data["stop_lon"],
    bins=[lat, lon],
)

#plt.xlim(lon_min,lon_max)
#plt.ylim(lat_min,lat_max)
plt.imshow(
    tree_count,
    extent=(lon_min, lon_max, lat_min, lat_max),
    origin="lower",
    interpolation='none',
    cmap=plt.cm.gray_r,
)
plt.xlabel("Longitude", fontsize=10)
plt.ylabel("Latitude", fontsize=10)
plt.title("The distribution of stops & amount of bus on each route", fontsize=15)

# Overlay one representative shape per route, with the line width scaled by
# the route's value in cleanedroute['x'].
for name in cleanedroute["name"]:
    tempshapeID = dftrips[dftrips["route_id"] == name]["shape_id"]
    if tempshapeID.empty:
        # No trip carries exactly this route_id.  The old loop then reused
        # the shape of the previous route (or failed on the first route).
        continue
    # Same pick as the old counting loop: the 21st shape id of the route,
    # or the last one when the route has fewer than 21 trips.
    tempshapeIDvalue = tempshapeID.iloc[min(20, len(tempshapeID) - 1)]
    subrows = dfshapes[dfshapes["shape_id"] == tempshapeIDvalue]
    # Scalar width instead of a one-element Series.
    route_weight = cleanedroute.loc[cleanedroute['name'] == name, 'x'].iloc[0]
    plt.plot(
        subrows["shape_pt_lon"],
        subrows["shape_pt_lat"],
        c=colors[names.index(name)],
        linewidth=route_weight / 200,
        mec='none',
        ls='-',
    )

plt.xlim([lon_min, lon_max])
plt.ylim([lat_min, lat_max])
color_bar = plt.colorbar()
color_bar.set_label("Count")
plt.plot()
Out[105]:
In [48]:
# Four marker layers of decreasing size drawn on the same axes, largest
# first, followed by all shapes.  Each pass resets the axis labels and the
# title, so only the last title remains on the finished figure.
plt.rcParams["figure.figsize"] = (20, 20)

# (title, marker colour, marker size); the trailing notes are the original
# per-layer remarks.
walk_layers = (
    ("Walkable areas in 10 minutes for each stop", '#ffcccc', 169),  #15min
    ("Walkable areas in 5 minutes for each stop", '#ffe5cc', 56),  #10min
    ("Walkable areas in 2 minutes for each stop", '#ffffcc', 28),  #5min
    ("Walkable areas in 1/2/5/10 minutes for each stop", '#e5ffcc', 11.2),  #2min
)
for layer_title, layer_color, layer_size in walk_layers:
    # grid() is called once per layer, exactly as before.
    plt.grid()
    plt.xlabel("Longitude", fontsize=15)
    plt.ylabel("Latitude", fontsize=15)
    plt.title(layer_title, fontsize=25)
    plt.plot(
        data["stop_lon"],
        data["stop_lat"],
        c=layer_color,
        marker='o',
        markersize=layer_size,
        mec='none',
        ls='',
    )

# Shapes on top, coloured by their running number through the jet colormap.
# NOTE(review): 677 is presumably the number of shapes — confirm.
mycolor = plt.cm.jet
color_id = np.linspace(0, 1, 677)
s = 0
for name, group in df2:
    s += 1
    plt.plot(
        group['shape_pt_lon'],
        group['shape_pt_lat'],
        color=plt.cm.jet(s / 677),
        alpha=0.2,
        linewidth=2.5,
    )
plt.show()
In [46]:
# Twelve concentric marker layers per stop (largest first), then all shapes
# drawn in shades of the binary colormap.
plt.rcParams["figure.figsize"] = (20, 20)
plt.grid()
plt.xlabel("Longitude", fontsize=15)
plt.ylabel("Latitude", fontsize=15)
plt.title("Walkable areas & MTD routes", fontsize=25)
plt.xlim(-88.339, -88.139)
plt.ylim(40.02329, 40.17671)

r = 11.2528
colorlist = (
    "#ffcccc", "#ffd5cc", "#ffddcc", "#ffe6cc", "#ffeecc", "#fff7cc",
    "#ffffcc", "#f7ffcc", "#eeffcc", "#e6ffcc", "#ddffcc", "#d5ffcc",
)
# The first colour goes with the largest ring (i = 12), the last with the
# smallest (i = 1).
for ring_color, i in zip(colorlist, range(12, 0, -1)):
    plt.plot(
        data["stop_lon"],
        data["stop_lat"],
        color=ring_color,
        marker='o',
        markersize=11.2528 * i,
        mec='none',
        ls='',
    )

# NOTE(review): 677 is presumably the number of shapes — confirm.
mycolor = plt.cm.jet
color_id = np.linspace(0, 1, 677)
s = 0
for name, group in df2:
    s += 1
    plt.plot(
        group['shape_pt_lon'],
        group['shape_pt_lat'],
        color=plt.cm.binary(s / 677),
        alpha=0.2,
        linewidth=2.5,
    )
plt.show()
In [90]:
# Twelve concentric marker layers per stop (largest first), then one
# representative shape per route drawn in the route's colour.
plt.rcParams["figure.figsize"] = (20, 20)
plt.grid()
plt.xlabel("Longitude", fontsize=15)
plt.ylabel("Latitude", fontsize=15)
plt.title("Walkable areas & MTD routes", fontsize=25)
plt.xlim(-88.339, -88.139)
plt.ylim(40.02329, 40.17671)

r = 11.2528
colorlist = ("#ffcccc", "#ffd5cc", "#ffddcc", "#ffe6cc", "#ffeecc", "#fff7cc", "#ffffcc", "#f7ffcc", "#eeffcc", "#e6ffcc", "#ddffcc", "#d5ffcc")
for i in range(12, 0, -1):
    plt.plot(
        data["stop_lon"],
        data["stop_lat"],
        color=colorlist[12 - i],
        marker='o',
        markersize=r * i,
        mec='none',
        ls='',
    )

for name in cleanedroute["name"]:
    tempshapeID = dftrips[dftrips["route_id"] == name]["shape_id"]
    if tempshapeID.empty:
        # No trip carries exactly this route_id.  The old loop then reused
        # the shape of the previous route (or failed on the first route).
        continue
    # Same pick as the old counting loop: the 21st shape id of the route,
    # or the last one when the route has fewer than 21 trips.
    tempshapeIDvalue = tempshapeID.iloc[min(20, len(tempshapeID) - 1)]
    subrows = dfshapes[dfshapes["shape_id"] == tempshapeIDvalue]
    plt.plot(
        subrows["shape_pt_lon"],
        subrows["shape_pt_lat"],
        c=colors[names.index(name)],
        linewidth=2,
        mec='none',
        ls='-',
    )
In [50]:
import pandas as pd
import numpy as np
import plotly
import plotly.plotly as py
from plotly.graph_objs import *
In [ ]:
In [51]:
# Credentials are read from the environment instead of being written into
# the notebook: keys and tokens stored in source are readable by everyone
# who can see the file.  A missing variable raises KeyError naming it.
# NOTE(review): the Plotly key and Mapbox token that used to be embedded here
# should be treated as leaked and regenerated.
import os

plotly.tools.set_credentials_file(
    username=os.environ["PLOTLY_USERNAME"],
    api_key=os.environ["PLOTLY_API_KEY"],
)
mapbox_access_token = os.environ["MAPBOX_ACCESS_TOKEN"]
In [52]:
# Load the stops table as a DataFrame (read as ISO-8859-1 text); this rebinds `df`.
df = pd.read_csv('stops.txt',encoding='iso-8859-1')
In [53]:
# One semi-transparent orange marker per stop; the hover text shows the name.
stop_marker = Marker(color='#FF9933', opacity=0.4, size=9)
stop_trace = Scattermapbox(
    lat=df['stop_lat'],
    lon=df['stop_lon'],
    mode='markers',
    marker=stop_marker,
    text='Stopname: ' + df['stop_name'],
)
data = Data([stop_trace])

# Map view: fixed centre and zoom, no rotation or tilt.
map_view = dict(
    accesstoken=mapbox_access_token,
    bearing=0,
    center=dict(lat=40.11, lon=-88.23),
    pitch=0,
    zoom=10,
)
layout = Layout(
    title="Distribution of MTD stops in Champaign <br>(Hover for breakdown)",
    autosize=True,
    hovermode='closest',
    mapbox=map_view,
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='Multiple Mapbox')
Out[53]:
In [92]:
import pandas as pd
# Load the shapes table.
# NOTE(review): absolute, machine-specific path — it will not resolve on another computer.
df1 = pd.read_csv('/Users/celine/Desktop/5DataVisual/google_transit/shapes.txt')
#print(df1.head())
# Take the shape_id found in row 0 and build a boolean mask of all rows with that shape_id.
aa=df1['shape_id'][0]
good=(df1["shape_id"]==aa)
#df1[good]
In [95]:
import pandas as pd
# Load the trips table.  NOTE(review): this rebinds `df2`, which earlier code in
# this file uses for the shapes groupby.
df2 = pd.read_csv('trips.txt')
#df2.head()
# Mask of the trips whose shape_id equals `aa`.
good2=(df2['shape_id']==aa)
#df2[good2]
# The trip to analyse is picked by a hard-coded row label, not from the mask.
# NOTE(review): presumably row 4704 is one of the rows selected by good2 — confirm.
bb=df2["trip_id"][4704]
#bb # target trip_id
In [96]:
import pandas as pd
# Load the stop_times table and build a mask of the rows whose trip_id equals `bb`.
df3 = pd.read_csv('stop_times.txt')
#df3.head()
good3=(df3['trip_id']==bb)
#df3[good3]
In [97]:
# Arrival times and stop ids taken from a hard-coded block of 39 consecutive rows.
# NOTE(review): presumably these are exactly the rows selected by good3 — confirm;
# the slice bounds are not derived from that mask.
list_arrival_time=df3['arrival_time'][195862:195901].tolist()
list_stop_id=df3['stop_id'][195862:195901].tolist()
In [98]:
import pandas as pd
# Load the stops table and look up the coordinates of the first stop in list_stop_id.
df4 = pd.read_csv('stops.txt')
#df4.head()
cc=list_stop_id[0]
#cc
good4=(df4['stop_id']==cc)
# Lookup through the boolean mask; the value of this expression is not stored.
df4['stop_lat'][good4],df4['stop_lon'][good4]
# Lookup through a hard-coded row label.
# NOTE(review): presumably 1840 is the row matched by good4 — confirm.
df4['stop_lat'][1840],df4['stop_lon'][1840]
Out[98]:
In [99]:
# For every stop id of the trip, collect the matching stops rows' latitude
# and longitude.  Each list element is a Series (the mask selection), not a
# single number.
list_stop_lat = []
list_stop_lon = []
for stops in list_stop_id:
    goodtemp = df4['stop_id'] == stops
    matching_rows = df4[goodtemp]
    list_stop_lat.append(matching_rows['stop_lat'])
    list_stop_lon.append(matching_rows['stop_lon'])
#list_stop_lat
#list_row=[1840,393,986,618,2195,2407,223,704,442,2399,1111,1684,404,961,380,390,1266,1878,1598,150,1071,540,1131,180,2312,617,1803,2400,2262,1181,561,1894,1143,2000,790,1406,1035,2389]
In [100]:
# Hard-coded row labels of the stops table, one per stop of the trip, in order.
list_row = [
    1840, 393, 986, 618, 2195, 2407, 223, 704, 442, 2399, 1111, 1684, 404,
    961, 380, 390, 1266, 1878, 1598, 150, 1071, 540, 1131, 180, 2312, 617,
    1803, 2400, 2262, 1181, 561, 1894, 1143, 2000, 790, 1406, 1035, 2389,
    480,
]

# Scalar latitude / longitude of each listed row.
stop_lat_column = df4['stop_lat']
stop_lon_column = df4['stop_lon']
list_stop_lat = [stop_lat_column[row_label] for row_label in list_row]
list_stop_lon = [stop_lon_column[row_label] for row_label in list_row]
#list_stop_lat
In [101]:
# Hard-coded row labels of the shapes table, one per stop of the trip.
# A first pass that searched the shape for points whose coordinates equal
# each stop was removed: its result was discarded by the assignment below
# before anything read it.
row2 = [0, 116, 152, 200, 275, 323, 371, 430, 461, 561, 697, 849, 962, 1085, 1110, 1136, 1170, 1200, 1259, 1283, 1298, 1394, 1457, 1488, 1542, 1600, 1700, 1738, 1767, 1854, 1932, 1972, 2031, 2121, 2191, 2242, 2291, 2366, 2481]

# shape_dist_traveled value at each of those rows.
shape_distance = df1['shape_dist_traveled']
list_distance = [shape_distance[row_label] for row_label in row2]
#list_distance

# Coordinates of every point of the selected shape (rows where `good` is true).
#good=(df1["shape_id"]==aa)
list_pt_lat = df1['shape_pt_lat'][good]
list_pt_lon = df1['shape_pt_lon'][good]
In [102]:
from datetime import datetime

# Parse the 'HH:MM:SS' arrival strings into datetime objects so they can be
# subtracted from each other.
# The names were `format` and `time`: the first shadowed the builtin
# format(), the second replaced the `time` module imported earlier in this
# file.
# NOTE(review): presumably no arrival time is past 23:59:59; strptime raises
# ValueError for an hour of 24 or more — confirm against the feed.
time_format = '%H:%M:%S'
list_arrival = [
    datetime.strptime(arrival_text, time_format)
    for arrival_text in list_arrival_time
]
#list_arrival
In [103]:
# Average speed between consecutive stops: distance difference divided by
# the arrival-time difference in seconds.
# The number of segments is derived from the data instead of the previously
# hard-coded 38.
n_segments = min(len(list_distance), len(list_arrival)) - 1

list_velocity = []
for i in range(n_segments):
    ds = list_distance[i + 1] - list_distance[i]
    dt = (list_arrival[i + 1] - list_arrival[i]).total_seconds()
    if dt == 0:
        # Two stops with the same arrival time: use 15 seconds instead of
        # dividing by zero.
        dt = 15
    list_velocity.append(ds / dt)
#list_velocity
In [104]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.pyplot as plt
# Speed range mapped onto the blue -> red scale.  These were named `min` and
# `max`, which replaced the builtins for everything executed afterwards.
speed_min, speed_max = (2, 18)
step = 2

# Setting up a colormap that's a simple transition from blue to red.
mymap = mpl.colors.LinearSegmentedColormap.from_list('mycolors', ['blue', 'red'])

# A throw-away contourf only provides the colorbar information; the figure
# it was drawn on is cleared straight away.
Z = [[0, 0], [0, 0]]
levels = range(speed_min, speed_max + step, step)
CS3 = plt.contourf(Z, levels, cmap=mymap)
plt.clf()

figure3 = plt.figure(figsize=(12, 8))
plt.plot(list_stop_lon, list_stop_lat, 'x')
plt.legend(["stop"])
plt.xlabel("Longitude")
plt.ylabel("Latitude")

# One line piece per stop-to-stop segment, coloured by its velocity.  The
# loop follows list_velocity instead of a hard-coded count of 38.
for i, z in enumerate(list_velocity):
    start = row2[i]
    end = row2[i + 1]
    x = list_pt_lon[start:end]
    y = list_pt_lat[start:end]
    # Position of the velocity inside [speed_min, speed_max], clamped to
    # 0..1 because an RGB channel outside that range is rejected.
    r = (float(z) - speed_min) / (speed_max - speed_min)
    r = max(0.0, min(1.0, r))
    g = 0
    b = 1 - r
    plt.plot(x, y, color=(r, g, b), linewidth=3)

plt.xlim([-88.25, -88.16])
plt.ylim(40.11, 40.12)
plt.colorbar(CS3)  # using the colorbar info obtained from contourf
plt.show()
In [ ]: