In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import os
import csv

In [2]:
print (os.getcwd())


C:\Users\xiaoliang\Documents\GitHub

In [3]:
fn = "stops.txt"
with open(fn, "r") as f:
    reader = csv.reader(f)
    header = next(reader)
    data = {}
    for column in header:
        data[column] = []
    for row in reader:
        for column, value in zip(header, row):
            data[column].append(value)

In [4]:
class Dataset:
    def __init__(self, data):
        self.data = data
        
    def convert(self, column, dtype):
        self.data[column] = np.array(self.data[column], dtype=dtype)
        
    def columns(self):
        return self.data.keys()
    
    def filter_eq(self, column, value):
        good = (self.data[column] == value)
        new_data = {}
        for column in self.data:
            new_data[column] = self.data[column][good]
        return Dataset(new_data)
    
    def filter_lt(self, column, value):
        good = (self.data[column] < value)
        new_data = {}
        for column in self.data:
            new_data[column] = self.data[column][good]
        return Dataset(new_data)
    
    def filter_gt(self, column, value):
        good = (self.data[column] > value)
        new_data = {}
        for column in self.data:
            new_data[column] = self.data[column][good]
        return Dataset(new_data)
    
    def filter_ne(self, column, value):
        good = (self.data[column] != value)
        new_data = {}
        for column in self.data:
            new_data[column] = self.data[column][good]
        return Dataset(new_data)
    
    def size(self):
        for key in self.data:
            return self.data[key].size

    def split(self, column):
        new_datasets = {}
        for split_value in np.unique(self.data[column]):
            new_datasets[split_value] = self.filter_eq(column, split_value)
        return new_datasets

    def stats(self):
        statistics = {}
        for key in self.data:
            if self.data[key].dtype not in ("float", "int"):
                continue
            values = self.data[key]
            statistics[key] = (values.min(), values.max(), values.std(), values.mean())
        return statistics
    
    def compare(self, other):
        stats1 = self.stats()
        stats2 = other.stats()
        for column in self.columns():
            if column not in stats1: continue
            print("Column '{0:25s}'".format(column))
            for s1, s2 in zip(stats1[column], stats2[column]):
                print("    {0} vs {1}".format(s1, s2))
    
    def plot(self, x_column, y_column):
        plt.plot(self.data[x_column], self.data[y_column], '.')

In [6]:
stopsdata= Dataset(data)
value_types = {'stop_ids': 'str',
               'stop_code': 'str',
               'stop_name':'str',
               'stop_desc':'str',
               'stop_lat':'float',
               'stop_lon':'float',
               'zone_id':'float',
               'stop_url':'str',
               'location_type':'str',
               'parent_station':'str'}
for v in stopsdata.columns():
    stopsdata.convert(v, value_types.get(v, "str"))

In [136]:
plt.rcParams["figure.figsize"] = (20, 20)
plt.subplot(221)

plt.grid()
plt.xlabel("Longitude",fontsize=15)
plt.ylabel("Latitude",fontsize=15)
plt.title("Walkable areas in 1 minutes for each stop", fontsize=25)
plt.plot(data["stop_lon"],data["stop_lat"],c='#00ff80',marker='o',markersize=7,mec='none',ls='',alpha=0.05)
plt.subplot(222)
)
plt.grid()
plt.xlabel("Longitude",fontsize=15)
plt.ylabel("Latitude",fontsize=15)
plt.title("Walkable areas in 2 minutes for each stop", fontsize=25)
plt.plot(data["stop_lon"],data["stop_lat"],c='#80ff00',marker='o',markersize=15,mec='none',ls='',alpha=0.05)
plt.subplot(223)
)
plt.grid()
plt.xlabel("Longitude",fontsize=15)
plt.ylabel("Latitude",fontsize=15)
plt.title("Walkable areas in 5 minutes for each stop", fontsize=25)
plt.plot(data["stop_lon"],data["stop_lat"],c='#ffff00',marker='o',markersize=32,mec='none',ls='',alpha=0.05)
plt.subplot(224)

plt.grid()
plt.xlabel("Longitude",fontsize=15)
plt.ylabel("Latitude",fontsize=15)
plt.title("Walkable areas in 10 minutes for each stop", fontsize=25)
plt.plot(data["stop_lon"],data["stop_lat"],c='#ff0000',marker='o',markersize=65,mec='none',ls='',alpha=0.05)


Out[136]:
[<matplotlib.lines.Line2D at 0x20e864e0390>]

explaination

Accroding to the google map, 0.02 longitude at 40.06N latitude equals 1.1 miles. So the circle here represent a circle area with r=0.275mile which means an area which only take 5 minutes to walk.


In [133]:
stats=stopsdata.stats()
plt.rcParams["figure.figsize"] = (20, 15)
stats=stopsdata.stats()
lon_min=stats["stop_lon"][0]
lon_max=stats["stop_lon"][1]
lat_min=stats["stop_lat"][0]
lat_max=stats["stop_lat"][1]
num_bins=16
lon=np.mgrid[lon_min:lon_max:(num_bins+1)*1j]
lat=np.mgrid[lat_min:lat_max:(num_bins+1)*1j]
tree_count=np.zeros((num_bins,num_bins))
for i in range(num_bins):
    left_lat=lat[i]
    right_lat=lat[i+1]
    filter_lat_left=stopsdata.filter_gt("stop_lat",left_lat)
    filter_lat_right=filter_lat_left.filter_lt("stop_lat",right_lat)
    for j in range(num_bins):
        left_lon=lon[j]
        right_lon=lon[j+1]
        filter_lon_left=filter_lat_right.filter_gt("stop_lon",left_lon)
        filter_lon_right=filter_lon_left.filter_lt("stop_lon",right_lon)
        tree_count[i,j] +=filter_lon_right.size()
#plt.xlim(lon_min,lon_max)
#plt.ylim(lat_min,lat_max)
plt.subplot(221)
plt.imshow(tree_count, extent=(lon_min,lon_max,lat_min,lat_max),origin="lower",cmap =plt.cm.gray_r,interpolation='none')
plt.xlabel("Longitude",fontsize=15)
plt.ylabel("Latitude",fontsize=15)
plt.title("The distribution of stops", fontsize=25)
color_bar=plt.colorbar()
color_bar.set_label("Count")
plt.subplot(222)
plt.imshow(tree_count, extent=(lon_min,lon_max,lat_min,lat_max),origin="lower",cmap =plt.cm.Blues,interpolation='none')
plt.xlabel("Longitude",fontsize=15)
plt.ylabel("Latitude",fontsize=15)
plt.title("The distribution of stops", fontsize=25)
color_bar=plt.colorbar()
color_bar.set_label("Count")
plt.subplot(223)
plt.imshow(tree_count, extent=(lon_min,lon_max,lat_min,lat_max),origin="lower", cmap = plt.cm.afmhot,interpolation='none')
plt.xlabel("Longitude",fontsize=15)
plt.ylabel("Latitude",fontsize=15)
plt.title("The distribution of stops", fontsize=25)
color_bar=plt.colorbar()
color_bar.set_label("Count")

plt.subplot(224)
plt.imshow(tree_count, extent=(lon_min,lon_max,lat_min,lat_max),origin="lower", cmap = plt.cm.BuGn,interpolation='none')
plt.xlabel("Longitude",fontsize=15)
plt.ylabel("Latitude",fontsize=15)
plt.title("The distribution of stops", fontsize=25)
color_bar=plt.colorbar()
color_bar.set_label("Count")



In [ ]:
import plotly
import plotly.plotly as py
from plotly.graph_objs import *
import pandas as pd
import math
from IPython.display import Image
import time
plotly.tools.set_credentials_file(username='xjiang36', api_key='uZyWsdSH3xd9bxUefIFf')

In [18]:
dftrips = pd.read_csv("trips.txt",encoding='iso-8859-1')
dfshapes = pd.read_csv("shapes.txt",encoding='iso-8859-1')

In [55]:
dfroutes = pd.read_csv("routes.txt",encoding='iso-8859-1')
dftrips = pd.read_csv("trips.txt",encoding='iso-8859-1')
routeclean=dftrips["route_id"].value_counts().reset_index().rename(columns={'index': 'x'})
def Nameclean(dataset,a):
    wordlist=["SILVER","ILLINI","TEAL","YELLOW","GREEN","BROWN","GREY","GOLD","LIME","BLUE","RED","BROWN","BRONZE","ORANGE","LAVENDER","RUBY"]
    for j in range(len(wordlist)):
        for i in range(len(dataset)):
            if dataset[a][i].find(wordlist[j])>=0:
                dataset[a][i]=wordlist[j]
Nameclean(routeclean,"x")
sumroute=routeclean[:18]
cleanedroute=routeclean["x"].value_counts().reset_index().rename(columns={'index': 'name'})
for j in range(len(cleanedroute["name"])):
    rsum=0
    for i in range(len(routeclean)):
        if routeclean["x"][i]==cleanedroute["name"][j]:
            rsum+=routeclean["route_id"][i]
    cleanedroute["x"][j]=rsum
colorbar0=[]
Nameclean(dfroutes,"route_id")
for i in range(len(cleanedroute['name'])):
    for j in range(len(dfroutes['route_id'])):
        if cleanedroute['name'][i]==dfroutes['route_id'][j]:
            colorbar0.append("#%s"%dfroutes['route_color'][j])
            break


C:\Users\xiaoliang\Anaconda3\lib\site-packages\ipykernel\__main__.py:9: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

C:\Users\xiaoliang\Anaconda3\lib\site-packages\ipykernel\__main__.py:18: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [137]:
#Xiaoliang Jiang
colors=['#008063', '#fcee1f', '#d1d3d4', '#5a1d5a', '#808285', '#006991', '#a78bc0', '#eb008b', '#b2d235', '#823822', '#c7994a', '#f99f2a', '#9e8966', '#ed1c24', '#355caa', '#2b3088', '#000000', '#ffbfff']
names=['GREEN','YELLOW','SILVER','ILLINI','GREY','TEAL','LAVENDER','RUBY','LIME','BROWN','GOLD','ORANGE','BRONZE','RED','BLUE','NAVY','RAVEN','PINK']
stats=stopsdata.stats()
plt.rcParams["figure.figsize"] = (10, 5)
stats=stopsdata.stats()
lon_min=stats["stop_lon"][0]
lon_max=stats["stop_lon"][1]
lat_min=stats["stop_lat"][0]
lat_max=stats["stop_lat"][1]
num_bins=16
lon=np.mgrid[lon_min:lon_max:(num_bins+1)*1j]
lat=np.mgrid[lat_min:lat_max:(num_bins+1)*1j]
tree_count=np.zeros((num_bins,num_bins))
for i in range(num_bins):
    left_lat=lat[i]
    right_lat=lat[i+1]
    filter_lat_left=stopsdata.filter_gt("stop_lat",left_lat)
    filter_lat_right=filter_lat_left.filter_lt("stop_lat",right_lat)
    for j in range(num_bins):
        left_lon=lon[j]
        right_lon=lon[j+1]
        filter_lon_left=filter_lat_right.filter_gt("stop_lon",left_lon)
        filter_lon_right=filter_lon_left.filter_lt("stop_lon",right_lon)
        tree_count[i,j] +=filter_lon_right.size()
#plt.xlim(lon_min,lon_max)
#plt.ylim(lat_min,lat_max)

plt.imshow(tree_count, extent=(lon_min,lon_max,lat_min,lat_max),origin="lower",interpolation='none',cmap =plt.cm.gray_r)
plt.xlabel("Longitude",fontsize=10)
plt.ylabel("Latitude",fontsize=10)
plt.title("The distribution of stops", fontsize=15)
for name in cleanedroute["name"]:
    tempshapeID=dftrips[dftrips["route_id"]==name]["shape_id"]
    count=0
    for i in tempshapeID:
        tempshapeIDvalue=i
        count+=1
        if count>20:
            break
        subrows=dfshapes[dfshapes["shape_id"]==tempshapeIDvalue]
        plt.plot(subrows["shape_pt_lon"],subrows["shape_pt_lat"],c=colors[names.index(name)],linewidth=cleanedroute['x'][cleanedroute['name']==name]/200,mec='none',ls='-')#,alpha=0.05)
        
plt.xlim([lon_min,lon_max])
plt.ylim([lat_min,lat_max])
color_bar=plt.colorbar()
color_bar.set_label("Count")
plt.plot()


Out[137]:
[]

In [140]:
plt.rcParams["figure.figsize"] = (20, 20)
plt.grid()
plt.xlabel("Longitude",fontsize=15)
plt.ylabel("Latitude",fontsize=15)
plt.title("Walkable areas & MTD routes", fontsize=25)
plt.xlim(-88.339,-88.139)
plt.ylim(40.02329,40.17671)
r=11.2528
colorlist=("#ffcccc","#ffd5cc","#ffddcc","#ffe6cc","#ffeecc","#fff7cc","#ffffcc","#f7ffcc","#eeffcc","#e6ffcc","#ddffcc","#d5ffcc")

for i in range(12,0,-1):
    plt.plot(data["stop_lon"],data["stop_lat"],color=colorlist[12-i],marker='o',markersize=11.2528*i,mec='none',ls='')
for name in cleanedroute["name"]:
    tempshapeID=dftrips[dftrips["route_id"]==name]["shape_id"]
    count=0
    for i in tempshapeID:
        tempshapeIDvalue=i
        count+=1
        if count>20:
            break
        subrows=dfshapes[dfshapes["shape_id"]==tempshapeIDvalue]
        plt.plot(subrows["shape_pt_lon"],subrows["shape_pt_lat"],c=colors[names.index(name)],linewidth=2,mec='none',ls='-')#,alpha=0.05)



In [ ]: