In [ ]:
import networkx as nx
import pandas as pd 
import igraph
from pandas import Timestamp
import numpy as np

Data


In [ ]:
# Load the prepared trip table with both trip timestamps parsed as datetimes,
# and discard the 'Unnamed: 0' column (presumably a saved index -- confirm).
df = pd.read_csv(
    'Data/working_data.csv',
    parse_dates=["starttime", "stoptime"],
).drop('Unnamed: 0', axis=1)

Data Set-Up for Community Detection and Alluvial Diagram

Weekdays


In [ ]:
# WEEKDAY subset: subscriber trips that are not round trips (dist != 0),
# starting on or after 2013-08-01 and before 2013-11-01.
is_wkday_trip = (
    (df.usertype == 'Subscriber')
    & (df.dist != 0)
    & (df.weektype == 'Weekday')
    & (df.starttime >= Timestamp('2013-08-01'))
    & (df.starttime < Timestamp('2013-11-01'))
)
df_wkday = df[is_wkday_trip][
    ['starttime', 'stoptime', 'start station id', 'end station id', 'dayofweek', 'dayofweek_name']
]

# Attach the auxiliary station information twice: first keyed on the start
# station, then on the end station. After each merge the auxiliary columns are
# renamed with the matching 'start' / 'end' prefix and the join key is dropped.
wkday_aux_info = pd.read_csv('Data/station_aux_info.csv')

# Auxiliary column name -> suffix that follows the 'start' / 'end' prefix.
wkday_aux_suffixes = {
    'stationid_new': 'stationid_new',
    'station_x': 'station_x',
    'station_y': 'station_y',
    'borough': '_boro',
    'neighborhood': 'neigh',
    'neigh_id': 'neigh_id',
    'neigh_x': 'neigh_x',
    'neigh_y': 'neigh_y',
    'grid_x': 'grid_x',
    'grid_y': 'grid_y',
    'grid_id': 'grid_id',
}

for station_col in ['start station id', 'end station id']:
    # 'start station id' -> 'start', 'end station id' -> 'end'
    prefix = station_col.split(' ', 1)[0]
    prefixed_names = dict(
        (aux_col, prefix + suffix) for aux_col, suffix in wkday_aux_suffixes.items()
    )
    df_wkday_merge = pd.merge(
        df_wkday, wkday_aux_info, left_on=station_col, right_on='station_id'
    ).rename(columns=prefixed_names)
    del df_wkday_merge['station_id']
    df_wkday = df_wkday_merge

In [ ]:
# Matrix for WEEKDAYS at 1 hour intervals between August-October from 7am - 11:59pm; minus round-trips
#
# For every hour in hrList, count the trips between each ordered
# (start station, end station) pair and store the counts as a square matrix
# under the key 'matrix_<HH>' in an HDF5 store.
# The store handle is left open on purpose: the export cell that follows reads
# the matrices back through this same handle.
store_1hr_allWkday_matrix = pd.HDFStore('store_1hr_allWkday_matrix.h5')

# Create list of hours to loop through: '07', '08', ..., '23'
hrList = ['%02d' % hour for hour in range(7, 24)]

# Row/column labels every matrix is aligned to, so all hours share one shape.
# NOTE(review): 332 is presumably the number of distinct stationid_new values -- confirm.
indexList = range(0, 332)

df_indexed = df_wkday.set_index('starttime')

for hr in hrList:
    # Select the whole clock hour. The earlier window
    # between_time(hr + ':00:00', hr + ':59:00') stopped at hh:59:00 and so
    # dropped every trip that started in the last 59 seconds of the hour.
    subset = df_indexed[df_indexed.index.hour == int(hr)]

    # Number of trips per (start station, end station) pair.
    subset_agg = subset.groupby(['startstationid_new', 'endstationid_new']).size().reset_index()
    subset_agg.columns = ['startstationid', 'endstationid', 'count']

    # Rows = start stations, columns = end stations; pairs with no trips become 0.
    matrix = subset_agg.pivot(index='startstationid', columns='endstationid', values='count').fillna(0)

    # Align to the full label range so stations without trips this hour still get a row/column.
    matrix_reindex = matrix.reindex(index=indexList, columns=indexList, fill_value=0)
    store_1hr_allWkday_matrix['matrix_' + hr] = matrix_reindex

In [ ]:
# Save WEEKDAY matrices in pajek format for Infomap
# NOTE(review): the output name is built from the hour alone ('pajek/07.pajek', ...),
# which is the same pattern the WEEKEND export uses. Running both exports
# therefore leaves only one version of each shared hour on disk. The names are
# kept unchanged here because downstream tooling may depend on them -- confirm
# and add a weekday/weekend prefix if not.
for i in store_1hr_allWkday_matrix.keys():
    # Store keys look like '/matrix_07'; dropping the first 8 characters leaves the hour.
    outname = 'pajek/' + i[8:] + '.pajek'
    f = store_1hr_allWkday_matrix[i]
    # DataFrame.values yields the plain 2-D array directly; np.matrix is no
    # longer recommended by NumPy and the nested list it produced is the same.
    iG_dir = igraph.Graph.Weighted_Adjacency(f.values.tolist(), mode="DIRECTED", attr="weight", loops=True)
    iG_dir.write(outname, format='pajek')

In [ ]:
"""Initial test showed very little differences between the days. So this was excluded from the study"""
# NOTE(review): everything below is disabled. If it is ever re-enabled, check first:
#   - `df_merge` is not defined in the visible cells (the merged weekday frame is `df_wkday`);
#   - `store_1hr_matrix.h5.keys()` looks like it should be `store_1hr_matrix.keys()`;
#   - the export loop reads from `store_1hr_allWknd_matrix` rather than `store_1hr_matrix`;
#   - the hourly window ends at hh:59:00, which leaves out trips starting after hh:59:00.
# # Matrix for MONDAY-FRIDAY at 1 hour intervals between August-October from 7am - 11:59pm; sans round-trips

# store_1hr_matrix = pd.HDFStore('store_1hr_matrix.h5')

# # Create list of hours to loop through
# hrList = ['07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23']    

# indexList = range(0, 332)

# for i in range(0,5):
#     df_aug_oct = df_merge[df_merge['dayofweek'] == i] 
#     df_indexed = df_aug_oct.set_index('starttime')
     
#     for hr in hrList:
#         subset = df_indexed.between_time(hr + ':00:00', hr + ':59:00')
#         subset_agg = subset.groupby(['startstationid_new', 'endstationid_new'], as_index=False).agg(len) \
#         [['startstationid_new', 'endstationid_new', 'dayofweek']]
#         subset_agg.columns = ['startstationid', 'endstationid', 'count']
#         matrix = subset_agg.pivot(index = 'startstationid', columns = 'endstationid', values = 'count').fillna(0)  
#         matrix_reindex = matrix.reindex(index = indexList, columns = indexList, fill_value = 0)
#         store_1hr_matrix['matrix_' + str(i) + '_' + hr] = matrix_reindex

# # Save MONDAY-FRIDAY matrices in pajek format for Infomap
# for i in store_1hr_matrix.h5.keys():
#     outname = 'pajek/' + i[8:] + '.pajek'
#     f = store_1hr_allWknd_matrix[i]
#     iG_dir = igraph.Graph.Weighted_Adjacency(np.matrix(f).tolist(), "DIRECTED", "weight", True)
#     iG_dir.write(outname, format='pajek')

Weekends


In [ ]:
# WEEKEND subset: trips of every user type that are not round trips (dist != 0),
# starting on or after 2013-08-01 and before 2013-11-01.
is_wknd_trip = (
    (df.dist != 0)
    & (df.weektype == 'Weekend')
    & (df.starttime >= Timestamp('2013-08-01'))
    & (df.starttime < Timestamp('2013-11-01'))
)
df_wknd = df[is_wknd_trip][
    ['starttime', 'stoptime', 'start station id', 'end station id', 'dayofweek', 'dayofweek_name']
]

# Attach the auxiliary station information twice: first keyed on the start
# station, then on the end station. After each merge the auxiliary columns are
# renamed with the matching 'start' / 'end' prefix and the join key is dropped.
wknd_aux_info = pd.read_csv('Data/station_aux_info.csv')

# Auxiliary column name -> suffix that follows the 'start' / 'end' prefix.
wknd_aux_suffixes = {
    'stationid_new': 'stationid_new',
    'station_x': 'station_x',
    'station_y': 'station_y',
    'borough': '_boro',
    'neighborhood': 'neigh',
    'neigh_id': 'neigh_id',
    'neigh_x': 'neigh_x',
    'neigh_y': 'neigh_y',
    'grid_x': 'grid_x',
    'grid_y': 'grid_y',
    'grid_id': 'grid_id',
}

for station_col in ['start station id', 'end station id']:
    # 'start station id' -> 'start', 'end station id' -> 'end'
    prefix = station_col.split(' ', 1)[0]
    prefixed_names = dict(
        (aux_col, prefix + suffix) for aux_col, suffix in wknd_aux_suffixes.items()
    )
    df_wknd_merge = pd.merge(
        df_wknd, wknd_aux_info, left_on=station_col, right_on='station_id'
    ).rename(columns=prefixed_names)
    del df_wknd_merge['station_id']
    df_wknd = df_wknd_merge

In [ ]:
# Matrix for the WEEKENDS at 1 hour intervals between August-October from 8am - 11:59pm; sans round-trips
#
# For every hour in hrList, count the trips between each ordered
# (start station, end station) pair and store the counts as a square matrix
# under the key 'matrix_<HH>' in an HDF5 store.
# The store handle is left open on purpose: the export cell that follows reads
# the matrices back through this same handle.
store_1hr_allWknd_matrix = pd.HDFStore('store_1hr_allWknd_matrix.h5')

# Create list of hours to loop through: '08', '09', ..., '23'
hrList = ['%02d' % hour for hour in range(8, 24)]

# Row/column labels every matrix is aligned to, so all hours share one shape.
# NOTE(review): 332 is presumably the number of distinct stationid_new values -- confirm.
indexList = range(0, 332)

df_indexed = df_wknd.set_index('starttime')

for hr in hrList:
    # Select the whole clock hour. The earlier window
    # between_time(hr + ':00:00', hr + ':59:00') stopped at hh:59:00 and so
    # dropped every trip that started in the last 59 seconds of the hour.
    subset = df_indexed[df_indexed.index.hour == int(hr)]

    # Number of trips per (start station, end station) pair.
    subset_agg = subset.groupby(['startstationid_new', 'endstationid_new']).size().reset_index()
    subset_agg.columns = ['startstationid', 'endstationid', 'count']

    # Rows = start stations, columns = end stations; pairs with no trips become 0.
    matrix = subset_agg.pivot(index='startstationid', columns='endstationid', values='count').fillna(0)

    # Align to the full label range so stations without trips this hour still get a row/column.
    matrix_reindex = matrix.reindex(index=indexList, columns=indexList, fill_value=0)
    store_1hr_allWknd_matrix['matrix_' + hr] = matrix_reindex

In [ ]:
# Save WEEKEND matrices in pajek format for Infomap
# NOTE(review): the output name is built from the hour alone ('pajek/08.pajek', ...),
# which is the same pattern the WEEKDAY export uses. Running this cell after the
# weekday export therefore overwrites the weekday files for the shared hours.
# The names are kept unchanged here because downstream tooling may depend on
# them -- confirm and add a weekday/weekend prefix if not.
for i in store_1hr_allWknd_matrix.keys():
    # Store keys look like '/matrix_08'; dropping the first 8 characters leaves the hour.
    outname = 'pajek/' + i[8:] + '.pajek'
    f = store_1hr_allWknd_matrix[i]
    # DataFrame.values yields the plain 2-D array directly; np.matrix is no
    # longer recommended by NumPy and the nested list it produced is the same.
    iG_dir = igraph.Graph.Weighted_Adjacency(f.values.tolist(), mode="DIRECTED", attr="weight", loops=True)
    iG_dir.write(outname, format='pajek')

In [ ]:
"""Initial test showed very little differences between the days. So this was excluded from the study"""
# NOTE(review): everything below is disabled. If it is ever re-enabled, check first:
#   - `store_1hr_wknd_matrix.h5.keys()` looks like it should be `store_1hr_wknd_matrix.keys()`;
#   - the export loop reads from `store_1hr_allWknd_matrix` rather than `store_1hr_wknd_matrix`;
#   - the hourly window ends at hh:59:00, which leaves out trips starting after hh:59:00.
# # Matrix for the Saturdays and Sundays at 1 hour intervals between August-October from 8am - 11:59pm; sans round-trips

# store_1hr_wknd_matrix = pd.HDFStore('store_1hr_wknd_matrix.h5')

# # Create list of hours to loop through
# hrList = ['08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23']    

# indexList = range(0, 332)

# for i in [5,6]:
#     df_aug_oct = df_wknd_merge[df_wknd_merge['dayofweek'] == i] 
#     df_indexed = df_aug_oct.set_index('starttime')
     
#     for hr in hrList:
#         subset = df_indexed.between_time(hr + ':00:00', hr + ':59:00')
#         subset_agg = subset.groupby(['startstationid_new', 'endstationid_new'], as_index=False).agg(len) \
#         [['startstationid_new', 'endstationid_new', 'dayofweek']]
#         subset_agg.columns = ['startstationid', 'endstationid', 'count']
#         matrix = subset_agg.pivot(index = 'startstationid', columns = 'endstationid', values = 'count').fillna(0)  
#         matrix_reindex = matrix.reindex(index = indexList, columns = indexList, fill_value = 0)
#         store_1hr_wknd_matrix['matrix_' + str(i) + '_' + hr] = matrix_reindex

# # Save matrices in pajek format for Infomap
# for i in store_1hr_wknd_matrix.h5.keys():
#     outname = 'pajek/' + i[8:] + '.pajek'
#     f = store_1hr_allWknd_matrix[i]
#     iG_dir = igraph.Graph.Weighted_Adjacency(np.matrix(f).tolist(), "DIRECTED", "weight", True)
#     iG_dir.write(outname, format='pajek')