버스 정보 가져오기


In [23]:
import urllib2, urllib
import json

def get_businfo(busno):
    parameters = {}
    parameters['strSrch'] = busno
    target = 'http://m.bus.go.kr/mBus/bus/getBusRouteList.bms'

    parameters = urllib.urlencode(parameters)
    
    while True:
        handler = urllib2.urlopen(target, parameters)
        if handler.code < 400:
            f = handler.read()
            j = json.loads(f.decode('cp949'))
            try:
                return j["resultList"][0]["busRouteId"]
            except:
                return None

def get_busroute(busno):
    routeid = get_businfo(busno)
    if not routeid:
        return 
    parameters = {}
    parameters['busRouteId'] = routeid
    target = 'http://m.bus.go.kr/mBus/bus/getRouteAndPos.bms'
    
    parameters = urllib.urlencode(parameters)
    
    while True:
        handler = urllib2.urlopen(target, parameters)
        if handler.code < 400:
            f = handler.read()
            j = json.loads(f.decode('cp949'))
            routes = j["resultList"]
            for route in routes:
                busRouteNm = route['busRouteNm']
                busRouteId = route['busRouteId']
                stationNm = route['stationNm']
                stationNo = route['stationNo']
                x = route['gpsX']
                y = route['gpsY']
                l = '\t'.join([busRouteNm, busRouteId, stationNo, stationNm, x, y])
                # l = '\t'.join(list(route.values()))
                print(l.encode('utf-8'))
            break

def get_busroute_keys(busno):
    routeid = get_businfo(busno)
    parameters = {}
    parameters['busRouteId'] = routeid
    target = 'http://m.bus.go.kr/mBus/bus/getRouteAndPos.bms'
    
    parameters = urllib.urlencode(parameters)
    
    while True:
        handler = urllib2.urlopen(target, parameters)
        if handler.code < 400:
            f = handler.read()
            j = json.loads(f.decode('cp949'))
            route = j["resultList"][0]
            print(','.join(list(route.keys())))
            break
            
def main():
#     get_busroute_keys('0017')
#     print('busno, busid, stationid, stationnm, x, y')
    busnos = ['0017', '0018', '1014', '1017', '1020', '1111', '1113', '1114', '1115', '1117', '1119', '1120', '1122', '1124', '1126', '1127', '1128', '1129', '1130', '1131', '1132', '1133', '1135', '1136', '1137', '1138', '1139', '1140',  '1141',  '1142', '1143', '1144', '1146', '1152', '1154', '1155', '1156', '1157', '1161', '1162', '1164', '1165', '1166', '1212', '1213', '1215', '1218', '1221', '1222', '1224', '1225', '1226', '1227', '1711', '2012', '2013', '2014', '2015', '2016', '2112', '2113', '2114', '2211', '2220', '2221', '2222', '2223', '2224', '2227', '2230', '2233', '2234', '2235', '2411', '2412', '2413', '2415', '3011', '3212', '3214', '3215', '3216', '3217', '3219', '3220', '3313', '3314', '3315', '3316', '3317', '3318', '3319', '3411', '3412', '3413', '3414', '3416', '3417', '3418', '3422', '3423', '4212', '4318', '4319', '4412', '4419', '4425', '4426', '4429', '4430', '4431', '4432', '4433', '4434', '5012', '5413', '5511', '5513', '5515', '5516', '5517', '5519', '5523', '5524', '5525', '5526', '5528', '5530', '5531', '5534', '5535', '5536', '5537', '5538', '5612', '5615', '5616', '5617', '5618', '5619', '5620', '5621', '5623', '5624', '5625', '5626', '5627', '5630', '5633', '5712', '5713', '5714', '6211', '6411', '6511', '6512', '6513', '6514', '6515', '6611', '6613', '6614', '6616', '6617', '6618', '6620', '6623', '6624', '6625', '6627', '6628', '6629', '6630', '6631', '6632', '6635', '6637', '6638', '6640', '6641', '6642', '6643', '6645', '6646', '6647', '6648', '6649', '6650', '6651', '6653', '6654', '6657', '6712', '6714', '6715', '6716', '7011', '7013A', '7013B', '7016', '7017', '7018', '7019', '7021', '7022', '7024', '7025', '7211', '7212', '7611', '7612', '7613', '7711', '7713', '7715', '7719', '7720', '7722', '7723', '7726', '7727', '7728', '7730', '7733', '7737', '7738']
    for busno in busnos:
        get_busroute(busno)

# if __name__ == '__main__':
#     main()

google map 파일 출력


In [1]:
from string import Template

colors = ["FF0000", "00FF00", "0000FF", "FFFF00", "FF00FF", "00FFFF", "000000",
        "800000", "008000", "000080", "808000", "800080", "008080", "808080",
        "C00000", "00C000", "0000C0", "C0C000", "C000C0", "00C0C0", "C0C0C0",
        "400000", "004000", "000040", "404000", "400040", "004040", "404040",
        "200000", "002000", "000020", "202000", "200020", "002020", "202020",
        "600000", "006000", "000060", "606000", "600060", "006060", "606060",
        "A00000", "00A000", "0000A0", "A0A000", "A000A0", "00A0A0", "A0A0A0",
        "E00000", "00E000", "0000E0", "E0E000", "E000E0", "00E0E0", "E0E0E0"]

def generate_busstops(df):
    coos = ',\n'.join(['new google.maps.LatLng(%s, %s)' %(r['y'] ,r['x']) for i, r in df.iterrows()])
    return 'var busstops = [{0}];'.format(coos)

def generate_polyline(valname, df, idx=0):
    coos = ',\n'.join(['new google.maps.LatLng(%s, %s)' %(r['y'] ,r['x']) for i, r in df.iterrows()])
    valcoos =  'var {0} = [{1}];'.format(valname, coos)
    
    polyline_template = """
                        {0}
                        var {1}_ = new google.maps.Polyline({{
                        path: {1},
                        strokeColor: "#{2}",
                        strokeOpacity: 0.8,
                        strokeWeight: 3
                        }});
                        {1}_.setMap(map);"""
    
    return polyline_template.format(valcoos, valname, colors[idx])
    
# data = {'busstops': busstops, 'busroutes':busroutes}
def generate_template_html(data, outfilename='busmap.html'):
    infile = open('map_temp.html')
    template = Template(infile.read())
    map_html = template.substitute(data)
    outfile = open(outfilename, 'w')
    outfile.write(map_html)

버스 정보 읽기와 필터링


In [12]:
import pandas as pd

bus_df = pd.read_csv('bus.tsv', sep='\t')

mbus_df = bus_df[bus_df['stationid']!='0']
mbus_df = mbus_df[mbus_df['stationid']!='미정차']
mbus_df = mbus_df[mbus_df['stationid']!='35331']
# mbus_df.shape
sampling_mbus = mbus_df.loc[np.random.permutation(mbus_df.index)[:200]]

In [13]:
# 일부 정거장 구하기
busstops = generate_busstops(sampling_mbus)
    
data = {'busstops': busstops, 'busroutes': ''}
generate_template_html(data, 'all_busstop')

In [5]:
N_BUSSTOPS = 8

stops = mbus_df['stationid'].value_counts()
d = stops[stops > N_BUSSTOPS].index.values.tolist()
mbus_df = mbus_df[mbus_df['stationid'].isin(d)]
busstop_df = mbus_df.drop_duplicates(cols='stationid', take_last=True)
# busstop_df

필터링 한 버스 정거장 map으로 출력


In [101]:
busstops = generate_busstops(busstop_df)
busroutes = ''
i = 0
for idx, row in busstop_df.iterrows():
    route = bus_df[bus_df['busno']==row['busno']]
    busroutes += generate_polyline('poly' + str(i), route, i)
    i += 1
    
data = {'busstops': busstops, 'busroutes': busroutes}
generate_template_html(data)

24138 버스 정거장에 다니는 버스

  1. 버스 정거장에 다니는 버스
  2. 각 버스의 노선

In [7]:
#1 버스 정거장에 다니는 버스

import pandas as pd

bus_df = pd.read_csv('bus.tsv', sep='\t')

mbus_df = bus_df[bus_df['stationid']!='0']
mbus_df = mbus_df[mbus_df['stationid']!='미정차']
mbus_df = mbus_df[mbus_df['stationid']!='35331']

mbus_df = mbus_df[mbus_df['stationid']=='24138']
station_df = mbus_df.drop_duplicates(cols='busno', take_last=True)

In [8]:
#2 각 버스의 노선

busstops = generate_busstops(station_df.drop_duplicates(cols='stationid', take_last=True))
print(busstops)
busroutes = ''
i = 0
for idx, row in station_df.iterrows():
    route = bus_df[bus_df['busno']==row['busno']]
#     print(route)
    busroutes += generate_polyline('poly' + str(i), route, i)
    i += 1
    
data = {'busstops': busstops, 'busroutes': busroutes}
generate_template_html(data, 'bus.html')


var busstops = [new google.maps.LatLng(37.5128993133, 127.09794576)];

버스가 많은 정차하는 버스 정거장을 클러스러링한다.

  1. 버스 정거장 선택
  2. 시각화
  3. 클러스터링
  4. 클러스터링 후 시각화

In [8]:
#1 버스 정거장 선택

import pandas as pd

bus_df = pd.read_csv('bus.tsv', sep='\t')

mbus_df = bus_df[bus_df['stationid']!='0']
mbus_df = mbus_df[mbus_df['stationid']!='미정차']
mbus_df = mbus_df[mbus_df['stationid']!='35331']

N_BUSSTOPS = 7

stops = mbus_df['stationid'].value_counts()
d = stops[stops > N_BUSSTOPS].index.values.tolist()
mbus_df = mbus_df[mbus_df['stationid'].isin(d)]
busstop_df = mbus_df.drop_duplicates(cols='stationid', take_last=True)
busstop_df


Out[8]:
busno busid stationid stationnm x y
2555 1222 4122200 08146 석계역굴다리앞 127.065797 37.614331
3463 2114 4211400 11312 한진한화그랑빌아파트 127.069232 37.615805
3464 2114 4211400 11284 석계역2번출구 127.066973 37.615274
3466 2114 4211400 08147 석계역 127.067255 37.615003
3496 2114 4211400 11283 석계역1번출구.A 127.064989 37.614919
4102 2235 4223500 07418 중랑공영차고지 127.103897 37.613367
6019 4319 4431900 23243 강남경찰서.강남운전면허시험장 127.067156 37.509783
6020 4319 4431900 24154 잠실종합운동장 127.072816 37.510419
6021 4319 4431900 24157 종합운동장사거리 127.079555 37.511613
6022 4319 4431900 24158 신천역4번출구 127.084495 37.511407
6023 4319 4431900 24145 잠실트리지움아파트앞 127.091202 37.511512
6024 4319 4431900 24146 잠실역.롯데월드 127.098196 37.512412
6028 4319 4431900 24138 잠실역.롯데월드 127.097946 37.512899
6030 4319 4431900 24141 잠실2동주민센터 신천역8번출구앞 127.087281 37.511772
6031 4319 4431900 24142 잠실엘스아파트앞 127.083390 37.511871
6032 4319 4431900 24143 종합운동장사거리 127.079692 37.512009
6033 4319 4431900 24144 잠실종합운동장 127.072073 37.511118
6193 4419 4441910 23197 한국전력공사 127.062719 37.510408
6355 4434 4443400 23196 강남경찰서면허시험장 127.065907 37.509875
8784 5713 4571300 18777 석수역 126.903310 37.433717
8785 5713 4571300 18013 시흥유통센터 126.903454 37.440257
8786 5713 4571300 18011 금천폭포공원 126.903273 37.447843
8787 5713 4571300 18009 시흥사거리 126.901412 37.452517
8788 5713 4571300 18007 금천구청 126.898872 37.459209
8789 5713 4571300 18005 말미고개 126.897582 37.464160
8834 5713 4571300 18006 말미고개 126.897459 37.464605
8835 5713 4571300 18008 금천구청 126.899269 37.457674
8836 5713 4571300 18010 시흥사거리 126.901098 37.452921
8837 5713 4571300 18012 금천폭포공원 126.902658 37.448908
8838 5713 4571300 18014 시흥유통센터 126.903344 37.440743
8839 5713 4571300 18776 석수역 126.902729 37.434643
8932 5714 4571400 17133 남구로역 126.886196 37.484273
8933 5714 4571400 17132 가리봉시장 126.886993 37.482429
9253 6512 4651200 17134 구로4동자치회관 126.885645 37.486098
9254 6512 4651200 17135 구로시장 126.884930 37.487797
9255 6512 4651200 17137 구로4동우체국.고대구로병원정문 126.884240 37.490248
9275 6512 4651200 19010 강남성심병원.대림성모병원 126.907900 37.490871
9276 6512 4651200 19009 시흥대로.한국광물자원공사 126.904695 37.487030
9277 6512 4651200 17013 구로디지털단지역 126.901548 37.483099
9303 6512 4651200 21112 금천경찰서.신림푸르지오아파트 126.911240 37.481362
9306 6512 4651200 21001 구로디지털단지역 126.902447 37.483935
9307 6512 4651200 20001 신대방성원상떼빌 126.904626 37.486505
9308 6512 4651200 20002 신대방경남아파트 126.907770 37.490299
9325 6512 4651200 17138 구로4동우체국.고대구로병원정문 126.884065 37.489902
9326 6512 4651200 17136 구로시장 126.884517 37.488173
9558 6515 4651500 21127 서울대학교 126.947952 37.466741
9559 6515 4651500 21142 신림중.삼성고.관악문화관도서관 126.944528 37.470209
9560 6515 4651500 21143 서울산업정보학교.삼성교 126.941180 37.470861
9583 6515 4651500 21157 신림동고시촌입구 126.938145 37.470469
9584 6515 4651500 21158 서울산업정보학교.삼성교 126.942028 37.470574
9585 6515 4651500 21159 관악산입구.관악문화관도서관 126.946438 37.468163
10698 6635 4663500 18003 금천우체국 126.898083 37.469136
10699 6635 4663500 18001 문성초등학교 126.898666 37.475364
10701 6635 4663500 18002 문성초등학교 126.898320 37.473981
10702 6635 4663500 18004 금천우체국 126.897935 37.470079
10742 6637 4663700 19119 김안과병원 126.902834 37.520416
10743 6637 4663700 19162 당산동진로아파트 126.896878 37.521326
10823 6638 4663800 15258 진명여고 126.865104 37.523940
10828 6638 4663800 15167 목동대학학원 126.873444 37.524927
10869 6640A 4664002 15194 양천구청 126.866428 37.516298
... ... ... ... ... ...

81 rows × 6 columns


In [16]:
#2 시각화

busstops = generate_busstops(busstop_df)
busroutes = ''
# i = 0
# for idx, row in busstop_df.iterrows():
#     route = bus_df[bus_df['busno']==row['busno']]
#     busroutes += generate_polyline('poly' + str(i), route, i)
#     i += 1
    
data = {'busstops': busstops, 'busroutes': busroutes}
generate_template_html(data, 'busstop_origin.html')

In [17]:
#3 클러스터링

from sklearn.cluster import KMeans
from sklearn import metrics

print(busstop_df.head())
X = busstop_df[['x', 'y']]
y = busstop_df['busno']
cluster_range = range(2, 15)
vmeasures = []

for n_cluster in cluster_range:

    # km = KMeans(n_clusters=7, init='random', max_iter=100, n_init=1, verbose=1)
    km = KMeans(init='k-means++', n_clusters=n_cluster, n_init=10)

#     print "Clustering sparse data with %s" % km
    km.fit(X)
#     print '----------------------------------------------------'
#     print n_cluster
#     print "Homogeneity: %0.3f" % metrics.homogeneity_score(y, km.labels_)
#     print "Completeness: %0.3f" % metrics.completeness_score(y, km.labels_)
#     print "V-measure: %0.3f" % metrics.v_measure_score(y, km.labels_)
#     print "Adjusted Rand-Index: %.3f" % metrics.adjusted_rand_score(y, km.labels_)
    vmeasures.append(metrics.v_measure_score(y, km.labels_))
#     vmeasures.append(metrics.silhouette_score(X, km.labels_, metric='euclidean'))
    
import matplotlib.pyplot as plt
plt.plot(cluster_range, vmeasures)
    
plt.xlabel('# cluster')
plt.ylabel('v measure')
plt.autoscale(tight=True)
plt.grid()
plt.show()


     busno    busid stationid   stationnm           x          y
2555  1222  4122200     08146     석계역굴다리앞  127.065797  37.614331
3463  2114  4211400     11312  한진한화그랑빌아파트  127.069232  37.615805
3464  2114  4211400     11284     석계역2번출구  127.066973  37.615274
3466  2114  4211400     08147         석계역  127.067255  37.615003
3496  2114  4211400     11283   석계역1번출구.A  127.064989  37.614919

[5 rows x 6 columns]

In [20]:
#4 클러스터링 후 시각화
best_clusters = 7
km = KMeans(init='k-means++', n_clusters=best_clusters, n_init=10)
km.fit(X)
# print km.cluster_centers_
centers = pd.DataFrame(km.cluster_centers_, columns=['x', 'y'])

busstops = generate_busstops(centers)
busroutes = ''
# i = 0
# for idx, row in busstop_df.iterrows():
#     route = bus_df[bus_df['busno']==row['busno']]
#     busroutes += generate_polyline('poly' + str(i), route, i)
#     i += 1
    
data = {'busstops': busstops, 'busroutes': busroutes}
generate_template_html(data, 'busstop_cluster_centers.html')

DBSCAN로 이상치인 홀로 떨어진 빈도수 높은 버스 정거장 찾기

  1. DBSCAN로 이상치 찾기
  2. 시각화

In [15]:
#1. DBSCAN로 이상치 찾기
from collections import namedtuple

from sklearn.preprocessing import MinMaxScaler, StandardScaler, Normalizer
from sklearn.cluster import DBSCAN

X['sx'] = X.x
X['sy'] = X.y

ss = StandardScaler()
X['sx'] = ss.fit_transform(X.sx)
X['sy'] = ss.fit_transform(X.sy)
# print(X)

Param = namedtuple('Param', ['eps', 'min_samples'])
params = [Param(0.45, 2), Param(0.30, 2), Param(0.35, 2), Param(0.40, 2), 
            Param(0.45, 4), Param(0.30, 4), Param(0.35, 4), Param(0.40, 4), 
            Param(0.45, 3), Param(0.30, 3), Param(0.35, 3), Param(0.40, 3)]
# print(X.values)
for param in params:
    dbscan = DBSCAN(eps=param.eps, min_samples=param.min_samples).fit(X[['sx', 'sy']].values)
    labels = dbscan.labels_
    outliers = X[labels == -1]
    
    print(param)
#     print(labels)
    print(outliers)
#     print("V-measure: %0.3f" % metrics.v_measure_score(y, labels))
    print(metrics.silhouette_score(X, labels, metric='euclidean'))

# busstops = generate_busstops(outliers)
# busroutes = ''
    
# data = {'busstops': busstops, 'busroutes': busroutes}
# generate_template_html(data, 'busstop_cluster_outlier.html')


-c:11: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_index,col_indexer] = value instead
-c:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_index,col_indexer] = value instead
/media/riemann/dedekind/local/lib/python2.7/site-packages/numpy/core/_methods.py:55: RuntimeWarning: Mean of empty slice.
  warnings.warn("Mean of empty slice.", RuntimeWarning)
Param(eps=0.45, min_samples=2)
                x          y        sx        sy
11401  126.801262  37.565235 -1.773647  1.086223

[1 rows x 4 columns]
0.578956767951
Param(eps=0.3, min_samples=2)
                x          y        sx        sy
4102   127.103897  37.613367  2.066672  2.076246
11401  126.801262  37.565235 -1.773647  1.086223
11542  126.837665  37.508486 -1.311701 -0.081022

[3 rows x 4 columns]
0.569195243662
Param(eps=0.35, min_samples=2)
                x          y        sx        sy
4102   127.103897  37.613367  2.066672  2.076246
11401  126.801262  37.565235 -1.773647  1.086223
11542  126.837665  37.508486 -1.311701 -0.081022

[3 rows x 4 columns]
0.557622991947
Param(eps=0.4, min_samples=2)
                x          y        sx        sy
4102   127.103897  37.613367  2.066672  2.076246
11401  126.801262  37.565235 -1.773647  1.086223

[2 rows x 4 columns]
0.559847187262
Param(eps=0.45, min_samples=4)
                x          y        sx        sy
11401  126.801262  37.565235 -1.773647  1.086223

[1 rows x 4 columns]
0.578956767951
Param(eps=0.3, min_samples=4)
                x          y        sx        sy
4102   127.103897  37.613367  2.066672  2.076246
10823  126.865104  37.523940 -0.963517  0.236850
10828  126.873444  37.524927 -0.857686  0.257150
10869  126.866428  37.516298 -0.946719  0.079653
11401  126.801262  37.565235 -1.773647  1.086223
11542  126.837665  37.508486 -1.311701 -0.081022

[6 rows x 4 columns]
0.538119913613
Param(eps=0.35, min_samples=4)
                x          y        sx        sy
4102   127.103897  37.613367  2.066672  2.076246
11401  126.801262  37.565235 -1.773647  1.086223
11542  126.837665  37.508486 -1.311701 -0.081022

[3 rows x 4 columns]
0.557622991947
Param(eps=0.4, min_samples=4)
                x          y        sx        sy
4102   127.103897  37.613367  2.066672  2.076246
11401  126.801262  37.565235 -1.773647  1.086223

[2 rows x 4 columns]
0.559847187262
Param(eps=0.45, min_samples=3)
                x          y        sx        sy
11401  126.801262  37.565235 -1.773647  1.086223

[1 rows x 4 columns]
0.578956767951
Param(eps=0.3, min_samples=3)
                x          y        sx        sy
4102   127.103897  37.613367  2.066672  2.076246
11401  126.801262  37.565235 -1.773647  1.086223
11542  126.837665  37.508486 -1.311701 -0.081022

[3 rows x 4 columns]
0.569195243662
Param(eps=0.35, min_samples=3)
                x          y        sx        sy
4102   127.103897  37.613367  2.066672  2.076246
11401  126.801262  37.565235 -1.773647  1.086223
11542  126.837665  37.508486 -1.311701 -0.081022

[3 rows x 4 columns]
0.557622991947
Param(eps=0.4, min_samples=3)
                x          y        sx        sy
4102   127.103897  37.613367  2.066672  2.076246
11401  126.801262  37.565235 -1.773647  1.086223

[2 rows x 4 columns]
0.559847187262

In [22]:
#2 시각화
best_param = Param(eps=0.3, min_samples=3)
dbscan = DBSCAN(eps=best_param.eps, min_samples=best_param.min_samples).fit(X[['sx', 'sy']].values)
labels = dbscan.labels_
outliers = X[labels == -1]

busstops = generate_busstops(outliers)
busroutes = ''
    
data = {'busstops': busstops, 'busroutes': busroutes}
generate_template_html(data, 'busstop_cluster_outlier.html')

mean shift 적용하기

  • mean shift
  • 시각화

In [18]:
#1 mean shift
from sklearn.cluster import MeanShift, estimate_bandwidth

import pandas as pd

bus_df = pd.read_csv('bus.tsv', sep='\t')

mbus_df = bus_df[bus_df['stationid']!='0']
mbus_df = mbus_df[mbus_df['stationid']!='미정차']
mbus_df = mbus_df[mbus_df['stationid']!='35331']

N_BUSSTOPS = 7

stops = mbus_df['stationid'].value_counts()
d = stops[stops > N_BUSSTOPS].index.values.tolist()
mbus_df = mbus_df[mbus_df['stationid'].isin(d)]
busstop_df = mbus_df.drop_duplicates(cols='stationid', take_last=True)
# busstop_df
X = busstop_df[['x', 'y']]
y = busstop_df['busno']

print(X.shape)
bandwidth = estimate_bandwidth(X.values, quantile=0.2, n_samples=30)
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
ms.fit(X.values)
labels = ms.labels_
cluster_centers = ms.cluster_centers_

labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)

print("number of estimated clusters : %d" % n_clusters_)
print(ms.cluster_centers_)


(81, 2)
number of estimated clusters : 5
[[ 126.90469961   37.47674268]
 [ 126.89581985   37.54925483]
 [ 127.08018682   37.51131514]
 [ 127.07302401   37.61478327]
 [ 126.80126158   37.5652345 ]]

In [ ]:
#2 클러스터링 후 시각화

centers = pd.DataFrame(ms.cluster_centers_, columns=['x', 'y'])

busstops = generate_busstops(centers)
busroutes = ''
# i = 0
# for idx, row in busstop_df.iterrows():
#     route = bus_df[bus_df['busno']==row['busno']]
#     busroutes += generate_polyline('poly' + str(i), route, i)
#     i += 1
    
data = {'busstops': busstops, 'busroutes': busroutes}
generate_template_html(data, 'busstop_cluster_centers.html')