In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import time
import datetime as dt
import pickle
import numpy as np
from itertools import chain, combinations
import random
import scipy as sp
from __future__ import division
from dict_stops import * 
import pandas as pd
import os
import csv

In [3]:
frame = pd.read_csv('/home/cata/Documentos/Datois/etapas_2013_abril_allyearsids_10_100000.csv')

In [4]:
frame_2 = pd.read_csv('/home/cata/Documentos/Datois/etapas_2013_septiembre_allyearsids_10_100000.csv')

In [5]:
if os.name == 'nt':
    path_subway_dictionary = 'C:\Users\catalina\Documents\Datois\Diccionario-EstacionesMetro.csv'
    path_csv_sequences = 'C:\Users\catalina\Documents\sequences\\'
else:
    path_subway_dictionary = '/home/cata/Documentos/Datois/Diccionario-EstacionesMetro.csv'
    path_csv_sequences = '/home/cata/Documentos/sequences/'

# Función que carga las estaciones de metro
# en un diccionario
def load_metro_dictionary():
    dict_metro = {}
    with open(path_subway_dictionary,mode='r') as infile:
        reader = csv.reader(infile,delimiter=';')
        dict_metro = {rows[5]:rows[7] for rows in reader}
    return dict_metro

In [6]:
# Función que estandariza los valores de los paraderos de subida 
# y bajada
def update_vals(row,data = load_metro_dictionary()):
    if row.par_subida in data:
        row.par_subida = data[row.par_subida]
    if row.par_bajada in data:
        row.par_bajada = data[row.par_bajada]
    return row

In [7]:
# Función que estandariza los valores de los paraderos de subida 
# y bajada
def add_vals(row,latlong,paradero,data = dict_latlong_stops):
    stop_name = row[paradero]
    if stop_name in data:
        return data[stop_name][latlong]
    else :
        return np.nan

In [8]:
def frame_config(frame):
    frame['tiempo_subida'] = pd.to_datetime(frame.tiempo_subida)
    frame['tiempo_bajada'] = pd.to_datetime(frame.tiempo_bajada)
    frame = frame.apply(update_vals, axis=1)
    frame['weekday'] = frame.tiempo_subida.dt.dayofweek
    frame['lat_subida'] = frame.apply(add_vals,args=('lat','par_subida'),axis=1)
    frame['lat_bajada'] = frame.apply(add_vals,args=('lat','par_bajada'),axis=1)
    frame['long_subida'] = frame.apply(add_vals,args=('long','par_subida'),axis=1)
    frame['long_bajada'] = frame.apply(add_vals,args=('long','par_bajada'),axis=1)
    frame = frame.sort_values(by=['id', 'tiempo_subida'])
    frame['diferencia_tiempo'] = (frame['tiempo_subida']-frame['tiempo_subida'].shift()).fillna(0)
    return frame

In [9]:
def hour_to_seconds(an_hour):
    return int(an_hour.hour*3600 + an_hour.minute *60 + an_hour.second)

In [10]:
frame = frame_config(frame)

In [11]:
frame.head()


Out[11]:
tiempo_subida id x_subida y_subida tipo_transporte serviciosentidovariante tipo_dia nviaje netapa x_bajada ... par_bajada zona_subida zona_bajada adulto weekday lat_subida lat_bajada long_subida long_bajada diferencia_tiempo
23 2013-04-14 06:45:44 1132106 348108.0 6289153.0 BUS T203 00R DOMINGO 1 1 346818.0 ... E-20-190-SN-40 328.0 307.0 0.0 6 -33.526277 -33.433786 -70.635551 -70.647786 0 days 00:00:00
22 2013-04-14 07:51:52 1132106 346751.0 6299389.0 BUS T502 00I DOMINGO 2 1 351363.0 ... T-15-135-PO-5 307.0 188.0 0.0 6 -33.434116 -33.406027 -70.648104 -70.598251 0 days 01:06:08
21 2013-04-14 19:56:47 1132106 351368.0 6302559.0 BUS T502 00R DOMINGO 3 1 346763.0 ... T-4-19-NS-100 188.0 55.0 0.0 6 -33.405971 -33.432332 -70.598379 -70.648651 0 days 12:04:55
20 2013-04-14 20:15:25 1132106 346713.0 6299427.0 BUS T203 00I DOMINGO 3 2 348095.0 ... T-24-205-NS-20 307.0 348.0 0.0 6 -33.433463 -33.525983 -70.649060 -70.635944 0 days 00:18:38
19 2013-04-15 21:04:59 1132106 348103.0 6289191.0 BUS T206 00R LABORAL 4 1 346844.0 ... T-20-190-SN-35 328.0 309.0 0.0 0 -33.526277 -33.434819 -70.635551 -70.647429 1 days 00:49:34

5 rows × 23 columns


In [12]:
frame.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 100000 entries, 23 to 99994
Data columns (total 23 columns):
tiempo_subida              100000 non-null datetime64[ns]
id                         100000 non-null int64
x_subida                   99224 non-null float64
y_subida                   99224 non-null float64
tipo_transporte            100000 non-null object
serviciosentidovariante    98074 non-null object
tipo_dia                   100000 non-null object
nviaje                     100000 non-null int64
netapa                     100000 non-null int64
x_bajada                   86774 non-null float64
y_bajada                   86774 non-null float64
tiempo_bajada              86774 non-null datetime64[ns]
par_subida                 99195 non-null object
par_bajada                 86767 non-null object
zona_subida                99180 non-null float64
zona_bajada                86764 non-null float64
adulto                     86774 non-null float64
weekday                    100000 non-null int64
lat_subida                 99195 non-null float64
lat_bajada                 86767 non-null float64
long_subida                99195 non-null float64
long_bajada                86767 non-null float64
diferencia_tiempo          100000 non-null timedelta64[ns]
dtypes: datetime64[ns](2), float64(11), int64(4), object(5), timedelta64[ns](1)
memory usage: 18.3+ MB

In [13]:
frame_2 = frame_config(frame_2)

Paraderos sin latitud ni longitud


In [14]:
paraderos_sinlatlong = frame_2['par_subida'][frame_2['lat_subida'].isnull()& frame_2['par_subida'].notnull()].unique()

In [16]:
paraderos_sinlatlong


Out[16]:
array(['T-20-304-SN-11', 'T-20-305-SN-10', 'T-20-207-OP-12',
       'T-20-400-OP-5', 'T-20-304-SN-13', 'L-33-71-OP-140', 'L-23-43-OP-1',
       'L-32-11-NS-60', 'L-29-35-SN-15', 'T-20-200-SN-54', 'L-23-22-SN-30',
       'L-33-84-PO-2', 'T-4-19-NS-33', 'L-4-3-NS-20', 'L-29-35-NS-20',
       'L-33-63-NS-5', 'T-4-19-SN-68', 'L-11-18-PO-20'], dtype=object)

In [84]:
frame_2 = frame_2[frame_2.lat_subida.notnull()]

In [54]:
from scipy.stats.mstats import mode
f = lambda x: mode(x, axis=None)[0][0]
g = lambda x: mode(x,axis=None)[1][0]
aggregations = {
    'tiempo_subida': "count"  # Calculate two results for the 'network' column with a list
    
}
a_group = frame.groupby(['par_subida','tipo_transporte']).agg(aggregations)
another_group = frame.groupby(['par_subida','par_bajada','lat_subida','tipo_transporte']).agg(aggregations)

In [57]:
sorted_group = another_group.sort_values('tiempo_subida',ascending=False)
sorted_group.head(100)


Out[57]:
tiempo_subida
par_subida par_bajada lat_subida tipo_transporte
PLAZA DE ARMAS PLAZA MAIPU -33.437258 METRO 100
PLAZA MAIPU PLAZA DE ARMAS -33.510217 METRO 98
PLAZA DE PUENTE ALTO TOBALABA -33.609546 METRO 95
LA CISTERNA ESCUELA MILITAR -33.537712 METRO 84
LAS REJAS TOBALABA -33.457156 METRO 79
CAL Y CANTO LA CISTERNA -33.432885 METRO 69
MANQUEHUE LA CISTERNA -33.408844 METRO 61
HOSPITAL SOTERO DEL RIO PLAZA DE ARMAS -33.576460 METRO 59
LA CISTERNA CAL Y CANTO -33.537712 METRO 59
PLAZA DE ARMAS BELLAVISTA DE LA FLORIDA -33.437258 METRO 58
UNIVERSIDAD DE CHILE PEDRO DE VALDIVIA -33.443937 METRO 58
LAS REJAS LOS LEONES -33.457156 METRO 56
VICENTE VALDES TOBALABA -33.526338 METRO 56
PLAZA DE ARMAS -33.526338 METRO 55
LAS REJAS UNIVERSIDAD DE CHILE -33.457156 METRO 54
UNIVERSIDAD DE CHILE TOBALABA -33.443937 METRO 54
MANQUEHUE UNIVERSIDAD DE CHILE -33.408844 METRO 54
GRECIA TOBALABA -33.468819 METRO 52
UNIVERSIDAD DE CHILE LAS REJAS -33.443937 METRO 52
MANQUEHUE SANTA LUCIA -33.408844 METRO 51
LA CISTERNA SANTA ANA -33.537712 METRO 51
SANTA ANA LA CISTERNA -33.438308 METRO 51
PLAZA DE PUENTE ALTO FRANCISCO BILBAO -33.609546 METRO 50
LAS REJAS PEDRO DE VALDIVIA -33.457156 METRO 50
LAS PARCELAS PLAZA DE ARMAS -33.475204 METRO 50
UNIVERSIDAD DE CHILE ESCUELA MILITAR -33.443937 METRO 50
ESCUELA MILITAR LA CISTERNA -33.414312 METRO 49
PLAZA DE ARMAS HOSPITAL SOTERO DEL RIO -33.437258 METRO 48
MANQUEHUE HOSPITAL SOTERO DEL RIO -33.408844 METRO 48
LA CISTERNA TOBALABA -33.537712 METRO 47
... ... ... ... ...
SANTA LUCIA ESCUELA MILITAR -33.442848 METRO 35
LA CISTERNA PATRONATO -33.537712 METRO 35
SAN MIGUEL LA CISTERNA -33.488785 METRO 35
SAN ALBERTO HURTADO ESCUELA MILITAR -33.454114 METRO 35
LA CISTERNA SAN MIGUEL -33.537712 METRO 34
UNIVERSIDAD DE CHILE MANUEL MONTT -33.443937 METRO 34
LA CISTERNA MANQUEHUE -33.537712 METRO 34
FRANCISCO BILBAO -33.537712 METRO 34
MACUL ESCUELA MILITAR -33.508787 METRO 34
BARRANCAS PLAZA MAIPU -33.452915 METRO 34
VESPUCIO NORTE LA CISTERNA -33.380622 METRO 34
LAS REJAS SALVADOR -33.457156 METRO 34
ESCUELA MILITAR MACUL -33.414312 METRO 33
IRARRAZAVAL BELLAVISTA DE LA FLORIDA -33.452436 METRO 33
PEDRO DE VALDIVIA LA CISTERNA -33.425466 METRO 33
UNIVERSIDAD DE CHILE LA CISTERNA -33.443937 METRO 33
LOS LEONES UNIVERSIDAD DE CHILE -33.422258 METRO 33
TOBALABA LAS MERCEDES -33.418227 METRO 33
PLAZA MAIPU IRARRAZAVAL -33.510217 METRO 32
BAQUEDANO PLAZA MAIPU -33.437186 METRO 32
PLAZA DE ARMAS LAS PARCELAS -33.437258 METRO 32
PLAZA DE PUENTE ALTO PEDRO DE VALDIVIA -33.609546 METRO 32
MANQUEHUE -33.609546 METRO 32
PEDRERO PLAZA DE ARMAS -33.507812 METRO 31
LAS REJAS MANQUEHUE -33.457156 METRO 31
SAN PABLO PLAZA DE ARMAS -33.445143 METRO 31
SANTA LUCIA TOBALABA -33.442848 METRO 31
IRARRAZAVAL PLAZA MAIPU -33.452436 METRO 31
LAS REJAS SANTA LUCIA -33.457156 METRO 31
PLAZA MAIPU PARQUE BUSTAMANTE -33.510217 METRO 31

100 rows × 1 columns


In [25]:
sorted_group.to_csv('od.csv')

In [28]:
another_group.head()


Out[28]:
tiempo_subida
par_subida par_bajada tipo_transporte
ALCANTARA BELLAS ARTES METRO 2
BELLAVISTA DE LA FLORIDA METRO 2
CAL Y CANTO METRO 1
CIUDAD DEL NINO METRO 11
CUMMING METRO 2

In [12]:
groupie_group = frame.groupby(['par_subida','tipo_transporte']).agg({'tiempo_subida':"count"})
sorted_par_subidas = groupie_group.sort_values('tiempo_subida',ascending=False)
sorted_par_subidas.to_csv('origin_.csv')
sorted_par_subidas.head()


Out[12]:
tiempo_subida
par_subida tipo_transporte
LA CISTERNA METRO 1382
TOBALABA METRO 1361
MANQUEHUE METRO 1116
UNIVERSIDAD DE CHILE METRO 1057
PLAZA DE ARMAS METRO 1022

In [13]:
groupie_group = frame.groupby(['par_bajada']).agg({'tiempo_subida':"count"})
sorted_par_bajadas = groupie_group.sort_values('tiempo_subida',ascending=False)
sorted_par_bajadas.to_csv('destination_.csv')
sorted_par_bajadas.head()


Out[13]:
tiempo_subida
par_bajada
TOBALABA 1492
LA CISTERNA 1213
PLAZA DE ARMAS 1142
UNIVERSIDAD DE CHILE 953
ESCUELA MILITAR 929

In [46]:
sorted_par_subidas[sorted_par_subidas['tiempo_subida']>10].to_csv('origin_10.csv')
sorted_par_bajadas[sorted_par_bajadas['tiempo_subida']>10].to_csv('destination_10.csv')

In [15]:
with open('correct_and_wrong_indexs_alg1.pickle') as f:
    correct_alg1 =  pickle.load(f)
    wrong_alg1 = pickle.load(f)

In [16]:
with open('index_id_users.pickle') as f:
    users_id = pickle.load(f)

In [17]:
correct_alg1_ids = []
wrong_alg1_ids = []
for i in range(len(correct_alg1)):
    correct_alg1_ids.append(users_id[i])
for i in range(len(wrong_alg1)):
    wrong_alg1_ids.append(users_id[i])

In [30]:
def write_csv_grouped_data(a_frame,name,threshold):
    groupie_group = a_frame.groupby(['par_bajada','tipo_transporte']).agg({'tiempo_subida':"count"})
    sorted_par_bajadas = groupie_group.sort_values('tiempo_subida',ascending=False)
    sorted_par_bajadas[sorted_par_bajadas['tiempo_subida']>threshold].to_csv(name+'_'+str(threshold)+'_destination.csv')
    groupie_group = a_frame.groupby(['par_subida','tipo_transporte']).agg({'tiempo_subida':"count"})
    sorted_par_subidas = groupie_group.sort_values('tiempo_subida',ascending=False)
    sorted_par_subidas[sorted_par_subidas['tiempo_subida']>threshold].to_csv(name+'_'+str(threshold)+'_origin.csv')
    return sorted_par_subidas

Guardar en csv viajes de los correctos e incorrectos


In [31]:
ff = write_csv_grouped_data(frame,'',0)

In [85]:
write_csv_grouped_data(frame_2,'',0)


Out[85]:
tiempo_subida
par_subida tipo_transporte
TOBALABA METRO 1448
LA CISTERNA METRO 1408
MANQUEHUE METRO 1186
PLAZA DE ARMAS METRO 1144
UNIVERSIDAD DE CHILE METRO 1143
PLAZA MAIPU METRO 947
PEDRO DE VALDIVIA METRO 938
SANTA ANA METRO 935
ESCUELA MILITAR METRO 905
LAS REJAS METRO 873
PLAZA DE PUENTE ALTO METRO 826
LOS LEONES METRO 795
CAL Y CANTO METRO 794
HOSPITAL SOTERO DEL RIO METRO 774
VICENTE VALDES METRO 717
LA MONEDA METRO 701
I-26-228-SN-25 BUS 697
SANTA LUCIA METRO 615
BAQUEDANO METRO 598
LOS HEROES METRO 594
MACUL METRO 568
SAN PABLO METRO 563
SALVADOR METRO 538
IRARRAZAVAL METRO 497
I-26-228-SN-25 ZP 486
BELLAVISTA DE LA FLORIDA METRO 459
MANUEL MONTT METRO 455
LOS DOMINICOS METRO 455
ELISA CORREA METRO 453
REPUBLICA METRO 440
... ... ...
L-15-12-OP-20 BUS 1
L-33-71-OP-30 BUS 1
L-33-64-PO-30 BUS 1
L-26-18-OP-30 BUS 1
L-14-2-OP-35 BUS 1
L-26-18-PO-25 BUS 1
T-20-410-PO-2 BUS 1
L-21-4-NS-35 BUS 1
T-20-409-OP-5 BUS 1
L-6-26-NS-10 BUS 1
T-11-64-PO-30 BUS 1
L-26-19-PO-40 BUS 1
L-26-19-PO-35 BUS 1
T-20-408-PO-20 BUS 1
L-14-10-OP-40 BUS 1
T-20-402-OP-10 BUS 1
L-14-10-PO-10 BUS 1
L-26-19-PO-10 BUS 1
L-33-60-NS-10 BUS 1
L-14-10-PO-30 BUS 1
L-14-2-OP-15 BUS 1
T-20-386-SN-15 BUS 1
T-20-344-NS-15 BUS 1
T-20-342-NS-5 BUS 1
L-33-61-PO-10 BUS 1
L-6-25-PO-15 BUS 1
L-21-33-NS-5 BUS 1
L-21-3-PO-5 BUS 1
L-33-64-OP-75 BUS 1
L-34-76-SN-15 BUS 1

6375 rows × 1 columns


In [37]:
frame


Out[37]:
tiempo_subida id x_subida y_subida tipo_transporte serviciosentidovariante tipo_dia nviaje netapa x_bajada ... par_bajada zona_subida zona_bajada adulto weekday lat_subida lat_bajada long_subida long_bajada diferencia_tiempo
26 2013-09-23 20:58:46 1132106 348106.0 6289139.0 BUS T203 00R LABORAL 1 1 346824.0 ... E-20-190-SN-40 328.0 307.0 0.0 0 -33.526277 -33.433786 -70.635551 -70.647786 0 days 00:00:00
25 2013-09-23 21:24:25 1132106 346789.0 6299372.0 BUS T502 00I LABORAL 1 2 351366.0 ... T-15-135-PO-5 307.0 188.0 0.0 0 -33.434116 -33.406027 -70.648104 -70.598251 0 days 00:25:39
24 2013-09-24 07:15:40 1132106 351362.0 6302563.0 BUS T502 00R LABORAL 2 1 346661.0 ... T-4-19-NS-100 188.0 55.0 0.0 1 -33.405971 -33.432332 -70.598379 -70.648651 0 days 09:51:15
23 2013-09-24 08:00:08 1132106 346719.0 6299344.0 BUS T206 06I LABORAL 3 1 348078.0 ... T-24-205-NS-20 307.0 348.0 0.0 1 -33.433463 -33.525983 -70.649060 -70.635944 0 days 00:44:28
22 2013-09-24 09:13:22 1132106 347658.0 6289320.0 BUS T352 00R LABORAL 3 2 NaN ... NaN 348.0 NaN NaN 1 -33.524874 NaN -70.640726 NaN 0 days 01:13:14
21 2013-09-24 21:12:10 1132106 348114.0 6289114.0 BUS T203 00R LABORAL 4 1 346811.0 ... E-20-190-SN-40 330.0 307.0 0.0 1 -33.529157 -33.433786 -70.635293 -70.647786 0 days 11:58:48
20 2013-09-24 21:41:09 1132106 346721.0 6299388.0 BUS T502 00I LABORAL 4 2 351366.0 ... T-15-135-PO-5 266.0 188.0 0.0 1 -33.433236 -33.406027 -70.653896 -70.598251 0 days 00:28:59
19 2013-09-25 06:53:03 1132106 351361.0 6302560.0 BUS T502 00R LABORAL 5 1 346734.0 ... T-4-19-NS-100 188.0 55.0 0.0 2 -33.405971 -33.432332 -70.598379 -70.648651 0 days 09:11:54
18 2013-09-25 07:08:48 1132106 346715.0 6299427.0 BUS T203 00I LABORAL 5 2 348095.0 ... T-24-205-NS-20 307.0 348.0 0.0 2 -33.433463 -33.525983 -70.649060 -70.635944 0 days 00:15:45
17 2013-09-25 20:51:51 1132106 348103.0 6289173.0 BUS T203 00R LABORAL 6 1 346821.0 ... E-20-190-SN-40 328.0 307.0 0.0 2 -33.526277 -33.433786 -70.635551 -70.647786 0 days 13:43:03
16 2013-09-25 21:20:23 1132106 346756.0 6299375.0 BUS T502 00I LABORAL 6 2 351385.0 ... T-15-135-PO-5 307.0 188.0 0.0 2 -33.434116 -33.406027 -70.648104 -70.598251 0 days 00:28:32
15 2013-09-26 07:09:30 1132106 351376.0 6302575.0 BUS T502 00R LABORAL 7 1 346746.0 ... T-4-19-NS-100 188.0 55.0 0.0 3 -33.405971 -33.432332 -70.598379 -70.648651 0 days 09:49:07
14 2013-09-26 07:25:54 1132106 346729.0 6299437.0 BUS T206 07I LABORAL 7 2 348092.0 ... T-24-205-NS-20 307.0 348.0 0.0 3 -33.433463 -33.525983 -70.649060 -70.635944 0 days 00:16:24
13 2013-09-26 20:57:48 1132106 348100.0 6289188.0 BUS T203 00R LABORAL 8 1 346811.0 ... E-20-190-SN-40 328.0 307.0 0.0 3 -33.526277 -33.433786 -70.635551 -70.647786 0 days 13:31:54
12 2013-09-26 21:31:12 1132106 346760.0 6299380.0 BUS T502 00I LABORAL 8 2 351363.0 ... T-15-135-PO-5 307.0 188.0 0.0 3 -33.434116 -33.406027 -70.648104 -70.598251 0 days 00:33:24
11 2013-09-27 07:36:13 1132106 351332.0 6302529.0 BUS T502 00R LABORAL 9 1 350199.0 ... T-14-121-OP-10 188.0 174.0 0.0 4 -33.405971 -33.415434 -70.598379 -70.611382 0 days 10:05:01
10 2013-09-27 08:39:19 1132106 350318.0 6301255.0 BUS T502 00I LABORAL 10 1 351367.0 ... T-15-135-PO-5 174.0 188.0 0.0 4 -33.417581 -33.406027 -70.609980 -70.598251 0 days 01:03:06
9 2013-09-27 09:05:57 1132106 351376.0 6302574.0 BUS T502 00R LABORAL 11 1 346731.0 ... T-4-19-NS-100 188.0 55.0 0.0 4 -33.405971 -33.432332 -70.598379 -70.648651 0 days 00:26:38
8 2013-09-27 09:29:40 1132106 346716.0 6299410.0 BUS T206 06I LABORAL 11 2 348090.0 ... T-24-205-NS-20 307.0 348.0 0.0 4 -33.433463 -33.525983 -70.649060 -70.635944 0 days 00:23:43
7 2013-09-27 21:03:27 1132106 348103.0 6289180.0 BUS T203 00R LABORAL 12 1 346811.0 ... E-20-190-SN-40 328.0 307.0 0.0 4 -33.526277 -33.433786 -70.635551 -70.647786 0 days 11:33:47
6 2013-09-27 21:36:19 1132106 346774.0 6299377.0 BUS T502 00I LABORAL 12 2 351366.0 ... T-15-135-PO-5 307.0 188.0 0.0 4 -33.434116 -33.406027 -70.648104 -70.598251 0 days 00:32:52
5 2013-09-28 07:07:06 1132106 351335.0 6302533.0 BUS T502 00R SABADO 13 1 346760.0 ... T-4-19-NS-100 188.0 55.0 0.0 5 -33.405971 -33.432332 -70.598379 -70.648651 0 days 09:30:47
4 2013-09-28 07:32:43 1132106 346729.0 6299435.0 BUS T206 00I SABADO 13 2 348099.0 ... T-24-205-NS-20 307.0 348.0 0.0 5 -33.433463 -33.525983 -70.649060 -70.635944 0 days 00:25:37
3 2013-09-29 06:48:11 1132106 348107.0 6289130.0 BUS T206 00R DOMINGO 14 1 346852.0 ... T-20-190-SN-35 328.0 309.0 0.0 6 -33.526277 -33.434819 -70.635551 -70.647429 0 days 23:15:28
2 2013-09-29 07:49:32 1132106 346768.0 6299380.0 BUS T502 00I DOMINGO 15 1 351370.0 ... T-15-135-PO-5 307.0 188.0 0.0 6 -33.434116 -33.406027 -70.648104 -70.598251 0 days 01:01:21
1 2013-09-29 19:38:31 1132106 351400.0 6302597.0 BUS T502 00R DOMINGO 16 1 346731.0 ... T-4-19-NS-100 188.0 55.0 0.0 6 -33.405971 -33.432332 -70.598379 -70.648651 0 days 11:48:59
0 2013-09-29 19:58:57 1132106 346742.0 6299422.0 BUS T206 00I DOMINGO 16 2 348092.0 ... T-24-205-NS-20 307.0 348.0 0.0 6 -33.433463 -33.525983 -70.649060 -70.635944 0 days 00:20:26
45 2013-09-23 08:21:15 1145290 345581.0 6300769.0 BUS T301 00I LABORAL 1 1 346334.0 ... T-1-3-NS-70 9.0 838.0 0.0 0 -33.421787 -33.430466 -70.663024 -70.653273 -7 days +12:22:18
44 2013-09-23 08:36:29 1145290 346607.0 6299509.0 ZP 402R-409I-517R LABORAL 1 2 NaN ... NaN 307.0 NaN NaN 0 -33.432845 NaN -70.650059 NaN 0 days 00:15:14
43 2013-09-24 08:19:15 1145290 345540.0 6300764.0 BUS T301 00I LABORAL 2 1 346335.0 ... E-20-174-NS-5 9.0 266.0 0.0 1 -33.421787 -33.432594 -70.663024 -70.653346 0 days 23:42:46
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
99954 2013-09-27 09:22:43 79787292 339834.0 6298035.0 METRO L1 LABORAL 10 2 343933.0 ... ESTACION CENTRAL 117.0 84.0 0.0 4 -33.445143 -33.450969 -70.723142 -70.679166 0 days 00:10:56
99953 2013-09-27 19:44:01 79787292 343933.0 6297455.0 METRO L1 LABORAL 11 1 339834.0 ... SAN PABLO 84.0 117.0 0.0 4 -33.450969 -33.445143 -70.679166 -70.723142 0 days 10:21:18
99952 2013-09-27 20:01:51 79787292 339883.0 6297984.0 ZP J18R-J18cR LABORAL 11 2 337339.0 ... L-10-42-OP-60 117.0 711.0 0.0 4 -33.445611 -33.436389 -70.722634 -70.748750 0 days 00:17:50
99992 2013-09-23 06:34:30 79790108 338700.0 6302262.0 BUS T105 00I LABORAL 1 1 342809.0 ... E-7-51-NS-15 718.0 77.0 0.0 0 -33.406772 -33.453275 -70.736683 -70.691274 -5 days +10:32:39
99991 2013-09-23 07:08:39 79790108 342772.0 6297116.0 BUS T424 00R LABORAL 2 1 340596.0 ... T-7-54-OP-5 77.0 71.0 0.0 0 -33.453820 -33.460204 -70.691618 -70.715326 0 days 00:34:09
99990 2013-09-23 17:25:42 79790108 340565.0 6296337.0 BUS T379 00I LABORAL 3 1 341480.0 ... E-7-53-PO-25 71.0 79.0 0.0 0 -33.460495 -33.457496 -70.715128 -70.705031 0 days 10:17:03
99989 2013-09-23 17:29:10 79790108 341496.0 6296805.0 ZP 101R-101cR-102R-107R LABORAL 3 2 341522.0 ... E-5-38-SN-5 77.0 62.0 0.0 0 -33.457709 -33.404895 -70.705314 -70.703593 0 days 00:03:28
99988 2013-09-23 20:01:20 79790108 341471.0 6302507.0 BUS T128 02R LABORAL 4 1 NaN ... NaN 62.0 NaN NaN 0 -33.404704 NaN -70.705020 NaN 0 days 02:32:10
99987 2013-09-23 20:01:22 79790108 341471.0 6302507.0 BUS T128 02R LABORAL 5 1 338590.0 ... T-5-34-OP-15 62.0 60.0 0.0 0 -33.404704 -33.406364 -70.705020 -70.736191 0 days 00:00:02
99986 2013-09-26 06:34:47 79790108 338742.0 6302247.0 BUS T105 00I LABORAL 6 1 342809.0 ... E-7-51-NS-15 719.0 77.0 0.0 3 -33.407053 -33.453275 -70.734022 -70.691274 2 days 10:33:25
99985 2013-09-26 07:13:09 79790108 342778.0 6297115.0 BUS T404 00R LABORAL 6 2 340509.0 ... T-7-54-OP-5 77.0 71.0 0.0 3 -33.453820 -33.460204 -70.691618 -70.715326 0 days 00:38:22
99984 2013-09-26 17:15:07 79790108 340556.0 6296331.0 BUS T401 00I LABORAL 7 1 341450.0 ... E-7-53-PO-20 71.0 73.0 0.0 3 -33.462559 -33.457728 -70.719806 -70.705896 0 days 10:01:58
99983 2013-09-26 17:26:52 79790108 341517.0 6296587.0 BUS T101 00R LABORAL 8 1 341209.0 ... T-8-65-SN-20 73.0 94.0 0.0 3 -33.461940 -33.409878 -70.705214 -70.707164 0 days 00:11:45
99982 2013-09-26 18:23:01 79790108 341006.0 6302699.0 BUS T128 02R LABORAL 8 2 338888.0 ... T-5-34-OP-10 724.0 60.0 0.0 3 -33.403196 -33.407241 -70.709743 -70.732155 0 days 00:56:09
99981 2013-09-27 06:41:53 79790108 338759.0 6302247.0 BUS T105 00I LABORAL 9 1 342803.0 ... E-7-51-NS-15 719.0 77.0 0.0 4 -33.407053 -33.453275 -70.734022 -70.691274 0 days 12:18:52
99980 2013-09-27 07:08:41 79790108 342785.0 6297120.0 BUS T379 E0 00R LABORAL 9 2 340541.0 ... E-9-53-OP-6 77.0 123.0 0.0 4 -33.453820 -33.457149 -70.691618 -70.706014 0 days 00:26:48
99979 2013-09-27 17:01:30 79790108 340538.0 6296329.0 BUS T405 00I LABORAL 10 1 341483.0 ... E-7-53-PO-15 71.0 73.0 0.0 4 -33.462559 -33.457855 -70.719806 -70.706533 0 days 09:52:49
99978 2013-09-27 17:12:31 79790108 341507.0 6296645.0 BUS T101 00R LABORAL 11 1 341512.0 ... E-5-38-SN-5 79.0 62.0 0.0 4 -33.457709 -33.404895 -70.705314 -70.703593 0 days 00:11:01
99977 2013-09-27 17:47:57 79790108 341419.0 6302542.0 BUS T130 00R LABORAL 12 1 338782.0 ... T-5-34-PO-20 62.0 719.0 0.0 4 -33.404704 -33.407053 -70.705020 -70.734022 0 days 00:35:26
99976 2013-09-28 07:45:42 79790108 338764.0 6302240.0 BUS T130 00I SABADO 13 1 348479.0 ... T-14-131-PO-5 719.0 173.0 0.0 5 -33.407053 -33.434435 -70.734022 -70.630750 0 days 13:57:45
99975 2013-09-28 08:22:53 79790108 348822.0 6299334.0 BUS T505 08I SABADO 13 2 350942.0 ... T-18-157-PO-45 178.0 232.0 0.0 5 -33.434811 -33.454186 -70.626185 -70.605815 0 days 00:37:11
99974 2013-09-28 19:01:28 79790108 351050.0 6297191.0 BUS T514 00R SABADO 14 1 348819.0 ... T-14-130-SN-25 757.0 178.0 0.0 5 -33.454204 -33.433998 -70.603018 -70.626278 0 days 10:38:35
99973 2013-09-28 19:26:39 79790108 348899.0 6299584.0 BUS T130 02R SABADO 14 2 339252.0 ... T-5-27-OP-15 178.0 723.0 0.0 5 -33.432129 -33.404908 -70.625071 -70.728711 0 days 00:25:11
99999 2013-09-25 18:34:14 79792044 345735.0 6289203.0 BUS T201 00I LABORAL 4 1 NaN ... NaN 375.0 NaN NaN 2 -33.523965 NaN -70.660832 NaN -4 days +23:07:35
99998 2013-09-26 07:41:12 79792044 342912.0 6283464.0 BUS T201 00R LABORAL 5 1 345710.0 ... T-26-228-SN-45 794.0 375.0 0.0 3 -33.576836 -33.527015 -70.692302 -70.661418 0 days 13:06:58
99997 2013-09-26 08:14:23 79792044 345639.0 6289137.0 BUS T231 C0 06I LABORAL 5 2 345639.0 ... L-26-10-PO-30 374.0 374.0 0.0 3 -33.525125 -33.526550 -70.665797 -70.661797 0 days 00:33:11
99996 2013-09-26 21:20:30 79792044 345724.0 6289148.0 BUS T201 00I LABORAL 6 1 342960.0 ... T-27-228-NS-65 375.0 795.0 0.0 3 -33.526202 -33.575920 -70.661467 -70.692004 0 days 13:06:07
99995 2013-09-27 07:33:36 79792044 342919.0 6283470.0 BUS T201 00R LABORAL 7 1 NaN ... NaN 794.0 NaN NaN 4 -33.576836 NaN -70.692302 NaN 0 days 10:13:06
99994 2013-09-28 08:41:26 79792044 342930.0 6283492.0 BUS T201 00R SABADO 8 1 345711.0 ... T-26-228-SN-45 794.0 375.0 0.0 5 -33.576836 -33.527015 -70.692302 -70.661418 1 days 01:07:50
99993 2013-09-28 21:12:06 79792044 345719.0 6289148.0 BUS T201 00I SABADO 9 1 342957.0 ... T-27-228-NS-65 375.0 795.0 0.0 5 -33.526202 -33.575920 -70.661467 -70.692004 0 days 12:30:40

100000 rows × 23 columns


In [19]:
write_csv_grouped_data(frame[frame['id'].isin(wrong_alg1_ids)],'wrong_alg1',1)

In [20]:
write_csv_grouped_data(frame[frame['id'].isin(correct_alg1_ids)],'correct_alg1',1)

Guardar en csv viajes de los correctos e incorrectos. sin transbordo


In [23]:
without_transbordors_frame = frame[frame['netapa']==1]

In [24]:
write_csv_grouped_data(without_transbordors_frame[without_transbordors_frame['id'].isin(wrong_alg1_ids)],'wrong_alg1_wo_tr',1)
write_csv_grouped_data(without_transbordors_frame[without_transbordors_frame['id'].isin(correct_alg1_ids)],'correct_alg1_wo_tr',1)

In [ ]: