notebook.community

Edit and run



In [4]:

    
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import pickle
from __future__ import division
import csv
from tools import *
import datetime



In [23]:

    
# Load the metro lookup through a helper that is not defined in this notebook.
# NOTE(review): load_metro_dictionary presumably comes from `from tools import *`,
# and dict_metro is not referenced again in the visible cells — confirm both.
dict_metro = load_metro_dictionary()



In [28]:

    
# All inputs are resolved relative to the notebook's working directory:
# <cwd>/../data/Users_data/<file>.csv
data_path = os.path.join(os.getcwd(), '..', 'data')
users_data_dir = os.path.join(data_path, 'Users_data')

# First period (April 2013) is loaded into `frame`; second period
# (September 2013) is loaded into `df_id_period`.
first_period_path = os.path.join(
    users_data_dir, 'etapas_2013_abril_allyearsids_10_100000.csv')
second_period_path = os.path.join(
    users_data_dir, 'etapas_2013_septiembre_allyearsids_10_100000.csv')



In [29]:

    
# Read the first-period stages, parse the boarding timestamp and order the
# rows chronologically within each card id.
frame = (
    pd.read_csv(first_period_path)
    .assign(tiempo_subida=lambda stages: pd.to_datetime(stages['tiempo_subida']))
    .sort_values(by=['id', 'tiempo_subida'])
)
# Last expression of the cell: show the first rows as a sanity check.
frame.head()









    Out[29]:






  
    
      
      tiempo_subida
      id
      x_subida
      y_subida
      tipo_transporte
      serviciosentidovariante
      tipo_dia
      nviaje
      netapa
      x_bajada
      y_bajada
      tiempo_bajada
      par_subida
      par_bajada
      zona_subida
      zona_bajada
      adulto
    
  
  
    
      23
      2013-04-14 06:45:44
      1132106
      348108.0
      6289153.0
      BUS
      T203 00R
      DOMINGO
      1
      1
      346818.0
      6299394.0
      2013-04-14 07:07:02
      T-22-205-SN-65
      E-20-190-SN-40
      328.0
      307.0
      0.0
    
    
      22
      2013-04-14 07:51:52
      1132106
      346751.0
      6299389.0
      BUS
      T502 00I
      DOMINGO
      2
      1
      351363.0
      6302549.0
      2013-04-14 08:04:11
      E-20-291-PO-20
      T-15-135-PO-5
      307.0
      188.0
      0.0
    
    
      21
      2013-04-14 19:56:47
      1132106
      351368.0
      6302559.0
      BUS
      T502 00R
      DOMINGO
      3
      1
      346763.0
      6299568.0
      2013-04-14 20:09:11
      T-15-135-OP-110
      T-4-19-NS-100
      188.0
      55.0
      0.0
    
    
      20
      2013-04-14 20:15:25
      1132106
      346713.0
      6299427.0
      BUS
      T203 00I
      DOMINGO
      3
      2
      348095.0
      6289148.0
      2013-04-14 20:40:51
      E-20-199-NS-2
      T-24-205-NS-20
      307.0
      348.0
      0.0
    
    
      19
      2013-04-15 21:04:59
      1132106
      348103.0
      6289191.0
      BUS
      T206 00R
      LABORAL
      4
      1
      346844.0
      6299320.0
      2013-04-15 21:33:23
      T-22-205-SN-65
      T-20-190-SN-35
      328.0
      309.0
      0.0



In [30]:

    
# Read the second-period stages with the same preparation as the first period:
# parse the boarding timestamp, then sort by card id and boarding time.
df_id_period = (
    pd.read_csv(second_period_path)
    .assign(tiempo_subida=lambda stages: pd.to_datetime(stages['tiempo_subida']))
    .sort_values(by=['id', 'tiempo_subida'])
)



In [31]:

    
# Re-execute the tpm_identification module so the functions called in the
# following cells reflect its current source. `reload` is a builtin on Python 2 only.
# NOTE(review): no explicit `import tpm_identification` is visible in this
# notebook; presumably it arrives via `from tools import *` — confirm.
reload(tpm_identification)









    Out[31]:





<module 'tpm_identification' from 'tpm_identification.pyc'>



In [32]:

    
# Build the profiles from the first-period stages: card id, boarding time,
# boarding stop and alighting stop columns of `frame`.
profiles = tpm_identification.get_spatiotemporal_profiles(
    frame['id'],
    frame['tiempo_subida'],
    frame['par_subida'],
    frame['par_bajada'],
)



In [33]:

    
# Build the sequences from the second-period stages: card id, boarding time,
# boarding stop and alighting stop columns of `df_id_period`.
sequences = tpm_identification.get_spatiotemporal_sequences(
    df_id_period['id'],
    df_id_period['tiempo_subida'],
    df_id_period['par_subida'],
    df_id_period['par_bajada'],
)



In [22]:

    
# Compute the identification matrix from `profiles` and `sequences` and report
# the wall-clock time in seconds.
# NOTE(review): the pickle cell that follows needs `iden`; its saved output is a
# NameError, so this cell was not run in that kernel session — run it first.
start_time = time.time()
iden = tpm_identification.get_spatiotemporal_identification_matrix(profiles, sequences)
delta_time = time.time() - start_time
# Single-argument print(...) behaves the same on Python 2 and parses on Python 3.
print(delta_time)



In [34]:

    
# Persist the identification matrix so it can be reloaded without recomputing.
# Pickle data is a byte stream: open the file in binary mode ('wb'). Text mode
# fails on Python 3 and can corrupt the file through newline translation on Windows.
# Requires `iden` from the timing cell above to exist in the kernel.
with open('iden_matrix_spatiotemporal.pickle', 'wb') as f:
    pickle.dump(iden, f)









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-34-e1ce4f8818bf> in <module>()
      1 with open('iden_matrix_spatiotemporal.pickle','w') as f:
----> 2     pickle.dump(iden,f)

NameError: name 'iden' is not defined



In [60]:

    
# Build the profiles with the second variant of the profile builder, from the
# same first-period columns used for `profiles`.
profiles_st = tpm_identification.get_spatiotemporal_profiles_2(
    frame['id'],
    frame['tiempo_subida'],
    frame['par_subida'],
    frame['par_bajada'],
)



In [61]:

    
# Compute the identification matrix (variant 2) from `profiles_st` and
# `sequences`, and report the wall-clock time in seconds. The saved output of
# this cell shows roughly 6096 s, so expect a long run.
start_time = time.time()
iden_st_2 = tpm_identification.get_spatiotemporal_identification_matrix_2(profiles_st, sequences)
delta_time = time.time() - start_time
# Single-argument print(...) behaves the same on Python 2 and parses on Python 3.
print(delta_time)









    



6095.81366611



In [62]:

    
# Persist the variant-2 identification matrix so the long computation need not
# be repeated. Pickle data is a byte stream: open the file in binary mode ('wb').
# Text mode fails on Python 3 and can corrupt the file on Windows.
with open('iden_matrix_spatiotemporal_2.pickle', 'wb') as f:
    pickle.dump(iden_st_2, f)



In [65]:

    
# Score the variant-2 identification matrix: column i counts as correctly
# identified when the row holding that column's maximum is also i.
iden_matrix = np.matrix(iden_st_2)
df_ident = pd.DataFrame(iden_matrix)

identified_indexs = []  # arg-max row chosen for each evaluated column
wrong_indexs = []       # chosen rows that differ from the column index
correct_indexs = []     # chosen rows equal to the column index
selected_indexs = []    # maximum value found in each evaluated column

# Size the evaluation from the inputs that produced this matrix (profiles_st
# and sequences) rather than from the older `profiles` object.
limit = min(len(profiles_st), len(sequences))
for i in range(limit):
    column = iden_matrix[:, i]
    the_index = np.argmax(column)
    selected_indexs.append(np.max(column))
    identified_indexs.append(the_index)
    if the_index == i:
        correct_indexs.append(the_index)
    else:
        wrong_indexs.append(the_index)

n_identified = len(correct_indexs)
# 100.0 forces float division even if `from __future__ import division` has not
# been executed in this kernel; an empty evaluation reports 0% instead of
# raising ZeroDivisionError.
porcentaje_correcto = n_identified * 100.0 / limit if limit else 0.0
# Single-argument print(...) behaves the same on Python 2 and parses on Python 3.
print(str(round(porcentaje_correcto, 2)) + "%")



In [68]:

    
# Re-execute the tpm_identification module so the call to
# get_spatiotemporal_identification_matrix_3 in the next cell uses its current
# source. `reload` is a builtin on Python 2 only.
# NOTE(review): no explicit `import tpm_identification` is visible in this
# notebook; presumably it arrives via `from tools import *` — confirm.
reload(tpm_identification)









    Out[68]:





<module 'tpm_identification' from 'tpm_identification.py'>



In [69]:

    
# Compute the identification matrix (variant 3) from `profiles_st` and
# `sequences`, and report the wall-clock time in seconds. The saved output of
# this cell shows roughly 6426 s, so expect a long run.
start_time = time.time()
iden_st_3 = tpm_identification.get_spatiotemporal_identification_matrix_3(profiles_st, sequences)
delta_time = time.time() - start_time
# Single-argument print(...) behaves the same on Python 2 and parses on Python 3.
print(delta_time)









    



6426.14186883



In [70]:

    
# Persist the variant-3 identification matrix so the long computation need not
# be repeated. Pickle data is a byte stream: open the file in binary mode ('wb').
# Text mode fails on Python 3 and can corrupt the file on Windows.
with open('iden_matrix_spatiotemporal_3.pickle', 'wb') as f:
    pickle.dump(iden_st_3, f)



In [71]:

    
# Score the variant-3 identification matrix: column i counts as correctly
# identified when the row holding that column's maximum is also i.
iden_matrix = np.matrix(iden_st_3)
df_ident = pd.DataFrame(iden_matrix)

identified_indexs = []  # arg-max row chosen for each evaluated column
wrong_indexs = []       # chosen rows that differ from the column index
correct_indexs = []     # chosen rows equal to the column index
selected_indexs = []    # maximum value found in each evaluated column

# Size the evaluation from the inputs that produced this matrix (profiles_st
# and sequences) rather than from the older `profiles` object.
limit = min(len(profiles_st), len(sequences))
for i in range(limit):
    column = iden_matrix[:, i]
    the_index = np.argmax(column)
    selected_indexs.append(np.max(column))
    identified_indexs.append(the_index)
    if the_index == i:
        correct_indexs.append(the_index)
    else:
        wrong_indexs.append(the_index)

n_identified = len(correct_indexs)
# 100.0 forces float division even if `from __future__ import division` has not
# been executed in this kernel; an empty evaluation reports 0% instead of
# raising ZeroDivisionError.
porcentaje_correcto = n_identified * 100.0 / limit if limit else 0.0
# Single-argument print(...) behaves the same on Python 2 and parses on Python 3.
print(str(round(porcentaje_correcto, 2)) + "%")



In [ ]:

	tiempo_subida	id	x_subida	y_subida	tipo_transporte	serviciosentidovariante	tipo_dia	nviaje	netapa	x_bajada	y_bajada	tiempo_bajada	par_subida	par_bajada	zona_subida	zona_bajada
23	2013-04-14 06:45:44	1132106	348108.0	6289153.0	BUS	T203 00R	DOMINGO	1	1	346818.0	6299394.0	2013-04-14 07:07:02	T-22-205-SN-65	E-20-190-SN-40	328.0	307.0
22	2013-04-14 07:51:52	1132106	346751.0	6299389.0	BUS	T502 00I	DOMINGO	2	1	351363.0	6302549.0	2013-04-14 08:04:11	E-20-291-PO-20	T-15-135-PO-5	307.0	188.0
21	2013-04-14 19:56:47	1132106	351368.0	6302559.0	BUS	T502 00R	DOMINGO	3	1	346763.0	6299568.0	2013-04-14 20:09:11	T-15-135-OP-110	T-4-19-NS-100	188.0	55.0
20	2013-04-14 20:15:25	1132106	346713.0	6299427.0	BUS	T203 00I	DOMINGO	3	2	348095.0	6289148.0	2013-04-14 20:40:51	E-20-199-NS-2	T-24-205-NS-20	307.0	348.0
19	2013-04-15 21:04:59	1132106	348103.0	6289191.0	BUS	T206 00R	LABORAL	4	1	346844.0	6299320.0	2013-04-15 21:33:23	T-22-205-SN-65	T-20-190-SN-35	328.0	309.0