In [4]:
%matplotlib inline
from __future__ import division

import csv
import os
import pickle
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tools import *
# Kept after the star import so `datetime` is always bound to the stdlib module,
# whatever `tools` happens to export under that name.
import datetime
In [23]:
# Build `dict_metro` by calling `load_metro_dictionary()`. The helper is not defined
# in this notebook (presumably it arrives through `from tools import *` -- TODO confirm).
# NOTE(review): `dict_metro` is not referenced by any later cell shown here.
dict_metro = load_metro_dictionary()
In [28]:
# Input locations, resolved relative to the directory the kernel was started in:
# <cwd>/../data/Users_data/<file>.csv
data_path = os.path.join(os.getcwd(), '..', 'data')
users_data_path = os.path.join(data_path, 'Users_data')

# First file feeds the profiles, second file feeds the sequences built further down.
first_period_path = os.path.join(users_data_path, 'etapas_2013_abril_allyearsids_10_100000.csv')
second_period_path = os.path.join(users_data_path, 'etapas_2013_septiembre_allyearsids_10_100000.csv')
In [29]:
# Read the first-period CSV, parse the `tiempo_subida` column as datetimes and
# order the rows by `id`, then by `tiempo_subida`.
frame = (
    pd.read_csv(first_period_path)
    .assign(tiempo_subida=lambda trips: pd.to_datetime(trips['tiempo_subida']))
    .sort_values(by=['id', 'tiempo_subida'])
)
# Last expression of the cell: show the first rows as a sanity check.
frame.head()
Out[29]:
In [30]:
# Read the second-period CSV, parse the `tiempo_subida` column as datetimes and
# order the rows by `id`, then by `tiempo_subida`.
df_id_period = (
    pd.read_csv(second_period_path)
    .assign(tiempo_subida=lambda trips: pd.to_datetime(trips['tiempo_subida']))
    .sort_values(by=['id', 'tiempo_subida'])
)
In [31]:
# Re-execute the already-imported `tpm_identification` module so the calls below
# use its current source on disk. `reload` is a builtin on Python 2 only.
# NOTE(review): no cell shown here imports `tpm_identification` explicitly; presumably
# it comes in through `from tools import *` -- confirm.
reload(tpm_identification)
Out[31]:
In [32]:
# Build `profiles` from the first-period frame. The four columns are handed over
# positionally, in exactly this order.
profile_columns = ('id', 'tiempo_subida', 'par_subida', 'par_bajada')
profiles = tpm_identification.get_spatiotemporal_profiles(
    *(frame[column] for column in profile_columns)
)
In [33]:
# Build `sequences` from the second-period frame. The four columns are handed over
# positionally, in exactly this order.
sequence_columns = ('id', 'tiempo_subida', 'par_subida', 'par_bajada')
sequences = tpm_identification.get_spatiotemporal_sequences(
    *(df_id_period[column] for column in sequence_columns)
)
In [22]:
# Wall-clock the construction of the identification matrix from `profiles` and
# `sequences`, then report the elapsed time in seconds.
start_time = time.time()
iden = tpm_identification.get_spatiotemporal_identification_matrix(
    profiles,
    sequences,
)
delta_time = time.time() - start_time
print(delta_time)
In [34]:
# Persist the identification matrix next to the notebook.
# A pickle is a byte stream, so the file must be opened in binary mode: text mode
# ('w') may rewrite newline bytes on Windows and is rejected outright on Python 3.
with open('iden_matrix_spatiotemporal.pickle', 'wb') as f:
    pickle.dump(iden, f)
In [60]:
# Build `profiles_st` from the first-period frame with the `_2` variant of the
# profile builder. The four columns are handed over positionally, in this order.
profile_st_columns = ('id', 'tiempo_subida', 'par_subida', 'par_bajada')
profiles_st = tpm_identification.get_spatiotemporal_profiles_2(
    *(frame[column] for column in profile_st_columns)
)
In [61]:
# Wall-clock the `_2` variant of the identification-matrix builder on
# `profiles_st` and `sequences`, then report the elapsed time in seconds.
start_time = time.time()
iden_st_2 = tpm_identification.get_spatiotemporal_identification_matrix_2(
    profiles_st,
    sequences,
)
delta_time = time.time() - start_time
print(delta_time)
In [62]:
# Persist the `_2` identification matrix next to the notebook.
# A pickle is a byte stream, so the file must be opened in binary mode: text mode
# ('w') may rewrite newline bytes on Windows and is rejected outright on Python 3.
with open('iden_matrix_spatiotemporal_2.pickle', 'wb') as f:
    pickle.dump(iden_st_2, f)
In [65]:
# Score the `_2` identification matrix: column i counts as correctly identified
# when its largest entry sits on row i.
iden_matrix = np.matrix(iden_st_2)
# NOTE(review): `df_ident` is not read by any cell shown here; kept in case later
# cells rely on it.
df_ident = pd.DataFrame(iden_matrix)

identified_indexs = []  # arg-max row of every evaluated column
wrong_indexs = []       # arg-max rows that differ from their column index
correct_indexs = []     # arg-max rows equal to their column index
selected_indexs = []    # despite the name: the maximum VALUE of every evaluated column
n_identified = 0

# NOTE(review): `iden_st_2` was built from `profiles_st`, yet the bound below uses
# `profiles`; presumably both hold one entry per user -- confirm.
limit = min(len(profiles), len(sequences))
if limit == 0:
    # Fail with an explicit message instead of a bare ZeroDivisionError further down.
    raise ValueError("no profile/sequence pairs to evaluate")

for i in range(limit):
    column = iden_matrix[:, i]
    # With no axis given, argmax works on the flattened (n x 1) column, i.e. it
    # returns the row index of the best match.
    the_index = np.argmax(column)
    selected_indexs.append(np.max(column))
    identified_indexs.append(the_index)
    if the_index == i:
        correct_indexs.append(the_index)
        n_identified += 1
    else:
        wrong_indexs.append(the_index)

# Float literal keeps this a true division even without `from __future__ import division`.
porcentaje_correcto = 100.0 * n_identified / limit
print(str(round(porcentaje_correcto, 2)) + "%")
In [68]:
# Re-execute the already-imported `tpm_identification` module so the calls below
# use its current source on disk. `reload` is a builtin on Python 2 only.
# NOTE(review): the next cell is the first one shown here to call
# `get_spatiotemporal_identification_matrix_3`; presumably this reload is what
# makes that function available -- confirm.
reload(tpm_identification)
Out[68]:
In [69]:
# Wall-clock the `_3` variant of the identification-matrix builder on
# `profiles_st` and `sequences`, then report the elapsed time in seconds.
start_time = time.time()
iden_st_3 = tpm_identification.get_spatiotemporal_identification_matrix_3(
    profiles_st,
    sequences,
)
delta_time = time.time() - start_time
print(delta_time)
In [70]:
# Persist the `_3` identification matrix next to the notebook.
# A pickle is a byte stream, so the file must be opened in binary mode: text mode
# ('w') may rewrite newline bytes on Windows and is rejected outright on Python 3.
with open('iden_matrix_spatiotemporal_3.pickle', 'wb') as f:
    pickle.dump(iden_st_3, f)
In [71]:
# Score the `_3` identification matrix: column i counts as correctly identified
# when its largest entry sits on row i.
iden_matrix = np.matrix(iden_st_3)
# NOTE(review): `df_ident` is not read by any cell shown here; kept in case later
# cells rely on it.
df_ident = pd.DataFrame(iden_matrix)

identified_indexs = []  # arg-max row of every evaluated column
wrong_indexs = []       # arg-max rows that differ from their column index
correct_indexs = []     # arg-max rows equal to their column index
selected_indexs = []    # despite the name: the maximum VALUE of every evaluated column
n_identified = 0

# NOTE(review): `iden_st_3` was built from `profiles_st`, yet the bound below uses
# `profiles`; presumably both hold one entry per user -- confirm.
limit = min(len(profiles), len(sequences))
if limit == 0:
    # Fail with an explicit message instead of a bare ZeroDivisionError further down.
    raise ValueError("no profile/sequence pairs to evaluate")

for i in range(limit):
    column = iden_matrix[:, i]
    # With no axis given, argmax works on the flattened (n x 1) column, i.e. it
    # returns the row index of the best match.
    the_index = np.argmax(column)
    selected_indexs.append(np.max(column))
    identified_indexs.append(the_index)
    if the_index == i:
        correct_indexs.append(the_index)
        n_identified += 1
    else:
        wrong_indexs.append(the_index)

# Float literal keeps this a true division even without `from __future__ import division`.
porcentaje_correcto = 100.0 * n_identified / limit
print(str(round(porcentaje_correcto, 2)) + "%")
In [ ]: