Import Export

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import pickle
from __future__ import division
import csv
from tools import *
import os

<module 'tools.tpm_identification' from 'tools\tpm_identification.pyc'>

# Input CSVs live in ../data/Users_data relative to the current working directory.
data_path = os.path.join(os.getcwd(), '..', 'data')
_users_data_dir = os.path.join(data_path, 'Users_data')
first_period_path = os.path.join(
    _users_data_dir, 'etapas_2013_abril_allyearsids_10_100000.csv')
second_period_path = os.path.join(
    _users_data_dir, 'etapas_2013_septiembre_allyearsids_10_100000.csv')

# Load the metro dictionary through the project's auxiliar_functions helper
# (presumably a station lookup -- confirm against tools/auxiliar_functions).
# NOTE(review): dict_metro is not referenced again in the visible part of this
# notebook; verify it is still needed.
dict_metro = auxiliar_functions.load_metro_dictionary()

# Read the first-period (April) trips, parse the boarding timestamp and order
# the rows chronologically within each user id.
first_period_frame = (
    pd.read_csv(first_period_path)
    .assign(tiempo_subida=lambda frame: pd.to_datetime(frame['tiempo_subida']))
    .sort_values(by=['id', 'tiempo_subida'])
)

tiempo_subida id x_subida y_subida tipo_transporte serviciosentidovariante tipo_dia nviaje netapa x_bajada y_bajada tiempo_bajada par_subida par_bajada zona_subida zona_bajada adulto
23 2013-04-14 06:45:44 1132106 348108.0 6289153.0 BUS T203 00R DOMINGO 1 1 346818.0 6299394.0 2013-04-14 07:07:02 T-22-205-SN-65 E-20-190-SN-40 328.0 307.0 0.0
22 2013-04-14 07:51:52 1132106 346751.0 6299389.0 BUS T502 00I DOMINGO 2 1 351363.0 6302549.0 2013-04-14 08:04:11 E-20-291-PO-20 T-15-135-PO-5 307.0 188.0 0.0
21 2013-04-14 19:56:47 1132106 351368.0 6302559.0 BUS T502 00R DOMINGO 3 1 346763.0 6299568.0 2013-04-14 20:09:11 T-15-135-OP-110 T-4-19-NS-100 188.0 55.0 0.0
20 2013-04-14 20:15:25 1132106 346713.0 6299427.0 BUS T203 00I DOMINGO 3 2 348095.0 6289148.0 2013-04-14 20:40:51 E-20-199-NS-2 T-24-205-NS-20 307.0 348.0 0.0
19 2013-04-15 21:04:59 1132106 348103.0 6289191.0 BUS T206 00R LABORAL 4 1 346844.0 6299320.0 2013-04-15 21:33:23 T-22-205-SN-65 T-20-190-SN-35 328.0 309.0 0.0

# Read the second-period (September) trips, parse the boarding timestamp and
# order the rows chronologically within each user id.
second_period_frame = (
    pd.read_csv(second_period_path)
    .assign(tiempo_subida=lambda frame: pd.to_datetime(frame['tiempo_subida']))
    .sort_values(by=['id', 'tiempo_subida'])
)

tiempo_subida id x_subida y_subida tipo_transporte serviciosentidovariante tipo_dia nviaje netapa x_bajada y_bajada tiempo_bajada par_subida par_bajada zona_subida zona_bajada adulto
26 2013-09-23 20:58:46 1132106 348106.0 6289139.0 BUS T203 00R LABORAL 1 1 346824.0 6299354.0 2013-09-23 21:23:02 T-22-205-SN-65 E-20-190-SN-40 328.0 307.0 0.0
25 2013-09-23 21:24:25 1132106 346789.0 6299372.0 BUS T502 00I LABORAL 1 2 351366.0 6302548.0 2013-09-23 21:37:55 E-20-291-PO-20 T-15-135-PO-5 307.0 188.0 0.0
24 2013-09-24 07:15:40 1132106 351362.0 6302563.0 BUS T502 00R LABORAL 2 1 346661.0 6299484.0 2013-09-24 07:32:11 T-15-135-OP-110 T-4-19-NS-100 188.0 55.0 0.0
23 2013-09-24 08:00:08 1132106 346719.0 6299344.0 BUS T206 06I LABORAL 3 1 348078.0 6289284.0 2013-09-24 08:36:40 E-20-199-NS-2 T-24-205-NS-20 307.0 348.0 0.0
22 2013-09-24 09:13:22 1132106 347658.0 6289320.0 BUS T352 00R LABORAL 3 2 NaN NaN NaN L-24-26-OP-25 NaN 348.0 NaN NaN

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100000 entries, 23 to 99994
Data columns (total 17 columns):
tiempo_subida              100000 non-null datetime64[ns]
id                         100000 non-null int64
x_subida                   99224 non-null float64
y_subida                   99224 non-null float64
tipo_transporte            100000 non-null object
serviciosentidovariante    98074 non-null object
tipo_dia                   100000 non-null object
nviaje                     100000 non-null int64
netapa                     100000 non-null int64
x_bajada                   86774 non-null float64
y_bajada                   86774 non-null float64
tiempo_bajada              86774 non-null object
par_subida                 99195 non-null object
par_bajada                 86767 non-null object
zona_subida                99180 non-null float64
zona_bajada                86764 non-null float64
adulto                     86774 non-null float64
dtypes: datetime64[ns](1), float64(7), int64(3), object(6)
memory usage: 13.7+ MB

Get Profiles from both periods

<module 'tools.tpm_identification' from 'tools/tpm_identification.pyc'>

# First-period profiles, built from each user's boarding/alighting zones.
users_profiles = tpm_identification.get_profiles(
    first_period_frame['id'],
    first_period_frame['zona_subida'],
    first_period_frame['zona_bajada'],
)

numero_usuarios = len(users_profiles)


# Second-period sequences over the same zone columns.
profiles = tpm_identification.get_sequences(
    second_period_frame['id'],
    second_period_frame['zona_subida'],
    second_period_frame['zona_bajada'],
)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(len(profiles))
# Only the users present in both periods can be compared.
limit = min(numero_usuarios, len(profiles))
print(limit)


Check if the ids are the same for both periods

In [56]:
# Sanity check: user ids must be strictly increasing in the first-period
# profiles and must match the second-period sequences position by position.
last_iddd = 0
ids_alg1 = []  # NOTE(review): never filled in the visible code -- confirm intent.

# Fail with an explicit message instead of an IndexError when the second
# period holds fewer entries than the first.
assert len(profiles) >= len(users_profiles), (
    "second period has %d entries but first period has %d"
    % (len(profiles), len(users_profiles)))

for first_profile, second_profile in zip(users_profiles, profiles):
    current_id = first_profile['user_id']
    assert last_iddd < current_id, (
        "user ids are not strictly increasing at id %r" % (current_id,))
    assert current_id == second_profile['user_id'], (
        "id mismatch between periods: %r != %r"
        % (current_id, second_profile['user_id']))
    last_iddd = current_id

# NOTE(review): the body of this `with` statement is missing from this export,
# so the cell is not runnable as shown; restore it from the original notebook.
with open('data/ids_alg1.pickle', 'w') as f:


# Time the construction of the zone-based identification matrix.
start_time = time.time()
iden = tpm_identification.get_identification_matrix(users_profiles, profiles)
delta_time = time.time() - start_time
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(delta_time)


# Keep both an np.matrix view and a DataFrame view of the same values.
iden_matrix_zona = np.matrix(iden)
df_ident = pd.DataFrame(iden_matrix_zona)

0 1 2 3 4 5 6 7 8 9 ... 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168
0 -68.142149 -750.000000 -780.000000 -774.00000 -750.00000 -780.00000 -750.00000 -783.0 -768.000000 -817.000000 ... -792.0 -770.0 -820.0 -810.0 -780.0 -799.0 -800.0 -756.0 -730.0 -783.0
1 -780.000000 -427.135729 -750.000000 -731.00000 -750.00000 -780.00000 -750.00000 -621.0 -768.000000 -817.000000 ... -792.0 -770.0 -820.0 -810.0 -780.0 -799.0 -800.0 -756.0 -730.0 -783.0
2 -780.000000 -775.000000 -363.640879 -774.00000 -750.00000 -780.00000 -750.00000 -783.0 -672.903090 -817.000000 ... -792.0 -770.0 -820.0 -810.0 -780.0 -799.0 -800.0 -756.0 -730.0 -783.0
3 -780.000000 -775.000000 -780.000000 -344.48455 -750.00000 -780.00000 -750.00000 -783.0 -768.000000 -817.000000 ... -792.0 -770.0 -820.0 -810.0 -780.0 -799.0 -800.0 -756.0 -730.0 -783.0
4 -780.000000 -775.000000 -780.000000 -774.00000 -351.59176 -780.00000 -750.00000 -783.0 -768.000000 -817.000000 ... -792.0 -770.0 -820.0 -810.0 -780.0 -799.0 -800.0 -756.0 -730.0 -783.0
5 -780.000000 -775.000000 -780.000000 -774.00000 -750.00000 -780.00000 -750.00000 -783.0 -768.000000 -817.000000 ... -792.0 -770.0 -820.0 -810.0 -780.0 -799.0 -800.0 -756.0 -730.0 -783.0
6 -780.000000 -775.000000 -780.000000 -774.00000 -750.00000 -720.09691 -500.29073 -783.0 -768.000000 -817.000000 ... -792.0 -770.0 -820.0 -810.0 -780.0 -799.0 -800.0 -756.0 -730.0 -783.0
7 -780.000000 -775.000000 -780.000000 -774.00000 -750.00000 -780.00000 -750.00000 -783.0 -768.000000 -817.000000 ... -792.0 -770.0 -820.0 -810.0 -780.0 -799.0 -800.0 -756.0 -730.0 -783.0
8 -780.000000 -775.000000 -750.096910 -774.00000 -750.00000 -780.00000 -750.00000 -783.0 -257.694065 -817.000000 ... -792.0 -770.0 -820.0 -810.0 -780.0 -799.0 -800.0 -756.0 -730.0 -783.0
9 -780.000000 -775.000000 -780.000000 -774.00000 -750.00000 -780.00000 -750.00000 -756.0 -768.000000 -315.583278 ... -792.0 -770.0 -800.0 -810.0 -780.0 -799.0 -800.0 -756.0 -730.0 -783.0

10 rows × 5169 columns

# Evaluate the zone-based matrix over the first `limit` users.
(n_identified, selected_distance, identified_indexs, abstenidos,
 correct_indexs, correct_distance, wrong_indexs, wrong_distances) = (
    auxiliar_functions.get_n_correct_tpm(iden_matrix_zona, limit))
# Share of the compared users that were identified, as a percentage.
porcentaje_correcto = n_identified * 100.0 / limit
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(str(round(porcentaje_correcto, 2)) + "%")


# NOTE(review): the body of this `with` statement is missing from this export,
# so the cell is not runnable as shown; restore it from the original notebook.
with open('resultados_alg_1_zona.pickle','w') as f:

# Drop the wrong-identification distances at or below -800.
# A list comprehension (instead of filter) yields a real list on both
# Python 2 and Python 3, so len() and plotting keep working.
wrong_distances_without_800 = [d for d in wrong_distances if d > -800]
print(len(wrong_distances_without_800))


# 30-bin histogram: wrong distances in red, correct distances in green.
colors = ['red', 'green']
plt.hist(
    [wrong_distances_without_800, correct_distance],
    30,
    histtype='bar',
    color=colors,
)

# Number of correct identifications whose distance is exactly 0.
counter = sum(1 for distance in correct_distance if distance == 0.0)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(counter)


Este gráfico muestra el indicador de similitud para los correctamente identificados, los incorrectamente identificados y en verde el valor de la distancia que debiese haber sido identificada

Se puede observar que la distribución verde está más a la izquierda, lo que indica que un gran número de personas cambió de comportamiento. Esto se condice con los resultados del tercer algoritmo.

# NOTE(review): wrong_distances_selected is created empty and nothing in the
# visible code appends to it, so the blue series below is empty as written.
# The lines that filled it may have been lost in the export -- confirm.
wrong_distances_selected = []
counter = 0
for i in range(limit):
    # counter advances once per membership of i in wrong_indexs / correct_indexs.
    if i in wrong_indexs:
        counter += 1
    if i in correct_indexs:
        counter += 1

# 30-bin histogram; series are coloured red, blue, green in list order.
colors = ['red', 'blue','green']
plt.hist([wrong_distances_without_800,wrong_distances_selected,correct_distance], 30, histtype='bar',color=colors)

Los siguientes gráficos muestran los indicadores de la diagonal de los correcta e incorrectamente clasificados.

Cuidado que estos gráficos son engañosos porque solo consideran la diagonal. Entonces puede que haya otros incorrectos pero que no sean parte de la diagonal.

# Inspect the diagonal entries of `iden` that are greater than -100.
# NOTE(review): the innermost `if` has no body in this export (the statements
# that filled the three lists are missing), so the cell is not runnable as
# shown; restore it from the original notebook.
diagonal = iden.diagonal().copy()
correct_distance_ii = []
wrong_distance_ii = []
diagonal_d1_ii = []
for i in range(len(diagonal)):
    if diagonal[i]>-100:
        if i in correct_indexs:
print "diagonal: "+str(len(diagonal_d1_ii)   )
print "correctos: "+str(len(correct_distance_ii))
print "incorrectos: "+str(len(wrong_distance_ii))

diagonal: 593
correctos: 564
incorrectos: 29

# Histogram of the diagonal values: wrong in red, correct in green.
colors = ['red', 'green']
plt.hist(
    [wrong_distance_ii, correct_distance_ii],
    histtype='bar',
    color=colors,
)

# Inspect the diagonal entries of `iden` that are greater than -1.
# NOTE(review): the innermost `if` has no body in this export (the statements
# that filled the three lists are missing), so the cell is not runnable as
# shown. It also rebinds correct_distance, which earlier cells use for the
# get_n_correct_tpm result.
diagonal = iden.diagonal().copy()
correct_distance = []
wrong_distance = []
diagonal_d1 = []
for i in range(len(diagonal)):
    if diagonal[i]>-1:
        if i in correct_indexs:
print "diagonal: "+str(len(diagonal_d1)   )
print "correctos: "+str(len(correct_distance))
print "incorrectos: "+str(len(wrong_distance))

diagonal: 82
correctos: 71
incorrectos: 11

# Histogram of the diagonal values: wrong in red, correct in green.
colors = ['red', 'green']
plt.hist(
    [wrong_distance, correct_distance],
    histtype='bar',
    color=colors,
)

# Inspect the diagonal entries of `iden` that are greater than -0.1.
# NOTE(review): the innermost `if` has no body in this export (the statements
# that filled the three lists are missing), so the cell is not runnable as
# shown; restore it from the original notebook.
diagonal = iden.diagonal().copy()
correct_distance = []
wrong_distance = []
diagonal_d1 = []
for i in range(len(diagonal)):
    if diagonal[i]>-0.1:
        if i in correct_indexs:
print "diagonal: "+str(len(diagonal_d1)   )
print "correctos: "+str(len(correct_distance))
print "incorrectos: "+str(len(wrong_distance))

diagonal: 44
correctos: 40
incorrectos: 4

# Histogram of the diagonal values: wrong in red, correct in green.
colors = ['red', 'green']
plt.hist(
    [wrong_distance, correct_distance],
    histtype='bar',
    color=colors,
)

# NOTE(review): the body of this write-side `with` statement is missing from
# this export, so the cell is not runnable as shown.
with open('data/iden_matrix_zona.pickle','w') as f:

# Reload the zone identification matrix from disk into `iden`.
# NOTE(review): the file is opened in text mode ('r'); if the dump used a
# binary pickle protocol both sides should use 'wb'/'rb' -- confirm.
with open('data/iden_matrix_zona.pickle','r') as f:
    iden = pickle.load(f)

Comparación con paraderos

<module 'tools.auxiliar_functions' from 'tools/auxiliar_functions.pyc'>

# Rebuild both periods' profiles at stop level (par_subida / par_bajada)
# instead of zone level, and time the construction.
start_time = time.time()
users_profiles = tpm_identification.get_profiles(
    first_period_frame['id'],
    first_period_frame['par_subida'],
    first_period_frame['par_bajada'],
)
profiles = tpm_identification.get_sequences(
    second_period_frame['id'],
    second_period_frame['par_subida'],
    second_period_frame['par_bajada'],
)
delta_time = time.time() - start_time
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(delta_time)


{'mls': ['T-22-205-SN-65',
 'nvisitas': [5, 5, 4, 6, 5, 6, 6, 4, 1, 2, 1, 1, 1],
 'tpm': array([[ 0.        ,  0.8       ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.2       ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.8       ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.2       ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  1.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.83333333,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.16666667],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          1.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  1.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.16666667,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.66666667,  0.        ,  0.        ,
          0.        ,  0.16666667,  0.        ],
        [ 0.66666667,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.33333333,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  1.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  1.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  1.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 1.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          1.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ]]),
 'user_id': 1132106}

# Time the construction of the stop-based identification matrix.
start_time = time.time()
iden_paradero = tpm_identification.get_identification_matrix(users_profiles, profiles)
delta_time = time.time() - start_time
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(delta_time)


# Keep both an np.matrix view and a DataFrame view of the same values.
iden_matrix_paradero = np.matrix(iden_paradero)
df_ident_paradero = pd.DataFrame(iden_matrix_paradero)

# NOTE(review): the body of this write-side `with` statement is missing from
# this export, so the cell is not runnable as shown.
with open('data/iden_matrix_paradero.pickle','w') as f:

# Reload the stop-based identification matrix from disk.
# NOTE(review): the file is opened in text mode ('r'); if the dump used a
# binary pickle protocol both sides should use 'wb'/'rb' -- confirm.
with open('data/iden_matrix_paradero.pickle','r') as f:
    iden_matrix_paradero = pickle.load(f)

# Evaluate the stop-based matrix over the first `limit` users.
(n_identified, selected_distance, identified_indexs, abstenidos,
 correct_indexs, correct_distance, wrong_indexs, wrong_distances) = (
    auxiliar_functions.get_n_correct_tpm(iden_matrix_paradero, limit))
# Share of the compared users that were identified, as a percentage.
porcentaje_correcto = n_identified * 100.0 / limit
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(str(round(porcentaje_correcto, 2)) + "%")


# NOTE(review): the body of this write-side `with` statement is missing from
# this export, so the cell is not runnable as shown.
with open('data/resultados_alg_1.pickle','w') as f:

# Restore the saved results. The file holds nine consecutive pickles, so the
# loads below must stay in exactly the order the objects were dumped.
with open('data/resultados_alg_1.pickle','r') as f:
    n_identified = pickle.load(f)
    selected_distance = pickle.load(f)
    identified_indexs = pickle.load(f)
    abstenidos = pickle.load(f)
    correct_indexs = pickle.load(f)
    correct_distance = pickle.load(f)
    wrong_indexs = pickle.load(f)
    wrong_distances = pickle.load(f)
    diagonal = pickle.load(f)

# Number of correct identifications whose distance is exactly 0.
counter = sum(1 for distance in correct_distance if distance == 0.0)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(counter)


# Number of wrong identifications whose distance is exactly 0.
counter = sum(1 for distance in wrong_distances if distance == 0.0)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(counter)


NameError                                 Traceback (most recent call last)
<ipython-input-8-fbaf082e6125> in <module>()
----> 1 iden_matrix_paradero.shape

NameError: name 'iden_matrix_paradero' is not defined

In [10]:

NameError                                 Traceback (most recent call last)
<ipython-input-10-34d5cf1a4ad4> in <module>()
----> 1 len(abstenidos)*100/limit

NameError: name 'limit' is not defined

In [14]:
# Drop the wrong-identification distances at or below -800.
# A list comprehension (instead of filter) yields a real list on both
# Python 2 and Python 3, so len() and plotting keep working.
wrong_distances_without_800 = [d for d in wrong_distances if d > -800]
print(len(wrong_distances_without_800))


(array([ 662.,  399.,  188.,  117.,   60.,   35.,   21.,   18.,   14.,   11.]),
 array([-799.30103 , -719.370927, -639.440824, -559.510721, -479.580618,
        -399.650515, -319.720412, -239.790309, -159.860206,  -79.930103,
           0.      ]),
 <a list of 10 Patch objects>)

# Default-bin histogram: wrong distances in red, correct distances in green.
colors = ['red', 'green']
plt.hist(
    [wrong_distances_without_800, correct_distance],
    histtype='bar',
    color=colors,
)

([array([ 662.,  399.,  188.,  117.,   60.,   35.,   21.,   18.,   14.,   11.]),
  array([  19.,  173.,  322.,  496.,  490.,  491.,  520.,  425.,  371.,  257.])],
 array([-799.30103 , -719.370927, -639.440824, -559.510721, -479.580618,
        -399.650515, -319.720412, -239.790309, -159.860206,  -79.930103,
           0.      ]),
 <a list of 2 Lists of Patches objects>)

# Labelled 30-bin histogram of the similarity indicator, exported as EPS.
colors = ['green', 'red']
plt.hist(
    [correct_distance, wrong_distances_without_800],
    30,
    histtype='bar',
    color=colors,
    label=['Correctly recognized users', 'Wrongly recognized users'],
)
plt.xlabel('Similarity Indicator')
plt.ylabel('Number of Users')
# Legend is anchored just below the axes, centred horizontally.
plt.legend(
    loc='upper center',
    bbox_to_anchor=(0.5, -0.05),
    fancybox=True,
    shadow=True,
    ncol=5,
)
plt.savefig('hist_tpm.eps', format='eps', dpi=1000)

from matplotlib.font_manager import FontProperties

fontP = FontProperties()
# Colours are written as 8-bit RGB and scaled to the 0-1 floats matplotlib takes.
colors = [
    (red / 255., green / 255., blue / 255.)
    for red, green, blue in [(132, 255, 108), (255, 57, 100)]
]
ax = plt.subplot(111)
# Draw the y-axis ticks on the left side only.
ax.get_yaxis().tick_left()
ax.set_title('Number of users identified with TPM Algorithm by similarity indicator', y=1.08)
ax.set_xlabel('Similarity Indicator')
ax.set_ylabel('Number of Users')
# Fix the visible range before plotting so the histogram does not rescale it.
ax.set_xlim(-800.0, 0.0)
ax.set_ylim(0, 250)
ax.hist(
    [correct_distance, wrong_distances_without_800],
    30,
    histtype='bar',
    color=colors,
    label=['Correctly recognized users', 'Wrongly recognized users'],
)
# Legend sits outside the axes, to the right of the lower edge.
ax.legend(
    loc='lower left',
    fancybox=False,
    shadow=False,
    prop=fontP,
    bbox_to_anchor=(1.0, 0.05),
)
plt.savefig("indicator correct wrong alg1.eps", format='eps', dpi=1000, bbox_inches="tight")

Distancia de cada usuario entre abril y septiembre, y en verde, las distancias que fueron seleccionadas como correctas pero no lo eran

# NOTE(review): wrong_distances_selected is created empty and nothing in the
# visible code appends to it, so the blue series below is empty as written.
# The lines that filled it may have been lost in the export -- confirm.
wrong_distances_selected = []
counter = 0
for i in range(limit):
    # counter advances once per membership of i in wrong_indexs / correct_indexs.
    if i in wrong_indexs:
        counter += 1
    if i in correct_indexs:
        counter += 1

# 30-bin histogram; series are coloured green, red, blue in list order.
colors = ['green','red','blue']
plt.hist([correct_distance,wrong_distances_without_800,wrong_distances_selected], 30, histtype='bar',color=colors)

En el gráfico anterior se puede observar que hay usuarios con distancia 0, es decir que hay solo una opción y que no son el mismo id

Plot wrong distances

# Keep only the selected wrong distances greater than -100.
# Built as a list (not filter) because later cells call len() on it and index
# into it, which a Python 3 filter object does not support.
wd_menor_100 = [d for d in wrong_distances_selected if d > -100]

(array([  3.,   3.,   1.,   2.,   1.,   2.,   0.,   0.,   0.,  30.]),
 array([-97.09487623, -87.38538861, -77.67590099, -67.96641336,
        -58.25692574, -48.54743812, -38.83795049, -29.12846287,
        -19.41897525,  -9.70948762,   0.        ]),
 <a list of 10 Patch objects>)

Hay usuarios que se roban la película

# Count how often each index occurs in identified_indexs and rank the
# (index, count) pairs from most to least frequent.
x = np.array(identified_indexs)
y = np.bincount(x)
ii = np.nonzero(y)[0]  # indexes that occur at least once
# sorted() accepts any iterable, so this also works where zip() returns an
# iterator (Python 3); like list.sort it is stable, so ties keep index order.
frequency_identified_indexs = sorted(zip(ii, y[ii]), key=lambda t: t[1], reverse=True)

[(1480, 6),
 (2809, 6),
 (4844, 6),
 (161, 5),
 (563, 5),
 (1372, 5),
 (1954, 5),
 (2826, 5),
 (5030, 5),
 (201, 4),
 (232, 4),
 (239, 4),
 (456, 4),
 (525, 4),
 (836, 4),
 (947, 4),
 (980, 4),
 (1067, 4),
 (1205, 4),
 (1363, 4),
 (1441, 4),
 (1444, 4),
 (1742, 4),
 (1884, 4),
 (2405, 4),
 (2644, 4),
 (2670, 4),
 (2887, 4),
 (3088, 4),
 (3150, 4),
 (3237, 4),
 (4106, 4),
 (4318, 4),
 (4401, 4),
 (4460, 4),
 (4584, 4),
 (4636, 4),
 (4794, 4),
 (5048, 4),
 (0, 3),
 (11, 3),
 (41, 3),
 (50, 3),
 (74, 3),
 (203, 3),
 (222, 3),
 (229, 3),
 (256, 3),
 (318, 3),
 (331, 3),
 (335, 3),
 (389, 3),
 (394, 3),
 (410, 3),
 (424, 3),
 (431, 3),
 (441, 3),
 (482, 3),
 (489, 3),
 (507, 3),
 (532, 3),
 (591, 3),
 (605, 3),
 (638, 3),
 (649, 3),
 (677, 3),
 (715, 3),
 (755, 3),
 (766, 3),
 (817, 3),
 (823, 3),
 (863, 3),
 (914, 3),
 (940, 3),
 (968, 3),
 (1035, 3),
 (1040, 3),
 (1054, 3),
 (1058, 3),
 (1105, 3),
 (1134, 3),
 (1146, 3),
 (1161, 3),
 (1162, 3),
 (1176, 3),
 (1189, 3),
 (1197, 3),
 (1269, 3),
 (1325, 3),
 (1365, 3),
 (1376, 3),
 (1379, 3),
 (1387, 3),
 (1398, 3),
 (1399, 3),
 (1415, 3),
 (1507, 3),
 (1519, 3),
 (1538, 3),
 (1575, 3),
 (1584, 3),
 (1647, 3),
 (1675, 3),
 (1692, 3),
 (1728, 3),
 (1824, 3),
 (1829, 3),
 (1855, 3),
 (1903, 3),
 (1906, 3),
 (1907, 3),
 (2007, 3),
 (2026, 3),
 (2116, 3),
 (2167, 3),
 (2175, 3),
 (2180, 3),
 (2211, 3),
 (2242, 3),
 (2275, 3),
 (2304, 3),
 (2311, 3),
 (2338, 3),
 (2351, 3),
 (2385, 3),
 (2434, 3),
 (2441, 3),
 (2457, 3),
 (2477, 3),
 (2558, 3),
 (2598, 3),
 (2621, 3),
 (2702, 3),
 (2799, 3),
 (2806, 3),
 (2853, 3),
 (2856, 3),
 (2860, 3),
 (2877, 3),
 (2879, 3),
 (2880, 3),
 (2906, 3),
 (2908, 3),
 (2912, 3),
 (2918, 3),
 (2948, 3),
 (2949, 3),
 (2959, 3),
 (2972, 3),
 (2994, 3),
 (2998, 3),
 (3068, 3),
 (3079, 3),
 (3081, 3),
 (3089, 3),
 (3124, 3),
 (3144, 3),
 (3152, 3),
 (3256, 3),
 (3279, 3),
 (3287, 3),
 (3391, 3),
 (3445, 3),
 (3466, 3),
 (3486, 3),
 (3521, 3),
 (3527, 3),
 (3581, 3),
 (3733, 3),
 (3750, 3),
 (3775, 3),
 (3809, 3),
 (3821, 3),
 (3829, 3),
 (3844, 3),
 (3906, 3),
 (3907, 3),
 (3951, 3),
 (4044, 3),
 (4053, 3),
 (4126, 3),
 (4143, 3),
 (4170, 3),
 (4189, 3),
 (4216, 3),
 (4276, 3),
 (4298, 3),
 (4329, 3),
 (4333, 3),
 (4340, 3),
 (4362, 3),
 (4376, 3),
 (4513, 3),
 (4542, 3),
 (4554, 3),
 (4571, 3),
 (4582, 3),
 (4735, 3),
 (4767, 3),
 (4768, 3),
 (4775, 3),
 (4856, 3),
 (4879, 3),
 (4919, 3),
 (4999, 3),
 (5001, 3),
 (5036, 3),
 (5041, 3),
 (5056, 3),
 (5142, 3),
 (1, 2),
 (4, 2),
 (9, 2),
 (15, 2),
 (21, 2),
 (24, 2),
 (25, 2),
 (26, 2),
 (27, 2),
 (30, 2),
 (31, 2),
 (34, 2),
 (37, 2),
 (61, 2),
 (64, 2),
 (70, 2),
 (71, 2),
 (72, 2),
 (73, 2),
 (80, 2),
 (82, 2),
 (83, 2),
 (84, 2),
 (101, 2),
 (102, 2),
 (104, 2),
 (111, 2),
 (114, 2),
 (117, 2),
 (118, 2),
 (120, 2),
 (121, 2),
 (125, 2),
 (131, 2),
 (133, 2),
 (138, 2),
 (143, 2),
 (150, 2),
 (152, 2),
 (160, 2),
 (170, 2),
 (172, 2),
 (177, 2),
 (178, 2),
 (187, 2),
 (188, 2),
 (195, 2),
 (198, 2),
 (202, 2),
 (205, 2),
 (213, 2),
 (218, 2),
 (219, 2),
 (223, 2),
 (224, 2),
 (226, 2),
 (245, 2),
 (246, 2),
 (254, 2),
 (258, 2),
 (265, 2),
 (270, 2),
 (274, 2),
 (276, 2),
 (277, 2),
 (278, 2),
 (279, 2),
 (282, 2),
 (288, 2),
 (296, 2),
 (297, 2),
 (308, 2),
 (311, 2),
 (313, 2),
 (317, 2),
 (319, 2),
 (322, 2),
 (327, 2),
 (328, 2),
 (338, 2),
 (339, 2),
 (344, 2),
 (352, 2),
 (353, 2),
 (361, 2),
 (363, 2),
 (367, 2),
 (368, 2),
 (371, 2),
 (382, 2),
 (388, 2),
 (390, 2),
 (408, 2),
 (411, 2),
 (412, 2),
 (414, 2),
 (417, 2),
 (437, 2),
 (440, 2),
 (444, 2),
 (457, 2),
 (464, 2),
 (468, 2),
 (470, 2),
 (476, 2),
 (479, 2),
 (486, 2),
 (490, 2),
 (495, 2),
 (500, 2),
 (505, 2),
 (508, 2),
 (509, 2),
 (510, 2),
 (519, 2),
 (527, 2),
 (530, 2),
 (541, 2),
 (553, 2),
 (566, 2),
 (572, 2),
 (574, 2),
 (575, 2),
 (578, 2),
 (579, 2),
 (580, 2),
 (585, 2),
 (589, 2),
 (599, 2),
 (602, 2),
 (614, 2),
 (625, 2),
 (627, 2),
 (633, 2),
 (643, 2),
 (651, 2),
 (659, 2),
 (661, 2),
 (662, 2),
 (673, 2),
 (680, 2),
 (686, 2),
 (698, 2),
 (710, 2),
 (711, 2),
 (719, 2),
 (722, 2),
 (738, 2),
 (743, 2),
 (746, 2),
 (794, 2),
 (797, 2),
 (804, 2),
 (807, 2),
 (816, 2),
 (822, 2),
 (827, 2),
 (829, 2),
 (842, 2),
 (844, 2),
 (848, 2),
 (851, 2),
 (854, 2),
 (857, 2),
 (872, 2),
 (878, 2),
 (892, 2),
 (893, 2),
 (900, 2),
 (908, 2),
 (918, 2),
 (926, 2),
 (935, 2),
 (943, 2),
 (948, 2),
 (951, 2),
 (953, 2),
 (957, 2),
 (982, 2),
 (989, 2),
 (993, 2),
 (1004, 2),
 (1006, 2),
 (1017, 2),
 (1020, 2),
 (1023, 2),
 (1025, 2),
 (1028, 2),
 (1029, 2),
 (1034, 2),
 (1043, 2),
 (1044, 2),
 (1056, 2),
 (1061, 2),
 (1064, 2),
 (1065, 2),
 (1066, 2),
 (1084, 2),
 (1085, 2),
 (1092, 2),
 (1111, 2),
 (1122, 2),
 (1126, 2),
 (1145, 2),
 (1147, 2),
 (1152, 2),
 (1155, 2),
 (1164, 2),
 (1166, 2),
 (1177, 2),
 (1178, 2),
 (1185, 2),
 (1187, 2),
 (1193, 2),
 (1202, 2),
 (1212, 2),
 (1221, 2),
 (1223, 2),
 (1226, 2),
 (1229, 2),
 (1238, 2),
 (1239, 2),
 (1245, 2),
 (1249, 2),
 (1250, 2),
 (1252, 2),
 (1254, 2),
 (1265, 2),
 (1278, 2),
 (1279, 2),
 (1284, 2),
 (1290, 2),
 (1301, 2),
 (1303, 2),
 (1310, 2),
 (1314, 2),
 (1317, 2),
 (1324, 2),
 (1326, 2),
 (1332, 2),
 (1336, 2),
 (1341, 2),
 (1344, 2),
 (1352, 2),
 (1354, 2),
 (1368, 2),
 (1404, 2),
 (1410, 2),
 (1424, 2),
 (1432, 2),
 (1433, 2),
 (1439, 2),
 (1440, 2),
 (1447, 2),
 (1461, 2),
 (1466, 2),
 (1468, 2),
 (1476, 2),
 (1477, 2),
 (1478, 2),
 (1496, 2),
 (1510, 2),
 (1511, 2),
 (1522, 2),
 (1527, 2),
 (1528, 2),
 (1531, 2),
 (1546, 2),
 (1547, 2),
 (1549, 2),
 (1559, 2),
 (1560, 2),
 (1568, 2),
 (1573, 2),
 (1585, 2),
 (1596, 2),
 (1598, 2),
 (1610, 2),
 (1612, 2),
 (1615, 2),
 (1616, 2),
 (1617, 2),
 (1623, 2),
 (1628, 2),
 (1630, 2),
 (1632, 2),
 (1634, 2),
 (1636, 2),
 (1637, 2),
 (1640, 2),
 (1643, 2),
 (1651, 2),
 (1654, 2),
 (1658, 2),
 (1665, 2),
 (1674, 2),
 (1677, 2),
 (1681, 2),
 (1683, 2),
 (1685, 2),
 (1695, 2),
 (1698, 2),
 (1707, 2),
 (1714, 2),
 (1720, 2),
 (1729, 2),
 (1730, 2),
 (1744, 2),
 (1752, 2),
 (1756, 2),
 (1763, 2),
 (1764, 2),
 (1780, 2),
 (1782, 2),
 (1789, 2),
 (1804, 2),
 (1809, 2),
 (1826, 2),
 (1827, 2),
 (1834, 2),
 (1865, 2),
 (1877, 2),
 (1896, 2),
 (1898, 2),
 (1900, 2),
 (1904, 2),
 (1925, 2),
 (1926, 2),
 (1928, 2),
 (1942, 2),
 (1948, 2),
 (1955, 2),
 (1964, 2),
 (1983, 2),
 (1987, 2),
 (1991, 2),
 (2009, 2),
 (2021, 2),
 (2029, 2),
 (2033, 2),
 (2035, 2),
 (2036, 2),
 (2039, 2),
 (2040, 2),
 (2046, 2),
 (2054, 2),
 (2082, 2),
 (2096, 2),
 (2100, 2),
 (2103, 2),
 (2105, 2),
 (2119, 2),
 (2134, 2),
 (2146, 2),
 (2147, 2),
 (2183, 2),
 (2185, 2),
 (2189, 2),
 (2195, 2),
 (2204, 2),
 (2209, 2),
 (2219, 2),
 (2222, 2),
 (2225, 2),
 (2238, 2),
 (2254, 2),
 (2260, 2),
 (2266, 2),
 (2267, 2),
 (2268, 2),
 (2279, 2),
 (2280, 2),
 (2295, 2),
 (2298, 2),
 (2302, 2),
 (2306, 2),
 (2319, 2),
 (2327, 2),
 (2328, 2),
 (2329, 2),
 (2332, 2),
 (2366, 2),
 (2368, 2),
 (2372, 2),
 (2373, 2),
 (2374, 2),
 (2381, 2),
 (2390, 2),
 (2391, 2),
 (2400, 2),
 (2404, 2),
 (2410, 2),
 (2417, 2),
 (2420, 2),
 (2440, 2),
 (2443, 2),
 (2467, 2),
 (2480, 2),
 (2489, 2),
 (2492, 2),
 (2495, 2),
 (2497, 2),
 (2501, 2),
 (2502, 2),
 (2504, 2),
 (2507, 2),
 (2509, 2),
 (2511, 2),
 (2523, 2),
 (2527, 2),
 (2529, 2),
 (2530, 2),
 (2538, 2),
 (2547, 2),
 (2550, 2),
 (2561, 2),
 (2562, 2),
 (2563, 2),
 (2566, 2),
 (2569, 2),
 (2588, 2),
 (2611, 2),
 (2619, 2),
 (2622, 2),
 (2624, 2),
 (2626, 2),
 (2631, 2),
 (2634, 2),
 (2635, 2),
 (2637, 2),
 (2643, 2),
 (2655, 2),
 (2658, 2),
 (2675, 2),
 (2677, 2),
 (2681, 2),
 (2704, 2),
 (2710, 2),
 (2719, 2),
 (2723, 2),
 (2727, 2),
 (2734, 2),
 (2739, 2),
 (2749, 2),
 (2750, 2),
 (2753, 2),
 (2758, 2),
 (2764, 2),
 (2780, 2),
 (2781, 2),
 (2782, 2),
 (2784, 2),
 (2792, 2),
 (2802, 2),
 (2803, 2),
 (2805, 2),
 (2811, 2),
 (2814, 2),
 (2831, 2),
 (2837, 2),
 (2842, 2),
 (2849, 2),
 (2854, 2),
 (2855, 2),
 (2857, 2),
 (2859, 2),
 (2863, 2),
 (2867, 2),
 (2868, 2),
 (2870, 2),
 (2888, 2),
 (2898, 2),
 (2899, 2),
 (2901, 2),
 (2909, 2),
 (2919, 2),
 (2922, 2),
 (2939, 2),
 (2950, 2),
 (2960, 2),
 (2964, 2),
 (2966, 2),
 (2973, 2),
 (2981, 2),
 (2992, 2),
 (3008, 2),
 (3010, 2),
 (3012, 2),
 (3013, 2),
 (3022, 2),
 (3037, 2),
 (3040, 2),
 (3048, 2),
 (3062, 2),
 (3067, 2),
 (3069, 2),
 (3076, 2),
 (3082, 2),
 (3099, 2),
 (3103, 2),
 (3104, 2),
 (3105, 2),
 (3116, 2),
 (3138, 2),
 (3146, 2),
 (3149, 2),
 (3160, 2),
 (3166, 2),
 (3174, 2),
 (3179, 2),
 (3193, 2),
 (3201, 2),
 (3210, 2),
 (3212, 2),
 (3218, 2),
 (3221, 2),
 (3257, 2),
 (3260, 2),
 (3273, 2),
 (3290, 2),
 (3303, 2),
 (3308, 2),
 (3314, 2),
 (3318, 2),
 (3335, 2),
 (3341, 2),
 (3347, 2),
 (3358, 2),
 (3366, 2),
 (3372, 2),
 (3374, 2),
 (3381, 2),
 (3385, 2),
 (3389, 2),
 (3390, 2),
 (3396, 2),
 (3402, 2),
 (3406, 2),
 (3407, 2),
 (3424, 2),
 (3429, 2),
 (3450, 2),
 (3451, 2),
 (3454, 2),
 (3459, 2),
 (3467, 2),
 (3468, 2),
 (3485, 2),
 (3491, 2),
 (3512, 2),
 (3537, 2),
 (3551, 2),
 (3554, 2),
 (3562, 2),
 (3565, 2),
 (3567, 2),
 (3582, 2),
 (3587, 2),
 (3600, 2),
 (3601, 2),
 (3608, 2),
 (3612, 2),
 (3613, 2),
 (3617, 2),
 (3632, 2),
 (3633, 2),
 (3641, 2),
 (3643, 2),
 (3645, 2),
 (3647, 2),
 (3660, 2),
 (3662, 2),
 (3673, 2),
 (3676, 2),
 (3683, 2),
 (3690, 2),
 (3699, 2),
 (3702, 2),
 (3703, 2),
 (3714, 2),
 (3723, 2),
 (3725, 2),
 (3727, 2),
 (3730, 2),
 (3741, 2),
 (3760, 2),
 (3761, 2),
 (3768, 2),
 (3774, 2),
 (3784, 2),
 (3786, 2),
 (3788, 2),
 (3812, 2),
 (3813, 2),
 (3823, 2),
 (3848, 2),
 (3851, 2),
 (3854, 2),
 (3865, 2),
 (3871, 2),
 (3889, 2),
 (3892, 2),
 (3899, 2),
 (3904, 2),
 (3919, 2),
 (3923, 2),
 (3924, 2),
 (3931, 2),
 (3935, 2),
 (3939, 2),
 (3970, 2),
 (3977, 2),
 (3985, 2),
 (3994, 2),
 (3997, 2),
 (3998, 2),
 (4010, 2),
 (4011, 2),
 (4017, 2),
 (4019, 2),
 (4021, 2),
 (4027, 2),
 (4038, 2),
 (4046, 2),
 (4064, 2),
 (4069, 2),
 (4070, 2),
 (4080, 2),
 (4082, 2),
 (4084, 2),
 (4085, 2),
 (4097, 2),
 (4107, 2),
 (4112, 2),
 (4124, 2),
 (4125, 2),
 (4131, 2),
 (4135, 2),
 (4144, 2),
 (4152, 2),
 (4159, 2),
 (4169, 2),
 (4174, 2),
 (4183, 2),
 (4185, 2),
 (4188, 2),
 (4193, 2),
 (4197, 2),
 (4203, 2),
 (4204, 2),
 (4213, 2),
 (4225, 2),
 (4241, 2),
 (4245, 2),
 (4248, 2),
 (4250, 2),
 (4262, 2),
 (4265, 2),
 (4271, 2),
 (4274, 2),
 (4277, 2),
 (4278, 2),
 (4280, 2),
 (4294, 2),
 (4296, 2),
 (4301, 2),
 (4302, 2),
 (4308, 2),
 (4323, 2),
 (4324, 2),
 (4330, 2),
 (4338, 2),
 (4344, 2),
 (4346, 2),
 (4349, 2),
 (4351, 2),
 (4364, 2),
 (4370, 2),
 (4383, 2),
 (4391, 2),
 (4392, 2),
 (4400, 2),
 (4405, 2),
 (4410, 2),
 (4411, 2),
 (4421, 2),
 (4452, 2),
 (4455, 2),
 (4458, 2),
 (4461, 2),
 (4464, 2),
 (4468, 2),
 (4469, 2),
 (4470, 2),
 (4472, 2),
 (4482, 2),
 (4483, 2),
 (4484, 2),
 (4489, 2),
 (4493, 2),
 (4495, 2),
 (4498, 2),
 (4505, 2),
 (4507, 2),
 (4515, 2),
 (4517, 2),
 (4518, 2),
 (4528, 2),
 (4536, 2),
 (4539, 2),
 (4556, 2),
 (4570, 2),
 (4575, 2),
 (4589, 2),
 (4597, 2),
 (4600, 2),
 (4609, 2),
 (4614, 2),
 (4615, 2),
 (4624, 2),
 (4630, 2),
 (4635, 2),
 (4643, 2),
 (4648, 2),
 (4651, 2),
 (4659, 2),
 (4664, 2),
 (4674, 2),
 (4675, 2),
 (4676, 2),
 (4679, 2),
 (4680, 2),
 (4688, 2),
 (4702, 2),
 (4724, 2),
 (4741, 2),
 (4747, 2),
 (4751, 2),
 (4753, 2),
 (4759, 2),
 (4769, 2),
 (4772, 2),
 (4773, 2),
 (4774, 2),
 (4777, 2),
 (4789, 2),
 (4793, 2),
 (4805, 2),
 (4835, 2),
 (4847, 2),
 (4857, 2),
 (4858, 2),
 (4863, 2),
 (4871, 2),
 (4873, 2),
 (4874, 2),
 (4877, 2),
 (4880, 2),
 (4893, 2),
 (4900, 2),
 (4906, 2),
 (4921, 2),
 (4929, 2),
 (4943, 2),
 (4949, 2),
 (4974, 2),
 (4986, 2),
 (4990, 2),
 (5004, 2),
 (5012, 2),
 (5015, 2),
 (5019, 2),
 (5022, 2),
 (5023, 2),
 (5032, 2),
 (5040, 2),
 (5043, 2),
 (5053, 2),
 (5057, 2),
 (5061, 2),
 (5069, 2),
 (5075, 2),
 (5078, 2),
 (5079, 2),
 (5089, 2),
 (5102, 2),
 (5105, 2),
 (5114, 2),
 (5126, 2),
 (5137, 2),

# NOTE(review): the body of the `if` below is missing from this export, so
# wrong_indexs_0 is never filled in the visible code; restore it from the
# original notebook.
wrong_indexs_0 = []
for i in range(len(wd_menor_100)):
    if wd_menor_100[i]==0:
#There is one value in the histogram that is smaller than 0.092 but different from 0


Resultados diagonal

Falta encontrar el índice donde se minimiza el error

# Inspect the diagonal entries of iden_matrix_paradero greater than -800.
# NOTE(review): the innermost `if` has no body in this export (the statements
# that filled the three lists are missing), so the cell is not runnable as
# shown; restore it from the original notebook.
diagonal = iden_matrix_paradero.diagonal().copy()
correct_distance = []
wrong_distance = []
diagonal_d1 = []
for i in range(len(diagonal)):
    if diagonal[i]>-800:
        if i in correct_indexs:
print "diagonal: "+str(len(diagonal_d1)   )
print "correctos: "+str(len(correct_distance))
print "incorrectos: "+str(len(wrong_distance))
# Histogram of the diagonal values: wrong in red, correct in green.
colors = ['red', 'green']
plt.hist([wrong_distance,correct_distance], histtype='bar',color=colors)

diagonal: 5089
correctos: 3564
incorrectos: 1525

# Same inspection with the threshold at -300, reusing `diagonal` from the
# previous cell.
# NOTE(review): the innermost `if` has no body in this export, so the cell is
# not runnable as shown; restore it from the original notebook.
correct_distance = []
wrong_distance = []
diagonal_d1 = []
for i in range(len(diagonal)):
    if diagonal[i]>-300:
        if i in correct_indexs:
print len(diagonal_d1)   
print len(correct_distance)
print len(wrong_distance)


# Histogram of the diagonal values: wrong in red, correct in green.
colors = ['red', 'green']
plt.hist(
    [wrong_distance, correct_distance],
    histtype='bar',
    color=colors,
)

