Import Export



In [1]:

    
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import pickle
from __future__ import division
import csv
from tools import *
import os



In [2]:

    
# Development aid: re-import tpm_identification so that edits made to the
# module's source take effect without restarting the kernel. `reload` is used
# as a builtin here, which only exists on Python 2. The cell output shows the
# module is tools.tpm_identification; presumably the name is brought into
# scope by `from tools import *` in the import cell - verify.
reload(tpm_identification)









    Out[2]:





<module 'tools.tpm_identification' from 'tools\tpm_identification.pyc'>



In [10]:

    
# Input files are resolved relative to the current working directory:
#   <cwd>/../data/Users_data/<period file>.csv
data_path = os.path.join(os.getcwd(), '..', 'data')
users_data_dir = os.path.join(data_path, 'Users_data')

# One CSV per observation period (file names mention abril / septiembre 2013).
first_period_path = os.path.join(
    users_data_dir, 'etapas_2013_abril_allyearsids_10_100000.csv')
second_period_path = os.path.join(
    users_data_dir, 'etapas_2013_septiembre_allyearsids_10_100000.csv')



In [11]:

    
# Load the metro dictionary through the project's auxiliar_functions helper;
# what the dictionary contains is not visible from this notebook.
# NOTE(review): dict_metro is not referenced again in the cells visible here -
# confirm it is still needed.
dict_metro = auxiliar_functions.load_metro_dictionary()



In [17]:

    
# Read the first-period CSV, convert tiempo_subida to datetime and order the
# rows by user id and then by tiempo_subida. Only tiempo_subida is converted;
# every other column keeps the dtype inferred by read_csv.
first_period_frame = (
    pd.read_csv(first_period_path)
    .assign(tiempo_subida=lambda frame: pd.to_datetime(frame.tiempo_subida))
    .sort_values(by=['id', 'tiempo_subida'])
)
# Preview the first rows as the cell output.
first_period_frame.head()









    Out[17]:






  
    
      
      tiempo_subida
      id
      x_subida
      y_subida
      tipo_transporte
      serviciosentidovariante
      tipo_dia
      nviaje
      netapa
      x_bajada
      y_bajada
      tiempo_bajada
      par_subida
      par_bajada
      zona_subida
      zona_bajada
      adulto
    
  
  
    
      23
      2013-04-14 06:45:44
      1132106
      348108.0
      6289153.0
      BUS
      T203 00R
      DOMINGO
      1
      1
      346818.0
      6299394.0
      2013-04-14 07:07:02
      T-22-205-SN-65
      E-20-190-SN-40
      328.0
      307.0
      0.0
    
    
      22
      2013-04-14 07:51:52
      1132106
      346751.0
      6299389.0
      BUS
      T502 00I
      DOMINGO
      2
      1
      351363.0
      6302549.0
      2013-04-14 08:04:11
      E-20-291-PO-20
      T-15-135-PO-5
      307.0
      188.0
      0.0
    
    
      21
      2013-04-14 19:56:47
      1132106
      351368.0
      6302559.0
      BUS
      T502 00R
      DOMINGO
      3
      1
      346763.0
      6299568.0
      2013-04-14 20:09:11
      T-15-135-OP-110
      T-4-19-NS-100
      188.0
      55.0
      0.0
    
    
      20
      2013-04-14 20:15:25
      1132106
      346713.0
      6299427.0
      BUS
      T203 00I
      DOMINGO
      3
      2
      348095.0
      6289148.0
      2013-04-14 20:40:51
      E-20-199-NS-2
      T-24-205-NS-20
      307.0
      348.0
      0.0
    
    
      19
      2013-04-15 21:04:59
      1132106
      348103.0
      6289191.0
      BUS
      T206 00R
      LABORAL
      4
      1
      346844.0
      6299320.0
      2013-04-15 21:33:23
      T-22-205-SN-65
      T-20-190-SN-35
      328.0
      309.0
      0.0



In [16]:

    
# Read the second-period CSV, convert tiempo_subida to datetime and order the
# rows by user id and then by tiempo_subida. Only tiempo_subida is converted;
# every other column keeps the dtype inferred by read_csv.
second_period_frame = (
    pd.read_csv(second_period_path)
    .assign(tiempo_subida=lambda frame: pd.to_datetime(frame.tiempo_subida))
    .sort_values(by=['id', 'tiempo_subida'])
)
# Preview the first rows as the cell output.
second_period_frame.head()









    Out[16]:






  
    
      
      tiempo_subida
      id
      x_subida
      y_subida
      tipo_transporte
      serviciosentidovariante
      tipo_dia
      nviaje
      netapa
      x_bajada
      y_bajada
      tiempo_bajada
      par_subida
      par_bajada
      zona_subida
      zona_bajada
      adulto
    
  
  
    
      26
      2013-09-23 20:58:46
      1132106
      348106.0
      6289139.0
      BUS
      T203 00R
      LABORAL
      1
      1
      346824.0
      6299354.0
      2013-09-23 21:23:02
      T-22-205-SN-65
      E-20-190-SN-40
      328.0
      307.0
      0.0
    
    
      25
      2013-09-23 21:24:25
      1132106
      346789.0
      6299372.0
      BUS
      T502 00I
      LABORAL
      1
      2
      351366.0
      6302548.0
      2013-09-23 21:37:55
      E-20-291-PO-20
      T-15-135-PO-5
      307.0
      188.0
      0.0
    
    
      24
      2013-09-24 07:15:40
      1132106
      351362.0
      6302563.0
      BUS
      T502 00R
      LABORAL
      2
      1
      346661.0
      6299484.0
      2013-09-24 07:32:11
      T-15-135-OP-110
      T-4-19-NS-100
      188.0
      55.0
      0.0
    
    
      23
      2013-09-24 08:00:08
      1132106
      346719.0
      6299344.0
      BUS
      T206 06I
      LABORAL
      3
      1
      348078.0
      6289284.0
      2013-09-24 08:36:40
      E-20-199-NS-2
      T-24-205-NS-20
      307.0
      348.0
      0.0
    
    
      22
      2013-09-24 09:13:22
      1132106
      347658.0
      6289320.0
      BUS
      T352 00R
      LABORAL
      3
      2
      NaN
      NaN
      NaN
      L-24-26-OP-25
      NaN
      348.0
      NaN
      NaN



In [18]:

    
# Print dtypes and non-null counts for the first-period frame. In the recorded
# output the *_bajada columns have fewer non-null entries than the *_subida
# ones, and tiempo_bajada is still dtype object (only tiempo_subida was
# converted to datetime when the frame was loaded).
first_period_frame.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 100000 entries, 23 to 99994
Data columns (total 17 columns):
tiempo_subida              100000 non-null datetime64[ns]
id                         100000 non-null int64
x_subida                   99224 non-null float64
y_subida                   99224 non-null float64
tipo_transporte            100000 non-null object
serviciosentidovariante    98074 non-null object
tipo_dia                   100000 non-null object
nviaje                     100000 non-null int64
netapa                     100000 non-null int64
x_bajada                   86774 non-null float64
y_bajada                   86774 non-null float64
tiempo_bajada              86774 non-null object
par_subida                 99195 non-null object
par_bajada                 86767 non-null object
zona_subida                99180 non-null float64
zona_bajada                86764 non-null float64
adulto                     86774 non-null float64
dtypes: datetime64[ns](1), float64(7), int64(3), object(6)
memory usage: 13.7+ MB

Get Profiles from both periods



In [8]:

    
# Development aid: re-import tpm_identification again before it is used in the
# cells that follow, so recent edits to the module are picked up. `reload` as
# a builtin only exists on Python 2.
# NOTE(review): the same reload already appears near the top of the notebook;
# consider keeping a single one (or removing both) once the module is stable.
reload(tpm_identification)









    Out[8]:





<module 'tools.tpm_identification' from 'tools/tpm_identification.pyc'>



In [52]:

    
# Build the first-period user profiles from the id, zona_subida and
# zona_bajada columns of the first-period frame.
first_ids = first_period_frame['id']
first_zona_subida = first_period_frame['zona_subida']
first_zona_bajada = first_period_frame['zona_bajada']
users_profiles = tpm_identification.get_profiles(
    first_ids, first_zona_subida, first_zona_bajada)



In [53]:

    
# Number of entries in users_profiles (the first-period profiles); the bare
# name on the last line displays the value as the cell output.
numero_usuarios = len(users_profiles)
numero_usuarios









    Out[53]:





5169



In [54]:

    
# Build the second-period counterpart from the id, zona_subida and
# zona_bajada columns of the second-period frame.
# NOTE(review): the first period goes through get_profiles while this one goes
# through get_sequences, yet the result is named `profiles` - confirm the
# asymmetry is intended.
second_ids = second_period_frame['id']
second_zona_subida = second_period_frame['zona_subida']
second_zona_bajada = second_period_frame['zona_bajada']
profiles = tpm_identification.get_sequences(
    second_ids, second_zona_subida, second_zona_bajada)



In [55]:

    
# `limit` is the smaller of the two collection sizes, i.e. the number of
# positions that exist in both periods.
limit = min(len(users_profiles), len(profiles))

# Report the second-period size first, then the common limit.
print(len(profiles))
print(limit)

Check if the ids are the same for both periods



In [56]:

    
# Sanity check on the alignment of both periods, collecting the first-period
# user ids along the way:
#   * user ids in users_profiles must be strictly increasing, and
#   * position k of users_profiles and of profiles must hold the same user id.
# NOTE(review): the loop runs over len(users_profiles); if profiles is shorter
# this fails with an index error rather than an assertion.
ids_alg1 = []
last_iddd = 0
for position in range(len(users_profiles)):
    current_id = users_profiles[position]['user_id']
    # Strictly increasing ids (also rules out duplicates).
    assert last_iddd < current_id
    last_iddd = current_id
    ids_alg1.append(current_id)
    # Same user at the same position in the second period.
    assert current_id == profiles[position]['user_id']



In [33]:

    
# Persist the list of first-period user ids collected by the consistency check.
# The file is opened in binary mode: pickle streams are bytes, so text mode
# ('w') fails on Python 3 and can corrupt the stream through newline
# translation on Windows.
# NOTE(review): this path is relative to the working directory ('data/...'),
# whereas the input CSVs are read from data_path ('<cwd>/../data') - confirm
# the target directory is the intended one and that it exists.
with open('data/ids_alg1.pickle', 'wb') as f:
    pickle.dump(ids_alg1, f)

Compare



In [57]:

    
# Compute the identification matrix between the first-period profiles and the
# second-period profiles, and report the wall-clock time in seconds.
# The recorded run printed roughly 4718 s, so this cell is expensive to re-run.
start_time = time.time()
iden = tpm_identification.get_identification_matrix(
    users_profiles, profiles)
delta_time = time.time() - start_time
print(delta_time)









    



4717.84558797



In [58]:

    
# Wrap the raw identification result in a NumPy matrix and in a DataFrame for
# inspection.
# NOTE(review): NumPy discourages np.matrix in favour of plain ndarrays. It is
# left as is because later cells pass iden_matrix_zona to
# auxiliar_functions.get_n_correct_tpm and pickle iden_matrix_zona.diagonal(),
# whose shape depends on the matrix type - confirm before changing it.
iden_matrix_zona = np.matrix(iden)
df_ident = pd.DataFrame(iden_matrix_zona)
# Preview the first 10 rows only; the recorded output reports 5169 columns.
df_ident.head(10)









    Out[58]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      5159
      5160
      5161
      5162
      5163
      5164
      5165
      5166
      5167
      5168
    
  
  
    
      0
      -68.142149
      -750.000000
      -780.000000
      -774.00000
      -750.00000
      -780.00000
      -750.00000
      -783.0
      -768.000000
      -817.000000
      ...
      -792.0
      -770.0
      -820.0
      -810.0
      -780.0
      -799.0
      -800.0
      -756.0
      -730.0
      -783.0
    
    
      1
      -780.000000
      -427.135729
      -750.000000
      -731.00000
      -750.00000
      -780.00000
      -750.00000
      -621.0
      -768.000000
      -817.000000
      ...
      -792.0
      -770.0
      -820.0
      -810.0
      -780.0
      -799.0
      -800.0
      -756.0
      -730.0
      -783.0
    
    
      2
      -780.000000
      -775.000000
      -363.640879
      -774.00000
      -750.00000
      -780.00000
      -750.00000
      -783.0
      -672.903090
      -817.000000
      ...
      -792.0
      -770.0
      -820.0
      -810.0
      -780.0
      -799.0
      -800.0
      -756.0
      -730.0
      -783.0
    
    
      3
      -780.000000
      -775.000000
      -780.000000
      -344.48455
      -750.00000
      -780.00000
      -750.00000
      -783.0
      -768.000000
      -817.000000
      ...
      -792.0
      -770.0
      -820.0
      -810.0
      -780.0
      -799.0
      -800.0
      -756.0
      -730.0
      -783.0
    
    
      4
      -780.000000
      -775.000000
      -780.000000
      -774.00000
      -351.59176
      -780.00000
      -750.00000
      -783.0
      -768.000000
      -817.000000
      ...
      -792.0
      -770.0
      -820.0
      -810.0
      -780.0
      -799.0
      -800.0
      -756.0
      -730.0
      -783.0
    
    
      5
      -780.000000
      -775.000000
      -780.000000
      -774.00000
      -750.00000
      -780.00000
      -750.00000
      -783.0
      -768.000000
      -817.000000
      ...
      -792.0
      -770.0
      -820.0
      -810.0
      -780.0
      -799.0
      -800.0
      -756.0
      -730.0
      -783.0
    
    
      6
      -780.000000
      -775.000000
      -780.000000
      -774.00000
      -750.00000
      -720.09691
      -500.29073
      -783.0
      -768.000000
      -817.000000
      ...
      -792.0
      -770.0
      -820.0
      -810.0
      -780.0
      -799.0
      -800.0
      -756.0
      -730.0
      -783.0
    
    
      7
      -780.000000
      -775.000000
      -780.000000
      -774.00000
      -750.00000
      -780.00000
      -750.00000
      -783.0
      -768.000000
      -817.000000
      ...
      -792.0
      -770.0
      -820.0
      -810.0
      -780.0
      -799.0
      -800.0
      -756.0
      -730.0
      -783.0
    
    
      8
      -780.000000
      -775.000000
      -750.096910
      -774.00000
      -750.00000
      -780.00000
      -750.00000
      -783.0
      -257.694065
      -817.000000
      ...
      -792.0
      -770.0
      -820.0
      -810.0
      -780.0
      -799.0
      -800.0
      -756.0
      -730.0
      -783.0
    
    
      9
      -780.000000
      -775.000000
      -780.000000
      -774.00000
      -750.00000
      -780.00000
      -750.00000
      -756.0
      -768.000000
      -315.583278
      ...
      -792.0
      -770.0
      -800.0
      -810.0
      -780.0
      -799.0
      -800.0
      -756.0
      -730.0
      -783.0
    
  

10 rows × 5169 columns



In [77]:

    
# Evaluate the identification matrix over the first `limit` users. The helper
# returns eight values, unpacked here in the order it provides them.
tpm_results = auxiliar_functions.get_n_correct_tpm(iden_matrix_zona, limit)
(n_identified, selected_distance, identified_indexs, abstenidos,
 correct_indexs, correct_distance, wrong_indexs, wrong_distances) = tpm_results

# Share of n_identified over limit, printed as a percentage rounded to two
# decimals.
porcentaje_correcto = n_identified*100.0/limit
print(str(round(porcentaje_correcto, 2)) + "%")



In [79]:

    
# Persist the evaluation results as nine consecutive pickles in one file.
# Readers must call pickle.load() nine times, in exactly this order; the last
# object is the diagonal of the identification matrix.
results_to_store = (
    n_identified,
    selected_distance,
    identified_indexs,
    abstenidos,
    correct_indexs,
    correct_distance,
    wrong_indexs,
    wrong_distances,
    iden_matrix_zona.diagonal(),
)
# Binary mode: pickle streams are bytes, so text mode ('w') fails on Python 3
# and can corrupt the stream through newline translation on Windows.
with open('resultados_alg_1_zona.pickle', 'wb') as f:
    for result in results_to_store:
        pickle.dump(result, f)



In [78]:

    
# Keep only the wrong-match distances strictly greater than -800 and report
# how many remain. A list comprehension is used instead of filter(): on
# Python 3 filter() returns an iterator, so len() on its result would fail.
# NOTE(review): -800 is a magic number; presumably it separates the default
# "no match" scores from real ones - confirm and name it as a constant.
wrong_distances_without_800 = [
    distance for distance in wrong_distances if distance > -800
]
print(len(wrong_distances_without_800))



In [63]:

    
# Side-by-side histogram (30 bins) of two series:
#   red   -> wrong_distances_without_800
#   green -> correct_distance
colors = ['red', 'green']
histogram_series = [wrong_distances_without_800, correct_distance]
plt.hist(histogram_series, bins=30, histtype='bar', color=colors)
plt.show()



In [72]:

    
# Count how many entries of correct_distance are exactly 0.0 and print the total.
counter = sum(1 for distance in correct_distance if distance == 0.0)
print(counter)



In [71]:

    
# Display the values in correct_distance as the cell output.
# NOTE(review): this echoes the whole list into the notebook; a slice such as
# correct_distance[:10] or a summary would keep the saved output short.
correct_distance









    Out[71]:





[-68.142148664601734,
 -427.13572880038168,
 -363.64087877870162,
 -344.48455006504025,
 -351.59176003468815,
 -257.69406507958291,
 -315.58327766020079,
 -649.69897000433605,
 -174.97148116926616,
 -228.89075625191822,
 -546.20411998265593,
 -439.76042248342321,
 -348.89769495510524,
 -63.010299956639813,
 -178.61235994796778,
 -216.36427492623204,
 -600.30102999566395,
 -304.84334445858246,
 -5.1629201560966758,
 -124.35785774700021,
 -400.77815125038359,
 -571.55090746888061,
 -290.38764005203228,
 -250.14806253545544,
 -426.41076385087945,
 -0.90308998699194354,
 -487.50514997831982,
 -262.77121254719663,
 -136.68867004769621,
 -372.77477235796283,
 -339.16375752398199,
 -3.0920050882549504,
 -297.71907981048196,
 -483.61235994796772,
 -288.70436503622273,
 -355.00287114631914,
 -214.62602115806038,
 -337.10393035588379,
 -298.61572120142057,
 -536.90308998699197,
 -528.09691001300803,
 -375.43838410703472,
 -177.62160197493932,
 -379.51930129554614,
 -44.106156405481158,
 -313.69897000433605,
 -416.5686362358411,
 -436.81647993062359,
 -443.01029995663981,
 -269.49281624324618,
 -352.79588001734407,
 -482.40823996531185,
 -420.63751752524888,
 -273.21981497118247,
 -52.816841767314543,
 -335.93651374247889,
 -523.6901960800285,
 -556.44166372079872,
 -236.60002621289405,
 -354.6146562844358,
 -57.954242509439325,
 -337.09691001300803,
 -201.59176003468815,
 -347.06888128940784,
 -177.03342375548695,
 -1.998456328699687,
 -172.73938144657765,
 -338.72937670409294,
 -70.089971309821038,
 -152.76042248342321,
 -441.78018502881758,
 -52.3753063169585,
 -660.60745502321458,
 -355.14014361800122,
 -553.07572071393827,
 -371.70808812636631,
 -7.2434921493949087,
 -48.464787519645938,
 -424.96657624451302,
 -470.22184874961641,
 -521.13033376849489,
 -307.73239375982297,
 -90.92598623673959,
 -362.81157500587062,
 -43.41601603638415,
 -118.5036063070196,
 -216.81360878430451,
 -133.21441993929574,
 -251.38199675517856,
 -272.83420869758413,
 -64.665546248849068,
 -1.6148838335352884,
 -75.455176227453151,
 -500.47712125471963,
 -217.18148629094239,
 -557.0,
 -404.49358654738182,
 -344.88045629527841,
 -509.73172273434386,
 -165.42422680822204,
 -216.80617997398389,
 -306.06038756210251,
 -7.0394608210666343,
 -10.0,
 -92.505149978319906,
 -125.0,
 -130.70436503622273,
 -163.84703253979146,
 -289.11260500153458,
 -351.46478751964594,
 -451.43136376415896,
 -202.58433122436753,
 -60.260667536990013,
 -169.19057542118665,
 -143.9419087743656,
 -4.5625349005392719,
 -202.96910013008056,
 -409.20615376108981,
 -665.90308998699186,
 -241.74065993802887,
 -288.26778715852242,
 -276.67837009105642,
 -25.839603729470838,
 -178.79048498545737,
 -178.90308998699194,
 -217.61109647366959,
 -286.72237400018531,
 -36.232148706256169,
 -465.79588001734407,
 -362.55090746888061,
 -290.77528010406451,
 -150.97687620115286,
 -625.59666495944123,
 -30.486583843474193,
 -432.79588001734407,
 -85.171186334302575,
 -481.1461280356782,
 -344.19989876941275,
 -398.21046930005258,
 -420.26557246174309,
 -450.85193746454456,
 -312.69897000433605,
 -138.68663626926229,
 0.0,
 -142.08457627793433,
 -42.965032573212738,
 -84.191786247582201,
 -267.63710905780397,
 -289.71629030385151,
 -305.73982257014359,
 -486.59715506657483,
 -189.03545753392086,
 -147.2503675803502,
 -259.20411998265593,
 -48.193820026016112,
 -378.93651374247889,
 -389.28195611471665,
 -380.40823996531185,
 -53.290730039024169,
 -107.89279003035213,
 -278.31875876262438,
 -562.0791812460476,
 -102.91385105897932,
 -311.96167131975994,
 -247.64942515390968,
 -211.07559244349537,
 -159.8164799306237,
 -551.38916608436443,
 -78.118000033421282,
 -348.30102999566395,
 -53.131877439795318,
 -190.60002621289405,
 -238.39051119835142,
 -88.110372970350767,
 0.0,
 -173.55728271503449,
 -164.52287874528034,
 -386.26066753699001,
 -38.428002510706193,
 -525.24987747321666,
 -330.72447484236875,
 -114.39435120611978,
 -283.43136376415896,
 -197.82897694507483,
 -300.47712125471963,
 -101.09851768502907,
 -592.30102999566395,
 -513.50514997831988,
 -283.64087877870162,
 -486.30102999566395,
 -18.28330122870355,
 -297.05671083371146,
 -163.46506771664806,
 -180.26760624017703,
 -92.524684974614772,
 -239.88249007371232,
 -113.54936379758027,
 -137.38560627359831,
 -720.30102999566395,
 -481.90308998699192,
 -179.27096749362983,
 -91.769885072177743,
 -245.76785129374383,
 -104.98955413394941,
 -273.81697003775724,
 -387.0688812894079,
 -78.089481202687438,
 -314.3167249841905,
 -70.439629942364888,
 -282.7468102568983,
 -102.26809634731063,
 -367.71466499286254,
 -380.94190877436563,
 -388.39794000867209,
 -252.79588001734407,
 -379.60205999132796,
 -303.91878497551841,
 -146.0,
 -56.036294901806144,
 -281.65860754566205,
 -464.10973385521538,
 -173.90308998699194,
 -403.13523869324814,
 -250.48455006504028,
 -559.65321251377532,
 -237.02137021741532,
 -302.29360118534333,
 -692.68867004769641,
 -65.083054915667077,
 -2.2606675369900127,
 -458.54693919066938,
 -52.441943917800934,
 -548.28330122870352,
 -325.32807747634763,
 -282.23291901039198,
 -320.09200508825495,
 -371.96993749796576,
 -338.0,
 -181.96877330232437,
 -88.112605001534575,
 -576.90308998699197,
 -115.27145760076343,
 -2.7889413141570567,
 -309.82726999439706,
 -335.42614965061773,
 -45.385606273598313,
 -0.8804562952784063,
 -470.25207453102792,
 -379.27300127206377,
 -67.442153827932401,
 -446.47018255153256,
 -31.309786281003483,
 -58.744727494896694,
 -432.5183464248986,
 -46.655246292209256,
 -580.80617997398394,
 0.0,
 -130.86272752831798,
 -454.95375240230572,
 -199.91927508265206,
 -176.72708036762495,
 -143.59666495944126,
 -124.39590623023813,
 -401.63220413305078,
 -26.546982758394677,
 -351.38896752705108,
 -220.21441993929574,
 -244.36266339309657,
 -384.0,
 -173.80617997398389,
 -487.5513975760141,
 -486.95424250943927,
 -143.38250757817968,
 -113.92418000740517,
 -434.93192837529352,
 -582.10720996964778,
 -273.22184874961636,
 -310.18329950759335,
 -485.91388005076521,
 -509.52287874528042,
 -462.48742121135945,
 -443.5385737338068,
 -650.0,
 -552.9208187539524,
 -522.34678748622468,
 -437.26760624017697,
 -217.63008871492821,
 -105.98430501147348,
 -555.66554624884907,
 -516.95424250943938,
 -377.68142215572107,
 -329.53911648884309,
 -451.89075625191816,
 -0.88045629527840585,
 -5.1238738196235483,
 -568.25527250510333,
 -312.02511094652698,
 -336.38764005203222,
 -0.48455006504028197,
 -114.46128035678238,
 -355.70233125778879,
 -274.35252977886302,
 -313.01750820517742,
 -237.73085008852266,
 -174.68498196648721,
 -544.77487163661965,
 -431.86272752831792,
 -351.63220413305078,
 -528.95424250943938,
 -486.30102999566395,
 -375.73239375982291,
 -407.20411998265581,
 -252.8750612633917,
 -266.6101245491916,
 -200.89279003035213,
 -328.36248247475118,
 -154.73239375982297,
 -68.676336312622482,
 -629.35218251811125,
 -280.77324632563057,
 -114.92547541373851,
 -338.83420869758413,
 -189.67920745894168,
 -467.10720996964778,
 -153.31132995230379,
 -379.79588001734407,
 -232.88739499846542,
 -389.04372371212673,
 -169.3267266636463,
 -284.0668475109739,
 -226.69375589079473,
 -275.81851370905764,
 -212.19045877256332,
 -134.73064017839118,
 -593.43838410703461,
 -61.39100130548502,
 -293.27300127206377,
 -202.38560627359831,
 -310.62469368304153,
 -401.25527250510328,
 -255.63548374681491,
 -203.03342375548695,
 -229.0684728219629,
 -3.1922763547157995,
 -356.89796450045571,
 -486.19382002601617,
 -1.9897000433601884,
 -378.46457760951461,
 -522.98766626492613,
 -444.79048498545734,
 -228.70639881465664,
 -3.4948500216800937,
 -184.94564950347723,
 -202.38021124171161,
 -252.0,
 -156.66841739516826,
 -302.72717964628168,
 -439.91878497551846,
 -576.12493873660833,
 -617.43136376415896,
 -525.00875628533936,
 -483.27839630395044,
 -230.75678688477734,
 -446.56324120395431,
 -252.30642502755069,
 -244.16915255586866,
 -589.49965566777632,
 -89.692031301149001,
 -672.73788807036635,
 -555.24303804868623,
 -5.7933561317765614,
 -422.99257118967927,
 -452.36787750663785,
 -561.58433122436747,
 -545.68124123737539,
 -681.35218251811136,
 -237.41209132589825,
 -402.43675879604569,
 -430.99460496811332,
 -175.82390874094432,
 -271.73239375982291,
 -143.1835200693763,
 -469.50514997831988,
 -165.48742121135948,
 -0.88045629527840585,
 -140.99306129681298,
 -626.31182005943731,
 -447.06140584833315,
 -75.0,
 -225.76007522267153,
 -272.2095150145426,
 -443.50514997831988,
 -379.10460138136273,
 -424.61285005510138,
 -175.00336125345279,
 -540.65321251377532,
 -101.84163750790475,
 -483.48742121135945,
 -175.18975246914829,
 -170.40032104785763,
 -208.31875876262441,
 -469.19534605834838,
 -596.69897000433593,
 -450.63751752524894,
 -253.56543826176562,
 -620.72699872793623,
 -586.30102999566395,
 -4.0210900204132241,
 -54.193820026016112,
 -94.892790030352131,
 -585.90308998699186,
 -303.6069649160811,
 -351.67094128073575,
 -185.38070134884521,
 -146.70436503622273,
 -275.47712125471969,
 -345.60205999132796,
 -520.82390874094438,
 -490.06194258622077,
 -274.16039627052919,
 -430.05654755433409,
 -427.54946307623692,
 -83.298669389473872,
 -644.84509804001425,
 -0.96910013008056373,
 -269.42883987859148,
 -408.81360878430451,
 -109.81360878430451,
 -166.66408452180102,
 -601.36451625318512,
 -7.7055818797668536,
 -143.65685396423027,
 -196.14437445424642,
 -93.59176003468815,
 -216.66196879911485,
 -310.20102128723727,
 -512.09691001300803,
 -453.74065993802878,
 -580.53317870192018,
 -436.21441993929568,
 -561.85733249643135,
 -482.50514997831982,
 -430.8172502347594,
 -409.22471989593555,
 -392.17627217740107,
 -237.48742121135948,
 -446.40823996531185,
 -324.55090746888061,
 -0.99950989286639935,
 -152.02114266831586,
 -408.01029995663976,
 -560.44369749923271,
 -27.369421177938193,
 -595.19382002601606,
 -87.061942586220795,
 -3.2915620127585199,
 -419.0688812894079,
 -159.40852016231398,
 -327.01029995663976,
 -148.8164799306237,
 -39.903089986991944,
 -117.21671627576379,
 -236.99257118967938,
 -342.19003266615044,
 -353.52287874528031,
 -423.02820964194569,
 -624.27300127206377,
 -194.41566877563247,
 -116.90308998699194,
 -452.66554624884907,
 -272.07936216439305,
 -390.81444615218948,
 -346.61285005510138,
 -624.30102999566395,
 -194.33445375115093,
 -430.20411998265581,
 -197.80617997398389,
 -327.16039627052919,
 -559.60205999132791,
 -216.75502745153651,
 -280.33494385828448,
 -483.16375752398187,
 -73.712631214428626,
 -209.66147869198124,
 -497.0791812460476,
 -212.96503257321274,
 -203.99306129681298,
 -366.50514997831988,
 -37.524696327432821,
 -217.34705004425874,
 -424.66890750230186,
 -4.5154499349597179,
 -146.3167249841905,
 -481.55630250076729,
 -3.6520483181338799,
 -535.06145247908705,
 -370.89279003035216,
 -361.22108806848269,
 -506.95424250943933,
 -197.09438612744054,
 -314.40823996531185,
 -482.11954370472165,
 -346.81157500587051,
 -572.03342375548687,
 -287.14470128200264,
 -399.68663626926229,
 -563.61235994796778,
 -96.911356165197844,
 -45.0,
 -161.55630250076729,
 -164.48455006504028,
 -253.28002161493947,
 -186.3167249841905,
 -465.55630250076729,
 0.0,
 -165.34678748622466,
 -641.80617997398394,
 -246.62854504362789,
 -344.23754373814285,
 -270.18311160193139,
 -452.10230504489476,
 -138.9244602044073,
 -624.60205999132791,
 -132.58972625625424,
 -143.58595653535653,
 -181.16790672053847,
 -1.3802112417116061,
 -433.14729223131962,
 -335.83825861548394,
 -91.140633725134819,
 -337.80829539210652,
 -385.56863623584115,
 -254.60205999132796,
 -479.03881878737366,
 -178.90154631569163,
 -516.0,
 -300.96454246607914,
 -458.4593924877592,
 -436.28348214704897,
 -119.03293364835335,
 -308.87329004299193,
 -181.32023214705407,
 -83.334943858284532,
 -218.00018091834542,
 -134.0,
 -214.9918971011715,
 -589.09691001300814,
 -489.33445375115093,
 -364.49485002168007,
 -480.70436503622273,
 -350.3398487830375,
 -238.90683071610354,
 -492.80463630268343,
 -334.80617997398383,
 -299.49485002168007,
 -526.93111871059227,
 -516.95424250943938,
 -354.56022414822428,
 -295.3645652367527,
 -388.51797382052729,
 -95.617264872720881,
 -242.96302110381191,
 -421.50514997831982,
 -53.274544943364049,
 -262.54636438081832,
 -132.58972625625424,
 -59.903089986991944,
 -265.32964810505456,
 -2.8627275283179752,
 -227.54782012627996,
 -3.0102999566398121,
 -203.54752857645977,
 -362.2095150145426,
 -237.17405748062177,
 -88.304461535987457,
 -127.1835200693763,
 -560.69897000433593,
 -432.92081875395246,
 -71.990817608247625,
 -438.55375091201574,
 -421.86097394688608,
 -424.49281624324618,
 -1.5863650028014442,
 -624.95424250943938,
 -271.27636252551656,
 -0.48455006504028197,
 -464.78067513595113,
 -101.48251628660637,
 -373.20411998265593,
 -173.82187496251041,
 -193.58482133150113,
 -485.8927900303521,
 -593.7323937598228,
 -58.537074680572637,
 -193.78558006070426,
 -4.5380836266732549,
 -526.13033376849501,
 -152.04421381926036,
 -461.46142626619314,
 -476.98970004336019,
 -411.14063372513482,
 -3.4899450969269883,
 -95.928875022026801,
 -221.52491252371425,
 -651.44369749923271,
 -134.0,
 -353.39794000867198,
 -95.163757523981957,
 -480.60205999132791,
 -434.60205999132796,
 -204.19382002601611,
 -164.73442753825688,
 -76.352544354802177,
 -450.60205999132796,
 -309.6123599479676,
 -390.0,
 -111.86300772532013,
 -105.90673143744689,
 -453.46478751964582,
 -270.2907300390242,
 -5.8082137524177995,
 -80.709269960975831,
 -116.44369749923271,
 -55.132367546928919,
 -556.50514997831988,
 -298.63106892919541,
 -199.64124061539243,
 -53.79872346047928,
 -211.59176003468815,
 -471.71678041098511,
 -432.20461008978953,
 -98.899810373227865,
 -552.20411998265593,
 -579.2886962605902,
 -60.167825080849781,
 -127.12984366136141,
 -535.40484343392279,
 -360.46682129807988,
 -482.1043388233287,
 -395.59512128814083,
 -319.1051761912139,
 -244.01569498852652,
 -134.48607609737257,
 -196.14063372513482,
 -465.20411998265587,
 -139.67837009105639,
 -342.63548374681488,
 -83.520007598961143,
 -152.29023993189057,
 -501.50514997831982,
 -172.06348625752111,
 -624.30102999566395,
 -292.72006002474927,
 -303.34678748622468,
 -225.50514997831991,
 -440.0,
 -206.48076270517456,
 -102.40823996531185,
 -90.887394998465425,
 -109.29073003902417,
 -701.0791812460476,
 -376.1077000767815,
 -43.528273777167044,
 -227.20951501454263,
 -119.03832868024006,
 -152.03832868024006,
 -314.03167017405514,
 -256.98970004336019,
 -594.60205999132791,
 -202.64291255713553,
 -216.07211427241782,
 -391.85733249643124,
 -90.484550065040281,
 -301.37481620982487,
 -288.25527250510328,
 -273.01029995663981,
 -403.22396416773904,
 -501.80617997398383,
 -351.26760624017709,
 -161.48455006504028,
 -478.45939248775909,
 -217.28330122870355,
 -401.98227123303957,
 -202.7845988177877,
 -121.79973137793047,
 -380.41931022608736,
 -273.19382002601611,
 -143.88128826901277,
 -351.80483485999696,
 -386.75012252678346,
 -3.8976949551052371,
 -324.0,
 -2.1835200693763004,
 -307.10393035588379,
 -182.61803517685661,
 -1.8082137524178004,
 -594.17609125905574,
 -276.76581751530983,
 -434.0,
 -249.25527250510331,
 -160.11057122310066,
 -212.39474203459665,
 -609.90308998699197,
 -171.89075625191822,
 -229.03594764105446,
 -202.63751752524882,
 -135.08661005636824,
 -217.29073003902417,
 -138.79313993549513,
 -496.85193746454456,
 -525.90308998699186,
 -308.60745502321464,
 -409.91051879731253,
 -59.161723745548045,
 -40.781512503836439,
 -432.7501225267834,
 -88.14889990334072,
 -581.59176003468815,
 -461.2976511032432,
 -177.64781748188864,
 -406.91569326741632,
 -127.79588001734407,
 -162.12984366136141,
 -281.00539503188668,
 -107.87532382142584,
 -631.50514997831988,
 -402.23579015671106,
 -233.79335613177656,
 -198.30537146338398,
 -246.10097756987588,
 -258.12290495817439,
 -434.72699872793623,
 -164.69833108569148,
 -289.05703766146769,
 -498.05605744720054,
 -460.10924374808178,
 -343.68650799881954,
 -235.78151250383644,
 -501.50514997831988,
 -180.16393844232738,
 -490.88045629527841,
 -559.2115487929766,
 -466.03342375548692,
 -292.19382002601611,
 -378.34909081000922,
 -318.58972625625415,
 -251.50514997831991,
 -214.72209380318316,
 -432.95424250943927,
 -432.37040139220545,
 -352.32618757294506,
 -407.2380338452765,
 -233.13033376849501,
 -111.35400010026385,
 -289.39794000867209,
 -423.36248247475118,
 -290.3959062302381,
 -107.14855264258904,
 -201.0,
 -272.54600254412748,
 -283.50311619988594,
 -507.30102999566395,
 -151.70436503622273,
 -180.14315761070233,
 -250.57403126772772,
 -272.52159763201416,
 -253.43825583659194,
 -400.76245626185715,
 -317.70454595456812,
 -330.00769136835476,
 -355.09641990587448,
 -322.58636500280147,
 -449.45407909556116,
 -560.95424250943938,
 -423.97533252985255,
 -579.66351247041496,
 -471.56169753265397,
 -152.51054501020661,
 -64.1835200693763,
 0.0,
 -146.02059991327963,
 -308.33396364401733,
 -181.43136376415899,
 -246.8100023427842,
 -512.30102999566395,
 -362.58972625625421,
 -428.50851123177267,
 -465.83133755126499,
 -578.30102999566395,
 -372.51054501020644,
 -556.55630250076729,
 -335.04085256580754,
 -529.00539503188668,
 -95.817250234759456,
 -208.04418482747431,
 -547.39794000867209,
 -244.77611747194973,
 -466.78517159325935,
 -380.98768857395936,
 -471.75520836988193,
 -319.91338994363173,
 -313.02802872360024,
 -631.87506126339167,
 -445.66379266741734,
 -254.78018502881756,
 -317.40823996531179,
 -246.11347661870647,
 -502.43136376415896,
 -182.47221632996656,
 -2.68509259796198,
 -158.15846644081705,
 -316.26593429843399,
 -188.19382002601611,
 -350.79588001734407,
 -376.50514997831988,
 -438.59176003468815,
 -1.6812412373755874,
 -142.01976254539434,
 -320.2375437381429,
 -365.48251628660637,
 -183.41160121876464,
 -322.46682129807982,
 -201.38764005203222,
 -430.90308998699192,
 -247.42106380751918,
 -524.29514485664356,
 -408.07378621416092,
 -124.39590623023813,
 -322.2095150145426,
 -599.29360118534316,
 -433.49485002168007,
 -132.32261012321081,
 -372.7092699609758,
 -472.35042893667958,
 -381.64087877870156,
 -249.32905871926422,
 -621.23955987760883,
 -265.41032272731343,
 -317.04911874401347,
 -550.60205999132791,
 -210.39926748369092,
 -330.26614420856538,
 -347.11463877996846,
 -378.97215219474515,
 -307.2223588485316,
 -123.85278940836903,
 -551.4997549464332,
 -86.569963710859895,
 -109.20411998265593,
 -385.20411998265587,
 -420.60926125254906,
 -199.89075625191822,
 -361.28330122870352,
 -703.50514997831976,
 -488.47856564736321,
 -220.70436503622273,
 -305.64779984292062,
 -350.26066753698996,
 -523.94884747755259,
 -545.17609125905574,
 -173.02697515943353,
 -373.20411998265587,
 -327.91338994363173,
 -0.3959062302381241,
 -385.63751752524882,
 -440.58451214271298,
 -176.40860180200269,
 -388.88167313882246,
 -176.19382002601611,
 -435.84509804001425,
 -273.37481620982493,
 -211.49737390724761,
 0.0,
 -400.0,
 -480.70436503622273,
 -432.39794000867209,
 -41.387640052032225,
 -443.60745502321464,
 -188.97197127639976,
 -530.68663626926229,
 -219.1743376776239,
 -646.74472749489667,
 -353.98937321560396,
 -481.60745502321464,
 -564.41903002908521,
 -288.24497254846347,
 -275.56912634297458,
 -110.5578461720676,
 -184.69742633303571,
 -275.55630250076729,
 -117.97021769496796,
 -321.02802872360024,
 -500.19382002601611,
 -304.6302696332736,
 -211.53268859478655,
 -403.47712125471969,
 -522.82930377283094,
 -471.13735411137071,
 -486.51940057420279,
 -153.99460496811329,
 -351.30642502755063,
 -268.0,
 -503.85193746454462,
 -162.03342375548695,
 -394.47018255153262,
 -363.4205737003856,
 -274.34678748622463,
 -383.84703253979143,
 -541.59176003468815,
 -4.6689075023018622,
 -292.76042248342321,
 -396.29032157157923,
 -251.83624247601804,
 -548.93651374247884,
 -546.33445375115082,
 -685.14602875702155,
 -384.94200805302228,
 -420.90848501887865,
 -432.68142215572095,
 -498.89279003035216,
 -290.52827377716704,
 -327.2692139121981,
 -560.20411998265593,
 -373.0,
 -482.63548374681488,
 -434.62190417641102,
 -349.98438665116214,
 -629.52827377716699,
 -543.66554624884907,
 -57.801275049230782,
 -493.0,
 -140.65280404633046,
 -408.9719712763997,
 -169.41952013621886,
 -408.90848501887859,
 -0.48455006504028197,
 -321.22184874961641,
 -422.90799491174505,
 -524.67633631262242,
 -547.20411998265581,
 -381.61803517685672,
 -629.68921016704689,
 -576.95424250943938,
 -0.48455006504028197,
 -208.64781748188864,
 -487.08813608870054,
 -495.70387492908912,
 -630.69897000433593,
 -383.76581751530989,
 -482.92081875395235,
 -335.0663574038403,
 -527.49485002168012,
 -341.78845120702323,
 -312.79588001734407,
 -255.61235994796778,
 -174.70436503622273,
 -582.9699374979657,
 -352.38021124171161,
 -603.23754373814279,
 -501.89566117667118,
 -315.2907300390242,
 -316.7501225267834,
 -455.09691001300808,
 -378.60205999132791,
 -49.885851327165113,
 -448.5002450535668,
 -441.48251628660637,
 -309.06145247908722,
 -302.19718127946885,
 -292.9719712763997,
 -680.95424250943938,
 -470.91388005076533,
 -391.39794000867198,
 -241.50514997831991,
 -451.80617997398389,
 -436.37972113457812,
 -454.94208969271097,
 -43.387640052032225,
 -3.6392244759265537,
 -342.99275210802477,
 -505.02109002041317,
 -576.50514997831988,
 -558.72699872793623,
 -219.76245626185712,
 -1.6110964736695961,
 -267.2907300390242,
 -560.05440314442558,
 -266.54887369044667,
 -438.0,
 -330.23957751657679,
 -41.505149978319906,
 -496.56169753265397,
 -364.68867004769618,
 -432.08457627793433,
 -272.31875876262438,
 -405.76245626185721,
 -232.73064017839116,
 -438.63751752524882,
 -261.19129614044857,
 -404.09159662081004,
 ...]

Este gráfico muestra el indicador de similitud para los correctamente identificados, los incorrectamente identificados y, en verde, el valor de la distancia que debiese haber sido identificada.

Se puede observar que la distribución verde está más a la izquierda, lo que indica que un gran número de personas cambió de comportamiento. Esto se condice con los resultados del tercer algoritmo.



In [64]:

    
# Collect the selected distance of every wrongly identified user.
# `counter` walks `selected_distance`, advancing once for each index found in
# either the wrong or the correct set.
wrong_lookup = set(wrong_indexs)      # O(1) membership; scanning the lists made the loop quadratic
correct_lookup = set(correct_indexs)
wrong_distances_selected = []
counter = 0
for i in range(limit):
    if i in wrong_lookup:
        wrong_distances_selected.append(selected_distance[counter])
        counter += 1
    if i in correct_lookup:
        counter += 1



In [65]:

    
# Compare the three distance distributions. Labels, axis names and a legend
# make the colour-to-series mapping explicit.
colors = ['red', 'blue', 'green']
labels = ['Wrongly recognized users',
          'Wrongly recognized users (selected distance)',
          'Correctly recognized users']
plt.hist([wrong_distances_without_800, wrong_distances_selected, correct_distance],
         30, histtype='bar', color=colors, label=labels)
plt.xlabel('Similarity Indicator')
plt.ylabel('Number of Users')
plt.legend(loc='best')
plt.show()

Los siguientes gráficos muestran los indicadores de la diagonal de los correcta e incorrectamente clasificados.

Cuidado que estos gráficos son engañosos porque solo consideran la diagonal. Entonces puede que haya otros incorrectos pero que no sean parte de la diagonal.



In [27]:

    
# Split the diagonal similarity values above -100 into users found in
# correct_indexs and the rest.
# np.array(...).ravel() gives a 1-D copy even when `iden` is an np.matrix,
# whose .diagonal() is a 1xN matrix (len() == 1, so the loop would not work).
diagonal = np.array(iden.diagonal()).ravel()
correct_lookup = set(correct_indexs)  # O(1) membership instead of a list scan per user
diagonal_d1_ii = [i for i in range(len(diagonal)) if diagonal[i] > -100]
correct_distance_ii = [diagonal[i] for i in diagonal_d1_ii if i in correct_lookup]
wrong_distance_ii = [diagonal[i] for i in diagonal_d1_ii if i not in correct_lookup]
print("diagonal: " + str(len(diagonal_d1_ii)))
print("correctos: " + str(len(correct_distance_ii)))
print("incorrectos: " + str(len(wrong_distance_ii)))









    



diagonal: 593
correctos: 564
incorrectos: 29



In [28]:

    
# Histogram of the diagonal similarity values, split by whether the user is in
# correct_indexs. Labels and a legend identify the two series.
colors = ['red', 'green']
plt.hist([wrong_distance_ii, correct_distance_ii], histtype='bar', color=colors,
         label=['Not correctly identified', 'Correctly identified'])
plt.xlabel('Similarity Indicator')
plt.ylabel('Number of Users')
plt.legend(loc='best')
plt.show()



In [29]:

    
# Split the diagonal similarity values above -1 into users found in
# correct_indexs and the rest.
# Note: this rebinds correct_distance, which the get_n_correct_tpm cell also assigns.
# np.array(...).ravel() gives a 1-D copy even when `iden` is an np.matrix,
# whose .diagonal() is a 1xN matrix (len() == 1, so the loop would not work).
diagonal = np.array(iden.diagonal()).ravel()
correct_lookup = set(correct_indexs)  # O(1) membership instead of a list scan per user
diagonal_d1 = [i for i in range(len(diagonal)) if diagonal[i] > -1]
correct_distance = [diagonal[i] for i in diagonal_d1 if i in correct_lookup]
wrong_distance = [diagonal[i] for i in diagonal_d1 if i not in correct_lookup]
print("diagonal: " + str(len(diagonal_d1)))
print("correctos: " + str(len(correct_distance)))
print("incorrectos: " + str(len(wrong_distance)))









    



diagonal: 82
correctos: 71
incorrectos: 11



In [30]:

    
# Histogram of the diagonal similarity values, split by whether the user is in
# correct_indexs. Labels and a legend identify the two series.
colors = ['red', 'green']
plt.hist([wrong_distance, correct_distance], histtype='bar', color=colors,
         label=['Not correctly identified', 'Correctly identified'])
plt.xlabel('Similarity Indicator')
plt.ylabel('Number of Users')
plt.legend(loc='best')
plt.show()



In [31]:

    
# Split the diagonal similarity values above -0.1 into users found in
# correct_indexs and the rest.
# Note: this rebinds correct_distance, which the get_n_correct_tpm cell also assigns.
# np.array(...).ravel() gives a 1-D copy even when `iden` is an np.matrix,
# whose .diagonal() is a 1xN matrix (len() == 1, so the loop would not work).
diagonal = np.array(iden.diagonal()).ravel()
correct_lookup = set(correct_indexs)  # O(1) membership instead of a list scan per user
diagonal_d1 = [i for i in range(len(diagonal)) if diagonal[i] > -0.1]
correct_distance = [diagonal[i] for i in diagonal_d1 if i in correct_lookup]
wrong_distance = [diagonal[i] for i in diagonal_d1 if i not in correct_lookup]
print("diagonal: " + str(len(diagonal_d1)))
print("correctos: " + str(len(correct_distance)))
print("incorrectos: " + str(len(wrong_distance)))









    



diagonal: 44
correctos: 40
incorrectos: 4



In [32]:

    
# Histogram of the diagonal similarity values, split by whether the user is in
# correct_indexs. Labels and a legend identify the two series.
colors = ['red', 'green']
plt.hist([wrong_distance, correct_distance], histtype='bar', color=colors,
         label=['Not correctly identified', 'Correctly identified'])
plt.xlabel('Similarity Indicator')
plt.ylabel('Number of Users')
plt.legend(loc='best')
plt.show()



In [66]:

    
# Persist iden_matrix_zona to disk.
# Binary mode keeps the pickle bytes identical on every platform (text mode on
# Windows rewrites '\n' as '\r\n') and is what pickle requires on Python 3.
with open('data/iden_matrix_zona.pickle', 'wb') as f:
    pickle.dump(iden_matrix_zona, f)



In [26]:

    
# Load the object stored in data/iden_matrix_zona.pickle into `iden`.
# NOTE(review): the file is opened in text mode, which only works for
# protocol-0 pickles written with matching newline handling; confirm how the
# file was written before switching to 'rb'. Only unpickle files you trust.
with open('data/iden_matrix_zona.pickle','r') as f:
    iden = pickle.load(f)

Comparar con paraderos



In [8]:

    
# Re-import the auxiliar_functions module so edits to its source are picked up (Python 2 builtin reload).
reload(auxiliar_functions)









    Out[8]:





<module 'tools.auxiliar_functions' from 'tools/auxiliar_functions.pyc'>



In [11]:

    
# Build profiles from the first period and sequences from the second, using the
# id / par_subida / par_bajada columns, and report the elapsed seconds.
profile_columns = ['id', 'par_subida', 'par_bajada']
start_time = time.time()
users_profiles = tpm_identification.get_profiles(
    *[first_period_frame[column] for column in profile_columns])
profiles = tpm_identification.get_sequences(
    *[second_period_frame[column] for column in profile_columns])
delta_time = time.time() - start_time
print(delta_time)









    



0.99444103241



In [10]:

    
# Inspect the first element of users_profiles (displayed as the cell output).
users_profiles[0]









    Out[10]:





{'mls': ['T-22-205-SN-65',
  'E-20-190-SN-40',
  'E-20-291-PO-20',
  'T-15-135-PO-5',
  'T-15-135-OP-110',
  'T-4-19-NS-100',
  'E-20-199-NS-2',
  'T-24-205-NS-20',
  'T-20-190-SN-35',
  'E-20-289-PO-5',
  'T-22-205-SN-55',
  'T-24-205-NS-30',
  'T-15-135-OP-105'],
 'nvisitas': [5, 5, 4, 6, 5, 6, 6, 4, 1, 2, 1, 1, 1],
 'tpm': array([[ 0.        ,  0.8       ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.2       ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.8       ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.2       ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  1.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.83333333,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.16666667],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          1.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  1.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.16666667,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.66666667,  0.        ,  0.        ,
          0.        ,  0.16666667,  0.        ],
        [ 0.66666667,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.33333333,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  1.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  1.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  1.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 1.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          1.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ]]),
 'user_id': 1132106}



In [45]:

    
# Compute the identification matrix from the profiles and sequences and report
# the elapsed seconds (the recorded run printed about 4842 s).
start_time = time.time()
iden_paradero = tpm_identification.get_identification_matrix(
    users_profiles,
    profiles,
)
delta_time = time.time() - start_time
print(delta_time)









    



4841.82331514



In [46]:

    
# Wrap the identification result in an np.matrix and build a DataFrame from it.
# NOTE(review): np.matrix.diagonal() is 2-D (1xN); cells that call len() on the
# diagonal presumably expect a 1-D array, so confirm the type they receive.
iden_matrix_paradero = np.matrix(iden_paradero)
df_ident_paradero = pd.DataFrame(iden_matrix_paradero)



In [47]:

    
# Persist iden_matrix_paradero to disk.
# Binary mode keeps the pickle bytes identical on every platform (text mode on
# Windows rewrites '\n' as '\r\n') and is what pickle requires on Python 3.
with open('data/iden_matrix_paradero.pickle', 'wb') as f:
    pickle.dump(iden_matrix_paradero, f)



In [73]:

    
# Load the object stored in data/iden_matrix_paradero.pickle into iden_matrix_paradero.
# NOTE(review): the file is opened in text mode, which only works for
# protocol-0 pickles written with matching newline handling; confirm how the
# file was written before switching to 'rb'. Only unpickle files you trust.
with open('data/iden_matrix_paradero.pickle','r') as f:
    iden_matrix_paradero = pickle.load(f)



In [74]:

    
# Evaluate the identification matrix over the first `limit` users and print the
# percentage n_identified / limit, rounded to two decimals.
tpm_results = auxiliar_functions.get_n_correct_tpm(iden_matrix_paradero, limit)
(n_identified, selected_distance, identified_indexs, abstenidos,
 correct_indexs, correct_distance, wrong_indexs, wrong_distances) = tpm_results
porcentaje_correcto = n_identified * 100.0 / limit
print(str(round(porcentaje_correcto, 2)) + "%")



In [21]:

    
# Store the algorithm-1 results as nine consecutive pickles in one file; the
# order here is the order in which they must be read back.
# The values are gathered first so a missing name fails before the existing
# file is truncated.
results_to_store = (
    n_identified, selected_distance, identified_indexs, abstenidos,
    correct_indexs, correct_distance, wrong_indexs, wrong_distances,
    iden_matrix_paradero.diagonal(),
)
# Binary mode keeps the pickle bytes identical on every platform (text mode on
# Windows rewrites '\n' as '\r\n') and is what pickle requires on Python 3.
with open('data/resultados_alg_1.pickle', 'wb') as f:
    for result in results_to_store:
        pickle.dump(result, f)



In [5]:

    
# Read back the nine consecutive pickles stored in data/resultados_alg_1.pickle,
# in the same order in which they were dumped.
with open('data/resultados_alg_1.pickle','r') as f:
    (n_identified, selected_distance, identified_indexs, abstenidos,
     correct_indexs, correct_distance, wrong_indexs, wrong_distances,
     diagonal) = tuple(pickle.load(f) for position in range(9))



In [6]:

    
# Count how many entries of correct_distance are exactly 0.0.
counter = sum(1 for distance in correct_distance if distance == 0.0)
print(counter)



In [7]:

    
# Count how many entries of wrong_distances are exactly 0.0.
counter = sum(1 for distance in wrong_distances if distance == 0.0)
print(counter)



In [8]:

    
# Show the shape of iden_matrix_paradero. It must be built or loaded first:
# the recorded run raised NameError because it was not defined in that session.
iden_matrix_paradero.shape









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-fbaf082e6125> in <module>()
----> 1 iden_matrix_paradero.shape

NameError: name 'iden_matrix_paradero' is not defined



In [9]:

    
# Number of entries in abstenidos.
len(abstenidos)









    Out[9]:





1



In [10]:

    
# Size of abstenidos as a percentage of limit (true division, given the
# notebook's `from __future__ import division`). `limit` must be defined first:
# the recorded run raised NameError because it was not.
len(abstenidos)*100/limit









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-34d5cf1a4ad4> in <module>()
----> 1 len(abstenidos)*100/limit

NameError: name 'limit' is not defined



In [11]:

    
# Number of entries in correct_indexs.
len(correct_indexs)









    Out[11]:





3564



In [12]:

    
# Number of entries in identified_indexs.
len(identified_indexs)









    Out[12]:





5168



In [13]:

    
# Number of entries in wrong_indexs.
len(wrong_indexs)









    Out[13]:





1604



In [14]:

    
# Keep only the wrong distances strictly greater than -800 and print how many remain.
wrong_distances_without_800 = [distance for distance in wrong_distances if distance > -800]
print(len(wrong_distances_without_800))



In [15]:

    
# Histogram of the wrong distances above -800; the returned
# (counts, bin_edges, patches) tuple is displayed as the cell output.
plt.hist(wrong_distances_without_800)









    Out[15]:





(array([ 662.,  399.,  188.,  117.,   60.,   35.,   21.,   18.,   14.,   11.]),
 array([-799.30103 , -719.370927, -639.440824, -559.510721, -479.580618,
        -399.650515, -319.720412, -239.790309, -159.860206,  -79.930103,
           0.      ]),
 <a list of 10 Patch objects>)



In [16]:

    
# Side-by-side histograms of the wrong (red) and correct (green) distances; the
# returned (counts, bin_edges, patches) tuple is displayed as the cell output.
colors = ['red', 'green']
plt.hist([wrong_distances_without_800,correct_distance], histtype='bar',color=colors)









    Out[16]:





([array([ 662.,  399.,  188.,  117.,   60.,   35.,   21.,   18.,   14.,   11.]),
  array([  19.,  173.,  322.,  496.,  490.,  491.,  520.,  425.,  371.,  257.])],
 array([-799.30103 , -719.370927, -639.440824, -559.510721, -479.580618,
        -399.650515, -319.720412, -239.790309, -159.860206,  -79.930103,
           0.      ]),
 <a list of 2 Lists of Patches objects>)



In [9]:

    
# Labelled 30-bin histogram of correct vs. wrong distances, saved as hist_tpm.eps.
colors = ['green', 'red']
series = [correct_distance, wrong_distances_without_800]
series_labels = ['Correctly recognized users', 'Wrongly recognized users']
plt.hist(series, bins=30, histtype='bar', color=colors, label=series_labels)
plt.xlabel('Similarity Indicator')
plt.ylabel('Number of Users')
# Legend centred below the axes.
legend_options = dict(loc='upper center', bbox_to_anchor=(0.5, -0.05),
                      fancybox=True, shadow=True, ncol=5)
plt.legend(**legend_options)
plt.savefig('hist_tpm.eps', format='eps', dpi=1000)



In [17]:

    
from matplotlib.font_manager import FontProperties

# Styled histogram of correct vs. wrong distances for the TPM algorithm,
# saved as an EPS file.
fontP = FontProperties()
fontP.set_size('small')
# Convert 0-255 RGB triples to the 0-1 floats matplotlib expects.
colors = [(r / 255., g / 255., b / 255.)
          for (r, g, b) in [(132, 255, 108), (255, 57, 100)]]
plt.figure()
ax = plt.subplot(111)
# Hide the frame on all four sides.
for side in ("top", "bottom", "right", "left"):
    ax.spines[side].set_visible(False)
# Keep ticks on the bottom and left axes only.
ax.xaxis.tick_bottom()
ax.yaxis.tick_left()
ax.set_xlabel('Similarity Indicator')
ax.set_ylabel('Number of Users')
ax.set_title('Number of users identified with TPM Algorithm by similarity indicator', y=1.08)
ax.set_axisbelow(True)
ax.yaxis.grid(color='gray', linestyle='dashed')
# Fix the visible range before drawing the bars.
ax.set_xlim(-800.0, 0.0)
ax.set_ylim(0, 250)
ax.hist([correct_distance, wrong_distances_without_800], 30, histtype='bar', color=colors,
        label=['Correctly recognized users', 'Wrongly recognized users'])
# Legend placed outside the axes, to the right.
ax.legend(loc='lower left', fancybox=False, shadow=False, prop=fontP,
          bbox_to_anchor=(1.0, 0.05))
plt.savefig("indicator correct wrong alg1.eps", format='eps', dpi=1000, bbox_inches="tight")

Distancia de cada usuario entre abril y septiembre, y en verde, las distancias que fueron seleccionadas como correctas pero no lo eran



In [41]:

    
# Collect the selected distance of every wrongly identified user.
# `counter` walks `selected_distance`, advancing once for each index found in
# either the wrong or the correct set.
wrong_lookup = set(wrong_indexs)      # O(1) membership; scanning the lists made the loop quadratic
correct_lookup = set(correct_indexs)
wrong_distances_selected = []
counter = 0
for i in range(limit):
    if i in wrong_lookup:
        wrong_distances_selected.append(selected_distance[counter])
        counter += 1
    if i in correct_lookup:
        counter += 1



In [42]:

    
# Number of entries in wrong_distances_selected.
len(wrong_distances_selected)









    Out[42]:





1604



In [43]:

    
# Compare the three distance distributions. Labels, axis names and a legend
# make the colour-to-series mapping explicit.
colors = ['green', 'red', 'blue']
labels = ['Correctly recognized users',
          'Wrongly recognized users',
          'Wrongly recognized users (selected distance)']
plt.hist([correct_distance, wrong_distances_without_800, wrong_distances_selected],
         30, histtype='bar', color=colors, label=labels)
plt.xlabel('Similarity Indicator')
plt.ylabel('Number of Users')
plt.legend(loc='best')
plt.show()

En el gráfico anterior se puede observar que hay usuarios con distancia 0, es decir que hay solo una opción y que no son el mismo id

Plot wrong distances



In [46]:

    
# Keep only the selected wrong distances strictly greater than -100.
wd_menor_100 = [distance for distance in wrong_distances_selected if distance > -100]



In [47]:

    
# Histogram of the selected wrong distances above -100; the returned
# (counts, bin_edges, patches) tuple is displayed as the cell output.
plt.hist(wd_menor_100)









    Out[47]:





(array([  3.,   3.,   1.,   2.,   1.,   2.,   0.,   0.,   0.,  30.]),
 array([-97.09487623, -87.38538861, -77.67590099, -67.96641336,
        -58.25692574, -48.54743812, -38.83795049, -29.12846287,
        -19.41897525,  -9.70948762,   0.        ]),
 <a list of 10 Patch objects>)

Hay usuarios que se roban la película



In [73]:

    
# Count how often each index appears in identified_indexs and list the
# (index, count) pairs from most to least frequent.
x = np.array(identified_indexs)
y = np.bincount(x)
ii = np.nonzero(y)[0]  # indexes that occur at least once
frequency_identified_indexs = sorted(zip(ii, y[ii]),
                                     key=lambda pair: pair[1],
                                     reverse=True)
frequency_identified_indexs









    Out[73]:





[(1480, 6),
 (2809, 6),
 (4844, 6),
 (161, 5),
 (563, 5),
 (1372, 5),
 (1954, 5),
 (2826, 5),
 (5030, 5),
 (201, 4),
 (232, 4),
 (239, 4),
 (456, 4),
 (525, 4),
 (836, 4),
 (947, 4),
 (980, 4),
 (1067, 4),
 (1205, 4),
 (1363, 4),
 (1441, 4),
 (1444, 4),
 (1742, 4),
 (1884, 4),
 (2405, 4),
 (2644, 4),
 (2670, 4),
 (2887, 4),
 (3088, 4),
 (3150, 4),
 (3237, 4),
 (4106, 4),
 (4318, 4),
 (4401, 4),
 (4460, 4),
 (4584, 4),
 (4636, 4),
 (4794, 4),
 (5048, 4),
 (0, 3),
 (11, 3),
 (41, 3),
 (50, 3),
 (74, 3),
 (203, 3),
 (222, 3),
 (229, 3),
 (256, 3),
 (318, 3),
 (331, 3),
 (335, 3),
 (389, 3),
 (394, 3),
 (410, 3),
 (424, 3),
 (431, 3),
 (441, 3),
 (482, 3),
 (489, 3),
 (507, 3),
 (532, 3),
 (591, 3),
 (605, 3),
 (638, 3),
 (649, 3),
 (677, 3),
 (715, 3),
 (755, 3),
 (766, 3),
 (817, 3),
 (823, 3),
 (863, 3),
 (914, 3),
 (940, 3),
 (968, 3),
 (1035, 3),
 (1040, 3),
 (1054, 3),
 (1058, 3),
 (1105, 3),
 (1134, 3),
 (1146, 3),
 (1161, 3),
 (1162, 3),
 (1176, 3),
 (1189, 3),
 (1197, 3),
 (1269, 3),
 (1325, 3),
 (1365, 3),
 (1376, 3),
 (1379, 3),
 (1387, 3),
 (1398, 3),
 (1399, 3),
 (1415, 3),
 (1507, 3),
 (1519, 3),
 (1538, 3),
 (1575, 3),
 (1584, 3),
 (1647, 3),
 (1675, 3),
 (1692, 3),
 (1728, 3),
 (1824, 3),
 (1829, 3),
 (1855, 3),
 (1903, 3),
 (1906, 3),
 (1907, 3),
 (2007, 3),
 (2026, 3),
 (2116, 3),
 (2167, 3),
 (2175, 3),
 (2180, 3),
 (2211, 3),
 (2242, 3),
 (2275, 3),
 (2304, 3),
 (2311, 3),
 (2338, 3),
 (2351, 3),
 (2385, 3),
 (2434, 3),
 (2441, 3),
 (2457, 3),
 (2477, 3),
 (2558, 3),
 (2598, 3),
 (2621, 3),
 (2702, 3),
 (2799, 3),
 (2806, 3),
 (2853, 3),
 (2856, 3),
 (2860, 3),
 (2877, 3),
 (2879, 3),
 (2880, 3),
 (2906, 3),
 (2908, 3),
 (2912, 3),
 (2918, 3),
 (2948, 3),
 (2949, 3),
 (2959, 3),
 (2972, 3),
 (2994, 3),
 (2998, 3),
 (3068, 3),
 (3079, 3),
 (3081, 3),
 (3089, 3),
 (3124, 3),
 (3144, 3),
 (3152, 3),
 (3256, 3),
 (3279, 3),
 (3287, 3),
 (3391, 3),
 (3445, 3),
 (3466, 3),
 (3486, 3),
 (3521, 3),
 (3527, 3),
 (3581, 3),
 (3733, 3),
 (3750, 3),
 (3775, 3),
 (3809, 3),
 (3821, 3),
 (3829, 3),
 (3844, 3),
 (3906, 3),
 (3907, 3),
 (3951, 3),
 (4044, 3),
 (4053, 3),
 (4126, 3),
 (4143, 3),
 (4170, 3),
 (4189, 3),
 (4216, 3),
 (4276, 3),
 (4298, 3),
 (4329, 3),
 (4333, 3),
 (4340, 3),
 (4362, 3),
 (4376, 3),
 (4513, 3),
 (4542, 3),
 (4554, 3),
 (4571, 3),
 (4582, 3),
 (4735, 3),
 (4767, 3),
 (4768, 3),
 (4775, 3),
 (4856, 3),
 (4879, 3),
 (4919, 3),
 (4999, 3),
 (5001, 3),
 (5036, 3),
 (5041, 3),
 (5056, 3),
 (5142, 3),
 (1, 2),
 (4, 2),
 (9, 2),
 (15, 2),
 (21, 2),
 (24, 2),
 (25, 2),
 (26, 2),
 (27, 2),
 (30, 2),
 (31, 2),
 (34, 2),
 (37, 2),
 (61, 2),
 (64, 2),
 (70, 2),
 (71, 2),
 (72, 2),
 (73, 2),
 (80, 2),
 (82, 2),
 (83, 2),
 (84, 2),
 (101, 2),
 (102, 2),
 (104, 2),
 (111, 2),
 (114, 2),
 (117, 2),
 (118, 2),
 (120, 2),
 (121, 2),
 (125, 2),
 (131, 2),
 (133, 2),
 (138, 2),
 (143, 2),
 (150, 2),
 (152, 2),
 (160, 2),
 (170, 2),
 (172, 2),
 (177, 2),
 (178, 2),
 (187, 2),
 (188, 2),
 (195, 2),
 (198, 2),
 (202, 2),
 (205, 2),
 (213, 2),
 (218, 2),
 (219, 2),
 (223, 2),
 (224, 2),
 (226, 2),
 (245, 2),
 (246, 2),
 (254, 2),
 (258, 2),
 (265, 2),
 (270, 2),
 (274, 2),
 (276, 2),
 (277, 2),
 (278, 2),
 (279, 2),
 (282, 2),
 (288, 2),
 (296, 2),
 (297, 2),
 (308, 2),
 (311, 2),
 (313, 2),
 (317, 2),
 (319, 2),
 (322, 2),
 (327, 2),
 (328, 2),
 (338, 2),
 (339, 2),
 (344, 2),
 (352, 2),
 (353, 2),
 (361, 2),
 (363, 2),
 (367, 2),
 (368, 2),
 (371, 2),
 (382, 2),
 (388, 2),
 (390, 2),
 (408, 2),
 (411, 2),
 (412, 2),
 (414, 2),
 (417, 2),
 (437, 2),
 (440, 2),
 (444, 2),
 (457, 2),
 (464, 2),
 (468, 2),
 (470, 2),
 (476, 2),
 (479, 2),
 (486, 2),
 (490, 2),
 (495, 2),
 (500, 2),
 (505, 2),
 (508, 2),
 (509, 2),
 (510, 2),
 (519, 2),
 (527, 2),
 (530, 2),
 (541, 2),
 (553, 2),
 (566, 2),
 (572, 2),
 (574, 2),
 (575, 2),
 (578, 2),
 (579, 2),
 (580, 2),
 (585, 2),
 (589, 2),
 (599, 2),
 (602, 2),
 (614, 2),
 (625, 2),
 (627, 2),
 (633, 2),
 (643, 2),
 (651, 2),
 (659, 2),
 (661, 2),
 (662, 2),
 (673, 2),
 (680, 2),
 (686, 2),
 (698, 2),
 (710, 2),
 (711, 2),
 (719, 2),
 (722, 2),
 (738, 2),
 (743, 2),
 (746, 2),
 (794, 2),
 (797, 2),
 (804, 2),
 (807, 2),
 (816, 2),
 (822, 2),
 (827, 2),
 (829, 2),
 (842, 2),
 (844, 2),
 (848, 2),
 (851, 2),
 (854, 2),
 (857, 2),
 (872, 2),
 (878, 2),
 (892, 2),
 (893, 2),
 (900, 2),
 (908, 2),
 (918, 2),
 (926, 2),
 (935, 2),
 (943, 2),
 (948, 2),
 (951, 2),
 (953, 2),
 (957, 2),
 (982, 2),
 (989, 2),
 (993, 2),
 (1004, 2),
 (1006, 2),
 (1017, 2),
 (1020, 2),
 (1023, 2),
 (1025, 2),
 (1028, 2),
 (1029, 2),
 (1034, 2),
 (1043, 2),
 (1044, 2),
 (1056, 2),
 (1061, 2),
 (1064, 2),
 (1065, 2),
 (1066, 2),
 (1084, 2),
 (1085, 2),
 (1092, 2),
 (1111, 2),
 (1122, 2),
 (1126, 2),
 (1145, 2),
 (1147, 2),
 (1152, 2),
 (1155, 2),
 (1164, 2),
 (1166, 2),
 (1177, 2),
 (1178, 2),
 (1185, 2),
 (1187, 2),
 (1193, 2),
 (1202, 2),
 (1212, 2),
 (1221, 2),
 (1223, 2),
 (1226, 2),
 (1229, 2),
 (1238, 2),
 (1239, 2),
 (1245, 2),
 (1249, 2),
 (1250, 2),
 (1252, 2),
 (1254, 2),
 (1265, 2),
 (1278, 2),
 (1279, 2),
 (1284, 2),
 (1290, 2),
 (1301, 2),
 (1303, 2),
 (1310, 2),
 (1314, 2),
 (1317, 2),
 (1324, 2),
 (1326, 2),
 (1332, 2),
 (1336, 2),
 (1341, 2),
 (1344, 2),
 (1352, 2),
 (1354, 2),
 (1368, 2),
 (1404, 2),
 (1410, 2),
 (1424, 2),
 (1432, 2),
 (1433, 2),
 (1439, 2),
 (1440, 2),
 (1447, 2),
 (1461, 2),
 (1466, 2),
 (1468, 2),
 (1476, 2),
 (1477, 2),
 (1478, 2),
 (1496, 2),
 (1510, 2),
 (1511, 2),
 (1522, 2),
 (1527, 2),
 (1528, 2),
 (1531, 2),
 (1546, 2),
 (1547, 2),
 (1549, 2),
 (1559, 2),
 (1560, 2),
 (1568, 2),
 (1573, 2),
 (1585, 2),
 (1596, 2),
 (1598, 2),
 (1610, 2),
 (1612, 2),
 (1615, 2),
 (1616, 2),
 (1617, 2),
 (1623, 2),
 (1628, 2),
 (1630, 2),
 (1632, 2),
 (1634, 2),
 (1636, 2),
 (1637, 2),
 (1640, 2),
 (1643, 2),
 (1651, 2),
 (1654, 2),
 (1658, 2),
 (1665, 2),
 (1674, 2),
 (1677, 2),
 (1681, 2),
 (1683, 2),
 (1685, 2),
 (1695, 2),
 (1698, 2),
 (1707, 2),
 (1714, 2),
 (1720, 2),
 (1729, 2),
 (1730, 2),
 (1744, 2),
 (1752, 2),
 (1756, 2),
 (1763, 2),
 (1764, 2),
 (1780, 2),
 (1782, 2),
 (1789, 2),
 (1804, 2),
 (1809, 2),
 (1826, 2),
 (1827, 2),
 (1834, 2),
 (1865, 2),
 (1877, 2),
 (1896, 2),
 (1898, 2),
 (1900, 2),
 (1904, 2),
 (1925, 2),
 (1926, 2),
 (1928, 2),
 (1942, 2),
 (1948, 2),
 (1955, 2),
 (1964, 2),
 (1983, 2),
 (1987, 2),
 (1991, 2),
 (2009, 2),
 (2021, 2),
 (2029, 2),
 (2033, 2),
 (2035, 2),
 (2036, 2),
 (2039, 2),
 (2040, 2),
 (2046, 2),
 (2054, 2),
 (2082, 2),
 (2096, 2),
 (2100, 2),
 (2103, 2),
 (2105, 2),
 (2119, 2),
 (2134, 2),
 (2146, 2),
 (2147, 2),
 (2183, 2),
 (2185, 2),
 (2189, 2),
 (2195, 2),
 (2204, 2),
 (2209, 2),
 (2219, 2),
 (2222, 2),
 (2225, 2),
 (2238, 2),
 (2254, 2),
 (2260, 2),
 (2266, 2),
 (2267, 2),
 (2268, 2),
 (2279, 2),
 (2280, 2),
 (2295, 2),
 (2298, 2),
 (2302, 2),
 (2306, 2),
 (2319, 2),
 (2327, 2),
 (2328, 2),
 (2329, 2),
 (2332, 2),
 (2366, 2),
 (2368, 2),
 (2372, 2),
 (2373, 2),
 (2374, 2),
 (2381, 2),
 (2390, 2),
 (2391, 2),
 (2400, 2),
 (2404, 2),
 (2410, 2),
 (2417, 2),
 (2420, 2),
 (2440, 2),
 (2443, 2),
 (2467, 2),
 (2480, 2),
 (2489, 2),
 (2492, 2),
 (2495, 2),
 (2497, 2),
 (2501, 2),
 (2502, 2),
 (2504, 2),
 (2507, 2),
 (2509, 2),
 (2511, 2),
 (2523, 2),
 (2527, 2),
 (2529, 2),
 (2530, 2),
 (2538, 2),
 (2547, 2),
 (2550, 2),
 (2561, 2),
 (2562, 2),
 (2563, 2),
 (2566, 2),
 (2569, 2),
 (2588, 2),
 (2611, 2),
 (2619, 2),
 (2622, 2),
 (2624, 2),
 (2626, 2),
 (2631, 2),
 (2634, 2),
 (2635, 2),
 (2637, 2),
 (2643, 2),
 (2655, 2),
 (2658, 2),
 (2675, 2),
 (2677, 2),
 (2681, 2),
 (2704, 2),
 (2710, 2),
 (2719, 2),
 (2723, 2),
 (2727, 2),
 (2734, 2),
 (2739, 2),
 (2749, 2),
 (2750, 2),
 (2753, 2),
 (2758, 2),
 (2764, 2),
 (2780, 2),
 (2781, 2),
 (2782, 2),
 (2784, 2),
 (2792, 2),
 (2802, 2),
 (2803, 2),
 (2805, 2),
 (2811, 2),
 (2814, 2),
 (2831, 2),
 (2837, 2),
 (2842, 2),
 (2849, 2),
 (2854, 2),
 (2855, 2),
 (2857, 2),
 (2859, 2),
 (2863, 2),
 (2867, 2),
 (2868, 2),
 (2870, 2),
 (2888, 2),
 (2898, 2),
 (2899, 2),
 (2901, 2),
 (2909, 2),
 (2919, 2),
 (2922, 2),
 (2939, 2),
 (2950, 2),
 (2960, 2),
 (2964, 2),
 (2966, 2),
 (2973, 2),
 (2981, 2),
 (2992, 2),
 (3008, 2),
 (3010, 2),
 (3012, 2),
 (3013, 2),
 (3022, 2),
 (3037, 2),
 (3040, 2),
 (3048, 2),
 (3062, 2),
 (3067, 2),
 (3069, 2),
 (3076, 2),
 (3082, 2),
 (3099, 2),
 (3103, 2),
 (3104, 2),
 (3105, 2),
 (3116, 2),
 (3138, 2),
 (3146, 2),
 (3149, 2),
 (3160, 2),
 (3166, 2),
 (3174, 2),
 (3179, 2),
 (3193, 2),
 (3201, 2),
 (3210, 2),
 (3212, 2),
 (3218, 2),
 (3221, 2),
 (3257, 2),
 (3260, 2),
 (3273, 2),
 (3290, 2),
 (3303, 2),
 (3308, 2),
 (3314, 2),
 (3318, 2),
 (3335, 2),
 (3341, 2),
 (3347, 2),
 (3358, 2),
 (3366, 2),
 (3372, 2),
 (3374, 2),
 (3381, 2),
 (3385, 2),
 (3389, 2),
 (3390, 2),
 (3396, 2),
 (3402, 2),
 (3406, 2),
 (3407, 2),
 (3424, 2),
 (3429, 2),
 (3450, 2),
 (3451, 2),
 (3454, 2),
 (3459, 2),
 (3467, 2),
 (3468, 2),
 (3485, 2),
 (3491, 2),
 (3512, 2),
 (3537, 2),
 (3551, 2),
 (3554, 2),
 (3562, 2),
 (3565, 2),
 (3567, 2),
 (3582, 2),
 (3587, 2),
 (3600, 2),
 (3601, 2),
 (3608, 2),
 (3612, 2),
 (3613, 2),
 (3617, 2),
 (3632, 2),
 (3633, 2),
 (3641, 2),
 (3643, 2),
 (3645, 2),
 (3647, 2),
 (3660, 2),
 (3662, 2),
 (3673, 2),
 (3676, 2),
 (3683, 2),
 (3690, 2),
 (3699, 2),
 (3702, 2),
 (3703, 2),
 (3714, 2),
 (3723, 2),
 (3725, 2),
 (3727, 2),
 (3730, 2),
 (3741, 2),
 (3760, 2),
 (3761, 2),
 (3768, 2),
 (3774, 2),
 (3784, 2),
 (3786, 2),
 (3788, 2),
 (3812, 2),
 (3813, 2),
 (3823, 2),
 (3848, 2),
 (3851, 2),
 (3854, 2),
 (3865, 2),
 (3871, 2),
 (3889, 2),
 (3892, 2),
 (3899, 2),
 (3904, 2),
 (3919, 2),
 (3923, 2),
 (3924, 2),
 (3931, 2),
 (3935, 2),
 (3939, 2),
 (3970, 2),
 (3977, 2),
 (3985, 2),
 (3994, 2),
 (3997, 2),
 (3998, 2),
 (4010, 2),
 (4011, 2),
 (4017, 2),
 (4019, 2),
 (4021, 2),
 (4027, 2),
 (4038, 2),
 (4046, 2),
 (4064, 2),
 (4069, 2),
 (4070, 2),
 (4080, 2),
 (4082, 2),
 (4084, 2),
 (4085, 2),
 (4097, 2),
 (4107, 2),
 (4112, 2),
 (4124, 2),
 (4125, 2),
 (4131, 2),
 (4135, 2),
 (4144, 2),
 (4152, 2),
 (4159, 2),
 (4169, 2),
 (4174, 2),
 (4183, 2),
 (4185, 2),
 (4188, 2),
 (4193, 2),
 (4197, 2),
 (4203, 2),
 (4204, 2),
 (4213, 2),
 (4225, 2),
 (4241, 2),
 (4245, 2),
 (4248, 2),
 (4250, 2),
 (4262, 2),
 (4265, 2),
 (4271, 2),
 (4274, 2),
 (4277, 2),
 (4278, 2),
 (4280, 2),
 (4294, 2),
 (4296, 2),
 (4301, 2),
 (4302, 2),
 (4308, 2),
 (4323, 2),
 (4324, 2),
 (4330, 2),
 (4338, 2),
 (4344, 2),
 (4346, 2),
 (4349, 2),
 (4351, 2),
 (4364, 2),
 (4370, 2),
 (4383, 2),
 (4391, 2),
 (4392, 2),
 (4400, 2),
 (4405, 2),
 (4410, 2),
 (4411, 2),
 (4421, 2),
 (4452, 2),
 (4455, 2),
 (4458, 2),
 (4461, 2),
 (4464, 2),
 (4468, 2),
 (4469, 2),
 (4470, 2),
 (4472, 2),
 (4482, 2),
 (4483, 2),
 (4484, 2),
 (4489, 2),
 (4493, 2),
 (4495, 2),
 (4498, 2),
 (4505, 2),
 (4507, 2),
 (4515, 2),
 (4517, 2),
 (4518, 2),
 (4528, 2),
 (4536, 2),
 (4539, 2),
 (4556, 2),
 (4570, 2),
 (4575, 2),
 (4589, 2),
 (4597, 2),
 (4600, 2),
 (4609, 2),
 (4614, 2),
 (4615, 2),
 (4624, 2),
 (4630, 2),
 (4635, 2),
 (4643, 2),
 (4648, 2),
 (4651, 2),
 (4659, 2),
 (4664, 2),
 (4674, 2),
 (4675, 2),
 (4676, 2),
 (4679, 2),
 (4680, 2),
 (4688, 2),
 (4702, 2),
 (4724, 2),
 (4741, 2),
 (4747, 2),
 (4751, 2),
 (4753, 2),
 (4759, 2),
 (4769, 2),
 (4772, 2),
 (4773, 2),
 (4774, 2),
 (4777, 2),
 (4789, 2),
 (4793, 2),
 (4805, 2),
 (4835, 2),
 (4847, 2),
 (4857, 2),
 (4858, 2),
 (4863, 2),
 (4871, 2),
 (4873, 2),
 (4874, 2),
 (4877, 2),
 (4880, 2),
 (4893, 2),
 (4900, 2),
 (4906, 2),
 (4921, 2),
 (4929, 2),
 (4943, 2),
 (4949, 2),
 (4974, 2),
 (4986, 2),
 (4990, 2),
 (5004, 2),
 (5012, 2),
 (5015, 2),
 (5019, 2),
 (5022, 2),
 (5023, 2),
 (5032, 2),
 (5040, 2),
 (5043, 2),
 (5053, 2),
 (5057, 2),
 (5061, 2),
 (5069, 2),
 (5075, 2),
 (5078, 2),
 (5079, 2),
 (5089, 2),
 (5102, 2),
 (5105, 2),
 (5114, 2),
 (5126, 2),
 (5137, 2),
 ...]



In [48]:

    
# Indexes of the wrongly identified users whose selected distance is exactly 0.
# wrong_distances_selected is filled while walking range(limit) in ascending
# order, so its k-th entry belongs to the k-th smallest wrong index.
# The previous loop took positions from the filtered list wd_menor_100 and used
# them to index the unfiltered wrong_distances, pairing unrelated entries.
# Assumes every wrong index is unique and below `limit` (the recorded lengths of
# wrong_indexs and wrong_distances_selected are both 1604) - TODO confirm.
wrong_indexs_0 = [index
                  for index, distance in zip(sorted(wrong_indexs), wrong_distances_selected)
                  if distance == 0]
# One value in the histogram is smaller than 0.092 but different from 0.
len(wrong_indexs_0)









    Out[48]:





9

Resultados diagonal

Falta encontrar el índice donde se minimiza el error



In [49]:

    
# Take the diagonal of `iden_matrix_paradero`, keep only the entries strictly
# greater than -800 and split them in two groups: values whose position is
# present in `correct_indexs` and values whose position is not. The sizes of
# the groups are printed and both groups are drawn as a labelled histogram.
diagonal = iden_matrix_paradero.diagonal().copy()
correct_distance = []
wrong_distance = []
diagonal_d1 = []  # positions of the diagonal entries that pass the cutoff
for i in range(len(diagonal)):
    if not diagonal[i] > -800:
        continue
    diagonal_d1.append(i)
    # Route the value to the list that matches its membership in correct_indexs.
    (correct_distance if i in correct_indexs else wrong_distance).append(diagonal[i])
# Single-argument print(...) emits the same text under the Python 2 kernel.
print("diagonal: " + str(len(diagonal_d1)))
print("correctos: " + str(len(correct_distance)))
print("incorrectos: " + str(len(wrong_distance)))
colors = ['red', 'green']
# Label each dataset so the figure can be read without looking at the code.
plt.hist([wrong_distance, correct_distance], histtype='bar', color=colors,
         label=['incorrectos', 'correctos'])
plt.xlabel('valor en la diagonal')
plt.ylabel('frecuencia')
plt.legend()
plt.show()









    



diagonal: 5089
correctos: 3564
incorrectos: 1525



In [50]:

    
# Partition the entries of `diagonal` that are strictly greater than -300:
# values whose position is present in `correct_indexs` go to
# `correct_distance`, all the others go to `wrong_distance`; `diagonal_d1`
# records every position that passed the cutoff.
correct_distance, wrong_distance, diagonal_d1 = [], [], []
for i in range(len(diagonal)):
    if not diagonal[i] > -300:
        continue
    diagonal_d1.append(i)
    (correct_distance if i in correct_indexs else wrong_distance).append(diagonal[i])
# Sizes, in order: kept positions, values in correct_indexs, remaining values.
print(len(diagonal_d1))
print(len(correct_distance))
print(len(wrong_distance))



In [51]:

    
# Histogram of the two groups built from the diagonal: `wrong_distance` in red
# and `correct_distance` in green. Each dataset is labelled and the axes are
# named so the figure is readable on its own.
colors = ['red', 'green']
plt.hist([wrong_distance, correct_distance], histtype='bar', color=colors,
         label=['incorrectos', 'correctos'])
plt.xlabel('valor en la diagonal')
plt.ylabel('frecuencia')
plt.legend()
plt.show()



In [ ]:

	tiempo_subida	id	x_subida	y_subida	tipo_transporte	serviciosentidovariante	tipo_dia	nviaje	netapa	x_bajada	y_bajada	tiempo_bajada	par_subida	par_bajada	zona_subida	zona_bajada
23	2013-04-14 06:45:44	1132106	348108.0	6289153.0	BUS	T203 00R	DOMINGO	1	1	346818.0	6299394.0	2013-04-14 07:07:02	T-22-205-SN-65	E-20-190-SN-40	328.0	307.0
22	2013-04-14 07:51:52	1132106	346751.0	6299389.0	BUS	T502 00I	DOMINGO	2	1	351363.0	6302549.0	2013-04-14 08:04:11	E-20-291-PO-20	T-15-135-PO-5	307.0	188.0
21	2013-04-14 19:56:47	1132106	351368.0	6302559.0	BUS	T502 00R	DOMINGO	3	1	346763.0	6299568.0	2013-04-14 20:09:11	T-15-135-OP-110	T-4-19-NS-100	188.0	55.0
20	2013-04-14 20:15:25	1132106	346713.0	6299427.0	BUS	T203 00I	DOMINGO	3	2	348095.0	6289148.0	2013-04-14 20:40:51	E-20-199-NS-2	T-24-205-NS-20	307.0	348.0
19	2013-04-15 21:04:59	1132106	348103.0	6289191.0	BUS	T206 00R	LABORAL	4	1	346844.0	6299320.0	2013-04-15 21:33:23	T-22-205-SN-65	T-20-190-SN-35	328.0	309.0

	tiempo_subida	id	x_subida	y_subida	tipo_transporte	serviciosentidovariante	tipo_dia	nviaje	netapa	x_bajada	y_bajada	tiempo_bajada	par_subida	par_bajada	zona_subida	zona_bajada	adulto
26	2013-09-23 20:58:46	1132106	348106.0	6289139.0	BUS	T203 00R	LABORAL	1	1	346824.0	6299354.0	2013-09-23 21:23:02	T-22-205-SN-65	E-20-190-SN-40	328.0	307.0	0.0
25	2013-09-23 21:24:25	1132106	346789.0	6299372.0	BUS	T502 00I	LABORAL	1	2	351366.0	6302548.0	2013-09-23 21:37:55	E-20-291-PO-20	T-15-135-PO-5	307.0	188.0	0.0
24	2013-09-24 07:15:40	1132106	351362.0	6302563.0	BUS	T502 00R	LABORAL	2	1	346661.0	6299484.0	2013-09-24 07:32:11	T-15-135-OP-110	T-4-19-NS-100	188.0	55.0	0.0
23	2013-09-24 08:00:08	1132106	346719.0	6299344.0	BUS	T206 06I	LABORAL	3	1	348078.0	6289284.0	2013-09-24 08:36:40	E-20-199-NS-2	T-24-205-NS-20	307.0	348.0	0.0
22	2013-09-24 09:13:22	1132106	347658.0	6289320.0	BUS	T352 00R	LABORAL	3	2	NaN	NaN	NaN	L-24-26-OP-25	NaN	348.0	NaN	NaN

	0	1	2	3	4	5	6	7	8	9	...	5159	5160	5161	5162	5163	5164	5165	5166	5167	5168
0	-68.142149	-750.000000	-780.000000	-774.00000	-750.00000	-780.00000	-750.00000	-783.0	-768.000000	-817.000000	...	-792.0	-770.0	-820.0	-810.0	-780.0	-799.0	-800.0	-756.0	-730.0	-783.0
1	-780.000000	-427.135729	-750.000000	-731.00000	-750.00000	-780.00000	-750.00000	-621.0	-768.000000	-817.000000	...	-792.0	-770.0	-820.0	-810.0	-780.0	-799.0	-800.0	-756.0	-730.0	-783.0
2	-780.000000	-775.000000	-363.640879	-774.00000	-750.00000	-780.00000	-750.00000	-783.0	-672.903090	-817.000000	...	-792.0	-770.0	-820.0	-810.0	-780.0	-799.0	-800.0	-756.0	-730.0	-783.0
3	-780.000000	-775.000000	-780.000000	-344.48455	-750.00000	-780.00000	-750.00000	-783.0	-768.000000	-817.000000	...	-792.0	-770.0	-820.0	-810.0	-780.0	-799.0	-800.0	-756.0	-730.0	-783.0
4	-780.000000	-775.000000	-780.000000	-774.00000	-351.59176	-780.00000	-750.00000	-783.0	-768.000000	-817.000000	...	-792.0	-770.0	-820.0	-810.0	-780.0	-799.0	-800.0	-756.0	-730.0	-783.0
5	-780.000000	-775.000000	-780.000000	-774.00000	-750.00000	-780.00000	-750.00000	-783.0	-768.000000	-817.000000	...	-792.0	-770.0	-820.0	-810.0	-780.0	-799.0	-800.0	-756.0	-730.0	-783.0
6	-780.000000	-775.000000	-780.000000	-774.00000	-750.00000	-720.09691	-500.29073	-783.0	-768.000000	-817.000000	...	-792.0	-770.0	-820.0	-810.0	-780.0	-799.0	-800.0	-756.0	-730.0	-783.0
7	-780.000000	-775.000000	-780.000000	-774.00000	-750.00000	-780.00000	-750.00000	-783.0	-768.000000	-817.000000	...	-792.0	-770.0	-820.0	-810.0	-780.0	-799.0	-800.0	-756.0	-730.0	-783.0
8	-780.000000	-775.000000	-750.096910	-774.00000	-750.00000	-780.00000	-750.00000	-783.0	-257.694065	-817.000000	...	-792.0	-770.0	-820.0	-810.0	-780.0	-799.0	-800.0	-756.0	-730.0	-783.0
9	-780.000000	-775.000000	-780.000000	-774.00000	-750.00000	-780.00000	-750.00000	-756.0	-768.000000	-315.583278	...	-792.0	-770.0	-800.0	-810.0	-780.0	-799.0	-800.0	-756.0	-730.0	-783.0