In [9]:
%matplotlib inline
import matplotlib.pyplot as plt
import time
import datetime as dt
import pickle
import numpy as np
from geopy.distance import vincenty
from itertools import chain, combinations
import random
import scipy as sp
from __future__ import division
from algoritmo_3.auxiliar_functions import *
import algoritmo_3.tfe
import algoritmo_3.borrador
from algoritmo_1.tpm_identification import get_similarity
In [5]:
# Load the pickled user profiles into `profiles_abril`.
# The file is opened in binary mode ('rb'): pickle streams are bytes, and text
# mode is only safe for protocol 0 on platforms that do not translate newlines.
# NOTE(review): the path is absolute and machine-specific; consider a
# configurable data directory so the notebook runs elsewhere.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('/home/cata/Proyectos/Notebooks & beyond/Notebooks/Normalizasound_2/users_profiles.pickle', 'rb') as f:
    profiles_abril = pickle.load(f)
In [6]:
# Load the pickled object stored in profiles.pickle into `sequences_septiembre`.
# The file is opened in binary mode ('rb'): pickle streams are bytes, and text
# mode is only safe for protocol 0 on platforms that do not translate newlines.
# NOTE(review): the path is absolute and machine-specific; consider a
# configurable data directory so the notebook runs elsewhere.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('/home/cata/Proyectos/Notebooks & beyond/Notebooks/Normalizasound_2/profiles.pickle', 'rb') as f:
    sequences_septiembre = pickle.load(f)
In [8]:
# The file holds two objects pickled one after the other; they are read back
# in the same order: first `abril_the_rois`, then `septiembre_the_rois`.
# Opened in binary mode ('rb') because pickle streams are bytes.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('algoritmo_3/data/rois_meters_data.pickle', 'rb') as f:
    abril_the_rois = pickle.load(f)
    septiembre_the_rois = pickle.load(f)
In [18]:
# Load the pickled object stored in shared_rois.pickle into `shared_rois`.
# Later cells index it by user position and compare the row against a minimum
# count (see get_neighbours_index).
# Opened in binary mode ('rb') because pickle streams are bytes.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('algoritmo_3/data/shared_rois.pickle', 'rb') as f:
    shared_rois = pickle.load(f)
In [14]:
# Sanity check: number of entries loaded into sequences_septiembre.
len(sequences_septiembre)
Out[14]:
In [15]:
# Sanity check: number of entries loaded into profiles_abril.
len(profiles_abril)
Out[15]:
In [66]:
# Gets the neighbours of the user "user": a neighbour is anyone whose entry in
# the user's row of shared_rois reaches the min_shared threshold.
def get_neighbours_index(rois_a, shared_rois, user, min_shared):
    """Return the indexes of the neighbours of ``user``.

    Parameters
    ----------
    rois_a : not used by this function; kept in the signature for callers.
    shared_rois : object indexable by ``user``; the selected row is compared
        element-wise against ``min_shared`` (presumably a 2-D NumPy array of
        shared-ROI counts between users -- confirm against the code that
        builds it).
    user : index of the row of ``shared_rois`` to inspect.
    min_shared : minimum value an entry must reach to count as a neighbour.

    Returns
    -------
    The first array of the tuple produced by ``np.where`` on the comparison,
    i.e. the positions along the first axis where the threshold is met.
    """
    meets_threshold = shared_rois[user] >= min_shared
    positions = np.where(meets_threshold)
    return positions[0]
In [50]:
# Builds the identification matrix in which entry (i, j) is the similarity
# between the i-th profile's tpm and the j-th sequence. Only pairs that are
# neighbours according to shared_rois are scored.
def get_identification_matrix_2_rois(rois_a, shared_rois, min_shared, users_profiles, users_sequences):
    """Build a square similarity matrix between profiles and sequences.

    The matrix has ``limit = min(len(users_profiles), len(users_sequences))``
    rows and columns. Every entry starts at -10**6 and is overwritten with
    ``get_similarity(tpm, mls, sequence)`` only for the columns returned by
    ``get_neighbours_index`` for row ``i`` that are smaller than ``limit``.

    Parameters
    ----------
    rois_a, shared_rois, min_shared : forwarded unchanged to
        ``get_neighbours_index`` to select the candidate columns of each row.
    users_profiles : sequence of mappings providing the keys ``'tpm'`` and
        ``'mls'``; position ``i`` is used as the row index.
    users_sequences : sequence of mappings providing the key ``'sequence'``;
        position ``j`` is used as the column index.
        NOTE(review): row/column positions are assumed to refer to the same
        user in both inputs -- confirm against the data preparation.

    Returns
    -------
    A ``(limit, limit)`` float NumPy array.
    """
    limit = min(len(users_profiles), len(users_sequences))
    # -10**6 acts as the "not compared" score, far below any computed similarity
    # (presumably; the range of get_similarity is not visible here).
    identification_matrix = np.ones((limit, limit)) * -pow(10, 6)
    # zip against range(limit) bounds the iteration to the matrix size without
    # a hand-maintained counter; nothing is processed when limit is 0.
    for i, profile in zip(range(limit), users_profiles):
        tpm = profile['tpm']
        mls = profile['mls']
        for neighbour in get_neighbours_index(rois_a, shared_rois, i, min_shared):
            # shared_rois may describe more users than fit in the matrix.
            if neighbour < limit:
                sequence = users_sequences[neighbour]['sequence']
                identification_matrix[i, neighbour] = get_similarity(tpm, mls, sequence)
    return identification_matrix
In [51]:
# Build the identification matrix from profiles_abril (rows) and
# sequences_septiembre (columns); min_shared=2 means a pair is scored only when
# its shared_rois entry is at least 2.
a_matrix = get_identification_matrix_2_rois(abril_the_rois,shared_rois,2,profiles_abril,sequences_septiembre)
In [52]:
# pandas is not imported explicitly in the notebook's import cell, so import
# it here to keep this cell runnable on a fresh kernel.
import pandas as pd

# Wrap the identification matrix for the identification loop further down and
# preview its first 100 rows as a DataFrame.
iden_matrix = np.matrix(a_matrix)
df_ident = pd.DataFrame(iden_matrix)
df_ident.head(100)
Out[52]:
In [54]:
# Top-1 identification: for every row of iden_matrix pick the column with the
# highest similarity and count the row as correct when that column equals the
# row index.
identified_indexs = []   # best-scoring column of every row
wrong_indexs = []        # best-scoring columns that differ from the row index
correct_indexs = []      # best-scoring columns that equal the row index
selected_indexs = []     # the best score of every row
n_identified = 0
# Derive the number of rows from the matrix instead of hard-coding it, so the
# cell keeps working when the data set size changes.
limit = iden_matrix.shape[0]
# NOTE(review): np.argmax returns the first position on ties, so a row whose
# entries are all equal (e.g. a user without neighbours) selects column 0;
# for row 0 that is counted as a correct identification -- confirm whether
# such rows should be excluded.
for i in range(limit):
    the_index = np.argmax(iden_matrix[i, :])
    selected_indexs.append(np.max(iden_matrix[i, :]))
    identified_indexs.append(the_index)
    if the_index != i:
        wrong_indexs.append(the_index)
    else:
        correct_indexs.append(the_index)
        n_identified += 1

# 100.0 forces float division regardless of `from __future__ import division`;
# an empty matrix reports 0% instead of raising ZeroDivisionError.
porcentaje_correcto = 100.0 * n_identified / limit if limit else 0.0
print(str(round(porcentaje_correcto, 2)) + "%")
In [60]:
# Ad-hoc inspection: neighbours of the user at index 5167 with min_shared=2.
get_neighbours_index(abril_the_rois,shared_rois,5167,2)
Out[60]:
In [67]:
# Percentage of users in profiles_abril that have at least one neighbour
# (min_shared=2) according to shared_rois.
shared = 0
for i in range(len(profiles_abril)):
    neighbours = get_neighbours_index(abril_the_rois, shared_rois, i, 2)
    has_any = len(neighbours) > 0
    if has_any:
        shared = shared + 1
# Relies on `from __future__ import division` for a fractional result.
print(shared * 100 / len(profiles_abril))
In [65]:
# Percentage of positions at which profiles_abril and sequences_septiembre
# carry the same 'user_id', relative to len(profiles_abril).
# zip pairs the two lists position by position and stops at the shorter one,
# so a shorter sequences_septiembre no longer raises IndexError; unpaired
# profiles simply do not count as matches.
same = 0
for profile, sequence in zip(profiles_abril, sequences_septiembre):
    if profile['user_id'] == sequence['user_id']:
        same += 1
# Relies on `from __future__ import division` for a fractional result.
print(same * 100 / len(profiles_abril))
In [ ]: