In [297]:
import pandas as pd
import numpy as np
from copy import copy
from itertools import combinations
from sklearn.metrics.pairwise import cosine_similarity, pairwise_distances
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error
import time

In [299]:
from scipy.sparse import csr_matrix

In [52]:
%matplotlib inline

In [2]:
# Load the combined book-ratings dataset and build a dense user x item
# rating matrix (`naive`), with raw user ids / ISBNs remapped to
# contiguous 0-based row / column indices.
data = pd.read_csv('../created_datasets/Combine.csv')

rows = data.user_id.unique()
cols = data['isbn'].unique()
print(data.user_id.nunique(), data.isbn.nunique())
data = data[['user_id', 'isbn', 'book_rating']]

# Lookup tables: raw isbn -> column index, raw user_id -> row index.
idict  = dict(zip(cols, range(len(cols))))
udict = dict(zip(rows, range(len(rows))))

# Vectorized remapping (every key is present by construction above,
# so .map cannot introduce NaNs here).
data.user_id = data.user_id.map(udict)
data['isbn'] = data['isbn'].map(idict)

# BUG FIX: DataFrame.as_matrix() was deprecated in pandas 0.23 and
# removed in pandas 1.0 -- to_numpy() is the supported replacement.
nmat = data.to_numpy()
nmat = nmat.astype(int)
print(nmat.shape)

# naive[u, i] = rating user u gave item i (0 where unrated).
naive = np.zeros((len(rows), len(cols)))
for row in nmat:
    naive[row[0], row[1]] = row[2]

print(naive.T.shape)


12053 4959
(470749, 3)
(4959, 12053)

In [ ]:


In [ ]:

## LSH with K Hash Functions and M Hash Tables


In [86]:
def get_hashes_in(data, n_hash_fn):
    """Hash every row of `data` into an LSH bucket via random hyperplanes.

    Draws `n_hash_fn` random hyperplanes (columns of a standard-normal
    matrix), takes the sign of each row's projection as one bit, and packs
    the bits into a single integer bucket key (most significant bit first).

    Returns (buckets, hyperplanes): a dict mapping each bucket key to the
    list of row indices that hashed to it, and the (n_features, n_hash_fn)
    hyperplane matrix used.
    """
    n_features = data.shape[1]
    hyperplanes = np.random.standard_normal(size=(n_features, n_hash_fn))

    # Powers of two used to fold the sign bits into one integer key.
    weights = 2 ** np.array(range(n_hash_fn - 1, -1, -1))

    signs = np.dot(data, hyperplanes) >= 0
    keys = np.dot(signs, weights)

    buckets = {}
    for row_idx, key in enumerate(keys):
        buckets.setdefault(key, []).append(row_idx)

    return buckets, hyperplanes

In [87]:
def get_hashes1(data, n_hash_fn, n_hash_t):
    """Build `n_hash_t` independent LSH hash tables over the rows of `data`.

    Each table comes from one call to `get_hashes_in` with its own freshly
    drawn random hyperplanes, so the tables give independent candidate sets.

    Returns (hash_tables, hash_funcs): parallel lists of the bucket dicts
    and the (n_features, n_hash_fn) hyperplane matrices that produced them.
    """
    # FIX: dropped the unused `rows, cols = data.shape` unpacking.
    hash_tables = []
    hash_funcs = []
    for _ in range(n_hash_t):
        ht, hf = get_hashes_in(data, n_hash_fn)
        hash_tables.append(ht)
        hash_funcs.append(hf)

    return hash_tables, hash_funcs

In [308]:
def get_ksimilar1(data, idx, hf, ht, hash_tables, hash_funcs):
    """Find LSH candidate rows similar to row `idx`, scored by cosine.

    hf: number of hash functions per table; ht: number of hash tables.
    hash_tables / hash_funcs: output of `get_hashes1` built over `data`.

    Returns a list of (row_index, cosine_similarity) pairs for every
    candidate that shares a bucket with `idx` in at least one table
    (empty list if there are no candidates).
    """
    n_hash_fn = hf
    bit_rep = 2 ** np.array(range(n_hash_fn - 1, -1, -1))

    sim_items = set()

    for i in range(ht):
        # Re-hash the query row with table i's hyperplanes to locate its bucket.
        get_bits = np.dot(data[idx, :], hash_funcs[i]) >= 0
        indx = np.dot(get_bits, bit_rep)

        if indx in hash_tables[i]:
            sim_items.update(hash_tables[i][indx])

    # FIX: discard() instead of remove() -- remove() raised KeyError when
    # the query never landed in a stored bucket (e.g. tables built over a
    # different matrix), since idx was then absent from sim_items.
    sim_items.discard(idx)

    # FIX: with no candidates, np.array(list(set())) is a float64 empty
    # array and indexing with it raises IndexError on modern NumPy.
    if not sim_items:
        return []

    # Materialize the set once so the index array and the returned ids
    # are guaranteed to be in the same order.
    items = list(sim_items)

    # Cosine similarity between each candidate row and the query row.
    sim_ndarray = data[np.array(items), :]
    sims = cosine_similarity(csr_matrix(sim_ndarray), csr_matrix(data[idx, :].reshape(1, -1)))[:, 0]

    return list(zip(items, sims))

In [320]:
def get_ksimilar2(data, idx, hf, ht, hash_tables, hash_funcs):
    """Find LSH candidate rows similar to row `idx`, scored by 1/(1+euclidean).

    Same candidate generation as `get_ksimilar1`; only the scoring differs:
    euclidean distance mapped into (0, 1] via 1/(1+d).

    Returns a list of (row_index, similarity) pairs (empty list if there
    are no candidates).
    """
    n_hash_fn = hf
    bit_rep = 2 ** np.array(range(n_hash_fn - 1, -1, -1))

    sim_items = set()

    for i in range(ht):
        # Re-hash the query row with table i's hyperplanes to locate its bucket.
        get_bits = np.dot(data[idx, :], hash_funcs[i]) >= 0
        indx = np.dot(get_bits, bit_rep)

        if indx in hash_tables[i]:
            sim_items.update(hash_tables[i][indx])

    # FIX: discard() instead of remove() -- remove() raised KeyError when
    # idx was absent from every retrieved bucket.
    sim_items.discard(idx)

    # FIX: guard the empty-candidate case; an empty float64 index array
    # raises IndexError on modern NumPy.
    if not sim_items:
        return []

    # Materialize the set once so the index array and the returned ids
    # are guaranteed to be in the same order.
    items = list(sim_items)

    # Euclidean distance turned into a similarity in (0, 1].
    sim_ndarray = data[np.array(items), :]
    sims = pairwise_distances(csr_matrix(sim_ndarray), csr_matrix(data[idx, :].reshape(1, -1)), 'euclidean')[:, 0]
    sims = 1 / (1 + sims)
    return list(zip(items, sims))

In [141]:
# Build 3 LSH hash tables (3 hyperplanes each) over the item vectors
# (naive.T: one row per ISBN).
# NOTE(review): t1/t2 are opaque names -- they hold (hash_tables, hash_funcs).
t1, t2 = get_hashes1(naive.T, 3, 3)

In [115]:
# Number of distinct buckets in the first hash table.
# NOTE(review): Out[115] shows 32, which exceeds the 2**3 = 8 buckets
# possible with 3 hash bits -- this output predates the tables built in
# In[141] (non-sequential execution counts; re-run top-to-bottom).
len(t1[0])


Out[115]:
32

In [321]:
# Score item 1's LSH candidates with the euclidean-based similarity.
# NOTE(review): this dumps thousands of (index, score) pairs into the
# notebook output -- consider keeping only the top-k before displaying.
get_ksimilar2(naive.T, 1, 3, 3, t1, t2)


Out[321]:
[(3, 0.017950640864425039),
 (15, 0.016333309349049696),
 (17, 0.017924195310633422),
 (18, 0.018936763096516762),
 (28, 0.010245119280821658),
 (31, 0.017405373730411105),
 (33, 0.017102281096912191),
 (35, 0.018039650776849193),
 (43, 0.018181818181818181),
 (44, 0.01829926810010658),
 (53, 0.017554862446034141),
 (57, 0.017158540843438642),
 (60, 0.013573956287139517),
 (66, 0.016752068049743541),
 (67, 0.017320146615915827),
 (68, 0.015130368801062177),
 (72, 0.017674473097914836),
 (73, 0.017171405216799003),
 (76, 0.018909136783460199),
 (85, 0.017756543557946168),
 (87, 0.018078637658867778),
 (93, 0.018943688737372698),
 (95, 0.018466972048042936),
 (98, 0.017413428815162908),
 (101, 0.014345240157882954),
 (102, 0.013653247735651217),
 (103, 0.015235204798843488),
 (105, 0.017367931449769544),
 (107, 0.016375555772310736),
 (109, 0.016238900224432772),
 (111, 0.019183808696768796),
 (113, 0.018403143956532472),
 (116, 0.01772527817914972),
 (124, 0.018249537029122385),
 (132, 0.017694178359989626),
 (133, 0.018203282751254195),
 (142, 0.016531796176072186),
 (143, 0.017924195310633422),
 (150, 0.018396797711811807),
 (152, 0.017089572234107536),
 (153, 0.018163480659691311),
 (154, 0.018163480659691311),
 (155, 0.013293870380547211),
 (156, 0.015578704687354287),
 (158, 0.016204189130703435),
 (163, 0.016524909366129919),
 (166, 0.015515718836487259),
 (167, 0.017541112200179142),
 (170, 0.016764034078063601),
 (174, 0.018042640762158432),
 (177, 0.018912583419097306),
 (185, 0.017140579478955219),
 (186, 0.017171405216799003),
 (187, 0.016902295232662172),
 (189, 0.017845558660260254),
 (193, 0.017935934363702132),
 (194, 0.018051619701094184),
 (198, 0.016411388542212975),
 (203, 0.016978949359749484),
 (204, 0.018384125016438588),
 (206, 0.018749253627654526),
 (212, 0.017041537151436845),
 (213, 0.018457355217981367),
 (216, 0.015576784682519367),
 (218, 0.017359939679156923),
 (227, 0.01693431317744137),
 (231, 0.017228356221274714),
 (232, 0.016210680413578229),
 (233, 0.013932565718227234),
 (235, 0.017733788619662509),
 (237, 0.017607411612549258),
 (241, 0.017983123705435825),
 (243, 0.017158540843438642),
 (246, 0.017983123705435825),
 (251, 0.015017352552249789),
 (255, 0.016479217423618953),
 (258, 0.015574865391147203),
 (259, 0.014927061049055047),
 (261, 0.018399970008223746),
 (264, 0.0158446462569557),
 (265, 0.018682445230846841),
 (274, 0.017051615943807753),
 (278, 0.018830383891102472),
 (284, 0.018495913550865505),
 (288, 0.016103505220139473),
 (293, 0.013258298726139126),
 (294, 0.018358858481868232),
 (296, 0.017825340776218664),
 (303, 0.018021742218093367),
 (312, 0.018117879979344997),
 (314, 0.019062607223677534),
 (317, 0.015607590677617781),
 (318, 0.017097194130514297),
 (327, 0.017459287761707527),
 (331, 0.018596659198216611),
 (333, 0.018626219635775841),
 (340, 0.017986085496266707),
 (344, 0.018221741961439275),
 (345, 0.014245952698563772),
 (347, 0.018739186372252529),
 (352, 0.018227907599303768),
 (355, 0.014635383386595681),
 (356, 0.015114567309912754),
 (367, 0.014947361023118753),
 (368, 0.017557616400687259),
 (373, 0.018039650776849193),
 (374, 0.016557122092336261),
 (377, 0.017680096413595723),
 (378, 0.015447875271241756),
 (381, 0.016988916810862783),
 (383, 0.018622928135814693),
 (385, 0.017932997425074138),
 (386, 0.018796451007545064),
 (387, 0.014790534250118023),
 (392, 0.017787975974118797),
 (396, 0.01750824376950216),
 (401, 0.017243987494641216),
 (407, 0.015885221268421724),
 (408, 0.017574167553615475),
 (415, 0.015899496736755909),
 (419, 0.01708703387940708),
 (420, 0.017541112200179142),
 (421, 0.018349410520930101),
 (431, 0.015875048141414751),
 (434, 0.016855826605607391),
 (436, 0.017730950436068503),
 (442, 0.018434974611113812),
 (445, 0.01891603195223494),
 (453, 0.012418722348297355),
 (457, 0.017280627538648848),
 (461, 0.017983123705435825),
 (464, 0.019470892800022361),
 (469, 0.014056253642407143),
 (470, 0.01514093084439661),
 (475, 0.011901348358785735),
 (476, 0.014112932319387565),
 (477, 0.017173981585613313),
 (479, 0.017716780052885274),
 (483, 0.015530917591974587),
 (487, 0.017848452583016327),
 (488, 0.01797424715770474),
 (489, 0.017076891825070242),
 (490, 0.015740596777552809),
 (491, 0.018302389891382317),
 (492, 0.018943688737372698),
 (493, 0.018796451007545064),
 (495, 0.018528231926059222),
 (498, 0.017621319426799297),
 (499, 0.015938440346820397),
 (501, 0.013523530996117958),
 (504, 0.017378604434577193),
 (506, 0.016380021965051238),
 (508, 0.01848947027332197),
 (512, 0.015548069978320089),
 (514, 0.017848452583016327),
 (515, 0.017280627538648848),
 (516, 0.017051615943807753),
 (523, 0.016848524630899128),
 (524, 0.017845558660260254),
 (525, 0.015186957979898644),
 (526, 0.017959482254255359),
 (532, 0.018732483928120738),
 (540, 0.015738615973150457),
 (541, 0.016495166219205225),
 (543, 0.019090917133255215),
 (544, 0.015701122951038413),
 (546, 0.016184762225694784),
 (556, 0.018120909279923773),
 (557, 0.018154332808467798),
 (560, 0.018349410520930101),
 (561, 0.017711121461934373),
 (563, 0.018075629634631957),
 (567, 0.018803222815062089),
 (568, 0.01885424725712255),
 (572, 0.019209052050989626),
 (576, 0.016788043702941177),
 (580, 0.01999184176315761),
 (581, 0.016333309349049696),
 (585, 0.017426878991831908),
 (586, 0.014800396605546384),
 (587, 0.017233561894042761),
 (588, 0.018277460474335348),
 (591, 0.016885130277969475),
 (601, 0.011900495432843215),
 (607, 0.017161111391614949),
 (608, 0.014609998882073216),
 (610, 0.017854244680297608),
 (613, 0.015111062620014432),
 (615, 0.018732483928120738),
 (616, 0.016709202749629691),
 (618, 0.017502783746903096),
 (619, 0.015756470327343158),
 (620, 0.013342953391136747),
 (622, 0.018252633276431816),
 (628, 0.015677581479159871),
 (630, 0.018502363611624377),
 (631, 0.015538533826520965),
 (636, 0.017210173662907037),
 (637, 0.01882358251829477),
 (642, 0.012353306894887613),
 (650, 0.016033929341371853),
 (651, 0.013140615964191232),
 (656, 0.016331094950510723),
 (660, 0.01804862522302865),
 (661, 0.013159049090700817),
 (670, 0.013546152407119962),
 (672, 0.016169700886437457),
 (675, 0.015828503551575487),
 (679, 0.01633995798320995),
 (691, 0.018988892583342565),
 (696, 0.016917050030530376),
 (698, 0.016304592612254306),
 (699, 0.017730950436068503),
 (701, 0.016929375426826432),
 (704, 0.017962432306340891),
 (708, 0.019554239771940585),
 (710, 0.016826675837083551),
 (714, 0.018252633276431816),
 (718, 0.011285780211060225),
 (721, 0.016697354328624698),
 (723, 0.017217959094664215),
 (725, 0.018639403220625309),
 (729, 0.018145198849974283),
 (733, 0.017660438390136927),
 (737, 0.018563979796544457),
 (739, 0.01862951289266896),
 (747, 0.018495913550865505),
 (749, 0.017543859649122806),
 (750, 0.017325436458317453),
 (751, 0.016882682432496365),
 (755, 0.014110082007427515),
 (764, 0.017238772316083995),
 (768, 0.016949152542372881),
 (775, 0.018943688737372698),
 (779, 0.016156824761411739),
 (781, 0.01539016573970998),
 (782, 0.01412149368003863),
 (784, 0.014472718107190008),
 (786, 0.017212767626413251),
 (789, 0.010935732049976781),
 (794, 0.014369260747995339),
 (798, 0.011360668973766173),
 (799, 0.014104386585303002),
 (803, 0.013198479615329215),
 (810, 0.01643616122947171),
 (815, 0.015118074451946578),
 (818, 0.018662542438690168),
 (820, 0.018833787361048442),
 (827, 0.017635260447718189),
 (830, 0.012282347626364348),
 (833, 0.017236166510951917),
 (835, 0.016082325524257004),
 (836, 0.017031476312922615),
 (837, 0.017582460807047735),
 (838, 0.019230769230769232),
 (841, 0.017071827604688932),
 (842, 0.016061229629527295),
 (845, 0.014199257072180768),
 (846, 0.017079425635684879),
 (850, 0.01777081022322918),
 (854, 0.017530135374779685),
 (864, 0.01717655912119764),
 (865, 0.017857142857142856),
 (867, 0.017716780052885274),
 (871, 0.01772811362293264),
 (877, 0.016790450375679524),
 (878, 0.017535621195451163),
 (880, 0.019596319346681036),
 (881, 0.017249207436972714),
 (884, 0.017839775060727134),
 (889, 0.016605798367932606),
 (891, 0.014102963812195305),
 (892, 0.018194074288192429),
 (895, 0.018063612626847508),
 (896, 0.015076149961154042),
 (897, 0.016666666666666666),
 (902, 0.016050712934782118),
 (903, 0.017122674728070632),
 (904, 0.016612787424434762),
 (905, 0.017632469580643403),
 (908, 0.017638052648364166),
 (909, 0.017582460807047735),
 (911, 0.016065442130968065),
 (912, 0.016543293459045399),
 (913, 0.017557616400687259),
 (916, 0.017092111726956894),
 (924, 0.015726746913502402),
 (928, 0.018695749414685128),
 (932, 0.013140615964191232),
 (935, 0.017516443449768087),
 (944, 0.018444556263723166),
 (945, 0.011857238853687019),
 (946, 0.017889116802271232),
 (948, 0.017992013497642966),
 (949, 0.01923439597380833),
 (951, 0.010981638764190825),
 (953, 0.017304306283499608),
 (955, 0.01442679914552385),
 (956, 0.017491879099535344),
 (958, 0.017918334473586043),
 (959, 0.015870984389102491),
 (966, 0.01848947027332197),
 (971, 0.018027705773966715),
 (972, 0.016996403996317289),
 (974, 0.017621319426799297),
 (977, 0.018518518518518517),
 (978, 0.018277460474335348),
 (984, 0.016929375426826432),
 (989, 0.018060612140870642),
 (991, 0.018221741961439275),
 (992, 0.014955843947498373),
 (996, 0.013581569061750046),
 (999, 0.017497328859602104),
 (1002, 0.017475560502367846),
 (1005, 0.016552508673859958),
 (1006, 0.01773662817482112),
 (1009, 0.01722315529055251),
 (1010, 0.016773625490250767),
 (1014, 0.01823407953895349),
 (1016, 0.014247419405407359),
 (1021, 0.017132899099408471),
 (1022, 0.016664313119140396),
 (1023, 0.01635995297792741),
 (1027, 0.015597944073821857),
 (1029, 0.016061229629527295),
 (1031, 0.017186880948908898),
 (1034, 0.017291139347640535),
 (1035, 0.0164497212410484),
 (1036, 0.019791097058792482),
 (1037, 0.018719100723816755),
 (1044, 0.017233561894042761),
 (1051, 0.018166533036810491),
 (1055, 0.018181818181818181),
 (1061, 0.016678449465304949),
 (1063, 0.017383948354492897),
 (1066, 0.015687377481759138),
 (1067, 0.016065442130968065),
 (1068, 0.016163258956299355),
 (1070, 0.017389297236324142),
 (1071, 0.017064239763538942),
 (1072, 0.017071827604688932),
 (1078, 0.016438418889803397),
 (1081, 0.015625),
 (1082, 0.016788043702941177),
 (1083, 0.018563979796544457),
 (1084, 0.0158446462569557),
 (1085, 0.016368863365732539),
 (1088, 0.017532877637619498),
 (1089, 0.016169700886437457),
 (1093, 0.017719611395228121),
 (1099, 0.015822462812910016),
 (1100, 0.014144400735620032),
 (1101, 0.015973529605968834),
 (1105, 0.018075629634631957),
 (1108, 0.018339977222168485),
 (1112, 0.017254432150352318),
 (1121, 0.014826793466588323),
 (1122, 0.018321154459399042),
 (1123, 0.018051619701094184),
 (1126, 0.015500564848202746),
 (1127, 0.018403143956532472),
 (1128, 0.01362110775527879),
 (1132, 0.017314861644301505),
 (1135, 0.017489156139699702),
 (1138, 0.018318022998222588),
 (1139, 0.017036504491288049),
 (1140, 0.018844008925379799),
 (1144, 0.010587554016019908),
 (1147, 0.015673668240535208),
 (1148, 0.017202398844113637),
 (1151, 0.015889296028372382),
 (1152, 0.016573299658121359),
 (1153, 0.016413635951619647),
 (1154, 0.018368321143099667),
 (1157, 0.018769437221973252),
 (1159, 0.016483769474769649),
 (1160, 0.017039020260722522),
 (1166, 0.017530135374779685),
 (1173, 0.016783233478142593),
 (1174, 0.017796577637499676),
 (1179, 0.017148270256890502),
 (1180, 0.01618045466835526),
 (1182, 0.017236166510951917),
 (1185, 0.017510975709063282),
 (1189, 0.013741838342522806),
 (1192, 0.013375576180816195),
 (1203, 0.017702643793378926),
 (1204, 0.01839362706586067),
 (1208, 0.015913810969124045),
 (1210, 0.019267129872779017),
 (1211, 0.017722444103615786),
 (1226, 0.014923685794957989),
 (1236, 0.018237167875312075),
 (1237, 0.019580986060846042),
 (1238, 0.018012808038288804),
 (1239, 0.017483714055403377),
 (1246, 0.01700140098799972),
 (1247, 0.016021376327535492),
 (1263, 0.016951629586434237),
 (1265, 0.01919101086048355),
 (1266, 0.015105810176661683),
 (1269, 0.016702090654585888),
 (1270, 0.017301670475860995),
 (1271, 0.017184298737405266),
 (1273, 0.018330558547605),
 (1274, 0.016420383754526834),
 (1275, 0.015632756298114194),
 (1278, 0.015311182772250086),
 (1289, 0.018108801240865547),
 (1294, 0.016783233478142593),
 (1295, 0.018203282751254195),
 (1301, 0.017306943303231015),
 (1307, 0.018662542438690168),
 (1308, 0.01484668453083062),
 (1309, 0.016490604669570804),
 (1314, 0.015762435367623565),
 (1315, 0.018926388954124254),
 (1317, 0.017663242640988785),
 (1318, 0.017626891842953653),
 (1320, 0.015572946812795103),
 (1321, 0.014895088738885391),
 (1322, 0.015630816135015171),
 (1324, 0.01719204888638684),
 (1326, 0.015820450777974455),
 (1333, 0.01804862522302865),
 (1335, 0.013696024599721374),
 (1336, 0.019009866036725272),
 (1337, 0.015917907894345704),
 (1343, 0.016853391553245431),
 (1346, 0.014583172754896959),
 (1348, 0.018105778044838435),
 (1357, 0.017857142857142856),
 (1363, 0.018518518518518517),
 (1364, 0.019300032570185973),
 (1365, 0.018374637769636284),
 (1367, 0.018039650776849193),
 (1369, 0.017489156139699702),
 (1370, 0.018120909279923773),
 (1371, 0.018590109423999298),
 (1374, 0.016311206032249288),
 (1381, 0.017874561786369984),
 (1383, 0.016747288848619493),
 (1387, 0.017938872754567414),
 (1413, 0.017638052648364166),
 (1414, 0.016902295232662172),
 (1415, 0.017977204537375346),
 (1426, 0.017251819196826279),
 (1427, 0.014330288616505406),
 (1428, 0.017808066074498752),
 (1429, 0.015046664658312926),
 (1430, 0.018255731109994816),
 (1431, 0.018039650776849193),
 (1436, 0.017986085496266707),
 (1438, 0.016907209183558144),
 (1440, 0.018419038526909796),
 (1443, 0.016633807960912936),
 (1447, 0.015774385909359197),
 (1450, 0.016295787288264144),
 (1451, 0.015938440346820397),
 (1452, 0.017971291245982245),
 (1454, 0.016234549068420721),
 (1458, 0.017833997114794678),
 (1460, 0.016988916810862783),
 (1464, 0.01661745172866437),
 (1466, 0.016846092759371364),
 (1468, 0.01866585512572554),
 (1473, 0.017383948354492897),
 (1474, 0.015594090463942398),
 (1479, 0.01767166346349925),
 (1483, 0.018346264460757159),
 (1489, 0.018537960730833761),
 (1490, 0.017249207436972714),
 (1493, 0.016388965385308145),
 (1494, 0.017930061937484698),
 (1500, 0.015603729877444083),
 (1504, 0.017421495160004494),
 (1505, 0.015934327465664318),
 (1509, 0.016025557371195279),
 (1513, 0.013757652563661175),
 (1520, 0.014288672913835378),
 (1525, 0.01808164719455287),
 (1528, 0.017125229094784991),
 (1531, 0.017877469930156275),
 (1533, 0.018268138333972155),
 (1550, 0.015742578334023118),
 (1551, 0.019002867115826286),
 (1553, 0.010074565937039454),
 (1554, 0.012399374271946951),
 (1558, 0.01442679914552385),
 (1561, 0.017962432306340891),
 (1562, 0.014158774652799017),
 (1564, 0.018163480659691311),
 (1567, 0.015185179897351688),
 (1568, 0.018157380546829659),
 (1571, 0.014811927785047155),
 (1578, 0.017716780052885274),
 (1579, 0.018072623120577603),
 (1581, 0.017375934332935491),
 (1587, 0.017023942434145876),
 (1588, 0.017787975974118797),
 (1589, 0.018528231926059222),
 (1591, 0.01525860019237517),
 (1599, 0.01695658695313779),
 (1602, 0.017959482254255359),
 (1611, 0.017084496662003658),
 (1622, 0.018583566609054369),
 (1623, 0.016718699771581969),
 (1624, 0.017842666153216089),
 (1626, 0.015377224292346545),
 (1627, 0.016912127447405115),
 (1630, 0.016506586767840215),
 (1632, 0.018554209732482219),
 (1633, 0.018961036353178667),
 (1634, 0.016210680413578229),
 (1643, 0.016543293459045399),
 (1644, 0.016626792202026015),
 (1653, 0.011909887757656806),
 (1654, 0.016031835112115932),
 (1655, 0.018495913550865505),
 (1661, 0.01882358251829477),
 (1663, 0.018957562992806293),
 (1665, 0.01759908280828618),
 (1666, 0.017971291245982245),
 (1667, 0.017204989272652023),
 (1671, 0.018483033767083015),
 (1682, 0.018187943118799559),
 (1688, 0.01603812027656441),
 (1689, 0.017410742537088519),
 (1690, 0.016103505220139473),
 (1691, 0.018450952390626212),
 (1697, 0.017143141915062887),
 (1702, 0.016882682432496365),
 (1708, 0.015421732220380239),
 (1713, 0.014927061049055047),
 (1714, 0.018305513290840934),
 (1716, 0.017582460807047735),
 (1717, 0.017930061937484698),
 (1718, 0.017652033692476133),
 (1721, 0.016880235657400221),
 (1723, 0.015392017200410144),
 (1734, 0.017241379310344827),
 (1741, 0.017705468321976205),
 (1746, 0.017782248497890462),
 (1751, 0.018779553649854352),
 (1758, 0.016752068049743541),
 (1763, 0.017579695077063251),
 (1767, 0.017259661642069435),
 (1770, 0.018419038526909796),
 (1771, 0.01875597420983674),
 (1775, 0.016946676590098901),
 (1776, 0.018093700477194479),
 (1778, 0.017796577637499676),
 (1779, 0.017753694370749088),
 (1780, 0.01848947027332197),
 (1784, 0.017713950075487015),
 (1786, 0.018999370571745718),
 (1789, 0.018486251174532696),
 (1792, 0.017992013497642966),
 (1793, 0.017563128223717386),
 (1807, 0.018099736220283946),
 (1812, 0.018550956492367831),
 (1814, 0.017179137824434969),
 (1820, 0.016404651879712039),
 (1822, 0.014324321139703233),
 (1825, 0.018261931541316181),
 (1829, 0.017921264168981023),
 (1834, 0.017230958464451659),
 (1840, 0.017868749779683835),
 (1844, 0.01823407953895349),
 (1847, 0.017056662089426397),
 (1848, 0.017478277078259191),
 (1853, 0.018249537029122385),
 (1857, 0.017524654729993355),
 (1859, 0.017254432150352318),
 (1862, 0.016577930585518923),
 (1864, 0.018803222815062089),
 (1865, 0.019187408758242049),
 (1866, 0.0176829100970258),
 (1874, 0.018655922385882012),
 (1875, 0.017593536873036532),
 (1883, 0.018606496935261549),
 (1884, 0.017280627538648848),
 (1885, 0.018039650776849193),
 (1892, 0.017980163386212441),
 (1896, 0.017502783746903096),
 (1899, 0.018479818049488651),
 (1902, 0.016959067277391413),
 (1903, 0.015333101487268807),
 (1912, 0.013585380277496756),
 (1918, 0.017546608397153052),
 (1921, 0.017312220982699009),
 (1922, 0.018123940109858946),
 (1924, 0.020232593878503843),
 (1927, 0.019935014161062671),
 (1928, 0.015873015873015872),
 (1929, 0.01800685933997441),
 (1930, 0.018060612140870642),
 (1932, 0.018547704973666243),
 (1933, 0.017621319426799297),
 (1935, 0.017220556601204268),
 (1941, 0.017635260447718189),
 (1949, 0.016286996293437345),
 (1950, 0.017654833916954514),
 (1953, 0.016000520350723933),
 (1956, 0.017618535211648197),
 (1958, 0.017510975709063282),
 (1962, 0.0098330769308011476),
 (1963, 0.015754483493016169),
 (1966, 0.017745155080767936),
 (1969, 0.01242648708554586),
 (1975, 0.014282757503551969),
 (1978, 0.017765099406778909),
 (1979, 0.015438523131827371),
 (1985, 0.011204526247802854),
 (1988, 0.016673733330787151),
 (1989, 0.016099262550184811),
 (1991, 0.0068639210061495863),
 (1994, 0.014085924209552566),
 (2014, 0.019023887260126014),
 (2019, 0.01525679669926043),
 (2023, 0.016186917301947192),
 (2024, 0.015427323070342683),
 (2029, 0.016561739392117473),
 (2033, 0.016721076572738393),
 (2034, 0.016036024396006852),
 (2037, 0.017535621195451163),
 (2038, 0.01618260801469084),
 (2039, 0.014446643370565691),
 (2040, 0.015846667588406884),
 (2041, 0.017971291245982245),
 (2042, 0.016882682432496365),
 (2043, 0.01647012468648933),
 (2047, 0.019248923626754524),
 (2061, 0.01722315529055251),
 (2062, 0.01651574040164672),
 (2080, 0.018209429548119164),
 (2081, 0.017079425635684879),
 (2083, 0.016951629586434237),
 (2091, 0.015421732220380239),
 (2093, 0.016025557371195279),
 (2094, 0.018099736220283946),
 (2099, 0.017309581535986662),
 (2100, 0.017378604434577193),
 (2101, 0.016807326281449476),
 (2105, 0.017186880948908898),
 (2106, 0.017965383821515163),
 (2109, 0.01752739440591088),
 (2114, 0.017306943303231015),
 (2115, 0.014253290788102661),
 (2117, 0.018090684883004294),
 (2121, 0.017486434458656715),
 (2122, 0.018096717589195427),
 (2131, 0.014278325787276565),
 (2137, 0.01324297316533753),
 (2138, 0.016380021965051238),
 (2139, 0.017733788619662509),
 (2140, 0.015766415847036561),
 (2142, 0.015907671540221165),
 (2144, 0.016577930585518923),
 (2146, 0.017932997425074138),
 (2147, 0.017894948832962131),
 (2152, 0.017280627538648848),
 (2156, 0.017638052648364166),
 (2157, 0.017785111540156214),
 (2159, 0.018072623120577603),
 (2163, 0.018380960958005874),
 (2167, 0.015088341709265341),
 (2171, 0.017668855176592412),
 (2175, 0.01343662885030102),
 (2176, 0.016601143922029517),
 (2177, 0.01696403121173717),
 (2178, 0.017886202937348831),
 (2180, 0.017857142857142856),
 (2181, 0.018422222416853026),
 (2183, 0.017953586534506644),
 (2186, 0.017166255975953174),
 (2196, 0.019094464805218078),
 (2199, 0.015632756298114194),
 (2202, 0.018412675727225398),
 (2203, 0.01523879705044648),
 (2209, 0.018318022998222588),
 (2211, 0.017576930659599258),
 (2212, 0.018169586962804105),
 (2213, 0.017489156139699702),
 (2216, 0.017839775060727134),
 (2218, 0.016251974829835168),
 (2220, 0.017383948354492897),
 (2226, 0.01767166346349925),
 (2228, 0.0126051547041996),
 (2231, 0.016902295232662172),
 (2245, 0.01739465108777254),
 (2247, 0.017418805125646593),
 (2249, 0.015294805601177483),
 (2250, 0.015788362806186562),
 (2251, 0.01652032296487408),
 (2253, 0.017989048759927736),
 (2254, 0.017486434458656715),
 (2255, 0.018154332808467798),
 (2257, 0.016601143922029517),
 (2267, 0.018308638299867099),
 (2268, 0.018752613009793393),
 (2270, 0.01647012468648933),
 (2271, 0.018268138333972155),
 (2272, 0.018181818181818181),
 (2275, 0.017440360568551243),
 (2276, 0.015975600943247553),
 (2277, 0.017166255975953174),
 (2278, 0.017959482254255359),
 (2285, 0.01824644236670989),
 (2287, 0.018593383440426065),
 (2288, 0.017696998813477815),
 (2299, 0.017632469580643403),
 (2301, 0.018646005586046614),
 (2304, 0.017635260447718189),
 (2305, 0.018803222815062089),
 (2308, 0.017612970761909118),
 (2313, 0.019083827755996059),
 (2320, 0.017944753897811791),
 (2321, 0.018221741961439275),
 (2323, 0.018412675727225398),
 (2324, 0.018166533036810491),
 (2327, 0.016973972246430373),
 (2328, 0.017546608397153052),
 (2337, 0.018123940109858946),
 (2339, 0.014334769145480082),
 (2341, 0.01800091656737594),
 (2345, 0.013994692688945024),
 (2348, 0.015072672056879401),
 (2358, 0.018499137732597457),
 (2359, 0.017799447649568194),
 (2362, 0.018314893152169078),
 (2366, 0.013494790853450919),
 (2368, 0.017059186852531404),
 (2369, 0.012431347479853668),
 (2371, 0.017330731179010378),
 (2376, 0.011334803463839372),
 (2377, 0.017314861644301505),
 (2379, 0.018169586962804105),
 (2383, 0.012176802864654755),
 (2389, 0.014680128059102871),
 (2395, 0.017986085496266707),
 (2397, 0.016169700886437457),
 (2399, 0.014875001769485471),
 (2400, 0.016063335463730795),
 (2401, 0.012853728588113464),
 (2405, 0.014066122889963404),
 (2410, 0.01619770568227789),
 (2415, 0.014875001769485471),
 (2422, 0.014619502508127579),
 (2425, 0.012960464055637021),
 (2427, 0.015421732220380239),
 (2431, 0.016406896506898815),
 (2440, 0.01453775126694151),
 (2441, 0.015291173398043357),
 (2443, 0.017831110259044394),
 (2445, 0.016163258956299355),
 (2446, 0.018343120028305487),
 (2448, 0.018166533036810491),
 (2452, 0.015534724301623771),
 (2454, 0.016907209183558144),
 (2456, 0.015132127598841067),
 (2467, 0.01764643726238448),
 (2472, 0.018434974611113812),
 (2481, 0.018719100723816755),
 (2482, 0.016495166219205225),
 (2487, 0.018123940109858946),
 (2491, 0.017543859649122806),
 (2496, 0.01823407953895349),
 (2497, 0.016131165024981884),
 (2500, 0.017980163386212441),
 (2502, 0.01437980795247502),
 (2503, 0.016584884301989865),
 (2505, 0.018120909279923773),
 (2512, 0.015412427699690626),
 (2518, 0.016554814898275674),
 (2520, 0.017877469930156275),
 (2521, 0.015822462812910016),
 (2524, 0.017408057509161801),
 (2532, 0.018752613009793393),
 (2534, 0.018473391677937958),
 (2538, 0.017924195310633422),
 (2540, 0.017677284080919734),
 (2546, 0.018377798542880283),
 (2547, 0.018905692043579676),
 (2550, 0.014606835143316864),
 (2556, 0.018950622031421852),
 (2561, 0.01661745172866437),
 (2570, 0.017836885381638013),
 (2572, 0.017694178359989626),
 (2573, 0.011924447187686291),
 (2587, 0.017674473097914836),
 (2588, 0.018054615679252144),
 (2590, 0.013254757314877999),
 (2591, 0.017610190525209068),
 (2593, 0.016156824761411739),
 (2594, 0.012847279493341383),
 (2596, 0.017621319426799297),
 (2601, 0.016846092759371364),
 (2602, 0.018157380546829659),
 (2608, 0.012358081537468941),
 (2616, 0.017039020260722522),
 (2618, 0.017519179252938871),
 (2622, 0.017418805125646593),
 (2623, 0.017275378860374519),
 (2625, 0.016968999532590449),
 (2627, 0.017983123705435825),
 (2628, 0.0164497212410484),
 (2632, 0.018027705773966715),
 (2637, 0.017296402493141357),
 (2639, 0.016445197488037674),
 (2640, 0.01806961811543838),
 (2641, 0.016120509688149442),
 (2642, 0.017304306283499608),
 (2646, 0.016284800779099731),
 (2647, 0.016000520350723933),
 (2648, 0.017785111540156214),
 (2649, 0.0174647069267196),
 (2650, 0.01693678368281645),
 (2651, 0.016262894625299739),
 (2652, 0.016440677485909473),
 (2653, 0.016438418889803397),
 (2654, 0.016747288848619493),
 (2655, 0.015534724301623771),
 (2656, 0.016773625490250767),
 (2658, 0.017150836164355046),
 (2662, 0.015969389360383453),
 (2663, 0.017184298737405266),
 (2664, 0.015474152429539777),
 (2668, 0.017941812598870129),
 (2669, 0.017049094559614293),
 (2671, 0.01638449183436429),
 (2674, 0.016812157376090608),
 (2675, 0.016236724206758701),
 (2676, 0.01780519186693005),
 (2677, 0.014500481886137893),
 (2680, 0.013802759151956312),
 (2682, 0.017257046298462489),
 (2683, 0.016547899131241717),
 (2684, 0.017320146615915827),
 (2689, 0.017262278182086768),
 (2691, 0.017666048236114971),
 (2693, 0.016790450375679524),
 (2696, 0.018682445230846841),
 (2704, 0.015760446264166193),
 (2706, 0.018033675290232094),
 (2708, 0.017148270256890502),
 (2712, 0.016711575479174401),
 (2713, 0.016382256439820282),
 (2718, 0.017278002598095081),
 (2719, 0.018830383891102472),
 (2720, 0.017810941682845048),
 (2721, 0.018518518518518517),
 (2722, 0.016036024396006852),
 (2724, 0.017938872754567414),
 (2726, 0.014229848905475611),
 (2735, 0.014512872990338358),
 (2737, 0.016158968634526465),
 (2740, 0.017249207436972714),
 (2748, 0.015816429022065665),
 (2750, 0.016406896506898815),
 (2755, 0.01823407953895349),
 (2759, 0.017848452583016327),
 (2765, 0.018084658242956537),
 (2766, 0.017285881031942923),
 (2768, 0.01731750352172862),
 (2769, 0.017212767626413251),
 (2773, 0.016320036526343212),
 (2775, 0.018249537029122385),
 (2776, 0.017868749779683835),
 (2778, 0.018682445230846841),
 (2779, 0.018072623120577603),
 (2780, 0.016678449465304949),
 (2786, 0.016899839872597088),
 (2787, 0.016438418889803397),
 (2788, 0.01713034127958803),
 (2789, 0.018265034141795967),
 (2791, 0.01823407953895349),
 (2792, 0.017886202937348831),
 (2793, 0.018057613158758489),
 (2796, 0.016850957561933378),
 (2799, 0.017660438390136927),
 (2800, 0.016752068049743541),
 (2804, 0.017049094559614293),
 (2806, 0.017336030785514805),
 (2807, 0.017921264168981023),
 (2808, 0.018096717589195427),
 (2809, 0.016377788409425179),
 (2810, 0.017217959094664215),
 (2813, 0.017660438390136927),
 (2814, 0.018018762672197659),
 (2815, 0.017538366049296678),
 (2816, 0.015474152429539777),
 (2818, 0.018111825961102519),
 (2819, 0.018289912361555059),
 (2821, 0.017016418601961847),
 (2827, 0.017125229094784991),
 (2828, 0.017649234807976422),
 (2829, 0.016297987274379547),
 (2830, 0.016863138136725613),
 (2831, 0.016223686546023429),
 (2833, 0.0176829100970258),
 (2834, 0.017074359148354066),
 (2835, 0.018009832947800251),
 (2836, 0.017779386846192242),
 (2839, 0.017579695077063251),
 (2840, 0.017104826293315532),
 (2841, 0.016973972246430373),
 (2842, 0.01804862522302865),
 (2843, 0.016355503267427979),
 (2844, 0.015712933737818839),
 (2846, 0.016814574495077492),
 (2849, 0.016596493409715724),
 (2853, 0.016931843759044257),
 (2855, 0.016860699896573406),
 (2856, 0.01773662817482112),
 (2858, 0.017557616400687259),
 (2864, 0.017299035879384729),
 (2871, 0.01808164719455287),
 (2875, 0.017833997114794678),
 (2876, 0.017320146615915827),
 (2877, 0.017410742537088519),
 (2879, 0.015988046003093868),
 (2881, 0.017112468745632964),
 (2882, 0.018384125016438588),
 (2887, 0.017429572791256408),
 (2888, 0.017102281096912191),
 (2889, 0.018438166825564654),
 (2894, 0.018992383307304024),
 (2898, 0.017451168520365098),
 (2901, 0.016966514823445737),
 (2902, 0.018695749414685128),
 (2905, 0.01639792355237011),
 (2907, 0.018051619701094184),
 (2912, 0.017069297193228319),
 (2915, 0.017851347922642327),
 (2917, 0.018163480659691311),
 (2921, 0.018679123648515688),
 (2923, 0.017092111726956894),
 (2924, 0.017513708935480928),
 (2925, 0.017112468745632964),
 (2927, 0.017680096413595723),
 (2929, 0.018280571046961572),
 (2933, 0.01760463402287659),
 (2934, 0.017486434458656715),
 (2948, 0.017921264168981023),
 (2952, 0.012744353604576208),
 (2955, 0.01773662817482112),
 (2956, 0.017530135374779685),
 (2958, 0.018268138333972155),
 (2960, 0.017745155080767936),
 (2966, 0.017056662089426397),
 (2968, 0.015358793307203317),
 (2969, 0.017912479416802393),
 (2974, 0.017354617993001017),
 (2978, 0.017576930659599258),
 (2980, 0.016395682625870645),
 (2982, 0.014404003755228371),
 (2983, 0.0154068531067063),
 (2984, 0.01800685933997441),
 (2987, 0.017028963902325802),
 (2988, 0.018181818181818181),
 (2991, 0.015567195351438098),
 (2998, 0.017660438390136927),
 (3003, 0.015988046003093868),
 (3011, 0.014078842667763896),
 (3013, 0.015569111793382273),
 (3022, 0.01722315529055251),
 (3023, 0.016897385588402904),
 (3025, 0.017685725132294185),
 (3028, 0.011581265024496426),
 (3029, 0.013502270532214975),
 (3031, 0.017140579478955219),
 (3032, 0.017461996709807519),
 (3035, 0.017668855176592412),
 (3040, 0.01780519186693005),
 (3045, 0.017989048759927736),
 (3046, 0.017278002598095081),
 (3047, 0.014297559893218167),
 (3051, 0.01814215727832624),
 (3056, 0.016776025931383349),
 (3057, 0.011651841234778013),
 (3060, 0.017084496662003658),
 (3064, 0.0156834568659352),
 (3065, 0.016929375426826432),
 (3067, 0.014229848905475611),
 (3068, 0.015532820595110382),
 (3070, 0.015292989174432085),
 (3071, 0.014411590149804249),
 (3075, 0.016173999811565882),
 (3076, 0.017467418413436908),
 (3077, 0.015553800148261333),
 (3079, 0.011934756769776275),
 (3083, 0.016008852910383172),
 (3084, 0.0093851827263384431),
 (3085, 0.013045102442339846),
 (3088, 0.015476034531457166),
 (3089, 0.016320036526343212),
 (3091, 0.013582839108943909),
 (3092, 0.017601857755139144),
 (3100, 0.015410568826232089),
 ...]

In [109]:
# Build LSH neighbor lists for every item (4 hash functions, 2 hash tables)
# and time the full pass.
nmodel42 = []
start = time.time()
item_mat = naive.T
for idx in range(item_mat.shape[0]):
    if idx % 100 == 0:
        print(idx)  # progress marker every 100 items
    nmodel42.append(get_ksimilar1(item_mat, idx, 4, 2, t1, t2))
end = time.time()
tot_time42 = end - start


0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900

In [131]:
print(tot_time42/60/60, "Hrs for LSH with Two Hash Table and 4 Hash Functions")


0.33367648747232226 Hrs for LSH with Two Hash Table and 4 Hash Functions

In [116]:
# Build LSH neighbor lists for every item (5 hash functions, 2 hash tables)
# and time the full pass.
nmodel52 = []
start = time.time()
item_mat = naive.T
for idx in range(item_mat.shape[0]):
    if idx % 100 == 0:
        print(idx)  # progress marker every 100 items
    nmodel52.append(get_ksimilar1(item_mat, idx, 5, 2, t1, t2))
end = time.time()
tot_time52 = end - start


0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900

In [130]:
print(tot_time52/60/60, "Hrs for LSH with Two Hash Table and 5 Hash Functions")


0.176792645321952 Hrs for LSH with Two Hash Table and 5 Hash Functions

In [118]:
# Build LSH neighbor lists for every item (6 hash functions, 2 hash tables)
# and time the full pass.
nmodel62 = []
start = time.time()
item_mat = naive.T
for idx in range(item_mat.shape[0]):
    if idx % 100 == 0:
        print(idx)  # progress marker every 100 items
    nmodel62.append(get_ksimilar1(item_mat, idx, 6, 2, t1, t2))
end = time.time()
tot_time62 = end - start


0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900

In [129]:
print(tot_time62/60/60, "Hrs for LSH with Two Hash Table and 6 Hash Functions")


0.09131465971469879 Hrs for LSH with Two Hash Table and 6 Hash Functions

In [120]:
# Build LSH neighbor lists for every item (6 hash functions, 3 hash tables)
# and time the full pass.
nmodel63 = []
start = time.time()
item_mat = naive.T
for idx in range(item_mat.shape[0]):
    if idx % 100 == 0:
        print(idx)  # progress marker every 100 items
    nmodel63.append(get_ksimilar1(item_mat, idx, 6, 3, t1, t2))
end = time.time()
tot_time63 = end - start


0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900

In [128]:
print(tot_time63/60/60, "Hrs for LSH with Three Hash Table and 6 Hash Functions")


0.14354393561681114 Hrs for LSH with Three Hash Table and 6 Hash Functions

In [123]:
# Build LSH neighbor lists for every item (5 hash functions, 3 hash tables)
# and time the full pass.
nmodel53 = []
start = time.time()
item_mat = naive.T
for idx in range(item_mat.shape[0]):
    if idx % 100 == 0:
        print(idx)  # progress marker every 100 items
    nmodel53.append(get_ksimilar1(item_mat, idx, 5, 3, t1, t2))
end = time.time()
tot_time53 = end - start


0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900

In [127]:
print(tot_time53/60/60, "Hrs for LSH with Three Hash Table and 5 Hash Functions")


0.2720466227663888 Hrs for LSH with Three Hash Table and 5 Hash Functions

In [125]:
# Build LSH neighbor lists for every item (4 hash functions, 3 hash tables)
# and time the full pass.
nmodel43 = []
start = time.time()
item_mat = naive.T
for idx in range(item_mat.shape[0]):
    if idx % 100 == 0:
        print(idx)  # progress marker every 100 items
    nmodel43.append(get_ksimilar1(item_mat, idx, 4, 3, t1, t2))
end = time.time()
tot_time43 = end - start


0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900

In [126]:
print(tot_time43/60/60, "Hrs for LSH with Three Hash Table and 4 Hash Functions")


0.5115059167146683 Hrs for LSH with Three Hash Table and 4 Hash Functions

In [136]:
# Build LSH neighbor lists for every item (4 hash functions, 4 hash tables)
# and time the full pass.
nmodel44 = []
start = time.time()
item_mat = naive.T
for idx in range(item_mat.shape[0]):
    if idx % 100 == 0:
        print(idx)  # progress marker every 100 items
    nmodel44.append(get_ksimilar1(item_mat, idx, 4, 4, t1, t2))
end = time.time()
tot_time44 = end - start


0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900

In [137]:
print(tot_time44/60/60, "Hrs for LSH with Four Hash Table and 4 Hash Functions")


0.5888034505314297 Hrs for LSH with Four Hash Table and 4 Hash Functions

In [139]:
# Build LSH neighbor lists for every item (3 hash functions, 2 hash tables)
# and time the full pass.
nmodel32 = []
start = time.time()
item_mat = naive.T
for idx in range(item_mat.shape[0]):
    if idx % 100 == 0:
        print(idx)  # progress marker every 100 items
    nmodel32.append(get_ksimilar1(item_mat, idx, 3, 2, t1, t2))
end = time.time()
tot_time32 = end - start


0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900

In [140]:
print(tot_time32/60/60, "Hrs for LSH with Two Hash Table and 3 Hash Functions")


0.6488283938831753 Hrs for LSH with Two Hash Table and 3 Hash Functions

In [142]:
# Build LSH neighbor lists for every item (3 hash functions, 3 hash tables)
# and time the full pass.
nmodel33 = []
start = time.time()
item_mat = naive.T
for idx in range(item_mat.shape[0]):
    if idx % 100 == 0:
        print(idx)  # progress marker every 100 items
    nmodel33.append(get_ksimilar1(item_mat, idx, 3, 3, t1, t2))
end = time.time()
tot_time33 = end - start


0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900

In [143]:
print(tot_time33/60/60, "Hrs for LSH with Three Hash Table and 3 Hash Functions")


1.2936090388562944 Hrs for LSH with Three Hash Table and 3 Hash Functions

In [134]:
# Cross-validate rating prediction for each LSH configuration, sweeping the
# neighborhood size; collects per-config MAE/RMSE curves.
each_sims_n = []
each_sims_rmse_n = []
count = 1
for sim_model in [nmodel42, nmodel43, nmodel52, nmodel53, nmodel62, nmodel63]:
    count += 1
    print(count)  # which configuration is being evaluated
    res_mod_mae_n = []
    res_mod_rmse_n = []
    for n_nbrs in [5, 10, 15, 20, 25]:
        print("Number of Neighbors:", n_nbrs)
        # presumably returns (avg MAE, avg RMSE) over 5 folds — defined in an
        # earlier cell not shown here
        mae_res, rmse_res = get_results1(nmat, len(rows), len(cols), 5, n_nbrs, sim_model, amean, umean, imean)
        res_mod_mae_n.append(mae_res)
        res_mod_rmse_n.append(rmse_res)
    each_sims_n.append(res_mod_mae_n)
    each_sims_rmse_n.append(res_mod_rmse_n)


2
Number of Neighbors: 5
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8629
MAE : 0.6116
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8598
MAE : 0.6097
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8571
MAE : 0.6078
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8580
MAE : 0.6105
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8621
MAE : 0.6115
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.859971715244
AVG MAE : 0.610189085046
 
Number of Neighbors: 10
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8537
MAE : 0.6079
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8505
MAE : 0.6062
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8480
MAE : 0.6043
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8485
MAE : 0.6067
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8529
MAE : 0.6081
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.850733256214
AVG MAE : 0.606634469031
 
Number of Neighbors: 15
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8525
MAE : 0.6079
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8491
MAE : 0.6062
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8469
MAE : 0.6045
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8471
MAE : 0.6065
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8516
MAE : 0.6081
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.849459969435
AVG MAE : 0.606634413212
 
Number of Neighbors: 20
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8524
MAE : 0.6082
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8490
MAE : 0.6064
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8467
MAE : 0.6048
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8469
MAE : 0.6068
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8513
MAE : 0.6082
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.849265073095
AVG MAE : 0.606863910326
 
Number of Neighbors: 25
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8523
MAE : 0.6083
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8490
MAE : 0.6066
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8467
MAE : 0.6049
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8469
MAE : 0.6070
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8513
MAE : 0.6085
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.849242116296
AVG MAE : 0.607051943773
 
3
Number of Neighbors: 5
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8424
MAE : 0.5982
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8426
MAE : 0.5961
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8361
MAE : 0.5938
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8347
MAE : 0.5928
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8383
MAE : 0.5959
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.838803589516
AVG MAE : 0.595362251115
 
Number of Neighbors: 10
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8295
MAE : 0.5928
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8297
MAE : 0.5911
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8228
MAE : 0.5885
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8208
MAE : 0.5874
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8247
MAE : 0.5907
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.825474818967
AVG MAE : 0.590089327
 
Number of Neighbors: 15
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8274
MAE : 0.5928
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8276
MAE : 0.5910
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8210
MAE : 0.5886
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8188
MAE : 0.5874
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8226
MAE : 0.5907
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.823477810293
AVG MAE : 0.590099293148
 
Number of Neighbors: 20
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8269
MAE : 0.5931
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8274
MAE : 0.5916
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8204
MAE : 0.5890
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8185
MAE : 0.5880
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8219
MAE : 0.5908
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.823033481489
AVG MAE : 0.590505038856
 
Number of Neighbors: 25
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8270
MAE : 0.5935
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8273
MAE : 0.5919
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8205
MAE : 0.5894
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8185
MAE : 0.5884
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8218
MAE : 0.5911
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.823012677249
AVG MAE : 0.590842196982
 
4
Number of Neighbors: 5
----------   Fold  1 ---------------
Test Errors
RMSE : 0.9041
MAE : 0.6428
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9056
MAE : 0.6428
----------   Fold  3 ---------------
Test Errors
RMSE : 0.9015
MAE : 0.6412
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8958
MAE : 0.6395
----------   Fold  5 ---------------
Test Errors
RMSE : 0.9019
MAE : 0.6433
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.901791316017
AVG MAE : 0.641911696852
 
Number of Neighbors: 10
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8998
MAE : 0.6412
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9012
MAE : 0.6410
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8972
MAE : 0.6395
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8914
MAE : 0.6378
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8971
MAE : 0.6411
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.897348256887
AVG MAE : 0.640101372597
 
Number of Neighbors: 15
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8995
MAE : 0.6412
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9009
MAE : 0.6412
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8968
MAE : 0.6394
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8910
MAE : 0.6377
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8967
MAE : 0.6411
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.896974084518
AVG MAE : 0.640123253869
 
Number of Neighbors: 20
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8994
MAE : 0.6413
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9008
MAE : 0.6413
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8968
MAE : 0.6395
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8910
MAE : 0.6378
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8966
MAE : 0.6411
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.896935702017
AVG MAE : 0.64020012367
 
Number of Neighbors: 25
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8994
MAE : 0.6414
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9008
MAE : 0.6412
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8968
MAE : 0.6395
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8910
MAE : 0.6378
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8966
MAE : 0.6412
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.896933288827
AVG MAE : 0.640238628921
 
5
Number of Neighbors: 5
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8776
MAE : 0.6207
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8765
MAE : 0.6203
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8706
MAE : 0.6178
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8725
MAE : 0.6185
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8751
MAE : 0.6215
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.87443852413
AVG MAE : 0.619748430093
 
Number of Neighbors: 10
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8707
MAE : 0.6181
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8693
MAE : 0.6177
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8635
MAE : 0.6153
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8655
MAE : 0.6159
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8673
MAE : 0.6182
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.867264606034
AVG MAE : 0.617042154076
 
Number of Neighbors: 15
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8697
MAE : 0.6179
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8684
MAE : 0.6177
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8629
MAE : 0.6156
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8645
MAE : 0.6158
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8664
MAE : 0.6180
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.866366306212
AVG MAE : 0.616986365701
 
Number of Neighbors: 20
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8696
MAE : 0.6181
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8682
MAE : 0.6177
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8628
MAE : 0.6157
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8644
MAE : 0.6159
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8663
MAE : 0.6181
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.866262473838
AVG MAE : 0.617134528971
 
Number of Neighbors: 25
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8695
MAE : 0.6182
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8682
MAE : 0.6178
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8627
MAE : 0.6158
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8644
MAE : 0.6161
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8663
MAE : 0.6182
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.866235446061
AVG MAE : 0.617211038988
 
6
Number of Neighbors: 5
----------   Fold  1 ---------------
Test Errors
RMSE : 0.9364
MAE : 0.6724
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9376
MAE : 0.6735
----------   Fold  3 ---------------
Test Errors
RMSE : 0.9333
MAE : 0.6707
----------   Fold  4 ---------------
Test Errors
RMSE : 0.9308
MAE : 0.6692
----------   Fold  5 ---------------
Test Errors
RMSE : 0.9351
MAE : 0.6731
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.934632489726
AVG MAE : 0.671784618177
 
Number of Neighbors: 10
----------   Fold  1 ---------------
Test Errors
RMSE : 0.9346
MAE : 0.6717
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9359
MAE : 0.6728
----------   Fold  3 ---------------
Test Errors
RMSE : 0.9315
MAE : 0.6698
----------   Fold  4 ---------------
Test Errors
RMSE : 0.9289
MAE : 0.6684
----------   Fold  5 ---------------
Test Errors
RMSE : 0.9332
MAE : 0.6723
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.932820898391
AVG MAE : 0.670983802465
 
Number of Neighbors: 15
----------   Fold  1 ---------------
Test Errors
RMSE : 0.9346
MAE : 0.6718
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9357
MAE : 0.6727
----------   Fold  3 ---------------
Test Errors
RMSE : 0.9314
MAE : 0.6698
----------   Fold  4 ---------------
Test Errors
RMSE : 0.9288
MAE : 0.6684
----------   Fold  5 ---------------
Test Errors
RMSE : 0.9331
MAE : 0.6723
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.932723983723
AVG MAE : 0.670992008756
 
Number of Neighbors: 20
----------   Fold  1 ---------------
Test Errors
RMSE : 0.9346
MAE : 0.6718
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9357
MAE : 0.6727
----------   Fold  3 ---------------
Test Errors
RMSE : 0.9314
MAE : 0.6698
----------   Fold  4 ---------------
Test Errors
RMSE : 0.9288
MAE : 0.6684
----------   Fold  5 ---------------
Test Errors
RMSE : 0.9331
MAE : 0.6723
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.932714310746
AVG MAE : 0.671004868675
 
Number of Neighbors: 25
----------   Fold  1 ---------------
Test Errors
RMSE : 0.9346
MAE : 0.6718
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9357
MAE : 0.6728
----------   Fold  3 ---------------
Test Errors
RMSE : 0.9314
MAE : 0.6699
----------   Fold  4 ---------------
Test Errors
RMSE : 0.9288
MAE : 0.6684
----------   Fold  5 ---------------
Test Errors
RMSE : 0.9331
MAE : 0.6723
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.93271514674
AVG MAE : 0.671018367439
 
7
Number of Neighbors: 5
----------   Fold  1 ---------------
Test Errors
RMSE : 0.9160
MAE : 0.6513
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9145
MAE : 0.6504
----------   Fold  3 ---------------
Test Errors
RMSE : 0.9096
MAE : 0.6481
----------   Fold  4 ---------------
Test Errors
RMSE : 0.9092
MAE : 0.6475
----------   Fold  5 ---------------
Test Errors
RMSE : 0.9121
MAE : 0.6494
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.91227389623
AVG MAE : 0.649325675711
 
Number of Neighbors: 10
----------   Fold  1 ---------------
Test Errors
RMSE : 0.9126
MAE : 0.6499
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9109
MAE : 0.6489
----------   Fold  3 ---------------
Test Errors
RMSE : 0.9064
MAE : 0.6470
----------   Fold  4 ---------------
Test Errors
RMSE : 0.9058
MAE : 0.6463
----------   Fold  5 ---------------
Test Errors
RMSE : 0.9084
MAE : 0.6478
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.908831853258
AVG MAE : 0.648009820112
 
Number of Neighbors: 15
----------   Fold  1 ---------------
Test Errors
RMSE : 0.9124
MAE : 0.6500
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9106
MAE : 0.6490
----------   Fold  3 ---------------
Test Errors
RMSE : 0.9062
MAE : 0.6471
----------   Fold  4 ---------------
Test Errors
RMSE : 0.9055
MAE : 0.6463
----------   Fold  5 ---------------
Test Errors
RMSE : 0.9081
MAE : 0.6479
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.908565143378
AVG MAE : 0.648054075263
 
Number of Neighbors: 20
----------   Fold  1 ---------------
Test Errors
RMSE : 0.9124
MAE : 0.6501
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9106
MAE : 0.6490
----------   Fold  3 ---------------
Test Errors
RMSE : 0.9062
MAE : 0.6471
----------   Fold  4 ---------------
Test Errors
RMSE : 0.9055
MAE : 0.6463
----------   Fold  5 ---------------
Test Errors
RMSE : 0.9081
MAE : 0.6479
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.908534197067
AVG MAE : 0.648081017869
 
Number of Neighbors: 25
----------   Fold  1 ---------------
Test Errors
RMSE : 0.9124
MAE : 0.6501
----------   Fold  2 ---------------
Test Errors
RMSE : 0.9106
MAE : 0.6490
----------   Fold  3 ---------------
Test Errors
RMSE : 0.9062
MAE : 0.6471
----------   Fold  4 ---------------
Test Errors
RMSE : 0.9055
MAE : 0.6464
----------   Fold  5 ---------------
Test Errors
RMSE : 0.9081
MAE : 0.6479
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.908532924276
AVG MAE : 0.648100699461
 

In [144]:
# Continue the evaluation for the remaining LSH configurations; appends onto
# the each_sims_n / each_sims_rmse_n lists built in the previous run.
count = 1
for sim_model in [nmodel44, nmodel32, nmodel33]:
    count += 1
    print(count)  # which configuration is being evaluated
    res_mod_mae_n = []
    res_mod_rmse_n = []
    for n_nbrs in [5, 10, 15, 20, 25]:
        print("Number of Neighbors:", n_nbrs)
        mae_res, rmse_res = get_results1(nmat, len(rows), len(cols), 5, n_nbrs, sim_model, amean, umean, imean)
        res_mod_mae_n.append(mae_res)
        res_mod_rmse_n.append(rmse_res)
    each_sims_n.append(res_mod_mae_n)
    each_sims_rmse_n.append(res_mod_rmse_n)


2
Number of Neighbors: 5
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8314
MAE : 0.5891
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8296
MAE : 0.5877
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8281
MAE : 0.5863
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8286
MAE : 0.5878
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8287
MAE : 0.5876
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.829278504885
AVG MAE : 0.587712921595
 
Number of Neighbors: 10
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8168
MAE : 0.5836
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8151
MAE : 0.5822
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8131
MAE : 0.5806
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8137
MAE : 0.5826
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8142
MAE : 0.5823
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.814599822783
AVG MAE : 0.582237208847
 
Number of Neighbors: 15
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8140
MAE : 0.5830
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8127
MAE : 0.5821
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8105
MAE : 0.5806
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8115
MAE : 0.5826
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8114
MAE : 0.5821
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.812012794808
AVG MAE : 0.58209510344
 
Number of Neighbors: 20
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8134
MAE : 0.5834
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8121
MAE : 0.5826
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8098
MAE : 0.5809
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8109
MAE : 0.5831
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8110
MAE : 0.5827
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.811442165773
AVG MAE : 0.582545333518
 
Number of Neighbors: 25
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8133
MAE : 0.5838
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8121
MAE : 0.5830
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8098
MAE : 0.5814
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8109
MAE : 0.5835
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8108
MAE : 0.5831
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.811380332244
AVG MAE : 0.582949521743
 
3
Number of Neighbors: 5
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8307
MAE : 0.5901
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8319
MAE : 0.5897
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8242
MAE : 0.5858
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8293
MAE : 0.5893
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8281
MAE : 0.5879
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.828838794469
AVG MAE : 0.588580806875
 
Number of Neighbors: 10
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8158
MAE : 0.5842
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8169
MAE : 0.5837
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8096
MAE : 0.5803
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8139
MAE : 0.5835
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8133
MAE : 0.5828
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.813901885079
AVG MAE : 0.582887264173
 
Number of Neighbors: 15
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8135
MAE : 0.5843
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8143
MAE : 0.5836
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8071
MAE : 0.5801
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8112
MAE : 0.5832
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8111
MAE : 0.5829
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.811427138329
AVG MAE : 0.582822606144
 
Number of Neighbors: 20
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8129
MAE : 0.5847
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8137
MAE : 0.5841
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8066
MAE : 0.5807
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8106
MAE : 0.5837
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8105
MAE : 0.5833
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.810887826982
AVG MAE : 0.583314770798
 
Number of Neighbors: 25
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8127
MAE : 0.5849
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8136
MAE : 0.5844
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8065
MAE : 0.5812
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8106
MAE : 0.5841
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8104
MAE : 0.5838
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.810753952651
AVG MAE : 0.583701931088
 
4
Number of Neighbors: 5
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8193
MAE : 0.5809
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8201
MAE : 0.5811
----------   Fold  3 ---------------
Test Errors
RMSE : 0.8132
MAE : 0.5768
----------   Fold  4 ---------------
Test Errors
RMSE : 0.8159
MAE : 0.5797
----------   Fold  5 ---------------
Test Errors
RMSE : 0.8141
MAE : 0.5794
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.816532707444
AVG MAE : 0.579591699617
 
Number of Neighbors: 10
----------   Fold  1 ---------------
Test Errors
RMSE : 0.8026
MAE : 0.5747
----------   Fold  2 ---------------
Test Errors
RMSE : 0.8031
MAE : 0.5745
----------   Fold  3 ---------------
Test Errors
RMSE : 0.7963
MAE : 0.5706
----------   Fold  4 ---------------
Test Errors
RMSE : 0.7982
MAE : 0.5735
----------   Fold  5 ---------------
Test Errors
RMSE : 0.7970
MAE : 0.5729
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.799444654493
AVG MAE : 0.573219468204
 
Number of Neighbors: 15
----------   Fold  1 ---------------
Test Errors
RMSE : 0.7998
MAE : 0.5748
----------   Fold  2 ---------------
Test Errors
RMSE : 0.7996
MAE : 0.5744
----------   Fold  3 ---------------
Test Errors
RMSE : 0.7931
MAE : 0.5707
----------   Fold  4 ---------------
Test Errors
RMSE : 0.7945
MAE : 0.5733
----------   Fold  5 ---------------
Test Errors
RMSE : 0.7935
MAE : 0.5725
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.796113054036
AVG MAE : 0.5731265525
 
Number of Neighbors: 20
----------   Fold  1 ---------------
Test Errors
RMSE : 0.7991
MAE : 0.5755
----------   Fold  2 ---------------
Test Errors
RMSE : 0.7987
MAE : 0.5751
----------   Fold  3 ---------------
Test Errors
RMSE : 0.7925
MAE : 0.5716
----------   Fold  4 ---------------
Test Errors
RMSE : 0.7937
MAE : 0.5739
----------   Fold  5 ---------------
Test Errors
RMSE : 0.7925
MAE : 0.5729
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.795289891181
AVG MAE : 0.573813013036
 
Number of Neighbors: 25
----------   Fold  1 ---------------
Test Errors
RMSE : 0.7990
MAE : 0.5761
----------   Fold  2 ---------------
Test Errors
RMSE : 0.7987
MAE : 0.5758
----------   Fold  3 ---------------
Test Errors
RMSE : 0.7922
MAE : 0.5720
----------   Fold  4 ---------------
Test Errors
RMSE : 0.7934
MAE : 0.5745
----------   Fold  5 ---------------
Test Errors
RMSE : 0.7924
MAE : 0.5735
-------------------------------------

Testing Avg Error:
AVG RMSE : 0.795135971935
AVG MAE : 0.574409318226
 

In [146]:
#nmodel42, nmodel43, nmodel52, nmodel53, nmodel62, nmodel63, nmodel44, nmodel32, nmodel33


Out[146]:
[[0.85997171524390592,
  0.85073325621353602,
  0.84945996943467572,
  0.84926507309465893,
  0.84924211629626589],
 [0.83880358951609113,
  0.8254748189673945,
  0.82347781029325995,
  0.82303348148902811,
  0.82301267724868787],
 [0.901791316017307,
  0.89734825688748787,
  0.89697408451816418,
  0.89693570201720141,
  0.89693328882709911],
 [0.87443852412962708,
  0.86726460603407252,
  0.86636630621182087,
  0.86626247383841215,
  0.86623544606116754],
 [0.93463248972634294,
  0.9328208983914168,
  0.93272398372345311,
  0.93271431074557665,
  0.93271514673959821],
 [0.912273896230125,
  0.90883185325815785,
  0.90856514337849925,
  0.90853419706684524,
  0.90853292427633681],
 [0.82927850488535193,
  0.81459982278340259,
  0.81201279480781563,
  0.81144216577286488,
  0.8113803322435853],
 [0.82883879446874698,
  0.81390188507937444,
  0.81142713832939894,
  0.81088782698154271,
  0.81075395265058048],
 [0.81653270744361406,
  0.79944465449296476,
  0.7961130540359409,
  0.79528989118114501,
  0.79513597193511265]]

In [160]:


In [188]:
import matplotlib.pyplot as plt

%matplotlib inline

In [189]:


In [ ]:


In [293]:
# One MAE-vs-#neighbors panel per (# hash functions, # hash tables) combination.
# Reorder the results so panels run (3,2), (3,3), (4,2), (4,3), (5,2), (5,3).
ll = each_sims_n[7:] + each_sims_n[:6]

hash_no = [3,4,5,6]
rep_no = [2,3]

c = 0

plt.rcParams['figure.figsize'] = (10,12)

# Only the first three hash-function counts are plotted (6 panels total).
for n_hash in hash_no[:-1]:
    for n_tables in rep_no:
        c += 1
        plt.subplot(4, 2, c)
        plt.plot(ll[c - 1])
        plt.title("# hash functions = " + str(n_hash) +  ", # hash tables = " + str(n_tables))
        plt.xticks(np.arange(5), [5,10,15,20,25])
        plt.ylim([0.56,0.66])
        plt.xlabel("Number of nearest neighbors")
        plt.ylabel("MAE")

plt.suptitle("Plot of MAE for various combinations hash functions and hash tables")
plt.tight_layout(h_pad=1, pad=4)
plt.savefig('LSH_MAE')



In [295]:
# One RMSE-vs-#neighbors panel per (# hash functions, # hash tables) combination.
# Reorder the results so panels run (3,2), (3,3), (4,2), (4,3), (5,2), (5,3).
ll = each_sims_rmse_n[7:] + each_sims_rmse_n[:6]

hash_no = [3,4,5,6]
rep_no = [2,3]

c = 0

# Only the first three hash-function counts are plotted (6 panels total).
for i in range(len(hash_no) -1):
    for j in range(len(rep_no)):

        c+=1

        plt.subplot(4,2,c)
        plt.plot(ll[c-1])
        plt.title("# hash functions = " + str(hash_no[i]) +  ", # hash tables = " + str(rep_no[j]))
        plt.xticks(np.arange(5),[5,10,15,20,25])
        plt.ylim([0.78,0.92])
        plt.xlabel("Number of nearest neighbors")
        plt.ylabel("RMSE")

# Typo fix in the figure title: "combiantion" -> "combinations of".
plt.suptitle("Plot of RMSE for various combinations of hash functions and hash tables")
plt.tight_layout(h_pad=1, pad=4)
plt.savefig('LSH_RMSE')



In [270]:
timings = [14780.524564027786,tot_time32, tot_time33, tot_time42, tot_time43, tot_time52, tot_time53, tot_time62, tot_time63]

In [277]:
timings = timings[:7]

In [247]:
hash_no = [3,4,5,6]
rep_no = [2,3]

plt.rcParams['figure.figsize'] = (17,5)

c = 0

# NOTE(review): dead draft of a one-subplot-per-bar layout, superseded by the
# single plt.bar call in the next cell — safe to delete.
# for i in range(len(hash_no)):
#     for j in range(len(rep_no)):
        
#         c += 1
#         plt.subplot(1,len(timings),c)
#         plt.bar(c,height = timings[c-1])
        
#         plt.xticks([])
#         plt.xlabel()
        
#         if(c != 1):
#             plt.yticks([])

In [322]:
# Bar chart comparing total training time (hours) of plain item-item CF
# against each (# hash functions, # hash tables) LSH configuration.
plt.rcParams['figure.figsize'] = (17, 5)

positions = np.arange(len(timings))
hours = np.asarray(timings) / 60 / 60
plt.bar(positions, hours, align='center');
plt.xticks(np.arange(7), ['ITEM-IT' + 'EM CF' if False else 'ITEM-ITEM CF', (3,2),(3,3),(4,2),(4,3),(5,2),(5,3)])
plt.xlabel("Combination of ( # hash functions, # hash tables) ");
plt.ylabel("Model training time (hours)");
plt.title("Comparison of model training times for various hash function and hash table combinations with Item-Item CF")
plt.savefig('TimingComp')



In [404]:
# Line plot of test RMSE vs. neighborhood size for each
# (# hash functions, # hash tables) combination.
# NOTE(review): '(3,2)' takes each_sims_rmse_n[-1] while the other columns take
# indices 1-5 and index 0 is skipped — confirm this matches the order the
# models were evaluated in.
plt.rcParams['figure.figsize'] = (8,5)
results_df_lsh = pd.DataFrame({'Nearest Neighbors': [5, 10, 15, 20, 25], '(3,2)': each_sims_rmse_n[-1], '(3,3)': each_sims_rmse_n[1], '(4,2)': each_sims_rmse_n[2], '(4,3)': each_sims_rmse_n[3], '(5,2)':each_sims_rmse_n[4], '(5,3)':each_sims_rmse_n[5]})
# Typo fix in the chart title: "combiantion" -> "combinations of".
plot2 = results_df_lsh.plot(x='Nearest Neighbors', y=['(3,2)', '(3,3)', '(4,2)', '(4,3)','(5,2)', '(5,3)'], ylim=(0.78,0.94), title = 'Plot of Test RMSE for various combinations of hash functions and hash tables')
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
fig = plot2.get_figure()
fig.savefig('LSH_RMSE.png')
plt.tight_layout(h_pad=3,pad=3)


/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/plotting/_core.py:1714: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access
  series.name = label

In [402]:
# Line plot of test MAE vs. neighborhood size for each
# (# hash functions, # hash tables) combination.
# NOTE(review): '(3,2)' takes each_sims_n[-1] while the other columns take
# indices 1-5 and index 0 is skipped — confirm the mapping is intended.
plt.rcParams['figure.figsize'] = (8,5)
results_df_mae = pd.DataFrame({'Nearest Neighbors': [5, 10, 15, 20, 25], '(3,2)': each_sims_n[-1], '(3,3)': each_sims_n[1], '(4,2)': each_sims_n[2], '(4,3)': each_sims_n[3], '(5,2)':each_sims_n[4], '(5,3)':each_sims_n[5]})
# Typo fix in the chart title: "combiantion" -> "combinations of".
plot2 = results_df_mae.plot(x='Nearest Neighbors', y=['(3,2)', '(3,3)', '(4,2)', '(4,3)','(5,2)', '(5,3)'], ylim=(0.56,0.68), title = 'Plot of Test MAE for various combinations of hash functions and hash tables')
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
fig = plot2.get_figure()
fig.savefig('LSH_MAE.png')


/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/plotting/_core.py:1714: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access
  series.name = label

In [260]:
# For every user, accumulate the items surfaced in top-5/10/15/20/25
# recommendation lists from the LSH (3,3) model, to measure catalogue coverage.
mlist = [5,10,15,20,25]
lsh_covered = [[], [], [], [], []]
for user in range(len(rows)):
    if user % 100 == 0:
        print(user)  # progress indicator
    # coverage() returns one recommendation list per cutoff in mlist.
    recommended = coverage(full_mat, user, nmodel33, 15, mlist, flipped_dict, True)
    for bucket, items in zip(lsh_covered, recommended):
        bucket.extend(items)

cov1lsh, cov2lsh, cov3lsh, cov4lsh, cov5lsh = lsh_covered


0
/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/ipykernel/__main__.py:17: RuntimeWarning: invalid value encountered in double_scalars
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8100
8200
8300
8400
8500
8600
8700
8800
8900
9000
9100
9200
9300
9400
9500
9600
9700
9800
9900
10000
10100
10200
10300
10400
10500
10600
10700
10800
10900
11000
11100
11200
11300
11400
11500
11600
11700
11800
11900
12000

In [262]:
print("Coverage with recommending 5 books", len(set(cov1lsh))/4959 *100 ,"%")


Coverage with recommending 5 books 34.704577535793504 %

In [263]:
print("Coverage with recommending 10 books", len(set(cov2lsh))/4959 *100 ,"%")


Coverage with recommending 10 books 52.57108287961283 %

In [264]:
print("Coverage with recommending 15 books", len(set(cov3lsh))/4959 *100 ,"%")


Coverage with recommending 15 books 65.63823351482154 %

In [265]:
print("Coverage with recommending 20 books", len(set(cov4lsh))/4959 *100 ,"%")


Coverage with recommending 20 books 73.80520266182698 %

In [266]:
print("Coverage with recommending 25 books", len(set(cov5lsh))/4959 *100 ,"%")


Coverage with recommending 25 books 79.57249445452712 %

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [345]:
# Map the covered items at the top-25 cutoff to their integer column indices
# via idict (a comprehension replaces the manual append loop).
idnums = [idict[i] for i in cov5lsh]

In [348]:
len(set(idnums))/4959


Out[348]:
0.7957249445452712

In [352]:
idnums1 = set(idnums)

In [353]:
len(idnums1)


Out[353]:
3946

In [358]:
smalldata = data[~data.isbn.isin(list(idnums))]

In [368]:
# NOTE(review): these counts are immediately shadowed — the next cell rebinds
# r1/c1 to the unique-value arrays themselves. This cell is effectively dead.
r1 = smalldata.user_id.nunique()
c1 = smalldata.isbn.nunique()

In [376]:
# Re-index the reduced ("cold books") dataset: map surviving users and ISBNs
# onto dense 0-based ids, mirroring the remapping done on the full dataset.
# Working on an explicit copy and assigning via Series.map silences the
# SettingWithCopyWarning the original list-comprehension assignments raised.
smalldata = smalldata.copy()

r1 = smalldata.user_id.unique()
c1 = smalldata['isbn'].unique()

idict1 = dict(zip(c1, range(len(c1))))
udict1 = dict(zip(r1, range(len(r1))))

# map() produces the same values as the original comprehensions because every
# entry of each column is, by construction, a key of the corresponding dict.
smalldata['user_id'] = smalldata['user_id'].map(udict1)
smalldata['isbn'] = smalldata['isbn'].map(idict1)


/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/core/generic.py:3643: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value
/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/ipykernel/__main__.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [377]:
nmat1 = smalldata.as_matrix()

In [378]:
nmat1 = nmat1.astype(int)

In [380]:
# Build the dense user x item rating matrix for the reduced dataset;
# unrated cells remain 0.
naivesmall = np.zeros((len(r1), len(c1)))
for user, book, rating in nmat1:
    naivesmall[user, book] = rating

In [381]:
naivesmall.shape


Out[381]:
(11092, 1013)

In [406]:
ts1, ts2 = get_hashes1(naivesmall.T, 3, 3)

In [407]:
# Precompute the LSH-candidate similarity list for every item in the
# reduced ("cold books") dataset, timing the whole pass.
# NOTE(review): this clobbers tot_time33, which earlier held the timing of
# the full-dataset (3,3) run used in `timings` — rename before re-running
# the timing comparison cells.
nsmallmodel33 = []
start = time.time()
for token in range(naivesmall.T.shape[0]):
    if(token%100 == 0):
        print(token)
    nsmallmodel33.append(get_ksimilar1(naivesmall.T,token, 3,3,ts1,ts2))
end = time.time()
tot_time33 = end - start


0
100
200
300
400
500
600
700
800
900
1000

In [408]:
get_results1(nmat1, len(r1), len(c1), 5 ,15, nmodel33, amean,umean, imean)


----------   Fold  1 ---------------
/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/ipykernel/__main__.py:17: RuntimeWarning: invalid value encountered in double_scalars
Test Errors
RMSE : 1.3134
MAE : 1.0171
----------   Fold  2 ---------------
Test Errors
RMSE : 1.3200
MAE : 1.0210
----------   Fold  3 ---------------
Test Errors
RMSE : 1.3167
MAE : 1.0150
----------   Fold  4 ---------------
Test Errors
RMSE : 1.3170
MAE : 1.0175
----------   Fold  5 ---------------
Test Errors
RMSE : 1.3154
MAE : 1.0183
-------------------------------------

Testing Avg Error:
AVG RMSE : 1.31650288406
AVG MAE : 1.01777694059
 
Out[408]:
(1.0177769405909096, 1.3165028840572273)

In [ ]: