Test the idea that if we combine rank of CF and rank of features, whether we can get a better rank of prediction.

Possible issues:

  • I did not treat each purchase as individual event, instead, I calculate the CF based on users.

In [264]:
import pandas as pd
import numpy as np
import os
import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity
import operator
import cv2
import glob
from keras.preprocessing import image
from matplotlib import pyplot as plt
import seaborn as sns
import random
%matplotlib inline

In [39]:
view_image = pd.DataFrame(glob.glob('view_data_image/*.jpg'))
view_image['produuct'] = view_image[0].apply(lambda x: int(x[16:-4]))
view_image.columns = [['file', 'product']]

In [410]:
def dot(K, L):
    if len(K) != len(L): return 0
    return sum(i[0]*i[1] for i in zip(K, L))

def similarity(item_1, item_2):
    return dot(item_1, item_2) / np.sqrt(dot(item_1, item_1)*dot(item_2, item_2))

def average(lists):
    return [np.mean(i) for i in zip(*[l for l in lists])]
def plot_top_rank(product_id):
    candidate = top_product(product_id)
    candidate_df = view_image[view_image['product'].isin(candidate)]
    test_view = candidate_df['file']
    
    fig,axes = plt.subplots(1, len(test_view))

    for i in range(len(test_view)):
        img = image.load_img(test_view.iloc[i], target_size=(224, 224))
        # images
        axes[i].imshow(img)
        axes[i].set_xticklabels([])
        #axes[0,i].get_xaxis().set_visible(False)
        axes[i].get_xaxis().set_ticks([])
        axes[i].get_yaxis().set_visible(False)

def plot_sim_rank(product_id):
    candidate = similar_product(product_id)
    candidate_df = view_image[view_image['product'].isin(candidate)]
    test_view = candidate_df['file']
    
    fig,axes = plt.subplots(1, len(test_view))

    for i in range(len(test_view)):
        img = image.load_img(test_view.iloc[i], target_size=(224, 224))
        # images
        axes[i].imshow(img)
        axes[i].set_xticklabels([])
        #axes[0,i].get_xaxis().set_visible(False)
        axes[i].get_xaxis().set_ticks([])
        axes[i].get_yaxis().set_visible(False)

In [417]:
df_spu[df_spu['spu'] == 458036191955415221]


Out[417]:
spu CF_item spu_features CF_features ave_CF_fea sim rank
1099 458036191955415221 [84800400172249093, 297314006163628534, 436644... [0.084, 0.306, 0.0, 0.2, 0.088, 0.029, 0.166, ... [[[0.449, 0.247, 0.007, 0.261, 0.25, 0.007, 0.... [0.371, 0.7426, 0.0304, 0.5118, 0.1884, 0.0459... 0.664724 612.0

In [ ]:
top_product

In [430]:
plot_sim_rank(3172644484366424)



In [429]:
plot_sim_rank(458036191955415221)



In [370]:
plot_top_rank(458036191955415221)


Similar products to 458036191955415221 include:

No. 1: 84800400172249093
No. 2: 297314006163628534
No. 3: 436644093725405288
No. 4: 305758255464948196
No. 5: 288588281885597903
No. 6: 444525391273811972
No. 7: 1732554800473563141
No. 8: 1760983773121339411
No. 9: 93526113400463377
No. 10: 89866935898595349

In [371]:
plot_top_rank(3172644484366424)


Similar products to 3172644484366424 include:

No. 1: 315046913029713936
No. 2: 461695358101766158
No. 3: 462821279656333429
No. 4: 3172644484366424
No. 5: 439458865467551753
No. 6: 89304008517624104
No. 7: 297314002868527124
No. 8: 300128739550408715
No. 9: 432703441537921049
No. 10: 32446015921365000

In [2]:
os.chdir('/Users/Walkon302/Desktop/deep-learning-models-master/view2buy')

In [204]:
df = pd.read_pickle('view2buy_url.pkl')

In [132]:
df.shape


Out[132]:
(17460, 15)

In [133]:
df.head()


Out[133]:
0 user_id buy_spu buy_sn buy_ct3 view_spu view_sn view_ct3 time_interval view_cnt view_secondes view_features buy_features spu url
0 4209887493\t453532580309307392\t10004616\t334\... 4209887493 453532580309307392 10004616 334 14150170026959126 10010102 334 21114 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.1, 1.804, 0.049, 0.883, 0.092, 0.053, 0.042... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/...
1 529805243\t103096245561765919\t10010102\t334\t... 529805243 103096245561765919 10010102 334 14150170026959126 10010102 334 37794 4 66 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.467, 0.385, 0.0, 0.043, 0.292, 0.0, 0.448, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/...
2 3748045464\t446777176556679168\t10005711\t334\... 3748045464 446777176556679168 10005711 334 14150170026959126 10010102 334 18820 1 34 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.018, 0.161, 0.088, 0.141, 0.231, 0.0, 0.036... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/...
3 4209887493\t438895881520357521\t10004616\t334\... 4209887493 438895881520357521 10004616 334 14150170026959126 10010102 334 13978 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.036, 0.439, 0.0, 0.074, 0.194, 0.0, 0.331, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/...
4 4209887493\t74104320184119307\t10004616\t334\t... 4209887493 74104320184119307 10004616 334 14150170026959126 10010102 334 14313 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.078, 2.304, 0.132, 0.191, 0.0, 0.087, 0.341... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/...

In [134]:
df['user_buy'] = df.apply(lambda x: '{}-{}'.format(str(x['user_id']), str(x['buy_spu'])), axis = 1)

In [135]:
df.head()


Out[135]:
0 user_id buy_spu buy_sn buy_ct3 view_spu view_sn view_ct3 time_interval view_cnt view_secondes view_features buy_features spu url user_buy
0 4209887493\t453532580309307392\t10004616\t334\... 4209887493 453532580309307392 10004616 334 14150170026959126 10010102 334 21114 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.1, 1.804, 0.049, 0.883, 0.092, 0.053, 0.042... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 4209887493-453532580309307392
1 529805243\t103096245561765919\t10010102\t334\t... 529805243 103096245561765919 10010102 334 14150170026959126 10010102 334 37794 4 66 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.467, 0.385, 0.0, 0.043, 0.292, 0.0, 0.448, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 529805243-103096245561765919
2 3748045464\t446777176556679168\t10005711\t334\... 3748045464 446777176556679168 10005711 334 14150170026959126 10010102 334 18820 1 34 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.018, 0.161, 0.088, 0.141, 0.231, 0.0, 0.036... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 3748045464-446777176556679168
3 4209887493\t438895881520357521\t10004616\t334\... 4209887493 438895881520357521 10004616 334 14150170026959126 10010102 334 13978 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.036, 0.439, 0.0, 0.074, 0.194, 0.0, 0.331, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 4209887493-438895881520357521
4 4209887493\t74104320184119307\t10004616\t334\t... 4209887493 74104320184119307 10004616 334 14150170026959126 10010102 334 14313 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.078, 2.304, 0.132, 0.191, 0.0, 0.087, 0.341... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 4209887493-74104320184119307

In [136]:
df_cf = df.groupby(['user_buy','view_spu' ,'view_cnt']).count()

In [137]:
df_cf = df_cf.reset_index()[['user_buy', 'view_spu', 'view_cnt']]
df_cf.head()


Out[137]:
user_buy view_spu view_cnt
0 1018047589-2885476247461195776 293373310066720769 2
1 1018047589-2885476247461195776 302661994239094787 2
2 1018047589-2885476247461195776 447903106563379200 1
3 1018047589-2885476247461195776 972290827227131904 1
4 1018047589-2885476247461195776 2192766390940934144 2

In [138]:
piv = df_cf.pivot_table(index=['user_buy'], columns=['view_spu'], values='view_cnt')
piv_norm = piv.apply(lambda x: (x/np.mean(x)), axis=1)
piv_norm.fillna(0, inplace=True)
piv_norm = piv_norm.T
piv_norm = piv_norm.loc[:, (piv_norm != 0).any(axis=0)]
piv_sparse = sp.sparse.csr_matrix(piv_norm.values)

In [139]:
item_similarity = cosine_similarity(piv_sparse)
user_similarity = cosine_similarity(piv_sparse.T)

In [140]:
item_sim_df = pd.DataFrame(item_similarity, index = piv_norm.index, columns = piv_norm.index)
user_sim_df = pd.DataFrame(user_similarity, index = piv_norm.columns, columns = piv_norm.columns)

In [428]:
def similar_product(product_name):
    df_spu2 = df_spu
    candi = (df_spu[df_spu['spu'] == product_name]['spu_features'].iloc[0])
    df_spu2['sim'] = df_spu2.apply(lambda x: similarity(x['spu_features'], candi), axis = 1)
    df_spu2['rank'] = df_spu2['sim'].rank(ascending=False)
    return list(df_spu2.sort_values(by = 'rank')['spu'][0:10])

In [427]:
df_spu[df_spu['spu'] == 458036191955415221]['spu_features'].iloc[0]


Out[427]:
[0.084,
 0.306,
 0.0,
 0.2,
 0.088,
 0.029,
 0.166,
 0.127,
 0.257,
 0.009,
 0.358,
 0.052,
 0.0,
 0.0,
 0.059,
 0.0,
 0.059,
 0.063,
 0.357,
 0.027,
 0.665,
 0.69,
 0.0,
 0.167,
 0.097,
 0.0,
 0.005,
 0.969,
 0.016,
 0.212,
 0.0,
 0.121,
 0.384,
 0.008,
 0.048,
 0.0,
 0.0,
 0.736,
 0.0,
 0.001,
 0.21,
 0.0,
 0.276,
 0.0,
 0.035,
 0.127,
 0.133,
 0.012,
 0.269,
 0.676,
 0.005,
 0.212,
 1.042,
 0.042,
 0.159,
 0.021,
 0.102,
 0.48,
 0.105,
 1.2,
 0.346,
 0.072,
 0.023,
 0.0,
 0.719,
 0.026,
 0.495,
 0.037,
 0.37,
 0.014,
 0.0,
 0.0,
 0.0,
 0.012,
 0.001,
 0.778,
 0.294,
 0.863,
 0.455,
 0.0,
 0.421,
 1.058,
 0.081,
 0.047,
 0.164,
 1.853,
 0.0,
 0.718,
 0.082,
 0.124,
 0.184,
 0.0,
 0.15,
 0.006,
 0.0,
 0.0,
 0.058,
 0.026,
 0.995,
 1.288,
 0.208,
 3.183,
 0.274,
 0.154,
 0.99,
 0.101,
 0.0,
 0.328,
 0.874,
 2.979,
 0.2,
 0.093,
 0.238,
 0.144,
 0.0,
 0.157,
 0.182,
 0.0,
 0.0,
 0.293,
 0.069,
 0.211,
 0.058,
 0.156,
 0.629,
 0.296,
 0.111,
 0.0,
 0.083,
 2.953,
 0.049,
 0.482,
 0.031,
 0.041,
 0.017,
 0.032,
 2.279,
 0.0,
 0.0,
 0.0,
 1.778,
 0.029,
 0.431,
 0.445,
 0.01,
 0.107,
 0.006,
 0.019,
 0.169,
 0.003,
 0.0,
 0.566,
 0.03,
 0.0,
 0.007,
 0.018,
 0.52,
 0.177,
 0.381,
 1.059,
 0.0,
 0.139,
 0.326,
 0.0,
 0.401,
 0.13,
 1.363,
 0.045,
 0.514,
 1.407,
 0.758,
 0.907,
 0.0,
 0.0,
 0.076,
 0.013,
 4.054,
 0.2,
 0.555,
 1.115,
 0.0,
 0.056,
 0.0,
 0.218,
 0.271,
 0.074,
 0.0,
 0.615,
 0.007,
 0.175,
 0.145,
 0.055,
 0.762,
 0.591,
 0.178,
 0.593,
 1.042,
 0.827,
 0.18,
 0.785,
 0.394,
 0.0,
 0.089,
 0.349,
 0.321,
 0.0,
 0.038,
 0.17,
 0.0,
 2.378,
 0.291,
 0.417,
 0.08,
 0.035,
 0.294,
 0.843,
 0.06,
 0.11,
 0.101,
 0.886,
 1.076,
 0.869,
 0.023,
 0.008,
 0.053,
 0.297,
 0.034,
 0.195,
 0.017,
 1.094,
 0.456,
 0.021,
 0.0,
 0.05,
 0.0,
 0.28,
 0.0,
 0.005,
 0.078,
 0.0,
 0.122,
 0.534,
 1.314,
 0.049,
 0.0,
 0.256,
 0.05,
 0.03,
 1.044,
 0.426,
 0.0,
 0.008,
 0.057,
 0.131,
 0.249,
 0.006,
 0.333,
 0.019,
 0.149,
 0.112,
 0.324,
 0.346,
 0.247,
 0.001,
 0.32,
 0.108,
 0.004,
 0.067,
 0.523,
 0.231,
 0.0,
 0.0,
 0.067,
 0.318,
 0.0,
 0.023,
 0.006,
 0.244,
 0.074,
 2.387,
 0.397,
 0.496,
 0.221,
 0.0,
 0.0,
 0.486,
 0.0,
 0.019,
 0.048,
 0.107,
 0.054,
 0.0,
 0.036,
 0.014,
 0.006,
 1.431,
 1.23,
 0.052,
 0.0,
 0.0,
 0.46,
 0.052,
 0.0,
 0.96,
 0.189,
 0.056,
 0.021,
 0.193,
 0.166,
 0.044,
 0.143,
 1.921,
 0.577,
 0.368,
 0.0,
 0.0,
 0.031,
 2.319,
 0.179,
 0.004,
 0.105,
 0.167,
 0.008,
 0.23,
 0.005,
 0.0,
 0.598,
 0.103,
 0.0,
 0.15,
 0.024,
 0.0,
 0.029,
 0.056,
 0.004,
 1.179,
 0.28,
 1.374,
 1.81,
 0.16,
 0.163,
 1.253,
 0.119,
 0.017,
 0.335,
 0.1,
 0.0,
 0.313,
 0.235,
 0.017,
 0.001,
 0.426,
 0.072,
 0.556,
 0.0,
 0.019,
 0.01,
 0.538,
 0.0,
 0.501,
 0.026,
 0.271,
 0.399,
 0.0,
 1.205,
 0.028,
 3.248,
 0.109,
 0.0,
 0.01,
 0.035,
 0.036,
 0.682,
 0.0,
 0.09,
 0.0,
 0.0,
 0.115,
 0.187,
 0.173,
 0.114,
 0.0,
 0.0,
 0.0,
 0.037,
 0.052,
 0.142,
 0.029,
 0.005,
 1.109,
 0.147,
 0.0,
 1.586,
 0.175,
 0.549,
 1.063,
 0.082,
 0.179,
 0.752,
 0.0,
 0.321,
 0.0,
 0.556,
 0.0,
 0.124,
 0.052,
 0.16,
 0.084,
 0.252,
 0.069,
 0.049,
 0.34,
 0.009,
 0.0,
 0.0,
 0.0,
 0.101,
 1.743,
 0.058,
 0.0,
 0.0,
 0.336,
 0.089,
 1.804,
 0.056,
 0.006,
 0.095,
 0.318,
 0.146,
 0.088,
 0.009,
 2.152,
 0.0,
 0.044,
 0.0,
 0.057,
 0.373,
 0.018,
 0.128,
 1.293,
 0.676,
 0.0,
 0.043,
 0.01,
 0.038,
 0.003,
 0.005,
 0.207,
 2.47,
 0.127,
 0.162,
 0.0,
 0.012,
 0.0,
 0.44,
 0.685,
 0.189,
 0.074,
 0.0,
 0.611,
 0.151,
 1.678,
 0.18,
 0.261,
 0.05,
 0.426,
 0.193,
 0.326,
 0.18,
 0.187,
 0.045,
 2.542,
 0.037,
 0.208,
 0.215,
 0.943,
 0.047,
 0.082,
 0.247,
 0.853,
 0.106,
 0.402,
 0.491,
 0.014,
 0.245,
 0.0,
 0.0,
 0.658,
 2.221,
 0.0,
 0.071,
 2.839,
 0.123,
 0.852,
 1.838,
 0.835,
 0.0,
 0.293,
 0.048,
 0.044,
 0.475,
 0.033,
 0.089,
 0.0,
 0.29,
 0.956,
 2.701,
 2.531,
 0.449,
 0.166,
 0.242,
 0.515,
 0.434,
 0.146,
 0.238,
 0.0,
 0.038,
 0.202,
 0.0,
 0.0,
 0.379,
 0.257,
 0.095,
 0.259,
 0.0,
 0.362,
 0.104,
 0.584,
 0.0,
 1.079,
 0.021,
 0.561,
 0.0,
 0.0,
 0.01,
 0.319,
 0.003,
 0.32,
 0.392,
 0.101,
 0.252,
 0.0,
 0.041,
 0.234,
 0.102,
 4.737,
 0.2,
 0.0,
 0.0,
 0.099,
 0.689,
 0.041,
 3.696,
 0.054,
 0.076,
 0.0,
 0.892,
 0.235,
 0.12,
 0.319,
 0.687,
 0.347,
 0.452,
 0.061,
 0.024,
 0.02,
 0.844,
 0.034,
 0.644,
 0.004,
 0.043,
 0.682,
 0.236,
 0.354,
 0.209,
 0.159,
 0.2,
 0.0,
 0.615,
 0.264,
 0.375,
 0.047,
 0.296,
 0.077,
 0.488,
 0.055,
 0.032,
 0.168,
 0.471,
 0.007,
 0.017,
 0.213,
 1.324,
 0.072,
 0.007,
 0.132,
 0.056,
 0.496,
 0.054,
 0.573,
 0.051,
 0.811,
 0.0,
 0.0,
 0.0,
 0.058,
 0.0,
 0.106,
 0.037,
 0.822,
 0.058,
 0.346,
 0.204,
 0.007,
 0.446,
 0.025,
 0.0,
 0.192,
 0.247,
 0.036,
 0.02,
 0.0,
 0.044,
 0.509,
 0.061,
 0.549,
 6.572,
 0.02,
 0.127,
 0.0,
 0.052,
 0.14,
 0.489,
 0.0,
 0.013,
 0.613,
 0.0,
 0.0,
 0.0,
 0.0,
 0.003,
 0.046,
 0.489,
 0.132,
 0.315,
 1.312,
 0.755,
 0.0,
 0.322,
 0.091,
 1.003,
 0.0,
 0.028,
 0.006,
 0.148,
 0.097,
 0.003,
 1.363,
 0.223,
 0.266,
 0.011,
 0.126,
 0.078,
 0.097,
 0.0,
 0.021,
 0.009,
 0.0,
 0.036,
 0.257,
 0.006,
 0.534,
 0.0,
 1.094,
 0.344,
 0.189,
 0.016,
 1.248,
 0.0,
 0.237,
 0.03,
 0.227,
 0.086,
 0.899,
 0.0,
 0.268,
 0.205,
 0.0,
 0.119,
 0.0,
 0.013,
 0.011,
 0.0,
 0.187,
 0.02,
 0.042,
 0.284,
 0.054,
 0.0,
 0.288,
 0.012,
 0.428,
 1.034,
 0.0,
 0.0,
 0.127,
 0.111,
 0.205,
 0.214,
 0.029,
 0.0,
 0.38,
 0.547,
 0.937,
 0.342,
 0.0,
 0.131,
 0.059,
 0.0,
 0.013,
 0.053,
 0.001,
 0.464,
 0.048,
 0.064,
 0.035,
 0.008,
 0.564,
 0.0,
 0.018,
 0.19,
 0.365,
 3.314,
 0.414,
 0.817,
 0.0,
 0.279,
 0.0,
 1.552,
 0.06,
 0.316,
 0.8,
 0.841,
 0.476,
 0.324,
 0.024,
 0.229,
 0.133,
 0.0,
 0.0,
 3.043,
 0.355,
 0.28,
 0.0,
 0.013,
 0.249,
 0.529,
 0.103,
 0.017,
 0.053,
 0.009,
 0.18,
 0.0,
 0.386,
 0.276,
 0.323,
 0.0,
 0.771,
 0.27,
 0.513,
 0.476,
 0.239,
 0.063,
 0.0,
 0.807,
 0.705,
 0.025,
 0.015,
 0.008,
 1.374,
 0.0,
 0.0,
 0.045,
 0.034,
 0.0,
 0.34,
 0.13,
 0.0,
 0.215,
 0.0,
 0.075,
 0.0,
 0.691,
 0.26,
 0.043,
 0.214,
 0.096,
 0.019,
 0.332,
 0.174,
 0.089,
 0.039,
 0.0,
 0.0,
 1.721,
 0.821,
 0.199,
 0.063,
 0.874,
 0.007,
 0.0,
 0.0,
 0.104,
 0.241,
 0.0,
 1.289,
 0.263,
 0.06,
 0.45,
 0.798,
 0.104,
 0.001,
 0.223,
 0.0,
 0.008,
 0.034,
 0.191,
 0.0,
 0.178,
 0.048,
 0.034,
 0.001,
 0.017,
 0.0,
 0.0,
 0.032,
 0.37,
 0.0,
 0.07,
 0.0,
 0.599,
 0.476,
 0.222,
 0.0,
 6.315,
 0.274,
 0.621,
 0.021,
 0.037,
 0.341,
 0.053,
 4.8,
 0.086,
 0.184,
 0.014,
 0.047,
 0.355,
 0.192,
 0.176,
 0.554,
 0.0,
 0.177,
 0.112,
 0.31,
 0.0,
 0.03,
 0.0,
 0.175,
 0.0,
 0.707,
 1.375,
 0.248,
 0.098,
 0.093,
 0.317,
 0.098,
 0.059,
 0.13,
 2.406,
 0.0,
 0.0,
 1.71,
 0.0,
 0.449,
 0.275,
 0.153,
 0.019,
 0.514,
 1.551,
 0.0,
 0.0,
 0.0,
 0.057,
 0.696,
 0.0,
 0.297,
 0.03,
 0.002,
 0.146,
 0.0,
 0.311,
 0.56,
 0.429,
 0.102,
 1.062,
 0.219,
 0.03,
 0.161,
 0.426,
 0.039,
 0.175,
 0.0,
 0.884,
 0.509,
 0.052,
 0.104,
 0.0,
 0.0,
 0.0,
 0.178,
 0.241,
 0.26,
 0.128,
 0.831,
 0.045,
 0.072,
 0.0,
 0.011,
 0.0,
 0.0,
 0.022,
 0.003,
 0.04,
 0.022,
 0.0,
 0.406,
 0.315,
 0.059,
 0.332,
 0.21,
 0.377,
 0.429,
 0.0,
 0.234,
 0.123,
 0.0,
 0.154,
 3.243,
 0.904,
 2.536,
 3.8,
 0.052,
 0.0,
 0.009,
 0.439,
 0.02,
 0.211,
 0.391,
 0.03,
 0.467,
 0.181,
 0.408,
 0.036,
 0.467,
 0.0,
 0.008,
 0.0,
 0.0,
 0.044,
 0.0,
 0.262,
 0.002,
 0.277,
 0.007,
 0.348,
 0.012,
 0.815,
 0.211,
 0.047,
 0.593,
 0.547,
 0.027,
 0.155,
 0.0,
 0.0,
 0.605,
 1.441,
 0.536,
 0.389,
 0.401,
 0.355,
 0.025,
 0.42,
 0.143,
 0.127,
 0.016,
 0.102,
 0.0,
 0.316,
 0.006,
 ...]

In [ ]:
458036191955415221

In [408]:
similar_product(357872333107204)


Out[408]:
[357872333107204,
 447058691588489219,
 7653099516274397185,
 3778596430335328256,
 4429648062768701440,
 3773248405777813504,
 440866234308272136,
 937950908773744640,
 8081222873064148999,
 463665709926023172]

In [141]:
def top_product(product_name):
    count = 1
    print('Similar products to {} include:\n'.format(product_name))
    for item in item_sim_df.sort_values(by = product_name, ascending = False).index[1:11]:
        print('No. {}: {}'.format(count, item))
        count +=1
    return item_sim_df.sort_values(by = product_name, ascending = False).index[1:11]

In [190]:
def top_product_noprint(product_name):
    return list(item_sim_df.sort_values(by = product_name, ascending = False).index[1:])

In [191]:
a = top_product_noprint(357875526680651)

In [385]:
df_spu


Out[385]:
spu CF_item spu_features CF_features ave_CF_fea
0 357872333107204 [8952950888272863232, 2846351224980500480, 881... [0.035, 0.385, 0.112, 0.014, 0.0, 0.123, 0.438... [[[0.462, 0.551, 0.068, 0.833, 0.0, 0.0, 0.0, ... [0.3597, 0.7012, 0.0831, 1.0618, 0.2302, 0.123...
1 357875526680651 [2046769978417582, 461413925257830545, 3255052... [0.132, 1.678, 0.061, 0.918, 0.462, 0.342, 0.4... [[[0.357, 2.503, 0.0, 0.641, 0.143, 0.0, 0.104... [0.4727, 0.8078, 0.0697, 0.5796, 0.3075, 0.009...
2 357882254983171 [459725075493814272, 8582811288237465674, 1976... [0.026, 0.936, 0.056, 0.614, 0.139, 0.0, 0.302... [[[1.884, 0.52, 0.0, 3.98, 0.175, 0.008, 0.663... [0.6206, 0.8754, 0.1066, 0.7549, 0.2387, 0.186...
3 357901107539985 [2466922956389351424, 7804532970923106304, 130... [0.229, 0.543, 0.132, 0.144, 0.295, 0.018, 0.0... [[[0.124, 0.819, 0.0, 0.596, 0.306, 0.043, 0.2... [0.449, 0.7069, 0.1971, 0.7197, 0.2461, 0.0106...
4 639360131194904 [81141212316332372, 320394952166453251, 639360... [1.113, 0.5, 0.758, 0.218, 0.0, 0.0, 0.335, 1.... [[[0.06, 0.15, 0.011, 0.946, 0.501, 0.0, 1.309... [0.2552, 0.5227, 0.0964, 0.3326, 0.1703, 0.011...
5 639369692328147 [463947194068213779, 4861516576375025, 9630021... [1.489, 0.457, 0.0, 0.639, 0.0, 0.029, 0.656, ... [[[0.057, 0.172, 0.0, 0.068, 0.284, 0.0, 0.0, ... [0.5026, 0.6077, 0.1351, 1.0361, 0.3056, 0.098...
6 639371126526005 [2166589220222894080, 444243906496860160, 4611... [0.28, 0.566, 0.009, 0.004, 0.119, 0.199, 0.39... [[[0.051, 0.253, 0.0, 0.173, 0.803, 0.002, 0.2... [0.2053, 0.3088, 0.0316, 0.194, 0.2859, 0.1264...
7 639389503180805 [81985618826645681, 15838990034669717, 1014074... [0.023, 0.853, 0.11, 0.415, 0.681, 0.078, 0.50... [[[0.621, 0.779, 0.019, 0.708, 0.32, 0.268, 0.... [0.4553, 0.8364, 0.0406, 0.5165, 0.2133, 0.046...
8 639392717246493 [13587203813871817, 1765254792024080, 10440669... [0.496, 1.321, 0.014, 0.093, 0.145, 0.118, 0.1... [[[0.086, 0.85, 0.023, 0.019, 0.018, 0.0, 0.16... [0.3234, 0.9647, 0.089, 0.2659, 0.1303, 0.0571...
9 920816362999808 [2457352807885774848, 8584218663084269568, 465... [0.375, 0.498, 0.093, 1.439, 0.482, 0.268, 0.1... [[[0.156, 0.49, 0.359, 0.513, 0.0, 0.066, 0.49... [0.5876, 0.3963, 0.194, 0.5702, 0.0595, 0.1919...
10 920832942268416 [299565768102322188, 31601602323353621, 439458... [0.151, 0.729, 0.118, 0.091, 0.284, 0.0, 0.331... [[[0.804, 0.159, 0.134, 2.68, 0.272, 0.0, 0.14... [0.3458, 0.7266, 0.1114, 0.6795, 0.271, 0.0797...
11 920846404657158 [319550492123078656, 3454096121704448, 2995657... [0.172, 0.284, 0.004, 0.19, 0.132, 0.0, 0.123,... [[[0.01, 0.928, 0.125, 0.437, 0.17, 0.009, 0.2... [0.3545, 0.7114, 0.0839, 0.5078, 0.3372, 0.011...
12 1202299096490170 [1044348428712186010, 93526113400463372, 89532... [0.538, 0.997, 0.016, 0.28, 0.538, 0.436, 0.30... [[[0.672, 0.383, 0.297, 0.59, 0.604, 0.098, 0.... [0.352, 0.6496, 0.0722, 0.3963, 0.3971, 0.0728...
13 1202310084616220 [6550345938870290, 462821289168044046, 3329046... [0.897, 1.037, 0.0, 0.09, 0.027, 0.53, 0.088, ... [[[0.09, 1.494, 0.048, 0.0, 0.029, 0.298, 0.04... [0.2141, 0.5406, 0.1607, 0.9761, 0.1241, 0.055...
14 1202315469205505 [1044348428712186010, 93526113400463372, 89532... [0.175, 0.717, 0.0, 0.445, 0.103, 0.0, 0.578, ... [[[0.672, 0.383, 0.297, 0.59, 0.604, 0.098, 0.... [0.352, 0.6496, 0.0722, 0.3963, 0.3971, 0.0728...
15 1202326037671948 [9041615583843246080, 8939721565444202502, 420... [0.268, 0.209, 0.016, 0.789, 1.177, 0.0, 0.034... [[[0.205, 0.614, 0.0, 0.283, 0.029, 0.006, 0.3... [0.5225, 0.3637, 0.0274, 0.7803, 0.2, 0.0929, ...
16 1202342670667978 [8295425334878236672, 444806904121757743, 4346... [0.073, 0.544, 0.0, 0.228, 0.066, 0.004, 1.104... [[[0.736, 0.65, 0.0, 0.055, 0.027, 0.0, 0.337,... [0.2007, 0.8379, 0.0326, 0.3353, 0.0847, 0.009...
17 1202342670671910 [13587203813871817, 1765254792024080, 10440669... [0.339, 2.969, 0.149, 0.335, 0.0, 0.0, 0.334, ... [[[0.086, 0.85, 0.023, 0.019, 0.018, 0.0, 0.16... [0.3234, 0.9647, 0.089, 0.2659, 0.1303, 0.0571...
18 1483782161825904 [302662024581271741, 19216708624982102, 148378... [0.528, 1.082, 0.417, 1.969, 0.291, 0.022, 0.0... [[[0.207, 0.77, 0.009, 0.123, 0.631, 0.064, 0.... [0.2882, 0.7877, 0.1187, 1.1597, 0.3005, 0.073...
19 1765254792024080 [13587203813871817, 1765254792024080, 10440669... [0.69, 0.17, 0.0, 0.127, 0.051, 0.0, 0.462, 0.... [[[0.086, 0.85, 0.023, 0.019, 0.018, 0.0, 0.16... [0.3234, 0.9647, 0.089, 0.2659, 0.1303, 0.0571...
20 2046724026622135 [2046724026622135, 1386059101696032768, 164019... [0.095, 0.719, 0.0, 0.114, 0.0, 0.0, 0.02, 0.0... [[[0.095, 0.719, 0.0, 0.114, 0.0, 0.0, 0.02, 0... [0.2095, 0.5489, 0.1747, 0.2043, 0.0683, 0.113...
21 2046767967338535 [5047767116498747392, 4067389766593323193, 204... [1.915, 0.723, 0.068, 0.761, 0.082, 0.0, 0.098... [[[0.044, 0.315, 0.017, 0.256, 0.003, 0.004, 0... [0.306, 0.8059, 0.1071, 0.3562, 0.0785, 0.0614...
22 2046769978417582 [2046769978417582, 461413925257830545, 3255052... [0.357, 2.503, 0.0, 0.641, 0.143, 0.0, 0.104, ... [[[0.357, 2.503, 0.0, 0.641, 0.143, 0.0, 0.104... [0.4727, 0.8078, 0.0697, 0.5796, 0.3075, 0.009...
23 2609679722156042 [31038690201997892, 81985606739279872, 4374885... [0.34, 0.747, 0.0, 0.036, 0.287, 0.075, 0.652,... [[[0.026, 2.31, 0.026, 0.65, 0.027, 0.014, 0.0... [0.1925, 0.8188, 0.0477, 0.1953, 0.1594, 0.033...
24 2891145450029056 [8945069666075066368, 86770695957168136, 80015... [0.307, 0.529, 0.062, 0.137, 0.602, 0.26, 0.13... [[[0.047, 1.04, 0.0, 0.538, 0.273, 0.036, 0.04... [0.2889, 1.2939, 0.1574, 0.404, 0.1981, 0.3745...
25 2891152447287296 [89866911491678214, 76074637632249877, 1111339... [0.156, 1.136, 0.104, 0.368, 0.199, 0.223, 0.1... [[[1.039, 0.054, 0.159, 1.421, 0.168, 0.496, 0... [0.5565, 0.5092, 0.0445, 0.8707, 0.2669, 0.111...
26 2891160600990330 [453532585603989586, 14150149385499493, 901797... [0.011, 0.057, 0.003, 0.313, 0.017, 0.0, 0.93,... [[[0.166, 0.047, 0.0, 0.408, 0.044, 0.0, 1.0, ... [0.2168, 0.3611, 0.1358, 1.3846, 0.1856, 0.086...
27 2891169552208298 [3735594482335915, 464791631716921422, 4397403... [0.475, 1.321, 0.009, 0.157, 0.017, 0.065, 0.4... [[[0.245, 0.255, 0.06, 0.593, 0.0, 0.467, 0.08... [0.478, 0.927, 0.2476, 0.7833, 0.392, 0.093, 0...
28 3172644484366424 [315046913029713936, 461695358101766158, 46282... [0.706, 0.552, 0.0, 0.55, 0.241, 0.0, 0.43, 0.... [[[1.037, 1.816, 0.067, 0.334, 0.272, 0.04, 0.... [0.5111, 1.433, 0.0416, 0.5964, 0.2476, 0.0287...
29 3454095765094424 [75230216867246653, 10491016902120013, 8582811... [0.158, 0.029, 0.0, 0.332, 0.369, 0.0, 0.35, 0... [[[0.047, 1.367, 0.01, 0.016, 0.192, 0.144, 0.... [0.3853, 0.8521, 0.0434, 0.3653, 0.1598, 0.017...
... ... ... ... ... ...
2125 9014593909249236993 [9033734206588821512, 8459806721299259392, 131... [0.962, 0.83, 0.029, 0.529, 0.336, 1.437, 0.60... [[[1.107, 0.61, 0.0, 1.565, 0.093, 0.026, 0.19... [0.3268, 0.6027, 0.0596, 0.8568, 0.3677, 0.065...
2126 9015719808079347717 [1403510549529337857, 4716752541135134720, 246... [0.599, 0.232, 0.182, 0.959, 0.013, 0.54, 0.05... [[[0.398, 0.12, 0.211, 0.331, 0.114, 0.129, 0.... [0.4865, 0.5288, 0.125, 0.6937, 0.1892, 0.0139...
2127 9016845703713148936 [8943099259803860992, 4214038229457264801, 656... [1.14, 0.264, 0.0, 0.497, 0.115, 0.04, 0.146, ... [[[1.193, 0.452, 0.086, 1.264, 0.026, 0.153, 0... [1.0527, 0.4987, 0.1385, 0.8035, 0.0896, 0.107...
2128 9017127181215457486 [318143145884868618, 322365263185870869, 45212... [1.491, 0.487, 0.001, 0.313, 0.106, 0.0, 0.565... [[[0.043, 1.798, 0.06, 0.324, 0.522, 0.006, 0.... [0.3767, 0.922, 0.0483, 0.3788, 0.1126, 0.1851...
2129 9017971603620016137 [2480715230932934656, 9013468003992612872, 570... [0.457, 0.066, 0.125, 2.235, 0.394, 0.0, 0.921... [[[0.224, 2.103, 0.111, 1.814, 0.024, 0.053, 0... [0.2507, 0.7091, 0.0523, 1.216, 0.1013, 0.0805...
2130 9017971606044979200 [453532585603989586, 314765448579829939, 90179... [0.08, 0.252, 0.023, 2.211, 0.304, 0.157, 0.23... [[[0.166, 0.047, 0.0, 0.408, 0.044, 0.0, 1.0, ... [0.1928, 0.4477, 0.1556, 1.1285, 0.1973, 0.141...
2131 9018816028550127616 [25972089290305537, 82548606340628534, 2503076... [0.731, 0.577, 0.0, 0.579, 0.053, 0.0, 0.173, ... [[[0.223, 0.478, 0.018, 0.633, 0.081, 0.0, 0.0... [0.2758, 0.4644, 0.038, 0.8915, 0.1976, 0.1173...
2132 9022193728259383296 [8662750265739194369, 1396755150088278017, 896... [0.287, 0.508, 0.083, 1.605, 0.0, 0.0, 0.161, ... [[[0.098, 0.791, 0.037, 1.392, 0.091, 0.0, 0.8... [0.3521, 0.5703, 0.164, 1.1472, 0.1509, 0.0404...
2133 9022475203247398916 [8379867827911725056, 2833684851741020160, 247... [0.448, 0.296, 0.0, 0.289, 0.034, 0.0, 0.204, ... [[[0.244, 0.285, 0.014, 0.0, 0.026, 0.172, 0.0... [0.3138, 0.4603, 0.0615, 0.7818, 0.1203, 0.053...
2134 9022756682497105932 [2889135421455757312, 8964209883068317705, 139... [0.424, 0.469, 0.866, 0.072, 0.17, 0.029, 0.04... [[[0.0, 0.407, 0.0, 0.936, 0.368, 0.0, 0.633, ... [0.4331, 0.5593, 0.1124, 1.3875, 0.1317, 0.037...
2135 9023882578131013643 [2834529276655054848, 8944788109675335687, 247... [0.369, 0.951, 0.139, 0.554, 0.0, 0.057, 0.676... [[[0.508, 0.872, 0.053, 0.959, 0.406, 0.079, 0... [0.3402, 0.8657, 0.0735, 1.1642, 0.2633, 0.023...
2136 9024727008410832903 [2814544552628457472, 4710841566656688129, 246... [0.268, 1.398, 0.048, 0.996, 0.038, 0.175, 0.0... [[[0.654, 0.789, 0.047, 1.587, 0.603, 0.122, 0... [0.4715, 0.4672, 0.0954, 0.9115, 0.173, 0.0866...
2137 9026415852921315334 [8935499435443806210, 2469456231166840832, 250... [2.351, 0.923, 0.057, 2.389, 0.001, 0.279, 0.0... [[[0.151, 0.411, 0.516, 2.49, 0.478, 0.0, 0.02... [0.5588, 0.6993, 0.162, 1.3153, 0.3403, 0.052,...
2138 9026697332171030534 [306321203330416650, 8943943684745093124, 3139... [0.226, 2.182, 0.349, 1.539, 0.623, 0.08, 0.55... [[[0.642, 0.678, 0.035, 0.179, 0.209, 0.0, 0.6... [0.2872, 0.5251, 0.0575, 0.6054, 0.1139, 0.077...
2139 9027823227804868619 [32446013086847137, 4641598722385866752, 89650... [0.022, 0.249, 0.024, 1.01, 0.68, 0.342, 0.285... [[[0.5, 0.052, 0.004, 0.379, 1.003, 0.023, 0.1... [0.5151, 0.5221, 0.0721, 0.8971, 0.2982, 0.126...
2140 9031763960910209025 [299565803160592409, 437207054292414662, 29084... [0.439, 1.185, 0.27, 0.076, 0.193, 1.313, 0.11... [[[0.403, 0.956, 0.007, 0.555, 0.099, 0.064, 0... [0.3553, 0.8389, 0.063, 1.3263, 0.2993, 0.2635...
2141 9033171252362444801 [436644089099345920, 1394503350274605056, 7653... [0.111, 1.705, 0.287, 0.536, 0.174, 0.082, 0.3... [[[0.055, 0.927, 0.855, 0.292, 0.129, 0.095, 0... [0.3778, 0.6725, 0.1822, 0.6073, 0.1004, 0.073...
2142 9033734206588821512 [1314001506935377921, 8459806721299259392, 946... [1.107, 0.61, 0.0, 1.565, 0.093, 0.026, 0.198,... [[[0.003, 0.944, 0.131, 0.601, 1.503, 0.008, 0... [0.6796, 0.9908, 0.0429, 1.0009, 0.341, 0.2758...
2143 9035141577199349769 [8970120857579282441, 9007838504458428425, 282... [0.281, 0.999, 0.075, 0.693, 0.0, 0.0, 0.025, ... [[[0.0, 0.543, 0.01, 0.138, 0.018, 0.373, 0.12... [0.3044, 0.6704, 0.1144, 0.9836, 0.1997, 0.090...
2144 9036830509787594752 [4418107589125877775, 2813981603359453184, 439... [0.046, 0.136, 0.123, 0.013, 0.5, 0.233, 0.118... [[[0.162, 0.766, 0.024, 0.238, 0.041, 0.134, 0... [0.2982, 0.8262, 0.1709, 0.6126, 0.0938, 0.073...
2145 9037674851989819400 [5140090909293244416, 2910246045436305408, 889... [1.315, 0.858, 0.031, 2.212, 0.165, 0.041, 0.0... [[[0.012, 0.469, 0.0, 0.021, 0.004, 0.0, 0.149... [0.3437, 0.6673, 0.1118, 0.7965, 0.1679, 0.095...
2146 9038237806216196097 [4063730591896084758, 320113495556218907, 2990... [0.916, 0.513, 0.167, 0.201, 0.284, 0.023, 0.1... [[[0.53, 0.945, 0.0, 0.785, 0.028, 0.161, 0.14... [0.3446, 0.9183, 0.0247, 0.3677, 0.213, 0.057,...
2147 9039645182176481286 [1675415380238614528, 4641598722385866752, 246... [0.216, 0.17, 0.018, 3.05, 0.224, 0.835, 0.334... [[[0.067, 0.002, 0.048, 1.074, 0.542, 0.023, 0... [0.3266, 0.384, 0.0903, 0.7626, 0.4486, 0.1297...
2148 9039926651803541506 [467887839865233418, 33571969587101699, 282664... [0.219, 1.177, 0.074, 1.615, 0.112, 0.207, 0.0... [[[0.657, 0.508, 0.436, 0.075, 0.625, 0.192, 0... [0.3592, 0.6364, 0.113, 0.9559, 0.1909, 0.0398...
2149 9041615583843246080 [8939721565444202502, 13868682270638081, 12023... [0.205, 0.614, 0.0, 0.283, 0.029, 0.006, 0.328... [[[1.466, 0.586, 0.0, 0.439, 0.049, 0.0, 0.104... [0.7453, 0.8045, 0.0315, 0.8878, 0.3513, 0.138...
2150 9088621908743286785 [84800386939424768, 5705920362590242, 40713304... [0.0, 0.502, 0.0, 0.01, 0.206, 0.031, 1.15, 0.... [[[0.053, 0.258, 0.002, 0.0, 0.449, 0.061, 0.9... [0.2219, 0.8111, 0.0544, 0.2453, 0.2216, 0.046...
2151 9089747807251402752 [447903087581487120, 84800366075408747, 318987... [0.764, 0.496, 0.486, 0.159, 0.165, 0.474, 0.8... [[[0.176, 1.749, 0.0, 0.185, 0.02, 0.31, 0.084... [0.3317, 0.7604, 0.0334, 0.7855, 0.1488, 0.231...
2152 9090029283626840066 [88178053449961475, 299847247939522560, 909002... [0.423, 0.22, 0.083, 0.994, 0.096, 0.0, 0.048,... [[[0.004, 0.453, 0.0, 0.643, 0.442, 0.043, 0.0... [0.3801, 0.6667, 0.0895, 0.4036, 0.2618, 0.027...
2153 9090592232181542912 [8306121384015122432, 2900394421224820736, 283... [0.25, 0.926, 0.073, 1.526, 0.386, 0.024, 0.49... [[[0.157, 0.964, 0.117, 0.82, 0.915, 0.016, 0.... [0.7383, 0.6909, 0.0327, 1.0177, 0.4509, 0.134...
2154 9094251405871296512 [100844442657288193, 292247467889451008, 29252... [0.055, 0.351, 0.0, 0.475, 0.129, 0.055, 0.185... [[[0.206, 1.239, 0.043, 1.42, 0.057, 0.03, 0.0... [0.5435, 0.9577, 0.0513, 1.5471, 0.1106, 0.312...

2155 rows × 5 columns


In [143]:
plot_top_rank(639371126526005)


Similar products to 639371126526005 include:

No. 1: 2166589220222894080
No. 2: 444243906496860160
No. 3: 461132458872606754
No. 4: 452406680821547008
No. 5: 937950908773744640
No. 6: 1586187811960238082
No. 7: 1099236096233168896
No. 8: 1102050800015937536
No. 9: 98311167513120777
No. 10: 8951824985835937792

In [172]:
a = top_product_noprint(639371126526005)

In [173]:
a


Out[173]:
[2166589220222894080,
 444243906496860160,
 461132458872606754,
 452406680821547008,
 937950908773744640,
 1586187811960238082,
 1099236096233168896,
 1102050800015937536,
 98311167513120777,
 8951824985835937792]

In [44]:
#df.to_pickle('view2buy_url_CF_user_buy.pkl')

In [145]:
df = pd.read_pickle('view2buy_url_CF_user_buy.pkl')

In [45]:
df.head()


Out[45]:
0 user_id buy_spu buy_sn buy_ct3 view_spu view_sn view_ct3 time_interval view_cnt view_secondes view_features buy_features spu url user_buy CF_item
0 4209887493\t453532580309307392\t10004616\t334\... 4209887493 453532580309307392 10004616 334 14150170026959126 10010102 334 21114 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.1, 1.804, 0.049, 0.883, 0.092, 0.053, 0.042... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 4209887493-453532580309307392 [82267097285177879, 12742773141631009, 8883989...
1 529805243\t103096245561765919\t10010102\t334\t... 529805243 103096245561765919 10010102 334 14150170026959126 10010102 334 37794 4 66 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.467, 0.385, 0.0, 0.043, 0.292, 0.0, 0.448, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 529805243-103096245561765919 [82267097285177879, 12742773141631009, 8883989...
2 3748045464\t446777176556679168\t10005711\t334\... 3748045464 446777176556679168 10005711 334 14150170026959126 10010102 334 18820 1 34 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.018, 0.161, 0.088, 0.141, 0.231, 0.0, 0.036... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 3748045464-446777176556679168 [82267097285177879, 12742773141631009, 8883989...
3 4209887493\t438895881520357521\t10004616\t334\... 4209887493 438895881520357521 10004616 334 14150170026959126 10010102 334 13978 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.036, 0.439, 0.0, 0.074, 0.194, 0.0, 0.331, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 4209887493-438895881520357521 [82267097285177879, 12742773141631009, 8883989...
4 4209887493\t74104320184119307\t10004616\t334\t... 4209887493 74104320184119307 10004616 334 14150170026959126 10010102 334 14313 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.078, 2.304, 0.132, 0.191, 0.0, 0.087, 0.341... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 4209887493-74104320184119307 [82267097285177879, 12742773141631009, 8883989...

In [146]:
df_spu = pd.DataFrame(df.groupby('spu').count().reset_index()['spu'])

In [147]:
# assign CF_item and features to each spu
df_spu['CF_item'] = df_spu.apply(lambda x: df[df['spu'] == x['spu']]['CF_item'].iloc[0], axis = 1)
df_spu['spu_features'] = df_spu.apply(lambda x: df[df['spu'] == x['spu']]['view_features'].iloc[0], axis = 1)

In [148]:
df_spu.head()


Out[148]:
spu CF_item spu_features
0 357872333107204 [8952950888272863232, 2846351224980500480, 881... [0.035, 0.385, 0.112, 0.014, 0.0, 0.123, 0.438...
1 357875526680651 [2046769978417582, 461413925257830545, 3255052... [0.132, 1.678, 0.061, 0.918, 0.462, 0.342, 0.4...
2 357882254983171 [459725075493814272, 8582811288237465674, 1976... [0.026, 0.936, 0.056, 0.614, 0.139, 0.0, 0.302...
3 357901107539985 [2466922956389351424, 7804532970923106304, 130... [0.229, 0.543, 0.132, 0.144, 0.295, 0.018, 0.0...
4 639360131194904 [81141212316332372, 320394952166453251, 639360... [1.113, 0.5, 0.758, 0.218, 0.0, 0.0, 0.335, 1....

In [49]:
# Assign item_features for all CF_item
df_spu['CF_features'] = df_spu.apply(lambda x: [df_spu[df_spu['spu'] == i]['spu_features'] for i in x['CF_item']], axis = 1)

In [50]:
# Calculate the average features from 10 CF_recommended items for each spu
def CF_ave(CF_list):
    return [np.mean(i) for i in zip(*[list(x)[0] for x in CF_list])]

In [51]:
df_spu['ave_CF_fea'] = df_spu.apply(lambda x: CF_ave(x['CF_features']), axis = 1)

In [52]:
#df_spu.to_pickle('spu_CF_features_user_buy.pkl')

In [150]:
df_spu = pd.read_pickle('spu_CF_features_user_buy.pkl')

In [152]:
df.head()


Out[152]:
0 user_id buy_spu buy_sn buy_ct3 view_spu view_sn view_ct3 time_interval view_cnt view_secondes view_features buy_features spu url user_buy CF_item
0 4209887493\t453532580309307392\t10004616\t334\... 4209887493 453532580309307392 10004616 334 14150170026959126 10010102 334 21114 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.1, 1.804, 0.049, 0.883, 0.092, 0.053, 0.042... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 4209887493-453532580309307392 [82267097285177879, 12742773141631009, 8883989...
1 529805243\t103096245561765919\t10010102\t334\t... 529805243 103096245561765919 10010102 334 14150170026959126 10010102 334 37794 4 66 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.467, 0.385, 0.0, 0.043, 0.292, 0.0, 0.448, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 529805243-103096245561765919 [82267097285177879, 12742773141631009, 8883989...
2 3748045464\t446777176556679168\t10005711\t334\... 3748045464 446777176556679168 10005711 334 14150170026959126 10010102 334 18820 1 34 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.018, 0.161, 0.088, 0.141, 0.231, 0.0, 0.036... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 3748045464-446777176556679168 [82267097285177879, 12742773141631009, 8883989...
3 4209887493\t438895881520357521\t10004616\t334\... 4209887493 438895881520357521 10004616 334 14150170026959126 10010102 334 13978 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.036, 0.439, 0.0, 0.074, 0.194, 0.0, 0.331, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 4209887493-438895881520357521 [82267097285177879, 12742773141631009, 8883989...
4 4209887493\t74104320184119307\t10004616\t334\t... 4209887493 74104320184119307 10004616 334 14150170026959126 10010102 334 14313 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.078, 2.304, 0.132, 0.191, 0.0, 0.087, 0.341... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 4209887493-74104320184119307 [82267097285177879, 12742773141631009, 8883989...

In [151]:
df_spu.head()


Out[151]:
spu CF_item spu_features CF_features ave_CF_fea
0 357872333107204 [8952950888272863232, 2846351224980500480, 881... [0.035, 0.385, 0.112, 0.014, 0.0, 0.123, 0.438... [[[0.462, 0.551, 0.068, 0.833, 0.0, 0.0, 0.0, ... [0.3597, 0.7012, 0.0831, 1.0618, 0.2302, 0.123...
1 357875526680651 [2046769978417582, 461413925257830545, 3255052... [0.132, 1.678, 0.061, 0.918, 0.462, 0.342, 0.4... [[[0.357, 2.503, 0.0, 0.641, 0.143, 0.0, 0.104... [0.4727, 0.8078, 0.0697, 0.5796, 0.3075, 0.009...
2 357882254983171 [459725075493814272, 8582811288237465674, 1976... [0.026, 0.936, 0.056, 0.614, 0.139, 0.0, 0.302... [[[1.884, 0.52, 0.0, 3.98, 0.175, 0.008, 0.663... [0.6206, 0.8754, 0.1066, 0.7549, 0.2387, 0.186...
3 357901107539985 [2466922956389351424, 7804532970923106304, 130... [0.229, 0.543, 0.132, 0.144, 0.295, 0.018, 0.0... [[[0.124, 0.819, 0.0, 0.596, 0.306, 0.043, 0.2... [0.449, 0.7069, 0.1971, 0.7197, 0.2461, 0.0106...
4 639360131194904 [81141212316332372, 320394952166453251, 639360... [1.113, 0.5, 0.758, 0.218, 0.0, 0.0, 0.335, 1.... [[[0.06, 0.15, 0.011, 0.946, 0.501, 0.0, 1.309... [0.2552, 0.5227, 0.0964, 0.3326, 0.1703, 0.011...

In [153]:
df_final = pd.merge(df, df_spu, left_on='view_spu', right_on='spu')

In [160]:
df_final.columns


Out[160]:
Index([u'0', u'user_id', u'buy_spu', u'buy_sn', u'buy_ct3', u'view_spu',
       u'view_sn', u'view_ct3', u'time_interval', u'view_cnt',
       u'view_secondes', u'view_features', u'buy_features', u'spu_x', u'url',
       u'user_buy', u'CF_item_x', u'spu_y', u'CF_item_y', u'spu_features',
       u'CF_features', u'ave_CF_fea'],
      dtype='object')

In [162]:
df_final.drop(['spu_y', 'CF_item_y'], axis = 1, inplace = True)

In [164]:
df_final.head()


Out[164]:
0 user_id buy_spu buy_sn buy_ct3 view_spu view_sn view_ct3 time_interval view_cnt view_secondes view_features buy_features spu_x url user_buy CF_item_x spu_features CF_features ave_CF_fea
0 4209887493\t453532580309307392\t10004616\t334\... 4209887493 453532580309307392 10004616 334 14150170026959126 10010102 334 21114 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.1, 1.804, 0.049, 0.883, 0.092, 0.053, 0.042... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 4209887493-453532580309307392 [82267097285177879, 12742773141631009, 8883989... [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [[[0.495, 1.32, 0.022, 0.126, 0.007, 0.181, 0.... [0.447, 1.2384, 0.0129, 0.1834, 0.0854, 0.0701...
1 529805243\t103096245561765919\t10010102\t334\t... 529805243 103096245561765919 10010102 334 14150170026959126 10010102 334 37794 4 66 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.467, 0.385, 0.0, 0.043, 0.292, 0.0, 0.448, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 529805243-103096245561765919 [82267097285177879, 12742773141631009, 8883989... [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [[[0.495, 1.32, 0.022, 0.126, 0.007, 0.181, 0.... [0.447, 1.2384, 0.0129, 0.1834, 0.0854, 0.0701...
2 3748045464\t446777176556679168\t10005711\t334\... 3748045464 446777176556679168 10005711 334 14150170026959126 10010102 334 18820 1 34 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.018, 0.161, 0.088, 0.141, 0.231, 0.0, 0.036... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 3748045464-446777176556679168 [82267097285177879, 12742773141631009, 8883989... [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [[[0.495, 1.32, 0.022, 0.126, 0.007, 0.181, 0.... [0.447, 1.2384, 0.0129, 0.1834, 0.0854, 0.0701...
3 4209887493\t438895881520357521\t10004616\t334\... 4209887493 438895881520357521 10004616 334 14150170026959126 10010102 334 13978 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.036, 0.439, 0.0, 0.074, 0.194, 0.0, 0.331, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 4209887493-438895881520357521 [82267097285177879, 12742773141631009, 8883989... [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [[[0.495, 1.32, 0.022, 0.126, 0.007, 0.181, 0.... [0.447, 1.2384, 0.0129, 0.1834, 0.0854, 0.0701...
4 4209887493\t74104320184119307\t10004616\t334\t... 4209887493 74104320184119307 10004616 334 14150170026959126 10010102 334 14313 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.078, 2.304, 0.132, 0.191, 0.0, 0.087, 0.341... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 4209887493-74104320184119307 [82267097285177879, 12742773141631009, 8883989... [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [[[0.495, 1.32, 0.022, 0.126, 0.007, 0.181, 0.... [0.447, 1.2384, 0.0129, 0.1834, 0.0854, 0.0701...

In [163]:
#df_final.to_pickle('view2buy_CF_user_buy.pkl')

In [65]:
CF_user_fea = df_final.groupby('user_buy')['ave_CF_fea'].apply(lambda x: average(x))
CF_user_fea = pd.DataFrame(CF_user_fea)
CF_user_fea = CF_user_fea.reset_index()
Ori_user_fea = df_final.groupby('user_buy')['spu_features'].apply(lambda x: average(x))
Ori_user_fea = pd.DataFrame(Ori_user_fea)
Ori_user_fea = Ori_user_fea.reset_index()

In [66]:
CF_user_fea.head()


Out[66]:
user_buy ave_CF_fea
0 1018047589-2885476247461195776 [0.3402875, 0.632470833333, 0.1431, 0.91783333...
1 1074350764-20905574407417865 [0.345218478261, 0.517051086957, 0.12234130434...
2 1074350764-2836218125824516096 [0.353695, 0.54904, 0.10707, 0.827515, 0.21728...
3 1076790314-2893639021813923840 [0.221729032258, 0.651, 0.106264516129, 0.5287...
4 1076790314-4209253154853183622 [0.199768181818, 0.686179545455, 0.10204318181...

In [67]:
Ori_user_fea.head()


Out[67]:
user_buy spu_features
0 1018047589-2885476247461195776 [0.386333333333, 0.548166666667, 0.08983333333...
1 1074350764-20905574407417865 [0.36272826087, 0.396108695652, 0.108434782609...
2 1074350764-2836218125824516096 [0.37505, 0.50925, 0.11335, 0.96235, 0.12985, ...
3 1076790314-2893639021813923840 [0.257322580645, 0.676548387097, 0.17258064516...
4 1076790314-4209253154853183622 [0.210295454545, 0.680727272727, 0.15218181818...

In [68]:
user_fea = pd.merge(CF_user_fea, Ori_user_fea, on='user_buy')

In [69]:
user_fea.head()


Out[69]:
user_buy ave_CF_fea spu_features
0 1018047589-2885476247461195776 [0.3402875, 0.632470833333, 0.1431, 0.91783333... [0.386333333333, 0.548166666667, 0.08983333333...
1 1074350764-20905574407417865 [0.345218478261, 0.517051086957, 0.12234130434... [0.36272826087, 0.396108695652, 0.108434782609...
2 1074350764-2836218125824516096 [0.353695, 0.54904, 0.10707, 0.827515, 0.21728... [0.37505, 0.50925, 0.11335, 0.96235, 0.12985, ...
3 1076790314-2893639021813923840 [0.221729032258, 0.651, 0.106264516129, 0.5287... [0.257322580645, 0.676548387097, 0.17258064516...
4 1076790314-4209253154853183622 [0.199768181818, 0.686179545455, 0.10204318181... [0.210295454545, 0.680727272727, 0.15218181818...

In [82]:
#user_fea.to_pickle('user_buy_CF_Ori_fea2.pkl')

In [78]:
user_fea['user_id'] = user_fea.apply(lambda x: x['user_buy'].split('-')[0], axis = 1)

In [80]:
user_fea['buy_spu'] = user_fea.apply(lambda x: x['user_buy'].split('-')[1], axis = 1)

In [81]:
user_fea.head()


Out[81]:
user_buy ave_CF_fea spu_features user_id buy_spu
0 1018047589-2885476247461195776 [0.3402875, 0.632470833333, 0.1431, 0.91783333... [0.386333333333, 0.548166666667, 0.08983333333... 1018047589 2885476247461195776
1 1074350764-20905574407417865 [0.345218478261, 0.517051086957, 0.12234130434... [0.36272826087, 0.396108695652, 0.108434782609... 1074350764 20905574407417865
2 1074350764-2836218125824516096 [0.353695, 0.54904, 0.10707, 0.827515, 0.21728... [0.37505, 0.50925, 0.11335, 0.96235, 0.12985, ... 1074350764 2836218125824516096
3 1076790314-2893639021813923840 [0.221729032258, 0.651, 0.106264516129, 0.5287... [0.257322580645, 0.676548387097, 0.17258064516... 1076790314 2893639021813923840
4 1076790314-4209253154853183622 [0.199768181818, 0.686179545455, 0.10204318181... [0.210295454545, 0.680727272727, 0.15218181818... 1076790314 4209253154853183622

In [83]:
# Calculate similarity and ranking, return the final result for each buy item.
def sim_cal(buy_item, item):
    df_spu['sim'] = df_spu.apply(lambda x: similarity(x['spu_features'], item), axis = 1)
    df_spu['rank'] = df_spu['sim'].rank(ascending=False)
    print user
    return (df_spu[df_spu['spu'] == buy_item]['sim'], df_spu[df_spu['spu'] == buy_item]['rank'])

In [372]:



Out[372]:
(2155, 5)

In [ ]:
user_fea['ave_Ori_sim_rank']= user_fea.apply(lambda x: sim_cal(x['buy_spu'],x['spu_view_fea']), axis = 1)

In [199]:
user_fea


Out[199]:
user_buy ave_CF_fea spu_features user_id buy_spu
0 1018047589-2885476247461195776 [0.3402875, 0.632470833333, 0.1431, 0.91783333... [0.386333333333, 0.548166666667, 0.08983333333... 1018047589 2885476247461195776
1 1074350764-20905574407417865 [0.345218478261, 0.517051086957, 0.12234130434... [0.36272826087, 0.396108695652, 0.108434782609... 1074350764 20905574407417865
2 1074350764-2836218125824516096 [0.353695, 0.54904, 0.10707, 0.827515, 0.21728... [0.37505, 0.50925, 0.11335, 0.96235, 0.12985, ... 1074350764 2836218125824516096
3 1076790314-2893639021813923840 [0.221729032258, 0.651, 0.106264516129, 0.5287... [0.257322580645, 0.676548387097, 0.17258064516... 1076790314 2893639021813923840
4 1076790314-4209253154853183622 [0.199768181818, 0.686179545455, 0.10204318181... [0.210295454545, 0.680727272727, 0.15218181818... 1076790314 4209253154853183622
5 1107309816-2474241306483101696 [0.429824, 0.529608, 0.124608, 0.862208, 0.223... [0.35292, 0.52828, 0.1044, 1.07144, 0.21252, 0... 1107309816 2474241306483101696
6 1107309816-2917845869811523584 [0.403947368421, 0.561121052632, 0.11027368421... [0.568526315789, 0.388315789474, 0.05884210526... 1107309816 2917845869811523584
7 1107420964-6370980984573702144 [0.3403, 0.554653333333, 0.128223333333, 0.828... [0.2917, 0.592, 0.0722333333333, 1.08156666667... 1107420964 6370980984573702144
8 1145923275-2462700832436764672 [0.352986956522, 0.620339130435, 0.14589565217... [0.465869565217, 0.683086956522, 0.17413043478... 1145923275 2462700832436764672
9 1158035643-2832840426724958208 [0.361617741935, 0.558864516129, 0.12528387096... [0.373564516129, 0.543096774194, 0.10837096774... 1158035643 2832840426724958208
10 1170419598-2823270277605580800 [0.3525, 0.582928571429, 0.135685714286, 0.821... [0.422107142857, 0.586392857143, 0.13192857142... 1170419598 2823270277605580800
11 1171987394-2835655176583352320 [0.334142857143, 0.648964285714, 0.11824285714... [0.270392857143, 0.559928571429, 0.10410714285... 1171987394 2835655176583352320
12 1176141954-324335591819321344 [0.350716, 0.564152, 0.1234, 0.803876, 0.25326... [0.48424, 0.408, 0.11292, 0.70516, 0.20512, 0.... 1176141954 324335591819321344
13 1178987917-467606353885155328 [0.32172173913, 1.12372173913, 0.0753782608696... [0.426347826087, 1.05065217391, 0.115304347826... 1178987917 467606353885155328
14 1200352288-2480715230945681408 [0.368341025641, 0.591753846154, 0.1427, 0.901... [0.379615384615, 0.423564102564, 0.14179487179... 1200352288 2480715230945681408
15 1207880243-2914749645016145920 [0.341397727273, 0.556490909091, 0.12633636363... [0.353363636364, 0.457181818182, 0.11293181818... 1207880243 2914749645016145920
16 121339277-24001769421647872 [0.242223809524, 0.694738095238, 0.04935238095... [0.235142857143, 0.622380952381, 0.03961904761... 121339277 24001769421647872
17 1214726068-304913827234218339 [0.410868, 0.576348, 0.109336, 0.865796, 0.228... [0.36972, 0.4856, 0.11368, 0.75544, 0.1798, 0.... 1214726068 304913827234218339
18 1220000546-2909964570434449408 [0.39106969697, 0.549615151515, 0.131524242424... [0.46003030303, 0.683909090909, 0.077181818181... 1220000546 2909964570434449408
19 1238534392-2882661497718759424 [0.298785365854, 0.604512195122, 0.15552682926... [0.226195121951, 0.54787804878, 0.172926829268... 1238534392 2882661497718759424
20 1246054243-4483128308343681024 [0.4084, 0.572531707317, 0.0971243902439, 0.81... [0.473585365854, 0.487609756098, 0.11287804878... 1246054243 4483128308343681024
21 1249799965-2834810751655301120 [0.397360377358, 0.528694339623, 0.11369811320... [0.465056603774, 0.471433962264, 0.19145283018... 1249799965 2834810751655301120
22 1268074130-2455663958026235904 [0.318028571429, 0.708671428571, 0.0894, 0.944... [0.453857142857, 0.917428571429, 0.05471428571... 1268074130 2455663958026235904
23 1268074130-2482122605837119488 [0.335228571429, 0.712457142857, 0.08668571428... [0.473714285714, 0.878571428571, 0.04071428571... 1268074130 2482122605837119488
24 1268074130-2819048152931336192 [0.3264, 0.721642857143, 0.0877714285714, 0.98... [0.449571428571, 0.864, 0.0467142857143, 1.107... 1268074130 2819048152931336192
25 1268074130-8939440090467495945 [0.3309, 0.6432, 0.1045, 1.3653, 0.1829, 0.054... [1.995, 2.028, 0.14, 1.745, 0.09, 0.021, 0.198... 1268074130 8939440090467495945
26 1275960841-305476758874669060 [0.298533333333, 0.561316666667, 0.09656666666... [0.378666666667, 0.450666666667, 0.04425, 0.34... 1275960841 305476758874669060
27 1275960841-448184566666743834 [0.303254545455, 0.579772727273, 0.09851818181... [0.327, 0.464454545455, 0.0482727272727, 0.366... 1275960841 448184566666743834
28 1288091887-4641880197364293632 [0.448871428571, 0.523752380952, 0.15198571428... [0.424, 0.34619047619, 0.0848095238095, 1.2161... 1288091887 4641880197364293632
29 1288109591-9023882578131013643 [0.381041025641, 0.64971025641, 0.118297435897... [0.381974358974, 0.783384615385, 0.09889743589... 1288109591 9023882578131013643
... ... ... ... ... ...
472 798308058-4057256663630299136 [0.3336325, 0.56245, 0.122735, 0.9166075, 0.23... [0.2611, 0.454425, 0.119825, 1.015675, 0.26917... 798308058 4057256663630299136
473 801989759-4210379050958020608 [0.33159047619, 0.572923809524, 0.12615, 0.846... [0.426404761905, 0.581928571429, 0.16192857142... 801989759 4210379050958020608
474 802164527-2450315932750196736 [0.322765789474, 0.624960526316, 0.13622631578... [0.263921052632, 0.627342105263, 0.13189473684... 802164527 2450315932750196736
475 807686967-2889416897109704704 [0.328872413793, 0.596793103448, 0.13441724137... [0.308310344828, 0.492103448276, 0.12075862069... 807686967 2889416897109704704
476 807743886-4209253154853183512 [0.345247457627, 0.574888983051, 0.11897457627... [0.341618644068, 0.632025423729, 0.10235593220... 807743886 4209253154853183512
477 82073468-2913060795180240896 [0.452841176471, 0.661785294118, 0.13451764705... [0.573147058824, 0.7745, 0.0831176470588, 0.91... 82073468 2913060795180240896
478 826980444-2891105746980835328 [0.33855, 0.566007692308, 0.147638461538, 0.76... [0.269730769231, 0.644576923077, 0.15896153846... 826980444 2891105746980835328
479 831888402-2463545257346113536 [0.331425531915, 0.511642553191, 0.14469787234... [0.292553191489, 0.392063829787, 0.13655319148... 831888402 2463545257346113536
480 831888402-2898705571343994880 [0.327314893617, 0.514457446809, 0.14286382978... [0.300127659574, 0.394425531915, 0.13655319148... 831888402 2898705571343994880
481 85939638-2823270277605580800 [0.4015, 0.50786, 0.10467, 0.85096, 0.23821, 0... [0.2711, 0.2286, 0.0741, 0.4116, 0.1769, 0.125... 85939638 2823270277605580800
482 85939638-2832277476150169600 [0.3644, 0.5161, 0.115166666667, 0.85116666666... [0.256916666667, 0.241666666667, 0.12716666666... 85939638 2832277476150169600
483 860735147-3735622055772162 [0.306009090909, 0.621713636364, 0.18600454545... [0.132409090909, 0.668909090909, 0.19313636363... 860735147 3735622055772162
484 871875091-2898705571343994880 [0.358716666667, 0.629858333333, 0.11103333333... [0.260083333333, 0.2385, 0.154416666667, 0.585... 871875091 2898705571343994880
485 871875091-4210379050958020608 [0.381516666667, 0.623866666667, 0.10963333333... [0.250916666667, 0.232666666667, 0.15441666666... 871875091 4210379050958020608
486 903283685-1608142860794818562 [0.358162162162, 0.576337837838, 0.12680270270... [0.313162162162, 0.653567567568, 0.17264864864... 903283685 1608142860794818562
487 903283685-2471426556005482496 [0.352558333333, 0.577863888889, 0.12111111111... [0.303583333333, 0.664638888889, 0.17741666666... 903283685 2471426556005482496
488 905305268-33290487503762307 [0.326576, 0.670512, 0.073036, 0.45438, 0.1488... [0.30508, 0.47008, 0.06384, 0.2756, 0.12596, 0... 905305268 33290487503762307
489 905305268-448184566666743834 [0.35372, 0.611026666667, 0.0763333333333, 0.4... [0.3444, 0.225, 0.0674, 0.179933333333, 0.1460... 905305268 448184566666743834
490 910973934-2453130682530897920 [0.329588990826, 0.644018348624, 0.12413119266... [0.330926605505, 0.752889908257, 0.12066055045... 910973934 2453130682530897920
491 912273659-4641880197364293632 [0.378358695652, 0.544334782609, 0.12040869565... [0.4375, 0.35697826087, 0.124739130435, 1.0582... 912273659 4641880197364293632
492 916895118-2465797056472944640 [0.383656756757, 0.554891891892, 0.11150540540... [0.318135135135, 0.409432432432, 0.14237837837... 916895118 2465797056472944640
493 9254280-3925244893201649664 [0.360215151515, 0.618839393939, 0.13717272727... [0.356484848485, 0.575484848485, 0.11351515151... 9254280 3925244893201649664
494 931853555-941328597468446721 [0.291308695652, 0.587917391304, 0.12773043478... [0.216391304348, 0.416173913043, 0.09791304347... 931853555 941328597468446721
495 952914660-2457634282835726336 [0.227226086957, 0.682447826087, 0.08343478260... [0.35547826087, 0.682391304348, 0.143, 0.51091... 952914660 2457634282835726336
496 958739744-2893639021767663616 [0.364666666667, 0.592477777778, 0.10754444444... [0.32637037037, 0.717148148148, 0.171851851852... 958739744 2893639021767663616
497 960980006-2463545257346113536 [0.347568421053, 0.509164912281, 0.14401228070... [0.356824561404, 0.367473684211, 0.15691228070... 960980006 2463545257346113536
498 980478665-3925244893201649664 [0.31312745098, 0.562525490196, 0.131131372549... [0.246921568627, 0.650843137255, 0.13094117647... 980478665 3925244893201649664
499 981771354-33571915871158274 [0.369025, 1.24155, 0.116075, 0.802475, 0.2408... [0.291875, 1.379625, 0.102, 0.980375, 0.328375... 981771354 33571915871158274
500 981771354-80015338899632133 [0.37269375, 1.1361125, 0.1156875, 0.789725, 0... [0.384875, 1.037375, 0.1055625, 0.8285625, 0.2... 981771354 80015338899632133
501 996068355-1102050800015937536 [0.249123684211, 0.465634210526, 0.04947368421... [0.287684210526, 0.558631578947, 0.05186842105... 996068355 1102050800015937536

502 rows × 5 columns

Compare the CF with/without features


In [200]:
user_fea = pd.read_pickle('user_fea.pkl')

In [205]:
df.head()


Out[205]:
0 user_id buy_spu buy_sn buy_ct3 view_spu view_sn view_ct3 time_interval view_cnt view_secondes view_features buy_features spu url
0 4209887493\t453532580309307392\t10004616\t334\... 4209887493 453532580309307392 10004616 334 14150170026959126 10010102 334 21114 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.1, 1.804, 0.049, 0.883, 0.092, 0.053, 0.042... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/...
1 529805243\t103096245561765919\t10010102\t334\t... 529805243 103096245561765919 10010102 334 14150170026959126 10010102 334 37794 4 66 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.467, 0.385, 0.0, 0.043, 0.292, 0.0, 0.448, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/...
2 3748045464\t446777176556679168\t10005711\t334\... 3748045464 446777176556679168 10005711 334 14150170026959126 10010102 334 18820 1 34 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.018, 0.161, 0.088, 0.141, 0.231, 0.0, 0.036... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/...
3 4209887493\t438895881520357521\t10004616\t334\... 4209887493 438895881520357521 10004616 334 14150170026959126 10010102 334 13978 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.036, 0.439, 0.0, 0.074, 0.194, 0.0, 0.331, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/...
4 4209887493\t74104320184119307\t10004616\t334\t... 4209887493 74104320184119307 10004616 334 14150170026959126 10010102 334 14313 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.078, 2.304, 0.132, 0.191, 0.0, 0.087, 0.341... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/...

In [221]:
item_sim_df.head()


Out[221]:
view_spu 357872333107204 357875526680651 357882254983171 357901107539985 639360131194904 639369692328147 639371126526005 639389503180805 639392717246493 920816362999808 ... 9037674851989819400 9038237806216196097 9039645182176481286 9039926651803541506 9041615583843246080 9088621908743286785 9089747807251402752 9090029283626840066 9090592232181542912 9094251405871296512
view_spu
357872333107204 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.00000 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0
357875526680651 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.00000 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0
357882254983171 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.63211 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0
357901107539985 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.00000 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.020383 0.0
639360131194904 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.00000 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.0

5 rows × 2155 columns


In [235]:
def CF_rank(view, buy):
    try:
        return (list(item_sim_df.sort_values(by = view, ascending = False).index[1:]).index(buy) + 1)
    except:
        pass

In [273]:
item_sim_df.index[0]


Out[273]:
357872333107204

In [266]:
random.seed(0)


Out[266]:
2155

In [291]:
def ran_CF_rank():
    view = random.randint(1,len(item_sim_df)-1)
    buy = random.randint(1,len(item_sim_df)-1)
    view = item_sim_df.index[view]
    buy = item_sim_df.index[buy]
    try:
        return (list(item_sim_df.sort_values(by = view, ascending = False).index[1:]).index(buy)+1)
    except:
        pass

In [203]:
user_fea.head()


Out[203]:
user_id buy_spu ave_CF_fea ave_view_fea ave_CF_sim_rank ave_Ori_sim_rank ave_CF_sim ave_CF_rank ave_Ori_sim ave_Ori_rank
0 3440325 2898705571343994880 [0.327609375, 0.573775, 0.11804375, 0.87390625... [0.30521875, 0.5995625, 0.167625, 0.99259375, ... ([0.799214495018], [215.0]) ([0.811977551064], [115.0]) 0.799214 215.0 0.811978 115.0
1 7052311 7957699990364366 [0.307052173913, 0.738013043478, 0.06435217391... [0.320739130435, 0.577217391304, 0.088, 0.6661... ([0.745014365081], [1155.0]) ([0.73996321102], [1112.0]) 0.745014 1155.0 0.739963 1112.0
2 7052311 299847271351230626 [0.306652173913, 0.7439, 0.0624434782609, 0.60... [0.314217391304, 0.620695652174, 0.08865217391... ([0.833736325406], [173.0]) ([0.855587371153], [66.0]) 0.833736 173.0 0.855587 66.0
3 7052311 448747521265074449 [0.306817391304, 0.735182608696, 0.06435217391... [0.323086956522, 0.60552173913, 0.088, 0.653, ... ([0.854382169198], [49.0]) ([0.853929200306], [84.0]) 0.854382 49.0 0.853929 84.0
4 9254280 3925244893201649664 [0.385093939394, 0.623651515152, 0.12763030303... [0.356484848485, 0.575484848485, 0.11351515151... ([0.730978541607], [1427.0]) ([0.752675987128], [535.0]) 0.730979 1427.0 0.752676 535.0

In [225]:
list(item_sim_df.sort_values(by = 74104320184119307, ascending = False).index[1:]).index(448747521265074449)


Out[225]:
1867

In [364]:
item_sim_df.shape


Out[364]:
(2155, 2155)

In [236]:
df['CF_rank'] = df.apply(lambda x: CF_rank(x['view_spu'], x['buy_spu']), axis=1)

In [292]:
df['Ran_CF_rank'] = df.apply(lambda x: ran_CF_rank(), axis=1)

In [336]:
#df.to_pickle('view2buy_url_CF_rank.pkl')

In [299]:
df_CF_rank = df.groupby(['user_id', 'buy_spu']).mean().reset_index()[['user_id', 'buy_spu', 'Ran_CF_rank', 'CF_rank']]

In [362]:
df.head()


Out[362]:
0 user_id buy_spu buy_sn buy_ct3 view_spu view_sn view_ct3 time_interval view_cnt view_secondes view_features buy_features spu url CF_rank Ran_CF_rank
0 4209887493\t453532580309307392\t10004616\t334\... 4209887493 453532580309307392 10004616 334 14150170026959126 10010102 334 21114 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.1, 1.804, 0.049, 0.883, 0.092, 0.053, 0.042... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 74.0 600.0
1 529805243\t103096245561765919\t10010102\t334\t... 529805243 103096245561765919 10010102 334 14150170026959126 10010102 334 37794 4 66 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.467, 0.385, 0.0, 0.043, 0.292, 0.0, 0.448, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 18.0 1196.0
2 3748045464\t446777176556679168\t10005711\t334\... 3748045464 446777176556679168 10005711 334 14150170026959126 10010102 334 18820 1 34 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.018, 0.161, 0.088, 0.141, 0.231, 0.0, 0.036... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 60.0 2106.0
3 4209887493\t438895881520357521\t10004616\t334\... 4209887493 438895881520357521 10004616 334 14150170026959126 10010102 334 13978 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.036, 0.439, 0.0, 0.074, 0.194, 0.0, 0.331, ... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 36.0 1761.0
4 4209887493\t74104320184119307\t10004616\t334\t... 4209887493 74104320184119307 10004616 334 14150170026959126 10010102 334 14313 1 11 [0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039... [0.078, 2.304, 0.132, 0.191, 0.0, 0.087, 0.341... 14150170026959126 http://a.vpimg2.com/upload/merchandise/pdcvis/... 35.0 422.0

In [254]:
user_fea_CF.head()


Out[254]:
user_id buy_spu ave_CF_fea ave_view_fea ave_CF_sim_rank ave_Ori_sim_rank ave_CF_sim ave_CF_rank ave_Ori_sim ave_Ori_rank CF_rank
0 3440325 2898705571343994880 [0.327609375, 0.573775, 0.11804375, 0.87390625... [0.30521875, 0.5995625, 0.167625, 0.99259375, ... ([0.799214495018], [215.0]) ([0.811977551064], [115.0]) 0.799214 215.0 0.811978 115.0 111.264706
1 7052311 7957699990364366 [0.307052173913, 0.738013043478, 0.06435217391... [0.320739130435, 0.577217391304, 0.088, 0.6661... ([0.745014365081], [1155.0]) ([0.73996321102], [1112.0]) 0.745014 1155.0 0.739963 1112.0 27.870968
2 7052311 299847271351230626 [0.306652173913, 0.7439, 0.0624434782609, 0.60... [0.314217391304, 0.620695652174, 0.08865217391... ([0.833736325406], [173.0]) ([0.855587371153], [66.0]) 0.833736 173.0 0.855587 66.0 31.548387
3 7052311 448747521265074449 [0.306817391304, 0.735182608696, 0.06435217391... [0.323086956522, 0.60552173913, 0.088, 0.653, ... ([0.854382169198], [49.0]) ([0.853929200306], [84.0]) 0.854382 49.0 0.853929 84.0 29.612903
4 9254280 3925244893201649664 [0.385093939394, 0.623651515152, 0.12763030303... [0.356484848485, 0.575484848485, 0.11351515151... ([0.730978541607], [1427.0]) ([0.752675987128], [535.0]) 0.730979 1427.0 0.752676 535.0 146.333333

In [300]:
user_fea_CF = pd.merge(user_fea, df_CF_rank, on=['user_id', 'buy_spu'])

In [326]:
user_fea_CF['CF_ave_fea_rank'] = (user_fea_CF['CF_rank'] + user_fea_CF['ave_Ori_rank'])/2

In [360]:
user_fea_CF.head()


Out[360]:
user_id buy_spu ave_CF_fea ave_view_fea ave_CF_sim_rank ave_Ori_sim_rank ave_CF_sim ave_CF_rank ave_Ori_sim ave_Ori_rank Ran_CF_rank CF_rank CF_ave_fea_rank
0 3440325 2898705571343994880 [0.327609375, 0.573775, 0.11804375, 0.87390625... [0.30521875, 0.5995625, 0.167625, 0.99259375, ... ([0.799214495018], [215.0]) ([0.811977551064], [115.0]) 0.799214 215.0 0.811978 115.0 1225.857143 111.264706 113.132353
1 7052311 7957699990364366 [0.307052173913, 0.738013043478, 0.06435217391... [0.320739130435, 0.577217391304, 0.088, 0.6661... ([0.745014365081], [1155.0]) ([0.73996321102], [1112.0]) 0.745014 1155.0 0.739963 1112.0 925.906250 27.870968 569.935484
2 7052311 299847271351230626 [0.306652173913, 0.7439, 0.0624434782609, 0.60... [0.314217391304, 0.620695652174, 0.08865217391... ([0.833736325406], [173.0]) ([0.855587371153], [66.0]) 0.833736 173.0 0.855587 66.0 1138.666667 31.548387 48.774194
3 7052311 448747521265074449 [0.306817391304, 0.735182608696, 0.06435217391... [0.323086956522, 0.60552173913, 0.088, 0.653, ... ([0.854382169198], [49.0]) ([0.853929200306], [84.0]) 0.854382 49.0 0.853929 84.0 1181.193548 29.612903 56.806452
4 9254280 3925244893201649664 [0.385093939394, 0.623651515152, 0.12763030303... [0.356484848485, 0.575484848485, 0.11351515151... ([0.730978541607], [1427.0]) ([0.752675987128], [535.0]) 0.730979 1427.0 0.752676 535.0 971.657143 146.333333 340.666667

In [337]:
#user_fea_CF.to_pickle('user_fea_CF.pkl')

In [366]:
sns.set_style('whitegrid')
g = sns.distplot(user_fea_CF[['ave_CF_rank']]/2155, bins = 100,color = 'red', hist =False, label = 'CF_fea')
g = sns.distplot(user_fea_CF[['ave_Ori_rank']]/2155, bins = 100,color = 'blue', hist =False, label = 'Ori_fea')
g = sns.distplot(user_fea_CF[['CF_rank']]/2155, bins = 100,color = 'green', hist =False, label = 'CF_rank')
g = sns.distplot(user_fea_CF[['Ran_CF_rank']]/2155, bins = 100,color = 'black', hist =False, label = 'Ran_CF_rank')
g = sns.distplot(user_fea_CF[['CF_ave_fea_rank']]/2155, bins = 100,color = 'orange', hist =False, label = 'CF_fea_rank')
g.set(xlabel='Rank of prediting item / Total items', ylabel='# of items')


Out[366]:
[<matplotlib.text.Text at 0x1430e5450>, <matplotlib.text.Text at 0x148b50e50>]

Basically, CF only gives the best recommendation compared to features_based.


In [ ]: