In [1]:
import sys 
import os
sys.path.append(os.getcwd()+'/../')
# other
import numpy as np
import glob
import pandas as pd
import ntpath

#keras
from keras.preprocessing import image

# plotting
import seaborn as sns
sns.set_style('white')
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext autoreload

# debugging
from IPython.core.debugger import Tracer

#stats
import scipy.stats as stats


Using TensorFlow backend.

In [2]:
# Mode 2: reload imported modules automatically before each cell is executed,
# so edits to project code are picked up without restarting the kernel.
%autoreload 2

In [6]:
# Directory (relative to the notebook) holding the sample item-view images;
# image filenames are looked up by prefix underneath it.
folder = "../data_img_sample_item_view_sequences/"

In [34]:
# Load the view metadata table for the images under `folder` via the project helper.
# The head displayed below shows the columns user_id, view_position, spu,
# view_seconds, tag and jpg.
# NOTE(review): presumably each row is parsed from one image filename (make_html
# rebuilds filenames from these same fields) — confirm against src/data_management.
from src.data_management import get_meta_data_from_sequence_data
df = get_meta_data_from_sequence_data(folder)
df.head()


Out[34]:
user_id view_position spu view_seconds tag jpg
0 1026336619 10 18935327703625752 30 view .jpg
1 1026336619 11 225819416539443204 8 view .jpg
2 1026336619 12 452688254114283543 7 view .jpg
3 1026336619 13 308854529632018448 14 view .jpg
4 1026336619 14 323209760887509007 13 view .jpg

Plotting to HTML Function


In [45]:
def make_html(user_set, newfile, data=None, img_folder=None):
    """Write an HTML page showing each user's viewed images as one table row.

    For every user in ``user_set`` the views are ordered by ``view_position``
    and rendered as a row of thumbnails, each captioned with the user id,
    position, viewing time in seconds and tag.

    Parameters
    ----------
    user_set : iterable
        User ids to render. They are compared with ``==`` against the
        ``user_id`` column, so they must use the same type as that column.
        A user with no matching rows produces an empty table row.
    newfile : str
        Path of the HTML file to write (overwritten if it exists).
    data : pandas.DataFrame, optional
        View metadata with columns ``user_id``, ``view_position``, ``spu``,
        ``view_seconds`` and ``tag``. Defaults to the notebook-level ``df``.
    img_folder : str, optional
        Prefix prepended to the image filename (normally a directory path
        ending in a separator). Defaults to the notebook-level ``folder``.

    Raises
    ------
    IndexError
        If no image file matches a view. The type is kept from the original
        ``glob(...)[0]`` failure so existing handlers still work, but the
        message now names the missing file prefix.
    """
    from html import escape  # stdlib; imported locally so the cell stands alone

    views = df if data is None else data
    img_dir = folder if img_folder is None else img_folder

    # The page is assembled in memory and written once at the end, so a failure
    # half-way through does not leave a truncated HTML file behind.
    parts = [
        "<!DOCTYPE html><html><head><meta charset='utf-8'><style>",
        "table.app {border:1px solid #d4d4d4;}",
        # A class instead of an id: the same style is applied to many images
        # and ids must be unique within a document.
        "img.circle {border-radius:50%;}</style></head>"
        "<body style='background-color: transparent;'><table class='app'>",
    ]

    cell_template = (
        "<td><img class='circle' src='{src}' style='height:150px; width:150px' />"
        "<p>user_id: {user} pos: {pos} {secs}secs {tag}</p></td>"
    )

    for user_id in user_set:
        print(user_id)
        parts.append("<tr>")  # new row

        # One row per distinct view position (first occurrence wins, as before),
        # ordered by position. Filtering once replaces three scans per view.
        user_views = (
            views.loc[views.user_id == user_id]
            .drop_duplicates('view_position')
            .sort_values('view_position')
        )
        fields = zip(
            user_views['view_position'],
            user_views['spu'],
            user_views['view_seconds'],
            user_views['tag'],
        )
        for view_pos, spu, view_seconds, tag in fields:
            # Image files are named <user>_<pos>_<spu>_<seconds>_<tag>*.
            prefix = img_dir + '_'.join(
                str(value) for value in (user_id, view_pos, spu, view_seconds, tag)
            )
            # Escape the prefix so glob metacharacters in a value are literal;
            # sort so the chosen file is deterministic when several match.
            matches = sorted(glob.glob(glob.escape(prefix) + '*'))
            if not matches:
                raise IndexError("no image file found matching '" + prefix + "*'")

            parts.append(cell_template.format(
                src=escape(matches[0], quote=True),
                user=escape(str(user_id)),
                pos=escape(str(view_pos)),
                secs=escape(str(view_seconds)),
                tag=escape(str(tag)),
            ))

        parts.append("</tr>")  # close row

    parts.append("</table></body></html>")

    with open(newfile, 'w', encoding='utf-8') as outfile:
        outfile.write(''.join(parts))

Plotting Sets


In [44]:
# Write one HTML page per group of users.
# np.array_split is used instead of np.split because np.split raises a
# ValueError unless the number of unique users is an exact multiple of the
# number of groups; array_split simply makes the groups as even as possible.
N_SETS = 10
for i, user_set in enumerate(np.array_split(df.user_id.unique(), N_SETS)):
    print(i)
    # create HTML file
    newfile = '../figures/Sample_View_Sequences_set_' + str(i) + '.html'
    make_html(user_set, newfile)


0
1026336619
106144465
1201108847
1254629014
1259361786
1286454902
1331111242
139443270
1407961481
1411185938
1
1430062320
1446553986
1538001568
1540542657
1564790759
1581036046
1619537657
1622470454
1721873627
1749603486
2
1806454700
1826303654
183331960
1835355513
1849825115
1850538288
1867621197
1906109738
195542015
2001762092
3
2025210815
2163409455
2210400632
2250685114
2262640555
2269309385
2379508227
2437472421
2471805464
2485915290
4
2490191986
2496962655
2571951718
2624007058
2642715102
2659544743
2743203484
2766461040
277631953
2862420690
5
2865188308
2901191600
2908107778
2914970191
29580840
2978475940
3015240162
3050256974
3059275452
3095161062
6
3113698183
3158220067
3214334148
3221546800
3239019977
3252859300
3304400147
3362146075
3456547149
3476883273
7
3484636698
3491283850
3506468044
3522768721
3532442460
3560182072
3755218860
37635449
3765068409
3775478259
8
3851987456
3903270854
3905025056
3932629799
3934818663
4004400201
4091769000
4244797584
4245275931
439294679
9
44811438
471193609
497659869
57610368
577641385
591703685
668578150
800064636
810231768
940795593

Results:

set1 set2 ...


In [50]:
# from src.s3_data_management import push_results_to_s3

# for i,user_set in enumerate(np.split(df.user_id.unique(),10)):
#     newfile = '../figures/Sample_View_Sequences_set_'+str(i)+'.html'
#     newname = 'Sample_View_Sequences_set_'+str(i)+'.html'
#     push_results_to_s3(newname,newfile)

# Disabled: this doesn't work because the HTML pages reference local image files, which are not pushed to S3 along with them.

Plotting Single User


In [47]:
# Render the view sequence of a single hand-picked user to its own page.
user_set = ['1411185938']
newfile = ''.join(['../figures/Sample_View_Sequences_user_', user_set[0], '.html'])
make_html(user_set, newfile)
# Observation: this user seems to like patterned things and is searching within a restricted set.


1411185938

In [47]:
# Render the view sequence of another single user to its own page.
# NOTE(review): '...' looks like a placeholder rather than a real user_id. The
# output recorded below this cell still shows 1411185938 (make_html prints each
# user_id it processes), so the cell was presumably edited after its last run.
# Replace '...' with the user to inspect — TODO confirm.
user_set = ['...']
newfile = '../figures/Sample_View_Sequences_user_'+user_set[0]+'.html'
make_html(user_set,newfile)
# this user seems to like patterned things and is searching within a restricted set.
# NOTE(review): the remark above is identical to the one in the preceding cell; revisit once a real user is chosen.


1411185938

In [38]:
%%bash 
# Snapshot this notebook as version 1.
# Render it to HTML and, only if the conversion succeeds (&&), move the page into ../notebook_htmls/.
jupyter nbconvert --to html Sample_Item_View_Sequences.ipynb && mv Sample_Item_View_Sequences.html ../notebook_htmls/Sample_Item_View_Sequences_v1.html
# Copy the notebook source into ../notebook_versions/; this runs regardless of whether the conversion above succeeded.
# NOTE(review): the target name contains ".ipynb" twice (…Sequences.ipynb_v1.ipynb) — confirm this is intended.
cp Sample_Item_View_Sequences.ipynb ../notebook_versions/Sample_Item_View_Sequences.ipynb_v1.ipynb


[NbConvertApp] Converting notebook Sample_Item_View_Sequences.ipynb to html
[NbConvertApp] Writing 264005 bytes to Sample_Item_View_Sequences.html

In [ ]: