Collaborative Filtering

Item Based: considers similarities between items' consumption histories

User Based: considers similarities between users' consumption histories as well as item similarities



In [1]:

    
#Import libraries
import pandas as pd
from scipy.spatial.distance import cosine



In [2]:

    
# Load the grocery transactions; the preview below shows a Person column and an item column
data = pd.read_csv("../data/groceries.csv")



In [3]:

    
data.head(100)









    Out[3]:






  
    
      
      Person
      item
    
  
  
    
      0
      1
      citrus fruit
    
    
      1
      1
      semi-finished bread
    
    
      2
      1
      margarine
    
    
      3
      1
      ready soups
    
    
      4
      2
      tropical fruit
    
    
      5
      2
      yogurt
    
    
      6
      2
      coffee
    
    
      7
      3
      whole milk
    
    
      8
      4
      pip fruit
    
    
      9
      4
      yogurt
    
    
      10
      4
      cream cheese
    
    
      11
      4
      meat spreads
    
    
      12
      5
      other vegetables
    
    
      13
      5
      whole milk
    
    
      14
      5
      condensed milk
    
    
      15
      5
      long life bakery product
    
    
      16
      6
      whole milk
    
    
      17
      6
      butter
    
    
      18
      6
      yogurt
    
    
      19
      6
      rice
    
    
      20
      6
      abrasive cleaner
    
    
      21
      7
      rolls/buns
    
    
      22
      8
      other vegetables
    
    
      23
      8
      UHT-milk
    
    
      24
      8
      rolls/buns
    
    
      25
      8
      bottled beer
    
    
      26
      8
      liquor (appetizer)
    
    
      27
      9
      pot plants
    
    
      28
      10
      whole milk
    
    
      29
      10
      cereals
    
    
      ...
      ...
      ...
    
    
      70
      25
      frozen dessert
    
    
      71
      25
      rolls/buns
    
    
      72
      25
      flour
    
    
      73
      25
      sweet spreads
    
    
      74
      25
      salty snack
    
    
      75
      25
      waffles
    
    
      76
      25
      candy
    
    
      77
      25
      bathroom cleaner
    
    
      78
      26
      bottled water
    
    
      79
      26
      canned beer
    
    
      80
      27
      yogurt
    
    
      81
      28
      sausage
    
    
      82
      28
      rolls/buns
    
    
      83
      28
      soda
    
    
      84
      28
      chocolate
    
    
      85
      29
      other vegetables
    
    
      86
      30
      brown bread
    
    
      87
      30
      soda
    
    
      88
      30
      fruit/vegetable juice
    
    
      89
      30
      canned beer
    
    
      90
      30
      newspapers
    
    
      91
      30
      shopping bags
    
    
      92
      31
      yogurt
    
    
      93
      31
      beverages
    
    
      94
      31
      bottled water
    
    
      95
      31
      specialty bar
    
    
      96
      32
      hamburger meat
    
    
      97
      32
      other vegetables
    
    
      98
      32
      rolls/buns
    
    
      99
      32
      spices
    
  

100 rows × 2 columns



In [4]:

    
#Assume that exactly one unit of each item was bought

Exercise 1: Add a column named Quantity to data, with the value 1 in every row



In [5]:

    
# Every purchase row counts as a single unit, stored in a new Quantity column
data = data.assign(Quantity=1)



In [6]:

    
data.head()









    Out[6]:






  
    
      
      Person
      item
      Quantity
    
  
  
    
      0
      1
      citrus fruit
      1
    
    
      1
      1
      semi-finished bread
      1
    
    
      2
      1
      margarine
      1
    
    
      3
      1
      ready soups
      1
    
    
      4
      2
      tropical fruit
      1



In [7]:

    
# Number of distinct products appearing in the transactions
len(data["item"].unique())









    Out[7]:





169



In [8]:

    
#This view of the data isn't very helpful for our analysis.
#Data arranged this way (one row per Person-item pair) is called LONG format.
#We need it in WIDE format (one row per Person, one column per item).



In [9]:

    
# Convert the data from long to wide format: one row per Person, one column per item,
# with Quantity as the cell value (NaN where that Person did not buy the item).
# Keyword arguments are used because newer pandas versions no longer accept
# positional arguments to DataFrame.pivot.
dataWide = data.pivot(index="Person", columns="item", values="Quantity")



In [10]:

    
dataWide.head()









    Out[10]:






  
    
      item
      Instant food products
      UHT-milk
      abrasive cleaner
      artif. sweetener
      baby cosmetics
      baby food
      bags
      baking powder
      bathroom cleaner
      beef
      ...
      turkey
      vinegar
      waffles
      whipped/sour cream
      whisky
      white bread
      white wine
      whole milk
      yogurt
      zwieback
    
    
      Person
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      1
      NaN
    
    
      3
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      1
      NaN
      NaN
    
    
      4
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      1
      NaN
    
    
      5
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      1
      NaN
      NaN
    
  

5 rows × 169 columns

Exercise 2: Print the data for Person number 2



In [11]:

    
dataWide[dataWide.index==2]









    Out[11]:






  
    
      item
      Instant food products
      UHT-milk
      abrasive cleaner
      artif. sweetener
      baby cosmetics
      baby food
      bags
      baking powder
      bathroom cleaner
      beef
      ...
      turkey
      vinegar
      waffles
      whipped/sour cream
      whisky
      white bread
      white wine
      whole milk
      yogurt
      zwieback
    
    
      Person
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      1
      NaN
    
  

1 rows × 169 columns



In [12]:

    
# Positional alternative: the slice 1:2 selects the second row, which is Person 2 in this data
dataWide.iloc[1:2,:]









    Out[12]:






  
    
      item
      Instant food products
      UHT-milk
      abrasive cleaner
      artif. sweetener
      baby cosmetics
      baby food
      bags
      baking powder
      bathroom cleaner
      beef
      ...
      turkey
      vinegar
      waffles
      whipped/sour cream
      whisky
      white bread
      white wine
      whole milk
      yogurt
      zwieback
    
    
      Person
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      1
      NaN
    
  

1 rows × 169 columns



In [13]:

    
dataWide.loc[2,:]









    Out[13]:





item
Instant food products   NaN
UHT-milk                NaN
abrasive cleaner        NaN
artif. sweetener        NaN
baby cosmetics          NaN
baby food               NaN
bags                    NaN
baking powder           NaN
bathroom cleaner        NaN
beef                    NaN
berries                 NaN
beverages               NaN
bottled beer            NaN
bottled water           NaN
brandy                  NaN
brown bread             NaN
butter                  NaN
butter milk             NaN
cake bar                NaN
candles                 NaN
candy                   NaN
canned beer             NaN
canned fish             NaN
canned fruit            NaN
canned vegetables       NaN
cat food                NaN
cereals                 NaN
chewing gum             NaN
chicken                 NaN
chocolate               NaN
                         ..
soda                    NaN
soft cheese             NaN
softener                NaN
sound storage medium    NaN
soups                   NaN
sparkling wine          NaN
specialty bar           NaN
specialty cheese        NaN
specialty chocolate     NaN
specialty fat           NaN
specialty vegetables    NaN
spices                  NaN
spread cheese           NaN
sugar                   NaN
sweet spreads           NaN
syrup                   NaN
tea                     NaN
tidbits                 NaN
toilet cleaner          NaN
tropical fruit            1
turkey                  NaN
vinegar                 NaN
waffles                 NaN
whipped/sour cream      NaN
whisky                  NaN
white bread             NaN
white wine              NaN
whole milk              NaN
yogurt                    1
zwieback                NaN
Name: 2, dtype: float64

Exercise 3: Print the data for row number 2 (the second row by position, regardless of its Person label)



In [14]:

    
dataWide.iloc[1,:]









    Out[14]:





item
Instant food products   NaN
UHT-milk                NaN
abrasive cleaner        NaN
artif. sweetener        NaN
baby cosmetics          NaN
baby food               NaN
bags                    NaN
baking powder           NaN
bathroom cleaner        NaN
beef                    NaN
berries                 NaN
beverages               NaN
bottled beer            NaN
bottled water           NaN
brandy                  NaN
brown bread             NaN
butter                  NaN
butter milk             NaN
cake bar                NaN
candles                 NaN
candy                   NaN
canned beer             NaN
canned fish             NaN
canned fruit            NaN
canned vegetables       NaN
cat food                NaN
cereals                 NaN
chewing gum             NaN
chicken                 NaN
chocolate               NaN
                         ..
soda                    NaN
soft cheese             NaN
softener                NaN
sound storage medium    NaN
soups                   NaN
sparkling wine          NaN
specialty bar           NaN
specialty cheese        NaN
specialty chocolate     NaN
specialty fat           NaN
specialty vegetables    NaN
spices                  NaN
spread cheese           NaN
sugar                   NaN
sweet spreads           NaN
syrup                   NaN
tea                     NaN
tidbits                 NaN
toilet cleaner          NaN
tropical fruit            1
turkey                  NaN
vinegar                 NaN
waffles                 NaN
whipped/sour cream      NaN
whisky                  NaN
white bread             NaN
white wine              NaN
whole milk              NaN
yogurt                    1
zwieback                NaN
Name: 2, dtype: float64



In [15]:

    
# A missing entry means the person did not buy that item, so encode it as 0
dataWide = dataWide.fillna(0)



In [16]:

    
dataWide.head()









    Out[16]:






  
    
      item
      Instant food products
      UHT-milk
      abrasive cleaner
      artif. sweetener
      baby cosmetics
      baby food
      bags
      baking powder
      bathroom cleaner
      beef
      ...
      turkey
      vinegar
      waffles
      whipped/sour cream
      whisky
      white bread
      white wine
      whole milk
      yogurt
      zwieback
    
    
      Person
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
    
    
      3
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
    
    
      4
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
    
    
      5
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
    
  

5 rows × 169 columns

Item-based Collaborative Filtering

In item-based collaborative filtering, we do not need the user (Person) column, so we will drop it



In [17]:

    
# Work on a copy so that dataWide (still indexed by Person) stays untouched;
# the Person index itself is removed in the following cells
data_ib = dataWide.copy()



In [18]:

    
data_ib.head()









    Out[18]:






  
    
      item
      Instant food products
      UHT-milk
      abrasive cleaner
      artif. sweetener
      baby cosmetics
      baby food
      bags
      baking powder
      bathroom cleaner
      beef
      ...
      turkey
      vinegar
      waffles
      whipped/sour cream
      whisky
      white bread
      white wine
      whole milk
      yogurt
      zwieback
    
    
      Person
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
    
    
      3
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
    
    
      4
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
    
    
      5
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
    
  

5 rows × 169 columns



In [19]:

    
# Turn the Person index into an ordinary column.
# Deriving the result from dataWide (instead of from data_ib itself) keeps this
# cell idempotent: re-running it no longer adds a stray "index" column.
data_ib = dataWide.reset_index()



In [20]:

    
data_ib.head()









    Out[20]:






  
    
      item
      Person
      Instant food products
      UHT-milk
      abrasive cleaner
      artif. sweetener
      baby cosmetics
      baby food
      bags
      baking powder
      bathroom cleaner
      ...
      turkey
      vinegar
      waffles
      whipped/sour cream
      whisky
      white bread
      white wine
      whole milk
      yogurt
      zwieback
    
  
  
    
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      2
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
    
    
      2
      3
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
    
    
      3
      4
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
    
    
      4
      5
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
    
  

5 rows × 170 columns



In [21]:

    
# Remove the Person identifier so that only the item columns remain
data_ib = data_ib.drop(columns="Person")



In [22]:

    
data_ib.head()









    Out[22]:






  
    
      item
      Instant food products
      UHT-milk
      abrasive cleaner
      artif. sweetener
      baby cosmetics
      baby food
      bags
      baking powder
      bathroom cleaner
      beef
      ...
      turkey
      vinegar
      waffles
      whipped/sour cream
      whisky
      white bread
      white wine
      whole milk
      yogurt
      zwieback
    
  
  
    
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
    
    
      2
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
    
    
      3
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
    
    
      4
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
    
  

5 rows × 169 columns



In [23]:

    
# Empty item-by-item frame that will hold the pairwise item similarities
data_ibs = pd.DataFrame(columns=data_ib.columns, index=data_ib.columns)



In [24]:

    
data_ibs.head()









    Out[24]:






  
    
      item
      Instant food products
      UHT-milk
      abrasive cleaner
      artif. sweetener
      baby cosmetics
      baby food
      bags
      baking powder
      bathroom cleaner
      beef
      ...
      turkey
      vinegar
      waffles
      whipped/sour cream
      whisky
      white bread
      white wine
      whole milk
      yogurt
      zwieback
    
    
      item
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Instant food products
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      UHT-milk
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      abrasive cleaner
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      artif. sweetener
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      baby cosmetics
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
  

5 rows × 169 columns

Similarity Measure

We will now find similarities.

We will use cosine similarity

The resulting similarity ranges from −1 meaning exactly opposite, to 1 meaning exactly the same, with 0 indicating orthogonality (decorrelation), and in-between values indicating intermediate similarity or dissimilarity.

src https://en.wikipedia.org/wiki/Cosine_similarity

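In essence, the cosine similarity takes the sum product of the first and second column, then divides that by the product of the square roots of the sums of squares of each column: $\text{sim}(a, b) = \frac{\sum_k a_k b_k}{\sqrt{\sum_k a_k^2}\,\sqrt{\sum_k b_k^2}}$. Because every entry in our purchase matrix is 0 or 1, the similarities computed here lie between 0 and 1.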



In [25]:

    
# Fill the placeholder with the cosine similarity of every pair of item columns.
# `.ix` was removed in pandas 1.0, so positional access goes through `.iloc`.
for i in range(0,len(data_ibs.columns)) :
    # Purchase vector of item i, fetched once and reused for the whole inner loop
    item_i = data_ib.iloc[:,i]
    # Loop through the columns for each column
    for j in range(0,len(data_ibs.columns)) :
      # scipy's `cosine` is a distance, so the similarity is 1 - distance
      data_ibs.iloc[i,j] = 1-cosine(item_i,data_ib.iloc[:,j])



In [26]:

    
data_ibs.head()









    Out[26]:






  
    
      item
      Instant food products
      UHT-milk
      abrasive cleaner
      artif. sweetener
      baby cosmetics
      baby food
      bags
      baking powder
      bathroom cleaner
      beef
      ...
      turkey
      vinegar
      waffles
      whipped/sour cream
      whisky
      white bread
      white wine
      whole milk
      yogurt
      zwieback
    
    
      item
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Instant food products
      1
      0.0248112
      0
      0
      0
      0
      0
      0.0255878
      0
      0.0396234
      ...
      0.0251577
      0.0140636
      0.0173605
      0.0296613
      0
      0.044236
      0
      0.0673304
      0.0425243
      0.0409311
    
    
      UHT-milk
      0.0248112
      1
      0
      0.0389841
      0
      0
      0
      0.0376158
      0.0212202
      0.0339786
      ...
      0.0308196
      0.0275659
      0.0595491
      0.0622915
      0
      0.0352245
      0.0120949
      0.0428914
      0.108655
      0.0401143
    
    
      abrasive cleaner
      0
      0
      1
      0.0298807
      0
      0
      0
      0
      0.03253
      0.044647
      ...
      0
      0.0422577
      0.017388
      0.0509286
      0
      0.0249222
      0.0123608
      0.0539498
      0.045634
      0
    
    
      artif. sweetener
      0
      0.0389841
      0.0298807
      1
      0
      0
      0
      0.0402042
      0
      0.00778216
      ...
      0
      0.0220971
      0.00909241
      0.0066578
      0
      0.0173762
      0
      0.0387901
      0.0524977
      0
    
    
      baby cosmetics
      0
      0
      0
      0
      1
      0
      0
      0.0309492
      0
      0
      ...
      0
      0
      0.020998
      0.0153755
      0
      0.0200643
      0
      0.0244315
      0
      0
    
  

5 rows × 169 columns

With our similarity matrix filled out, we can look for each item's “neighbours” by looping through ‘data_ibs’, sorting each column in descending order, and grabbing the names of the top 3 products. Note that the top match for every item is the item itself, since its similarity with itself is 1.



In [27]:

    
# One row per item, with columns 1..3 holding the names of its most similar items
data_neighbours = pd.DataFrame(index=data_ibs.columns,columns=range(1,4))

# Loop through our similarity dataframe and fill in neighbouring item names.
# `.ix` was removed in pandas 1.0, so positional access goes through `.iloc`.
for i in range(0,len(data_ibs.columns)):
    # Rank all items by their similarity to item i, highest first
    ranked = data_ibs.iloc[:,i].sort_values(ascending=False)
    # Keep the names of the three best matches
    data_neighbours.iloc[i,:3] = ranked.index[:3].tolist()



In [28]:

    
data_neighbours









    Out[28]:






  
    
      
      1
      2
      3
    
    
      item
      
      
      
    
  
  
    
      Instant food products
      Instant food products
      hamburger meat
      canned fish
    
    
      UHT-milk
      UHT-milk
      bottled water
      yogurt
    
    
      abrasive cleaner
      abrasive cleaner
      preservation products
      cleaner
    
    
      artif. sweetener
      artif. sweetener
      potato products
      salad dressing
    
    
      baby cosmetics
      baby cosmetics
      cream
      dish cleaner
    
    
      baby food
      baby food
      finished products
      soups
    
    
      bags
      bags
      tidbits
      frozen potato products
    
    
      baking powder
      baking powder
      whole milk
      sugar
    
    
      bathroom cleaner
      bathroom cleaner
      cleaner
      liver loaf
    
    
      beef
      beef
      root vegetables
      other vegetables
    
    
      berries
      berries
      whipped/sour cream
      yogurt
    
    
      beverages
      beverages
      yogurt
      whole milk
    
    
      bottled beer
      bottled beer
      bottled water
      liquor
    
    
      bottled water
      bottled water
      soda
      whole milk
    
    
      brandy
      brandy
      whisky
      shopping bags
    
    
      brown bread
      brown bread
      whole milk
      other vegetables
    
    
      butter
      butter
      whole milk
      other vegetables
    
    
      butter milk
      butter milk
      other vegetables
      whole milk
    
    
      cake bar
      cake bar
      whole milk
      soda
    
    
      candles
      candles
      margarine
      pastry
    
    
      candy
      candy
      chocolate
      soda
    
    
      canned beer
      canned beer
      shopping bags
      soda
    
    
      canned fish
      canned fish
      other vegetables
      meat spreads
    
    
      canned fruit
      canned fruit
      rice
      citrus fruit
    
    
      canned vegetables
      canned vegetables
      other vegetables
      hamburger meat
    
    
      cat food
      cat food
      whole milk
      yogurt
    
    
      cereals
      cereals
      whole milk
      frozen fruits
    
    
      chewing gum
      chewing gum
      soda
      other vegetables
    
    
      chicken
      chicken
      other vegetables
      whole milk
    
    
      chocolate
      chocolate
      whole milk
      soda
    
    
      ...
      ...
      ...
      ...
    
    
      soda
      soda
      rolls/buns
      bottled water
    
    
      soft cheese
      soft cheese
      other vegetables
      yogurt
    
    
      softener
      softener
      detergent
      hygiene articles
    
    
      sound storage medium
      sound storage medium
      frozen potato products
      cat food
    
    
      soups
      soups
      baby food
      other vegetables
    
    
      sparkling wine
      sparkling wine
      other vegetables
      root vegetables
    
    
      specialty bar
      specialty bar
      soda
      waffles
    
    
      specialty cheese
      specialty cheese
      other vegetables
      canned vegetables
    
    
      specialty chocolate
      specialty chocolate
      whole milk
      shopping bags
    
    
      specialty fat
      specialty fat
      margarine
      specialty vegetables
    
    
      specialty vegetables
      specialty vegetables
      pickled vegetables
      specialty fat
    
    
      spices
      spices
      preservation products
      chicken
    
    
      spread cheese
      spread cheese
      rolls/buns
      yogurt
    
    
      sugar
      sugar
      flour
      whole milk
    
    
      sweet spreads
      sweet spreads
      white bread
      whole milk
    
    
      syrup
      syrup
      baby cosmetics
      sweet spreads
    
    
      tea
      tea
      tropical fruit
      curd
    
    
      tidbits
      tidbits
      bags
      rolls/buns
    
    
      toilet cleaner
      toilet cleaner
      dental care
      prosecco
    
    
      tropical fruit
      tropical fruit
      whole milk
      other vegetables
    
    
      turkey
      turkey
      other vegetables
      tropical fruit
    
    
      vinegar
      vinegar
      cleaner
      other vegetables
    
    
      waffles
      waffles
      chocolate
      whole milk
    
    
      whipped/sour cream
      whipped/sour cream
      other vegetables
      whole milk
    
    
      whisky
      whisky
      brandy
      flour
    
    
      white bread
      white bread
      whole milk
      processed cheese
    
    
      white wine
      white wine
      bottled water
      shopping bags
    
    
      whole milk
      whole milk
      other vegetables
      yogurt
    
    
      yogurt
      yogurt
      whole milk
      other vegetables
    
    
      zwieback
      zwieback
      chocolate marshmallow
      organic products
    
  

169 rows × 3 columns

Exercise 4: Modify the above code to print the top 10 similar products for each product



In [29]:

    
# One row per item, with columns 1..10 holding the names of its most similar items
data_neighbours = pd.DataFrame(index=data_ibs.columns,columns=range(1,11))

# Loop through our similarity dataframe and fill in neighbouring item names.
# `.ix` was removed in pandas 1.0, so positional access goes through `.iloc`.
for i in range(0,len(data_ibs.columns)):
    # Rank all items by their similarity to item i, highest first
    ranked = data_ibs.iloc[:,i].sort_values(ascending=False)
    # Keep the names of the ten best matches
    data_neighbours.iloc[i,:10] = ranked.index[:10].tolist()

data_neighbours









    Out[29]:






  
    
      
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
    
    
      item
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Instant food products
      Instant food products
      hamburger meat
      canned fish
      other vegetables
      whole milk
      root vegetables
      curd
      rolls/buns
      butter
      kitchen utensil
    
    
      UHT-milk
      UHT-milk
      bottled water
      yogurt
      other vegetables
      soda
      coffee
      margarine
      domestic eggs
      brown bread
      citrus fruit
    
    
      abrasive cleaner
      abrasive cleaner
      preservation products
      cleaner
      curd cheese
      root vegetables
      dish cleaner
      other vegetables
      salad dressing
      rice
      berries
    
    
      artif. sweetener
      artif. sweetener
      potato products
      salad dressing
      skin care
      candles
      flour
      rum
      yogurt
      frankfurter
      mustard
    
    
      baby cosmetics
      baby cosmetics
      cream
      dish cleaner
      cookware
      syrup
      kitchen towels
      soups
      butter milk
      oil
      sweet spreads
    
    
      baby food
      baby food
      finished products
      soups
      cake bar
      pasta
      soft cheese
      butter milk
      dessert
      salty snack
      waffles
    
    
      bags
      bags
      tidbits
      frozen potato products
      pickled vegetables
      frozen vegetables
      napkins
      pork
      fruit/vegetable juice
      pip fruit
      pastry
    
    
      baking powder
      baking powder
      whole milk
      sugar
      whipped/sour cream
      other vegetables
      cooking chocolate
      flour
      margarine
      domestic eggs
      yogurt
    
    
      bathroom cleaner
      bathroom cleaner
      cleaner
      liver loaf
      decalcifier
      root vegetables
      soda
      other vegetables
      liquor (appetizer)
      berries
      napkins
    
    
      beef
      beef
      root vegetables
      other vegetables
      whole milk
      rolls/buns
      pork
      yogurt
      citrus fruit
      margarine
      whipped/sour cream
    
    
      berries
      berries
      whipped/sour cream
      yogurt
      other vegetables
      whole milk
      tropical fruit
      root vegetables
      beef
      citrus fruit
      grapes
    
    
      beverages
      beverages
      yogurt
      whole milk
      tropical fruit
      rolls/buns
      other vegetables
      soda
      pastry
      fruit/vegetable juice
      pip fruit
    
    
      bottled beer
      bottled beer
      bottled water
      liquor
      soda
      whole milk
      other vegetables
      red/blush wine
      rolls/buns
      root vegetables
      fruit/vegetable juice
    
    
      bottled water
      bottled water
      soda
      whole milk
      yogurt
      tropical fruit
      rolls/buns
      other vegetables
      bottled beer
      fruit/vegetable juice
      root vegetables
    
    
      brandy
      brandy
      whisky
      shopping bags
      nuts/prunes
      liquor (appetizer)
      skin care
      bottled beer
      condensed milk
      rolls/buns
      yogurt
    
    
      brown bread
      brown bread
      whole milk
      other vegetables
      yogurt
      sausage
      tropical fruit
      pastry
      fruit/vegetable juice
      root vegetables
      soda
    
    
      butter
      butter
      whole milk
      other vegetables
      yogurt
      root vegetables
      domestic eggs
      whipped/sour cream
      citrus fruit
      rolls/buns
      tropical fruit
    
    
      butter milk
      butter milk
      other vegetables
      whole milk
      yogurt
      pip fruit
      rolls/buns
      fruit/vegetable juice
      tropical fruit
      cream cheese
      sliced cheese
    
    
      cake bar
      cake bar
      whole milk
      soda
      baby food
      dessert
      shopping bags
      other vegetables
      curd
      berries
      rolls/buns
    
    
      candles
      candles
      margarine
      pastry
      whole milk
      artif. sweetener
      other vegetables
      root vegetables
      kitchen utensil
      canned fish
      citrus fruit
    
    
      candy
      candy
      chocolate
      soda
      tropical fruit
      rolls/buns
      whole milk
      other vegetables
      waffles
      chocolate marshmallow
      fruit/vegetable juice
    
    
      canned beer
      canned beer
      shopping bags
      soda
      rolls/buns
      bottled water
      other vegetables
      sausage
      liquor (appetizer)
      brown bread
      red/blush wine
    
    
      canned fish
      canned fish
      other vegetables
      meat spreads
      cream cheese
      Instant food products
      mayonnaise
      rolls/buns
      whole milk
      shopping bags
      mustard
    
    
      canned fruit
      canned fruit
      rice
      citrus fruit
      domestic eggs
      misc. beverages
      hygiene articles
      frozen dessert
      dessert
      whole milk
      shopping bags
    
    
      canned vegetables
      canned vegetables
      other vegetables
      hamburger meat
      specialty cheese
      tropical fruit
      yogurt
      root vegetables
      salt
      hard cheese
      whole milk
    
    
      cat food
      cat food
      whole milk
      yogurt
      napkins
      beef
      other vegetables
      tropical fruit
      shopping bags
      root vegetables
      chocolate
    
    
      cereals
      cereals
      whole milk
      frozen fruits
      yogurt
      curd
      other vegetables
      frozen meals
      long life bakery product
      cream cheese
      domestic eggs
    
    
      chewing gum
      chewing gum
      soda
      other vegetables
      shopping bags
      whole milk
      rolls/buns
      salty snack
      napkins
      bottled water
      pip fruit
    
    
      chicken
      chicken
      other vegetables
      whole milk
      root vegetables
      frozen vegetables
      whipped/sour cream
      domestic eggs
      butter
      pork
      citrus fruit
    
    
      chocolate
      chocolate
      whole milk
      soda
      waffles
      other vegetables
      candy
      rolls/buns
      long life bakery product
      pastry
      butter
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      soda
      soda
      rolls/buns
      bottled water
      sausage
      whole milk
      shopping bags
      other vegetables
      yogurt
      pastry
      fruit/vegetable juice
    
    
      soft cheese
      soft cheese
      other vegetables
      yogurt
      whole milk
      rolls/buns
      domestic eggs
      sliced cheese
      butter
      whipped/sour cream
      sausage
    
    
      softener
      softener
      detergent
      hygiene articles
      chicken
      whole milk
      pip fruit
      napkins
      white bread
      other vegetables
      domestic eggs
    
    
      sound storage medium
      sound storage medium
      frozen potato products
      cat food
      ham
      candy
      white bread
      pastry
      shopping bags
      bottled water
      soda
    
    
      soups
      soups
      baby food
      other vegetables
      whole milk
      whipped/sour cream
      citrus fruit
      sugar
      sauces
      margarine
      root vegetables
    
    
      sparkling wine
      sparkling wine
      other vegetables
      root vegetables
      fruit/vegetable juice
      misc. beverages
      dish cleaner
      white bread
      coffee
      red/blush wine
      specialty chocolate
    
    
      specialty bar
      specialty bar
      soda
      waffles
      pastry
      chocolate
      rolls/buns
      whole milk
      other vegetables
      salty snack
      yogurt
    
    
      specialty cheese
      specialty cheese
      other vegetables
      canned vegetables
      yogurt
      whole milk
      root vegetables
      onions
      nut snack
      whipped/sour cream
      bottled water
    
    
      specialty chocolate
      specialty chocolate
      whole milk
      shopping bags
      soda
      chocolate
      other vegetables
      candy
      rolls/buns
      yogurt
      long life bakery product
    
    
      specialty fat
      specialty fat
      margarine
      specialty vegetables
      butter
      napkins
      pasta
      hard cheese
      frozen vegetables
      whipped/sour cream
      pork
    
    
      specialty vegetables
      specialty vegetables
      pickled vegetables
      specialty fat
      white bread
      pasta
      soups
      processed cheese
      soft cheese
      bottled beer
      coffee
    
    
      spices
      spices
      preservation products
      chicken
      other vegetables
      sausage
      pip fruit
      vinegar
      citrus fruit
      onions
      butter
    
    
      spread cheese
      spread cheese
      rolls/buns
      yogurt
      sausage
      soda
      other vegetables
      shopping bags
      brown bread
      bottled water
      ham
    
    
      sugar
      sugar
      flour
      whole milk
      other vegetables
      baking powder
      margarine
      coffee
      domestic eggs
      root vegetables
      pork
    
    
      sweet spreads
      sweet spreads
      white bread
      whole milk
      root vegetables
      shopping bags
      hamburger meat
      soda
      yogurt
      fruit/vegetable juice
      ham
    
    
      syrup
      syrup
      baby cosmetics
      sweet spreads
      rubbing alcohol
      tropical fruit
      female sanitary products
      other vegetables
      white bread
      pastry
      semi-finished bread
    
    
      tea
      tea
      tropical fruit
      curd
      bottled beer
      cling film/bags
      domestic eggs
      cream cheese
      other vegetables
      whole milk
      pip fruit
    
    
      tidbits
      tidbits
      bags
      rolls/buns
      waffles
      soda
      butter milk
      pudding powder
      frozen fish
      whole milk
      cake bar
    
    
      toilet cleaner
      toilet cleaner
      dental care
      prosecco
      cleaner
      jam
      sauces
      zwieback
      hamburger meat
      newspapers
      chocolate marshmallow
    
    
      tropical fruit
      tropical fruit
      whole milk
      other vegetables
      yogurt
      pip fruit
      citrus fruit
      root vegetables
      rolls/buns
      bottled water
      whipped/sour cream
    
    
      turkey
      turkey
      other vegetables
      tropical fruit
      root vegetables
      whole milk
      herbs
      semi-finished bread
      curd
      butter
      pastry
    
    
      vinegar
      vinegar
      cleaner
      other vegetables
      oil
      whole milk
      onions
      cream cheese
      roll products
      root vegetables
      yogurt
    
    
      waffles
      waffles
      chocolate
      whole milk
      pastry
      soda
      other vegetables
      rolls/buns
      long life bakery product
      yogurt
      root vegetables
    
    
      whipped/sour cream
      whipped/sour cream
      other vegetables
      whole milk
      yogurt
      root vegetables
      berries
      curd
      butter
      tropical fruit
      domestic eggs
    
    
      whisky
      whisky
      brandy
      flour
      fruit/vegetable juice
      white wine
      chewing gum
      cat food
      shopping bags
      meat
      bottled water
    
    
      white bread
      white bread
      whole milk
      processed cheese
      ham
      other vegetables
      fruit/vegetable juice
      tropical fruit
      soda
      yogurt
      pip fruit
    
    
      white wine
      white wine
      bottled water
      shopping bags
      soda
      sausage
      bottled beer
      prosecco
      canned beer
      rolls/buns
      fruit/vegetable juice
    
    
      whole milk
      whole milk
      other vegetables
      yogurt
      root vegetables
      rolls/buns
      tropical fruit
      whipped/sour cream
      domestic eggs
      butter
      curd
    
    
      yogurt
      yogurt
      whole milk
      other vegetables
      tropical fruit
      rolls/buns
      root vegetables
      whipped/sour cream
      citrus fruit
      curd
      fruit/vegetable juice
    
    
      zwieback
      zwieback
      chocolate marshmallow
      organic products
      dishes
      tropical fruit
      other vegetables
      semi-finished bread
      toilet cleaner
      yogurt
      snack products
    
  

169 rows × 10 columns

User-Based Collaborative Filtering

The process for creating a User Based recommendation system is as follows:

Start from the item-based similarity matrix
Check which items the user has consumed
For each item the user has consumed, get the top X neighbours
Get the user's consumption record for each neighbour
Compute a similarity score from those records
Recommend the items with the highest scores



In [30]:

    
#Helper function to get similarity scores
def getScore(history, similarities):
    """Return the similarity-weighted average of a purchase history.

    Parameters
    ----------
    history : array-like
        Values multiplied element-wise with ``similarities`` (for pandas
        Series the multiplication aligns on the index labels).
    similarities : array-like
        Weights applied to ``history``.

    Returns
    -------
    float
        ``sum(history * similarities) / sum(similarities)``, or ``0.0`` when
        the weights sum to zero (including the empty case), so that a
        neighbourhood with no similarity yields a neutral score instead of
        NaN or a ZeroDivisionError.
    """
    total_similarity = sum(similarities)
    if total_similarity == 0:
        return 0.0
    return sum(history * similarities) / total_similarity

# Make sure you understand what this function does!



In [31]:

    
# reset_index() moves dataWide's index into a regular column (shown as
# "Person" in the preview below) and numbers the rows 0, 1, 2, ...
data_sims1 = dataWide.reset_index()



In [32]:

    
# Preview the first five rows of the user/item table
data_sims1.head()









    Out[32]:






  
    
      item
      Person
      Instant food products
      UHT-milk
      abrasive cleaner
      artif. sweetener
      baby cosmetics
      baby food
      bags
      baking powder
      bathroom cleaner
      ...
      turkey
      vinegar
      waffles
      whipped/sour cream
      whisky
      white bread
      white wine
      whole milk
      yogurt
      zwieback
    
  
  
    
      0
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      2
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
    
    
      2
      3
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
    
    
      3
      4
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      1
      0
    
    
      4
      5
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
    
  

5 rows × 170 columns



In [33]:

    
# Create a placeholder matrix for the scores (same index and columns as
# data_sims1, every cell NaN), then fill in the user (Person) column.
data_sims = pd.DataFrame(index=data_sims1.index, columns=data_sims1.columns)
# Column 0 is the Person column; copy it positionally with .iloc
# (.ix is no longer available in current pandas releases).
data_sims.iloc[:, 0] = data_sims1.iloc[:, 0].values



In [34]:

    
# Same index and column headers as data_sims1, but only the Person column is
# filled in so far; every product cell is still NaN
data_sims.head()









    Out[34]:






  
    
      item
      Person
      Instant food products
      UHT-milk
      abrasive cleaner
      artif. sweetener
      baby cosmetics
      baby food
      bags
      baking powder
      bathroom cleaner
      ...
      turkey
      vinegar
      waffles
      whipped/sour cream
      whisky
      white bread
      white wine
      whole milk
      yogurt
      zwieback
    
  
  
    
      0
      1
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      2
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2
      3
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      3
      4
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      4
      5
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
  

5 rows × 170 columns



In [35]:

    
# Purchase table restricted to the first 500 rows
# (presumably to keep the scoring loop's runtime manageable -- TODO confirm)
data_sims12 = data_sims1.iloc[:500,:]



In [36]:

    
# First 500 rows of the score placeholder data_sims, matching data_sims12 row for row
data_sims11 = data_sims.iloc[:500,:]



In [37]:

    
# Score every (user, product) pair for the first len(data_sims11) users.
#
# The neighbour names and their similarity weights depend only on the product,
# so look them up once per product instead of once per (user, product) pair.
# Position 0 of each ranking is skipped and the next nine entries (1:10) are
# used as the product's neighbours.
top_neighbours = {}
for product in data_sims11.columns[1:]:
    top_names = data_neighbours.loc[product].iloc[1:10].tolist()
    top_sims = data_ibs.loc[product].sort_values(ascending=False).iloc[1:10]
    top_neighbours[product] = (top_names, top_sims)

for i in range(len(data_sims11.index)):
    user = data_sims11.index[i]
    # Column 0 is Person, so product columns start at position 1.
    for j in range(1, len(data_sims11.columns)):
        product = data_sims11.columns[j]

        # data_sims11 is the first rows of data_sims, so position (i, j) is the
        # same cell in both. Writing with a single .iloc call on data_sims
        # avoids chained assignment (which may write to a temporary copy) and
        # guarantees the scores are visible to the cells that read data_sims.
        if data_sims12.iloc[i, j] == 1:
            # Already purchased: score 0 so it is never recommended.
            data_sims.iloc[i, j] = 0
        else:
            top_names, top_sims = top_neighbours[product]
            user_purchases = data_ib.loc[user, top_names]
            data_sims.iloc[i, j] = getScore(user_purchases, top_sims)
    # Progress indicator: index of the user just processed.
    print(i)



In [38]:

    
# Get the top products: one row per user, with the Person id followed by six
# (initially empty) recommendation slots.
data_recommend = pd.DataFrame(index=data_sims.index, columns=['Person','1','2','3','4','5','6'])
# Copy the Person column (position 0) across positionally with .iloc
# (.ix is no longer available in current pandas releases).
data_recommend.iloc[:, 0] = data_sims.iloc[:, 0].values



In [39]:

    
# Instead of top product scores, we want to see names
for i in range(len(data_sims.index)):
    # Rank the product columns only (column 0 is the Person id, which must not
    # take part in the ranking), highest score first; NaN scores sort last.
    ranked_scores = data_sims.iloc[i, 1:].sort_values(ascending=False)
    # Store the names of the six best-scoring products in slots '1'..'6'.
    data_recommend.iloc[i, 1:] = ranked_scores.index[:6].tolist()



In [40]:

    
# Print a sample: the first 11 users (rows 0-10) with Person and their top 3 recommendations
data_recommend.iloc[:11, :4]









    Out[40]:






  
    
      
      Person
      1
      2
      3
    
  
  
    
      0
      1
      candles
      hair spray
      oil
    
    
      1
      2
      seasonal products
      pip fruit
      beverages
    
    
      2
      3
      cereals
      curd
      domestic eggs
    
    
      3
      4
      butter milk
      tropical fruit
      canned fish
    
    
      4
      5
      coffee
      cereals
      chocolate
    
    
      5
      6
      curd
      cleaner
      domestic eggs
    
    
      6
      7
      frankfurter
      sausage
      spread cheese
    
    
      7
      8
      red/blush wine
      dishes
      bottled water
    
    
      8
      9
      prosecco
      dog food
      finished products
    
    
      9
      10
      soap
      curd
      domestic eggs
    
    
      10
      11
      fruit/vegetable juice
      grapes
      processed cheese

This case/code was inspired by http://www.salemmarafi.com/code/collaborative-filtering-with-python/

See that link for more information.

More links: http://blogs.gartner.com/martin-kihn/how-to-build-a-recommender-system-in-python/



In [ ]:

	Person	item
0	1	citrus fruit
1	1	semi-finished bread
2	1	margarine
3	1	ready soups
4	2	tropical fruit
5	2	yogurt
6	2	coffee
7	3	whole milk
8	4	pip fruit
9	4	yogurt
10	4	cream cheese
11	4	meat spreads
12	5	other vegetables
13	5	whole milk
14	5	condensed milk
15	5	long life bakery product
16	6	whole milk
17	6	butter
18	6	yogurt
19	6	rice
20	6	abrasive cleaner
21	7	rolls/buns
22	8	other vegetables
23	8	UHT-milk
24	8	rolls/buns
25	8	bottled beer
26	8	liquor (appetizer)
27	9	pot plants
28	10	whole milk
29	10	cereals
...	...	...
70	25	frozen dessert
71	25	rolls/buns
72	25	flour
73	25	sweet spreads
74	25	salty snack
75	25	waffles
76	25	candy
77	25	bathroom cleaner
78	26	bottled water
79	26	canned beer
80	27	yogurt
81	28	sausage
82	28	rolls/buns
83	28	soda
84	28	chocolate
85	29	other vegetables
86	30	brown bread
87	30	soda
88	30	fruit/vegetable juice
89	30	canned beer
90	30	newspapers
91	30	shopping bags
92	31	yogurt
93	31	beverages
94	31	bottled water
95	31	specialty bar
96	32	hamburger meat
97	32	other vegetables
98	32	rolls/buns
99	32	spices

item	Instant food products	UHT-milk	abrasive cleaner	artif. sweetener	baby cosmetics	baby food	bags	baking powder	bathroom cleaner	beef	...	turkey	vinegar	waffles	whipped/sour cream	whisky	white bread	white wine	whole milk	yogurt	zwieback
Person
1	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1	NaN
3	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1	NaN	NaN
4	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1	NaN
5	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1	NaN	NaN

item	Person	...	whole milk	yogurt
0	1	...	0	0
1	2	...	0	1
2	3	...	1	0
3	4	...	0	1
4	5	...	1	0

item	...	whole milk	yogurt
0	...	0	0
1	...	0	1
2	...	1	0
3	...	0	1
4	...	1	0

item	Instant food products	UHT-milk	abrasive cleaner	artif. sweetener	baby cosmetics	baby food	bags	baking powder	bathroom cleaner	beef	...	turkey	vinegar	waffles	whipped/sour cream	whisky	white bread	white wine	whole milk	yogurt	zwieback
item
Instant food products	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
UHT-milk	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
abrasive cleaner	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
artif. sweetener	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
baby cosmetics	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

item	Instant food products	UHT-milk	abrasive cleaner	artif. sweetener	baby cosmetics	baby food	bags	baking powder	bathroom cleaner	beef	...	turkey	vinegar	waffles	whipped/sour cream	whisky	white bread	white wine	whole milk	yogurt	zwieback
item
Instant food products	1	0.0248112	0	0	0	0	0	0.0255878	0	0.0396234	...	0.0251577	0.0140636	0.0173605	0.0296613	0	0.044236	0	0.0673304	0.0425243	0.0409311
UHT-milk	0.0248112	1	0	0.0389841	0	0	0	0.0376158	0.0212202	0.0339786	...	0.0308196	0.0275659	0.0595491	0.0622915	0	0.0352245	0.0120949	0.0428914	0.108655	0.0401143
abrasive cleaner	0	0	1	0.0298807	0	0	0	0	0.03253	0.044647	...	0	0.0422577	0.017388	0.0509286	0	0.0249222	0.0123608	0.0539498	0.045634	0
artif. sweetener	0	0.0389841	0.0298807	1	0	0	0	0.0402042	0	0.00778216	...	0	0.0220971	0.00909241	0.0066578	0	0.0173762	0	0.0387901	0.0524977	0
baby cosmetics	0	0	0	0	1	0	0	0.0309492	0	0	...	0	0	0.020998	0.0153755	0	0.0200643	0	0.0244315	0	0

item	Person	...	whole milk	yogurt
0	1	...	0	0
1	2	...	0	1
2	3	...	1	0
3	4	...	0	1
4	5	...	1	0

item	Person	Instant food products	UHT-milk	abrasive cleaner	artif. sweetener	baby cosmetics	baby food	bags	baking powder	bathroom cleaner	...	turkey	vinegar	waffles	whipped/sour cream	whisky	white bread	white wine	whole milk	yogurt	zwieback
0	1	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	2	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	3	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	4	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	5	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	Person	1	2	3
0	1	candles	hair spray	oil
1	2	seasonal products	pip fruit	beverages
2	3	cereals	curd	domestic eggs
3	4	butter milk	tropical fruit	canned fish
4	5	coffee	cereals	chocolate
5	6	curd	cleaner	domestic eggs
6	7	frankfurter	sausage	spread cheese
7	8	red/blush wine	dishes	bottled water
8	9	prosecco	dog food	finished products
9	10	soap	curd	domestic eggs
10	11	fruit/vegetable juice	grapes	processed cheese

item	Person	...	whole milk	yogurt
0	1	...	0	0
1	2	...	0	1
2	3	...	1	0
3	4	...	0	1
4	5	...	1	0

item	...	whole milk	yogurt
0	...	0	0
1	...	0	1
2	...	1	0
3	...	0	1
4	...	1	0

item	Person	...	whole milk	yogurt
0	1	...	0	0
1	2	...	0	1
2	3	...	1	0
3	4	...	0	1
4	5	...	1	0

item	Person	Instant food products	UHT-milk	abrasive cleaner	artif. sweetener	baby cosmetics	baby food	bags	baking powder	bathroom cleaner	...	turkey	vinegar	waffles	whipped/sour cream	whisky	white bread	white wine	whole milk	yogurt	zwieback
0	1	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	2	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	3	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	4	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	5	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

item	Person	...	whole milk	yogurt
0	1	...	0	0
1	2	...	0	1
2	3	...	1	0
3	4	...	0	1
4	5	...	1	0

item	...	whole milk	yogurt
0	...	0	0
1	...	0	1
2	...	1	0
3	...	0	1
4	...	1	0

item	Person	...	whole milk	yogurt
0	1	...	0	0
1	2	...	0	1
2	3	...	1	0
3	4	...	0	1
4	5	...	1	0

item	Person	Instant food products	UHT-milk	abrasive cleaner	artif. sweetener	baby cosmetics	baby food	bags	baking powder	bathroom cleaner	...	turkey	vinegar	waffles	whipped/sour cream	whisky	white bread	white wine	whole milk	yogurt	zwieback
0	1	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	2	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	3	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	4	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	5	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN