Load the data: download ch02.zip from http://media.wiley.com/product_ancillary/6X/11186614/DOWNLOAD/ch02.zip and extract WineKMC.xlsx from it.
In [108]:
# code written in py_3.0
import pandas as pd
import numpy as np
# Read the second worksheet (sheet index 1) of the workbook.
# The path is handed to pandas directly so pandas opens and closes the file
# itself; the previous bare open(...) left a file handle that was never closed.
# NOTE(review): absolute, machine-specific path -- point it at your local copy.
WINE_XLSX = 'C:/Users/craigrshenton/Desktop/Dropbox/excel_data_sci/ch02/WineKMC.xlsx'
# `pd` is the alias this notebook imports (the bare name `pandas` is undefined),
# and the keyword is `sheet_name` (the old `sheetname` spelling is no longer accepted).
df_sales = pd.read_excel(WINE_XLSX, sheet_name=1)
df_sales.columns = ['name', 'offer']  # rename the sheet's two columns
df_sales.head()
Out[108]:
In [109]:
# Array of the distinct customer names found in the sales table.
names = df_sales['name'].unique()
names
Out[109]:
In [110]:
# Give each unique customer name an id number, counting up from 1.
# `df_names` is never defined in this notebook (NameError on a fresh kernel),
# so the ids are derived from the `names` array instead; a 1..n range is
# already unique, so no extra de-duplication step is needed.
# NOTE: `id` shadows the Python builtin of the same name; the name is kept
# because the following cell zips `names` with `id`.
id = np.arange(1, len(names) + 1)
id
Out[110]:
In [111]:
# Build a name -> id lookup, then tag every sales row with its customer's id.
id_dict = {customer: number for customer, number in zip(names, id)}
df_sales['id'] = df_sales['name'].map(id_dict)
df_sales.head()
Out[111]:
In [157]:
# Count the sales rows for every (offer, customer id) pair:
# rows are offers, columns are customer ids, missing pairs are filled with 0.
# Passing values='name' keeps the columns single-level (just the ids), so no
# MultiIndex flattening is needed afterwards.
pivot = pd.pivot_table(df_sales, index='offer', columns='id', values='name',
                       aggfunc=len, fill_value=0)
pivot.index.name = None
# Plain integer ndarray (offers x customers). `.values` works across pandas
# versions (DataFrame.as_matrix() has been removed), and a regular ndarray is
# used instead of np.matrix, which recent scikit-learn refuses as input.
X = pivot.values.astype(int)
X
Out[157]:
In [156]:
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine
# Pairwise cosine similarity between the rows of X.T (one row per column of X).
# pairwise_distances returns cosine *distance*, so 1 - distance gives similarity.
# np.asarray() strips a possible np.matrix wrapper, which recent scikit-learn
# releases reject; it is a no-op when X is already a plain ndarray.
# NOTE: despite its name, `dist_out` holds similarities, not distances.
dist_out = 1 - pairwise_distances(np.asarray(X).T, metric="cosine")
dist_out
Out[156]:
In [176]:
import networkx as nx
import matplotlib.pyplot as plt
# Build a graph whose adjacency matrix is the similarity matrix `dist_out`.
# from_numpy_array replaces from_numpy_matrix, which was removed in networkx 3.0.
# NOTE(review): the diagonal of a similarity matrix is presumably non-zero, so
# every node likely gets a self-loop -- confirm whether that is intended.
G = nx.from_numpy_array(dist_out)
nx.draw(G)
plt.show()
In [170]:
# Node degrees of G in ascending order.
# dict(...) makes this work whether nx.degree returns a plain dict (networkx 1.x)
# or a DegreeView (networkx 2.x and later), which has no .values() method.
degrees = sorted(dict(nx.degree(G)).values())
In [ ]: