In [9]:
%matplotlib inline
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns; sns.set();
In [46]:
# Mapping of company name -> list of the people who applied to that company.
# The insertion order of the keys is the order later cells see when they
# iterate over `applied`.
applied = {
"3M":["Dylan","Lois","Dan","Noah","Swetha","Ikenna","Melanie","Kyle","JF"],
"DogVacay":['Dan','Dylan','Ikenna','Jimmy','Natalie','Peter','Swetha'],
'Aurotech':['Ikenna','Kyle','Melanie','Noah','Peter','Cindy','Swetha'],
'tronc':['Cindy','Dan','Ikenna','JF','Jimmy','Natalie','Peter','Swetha'],
'InVenture':['Amelia','Cindy','Dan','Dylan','Ikenna','JF','Jimmy','Kyle','Melanie','Natalie','Peter','Swetha'],
'Nielsen':['Cindy','Dan','Dylan','Ikenna','JF','Jimmy','Josh','Kyle','Melanie','Natalie','Noah','Peter','Swetha'],
'BCG':['Cindy','Dylan','Ikenna','JF','Jimmy','Josh','Kyle','Melanie','Natalie','Noah','Peter','Swetha'],
'Facebook':['Amelia','Cindy','Dan','Dylan','Ikenna','JF','Jimmy','Josh','Kyle','Lois','Melanie','Natalie','Noah','Peter','Swetha'],
'Netflix':['Cindy','Dylan','JF','Jimmy','Kyle','Melanie','Natalie','Peter','Swetha'],
'Virtu':['Dylan','JF','Noah','Peter'],
'Amazon':['Amelia','Cindy','Dylan','Ikenna','JF','Jimmy','Josh','Kyle','Lois','Natalie','Noah','Peter','Swetha'],
'Goodyear':['Cindy','Dan','Dylan','Ikenna','Jimmy','Kyle','Melanie','Noah','Peter'],
'HomeAway':['Cindy','Dan','Dylan','Ikenna','JF','Josh','Kyle','Lois','Melanie','Natalie','Noah','Peter'],
'Intuit':['Amelia','Cindy','Dan','Dylan','Ikenna','JF','Jimmy','Kyle','Lois','Melanie','Natalie','Peter','Swetha'],
'iSpot.tv':['Amelia','Cindy','Dan','Dylan','Ikenna','JF','Jimmy','Josh','Kyle','Lois','Melanie','Natalie','Noah','Peter','Swetha'],
'Payoff':['Cindy','Dan','Dylan','Ikenna','JF','Jimmy','Kyle','Melanie','Natalie','Peter','Swetha'],
'Red Bull':['Amelia','Cindy','Dan','Dylan','Ikenna','JF','Jimmy','Kyle','Natalie','Peter','Swetha'],
'Shopify':['Cindy','Dan','Dylan','Ikenna','JF','Jimmy','Kyle','Lois','Melanie','Noah','Peter','Swetha'],
'Zymergen':['Amelia','Cindy','Dan','Dylan','Ikenna','JF','Jimmy','Josh','Kyle','Lois','Melanie','Natalie','Noah','Peter','Swetha'],
}
# Call form with a single argument prints the same text on Python 2 and
# Python 3; the bare `print "..."` statement is a SyntaxError on Python 3.
print("{} total companies in queue.".format(len(applied)))
In [84]:
# Roster of names; a name's position in this list is its column in `arr`.
fellows = ['Amelia', 'Cindy', 'Dan', 'Dylan', 'Ikenna',
           'JF', 'Jimmy', 'Josh', 'Kyle', 'Lois',
           'Melanie', 'Natalie', 'Noah', 'Peter', 'Swetha']

# Row labels: the keys of `applied`, in the order iteration yields them.
indexes = list(applied)

# Integer matrix with one row per key of `applied` and one column per fellow.
# A cell is set to 1 when that fellow's name is in that key's list; all
# other cells stay 0.
arr = np.zeros((len(applied), len(fellows)), dtype=int)
for row, company in enumerate(indexes):
    for applicant in applied[company]:
        arr[row, fellows.index(applicant)] = 1
In [85]:
# Wrap the matrix in a labelled frame: rows are labelled by `indexes`,
# columns by `fellows`. The trailing expression previews the first rows.
df = pd.DataFrame(arr, index=indexes, columns=fellows)
df.head()
Out[85]:
In [86]:
# Co-occurrence matrix: the transposed frame multiplied by the frame itself.
# The result is square, labelled by df's columns on both axes; each entry is
# the sum over df's rows of the product of the two corresponding columns.
coocc = df.transpose().dot(df)
coocc.head()
Out[86]:
In [87]:
# Annotated heatmap of the co-occurrence table, lower triangle only.

# Explicit boolean mask (True = hide the cell). The upper triangle and the
# diagonal are hidden, exactly the cells the original index assignment
# selected. Building it from the shape avoids inheriting an integer dtype
# from the DataFrame via np.zeros_like.
mask = np.triu(np.ones(coocc.shape, dtype=bool))

# The axes are created *inside* the style context: seaborn's axes style is
# picked up when the axes are created, so entering the context after
# plt.subplots() leaves the axes with the previously active style.
with sns.axes_style("white"):
    fig, ax = plt.subplots(1, 1, figsize=(14, 10))
    sns.heatmap(coocc,
                mask=mask,
                annot=True, fmt="d",  # write the integer value in each cell
                square=True,
                ax=ax)
ax.set_xlabel('Fellow', fontsize=16)
ax.set_ylabel('Fellow', fontsize=16)
# Trailing semicolon suppresses the Text repr in the notebook output.
ax.set_title("Applied to the same company?", fontsize=18);
In [71]:
# People applied to unequal numbers of companies, though. Maybe I could weight by that.
from collections import Counter

# Flatten every list in `applied` into one list of names, so that a name
# appears once per list it occurs in.
allnames = [name for job in applied for name in applied[job]]

# cntr: name -> number of occurrences across all lists.
cntr = Counter(allnames)

# weights: name -> reciprocal of that count (1.0 forces float division on
# Python 2 as well).
weights = {person: 1.0 / count for person, count in cntr.items()}

# Single-argument call form prints the same on Python 2 and Python 3; the
# bare `print x` statement is a SyntaxError on Python 3.
print(cntr)
print(weights)
In [78]:
# Rebuild the matrix with floats: instead of a 1, each cell for a name found
# in a key's list receives that name's entry from `weights`; other cells
# stay 0.0. Row labels are again the keys of `applied` in iteration order.
indexes = list(applied)
arr = np.zeros((len(applied), len(fellows)), dtype=float)
for row, company in enumerate(indexes):
    for applicant in applied[company]:
        arr[row, fellows.index(applicant)] = weights[applicant]
In [79]:
# Labelled frame over the current `arr`: rows are labelled by `indexes`,
# columns by `fellows`. This rebinds `df`; the trailing expression previews
# the first rows.
df = pd.DataFrame(arr, index=indexes, columns=fellows)
df.head()
Out[79]:
In [80]:
# Co-occurrence matrix: the transposed frame multiplied by the frame itself.
# Each entry is the sum over df's rows of the product of two of df's columns,
# so the result is square and labelled by df's columns on both axes.
coocc = df.transpose().dot(df)
coocc.head()
Out[80]:
In [82]:
# Heatmap of the current `coocc` table, lower triangle only, no annotations.

# Explicit boolean mask (True = hide the cell). The upper triangle and the
# diagonal are hidden, exactly the cells the original index assignment
# selected. Building it from the shape avoids inheriting a float dtype from
# the DataFrame via np.zeros_like.
mask = np.triu(np.ones(coocc.shape, dtype=bool))

# The axes are created *inside* the style context: seaborn's axes style is
# picked up when the axes are created, so entering the context after
# plt.subplots() leaves the axes with the previously active style.
with sns.axes_style("white"):
    fig, ax = plt.subplots(1, 1, figsize=(14, 10))
    sns.heatmap(coocc,
                mask=mask,
                square=True,
                ax=ax,
                cmap='YlGnBu')
ax.set_xlabel('Fellow', fontsize=16)
ax.set_ylabel('Fellow', fontsize=16)
# Trailing semicolon suppresses the Text repr in the notebook output.
ax.set_title("Weighted by total number of applications", fontsize=18);
In [ ]: