In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
In [2]:
%matplotlib inline
In [3]:
def level_up(n, base, increment):
    # Tabulate a stat that grows linearly with level (levels are 0-indexed)
    df = pd.DataFrame({
        "level": range(n),
        "stat": [base + increment * i for i in range(n)]
    })
    return df
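As a quick check, the helper can be called with made-up numbers (the base and per-level growth below are hypothetical, not real champion stats):

In [ ]:
# Hypothetical champion: 580 base health, +85 per level, 18 levels
level_up(18, 580, 85).head()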
Load yesterday's data
In [4]:
# Load data
dat = pd.read_csv("lol_base_stats.tsv", sep="\t")
dat.head()
Out[4]: [table: first five rows of the base stats data]
In [5]:
from bs4 import BeautifulSoup
import requests

# Scrape each champion's primary role from its wiki page
primary_role = []
for url in dat.href:
    html_data = requests.get(url).text
    soup = BeautifulSoup(html_data, "html5lib")
    role = soup.find('div', attrs={'class': 'champion_info'}).table.a.text
    primary_role.append(role)
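The loop above fires one request per champion with no delay. A gentler variant (a sketch, not what was run here) would reuse one connection and pause between requests:

In [ ]:
import time

session = requests.Session()  # reuse a single connection across requests
primary_role = []
for url in dat.href:
    soup = BeautifulSoup(session.get(url).text, "html5lib")
    primary_role.append(
        soup.find('div', attrs={'class': 'champion_info'}).table.a.text)
    time.sleep(1)  # be polite to the wiki's servers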
In [6]:
dat["primary_role"] = primary_role
dat.head()
Out[6]: [table: first five rows, now with the primary_role column]
In [79]:
# Save data
dat.to_csv("lol_base_stats-roles.tsv", index=False, sep="\t")
In [8]:
# Define colors: my_colors follows the alphabetical role order that
# groupby() produces; my_colors_key maps each role explicitly
my_colors = ['b', 'r', 'm', 'g', 'k', 'y']
my_colors_key = {
    'Controller': 'b',
    'Fighter': 'r',
    'Mage': 'm',
    'Marksman': 'g',
    'Slayer': 'k',
    'Tank': 'y'
}
In [9]:
plt.rcParams["figure.figsize"] = [10,4]
# How many champions of each type?
dat.groupby(["primary_role"]).count()["Champions"].plot.bar(color=my_colors)
plt.ylabel("count")
plt.xlabel("Primary role (according to wikia's Base champion stats)")
Out[9]: [bar chart: number of champions per primary role]
In [11]:
# Use only complete cases, and keep just the numeric stat columns
datc = dat.dropna()
datc = datc.iloc[:, 1:-2]
t-SNE is a tool for visualizing high-dimensional data. It converts similarities between data points into joint probabilities and minimizes the Kullback-Leibler divergence between the joint probabilities of the low-dimensional embedding and the high-dimensional data. t-SNE's cost function is not convex, so different initializations can give different results.
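That non-convexity is easy to see directly: re-running t-SNE on the same data with a different seed gives a different embedding. A minimal sketch (assuming datc has enough rows for perplexity=30):

In [ ]:
# Two runs on the same data, differing only in random_state
emb_a = TSNE(n_components=2, perplexity=30, random_state=0).fit_transform(datc)
emb_b = TSNE(n_components=2, perplexity=30, random_state=1).fit_transform(datc)
print(np.allclose(emb_a, emb_b))  # almost always False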
In [14]:
# Plot t-SNE embeddings at different perplexities
plt.rcParams["figure.figsize"] = [15, 15]
nrows = 4
ncols = 4
fig, ax = plt.subplots(nrows, ncols)
perplexity = list(range(50, 4, -3))  # 16 values; pop() walks from 5 up to 50
for i in range(nrows):
    for j in range(ncols):
        p = perplexity.pop()
        # Run t-SNE at this perplexity
        model = TSNE(n_components=2, perplexity=p, random_state=0)
        X = model.fit_transform(datc)
        xlabel = "TSNE1"
        ylabel = "TSNE2"
        # Color each champion by its primary role
        for k in my_colors_key.keys():
            X_subset = X[dat.dropna()["primary_role"] == k]
            x = X_subset[:, 0]
            y = X_subset[:, 1]
            ax[i, j].scatter(x, y, color=my_colors_key[k])
        ax[i, j].title.set_text("perplexity = {}".format(p))
        ax[i, j].set(xlabel=xlabel, ylabel=ylabel)
PCA, another dimensionality reduction algorithm.
Principal component analysis (PCA) is a technique used to emphasize variation and bring out strong patterns in a dataset. It's often used to make data easy to explore and visualize (Victor Powell).
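Before trusting a two-dimensional PCA plot, it is worth checking how much of the total variance the first two components actually capture; scikit-learn exposes this as explained_variance_ratio_ (a quick sketch on the same datc):

In [ ]:
pca_check = PCA(n_components=2).fit(datc)
# Fraction of total variance captured by PC1 and PC2
print(pca_check.explained_variance_ratio_)
print(pca_check.explained_variance_ratio_.sum())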
In [15]:
plt.rcParams["figure.figsize"] = [6, 6]
fig, ax = plt.subplots(1, 1)
# Run PCA and project onto the first two components
pca = PCA(n_components=2)
pca.fit(datc)
X = pca.transform(datc)
xlabel = "PC1"
ylabel = "PC2"
# Color each champion by its primary role
for k in my_colors_key.keys():
    X_subset = X[dat.dropna()["primary_role"] == k]
    x = X_subset[:, 0]
    y = X_subset[:, 1]
    ax.scatter(x, y, color=my_colors_key[k])
ax.set(xlabel=xlabel, ylabel=ylabel)
Out[15]: [scatter plot: champions on PC1 vs PC2, colored by primary role]
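One caveat: PCA is scale-sensitive, and base stats live on very different scales (health in the hundreds, armor in the tens), so the largest-scale columns can dominate the first component. Standardizing first is a common alternative (a sketch, not what was run above):

In [ ]:
from sklearn.preprocessing import StandardScaler

# Standardize each stat to zero mean and unit variance before PCA
X_scaled = StandardScaler().fit_transform(datc)
X_pca = PCA(n_components=2).fit_transform(X_scaled)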