In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
DAY 11 - Mar 7, 2017
Today I'll do something a little different. This morning I tweeted something along the lines of "will be working with matplotlib." In terms of visualization, I'm more familiar with R and ggplot2, so this is a perfect opportunity to explore matplotlib. But first I need data.
What data to choose? At the time, my younger brother had League of Legends on his computer screen. Inspiration! I'll use League of Legends data. Plotting game data is always fun. My brother suggested using the League of Legends "Base champion statistics" wiki page.
In [2]:
# URL of the wiki page that is downloaded and parsed in the following cells.
website_base_stats = (
    "http://leagueoflegends.wikia.com/wiki/Base_champion_statistics"
)
In [3]:
# Save HTML to soup
# The timeout keeps this cell from hanging indefinitely on an unresponsive
# server, and raise_for_status() stops here on an HTTP error instead of
# handing an error page to the parser.
response = requests.get(website_base_stats, timeout=30)
response.raise_for_status()
html_data = response.text
soup = BeautifulSoup(html_data, "html5lib")
In [4]:
# Parse table
# Take the first <table class="wikitable"> on the page.
table = soup.find('table', attrs={'class' : 'wikitable'})
if table is None:
    # Fail with a clear message rather than an AttributeError further down.
    raise ValueError("No table with class 'wikitable' was found in the downloaded page")

# Parse table header
# Only <th> cells inside the selected table are used; searching the whole
# page would also collect headers from any other table and break the
# column count of the DataFrame below.
lol_thead = [h.text.strip() for h in table.find_all("th")]

# Parse table body
table_body = table.tbody
data = []
rows = table_body.find_all('tr')
for row in rows:
    cols = row.find_all('td')
    if len(cols) == 0:
        # Rows without <td> cells (e.g. a header row) carry no data.
        continue
    # For the first cell, read the text of its first <span>; if the cell has
    # no <span>, fall back to the text of the whole cell.
    first_span = cols[0].span
    if first_span is not None:
        cols[0] = first_span
    cols = [c.text.strip() for c in cols]
    data.append(cols)
lol_table = pd.DataFrame(data, columns=lol_thead)

# Print
print(lol_table.shape)
lol_table.head()
Out[4]:
In [5]:
# Parse href link info to url with more info
# Collect every anchor in the table body that has an href and no class
# attribute, keeping each anchor's attribute dict as one record.
lol_links = table.tbody.find_all("a", href=True, class_=False)
anchor_attrs = [anchor.attrs for anchor in lol_links]

# One row per anchor; the "title" attribute becomes the "Champions" join key.
link_data = pd.DataFrame(anchor_attrs).rename(columns={"title": "Champions"})

# Use full link
link_data["href"] = "http://leagueoflegends.wikia.com" + link_data.href
link_data.tail()
Out[5]:
In [6]:
# Join tables
# Left join on "Champions": every stats row is kept, with the link columns
# attached where a matching title exists.
lol_table = lol_table.merge(link_data, how="left", on="Champions")
print(lol_table.shape)
lol_table.head()
Out[6]:
In [10]:
# Fix problematic data
# Select the row(s) by champion name instead of a fixed row position, so the
# fix still targets Kled if the table order changes or the merge adds rows.
is_kled = lol_table["Champions"] == "Kled"
print(lol_table[is_kled])
# by replacing with None type
# The two columns at positions 7 and 8 are blanked for the selected row(s).
# .values turns the boolean Series into a plain array, which iloc accepts as a mask.
lol_table.iloc[is_kled.values, 7:9] = None
lol_table[is_kled]
Out[10]:
In [11]:
# Convert data types to float
# Columns at positions 1-11 and 13 up to (not including) the last one are cast
# directly; column 12 is handled separately below and column 0 and the last
# column are left untouched.
columns_to_float = list(lol_table.columns[1:12]) + list(lol_table.columns[13:-1])
lol_table[columns_to_float] = lol_table[columns_to_float].astype(float)

# Convert AS+ data types
# Column 12 holds percent strings: strip the "%" and scale to a fraction.
# The vectorised .str accessor passes missing values through as NaN, and the
# dtype guard makes re-running this cell a no-op once the column is numeric.
percent_column = lol_table.columns[12]
if not pd.api.types.is_numeric_dtype(lol_table[percent_column]):
    lol_table[percent_column] = (
        lol_table[percent_column].str.strip("%").astype(float) / 100
    )
In [12]:
# Write the table to a tab-separated file, without the index column.
lol_table.to_csv(
    "lol_base_stats.tsv",
    sep="\t",
    index=False,
)
In [10]:
import matplotlib.pyplot as plt
In [11]:
%matplotlib inline
plt.rcParams["figure.figsize"] = [15,6]
In [51]:
# 2. Create plot
# A 2x2 grid of axes; every panel shows the same HP-vs-Range scatter with a
# different customization applied.
fig = plt.figure()
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)
ax3 = fig.add_subplot(223)
ax4 = fig.add_subplot(224)

# Add title so know which plot is which
for number, ax in enumerate((ax1, ax2, ax3, ax4), start=1):
    ax.set_title("Plot {}".format(number))

# 3. Plotting routines
# Add the data (same data to all plots)
for ax in (ax1, ax2, ax3):
    ax.scatter(x=lol_table.HP, y=lol_table.Range)
ax4.scatter(x=lol_table.HP, y=lol_table.Range, marker="+", color="red")

# 4. Customize plots
# Plot 2: remove the top/right spines and label the axes.
ax2.spines['top'].set_visible(False)
ax2.spines['right'].set_visible(False)
ax2.set(xlabel="HP", ylabel="Range")

# Plot 3: dashed guide lines and custom tick labels at the same HP values.
# Ticks and labels are derived from one list so their counts always match
# (the labels were previously built with a step of 50, giving 8 labels for
# 4 ticks).
hp_ticks = list(range(300, 700, 100))
for vline in hp_ticks:
    ax3.axvline(vline, color="grey", linestyle="dashed", linewidth=0.5)
ax3.set_xticks(hp_ticks)
ax3.set_xticklabels(["HP:{}".format(x) for x in hp_ticks])

# Plot 4: zoom into a sub-region and annotate it.
ax4.set(
    xlim=[450, 650],
    ylim=[400, 700]
)
ax4.text(600, 550, "Zoom!", style='italic', fontsize="x-large")

# 5. Save figures
# plt.savefig('foo.png')
# plt.savefig('foo.png', transparent=True)
plt.tight_layout()
In [ ]: