In [6]:
import md5
import os
import time
import urlparse
import bokeh.charts as bc
import matplotlib.pyplot as plt
import pandas as pd
import requests
import qgrid
import seaborn as sns
# Render matplotlib figures inline in the notebook's output cells.
%matplotlib inline
# qgrid notebook setup step; presumably installs qgrid's front-end assets
# into the notebook environment — TODO confirm against the installed qgrid version.
qgrid.nbinstall()
# Quick and Dirty API wrapper
class Marvelous(object):
    """A micro-wrapper around the Marvel REST API.

    Each request is signed with ``ts``, ``apikey`` and ``hash`` query
    parameters derived from the instance's public and private keys.
    """

    base_url = "http://gateway.marvel.com"

    def __init__(self, private_key=None, public_key=None):
        """Store the API credentials.

        private_key -- Marvel private key, or None.
        public_key  -- Marvel public key, or None.

        When both keys are given they are used as-is. Otherwise both are
        read from the MARVEL_PRIVATE_KEY and MARVEL_PUBLIC_KEY environment
        variables (either may end up as None if the variable is unset).
        """
        if private_key and public_key:
            # Bug fix: this used to assign the undefined name `private`,
            # raising NameError whenever explicit keys were supplied.
            self.private_key = private_key
            self.public_key = public_key
        else:
            self.private_key = os.getenv('MARVEL_PRIVATE_KEY')
            self.public_key = os.getenv('MARVEL_PUBLIC_KEY')

    def request(self, endpoint, **kwargs):
        """Make a signed GET request against the Marvel API.

        endpoint -- path joined onto ``base_url``
                    (e.g. '/v1/public/characters').
        kwargs   -- extra query parameters. They are merged in last, so they
                    override ``ts``/``apikey``/``hash`` on a key clash.

        Returns the decoded JSON body for a 200 response. For any other
        status ``raise_for_status()`` is called; if that does not raise,
        None is returned.
        """
        ts = str(int(time.time()))
        # Signature is the md5 hex digest of timestamp + private key + public key.
        concat = ''.join([ts, self.private_key, self.public_key])
        query_params = {
            'ts': ts,
            'apikey': self.public_key,
            'hash': md5.new(concat).hexdigest()}
        query_params.update(kwargs)
        url = urlparse.urljoin(self.base_url, endpoint)
        resp = requests.get(url, params=query_params)
        if resp.status_code != 200:
            resp.raise_for_status()
            # Non-200 status that requests does not treat as an error.
            return None
        return resp.json()

    def get_character(self, character_name):
        """Get the summary data for a given character name.

        Returns a dict with the character's name, description, id and the
        number of available comics, stories, series and events, taken from
        the first result of the characters endpoint.

        Raises IndexError when the API returns no result for the name.
        """
        resp = self.request('/v1/public/characters', name=character_name)
        results = resp['data']['results']
        if not results:
            # Same exception type as indexing an empty list, but with a
            # message that says what actually went wrong.
            raise IndexError(
                'No Marvel character found with name %r' % (character_name,))
        result = results[0]
        return {
            'name': result['name'],
            'description': result['description'],
            'id': result['id'],
            'comic_count': result['comics']['available'],
            'story_count': result['stories']['available'],
            'series_count': result['series']['available'],
            'event_count': result['events']['available']
        }
In [7]:
# No keys are passed, so the wrapper reads them from the
# MARVEL_PRIVATE_KEY / MARVEL_PUBLIC_KEY environment variables.
api = Marvelous()
In [8]:
# Fetch the summary dict (name, description, id and the comic/story/series/event
# counts) for one character; as the cell's last expression it is shown as output.
api.get_character('Captain America')
Out[8]:
In [9]:
# Request the comics of character id 1009220, sending dateRange and limit as
# extra query parameters.
# NOTE(review): 1009220 is presumably the Captain America id returned by the
# previous cell — confirm.
cap_comics = api.request('/v1/public/characters/1009220/comics', dateRange='2010-01-01,2014-11-01', limit=100)
In [10]:
# Collect the distinct names of every character credited in the fetched comics
comic_list = cap_comics['data']['results']
character_set = set(
    credit['name']
    for issue in comic_list
    for credit in issue['characters']['items']
)
In [11]:
# Keep only the ids of comics that report more than 10 available characters
comic_ids = [str(com['id']) for com in comic_list if com['characters']['available'] > 10]
# Only the first 5 ids are joined into the comma-separated string used by the next request.
# NOTE(review): the original comment said Marvel's API accepts up to 10 comic ids —
# confirm the real limit and whether stopping at 5 is intentional.
joined_ids = ','.join(comic_ids[:5])
# Display the joined id string as the cell output
joined_ids
Out[11]:
In [12]:
# Request characters, sending the comma-joined comic ids as the `comics`
# query parameter together with limit=50.
all_characters = api.request('/v1/public/characters', comics=joined_ids, limit=50)
In [13]:
# Flatten each character result into one summary record:
# identity fields plus the "available" count of each related resource.
character_list = all_characters['data']['results']
records = [
    {
        'name': hero['name'],
        'description': hero['description'],
        'id': hero['id'],
        'comic_count': hero['comics']['available'],
        'story_count': hero['stories']['available'],
        'series_count': hero['series']['available'],
        'event_count': hero['events']['available'],
    }
    for hero in character_list
]
In [14]:
# One row per character record, one column per record key.
hero_df = pd.DataFrame(records)
# Show the frame in a qgrid table.
# NOTE(review): remote_js=True presumably makes qgrid load its JavaScript from a remote host — confirm.
qgrid.show_grid(hero_df, remote_js=True)
In [30]:
# For quick and dirty exploratory analysis, I usually start with Pandas native plotting functionality
# Bar chart of the comic_count column, one bar per row of hero_df.
hero_df['comic_count'].plot(kind='bar')
Out[30]:
In [42]:
# Ten characters with the highest comic counts, keeping only name and count.
# DataFrame.sort() was deprecated in pandas 0.17 in favour of sort_values()
# and removed in 0.20, so use whichever method this pandas version provides.
sort_frame = hero_df.sort_values if hasattr(hero_df, 'sort_values') else hero_df.sort
top_10 = sort_frame('comic_count', ascending=False)[['name', 'comic_count']][:10]
qgrid.show_grid(top_10, remote_js=True)
In [32]:
# Bar chart of the top-10 frame, using the `name` column for the x axis.
top_10.plot(kind='bar', x='name')
Out[32]:
In [33]:
# Ok, time to create something a bit nicer
# Same top-10 data drawn as a bar chart with seaborn: name on x, comic_count on y.
sns.factorplot('name', 'comic_count', data=top_10, kind='bar', aspect=2.0)
Out[33]:
In [34]:
# Seaborn is very customizable
# Switch the global seaborn style, then redraw the bar chart with a different
# palette and size; x_order is given the names in top_10's row order.
sns.set_style("whitegrid")
sns.factorplot('name', 'comic_count', data=top_10, kind='bar', aspect=2.5,
palette="muted", size=7, x_order=top_10['name'].tolist())
Out[34]:
In [35]:
# vincent is imported here, at first use, rather than with the imports at the top of the notebook.
import vincent
# NOTE(review): presumably loads the JavaScript vincent needs to render charts inline — confirm.
vincent.initialize_notebook()
In [36]:
# Build the bar chart from the top-10 frame, then apply each styling step in turn.
bar = vincent.Bar(top_10, columns=['comic_count'], key_on='name')
bar = bar.common_axis_properties(title_size=15)
bar = bar.axis_titles(x='Character Name', y='Total Comic Book Count')
bar = bar.colors(range_=['#6a9fb5'])
bar = bar.x_axis_properties(title_offset=10)
bar = bar.y_axis_properties(title_offset=-30)
# Last expression of the cell: display the chart.
bar
Out[36]:
In [1]:
from IPython.core.display import HTML
# Use the following if running locally:
# styles = open("styles/custom.css", "r").read()
# This is for nbviewer:
# Read the stylesheet inside a `with` block so the file handle is always closed.
with open("custom.css", "r") as css_file:
    styles = css_file.read()
# Display the stylesheet contents as HTML so the notebook picks up the styling.
HTML(styles)
Out[1]: