In [2]:
# Import some libraries
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
%matplotlib inline
In [4]:
pixar_data = pd.read_csv("data/PixarMovies.csv")
In [5]:
## Data description
pixar_data.tail(3)
Out[5]:
In [50]:
pixar_data.info()
In [58]:
## Add the latest pixar movie: The Good Dinausaur
### General information
pixar_data.loc[15, 'Year Released'] = 2015
pixar_data.loc[15, 'Movie']= 'The Good Dinosaur'
pixar_data.loc[15, 'Length'] = 100
### Movie ratings
pixar_data.loc[15, 'RT Score'] = 77
pixar_data.loc[15, 'IMDB Score'] = 7.3
pixar_data.loc[15, 'Metacritic Score'] = 66
# Sources: http://www.the-numbers.com/movies/production-company/Pixar
#
# Revenues data
pixar_data.loc[15, 'Opening Weekend'] = 39.16
pixar_data.loc[15, 'Worldwide Gross'] = 136.815
pixar_data.loc[15, 'Domestic Gross'] = 81.4
pixar_data.loc[15, 'Adjusted Domestic Gross'] = 78.86
pixar_data.loc[15, 'International Gross'] = 55.4
pixar_data.loc[15, 'Domestic %'] = '59.5%'
pixar_data.loc[15, 'International %'] = '40.5%'
# Cost
pixar_data.loc[15, 'Production Budget'] = 187.5
# Oscar related information (not yet available)
pixar_data.loc[15, 'Oscars Nominated'] = ''
pixar_data.loc[15, 'Oscars Won'] = ''
In [59]:
## Save the augmented data set
pixar_data.to_csv('data/pixar-movies-updated.csv', index=False)
In [6]:
## Read the augmented data
## Save the augmented data set
pixar_augmented_data = pd.read_csv('data/pixar-movies-updated.csv')
In [15]:
pixar_augmented_data.columns
Out[15]:
In [9]:
sns.set_style('dark')
# Dataquest inital graph
plt.plot(pixar_augmented_data['Opening Weekend'],pixar_augmented_data['Adjusted Domestic Gross'])
plt.xlabel('Opening Weekend')
plt.ylabel('Adjusted Domestic Gross')
plt.title('Adjusted Domestic Gross vs Opening Weekend ')
Out[9]:
In [38]:
# A barplot
pixar_augmented_data.sort_values("Production Budget", inplace=True, ascending=False)
plt.figure(figsize=(20,10))
plt.xticks(rotation=40)
ax = sns.barplot(x="Movie", y="Worldwide Gross", hue="Production Budget",
data=pixar_augmented_data, palette="Spectral")
ax.set_ylabel('Worldwild Gross')
Out[38]:
In [42]:
# A ratio barplot: Return on investment (ROI) in %
pixar_augmented_data["ROI"] = 100 * (pixar_augmented_data["Worldwide Gross"] / pixar_augmented_data["Production Budget"])
pixar_augmented_data.sort_values("ROI", inplace=True, ascending=False)
plt.figure(figsize=(20,10))
plt.xticks(rotation=40)
ax = sns.barplot(x="Movie", y="ROI",
data=pixar_augmented_data, palette="Spectral")
ax.set_ylabel('Return on investment (%)')
Out[42]:
The embedding of D3 code into the Jupyter notebook is inspired from this notebook https://github.com/cmoscardi/embedded_d3_example/blob/master/Embedded_D3.ipynb
In [19]:
%%javascript
/**Require the d3.js library**/
require.config({
paths: {
d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min'
}
});
In [46]:
%%HTML
<style>
body { margin:0;position:fixed;top:0;right:0;bottom:0;left:0; }
svg { width: 100%; height: 100%; }
</style>
In [43]:
%%javascript
/*
* As it turns out, pandas returns a dictionary with
* the top-level keys as the column headers, and values
* as the list of values under that column.
* I'd recommend looking yourself.
* This function, in JS, converts to what D3 wants.
* Of course we could also easily do this on the python side.
*/
function convertPlotData(data){
var convertedData = [];
//pandas gives each column the same number of
//objects, arbitrarily choose one to iterate over
for(var i in data[Object.keys(data)[0]]){
var convertedDatum = {};
for(var key in data){
convertedDatum[key] = data[key][i];
}
convertedData.push(convertedDatum);
}
return convertedData;
}
window.convertPlotData = convertPlotData;
In [37]:
# Bind data to the window
from IPython.display import Javascript
Javascript("""
window.pixarData={};
""".format(pixar_data.to_json()))
Out[37]:
In [47]:
%%javascript
require(['d3'], function(d3){
var margin = {top: 20, right: 10, bottom: 20, left: 10};
var width = 960 - margin.left - margin.right;
var height = 500 - margin.top - margin.bottom;
//if (d3.select("svg")) {d3.select("svg").remove()};
var svg = d3.select("body").append("svg")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
svg.selectAll("rect")
.data(window.pixarData)
.enter()
.append("rect")
.attr({x: 100, y: 10})
.attr('width', function(d){ return width - 200})
.attr('height', function(d) {return height - 20})
.style({ fill: "#196857"})
console.log(window.pixarData)
})