In [1]:
import os
import pandas as pd
import settings
import etl
%matplotlib inline
%load_ext watermark
%watermark -d -t -v -m -p pea,pandas
In [2]:
# Create the data container from the local `etl` module and run its loader.
# Later cells read the result through `data.movie`.
# NOTE(review): presumably load() is what populates `data.movie` — confirm in etl.py.
data = etl.Data()
data.load()
In [3]:
# Show the column labels available on the movie frame.
data.movie.columns
Out[3]:
In [4]:
# Show the dtype of every column on the movie frame.
data.movie.dtypes
Out[4]:
In [5]:
# Add a `net` column to the movie frame: gross minus budget, row by row.
# Later cells in this notebook read `net`, so this cell must run before them.
data.movie['net'] = data.movie['gross'].sub(data.movie['budget'])
In [6]:
# Rank titles from largest to smallest budget and keep only the money-related
# columns (plus title and year) for display.
(
    data.movie
    .sort_values(by='budget', ascending=False)
    .loc[:, ['movie_title', 'title_year', 'budget', 'gross', 'net']]
)
Out[6]:
In [7]:
from iplotter import C3Plotter
In [8]:
# Plotter instance used by the `c3.plot(...)` calls in later cells.
c3 = C3Plotter()
In [9]:
# Per-year minimum of gross, net and budget (one row per `title_year`).
# The three columns are selected *before* aggregating so min() is never run
# over unrelated columns (e.g. text), which is wasted work and can fail on
# values that cannot be compared with each other.
# NOTE(review): fillna(0) makes missing values appear as zero on the chart —
# confirm that is the intended presentation.
plot_data = (
    data.movie
    .groupby('title_year')[['gross', 'net', 'budget']]
    .min()
    .fillna(0)
)
c3.plot(plot_data, zoom=True)
Out[9]:
In [10]:
# Mean `imdb_score` for each value of `country`.
# The score column is selected before averaging: calling mean() on the whole
# grouped frame also tries to average non-numeric columns, which raises
# TypeError on pandas 2.x and is discarded work on older versions.
country_group = data.movie.groupby('country')['imdb_score'].mean()

# Parallel lists consumed by the choropleth cell: values[i] belongs to countries[i].
values = country_group.tolist()
countries = country_group.index.tolist()
In [11]:
from iplotter import PlotlyPlotter
from IPython.display import HTML
plotly = PlotlyPlotter()
c3_plotter = C3Plotter()  # not used in this cell; kept so the name stays defined

# `values` holds the mean imdb_score per country (see the groupby cell), so the
# top of the colour scale is the largest of those means. NaN entries are
# skipped (v == v is False only for NaN) because max() over a list containing
# NaN depends on element order; default=0 covers an empty list.
score_ceiling = max((v for v in values if v == v), default=0)

# Single choropleth trace: one entry per country name, coloured by mean score.
plotly_chart = [{
    "type": 'choropleth',
    "locationmode": 'country names',
    "locations": countries,
    "z": values,
    "zmin": 0,
    "zmax": score_ceiling,
    "colorscale": [
        [0, 'rgb(242,240,247)'], [0.2, 'rgb(218,218,235)'],
        [0.4, 'rgb(188,189,220)'], [0.6, 'rgb(158,154,200)'],
        [0.8, 'rgb(117,107,177)'], [1, 'rgb(84,39,143)']
    ],
    "colorbar": {
        # The plotted quantity is a mean score, not a count.
        "title": 'Mean IMDb score',
        "thickness": 10
    },
    "marker": {
        "line": {
            "color": 'rgb(255,255,255)',
            "width": 2
        }
    }
}]

plotly_layout = {
    "title": 'Mean IMDb Score by Country',
    "geo": {
        # 'country names' is a locationmode value, not a geo scope; the data
        # spans all countries, so use the whole-world scope.
        "scope": 'world',
    }
}

# Pass the layout along with the trace (it was previously built but unused),
# and end the cell with the result so the chart is actually displayed.
country_plot = plotly.plot(data=plotly_chart, layout=plotly_layout)
country_plot
In [12]:
# IMDb score of every title, re-indexed by that title's budget.
(
    data.movie
    .set_index(['budget'])
    .loc[:, 'imdb_score']
)
Out[12]:
In [13]:
# Scatter of `net` against `director_facebook_likes` (likes become the index,
# `net` is the only plotted column).
# NOTE(review): the variable name says score-by-budget, but the frame holds
# net indexed by director likes — consider renaming.
score_by_budget = (
    data.movie
    .set_index(['director_facebook_likes'])
    .loc[:, ['net']]
)
c3.plot(score_by_budget, kind='scatter', zoom=True)
Out[13]:
In [14]:
from ipywidgets import interact, interactive, fixed, interact_manual
In [15]:
def f(country):
    """Scatter per-director gross against director Facebook likes for one country.

    Keeps the rows of ``data.movie`` whose ``country`` equals ``country``,
    sums ``director_facebook_likes`` and ``gross`` per ``director_name``, and
    draws one point per director with matplotlib.

    Parameters
    ----------
    country
        Value compared for equality against the ``country`` column.
    """
    # Imported here so the callback works regardless of which cell imported
    # pyplot (the notebook-level import lives in a later cell than this def).
    import matplotlib.pyplot as plt

    df = data.movie[data.movie['country'] == country]

    # NOTE(review): likes are summed over every row of a director; if the
    # column repeats a per-director value on each film, the total scales with
    # the number of films — confirm 'sum' is the intended aggregate.
    totals = df.groupby(['director_name']).agg(
        {'director_facebook_likes': 'sum', 'gross': 'sum'}
    )
    totals.plot(kind='scatter', x='director_facebook_likes', y='gross')
    plt.show()
In [16]:
import matplotlib.pyplot as plt
In [17]:
# Hook `f` up to a widget whose choices are the distinct, non-null values of
# the `country` column (first-occurrence order). The trailing `;` suppresses
# the cell's repr output.
interact(f, country=data.movie['country'].dropna().unique().tolist());
In [ ]: