In [1]:
%matplotlib inline
import pandas as pd
In [2]:
from IPython.core.display import HTML

# Collect the text of the two stylesheets shipped next to this notebook.
# Each file is read inside a `with` block so its handle is closed as soon as
# the read finishes, rather than being left open until garbage collection.
css_parts = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)

# Last expression of the cell: an HTML object wrapping the combined CSS in a
# <style> element, which the notebook displays as this cell's output.
HTML('<style>{}</style>'.format(css))
Out[2]:
In [3]:
# Read the cast table from its CSV file, then show the first five rows
# as this cell's output.
cast = pd.read_csv(filepath_or_buffer='data/cast.csv')
cast.head(n=5)
Out[3]:
In [4]:
# Load the release-dates table, parsing the `date` column into datetimes.
# `infer_datetime_format` is deliberately not passed: pandas 2.0 deprecated it
# (consistent format inference is now the default behaviour), so supplying it
# only produces a deprecation warning on current pandas versions.
release_dates = pd.read_csv('data/release_dates.csv', parse_dates=['date'])
release_dates.head()
Out[4]:
In [ ]:
In [5]:
# Make a bar plot of the months in which movies
# with "Christmas" in their title tend to be released in the USA.
rd = release_dates
# Literal (non-regex) substring match; na=False treats a missing title as
# "no match" so the boolean mask can never contain NaN.
rd = rd[rd.title.str.contains('Christmas', regex=False, na=False)]
rd = rd[rd.country == 'USA']
# Reindex over all twelve calendar months so that months without any release
# show up as zero-height bars instead of silently vanishing from the x-axis.
month_counts = rd.date.dt.month.value_counts().reindex(range(1, 13), fill_value=0)
month_counts.plot(kind='bar')
Out[5]:
In [6]:
# Make a bar plot of the months in which movies
# whose titles start with "The Hobbit" are released in the USA.
rd = release_dates
# na=False treats a missing title as "no match" so the boolean mask
# can never contain NaN.
rd = rd[rd.title.str.startswith('The Hobbit', na=False)]
rd = rd[rd.country == 'USA']
# Reindex over all twelve calendar months: without this, only the months that
# actually occur get a bar, and the chart loses its January-December axis.
month_counts = rd.date.dt.month.value_counts().reindex(range(1, 13), fill_value=0)
month_counts.plot(kind='bar')
Out[6]:
In [7]:
# Make a bar plot of the day of the week on which movies
# with "Romance" in their title tend to be released in the USA.
rd = release_dates
# Literal (non-regex) substring match; na=False treats a missing title as
# "no match" so the boolean mask can never contain NaN.
rd = rd[rd.title.str.contains('Romance', regex=False, na=False)]
rd = rd[rd.country == 'USA']
# pandas numbers weekdays 0 (Monday) through 6 (Sunday). Reindex over all
# seven so weekdays without any release appear as zero-height bars instead
# of being dropped from the x-axis.
weekday_counts = rd.date.dt.dayofweek.value_counts().reindex(range(7), fill_value=0)
weekday_counts.plot(kind='bar')
Out[7]:
In [8]:
# Make a bar plot of the day of the week on which movies
# with "Action" in their title tend to be released in the USA.
rd = release_dates
# Literal (non-regex) substring match; na=False treats a missing title as
# "no match" so the boolean mask can never contain NaN.
rd = rd[rd.title.str.contains('Action', regex=False, na=False)]
rd = rd[rd.country == 'USA']
# pandas numbers weekdays 0 (Monday) through 6 (Sunday). Reindex over all
# seven so weekdays without any release appear as zero-height bars instead
# of being dropped from the x-axis.
weekday_counts = rd.date.dt.dayofweek.value_counts().reindex(range(7), fill_value=0)
weekday_counts.plot(kind='bar')
Out[8]:
In [9]:
# On which date was each Judi Dench movie from the 1990s released in the USA?

# USA-only release rows; later cells reuse `usa` as well.
usa = release_dates[release_dates['country'] == 'USA']

# Build both row filters against the full cast table, then apply them at once.
is_dench = cast['name'] == 'Judi Dench'
in_1990s = cast['year'] // 10 * 10 == 1990  # floor the year to its decade
c = cast[is_dench & in_1990s]

# Join on the columns the two frames share and list the rows chronologically.
c.merge(usa).sort_values('date')
Out[9]:
In [10]:
# In which months do films with Judi Dench tend to be released in the USA?
# NOTE: relies on `usa` (USA-only release rows) defined in an earlier cell.
c = cast
c = c[c.name == 'Judi Dench']
m = c.merge(usa).sort_values('date')
# Reindex over all twelve calendar months so that months without any release
# show up as zero-height bars instead of silently vanishing from the x-axis.
month_counts = m.date.dt.month.value_counts().reindex(range(1, 13), fill_value=0)
month_counts.plot(kind='bar')
Out[10]:
In [11]:
# In which months do films with Tom Cruise tend to be released in the USA?
# NOTE: relies on `usa` (USA-only release rows) defined in an earlier cell.
c = cast
c = c[c.name == 'Tom Cruise']
m = c.merge(usa).sort_values('date')
# Reindex over all twelve calendar months so that months without any release
# show up as zero-height bars instead of silently vanishing from the x-axis.
month_counts = m.date.dt.month.value_counts().reindex(range(1, 13), fill_value=0)
month_counts.plot(kind='bar')
Out[11]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: