In [ ]:
import os
import numpy as np
import pandas as pd
import itertools
import matplotlib.pyplot as plt
%matplotlib inline
import imdb_data_helper

In [ ]:
#JMF 24 Jul 2018: if you want interactive plots, open a terminal and `pip install mpld3 --user`,
#    then uncomment the lines below.
#import mpld3

In [ ]:
# Data downloaded from and cleaned slightly
# NOTE(review): the source URL appears to be missing from the comment above, and
# both `if` statements below have no body in this export, so the cell cannot run
# as written. Presumably each guard fetched the corresponding file when the
# helper reported it absent -- TODO confirm and restore the bodies.
if not imdb_data_helper.have_title_basics():
if not imdb_data_helper.have_title_ratings():

In [ ]:
# Load both tables through the project helper; the cells below read these two
# module-level names (title_basics for titles/genres, title_ratings for ratings).
title_basics, title_ratings = (
    imdb_data_helper.get_title_basics(),
    imdb_data_helper.get_title_ratings(),
)

In [ ]:
# Spot check: display every row whose primary title contains "Jurassic World"
# (rows with a missing title are treated as non-matching via na=False).
title_basics.loc[
    title_basics['primaryTitle'].str.contains('Jurassic World', na=False)
]

In [ ]:
# Accumulator for the distinct genre names seen across all titles.
all_genres = set()


def genres_list():
    """Yield one list of genre names per title that has a genre string.

    Iterates over the module-level ``title_basics['genres']`` values. Entries
    that are not exactly of type ``str`` (e.g. missing values) are skipped;
    every other entry is split on commas and yielded as a list.
    """
    for raw_entry in title_basics['genres']:
        # Skip anything that is not a plain string (missing genres, etc.).
        if type(raw_entry) is not str:
            continue
        yield raw_entry.split(',')
# NOTE(review): this loop has no body in this export, so the cell is not valid
# Python as written. Presumably it should add each title's genres to
# `all_genres` -- TODO confirm and restore the body.
for genres in genres_list():


In [ ]:
def make_histogram_for_genre(target_genre):
    """Plot a normalized histogram of average ratings for one genre.

    Reads the module-level ``title_basics`` and ``title_ratings`` tables.
    Titles are selected when ``target_genre`` appears as a complete entry in
    their comma-separated ``genres`` string, so ``'Music'`` does not also
    select ``'Musical'``. Ratings are looked up by the index of the selected
    titles (assumes both tables share the same index -- TODO confirm), and
    titles without a rating are dropped.

    Parameters
    ----------
    target_genre : str
        Genre name to select, matched literally (regex metacharacters in the
        name are escaped).

    Returns
    -------
    None
        The histogram is drawn on the current matplotlib figure.
    """
    import re  # local import: only needed to escape the genre name below

    # Anchor the genre between string boundaries or commas so that only a
    # whole genre entry matches, never a substring of a longer genre name.
    genre_pattern = r'(?:^|,)' + re.escape(target_genre) + r'(?:,|$)'
    has_genre = title_basics['genres'].str.contains(genre_pattern, na=False, regex=True)
    title_basics_for_genre = title_basics[has_genre]

    genre_ratings = title_ratings['averageRating'].reindex(title_basics_for_genre.index)
    genre_ratings = genre_ratings.dropna()
    #TODO: compute the mean and median rating (np.mean; np.median)
    #TODO: plot vertical lines denoting the mean and median
    #      using plt.axvline (use optional parameter color='r', color='g')

    # 102 edges from -0.05 to 10.05 give 101 bins of width 0.1 centred on
    # 0.0, 0.1, ..., 10.0. linspace fixes the edge count explicitly, whereas
    # arange with a float step can gain or lose the last edge through rounding.
    bins = np.linspace(-0.05, 10.05, 102)
    plt.hist(genre_ratings, bins=bins, density=True)
    plt.xlabel('Average Rating')
    plt.ylabel('Density')
    plt.title("Average Ratings for genre='{target_genre}'".format(target_genre=target_genre))


Q: Are there any "outliers" that you can see in any of the histograms?

Q: Which genre has the highest mean rating? Which has the lowest mean rating?