Movie Analysis Plot


In [68]:
import pandas as pd
import random
from plotly.graph_objs import *

Using plotly offline mode


In [69]:
# Offline plotting helpers: `plot` is used at the end of the notebook, `iplot` only in a commented-out alternative.
from plotly.offline import init_notebook_mode,iplot,plot
# Prepare the notebook for offline plotly output.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of embedding it — confirm for the installed plotly version.
init_notebook_mode(connected=True)


Reading the Dataset


In [70]:
# Load the movie dataset; the path is relative, so the CSV must sit in the working directory.
dataset = pd.read_csv('finalDataset.csv')

# Preview the first three rows as a quick check of the loaded columns.
dataset.head(3)


Out[70]:
TMDB ID IMDB ID TITLE YEAR GENRE RATING RELEASED ACTORS AWARDS COUNTRY LANGUAGE BOX OFFICE ADJ. BOX OFFICE
0 862 tt0114709 Toy Story 1995 Animation, Adventure, Comedy 8.3 22 Nov 1995 Tom Hanks, Tim Allen, Don Rickles, Jim Varney Nominated for 3 Oscars. Another 23 wins & 18 n... USA English 191796233.0 360224189.0
1 8844 tt0113497 Jumanji 1995 Action, Adventure, Family 6.9 15 Dec 1995 Robin Williams, Jonathan Hyde, Kirsten Dunst, ... 4 wins & 9 nominations. USA English, French 100200000.0 188191724.0
2 15602 tt0113228 Grumpier Old Men 1995 Comedy, Romance 6.6 22 Dec 1995 Walter Matthau, Jack Lemmon, Sophia Loren, Ann... 2 wins & 2 nominations. USA English 69870000.0 131227103.0

List of Years


In [71]:
# Years to analyse; each one becomes an animation frame and a slider step further down.
yearList = [1990, 1995, 2000, 2005, 2010, 2014]

List of Genres


In [72]:
# Genre labels searched for in the GENRE column; the order fixes each genre's
# index in the per-year statistic lists and in the colour list.
genreList = [
    'Action', 'Adventure', 'Animation', 'Biography', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Family', 'Fantasy',
    'History', 'Horror', 'Musical', 'Mystery', 'Romance',
    'Sci-Fi', 'Sport', 'Thriller', 'War', 'Western',
]

Function to calculate the mean rating for a specific genre and year


In [73]:
def calculateMeanRating(genre, year):
    """Return the mean RATING of the movies of one genre released in one year.

    A movie counts towards `genre` when its GENRE string contains `genre`;
    rows with a missing GENRE never match. Reads the module-level `dataset`.

    Parameters:
        genre: genre label to look for inside the GENRE column.
        year: value compared for equality against the YEAR column.

    Returns:
        The mean rating rounded to two decimals (NaN when nothing matches).
    """
    inGenre = dataset.GENRE.str.contains(genre, na = False)
    inYear = dataset.YEAR == year
    matchingRatings = dataset.loc[inGenre & inYear, 'RATING']
    return round(matchingRatings.mean(), 2)

Function to calculate movie count for specific genre and year


In [74]:
def calculateMovieCount(genre, year):
    """Count the movies of one genre released in one year.

    A movie counts towards `genre` when its GENRE string contains `genre`;
    rows with a missing GENRE never match. Reads the module-level `dataset`.

    Parameters:
        genre: genre label to look for inside the GENRE column.
        year: value compared for equality against the YEAR column.

    Returns:
        The number of matching rows as an int.
    """
    inGenre = dataset.GENRE.str.contains(genre, na = False)
    inYear = dataset.YEAR == year
    return dataset.loc[inGenre & inYear].shape[0]

Function to calculate mean box office for specific genre and year


In [75]:
def calculateMeanBoxOffice(genre, year):
    """Return the mean 'ADJ. BOX OFFICE' of one genre's movies in one year.

    A movie counts towards `genre` when its GENRE string contains `genre`;
    rows with a missing GENRE never match. Reads the module-level `dataset`.

    Parameters:
        genre: genre label to look for inside the GENRE column.
        year: value compared for equality against the YEAR column.

    Returns:
        The mean truncated to an int, or 0 when no matching movie has a
        box-office value.
    """
    matches = dataset[(dataset.GENRE.str.contains(genre, na = False)) & (dataset.YEAR == year)]
    meanBoxOffice = matches['ADJ. BOX OFFICE'].mean()

    # The mean of an empty or all-missing selection is NaN, and int(NaN) raises
    # ValueError; fall back to 0 so one sparse genre/year pair cannot abort the
    # whole notebook run.
    if pd.isna(meanBoxOffice):
        return 0

    return int(meanBoxOffice)

Function to calculate the standard rating (the mean rating of all movies) for a specific year


In [76]:
def calculateStandradRating(year):
    """Return the mean RATING of every movie released in `year`, across all genres.

    Reads the module-level `dataset`.

    Parameters:
        year: value compared for equality against the YEAR column.

    Returns:
        The mean rating rounded to two decimals (NaN when the year has no rows).
    """
    ratingsOfYear = dataset.loc[dataset.YEAR == year, 'RATING']
    return round(ratingsOfYear.mean(), 2)

Lists with

  • First index - Mean Rating for each genre
  • Second index - Movie Count for each genre
  • Third index - Mean Adj. Box Office for genre

In [77]:
def _collectYearStats(year):
    """Gather the three per-genre statistic lists for one year.

    Returns:
        [mean ratings, movie counts, mean adjusted box office], each list
        holding one entry per genre in `genreList` order.
    """
    return [[calculateMeanRating(genre, year) for genre in genreList],
            [calculateMovieCount(genre, year) for genre in genreList],
            [calculateMeanBoxOffice(genre, year) for genre in genreList]]


# One statistics triple per entry of yearList. The individual names are kept
# because the cells further down refer to them directly.
list1990 = _collectYearStats(yearList[0])
list1995 = _collectYearStats(yearList[1])
list2000 = _collectYearStats(yearList[2])
list2005 = _collectYearStats(yearList[3])
list2010 = _collectYearStats(yearList[4])
list2014 = _collectYearStats(yearList[5])

List with standard rating for specific year


In [78]:
# Overall mean rating of each year, in yearList order.
standardRatingList = list(map(calculateStandradRating, yearList))

List with rgb colors


In [88]:
# One 'rgb(r,g,b)' colour per genre.
# - A dedicated, seeded generator keeps the palette identical on every
#   Restart & Run All without touching the global `random` state.
# - The number of colours follows genreList instead of a hard-coded 20, so
#   adding a genre cannot cause an IndexError when a colour is looked up.
# - range(256) covers the full 0-255 channel range (range(255) excluded 255).
# Sampling each channel without replacement keeps every channel value unique
# across genres, as before.
colorRng = random.Random(42)
colorCount = len(genreList)
redValues, greenValues, blueValues = (colorRng.sample(range(256), colorCount) for _ in range(3))
randomColorList = ['rgb({},{},{})'.format(red, green, blue)
    for red, green, blue in zip(redValues, greenValues, blueValues)]

Function that returns the trace dictionary used to plot the standard rating line


In [89]:
def function_(xValue, yValue):
    """Build the trace dictionary for the horizontal 'standard rating' line.

    Parameters:
        xValue: sequence of x positions (the per-genre mean box office values).
        yValue: the rating at which the horizontal line is drawn.

    Returns:
        A dict describing a plotly 'lines' trace whose x values are `xValue`
        padded with -10000000 and 140000000 at the ends, and whose y values
        all equal `yValue`.
    """
    xPoints = [-10000000] + list(xValue) + [140000000]
    label = 'Standard Rating: ' + str(yValue)

    return {
        'x' : xPoints,
        # One y per x. This used to be a fixed 22 entries, which only lined up
        # with x when exactly 20 values were passed in.
        'y' : len(xPoints) * [yValue],
        'mode': 'lines',
        'showlegend' : True,
        'name': label,
        'hoverinfo': 'text',
        'text' : label
    }

Function that returns the trace dictionary used to plot a genre's data point


In [90]:
def data_(rating, movieCount, boxoffice, genre, i):
    """Build the trace dictionary for one genre's bubble.

    Parameters:
        rating: mean rating, used as the y coordinate.
        movieCount: number of movies, used as the marker size (a diameter).
        boxoffice: mean box office, used as the x coordinate.
        genre: genre label, used as the trace name and in the hover text.
        i: index into the module-level `randomColorList` for the marker colour.

    Returns:
        A dict describing a single-point plotly 'markers' trace.
    """
    boxOfficeInMillions = round(boxoffice/1000000, 2)
    hoverText = ('Genre: ' + genre
                 + '<br>Mean Rating: ' + str(rating)
                 + '<br>Movie Count: ' + str(movieCount)
                 + '<br>Mean Box Office: ' + str(boxOfficeInMillions) + 'M')

    markerStyle = {
        'size' : [movieCount],
        'sizemode' : 'diameter',
        'color' : randomColorList[i],
        'line' : {'width' : 1}
    }

    return {
        'x' : [boxoffice],
        'y' : [rating],
        'name': genre,
        'mode' : 'markers',
        'marker': markerStyle,
        'text' : hoverText,
        'hoverinfo': 'text'
    }

Initialize the figure


In [98]:
# Empty figure skeleton; the cells below fill in the traces, layout and frames.
figure = dict(data=[], layout={}, frames=[])

Setting the x-axis, y-axis, title, background and buttons of the layout


In [99]:
# x axis: mean adjusted box office. The range is fixed so the axis does not
# rescale between animation frames.
figure['layout']['xaxis'] = {'title' : ' Mean Adj. Box Office',
                             'titlefont' : {
                                 'size' : 20, 'family' : 'Droid Sans'
                             },
                            'showline' : True,
                            'zeroline' : False,
                            'range' : [-10000000, 140000000],
                            'gridcolor' : '#FFFFFF',
                            'ticks' : 'outside',
                            'tickwidth' : 2}

# y axis: mean rating, also with a fixed range.
figure['layout']['yaxis'] = {'title' : 'Mean Rating',
                            'titlefont' : {
                                'size' : 20, 'family' : 'Droid Sans'
                            },
                            'showline' : True,
                            'range' : [3.5,9],
                            'gridcolor' : '#FFFFFF',
                            'ticks' : 'outside',
                            'tickwidth' : 2}

figure['layout']['title'] = 'Movie Analysis'

figure['layout']['titlefont'] = {'size' : 28, 'family' : 'Times New Roman'}

figure['layout']['plot_bgcolor'] = 'rgb(223,232,243)'

# Play / Pause buttons driving the animation.
# The animation option key is 'transition'; it was previously misspelled
# 'transistion' in both buttons, so the settings below never took effect.
figure['layout']['updatemenus'] = [
    {
        'type': 'buttons',
        'buttons' : [{
            'label' : 'Play',
            'method' : 'animate',
            # None as the first argument animates through all frames.
            'args' : [None, {'frame' : {'duration' : 500, 'redraw': False},'fromcurrent': True,
                             'transition' : {'duration': 300, 'easing': 'quadratic-in-out'}}]
        },
        {
            'label' :'Pause',
            'method' : 'animate',
            # [None] with mode 'immediate' and zero durations halts the running animation.
            'args' : [[None], {'frame' : {'duration' : 0, 'redraw' : False}, 'mode': 'immediate',
                                'transition': {'duration': 0}}]
        }],
        'direction' : 'left',
        'pad': {'r': 10, 't': 87},
        'showactive' : False,
        'x': 0.1,
        'y': 0,
        'xanchor': 'right',
        'yanchor': 'top'
    }
]

figure['layout']['legend'] = { 'font': { 'family' : 'Droid Sans',  
                                        'size' : 16 }}

Initial Data


In [100]:
# Initial traces shown before any animation runs: one bubble per genre for the
# first year. The list is assigned rather than appended to, so re-running this
# cell does not pile duplicate traces onto the figure.
figure['data'] = [
    data_(rating, movieCount, boxOffice, genre, i)
    for i, (rating, movieCount, boxOffice, genre)
    in enumerate(zip(list1990[0], list1990[1], list1990[2], genreList))
]

# The standard-rating line goes last, after all genre bubbles.
figure['data'].append(function_(list1990[2], standardRatingList[0]))

Create a Dictionary for sliders


In [101]:
# Year slider configuration; 'steps' starts empty and receives one entry per
# year in a later cell.
sliders_dict = dict(
    active=0,
    yanchor='top',
    xanchor='left',
    currentvalue=dict(
        font=dict(size=20),
        prefix='Year:',
        visible=True,
        xanchor='right',
    ),
    transition=dict(duration=300, easing='cubic-in-out'),
    pad=dict(b=10, t=50),
    len=0.9,
    x=0.1,
    y=0,
    steps=[],
)

Adding data to frames and then appending slider dict data to sliders


In [102]:
# Per-year statistics in yearList order (listNNNN was computed from the
# yearList entry at the same position). Pairing by position replaces the two
# six-way if/elif chains that were keyed on literal years.
statsPerYear = [list1990, list1995, list2000, list2005, list2010, list2014]

# Start from empty containers so re-running this cell does not duplicate
# frames or slider steps.
figure['frames'] = []
sliders_dict['steps'] = []

for year, yearStats, standardRating in zip(yearList, statsPerYear, standardRatingList):
    meanRatings, movieCounts, meanBoxOffices = yearStats

    # plotly.js stores frame names as strings, so the frame name, the slider
    # target and the slider label all use the same string form of the year.
    yearLabel = str(year)

    # One bubble per genre, then the standard-rating line — the same trace
    # order as the figure's initial data, so traces line up across frames.
    frame = {
        'data' : [data_(meanRatings[i], movieCounts[i], meanBoxOffices[i], genreList[i], i)
                  for i in range(len(genreList))],
        'name': yearLabel
    }
    frame['data'].append(function_(meanBoxOffices, standardRating))

    figure['frames'].append(frame)

    # Slider step that jumps straight to this year's frame.
    slider_step = {'args': [
        [yearLabel],
        {'frame': {'duration': 300, 'redraw': True},
         'mode': 'immediate',
       'transition': {'duration': 300}}
     ],
     'label': yearLabel,
     'method': 'animate'}
    sliders_dict['steps'].append(slider_step)


figure['layout']['sliders'] = [sliders_dict]

Plotting the figure


In [103]:
# Write the figure to an HTML file; the call returns that file's URL (see the output below).
plot(figure)
# Alternative call, left disabled — NOTE(review): presumably renders inline in the notebook instead; confirm.
# iplot(figure)


Out[103]:
'file:///Users/shivakumarswamybg/Desktop/Project/temp-plot.html'

In [ ]: