In [68]:
import pandas as pd
import random
from plotly.graph_objs import *
Using plotly offline mode
In [69]:
# Offline plotting helpers: `plot` is used in the last cell, `iplot` only in a commented-out alternative.
from plotly.offline import init_notebook_mode,iplot,plot
# Initialise plotly's notebook mode before any figure is drawn.
init_notebook_mode(connected=True)
Reading the Dataset
In [70]:
# Load the movie dataset; later cells read its GENRE, YEAR, RATING and 'ADJ. BOX OFFICE' columns.
dataset = pd.read_csv('finalDataset.csv')
# Show the first three rows as the cell output for a quick look at the data.
dataset.head(3)
Out[70]:
List of Years
In [71]:
# Years covered by the animation: one frame and one slider step is built per entry.
yearList = [1990, 1995, 2000, 2005, 2010, 2014]
List of Genres
In [72]:
# Genres to analyse; every genre becomes one marker trace in the chart.
# (No backslash continuations needed: the brackets already continue the line.)
genreList = [
    'Action', 'Adventure', 'Animation', 'Biography', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Family', 'Fantasy',
    'History', 'Horror', 'Musical', 'Mystery', 'Romance',
    'Sci-Fi', 'Sport', 'Thriller', 'War', 'Western',
]
Function to calculate mean rating for specific genre and year
In [73]:
def calculateMeanRating(genre, year):
    """Return the mean RATING, rounded to 2 decimals, for one genre in one year.

    A row counts for `genre` when its GENRE string contains `genre`
    (rows with a missing GENRE never match) and its YEAR equals `year`.
    """
    matchesGenre = dataset.GENRE.str.contains(genre, na = False)
    matchesYear = dataset.YEAR == year
    selectedRows = dataset[matchesGenre & matchesYear]
    meanRating = selectedRows['RATING'].mean()
    return round(meanRating, 2)
Function to calculate movie count for specific genre and year
In [74]:
def calculateMovieCount(genre, year):
    """Return how many rows match one genre in one year.

    A row matches when its GENRE string contains `genre` (missing GENRE
    never matches) and its YEAR equals `year`.
    """
    matchesGenre = dataset.GENRE.str.contains(genre, na = False)
    matchesYear = dataset.YEAR == year
    # Every True in the combined mask is one selected row.
    return int((matchesGenre & matchesYear).sum())
Function to calculate mean box office for specific genre and year
In [75]:
def calculateMeanBoxOffice(genre, year):
    """Return the mean 'ADJ. BOX OFFICE' for one genre in one year, truncated to an int.

    A row matches when its GENRE string contains `genre` (missing GENRE
    never matches) and its YEAR equals `year`.

    Returns:
        int: the truncated mean of the non-missing box-office values, or
        float('nan') when no row matches or every matching value is missing.
        This mirrors calculateMeanRating, which also yields NaN in that
        case; previously int(NaN) raised ValueError here.
    """
    matchesGenre = dataset.GENRE.str.contains(genre, na = False)
    matchesYear = dataset.YEAR == year
    # Dropping missing values does not change the mean (mean() skips them
    # anyway) but lets us detect the "nothing to average" case up front.
    boxOffice = dataset[matchesGenre & matchesYear]['ADJ. BOX OFFICE'].dropna()
    if boxOffice.empty:
        return float('nan')
    return int(boxOffice.mean())
Function to calculate standard rating for specific year
In [76]:
def calculateStandradRating(year):
    """Return the mean RATING of all rows whose YEAR equals `year`, rounded to 2 decimals.

    This is the year-wide reference value drawn as the "Standard Rating" line.
    """
    ratingsOfYear = dataset.loc[dataset.YEAR == year, 'RATING']
    return round(ratingsOfYear.mean(), 2)
Lists with the mean rating, movie count and mean box office of every genre, one list per year
In [77]:
def _genreStatsForYear(year):
    """Return [mean ratings, movie counts, mean box offices] for `year`.

    Each inner list holds one value per genre, in genreList order.
    """
    meanRatings = [calculateMeanRating(genre, year) for genre in genreList]
    movieCounts = [calculateMovieCount(genre, year) for genre in genreList]
    meanBoxOffices = [calculateMeanBoxOffice(genre, year) for genre in genreList]
    return [meanRatings, movieCounts, meanBoxOffices]


# One stats triple per analysed year; index 0 = ratings, 1 = counts, 2 = box offices.
list1990 = _genreStatsForYear(yearList[0])
list1995 = _genreStatsForYear(yearList[1])
list2000 = _genreStatsForYear(yearList[2])
list2005 = _genreStatsForYear(yearList[3])
list2010 = _genreStatsForYear(yearList[4])
list2014 = _genreStatsForYear(yearList[5])
List with standard rating for specific year
In [78]:
# Year-wide mean rating for every analysed year, index-aligned with yearList.
standardRatingList = list(map(calculateStandradRating, yearList))
List with rgb colors
In [88]:
# One random 'rgb(r,g,b)' colour per genre. data_() indexes this list with the
# genre's position, so its length is tied to genreList instead of a literal 20.
# Each channel is sampled without repetition from the full 0-255 range (the
# former range(255) could never yield 255); this needs len(genreList) <= 256.
# The colours are not seeded, so they change on every run.
randomColorList = [
    'rgb({},{},{})'.format(red, green, blue)
    for red, green, blue in zip(random.sample(range(256), len(genreList)),
                                random.sample(range(256), len(genreList)),
                                random.sample(range(256), len(genreList)))
]
Function that returns the dictionary data used to plot the standard rating line
In [89]:
def function_(xValue, yValue):
    """Build the trace dict for the horizontal "Standard Rating" line.

    Args:
        xValue: sequence of x positions (the per-genre mean box offices).
            It is padded on both sides with the x-axis limits so the line
            spans the whole plot width.
        yValue: the constant rating at which the line is drawn.

    Returns:
        dict: a plotly 'lines' trace whose 'x' and 'y' lists always have the
        same length. The y list used to be a fixed 22 entries, which only
        matched x when exactly 20 x values were passed.
    """
    # list() lets callers pass any sequence, not only a list.
    xPoints = [-10000000] + list(xValue) + [140000000]
    label = 'Standard Rating: ' + str(yValue)
    return {
        'x' : xPoints,
        'y' : [yValue] * len(xPoints),
        'mode': 'lines',
        'showlegend' : True,
        'name': label,
        'hoverinfo': 'text',
        'text' : label
    }
Function that returns the dictionary data used to plot a data point
In [90]:
def data_(rating, movieCount, boxoffice, genre, i):
    """Build the single-marker trace dict for one genre.

    Args:
        rating: mean rating, used as the y position.
        movieCount: number of movies, used as the marker diameter.
        boxoffice: mean box office, used as the x position and shown in
            millions in the hover text.
        genre: genre name, used as the trace name and in the hover text.
        i: index into randomColorList selecting the marker colour.

    Returns:
        dict: a plotly 'markers' trace with one point and custom hover text.
    """
    markerStyle = {
        'size' : [movieCount],
        'sizemode' : 'diameter',
        'color' : randomColorList[i],
        'line' : {'width' : 1}
    }
    # Hover text is four lines separated by HTML line breaks.
    hoverLines = [
        'Genre: ' + genre,
        'Mean Rating: ' + str(rating),
        'Movie Count: ' + str(movieCount),
        'Mean Box Office: ' + str(round(boxoffice/1000000, 2)) + 'M',
    ]
    return {
        'x' : [boxoffice],
        'y' : [rating],
        'name': genre,
        'mode' : 'markers',
        'marker': markerStyle,
        'text' : '<br>'.join(hoverLines),
        'hoverinfo': 'text'
    }
Initialize the figure
In [98]:
# Empty figure skeleton; later cells fill in the traces, layout and animation frames.
figure = dict(data=[], layout={}, frames=[])
Setting x-axis, y-axis, title, background, buttons of layout
In [99]:
# X axis: mean adjusted box office. The range matches the padding values
# that function_() adds around the standard-rating line.
figure['layout']['xaxis'] = {
    'title' : ' Mean Adj. Box Office',
    'titlefont' : {'size' : 20, 'family' : 'Droid Sans'},
    'showline' : True,
    'zeroline' : False,
    'range' : [-10000000, 140000000],
    'gridcolor' : '#FFFFFF',
    'ticks' : 'outside',
    'tickwidth' : 2
}

# Y axis: mean rating.
figure['layout']['yaxis'] = {
    'title' : 'Mean Rating',
    'titlefont' : {'size' : 20, 'family' : 'Droid Sans'},
    'showline' : True,
    'range' : [3.5,9],
    'gridcolor' : '#FFFFFF',
    'ticks' : 'outside',
    'tickwidth' : 2
}

# Title and plot background.
figure['layout']['title'] = 'Movie Analysis'
figure['layout']['titlefont'] = {'size' : 28, 'family' : 'Times New Roman'}
figure['layout']['plot_bgcolor'] = 'rgb(223,232,243)'

# Play / Pause buttons. The animation options key is 'transition'; it was
# misspelled 'transistion' in both buttons, so neither carried that entry.
figure['layout']['updatemenus'] = [
    {
        'type': 'buttons',
        'buttons' : [
            {
                'label' : 'Play',
                'method' : 'animate',
                # None as the first argument: animate through all frames.
                'args' : [None, {'frame' : {'duration' : 500, 'redraw': False},
                                 'fromcurrent': True,
                                 'transition' : {'duration': 300, 'easing': 'quadratic-in-out'}}]
            },
            {
                'label' :'Pause',
                'method' : 'animate',
                # [None] with mode 'immediate' and zero durations halts the running animation.
                'args' : [[None], {'frame' : {'duration' : 0, 'redraw' : False},
                                   'mode': 'immediate',
                                   'transition': {'duration': 0}}]
            }
        ],
        'direction' : 'left',
        'pad': {'r': 10, 't': 87},
        'showactive' : False,
        'x': 0.1,
        'y': 0,
        'xanchor': 'right',
        'yanchor': 'top'
    }
]

# Legend font.
figure['layout']['legend'] = {'font': {'family' : 'Droid Sans', 'size' : 16}}
Initial Data
In [100]:
# Traces shown before the animation starts: the 1990 stats, one marker per genre ...
initialRatings, initialCounts, initialBoxOffices = list1990
for i, genre in enumerate(genreList):
    figure['data'].append(
        data_(initialRatings[i], initialCounts[i], initialBoxOffices[i], genre, i))

# ... followed by a single standard-rating line for 1990.
figure['data'].append(function_(initialBoxOffices, standardRatingList[0]))
Create a Dictionary for sliders
In [101]:
# Year slider configuration; 'steps' starts empty and gets one entry per year
# when the frames are built.
sliders_dict = dict(
    active=0,
    yanchor='top',
    xanchor='left',
    currentvalue=dict(
        font=dict(size=20),
        prefix='Year:',
        visible=True,
        xanchor='right',
    ),
    transition=dict(duration=300, easing='cubic-in-out'),
    pad=dict(b=10, t=50),
    len=0.9,
    x=0.1,
    y=0,
    steps=[],
)
Adding data to frames and then appending slider dict data to sliders
In [102]:
# Per-year lookup: the pre-computed stats triple and the matching index into
# standardRatingList. Any year not listed here falls back to the 2014 data.
statsByYear = {
    1990: (list1990, 0),
    1995: (list1995, 1),
    2000: (list2000, 2),
    2005: (list2005, 3),
    2010: (list2010, 4),
}
fallbackStats = (list2014, 5)

for year in yearList:
    yearStats, ratingIndex = statsByYear.get(year, fallbackStats)
    meanRatings, movieCounts, meanBoxOffices = yearStats

    # One marker trace per genre, then the standard-rating line as the last trace,
    # in the same order as the initial figure data.
    traces = [
        data_(meanRatings[i], movieCounts[i], meanBoxOffices[i], genreList[i], i)
        for i in range(len(genreList))
    ]
    traces.append(function_(meanBoxOffices, standardRatingList[ratingIndex]))
    figure['frames'].append({'data' : traces, 'name': year})

    # Slider step that jumps to the frame named after this year.
    sliders_dict['steps'].append({
        'args': [
            [year],
            {'frame': {'duration': 300, 'redraw': True},
             'mode': 'immediate',
             'transition': {'duration': 300}}
        ],
        'label': year,
        'method': 'animate'
    })

# Attach the slider once every step exists.
figure['layout']['sliders'] = [sliders_dict]
In [103]:
plot(figure)
# iplot(figure)
Out[103]:
In [ ]: