notebook.community

Edit and run



In [2]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]
print(norm_reviews.loc[0])









    



FILM                    Avengers: Age of Ultron (2015)
RT_user_norm                                       4.3
Metacritic_user_nom                               3.55
IMDB_norm                                          3.9
Fandango_Ratingvalue                               4.5
Fandango_Stars                                       5
Name: 0, dtype: object



In [7]:

    
num_cols= ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
bar_heights = norm_reviews[num_cols].iloc[0].values
bar_positions = np.arange(5) + 1

print(bar_positions)
print(tick_positions)

fig, ax = plt.subplots()

# bar positions, bar heights, bar width
ax.bar(bar_positions, bar_heights, 0.5)
# set the positions of the tick labels
ax.set_xticks(bar_positions)
#set the labels for the positions set above
ax.set_xticklabels(num_cols, rotation=90)

ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')


plt.show()









    



[1 2 3 4 5]
[ 1.    1.25  1.5   1.75  2.    2.25  2.5   2.75  3.    3.25  3.5   3.75
  4.    4.25  4.5   4.75  5.    5.25  5.5   5.75]



In [25]:

    
#lets plot the ratings on the x axis so bars will be displayed horizontally and therefore will be easier
#to look for max

fig,ax = plt.subplots()
bar_widths = bar_heights

ax.barh(bar_positions, bar_widths, 0.5)
ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)

ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating for Avengers: Age of Ultron (2015)')

plt.show()



In [36]:

    
# lets scatter rotten tomatoes and fandango ratings on the plot, to see if there is any correlation between them

fig = plt.figure(figsize=(5,10))

ax1 = fig.add_subplot(2,1,1)
ax2 = fig.add_subplot(2,1,2)

ax1.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax2.scatter(norm_reviews['RT_user_norm'], norm_reviews['Fandango_Ratingvalue'])

ax1.set_xlabel('Fandango')
ax1.set_ylabel('Rotten Tomatoes')

ax2.set_xlabel('Rotten Tomatoes')
ax2.set_ylabel('Fandango')



plt.show()



In [39]:

    
#lets scatter fandango ratings against RT, metacritic and imdb ratings
fig = plt.figure(figsize=(5,10))
ax1 = fig.add_subplot(3,1,1)
ax2 = fig.add_subplot(3,1,2)
ax3 = fig.add_subplot(3,1,3)

ax1.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax1.set_xlabel('Fandango')
ax1.set_ylabel('Rotten Tomatoes')
ax1.set_xlim(0, 5)
ax1.set_ylim(0, 5)

ax2.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['Metacritic_user_nom'])
ax2.set_xlabel('Fandango')
ax2.set_ylabel('Metacritic')
ax2.set_xlim(0, 5)
ax2.set_ylim(0, 5)

ax3.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['IMDB_norm'])
ax3.set_xlabel('Fandango')
ax3.set_ylabel('IMDB')
ax3.set_xlim(0, 5)
ax3.set_ylim(0, 5)

plt.show()

#we can see that most similar ratings with fandango's are IMDB's



In [ ]: