In [1]:
import pandas as pd
import os
In [2]:
os.getcwd()
Out[2]:
In [3]:
movies = pd.read_csv("Movie-Ratings.csv")
In [4]:
len(movies)
Out[4]:
In [5]:
movies.head()
Out[5]:
In [6]:
movies.columns
Out[6]:
In [7]:
# Replace the CSV headers with short names that can be used as attributes
# (e.g. movies.RottenRating) in the cells below.
movies.columns = [
    'Film',
    'Genre',
    'RottenRating',
    'AudienceRating',
    'BudgetMillion',
    'Year',
]
In [8]:
movies.head()
Out[8]:
In [9]:
movies.info()
In [10]:
movies.describe()
Out[10]:
In [ ]:
In [11]:
movies.Film =movies.Film.astype('category')
In [12]:
movies.head()
Out[12]:
In [13]:
movies.info()
In [14]:
# Store the two label-like columns as pandas categoricals.
# The rating and budget columns are deliberately left untouched.
for column in ('Genre', 'Year'):
    movies[column] = movies[column].astype('category')
In [15]:
movies.head()
Out[15]:
In [16]:
movies.info()
In [17]:
movies.Genre.cat.categories
Out[17]:
In [18]:
# movies.Genre.unique()
In [19]:
movies.describe()
Out[19]:
In [20]:
from matplotlib import pyplot as plt
import seaborn as sns
% matplotlib inline
import warnings
warnings.filterwarnings('ignore')
In [21]:
#Jointplots
In [22]:
movies.head()
Out[22]:
In [23]:
j = sns.jointplot( data=movies , x='RottenRating', y='AudienceRating')
In [24]:
j = sns.jointplot( data=movies , x='RottenRating', y='AudienceRating',kind='hex')
In [25]:
#<<< Chart1
In [26]:
#Histogram
In [27]:
# Distribution of audience ratings, split into 15 bins.
# NOTE(review): distplot is marked deprecated in recent seaborn releases
# (histplot/displot are the suggested replacements) -- confirm the target version.
m1 = sns.distplot(movies.AudienceRating, bins=15)
In [28]:
# Switch the seaborn theme, then draw the critic-rating distribution with 15 bins.
# NOTE(review): set_style changes global state, so it also affects later cells.
sns.set_style('darkgrid')
# NOTE(review): distplot is marked deprecated in recent seaborn releases -- confirm the target version.
m2= sns.distplot(movies.RottenRating, bins=15)
In [29]:
sns.set_style('white')
n1 = plt.hist(movies.AudienceRating, bins=15,edgecolor = "black")
In [30]:
#<<< chart2
In [31]:
n2 = plt.hist(movies.RottenRating, bins=15,edgecolor = "black")
In [32]:
#<<< chart3
In [33]:
#stacked Histograms
In [34]:
h1= plt.hist(movies.BudgetMillion,edgecolor = "black")
In [35]:
plt.hist(movies.BudgetMillion,edgecolor = "black")
plt.show()
In [36]:
movies[movies.Genre == 'Drama']
Out[36]:
In [37]:
movies[movies.Genre == 'Drama'].BudgetMillion
Out[37]:
In [38]:
# Overlay one budget histogram per genre on the same axes; each genre gets
# its own bar edge colour so the layers can be told apart.
for genre, edge in (('Action', 'black'), ('Drama', 'brown'), ('Thriller', 'white')):
    plt.hist(movies[movies.Genre == genre].BudgetMillion, bins=15, edgecolor=edge)
plt.show()
In [39]:
# Stack the budget distributions of four hand-picked genres in one histogram.
stacked_genres = ('Action', 'Drama', 'Thriller', 'Comedy')
budgets_by_genre = [
    movies[movies.Genre == name].BudgetMillion
    for name in stacked_genres
]
plt.hist(budgets_by_genre, bins=15, edgecolor='black', stacked=True)
plt.show()
In [40]:
# List every genre category, one per line.
# The loop body must be indented; at column 0 it is an IndentationError.
for gen in movies.Genre.cat.categories:
    print(gen)
In [41]:
# Collect one BudgetMillion Series per genre category, in category order,
# then show the resulting list. (The original un-indented loop body could not run.)
list1 = [
    movies[movies.Genre == gen].BudgetMillion
    for gen in movies.Genre.cat.categories
]
print(list1)
In [42]:
# Stacked budget histogram covering every genre, with a legend.
# list1 holds one BudgetMillion Series per genre; mylabels holds the matching
# genre names in the same order. (The original un-indented loop body could not run.)
list1 = []
mylabels = []
for gen in movies.Genre.cat.categories:
    list1.append(movies[movies.Genre == gen].BudgetMillion)
    mylabels.append(gen)
# NOTE(review): rwidth is a fraction of the bin width, so rwidth=2 draws bars
# wider than their bins -- confirm this is the intended look.
h1 = plt.hist(list1, bins=30, stacked=True, edgecolor="black", rwidth=2, label=mylabels)
plt.legend()
plt.show()
In [43]:
#<<<< chart4
In [44]:
#KDE plot
In [45]:
# Scatter of critic vs. audience rating coloured by genre (no regression line).
# `height` is the current name of the facet-size argument; the older `size`
# spelling is no longer accepted by recent seaborn releases.
vis1 = sns.lmplot(data=movies, x='RottenRating', y='AudienceRating',
                  fit_reg=False, hue='Genre', height=7, aspect=1)
In [46]:
# Filled bivariate KDE (kernel density estimation) of critic vs. audience rating.
# Variables are passed by keyword and fill/thresh replace shade/shade_lowest:
# recent seaborn rejects two positional data vectors and deprecates the shade
# arguments. thresh=0.05 leaves the lowest density level unfilled, as
# shade_lowest=False did.
k1 = sns.kdeplot(data=movies, x='RottenRating', y='AudienceRating',
                 fill=True, thresh=0.05, cmap='Reds')
# Tip: draw contour lines over the filled plot with a second, unfilled call:
# k1b = sns.kdeplot(data=movies, x='RottenRating', y='AudienceRating', cmap='Reds')
In [47]:
# Filled bivariate KDE of critic vs. audience rating ...
# (keyword x/y and fill/thresh are the spellings accepted by recent seaborn;
# thresh=0.05 keeps the lowest density level unfilled like shade_lowest=False)
k1 = sns.kdeplot(data=movies, x='RottenRating', y='AudienceRating',
                 fill=True, thresh=0.05, cmap='Reds')
# ... with unfilled contour lines in the same colormap drawn on top.
k1b = sns.kdeplot(data=movies, x='RottenRating', y='AudienceRating', cmap='Reds')
In [48]:
#working with subplots()
In [49]:
from matplotlib import pyplot as plt
import seaborn as sns
% matplotlib inline
In [50]:
# Bivariate KDE of budget against audience rating.
# x/y are passed by keyword because recent seaborn no longer accepts two
# positional data vectors.
k1 = sns.kdeplot(data=movies, x='BudgetMillion', y='AudienceRating')
In [51]:
# Switch theme, then draw the bivariate KDE of budget against critic rating.
# x/y are passed by keyword because recent seaborn no longer accepts two
# positional data vectors.
sns.set_style("dark")
k2 = sns.kdeplot(data=movies, x='BudgetMillion', y='RottenRating')
In [52]:
f,ax=plt.subplots(1,2)
In [53]:
f,ax=plt.subplots(1,3)
In [54]:
f,ax=plt.subplots(3,3)
In [55]:
f,ax=plt.subplots(3,2)
In [56]:
# Two KDE panels side by side, sharing both axes so they can be compared directly.
f, axes = plt.subplots(1, 2, figsize=(12, 6), sharex=True, sharey=True)
# x/y are passed by keyword because recent seaborn no longer accepts two
# positional data vectors.
k1 = sns.kdeplot(data=movies, x='BudgetMillion', y='AudienceRating', ax=axes[0])
k2 = sns.kdeplot(data=movies, x='BudgetMillion', y='RottenRating', ax=axes[1])
# The x axis is shared, so setting the limits on one panel applies to both.
k1.set(xlim=(-20, 160))
Out[56]:
In [57]:
axes
Out[57]:
In [58]:
# 2x2 grid; only the top-right and bottom-left panels are filled in, to show
# how axes[row, col] addresses a panel.
f, axes = plt.subplots(2, 2, figsize=(12, 6))
# x/y are passed by keyword because recent seaborn no longer accepts two
# positional data vectors.
k1 = sns.kdeplot(data=movies, x='BudgetMillion', y='AudienceRating', ax=axes[0, 1])
k2 = sns.kdeplot(data=movies, x='BudgetMillion', y='RottenRating', ax=axes[1, 0])
In [59]:
# 2x2 grid; both KDEs go into the bottom row this time.
f, axes = plt.subplots(2, 2, figsize=(12, 6))
# x/y are passed by keyword because recent seaborn no longer accepts two
# positional data vectors.
k1 = sns.kdeplot(data=movies, x='BudgetMillion', y='AudienceRating', ax=axes[1, 1])
k2 = sns.kdeplot(data=movies, x='BudgetMillion', y='RottenRating', ax=axes[1, 0])
In [60]:
#violinplots vs boxplots
In [61]:
w= sns.boxplot(data=movies,x='Genre', y='RottenRating')
In [62]:
w= sns.boxplot(data=movies[movies.Genre=="Drama"],x='Year', y='RottenRating')
In [63]:
z= sns.violinplot(data=movies,x='Genre', y='RottenRating')
In [64]:
z= sns.violinplot(data=movies[movies.Genre=='Drama'],x='Year', y='RottenRating')
In [65]:
#Creating a facet grid
In [66]:
g=sns.FacetGrid(movies,row='Genre', hue='Genre')
In [67]:
g=sns.FacetGrid(movies,row='Genre',col='Year',hue='Genre')
In [68]:
#g=g.map()
plt.scatter(movies.RottenRating,movies.AudienceRating)
Out[68]:
In [69]:
g=sns.FacetGrid(movies,row='Genre',col='Year',hue='Genre')
g=g.map(plt.scatter,'RottenRating','AudienceRating')
In [70]:
#Can be populated with any type of chart, e.g. histograms
In [71]:
g=sns.FacetGrid(movies,row='Genre',col='Year',hue='Genre')
g=g.map(plt.hist,'BudgetMillion')
In [72]:
#back to the scatterplots:
In [73]:
# Marker styling forwarded to plt.scatter for every facet.
kws = {'s': 50, 'linewidth': 0.5, 'edgecolor': 'black'}
# One scatter panel per (Genre, Year) pair, coloured by genre.
g = sns.FacetGrid(movies, row='Genre', col='Year', hue='Genre')
g = g.map(plt.scatter, 'RottenRating', 'AudienceRating', **kws)
In [74]:
kws
Out[74]:
In [75]:
#Coordinates and diagonals: controlling axes and adding diagonals
In [76]:
# Genre x Year grid of rating scatter plots with fixed 0-100 axes.
g = sns.FacetGrid(movies, row='Genre', col='Year', hue='Genre')
kws = dict(s=50, linewidth=0.5, edgecolor='black')
g = g.map(plt.scatter, 'RottenRating', 'AudienceRating', **kws)
g.set(xlim=(0, 100), ylim=(0, 100))
# Draw a line segment from (20, 20) to (60, 60) on every panel.
# The loop body must be indented; at column 0 it is an IndentationError.
for ax in g.axes.flat:
    ax.plot((20, 60), (20, 60))
In [77]:
# Genre x Year grid of rating scatter plots with fixed 0-100 axes, a dashed
# grey diagonal on every panel, and a legend.
g = sns.FacetGrid(movies, row='Genre', col='Year', hue='Genre')
kws = dict(s=50, linewidth=0.5, edgecolor='black')
g = g.map(plt.scatter, 'RottenRating', 'AudienceRating', **kws)
g.set(xlim=(0, 100), ylim=(0, 100))
# The loop body must be indented; at column 0 it is an IndentationError.
for ax in g.axes.flat:
    ax.plot((0, 100), (0, 100), c='gray', ls='--')
# The legend is added once, after all panels have been drawn.
g.add_legend()
plt.show()
In [78]:
#<<<< chart5
In [79]:
#Building a dashboard in Python
In [ ]:
In [86]:
from matplotlib import pyplot as plt
import seaborn as sns
% matplotlib inline
In [95]:
# Dashboard, first version: four panels on one 2x2 figure.
# All kdeplot calls pass x/y by keyword and use fill/thresh, the spellings
# accepted by recent seaborn (two positional vectors are rejected, and
# shade/shade_lowest are deprecated).
sns.set_style('darkgrid')
f, axes = plt.subplots(2, 2, figsize=(15, 15))
# Panels 1 and 2: budget against audience rating and against critic rating.
k1 = sns.kdeplot(data=movies, x='BudgetMillion', y='AudienceRating', ax=axes[0, 0])
k2 = sns.kdeplot(data=movies, x='BudgetMillion', y='RottenRating', ax=axes[0, 1])
k1.set(xlim=(-20, 160))
k2.set(xlim=(-20, 160))
# Panel 3: critic rating of dramas per year.
z = sns.violinplot(data=movies[movies.Genre == 'Drama'], x='Year', y='RottenRating', ax=axes[1, 0])
# Panel 4: filled KDE of critic vs. audience rating; thresh=0.05 leaves the
# lowest density level unfilled, as shade_lowest=False did.
k3 = sns.kdeplot(data=movies, x='RottenRating', y='AudienceRating',
                 fill=True, thresh=0.05, cmap='Reds', ax=axes[1, 1])
# Tip: unfilled contour lines drawn over the filled plot.
k3b = sns.kdeplot(data=movies, x='RottenRating', y='AudienceRating',
                  cmap='Reds', ax=axes[1, 1])
plt.show()
In [97]:
# Dashboard, second version: same layout, but the fourth panel is a plain
# matplotlib histogram instead of the rating KDE.
# kdeplot calls pass x/y by keyword because recent seaborn no longer accepts
# two positional data vectors.
sns.set_style('darkgrid')
f, axes = plt.subplots(2, 2, figsize=(15, 15))
# Panels 1 and 2: budget against audience rating and against critic rating.
k1 = sns.kdeplot(data=movies, x='BudgetMillion', y='AudienceRating', ax=axes[0, 0])
k2 = sns.kdeplot(data=movies, x='BudgetMillion', y='RottenRating', ax=axes[0, 1])
k1.set(xlim=(-20, 160))
k2.set(xlim=(-20, 160))
# Panel 3: critic rating of dramas per year.
z = sns.violinplot(data=movies[movies.Genre == 'Drama'], x='Year', y='RottenRating', ax=axes[1, 0])
# Panel 4: a non-seaborn chart is placed by calling the Axes method directly.
axes[1, 1].hist(movies.AudienceRating, bins=15, edgecolor="black")
plt.show()
In [111]:
# Dashboard, third version: black panel background and custom colormaps.
# All kdeplot calls pass x/y by keyword and use fill/thresh, the spellings
# accepted by recent seaborn. thresh=0 fills the lowest density level too
# (was shade_lowest=True); thresh=0.05 leaves it unfilled (was shade_lowest=False).
sns.set_style('dark', {"axes.facecolor": "black"})  # white, whitegrid, dark, darkgrid, ticks
f, axes = plt.subplots(2, 2, figsize=(15, 15))
# plot[0,0]: budget vs. audience rating, filled KDE plus contour lines.
k1 = sns.kdeplot(data=movies, x='BudgetMillion', y='AudienceRating', ax=axes[0, 0],
                 fill=True, thresh=0, cmap='inferno')
k1b = sns.kdeplot(data=movies, x='BudgetMillion', y='AudienceRating', ax=axes[0, 0],
                  cmap='cool')
# plot[0,1]: budget vs. critic rating, filled KDE plus contour lines.
k2 = sns.kdeplot(data=movies, x='BudgetMillion', y='RottenRating', ax=axes[0, 1],
                 fill=True, thresh=0, cmap='gist_rainbow')
k2b = sns.kdeplot(data=movies, x='BudgetMillion', y='RottenRating', ax=axes[0, 1],
                  cmap='copper')
k1.set(xlim=(-20, 160))
k2.set(xlim=(-20, 160))
# plot[1,0]: critic rating of dramas per year.
z = sns.violinplot(data=movies[movies.Genre == 'Drama'], x='Year', y='RottenRating', ax=axes[1, 0])
# plot[1,1]: critic vs. audience rating, filled KDE plus contour lines.
k3 = sns.kdeplot(data=movies, x='RottenRating', y='AudienceRating',
                 fill=True, thresh=0.05, cmap='Blues_r', ax=axes[1, 1])
k3b = sns.kdeplot(data=movies, x='RottenRating', y='AudienceRating',
                  cmap='gist_gray_r', ax=axes[1, 1])
plt.show()
In [119]:
# Dashboard, final version: figure setup and the two top-row KDE panels.
# kdeplot calls pass x/y by keyword and use fill/thresh, the spellings accepted
# by recent seaborn; thresh=0 fills the lowest density level too (was shade_lowest=True).
sns.set_style('dark', {"axes.facecolor": "black"})  # white, whitegrid, dark, darkgrid, ticks
f, axes = plt.subplots(2, 2, figsize=(15, 15))
# plot[0,0]: budget vs. audience rating, filled KDE plus contour lines.
k1 = sns.kdeplot(data=movies, x='BudgetMillion', y='AudienceRating', ax=axes[0, 0],
                 fill=True, thresh=0, cmap='inferno')
k1b = sns.kdeplot(data=movies, x='BudgetMillion', y='AudienceRating', ax=axes[0, 0],
                  cmap='cool')
# plot[0,1]: budget vs. critic rating, filled KDE plus contour lines.
k2 = sns.kdeplot(data=movies, x='BudgetMillion', y='RottenRating', ax=axes[0, 1],
                 fill=True, thresh=0, cmap='inferno')
k2b = sns.kdeplot(data=movies, x='BudgetMillion', y='RottenRating', ax=axes[0, 1],
                  cmap='copper')
k1.set(xlim=(-20, 160))
k2.set(xlim=(-20, 160))
#plot[1,0]: budget distribution per year.
# The keyword is spelled `palette`; the misspelled `pallete` never applied the
# colour palette (it is either ignored or rejected, depending on the seaborn version).
z = sns.violinplot(data=movies, x='Year', y='BudgetMillion', ax=axes[1, 0], palette='YlOrRd')
#plot[1,1]: critic vs. audience rating, filled KDE plus contour lines.
# x/y are passed by keyword and fill/thresh replace shade/shade_lowest, the
# spellings accepted by recent seaborn; thresh=0.05 leaves the lowest density
# level unfilled, as shade_lowest=False did.
k3 = sns.kdeplot(data=movies, x='RottenRating', y='AudienceRating',
                 fill=True, thresh=0.05, cmap='Blues_r', ax=axes[1, 1])
# Tip: unfilled contour lines drawn over the filled plot.
k3b = sns.kdeplot(data=movies, x='RottenRating', y='AudienceRating',
                  cmap='gist_gray_r', ax=axes[1, 1])
plt.show()
In [120]:
#Thematic edits
In [135]:
# Presentation-ready version of the stacked budget histogram: A4-sized figure,
# title, axis labels, enlarged tick labels and a framed legend.
# list1 holds one BudgetMillion Series per genre; mylabels holds the matching
# genre names in the same order. (The original un-indented loop body could not run.)
list1 = []
mylabels = []
for gen in movies.Genre.cat.categories:
    list1.append(movies[movies.Genre == gen].BudgetMillion)
    mylabels.append(gen)
sns.set_style('whitegrid')
fig, ax = plt.subplots()
fig.set_size_inches(11.7, 8.27)  # Size of A4
# NOTE(review): rwidth is a fraction of the bin width, so rwidth=2 draws bars
# wider than their bins -- confirm this is the intended look.
h1 = plt.hist(list1, bins=30, stacked=True, edgecolor="black", rwidth=2, label=mylabels)
# NOTE(review): 'console' may not resolve to an installed font, in which case
# matplotlib falls back to its default -- confirm the intended font name.
plt.title('Movie Budget Distribution', fontsize=35, color='DarkBlue', fontname='console')
plt.ylabel("Number of movies", fontsize=25, color='Red')
plt.xlabel("Budget", fontsize=25, color='Green')
plt.yticks(fontsize=20)
plt.xticks(fontsize=20)
plt.legend(frameon=True, fancybox=True, shadow=True, framealpha=1, prop={'size': 20})
plt.show()
In [ ]: