In [200]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sbn
sbn.set()
sbn.set_context("talk")
sbn.set_style("whitegrid")
In [4]:
data = pd.read_csv("imdb_indian_te.csv",date_parser="releaseDate")
print(data.dtypes)
data.shape
Out[4]:
In [5]:
data_with_duration = data[data['duration']>0]
data_with_duration.shape
Out[5]:
In [201]:
yearGroup = data_with_duration.groupby('titleYear')
durationGroup = yearGroup['duration']
#plt.figure(1)
ax=durationGroup.median().plot(title="Median Duration of Telugu movies over the years",color="red")
ax.set_xlabel("Year")
ax.set_ylabel("Duration (Minutes)")
plt.show()
In [202]:
yearGroup = data.groupby('titleYear')
#plt.figure(1)
ax = yearGroup['titleYear'].count().plot(title="Number of telugu movies per Year",color="green")
ax.set_xlabel("Year")
ax.set_ylabel("Count")
start,end = ax.get_xlim()
ax.xaxis.set_ticks(np.arange(start,end,10))
plt.show()
In [92]:
releaseData = data.dropna(subset=['releaseDate'])
releaseData["releaseMonth"] = pd.DatetimeIndex(releaseData['releaseDate']).month
print releaseData.dtypes
In [203]:
releaseMonth = releaseData[releaseData['titleYear']>1989].groupby(['releaseMonth'])['releaseMonth'].count()
#plt.figure()
sbn.set_context("paper")
ax = releaseMonth.plot(kind='bar',title="Movies per month since 1990",)
ax.set_xlabel("Month")
ax.set_ylabel("Count")
plt.show()
In [206]:
releaseData = releaseData[releaseData['titleYear']>1989]
releaseMonthYear =releaseData.groupby(['releaseMonth','titleYear'])['releaseMonth'].count().reset_index(name="count")
g = sbn.FacetGrid(data=releaseMonthYear,col="releaseMonth",col_wrap=4,sharex=False)
g.map(plt.bar
,"titleYear","count")
g.set_xlabels("Year")
g.set_axis_labels(y_var='Count',x_var='Year')
g.set_titles(["A","B","C"])
plt.show()
g.savefig("titleYear.png")