In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
In [2]:
# Load the movie table from the working directory; the cells below read its
# 'title' and 'genres' columns.
df = pd.read_csv('movies.csv')
df.head()
Out[2]:
In [3]:
# Pull the four-digit release year out of the "(YYYY)" group in each title.
# The pattern is a raw string so the regex escapes \( and \d are not parsed as
# (invalid) Python string escapes, and expand=False yields a Series, which is
# what a single-column assignment expects. Titles without a match get NaN;
# matched years stay as strings.
df['year'] = df['title'].str.extract(r'\((\d{4})\)', expand=False)
In [4]:
# Preview df now that the 'year' column has been added.
df.head()
Out[4]:
In [5]:
# Break the pipe-delimited 'genres' text into one column per genre position.
# expand=True returns a DataFrame; rows with fewer genres are padded with
# missing values.
df1 = (
    df['genres']
    .str.split(pat='|', expand=True)
)
In [6]:
# Attach the split-out genre columns to the right of the original movie columns.
# Note: this rebinds df1, so re-running only this cell concatenates again.
df1 = pd.concat(objs=[df, df1], axis='columns')
In [7]:
# Preview the original columns next to the integer-labelled genre columns.
df1.head()
Out[7]:
In [8]:
# Reshape to long format: one row per (movie, genre slot).
# The genre columns are whatever the split added on top of df's own columns,
# so they are looked up here instead of assuming exactly ten of them
# (a fixed range(10) raises KeyError with fewer and drops data with more).
df2 = pd.melt(
    df1,
    id_vars=['title', 'year'],
    value_vars=[col for col in df1.columns if col not in df.columns],
    value_name='genre',
)
In [9]:
# Preview the long-format frame (title, year, variable, genre).
df2.head()
Out[9]:
In [10]:
# Remove rows with a missing value in any column: the padding left by movies
# with fewer genres, and titles where no year was extracted.
# Rebinds df2 rather than mutating it with inplace=True.
df2 = df2.dropna()
In [11]:
# 'variable' only records which genre slot a value came from, so it is not
# needed in the long table. errors='ignore' makes re-running this cell a no-op
# instead of a KeyError.
df2 = df2.drop(columns='variable', errors='ignore')
In [12]:
# Preview after removing missing values and the 'variable' column.
df2.head()
Out[12]:
In [13]:
# Count rows for every (year, genre) pair. 'title' is the only column left
# besides the keys, so it ends up holding the number of movies per group.
df3 = (
    df2
    .groupby(by=['year', 'genre'])
    .count()
)
df3.head()
Out[13]:
In [14]:
# Pivot the inner index level (genre) into columns, leaving one row per year.
df4 = df3.unstack(level=1)
In [15]:
# Preview the wide table: years on the index, genres across the columns.
df4.head()
Out[15]:
In [16]:
# Flatten the two-level ('title', genre) column index to plain genre names.
# get_level_values returns one label per column, in column order, whereas
# .levels only lists the distinct labels and is not guaranteed to line up with
# the columns. It also accepts an already-flat index named 'genre', so this
# cell can be re-run.
df4.columns = df4.columns.get_level_values('genre')
df4.head()
Out[16]:
In [17]:
# Year/genre pairs that never occur come out of the pivot as NaN; treat them
# as zero movies and store the counts as integers. Chained so no frame is
# mutated in place.
df4 = df4.fillna(0).astype('int')
df4.head()
Out[17]:
In [19]:
# Line chart with one series per genre column; the x-axis comes from the year index.
df4.plot(kind='line')
# Place the legend just past the right edge of the axes so it does not cover the lines.
plt.legend(fontsize=8, loc=(1.05, 0))
Out[19]:
In [ ]: