In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
In [2]:
# Apply the 'ggplot' style sheet to every figure drawn after this cell.
plt.style.use('ggplot')
In [3]:
# Show the names of the style sheets that plt.style.use() accepts.
print(plt.style.available)
In [4]:
# dataset from https://github.com/sidooms/MovieTweetings
# Column names are supplied through `names=`, so the first line of the file is
# read as data rather than as a header.
cols = ['user id', 'movie id', 'rate', 'ts']
# '::' is a multi-character separator, which only the Python parsing engine
# supports. Requesting that engine explicitly avoids the ParserWarning pandas
# emits when it has to fall back from the default C engine.
ratings = pd.read_csv('dataset/ratings.dat.txt', sep='::', engine='python',
                      index_col=False, names=cols, encoding='utf8')
In [5]:
# Peek at the first five rating records.
ratings.head()
Out[5]:
In [6]:
# Number of ratings recorded for each distinct score.
ratings.loc[:, 'rate'].value_counts()
Out[6]:
In [7]:
# Bar chart of how many ratings were given at each score, ordered by score.
rate_counts = ratings['rate'].value_counts().sort_index()
# Label the Axes returned by pandas directly instead of going through the
# pyplot "current axes" state.
ax = rate_counts.plot(kind='bar')
ax.set_title('movie ratings')
ax.set_ylabel('counts')
ax.set_xlabel('rate')
Out[7]:
In [8]:
# Movie metadata: one record per movie with its id, name and genre string.
# '::' is a multi-character separator, which only the Python parsing engine
# supports. Requesting that engine explicitly avoids the ParserWarning pandas
# emits when it has to fall back from the default C engine.
movies = pd.read_csv('dataset/movies.dat.txt', sep='::', engine='python',
                     index_col=False, names=['movie id', 'name', 'genre'],
                     encoding='utf8')
# Preview the first five movies.
movies[:5]
Out[8]:
In [9]:
# Keep only the movies whose genre string is exactly 'Crime|Drama'; rows with
# any other genre combination are not matched by this equality test.
# NOTE(review): if every drama title was intended, a substring match would be
# needed instead - confirm the intent.
drama = movies.loc[movies['genre'].eq('Crime|Drama')]
drama.head()
Out[9]:
In [10]:
# Ids of the movies selected in `drama`.
dramaIds = drama['movie id']
# Boolean mask over `ratings`: True where the rated movie is one of dramaIds.
# Series.isin performs the membership test in one vectorized pass, instead of
# comparing every rating row against the whole dramaIds Series in a Python
# lambda (work proportional to len(ratings) * len(dramaIds)).
criterion = ratings['movie id'].isin(dramaIds)
dramaRates = ratings[criterion]
# Preview the first five matching ratings.
dramaRates[:5]
Out[10]:
In [11]:
# Bar chart of the score distribution for the ratings held in dramaRates.
drama_rate_counts = dramaRates['rate'].value_counts().sort_index()
# Label the Axes returned by pandas directly instead of going through the
# pyplot "current axes" state.
ax = drama_rate_counts.plot(kind='bar')
ax.set_title('drama rating')
ax.set_xlabel('rate')
ax.set_ylabel('count')
Out[11]:
In [14]:
# (rows, columns) of the ratings table and of the movies table, as a pair.
(ratings.shape, movies.shape)
Out[14]:
In [ ]: