In [1]:
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
In [2]:
conn = sqlite3.connect("/data/movie-lens.db")
In [3]:
movies = pd.read_sql("select * from movies", conn)
movies.head()
Out[3]:
In [4]:
ratings = pd.read_sql("select * from ratings", conn)
ratings.head()
Out[4]:
In [5]:
ratings.rating.value_counts().sort_index().plot.bar()
plt.title("Frequency plot for rating")
plt.xlabel("Rating")
plt.ylabel("Frequency")
plt.savefig("/tmp/rating.jpeg")
In [6]:
agg = ratings.groupby("movieId").rating.agg([np.mean, len])
agg.head()
Out[6]:
In [7]:
agg.to_sql("rating_avg", conn, if_exists="append") # Save the dataframe to database
In [ ]: