In [4]:
import numpy as np
import pandas as pd
import os

In [5]:
header = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv(os.path.expanduser("~/ml-latest-small/ratings.csv"), sep=',', names=header)

In [7]:
n_users = df.user_id.unique().shape[0]
n_items = df.item_id.unique().shape[0]
print( 'Number of users = ' + str(n_users) + ' | Number of movies = ' + str(n_items))


Number of users = 672 | Number of movies = 9067

In [8]:
from sklearn import cross_validation as cv
train_data, test_data = cv.train_test_split(df, test_size=0.25)


---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-8-2933439eae91> in <module>()
----> 1 from sklearn import cross_validation as cv
      2 train_data, test_data = cv.train_test_split(df, test_size=0.25)
      3 

d:\python36\lib\site-packages\sklearn\__init__.py in <module>()
    132 else:
    133     from . import __check_build
--> 134     from .base import clone
    135     __check_build  # avoid flakes unused variable error
    136 

d:\python36\lib\site-packages\sklearn\base.py in <module>()
      8 
      9 import numpy as np
---> 10 from scipy import sparse
     11 from .externals import six
     12 from .utils.fixes import signature

ModuleNotFoundError: No module named 'scipy'

In [ ]: