In [2]:
import pandas as pd
# Load mlsmall.csv into `df`. Later cells read `df` (df.info(), df[column]),
# so this line has to be live for the notebook to run top-to-bottom on a
# fresh kernel.
df = pd.read_csv('mlsmall.csv')
#print(df.describe())
#print(df.head())
In [8]:
# DataFrame.info() writes its summary straight to stdout and returns None,
# so it is called directly; wrapping it in print() only appends a "None" line.
df.info()
In [20]:
# Build `d2`: the three columns listed below, taken from `df`, with the
# rating column doubled and truncated to an integer.
columns = ['userId', 'movieId', 'rating']

# Select all wanted columns in one step; .copy() gives `d2` its own data so
# the assignment below cannot write through to `df`.
d2 = df[columns].copy()

# Vectorized equivalent of int(rating * 2.0) per row: the float -> int64 cast
# truncates toward zero, as int() does.
d2['rating'] = (d2['rating'] * 2.0).astype('int64')

d2.head()
#d2.to_csv('mlsmall-new.csv', index=False, sep=',')
In [14]:
# Convert the five tab-separated u<i>.base / u<i>.test files into
# comma-separated train<i>.csv / test<i>.csv files without the timestamp column.
columns = ['user_id', 'movie_id', 'rating', 'timestamp']
fields = columns[:-1]  # every column except the trailing 'timestamp'


def convert_split(src_path, dst_path, names=columns, keep=fields):
    """Rewrite one header-less, tab-separated file as a CSV with a header row.

    Parameters
    ----------
    src_path : str
        Path of the tab-separated input file (read without a header row).
    dst_path : str
        Path of the comma-separated output file (written without the index).
    names : list of str
        Column names assigned to the input file, in file order.
    keep : list of str
        Subset of `names` to write, in output order.
    """
    # usecols skips parsing of the columns that would be discarded anyway.
    split = pd.read_csv(src_path, sep='\t', names=names, usecols=keep)
    # Index with `keep` so the output column order is explicit.
    split[keep].to_csv(dst_path, sep=',', index=False)


# A local frame inside the helper means the notebook-level `df` used by the
# earlier cells is no longer overwritten by this loop.
for i in range(1, 6):
    convert_split('u' + str(i) + '.base', 'train' + str(i) + '.csv')
    convert_split('u' + str(i) + '.test', 'test' + str(i) + '.csv')
In [ ]: