In [1]:
# import
import graphlab as gl
import matplotlib.pyplot as plt
import numpy as np
# IPython magic: select the inline matplotlib backend for this notebook.
%matplotlib inline
# Point GraphLab Canvas at the 'ipynb' target.
# NOTE(review): presumably this renders Canvas views inside the notebook — confirm.
gl.canvas.set_target('ipynb')
In [2]:
# Load the SFrame stored at the relative path 'data/amazon_baby.gl/'.
# The path is relative, so the notebook must be run from the directory containing `data/`.
data = gl.SFrame('data/amazon_baby.gl/')
In [3]:
# Preview the first 4 rows as the cell's displayed output.
data.head(4)
Out[3]:
In [4]:
def remove_punctuation(text):
    """Return `text` with every ASCII punctuation character removed.

    Parameters
    ----------
    text : str, unicode or None
        The raw text to clean. `None` is treated as an empty review.

    Returns
    -------
    str or unicode
        `text` without any character from `string.punctuation`;
        the empty string when `text` is None.
    """
    # Kept function-local so the definition is self-contained in its cell.
    import string

    if text is None:
        return ''
    try:
        # Fast path: Python 2 byte strings accept (table, deletechars).
        return text.translate(None, string.punctuation)
    except TypeError:
        # Python 2 `unicode` and Python 3 `str` take a single mapping of
        # code points; mapping a code point to None deletes it.
        return text.translate({ord(ch): None for ch in string.punctuation})
In [5]:
# Build the punctuation-free review text.
# Missing reviews are replaced with '' on the column *before* cleaning:
# SArray.apply skips undefined values by default, so without this the
# derived `review_clean` column would keep None entries.
data['review_clean'] = data['review'].fillna('').apply(remove_punctuation)
In [7]:
# Replace missing values in the 'review' column with the empty string.
# This only touches `review`; it does not modify the already-derived `review_clean`.
# NOTE(review): the execution count jumps from 5 to 7 here — re-check cell order with Restart & Run All.
data = data.fillna('review', '')
In [8]:
# Keep only rows whose rating is not 3; the next cell labels sentiment
# as +1 / -1 around that value, so 3 has no label of its own.
# Rebinds `data`, so re-running earlier cells is needed to recover the dropped rows.
data = data[data['rating'] != 3]
In [9]:
# Binary label: +1 when rating > 3, otherwise -1.
data['sentiment'] = data['rating'].apply(lambda rating : +1 if rating > 3 else -1)
In [10]:
# Random split with fraction .8 going to `train_data` and the rest to `test_data`.
# The fixed seed (1) makes the split repeatable.
train_data, test_data = data.random_split(.8, seed=1)
In [ ]: