In [30]:
# import
import graphlab as gl
import pandas as pd
In [31]:
# Load the dataset stored under data/amazon_baby.gl/ (path relative to the
# working directory) into an SFrame; later cells read its 'name', 'review'
# and 'rating' columns.
data = gl.SFrame("data/amazon_baby.gl/")
In [32]:
# Preview the first rows of the freshly loaded SFrame
data.head()
Out[32]:
In [33]:
# Derive a per-review word-count feature from the raw review text and
# store it alongside the original columns as 'word_count'.
review_text = data['review']
data['word_count'] = gl.text_analytics.count_words(review_text)
In [34]:
# Preview again to confirm the new 'word_count' column is present
data.head()
Out[34]:
In [35]:
# Point GraphLab Canvas at the 'ipynb' target before the .show() calls below
# (presumably renders visualisations inline in the notebook; confirm against the Canvas docs)
gl.canvas.set_target('ipynb')
In [36]:
# Visualise the 'name' column in Canvas
data['name'].show()
In [37]:
# Keep only the rows whose product name matches this one teether exactly
is_giraffe = data['name'] == 'Vulli Sophie the Giraffe Teether'
giraffe_reviews = data[is_giraffe]
In [38]:
# Categorical view of the ratings given to this single product
giraffe_reviews['rating'].show(view="Categorical")
In [39]:
# First step towards the sentiment classifier: inspect the rating
# distribution over the whole dataset (categorical view). This cell only
# plots; the model itself is trained in a later cell.
data['rating'].show(view="Categorical")
Defining Positive and Negative Sentiment
Ignore 3 star ratings (treated as neutral; the filter below removes only rating 3, and no rating of 0 is handled separately)
1 and 2 are treated as Negative
4 and 5 are treated as Positive
In [40]:
# Exclude every review rated exactly 3 so that only the rows with a
# rating other than 3 remain for labelling.
not_three_star = data['rating'] != 3
data2 = data[not_three_star]
In [41]:
# Label column for the classifier: truthy when the rating is above 3,
# falsy otherwise (3-star rows were already removed from data2).
is_positive = data2['rating'] > 3
data2['sentiment'] = is_positive
In [42]:
# Preview the filtered data, now carrying the 'sentiment' label column
data2.head()
Out[42]:
In [43]:
# Preparation for training: randomly partition the labelled reviews into
# a training part and a test part. 0.8 is the split fraction passed to
# random_split, and seed=0 pins the random split.
split_parts = data2.random_split(0.8, seed=0)
train_data, test_data = split_parts
In [44]:
# Fit a logistic-regression classifier that predicts 'sentiment' from the
# 'word_count' feature alone. test_data is passed as the validation set.
clf = gl.logistic_classifier.create(
    train_data,
    features=['word_count'],
    target='sentiment',
    validation_set=test_data,
)
In [45]:
# Evaluate the sentiment model on the held-out test split, requesting the
# ROC curve as the metric
clf.evaluate(test_data, metric='roc_curve')
Out[45]:
In [46]:
# Open the model's 'Evaluation' view in Canvas
clf.show(view='Evaluation')
In [47]:
# Score each giraffe review with the trained classifier, asking for a
# probability rather than a class label, and store the scores in a new
# column. NOTE: the column name is spelled 'predicted_sentment' and later
# cells look it up by that exact spelling.
sentiment_probability = clf.predict(giraffe_reviews, output_type='probability')
giraffe_reviews['predicted_sentment'] = sentiment_probability
In [48]:
# Preview the giraffe reviews together with their predicted scores
giraffe_reviews.head()
Out[48]:
In [49]:
# Reorder the reviews by predicted score, highest first (ascending=False).
# The key must match the 'predicted_sentment' spelling used when the column
# was created.
giraffe_reviews = giraffe_reviews.sort('predicted_sentment', ascending=False)
In [50]:
# Preview the sorted reviews; the highest predicted scores now come first
giraffe_reviews.head()
Out[50]:
In [51]:
# Review text of the first row, i.e. the highest predicted score after the
# descending sort
giraffe_reviews[0]['review']
Out[51]:
In [52]:
# Review text of the second row (second-highest predicted score)
giraffe_reviews[1]['review']
Out[52]:
In [53]:
# Show the most negative reviews: with the data sorted by descending
# predicted score, the last row holds the lowest score.
giraffe_reviews[-1]['review']
Out[53]:
In [54]:
# Review text of the second-to-last row (second-lowest predicted score)
giraffe_reviews[-2]['review']
Out[54]: