In [177]:
import graphlab
In [178]:
# Load the SFrame stored in the local 'amazon_baby.gl/' directory; later cells
# read its 'name', 'review' and 'rating' columns.
products = graphlab.SFrame('amazon_baby.gl/')
In [179]:
# Preview the first two rows of the raw data.
products.head(2)
Out[179]:
In [180]:
# Add a 'word_count' column produced by count_words from each row's 'review';
# later cells treat every entry as a word -> count dictionary.
products['word_count'] = graphlab.text_analytics.count_words(products['review'])
In [181]:
# Preview again to check the newly added 'word_count' column.
products.head(2)
Out[181]:
In [182]:
# Select 'ipynb' as the Canvas target — presumably so that .show() renders
# inside the notebook; confirm against the GraphLab Canvas docs.
graphlab.canvas.set_target('ipynb')
In [183]:
# Visualise the 'name' column (which products appear in the data).
products['name'].show()
In [184]:
# Keep only the rows whose product name is exactly
# 'Vulli Sophie the Giraffe Teether'.
giraffe_reviews = products[products['name']=='Vulli Sophie the Giraffe Teether']
In [185]:
# A notebook cell only displays its last expression, so the row count is
# printed explicitly and the two-row preview is left as the final expression;
# both results are now visible.
print(len(giraffe_reviews))
giraffe_reviews.head(2)
Out[185]:
In [186]:
# Show the 'rating' column of the giraffe reviews using the categorical view.
giraffe_reviews['rating'].show(view='Categorical')
In [187]:
# Same categorical view of 'rating', this time over every product.
products['rating'].show(view='Categorical')
In [188]:
# Drop every row whose rating is exactly 3. Note that `products` is rebound,
# so the unfiltered frame is no longer reachable under this name.
products = products[products['rating']!=3]
In [189]:
# Binary target column: true when rating >= 4, false otherwise (rows rated 3
# were removed in the previous cell).
products['sentiment'] = products['rating']>=4
In [190]:
# Preview the frame with the new 'sentiment' column.
products.head()
Out[190]:
In [191]:
# Randomly split the rows using fraction 0.8, with a fixed seed (0) so the
# split is repeatable; the two parts are used as training and test data.
train_data, test_data = products.random_split(0.8, seed=0)
In [192]:
# Show both split sizes. As two separate expression statements only the
# last one would be displayed by the notebook, so they are combined into a
# single (train_rows, test_rows) tuple.
len(train_data), len(test_data)
Out[192]:
In [193]:
# Fit a logistic classifier that predicts 'sentiment' from the full
# 'word_count' dictionary of each review.
# NOTE(review): the test split is also passed as validation_set — confirm this
# is only used for progress reporting and not for model selection.
sentiment_model = graphlab.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=['word_count'],
    validation_set=test_data)
In [194]:
# Evaluate the word-count model on the test split, requesting the ROC curve.
sentiment_model.evaluate(test_data, metric='roc_curve')
Out[194]:
In [195]:
# Open the model's 'Evaluation' view.
sentiment_model.show(view='Evaluation')
In [196]:
# Score every giraffe review with the word-count model and store the
# predicted probability in a new 'predicted_sentiment' column.
giraffe_reviews['predicted_sentiment'] = sentiment_model.predict(
    giraffe_reviews,
    output_type='probability')
In [197]:
# Preview the giraffe reviews with the new 'predicted_sentiment' column.
giraffe_reviews.head(2)
Out[197]:
In [198]:
# Order the giraffe reviews by predicted sentiment, highest first, and rebind
# the name to the sorted result.
giraffe_reviews = giraffe_reviews.sort('predicted_sentiment', ascending=False)
In [199]:
# Preview the top of the sorted frame (highest predicted sentiment).
giraffe_reviews.head(2)
Out[199]:
In [200]:
# Review text of the row at index 1, i.e. the second row after the
# descending sort on 'predicted_sentiment'.
giraffe_reviews[1]['review']
Out[200]:
In [201]:
# Review text of the second-to-last row, i.e. the second-lowest predicted
# sentiment after the descending sort.
giraffe_reviews[-2]['review']
Out[201]:
In [202]:
# Hand-picked vocabulary for the reduced model. The order matters: it is the
# feature/column order used by the cells that follow.
selected_words = [
    'awesome',
    'great',
    'fantastic',
    'amazing',
    'love',
    'horrible',
    'bad',
    'terrible',
    'awful',
    'wow',
    'hate',
]
In [203]:
def awesome_count(dictionary):
    """Return how often 'awesome' occurs in one review's word counts.

    Args:
        dictionary: mapping of word -> count for a single review.

    Returns:
        The count stored under 'awesome', or 0 when the word is absent.
    """
    # dict.get replaces the explicit membership test + lookup.
    return dictionary.get('awesome', 0)
def great_count(dictionary):
    """Return how often 'great' occurs in one review's word counts.

    Args:
        dictionary: mapping of word -> count for a single review.

    Returns:
        The count stored under 'great', or 0 when the word is absent.
    """
    # dict.get replaces the explicit membership test + lookup.
    return dictionary.get('great', 0)
def fantastic_count(dictionary):
    """Return how often 'fantastic' occurs in one review's word counts.

    Args:
        dictionary: mapping of word -> count for a single review.

    Returns:
        The count stored under 'fantastic', or 0 when the word is absent.
    """
    # dict.get replaces the explicit membership test + lookup.
    return dictionary.get('fantastic', 0)
def amazing_count(dictionary):
    """Return how often 'amazing' occurs in one review's word counts.

    Args:
        dictionary: mapping of word -> count for a single review.

    Returns:
        The count stored under 'amazing', or 0 when the word is absent.
    """
    # dict.get replaces the explicit membership test + lookup.
    return dictionary.get('amazing', 0)
def love_count(dictionary):
    """Return how often 'love' occurs in one review's word counts.

    Args:
        dictionary: mapping of word -> count for a single review.

    Returns:
        The count stored under 'love', or 0 when the word is absent.
    """
    # dict.get replaces the explicit membership test + lookup.
    return dictionary.get('love', 0)
def horrible_count(dictionary):
    """Return how often 'horrible' occurs in one review's word counts.

    Args:
        dictionary: mapping of word -> count for a single review.

    Returns:
        The count stored under 'horrible', or 0 when the word is absent.
    """
    # dict.get replaces the explicit membership test + lookup.
    return dictionary.get('horrible', 0)
def bad_count(dictionary):
    """Return how often 'bad' occurs in one review's word counts.

    Args:
        dictionary: mapping of word -> count for a single review.

    Returns:
        The count stored under 'bad', or 0 when the word is absent.
    """
    # dict.get replaces the explicit membership test + lookup.
    return dictionary.get('bad', 0)
def terrible_count(dictionary):
    """Return how often 'terrible' occurs in one review's word counts.

    Args:
        dictionary: mapping of word -> count for a single review.

    Returns:
        The count stored under 'terrible', or 0 when the word is absent.
    """
    # dict.get replaces the explicit membership test + lookup.
    return dictionary.get('terrible', 0)
def awful_count(dictionary):
    """Return how often 'awful' occurs in one review's word counts.

    Args:
        dictionary: mapping of word -> count for a single review.

    Returns:
        The count stored under 'awful', or 0 when the word is absent.
    """
    # dict.get replaces the explicit membership test + lookup.
    return dictionary.get('awful', 0)
def wow_count(dictionary):
    """Return how often 'wow' occurs in one review's word counts.

    Args:
        dictionary: mapping of word -> count for a single review.

    Returns:
        The count stored under 'wow', or 0 when the word is absent.
    """
    # dict.get replaces the explicit membership test + lookup.
    return dictionary.get('wow', 0)
def hate_count(dictionary):
    """Return how often 'hate' occurs in one review's word counts.

    Args:
        dictionary: mapping of word -> count for a single review.

    Returns:
        The count stored under 'hate', or 0 when the word is absent.
    """
    # dict.get replaces the explicit membership test + lookup.
    return dictionary.get('hate', 0)
In [204]:
# Snapshot `products` before the per-word columns are added. A plain
# `products_copy = products` only binds a second name to the same SFrame, so
# the column assignments below (which modify the frame in place) would show
# up in the "copy" as well; SFrame.copy() gives an independent frame.
products_copy = products.copy()

# One count column per selected word, extracted from each review's
# word -> count dictionary.
products['awesome'] = products['word_count'].apply(awesome_count)
products['great'] = products['word_count'].apply(great_count)
products['fantastic'] = products['word_count'].apply(fantastic_count)
products['amazing'] = products['word_count'].apply(amazing_count)
products['love'] = products['word_count'].apply(love_count)
products['horrible'] = products['word_count'].apply(horrible_count)
products['bad'] = products['word_count'].apply(bad_count)
products['terrible'] = products['word_count'].apply(terrible_count)
products['awful'] = products['word_count'].apply(awful_count)
products['wow'] = products['word_count'].apply(wow_count)
products['hate'] = products['word_count'].apply(hate_count)
In [205]:
# Print the total number of occurrences of each selected word across all
# reviews, one number per line, in the order of `selected_words` (the same
# order the individual print statements used).
# SArray.sum() aggregates the column directly instead of pulling every value
# through Python's built-in sum(); single-argument print(...) behaves the
# same under Python 2 and 3.
for word in selected_words:
    print(products[word].sum())
In [206]:
# Re-split after adding the per-word columns so both parts contain them.
# Same fraction and seed as the earlier split — presumably selecting the same
# rows; verify if the two models must be compared on identical test data.
train_data, test_data = products.random_split(0.8, seed=0)
In [207]:
# Fit a second logistic classifier for 'sentiment', this time restricted to
# the per-word count columns listed in `selected_words`.
# NOTE(review): the test split is also passed as validation_set — confirm this
# is only used for progress reporting and not for model selection.
selected_words_model = graphlab.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=selected_words,
    validation_set=test_data)
In [208]:
# Inspect the fitted weights: the 'name' column is printed first, then the
# 'value' column, both read from the same coefficients table.
coefficients = selected_words_model['coefficients']
print(coefficients['name'])
print(coefficients['value'])
In [209]:
# Evaluate the selected-words model on the test split.
selected_words_model.evaluate(test_data)
Out[209]:
In [210]:
# Evaluate the full word-count model on the same test split, for comparison.
sentiment_model.evaluate(test_data)
Out[210]:
In [211]:
# Number of test rows whose 'sentiment' equals 1 (the positive class).
len(test_data[test_data['sentiment']==1])
Out[211]:
In [212]:
# Share of positive rows in the test split, i.e. the accuracy obtained by
# always predicting the positive class. Derived from test_data rather than
# hand-typed counts so it stays correct if the data or the split changes.
# float(...) keeps true division under Python 2.
num_positive = len(test_data[test_data['sentiment'] == 1])
float(num_positive) / len(test_data)
Out[212]:
In [217]:
# Rows of products_copy whose name is exactly 'Baby Trend Diaper Champ'.
diaper_champ_reviews_all = products_copy[products_copy['name']=='Baby Trend Diaper Champ']
In [219]:
# Preview the first two rows of that selection.
diaper_champ_reviews_all.head(2)
Out[219]:
In [220]:
# Rows of `products` (which carries the per-word count columns) whose name is
# exactly 'Baby Trend Diaper Champ'.
diaper_champ_reviews = products[products['name']=='Baby Trend Diaper Champ']
In [221]:
# Preview the first two rows of the Diaper Champ reviews.
diaper_champ_reviews.head(2)
Out[221]:
In [222]:
# Probability predicted by the full word-count model for each Diaper Champ
# review, stored as 'predicted_sentiment1'.
diaper_champ_reviews['predicted_sentiment1'] = sentiment_model.predict(
    diaper_champ_reviews,
    output_type='probability')
In [223]:
# Display the reviews ordered by 'predicted_sentiment1', highest first.
# The sorted result is only shown here and is not assigned to any name, so
# `diaper_champ_reviews` itself is not rebound by this cell.
diaper_champ_reviews.sort('predicted_sentiment1', ascending=False)
Out[223]:
In [224]:
# Probability predicted by the selected-words model, computed from the
# per-word count columns only and stored as 'predicted_sentiment2'.
selected_word_features = diaper_champ_reviews[selected_words]
diaper_champ_reviews['predicted_sentiment2'] = selected_words_model.predict(
    selected_word_features,
    output_type='probability')
In [225]:
# Show the 15 reviews with the highest 'predicted_sentiment1', now alongside
# the 'predicted_sentiment2' column. The sorted result is not stored.
diaper_champ_reviews.sort('predicted_sentiment1', ascending=False).head(15)
Out[225]:
In [226]:
# Text of the review with the highest 'predicted_sentiment1'.
# The earlier sort calls only displayed their result and never rebound
# `diaper_champ_reviews`, so indexing row 0 of the frame directly would
# return the first row in its existing order rather than the top-scored one;
# sort here before indexing.
diaper_champ_reviews.sort('predicted_sentiment1', ascending=False)[0]['review']
Out[226]:
In [ ]: