In [1]:
import graphlab
In [2]:
# Load the SFrame stored under 'amazon_baby.gl/' (path is relative to the
# notebook's working directory).
products = graphlab.SFrame('amazon_baby.gl/')
In [3]:
# Preview the first rows of the freshly loaded data.
products.head()
Out[3]:
In [4]:
# Run count_words over the 'review' column and keep the result in a new
# 'word_count' column; later cells treat each entry as a word -> count mapping.
products['word_count'] = graphlab.text_analytics.count_words(products['review'])
In [6]:
# Preview again to check the newly added 'word_count' column.
products.head()
Out[6]:
In [5]:
# Point GraphLab Canvas at the 'ipynb' target.
# NOTE(review): presumably this makes .show() render inline in the notebook -- confirm.
graphlab.canvas.set_target('ipynb')
In [6]:
# Visualise the 'rating' column with the default Canvas view.
products['rating'].show()
In [194]:
# Keep only the rows whose 'name' is exactly 'Vulli Sophie the Giraffe Teether'.
# NOTE(review): the execution counts suggest this cell was run out of order; on a
# top-to-bottom run this subset is taken before the 3-star filter further down.
giraffe_reviews = products[products['name'] == 'Vulli Sophie the Giraffe Teether']
In [195]:
# Number of rows selected for this product.
len(giraffe_reviews)
Out[195]:
In [196]:
# Show the product's 'rating' column using the 'Categorical' view.
giraffe_reviews['rating'].show(view='Categorical')
In [7]:
# Same 'Categorical' view, this time over the 'rating' column of all products.
products['rating'].show(view='Categorical')
In [8]:
# Drop every review rated exactly 3 so only the remaining ratings are labelled below.
# Note: this rebinds `products`, so re-running the cell keeps filtering the
# already-filtered frame (harmless here, but the unfiltered data is no longer bound).
products = products[products['rating'] != 3]
In [9]:
# Label column: 'sentiment' is the result of the comparison rating >= 4, i.e.
# truthy for 4- and 5-star reviews and falsy for everything else that is left.
products['sentiment'] = products['rating'] >=4
In [ ]:
In [10]:
# Split `products` in two with random_split(.8, seed=0); the fixed seed keeps
# the split repeatable across runs.
# NOTE(review): assumes the first SFrame returned is the ~80% portion -- confirm.
train_data,test_data = products.random_split(.8, seed=0)
In [11]:
# Fit a logistic classifier that predicts 'sentiment' from the single
# 'word_count' feature column.
# NOTE(review): test_data doubles as the validation set here, so it is not a
# fully held-out set for the evaluation cells that follow.
sentiment_model = graphlab.logistic_classifier.create(train_data,
target='sentiment',
features=['word_count'],
validation_set=test_data)
In [39]:
# Evaluate the word-count model on test_data, requesting only the 'roc_curve' metric.
sentiment_model.evaluate(test_data, metric='roc_curve')
Out[39]:
In [40]:
# Open the model's 'Evaluation' view in Canvas.
sentiment_model.show(view='Evaluation')
In [205]:
# Score every giraffe review with the word-count model and store the result in
# a new 'predicted_sentiment' column.
# NOTE(review): with output_type='probability' this is presumably the probability
# of the positive class -- confirm against the classifier's predict() docs.
giraffe_reviews['predicted_sentiment'] = sentiment_model.predict(giraffe_reviews, output_type='probability')
In [13]:
# Preview the rows together with the new 'predicted_sentiment' column.
giraffe_reviews.head()
In [29]:
# Reorder by 'predicted_sentiment', largest first. The sorted result is
# assigned back to the same name so later cells see the new order.
giraffe_reviews = giraffe_reviews.sort('predicted_sentiment', ascending=False)
In [14]:
# Preview the top rows after sorting.
giraffe_reviews.head()
In [22]:
# Review text of the first row (the top-scored review once the descending sort has run).
giraffe_reviews[0]['review']
Out[22]:
In [23]:
# Review text of the second row in the current order.
giraffe_reviews[1]['review']
Out[23]:
In [24]:
# Review text of the last row (the lowest-scored review once the descending sort has run).
giraffe_reviews[-1]['review']
Out[24]:
In [ ]:
In [13]:
# Words tracked individually: each one becomes its own count column on
# `products`, and the whole list is later passed as the feature list of the
# second classifier.
selected_words = ['awesome', 'great', 'fantastic', 'amazing', 'love', 'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate']
In [46]:
def awesome_count(dicts, word='and'):
    """Return how many times ``word`` occurs in one review's word-count mapping.

    Args:
        dicts: mapping of word -> occurrence count for a single review.
        word: the word to look up. Defaults to 'and', which keeps the original
            single-argument behaviour of this cell.

    Returns:
        The count stored for ``word``, or 0 when the word is not present.
    """
    return dicts.get(word, 0)
def automate(word_list, products):
    """Add one count column to ``products`` for every word in ``word_list``.

    The earlier draft of this helper was disabled with four-quote delimiters,
    which leave an unterminated string, and it applied one fixed-word counter
    to every column. Here each column counts its own word.

    Args:
        word_list: iterable of words; each word is also used as the new column name.
        products: table-like object that supports ``products['word_count'].apply(fn)``
            and column assignment via ``products[name] = values``. It is modified
            in place.
    """
    for word in word_list:
        # Bind the current word as a default argument so the callable keeps
        # looking up *this* word even if it is evaluated after the loop moves on.
        products[word] = products['word_count'].apply(
            lambda counts, word=word: counts.get(word, 0))
In [17]:
# Preview the current state of `products`.
products.head()
Out[17]:
In [ ]:
In [16]:
def awesome_count(dicts, word='awesome'):
    """Return how many times ``word`` occurs in one review's word-count mapping.

    Args:
        dicts: mapping of word -> occurrence count for a single review.
        word: the word to look up. Defaults to 'awesome', so the one-argument
            call made through ``.apply(awesome_count)`` behaves as before.

    Returns:
        The count stored for ``word``, or 0 when the word is not present.
    """
    return dicts.get(word, 0)
# New 'awesome' column: per-review count produced by the awesome_count defined
# just above (which looks up 'awesome').
products['awesome'] = products['word_count'].apply(awesome_count)
In [ ]:
In [18]:
def awesome_count(dicts, word='great'):
    """Return how many times ``word`` occurs in one review's word-count mapping.

    Despite the function name, this cell's version looks up 'great' by default.

    Args:
        dicts: mapping of word -> occurrence count for a single review.
        word: the word to look up. Defaults to 'great', so the one-argument
            call made through ``.apply(awesome_count)`` behaves as before.

    Returns:
        The count stored for ``word``, or 0 when the word is not present.
    """
    return dicts.get(word, 0)
# New 'great' column: per-review count produced by the awesome_count defined
# just above (which looks up 'great').
products['great'] = products['word_count'].apply(awesome_count)
In [19]:
def awesome_count(dicts, word='fantastic'):
    """Return how many times ``word`` occurs in one review's word-count mapping.

    Despite the function name, this cell's version looks up 'fantastic' by default.

    Args:
        dicts: mapping of word -> occurrence count for a single review.
        word: the word to look up. Defaults to 'fantastic', so the one-argument
            call made through ``.apply(awesome_count)`` behaves as before.

    Returns:
        The count stored for ``word``, or 0 when the word is not present.
    """
    return dicts.get(word, 0)
# New 'fantastic' column: per-review count produced by the awesome_count
# defined just above (which looks up 'fantastic').
products['fantastic'] = products['word_count'].apply(awesome_count)
In [20]:
def awesome_count(dicts, word='amazing'):
    """Return how many times ``word`` occurs in one review's word-count mapping.

    Despite the function name, this cell's version looks up 'amazing' by default.

    Args:
        dicts: mapping of word -> occurrence count for a single review.
        word: the word to look up. Defaults to 'amazing', so the one-argument
            call made through ``.apply(awesome_count, ...)`` behaves as before.

    Returns:
        The count stored for ``word``, or 0 when the word is not present.
    """
    return dicts.get(word, 0)
# New 'amazing' column: per-review count produced by the awesome_count defined
# just above (which looks up 'amazing').
# NOTE(review): skip_undefined=True is passed only in this cell; the sibling
# cells omit it. Presumably that is the default and the calls are equivalent -- confirm.
products['amazing'] = products['word_count'].apply(awesome_count, skip_undefined=True)
In [21]:
def awesome_count(dicts, word='love'):
    """Return how many times ``word`` occurs in one review's word-count mapping.

    Despite the function name, this cell's version looks up 'love' by default.

    Args:
        dicts: mapping of word -> occurrence count for a single review.
        word: the word to look up. Defaults to 'love', so the one-argument
            call made through ``.apply(awesome_count)`` behaves as before.

    Returns:
        The count stored for ``word``, or 0 when the word is not present.
    """
    return dicts.get(word, 0)
# New 'love' column: per-review count produced by the awesome_count defined
# just above (which looks up 'love').
products['love'] = products['word_count'].apply(awesome_count)
In [22]:
def awesome_count(dicts, word='horrible'):
    """Return how many times ``word`` occurs in one review's word-count mapping.

    Despite the function name, this cell's version looks up 'horrible' by default.

    Args:
        dicts: mapping of word -> occurrence count for a single review.
        word: the word to look up. Defaults to 'horrible', so the one-argument
            call made through ``.apply(awesome_count)`` behaves as before.

    Returns:
        The count stored for ``word``, or 0 when the word is not present.
    """
    return dicts.get(word, 0)
# New 'horrible' column: per-review count produced by the awesome_count defined
# just above (which looks up 'horrible').
products['horrible'] = products['word_count'].apply(awesome_count)
In [23]:
def awesome_count(dicts, word='bad'):
    """Return how many times ``word`` occurs in one review's word-count mapping.

    Despite the function name, this cell's version looks up 'bad' by default.

    Args:
        dicts: mapping of word -> occurrence count for a single review.
        word: the word to look up. Defaults to 'bad', so the one-argument
            call made through ``.apply(awesome_count)`` behaves as before.

    Returns:
        The count stored for ``word``, or 0 when the word is not present.
    """
    return dicts.get(word, 0)
# New 'bad' column: per-review count produced by the awesome_count defined
# just above (which looks up 'bad').
products['bad'] = products['word_count'].apply(awesome_count)
In [24]:
def awesome_count(dicts, word='terrible'):
    """Return how many times ``word`` occurs in one review's word-count mapping.

    Despite the function name, this cell's version looks up 'terrible' by default.

    Args:
        dicts: mapping of word -> occurrence count for a single review.
        word: the word to look up. Defaults to 'terrible', so the one-argument
            call made through ``.apply(awesome_count)`` behaves as before.

    Returns:
        The count stored for ``word``, or 0 when the word is not present.
    """
    return dicts.get(word, 0)
# New 'terrible' column: per-review count produced by the awesome_count defined
# just above (which looks up 'terrible').
products['terrible'] = products['word_count'].apply(awesome_count)
In [ ]:
In [25]:
def awesome_count(dicts, word='awful'):
    """Return how many times ``word`` occurs in one review's word-count mapping.

    Despite the function name, this cell's version looks up 'awful' by default.

    Args:
        dicts: mapping of word -> occurrence count for a single review.
        word: the word to look up. Defaults to 'awful', so the one-argument
            call made through ``.apply(awesome_count)`` behaves as before.

    Returns:
        The count stored for ``word``, or 0 when the word is not present.
    """
    return dicts.get(word, 0)
# New 'awful' column: per-review count produced by the awesome_count defined
# just above (which looks up 'awful').
products['awful'] = products['word_count'].apply(awesome_count)
In [26]:
def awesome_count(dicts, word='wow'):
    """Return how many times ``word`` occurs in one review's word-count mapping.

    Despite the function name, this cell's version looks up 'wow' by default.

    Args:
        dicts: mapping of word -> occurrence count for a single review.
        word: the word to look up. Defaults to 'wow', so the one-argument
            call made through ``.apply(awesome_count)`` behaves as before.

    Returns:
        The count stored for ``word``, or 0 when the word is not present.
    """
    return dicts.get(word, 0)
# New 'wow' column: per-review count produced by the awesome_count defined
# just above (which looks up 'wow').
products['wow'] = products['word_count'].apply(awesome_count)
In [27]:
def awesome_count(dicts, word='hate'):
    """Return how many times ``word`` occurs in one review's word-count mapping.

    Despite the function name, this cell's version looks up 'hate' by default.

    Args:
        dicts: mapping of word -> occurrence count for a single review.
        word: the word to look up. Defaults to 'hate', so the one-argument
            call made through ``.apply(awesome_count)`` behaves as before.

    Returns:
        The count stored for ``word``, or 0 when the word is not present.
    """
    return dicts.get(word, 0)
# New 'hate' column: per-review count produced by the awesome_count defined
# just above (which looks up 'hate').
products['hate'] = products['word_count'].apply(awesome_count)
In [ ]:
In [28]:
# Display `products` to check the per-word count columns added above.
products
Out[28]:
In [29]:
# Total occurrences of each selected word across all reviews.
# `result` collects the totals in the same order as `selected_words`.
result = []
for word in selected_words:
    # Sum the column once and reuse the value for both the printout and the
    # list, instead of scanning the column twice per word.
    total = products[word].sum()
    print(word)
    print(total)
    result.append(total)
print(result)
In [30]:
# Split again with the same fraction and seed as the earlier split.
# NOTE(review): presumably repeated so train_data/test_data include the
# per-word count columns added after the first split -- confirm.
train_data,test_data = products.random_split(.8, seed=0)
In [31]:
# Fit a second logistic classifier for 'sentiment', this time using only the
# per-word count columns named in `selected_words` as features.
# NOTE(review): test_data doubles as the validation set here as well.
selected_words_model = graphlab.logistic_classifier.create(train_data,
target='sentiment',
features=selected_words,
validation_set=test_data)
In [38]:
# Learned coefficients of the selected-words model, largest value first.
coefficients = selected_words_model['coefficients']
# sort() hands back a new sorted SFrame rather than reordering in place, so the
# result has to be kept; otherwise print_rows would show the unsorted table.
coefficients = coefficients.sort('value', ascending=False)
# Print up to 12 rows and 4 columns.
coefficients.print_rows(12,4)
In [ ]:
In [44]:
# Evaluate the selected-words model on test_data, requesting only the 'roc_curve' metric.
selected_words_model.evaluate(test_data, metric='roc_curve')
Out[44]:
In [46]:
# Open the selected-words model's 'Evaluation' view in Canvas.
selected_words_model.show(view='Evaluation')