In [1]:
from __future__ import print_function, division
import sqlalchemy as sql
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2
import pandas as pd
from sklearn.externals import joblib
import numpy as np
# # import classifier
# filename = '/Users/kateliea/Documents/Insight/webapp/my_webapp/trained_classifiers/commentsclassifier_LR.pkl'
# classify_helpful = joblib.load(filename)
In [3]:
dbname = 'somanycooksDB'
username = 'kateliea'
engine = create_engine('postgres://%s@localhost/%s' % (username, dbname))
print(engine.url)
if not database_exists(engine.url):
create_database(engine.url)
print(database_exists(engine.url))
print(engine.table_names())
# connect:
con = None
con = psycopg2.connect(database = dbname, user = username)
In [6]:
recipes = pd.read_csv('/Users/kateliea/Documents/Insight/project/data/sentiment_recipes.csv', index_col=0)
recipes.to_sql('recipes', engine, if_exists='replace')
comments = pd.read_csv('/Users/kateliea/Documents/Insight/project/data/sentiment_comments.csv', index_col=0)
comments.to_sql('comments', engine, if_exists='replace')
comments = pd.read_csv('/Users/kateliea/Documents/Insight/project/data/sentiment_sentences.csv', index_col=0)
comments.to_sql('sentences', engine, if_exists='replace')
train = pd.read_csv('/Users/kateliea/Documents/Insight/project/algorithm/training_data.csv', index_col=0)
train.to_sql('trainingdata',engine,if_exists='replace')
In [7]:
train2 = pd.read_csv('/Users/kateliea/Documents/Insight/project/data/sentiment_training_data.csv', index_col=0)
train2.to_sql('sentimenttrainingdata',engine,if_exists='replace')
In [5]:
train.shape
Out[5]:
In [6]:
user_input = "a tatin auditioned"
ranks_query = ("""
SELECT * FROM ranks WHERE recipe_title='%s';
""" % user_input)
recipe_query = ("""
SELECT numberofcomments, sentence_rank, plot_name FROM ranks WHERE title='%s';
""" % user_input)
ranks_returns = pd.read_sql_query(ranks_query, con)
if len(ranks_returns.sentence_rank) == 0:
rank = 0
# elif
else:
rank = int(ranks_returns.sentence_rank.mean()*100)
In [9]:
ranks_returns.head()
Out[9]:
In [ ]:
recipes = sorted(recipes)
In [ ]:
In [ ]:
predictions
In [ ]:
In [ ]:
# postgres data
user = 'kateliea'
host = 'localhost'
dbname = 'insight_project'
db = create_engine('postgres://%s%s/%s'%(user,host,dbname))
con = None
con = psycopg2.connect(database = dbname, user=user)
In [ ]:
user_input = 'chile-lime melon salad'
In [23]:
user_input = "a tatin auditioned"
sql_query = ("""
SELECT sentence, usercomment, url, username, sentence_tokens FROM sentences_table WHERE title='%s';
""" % user_input)
query_returns = pd.read_sql_query(sql_query,con)
In [ ]:
In [22]:
comments_to_show = []
if (helpful_comments.shape[0] > 0 )& (helpful_comments.shape[0] < min_to_show):
for ix, _ in range(0, helpful_comments.shape[0]):
comments_to_show.append(dict(comment=helpful_comments.iloc[ix].usercomment.decode('utf-8'),
username=helpful_comments.iloc[ix].username.decode('utf-8')))
return comments_to_show, url, rank
elif helpful_comments.shape[0] > min_to_show:
randomnumbers = random.sample(range(0, helpful_comments.shape[0]), min_to_show)
for _, ix in enumerate(randomnumbers):
comments_to_show.append(dict(comment=helpful_comments.iloc[ix].usercomment.decode('utf-8'),
username=helpful_comments.iloc[ix].username.decode('utf-8')))
return comments_to_show, url, rank
else:
return {'comment':'sorry, no comments', 'username':'-'}, 'blank', 'blank'
In [ ]:
helpful_sentences.drop_duplicates('usercomment','first')
In [ ]:
commentslist
In [33]:
# from flask import render_template, request
# from somanycooks import app
# from sqlalchemy import create_engine
# from sqlalchemy_utils import database_exists, create_database
# import pandas as pd
# import psycopg2
# from sklearn.externals import joblib
# import random
# # from wordcloud import WordCloud, STOPWORDS
# import matplotlib.pyplot as plt
# import classifier
filename = '/Users/kateliea/Documents/Insight/webapp/my_webapp/trained_classifiers/commentsclassifier_LR.pkl'
classify_helpful = joblib.load(filename)
min_to_show = 6
# def GetComments(user_input, con, fromUser='Default'):
sql_query = ("""
SELECT sentence, usercomment, url, username, sentence_tokens FROM sentences_table WHERE title='%s';
""" % user_input)
query_returns = pd.read_sql_query(sql_query,con)
if len(query_returns) > 0:
# url=query_returns.iloc[0].url.decode('utf-8')
ranks_query = ("""
SELECT numberofcomments, sentence_rank, plot_name FROM ranks WHERE recipe_title='%s';
""" % user_input)
recipe_query = ("""
SELECT numberofcomments, sentence_rank, plot_name FROM ranks WHERE title='%s';
""" % user_input)
ranks_returns = pd.read_sql_query(ranks_query, con)
if len(ranks_returns.sentence_rank) == 0:
rank = 0
# elif
else:
rank = int(ranks_returns.sentence_rank.mean()*100)
# apply classifier
predictions = classify_helpful.predict(query_returns.sentence)
helpful_sentences = query_returns.iloc[predictions == 'helpful', :]
helpful_comments = helpful_sentences.drop_duplicates('usercomment','first')
# tokens = []
# for _, tok in enumerate(helpful_comments.sentence_tokens):
# tokens += [tok]
# cloud = MakeWordcloud(tok).to_image()
comments_to_show = []
if (helpful_comments.shape[0] > 0 )& (helpful_comments.shape[0] < min_to_show):
for ix, _ in range(0, helpful_comments.shape[0]):
comments_to_show.append(dict(comment=helpful_comments.iloc[ix].usercomment.decode('utf-8'),
username=helpful_comments.iloc[ix].username.decode('utf-8')))
# return comments_to_show, url, rank
elif helpful_comments.shape[0] > min_to_show:
randomnumbers = random.sample(range(0, helpful_comments.shape[0]), min_to_show)
for _, ix in enumerate(randomnumbers):
comments_to_show.append(dict(comment=helpful_comments.iloc[ix].usercomment.decode('utf-8'),
username=helpful_comments.iloc[ix].username.decode('utf-8')))
# return comments_to_show, url, rank
# else:
# return {'comment':'sorry, no comments', 'username':'-'}, 'blank', 'blank'
def CommentChecker(user_input, con):
prediction = classify_helpful.predict([user_input])
if prediction.any() == "helpful":
return prediction, "yes! that sounds like useful feedback!"
elif prediction.all() == "other":
return prediction, "hmm... I don't know how useful that suggestion will be... can you give some more details?"
else:
return "sorry, Dave, I can't do that"
# def MakeWordcloud(comments):
# wordcloud = WordCloud(max_words=500, relative_scaling=0.5, font_path='/Library/Fonts/Chalkduster', stopwords=STOPWORDS, width=100, height=500).generate(comments)
# return wordcloud
# # return fig
In [34]:
helpful_comments.shape[0]
In [31]:
sql_query = ("""
SELECT sentence, usercomment, url, username, sentence_tokens FROM sentences_table WHERE title='%s';
""" % user_input)
query_returns = pd.read_sql_query(sql_query,con)
In [ ]: