In [1]:
from __future__ import print_function, division
import sqlalchemy as sql
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2
import pandas as pd
from sklearn.externals import joblib
import numpy as np 

# # import classifier 
# filename = '/Users/kateliea/Documents/Insight/webapp/my_webapp/trained_classifiers/commentsclassifier_LR.pkl'
# classify_helpful = joblib.load(filename)

In [3]:
dbname = 'somanycooksDB'
username = 'kateliea'


engine = create_engine('postgres://%s@localhost/%s' % (username, dbname))
print(engine.url)

if not database_exists(engine.url):
    create_database(engine.url)
print(database_exists(engine.url))

print(engine.table_names())

# connect:
con = None
con = psycopg2.connect(database = dbname, user = username)


postgres://kateliea@localhost/somanycooksDB
True
[]

In [6]:
recipes = pd.read_csv('/Users/kateliea/Documents/Insight/project/data/sentiment_recipes.csv', index_col=0)
recipes.to_sql('recipes', engine, if_exists='replace')

comments = pd.read_csv('/Users/kateliea/Documents/Insight/project/data/sentiment_comments.csv', index_col=0)
comments.to_sql('comments', engine, if_exists='replace')
                       
comments = pd.read_csv('/Users/kateliea/Documents/Insight/project/data/sentiment_sentences.csv', index_col=0)
comments.to_sql('sentences', engine, if_exists='replace')

train = pd.read_csv('/Users/kateliea/Documents/Insight/project/algorithm/training_data.csv', index_col=0)
train.to_sql('trainingdata',engine,if_exists='replace')

In [7]:
train2 = pd.read_csv('/Users/kateliea/Documents/Insight/project/data/sentiment_training_data.csv', index_col=0)
train2.to_sql('sentimenttrainingdata',engine,if_exists='replace')
ranks = pd.read_csv('/Users/kateliea/Documents/Insight/project/algorithm/Recipe_sentiment_ranks.csv') # ranks.to_sql('ranks',engine,if_exists='replace')

In [5]:
train.shape


Out[5]:
(97316, 23)

In [6]:
user_input = "a tatin auditioned"

ranks_query = ("""
        SELECT * FROM ranks WHERE recipe_title='%s';
                   """ % user_input)

recipe_query = ("""
        SELECT numberofcomments, sentence_rank, plot_name FROM ranks WHERE title='%s';
                   """ % user_input)

ranks_returns = pd.read_sql_query(ranks_query, con)

if len(ranks_returns.sentence_rank) == 0: 
    rank = 0
# elif 
else: 
    rank = int(ranks_returns.sentence_rank.mean()*100)

In [9]:
ranks_returns.head()


Out[9]:
index Unnamed: 0 mean_negativity_sentences mean_positivity_comment plot_name recipe_title numberofcomments sentence_rank recipe_ID comment_rank mean_positivity_sentences mean_negativity_comment
query = """ SELECT title, url FROM recipes_table; """ query_results = pd.read_sql_query(query,con) recipes = [] for i in range(0,query_results.shape[0]): recipes.append(dict(index=i, recipe_title=query_results.iloc[i]['title'].decode('utf-8'), url=query_results.iloc[i]['url'].decode('utf-8'))) (recipes[0]['recipe_title'])

In [ ]:
recipes = sorted(recipes)

In [ ]:


In [ ]:
predictions

In [ ]:


In [ ]:
# postgres data
user = 'kateliea'
host = 'localhost'
dbname = 'insight_project'
db  = create_engine('postgres://%s%s/%s'%(user,host,dbname))
con = None
con = psycopg2.connect(database = dbname, user=user)

In [ ]:
user_input = 'chile-lime melon salad'

In [23]:
user_input = "a tatin auditioned"
sql_query = ("""
    SELECT sentence, usercomment, url, username, sentence_tokens  FROM sentences_table WHERE title='%s';
    """ % user_input)

query_returns = pd.read_sql_query(sql_query,con)

In [ ]:


In [22]:
comments_to_show = []
if (helpful_comments.shape[0] > 0 )& (helpful_comments.shape[0] < min_to_show): 
    for ix, _ in range(0, helpful_comments.shape[0]): 
        comments_to_show.append(dict(comment=helpful_comments.iloc[ix].usercomment.decode('utf-8'), 
                                    username=helpful_comments.iloc[ix].username.decode('utf-8')))
    return comments_to_show, url, rank


elif helpful_comments.shape[0] > min_to_show: 
    randomnumbers = random.sample(range(0, helpful_comments.shape[0]), min_to_show)
    for _, ix in enumerate(randomnumbers): 
        comments_to_show.append(dict(comment=helpful_comments.iloc[ix].usercomment.decode('utf-8'), 
                                    username=helpful_comments.iloc[ix].username.decode('utf-8')))
    return comments_to_show, url, rank

else: 
    return {'comment':'sorry, no comments', 'username':'-'}, 'blank', 'blank'


  File "<ipython-input-22-c4c4f096a755>", line 6
    return comments_to_show, url, rank
SyntaxError: 'return' outside function

In [ ]:
helpful_sentences.drop_duplicates('usercomment','first')

In [ ]:
commentslist

In [33]:
# from flask import render_template, request
# from somanycooks import app
# from sqlalchemy import create_engine
# from sqlalchemy_utils import database_exists, create_database
# import pandas as pd
# import psycopg2
# from sklearn.externals import joblib
# import random
# # from wordcloud import WordCloud, STOPWORDS
# import matplotlib.pyplot as plt


# import classifier 
filename = '/Users/kateliea/Documents/Insight/webapp/my_webapp/trained_classifiers/commentsclassifier_LR.pkl'
classify_helpful = joblib.load(filename) 



min_to_show = 6

# def GetComments(user_input, con, fromUser='Default'):
sql_query = ("""
    SELECT sentence, usercomment, url, username, sentence_tokens  FROM sentences_table WHERE title='%s';
    """ % user_input)

query_returns = pd.read_sql_query(sql_query,con)

if len(query_returns) > 0: 
    # url=query_returns.iloc[0].url.decode('utf-8')

    ranks_query = ("""
        SELECT numberofcomments, sentence_rank, plot_name FROM ranks WHERE recipe_title='%s';
                   """ % user_input)

    recipe_query = ("""
        SELECT numberofcomments, sentence_rank, plot_name FROM ranks WHERE title='%s';
                   """ % user_input)

    ranks_returns = pd.read_sql_query(ranks_query, con)

    if len(ranks_returns.sentence_rank) == 0: 
        rank = 0
    # elif 
    else: 
        rank = int(ranks_returns.sentence_rank.mean()*100)


    # apply classifier 
    predictions = classify_helpful.predict(query_returns.sentence)
    helpful_sentences = query_returns.iloc[predictions == 'helpful', :]
    helpful_comments = helpful_sentences.drop_duplicates('usercomment','first')

    # tokens = []
    # for _, tok in enumerate(helpful_comments.sentence_tokens): 
    #     tokens += [tok]


    # cloud = MakeWordcloud(tok).to_image()

    comments_to_show = []
    if (helpful_comments.shape[0] > 0 )& (helpful_comments.shape[0] < min_to_show): 
        for ix, _ in range(0, helpful_comments.shape[0]): 
            comments_to_show.append(dict(comment=helpful_comments.iloc[ix].usercomment.decode('utf-8'), 
                                        username=helpful_comments.iloc[ix].username.decode('utf-8')))
    #     return comments_to_show, url, rank


    elif helpful_comments.shape[0] > min_to_show: 
        randomnumbers = random.sample(range(0, helpful_comments.shape[0]), min_to_show)
        for _, ix in enumerate(randomnumbers): 
            comments_to_show.append(dict(comment=helpful_comments.iloc[ix].usercomment.decode('utf-8'), 
                                        username=helpful_comments.iloc[ix].username.decode('utf-8')))
    #     return comments_to_show, url, rank

    # else: 
    #     return {'comment':'sorry, no comments', 'username':'-'}, 'blank', 'blank'



    def CommentChecker(user_input, con):
        prediction = classify_helpful.predict([user_input])

        if prediction.any() == "helpful":
            return prediction, "yes! that sounds like useful feedback!"

        elif prediction.all() == "other":
            return prediction, "hmm... I don't know how useful that suggestion will be... can you give some more details?"

        else: 
            return "sorry, Dave, I can't do that"



# def MakeWordcloud(comments):
#     wordcloud = WordCloud(max_words=500, relative_scaling=0.5, font_path='/Library/Fonts/Chalkduster', stopwords=STOPWORDS, width=100, height=500).generate(comments)
#     return wordcloud
#     # return fig

In [34]:
helpful_comments.shape[0]


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-34-d3538825dcde> in <module>()
----> 1 helpful_comments.shape[0]

NameError: name 'helpful_comments' is not defined

In [31]:
sql_query = ("""
    SELECT sentence, usercomment, url, username, sentence_tokens  FROM sentences_table WHERE title='%s';
    """ % user_input)

query_returns = pd.read_sql_query(sql_query,con)

In [ ]: