In [84]:
import json
from elasticsearch import Elasticsearch
# Base URL of the Elasticsearch node that the shared client below connects to.
ES_URL = 'http://192.168.11.2:9201'
# Index searched by getPercolateQuery for the stored percolate queries.
INDEX = 'movies_4_blogs_pquery'
# Index searched by the significant-text aggregation and the recommendation query.
DATA_INDEX = 'movies_4_blogs_fbeat'
# Document type passed with every search against INDEX.
DOC_TYPE = '_doc'
# Document type passed with every search against DATA_INDEX.
DATA_DOC_TYPE = 'doc'
# Single client instance shared by all functions in this script.
_es = Elasticsearch(hosts=[ ES_URL ])
# ----------------------------
# get percolate queries
# ----------------------------
def getPercolateQuery(userId):
    """Fetch stored percolate-query entries from the percolate-query index.

    Args:
        userId: Value matched against the ``payload.user_id`` field. When
            ``None`` no query is sent, so the search is not restricted to a
            single user.

    Returns:
        The raw search response returned by the Elasticsearch client.
    """
    if userId is None:
        # No request body at all (rather than an empty string): the search
        # then runs without any query restriction.
        body = None
    else:
        # Build the query as a dict so the client serialises it; splicing the
        # id into a JSON string breaks on quotes/backslashes and on non-str ids.
        body = {"query": {"match": {"payload.user_id": userId}}}
    return _es.search(index=INDEX, doc_type=DOC_TYPE, body=body)
# --------------------------------
# create significant text aggs
# --------------------------------
def generateNRunSigTextAggsByPQuery(jpquery):
    """Run a significant-text aggregation scoped by a stored percolate query.

    Args:
        jpquery: One search hit (a dict) whose ``_source.pquery`` entry holds
            the query clause used to scope the aggregation.

    Returns:
        Whatever ``_runSigTextAggs`` returns for the generated request.

    Raises:
        KeyError: If ``jpquery`` has no ``_source`` or ``pquery`` entry.
    """
    # Assemble the request as a dict and serialise it once, instead of
    # concatenating hand-written JSON fragments around a dumped sub-object.
    dsl = {
        "size": 0,
        "aggs": {
            "_s_overview_text": {
                "significant_text": {
                    "field": "overview",
                },
            },
        },
        "query": jpquery['_source']['pquery'],
    }
    # _runSigTextAggs parses its argument with json.loads, so hand it JSON text.
    return _runSigTextAggs(json.dumps(dsl))
# --------------------------------
# get sig.text aggs results
# --------------------------------
def _runSigTextAggs(_dsl):
    """Execute a significant-text aggregation request and collect its keys.

    Args:
        _dsl: JSON text of the search request; it is parsed with
            ``json.loads`` before being sent to the data index.

    Returns:
        A list with the ``key`` of every bucket found under the
        ``_s_overview_text`` aggregation, in response order.
    """
    response = _es.search(
        index=DATA_INDEX,
        doc_type=DATA_DOC_TYPE,
        body=json.loads(_dsl),
    )
    # Only the bucket keys (the significant terms) are of interest here.
    found = response['aggregations']['_s_overview_text']['buckets']
    return [entry['key'] for entry in found]
# ----------------------------------------
# generate query dsl from the keywords
# ----------------------------------------
def generateNRunRecommendationQuery(_keywords):
    """Search the data index for documents whose overview matches the keywords.

    Args:
        _keywords: Iterable of keyword strings; they are joined with single
            spaces into one ``match`` query on the ``overview`` field.

    Returns:
        The raw search response returned by the Elasticsearch client.
    """
    # Build the request as a dict so the client serialises it: splicing the
    # keywords into a JSON string fails as soon as one contains '"' or '\\'.
    body = {
        # NOTE(review): ascending _score returns the weakest matches first;
        # kept as in the original -- confirm whether "desc" was intended.
        "sort": [
            {
                "_score": {
                    "order": "asc",
                },
            },
        ],
        "query": {
            "bool": {
                "must": [
                    {
                        "match": {
                            "overview": " ".join(_keywords),
                        },
                    },
                ],
            },
        },
    }
    return _es.search(index=DATA_INDEX, doc_type=DATA_DOC_TYPE, body=body)
# --------------------
# mail template
# --------------------
def prepareMailByTemplate(templateId, jres, jpayload):
    """Render the recommendation e-mail as HTML and print it.

    Args:
        templateId: Currently unused; the template is hardcoded for the demo.
        jres: Search response dict; the ``_source.title`` of every entry in
            ``jres['hits']['hits']`` becomes one list item.
        jpayload: Dict providing ``user_name`` for the greeting line.

    Returns:
        None. The rendered HTML is written to standard output.
    """
    import html

    # TODO: add template loading mechanism here. For demo purpose, just hardcode a html email template...
    # Only the greeting carries a placeholder, so only the greeting is passed
    # through str.format; formatting the whole mail would choke on any movie
    # title that happens to contain '{' or '}'.
    greeting = (
        "\n<p>\n"
        "Dear {},\n"
        "</p><br/>\n"
        "We have found some movies you might be interested!\n"
        "<ul>\n"
    ).format(html.escape(str(jpayload['user_name']), quote=False))

    # create the recommendation content...
    # TODO: could encapsulate this content logic to another function
    # Titles are escaped so characters such as '&' or '<' stay valid HTML text.
    items = "".join(
        "<li>" + html.escape(str(hit['_source']['title']), quote=False) + "</li>"
        for hit in jres['hits']['hits']
    )

    closing = (
        "\n<p>\n"
        "Thank you for visiting our website: http://awesome-awesome-movies.web!\n"
        "</p>\n"
    )

    # TODO: load the smtplib for email sending (remember there is a field "user_email" within the payload object)
    print(greeting + items + "</ul>" + closing)
# ----------------------------------------------------------------------------
# print(getPercolateQuery(None)) # every percolate query entry
# ----------------------------------------------------------------------------
# Demo run: look up the percolate query stored for one user (matched on
# payload.user_id), derive significant keywords from it, search for
# recommendations and print the resulting mail.
jres = getPercolateQuery('123sdvTyue_23')
# Only the first stored entry is used; it supplies both the payload and the query.
_first_hit = jres['hits']['hits'][0]
payload = _first_hit['_source']['payload']
sigKeywords = generateNRunSigTextAggsByPQuery(_first_hit)
# jres is rebound here: from now on it holds the recommendation search response.
jres = generateNRunRecommendationQuery(sigKeywords)
# Render the mail from the hardcoded template (templateId is not used yet).
prepareMailByTemplate(None, jres, payload)
In [ ]: