In [3]:
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.sql import select, delete, or_
import re
import db_connect
from db_tables import metadata, CriteriaConceptStaging, ConceptPredictors, UserHistoryCriteria, Users, CriteriaConceptLookup
from db_tables import ConceptPredictorsReject, ConceptTerms, ConceptTermsReject, CriteriaText, CriteriaConcept
In [17]:
def get_concepts(concept_id, engine):
#check if the concept is new or existing
new_concept = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.new_concept], distinct=True).\
where(CriteriaConceptStaging.c.concept_id ==
concept_id))][0]
concept_name = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.value]).\
where(CriteriaConceptStaging.c.concept_id ==
concept_id).\
where(CriteriaConceptStaging.c.update_type ==
'concept-name'))][0]
concept_terms = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.value]).\
where(CriteriaConceptStaging.c.concept_id ==
concept_id).\
where(CriteriaConceptStaging.c.update_type ==
'term'))]
if new_concept == 1:
#concept is new
existing_concept_terms = []
return concept_name, concept_terms, existing_concept_terms
else:
#concept is not new, need to pull old concept data to compare
existing_concept_terms = engine.execute(select([ConceptTerms.c.term]).\
where(ConceptTerms.c.concept_id ==
concept_id))
existing_concept_terms = [r[0] for r in existing_concept_terms]
#get new terms in the staging table
new_concept_terms = list(set(concept_terms).difference(set(existing_concept_terms)))
return concept_name, new_concept_terms, existing_concept_terms
In [ ]:
def transfer_concept(concept_id, engine, term_set, concept_name):
#pull other data for concept
concept_terms_reject = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.value]).\
where(CriteriaConceptStaging.c.concept_id ==
concept_id).\
where(CriteriaConceptStaging.c.update_type ==
'term-reject'))]
concept_predictors_reject = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.value]).\
where(CriteriaConceptStaging.c.concept_id ==
concept_id).\
where(CriteriaConceptStaging.c.update_type ==
'predictor-reject'))]
concept_predictors = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.value]).\
where(CriteriaConceptStaging.c.concept_id ==
concept_id).\
where(CriteriaConceptStaging.c.update_type ==
'predictor'))]
#write all data into the new tables
#instert data into db
engine.execute(CriteriaConcept.insert(), [{
'concept_id': concept_id,
'concept_name':concept_name}])
engine.execute(ConceptTerms.insert(), [{
'concept_id': concept_id,
'term':term}
for term in term_set])
engine.execute(ConceptTermsReject.insert(), [{
'concept_id': concept_id,
'term':term}
for term in concept_terms_reject])
engine.execute(ConceptPredictors.insert(), [{
'concept_id': concept_id,
'predictor':predictor}
for predictor in concept_predictors])
engine.execute(ConceptPredictorsReject.insert(), [{
'concept_id': concept_id,
'predictor':predictor}
for predictor in concept_predictors_reject])
In [ ]:
def update_user_history(concept_id, choice, term_set, engine):
#keeps track of which concepts were accepted and also which terms were accepted
#pull all statged data for concept
all_staged_concept = engine.execute(select([CriteriaConceptStaging]).\
where(CriteriaConceptStaging.c.concept_id ==
concept_id).fetchall())
if choice:
#at least some terms were accepted
#instert data into db
engine.execute(UserHistoryCriteria.insert(),
[{'update_id': value[0],
'user_id': value[1],
'update_time': value[2],
'concept_id': value[3],
'new_concept': value[4],
'update_type': value[5],
'value':value[6],
'accepted': 0}
if value[5] == 'term' and value[6] not in term_set
else {'update_id': value[0],
'user_id': value[1],
'update_time': value[2],
'concept_id': value[3],
'new_concept': value[4],
'update_type': value[5],
'value':value[6],
'accepted': 1}
for value in all_staged_concept])
else:
#concept was not accepted
#instert data into db
engine.execute(UserHistoryCriteria.insert(), [{'update_id': value[0]
'user_id': value[1],
'update_time': value[2],
'concept_id': value[3],
'new_concept': value[4],
'update_type': value[5],
'value':value[6],
'accepted': 0}
for value in all_staged_concept])
#delete from staging table
engine.execute(delete(CriteriaConceptStaging).where(CriteriaConceptStaging.c.concept_id == concept))
In [18]:
def tag_criteria_sentences(concept_id, engine):
tagged_sentences = []
#pull the accepted terms with term_ids
term_list = engine.execute(select([ConceptTerms.c.term, ConceptTerms.c.term_id]).\
where(ConceptTerms.c.concept_id == concept_id)).fetchall()
criteria_text = engine.execute(select([CriteriaText.c.criteria_text,
CriteriaText.c.criteria_text_id]).\
where(or_(CriteriaText.c.criteria_text.op('rlike')('[[:<:]]' + t[0] + '[[:>:]]')
for t in term_list))).fetchall()
#find all the sentences where a concept term occurs
for sent, sent_id in criteria_text:
for term, term_id in term_list:
if term in sent.lower():
#check if there is a negative in the sentence related to the term.
#the negative term has to be within 3 words of the concept term
#or at the beginning of the sentece
string = sent.lower()
negative_pattern_start = r"^[not|no|isn't|didn't]"
negative_beginning = re.search(negative_pattern_start, string)
negative_in_sentence = re.search("[not|didn't|isn't|no]\W+(?:\w+\W+){1,2}%s" % (term), string)
if negative_beginning or negative_in_sentence:
inverse = 1
else:
inverse = 0
tagged_sentences.append((sent_id, term_id, inverse))
#write tagged sentences to db
engine.execute(CriteriaConceptLookup.insert(), [{'criteria_text_id': v[0],
'term_id': v[1],
'concept_id': concept_id,
'inverse': v[2]}
for v in tagged_sentences])
In [124]:
string = "I am not happy happy is a test"
negative_pattern_start = r"^[not|no|isn't|didn't]"
negative_beginning = re.search(negative_pattern_start, string)
negative_in_sentence = re.search("[not|didn't|isn't|no]\W+(?:\w+\W+){1,2}is", string)
if negative_beginning or negative_in_sentence:
print 'inverse'
#[(m.start(0), m.end(0)) for m in re.finditer(negative_pattern_start, string)]
In [119]:
print re.search("[not|didn't|isn't|no]\W+(?:\w+\W+){1,2}is", string)
In [ ]:
def admin_action_response(concept_id, concept_name, accepted_terms):
'''This is run in response to the admins actions on the criteria review page'''
#initialize the connection to the db
engine = create_engine('mysql+pymysql://' + db_connect.conn)
metadata.create_all(engine)
if len(accepted_terms) > 0:
#at least some terms in the concept have been accepted
tansfer_concept(concept_id, engine, accepted_terms, concept_name)
#transfer data to user history table and delete from staging table
choice = True
update_user_history(concept_id, choice, accepted_terms, engine)
#tag sentences with the new concept
tag_criteria_sentences(concept_id, engine)
else:
#transfer data to user history table and delete from staging table
choice = False
update_user_history(concept_id, choice, accepted_terms, engine)
criteria_review_page_data()
In [ ]:
def criteria_review_page_data(concept_id=False):
'''This gets activated when an admin chooses to review staged criteria concepts
when the criteria review page is loaded'''
#initialize the connection to the db
engine = create_engine('mysql+pymysql://' + db_connect.conn)
metadata.create_all(engine)
#pull all the concept id's of staged concepts
concept_ids = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.concept_id],
distinct=True))]
#get concept name list
concept_names = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.value],
distinct=True).\
where(CriteriaConceptStaging.c.update_type == 'concept-name'))]
if concept_id:
pass
else:
concept_id = concept_ids[0]
concept_name, concept_terms, existing_concept_terms = get_concepts(concept_id, engine)
return (concept_ids, concept_names, concept_id, concept_terms,
existing_concept_terms, concept_name)
In [ ]:
main()
In [3]:
engine = create_engine('mysql+pymysql://' + db_connect.conn)
metadata.create_all(engine)
In [ ]: