In [3]:
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.sql import select, delete, or_
import re
import db_connect
from db_tables import metadata, CriteriaConceptStaging, ConceptPredictors, UserHistoryCriteria, Users, CriteriaConceptLookup
from db_tables import ConceptPredictorsReject, ConceptTerms, ConceptTermsReject, CriteriaText, CriteriaConcept

In [17]:
def get_concepts(concept_id, engine):
    
    #check if the concept is new or existing
    new_concept = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.new_concept], distinct=True).\
                                  where(CriteriaConceptStaging.c.concept_id ==
                                       concept_id))][0]
    
    concept_name = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.value]).\
                                  where(CriteriaConceptStaging.c.concept_id ==
                                       concept_id).\
                                  where(CriteriaConceptStaging.c.update_type ==
                                       'concept-name'))][0]
    concept_terms = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.value]).\
                                   where(CriteriaConceptStaging.c.concept_id ==
                                       concept_id).\
                                   where(CriteriaConceptStaging.c.update_type ==
                                       'term'))]
    
    
    if new_concept == 1:
        #concept is new
        existing_concept_terms = []
        return concept_name, concept_terms, existing_concept_terms
 
        
    else:
        #concept is not new, need to pull old concept data to compare
        existing_concept_terms = engine.execute(select([ConceptTerms.c.term]).\
                                   where(ConceptTerms.c.concept_id ==
                                       concept_id))
        existing_concept_terms = [r[0] for r in existing_concept_terms]
        
        #get new terms in the staging table
        new_concept_terms = list(set(concept_terms).difference(set(existing_concept_terms)))
        
        return concept_name, new_concept_terms, existing_concept_terms

In [ ]:
def transfer_concept(concept_id, engine, term_set, concept_name):
    #pull other data for concept
    concept_terms_reject = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.value]).\
                                   where(CriteriaConceptStaging.c.concept_id ==
                                       concept_id).\
                                   where(CriteriaConceptStaging.c.update_type ==
                                       'term-reject'))]
    concept_predictors_reject = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.value]).\
                                   where(CriteriaConceptStaging.c.concept_id ==
                                       concept_id).\
                                   where(CriteriaConceptStaging.c.update_type ==
                                       'predictor-reject'))]
    concept_predictors = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.value]).\
                                   where(CriteriaConceptStaging.c.concept_id ==
                                       concept_id).\
                                   where(CriteriaConceptStaging.c.update_type ==
                                       'predictor'))]
    #write all data into the new tables
    #instert data into db
    engine.execute(CriteriaConcept.insert(), [{
                                      'concept_id': concept_id,
                                      'concept_name':concept_name}])
    engine.execute(ConceptTerms.insert(), [{
                                      'concept_id': concept_id,
                                      'term':term}
                                           for term in term_set])
    engine.execute(ConceptTermsReject.insert(), [{
                                      'concept_id': concept_id,
                                      'term':term}
                                           for term in concept_terms_reject])
    engine.execute(ConceptPredictors.insert(), [{
                                      'concept_id': concept_id,
                                      'predictor':predictor}
                                           for predictor in concept_predictors])
    engine.execute(ConceptPredictorsReject.insert(), [{
                                      'concept_id': concept_id,
                                      'predictor':predictor}
                                           for predictor in concept_predictors_reject])

In [ ]:
def update_user_history(concept_id, choice, term_set, engine):
    #keeps track of which concepts were accepted and also which terms were accepted
    
    #pull all statged data for concept
    all_staged_concept = engine.execute(select([CriteriaConceptStaging]).\
                                   where(CriteriaConceptStaging.c.concept_id ==
                                       concept_id).fetchall())
    if choice:
        #at least some terms were accepted
        #instert data into db
        engine.execute(UserHistoryCriteria.insert(),
                       [{'update_id': value[0],
                          'user_id': value[1],
                          'update_time': value[2],
                          'concept_id': value[3],
                          'new_concept': value[4],
                          'update_type': value[5],
                          'value':value[6],
                          'accepted': 0}
                         if value[5] == 'term' and value[6] not in term_set
                         else {'update_id': value[0],
                          'user_id': value[1],
                          'update_time': value[2],
                          'concept_id': value[3],
                          'new_concept': value[4],
                          'update_type': value[5],
                          'value':value[6],
                          'accepted': 1}
                         for value in all_staged_concept])
    else:
        #concept was not accepted
        #instert data into db
        engine.execute(UserHistoryCriteria.insert(), [{'update_id': value[0]
                                          'user_id': value[1],
                                          'update_time': value[2],
                                          'concept_id': value[3],
                                          'new_concept': value[4],
                                          'update_type': value[5],
                                          'value':value[6],
                                          'accepted': 0}
                                         for value in all_staged_concept])
    #delete from staging table
    engine.execute(delete(CriteriaConceptStaging).where(CriteriaConceptStaging.c.concept_id == concept))

In [18]:
def tag_criteria_sentences(concept_id, engine):
    tagged_sentences = []
    #pull the accepted terms with term_ids
    term_list = engine.execute(select([ConceptTerms.c.term, ConceptTerms.c.term_id]).\
                   where(ConceptTerms.c.concept_id == concept_id)).fetchall()
    
    criteria_text = engine.execute(select([CriteriaText.c.criteria_text,
                                       CriteriaText.c.criteria_text_id]).\
                               where(or_(CriteriaText.c.criteria_text.op('rlike')('[[:<:]]' + t[0] + '[[:>:]]')
                                         for t in term_list))).fetchall()
    
    #find all the sentences where a concept term occurs
    for sent, sent_id in criteria_text:
        for term, term_id in term_list:
            if term in sent.lower():
                #check if there is a negative in the sentence related to the term.
                #the negative term has to be within 3 words of the concept term
                #or at the beginning of the sentece
                string = sent.lower()
                negative_pattern_start = r"^[not|no|isn't|didn't]"
                negative_beginning = re.search(negative_pattern_start, string)
                negative_in_sentence = re.search("[not|didn't|isn't|no]\W+(?:\w+\W+){1,2}%s" % (term), string)
                if negative_beginning or negative_in_sentence:
                    inverse = 1
                else:
                    inverse = 0
                tagged_sentences.append((sent_id, term_id, inverse))
                
    #write tagged sentences to db
    engine.execute(CriteriaConceptLookup.insert(), [{'criteria_text_id': v[0],
                                                     'term_id': v[1],
                                                     'concept_id': concept_id,
                                                     'inverse': v[2]}
                                                    for v in tagged_sentences])

In [124]:
string = "I am not happy happy is a test"
negative_pattern_start = r"^[not|no|isn't|didn't]"
negative_beginning = re.search(negative_pattern_start, string)
negative_in_sentence = re.search("[not|didn't|isn't|no]\W+(?:\w+\W+){1,2}is", string)
if negative_beginning or negative_in_sentence:
    print 'inverse'
#[(m.start(0), m.end(0)) for m in re.finditer(negative_pattern_start, string)]


inverse

In [119]:
print re.search("[not|didn't|isn't|no]\W+(?:\w+\W+){1,2}is", string)


None

In [ ]:
def admin_action_response(concept_id, concept_name, accepted_terms):
    '''This is run in response to the admins actions on the criteria review page'''
    #initialize the connection to the db
    engine = create_engine('mysql+pymysql://' + db_connect.conn)
    metadata.create_all(engine)
    

    if len(accepted_terms) > 0:
        #at least some terms in the concept have been accepted
        tansfer_concept(concept_id, engine, accepted_terms, concept_name)
        #transfer data to user history table and delete from staging table
        choice = True
        update_user_history(concept_id, choice, accepted_terms, engine)
        #tag sentences with the new concept
        tag_criteria_sentences(concept_id, engine)
    else:
        #transfer data to user history table and delete from staging table
        choice = False
        update_user_history(concept_id, choice, accepted_terms, engine)
        
    criteria_review_page_data()

In [ ]:
def criteria_review_page_data(concept_id=False):
    '''This gets activated when an admin chooses to review staged criteria concepts
    when the criteria review page is loaded'''
    #initialize the connection to the db
    engine = create_engine('mysql+pymysql://' + db_connect.conn)
    metadata.create_all(engine)
    
    #pull all the concept id's of staged concepts
    concept_ids = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.concept_id],
                                        distinct=True))]
    
    #get concept name list
    concept_names = [r[0] for r in engine.execute(select([CriteriaConceptStaging.c.value],
                                        distinct=True).\
                                                where(CriteriaConceptStaging.c.update_type == 'concept-name'))]
    
    
    if concept_id:
        pass
    else:
        concept_id = concept_ids[0]
    
    concept_name, concept_terms, existing_concept_terms = get_concepts(concept_id, engine)
    
    return (concept_ids, concept_names, concept_id, concept_terms,
            existing_concept_terms, concept_name)

In [ ]:
main()

In [3]:
engine = create_engine('mysql+pymysql://' + db_connect.conn)
metadata.create_all(engine)

In [ ]: