In [2]:
from sqlalchemy import create_engine
from connect import mysqlusername, mysqlpassword, mysqlserver, mysqldbname
from collections import defaultdict

#from sqlalchemy.orm import sessionmaker
#from db_models import Base, ClinicalStudy, Facilities, Sponsors

from sqlalchemy.sql import func, select, and_, or_, not_, desc
from db_tables import metadata, InstitutionDescription, InstitutionLookup, ConditionDescription, ConditionLookup, InstitutionSponsors, InstitutionFacilities
from db_tables import RatingsCriteriaStars, InstitutionRatings, TrialSummary, Interventions, Conditions, ConditionSynonym,\
    ClinicalStudy, TrialPublications, Users, CriteriaText, CriteriaTagged, CriteriaConcept, CriteriaConceptLookup, \
    ConceptTerms

In [3]:
mysqlserver = 'localhost'
engine = create_engine('mysql://%s:%s@%s/%s?charset=utf8' % (mysqlusername, mysqlpassword, mysqlserver, mysqldbname))
conn = engine.connect()
metadata.create_all(engine)

# Session = sessionmaker(bind=engine)
# session = Session()

In [13]:
rand_select = 145
r = conn.execute(select([CriteriaTagged.c.tagged_text]).where(CriteriaTagged.c.random_select == rand_select)).fetchall()

In [19]:
data = [eval(s.tagged_text) for s in r]

In [20]:
data[:5]


Out[20]:
[[[['Exclusion', 'NNP', 'Exclusion'],
   ['Criteria', 'NNP', 'Criteria'],
   [':', ':', ':']]],
 [[['19', 'CD', '19'], ['.', '.', '.']],
  [['Hypersensitivity', 'NN', 'hypersensitivity'],
   ['and/or', 'CC', 'and/or'],
   ['contraindication', 'NN', 'contraindication'],
   ['to', 'TO', 'to'],
   ['any', 'DT', 'any'],
   ['one', 'CD', 'one'],
   ['of', 'IN', 'of'],
   ['the', 'DT', 'the'],
   ['Investigational', 'NNP', 'Investigational'],
   ['Medicinal', 'NNP', 'Medicinal'],
   ['Products', 'NNPS', 'Products'],
   ['-LRB-', '-LRB-', '-lrb-'],
   ['IMP', 'NN', 'imp'],
   ['-RRB-', '-RRB-', '-rrb-'],
   [',', ',', ','],
   ['acyclovir', 'NN', 'acyclovir'],
   ['or', 'CC', 'or'],
   ['similar', 'JJ', 'similar'],
   ['anti-viral', 'JJ', 'anti-viral'],
   ['drug', 'NN', 'drug'],
   ['.', '.', '.']]],
 [[['Patients', 'NNS', 'patient'],
   ['with', 'IN', 'with'],
   ['frontotemporal', 'JJ', 'frontotemporal'],
   ['dementia', 'NN', 'dementia']]],
 [[['Have', 'VBP', 'have'],
   ['a', 'DT', 'a'],
   ['known', 'JJ', 'known'],
   ['allergy', 'NN', 'allergy'],
   ['or', 'CC', 'or'],
   ['hypersensitivity', 'NN', 'hypersensitivity'],
   ['to', 'TO', 'to'],
   ['exenatide', 'VB', 'exenatide'],
   ['BID', 'NNP', 'BID'],
   [',', ',', ','],
   ['exenatide', 'NN', 'exenatide'],
   ['QW', 'NN', 'qw'],
   [',', ',', ','],
   ['or', 'CC', 'or'],
   ['excipients', 'NNS', 'excipient'],
   ['contained', 'VBN', 'contain'],
   ['in', 'IN', 'in'],
   ['these', 'DT', 'these'],
   ['agents', 'NNS', 'agent'],
   ['.', '.', '.']]],
 [[['male', 'NN', 'male'], ['or', 'CC', 'or'], ['female', 'NN', 'female']]]]

In [17]:
r[:5]


Out[17]:
[(u'[[["Exclusion","NNP","Exclusion"],["Criteria","NNP","Criteria"],[":",":",":"]]]',),
 (u'[[["19","CD","19"],[".",".","."]],[["Hypersensitivity","NN","hypersensitivity"],["and/or","CC","and/or"],["contraindication","NN","contraindication"],["to","TO","to"],["any","DT","any"],["one","CD","one"],["of","IN","of"],["the","DT","the"],["Investigational","NNP","Investigational"],["Medicinal","NNP","Medicinal"],["Products","NNPS","Products"],["-LRB-","-LRB-","-lrb-"],["IMP","NN","imp"],["-RRB-","-RRB-","-rrb-"],[",",",",","],["acyclovir","NN","acyclovir"],["or","CC","or"],["similar","JJ","similar"],["anti-viral","JJ","anti-viral"],["drug","NN","drug"],[".",".","."]]]',),
 (u'[[["Patients","NNS","patient"],["with","IN","with"],["frontotemporal","JJ","frontotemporal"],["dementia","NN","dementia"]]]',),
 (u'[[["Have","VBP","have"],["a","DT","a"],["known","JJ","known"],["allergy","NN","allergy"],["or","CC","or"],["hypersensitivity","NN","hypersensitivity"],["to","TO","to"],["exenatide","VB","exenatide"],["BID","NNP","BID"],[",",",",","],["exenatide","NN","exenatide"],["QW","NN","qw"],[",",",",","],["or","CC","or"],["excipients","NNS","excipient"],["contained","VBN","contain"],["in","IN","in"],["these","DT","these"],["agents","NNS","agent"],[".",".","."]]]',),
 (u'[[["male","NN","male"],["or","CC","or"],["female","NN","female"]]]',)]

In [44]:
for i, b in enumerate(a):
    print i, ' '.join(w[0].decode('unicode_escape') for s in eval(b[3]) for w in s)


0 Key Inclusion Criteria :
1 Histopathologic confirmation of one of the following CD20 + B-cell non-Hodgkin 's lymphomas . Tissue diagnostic procedures must be performed within 6 months of study entry and with biopsy material available for review :
2 follicular lymphoma -LRB- grade 1 or 2 -RRB-
3 immunoplasmacytoma/immunocytoma -LRB- Waldenstrom 's macroglobulinemia -RRB-
4 splenic marginal zone B-cell lymphoma
5 extra-nodal marginal zone lymphoma of mucosa associated lymphoid tumor -LRB- MALT -RRB- type
6 nodal marginal zone B-cell lymphoma
7 mantle cell lymphoma
8 Meets one of the following need-for-treatment criteria -LRB- with the exception of mantle cell lymphoma for which treatment is indicated -RRB- :
9 presence of at least one of the following B-symptoms :
10 fever -LRB- > 38ºC -RRB- of unclear etiology
11 night sweats
12 weight loss of greater than 10 % within the prior 6 months
13 large tumor mass -LRB- bulky disease -RRB-
14 presence of lymphoma-related complications , including narrowing of ureters or bile ducts , tumor-related compression of a vital organ , lymphoma induced pain , cytopenias related to lymphoma/leukemia , splenomegaly , pleural effusions , or ascites
15 hyperviscosity syndrome due to monoclonal gammopathy
16 CD20 positive B cells in lymph node biopsy or other lymphoma pathology specimen .
17 `` No prior treatment . Patients on '' `` watch and wait '' '' may enter the study if a recent biopsy -LRB- obtained within the last 6 months -RRB- is available . ''
18 Adequate hematologic function -LRB- unless abnormalities related to lymphoma infiltration of the bone marrow or hypersplenism due to lymphoma -RRB- as follows :
19 hemoglobin of > = 10.0 g/dl
20 absolute neutrophil count -LRB- ANC -RRB- > = 1.5 x 10 9th power/L
21 platelet count > = 100 x 10 9th power/L
22 Bidimensionally measurable disease -LRB- field not previously radiated -RRB- .
23 Able to provide written informed consent .
24 ECOG performance status < = 2 .
25 Estimated life expectancy > = 6 months .
26 Serum creatinine of < = 2.0 mg/dL or creatinine clearance > = 50 mL/min .
27 ALT and AST ≤ 2.5 x ULN , and alkaline phosphatase and total bilirubin within normal limits .
28 Left ventricular ejection fraction -LRB- LVEF -RRB- > = 50 % by multiple gated acquisition scan -LRB- MUGA -RRB- or cardiac echocardiogram -LRB- ECHO -RRB- , prior for any patient to be treated with R-CHOP .
29 A medically accepted method of contraception to be used by women of childbearing potential -LRB- not surgically sterile or at least 12 months naturally postmenopausal -RRB- .
30 Men capable of producing offspring and not surgically sterile must practice abstinence or use a barrier method of birth control .
31 Key Exclusion Criteria :
32 Chronic lymphocytic leukemia , small lymphocytic lymphoma -LRB- SLL -RRB- , or grade 3 follicular lymphoma .
33 Transformed disease . Bone marrow blasts are permitted , however , transformed disease indicating leukemic involvement is not permitted .
34 Central nervous system -LRB- CNS -RRB- lymphomatous involvement or leptomeningeal lymphoma .
35 Prior radiation for NHL , except for a single course of locally delimited radiation therapy with a radiation field not exceeding 2 adjacent lymph node regions .
36 Active malignancy , other than NHL , within the past 3 years except for localized prostate cancer treated with hormone therapy , cervical carcinoma in situ , breast cancer in situ , or non-melanoma skin cancer following definitive treatment .
37 New York Heart Association -LRB- NYHA -RRB- Class III or IV heart failure , arrhythmias or unstable angina , electrocardiographic evidence of active ischemia or active conduction system abnormalities , or myocardial infarction within the last 6 months . -LRB- Prior to study entry , ECG abnormalities at screening must be documented by the investigator as not medically relevant -RRB- .
38 Known human immunodeficiency virus -LRB- HIV -RRB- positivity .
39 Active hepatitis B or hepatitis C infection -LRB- Hepatitis B surface antigen testing required -RRB- .
40 Women who are pregnant or lactating .
41 Corticosteroids for treatment of lymphoma within 28 days of study entry . Chronically administered low-dose corticosteroids -LRB- e.g. , prednisone ≤ 20 mg/day -RRB- for indications other than lymphoma or lymphoma-related complications are permitted .
42 Any serious uncontrolled , medical or psychological disorder that would impair the ability of the patient to receive therapy .
43 Any condition which places the patient at unacceptable risk or confounds the ability of the investigators to interpret study data .
44 Any other investigational agent within 28 days of study entry .
45 Known hypersensitivity to bendamustine , mannitol , or other study-related drugs .
46 The patient has Ann Arbor stage I disease .
47 The patient has a history of congenital long QT syndrome .
48 The patient has a history of cardiac disease with significant potential for QT prolongation .
49 The patient has screening electrocardiography -LRB- ECG -RRB- on day 1 of cycle 1 with QTCF interval greater than 450 ms that is confirmed by a second ECG . If the QTCF interval is greater than 450 ms on both ECGs , the ECGs will be sent to ERT , the Central ECG Reader vendor , for an overread -LRB- with 24-hour turn around time -RRB- and ERT will make a final decision on enrollment .
50 The patient has serum potassium or magnesium less than the lower limit of normal .

In [ ]:


In [ ]:


In [ ]:


In [3]:
def construct_address(city,state,zipcode,country):
    if city:
        if country == 'United States':
            return '%s, %s %s<br/>%s' % (city, state, zipcode, country)
        else:
            return '%s, %s' % (city, country)
    else:
        return 'No location information provided' 

# construct star rating from number
def gen_stars(s):
    ''' s is a number between 1 and 5 '''
    if s > 5:
        s = 5
    elif s < 1:
        s = 1

    rating_str = "<span class='rating'><i class='fa fa-star'></i>"
    ok = [1] * (int(s * 2) - 2)
    ok.extend([0] * (8 - len(ok)))
    for i in range(0,7,2):
        if sum(ok[i:i+2]) == 2:
            rating_str += "<i class='fa fa-star'></i>"
        elif sum(ok[i:i+2]) == 1:
            rating_str += "<i class='fa fa-star-half-o'></i>"
        else:
            rating_str += "<i class='fa fa-star-o'></i>"

    return rating_str + "</span>"

# calculate overall rating
def calc_overall(r_array):
    ''' r_array is a list or tuple of category ratings, in the order:
            date, mesh, sites, description, criteria
    '''
    r_wgt_avg = (r_array[0] * 0.10) + \
                (r_array[1] * 0.25) + \
                (r_array[2] * 0.25) + \
                (r_array[3] * 0.25) + \
                (r_array[4] * 0.15)
    return 1 + (round((r_wgt_avg) * 2 - 2) / 2.0)

# turn a query result with multiple rows per key into a lookup dictionary
def dictify(qresult):
    ''' qresult is a list of 2-tuples '''
    output = defaultdict(set)
    for k, v in qresult:
        output[k].add(v)
    return output

# create a comma-separated list of words
def add_commas(strlist, conj='and'):
    ''' strlist is a list of strings to be concatenated with comma
        conj is the final conjunction, defaults to 'and'
    '''
    if 'Other' in strlist:
        strlist.remove('Other') 
        strlist.append('Other')
    if len(strlist) <= 1:
        return ''.join(strlist)
    elif len(strlist) == 2:
        return '%s %s %s' % (strlist[0], conj, strlist[1])
    else:
        return '%s, %s %s' % (', '.join(strlist[:-1]), conj, strlist[-1])

# create a layman's terms string that describes the trial
def layman_desc(phase, status, inv_dict, stype):
    missing_data = []

    if phase != 'N/A':
        lay_str = "This is a <span style='font-weight: bold'>%s</span> trial" % phase
    else:
        lay_str = "This is a trial"
        missing_data.append('phase')
    lay_str += " that is <span style='font-weight: bold'>%s</span>." % status

    inv_str = add_commas(inv_dict, 'and/or')
    if inv_str:
        if stype.lower() != 'n/a':
            stype = " <span style='font-weight: bold'>%s</span>" % stype
        else:
            stype = ''
            missing_data.append('study type')
        lay_str += " It is an%s assessment of a(n) <span style='font-weight: bold'>%s</span> treatment." % (stype, inv_str.lower())
    else:
        missing_data.append('intervention(s)')

    if missing_data:
        lay_str += " (No information was provided about the %s of the trial.)" % add_commas(missing_data, 'or')

    return lay_str

In [7]:
inst_id = 68233
inst_data = conn.execute(InstitutionDescription.\
                            select().\
                            where(InstitutionDescription.c.institution_id == inst_id)).\
                        fetchone()

sponsors = conn.execute(InstitutionSponsors.\
                            select().\
                            where(InstitutionSponsors.c.institution_id == inst_id).\
                            order_by(desc('trial_count'))).\
                        fetchall()
facilities = conn.execute(InstitutionFacilities.\
                            select().\
                            where(InstitutionFacilities.c.institution_id == inst_id).\
                            order_by(desc('trial_count'))).\
                        fetchall()

sponsor_list = [{'name': t[1], 'cnt': t[2]} for t in sponsors]
facility_list = [{'name': t[1],
                  'cnt': t[6],
                  'address': construct_address(t[2],t[3],t[4],t[5])}
                  for t in facilities[:50]]
if len(facilities) > 50:
    facility_list += [{'name': '...and %d more' % (len(facilities) - 50),
                       'cnt': sum([t[6] for t in facilities[50:]]),
                       'address': ''}]

rating_info = conn.execute(InstitutionRatings.select().where(InstitutionRatings.c.institution_id == inst_id)).fetchone()
rating_obj = {'dates': gen_stars(float(rating_info[1])),
              'mesh': gen_stars(float(rating_info[2])),
              'sites': gen_stars(float(rating_info[3])),
              'desc': gen_stars(float(rating_info[4])),
              'crit': gen_stars(float(rating_info[5])),
              'overall': gen_stars(4.0)}

In [11]:
rating_obj


Out[11]:
{'crit': "<span class='rating'><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star-half-o'></i></span>",
 'dates': "<span class='rating'><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star'></i></span>",
 'desc': "<span class='rating'><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star-o'></i><i class='fa fa-star-o'></i></span>",
 'mesh': "<span class='rating'><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star-o'></i><i class='fa fa-star-o'></i></span>",
 'overall': "<span class='rating'><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star-o'></i></span>",
 'sites': "<span class='rating'><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star'></i><i class='fa fa-star-o'></i><i class='fa fa-star-o'></i></span>"}

In [55]:
params = {}
params['id'] = '115854'
trials = conn.execute(select([TrialSummary]).\
                        select_from(TrialSummary.join(InstitutionLookup,
                            and_(InstitutionLookup.c.nct_id == TrialSummary.c.nct_id,
                                 InstitutionLookup.c.institution_id == int(params['id'])))).\
                        limit(50)).\
                    fetchall()

inv = conn.execute(select([Interventions.c.nct_id, Interventions.c.intervention_type]).\
                        select_from(Interventions.join(InstitutionLookup,
                            and_(InstitutionLookup.c.nct_id == Interventions.c.nct_id,
                                 InstitutionLookup.c.institution_id == int(params['id']))))).\
                    fetchall()
inv_dict = dictify(inv)

# compile JSON object
out_obj = []
for nct_id, title, status, phase, stype in trials:
    out_obj.append({'nct_id': nct_id,
                    'trial_title': title,
                    'lay_str': layman_desc(phase, status, list(inv_dict[nct_id]) if nct_id in inv_dict else '', stype)
                    })

In [ ]:


In [39]:
cond_id=3121
cond_data = conn.execute(ConditionDescription.\
                            select().\
                            where(ConditionDescription.c.condition_id == cond_id)).\
                        fetchone()

cond_syn = conn.execute(select([ConditionDescription.c.mesh_term]).\
                            select_from(ConditionSynonym.join(ConditionDescription,
                                and_(ConditionDescription.c.condition_id == ConditionSynonym.c.synonym_id,
                                     ConditionSynonym.c.condition_id == cond_id)))).\
                        fetchall()

top_inst = conn.execute(select([InstitutionDescription.c.institution_id,
                                InstitutionDescription.c.name,
                                InstitutionDescription.c.image_url,
                                InstitutionDescription.c.location,
                                func.count(ConditionLookup.c.nct_id).label('cnt')]).\
                            select_from(InstitutionDescription.join(InstitutionLookup.join(ConditionLookup,
                                    and_(ConditionLookup.c.condition_id == cond_id,
                                         ConditionLookup.c.source == 'CTGOV',
                                         ConditionLookup.c.syn_flag == 0,
                                         ConditionLookup.c.nct_id == InstitutionLookup.c.nct_id)),
                                    InstitutionLookup.c.institution_id == InstitutionDescription.c.institution_id)).\
                            group_by(InstitutionDescription.c.institution_id,
                                     InstitutionDescription.c.name,
                                     InstitutionDescription.c.image_url,
                                     InstitutionDescription.c.location).\
                            order_by(desc('cnt')).\
                            limit(5)).\
                        fetchall()

inst_rating = conn.execute(InstitutionRatings.select().\
                    where(or_(InstitutionRatings.c.institution_id == t[0] for t in top_inst))).\
                fetchall()
rating_dict = {t[0]: gen_stars(calc_overall([float(f) for f in t[1:6]])) for t in inst_rating}

inst_obj = [{'inst_id': t[0],
             'inst_name': t[1],
             'inst_img': t[2],
             'inst_loc': t[3],
             'inst_stars': rating_dict[t[0]]}
             for t in top_inst]

In [4]:
nct_id = 'NCT01457924'
summary_data = conn.execute(select([ClinicalStudy.c.brief_title,
                                    ClinicalStudy.c.brief_summary,
                                    ClinicalStudy.c.detailed_description,
                                    ClinicalStudy.c.overall_status,
                                    ClinicalStudy.c.phase,
                                    ClinicalStudy.c.study_type,
                                    ClinicalStudy.c.criteria,
                                    ClinicalStudy.c.gender,
                                    ClinicalStudy.c.minimum_age,
                                    ClinicalStudy.c.maximum_age,
                                    ClinicalStudy.c.healthy_volunteers,
                                    ClinicalStudy.c.lastchanged_date]).
                            select_from(ClinicalStudy).\
                            where(ClinicalStudy.c.nct_id == nct_id)).\
                        fetchone()

inv = conn.execute(select([Interventions.c.nct_id, Interventions.c.intervention_type]).\
                        select_from(Interventions).\
                        where(Interventions.c.nct_id == nct_id)).\
                    fetchall()

inv_dict = dictify(inv)

cond = conn.execute(select([ConditionDescription.c.condition_id,
                            ConditionDescription.c.mesh_term]).\
                            select_from(ConditionDescription.join(ConditionLookup,
                                and_(ConditionLookup.c.nct_id == nct_id,
                                    ConditionLookup.c.source == 'CTGOV',
                                    ConditionLookup.c.syn_flag == 0,
                                    ConditionLookup.c.condition_id == ConditionDescription.c.condition_id)))).\
                    fetchall()

cond_suggest = conn.execute(select([ConditionDescription.c.condition_id,
                                    ConditionDescription.c.mesh_term]).\
                            select_from(ConditionDescription.join(ConditionLookup,
                                and_(ConditionLookup.c.nct_id == nct_id,
                                    ConditionLookup.c.source != 'CTGOV',
                                    ConditionLookup.c.syn_flag == 0,
                                    ConditionLookup.c.condition_id == ConditionDescription.c.condition_id)))).\
                    fetchall()

In [5]:
cond_suggest


Out[5]:
[(987L, 'Lymphoma'), (304L, 'Neoplasm Recurrence, Local')]

In [6]:
[{'id': c[0],
                                                              'name': c[1]}
                                                              for c in set(tuple(t) for t in cond_suggest)]


Out[6]:
[{'id': 304L, 'name': 'Neoplasm Recurrence, Local'},
 {'id': 987L, 'name': 'Lymphoma'}]

In [ ]:


In [1]:
a = '#REDIRECT [[Brenner tumour]]'

In [5]:
a[12:a.index(']]')]


Out[5]:
'Brenner tumour'

In [3]:
a[13:]


Out[3]:
'renner tumour]]'

In [ ]: