In [1]:
import codecs, string, random, math, cPickle as pickle, re
from collections import Counter
from IPython.display import HTML, Javascript, display

from __future__ import division

Load data


In [2]:
# Maps condition terms as they appear in condition_browse.txt to the spelling
# that replaces them before any lookup (presumably the spelling used in
# mesh_thesaurus.txt -- confirm against the thesaurus export).
corrections = {
    "Sarcoma, Ewing's": 'Sarcoma, Ewing',
    'Beta-Thalassemia': 'beta-Thalassemia',
    'Von Willebrand Disease, Type 3': 'von Willebrand Disease, Type 3',
    'Von Willebrand Disease, Type 2': 'von Willebrand Disease, Type 2',
    'Von Willebrand Disease, Type 1': 'von Willebrand Disease, Type 1',
    # Double-quoted on purpose: the previous spelling 'Felty''s Syndrome' was
    # two adjacent literals, which Python concatenates into 'Feltys Syndrome'
    # (no apostrophe), so the correction could never match the source term.
    "Felty's Syndrome": 'Felty Syndrome',
    'Von Hippel-Lindau Disease': 'von Hippel-Lindau Disease',
    'Retrognathism': 'Retrognathia',
    'Regurgitation, Gastric': 'Laryngopharyngeal Reflux',
    'Persistent Hyperinsulinemia Hypoglycemia of Infancy': 'Congenital Hyperinsulinism',
    'Von Willebrand Diseases': 'von Willebrand Diseases',
    'Pontine Glioma': 'Brain Stem Neoplasms',
    'Mental Retardation': 'Intellectual Disability',
    'Overdose': 'Drug Overdose',
    'Beta-Mannosidosis': 'beta-Mannosidosis',
    'Alpha 1-Antitrypsin Deficiency': 'alpha 1-Antitrypsin Deficiency',
    'Intervertebral Disk Displacement': 'Intervertebral Disc Displacement',
    'Alpha-Thalassemia': 'alpha-Thalassemia',
    'Mycobacterium Infections, Atypical': 'Mycobacterium Infections, Nontuberculous',
    'Legg-Perthes Disease': 'Legg-Calve-Perthes Disease',
    'Intervertebral Disk Degeneration': 'Intervertebral Disc Degeneration',
    'Alpha-Mannosidosis': 'alpha-Mannosidosis',
    'Gestational Trophoblastic Disease': 'Gestational Trophoblastic Neoplasms',
}
# Build two indexes from the condition table:
#   cond   : (corrected) MeSH term -> list of trial IDs tagged with that term
#   cond_r : trial ID -> list of (corrected) MeSH terms assigned to that trial
cond = {}
cond_r = {}
# `with` closes the handle even if a malformed row raises part-way through;
# the previous bare codecs.open(...) call left the file open.
with codecs.open('../data/condition_browse.txt', 'r', 'utf-8') as cond_file:
    for row in cond_file.readlines():
        # Rows are pipe-delimited with exactly three fields; any other field
        # count raises ValueError on unpacking.
        row_id, trial_id, mesh_term = row.strip().split('|')
        # Replace known-stale spellings; unknown terms pass through unchanged.
        mesh_term = corrections.get(mesh_term, mesh_term)
        cond.setdefault(mesh_term, []).append(trial_id)
        cond_r.setdefault(trial_id, []).append(mesh_term)

# Build two indexes from the MeSH thesaurus table:
#   mesh_codes   : mesh_id column -> MeSH term
#   mesh_codes_r : MeSH term -> list of every mesh_id that carries that term
mesh_codes = {}
mesh_codes_r = {}
# `with` closes the handle even if a malformed row raises part-way through;
# the previous bare codecs.open(...) call left the file open.
with codecs.open('../data/mesh_thesaurus.txt', 'r', 'utf-8') as mesh_file:
    for row in mesh_file.readlines():
        # Rows are pipe-delimited with exactly three fields; any other field
        # count raises ValueError on unpacking.
        row_id, mesh_id, mesh_term = row.strip().split('|')
        # A repeated mesh_id overwrites the earlier term (last row wins).
        mesh_codes[mesh_id] = mesh_term
        mesh_codes_r.setdefault(mesh_term, []).append(mesh_id)

# Limit to conditions that appear in at least this many trials.
MIN_TRIALS_PER_CONDITION = 10
top_cond = {c for c in cond if len(cond[c]) >= MIN_TRIALS_PER_CONDITION}
# Every trial tagged with at least one of those frequent conditions.
trials = {t for c in top_cond for t in cond[c]}

# Trial descriptions, indexed by trial ID; elements [0] and [1] of each entry
# are printed later as the brief and detailed description respectively.
# NOTE(security): unpickling executes arbitrary code from the file -- only
# load pickles produced by this project.
with open('../data/trial_desc.pkl', 'rb') as desc_file:
    trial_desc = pickle.load(desc_file)

Load model predictions


In [3]:
def _load_pickle(path):
    """Unpickle and return the object stored at `path`, closing the file.

    NOTE(security): unpickling executes arbitrary code from the file -- only
    use this on pickles produced by this project.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Per-trial model outputs, all indexed by trial ID. Shapes below are as the
# reporting cell consumes them:
#   multi_preds[trial]  -> mapping of MeSH code -> numeric score
#   single_preds[trial] -> a single predicted MeSH term
#   knn_preds[trial]    -> mapping of MeSH term -> iterable of numbers
multi_preds = _load_pickle('../data/mesh_guesses.pkl')
single_preds = _load_pickle('../data/mesh_guesses_maxent.pkl')
knn_preds = _load_pickle('../data/mesh_guesses_knn.pkl')

Testing random instances


In [7]:
# Pick a random trial that has KNN predictions and an existing condition
# assignment, then print its descriptions next to each model's output.
# Single-argument print(...) calls behave the same under the Python 2 print
# statement and the Python 3 print function; print('') emits a blank line.
trial_id = random.choice([k for k in knn_preds.keys() if k in cond_r])

print('Trial ID: %s' % trial_id)
print('')
print('BRIEF DESCRIPTION:')
print(trial_desc[trial_id][0])
print('')
print('DETAILED DESCRIPTION:')
print(trial_desc[trial_id][1] or 'None')
print('')

print('CURRENT MeSH ASSIGNMENT, IF ANY:')
# Terms missing from the thesaurus index are skipped. 'None' is printed when
# nothing resolves; previously that case printed nothing at all, because the
# old `else` branch was unreachable (trial_id is always drawn from cond_r).
cur_codes = sorted(m
                   for t in cond_r.get(trial_id, [])
                   if t in mesh_codes_r
                   for m in mesh_codes_r[t])
if cur_codes:
    for code in cur_codes:
        print('  %s %s' % (code, mesh_codes[code]))
else:
    print('None')
print('')

print('TOP ONE-WAY PREDICTION:')
# Guarded like the sections above and below: a trial with no one-way
# prediction, or a predicted term absent from the thesaurus index, used to
# raise KeyError here.
if trial_id not in single_preds:
    print('None')
else:
    single_term = single_preds[trial_id]
    cur_single_preds = sorted(set(mesh_codes_r.get(single_term, [])))
    if cur_single_preds:
        for cur_pred in cur_single_preds:
            print('  %s %s' % (cur_pred, mesh_codes[cur_pred]))
    else:
        print('  (no MeSH code found) %s' % single_term)
print('')

print('TOP 20 LEVEL-2 MULTI-WAY PREDICTIONS:')
# Highest score first. Codes containing 'C22' are dropped before the top 20
# are taken.
# NOTE(review): the (score - 0.5) * 10000 rescaling is display-only; the
# meaning of the 0.5 offset and the 10000 factor is not documented here.
ranked_multi = sorted(multi_preds[trial_id].items(),
                      key=lambda x: x[1],
                      reverse=True)
top_multi = [(t, mesh_codes[t], (a - 0.5) * 10000)
             for t, a in ranked_multi
             if 'C22' not in t][:20]
for tup in top_multi:
    print('  %s %s (%g)' % tup)
print('')

print('KNN PREDICTIONS:')
# Each term's weight is the sum of 10 ** -d over its recorded values d
# (presumably neighbour distances -- confirm), so smaller d counts for more.
# The float literal 1.0 keeps this true division even if the
# `from __future__ import division` in the import cell is not in effect.
knn_rows = [(mc, k, sum(1.0 / (10 ** d) for d in v))
            for k, v in knn_preds[trial_id].items()
            if k in mesh_codes_r
            for mc in mesh_codes_r[k]]
for tup in sorted(knn_rows, key=lambda x: x[2], reverse=True):
    print('  %s %s (%g)' % tup)
print('')


Trial ID: NCT00030940

BRIEF DESCRIPTION:
This study will explore the role of various immune factors involved in producing the disease symptoms in stiff-person syndrome (SPS) and follow disease progression in patients. SPS is a progressive disease in which unexpected noises, touches or stressful events set off muscle spasms and stiffness. It is thought to be an autoimmune disease in which the body produces antibodies that attack certain healthy tissues. A better understanding of the disease may help researchers design new therapies.  Patients of any age with SPS may be eligible for this study, except those who:  - Lack of serum anti-GAD antibodies  - Have very advanced disease that precludes traveling  - Have severe cardiovascular, renal, or other end-organ-disease states  Candidates will be screened with a medical history and physical and neurological examinations to confirm the diagnosis of SPS.  After screening, those enrolled in the study will be followed at the NIH Clinical Center every 6 months for 2 years (months 6, 12, 18, and 24) to have the following tests and procedures:  - Physical and neurological examinations and review of symptoms (every visit)  - Blood draw for routine tests and for research studies (every visit)  - Stiffness assessment (every visit) - Patients are asked a series of questions about their stiffness, which physicians rate according to the number of stiff areas (e.g., 0-no stiff areas; 1-stiffness of the lower trunk; 2-stiffness of the upper trunk, etc.).  - Lymphapheresis (at the beginning of the study and at 12 months) - This is a procedure for collecting large quantities of white blood cells. A needle is placed in a vein in the arm. Blood flows from the vein through a plastic tube (catheter) into a machine that spins the blood, separating it into its components. The white blood cells (lymphocytes) are removed, and the rest of the blood-plasma, red cells and platelets-is returned to the body through a second needle placed in the other arm.  
- Electrophysiologic studies - These studies include electromyography and nerve conduction testing. For electromyography, a small needle is inserted into a few muscles and the patient is asked to relax or to contract the muscles. The electrical activity of the muscle cells is recorded and analyzed by a computer. For nerve conduction testing, nerves are stimulated through small wire electrodes attached to the skin, and the response is recorded and analyzed.  - Lumbar puncture (at the beginning of the study and at 12 months) - This procedure is done to examine the cerebrospinal fluid (CSF), which bathes the brain and spinal cord. After a local anesthetic is administered, a needle is inserted in the space between the bones in the lower back where the CSF circulates below the spinal cord. About 2 tablespoons of fluid is collected through the needle.

DETAILED DESCRIPTION:
Stiff-person syndrome (SPS) is a progressive neurological disorder characterized by stiffness of the trunk or limb muscles and frequent muscle spasms induced by unexpected visual, auditory, or somatosensory stimuli. It is an incapacitating disorder that leads to recurrent falls and impaired ambulation. The cause of the disease is unknown but an autoimmune pathogenesis is implicated based on its association with other autoimmune diseases and auto-antibodies, specific HLA haplotypes and high titer antibodies against GAD, the rate-limiting enzyme for the synthesis of GABA. Understanding the autoimmune mechanisms of SPS is fundamental to refine the diagnostic criteria and develop specific therapies. The goals of this study are: a) define the natural history of SPS in a homogeneous cohort of patients, b) explore a pathogenetic link between SPS and viral infections based on the known peptide homology between GAD and certain viruses and c) establish GAD-specific T-cell clones and search for candidate antigenic epitopes using synthetic peptide libraries. Collected clinical data will be used to delineate the rate of disease progression and the frequency of association with other autoimmune illnesses, auto-antibodies, or malignancies. It is anticipated that the knowledge acquired from the study will help us understand the mechanism of the disease and design antigen-specific therapeutic strategies. This is an investigative study intended to define the natural history and pathogenesis of SPS. No new therapy will be provided except of standard care.

CURRENT MeSH ASSIGNMENT, IF ANY:
  C10.114.812 Stiff-Person Syndrome
  C10.228.854.790 Stiff-Person Syndrome
  C10.668.900 Stiff-Person Syndrome
  C20.111.258.850 Stiff-Person Syndrome

TOP ONE-WAY PREDICTION:
  C05.651.594.600 Myositis, Inclusion Body
  C10.668.491.562.500 Myositis, Inclusion Body

TOP 20 LEVEL-2 MULTI-WAY PREDICTIONS:
  C10.574 Neurodegenerative Diseases (0.283865)
  G12.425 Immune System Processes (0.231772)
  C16.320 Genetic Diseases, Inborn (0.216849)
  C04.730 Paraneoplastic Syndromes (0.213707)
  C17.300 Connective Tissue Diseases (0.200937)
  C15.378 Hematologic Diseases (0.190226)
  C11.270 Eye Diseases, Hereditary (0.186345)
  C20.111 Autoimmune Diseases (0.141723)
  C10.114 Autoimmune Diseases of the Nervous System (0.126398)
  C15.604 Lymphatic Diseases (0.114658)
  C20.683 Immunoproliferative Disorders (0.0985806)
  C04.557 Neoplasms by Histologic Type (0.0975847)
  C10.500 Nervous System Malformations (0.0921099)
  C04.651 Neoplasms, Multiple Primary (0.0911343)
  C04.834 Precancerous Conditions (0.0906306)
  C10.314 Demyelinating Diseases (0.0805225)
  C11.941 Uveal Diseases (0.075716)
  C11.250 Eye Abnormalities (0.069001)
  C12.777 Urologic Diseases (0.0679326)
  C19.053 Adrenal Gland Diseases (0.0490695)

KNN PREDICTIONS:
  C05.651.594 Myositis (0.127119)
  C10.668.491.562 Myositis (0.127119)
  C05.651.594.600 Myositis, Inclusion Body (0.127119)
  C10.668.491.562.500 Myositis, Inclusion Body (0.127119)
  C10.597.613.550.500 Muscle Rigidity (0.057933)
  C05.651.504 Muscle Rigidity (0.057933)
  C23.888.592.608.550.500 Muscle Rigidity (0.057933)
  C04.700.630 Multiple Endocrine Neoplasia (0.057933)
  C04.651.600 Multiple Endocrine Neoplasia (0.057933)
  C04.588.322.400 Multiple Endocrine Neoplasia (0.057933)
  C16.320.700.630 Multiple Endocrine Neoplasia (0.057933)
  C19.344.400 Multiple Endocrine Neoplasia (0.057933)
  C10.597.613.750 Spasm (0.057933)
  C23.888.592.608.750 Spasm (0.057933)
  C10.228.854.790 Stiff-Person Syndrome (0.057933)
  C10.114.812 Stiff-Person Syndrome (0.057933)
  C20.111.258.850 Stiff-Person Syndrome (0.057933)
  C10.668.900 Stiff-Person Syndrome (0.057933)
  C05.799 Rheumatic Diseases (0.0563748)
  C17.300.775 Rheumatic Diseases (0.0563748)
  C10.668.829.800 Polyneuropathies (0.0563051)
  C10.228.140.140.254 Cerebral Palsy (0.0557426)
  C10.574 Neurodegenerative Diseases (0.0554473)
  C20.673 Immunologic Deficiency Syndromes (0.0543541)
  C06.552.380 Hepatitis (0.0538724)
  C06.552.380.350.050 Hepatitis, Autoimmune (0.0538724)
  C20.111.567 Hepatitis, Autoimmune (0.0538724)
  C02.782.687.359.500 Hepatitis A (0.0538724)
  C06.552.380.705.422 Hepatitis A (0.0538724)
  C02.440.420 Hepatitis A (0.0538724)
  C10.574.812 Parkinson Disease (0.0536697)
  C10.228.140.079.862.500 Parkinson Disease (0.0536697)
  C10.228.662.600.400 Parkinson Disease (0.0536697)


In [ ]: