In [4]:
raw_corpus = []

with open('corpus/NCBItrainset_corpus.txt', 'r') as rf:
    lines = rf.readlines()
    for line in lines:
        raw_corpus.append(line)

In [64]:
text_indices = []
blank_lines = []
for i in range(len(raw_corpus)):
    if raw_corpus[i] in ['\n', '\r\n']:
        blank_lines.append(i)
        text_indices.append((i+1, i+2))

ann_indices = []
for i in range(len(raw_corpus)):
    if (i, i+1) not in text_indices and (i-1, i) not in text_indices and i not in blank_lines:
        ann_indices.append(i)

In [103]:
#titles = []
#abstracts = []
#text_corpus = {} # as dictionary
text_corpus = []
for indices in text_indices:
#    titles.append(raw_corpus[indices[0]][11:])
#    abstracts.append(raw_corpus[indices[1]][11:])

    # find end of id string (second pipe)
    title_noID = raw_corpus[indices[0]].split('|')[2].strip('\n')
    abstract_noID = raw_corpus[indices[1]].split('|')[2].strip('\n')

    # as dictionary
#    text_corpus[raw_corpus[indices[0]].split('|')[0]] = [title_noID + ' ' + abstract_noID]
    # as list
    text_corpus.append(title_noID + ' ' + abstract_noID)

In [105]:
# each element in list corresponds to one title+abstract combo
print text_corpus[0]


A common human skin tumour is caused by activating mutations in beta-catenin. WNT signalling orchestrates a number of developmental programs. In response to this stimulus, cytoplasmic beta-catenin (encoded by CTNNB1) is stabilized, enabling downstream transcriptional activation by members of the LEF/TCF family. One of the target genes for beta-catenin/TCF encodes c-MYC, explaining why constitutive activation of the WNT pathway can lead to cancer, particularly in the colon. Most colon cancers arise from mutations in the gene encoding adenomatous polyposis coli (APC), a protein required for ubiquitin-mediated degradation of beta-catenin, but a small percentage of colon and some other cancers harbour beta-catenin-stabilizing mutations. Recently, we discovered that transgenic mice expressing an activated beta-catenin are predisposed to developing skin tumours resembling pilomatricomas. Given that the skin of these adult mice also exhibits signs of de novo hair-follicle morphogenesis, we wondered whether human pilomatricomas might originate from hair matrix cells and whether they might possess beta-catenin-stabilizing mutations. Here, we explore the cell origin and aetiology of this common human skin tumour. We found nuclear LEF-1 in the dividing tumour cells, providing biochemical evidence that pilomatricomas are derived from hair matrix cells. At least 75% of these tumours possess mutations affecting the amino-terminal segment, normally involved in phosphorylation-dependent, ubiquitin-mediated degradation of the protein. This percentage of CTNNB1 mutations is greater than in all other human tumours examined thus far, and directly implicates beta-catenin/LEF misregulation as the major cause of hair matrix cell tumorigenesis in humans.. 

In [123]:
# save corpus to separate files to be read into nltk

for indices in text_indices:
    # find end of id string (second pipe)
    ID = raw_corpus[indices[0]].split('|')[0]
    title_noID = raw_corpus[indices[0]].split('|')[2].strip('\n')
    abstract_noID = raw_corpus[indices[1]].split('|')[2].strip('\n')
    
    save_file = 'corpus_nltk/' + ID + '.txt'
    
    with open(save_file, 'w') as wf:
        wf.write('\n')
        wf.write(title_noID + ' ' + abstract_noID)
        wf.write('\n')

In [76]:
annotations = {}

for index in ann_indices:
    entry = raw_corpus[index].split('\t')
    entry_id = entry[0]
    start = entry[1]
    end = entry[2]
    ann_mention = entry[3]
    ann_type = entry[4]
    ann_concept = entry[5]
    annotations[(entry_id, ann_mention)] = [start, end, ann_type, ann_concept]

In [106]:
annotations


Out[106]:
{('7663517', 'breast and/or ovarian cancer'): ['325',
  '353',
  'Modifier',
  'D001943|D010051\n'],
 ('10533031', 'FRDA'): ['157', '161', 'Modifier', 'D005621\n'],
 ('8375105', 'Duchenne muscular dystrophy'): ['189',
  '216',
  'SpecificDisease',
  'D020388\n'],
 ('6337374', 'malaria'): ['811', '818', 'SpecificDisease', 'D008288\n'],
 ('2862466', 'CAH'): ['709', '712', 'SpecificDisease', 'D006521\n'],
 ('1338904', 'FAP'): ['273', '276', 'Modifier', 'D011125\n'],
 ('10465113', 'EDM2'): ['1455', '1459', 'SpecificDisease', 'OMIM:600204\n'],
 ('1505217', 'lysosomal storage disorder'): ['208',
  '234',
  'DiseaseClass',
  'D016464\n'],
 ('1978564', 'colorectal carcinomas'): ['1005',
  '1026',
  'SpecificDisease',
  'D015179\n'],
 ('1301937', 'TSD'): ['673', '676', 'Modifier', 'D013661\n'],
 ('1303171', 'NDP'): ['888', '891', 'Modifier', 'C537849\n'],
 ('10403837', 'Hereditary progressive dystonia'): ['154',
  '185',
  'SpecificDisease',
  'D020821\n'],
 ('8571953', 'breast and ovarian cancer'): ['688',
  '713',
  'CompositeMention',
  'D061325\n'],
 ('7315872', 'tendinous xanthomas'): ['630',
  '649',
  'SpecificDisease',
  'D014973\n'],
 ('1833974', 'Tay-Sachs disease'): ['817', '834', 'Modifier', 'D013661\n'],
 ('2241452', 'recurrent meningitis'): ['1112',
  '1132',
  'SpecificDisease',
  'D008581+D012008\n'],
 ('3362213', 'autosomal recessive genetic disorder'): ['97',
  '133',
  'DiseaseClass',
  'D030342\n'],
 ('3169738', 'Duchenne and Becker muscular dystrophy'): ['95',
  '133',
  'CompositeMention',
  'D020388|C537666\n'],
 ('2894613', 'hemangioblastomas'): ['233',
  '250',
  'SpecificDisease',
  'D018325\n'],
 ('2310692', 'inherited deficiency in the ninth complement component'): ['226',
  '280',
  'SpecificDisease',
  'OMIM:613825\n'],
 ('10923035', 'BFIC'): ['1353', '1357', 'Modifier', 'D020936\n'],
 ('7599636', 'inherited disorders'): ['305',
  '324',
  'DiseaseClass',
  'D030342\n'],
 ('10417280', 'Angelman syndrome'): ['189',
  '206',
  'SpecificDisease',
  'D017204\n'],
 ('2303408', 'C5-deficient'): ['1577', '1589', 'Modifier', 'OMIM:609536\n'],
 ('8533762', 'G6PD deficiency'): ['991',
  '1006',
  'SpecificDisease',
  'D005955\n'],
 ('10190331', 'Pendred syndrome'): ['533',
  '549',
  'SpecificDisease',
  'C536648\n'],
 ('3012567', 'chromosomal abnormalities'): ['1253',
  '1278',
  'DiseaseClass',
  'D002869\n'],
 ('3524231', 'DMD'): ['1052', '1055', 'Modifier', 'D020388\n'],
 ('7481765', 'BRCA1 abnormalities'): ['613',
  '632',
  'DiseaseClass',
  'OMIM:604370\n'],
 ('10369876', 'neuronal damage'): ['488', '503', 'DiseaseClass', 'D009410\n'],
 ('8408659', 'CETP deficiency'): ['338',
  '353',
  'SpecificDisease',
  'OMIM:143470\n'],
 ('2390095', 'CETP-deficient'): ['245', '259', 'Modifier', 'OMIM:143470\n'],
 ('7663517', 'hereditary breast and ovarian cancer'): ['114',
  '150',
  'Modifier',
  'D061325\n'],
 ('8302543', 'choriocapillaris of the macula'): ['1049',
  '1079',
  'SpecificDisease',
  'D008268\n'],
 ('10943845', 'breast cancer'): ['90', '103', 'SpecificDisease', 'D001943\n'],
 ('8533757', 'breast cancers'): ['1526',
  '1540',
  'SpecificDisease',
  'D001943\n'],
 ('10470286', 'neurofibrosarcomas'): ['368',
  '386',
  'SpecificDisease',
  'D018319\n'],
 ('10447258', 'infantile Refsum disease'): ['208',
  '232',
  'SpecificDisease',
  'D052919\n'],
 ('10094559', 'Alkaptonuria'): ['64', '76', 'SpecificDisease', 'D000474\n'],
 ('10742101', 'partial or complete deficiency of Hmgic'): ['646',
  '685',
  'CompositeMention',
  'OMIM:600698\n'],
 ('10441573', 'ovarian cancers'): ['1104',
  '1119',
  'SpecificDisease',
  'D010051\n'],
 ('8530105', 'trichorhinophalangeal syndrome'): ['886',
  '916',
  'SpecificDisease',
  'OMIM:190350\n'],
 ('3346018', 'DMD'): ['275', '278', 'Modifier', 'D020388\n'],
 ('8528198', 'Wiskott-Aldrich syndrome'): ['50',
  '74',
  'SpecificDisease',
  'D014923\n'],
 ('10589394', 'cleft lip'): ['348', '357', 'SpecificDisease', 'D002971\n'],
 ('2852474', 'congenital adrenal hypoplasia'): ['1547',
  '1576',
  'SpecificDisease',
  'D000312\n'],
 ('10554035', 'von Hippel-Lindau (VHL) disease'): ['118',
  '149',
  'SpecificDisease',
  'D006623\n'],
 ('8116611', 'myotonic dystrophy'): ['94',
  '112',
  'SpecificDisease',
  'D009223\n'],
 ('10737981', 'GM1-gangliosidosis'): ['532',
  '550',
  'SpecificDisease',
  'D016537\n'],
 ('2886237', 'Progressive Tapeto-Choroidal Dystrophy'): ['263',
  '301',
  'SpecificDisease',
  'C531652\n'],
 ('2450401', 'DMD'): ['408', '411', 'Modifier', 'D020388\n'],
 ('7611277', 'breast and prostate cancer'): ['960',
  '986',
  'CompositeMention',
  'D001943|D011471\n'],
 ('7857677', 'Gaucher disease'): ['163',
  '178',
  'SpecificDisease',
  'D005776\n'],
 ('10915770', 'PWS'): ['1694', '1697', 'SpecificDisease', 'D011218\n'],
 ('3862128', 'PKU'): ['1225', '1228', 'Modifier', 'D010661\n'],
 ('1301190', 'TSD'): ['1284', '1287', 'Modifier', 'D013661\n'],
 ('10598803', 'colorectal cancer'): ['526',
  '543',
  'SpecificDisease',
  'D015179\n'],
 ('6103091', 'proteinuria'): ['328', '339', 'SpecificDisease', 'D011507\n'],
 ('1717985', 'Piebaldism'): ['739', '749', 'SpecificDisease', 'D016116\n'],
 ('10470088', 'attenuated polyposis'): ['440', '460', 'Modifier', 'C538265\n'],
 ('8589721', 'MCF-7 tumours'): ['770', '783', 'DiseaseClass', 'D009369\n'],
 ('7717396', 'ALD'): ['1349', '1352', 'SpecificDisease', 'D000326\n'],
 ('1358807', 'X-linked amelogenesis imperfecta'): ['227',
  '259',
  'SpecificDisease',
  'C538243\n'],
 ('10382909', 'X-linked Emery-Dreifuss muscular dystrophy'): ['31',
  '73',
  'SpecificDisease',
  'D020389\n'],
 ('8533762', 'malaria'): ['262', '269', 'SpecificDisease', 'D008288\n'],
 ('2544995', 'aniridia'): ['299', '307', 'SpecificDisease', 'D015783\n'],
 ('7543316', 'Myotonic dystrophy'): ['0',
  '18',
  'SpecificDisease',
  'D009223\n'],
 ('10071185', 'clinical abnormalities'): ['1252',
  '1274',
  'DiseaseClass',
  'D013568\n'],
 ('8301658', 'X linked recessive thrombocytopenia'): ['882',
  '917',
  'SpecificDisease',
  'OMIM:313900\n'],
 ('7605382', 'CETP deficiency'): ['1566',
  '1581',
  'SpecificDisease',
  'OMIM:143470\n'],
 ('2760209', 'Lesch-Nyhan'): ['31', '42', 'Modifier', 'D007926\n'],
 ('1301161', 'congenital blindness'): ['161',
  '181',
  'SpecificDisease',
  'D057130\n'],
 ('10471457', 'hereditary hemochromatosis'): ['1950',
  '1976',
  'SpecificDisease',
  'D006432\n'],
 ('10071185', 'mental retardation'): ['1029',
  '1047',
  'DiseaseClass',
  'D008607\n'],
 ('10051005', 'hereditary non-polyposis cancer syndrome'): ['278',
  '318',
  'SpecificDisease',
  'D003123\n'],
 ('2773936', 'inborn errors of myelin metabolism'): ['1444',
  '1478',
  'DiseaseClass',
  'D020279\n'],
 ('10083733', 'APC'): ['1410', '1413', 'Modifier', 'D011125\n'],
 ('8317477', 'HD'): ['721', '723', 'Modifier', 'D006816\n'],
 ('10737980', 'X-linked adrenoleukodystrophy'): ['20',
  '49',
  'Modifier',
  'D000326\n'],
 ('10607954', 'defect of the anterior midline scalp'): ['252',
  '288',
  'DiseaseClass',
  'C538225\n'],
 ('3348216', 'hematologic malignancy'): ['62',
  '84',
  'DiseaseClass',
  'D019337\n'],
 ('7523157',
  'completely deficient in the seventh component of complement'): ['577', '636', 'SpecificDisease', 'OMIM:610102\n'],
 ('10556283', 'medulloblastomas'): ['1347',
  '1363',
  'SpecificDisease',
  'D008527\n'],
 ('10426139', 'McLeod'): ['690', '696', 'Modifier', 'OMIM:300842\n'],
 ('3343337', 'inherited disorder'): ['1465',
  '1483',
  'DiseaseClass',
  'D030342\n'],
 ('7573040', 'MJD'): ['844', '847', 'SpecificDisease', 'D017827\n'],
 ('6524872', 'adrenal cortical insufficiency'): ['199',
  '229',
  'DiseaseClass',
  'D000309\n'],
 ('10528860', 'maternal heterodisomy'): ['1180',
  '1201',
  'DiseaseClass',
  'D024182\n'],
 ('2352258', 'renal cell carcinoma'): ['1209',
  '1229',
  'SpecificDisease',
  'D002292\n'],
 ('1307245', 'Norrie disease'): ['944', '958', 'SpecificDisease', 'C537849\n'],
 ('7607677', 'deficiency in N-acetylgalactosamine-6-sulfatase'): ['193',
  '240',
  'SpecificDisease',
  'D009085\n'],
 ('8301658', 'thrombocytopenia'): ['88',
  '104',
  'SpecificDisease',
  'D013921\n'],
 ('10330430', 'familial hypertrophic cardiomyopathy'): ['188',
  '224',
  'SpecificDisease',
  'D024741\n'],
 ('7964884', 'cerebrotendinous xanthomatosis'): ['329',
  '359',
  'SpecificDisease',
  'D019294\n'],
 ('1684088', 'metachromatic leukodystrophy'): ['179',
  '207',
  'SpecificDisease',
  'D007966\n'],
 ('6103091', 'nephritis'): ['746', '755', 'SpecificDisease', 'D009393\n'],
 ('2995231', 'Becker muscular dystrophy'): ['236',
  '261',
  'SpecificDisease',
  'C537666\n'],
 ('10817650', 'ataxia-telangiectasia'): ['103',
  '124',
  'Modifier',
  'D001260\n'],
 ('2895982', 'Lowe syndrome'): ['821', '834', 'SpecificDisease', 'D009800\n'],
 ('10500204', 'absence of deep tendon reflexes'): ['293',
  '324',
  'DiseaseClass',
  'D012021\n'],
 ('10465113', 'club foot'): ['749', '758', 'SpecificDisease', 'D003025\n'],
 ('7717396', 'X-ALD'): ['1527', '1532', 'Modifier', 'D000326\n'],
 ('10817650', 'A-T'): ['1573', '1576', 'Modifier', 'D001260\n'],
 ('7581380', 'dominant non-dystrophic myotonias'): ['61',
  '94',
  'DiseaseClass',
  'C536245\n'],
 ('10571943', 'WT1 gene abnormality'): ['642',
  '662',
  'SpecificDisease',
  'D009396\n'],
 ('7315872', 'CTX'): ['702', '705', 'SpecificDisease', 'D019294\n'],
 ('7599636', 'amelogenesis imperfecta'): ['245',
  '268',
  'DiseaseClass',
  'D000567\n'],
 ('1468459', 'developmental delay'): ['623',
  '642',
  'DiseaseClass',
  'D002658\n'],
 ('10788334', 'breast and ovarian cancer'): ['1097',
  '1122',
  'CompositeMention',
  'D001943|D010051\n'],
 ('1361318', 'cutaneous vasculitis'): ['312',
  '332',
  'SpecificDisease',
  'D018366\n'],
 ('8004674', 'McLeod syndrome'): ['903',
  '918',
  'SpecificDisease',
  'OMIM:300842\n'],
 ('2703233', 'myotonic dystrophy'): ['527',
  '545',
  'SpecificDisease',
  'D009223\n'],
 ('10861282', 'ankylosing spondylitis'): ['60',
  '82',
  'SpecificDisease',
  'D013167\n'],
 ('10190331', 'EVA'): ['628', '631', 'SpecificDisease', 'OMIM:600791\n'],
 ('3591825', 'ALD'): ['721', '724', 'SpecificDisease', 'D000326\n'],
 ('2792129', 'recurrent meningitis'): ['158',
  '178',
  'SpecificDisease',
  'D008581+D012008\n'],
 ('2912886', 'glucose 6 phosphate dehydrogenase (G6PD) deficiency'): ['52',
  '103',
  'SpecificDisease',
  'D005955\n'],
 ('10205262', 'AKU'): ['653', '656', 'SpecificDisease', 'D000474\n'],
 ('10533031', 'diabetes mellitus'): ['948',
  '965',
  'SpecificDisease',
  'D003920\n'],
 ('1248000', 'A-T'): ['1319', '1322', 'Modifier', 'D001260\n'],
 ('8198128', 'neurodegenerative disease'): ['167',
  '192',
  'DiseaseClass',
  'D019636\n'],
 ('10556283', 'gliomas'): ['1125', '1132', 'SpecificDisease', 'D005910\n'],
 ('2209091', 'Friedreich ataxia'): ['120', '137', 'Modifier', 'D005621\n'],
 ('10194428', 'Hereditary hemochromatosis'): ['119',
  '145',
  'SpecificDisease',
  'D006432\n'],
 ('1279971', 'piebald'): ['786', '793', 'Modifier', 'D016116\n'],
 ('10571950', 'recessive inherited disorder'): ['133',
  '161',
  'DiseaseClass',
  'D030342\n'],
 ('3464560', 'Duchenne muscular dystrophy'): ['241',
  '268',
  'SpecificDisease',
  'D020388\n'],
 ('10447259', 'WAS'): ['1488', '1491', 'SpecificDisease', 'D014923\n'],
 ('10484981', 'cancers'): ['300', '307', 'DiseaseClass', 'D009369\n'],
 ('10077614', 'DDS'): ['1037', '1040', 'SpecificDisease', 'D030321\n'],
 ('7857677', 'deficiency of beta-glucocerebrosidase'): ['200',
  '237',
  'SpecificDisease',
  'D005776\n'],
 ('10612394', 'NBS'): ['371', '374', 'SpecificDisease', 'D049932\n'],
 ('7076260', 'G6PD deficient'): ['253', '267', 'Modifier', 'D005955\n'],
 ('10747931', 'adipose tissue inflammation'): ['721',
  '748',
  'SpecificDisease',
  'D007249\n'],
 ('7759075', 'familial breast cancer'): ['73',
  '95',
  'SpecificDisease',
  'D001943\n'],
 ('10369876', 'neurohypophyseal diabetes insipidus'): ['627',
  '662',
  'SpecificDisease',
  'D020790\n'],
 ('2352258', 'Sporadic cerebellar haemangioblastoma'): ['1162',
  '1199',
  'SpecificDisease',
  'D018325\n'],
 ('1056013', 'leukemia'): ['971', '979', 'DiseaseClass', 'D007938\n'],
 ('3422216', 'choroideremia'): ['601', '614', 'Modifier', 'D015794\n'],
 ('10766245', 'NBS'): ['1239', '1242', 'Modifier', 'D049932\n'],
 ('7769092', 'VLCAD deficiency'): ['1145',
  '1161',
  'SpecificDisease',
  'C536353\n'],
 ('8198128', 'FRDA'): ['1255', '1259', 'Modifier', 'D005621\n'],
 ('8554067', 'breast and ovarian cancer'): ['280',
  '305',
  'Modifier',
  'D061325\n'],
 ('7599636', 'AIH1'): ['610', '614', 'SpecificDisease', 'C538243\n'],
 ('10915776', 'retinoblastoma'): ['1376', '1390', 'Modifier', 'D012175\n'],
 ('1303277', 'maternal disomy for chromosome 15'): ['234',
  '267',
  'SpecificDisease',
  'C538037\n'],
 ('8281142', 'breast tumour'): ['724', '737', 'Modifier', 'D001943\n'],
 ('10982189', 'APC tumor'): ['30', '39', 'Modifier', 'D011125\n'],
 ('10465113', 'Multiple epiphyseal dysplasia'): ['1323',
  '1352',
  'SpecificDisease',
  'D010009\n'],
 ('3455778', 'PKU'): ['1322', '1325', 'SpecificDisease', 'D010661\n'],
 ('10593994', 'Papillon-Lefevre syndrome'): ['81',
  '106',
  'SpecificDisease',
  'D010214\n'],
 ('1248000', 'ataxia-telangiectasia'): ['53',
  '74',
  'SpecificDisease',
  'D001260\n'],
 ('1056013', 'chromosome instability'): ['145',
  '167',
  'DiseaseClass',
  'D043171\n'],
 ('2303408', 'Deficiency of the murine fifth complement component (C5)'): ['0',
  '56',
  'SpecificDisease',
  'OMIM:609536\n'],
 ('7991123', 'impaired function of adrenal cortex and testes'): ['355',
  '401',
  'CompositeMention',
  'D000303\n'],
 ('10581027', 'PLS'): ['1386', '1389', 'Modifier', 'D010214\n'],
 ('7298854', 'CTX'): ['1399', '1402', 'SpecificDisease', 'D019294\n'],
 ('8252631', 'adenomatous polyposis coli'): ['181',
  '207',
  'SpecificDisease',
  'D011125\n'],
 ('8375105', 'DMD'): ['551', '554', 'Modifier', 'D020388\n'],
 ('10417286', 'H'): ['351', '352', 'SpecificDisease', 'D000848\n'],
 ('2786201', 'type IIA vWD'): ['989', '1001', 'Modifier', 'D056728\n'],
 ('8563759', 'neuroepithelial abnormalities'): ['71',
  '100',
  'DiseaseClass',
  'D018302\n'],
 ('10571950', 'developmental abnormalities of the cochlea'): ['179',
  '221',
  'DiseaseClass',
  'D015834\n'],
 ('3014348', 'Duchenne muscular dystrophy'): ['88',
  '115',
  'SpecificDisease',
  'D020388\n'],
 ('10196381', 'X-linked adrenoleukodystrophy'): ['296',
  '325',
  'SpecificDisease',
  'D000326\n'],
 ('6783144', 'meningococcal meningitis'): ['271',
  '295',
  'SpecificDisease',
  'D008585\n'],
 ('7611277', 'male breast cancer'): ['1516',
  '1534',
  'SpecificDisease',
  'D018567\n'],
 ('2316519', 'Duchenne muscular dystrophy'): ['201',
  '228',
  'SpecificDisease',
  'D020388\n'],
 ('7076260', 'Glucose-6-phosphate dehydrogenase deficiency'): ['0',
  '44',
  'SpecificDisease',
  'D005955\n'],
 ('8441467', 'Central nervous system demyelination'): ['353',
  '389',
  'DiseaseClass',
  'D003711\n'],
 ('7762560', 'DM'): ['1344', '1346', 'SpecificDisease', 'D009223\n'],
 ('8554067', 'breast-ovarian syndrome'): ['1154',
  '1177',
  'SpecificDisease',
  'D061325\n'],
 ('8266996', 'Prader-Willi syndrome'): ['240',
  '261',
  'SpecificDisease',
  'D011218\n'],
 ('10930571', 'X-linked agammaglobulinemia'): ['1291',
  '1318',
  'SpecificDisease',
  'OMIM:300755\n'],
 ('10192399', 'syndromic deafness'): ['115',
  '133',
  'DiseaseClass',
  'D003638\n'],
 ('1269174', 'corneal arcus'): ['237', '250', 'SpecificDisease', 'D001112\n'],
 ('777027', 'C5-deficient'): ['317', '329', 'Modifier', 'OMIM:609536\n'],
 ('10353787', 'Overgrowth of oral mucosa and facial skin'): ['0',
  '41',
  'CompositeMention',
  'D006965\n'],
 ('10807385', 'BRCA-linked and sporadic ovarian cancer'): ['30',
  '69',
  'CompositeMention',
  'OMIM:604370|OMIM:612555|OMIM:613399|D010051\n'],
 ('8530105', 'TRPS'): ['918', '922', 'SpecificDisease', 'OMIM:190350\n'],
 ('3480530', 'retinoblastoma'): ['824', '838', 'SpecificDisease', 'D012175\n'],
 ('10802668', 'cardiac arrhythmia'): ['185',
  '203',
  'DiseaseClass',
  'D001145\n'],
 ('10814710', 'MPS IVA'): ['1880', '1887', 'SpecificDisease', 'OMIM:253000\n'],
 ('10077651', 'hereditary hemochromatosis'): ['58',
  '84',
  'SpecificDisease',
  'D006432\n'],
 ('3524231', 'Duchenne muscular dystrophy'): ['236',
  '263',
  'SpecificDisease',
  'D020388\n'],
 ('10737981', 'cardiomyopathy'): ['1124', '1138', 'DiseaseClass', 'D009202\n'],
 ('2404853', 'Becker muscular dystrophy'): ['399',
  '424',
  'SpecificDisease',
  'C537666\n'],
 ('7586656', 'HPRT enzyme deficiency'): ['274',
  '296',
  'SpecificDisease',
  'D007926\n'],
 ('7390473', 'G6PD deficiency'): ['189',
  '204',
  'SpecificDisease',
  'D005955\n'],
 ('10484772', 'Norrie disease'): ['544',
  '558',
  'SpecificDisease',
  'C537849\n'],
 ('3659917', 'DMD'): ['624', '627', 'Modifier', 'D020388\n'],
 ('10543403', 'FRDA'): ['525', '529', 'Modifier', 'D005621\n'],
 ('8434621', 'Familial Mediterranean fever'): ['0',
  '28',
  'SpecificDisease',
  'D010505\n'],
 ('10891444', 'autosomal, recessive disorder'): ['143',
  '172',
  'DiseaseClass',
  'D030342\n'],
 ('2912886', 'Chronic hemolysis'): ['497',
  '514',
  'SpecificDisease',
  'D006461\n'],
 ('7959759', 'adrenomyeloneuropathy'): ['1173',
  '1194',
  'SpecificDisease',
  'D000326\n'],
 ('10192399', 'adenoma'): ['702', '709', 'DiseaseClass', 'D000236\n'],
 ('8528200', 'MJD'): ['1739', '1742', 'SpecificDisease', 'D017827\n'],
 ('10677309', 'autosomal recessive disorder'): ['259',
  '287',
  'DiseaseClass',
  'D030342\n'],
 ('1301187', 'hyperphenylalaninemias'): ['47',
  '69',
  'DiseaseClass',
  'D010661\n'],
 ('10213492', 'APC'): ['1243', '1246', 'Modifier', 'D011125\n'],
 ('7858169', 'neurodegenerative disease'): ['95',
  '120',
  'DiseaseClass',
  'D019636\n'],
 ('8522307', 'renal cell carcinoma'): ['818',
  '838',
  'SpecificDisease',
  'D002292\n'],
 ('10441571', 'corneal dystrophy'): ['249',
  '266',
  'SpecificDisease',
  'D003317\n'],
 ('1978564', 'FAP'): ['1334', '1337', 'SpecificDisease', 'D011125\n'],
 ('10767347', 'Frda'): ['838', '842', 'Modifier', 'D005621\n'],
 ('3678494', 'DMD'): ['589', '592', 'SpecificDisease', 'D020388\n'],
 ('10807385', 'BRCA-associated hereditary ovarian cancers'): ['2276',
  '2318',
  'SpecificDisease',
  'OMIM:604370|OMIM:612555|OMIM:613399\n'],
 ('7825586', 'breast-ovary cancer'): ['301', '320', 'Modifier', 'D061325\n'],
 ('2995231', 'Duchenne and Becker muscular dystrophy'): ['60',
  '98',
  'CompositeMention',
  'D020388|C537666\n'],
 ('7795591', 'retinoblastoma'): ['845', '859', 'SpecificDisease', 'D012175\n'],
 ('318684', 'decreased synthesis of C3'): ['1618',
  '1643',
  'DiseaseClass',
  'OMIM:613779\n'],
 ('10732816', 'EDMD'): ['446', '450', 'SpecificDisease', 'D020389\n'],
 ('10802667', 'multigenic disorder'): ['1286',
  '1305',
  'DiseaseClass',
  'D030342\n'],
 ('10712209', 'prostate cancer'): ['2194', '2209', 'Modifier', 'D011471\n'],
 ('8088831', 'Canavan disease'): ['1481',
  '1496',
  'SpecificDisease',
  'D017825\n'],
 ('3014348', 'Becker muscular dystrophy'): ['1058',
  '1083',
  'SpecificDisease',
  'C537666\n'],
 ('10471457', 'Hereditary hemochromatosis'): ['105',
  '131',
  'SpecificDisease',
  'D006432\n'],
 ('1311721', 'acute leukemia'): ['1105',
  '1119',
  'SpecificDisease',
  'D007938\n'],
 ('10484981', 'tumor'): ['1753', '1758', 'Modifier', 'D009369\n'],
 ('8258524', 'DMD'): ['1512', '1515', 'SpecificDisease', 'D020388\n'],
 ('7599636', 'X-linked amelogenesis imperfecta'): ['524',
  '556',
  'SpecificDisease',
  'C538243\n'],
 ('10208848', 'X-linked inborn error of glycosphingolipid metabolism'): ['217',
  '270',
  'DiseaseClass',
  'D008052\n'],
 ('10330348', 'AT'): ['1202', '1204', 'Modifier', 'D001260\n'],
 ('10085150', 'hereditary hemochromatotic'): ['2094',
  '2120',
  'Modifier',
  'D006432\n'],
 ('107868', 'spondylitis'): ['1002', '1013', 'SpecificDisease', 'D013166\n'],
 ('8195156', 'hereditary glucose/galactose malabsorption'): ['222',
  '264',
  'SpecificDisease',
  'OMIM:606824\n'],
 ('10323252', 'cardiac conduction defects'): ['370',
  '396',
  'SpecificDisease',
  'OMIM:115080\n'],
 ('1334370', 'Wilms tumor'): ['729', '740', 'SpecificDisease', 'D009396\n'],
 ('1709636', 'Phenylketonuria'): ['119',
  '134',
  'SpecificDisease',
  'D010661\n'],
 ('1468459', 'Hypotonia'): ['535', '544', 'SpecificDisease', 'D009123\n'],
 ('10618304', 'congenital long-QT syndrome'): ['224',
  '251',
  'SpecificDisease',
  'D008133\n'],
 ('8500791', 'hemophilia A'): ['369', '381', 'Modifier', 'D006467\n'],
 ('7605382', 'hyperalphalipoproteinemic'): ['1032',
  '1057',
  'Modifier',
  'OMIM:143470\n'],
 ('7726234', 'Duchenne dystrophy'): ['721',
  '739',
  'SpecificDisease',
  'D020388\n'],
 ('8326491', 'TSD'): ['945', '948', 'Modifier', 'D013661\n'],
 ('10598815', 'obesity'): ['208', '215', 'SpecificDisease', 'D009765\n'],
 ('2568588', 'tumours'): ['1193', '1200', 'DiseaseClass', 'D009369\n'],
 ('10732811', 'MJD'): ['1552', '1555', 'SpecificDisease', 'D017827\n'],
 ('1361318', 'Complement factor 2 deficiency'): ['0',
  '30',
  'SpecificDisease',
  'OMIM:217000\n'],
 ('2568588', 'retinoblastoma'): ['1110',
  '1124',
  'SpecificDisease',
  'D012175\n'],
 ('100562', 'disseminated gonococcal infection'): ['183',
  '216',
  'SpecificDisease',
  'D004673\n'],
 ('1301938', 'Tay-Sachs'): ['303', '312', 'Modifier', 'D013661\n'],
 ('10417280', 'PWS'): ['1159', '1162', 'Modifier', 'D011218\n'],
 ('10802669', 'cerebellar dysfunction'): ['197',
  '219',
  'DiseaseClass',
  'D002526\n'],
 ('10943845', 'breast and ovarian cancers'): ['187',
  '213',
  'CompositeMention',
  'D010051|D001943\n'],
 ('409732', 'complement deficiencies (C2 and C7)'): ['1106',
  '1141',
  'CompositeMention',
  'OMIM:610102|OMIM:217000\n'],
 ('1248000', 'ovarian, gastric, and biliary system carcinomas'): ['1219',
  '1266',
  'CompositeMention',
  'D010051|D013274|D001661\n'],
 ('7726234', 'Myotonic dystrophy'): ['647',
  '665',
  'SpecificDisease',
  'D009223\n'],
 ('3600793', 'Duchenne muscular dystrophy'): ['159',
  '186',
  'Modifier',
  'D020388\n'],
 ('2852474', 'AHC'): ['1578', '1581', 'SpecificDisease', 'D000312\n'],
 ('8198124', 'iminodipeptiduria'): ['181',
  '198',
  'SpecificDisease',
  'D000592\n'],
 ('1325652', 'APC'): ['1108', '1111', 'Modifier', 'D011125\n'],
 ('1313112', 'epidermoid cysts'): ['170',
  '186',
  'SpecificDisease',
  'D004814\n'],
 ('10323252', 'weakness'): ['325', '333', 'DiseaseClass', 'D018908\n'],
 ('1302003', 'CHM'): ['890', '893', 'Modifier', 'D015794\n'],
 ('1351034', 'adrenocortical tumors'): ['583',
  '604',
  'SpecificDisease',
  'D018268\n'],
 ('7166314', 'upper respiratory infection'): ['445',
  '472',
  'SpecificDisease',
  'D012141\n'],
 ('10417286', 'clefts'): ['593', '599', 'DiseaseClass', 'D002971|D002972\n'],
 ('10353787', 'mucosal lesions'): ['1340',
  '1355',
  'SpecificDisease',
  'D009059\n'],
 ('6387532', 'neurologic deterioration'): ['296',
  '320',
  'DiseaseClass',
  'D019636\n'],
 ('10788334', 'breast cancer'): ['1155',
  '1168',
  'SpecificDisease',
  'D001943\n'],
 ('1351034', 'adrenocorotical carcinoma'): ['231',
  '256',
  'SpecificDisease',
  'D018268\n'],
 ('10408776', 'RCC'): ['1651', '1654', 'SpecificDisease', 'D002292\n'],
 ('8084618', 'Ewing family of tumors'): ['26',
  '48',
  'DiseaseClass',
  'D012512\n'],
 ('10736265', 'diabetes mellitus'): ['323',
  '340',
  'SpecificDisease',
  'D003920\n'],
 ('6337374',
  'Glucose-6-phosphate dehydrogenase (G6PD; EC 1. 1. 1. 49) -deficient'): ['96',
  '163',
  'Modifier',
  'D005955\n'],
 ('7106752', 'G6PD deficiency'): ['1504',
  '1519',
  'SpecificDisease',
  'D005955\n'],
 ('10633128', 'autosomal recessive neurodegenerative disease'): ['54',
  '99',
  'DiseaseClass',
  'D020271\n'],
 ('10429004', 'Maternal Phenylketonuria'): ['148',
  '172',
  'Modifier',
  'D017042\n'],
 ('10746568', 'factor X deficiency'): ['333',
  '352',
  'SpecificDisease',
  'D005171\n'],
 ('10484772', 'retinal telangiectasis'): ['235',
  '257',
  'SpecificDisease',
  'D058456\n'],
 ('3615198', 'PKU'): ['299', '302', 'Modifier', 'D010661\n'],
 ('7315872', 'cerebrotendinous xanthomatosis'): ['193',
  '223',
  'SpecificDisease',
  'D019294\n'],
 ('10208848', 'Fabry disease'): ['158', '171', 'SpecificDisease', 'D000795\n'],
 ('10915770', 'dysmorphic features'): ['1654',
  '1673',
  'DiseaseClass',
  'D057215\n'],
 ('1731805', 'normotriglyceridemic abetalipoproteinemia'): ['43',
  '84',
  'SpecificDisease',
  'D000012\n'],
 ('8370681', 'Autosomal dominant neurohypophyseal diabetes insipidus'): ['76',
  '130',
  'SpecificDisease',
  'OMIM:125700\n'],
 ('100562', 'arthritis'): ['1013', '1022', 'DiseaseClass', 'D001168\n'],
 ('2310692', 'paroxysmal nocturnal haemoglobinuria'): ['162',
  '198',
  'SpecificDisease',
  'D006457\n'],
 ('10556285', 'Machado-Joseph disease'): ['148',
  '170',
  'SpecificDisease',
  'D017827\n'],
 ('10196381', 'peroxisomal disorder'): ['275',
  '295',
  'DiseaseClass',
  'D018901\n'],
 ('1351034', 'familial adenomatous polyposis'): ['190',
  '220',
  'SpecificDisease',
  'D011125\n'],
 ('409732', 'C7 deficiency'): ['707',
  '720',
  'SpecificDisease',
  'OMIM:610102\n'],
 ('1301161', 'sensory neural deafness'): ['183',
  '206',
  'SpecificDisease',
  'D006319\n'],
 ('7579347', 'thrombocytopenia'): ['198',
  '214',
  'SpecificDisease',
  'D013921\n'],
 ('2352258', 'tumour'): ['1138', '1144', 'Modifier', 'D009369\n'],
 ('8023850', 'autosomal recessive leukodystrophy'): ['87',
  '121',
  'DiseaseClass',
  'D007966\n'],
 ('7437512', 'Wiskott-Aldrich defect'): ['1235',
  '1257',
  'SpecificDisease',
  'D014923\n'],
 ('10766245', 'ataxia-telangiectasia'): ['80',
  '101',
  'SpecificDisease',
  'D001260\n'],
 ('10598803', 'attenuated polyposis'): ['1113',
  '1133',
  'Modifier',
  'C538265\n'],
 ('6337374', 'thalassemia'): ['606', '617', 'Modifier', 'D013789\n'],
 ('1776638', 'familial polyposis coli'): ['477',
  '500',
  'SpecificDisease',
  'D011125\n'],
 ('8104633', 'Metachromatic leukodystrophy'): ['105',
  '133',
  'SpecificDisease',
  'D007966\n'],
 ('10366443', 'beta-glucuronidase deficiency'): ['296',
  '325',
  'SpecificDisease',
  'D016538\n'],
 ('10721669', 'WD'): ['1338', '1340', 'SpecificDisease', 'D006527\n'],
 ('1981994', 'amyloidosis'): ['786', '797', 'DiseaseClass', 'D000686\n'],
 ('3346018', 'Duchenne muscular dystrophy'): ['166',
  '193',
  'SpecificDisease',
  'D020388\n'],
 ('7617034', 'G6PD deficiency'): ['1278',
  '1293',
  'SpecificDisease',
  'D005955\n'],
 ('10072428', 'Familial gastric cancer'): ['231',
  '254',
  'SpecificDisease',
  'D013274\n'],
 ('10788334', 'cancer'): ['358', '364', 'DiseaseClass', 'D009369\n'],
 ('1302022', 'myotonic dystrophy'): ['124',
  '142',
  'SpecificDisease',
  'D009223\n'],
 ('10366443', 'mucopolysaccharidosis VII'): ['209',
  '234',
  'SpecificDisease',
  'D016538\n'],
 ('6337374', 'malarious'): ['716', '725', 'Modifier', 'D008288\n'],
 ('10798358', 'nonketotic hyperglycinemia'): ['274',
  '300',
  'SpecificDisease',
  'D020158\n'],
 ('10441329', 'copper deficient'): ['980', '996', 'DiseaseClass', 'C535468\n'],
 ('1338904', 'adenomatous polyposis coli'): ['134',
  '160',
  'Modifier',
  'D011125\n'],
 ('10430841', 'spinal xanthomatosis'): ['523',
  '543',
  'SpecificDisease',
  'D014973\n'],
 ('492812', 'meningococcal meningitis'): ['180',
  '204',
  'SpecificDisease',
  'D008585\n'],
 ('10982189', 'FAP'): ['611', '614', 'Modifier', 'D011125\n'],
 ('7573040', 'spinocerebellar ataxia 3/Machado-Joseph disease'): ['90',
  '137',
  'Modifier',
  'D017827\n'],
 ('10090880', 'familial Mediterranean fever'): ['34',
  '62',
  'SpecificDisease',
  'D010505\n'],
 ('10377440',
  'deficiency of glycosyl phosphatidylinositol (GPI) -anchored proteins'): ['160',
  '228',
  'DiseaseClass',
  'C537277\n'],
 ('10441571', 'congenital cataract'): ['1066',
  '1085',
  'SpecificDisease',
  'D002386\n'],
 ('8530105', 'LGS'): ['1121', '1124', 'Modifier', 'D015826\n'],
 ('10737119', 'primary dystonias'): ['1470',
  '1487',
  'DiseaseClass',
  'D020821\n'],
 ('2884570', 'autosomal recessive human genetic disorder'): ['134',
  '176',
  'DiseaseClass',
  'D030342\n'],
 ('1301189', 'TSD'): ['200', '203', 'SpecificDisease', 'D013661\n'],
 ('1307253', 'Duchenne and Becker muscular dystrophy'): ['293',
  '331',
  'Modifier',
  'D020388|C537666\n'],
 ('7825586', 'ovarian cancers'): ['964',
  '979',
  'SpecificDisease',
  'D010051\n'],
 ('10924409', 'tumors'): ['1294', '1300', 'DiseaseClass', 'D009369\n'],
 ('10528860', 'intrauterine growth retardation'): ['418',
  '449',
  'DiseaseClass',
  'D005317\n'],
 ('1325652', 'colorectal carcinoma'): ['226',
  '246',
  'SpecificDisease',
  'D015179\n'],
 ('10533031', 'loss of proprioception'): ['967',
  '989',
  'DiseaseClass',
  'D020886\n'],
 ('1317264', 'familial adenomatous polyposis'): ['30',
  '60',
  'SpecificDisease',
  'D011125\n'],
 ('6524872', 'peroxisomal disorders'): ['628',
  '649',
  'DiseaseClass',
  'D018901\n'],
 ('3014348', 'DMD'): ['1378', '1381', 'Modifier', 'D020388\n'],
 ('3862128', 'autosomal recessive disorder'): ['134',
  '162',
  'DiseaseClass',
  'D030342\n'],
 ('10192393', 'APC'): ['567', '570', 'SpecificDisease', 'D011125\n'],
 ('3393536', 'genetic abnormality'): ['217',
  '236',
  'DiseaseClass',
  'D030342\n'],
 ('2316519', 'DMD'): ['1107', '1110', 'Modifier', 'D020388\n'],
 ('7523157', 'deficiency of the seventh component of complement'): ['11',
  '60',
  'SpecificDisease',
  'OMIM:610102\n'],
 ('10639175', 'immune defects'): ['269', '283', 'DiseaseClass', 'D007154\n'],
 ('7991123', 'adolescent cerebral ALD'): ['689',
  '712',
  'SpecificDisease',
  'D000326\n'],
 ('313733', 'hypoimmunoglobulinemia'): ['188',
  '210',
  'DiseaseClass',
  'D007153\n'],
 ('1322637', 'Tay-Sachs disease'): ['1171',
  '1188',
  'SpecificDisease',
  'D013661\n'],
 ('7937795', 'ovarian cancers'): ['139',
  '154',
  'SpecificDisease',
  'D010051\n'],
 ('107868', 'ankylosing spondylitis'): ['1197',
  '1219',
  'SpecificDisease',
  'D013167\n'],
 ('3600793', 'X-linked Duchenne muscular dystrophy'): ['37',
  '73',
  'Modifier',
  'D020388\n'],
 ('2491010', 'DMD'): ['1186', '1189', 'Modifier', 'D020388\n'],
 ('10449794', 'cancer'): ['216', '222', 'DiseaseClass', 'D009369\n'],
 ('4019732', 'CP'): ['818', '820', 'SpecificDisease', 'D002972\n'],
 ('2180286', 'muscular dystrophy'): ['120',
  '138',
  'DiseaseClass',
  'D009136\n'],
 ('1201235', 'severe neonatal jaundice'): ['735',
  '759',
  'SpecificDisease',
  'D007567\n'],
 ('2241452', 'complete C9 deficiency'): ['1157',
  '1179',
  'SpecificDisease',
  'OMIM:613825\n'],
 ('2215607', 'CETP deficiency'): ['2113',
  '2128',
  'SpecificDisease',
  'OMIM:143470\n'],
 ('6604602', 'Ankylosing spondylitis'): ['1393',
  '1415',
  'SpecificDisease',
  'D013167\n'],
 ('3615198', 'phenylketonuria'): ['84', '99', 'SpecificDisease', 'D010661\n'],
 ('10196379', 'ovarian cancer'): ['1459', '1473', 'Modifier', 'D010051\n'],
 ('1302008', 'developmental disorder'): ['142',
  '164',
  'DiseaseClass',
  'D002658\n'],
 ('8244393',
  'autosomal recessive disease glucose/galactose malabsorption'): ['496', '555', 'SpecificDisease', 'OMIM:606824\n'],
 ('1978564', 'APC'): ['1409', '1412', 'Modifier', 'D011125\n'],
 ('1302008', "Wilms' tumour"): ['46', '59', 'Modifier', 'D009396\n'],
 ('10593994', 'autosomal recessive disorder'): ['119',
  '147',
  'DiseaseClass',
  'D030342\n'],
 ('8116611', 'DM'): ['1584', '1586', 'SpecificDisease', 'D009223\n'],
 ('1517503', 'Deficiencies of C7'): ['326',
  '344',
  'SpecificDisease',
  'OMIM:610102\n'],
 ('1324223', 'colorectal cancer'): ['205',
  '222',
  'SpecificDisease',
  'D015179\n'],
 ('10323740', 'lip pits'): ['403', '411', 'SpecificDisease', 'C536528\n'],
 ('3789016', 'deficiency of the third component of complement'): ['33',
  '80',
  'SpecificDisease',
  'OMIM:613779\n'],
 ('492335', 'deficiency of the fifth (C5) component of complement1-3'): ['64',
  '119',
  'SpecificDisease',
  'OMIM:609536\n'],
 ('1302032', 'fragile X'): ['552', '561', 'SpecificDisease', 'D005600\n'],
 ('8566952', 'Adrenoleukodystrophy'): ['108',
  '128',
  'SpecificDisease',
  'D000326\n'],
 ('7668252', 'VLCAD-deficient'): ['603', '618', 'Modifier', 'C536353\n'],
 ('7759075', 'breast cancer'): ['618', '631', 'Modifier', 'D001943\n'],
 ('1505982', 'WAGR contiguous gene syndrome'): ['248',
  '277',
  'SpecificDisease',
  'D017624\n'],
 ('7523157', 'C7 deficient'): ['934',
  '946',
  'SpecificDisease',
  'OMIM:610102\n'],
 ('8162051', 'cancer'): ['762', '768', 'Modifier', 'D009369\n'],
 ('1282899', 'HPRT deficient'): ['1189', '1203', 'Modifier', 'D007926\n'],
 ('10417286', 'van der Woude syndrome'): ['156',
  '178',
  'SpecificDisease',
  'C536528\n'],
 ('7825586', 'breast-ovary cancer-family syndrome'): ['116',
  '151',
  'SpecificDisease',
  'D061325\n'],
 ('1338764', 'FAP'): ['1298', '1301', 'Modifier', 'D011125\n'],
 ('1776638', 'adenomatous polyposis coli'): ['406',
  '432',
  'SpecificDisease',
  'D011125\n'],
 ('7573040', 'type I autosomal dominant cerebellar ataxia'): ['191',
  '234',
  'SpecificDisease',
  'OMIM:109150\n'],
 ('10434119', 'clefts to the lip and/or palate'): ['286',
  '317',
  'CompositeMention',
  'D002971|D002972\n'],
 ('2912886', 'CNSHA'): ['443', '448', 'SpecificDisease', 'D000746\n'],
 ('7937795', 'ovarian tumors'): ['558', '572', 'SpecificDisease', 'D010051\n'],
 ('8281152', 'Myotonic dystrophy'): ['0', '18', 'Modifier', 'D009223\n'],
 ('10077614', 'nephropathy'): ['598', '609', 'SpecificDisease', 'D007674\n'],
 ('8282802', 'LCAT deficiency'): ['625',
  '640',
  'SpecificDisease',
  'D007863\n'],
 ('777027',
  'Hereditary deficiency of the fifth component of complement'): ['0',
  '58',
  'SpecificDisease',
  'OMIM:609536\n'],
 ('1937471',
  'Hypoxanthine-guanine phosphoribosyltransferase deficiency'): ['0', '57', 'SpecificDisease', 'OMIM:300323\n'],
 ('10470088', 'tumors'): ['1515', '1521', 'DiseaseClass', 'D009369\n'],
 ('1311721', 'tumors'): ['679', '685', 'DiseaseClass', 'D009369\n'],
 ('23402', 'neonatal jaundice'): ['326',
  '343',
  'SpecificDisease',
  'D007567\n'],
 ('10636421', 'XLRS'): ['1555', '1559', 'SpecificDisease', 'D041441\n'],
 ('8281142', 'breast cancer'): ['409', '422', 'Modifier', 'D001943\n'],
 ('10470088', 'FAP desmoids'): ['1268',
  '1280',
  'SpecificDisease',
  'D018222\n'],
 ('2894613', 'Von Hippel-Lindau disease'): ['99',
  '124',
  'SpecificDisease',
  'D006623\n'],
 ('1380672', 'genetic disorders'): ['314', '331', 'DiseaseClass', 'D030342\n'],
 ('10932179', 'Huntington disease'): ['117',
  '135',
  'SpecificDisease',
  'D006816\n'],
 ('313733', 'Hereditary C2 deficiency'): ['0',
  '24',
  'SpecificDisease',
  'OMIM:217000\n'],
 ('7874117', 'cleft palate'): ['334', '346', 'SpecificDisease', 'D002972\n'],
 ('1468459', 'craniofrontonasal dysplasia'): ['61',
  '88',
  'SpecificDisease',
  'C536456\n'],
 ('511159', 'G6PD deficiency'): ['632', '647', 'SpecificDisease', 'D005955\n'],
 ('8162071', 'anterior segment malformations'): ['443',
  '473',
  'DiseaseClass',
  'C537775\n'],
 ('10434119', 'mucous cysts'): ['252', '264', 'SpecificDisease', 'D009078\n'],
 ('10192399', 'Pendred syndrome'): ['1490',
  '1506',
  'SpecificDisease',
  'C536648\n'],
 ('10364525', 'hereditary prostate cancer'): ['599',
  '625',
  'SpecificDisease',
  'C537243\n'],
 ('10802667', 'dominantly inherited disease'): ['212',
  '240',
  'DiseaseClass',
  'D030342\n'],
 ('1301938', 'autosomal recessive genetic disorder'): ['108',
  '144',
  'DiseaseClass',
  'D030342\n'],
 ('10807385', 'ovarian cancer'): ['1223',
  '1237',
  'SpecificDisease',
  'D010051\n'],
 ('10556283', 'ependymomas'): ['1134', '1145', 'SpecificDisease', 'D004806\n'],
 ('10662807', 'HMS'): ['1995', '1998', 'SpecificDisease', 'C537627\n'],
 ('10712201', 'PWS'): ['688', '691', 'Modifier', 'D011218\n'],
 ('7825578', 'Machado-Joseph disease'): ['1211',
  '1233',
  'SpecificDisease',
  'D017827\n'],
 ('100562', 'Complete absence of C7'): ['898',
  '920',
  'SpecificDisease',
  'OMIM:610102\n'],
 ('2852474', 'Duchenne muscular dystrophy'): ['1230',
  '1257',
  'Modifier',
  'D020388\n'],
 ('7523157', 'terminal complement deficiency'): ['501',
  '531',
  'SpecificDisease',
  'D007153\n'],
 ('1307245', 'ND'): ['271', '273', 'Modifier', 'C537849\n'],
 ('10732811', 'Machado-Joseph disease'): ['64',
  '86',
  'SpecificDisease',
  'D017827\n'],
 ('10430841', 'Spinal xanthomatosis'): ['0',
  '20',
  'SpecificDisease',
  'D014973\n'],
 ('10472529', 'phenylketonuria'): ['427',
  '442',
  'SpecificDisease',
  'D010661\n'],
 ('3876122', 'Type I von Willebrand disease'): ['112',
  '141',
  'SpecificDisease',
  'D056725\n'],
 ('8023850', 'Canavan'): ['1235', '1242', 'Modifier', 'D017825\n'],
 ('3029599', 'neurological and behavioural disorder'): ['141',
  '178',
  'DiseaseClass',
  'D009422|D001523\n'],
 ('10802669', 'ataxia-telangiectasia'): ['142',
  '163',
  'SpecificDisease',
  'D001260\n'],
 ('10817650', 'ischemic heart disease'): ['266',
  '288',
  'SpecificDisease',
  'D017202\n'],
 ('8522307', 'tumor'): ['1394', '1399', 'Modifier', 'D009369\n'],
 ('6604602', 'HEMRI'): ['1367',
  '1372',
  'SpecificDisease',
  'D007249+D030342\n'],
 ('10484772', 'deficiency of norrin'): ['938',
  '958',
  'DiseaseClass',
  'C537849\n'],
 ('7759076', 'cancer'): ['611', '617', 'Modifier', 'D009369\n'],
 ('10618304', 'Brugada syndrome'): ['37',
  '53',
  'SpecificDisease',
  'D053840\n'],
 ('8563759', 'Brca1-deficient'): ['1226', '1241', 'Modifier', 'OMIM:604370\n'],
 ('6337374', 'Glucose-6-phosphate dehydrogenase deficiency'): ['0',
  '44',
  'SpecificDisease',
  'D005955\n'],
 ('4019732', 'oral cleft'): ['325',
  '335',
  'SpecificDisease',
  'D002971|D002972\n'],
 ('1709636', 'autosomal recessive disease'): ['147',
  '174',
  'DiseaseClass',
  'D030342\n'],
 ('8301658', 'Wiskott-Aldrich syndrome'): ['657',
  '681',
  'SpecificDisease',
  'D014923\n'],
 ('10612394', 'A-T'): ['924', '927', 'SpecificDisease', 'D001260\n'],
 ('218453', 'pituitary-adrenal abnormality'): ['473',
  '502',
  'DiseaseClass',
  'D010900+D000307\n'],
 ('10930571', 'faciogenital dysplasia'): ['93',
  '115',
  'SpecificDisease',
  'C535331\n'],
 ('1468459', 'Craniofrontonasal dysplasia'): ['0',
  '27',
  'SpecificDisease',
  'C536456\n'],
 ('2241452', 'acute meningococcal meningitis'): ['513',
  '543',
  'SpecificDisease',
  'D008585\n'],
 ('1517503', 'C7 deficiency'): ['311',
  '324',
  'SpecificDisease',
  'OMIM:610102\n'],
 ('1346773', 'Wiskott-Aldrich syndrome'): ['137',
  '161',
  'SpecificDisease',
  'D014923\n'],
 ('7479827', 'sudden, unexplained death'): ['289',
  '314',
  'SpecificDisease',
  'D003645\n'],
 ('3718019', 'angioedema'): ['188', '198', 'SpecificDisease', 'D000799\n'],
 ('10716718', 'Atm deficiency'): ['952',
  '966',
  'SpecificDisease',
  'OMIM:208900\n'],
 ('1468459', 'CFND'): ['775', '779', 'SpecificDisease', 'C536456\n'],
 ('7550349', 'breast cancer'): ['403', '416', 'SpecificDisease', 'D001943\n'],
 ('10353787', 'fibroepithelial or epithelial hyperplasias'): ['1164',
  '1206',
  'CompositeMention',
  'D017573\n'],
 ('10923035', 'Benign familial infantile convulsions'): ['132',
  '169',
  'SpecificDisease',
  'D020936\n'],
 ('10712201', 'AS'): ['328', '330', 'SpecificDisease', 'D017204\n'],
 ('2703233', 'DM'): ['214', '216', 'SpecificDisease', 'D009223\n'],
 ('8533757', 'prostatic cancer'): ['858',
  '874',
  'SpecificDisease',
  'D011471\n'],
 ('10798358', 'neurological disturbance'): ['363',
  '387',
  'DiseaseClass',
  'D009461\n'],
 ('10930361',
  'autosomal recessive cardiodegenerative and neurodegenerative disease'): ['169',
  '237',
  'DiseaseClass',
  'D030342+D019636\n'],
 ('3393536', 'hemolytic anemia'): ['1497',
  '1513',
  'SpecificDisease',
  'D000743\n'],
 ('8113388', 'type I protein S deficiency'): ['145',
  '172',
  'SpecificDisease',
  'D018455\n'],
 ('10447259', 'XLT'): ['1589', '1592', 'SpecificDisease', 'OMIM:313900\n'],
 ('2310692', 'deficiency of the ninth component of complement'): ['53',
  '100',
  'SpecificDisease',
  'OMIM:613825\n'],
 ('1684088', 'MLD'): ['1990', '1993', 'SpecificDisease', 'D007966\n'],
 ('10406661', 'type II collagenopathy'): ['119',
  '141',
  'DiseaseClass',
  'C535964\n'],
 ('10706858', 'deficiencies of protein S'): ['97',
  '122',
  'SpecificDisease',
  'D018455\n'],
 ('10353787', 'mucosal overgrowth'): ['1459',
  '1477',
  'SpecificDisease',
  'D009059\n'],
 ('1676565', 'PMD'): ['643', '646', 'SpecificDisease', 'OMIM:312080\n'],
 ('8195156', 'diarrhea'): ['404', '412', 'SpecificDisease', 'D003967\n'],
 ('8563759', 'tumours'): ['418', '425', 'DiseaseClass', 'D009369\n'],
 ('10364518', 'abnormalities of eyes, nervous system, and kidneys'): ['229',
  '279',
  'CompositeMention',
  'D000015\n'],
 ('1345170', 'DTD'): ['693', '696', 'Modifier', 'C536170\n'],
 ('1999552', 'Hypocomplementemia'): ['184',
  '202',
  'DiseaseClass',
  'D007153\n'],
 ('1056013', 'Ataxia-telangiectasia'): ['61',
  '82',
  'SpecificDisease',
  'D001260\n'],
 ('10767313', 'fragile X syndrome'): ['290',
  '308',
  'SpecificDisease',
  'D005600\n'],
 ('1351034', 'benign tumors'): ['774', '787', 'DiseaseClass', 'D009369\n'],
 ('10411929', 'hemophagocytosis'): ['706', '722', 'DiseaseClass', 'D051359\n'],
 ('116187', 'PC Portland deficiency'): ['349',
  '371',
  'SpecificDisease',
  'D015324\n'],
 ('8370681', 'autosomal dominant neurohypophyseal diabetes insipidus'): ['365',
  '419',
  'SpecificDisease',
  'OMIM:125700\n'],
 ('8198124', 'skin ulcers'): ['200', '211', 'SpecificDisease', 'D012883\n'],
 ('3600794', 'Duchenne and the milder Becker muscular dystrophies'): ['166',
  '217',
  'CompositeMention',
  'D020388|C537666\n'],
 ('7815415', 'myotonic dystrophy'): ['144', '162', 'Modifier', 'D009223\n'],
 ('7790377', 'PTS1 protein-import defect'): ['1391',
  '1417',
  'SpecificDisease',
  'OMIM:202370|OMIM:214100\n'],
 ('10077614', 'Wilms tumor'): ['1152', '1163', 'SpecificDisease', 'D009396\n'],
 ('10735274', 'Friedreich ataxia'): ['42',
  '59',
  'SpecificDisease',
  'D005621\n'],
 ('10417286', 'cleft palate'): ['303', '315', 'SpecificDisease', 'D002972\n'],
 ('10470088', 'colorectal adenomas'): ['808',
  '827',
  'SpecificDisease',
  'D000236\n'],
 ('3578281', 'Prader-Willi syndrome'): ['123',
  '144',
  'SpecificDisease',
  'D011218\n'],
 ('2703233', 'Myotonic dystrophy'): ['0',
  '18',
  'SpecificDisease',
  'D009223\n'],
 ('107868', 'peripheral arthritis'): ['211',
  '231',
  'SpecificDisease',
  'D001168\n'],
 ('3258663', 'genetic abnormality'): ['1152',
  '1171',
  'DiseaseClass',
  'D030342\n'],
 ('10377440', 'GPI-anchor deficiency'): ['321',
  '342',
  'SpecificDisease',
  'C537277\n'],
 ('2008213', 'GGM'): ['1309', '1312', 'Modifier', 'OMIM:606824\n'],
 ('10382909', 'X-linked EMD'): ['365', '377', 'SpecificDisease', 'D020389\n'],
 ('1358807', 'inherited dental abnormality'): ['198',
  '226',
  'DiseaseClass',
  'D014071\n'],
 ('2180286', 'Duchenne muscular dystrophy'): ['232',
  '259',
  'SpecificDisease',
  'D020388\n'],
 ('10408776', 'VHL'): ['1658', '1661', 'Modifier', 'D006623\n'],
 ('2037285', 'DM'): ['1246', '1248', 'Modifier', 'D009223\n'],
 ('7814011', 'Norrie disease'): ['129', '143', 'Modifier', 'C537849\n'],
 ('10470286', 'gastric cancers'): ['268',
  '283',
  'SpecificDisease',
  'D013274\n'],
 ('10411929', 'Chediak-Higashi syndrome'): ['393',
  '417',
  'SpecificDisease',
  'D002609\n'],
 ('10465113', 'multiple epiphyseal dysplasia'): ['820',
  '849',
  'SpecificDisease',
  'D010009\n'],
 ('7581380', 'Dominant myotonia congenita'): ['219',
  '246',
  'SpecificDisease',
  'D009224\n'],
 ('10814710', 'Mucopolysaccharidosis IVA'): ['149',
  '174',
  'SpecificDisease',
  'OMIM:253000\n'],
 ('10636421', 'X-linked retinoschisis'): ['75',
  '97',
  'SpecificDisease',
  'D041441\n'],
 ('10500204', 'progressive gait and limb ataxia'): ['259',
  '291',
  'CompositeMention',
  'D020234|D001259\n'],
 ('10571950', 'PDS'): ['1162', '1165', 'Modifier', 'C536648\n'],
 ('523196', 'central nervous system symptoms'): ['203',
  '234',
  'DiseaseClass',
  'D002493\n'],
 ('10541953', 'PWS'): ['1018', '1021', 'Modifier', 'D011218\n'],
 ('10581027', 'autosomal recessive disorder'): ['211',
  '239',
  'DiseaseClass',
  'D030342\n'],
 ('1978564', 'colorectal cancer'): ['1517',
  '1534',
  'SpecificDisease',
  'D015179\n'],
 ('10839544', 'cancer'): ['212', '218', 'DiseaseClass', 'D009369\n'],
 ('2390095', 'genetic defects'): ['980', '995', 'DiseaseClass', 'D030342\n'],
 ('10662807', 'radiographic deformity of the fingers'): ['482',
  '519',
  'SpecificDisease',
  'D006226\n'],
 ('7696601', 'white matter lesions'): ['1139',
  '1159',
  'DiseaseClass',
  'D056784\n'],
 ('7991123', 'adrenomyeloneuropathy'): ['484',
  '505',
  'SpecificDisease',
  'D000326\n'],
 ('10891444', 'afibrinogenemia'): ['1271', '1286', 'Modifier', 'D000347\n'],
 ('6859721', 'C7 deficiency'): ['579',
  '592',
  'SpecificDisease',
  'OMIM:610102\n'],
 ('10830910', 'pheochromocytoma'): ['1110',
  '1126',
  'SpecificDisease',
  'D010673\n'],
 ('10398436', 'APC tumor'): ['415', '424', 'Modifier', 'D011125\n'],
 ('3524231', 'growth hormone (GH) deficiency'): ['382',
  '412',
  'SpecificDisease',
  'OMIM:262400\n'],
 ('7055648', 'CNSHA'): ['1317', '1322', 'SpecificDisease', 'D000746\n'],
 ('7857677', 'type I GD'): ['276', '285', 'SpecificDisease', 'D005776\n'],
 ('2963536', 'glomerulonephritis'): ['54',
  '72',
  'SpecificDisease',
  'D005921\n'],
 ('10471457', 'hepatic fibrosis'): ['1724',
  '1740',
  'DiseaseClass',
  'D008103\n'],
 ('10581027', 'periodontopathia'): ['169',
  '185',
  'SpecificDisease',
  'D010510\n'],
 ('10807385', 'advanced-stage hereditary cancer'): ['2423',
  '2455',
  'Modifier',
  'D009386\n'],
 ('3455778',
  'deficiency of the hepatic enzyme phenylalanine hydroxylase'): ['137', '195', 'SpecificDisease', 'OMIM:261600\n'],
 ('7166314', 'hemolytic anemia'): ['293',
  '309',
  'SpecificDisease',
  'D000743\n'],
 ('10051005', 'HNPCC'): ['1507', '1512', 'Modifier', 'D003123\n'],
 ('1709636', 'PKU'): ['888', '891', 'Modifier', 'D010661\n'],
 ('7607677', 'Mucopolysaccharidosis type IVA'): ['0',
  '30',
  'SpecificDisease',
  'OMIM:253000\n'],
 ('10465113', 'recessive disorders'): ['271',
  '290',
  'DiseaseClass',
  'D030342\n'],
 ('2016095', 'Prader Willi'): ['451', '463', 'Modifier', 'D011218\n'],
 ('10449429', 'Duchenne muscular dystrophy'): ['88',
  '115',
  'SpecificDisease',
  'D020388\n'],
 ('523196', 'Lesch-Nyhan syndrome'): ['1856',
  '1876',
  'SpecificDisease',
  'D007926\n'],
 ('2852474', 'mental retardation'): ['336',
  '354',
  'DiseaseClass',
  'D008607\n'],
 ('7630639', 'tumor'): ['1030', '1035', 'Modifier', 'D009369\n'],
 ('10807385', 'hereditary ovarian cancers'): ['355',
  '381',
  'SpecificDisease',
  'D061325\n'],
 ('10987655', 'migraine'): ['1272', '1280', 'SpecificDisease', 'D008881\n'],
 ('2828430', 'Homozygous hypobetalipoproteinemia'): ['0',
  '34',
  'SpecificDisease',
  'D006995\n'],
 ('6524872', 'ALD'): ['1189', '1192', 'SpecificDisease', 'D000326\n'],
 ('1127526', 'Analbuminemia'): ['0', '13', 'SpecificDisease', 'OMIM:103600\n'],
 ('10465113', 'recessive disorder'): ['1234',
  '1252',
  'DiseaseClass',
  'D030342\n'],
 ('8240110', 'retinitis pigmentosa'): ['1039',
  '1059',
  'SpecificDisease',
  'D012174\n'],
 ('7726234', 'myotonic dystrophy'): ['230',
  '248',
  'SpecificDisease',
  'D009223\n'],
 ('1937471', 'partial deficiencies of HPRT activity'): ['604',
  '641',
  'DiseaseClass',
  'OMIM:300323\n'],
 ('7422429', 'nephropathy'): ['211', '222', 'SpecificDisease', 'D007674\n'],
 ('8589715', 'EDMD'): ['961', '965', 'SpecificDisease', 'D020389\n'],
 ('8531967', 'breast or ovarian cancer'): ['1503',
  '1527',
  'CompositeMention',
  'D001943|D010051\n'],
 ('3162536', 'BMD'): ['471', '474', 'Modifier', 'C537666\n'],
 ('10528860', 'UPD'): ['1439', '1442', 'SpecificDisease', 'D024182\n'],
 ('2316519', 'Becker muscular dystrophy'): ['239',
  '264',
  'SpecificDisease',
  'C537666\n'],
 ('7795653', 'sporadic breast cancer'): ['77', '99', 'Modifier', 'D001943\n'],
 ('10369870', 'myelinopathies'): ['113', '127', 'DiseaseClass', 'D011115\n'],
 ('10425038', 'ataxia-telangiectasia'): ['112',
  '133',
  'SpecificDisease',
  'D001260\n'],
 ('10842298', 'myopia'): ['403', '409', 'DiseaseClass', 'D009216\n'],
 ('7815415', 'congenital disease'): ['537',
  '555',
  'DiseaseClass',
  'D030342\n'],
 ('10706858', 'thrombophilic defect'): ['1100',
  '1120',
  'SpecificDisease',
  'D019851\n'],
 ('7543316', 'DM'): ['1221', '1223', 'Modifier', 'D009223\n'],
 ('10403837', 'hereditary progressive dystonia'): ['89',
  '120',
  'SpecificDisease',
  'D020821\n'],
 ('8301658', 'WAS'): ['1042', '1045', 'Modifier', 'D014923\n'],
 ('6783144', 'haemostasis abnormality'): ['104',
  '127',
  'DiseaseClass',
  'D020141\n'],
 ('1353340', 'MLD'): ['712', '715', 'SpecificDisease', 'D007966\n'],
 ('1351034', 'FAP'): ['927', '930', 'Modifier', 'D011125\n'],
 ('318684',
  'Hereditary deficiency of the third component of complement'): ['0', '58', 'SpecificDisease', 'OMIM:613779\n'],
 ('8209890', 'hypothalamic abnormalities'): ['844',
  '870',
  'DiseaseClass',
  'D007027\n'],
 ('8259519', 'APC tumor'): ['19', '28', 'Modifier', 'D011125\n'],
 ('2894613', 'VHL'): ['804', '807', 'Modifier', 'D006623\n'],
 ('2352258', 'hereditary tumours'): ['263',
  '281',
  'DiseaseClass',
  'D009386\n'],
 ('8188241', 'Ochronosis'): ['353', '363', 'SpecificDisease', 'D009794\n'],
 ('1127526', 'analbuminemia'): ['78',
  '91',
  'SpecificDisease',
  'OMIM:103600\n'],
 ('1577476', 'DMD'): ['429', '432', 'Modifier', 'D020388\n'],
 ('10699184', 'DM'): ['1666', '1668', 'Modifier', 'D009223\n'],
 ('1357962', 'hypotonic'): ['691', '700', 'SpecificDisease', 'D009123\n'],
 ('7550349', 'breast and/or ovarian cancer'): ['1260',
  '1288',
  'CompositeMention',
  'D010051|D001943\n'],
 ('100562', 'absence of functional C7'): ['734',
  '758',
  'Modifier',
  'OMIM:610102\n'],
 ('10090890', 'anemia'): ['1550', '1556', 'DiseaseClass', 'D000740\n'],
 ('10732816', 'Emery-Dreifuss muscular dystrophy'): ['35',
  '68',
  'SpecificDisease',
  'D020389\n'],
 ('1317264', 'tumors'): ['827', '833', 'DiseaseClass', 'D009369\n'],
 ('1338764', 'adenomatous polyposis coli'): ['207',
  '233',
  'Modifier',
  'D011125\n'],
 ('7802009', 'DMD'): ['246', '249', 'Modifier', 'D020388\n'],
 ('7493024', 'breast to ovarian cancer'): ['560',
  '584',
  'Modifier',
  'D001943|D010051\n'],
 ('2180286', 'myopathy'): ['1110', '1118', 'DiseaseClass', 'D009135\n'],
 ('10830910', 'cysts in the kidney, pancreas, and epididymis'): ['326',
  '371',
  'CompositeMention',
  'D052177|D010181|D013088\n'],
 ('7937795', 'tumor'): ['1374', '1379', 'DiseaseClass', 'D009369\n'],
 ('2792129',
  'Familial deficiency of the seventh component of complement'): ['0', '58', 'SpecificDisease', 'OMIM:610102\n'],
 ('10732811', 'degeneration of certain brain areas'): ['1647',
  '1682',
  'DiseaseClass',
  'D001927\n'],
 ('8282802', 'atherosclerosis'): ['2025',
  '2040',
  'SpecificDisease',
  'D050197\n'],
 ('10323740', 'autosomal dominant disorder'): ['117',
  '144',
  'DiseaseClass',
  'D030342\n'],
 ('10077614', 'pseudohermaphroditism'): ['644',
  '665',
  'SpecificDisease',
  'D012734\n'],
 ('2215607', 'premature atherosclerosis'): ['2000',
  '2025',
  'SpecificDisease',
  'D050197\n'],
 ('10408776', 'sporadic renal cell carcinoma'): ['29',
  '58',
  'SpecificDisease',
  'D002292\n'],
 ('10430841', 'white matter abnormalities'): ['280',
  '306',
  'DiseaseClass',
  'D002493\n'],
 ('10071193', 'BFLS'): ['404', '408', 'Modifier', 'C536575\n'],
 ('8198128', 'Friedreich ataxia'): ['132',
  '149',
  'SpecificDisease',
  'D005621\n'],
 ('10434119', 'cleft lip and/or palate'): ['487',
  '510',
  'Modifier',
  'D002971|D002972\n'],
 ('2562820', 'muscular dystrophy'): ['319',
  '337',
  'DiseaseClass',
  'D009136\n'],
 ('23402', 'cataract'): ['1256', '1264', 'Modifier', 'D002386\n'],
 ('1302008', 'Denys-Drash syndrome'): ['105',
  '125',
  'SpecificDisease',
  'D030321\n'],
 ('8317477', 'Huntington disease'): ['133',
  '151',
  'SpecificDisease',
  'D006816\n'],
 ('3565372', 'hereditary non-spherocytic hemolytic anemia'): ['70',
  '113',
  'SpecificDisease',
  'D000746\n'],
 ('10737119', 'sensorineural deafness'): ['1268',
  '1290',
  'DiseaseClass',
  'D006319\n'],
 ('10577908', 'autosomal recessive disorder'): ['146',
  '174',
  'DiseaseClass',
  'D030342\n'],
 ('10737119', 'dopa-responsive dystonia'): ['1232',
  '1256',
  'SpecificDisease',
  'C538007\n'],
 ('8071957', 'autistic'): ['613', '621', 'Modifier', 'D001321\n'],
 ('10861298', 'PDS'): ['853', '856', 'Modifier', 'C536648\n'],
 ('10064668', 'cartilage-hair hypoplasia'): ['165',
  '190',
  'SpecificDisease',
  'C535916\n'],
 ('1327525', 'genital anomalies'): ['101', '118', 'DiseaseClass', 'D014564\n'],
 ('8195156', 'glucose/galactose malabsorption'): ['1265',
  '1296',
  'Modifier',
  'OMIM:606824\n'],
 ('10924409', 'adenomatous polyposis coli (APC) tumor'): ['180',
  '218',
  'Modifier',
  'D011125\n'],
 ('10615125', 'FEO'): ['852', '855', 'SpecificDisease', 'OMIM:174810\n'],
 ('7479827', 'sudden death'): ['1482', '1494', 'SpecificDisease', 'D003645\n'],
 ('10364521', 'Mentally retarded'): ['1190', '1207', 'Modifier', 'D008607\n'],
 ('8301658', 'X linked recessive thrombocytopenic disorder'): ['416',
  '460',
  'SpecificDisease',
  'OMIM:313900\n'],
 ('8075631', 'male breast cancer'): ['754',
  '772',
  'SpecificDisease',
  'D018567\n'],
 ('8563759', 'ovarian cancers'): ['370',
  '385',
  'SpecificDisease',
  'D010051\n'],
 ('10556285', 'polyglutamine disease'): ['435',
  '456',
  'DiseaseClass',
  'D030342\n'],
 ('10487710', 'Familial neurohypophyseal diabetes insipidus'): ['112',
  '156',
  'SpecificDisease',
  'OMIM:125700\n'],
 ('6604602', 'arthropathy'): ['1245', '1256', 'SpecificDisease', 'D007592\n'],
 ('7790377', 'protein-import deficiency'): ['1759',
  '1784',
  'DiseaseClass',
  'D008661\n'],
 ('7458742', 'G6PD-deficient'): ['368', '382', 'Modifier', 'D005955\n'],
 ('10051007', 'Huntington disease'): ['140',
  '158',
  'SpecificDisease',
  'D006816\n'],
 ('10930571', 'Aarskog-Scott syndrome'): ['117',
  '139',
  'SpecificDisease',
  'C535331\n'],
 ('2544995', 'genitourinary abnormalities'): ['309',
  '336',
  'DiseaseClass',
  'D014564\n'],
 ('8364574', 'Aniridia'): ['28', '36', 'SpecificDisease', 'D015783\n'],
 ('2895982', 'mental retardation'): ['197',
  '215',
  'DiseaseClass',
  'D008607\n'],
 ('10521293', 'Wolfram syndrome'): ['1498',
  '1514',
  'Modifier',
  'OMIM:222300\n'],
 ('10807385', 'BRCA1-linked than for BRCA2-linked cancers'): ['2546',
  '2588',
  'CompositeMention',
  'OMIM:604370|OMIM:612555\n'],
 ('2352258', 'cerebellar haemangioblastoma'): ['998',
  '1026',
  'SpecificDisease',
  'D018325\n'],
 ('10598803', 'attenuated adenomatous polyposis coli'): ['381',
  '418',
  'SpecificDisease',
  'C538265\n'],
 ('10192393', 'pilomatricomas'): ['1312',
  '1326',
  'SpecificDisease',
  'D018296\n'],
 ('10930571', 'short stature'): ['212', '225', 'DiseaseClass', 'D006130\n'],
 ('7759076', 'cancers'): ['645', '652', 'DiseaseClass', 'D009369\n'],
 ('10213492', 'colorectal tumor'): ['1384', '1400', 'Modifier', 'D015179\n'],
 ('7790377', 'peroxisomal disorders'): ['1794',
  '1815',
  'DiseaseClass',
  'D018901\n'],
 ('10557309', 'pediatric cancer of striated muscle'): ['148',
  '183',
  'SpecificDisease',
  'D019042\n'],
 ('10615125', 'PDB'): ['891', '894', 'SpecificDisease', 'C538098\n'],
 ('10404839', 'dyshormonogenic goiter'): ['147',
  '169',
  'SpecificDisease',
  'D006042\n'],
 ('1505217', 'AGU'): ['166', '169', 'SpecificDisease', 'D054880\n'],
 ('10706858', 'trauma'): ['1167', '1173', 'SpecificDisease', 'D014947\n'],
 ('8575748', 'hereditary human breast and ovarian cancer'): ['134',
  '176',
  'CompositeMention',
  'D061325\n'],
 ('1269174', 'seizure disorder'): ['187',
  '203',
  'SpecificDisease',
  'D004827\n'],
 ('10930361', 'Frataxin deficiency'): ['96',
  '115',
  'SpecificDisease',
  'D005621\n'],
 ('3578281', 'hypopigmentation'): ['1048',
  '1064',
  'DiseaseClass',
  'D017496\n'],
 ('133535', 'C2 deficiency'): ['621',
  '634',
  'SpecificDisease',
  'OMIM:217000\n'],
 ('7579347', 'immune deficiency'): ['469', '486', 'DiseaseClass', 'D007153\n'],
 ('10767326',
  'Haploinsufficiency of the transcription factors FOXC1 and FOXC2'): ['0',
  '63',
  'CompositeMention',
  'OMIM:602482|OMIM:153400\n'],
 ('1968617', 'genetic defect'): ['1036', '1050', 'DiseaseClass', 'D030342\n'],
 ('8240110', 'macular degeneration'): ['1560',
  '1580',
  'SpecificDisease',
  'D008268\n'],
 ('1384324', 'Pelizaeus-Merzbacher disease'): ['1328',
  '1356',
  'SpecificDisease',
  'OMIM:312080\n'],
 ('2215607', 'hypercholesterolemia'): ['912',
  '932',
  'SpecificDisease',
  'D006937\n'],
 ('10571950', 'Pendred syndrome'): ['111',
  '127',
  'SpecificDisease',
  'C536648\n'],
 ('1301161', 'mental retardation'): ['211',
  '229',
  'DiseaseClass',
  'D008607\n'],
 ('1056013', 'lymphoid malignancies'): ['1275',
  '1296',
  'SpecificDisease',
  'D008223\n'],
 ('10987655', 'FHM'): ['1405', '1408', 'SpecificDisease', 'D020325\n'],
 ('1319838', 'colorectal polyps'): ['1148',
  '1165',
  'SpecificDisease',
  'D003111\n'],
 ('7422429', 'Wiskott-Aldrich syndrome'): ['752',
  '776',
  'SpecificDisease',
  'D014923\n'],
 ('1338904', 'tumors'): ['1108', '1114', 'DiseaseClass', 'D009369\n'],
 ('10190819', 'X-linked adrenoleukodystrophy'): ['137',
  '166',
  'SpecificDisease',
  'D000326\n'],
 ('1322637', 'TSD'): ['880', '883', 'Modifier', 'D013661\n'],
 ('10364521', 'fragile X'): ['1674', '1683', 'Modifier', 'D005600\n'],
 ('10631148', 'retinoblastoma'): ['605', '619', 'Modifier', 'D012175\n'],
 ('10766245', 'AT'): ['640', '642', 'SpecificDisease', 'D001260\n'],
 ('10807793', 'FMF'): ['329', '332', 'SpecificDisease', 'D010505\n'],
 ('2161209', 'adrenomyeloneuropathy'): ['206',
  '227',
  'SpecificDisease',
  'D000326\n'],
 ('218453', 'AMN'): ['987', '990', 'SpecificDisease', 'D000326\n'],
 ('7315872', 'cataract'): ['618', '626', 'SpecificDisease', 'D002386\n'],
 ('2310692', 'Paroxysmal nocturnal haemoglobinuria'): ['0',
  '36',
  'SpecificDisease',
  'D006457\n'],
 ('10732816', 'cardiac conduction block'): ['374',
  '398',
  'SpecificDisease',
  'D006327\n'],
 ('8314592', 'X-linked neurodevelopmental disorder'): ['214',
  '250',
  'DiseaseClass',
  'D038901\n'],
 ('7579347', 'X-linked recessive disorder'): ['153',
  '180',
  'DiseaseClass',
  'D040181\n'],
 ('10699184', 'genetic defect'): ['232', '246', 'DiseaseClass', 'D030342\n'],
 ('3455778', 'Phenylketonuria'): ['102',
  '117',
  'SpecificDisease',
  'D010661\n'],
 ('8314592', 'Norrie disease'): ['169', '183', 'SpecificDisease', 'C537849\n'],
 ('1317264', 'cancer'): ['141', '147', 'DiseaseClass', 'D009369\n'],
 ('10742101', 'obesity'): ['987', '994', 'SpecificDisease', 'D009765\n'],
 ('10190819', 'inherited disease'): ['179',
  '196',
  'DiseaseClass',
  'D030342\n'],
 ('10631148', 'hereditary retinoblastoma'): ['433',
  '458',
  'SpecificDisease',
  'D012175\n'],
 ('2352258', 'sporadic cerebellar haemangioblastoma'): ['744',
  '781',
  'SpecificDisease',
  'D018325\n'],
 ('10746568', 'haemorrhagic condition'): ['112',
  '134',
  'DiseaseClass',
  'D006474\n'],
 ('23402', 'lamellar cataracts'): ['137',
  '155',
  'SpecificDisease',
  'C535342|OMIM:116800\n'],
 ('10571950', 'goiter'): ['284', '290', 'SpecificDisease', 'D006042\n'],
 ('10449794', 'Ataxia-telangiectasia'): ['89',
  '110',
  'SpecificDisease',
  'D001260\n'],
 ('8281152', 'myotonic dystrophy'): ['405', '423', 'Modifier', 'D009223\n'],
 ('10528860', 'maternal uniparental disomy for chromosome 14'): ['106',
  '151',
  'SpecificDisease',
  'D024182\n'],
 ('1831007', 'HD'): ['240', '242', 'SpecificDisease', 'D006816\n'],
 ('7106752', 'hemolytic anemia'): ['519',
  '535',
  'SpecificDisease',
  'D000743\n'],
 ('7586656', 'Lesch-Nyhan syndrome'): ['307',
  '327',
  'SpecificDisease',
  'D007926\n'],
 ('10411929', 'CHS'): ['1134', '1137', 'Modifier', 'D002609\n'],
 ('8302543', 'retinal degeneration'): ['141', '161', 'Modifier', 'D012162\n'],
 ('10557309', 'alveolar rhabdomyosarcoma'): ['1254',
  '1279',
  'Modifier',
  'D018232\n'],
 ('10487695', 'congenital hypothyroidism'): ['1507',
  '1532',
  'SpecificDisease',
  'D003409\n'],
 ('10861282', 'AS'): ['1738', '1740', 'SpecificDisease', 'D013167\n'],
 ('10446987', 'tumors'): ['1365', '1371', 'DiseaseClass', 'D009369\n'],
 ('10807385', 'cancer'): ['648', '654', 'Modifier', 'D009369\n'],
 ('7316485', 'haemolysis'): ['258', '268', 'SpecificDisease', 'D006461\n'],
 ('1351034', 'adenomatous polyposis coli'): ['368',
  '394',
  'Modifier',
  'D011125\n'],
 ('8401501', 'breast-ovarian cancer syndrome'): ['23',
  '53',
  'DiseaseClass',
  'D061325\n'],
 ('10732816', 'X-linked recessive Emery-Dreifuss muscular dystrophy'): ['114',
  '166',
  'SpecificDisease',
  'D020389\n'],
 ('8281142', 'breast/ovarian cancer'): ['163', '184', 'Modifier', 'D061325\n'],
 ('2894613', 'bilateral and multifocal tumours'): ['457',
  '489',
  'CompositeMention',
  'D009369\n'],
 ('8314592', 'pseudoglioma'): ['185', '197', 'SpecificDisease', 'C537849\n'],
 ('7611277', 'inherited breast and ovarian cancer'): ['1261',
  '1296',
  'CompositeMention',
  'D061325\n'],
 ('7543316', 'myopathic'): ['793', '802', 'Modifier', 'D009135\n'],
 ('1127526', 'arterial hypotension'): ['519',
  '539',
  'DiseaseClass',
  'D007022\n'],
 ('7981671', 'choroideremia'): ['1271', '1284', 'Modifier', 'D015794\n'],
 ('7811247', 'X-linked adrenoleukodystrophy'): ['0',
  '29',
  'SpecificDisease',
  'D000326\n'],
 ('1831007', 'TS'): ['220', '222', 'SpecificDisease', 'D005879\n'],
 ('10802667', 'cardiac conduction abnormalities'): ['442',
  '474',
  'DiseaseClass',
  'D006327\n'],
 ('7833921', 'Essential fructosuria'): ['124',
  '145',
  'SpecificDisease',
  'C538068\n'],
 ('10807793', 'Familial Mediterranean fever'): ['149',
  '177',
  'SpecificDisease',
  'D010505\n'],
 ('10556283', 'cancers'): ['1536', '1543', 'DiseaseClass', 'D009369\n'],
 ('2884570', 'deficiency of hepatic phenylalanine hydroxylase'): ['189',
  '236',
  'SpecificDisease',
  'OMIM:261600\n'],
 ('7568002', 'Huntington disease'): ['132', '150', 'Modifier', 'D006816\n'],
 ('7574457',
  'rare, sex-linked recessive, dysmyelinating disease of the central nervous system'): ['122',
  '202',
  'DiseaseClass',
  'D020279+D035583\n'],
 ('3876122', 'von Willebrand'): ['205', '219', 'Modifier', 'D014842\n'],
 ('10330348', 'ataxia-telangiectasia'): ['250',
  '271',
  'SpecificDisease',
  'D001260\n'],
 ('10557317', 'HFE deficiency'): ['48', '62', 'SpecificDisease', 'D006432\n'],
 ('133535', 'C2-deficient'): ['257', '269', 'Modifier', 'OMIM:217000\n'],
 ('10699184', 'myotonic dystrophy'): ['317', '335', 'Modifier', 'D009223\n'],
 ('3615198', 'Classical Phenylketonuria'): ['101',
  '126',
  'SpecificDisease',
  'D010661\n'],
 ('2008213', 'diarrhoea'): ['323', '332', 'SpecificDisease', 'D003967\n'],
 ('10767339', 'macroorchidism'): ['375',
  '389',
  'SpecificDisease',
  'D005600\n'],
 ('1562739', 'G6PD deficiency'): ['2243',
  '2258',
  'SpecificDisease',
  'D005955\n'],
 ('7825578', 'spinal cerebellar ataxia'): ['140',
  '164',
  'Modifier',
  'D013132\n'],
 ('10724175', 'inherited breast and ovarian cancers'): ['175',
  '211',
  'CompositeMention',
  'D061325\n'],
 ('8240110', 'retinitis punctata albescens'): ['339',
  '367',
  'SpecificDisease',
  'OMIM:136880\n'],
 ('8301658', 'immune deficiencies'): ['839',
  '858',
  'DiseaseClass',
  'D007154\n'],
 ('10470088', 'sporadic colorectal cancers'): ['372',
  '399',
  'SpecificDisease',
  'D015179\n'],
 ('107868', 'peripheral psoriatic arthritis'): ['243',
  '273',
  'SpecificDisease',
  'D015535\n'],
 ('10830910', 'VHL disease'): ['1185', '1196', 'SpecificDisease', 'D006623\n'],
 ('10208848', 'alpha-Gal A deficiency'): ['1030',
  '1052',
  'SpecificDisease',
  'D000795\n'],
 ('10602116', 'inner ear morphological anomaly'): ['1188',
  '1219',
  'SpecificDisease',
  'D007759\n'],
 ('10541953', 'Prader-Willi syndrome'): ['792',
  '813',
  'SpecificDisease',
  'D011218\n'],
 ('8240110', 'Pericentral scotomas'): ['1598',
  '1618',
  'SpecificDisease',
  'D012607\n'],
 ('1311721', 'leukemia'): ['1344', '1352', 'Modifier', 'D007938\n'],
 ('10742101', 'leptin deficiency'): ['782',
  '799',
  'SpecificDisease',
  'OMIM:164160\n'],
 ('10364518', 'X-linked disorder'): ['188',
  '205',
  'DiseaseClass',
  'D040181\n'],
 ('10330430', 'IDC'): ['760', '763', 'SpecificDisease', 'C536277\n'],
 ('2591962', 'myotonic dystrophy'): ['137',
  '155',
  'SpecificDisease',
  'D009223\n'],
 ('10441573', 'breast cancer'): ['1187',
  '1200',
  'SpecificDisease',
  'D001943\n'],
 ('10078732', 'ALD'): ['475', '478', 'SpecificDisease', 'D000326\n'],
 ('3565372', 'G6PD deficiency'): ['756',
  '771',
  'SpecificDisease',
  'D005955\n'],
 ('6524872', 'Zellwegers syndrome'): ['1084',
  '1103',
  'SpecificDisease',
  'D015211\n'],
 ('10554035', 'VHL'): ['2197', '2200', 'Modifier', 'D006623\n'],
 ('126380', 'ankylosing spondylitis'): ['908',
  '930',
  'SpecificDisease',
  'D013167\n'],
 ('10466420', 'chondrodysplasias'): ['160',
  '177',
  'DiseaseClass',
  'D010009\n'],
 ('10807793', 'neutrophil-mediated serosal inflammation'): ['247',
  '287',
  'SpecificDisease',
  'D007249\n'],
 ('10480348', 'DMD'): ['855', '858', 'Modifier', 'D020388\n'],
 ('10077614', 'Male genital defects'): ['1082',
  '1102',
  'DiseaseClass',
  'D005832\n'],
 ('10364520', 'FMF'): ['982', '985', 'Modifier', 'D010505\n'],
 ('2241452', 'congenital deficiency of the C9 component of complement'): ['39',
  '94',
  'SpecificDisease',
  'OMIM:613825\n'],
 ('1301187', 'hyperphenylalaninemia'): ['227',
  '248',
  'DiseaseClass',
  'D010661\n'],
 ('1358807', 'XAI disease'): ['629', '640', 'SpecificDisease', 'C538243\n'],
 ('10554035', 'von Hippel-Lindau'): ['15', '32', 'Modifier', 'D006623\n'],
 ('1517503', 'pyoderma gangrenosum'): ['71',
  '91',
  'SpecificDisease',
  'D017511\n'],
 ('10364518', 'oculocerebrorenal syndrome of Lowe'): ['140',
  '174',
  'SpecificDisease',
  'D009800\n'],
 ('10924409', 'benign colorectal tumors'): ['344',
  '368',
  'SpecificDisease',
  'D015179\n'],
 ('2544995', 'WAGR'): ['272', '276', 'Modifier', 'D017624\n'],
 ('7795652', 'autosomal dominant syndrome'): ['122',
  '149',
  'DiseaseClass',
  'D030342\n'],
 ('1357962', 'uniparental disomy'): ['1142',
  '1160',
  'SpecificDisease',
  'D024182\n'],
 ('1301201', 'classical phenylketonuria'): ['939',
  '964',
  'SpecificDisease',
  'D010661\n'],
 ('7579347', 'X-linked congenital thrombocytopenia'): ['33',
  '69',
  'SpecificDisease',
  'OMIM:313900\n'],
 ('1380672',
  'degeneration and premature cell death of oligodendrocytes'): ['546', '603', 'CompositeMention', 'D056784\n'],
 ('10200300', 'ALPS'): ['774', '778', 'SpecificDisease', 'D056735\n'],
 ('10797418', 'Prader-Willi syndrome'): ['136',
  '157',
  'SpecificDisease',
  'D011218\n'],
 ('10732816', 'sudden death'): ['419', '431', 'SpecificDisease', 'D003645\n'],
 ('8528198', 'thrombocytopenia'): ['976',
  '992',
  'SpecificDisease',
  'D013921\n'],
 ('2309698', 'Abnormal color vision'): ['329',
  '350',
  'DiseaseClass',
  'D003117\n'],
 ('10742101', 'fat-cell tumours'): ['427', '443', 'DiseaseClass', 'D018205\n'],
 ('1939657', 'homozygous hypobetalipoproteinemia'): ['754',
  '788',
  'SpecificDisease',
  'D006995\n'],
 ('10987655', 'EA-2'): ['955', '959', 'SpecificDisease', 'C535506\n'],
 ('1303173', 'G6PD deficient'): ['55', '69', 'Modifier', 'D005955\n'],
 ('7959767', 'autosomal dominant neurodegenerative disorder'): ['222',
  '267',
  'DiseaseClass',
  'D019636\n'],
 ('10441329', 'copper toxicity disorder'): ['335',
  '359',
  'DiseaseClass',
  'C535468\n'],
 ('3029599', 'Lesch-Nyhan'): ['1001', '1012', 'Modifier', 'D007926\n'],
 ('10716718', 'cerebellar degeneration'): ['621',
  '644',
  'DiseaseClass',
  'D013132\n'],
 ('10556283', 'rhabdoid'): ['1624', '1632', 'Modifier', 'D018335\n'],
 ('10470088', 'FAP'): ['850', '853', 'Modifier', 'D011125\n'],
 ('7607677', 'Mucopolysaccharidosis IVA'): ['110',
  '135',
  'SpecificDisease',
  'OMIM:253000\n'],
 ('1301201', 'hyperphenylalaninemia'): ['179',
  '200',
  'DiseaseClass',
  'D010661\n'],
 ('10589394', 'Van der Woude syndrome'): ['168',
  '190',
  'SpecificDisease',
  'C536528\n'],
 ('8571951', 'AOII'): ['648', '652', 'Modifier', 'C535395\n'],
 ('3565372', 'hereditary nonspherocytic hemolytic anemia'): ['221',
  '263',
  'SpecificDisease',
  'D000746\n'],
 ('10767326', 'iris hypoplasia'): ['914',
  '929',
  'SpecificDisease',
  'D007499\n'],
 ('10732811', 'spinocerebellar ataxia type 3'): ['515',
  '544',
  'SpecificDisease',
  'D017827\n'],
 ('10323252', 'ankle contractures'): ['1309',
  '1327',
  'SpecificDisease',
  'D003286\n'],
 ('3014348', 'X-linked recessive genetic disorder'): ['128',
  '163',
  'DiseaseClass',
  'D040181\n'],
 ('1351034', 'carcinoma'): ['427', '436', 'DiseaseClass', 'D002277\n'],
 ('10369876', 'arginine vasopressin (AVP) deficiency'): ['249',
  '286',
  'SpecificDisease',
  'OMIM:125700\n'],
 ('1319838', 'adenomatous polyposis coli'): ['182',
  '208',
  'Modifier',
  'D011125\n'],
 ('7795652', 'breast and ovarian cancer'): ['181',
  '206',
  'CompositeMention',
  'D061325\n'],
 ('7939630', 'breast cancer'): ['189', '202', 'CompositeMention', 'D001943\n'],
 ('10615125', 'Familial expansile osteolysis'): ['99',
  '128',
  'SpecificDisease',
  'OMIM:174810\n'],
 ('1302008', 'intersex disorders'): ['227',
  '245',
  'DiseaseClass',
  'D012734\n'],
 ('10528860', 'hypotonia'): ['558', '567', 'DiseaseClass', 'D009123\n'],
 ('133535', 'Hodgkins disease'): ['313',
  '329',
  'SpecificDisease',
  'D006689\n'],
 ('10533031', 'autosomal recessive disorder'): ['101',
  '129',
  'DiseaseClass',
  'D030342\n'],
 ('10417280', 'AS'): ['1163', '1165', 'Modifier', 'D017204\n'],
 ('10205262', 'alkaptonuria'): ['193', '205', 'SpecificDisease', 'D000474\n'],
 ('10602116', 'Pendred syndrome'): ['1014',
  '1030',
  'SpecificDisease',
  'C536648\n'],
 ('7811247', 'Addison only'): ['563', '575', 'SpecificDisease', 'D000224\n'],
 ('10404839', 'thyroid goiter'): ['333', '347', 'DiseaseClass', 'D006042\n'],
 ('1313112', 'FAPC'): ['159', '163', 'SpecificDisease', 'D011125\n'],
 ('3014348', 'BMD'): ['1085', '1088', 'SpecificDisease', 'C537666\n'],
 ('7874117', 'myopia'): ['348', '354', 'DiseaseClass', 'D009216\n'],
 ('10353787', 'gingival overgrowths'): ['1011',
  '1031',
  'SpecificDisease',
  'D019214\n'],
 ('10323252', 'X-linked Emery-Dreifuss muscular dystrophy'): ['79',
  '121',
  'SpecificDisease',
  'D020389\n'],
 ('1127526', 'ease of fatigue'): ['297', '312', 'DiseaseClass', 'D005221\n'],
 ('2055114', 'histidinemia'): ['193', '205', 'SpecificDisease', 'C538320\n'],
 ('10072428', 'gastric and colorectal cancers'): ['1684',
  '1714',
  'CompositeMention',
  'D015179|D013274\n'],
 ('2309698', 'abnormal color vision'): ['632',
  '653',
  'SpecificDisease',
  'D003117\n'],
 ('10589394', 'autosomal dominant craniofacial disorder'): ['203',
  '243',
  'DiseaseClass',
  'D019465\n'],
 ('10090885', 'Autoimmune lymphoproliferative syndrome'): ['93',
  '132',
  'SpecificDisease',
  'D056735\n'],
 ('2963536', 'deficiency of the third component of complement'): ['115',
  '162',
  'SpecificDisease',
  'OMIM:613779\n'],
 ('7857677', 'GD'): ['180', '182', 'SpecificDisease', 'D005776\n'],
 ('10426999', 'breast cancer'): ['515', '528', 'Modifier', 'D001943\n'],
 ('10449429', 'DMD'): ['1392', '1395', 'SpecificDisease', 'D020388\n'],
 ('10382910', 'X-linked Emery-Dreifuss muscular dystrophy'): ['74',
  '116',
  'SpecificDisease',
  'D020389\n'],
 ('7696601', 'myotonic dystrophy'): ['142',
  '160',
  'SpecificDisease',
  'D009223\n'],
 ('1676565', 'Pelizaeus-Merzbacher disease'): ['44',
  '72',
  'SpecificDisease',
  'OMIM:312080\n'],
 ('10830910', 'central nervous system hemangioblastoma'): ['1316',
  '1355',
  'SpecificDisease',
  'D018325\n'],
 ('2886237', 'Choroideremia'): ['0', '13', 'SpecificDisease', 'D015794\n'],
 ('10923035', 'autosomal dominant epilepsy syndrome'): ['187',
  '223',
  'DiseaseClass',
  'D030342+D004827\n'],
 ('1973404', 'AGU'): ['806', '809', 'Modifier', 'D054880\n'],
 ('8088831', 'spongy degeneration of the brain'): ['154',
  '186',
  'SpecificDisease',
  'D017825\n'],
 ('10766245', 'Nijmegen breakage syndrome'): ['149',
  '175',
  'SpecificDisease',
  'D049932\n'],
 ('10403837', 'genetic defect'): ['298', '312', 'DiseaseClass', 'D030342\n'],
 ('23402', 'bilateral lamellar cataracts'): ['745',
  '773',
  'SpecificDisease',
  'C535342|OMIM:116800\n'],
 ('10602116', 'goiter'): ['780', '786', 'SpecificDisease', 'D006042\n'],
 ('10208848', 'severe acroparesthesia'): ['525',
  '547',
  'SpecificDisease',
  'D010292\n'],
 ('7717396', 'adrenomyeloneuropathy'): ['539',
  '560',
  'SpecificDisease',
  'D000326\n'],
 ('10417280', 'neurobehavioral disorders'): ['225',
  '250',
  'DiseaseClass',
  'D019954\n'],
 ('7759076', 'breast and breast-ovarian cancer'): ['32',
  '64',
  'Modifier',
  'D001943|D061325\n'],
 ('10323740', 'developmental delays'): ['440',
  '460',
  'DiseaseClass',
  'D006130\n'],
 ('10767343', 'Myotonic dystrophy'): ['155',
  '173',
  'SpecificDisease',
  'D009223\n'],
 ('2352258', 'sporadic tumours'): ['314', '330', 'DiseaseClass', 'D009369\n'],
 ('1301200', 'genetic defects'): ['96', '111', 'DiseaseClass', 'D030342\n'],
 ('2773936', 'deficit of myelin'): ['372', '389', 'DiseaseClass', 'D003711\n'],
 ('2575071', 'TCD'): ['1082', '1085', 'Modifier', 'D015794\n'],
 ('10465113', 'EDM4'): ['1629', '1633', 'SpecificDisease', 'OMIM:226900\n'],
 ('10788334', 'breast-ovarian cancer'): ['60',
  '81',
  'CompositeMention',
  'D061325\n'],
 ('7795652', 'tumours'): ['551', '558', 'DiseaseClass', 'D009369\n'],
 ('10470286', 'glioblastomas'): ['477', '490', 'SpecificDisease', 'D005909\n'],
 ('10861282', 'rheumatic disorder'): ['144',
  '162',
  'DiseaseClass',
  'D012216\n'],
 ('2303408', 'deficient for C5'): ['330',
  '346',
  'SpecificDisease',
  'OMIM:609536\n'],
 ('10987655', 'SCA6'): ['370', '374', 'SpecificDisease', 'OMIM:183086\n'],
 ('2568588', 'unilateral tumours'): ['961',
  '979',
  'DiseaseClass',
  'D009369\n'],
 ('7523157', 'meningococcal'): ['1128', '1141', 'Modifier', 'D008589\n'],
 ('10706858', 'PROS1 gene defects'): ['1530',
  '1548',
  'SpecificDisease',
  'OMIM:612336\n'],
 ('1897530',
  'deficiency of the enzyme galactose-1-phosphate uridyl transferase'): ['246',
  '311',
  'SpecificDisease',
  'D005693\n'],
 ('7951315', 'central nervous system defects'): ['90',
  '120',
  'DiseaseClass',
  'D009421\n'],
 ('2016095', 'Prader-Willi syndrome'): ['238',
  '259',
  'SpecificDisease',
  'D011218\n'],
 ('7523157', 'meningococcal infection'): ['1074',
  '1097',
  'SpecificDisease',
  'D008589\n'],
 ('2884570', 'inborn error of amino-acid metabolism'): ['308',
  '345',
  'DiseaseClass',
  'D000592\n'],
 ('8441467', 'X-linked adrenoleukodystrophy'): ['9',
  '38',
  'Modifier',
  'D000326\n'],
 ('8401501', 'breast cancer'): ['641', '654', 'Modifier', 'D001943\n'],
 ('1351034', 'desmoid tumor'): ['670', '683', 'SpecificDisease', 'C535944\n'],
 ('8441467', 'Adrenoleukodystrophy'): ['94',
  '114',
  'SpecificDisease',
  'D000326\n'],
 ('10976074', 'myotonia'): ['482', '490', 'DiseaseClass', 'D009222\n'],
 ('10737981', 'genetic lesions'): ['1065',
  '1080',
  'DiseaseClass',
  'D020022\n'],
 ('7191069', 'C2 deficiency'): ['205',
  '218',
  'SpecificDisease',
  'OMIM:217000\n'],
 ('10353787', 'deficiency of aspartylglucosaminidase'): ['157',
  '194',
  'SpecificDisease',
  'D054880\n'],
 ('3789016', 'C3 deficiency'): ['885',
  '898',
  'SpecificDisease',
  'OMIM:613779\n'],
 ('10807385', 'stage III cancers'): ['2164',
  '2181',
  'DiseaseClass',
  'D009369\n'],
 ('6604602', 'psoriasis'): ['1199', '1208', 'SpecificDisease', 'D011565\n'],
 ('10449794', 'A-T'): ['865', '868', 'Modifier', 'D001260\n'],
 ('3393536', 'neonatal jaundice'): ['1239',
  '1256',
  'DiseaseClass',
  'D007567\n'],
 ('2894613', 'renal cell carcinomas'): ['331',
  '352',
  'SpecificDisease',
  'D002292\n'],
 ('7815415', 'mentally retarded'): ['1211',
  '1228',
  'DiseaseClass',
  'D008607\n'],
 ('3032521', 'retinoblastoma'): ['490', '504', 'Modifier', 'D012175\n'],
 ('7937795', 'serous cystadenocarcinoma'): ['354',
  '379',
  'SpecificDisease',
  'D018284\n'],
 ('1316718', 'Wiskott-Aldrich syndrome'): ['207',
  '231',
  'SpecificDisease',
  'D014923\n'],
 ('10571950', 'sensorineural hearing loss'): ['223',
  '249',
  'SpecificDisease',
  'D006319\n'],
 ('10447259', 'Wiskott-Aldrich syndrome'): ['139',
  '163',
  'SpecificDisease',
  'D014923\n'],
 ('4019732', 'cleft palate'): ['337', '349', 'SpecificDisease', 'D002972\n'],
 ('3362213', 'Tay-Sachs disease'): ['1046',
  '1063',
  'SpecificDisease',
  'D013661\n'],
 ('3393536', 'G6PD deficiency'): ['1726',
  '1741',
  'SpecificDisease',
  'D005955\n'],
 ('10369860', 'rearrangement disorders'): ['29',
  '52',
  'DiseaseClass',
  'D002869\n'],
 ('8002973', 'ALD'): ['834', '837', 'Modifier', 'D000326\n'],
 ('10353787', 'leucoedema'): ['995', '1005', 'SpecificDisease', 'D007967\n'],
 ('10830915', 'FRDA'): ['359', '363', 'Modifier', 'D005621\n'],
 ('6859721', 'Chronic neisserial infection'): ['527',
  '555',
  'SpecificDisease',
  'D016870\n'],
 ('10830910', 'VHL'): ['1263', '1266', 'Modifier', 'D006623\n'],
 ('10417286', 'CL/P'): ['454', '458', 'SpecificDisease', 'D002971|D002972\n'],
 ('10581027', 'Keratosis'): ['580', '589', 'SpecificDisease', 'D007642\n'],
 ('1323345', 'G6PD deficiency'): ['1125',
  '1140',
  'SpecificDisease',
  'D005955\n'],
 ('10446987', 'DFSP'): ['1742', '1746', 'SpecificDisease', 'C538219\n'],
 ('2884570', 'molecular lesion'): ['888', '904', 'DiseaseClass', 'D030342\n'],
 ('3674116', 'Prader-Willi syndrome'): ['101',
  '122',
  'SpecificDisease',
  'D011218\n'],
 ('2563633', 'PKU'): ['917', '920', 'Modifier', 'D010661\n'],
 ('10484772', 'intraretinal and subretinal lipid accumulation'): ['284',
  '330',
  'CompositeMention',
  'D006949\n'],
 ('6087154', 'HPRT deficiencies'): ['248',
  '265',
  'DiseaseClass',
  'OMIM:300323\n'],
 ('1346924', 'muscle weakness'): ['263', '278', 'DiseaseClass', 'D018908\n'],
 ('7874117', 'mid-face hypoplasia'): ['313',
  '332',
  'DiseaseClass',
  'D019767\n'],
 ('10930361', 'FRDA'): ['625', '629', 'SpecificDisease', 'D005621\n'],
 ('523196', 'Lesch-Nyhan'): ['1635', '1646', 'Modifier', 'D007926\n'],
 ('8364574', 'congenital malformation of the eye'): ['42',
  '76',
  'DiseaseClass',
  'D005124\n'],
 ('8259519', 'APC'): ['313', '316', 'SpecificDisease', 'D011125\n'],
 ('10716718', 'neuropathological abnormalities'): ['803',
  '834',
  'DiseaseClass',
  'D009422\n'],
 ('495634', 'deficiency of the fifth component of complement'): ['179',
  '226',
  'SpecificDisease',
  'OMIM:609536\n'],
 ('2390095',
  'Total deficiency of plasma cholesteryl ester transfer protein'): ['0',
  '61',
  'SpecificDisease',
  'OMIM:143470\n'],
 ('10064668', 'cancer'): ['1277', '1283', 'DiseaseClass', 'D009369\n'],
 ('10404839', 'pendred syndrome'): ['37', '53', 'Modifier', 'C536648\n'],
 ('7166314', 'congenital hemolytic anemia'): ['78',
  '105',
  'SpecificDisease',
  'D000745\n'],
 ('8500791', 'hemophilia'): ['1582',
  '1592',
  'SpecificDisease',
  'D006467|D002836\n'],
 ('7605382', 'coronary heart disease'): ['1487',
  '1509',
  'SpecificDisease',
  'D003327\n'],
 ('1380672', 'oligodendrocyte degeneration'): ['1010',
  '1038',
  'SpecificDisease',
  'D056784\n'],
 ('10077614', 'nonneoplastic'): ['318', '331', 'Modifier', 'D004194\n'],
 ('3198117', 'enzyme deficiency'): ['218', '235', 'DiseaseClass', 'D008661\n'],
 ('2760209',
  'hypoxanthine phosphoribosyltransferase [HPRT] deficiency'): ['151', '207', 'SpecificDisease', 'D007926\n'],
 ('10528243', 'familial Mediterranean fever'): ['224',
  '252',
  'SpecificDisease',
  'D010505\n'],
 ('8566952', 'adrenoleukodystrophy'): ['655',
  '675',
  'SpecificDisease',
  'D000326\n'],
 ('1301937', 'hexosaminidase A deficiency'): ['19',
  '46',
  'SpecificDisease',
  'D013661\n'],
 ('2491010', 'Duchenne muscular dystrophy'): ['174',
  '201',
  'SpecificDisease',
  'D020388\n'],
 ('7759106', 'HD'): ['1230', '1232', 'Modifier', 'D006816\n'],
 ('1384323', 'deficient in Hex A'): ['234',
  '252',
  'SpecificDisease',
  'D013661\n'],
 ('10818206', 'FMF'): ['1162', '1165', 'Modifier', 'D010505\n'],
 ('10930571', 'Aarskog-Scott Syndrome'): ['142',
  '164',
  'SpecificDisease',
  'C535331\n'],
 ('10323740', 'Van der Woude syndrome'): ['82',
  '104',
  'SpecificDisease',
  'C536528\n'],
 ('2852474', 'complex glycerol kinase deficiency'): ['149',
  '183',
  'SpecificDisease',
  'C538138\n'],
 ('10364521', 'fragile X syndrome'): ['756',
  '774',
  'SpecificDisease',
  'D005600\n'],
 ('10446987', 'dermatofibrosarcoma protuberans'): ['4',
  '35',
  'Modifier',
  'C538219\n'],
 ('6604602', 'HEREDITARY MULTIFOCAL RELAPSING INFLAMMATION'): ['1321',
  '1365',
  'SpecificDisease',
  'D007249+D030342\n'],
 ('10556285', 'MJD'): ['1653', '1656', 'SpecificDisease', 'D017827\n'],
 ('10447258', 'Zellweger syndrome'): ['1539',
  '1557',
  'SpecificDisease',
  'D015211\n'],
 ('2220826', 'AT'): ['851', '853', 'Modifier', 'D001260\n'],
 ('10589394', 'hypodontia'): ['392', '402', 'SpecificDisease', 'D000848\n'],
 ('8279472', 'WND'): ['737', '740', 'SpecificDisease', 'D006527\n'],
 ('1302008', 'Wilms tumour'): ['371', '383', 'Modifier', 'D009396\n'],
 ('102474', 'sub-total deficiency of C6 and C7'): ['117',
  '150',
  'CompositeMention',
  'OMIM:610102|OMIM:612446\n'],
 ('10192393', 'adenomatous polyposis coli'): ['539',
  '565',
  'SpecificDisease',
  'D011125\n'],
 ('10767347', 'embryonic lethality'): ['897',
  '916',
  'SpecificDisease',
  'D020964\n'],
 ('8084618', 'desmoplastic small round cell tumors'): ['406',
  '442',
  'DiseaseClass',
  'D058405\n'],
 ('1483696', 'Tay-Sachs disease'): ['155',
  '172',
  'SpecificDisease',
  'D013661\n'],
 ('10323252', 'cardiomyopathy'): ['339',
  '353',
  'SpecificDisease',
  'D009202\n'],
 ('2773936', 'Pelizaeus-Merzbacher disease'): ['1746',
  '1774',
  'SpecificDisease',
  'OMIM:312080\n'],
 ('7298854', 'genetic disease'): ['178', '193', 'DiseaseClass', 'D030342\n'],
 ('10051005', 'colorectal'): ['840', '850', 'Modifier', 'D015179\n'],
 ('10987655', 'episodic ataxia type 2'): ['274',
  '296',
  'SpecificDisease',
  'C535506\n'],
 ('1338764', 'APC'): ['1222', '1225', 'Modifier', 'D011125\n'],
 ('2601691', 'RB tumors'): ['924', '933', 'SpecificDisease', 'D012175\n'],
 ('10556298', 'PWS'): ['1471', '1474', 'SpecificDisease', 'D011218\n'],
 ('10802667', 'abnormalities of skeletal muscle function'): ['1033',
  '1074',
  'DiseaseClass',
  'D009139\n'],
 ('10426139', 'muscular atrophy'): ['206', '222', 'DiseaseClass', 'D009133\n'],
 ('7825586', 'breast cancer'): ['1154',
  '1167',
  'SpecificDisease',
  'D001943\n'],
 ('7166314', 'G6PD deficiency'): ['347',
  '362',
  'SpecificDisease',
  'D005955\n'],
 ('10842298', 'Hypotonia'): ['363', '372', 'DiseaseClass', 'D009123\n'],
 ('10484765', 'DMPK haploinsufficiency'): ['1630',
  '1653',
  'SpecificDisease',
  'D058495\n'],
 ('2575483', 'aniridia'): ['681', '689', 'SpecificDisease', 'D015783\n'],
 ('10521293', 'psychiatric disorders'): ['1547',
  '1568',
  'DiseaseClass',
  'D001523\n'],
 ('10556298', 'neurogenetic disorder'): ['151',
  '172',
  'DiseaseClass',
  'D020271\n'],
 ('7857677', 'type GD II'): ['575', '585', 'SpecificDisease', 'D005776\n'],
 ('10213492', 'polyposis'): ['951', '960', 'SpecificDisease', 'D044483\n'],
 ('1562739',
  'Glucose-6-PHOSPHATE dehydrogenase (G6PD; EC 1.1. 1. 49) deficiency'): ['99',
  '165',
  'SpecificDisease',
  'D005955\n'],
 ('7761412', 'Lowe syndrome'): ['1715',
  '1728',
  'SpecificDisease',
  'D009800\n'],
 ('2352258', 'VHL disease'): ['869', '880', 'SpecificDisease', 'D006623\n'],
 ('10915776', 'retinal dystrophy'): ['1779',
  '1796',
  'DiseaseClass',
  'D058499\n'],
 ('3258663', 'severe von Willebrand disease'): ['1519',
  '1548',
  'SpecificDisease',
  'D056729\n'],
 ('8528199', 'Wiskott-Aldrich syndrome'): ['183',
  '207',
  'SpecificDisease',
  'D014923\n'],
 ('10874302', 'multifactorial diseases'): ['301',
  '324',
  'DiseaseClass',
  'D004194\n'],
 ('1324223', 'Familial adenomatous polyposis'): ['116',
  '146',
  'SpecificDisease',
  'D011125\n'],
 ('2390095', 'cholesteryl ester transfer protein (CETP) deficiency'): ['160',
  '212',
  'SpecificDisease',
  'OMIM:143470\n'],
 ('1302032', 'fragile X syndrome'): ['572',
  '590',
  'SpecificDisease',
  'D005600\n'],
 ('3417303', 'DM'): ['994', '996', 'Modifier', 'D009223\n'],
 ('8071955', 'DM'): ['1378', '1380', 'SpecificDisease', 'D009223\n'],
 ('2651669', 'Langer-Giedion syndrome'): ['270',
  '293',
  'SpecificDisease',
  'D015826\n'],
 ('10406661', 'kniest dysplasia'): ['61',
  '77',
  'SpecificDisease',
  'C537207\n'],
 ('10830910', 'Von Hippel-Lindau (VHL) disease'): ['96',
  '127',
  'SpecificDisease',
  'D006623\n'],
 ('1302003', 'choroideremia'): ['60', '73', 'SpecificDisease', 'D015794\n'],
 ('10943845', 'cancer'): ['783', '789', 'DiseaseClass', 'D009369\n'],
 ('8566965', 'breast and ovarian cancer'): ['140',
  '165',
  'Modifier',
  'D061325\n'],
 ('1338904', 'familial adenomatous polyposis'): ['241',
  '271',
  'Modifier',
  'D011125\n'],
 ('10426139', 'McLeod syndrome'): ['152',
  '167',
  'SpecificDisease',
  'OMIM:300842\n'],
 ('10051007', 'neurodegeneration'): ['1051',
  '1068',
  'DiseaseClass',
  'D019636\n'],
 ('10465113', 'atelosteogenesis 2'): ['311',
  '329',
  'SpecificDisease',
  'C535395\n'],
 ('7450778', 'glucose-6-phosphate dehydrogenase deficiency'): ['38',
  '82',
  'SpecificDisease',
  'D005955\n'],
 ('10398436', 'adenomatous polyposis coli'): ['734',
  '760',
  'SpecificDisease',
  'D011125\n'],
 ('7962532', 'cholesteryl ester transfer protein deficiency'): ['8',
  '53',
  'SpecificDisease',
  'OMIM:143470\n'],
 ('10861298', 'goiter'): ['1330', '1336', 'DiseaseClass', 'D006042\n'],
 ('7479827', 'VLCAD deficiency'): ['1356',
  '1372',
  'SpecificDisease',
  'C536353\n'],
 ('1376553', 'Pelizaeus-merzbacher disease'): ['241',
  '269',
  'SpecificDisease',
  'OMIM:312080\n'],
 ('1973404', 'aspartylglucosaminuria'): ['11',
  '33',
  'SpecificDisease',
  'D054880\n'],
 ('3572301', 'Wiskott-Aldrich syndrome'): ['58',
  '82',
  'SpecificDisease',
  'D014923\n'],
 ('3563511', 'Lesch-Nyhan'): ['685', '696', 'Modifier', 'D007926\n'],
 ('492812', 'neisserial infections'): ['801',
  '822',
  'SpecificDisease',
  'D016870\n'],
 ('161677', 'C2-deficient'): ['386', '398', 'Modifier', 'OMIM:217000\n'],
 ('10533031', 'impaired proprioception'): ['1224',
  '1247',
  'DiseaseClass',
  'D020886\n'],
 ('2852474', 'Complex glycerol kinase deficiency'): ['0',
  '34',
  'SpecificDisease',
  'C538138\n'],
 ('8302543', 'central scotoma'): ['967',
  '982',
  'SpecificDisease',
  'D012607\n'],
 ('6650504', 'TSD'): ['967', '970', 'Modifier', 'D013661\n'],
 ('218453', 'ALD'): ['979', '982', 'SpecificDisease', 'D000326\n'],
 ('523196', 'dysarthric speech'): ['469',
  '486',
  'SpecificDisease',
  'D004401\n'],
 ('10426999', 'familial breast and ovarian cancers'): ['137',
  '172',
  'CompositeMention',
  'D061325\n'],
 ('8589721', 'breast cancer'): ['445', '458', 'Modifier', 'D001943\n'],
 ('10533031', 'cardiomyopathy'): ['932',
  '946',
  'SpecificDisease',
  'D009202\n'],
 ('8128954', 'Myotonic dystrophy'): ['159',
  '177',
  'SpecificDisease',
  'D009223\n'],
 ('2568588', 'tumour'): ['308', '314', 'DiseaseClass', 'D009369\n'],
 ('10071193', 'BFLS syndrome'): ['1452',
  '1465',
  'SpecificDisease',
  'C536575\n'],
 ('10406661', 'myopia'): ['226', '232', 'DiseaseClass', 'D009216\n'],
 ('318684', 'skin rash'): ['1787', '1796', 'DiseaseClass', 'D005076\n'],
 ('7795653', 'invasive cancer'): ['454', '469', 'DiseaseClass', 'D009362\n'],
 ('495634', 'Deficiency of the fifth component of complement'): ['0',
  '47',
  'SpecificDisease',
  'OMIM:609536\n'],
 ('1709636',
  'deficiency of a hepatic enzyme, phenylalanine hydroxylase'): ['182', '239', 'SpecificDisease', 'OMIM:261600\n'],
 ('7696601', 'cerebral atrophy'): ['1164',
  '1180',
  'SpecificDisease',
  'D001284\n'],
 ('10817650', 'autosomal recessive syndrome'): ['182',
  '210',
  'DiseaseClass',
  'D030342\n'],
 ('10465113', 'DTDST disorders'): ['1189',
  '1204',
  'SpecificDisease',
  'D030342\n'],
 ('1682919', 'centrocytic lymphomas'): ['1095',
  '1116',
  'DiseaseClass',
  'D008223\n'],
 ('10094559', 'alkaptonuria'): ['38', '50', 'Modifier', 'D000474\n'],
 ('2355960', 'GM2 gangliosidosis, type 1'): ['177',
  '203',
  'SpecificDisease',
  'D013661\n'],
 ('10618304', 'idiopathic ventricular fibrillation'): ['263',
  '298',
  'SpecificDisease',
  'C537182\n'],
 ('8434621', 'familial Mediterranean fever'): ['174',
  '202',
  'SpecificDisease',
  'D010505\n'],
 ...}

In [107]:
import nltk

In [111]:
# Find the directory where the corpus lives.
genesis_dir = nltk.data.find('corpora/genesis')
# Create our custom sentence tokenizer.
my_sent_tokenizer = nltk.RegexpTokenizer('[^.!?]+')
# Create the new corpus reader object.
my_genesis = nltk.corpus.PlaintextCorpusReader(genesis_dir, '.*\.txt', sent_tokenizer=my_sent_tokenizer)
# Use the new corpus reader object.
#print(my_genesis.sents('english-kjv.txt')[0]) # doctest: +NORMALIZE_WHITESPACE

In [118]:
print(my_genesis.sents('english-kjv.txt')[0])


[u'In', u'the', u'beginning', u'God', u'created', u'the', u'heaven', u'and', u'the', u'earth']

In [124]:
ncbi_dir = nltk.data.find('corpora/ncbi_corpus_nltk')
ncbi = nltk.corpus.PlaintextCorpusReader(ncbi_dir, '.*\.txt')

In [126]:



[u'Glucose', u'6', u'-', u'phosphate', u'dehydrogenase', u'variants', u':', u'Gd', u'(+)', u'Alexandra', u'associated', u'with', u'neonatal', u'jaundice', u'and', u'Gd', u'(-)', u'Camperdown', u'in', u'a', u'young', u'man', u'with', u'lamellar', u'cataracts', u'.']

In [ ]: