In [3]:
# Fetch HTML using requests lib and feed to bs4
import requests
from bs4 import BeautifulSoup
from bs4 import NavigableString

# NOTE: their SSL certificate is not verified (verify=False). Be careful!
# This is what triggers urllib3's InsecureRequestWarning for this cell.
# A timeout is set so that a stalled server cannot hang the notebook forever.
result = requests.get("https://globalgenes.org/rarelist", verify=False, timeout=60)
# Fail fast on HTTP errors (4xx/5xx) instead of silently parsing an error page.
result.raise_for_status()
# Parse the raw response bytes with the stdlib-backed HTML parser.
soup = BeautifulSoup(result.content, 'html.parser')
/usr/local/lib/python3.6/site-packages/urllib3/connectionpool.py:858: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
InsecureRequestWarning)
In [4]:
# check: display the parsed page's <title> to confirm the fetch and parse worked
soup.title
Out[4]:
<title>Rare Disease List</title>
In [5]:
# write formatted html to file
# (not used: this is just a useful side effect for exploration)
# The context manager closes the file even if prettify()/write() raises, and
# the explicit encoding keeps non-ASCII characters from depending on the
# platform's default locale encoding.
with open('rarelist.html', 'w', encoding='utf-8') as f:
    f.write(soup.prettify())
In [6]:
# use bs4 to extract names from HTML
# For every <h5> heading, walk the <li> items of the <ul> that follows it.
# The disease name is the first child of the <li>; when that child is a tag
# (e.g. an <a> link) rather than text, the name is the tag's own first child.
names = []  ## all disease names found
name2url = {}  ## mapping of names to URLs
h5s = soup.find_all("h5")
for h5 in h5s:
    ul = h5.find_next_sibling('ul')
    if ul is None:
        # heading without a following <ul> sibling: nothing to extract
        continue
    for li in ul.find_all('li'):
        if len(li.contents) == 0:
            continue
        n = li.contents[0]
        if not isinstance(n, NavigableString):
            # first child is a tag: descend one level to reach its text
            if len(n.contents) == 0:
                # empty tag (e.g. <a></a>): report it and move on
                print('BAD: {}'.format(li))
                continue
            n = n.contents[0]
        # Store a plain str: a NavigableString keeps a reference to the
        # entire parse tree, which would be dragged along by every name.
        n = str(n)
        if li.select('a'):
            # .get() tolerates anchors that have no href attribute
            url = li.a.get('href')
            if url is not None:
                name2url[n] = url
        names.append(n)
# show the first 20 for sanity checking
names[0:20]
Out[6]:
['Aagenaes syndrome',
'Aarskog syndrome',
'Aase Smith syndrome',
'ABCD syndrome',
'Abderhalden Kaufmann Lignac syndrome',
'Abdominal aortic aneurysm',
'Abdominal chemodectomas with cutaneous angiolipomas',
'Abdominal cystic lymphangioma',
'Abdominal obesity metabolic syndrome',
'Aberrant subclavian artery',
'Abetalipoproteinemia',
'Abidi X-linked mental retardation syndrome',
'Ablepharon macrostomia syndrome',
"Abrikosov's tumor",
'Abruzzo Erickson syndrome',
'Absence of fingerprints congenital milia',
'Absence of gluteal muscle',
'Absence of septum pellucidum',
'Absence of Tibia',
'Absence of tibia with polydactyly']
In [7]:
## sanity check URL mapping: show the first 10 (name, URL) pairs.
## Only names whose <li> contained a link were added to name2url.
list(name2url.items())[0:10]
Out[7]:
[('Acute disseminated encephalomyelitis', 'http://ulf.org/'),
('Acute hemorrhagic leukoencephalitis', 'http://ulf.org/'),
('Adrenoleukodystrophy X-linked', 'http://ulf.org/'),
('Adrenomyeloneuropathy', 'http://ulf.org/'),
('Aicardi-Goutieres syndrome', 'http://ulf.org/'),
('Alexander disease', 'http://ulf.org/'),
('Alkaptonuria', 'http://www.alkaptonuria.info/'),
('Alpers syndrome',
'http://www.umdf.org/site/c.8qKOJ0MvF7LUG/b.7929671/k.BDF0/Home.htm'),
('Alzheimer disease familial', 'http://www.mitoaction.org/'),
('Alzheimer disease type 1', 'http://www.mitoaction.org/')]
In [8]:
import csv

# Write one row per disease name: name <TAB> URL.
# Names without a known URL get an empty second column (csv writes None as '').
# newline='' is what the csv module expects for files it writes to; the
# encoding is pinned so non-ASCII names do not depend on the platform default.
with open('rare-list.tsv', 'w', newline='', encoding='utf-8') as csvfile:
    tsv_writer = csv.writer(csvfile, delimiter='\t')
    tsv_writer.writerows([n, name2url.get(n)] for n in names)
In [9]:
## use ontobio lib for fetching ontologies and lexical mapping
from ontobio import OntologyFactory
/usr/local/lib/python3.6/site-packages/cachier/mongo_core.py:24: UserWarning: Cachier warning: pymongo was not found. MongoDB cores will not work.
"Cachier warning: pymongo was not found. MongoDB cores will not work.")
In [10]:
# Factory used by the following cells to create ontology objects via ofa.create(...)
ofa = OntologyFactory()
In [11]:
# Ontology for handle 'obo:hp' -- presumably the Human Phenotype Ontology
# (HP:nnnnnnn IDs show up in the match table further down); TODO confirm.
hp = ofa.create('obo:hp')
In [12]:
# Ontology for handle 'obo:mondo' -- presumably the Mondo disease ontology
# (MONDO:nnnnnnn IDs show up in the match table further down); TODO confirm.
mondo = ofa.create('obo:mondo')
In [13]:
from ontobio.lexmap import LexicalMapEngine
# Engine used below to index the ontologies and to build the cross-reference graph.
lexmap = LexicalMapEngine()
In [14]:
# Quick hack to make a degenerate 'ontology' from the list of names
from ontobio import Ontology


def ont_from_names(names):
    """Build an Ontology with id 'rare' containing one node per name.

    No edges are added. Each name is passed as both arguments to
    ``add_node``: it serves as the node ID and, presumably, as the label.

    :param names: iterable of disease-name strings
    :return: the populated Ontology object
    """
    rare_ont = Ontology(id='rare')
    for name in names:
        ## the name doubles as the node ID
        rare_ont.add_node(name, name)
    return rare_ont


rare = ont_from_names(names)
rare
Out[14]:
rare handle: None meta: None
In [15]:
## quick inspection
# Materialise to a list before slicing. NOTE(review): Ontology.nodes() appears
# to wrap networkx; with networkx >= 2 that returns a NodeView, which cannot
# be sliced directly, while list(...) works for both a list and a view.
list(rare.nodes())[0:10]
Out[15]:
['Aagenaes syndrome',
'Aarskog syndrome',
'Aase Smith syndrome',
'ABCD syndrome',
'Abderhalden Kaufmann Lignac syndrome',
'Abdominal aortic aneurysm',
'Abdominal chemodectomas with cutaneous angiolipomas',
'Abdominal cystic lymphangioma',
'Abdominal obesity metabolic syndrome',
'Aberrant subclavian artery']
In [16]:
## index the 3 ontologies (same order as before: hp, mondo, rare)
for _ontology in (hp, mondo, rare):
    lexmap.index_ontology(_ontology)
WARNING:root:Incomplete syn: HP:0000991 "" hasRelatedSynonym None [] 1.0
WARNING:root:Incomplete syn: HP:0012377 "" hasRelatedSynonym None [] 1.0
WARNING:root:Incomplete syn: HP:0000510 "" hasRelatedSynonym None [] 1.0
WARNING:root:Ignoring suspicous synonym: UBERON:0002722 "4" hasBroadSynonym None ['http://uri.neuinfo.org/nif/nifstd/birnlex_1488', 'NIFSTD:NeuroNames_abbrevSource'] 1.0
WARNING:root:Ignoring suspicous synonym: UBERON:0001715 "3" hasBroadSynonym None ['http://uri.neuinfo.org/nif/nifstd/birnlex_1240', 'NIFSTD:NeuroNames_abbrevSource'] 1.0
In [17]:
## CONFIGURE
## we will map R to mondo and hp separately
## (R = the 'rare' name list). Each tuple is a (left, right) pair of ontology
## ids; presumably the engine only aligns across the listed pairs -- TODO confirm.
lexmap.ontology_pairs = [(rare.id, mondo.id), (rare.id, hp.id)]
In [18]:
# align: build the cross-reference (mapping) graph from the indexed ontologies.
# g is consumed below by as_dataframe() and unmapped_dataframe().
g = lexmap.get_xref_graph()
In [19]:
# get a dataframe from the mapping graph
# (one row per candidate match; the bare `df` below renders it as the cell output)
df=lexmap.as_dataframe(g)
df
Out[19]:
left
left_label
right
right_label
left_match_type
right_match_type
left_match_val
right_match_val
score
left_simscore
...
conditional_pr_equiv
pr_subClassOf
pr_superClassOf
pr_equivalentTo
pr_other
left_novel
right_novel
left_consistent
right_consistent
equiv_clique_size
3287
11-beta-hydroxylase deficiency
11-beta-hydroxylase deficiency
MONDO:0008729
congenital adrenal hyperplasia due to 11-beta-...
label
hasRelatedSynonym
11-beta-hydroxylase deficiency
11-Beta-Hydroxylase Deficiency
50.0
1.000000
...
1.000000
0.061581
0.061581
0.799654
0.077184
True
True
False
False
7
2199
15q13.3 microdeletion syndrome
15q13.3 microdeletion syndrome
MONDO:0012774
chromosome 15q13.3 microdeletion syndrome
label
hasExactSynonym
15q13.3 microdeletion syndrome
15q13.3 microdeletion syndrome
90.0
1.000000
...
1.000000
0.029969
0.029969
0.918763
0.021299
True
True
False
False
6
3339
17-alpha-hydroxylase deficiency
17-alpha-hydroxylase deficiency
MONDO:0008730
congenital adrenal hyperplasia due to 17-alpha...
label
hasRelatedSynonym
17-alpha-hydroxylase deficiency
17-Alpha-Hydroxylase Deficiency
50.0
1.000000
...
1.000000
0.061581
0.061581
0.799654
0.077184
True
True
False
False
5
3481
17-beta hydroxysteroid dehydrogenase 3 deficiency
17-beta hydroxysteroid dehydrogenase 3 deficiency
MONDO:0009916
46,XY disorder of sex development due to 17-be...
label
hasExactSynonym
17-beta hydroxysteroid dehydrogenase 3 deficiency
17-beta-hydroxysteroid dehydrogenase 3 deficiency
58.0
1.000000
...
1.000000
0.205965
0.205965
0.392394
0.195675
True
True
False
False
7
2592
17q21.31 microdeletion syndrome
17q21.31 microdeletion syndrome
MONDO:0012496
Koolen de Vries syndrome
label
hasExactSynonym
17q21.31 microdeletion syndrome
17q21.31 microdeletion syndrome
90.0
1.000000
...
0.473684
0.168017
0.055554
0.749591
0.026839
True
True
False
False
8
2593
17q21.31 microdeletion syndrome
17q21.31 microdeletion syndrome
MONDO:0018216
17q21.31 microdeletion syndrome
label
label
17q21.31 microdeletion syndrome
17q21.31 microdeletion syndrome
100.0
1.000000
...
0.526316
0.051671
0.108232
0.824734
0.015363
True
True
False
False
8
2987
18 Hydroxylase deficiency
18 Hydroxylase deficiency
MONDO:0008751
Corticosterone methyloxidase type 1 deficiency
label
hasRelatedSynonym
18 Hydroxylase deficiency
18-Hydroxylase Deficiency
32.0
1.000000
...
0.355556
0.232996
0.289482
0.283582
0.193941
True
True
False
False
6
2986
18 Hydroxylase deficiency
18 Hydroxylase deficiency
MONDO:0020489
familial hyperreninemic hypoaldosteronism type 1
label
hasExactSynonym
18 Hydroxylase deficiency
18-hydroxylase deficiency
58.0
1.000000
...
0.644444
0.292046
0.210145
0.309167
0.188643
True
True
False
False
6
1960
1q21.1 microdeletion syndrome
1q21.1 microdeletion syndrome
MONDO:0012914
chromosome 1q21.1 deletion syndrome
label
hasExactSynonym
1q21.1 microdeletion syndrome
1q21.1 microdeletion syndrome
90.0
1.000000
...
1.000000
0.030109
0.030109
0.923042
0.016740
True
True
False
False
6
1428
2 4-Dienoyl-CoA reductase deficiency
2 4-Dienoyl-CoA reductase deficiency
MONDO:0014464
progressive encephalopathy with leukodystrophy...
label
hasExactSynonym
2 4-Dienoyl-CoA reductase deficiency
2,4-dienoyl-CoA reductase deficiency
58.0
1.000000
...
1.000000
0.200803
0.200803
0.382559
0.215835
True
True
False
False
5
4514
2-Hydroxyglutaric aciduria
2-Hydroxyglutaric aciduria
MONDO:0016001
2-hydroxyglutaric aciduria
label
label
2-Hydroxyglutaric aciduria
2-hydroxyglutaric aciduria
100.0
1.000000
...
1.000000
0.028758
0.028758
0.925963
0.016522
True
True
False
False
7
1888
2-methyl-3-hydroxybutyric aciduria
2-methyl-3-hydroxybutyric aciduria
MONDO:0010327
HSD10 disease
label
hasExactSynonym
2-methyl-3-hydroxybutyric aciduria
2-methyl-3-hydroxybutyric aciduria
90.0
1.000000
...
1.000000
0.029969
0.029969
0.918763
0.021299
True
True
False
False
5
1202
2-methylbutyryl-CoA dehydrogenase deficiency
2-methylbutyryl-CoA dehydrogenase deficiency
MONDO:0012392
2-methylbutyryl-CoA dehydrogenase deficiency
label
label
2-methylbutyryl-CoA dehydrogenase deficiency
2-methylbutyryl-CoA dehydrogenase deficiency
100.0
1.000000
...
1.000000
0.028795
0.028795
0.927169
0.015241
True
True
False
False
7
3288
21-hydroxylase deficiency
21-hydroxylase deficiency
MONDO:0008728
classic congenital adrenal hyperplasia due to ...
label
hasRelatedSynonym
21-hydroxylase deficiency
21-Hydroxylase Deficiency
50.0
1.000000
...
1.000000
0.061581
0.061581
0.799654
0.077184
True
True
False
False
5
3507
22q11.2 deletion syndrome
22q11.2 deletion syndrome
MONDO:0008644
velocardiofacial syndrome
label
hasExactSynonym
22q11.2 deletion syndrome
deletion 22q11.2 syndrome
58.0
1.000000
...
0.134754
0.179472
0.287938
0.282070
0.250520
True
True
False
False
41
2964
22q11.2 deletion syndrome
22q11.2 deletion syndrome
MONDO:0018923
22q11.2 deletion syndrome
label
label
22q11.2 deletion syndrome
22q11.2 deletion syndrome
100.0
0.166667
...
0.115075
0.092223
0.035954
0.841716
0.030107
True
True
False
False
41
1721
3 methylglutaconic aciduria type I
3 methylglutaconic aciduria type I
MONDO:0009610
3-methylglutaconic aciduria type 1
label
label
3 methylglutaconic aciduria type I
3-methylglutaconic aciduria type 1
64.0
1.000000
...
1.000000
0.200803
0.200803
0.382559
0.215835
True
True
False
False
9
1720
3 methylglutaconic aciduria type IV
3 methylglutaconic aciduria type IV
MONDO:0009611
3-methylglutaconic aciduria type 4
label
label
3 methylglutaconic aciduria type IV
3-methylglutaconic aciduria type 4
64.0
1.000000
...
1.000000
0.200803
0.200803
0.382559
0.215835
True
True
False
False
8
2580
3 methylglutaconic aciduria type V
3 methylglutaconic aciduria type V
MONDO:0012435
3-methylglutaconic aciduria type 5
label
label
3 methylglutaconic aciduria type V
3-methylglutaconic aciduria type 5
64.0
1.000000
...
1.000000
0.198342
0.198342
0.377872
0.225444
True
True
False
False
7
1877
3-Hydroxyisobutyric aciduria
3-Hydroxyisobutyric aciduria
MONDO:0009371
3-hydroxyisobutyric aciduria
label
label
3-Hydroxyisobutyric aciduria
3-hydroxyisobutyric aciduria
100.0
1.000000
...
1.000000
0.028795
0.028795
0.927169
0.015241
True
True
False
False
8
3289
3-beta-hydroxysteroid dehydrogenase deficiency
3-beta-hydroxysteroid dehydrogenase deficiency
MONDO:0008727
congenital adrenal hyperplasia due to 3-beta-h...
label
hasRelatedSynonym
3-beta-hydroxysteroid dehydrogenase deficiency
3-Beta-Hydroxysteroid Dehydrogenase Deficiency
50.0
1.000000
...
1.000000
0.061581
0.061581
0.799654
0.077184
True
True
False
False
5
3670
3-methylglutaconic aciduria type III
3-methylglutaconic aciduria type III
MONDO:0009787
3-methylglutaconic aciduria type 3
label
hasExactSynonym
3-methylglutaconic aciduria type III
3-methylglutaconic aciduria type III
90.0
1.000000
...
1.000000
0.029969
0.029969
0.918763
0.021299
True
True
False
False
8
755
4-hydroxyphenylacetic aciduria
4-hydroxyphenylacetic aciduria
HP:0003607
4-Hydroxyphenylacetic aciduria
label
label
4-hydroxyphenylacetic aciduria
4-Hydroxyphenylacetic aciduria
100.0
1.000000
...
1.000000
0.028891
0.028891
0.930268
0.011949
True
True
False
False
2
3680
46 XX testicular disorder of sex development
46 XX testicular disorder of sex development
MONDO:0010766
46,XX testicular disorder of sex development
label
label
46 XX testicular disorder of sex development
46,XX testicular disorder of sex development
64.0
1.000000
...
1.000000
0.198342
0.198342
0.377872
0.225444
True
True
False
False
6
3136
47 XXX syndrome
47 XXX syndrome
MONDO:0018066
trisomy X
label
hasExactSynonym
47 XXX syndrome
47,XXX syndrome
58.0
1.000000
...
1.000000
0.226493
0.185437
0.392394
0.195675
True
True
False
False
5
3166
47 XYY syndrome
47 XYY syndrome
MONDO:0019339
47,XYY syndrome
label
label
47 XYY syndrome
47,XYY syndrome
64.0
1.000000
...
1.000000
0.226493
0.185437
0.392394
0.195675
True
True
False
False
5
4164
49 XXXXX syndrome
49 XXXXX syndrome
MONDO:0015228
pentasomy X
label
hasExactSynonym
49 XXXXX syndrome
49,XXXXX syndrome
58.0
1.000000
...
1.000000
0.205965
0.205965
0.392394
0.195675
True
True
False
False
5
4531
49 XXXXY syndrome
49 XXXXY syndrome
MONDO:0019929
49,XXXXY syndrome
label
label
49 XXXXY syndrome
49,XXXXY syndrome
64.0
1.000000
...
1.000000
0.219001
0.179303
0.379414
0.222282
True
True
False
False
6
710
5-oxoprolinase deficiency
5-oxoprolinase deficiency
MONDO:0009825
5-oxoprolinase deficiency (disease)
label
hasExactSynonym
5-oxoprolinase deficiency
5-oxoprolinase deficiency
90.0
1.000000
...
1.000000
0.030109
0.030109
0.923042
0.016740
True
True
False
False
7
709
5-oxoprolinase deficiency
5-oxoprolinase deficiency
HP:0040142
5-oxoprolinase deficiency
label
label
5-oxoprolinase deficiency
5-oxoprolinase deficiency
100.0
1.000000
...
1.000000
0.028891
0.028891
0.930268
0.011949
True
True
False
False
7
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
2067
Wrinkly skin syndrome
Wrinkly skin syndrome
MONDO:0010208
Wrinkly skin syndrome
label
label
Wrinkly skin syndrome
Wrinkly skin syndrome
100.0
1.000000
...
1.000000
0.028758
0.028758
0.925963
0.016522
True
True
False
False
7
2105
X-linked adrenal hypoplasia congenita
X-linked adrenal hypoplasia congenita
MONDO:0010264
X-linked adrenal hypoplasia congenita
label
label
X-linked adrenal hypoplasia congenita
X-linked adrenal hypoplasia congenita
100.0
1.000000
...
1.000000
0.028738
0.028738
0.925323
0.017201
True
True
False
False
7
1552
X-linked hypohidrotic ectodermal dysplasia
X-linked hypohidrotic ectodermal dysplasia
MONDO:0010585
X-linked hypohidrotic ectodermal dysplasia
label
label
X-linked hypohidrotic ectodermal dysplasia
X-linked hypohidrotic ectodermal dysplasia
100.0
1.000000
...
1.000000
0.028738
0.028738
0.925323
0.017201
True
True
False
False
4
3900
X-linked ichthyosis
X-linked ichthyosis
MONDO:0010622
recessive X-linked ichthyosis
label
hasExactSynonym
X-linked ichthyosis
X-linked ichthyosis
90.0
1.000000
...
1.000000
0.029886
0.029886
0.916224
0.024003
True
True
False
False
7
1968
X-linked severe combined immunodeficiency
X-linked severe combined immunodeficiency
MONDO:0010315
gamma chain deficiency
label
hasExactSynonym
X-linked severe combined immunodeficiency
X-Linked Severe Combined Immunodeficiency
90.0
1.000000
...
1.000000
0.029969
0.029969
0.918763
0.021299
True
True
False
False
8
2543
XFE progeroid syndrome
XFE progeroid syndrome
MONDO:0012590
XFE progeroid syndrome
label
label
XFE progeroid syndrome
XFE progeroid syndrome
100.0
1.000000
...
1.000000
0.028891
0.028891
0.930268
0.011949
True
True
False
False
7
3037
XK aprosencephaly
XK aprosencephaly
MONDO:0008811
XK aprosencephaly
label
label
XK aprosencephaly
XK aprosencephaly
100.0
1.000000
...
1.000000
0.028891
0.028891
0.930268
0.011949
True
True
False
False
8
2070
Xanthinuria type 1
Xanthinuria type 1
MONDO:0010209
xanthinuria type I
label
label
Xanthinuria type 1
xanthinuria type I
64.0
1.000000
...
1.000000
0.205965
0.205965
0.392394
0.195675
True
True
False
False
5
2414
Xanthinuria type 2
Xanthinuria type 2
MONDO:0011346
xanthinuria type II
label
label
Xanthinuria type 2
xanthinuria type II
64.0
1.000000
...
1.000000
0.205965
0.205965
0.392394
0.195675
True
True
False
False
6
1509
Xanthogranulomatous cholecystitis
Xanthogranulomatous cholecystitis
MONDO:0004875
xanthogranulomatous cholecystitis
label
label
Xanthogranulomatous cholecystitis
xanthogranulomatous cholecystitis
100.0
1.000000
...
1.000000
0.028795
0.028795
0.927169
0.015241
True
True
False
False
8
2867
Xeroderma pigmentosum
Xeroderma pigmentosum
MONDO:0019600
xeroderma pigmentosum
label
label
Xeroderma pigmentosum
xeroderma pigmentosum
100.0
1.000000
...
1.000000
0.028758
0.028758
0.925963
0.016522
True
True
False
False
8
2077
Xeroderma pigmentosum variant type
Xeroderma pigmentosum variant type
MONDO:0010214
xeroderma pigmentosum variant type
label
label
Xeroderma pigmentosum variant type
xeroderma pigmentosum variant type
100.0
1.000000
...
1.000000
0.028758
0.028758
0.925963
0.016522
True
True
False
False
8
3151
Yaws
Yaws
MONDO:0006019
yaws
label
label
Yaws
yaws
100.0
1.000000
...
1.000000
0.051830
0.051830
0.874531
0.021809
True
True
False
False
10
3080
Yellow fever
Yellow fever
MONDO:0020502
yellow fever
label
label
Yellow fever
yellow fever
100.0
1.000000
...
1.000000
0.028891
0.028891
0.930268
0.011949
True
True
False
False
8
4539
Yellow nail syndrome
Yellow nail syndrome
MONDO:0007921
yellow nail syndrome
label
label
Yellow nail syndrome
yellow nail syndrome
100.0
1.000000
...
1.000000
0.028891
0.028891
0.930268
0.011949
True
True
False
False
9
2555
Yemenite deaf-blind hypopigmentation syndrome
Yemenite deaf-blind hypopigmentation syndrome
MONDO:0011133
Deaf blind hypopigmentation syndrome, Yemenite...
label
hasExactSynonym
Yemenite deaf-blind hypopigmentation syndrome
Yemenite deaf-blind hypopigmentation syndrome
90.0
1.000000
...
1.000000
0.030109
0.030109
0.923042
0.016740
True
True
False
False
6
4262
Yolk sac tumor
Yolk sac tumor
MONDO:0005744
yolk sac tumor
label
label
Yolk sac tumor
yolk sac tumor
100.0
1.000000
...
1.000000
0.028758
0.028758
0.925963
0.016522
True
True
False
False
7
3775
Yorifuji Okuno syndrome
Yorifuji Okuno syndrome
MONDO:0010802
pancreatic hypoplasia-diabetes-congenital hear...
label
hasExactSynonym
Yorifuji Okuno syndrome
Yorifuji-Okuno syndrome
58.0
1.000000
...
1.000000
0.205965
0.205965
0.392394
0.195675
True
True
False
False
5
4330
Young Hughes syndrome
Young Hughes syndrome
MONDO:0017614
X-linked intellectual disability-hypogonadism-...
label
hasExactSynonym
Young Hughes syndrome
Young-Hughes syndrome
58.0
1.000000
...
1.000000
0.200803
0.200803
0.382559
0.215835
True
True
False
False
4
2384
Young Simpson syndrome
Young Simpson syndrome
MONDO:0011365
blepharophimosis-intellectual disability syndr...
label
hasRelatedSynonym
Young Simpson syndrome
Young-Simpson Syndrome
32.0
1.000000
...
1.000000
0.200803
0.200803
0.382559
0.215835
True
True
False
False
7
2059
Young syndrome
Young syndrome
MONDO:0010220
young syndrome
label
label
Young syndrome
young syndrome
100.0
1.000000
...
1.000000
0.028891
0.028891
0.930268
0.011949
True
True
False
False
7
2892
Yunis Varon syndrome
Yunis Varon syndrome
MONDO:0008995
Yunis-Varon syndrome
label
label
Yunis Varon syndrome
Yunis-Varon syndrome
64.0
1.000000
...
1.000000
0.062922
0.062922
0.817066
0.057090
True
True
False
False
7
1473
Zechi Ceide syndrome
Zechi Ceide syndrome
MONDO:0013036
Zechi-Ceide syndrome
label
label
Zechi Ceide syndrome
Zechi-Ceide syndrome
64.0
1.000000
...
1.000000
0.205965
0.205965
0.392394
0.195675
True
True
False
False
6
2866
Zellweger syndrome
Zellweger syndrome
MONDO:0019609
Zellweger syndrome
label
label
Zellweger syndrome
Zellweger syndrome
100.0
1.000000
...
1.000000
0.028758
0.028758
0.925963
0.016522
True
True
False
False
6
657
Zollinger-Ellison syndrome
Zollinger-Ellison syndrome
MONDO:0006020
Zollinger-Ellison syndrome (disease)
label
hasExactSynonym
Zollinger-Ellison syndrome
Zollinger-Ellison Syndrome
90.0
1.000000
...
0.473684
0.075251
0.062185
0.839062
0.023503
True
True
False
False
11
655
Zollinger-Ellison syndrome
Zollinger-Ellison syndrome
HP:0002044
Zollinger-Ellison syndrome
label
label
Zollinger-Ellison syndrome
Zollinger-Ellison syndrome
100.0
1.000000
...
1.000000
0.028891
0.028891
0.930268
0.011949
True
True
False
False
11
656
Zollinger-Ellison syndrome
Zollinger-Ellison syndrome
MONDO:0019610
Zollinger-Ellison syndrome
label
label
Zollinger-Ellison syndrome
Zollinger-Ellison syndrome
100.0
1.000000
...
0.526316
0.055295
0.045694
0.882570
0.016441
True
True
False
False
11
3377
Zori Stalker Williams syndrome
Zori Stalker Williams syndrome
MONDO:0010883
pectus excavatum-macrocephaly-dysplastic nails...
label
hasExactSynonym
Zori Stalker Williams syndrome
Zori-Stalker-Williams syndrome
58.0
1.000000
...
1.000000
0.205965
0.205965
0.392394
0.195675
True
True
False
False
5
2061
Zunich neuroectodermal syndrome
Zunich neuroectodermal syndrome
MONDO:0010221
CHIME syndrome
label
hasRelatedSynonym
Zunich neuroectodermal syndrome
Zunich Neuroectodermal Syndrome
50.0
1.000000
...
1.000000
0.061951
0.061951
0.804454
0.071645
True
True
False
False
6
3649
Zygomycosis
Zygomycosis
MONDO:0019136
zygomycosis
label
label
Zygomycosis
zygomycosis
100.0
1.000000
...
1.000000
0.051830
0.051830
0.874531
0.021809
True
True
False
False
9
4558 rows × 22 columns
In [20]:
## write to file (not used here but can be examined separately)
## Tab-separated, without the dataframe's integer index column.
df.to_csv('rare-matches.tsv', sep="\t", index=False)
In [21]:
# Dataframe of entries that were left unmapped in the xref graph g.
udf = lexmap.unmapped_dataframe(g)
In [22]:
## unmapped (TODO this includes unmapped from MONDO/HP to R, which we don't care about so much)
## The full unmapped table is written as tab-separated text (no index column)
## and then displayed as the cell output.
udf.to_csv('rare-no-matches.tsv', sep="\t", index=False)
udf
Out[22]:
id
label
mapped_equivs
18057
16p11.2 deletion syndrome
16p11.2 deletion syndrome
105646
2-Methylacetoacetyl CoA thiolase deficiency
2-Methylacetoacetyl CoA thiolase deficiency
41905
2-hydroxyethyl methacrylate sensitization
2-hydroxyethyl methacrylate sensitization
29133
22q11.2 duplication syndrome
22q11.2 duplication syndrome
100428
22q13.3 deletion syndrome
22q13.3 deletion syndrome
96122
2q37 deletion syndrome
2q37 deletion syndrome
88482
3 Methylcrotonyl-CoA carboxylase 1 deficiency
3 Methylcrotonyl-CoA carboxylase 1 deficiency
34501
3 alpha methylcrotonyl-CoA carboxylase 2 defic...
3 alpha methylcrotonyl-CoA carboxylase 2 defic...
85670
3-alpha hydroxyacyl-CoA dehydrogenase deficiency
3-alpha hydroxyacyl-CoA dehydrogenase deficiency
77929
3p deletion syndrome
3p deletion syndrome
95095
46 XX Gonadal dysgenesis epibulbar dermoid
46 XX Gonadal dysgenesis epibulbar dermoid
90032
5-Nucleotidase syndrome
5-Nucleotidase syndrome
74374
6 alpha mercaptopurine sensitivity
6 alpha mercaptopurine sensitivity
51486
ACTH-independent macronodular adrenal hyperplasia
ACTH-independent macronodular adrenal hyperplasia
26334
AIDS Dementia Complex
AIDS Dementia Complex
12881
AIDS dysmorphic syndrome
AIDS dysmorphic syndrome
77859
ALK+ histiocytosis
ALK+ histiocytosis
26275
ALS-like syndrome of encephalomyopathy
ALS-like syndrome of encephalomyopathy
60831
Abderhalden Kaufmann Lignac syndrome
Abderhalden Kaufmann Lignac syndrome
10975
Abdominal chemodectomas with cutaneous angioli...
Abdominal chemodectomas with cutaneous angioli...
108580
Abdominal cystic lymphangioma
Abdominal cystic lymphangioma
94496
Aberrant subclavian artery
Aberrant subclavian artery
93116
Abidi X-linked mental retardation syndrome
Abidi X-linked mental retardation syndrome
40555
Absence of fingerprints congenital milia
Absence of fingerprints congenital milia
7562
Absence of gluteal muscle
Absence of gluteal muscle
6259
Absence of tibia with polydactyly
Absence of tibia with polydactyly
75137
Absent T lymphocytes
Absent T lymphocytes
49480
Absent breasts and nipples
Absent breasts and nipples
56640
Abuse dwarfism syndrome
Abuse dwarfism syndrome
8138
Acanthamoeba infection
Acanthamoeba infection
...
...
...
...
25120
http://www.orpha.net/ORDO/Orphanet_99948
None
[MONDO:0008961]
21037
http://www.orpha.net/ORDO/Orphanet_99949
None
[MONDO:0011113]
17454
http://www.orpha.net/ORDO/Orphanet_99950
None
[MONDO:0011085]
20779
http://www.orpha.net/ORDO/Orphanet_99951
None
[MONDO:0011527]
70495
http://www.orpha.net/ORDO/Orphanet_99952
None
[]
94070
http://www.orpha.net/ORDO/Orphanet_99953
None
[MONDO:0011534]
82089
http://www.orpha.net/ORDO/Orphanet_99954
None
[]
106842
http://www.orpha.net/ORDO/Orphanet_99955
None
[MONDO:0011066]
99610
http://www.orpha.net/ORDO/Orphanet_99956
None
[MONDO:0011475]
10190
http://www.orpha.net/ORDO/Orphanet_99960
None
[]
100192
http://www.orpha.net/ORDO/Orphanet_99961
None
[]
30752
http://www.orpha.net/ORDO/Orphanet_99965
None
[]
51288
http://www.orpha.net/ORDO/Orphanet_99966
None
[]
24502
http://www.orpha.net/ORDO/Orphanet_99967
None
[]
102919
http://www.orpha.net/ORDO/Orphanet_99969
None
[]
79112
http://www.orpha.net/ORDO/Orphanet_99970
None
[]
17426
http://www.orpha.net/ORDO/Orphanet_99971
None
[]
15059
http://www.orpha.net/ORDO/Orphanet_99976
None
[]
65846
http://www.orpha.net/ORDO/Orphanet_99977
None
[]
6381
http://www.orpha.net/ORDO/Orphanet_99978
None
[MONDO:0003345]
3759
http://www.orpha.net/ORDO/Orphanet_99981
None
[]
3847
http://www.orpha.net/ORDO/Orphanet_99983
None
[]
85668
http://www.orpha.net/ORDO/Orphanet_99989
None
[]
4382
http://www.orpha.net/ORDO/Orphanet_99990
None
[]
45866
http://www.orpha.net/ORDO/Orphanet_99991
None
[]
21041
http://www.orpha.net/ORDO/Orphanet_99994
None
[]
65561
http://www.orpha.net/ORDO/Orphanet_99995
None
[]
43256
http://www.w3.org/2000/01/rdf-schema#seeAlso
seeAlso
21673
http://www.w3.org/2002/07/owl#Thing
None
15400
http://www.w3.org/2002/07/owl#topObjectProperty
None
110240 rows × 3 columns
In [ ]:
Content source: biolink/ontobio
Similar notebooks: