In [2]:
import sqlalchemy
import pandas as pd
import re
Read the mapping from SQL
In [3]:
e = sqlalchemy.create_engine('mysql://root:root@127.0.0.1:3306/UMLS_ext_mappings')
In [66]:
df0 = pd.read_sql("select CTV3_CONCEPTID, V2_CONCEPTID from RCD_V3_to_V2", e)\
.drop_duplicates()\
.sort_index(by='CTV3_CONCEPTID')
df0.describe()
Out[66]:
Number of entries in the mapping without Read2 code
In [67]:
df0[df0.V2_CONCEPTID.str.contains('_') == True].V2_CONCEPTID.value_counts()
Out[67]:
In [68]:
df = df0[df0.V2_CONCEPTID.str.contains('_') == False]
Number of occurrences of READ3 codes in mapping
In [70]:
occurrences = df.CTV3_CONCEPTID.value_counts().value_counts()
DataFrame(data={'codes': occurrences.values, 'occurrences': occurrences.index}, index=['',''])
Out[70]:
In [71]:
# Fever
code3s = ["R0062", "X76Df", "X76Di", "X76EI", "XM0yv"]
df[df.CTV3_CONCEPTID.isin(code3s)]
Out[71]:
In [ ]: