In [1]:

    
import pandas as pd
import numpy as np
from pandas import DataFrame
import os
import reader



In [32]:

    
path = os.getcwd()+'/data/'
data = reader.Data(path)









    



Local data read/write folder path:
	Customed path: /Users/Dan/百度云同步盘/丝打底/2017 spring/MATH 497/code and data/data/

Data: systemic_disease_list 
File: systemic_disease_list.pickle
File already exists.

Data: SNOMED_problem_list 
File: SNOMED_problem_list.pickle
File already exists.

Data: macula_findings_for_Enc 
File: macula_findings_for_Enc.pickle
File already exists.

Data: SL_Lens_for_Enc 
File: SL_Lens_for_Enc.pickle
File already exists.

Data: family_hist_list 
File: family_hist_list.pickle
File already exists.

Data: systemic_disease_for_Enc 
File: systemic_disease_for_Enc.pickle
File already exists.

Data: family_hist_for_Enc 
File: family_hist_for_Enc.pickle
File already exists.

Data: all_encounter_data 
File: all_encounter_data.pickle
File already exists.

Data: encounters 
File: encounters.pickle
File already exists.

Data: demographics 
File: demographics.pickle
File already exists.

Data: ICD_for_Enc 
File: ICD_for_Enc.pickle
File already exists.



In [33]:

    
data['demographics'].head()









    Out[33]:






  
    
      
      Person_ID
      Person_Nbr
      DOB
      Gender
      Race
      Ethnicity
      Zip
      Age_Censored
    
  
  
    
      14132
      18405351-AC64-46A2-A003-8F7889351A13
      33
      1948-10-15
      F
      Black/African American (Not Hispanic)
      Declined to specify
      60616
      None
    
    
      8747
      4ACD51E8-4A9B-4AA6-B635-166ADA5EF02E
      89
      1955-09-04
      M
      Declined to specify
      Declined to specify
      60652
      None
    
    
      2129
      DF25CE0F-E45A-44D2-BE43-F49FE4F5E280
      112
      1932-11-19
      F
      NaN
      NaN
      606372549
      None
    
    
      6591
      7027DD58-6465-496C-880A-A6E825C155A8
      146
      1948-12-19
      F
      Declined to specify
      Declined to specify
      60615
      None
    
    
      16909
      395DD5E7-45F9-456E-A127-9AB929E872F7
      196
      1955-02-21
      F
      Declined to specify
      Declined to specify
      60615
      None



In [4]:

    
data['family_hist_list'].head()









    Out[4]:






  
    
      
      Person_ID
      Person_Nbr
      Date_Created
      Code
      Code_System
      Family_History
      Relation
    
  
  
    
      47872
      80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa
      33
      2014-12-18 14:47:15.980
      160347007
      SNOMED
      Glaucoma
      Father
    
    
      47869
      80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa
      33
      2014-12-18 14:47:26.897
      160274005
      SNOMED
      No history of Diabetes mellitus
      Mother
    
    
      47867
      80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa
      33
      2014-12-18 14:47:28.590
      160274005
      SNOMED
      No history of Diabetes mellitus
      Father
    
    
      47871
      80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa
      33
      2014-12-18 14:47:29.563
      160267000
      SNOMED
      No history of Glaucoma
      Mother
    
    
      47868
      80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa
      33
      2014-12-18 15:55:35.837
      439724007
      SNOMED
      Heart disease
      Brother

There are 443 different relationships



In [53]:

    
temp=list(data['family_hist_list'][data['family_hist_list'].Relation.notnull()].Relation.drop_duplicates())
len(temp)









    Out[53]:





443



In [6]:

    
data['encounters'].head()









    Out[6]:






  
    
      
      Person_ID
      Person_Nbr
      Enc_ID
      Enc_Nbr
      Enc_Timestamp
    
  
  
    
      10335
      18405351-AC64-46A2-A003-8F7889351A13
      33
      97825c51-4462-eade-1c1d-2baa3400c033
      123227
      2014-12-18 14:00:00
    
    
      59383
      4ACD51E8-4A9B-4AA6-B635-166ADA5EF02E
      89
      a6d9d991-4ff4-79b5-304d-f1b2bff9d6d0
      12870648
      2012-10-16 03:45:00
    
    
      56011
      4ACD51E8-4A9B-4AA6-B635-166ADA5EF02E
      89
      fe5eb87c-39b3-4314-0172-4642d51de417
      4126172
      2014-03-12 15:30:00
    
    
      78800
      4ACD51E8-4A9B-4AA6-B635-166ADA5EF02E
      89
      ea3c2622-6d1d-53ab-ddf0-9764927f1507
      9217142
      2014-04-17 18:45:00
    
    
      3870
      4ACD51E8-4A9B-4AA6-B635-166ADA5EF02E
      89
      71d10fa4-3b0d-fa52-cfbd-ffd0f3d3532d
      14968450
      2014-05-25 10:45:00

Process family history



In [7]:

    
# Create Date variable
from datetime import datetime
data['family_hist_list']['Date'] = [datetime.strftime(item, '%Y-%m-%d') for item in data['family_hist_list']['Date_Created']]



In [8]:

    
# Individual family history grouped by the relationship
# Date of collecting could be omited
family_hist_list = {k:[{'Relation':k1,
                        'History':[{'Code': a, 'Family_history': b} for a,b in zip(v1.Code, v1.Family_History)]}
              for k1, v1 in v.groupby('Relation')]
           for k,v in data['family_hist_list'].groupby('Person_Nbr')}



In [9]:

    
family_hist_list[109227]









    Out[9]:





[{'History': [{'Code': '430679000',
    'Family_history': 'Diabetes mellitus type 2'}],
  'Relation': 'Brother'},
 {'History': [{'Code': '160274005',
    'Family_history': 'No history of Diabetes mellitus'},
   {'Code': '160267000', 'Family_history': 'No history of Glaucoma'}],
  'Relation': 'Father'},
 {'History': [{'Code': '416855002', 'Family_history': 'Diabetes mellitus'},
   {'Code': '160267000', 'Family_history': 'No history of Glaucoma'},
   {'Code': '160357008', 'Family_history': 'Hypertension'}],
  'Relation': 'Mother'}]

Process demographics



In [10]:

    
# There is no person duplicated in demographics
len(data['demographics'].Person_Nbr.drop_duplicates()) == len(data['demographics'])









    Out[10]:





True



In [34]:

    
# Normalize zip code with only 5 digits
def clean_zip(zip):
    if len(zip)<5:
        return 'Null'
    else:
        return zip[:5]
data['demographics']['Zip'] = demographics.Zip.map(lambda x: clean_zip(x))



In [35]:

    
data['demographics'].head()









    Out[35]:






  
    
      
      Person_ID
      Person_Nbr
      DOB
      Gender
      Race
      Ethnicity
      Zip
      Age_Censored
    
  
  
    
      14132
      18405351-AC64-46A2-A003-8F7889351A13
      33
      1948-10-15
      F
      Black/African American (Not Hispanic)
      Declined to specify
      60616
      None
    
    
      8747
      4ACD51E8-4A9B-4AA6-B635-166ADA5EF02E
      89
      1955-09-04
      M
      Declined to specify
      Declined to specify
      60652
      None
    
    
      2129
      DF25CE0F-E45A-44D2-BE43-F49FE4F5E280
      112
      1932-11-19
      F
      NaN
      NaN
      60637
      None
    
    
      6591
      7027DD58-6465-496C-880A-A6E825C155A8
      146
      1948-12-19
      F
      Declined to specify
      Declined to specify
      60615
      None
    
    
      16909
      395DD5E7-45F9-456E-A127-9AB929E872F7
      196
      1955-02-21
      F
      Declined to specify
      Declined to specify
      60615
      None



In [43]:

    
# Null cases for zip code
data['demographics'][data['demographics'].Zip=='Null']









    Out[43]:






  
    
      
      Person_ID
      Person_Nbr
      DOB
      Gender
      Race
      Ethnicity
      Zip
      Age_Censored
    
  
  
    
      7038
      DEC00BF4-F6F8-46F2-87D9-B1F94E16EB6B
      16320
      1939-04-09
      F
      Declined to specify
      Declined to specify
      Null
      None
    
    
      12820
      7286E85A-473F-4C94-A684-489E6932E02A
      211308
      1975-01-26
      M
      Declined to specify
      Unknown Ethnicity
      Null
      None
    
    
      10604
      663B54A6-C6DD-4858-AE78-8AFDA98E78F4
      496337
      1959-08-05
      F
      NaN
      Unknown Ethnicity
      Null
      None
    
    
      2065
      4CCDD77B-12DF-4943-BA78-F387C3AD2182
      542734
      1927-05-07
      M
      Black/African American (Not Hispanic)
      Not Hispanic or Latino
      Null
      None
    
    
      10147
      0B7013D0-89BD-45C4-B615-E8DE442CC984
      663472
      1980-01-05
      M
      Alaskan Native
      Hispanic or Latino
      Null
      None
    
    
      11573
      1D4E4067-929B-4687-A38F-C69D0AD4DF2D
      782993
      1951-07-26
      F
      Black or African American
      Not Hispanic or Latino
      Null
      None
    
    
      11355
      6000BEE0-6407-4EA2-A88E-53E7C6C796CF
      849189
      1971-10-02
      M
      Black or African American
      Not Hispanic or Latino
      Null
      None
    
    
      7264
      6556347D-22F7-4D17-9917-976441C5FBCD
      850308
      1901-01-06
      F
      White
      Not Hispanic or Latino
      Null
      None
    
    
      15440
      50C8AC9E-D3D4-4279-BC5C-50EC7CEEC2F6
      1014237
      1966-10-02
      M
      Declined to specify
      Declined to specify
      Null
      None
    
    
      9073
      DA5EECE7-B999-4FBA-AB93-C4E5F0CB8BA1
      1041124
      2011-10-30
      M
      Asian
      Not Hispanic or Latino
      Null
      All



In [44]:

    
data['demographics'].to_pickle(path+'demographics_processed_Dan_20170304.pickle')



In [49]:

    
data['demographics']['Age']=data['demographics']['DOB'].map(lambda x: datetime.now().year - x.year)



In [50]:

    
demographics=data['demographics'].set_index('Person_Nbr')[['Age', 'Gender', 'Race', 'Ethnicity', 'Zip', 'Age_Censored']].T.to_dict()



In [52]:

    
demographics[109227]









    Out[52]:





{'Age': 61,
 'Age_Censored': 'None',
 'Ethnicity': 'Not Hispanic or Latino',
 'Gender': 'F',
 'Race': 'Black/African American (Not Hispanic)',
 'Zip': '60419'}



In [53]:

    
# People in demographics have fully covered people in family_hist_list
set(demographics.keys())&set(family_hist_list.keys())==set(family_hist_list)









    Out[53]:





True

Process encounter list



In [20]:

    
# Create Date variable
#data['encounters']['Enc_Date'] = pd.to_datetime([datetime.strftime(item, '%Y-%m-%d') for item in data['encounters']['Enc_Timestamp']])



In [54]:

    
Enc_list = {k:sorted([{'Enc_Nbr': a, 'Enc_Date': b} for a,b in zip(v.Enc_Nbr, v.Enc_Timestamp)], key=lambda x:x['Enc_Date']) for k,v in data['encounters'].groupby('Person_Nbr')}



In [55]:

    
Enc_list[109227]









    Out[55]:





[{'Enc_Date': Timestamp('2016-07-29 12:30:00'), 'Enc_Nbr': 4086734}]



In [56]:

    
set(Enc_list.keys())&set(demographics.keys())==set(Enc_list)
# People in demographics have fully covered people in encouters









    Out[56]:





True



In [57]:

    
set(Enc_list)&set(family_hist_list) == set(family_hist_list)
# People has family history record must have encounter records, encounters fully cover family_hist_list









    Out[57]:





True

Merge into a dictionary of profile



In [58]:

    
profile_full={}
for k,v in demographics.items():
    profile_full[k]=v
    
    # patint may or may nor have a family history
    profile_full[k]['family_hist_list'] = {}
    profile_full[k]['family_hist_list_count'] = 0
    if k in family_hist_list.keys():
        profile_full[k]['family_hist_list'] = family_hist_list[k]
        profile_full[k]['family_hist_list_count'] = len(family_hist_list[k])
    
    # patient may or may not have encounter records
    profile_full[k]['Enc_list'] = {}
    profile_full[k]['Enc_list_count'] = 0
    profile_full[k]['Enc_list_span'] = 0

    if k in Enc_list.keys():
        profile_full[k]['Enc_list'] = Enc_list[k]
        profile_full[k]['Enc_list_count'] = len(Enc_list[k])
        profile_full[k]['Enc_list_span'] = datetime.now().year - int(datetime.strftime(datetime.date(Enc_list[k][0]['Enc_Date']), '%Y'))



In [59]:

    
profile_full[109227]









    Out[59]:





{'Age': 61,
 'Age_Censored': 'None',
 'Enc_list': [{'Enc_Date': Timestamp('2016-07-29 12:30:00'),
   'Enc_Nbr': 4086734}],
 'Enc_list_count': 1,
 'Enc_list_span': 1,
 'Ethnicity': 'Not Hispanic or Latino',
 'Gender': 'F',
 'Race': 'Black/African American (Not Hispanic)',
 'Zip': '60419',
 'family_hist_list': [{'History': [{'Code': '430679000',
     'Family_history': 'Diabetes mellitus type 2'}],
   'Relation': 'Brother'},
  {'History': [{'Code': '160274005',
     'Family_history': 'No history of Diabetes mellitus'},
    {'Code': '160267000', 'Family_history': 'No history of Glaucoma'}],
   'Relation': 'Father'},
  {'History': [{'Code': '416855002', 'Family_history': 'Diabetes mellitus'},
    {'Code': '160267000', 'Family_history': 'No history of Glaucoma'},
    {'Code': '160357008', 'Family_history': 'Hypertension'}],
   'Relation': 'Mother'}],
 'family_hist_list_count': 3}



In [58]:

    
len(profile_full)









    Out[58]:





17019

Shall we remove the profiles that has no history and no encounters?



In [26]:

    
len(set(demographics)-set(family_hist_list))
# That is to remove 2975 patients









    Out[26]:





2975



In [27]:

    
len(set(family_hist_list)&set(Enc_list))
# So that only 14044 patients left have both records









    Out[27]:





14044



In [28]:

    
len(set(demographics)-set(Enc_list))









    Out[28]:





510

We have in total 17019 patients. 510 of total have no encounter records. 2465 of total have no encounter records and no family records. If we want a profile that everyone has both records, we need to remove all 2975 patients.



In [60]:

    
# Remove patients have no family or no encounter records
profile={}
for k,v in demographics.items():
    if k in set(family_hist_list)&set(Enc_list):
        profile[k]=v
        
        profile[k]['family_hist_list'] = family_hist_list[k]
        profile[k]['family_hist_list_count'] = len(family_hist_list[k])

        profile[k]['Enc_list'] = Enc_list[k]
        profile[k]['Enc_list_count'] = len(Enc_list[k])
        profile_full[k]['Enc_list_span'] = datetime.now().year - int(datetime.strftime(datetime.date(Enc_list[k][0]['Enc_Date']), '%Y'))
    else:
        continue



In [61]:

    
profile[109227]









    Out[61]:





{'Age': 61,
 'Age_Censored': 'None',
 'Enc_list': [{'Enc_Date': Timestamp('2016-07-29 12:30:00'),
   'Enc_Nbr': 4086734}],
 'Enc_list_count': 1,
 'Enc_list_span': 1,
 'Ethnicity': 'Not Hispanic or Latino',
 'Gender': 'F',
 'Race': 'Black/African American (Not Hispanic)',
 'Zip': '60419',
 'family_hist_list': [{'History': [{'Code': '430679000',
     'Family_history': 'Diabetes mellitus type 2'}],
   'Relation': 'Brother'},
  {'History': [{'Code': '160274005',
     'Family_history': 'No history of Diabetes mellitus'},
    {'Code': '160267000', 'Family_history': 'No history of Glaucoma'}],
   'Relation': 'Father'},
  {'History': [{'Code': '416855002', 'Family_history': 'Diabetes mellitus'},
    {'Code': '160267000', 'Family_history': 'No history of Glaucoma'},
    {'Code': '160357008', 'Family_history': 'Hypertension'}],
   'Relation': 'Mother'}],
 'family_hist_list_count': 3}



In [31]:

    
len(profile)









    Out[31]:





14044

(Or we can only remove 510 patients that lack encounter records and omit family part for now)



In [62]:

    
# Remove patients have no encounter records
profile1={}
for k,v in demographics.items():
    if k in set(Enc_list):
        profile1[k]=v

        profile1[k]['Enc_list'] = Enc_list[k]
        profile1[k]['Enc_list_count'] = len(Enc_list[k])
        profile_full[k]['Enc_list_span'] = datetime.now().year - int(datetime.strftime(datetime.date(Enc_list[k][0]['Enc_Date']), '%Y'))
        
        profile1[k]['family_hist_list'] = {}
        profile1[k]['family_hist_list_count'] = 0
        
        if k in family_hist_list.keys():
            profile1[k]['family_hist_list'] = family_hist_list[k]
            profile1[k]['family_hist_list_count'] = len(family_hist_list[k])
    else:
        continue



In [63]:

    
profile1[109227]









    Out[63]:





{'Age': 61,
 'Age_Censored': 'None',
 'Enc_list': [{'Enc_Date': Timestamp('2016-07-29 12:30:00'),
   'Enc_Nbr': 4086734}],
 'Enc_list_count': 1,
 'Enc_list_span': 1,
 'Ethnicity': 'Not Hispanic or Latino',
 'Gender': 'F',
 'Race': 'Black/African American (Not Hispanic)',
 'Zip': '60419',
 'family_hist_list': [{'History': [{'Code': '430679000',
     'Family_history': 'Diabetes mellitus type 2'}],
   'Relation': 'Brother'},
  {'History': [{'Code': '160274005',
     'Family_history': 'No history of Diabetes mellitus'},
    {'Code': '160267000', 'Family_history': 'No history of Glaucoma'}],
   'Relation': 'Father'},
  {'History': [{'Code': '416855002', 'Family_history': 'Diabetes mellitus'},
    {'Code': '160267000', 'Family_history': 'No history of Glaucoma'},
    {'Code': '160357008', 'Family_history': 'Hypertension'}],
   'Relation': 'Mother'}],
 'family_hist_list_count': 3}



In [34]:

    
len(profile1)









    Out[34]:





16509



In [66]:

    
temp=pd.DataFrame.from_dict(profile1, orient='index')
temp.head()









    Out[66]:






  
    
      
      Zip
      Gender
      Age
      Enc_list
      Age_Censored
      family_hist_list
      Enc_list_count
      Race
      family_hist_list_count
      Enc_list_span
      Ethnicity
    
  
  
    
      33
      60616
      F
      69
      [{u'Enc_Date': 2014-12-18 14:00:00, u'Enc_Nbr'...
      None
      [{u'Relation': u'Brother', u'History': [{'Code...
      1
      Black/African American (Not Hispanic)
      4
      3
      Declined to specify
    
    
      89
      60652
      M
      62
      [{u'Enc_Date': 2012-10-16 03:45:00, u'Enc_Nbr'...
      None
      [{u'Relation': u'Father', u'History': [{'Code'...
      9
      Declined to specify
      3
      5
      Declined to specify
    
    
      146
      60615
      F
      69
      [{u'Enc_Date': 2012-11-04 19:45:00, u'Enc_Nbr'...
      None
      [{u'Relation': u'Father', u'History': [{'Code'...
      5
      Declined to specify
      2
      5
      Declined to specify
    
    
      196
      60615
      F
      62
      [{u'Enc_Date': 2016-10-05 00:45:00, u'Enc_Nbr'...
      None
      [{u'Relation': u'Father', u'History': [{'Code'...
      2
      Declined to specify
      2
      1
      Declined to specify
    
    
      327
      60411
      F
      73
      [{u'Enc_Date': 2011-12-04 11:00:00, u'Enc_Nbr'...
      None
      {}
      1
      Black or African American
      0
      6
      African American



In [67]:

    
temp.to_pickle(path+'person_profile_df.pickle')

Tried to process SNOMED code list for person



In [36]:

    
data['SNOMED_problem_list'].head()









    Out[36]:






  
    
      
      Person_ID
      Person_Nbr
      Date_Created
      Concept_ID
      Description
    
  
  
    
      69610
      80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa
      33
      2014-12-18 15:51:19.607
      41256004
      Presbyopia
    
    
      69608
      80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa
      33
      2014-12-18 15:51:28.043
      41446000
      Blepharitis
    
    
      69609
      80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa
      33
      2014-12-18 16:36:28.083
      313436004
      Type 2 diabetes mellitus without complication
    
    
      46510
      adca6fa4-e7d4-d7f8-cf41-27056662d84b
      89
      2014-08-12 03:04:55.010
      81416004
      Open angle with borderline findings
    
    
      46511
      adca6fa4-e7d4-d7f8-cf41-27056662d84b
      89
      2014-08-12 03:04:55.010
      28998008
      Retinal hemorrhage



In [64]:

    
{k:list(v) for k,v in data['systemic_disease_list'].groupby('Person_Nbr')['Snomed_Code']}[109227]









    Out[64]:





['44054006', '56265001', '38341003', '230690007', '13644009']



In [65]:

    
{k:list(v) for k,v in data['SNOMED_problem_list'].groupby('Person_Nbr')['Concept_ID']}[109227]









    Out[65]:





[38101003, 111552007, 41446000]



In [ ]:

	Person_ID	Person_Nbr	DOB	Gender	Race	Ethnicity	Zip	Age_Censored
14132	18405351-AC64-46A2-A003-8F7889351A13	33	1948-10-15	F	Black/African American (Not Hispanic)	Declined to specify	60616	None
8747	4ACD51E8-4A9B-4AA6-B635-166ADA5EF02E	89	1955-09-04	M	Declined to specify	Declined to specify	60652	None
2129	DF25CE0F-E45A-44D2-BE43-F49FE4F5E280	112	1932-11-19	F	NaN	NaN	606372549	None
6591	7027DD58-6465-496C-880A-A6E825C155A8	146	1948-12-19	F	Declined to specify	Declined to specify	60615	None
16909	395DD5E7-45F9-456E-A127-9AB929E872F7	196	1955-02-21	F	Declined to specify	Declined to specify	60615	None

	Person_ID	Person_Nbr	Date_Created	Code	Code_System	Family_History	Relation
47872	80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa	33	2014-12-18 14:47:15.980	160347007	SNOMED	Glaucoma	Father
47869	80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa	33	2014-12-18 14:47:26.897	160274005	SNOMED	No history of Diabetes mellitus	Mother
47867	80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa	33	2014-12-18 14:47:28.590	160274005	SNOMED	No history of Diabetes mellitus	Father
47871	80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa	33	2014-12-18 14:47:29.563	160267000	SNOMED	No history of Glaucoma	Mother
47868	80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa	33	2014-12-18 15:55:35.837	439724007	SNOMED	Heart disease	Brother

	Person_ID	Person_Nbr	Enc_ID	Enc_Nbr	Enc_Timestamp
10335	18405351-AC64-46A2-A003-8F7889351A13	33	97825c51-4462-eade-1c1d-2baa3400c033	123227	2014-12-18 14:00:00
59383	4ACD51E8-4A9B-4AA6-B635-166ADA5EF02E	89	a6d9d991-4ff4-79b5-304d-f1b2bff9d6d0	12870648	2012-10-16 03:45:00
56011	4ACD51E8-4A9B-4AA6-B635-166ADA5EF02E	89	fe5eb87c-39b3-4314-0172-4642d51de417	4126172	2014-03-12 15:30:00
78800	4ACD51E8-4A9B-4AA6-B635-166ADA5EF02E	89	ea3c2622-6d1d-53ab-ddf0-9764927f1507	9217142	2014-04-17 18:45:00
3870	4ACD51E8-4A9B-4AA6-B635-166ADA5EF02E	89	71d10fa4-3b0d-fa52-cfbd-ffd0f3d3532d	14968450	2014-05-25 10:45:00

	Person_ID	Person_Nbr	DOB	Gender	Race	Ethnicity	Zip	Age_Censored
7038	DEC00BF4-F6F8-46F2-87D9-B1F94E16EB6B	16320	1939-04-09	F	Declined to specify	Declined to specify	Null	None
12820	7286E85A-473F-4C94-A684-489E6932E02A	211308	1975-01-26	M	Declined to specify	Unknown Ethnicity	Null	None
10604	663B54A6-C6DD-4858-AE78-8AFDA98E78F4	496337	1959-08-05	F	NaN	Unknown Ethnicity	Null	None
2065	4CCDD77B-12DF-4943-BA78-F387C3AD2182	542734	1927-05-07	M	Black/African American (Not Hispanic)	Not Hispanic or Latino	Null	None
10147	0B7013D0-89BD-45C4-B615-E8DE442CC984	663472	1980-01-05	M	Alaskan Native	Hispanic or Latino	Null	None
11573	1D4E4067-929B-4687-A38F-C69D0AD4DF2D	782993	1951-07-26	F	Black or African American	Not Hispanic or Latino	Null	None
11355	6000BEE0-6407-4EA2-A88E-53E7C6C796CF	849189	1971-10-02	M	Black or African American	Not Hispanic or Latino	Null	None
7264	6556347D-22F7-4D17-9917-976441C5FBCD	850308	1901-01-06	F	White	Not Hispanic or Latino	Null	None
15440	50C8AC9E-D3D4-4279-BC5C-50EC7CEEC2F6	1014237	1966-10-02	M	Declined to specify	Declined to specify	Null	None
9073	DA5EECE7-B999-4FBA-AB93-C4E5F0CB8BA1	1041124	2011-10-30	M	Asian	Not Hispanic or Latino	Null	All

	Zip	Gender	Age	Enc_list	Age_Censored	family_hist_list	Enc_list_count	Race	family_hist_list_count	Enc_list_span	Ethnicity
33	60616	F	69	[{u'Enc_Date': 2014-12-18 14:00:00, u'Enc_Nbr'...	None	[{u'Relation': u'Brother', u'History': [{'Code...	1	Black/African American (Not Hispanic)	4	3	Declined to specify
89	60652	M	62	[{u'Enc_Date': 2012-10-16 03:45:00, u'Enc_Nbr'...	None	[{u'Relation': u'Father', u'History': [{'Code'...	9	Declined to specify	3	5	Declined to specify
146	60615	F	69	[{u'Enc_Date': 2012-11-04 19:45:00, u'Enc_Nbr'...	None	[{u'Relation': u'Father', u'History': [{'Code'...	5	Declined to specify	2	5	Declined to specify
196	60615	F	62	[{u'Enc_Date': 2016-10-05 00:45:00, u'Enc_Nbr'...	None	[{u'Relation': u'Father', u'History': [{'Code'...	2	Declined to specify	2	1	Declined to specify
327	60411	F	73	[{u'Enc_Date': 2011-12-04 11:00:00, u'Enc_Nbr'...	None	{}	1	Black or African American	0	6	African American

	Person_ID	Person_Nbr	Date_Created	Concept_ID	Description
69610	80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa	33	2014-12-18 15:51:19.607	41256004	Presbyopia
69608	80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa	33	2014-12-18 15:51:28.043	41446000	Blepharitis
69609	80d3df88-dddf-5ad3-7cc1-b7b1ac6151fa	33	2014-12-18 16:36:28.083	313436004	Type 2 diabetes mellitus without complication
46510	adca6fa4-e7d4-d7f8-cf41-27056662d84b	89	2014-08-12 03:04:55.010	81416004	Open angle with borderline findings
46511	adca6fa4-e7d4-d7f8-cf41-27056662d84b	89	2014-08-12 03:04:55.010	28998008	Retinal hemorrhage