In [2]:
import numpy as np
import pandas as pd
In [3]:
# Set some Pandas display options.
# Fully-qualified option names are used on purpose: set_option treats its first
# argument as a pattern that must match exactly one registered option, and short
# patterns such as 'html', 'max_columns' or 'max_rows' match several options in
# current pandas releases (which makes set_option raise OptionError).
# NOTE(review): 'html' is assumed to have meant display.notebook_repr_html — confirm.
pd.set_option('display.notebook_repr_html', False)  # plain-text reprs instead of HTML tables
pd.set_option('display.max_columns', 30)  # show at most 30 columns in a repr
pd.set_option('display.max_rows', 10)  # show at most 10 rows in a repr
In [4]:
# Load the object stored under the 'data' key of the HDF5 file into `data`.
data = pd.read_hdf(
    path_or_buf='/var/datasets/dshs/CD2007Q1/PUDF_base1q2007.h5',
    key='data',
)
In [5]:
# Print a summary of the loaded frame: its class, index range, and the label
# and dtype of every column, followed by a per-dtype column count.
data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 740288 entries, 0 to 740287
Data columns (total 167 columns):
Discharge object
THCIC_ID int64
Provider_Name object
4 object
5 object
6 object
7 object
8 object
9 object
10 object
11 object
12 object
13 int64
Sex object
15 object
16 object
Pat_State object
Pat_ZIP object
19 object
20 float64
21 float64
Admit_Weedkay int64
Length_of_Stay int64
Pat_Age int64
Pat_Status object
Race object
27 object
28 object
29 object
30 int64
31 float64
32 float64
33 float64
34 float64
35 float64
36 float64
37 float64
38 float64
39 float64
40 float64
41 float64
42 float64
43 float64
44 float64
45 float64
46 float64
47 float64
48 float64
49 float64
50 float64
51 float64
52 float64
53 float64
54 float64
55 float64
56 float64
57 float64
58 float64
59 float64
60 float64
Total_Charges float64
62 float64
63 float64
64 float64
65 float64
66 float64
Admitting_Diagnosis object
Princ_Diag_Code object
Oth_Diag_Code_1 object
Oth_Diag_Code_2 object
Oth_Diag_Code_3 object
Oth_Diag_Code_4 object
Oth_Diag_Code_5 object
Oth_Diag_Code_6 object
Oth_Diag_Code_7 object
Oth_Diag_Code_8 object
Oth_Diag_Code_9 object
Oth_Diag_Code_10 object
Oth_Diag_Code_11 object
Oth_Diag_Code_12 object
Oth_Diag_Code_13 object
Oth_Diag_Code_14 object
Oth_Diag_Code_15 object
Oth_Diag_Code_16 object
Oth_Diag_Code_17 object
Oth_Diag_Code_18 object
Oth_Diag_Code_19 object
Oth_Diag_Code_20 object
Oth_Diag_Code_21 object
Oth_Diag_Code_22 object
Oth_Diag_Code_23 object
Oth_Diag_Code_24 object
93 float64
94 float64
95 object
96 float64
97 float64
98 object
99 float64
100 float64
101 object
102 float64
103 float64
104 object
105 float64
106 float64
107 float64
108 float64
109 float64
110 float64
111 float64
112 float64
113 float64
114 float64
115 float64
116 float64
117 float64
118 float64
119 float64
120 float64
121 float64
122 float64
123 float64
124 float64
125 float64
126 float64
127 float64
128 float64
129 float64
130 float64
131 float64
132 float64
133 float64
134 float64
135 float64
136 float64
137 float64
138 float64
139 float64
140 float64
141 float64
142 float64
143 float64
144 float64
145 float64
146 float64
147 float64
148 float64
149 float64
150 float64
151 float64
152 float64
153 float64
154 float64
155 float64
156 float64
157 float64
158 float64
159 float64
160 float64
161 float64
162 float64
163 float64
164 float64
165 float64
166 float64
167 float64
dtypes: float64(109), int64(6), object(52)
In [6]:
# Discard every column whose label is not a string, keeping only the columns
# that carry a descriptive name (presumably the others are the integer-labelled
# fields shown by position number in the info() summary — confirm).
#
# The labels to remove are collected first so the frame is not modified while
# its own column index is being iterated, and they are removed in one call.
# isinstance() replaces the exact type() comparison so that labels which are
# subclasses of str are kept as well.
non_string_labels = [name for name in data.keys() if not isinstance(name, str)]
# In-place drop keeps `data` bound to the same object, as the original `del` did.
data.drop(non_string_labels, axis=1, inplace=True)
In [16]:
# Narrow the frame to the two diagnosis-code columns (all rows retained).
reduc = data.loc[:, ['Admitting_Diagnosis', 'Princ_Diag_Code']]
In [23]:
# Count the rows whose admitting diagnosis code equals the principal diagnosis
# code. The element-wise comparison yields a boolean Series; its vectorized
# .sum() counts the True entries without looping over the rows in Python (the
# builtin sum() iterates the Series one element at a time).
# int() keeps the displayed result a plain Python integer.
int((reduc['Admitting_Diagnosis'] == reduc['Princ_Diag_Code']).sum())
Out[23]:
411890
In [ ]:
Content source: mitliagkas/dshs
Similar notebooks: