In [1]:
import warnings
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
ytrain = pd.read_csv("ytrain23.csv")
ytest = pd.read_excel("ytest23.xlsx")

In [3]:
ytrain


Out[3]:
Unnamed: 0 readmitted readmitted_2 readmitted_3
0 0 NO 0 0
1 1 NO 0 0
2 2 NO 0 0
3 3 NO 0 0
4 4 NO 0 0
5 5 <30 1 1
6 6 >30 1 2
7 7 NO 0 0
8 8 >30 1 2
9 9 >30 1 2
10 10 NO 0 0
11 11 NO 0 0
12 12 NO 0 0
13 13 NO 0 0
14 14 NO 0 0
15 15 NO 0 0
16 16 NO 0 0
17 17 NO 0 0
18 18 >30 1 2
19 19 NO 0 0
20 20 <30 1 1
21 21 NO 0 0
22 22 >30 1 2
23 23 NO 0 0
24 24 >30 1 2
25 25 NO 0 0
26 26 NO 0 0
27 27 NO 0 0
28 28 NO 0 0
29 29 NO 0 0
... ... ... ... ...
49970 49970 <30 1 1
49971 49971 >30 1 2
49972 49972 >30 1 2
49973 49973 NO 0 0
49974 49974 NO 0 0
49975 49975 NO 0 0
49976 49976 NO 0 0
49977 49977 NO 0 0
49978 49978 >30 1 2
49979 49979 NO 0 0
49980 49980 NO 0 0
49981 49981 NO 0 0
49982 49982 NO 0 0
49983 49983 >30 1 2
49984 49984 NO 0 0
49985 49985 NO 0 0
49986 49986 NO 0 0
49987 49987 NO 0 0
49988 49988 NO 0 0
49989 49989 >30 1 2
49990 49990 <30 1 1
49991 49991 NO 0 0
49992 49992 NO 0 0
49993 49993 NO 0 0
49994 49994 NO 0 0
49995 49995 NO 0 0
49996 49996 >30 1 2
49997 49997 NO 0 0
49998 49998 >30 1 2
49999 49999 >30 1 2

50000 rows × 4 columns


In [4]:
ytest


Out[4]:
readmit_true readmit_true_2 readmit_true_3
0 <30 1 1
1 >30 1 2
2 NO 0 0
3 >30 1 2
4 >30 1 2
5 >30 1 2
6 >30 1 2
7 >30 1 2
8 <30 1 1
9 >30 1 2
10 >30 1 2
11 >30 1 2
12 >30 1 2
13 <30 1 1
14 >30 1 2
15 NO 0 0
16 >30 1 2
17 NO 0 0
18 >30 1 2
19 <30 1 1
20 NO 0 0
21 NO 0 0
22 NO 0 0
23 NO 0 0
24 NO 0 0
25 NO 0 0
26 >30 1 2
27 >30 1 2
28 >30 1 2
29 >30 1 2
... ... ... ...
12907 NO 0 0
12908 NO 0 0
12909 NO 0 0
12910 >30 1 2
12911 NO 0 0
12912 >30 1 2
12913 NO 0 0
12914 NO 0 0
12915 NO 0 0
12916 <30 1 1
12917 NO 0 0
12918 NO 0 0
12919 NO 0 0
12920 NO 0 0
12921 NO 0 0
12922 <30 1 1
12923 >30 1 2
12924 NO 0 0
12925 NO 0 0
12926 NO 0 0
12927 NO 0 0
12928 NO 0 0
12929 >30 1 2
12930 NO 0 0
12931 NO 0 0
12932 NO 0 0
12933 >30 1 2
12934 >30 1 2
12935 NO 0 0
12936 NO 0 0

12937 rows × 3 columns


In [3]:
ytrain2 = ytrain.readmitted_3.replace(2,0)
ytest2 = ytest.readmit_true_3.replace(2,0)

In [7]:
ytrain2.to_csv("Send/ytrain.csv")
ytest2.to_csv("Send/ytest.csv")

In [4]:
data = pd.read_csv("full data.csv")

In [5]:
print(data.shape[0])
print(data.shape[1])


62937
112

In [6]:
data.diag_1.value_counts()


Out[6]:
Circulatory        20105
Neoplasms           9032
Respiratory         8892
Digestive           6029
Diabetes            4708
Musculoskeletal     3726
Injury              3500
Other               3313
Genitourinary       3278
Name: diag_1, dtype: int64

In [7]:
data.diag_1 = pd.DataFrame(data.diag_1).fillna('Neoplasms')
data.diag_1.value_counts()


Out[7]:
Circulatory        20105
Neoplasms           9386
Respiratory         8892
Digestive           6029
Diabetes            4708
Musculoskeletal     3726
Injury              3500
Other               3313
Genitourinary       3278
Name: diag_1, dtype: int64

In [8]:
data.diag_2 = pd.DataFrame(data.diag_2).fillna('Neoplasms')
data.diag_3 = pd.DataFrame(data.diag_3).fillna('Neoplasms')
print(sum(pd.isnull(data.diag_2)))
print(sum(pd.isnull(data.diag_3)))


0
0

In [9]:
alldata = pd.get_dummies(data)

In [10]:
print(alldata.shape[0])
print(alldata.shape[1])


62937
136

In [11]:
pd.DataFrame(alldata).to_csv("alldata.csv")

In [12]:
xtrainA = alldata[0:50000]
xtestA = alldata[50000:62937]

In [13]:
xtrainA


Out[13]:
admission_type_id discharge_disposition_id admission_source_id time_in_hospital num_lab_procedures num_procedures num_medications number_outpatient number_emergency number_inpatient ... diag_2_Respiratory diag_3_Circulatory diag_3_Diabetes diag_3_Digestive diag_3_Genitourinary diag_3_Injury diag_3_Musculoskeletal diag_3_Neoplasms diag_3_Other diag_3_Respiratory
0 1 1 7 2 19 0 19 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
1 1 1 7 2 1 1 2 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
2 1 1 7 7 51 1 14 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
3 1 1 7 2 53 3 14 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
4 3 1 7 3 25 1 6 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
5 1 1 7 2 19 2 15 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
6 3 3 1 7 65 1 32 1 0 0 ... 0 0 0 0 0 0 0 0 1 0
7 3 1 1 3 55 2 20 2 0 1 ... 0 0 1 0 0 0 0 0 0 0
8 1 6 7 4 63 1 24 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
9 1 1 7 11 67 1 22 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
10 1 1 7 1 38 1 5 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
11 1 1 7 8 68 3 21 2 0 1 ... 0 1 0 0 0 0 0 0 0 0
12 3 1 1 2 11 0 14 1 1 1 ... 0 0 0 1 0 0 0 0 0 0
13 2 1 1 3 36 6 6 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
14 1 3 5 8 56 1 22 0 0 1 ... 0 0 0 1 0 0 0 0 0 0
15 1 1 7 10 83 1 28 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
16 1 1 7 5 62 6 22 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
17 1 1 7 4 46 0 18 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
18 1 1 7 2 26 0 10 2 0 0 ... 0 1 0 0 0 0 0 0 0 0
19 1 6 7 3 66 0 6 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
20 1 1 7 1 55 0 12 0 0 1 ... 1 0 1 0 0 0 0 0 0 0
21 2 1 1 3 56 0 20 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
22 1 1 7 2 18 6 9 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
23 1 1 7 2 49 0 13 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
24 1 6 7 5 30 1 15 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
25 3 1 1 7 50 4 8 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
26 1 13 7 3 40 1 15 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
27 1 6 7 3 9 0 14 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
28 6 1 17 2 13 2 25 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
29 1 1 7 2 10 0 6 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
49970 1 3 7 13 48 1 19 0 0 1 ... 0 0 1 0 0 0 0 0 0 0
49971 1 6 7 11 63 0 16 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
49972 1 2 7 12 67 5 24 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
49973 2 1 1 9 73 6 26 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
49974 1 1 7 6 57 2 14 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
49975 2 3 1 8 40 6 12 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
49976 1 1 7 1 41 0 4 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
49977 2 1 1 6 70 3 16 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
49978 3 1 2 3 49 1 35 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
49979 1 1 7 10 79 2 20 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
49980 1 1 7 6 50 0 12 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
49981 6 2 3 10 80 2 18 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
49982 2 1 6 10 65 0 11 0 0 0 ... 1 0 1 0 0 0 0 0 0 0
49983 1 1 7 1 38 0 5 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
49984 1 1 7 5 61 0 15 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
49985 1 1 7 5 64 1 21 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
49986 1 1 1 2 54 1 11 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
49987 3 1 1 5 49 0 19 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
49988 1 1 1 1 33 0 8 0 0 0 ... 1 0 1 0 0 0 0 0 0 0
49989 1 1 7 4 38 0 17 0 0 1 ... 0 1 0 0 0 0 0 0 0 0
49990 6 1 7 7 67 0 21 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
49991 3 6 1 8 10 2 12 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
49992 1 1 7 2 51 3 11 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
49993 1 3 7 2 39 0 18 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
49994 6 10 7 8 67 0 7 0 0 1 ... 0 0 0 0 0 0 0 1 0 0
49995 1 1 7 6 68 1 20 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
49996 1 1 1 4 67 1 18 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
49997 1 1 1 4 55 3 11 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
49998 1 1 7 2 44 2 17 0 0 1 ... 0 0 1 0 0 0 0 0 0 0
49999 1 1 7 3 31 0 15 0 0 1 ... 0 0 1 0 0 0 0 0 0 0

50000 rows × 136 columns


In [22]:
xtestA


Out[22]:
admission_type_id discharge_disposition_id admission_source_id time_in_hospital num_lab_procedures num_procedures num_medications number_outpatient number_emergency number_inpatient ... diag_2_Respiratory diag_3_Circulatory diag_3_Diabetes diag_3_Digestive diag_3_Genitourinary diag_3_Injury diag_3_Musculoskeletal diag_3_Neoplasms diag_3_Other diag_3_Respiratory
50000 1 1 7 7 44 0 11 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
50001 3 1 1 8 39 1 20 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
50002 3 2 1 4 55 2 22 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
50003 3 1 1 4 39 1 19 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
50004 1 1 1 3 65 0 7 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
50005 6 1 17 4 63 1 21 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
50006 1 1 1 3 47 3 10 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
50007 2 1 1 11 41 1 28 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
50008 1 1 7 3 46 0 16 0 0 1 ... 0 0 0 0 0 0 0 1 0 0
50009 3 1 1 4 79 3 25 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
50010 1 1 7 1 42 0 11 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
50011 3 6 4 6 13 2 19 0 0 0 ... 1 1 0 0 0 0 0 0 0 0
50012 2 1 1 3 34 2 13 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
50013 1 1 7 3 55 0 9 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
50014 1 6 7 4 39 1 16 0 1 0 ... 0 1 0 0 0 0 0 0 0 0
50015 2 1 1 8 69 0 26 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
50016 2 1 1 10 37 1 15 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
50017 1 1 7 4 53 0 11 0 0 1 ... 0 0 0 1 0 0 0 0 0 0
50018 1 3 7 5 39 0 17 0 0 3 ... 0 0 0 0 0 0 1 0 0 0
50019 1 3 7 5 72 0 10 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
50020 2 1 2 3 38 0 15 0 0 5 ... 0 0 1 0 0 0 0 0 0 0
50021 3 1 1 4 29 1 10 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
50022 3 1 1 4 67 1 14 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
50023 1 1 7 12 63 3 16 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
50024 1 1 7 4 45 2 21 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
50025 6 1 7 1 43 0 9 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
50026 1 3 7 9 43 5 21 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
50027 6 6 17 10 98 5 42 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
50028 1 1 7 3 46 1 12 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
50029 1 1 7 4 39 1 17 0 0 0 ... 1 0 0 0 0 0 0 0 1 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
62907 1 1 7 3 45 0 14 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
62908 3 1 1 5 42 3 14 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
62909 3 1 1 1 23 2 19 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
62910 2 3 5 5 83 0 16 0 0 1 ... 0 0 0 0 0 0 0 1 0 0
62911 2 1 1 3 53 0 5 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
62912 2 1 1 2 2 0 15 0 0 0 ... 1 0 0 0 0 0 0 1 0 0
62913 1 1 7 2 42 1 21 0 0 1 ... 0 1 0 0 0 0 0 0 0 0
62914 1 1 7 3 57 1 1 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
62915 2 1 1 7 54 0 19 1 0 0 ... 0 0 0 0 1 0 0 0 0 0
62916 1 3 5 9 86 1 17 0 0 1 ... 0 1 0 0 0 0 0 0 0 0
62917 1 1 7 2 60 1 20 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
62918 1 3 7 4 19 2 10 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
62919 1 1 7 1 44 0 9 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
62920 1 6 7 13 74 1 31 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
62921 3 1 1 2 3 2 11 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
62922 1 3 7 10 48 2 22 4 0 0 ... 1 0 0 1 0 0 0 0 0 0
62923 1 6 7 3 28 0 14 5 0 0 ... 0 0 0 0 0 0 0 0 0 1
62924 2 1 7 2 58 2 13 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
62925 1 1 7 5 45 4 17 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
62926 1 2 7 3 70 0 10 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
62927 2 1 1 1 1 4 5 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
62928 8 1 1 1 41 2 6 1 0 0 ... 1 0 0 0 1 0 0 0 0 0
62929 1 3 7 4 65 0 17 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
62930 1 1 7 3 68 0 10 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
62931 1 1 1 1 34 1 18 0 0 0 ... 1 1 0 0 0 0 0 0 0 0
62932 3 6 1 3 21 1 17 2 2 0 ... 0 0 0 0 0 0 0 1 0 0
62933 2 1 4 3 22 4 13 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
62934 1 1 7 4 45 1 19 1 0 0 ... 0 1 0 0 0 0 0 0 0 0
62935 1 1 7 4 39 0 16 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
62936 2 1 1 2 30 0 9 0 0 0 ... 0 0 0 0 0 0 0 1 0 0

12937 rows × 136 columns


In [23]:
xtrainA.to_csv("xtrainA.csv")
xtestA.to_csv("xtestA.csv")

In [15]:
fsel=pd.read_csv("features selected.csv")
xtrainB=fsel[0:50000]
xtestB=fsel[50000:62937]

In [18]:
xtrainB.to_csv("xtrainB.csv")
xtestB.to_csv("xtestB.csv")

In [16]:
#Age changed
data = pd.read_excel("data.xlsx")
print(data.race.value_counts())
data.race = data.race.replace('?','Caucasian')
print(data.race.value_counts())


Caucasian          47016
AfricanAmerican    11363
?                   1729
Hispanic            1345
Other               1043
Asian                441
Name: race, dtype: int64
Caucasian          48745
AfricanAmerican    11363
Hispanic            1345
Other               1043
Asian                441
Name: race, dtype: int64

In [17]:
Data = pd.get_dummies(data)
print(Data.shape[0])
print(Data.shape[1])
xtrainC = Data[0:50000]
xtestC = Data[50000:62937]
xtrainC.to_csv("xtrainC.csv")
xtestC.to_csv("xtestC.csv")


62937
129

In [28]:
#SGD on 136 features xtrainA and xtestA with 2 readmit values with log
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
#Applying SGDClassifierModel
reg=SGDClassifier(loss="log", penalty="elasticnet",n_iter=70, random_state=0)
#reg=SGDClassifier(penalty='l1')
reg.fit(xtrainA,ytrain2)
print(accuracy_score(ytrain2, reg.predict(xtrainA)))
print(reg.score(xtrainA,ytrain2))
sgd136 = reg.predict(xtestA)
#output= reg.decision_function(all_data_test)
#plt.hist(output)
#plt.show()
print("model fit done")
print(sgd136)

pd.DataFrame(sgd136).to_csv('ytestsgd136.csv')
print(np.sum(sgd136))

print('Accuracy for 2 clusters: ',np.sum(sgd136==ytest2)/len(ytest2))


0.2315
0.2315
model fit done
[1 1 1 ..., 1 1 1]
10683
Accuracy for 2 clusters:  0.239159001314

In [34]:
#SGD on 136 features xtrainA and xtestA with 2 readmit values with log
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
#Applying SGDClassifierModel
reg=SGDClassifier(loss="log", penalty="l1")
#reg=SGDClassifier(penalty='l1')
reg.fit(xtrainA,ytrain2)
print(accuracy_score(ytrain2, reg.predict(xtrainA)))
print(reg.score(xtrainA,ytrain2))
sgd136 = reg.predict(xtestA)
#output= reg.decision_function(all_data_test)
#plt.hist(output)
#plt.show()
print("model fit done")
print(sgd136)

pd.DataFrame(sgd136).to_csv('ytestsgd136.csv')
print(np.sum(sgd136))

print('Accuracy for 2 clusters: ',np.sum(sgd136==ytest2)/len(ytest2))


0.90614
0.90614
model fit done
[0 0 0 ..., 0 0 0]
97
Accuracy for 2 clusters:  0.906856303625

In [35]:
#SGD on 26 features xtrainA and xtestA with 2 readmit values with log
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
#Applying SGDClassifierModel
reg=SGDClassifier(loss="log", penalty="l1")
#reg=SGDClassifier(penalty='l1')
reg.fit(xtrainB,ytrain2)
print(accuracy_score(ytrain2, reg.predict(xtrainB)))
print(reg.score(xtrainB,ytrain2))
sgd26 = reg.predict(xtestB)
#output= reg.decision_function(all_data_test)
#plt.hist(output)
#plt.show()
print("model fit done")
print(sgd26)

pd.DataFrame(sgd26).to_csv('ytestsgd26.csv')
print(np.sum(sgd26))

print('Accuracy for 2 clusters: ',np.sum(sgd26==ytest2)/len(ytest2))


0.84884
0.84884
model fit done
[0 0 0 ..., 0 0 0]
912
Accuracy for 2 clusters:  0.863183118188

In [33]:
#SGD on 129 features xtrainA and xtestA with 2 readmit values with log
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
#Applying SGDClassifierModel
reg=SGDClassifier(loss="log", penalty="l1")
#reg=SGDClassifier(penalty='l1')
reg.fit(xtrainC,ytrain2)
print(accuracy_score(ytrain2, reg.predict(xtrainC)))
print(reg.score(xtrainC,ytrain2))
sgd129 = reg.predict(xtestC)
#output= reg.decision_function(all_data_test)
#plt.hist(output)
#plt.show()
print("model fit done")
print(sgd129)

pd.DataFrame(sgd129).to_csv('ytestsgd129.csv')
print(np.sum(sgd129))

print('Accuracy for 2 clusters: ',np.sum(sgd129==ytest2)/len(ytest2))


0.86268
0.86268
model fit done
[0 0 0 ..., 0 0 0]
747
Accuracy for 2 clusters:  0.869598825075

In [36]:
#SGD on 26 features xtrainA and xtestA with 2 readmit values with log
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
#Applying SGDClassifierModel
reg=SGDClassifier(loss="log", penalty="elasticnet",n_iter=70, random_state=0)
#reg=SGDClassifier(penalty='l1')
reg.fit(xtrainB,ytrain2)
print(accuracy_score(ytrain2, reg.predict(xtrainB)))
print(reg.score(xtrainB,ytrain2))
sgd26 = reg.predict(xtestB)
#output= reg.decision_function(all_data_test)
#plt.hist(output)
#plt.show()
print("model fit done")
print(sgd26)

#pd.DataFrame(sgd26).to_csv('ytestsgd26.csv')
#print(np.sum(sgd26))

print('Accuracy for 2 clusters: ',np.sum(sgd26==ytest2)/len(ytest2))


0.22558
0.22558
model fit done
[1 1 1 ..., 1 1 1]
Accuracy for 2 clusters:  0.23119734096

In [37]:
#SGD on 129 features xtrainA and xtestA with 2 readmit values with log
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
#Applying SGDClassifierModel
reg=SGDClassifier(loss="log", penalty="elasticnet",n_iter=70, random_state=0)
#reg=SGDClassifier(penalty='l1')
reg.fit(xtrainC,ytrain2)
print(accuracy_score(ytrain2, reg.predict(xtrainC)))
print(reg.score(xtrainC,ytrain2))
sgd129 = reg.predict(xtestC)
#output= reg.decision_function(all_data_test)
#plt.hist(output)
#plt.show()
print("model fit done")
print(sgd129)

#pd.DataFrame(sgd129).to_csv('ytestsgd129.csv')
#print(np.sum(sgd129))

print('Accuracy for 2 clusters: ',np.sum(sgd129==ytest2)/len(ytest2))


0.22918
0.22918
model fit done
[1 1 1 ..., 1 1 1]
Accuracy for 2 clusters:  0.236840071114

In [39]:
#GaussianNB
from sklearn.naive_bayes import GaussianNB
clfA = GaussianNB()
clfA.fit(xtrainA, ytrain2)
GaussianNB(priors=None)
print(clfA.predict(xtestA))
pred=clfA.predict(xtestA)

print('Accuracy for 2 clusters: ',np.sum(pred==ytest2)/len(ytest2))


[1 1 1 ..., 1 1 1]
Accuracy for 2 clusters:  0.104274561336

In [40]:
#GaussianNB
from sklearn.naive_bayes import GaussianNB
clfB = GaussianNB()
clfB.fit(xtrainB, ytrain2)
GaussianNB(priors=None)
print(clfB.predict(xtestB))
pred=clfB.predict(xtestB)

print('Accuracy for 2 clusters: ',np.sum(pred==ytest2)/len(ytest2))


[0 0 0 ..., 0 0 0]
Accuracy for 2 clusters:  0.882971322563

In [41]:
#GaussianNB
from sklearn.naive_bayes import GaussianNB
clfC = GaussianNB()
clfC.fit(xtrainC, ytrain2)
GaussianNB(priors=None)
print(clfC.predict(xtestC))
pred=clfC.predict(xtestC)

print('Accuracy for 2 clusters: ',np.sum(pred==ytest2)/len(ytest2))


[1 1 1 ..., 1 1 1]
Accuracy for 2 clusters:  0.0996367009353

In [43]:
# k-means on 136 features for 2 clusters (NO & >30 = 0, <30 = 1)
from sklearn.cluster import KMeans

# Input
kmeansA = KMeans(n_clusters=2, random_state=0).fit(xtrainA)

# Gives labels of training points after classification into 2 clusters
print(kmeansA.labels_)

ytrainApred = pd.DataFrame(kmeansA.labels_)
#ytraind2Apred.to_csv('with diag/ytrain2Apred.csv')

c1A=ytrain2[kmeansA.labels_==0]
#saves the ytrain2 values in c1 at indices only pertaining to the label 0
#assume kmeans_labels = {0,1,0,0,1,0} & ytrain2.readmitted = {A,B,C,D,E,F}, then c1 = {A,C,D,F}

c2A=ytrain2[kmeansA.labels_==1]

#Observations of label 0
print("Observations of label 0: ",c1A.shape[0])

#Observations of label 1
print("Observations of label 1: ",c2A.shape[0])

print("Number of YES observations in label 0: ",sum(c1A))
print("Number of YES observations in label 1: ",sum(c2A))


[1 1 0 ..., 0 0 1]
Observations of label 0:  29191
Observations of label 1:  20809
Number of YES observations in label 0:  2811
Number of YES observations in label 1:  1608

In [44]:
#Hence, label 0 corresponds to YES. But initially YES was mapped as 1, therefore swap 0 and 1 in ytest2
ytest2


Out[44]:
0        1
1        0
2        0
3        0
4        0
5        0
6        0
7        0
8        1
9        0
10       0
11       0
12       0
13       1
14       0
15       0
16       0
17       0
18       0
19       1
20       0
21       0
22       0
23       0
24       0
25       0
26       0
27       0
28       0
29       0
        ..
12907    0
12908    0
12909    0
12910    0
12911    0
12912    0
12913    0
12914    0
12915    0
12916    1
12917    0
12918    0
12919    0
12920    0
12921    0
12922    1
12923    0
12924    0
12925    0
12926    0
12927    0
12928    0
12929    0
12930    0
12931    0
12932    0
12933    0
12934    0
12935    0
12936    0
Name: readmit_true_3, dtype: int64

In [51]:
#SWAP 0 & 1 in ytest2
ytest2s = ytest2.replace(0,'temp')
ytest2s = ytest2s.replace(1,0)
ytest2s = ytest2s.replace('temp',1)
ytest2s


Out[51]:
0        0
1        1
2        1
3        1
4        1
5        1
6        1
7        1
8        0
9        1
10       1
11       1
12       1
13       0
14       1
15       1
16       1
17       1
18       1
19       0
20       1
21       1
22       1
23       1
24       1
25       1
26       1
27       1
28       1
29       1
        ..
12907    1
12908    1
12909    1
12910    1
12911    1
12912    1
12913    1
12914    1
12915    1
12916    0
12917    1
12918    1
12919    1
12920    1
12921    1
12922    0
12923    1
12924    1
12925    1
12926    1
12927    1
12928    1
12929    1
12930    1
12931    1
12932    1
12933    1
12934    1
12935    1
12936    1
Name: readmit_true_3, dtype: int64

In [52]:
# Predicts labels ytest2
ytestApred = kmeansA.predict(xtestA)
print("Predicted labels for ftest:",ytestApred)

#write to CSV
#pd.DataFrame(ytest2Apred).to_csv('with diag/ytest2Apred.csv')

# Cluster center co-ordinates
print("Cluster Centers:\n",kmeansA.cluster_centers_)

print('Accuracy for 2 clusters: ',np.sum(ytestApred==ytest2)/len(ytest2))


Predicted labels for ftest: [0 1 0 ..., 0 1 1]
Cluster Centers:
 [[  1.95676009e+00   3.64273967e+00   5.95747961e+00   5.04656342e+00
    5.69830056e+01   1.48293702e+00   1.74809840e+01   2.76879326e-01
    1.12040019e-01   2.10134996e-01   7.55074351e+00   1.88686356e-01
    6.13307750e-03   7.39841020e-01   2.07633797e-02   1.62749263e-02
    5.33474954e-01   4.66490783e-01   3.42630028e-05   9.59364079e-04
    4.72829439e-03   1.43904612e-02   3.64900980e-02   9.53881998e-02
    1.72137326e-01   2.18323854e-01   2.59576509e-01   1.69053656e-01
    2.89522374e-02   6.81833756e-03   7.12670458e-03   9.76701158e-01
    9.35379977e-03   5.87610498e-02   1.27698211e-01   7.35386829e-01
    7.81539094e-02   6.71554855e-03   7.97402864e-01   1.82793120e-01
    1.30884671e-02   5.48208045e-04   9.86397588e-01   1.15466319e-02
    1.50757212e-03   2.05578017e-04   9.93078873e-01   6.47570753e-03
    2.39841020e-04   0.00000000e+00   9.98903584e-01   9.59364079e-04
    1.37052011e-04   2.46693620e-03   9.46652505e-01   4.70773659e-02
    3.80319331e-03   9.99965737e-01   3.42630028e-05   6.61275954e-03
    8.65997396e-01   1.17179470e-01   1.02103748e-02   7.53786062e-03
    8.86006990e-01   9.57308298e-02   1.07243199e-02   9.99862948e-01
    1.37052011e-04   1.61036113e-03   9.25957651e-01   6.89714247e-02
    3.46056328e-03   1.23346810e-03   9.33015830e-01   6.33522922e-02
    2.39841020e-03   9.96882067e-01   2.91235524e-03   2.05578017e-04
    0.00000000e+00   9.99725896e-01   2.74104022e-04   0.00000000e+00
    9.99931474e-01   6.85260056e-05   9.99623107e-01   3.76893031e-04
    1.00000000e+00   1.00000000e+00   1.19269513e-01   4.47200713e-01
    3.20153498e-01   1.13376276e-01   3.42630028e-05   9.93147399e-01
    6.78407456e-03   3.42630028e-05   9.99897211e-01   1.02789008e-04
    1.00000000e+00   9.99965737e-01   3.42630028e-05   1.00000000e+00
   -1.03541307e-17   4.81018296e-01   5.18981704e-01   2.20413897e-01
    7.79586103e-01   3.31837182e-01   8.21626807e-02   1.02891797e-01
    5.55403276e-02   5.15315562e-02   3.48112109e-02   1.43596245e-01
    4.83450970e-02   1.49283903e-01   3.28171041e-01   1.21256767e-01
    4.49187967e-02   8.71308161e-02   2.23394778e-02   1.46303022e-02
    1.92866443e-01   7.34598780e-02   1.15226478e-01   3.09394915e-01
    1.70218598e-01   4.38566436e-02   7.22949359e-02   2.22709518e-02
    1.85705475e-02   2.01397931e-01   8.39443569e-02   7.80511204e-02]
 [  2.31757471e+00   3.38368406e+00   5.32031325e+00   3.33078697e+00
    2.47444509e+01   1.42505045e+00   1.37034688e+01   2.85432882e-01
    9.71941962e-02   1.18237725e-01   7.03060440e+00   1.72287883e-01
    8.11953493e-03   7.52714519e-01   2.21965984e-02   1.72960507e-02
    5.35312770e-01   4.64639185e-01   4.80445854e-05   8.64802537e-04
    3.60334390e-03   1.43172864e-02   3.96367829e-02   9.96925147e-02
    1.83866628e-01   2.32199481e-01   2.52954742e-01   1.49658883e-01
    2.32055347e-02   2.35418468e-02   1.50859998e-02   9.16258288e-01
    4.51138657e-02   1.99865475e-02   3.55529932e-02   9.21639281e-01
    2.28211781e-02   5.33294898e-03   7.75391563e-01   2.08561545e-01
    1.07139425e-02   1.44133756e-04   9.87556452e-01   1.18670126e-02
    4.32401268e-04   4.80445854e-05   9.92601134e-01   7.15864322e-03
    1.92178342e-04   0.00000000e+00   9.98943019e-01   1.05698088e-03
   -2.58988794e-17   8.16757951e-04   9.47487268e-01   4.99663688e-02
    1.72960507e-03   1.00000000e+00  -6.47471985e-18   3.60334390e-03
    8.78303065e-01   1.12952820e-01   5.14077064e-03   3.93965600e-03
    8.90794657e-01   9.87796675e-02   6.48601903e-03   9.99807822e-01
    1.92178342e-04   5.28490439e-04   9.22600173e-01   7.54299990e-02
    1.44133756e-03   7.68713366e-04   9.32161045e-01   6.58210820e-02
    1.24915922e-03   9.97405592e-01   2.54636302e-03   4.80445854e-05
    0.00000000e+00   9.99759777e-01   2.40222927e-04   0.00000000e+00
    1.00000000e+00  -1.29494397e-17   9.99567599e-01   4.32401268e-04
    1.00000000e+00   1.00000000e+00   8.33093110e-02   5.44489286e-01
    2.98789276e-01   7.34121265e-02   9.60891708e-05   9.91544153e-01
    8.16757951e-03   1.92178342e-04   9.99903911e-01   9.60891708e-05
    1.00000000e+00   1.00000000e+00  -6.47471985e-18   9.99951955e-01
    4.80445854e-05   4.09916402e-01   5.90083598e-01   2.65061978e-01
    7.34938022e-01   2.98212741e-01   6.42356106e-02   8.76333237e-02
    4.79965408e-02   6.25060056e-02   9.52724128e-02   1.55856635e-01
    5.72691458e-02   1.31017584e-01   3.28817142e-01   1.54223119e-01
    3.95887383e-02   6.82713558e-02   2.78658595e-02   2.42144710e-02
    1.78293456e-01   8.55674066e-02   9.31584510e-02   3.30738926e-01
    2.18074373e-01   3.90602479e-02   4.92937446e-02   2.00826367e-02
    2.46949169e-02   1.74065533e-01   8.12914385e-02   6.26981839e-02]]
Accuracy for 2 clusters:  0.552601066708

In [53]:
# k-means on 26 features for 2 clusters (NO & >30 = 0, <30 = 1)
from sklearn.cluster import KMeans

# Input
kmeansB = KMeans(n_clusters=2, random_state=0).fit(xtrainB)

# Gives labels of training points after classification into 2 clusters
print(kmeansB.labels_)

ytrainBpred = pd.DataFrame(kmeansB.labels_)
#ytraind2Apred.to_csv('with diag/ytrain2Apred.csv')

c1B=ytrain2[kmeansB.labels_==0]
#saves the ytrain2 values in c1 at indices only pertaining to the label 0
#assume kmeans_labels = {0,1,0,0,1,0} & ytrain2.readmitted = {A,B,C,D,E,F}, then c1 = {A,C,D,F}

c2B=ytrain2[kmeansA.labels_==1]

#Observations of label 0
print("Observations of label 0: ",c1B.shape[0])

#Observations of label 1
print("Observations of label 1: ",c2B.shape[0])

print("Number of YES observations in label 0: ",sum(c1B))
print("Number of YES observations in label 1: ",sum(c2B))


[1 1 0 ..., 0 0 1]
Observations of label 0:  29181
Observations of label 1:  20809
Number of YES observations in label 0:  2808
Number of YES observations in label 1:  1608

In [54]:
#Hence, label 0 corresponds to YES. But initially YES was mapped as 1, therefore swap 0 and 1 in ytest2
ytest2


Out[54]:
0        1
1        0
2        0
3        0
4        0
5        0
6        0
7        0
8        1
9        0
10       0
11       0
12       0
13       1
14       0
15       0
16       0
17       0
18       0
19       1
20       0
21       0
22       0
23       0
24       0
25       0
26       0
27       0
28       0
29       0
        ..
12907    0
12908    0
12909    0
12910    0
12911    0
12912    0
12913    0
12914    0
12915    0
12916    1
12917    0
12918    0
12919    0
12920    0
12921    0
12922    1
12923    0
12924    0
12925    0
12926    0
12927    0
12928    0
12929    0
12930    0
12931    0
12932    0
12933    0
12934    0
12935    0
12936    0
Name: readmit_true_3, dtype: int64

In [55]:
ytest2s


Out[55]:
0        0
1        1
2        1
3        1
4        1
5        1
6        1
7        1
8        0
9        1
10       1
11       1
12       1
13       0
14       1
15       1
16       1
17       1
18       1
19       0
20       1
21       1
22       1
23       1
24       1
25       1
26       1
27       1
28       1
29       1
        ..
12907    1
12908    1
12909    1
12910    1
12911    1
12912    1
12913    1
12914    1
12915    1
12916    0
12917    1
12918    1
12919    1
12920    1
12921    1
12922    0
12923    1
12924    1
12925    1
12926    1
12927    1
12928    1
12929    1
12930    1
12931    1
12932    1
12933    1
12934    1
12935    1
12936    1
Name: readmit_true_3, dtype: int64

In [56]:
# Predicts labels ytest2
ytestBpred = kmeansB.predict(xtestB)
print("Predicted labels for ftest:",ytestBpred)

#write to CSV
#pd.DataFrame(ytest2Apred).to_csv('with diag/ytest2Apred.csv')

# Cluster center co-ordinates
print("Cluster Centers:\n",kmeansB.cluster_centers_)

print('Accuracy for 2 clusters: ',np.sum(ytestBpred==ytest2)/len(ytest2))


Predicted labels for ftest: [0 1 0 ..., 0 1 1]
Cluster Centers:
 [[  1.95666929   3.64152754   5.95876041   5.04686161  56.99129272
    1.48267115  17.48184841   0.27695314   0.11209763   0.21017449
    7.55106099   0.73984437   0.53344075   0.46652497   0.218299
    0.25964142   0.79733297   0.4471907    0.32014672   0.48109424
    0.51890576   0.22035583   0.77964417   0.33180213   0.32813411
    0.30948545]
 [  2.31744203   3.3855682    5.31897835   3.33160497  24.75606126
    1.4254645   13.70497864   0.28532335   0.0971242    0.1182486
    7.03053435   0.75270056   0.53535935   0.46459264   0.2322243
    0.2528686    0.77550531   0.54443324   0.29881415   0.40986125
    0.59013875   0.26511114   0.73488886   0.29828604   0.3288684
    0.33059676]]
Accuracy for 2 clusters:  0.552446471361

In [57]:
# k-means on 129 features for 2 clusters (NO & >30 = 0, <30 = 1)
from sklearn.cluster import KMeans

# Input
kmeansC = KMeans(n_clusters=2, random_state=0).fit(xtrainC)

# Gives labels of training points after classification into 2 clusters
print(kmeansC.labels_)

ytrainCpred = pd.DataFrame(kmeansC.labels_)
#ytraind2Apred.to_csv('with diag/ytrain2Apred.csv')

c1C=ytrain2[kmeansC.labels_==0]
#saves the ytrain2 values in c1 at indices only pertaining to the label 0
#assume kmeans_labels = {0,1,0,0,1,0} & ytrain2.readmitted = {A,B,C,D,E,F}, then c1 = {A,C,D,F}

c2C=ytrain2[kmeansC.labels_==1]

#Observations of label 0
print("Observations of label 0: ",c1C.shape[0])

#Observations of label 1
print("Observations of label 1: ",c2C.shape[0])

print("Number of YES observations in label 0: ",sum(c1C))
print("Number of YES observations in label 1: ",sum(c2C))


[1 1 0 ..., 0 0 1]
Observations of label 0:  29191
Observations of label 1:  20809
Number of YES observations in label 0:  2811
Number of YES observations in label 1:  1608

In [58]:
# Predicts labels ytest2
ytestCpred = kmeansC.predict(xtestC)
print("Predicted labels for ftest:",ytestCpred)

#write to CSV
#pd.DataFrame(ytest2Apred).to_csv('with diag/ytest2Apred.csv')

# Cluster center co-ordinates
print("Cluster Centers:\n",kmeansC.cluster_centers_)

print('Accuracy for 2 clusters: ',np.sum(ytestCpred==ytest2)/len(ytest2))


Predicted labels for ftest: [0 1 0 ..., 0 1 1]
Cluster Centers:
 [[  1.95672731e+00   3.64282043e+00   5.95751533e+00   5.04642478e+00
    5.69824922e+01   1.48288622e+00   1.74806249e+01   2.76938363e-01
    1.12070442e-01   2.10127797e-01   7.55072464e+00   1.88679892e-01
    6.13286737e-03   7.68150204e-01   2.07626683e-02   1.62743687e-02
    5.33490938e-01   4.66474800e-01   3.42618289e-05   2.00774317e-02
    3.04005208e-01   6.75917360e-01   3.31825813e-01   8.21598657e-02
    1.02888272e-01   5.55384246e-02   5.15297907e-02   3.48100182e-02
    1.37115839e-01   5.48531881e-02   1.49278789e-01   3.28159797e-01
    1.21252612e-01   4.49172577e-02   8.71278309e-02   2.23387124e-02
    1.46298009e-02   1.88953986e-01   7.73974715e-02   1.15222531e-01
    3.09384315e-01   1.70247028e-01   4.38551410e-02   7.22924590e-02
    2.22701888e-02   1.85699113e-02   1.97416658e-01   8.79158529e-02
    7.80484462e-02   6.81810395e-03   7.12646041e-03   9.76701956e-01
    9.35347929e-03   5.87590366e-02   1.27693836e-01   7.35395895e-01
    7.81512317e-02   6.71531846e-03   7.97409806e-01   1.82786857e-01
    1.30880186e-02   5.48189262e-04   9.86398054e-01   1.15462363e-02
    1.50752047e-03   2.05570973e-04   9.93079111e-01   6.47548566e-03
    2.39832802e-04   0.00000000e+00   9.98903621e-01   9.59331209e-04
    1.37047316e-04   2.46685168e-03   9.46654332e-01   4.70757529e-02
    3.80306301e-03   9.99965738e-01   3.42618289e-05   6.61253298e-03
    8.66001987e-01   1.17175455e-01   1.02100250e-02   7.53760236e-03
    8.86010895e-01   9.57275499e-02   1.07239524e-02   9.99862953e-01
    1.37047316e-04   1.61030596e-03   9.25925926e-01   6.90033234e-02
    3.46044472e-03   1.23342584e-03   9.33018125e-01   6.33501216e-02
    2.39832802e-03   9.96882174e-01   2.91225546e-03   2.05570973e-04
    0.00000000e+00   9.99725905e-01   2.74094631e-04   0.00000000e+00
    9.99931476e-01   6.85236578e-05   9.99623120e-01   3.76880118e-04
    1.00000000e+00   1.00000000e+00   1.19265426e-01   4.47219653e-01
    3.20142529e-01   1.13372392e-01   3.42618289e-05   9.93147634e-01
    6.78384212e-03   3.42618289e-05   9.99897215e-01   1.02785487e-04
    1.00000000e+00   9.99965738e-01   3.42618289e-05   1.00000000e+00
   -1.03541307e-17   4.81001816e-01   5.18998184e-01   2.20406345e-01
    7.79593655e-01]
 [  2.31763801e+00   3.38355835e+00   5.32023255e+00   3.33089896e+00
    2.47436218e+01   1.42511892e+00   1.37037909e+01   2.85350502e-01
    9.71508192e-02   1.18243406e-01   7.03060587e+00   1.72296161e-01
    8.11992505e-03   7.80089367e-01   2.21976649e-02   1.72968818e-02
    5.35290443e-01   4.64661510e-01   4.80468938e-05   1.87863355e-02
    3.23211454e-01   6.58002210e-01   2.98227070e-01   6.42386970e-02
    8.76375342e-02   4.79988469e-02   6.25090088e-02   9.52769903e-02
    1.51299668e-01   6.17883054e-02   1.31023879e-01   3.28832941e-01
    1.54230529e-01   3.95906405e-02   6.82746360e-02   2.78671984e-02
    2.42156345e-02   1.75563350e-01   8.82621439e-02   9.31629270e-02
    3.30754817e-01   2.18036804e-01   3.90621246e-02   4.92961130e-02
    2.00836016e-02   2.46961034e-02   1.71094989e-01   8.42742517e-02
    6.27011964e-02   2.35429779e-02   1.50867246e-02   9.16254264e-01
    4.51160332e-02   1.99875078e-02   3.55547014e-02   9.21635516e-01
    2.28222745e-02   5.33320521e-03   7.75380772e-01   2.08571566e-01
    1.07144573e-02   1.44140681e-04   9.87555855e-01   1.18675828e-02
    4.32422044e-04   4.80468938e-05   9.92600778e-01   7.15898717e-03
    1.92187575e-04   0.00000000e+00   9.98942968e-01   1.05703166e-03
   -2.58853269e-17   8.16797194e-04   9.47484745e-01   4.99687695e-02
    1.72968818e-03   1.00000000e+00  -6.47133172e-18   3.60351703e-03
    8.78297218e-01   1.12958247e-01   5.14101763e-03   3.93984529e-03
    8.90789410e-01   9.87844136e-02   6.48633066e-03   9.99807812e-01
    1.92187575e-04   5.28515831e-04   9.22644501e-01   7.53855763e-02
    1.44140681e-03   7.68750300e-04   9.32157786e-01   6.58242445e-02
    1.24921924e-03   9.97405468e-01   2.54648537e-03   4.80468938e-05
    0.00000000e+00   9.99759766e-01   2.40234469e-04   0.00000000e+00
    1.00000000e+00  -1.29426634e-17   9.99567578e-01   4.32422044e-04
    1.00000000e+00   1.00000000e+00   8.33133138e-02   5.44467400e-01
    2.98803632e-01   7.34156537e-02   9.60937875e-05   9.91543747e-01
    8.16797194e-03   1.92187575e-04   9.99903906e-01   9.60937875e-05
    1.00000000e+00   1.00000000e+00  -6.47133172e-18   9.99951953e-01
    4.80468938e-05   4.09936098e-01   5.90063902e-01   2.65074713e-01
    7.34925287e-01]]
Accuracy for 2 clusters:  0.552601066708

In [23]:
#DecisionTreeClassifier136
from sklearn.tree import DecisionTreeClassifier

dtcA = DecisionTreeClassifier(random_state=0)
dtcA.fit(xtrainA, ytrain2)
print(dtcA.predict(xtestA))
predA=dtcA.predict(xtestA)

print('Accuracy for 136 features: ',np.sum(predA==ytest2)/len(ytest2))


[0 0 0 ..., 0 0 0]
Accuracy for 136 features:  0.833500811626

In [24]:
#DecisionTreeClassifier26
from sklearn.tree import DecisionTreeClassifier

dtcB = DecisionTreeClassifier(random_state=0)
dtcB.fit(xtrainB, ytrain2)
print(dtcB.predict(xtestB))
predB=dtcB.predict(xtestB)

print('Accuracy for 26 features: ',np.sum(predB==ytest2)/len(ytest2))


[0 0 1 ..., 0 0 0]
Accuracy for 26 features:  0.824998067558

In [25]:
#DecisionTreeClassifier129
from sklearn.tree import DecisionTreeClassifier

dtcC = DecisionTreeClassifier(random_state=0)
dtcC.fit(xtrainC, ytrain2)
print(dtcC.predict(xtestC))
predC=dtcC.predict(xtestC)

print('Accuracy for 129 features: ',np.sum(predC==ytest2)/len(ytest2))


[0 0 1 ..., 0 0 0]
Accuracy for 129 features:  0.832805132566

In [3]:
import matplotlib.pyplot as plt
c136=[0.906856304,0.104274561,0.552601067,0.833500812]
c26=[0.863183118,0.882971323,0.552446471,0.824998068]
c129=[0.869598825,0.099636701,0.552601067,0.832805133]
#pd.DataFrame.plot(kind='scatter',x='c1x',y='c1y',color='b')
#plt.scatter(c1x,c1y,label='C1',color='b')
#plt.scatter(c2x,c2y,label='C2',color='g')
plt.hist(c136,label='c136',color='r')
plt.hist(c26,label='c26',color='b')
plt.hist(c129,label='c129',color='g')
#plt.plot([-4,6],[7,-4],label='L1',color='y')
#plt.plot([-5,6],[0,0],label='L2',color='c')
#plt.plot([0.3,0.3],[7,-4],label='L3',color='m')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Scatter Plot of C1, C2 & C3')
#plt.axis([-5,6,-4,7])
#plt.legend()
plt.show()



In [ ]: