In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelEncoder

from scipy.stats import norm
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy import stats
import warnings
warnings.filterwarnings('ignore')


C:\ProgramData\Anaconda3\envs\PY36\lib\site-packages\sklearn\cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [2]:
DATAFILE = os.path.join('data','data.csv')
TARGETFILE = os.path.join('data','target.csv')
OUTDIR = os.path.join('results')

In [3]:
#DA AGGIUNGERE GTARGET E TDI
train = pd.read_table(DATAFILE,sep=',')
target = pd.read_table(TARGETFILE,sep=',')
dataset = pd.concat([train,target],axis=1)

In [4]:
#Original dataset size
original_size = np.shape(dataset)[0]

#Delete rows associated with "extreme" values: Y=200 || Y = -200
dataset = dataset[dataset.Y!=-200]
dataset = dataset[dataset.Y!=200]

#New dataset size and analysis of lost data
new_size = np.shape(dataset)[0]
print('Records lost: ',(1.0-new_size/original_size)*100, '%')

#Description of the dataset
dataset.describe()


Records lost:  30.05967005967006 %
Out[4]:
subj Gc CHO ROC Gt IOB var_class Vmx kp3 CR CF BW u2ss Ib min_risk Y
count 9963.000000 9963.000000 9963.000000 9963.000000 9963.000000 9963.000000 9963.000000 9963.000000 9963.000000 9963.000000 9963.000000 9963.000000 9963.000000 9963.000000 9963.000000 9963.000000
mean 51.591388 114.664258 74.627120 -0.333333 119.862853 1.929754 4.737730 0.070821 0.010829 16.013952 43.360333 70.012447 1.758747 106.129012 6.225203 22.048580
std 29.087193 63.364758 17.039728 1.547287 6.707839 2.457164 2.054412 0.023353 0.005171 5.238797 9.625555 12.529655 0.511385 17.855293 9.034714 85.405424
min 1.000000 60.000000 50.000000 -2.000000 107.530000 0.000000 1.000000 0.026252 0.002596 7.000000 26.000000 46.692000 0.647920 68.668000 0.772100 -190.000000
25% 25.500000 70.000000 60.000000 -2.000000 115.090000 0.025917 4.000000 0.056890 0.007041 12.000000 36.000000 60.371000 1.446300 91.455000 2.579850 -30.000000
50% 53.000000 80.000000 70.000000 -1.000000 119.790000 1.033500 5.000000 0.064676 0.010244 15.000000 43.000000 68.404000 1.681200 104.230000 3.931400 30.000000
75% 77.000000 150.000000 90.000000 1.000000 124.600000 2.957900 6.000000 0.085176 0.013947 20.000000 50.000000 76.510000 2.073500 118.920000 6.042250 80.000000
max 100.000000 250.000000 100.000000 2.000000 137.800000 16.508000 7.000000 0.142120 0.025736 30.000000 67.000000 106.090000 3.551700 161.560000 117.400000 190.000000

In [5]:
dataset['Y'].describe()


Out[5]:
count    9963.000000
mean       22.048580
std        85.405424
min      -190.000000
25%       -30.000000
50%        30.000000
75%        80.000000
max       190.000000
Name: Y, dtype: float64

In [6]:
sns.distplot(dataset['Y'])
plt.show()



In [10]:
#correlation matrix
corrmat = dataset.corr()
sns.heatmap(corrmat, vmax=.8, square=True)
plt.show()



In [11]:
#Target correlation matrix
k = 7 #number of variables for heatmap
cols = corrmat.nlargest(k, 'Y')['Y'].index
cm = np.corrcoef(dataset[cols].values.T)
sns.set(font_scale=1.25)
hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 10}, yticklabels=cols.values, xticklabels=cols.values)
plt.show()



In [54]:
#scatterplot
sns.set()
sns.pairplot(dataset, size = 2.5)
plt.show();


Variable transformation


In [7]:
dataset_trans = dataset.copy()
dataset_trans.to_csv('dataset_trans.csv',sep=',',index=False)

In [61]:
#GC
var = 'GT'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
scaler = MinMaxScaler().fit(dataset_trans[var].reshape(-1,1))
dataset_trans[var] = scaler.transform(dataset_trans[var].reshape(-1,1))
sns.distplot(dataset_trans[var])
plt.show()



In [63]:
#min_risk
var = 'min_risk'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var].reshape(-1,1))
dataset_trans[var] = scaler.transform(dataset_trans[var].reshape(-1,1))
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [64]:
#CR 
var = 'CR'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var].reshape(-1,1))
dataset_trans[var] = scaler.transform(dataset_trans[var].reshape(-1,1))
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [65]:
#CF
var = 'CF'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var].reshape(-1,1))
dataset_trans[var] = scaler.transform(dataset_trans[var].reshape(-1,1))
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [66]:
#u2ss
var = 'u2ss'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var].reshape(-1,1))
dataset_trans[var] = scaler.transform(dataset_trans[var].reshape(-1,1))
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [67]:
#Vmx
var = 'Vmx'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var].reshape(-1,1))
dataset_trans[var] = scaler.transform(dataset_trans[var].reshape(-1,1))
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [68]:
#kp3
var = 'kp3'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var].reshape(-1,1))
dataset_trans[var] = scaler.transform(dataset_trans[var].reshape(-1,1))
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [18]:
#Gt
var = 'Gt'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var].reshape(-1,1))
dataset_trans[var] = scaler.transform(dataset_trans[var].reshape(-1,1))
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [69]:
#Ib
var = 'Ib'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var].reshape(-1,1))
dataset_trans[var] = scaler.transform(dataset_trans[var].reshape(-1,1))
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [70]:
#BW 
var = 'BW'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var].reshape(-1,1))
dataset_trans[var] = scaler.transform(dataset_trans[var].reshape(-1,1))
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [71]:
#ROC
var = 'ROC'
enc = LabelEncoder()
a = enc.fit_transform(np.asarray(dataset_trans[var]))
n = np.shape(enc.classes_)[0]
b = np.zeros([np.shape(a)[0],n])
for i in np.arange(np.shape(a)[0]):
    b[i,a[i]] = 1
    
columns = ["" for x in range(n)]
for i in np.arange(n):
    columns[i] = var + str(enc.classes_[i])
d = pd.DataFrame(data=b,columns=columns)
del dataset_trans[var]
dataset_trans = pd.concat([d, dataset_trans],axis=1)

In [72]:
#var_class
var = 'var_class'
enc = LabelEncoder()
a = enc.fit_transform(np.asarray(dataset_trans[var]))
n = np.shape(enc.classes_)[0]
b = np.zeros([np.shape(a)[0],n])
for i in np.arange(np.shape(a)[0]):
    b[i,a[i]] = 1
    
columns = ["" for x in range(n)]
for i in np.arange(n):
    columns[i] = var + str(enc.classes_[i])
d = pd.DataFrame(data=b,columns=columns)
del dataset_trans[var]
dataset_trans = pd.concat([d, dataset_trans],axis=1)

In [ ]:
#scatterplot
sns.set()
sns.pairplot(dataset_trans, size = 2.5)
plt.show();

In [ ]:
#correlation matrix
corrmat = dataset_trans.corr()
sns.heatmap(corrmat, vmax=.8, square=True)
plt.show()

In [ ]:
#Target correlation matrix
k = 10 #number of variables for heatmap
cols = corrmat.nlargest(k, 'Y')['Y'].index
cm = np.corrcoef(dataset_trans[cols].values.T)
sns.set(font_scale=1.25)
hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 10}, yticklabels=cols.values, xticklabels=cols.values)
plt.show()

In [30]:
dataset_trans


Out[30]:
var_class1 var_class4 var_class5 var_class6 var_class7 ROC-2 ROC-1 ROC1 ROC2 G_c min_risk CR CF u2ss Vmx kp3 Gt Ib BW Y
0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 -0.003086 -0.724219 -0.939255 0.703325 -0.142312 -0.778155 0.160792 0.575951 0.083377 90
1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.838816 0.367086 0.148202 -0.377239 0.003725 -0.286428 1.142868 0.214296 0.420162 90
2 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.122196 0.040469 0.039855 0.064605 1.437880 1.092596 -1.922954 -0.173284 -0.222051 0
3 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 -0.021344 -0.320590 1.018966 0.526725 1.569870 0.280758 1.325976 -0.225919 -2.217111 90
4 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.060542 -0.724219 -1.222438 0.731913 -0.078926 -1.435269 0.025798 2.177880 0.407807 90
5 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.527620 -0.320590 -0.422446 0.230259 0.501800 0.213381 0.243142 1.040811 0.155654 0
6 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.502310 -1.181817 -1.078562 1.348268 1.643383 -0.732168 -1.518356 0.340528 -0.881206 90
7 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -1.267528 -0.724219 -0.673246 0.956989 0.038015 -0.369248 0.113350 -0.181441 -0.664983 0
8 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.137600 -0.724219 -1.222438 0.622160 -1.699792 0.064083 -0.484685 1.494613 0.563290 0
9 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.946827 0.939562 0.653104 -0.989585 -0.077874 0.973800 -0.519543 -0.717414 0.708865 0
10 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.455122 -1.181817 -1.525165 1.292016 1.506494 -0.441265 -0.012733 1.290048 -0.082816 50
11 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.250434 0.040469 0.653104 -0.091800 0.500363 -0.929471 -1.244132 0.014103 -0.755702 0
12 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.559481 0.367086 1.105856 -0.293736 -0.070970 -0.109126 -0.508444 -1.650976 -0.930919 90
13 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.049455 -0.320590 0.839847 0.145238 -0.457926 0.521952 0.124074 -1.268177 -1.327385 90
14 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.328960 0.939562 1.105856 -0.896549 -0.420541 -0.543362 1.868797 -1.418937 0.003291 90
15 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.068727 -0.724219 0.039855 0.694372 0.947301 0.119419 0.045803 0.595409 -1.176998 10
16 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 -0.311873 -1.710074 -1.525165 1.629790 1.333680 0.668143 -0.900926 2.575203 -0.712843 90
17 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -1.285617 1.193522 0.930321 -1.133020 -0.539434 1.891912 -0.150565 -0.153521 0.638472 0
18 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.695993 -0.724219 -1.222438 0.715773 -1.167495 -1.111187 0.249227 0.175455 0.399607 50
19 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.140146 -2.334870 -2.022430 2.089831 0.968051 -1.880187 -0.325496 0.840725 -0.871818 50
20 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.693620 0.367086 0.039855 -0.222879 1.257459 -0.704178 -1.649425 0.812707 0.237152 90
21 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.109790 0.367086 0.930321 -0.453278 -0.425260 -0.166509 -0.421475 -0.770062 -0.506956 0
22 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.842432 0.939562 0.653104 -0.969825 -0.388933 -0.023198 -0.696432 -0.635060 0.730823 0
23 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.737508 -1.710074 -1.525165 1.807725 0.070266 0.205990 0.439892 1.320419 -0.944125 90
24 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.516994 -1.181817 -2.022430 1.345918 -2.535820 1.155203 1.961889 1.405262 0.548979 90
25 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.773371 0.367086 -1.078562 -0.320720 -0.533843 -0.541689 0.064250 1.128334 2.014960 90
26 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.389105 0.665265 -0.939255 -0.605012 -0.273154 -0.482657 0.188283 1.085205 2.251729 90
27 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.511637 0.040469 -1.078562 -0.053062 0.017062 -0.597240 0.969625 1.088498 1.553413 90
28 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.182352 -0.724219 -1.850337 0.829044 -1.046726 0.235582 -0.107091 2.167312 1.040566 0
29 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.250777 2.415808 1.274639 -2.406369 -0.422643 -0.076984 1.281448 -0.875541 2.248929 90
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2266 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.078051 1.651119 1.437169 -1.615514 -0.678711 0.298695 1.280010 -1.416493 0.715681 -160
2267 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 1.144227 -2.334870 -1.222438 2.104701 -0.517236 -2.532240 0.962310 0.475085 -1.897630 -160
2268 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.146230 0.367086 0.253937 -0.388460 1.612358 0.728731 -0.399403 0.267491 0.342814 -160
2269 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.234908 -0.320590 -0.546054 0.275200 -1.788539 -0.157722 -0.476772 -0.409560 0.289004 -100
2270 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.870380 -0.320590 0.253937 0.307216 -0.303030 1.044796 0.711857 0.189161 -0.797168 -160
2271 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.384597 -0.320590 0.039855 0.469099 0.384723 1.654966 0.438387 0.981733 -0.769649 -160
2272 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.115950 0.040469 -1.078562 -0.073846 -0.253496 0.266821 0.379577 0.700790 1.484810 -160
2273 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.670282 -0.320590 -0.071234 0.461797 -0.667126 1.047610 -1.161834 0.128064 -0.596011 -120
2274 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 -1.246721 0.665265 1.274639 -0.542053 0.726048 -0.222571 -0.804032 -1.208837 -0.810164 -160
2275 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.170999 -0.320590 -0.804235 0.336593 -0.226601 -0.294524 -0.320789 -0.132089 0.421627 0
2276 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.085440 -0.320590 -1.371191 0.191274 -1.528402 -0.262137 -0.110193 0.691745 1.477699 -160
2277 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 -0.787046 0.040469 0.253937 0.047062 0.838422 -0.451533 1.566848 -0.080280 -0.406239 -160
2278 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.345769 0.367086 1.105856 -0.460809 0.322279 0.047065 -0.456213 -1.304803 -0.716888 0
2279 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 2.130645 -0.320590 -0.422446 0.420472 -0.288766 -0.274444 0.985708 0.964951 -0.166223 -160
2280 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.723266 1.429953 0.839847 -1.472623 0.240196 0.374789 0.583847 -0.102675 1.329842 0
2281 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 -1.444113 0.040469 1.105856 -0.191165 1.534048 2.044783 -1.434825 -0.707272 -1.103616 0
2282 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.034675 -0.724219 -0.185210 0.688943 0.533090 -2.271325 -1.498274 -1.095455 -0.847403 -160
2283 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -1.272670 1.858873 1.593893 -1.901100 0.665699 -1.089140 -0.850800 -0.601348 0.969194 -160
2284 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 1.419242 0.040469 1.356656 -0.058904 0.980408 0.753172 -0.579908 -0.783217 -1.684001 -160
2285 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.390086 -0.724219 0.556665 0.915373 2.001617 -1.202329 -0.508444 -0.041434 -2.205863 -160
2286 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.146033 -1.710074 -1.078562 1.620742 0.289574 0.671541 1.466037 0.412673 -1.226174 0
2287 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.920565 0.040469 1.018966 -0.051312 -0.662848 -0.587414 0.751863 -1.183568 -1.249578 -160
2288 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 1.885321 -0.320590 -0.071234 0.399753 1.254806 1.893836 -1.503292 0.248006 -0.608372 -160
2289 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 -0.120776 0.665265 0.148202 -0.545745 -0.929069 1.245867 0.202013 -0.013600 0.674883 -160
2290 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.802272 -1.181817 -1.525165 1.289628 -0.493520 0.179313 -0.605385 0.847097 -0.195443 -120
2291 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 -0.559025 0.939562 -0.546054 -1.048885 0.097300 1.065042 2.161759 1.417728 2.472454 -160
2292 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 -0.541209 0.367086 0.556665 -0.313483 -0.077690 0.241142 -1.163476 -0.336566 -0.245489 0
2293 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.193594 1.858873 2.033040 -1.812940 -0.474366 1.408141 -0.150565 -2.055186 0.318760 0
2294 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.693934 -0.320590 0.357185 0.205673 1.184708 -0.855776 -0.947942 -0.003974 -0.748978 -160
2295 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 -1.032755 0.665265 -0.546054 -0.531334 -1.236334 -1.502792 1.941088 -0.834829 1.533261 -60

2296 rows × 20 columns


In [31]:
dataset_trans.to_csv('dataset_trans.csv',sep=',',index=False)

In [ ]: