In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelEncoder

from scipy.stats import norm
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy import stats
import warnings
warnings.filterwarnings('ignore')


/Users/cappe/anaconda/lib/python3.6/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [11]:
DATAFILE = os.path.join('data','data.csv')
TARGETFILE = os.path.join('data','target.csv')
OUTDIR = os.path.join('results')

In [45]:
#DA AGGIUNGERE GTARGET E TDI
train = pd.read_table(DATAFILE,sep=',')
target = pd.read_table(TARGETFILE,sep=',')
dataset = pd.concat([train,target],axis=1)

In [46]:
#Original dataset size
original_size = np.shape(dataset)[0]

#Delete rows associated with "extreme" values: Y=200 || Y = -200
dataset = dataset[dataset.Y!=-200]
dataset = dataset[dataset.Y!=200]

#New dataset size and analysis of lost data
new_size = np.shape(dataset)[0]
print('Records lost: ',(1.0-new_size/original_size)*100, '%')

#Description of the dataset
dataset.describe()


Records lost:  30.11058451816746 %
Out[46]:
GT CHO ROC IOB var_class Vmx kp3 CR CF u2ss BW Ib min_risk Y
count 6636.000000 6636.000000 6636.000000 6636.000000 6636.000000 6636.000000 6636.000000 6636.000000 6636.000000 6636.000000 6636.000000 6636.000000 6636.000000 6636.000000
mean 114.663954 74.587101 -0.331073 1.926460 4.739451 0.070840 0.010827 16.018987 43.357444 1.758951 70.016509 106.138198 6.248124 21.943942
std 63.364296 17.976660 1.546936 2.453095 2.054440 0.023351 0.005175 5.245353 9.626891 0.511420 12.523806 17.859708 9.105536 85.444997
min 60.000000 50.000000 -2.000000 0.000000 1.000000 0.026252 0.002596 7.000000 26.000000 0.647920 46.692000 68.668000 0.772100 -190.000000
25% 70.000000 50.000000 -2.000000 0.025462 4.000000 0.056890 0.007041 12.000000 36.000000 1.446300 60.371000 91.455000 2.591900 -30.000000
50% 80.000000 70.000000 -1.000000 1.027500 5.000000 0.064676 0.010244 15.000000 43.000000 1.681200 68.404000 104.230000 3.940550 30.000000
75% 150.000000 80.000000 1.000000 2.957900 6.000000 0.085176 0.013947 20.000000 50.000000 2.073500 76.510000 118.920000 6.081950 80.000000
max 250.000000 100.000000 2.000000 16.508000 7.000000 0.142120 0.025736 30.000000 67.000000 3.551700 106.090000 161.560000 117.400000 190.000000

In [47]:
dataset['Y'].describe()


Out[47]:
count    6636.000000
mean       21.943942
std        85.444997
min      -190.000000
25%       -30.000000
50%        30.000000
75%        80.000000
max       190.000000
Name: Y, dtype: float64

In [49]:
sns.distplot(dataset['Y'])
plt.show()



In [52]:
#correlation matrix
corrmat = dataset.corr()
sns.heatmap(corrmat, vmax=.8, square=True)
plt.show()



In [51]:
#Target correlation matrix
k = 5 #number of variables for heatmap
cols = corrmat.nlargest(k, 'Y')['Y'].index
cm = np.corrcoef(dataset[cols].values.T)
sns.set(font_scale=1.25)
hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 10}, yticklabels=cols.values, xticklabels=cols.values)
plt.show()



In [21]:
#scatterplot
sns.set()
sns.pairplot(dataset, size = 2.5)
plt.show();


Variable transformation


In [19]:
dataset_trans = dataset.copy()

In [20]:
#min_risk
var = 'GT'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
scaler = MinMaxScaler().fit(dataset_trans[var])
dataset_trans[var] = scaler.transform(dataset_trans[var])
sns.distplot(dataset_trans[var])
plt.show()


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-20-fadc711b7ff1> in <module>()
      7 
      8 #log-transformation and standard scaling
----> 9 scaler = MinMaxScaler().fit(dataset_trans[var])
     10 dataset_trans[var] = scaler.transform(dataset_trans[var])
     11 sns.distplot(dataset_trans[var])

/Users/cappe/anaconda/lib/python3.6/site-packages/sklearn/preprocessing/data.py in fit(self, X, y)
    306         # Reset internal state before fitting
    307         self._reset()
--> 308         return self.partial_fit(X, y)
    309 
    310     def partial_fit(self, X, y=None):

/Users/cappe/anaconda/lib/python3.6/site-packages/sklearn/preprocessing/data.py in partial_fit(self, X, y)
    332 
    333         X = check_array(X, copy=self.copy, warn_on_dtype=True,
--> 334                         estimator=self, dtype=FLOAT_DTYPES)
    335 
    336         data_min = np.min(X, axis=0)

/Users/cappe/anaconda/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    408                     "Reshape your data either using array.reshape(-1, 1) if "
    409                     "your data has a single feature or array.reshape(1, -1) "
--> 410                     "if it contains a single sample.".format(array))
    411             array = np.atleast_2d(array)
    412             # To ensure that array flags are maintained

ValueError: Expected 2D array, got 1D array instead:
array=[  60.   70.   80. ...,  100.  150.  250.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [12]:
#min_risk
var = 'min_risk'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var])
dataset_trans[var] = scaler.transform(dataset_trans[var])
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [13]:
#CR 
var = 'CR'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var])
dataset_trans[var] = scaler.transform(dataset_trans[var])
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [14]:
#CF
var = 'CF'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var])
dataset_trans[var] = scaler.transform(dataset_trans[var])
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [15]:
#u2ss
var = 'u2ss'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var])
dataset_trans[var] = scaler.transform(dataset_trans[var])
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [16]:
#Vmx
var = 'Vmx'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var])
dataset_trans[var] = scaler.transform(dataset_trans[var])
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [17]:
#kp3
var = 'kp3'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var])
dataset_trans[var] = scaler.transform(dataset_trans[var])
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [18]:
#Gt
var = 'Gt'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var])
dataset_trans[var] = scaler.transform(dataset_trans[var])
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [19]:
#Ib
var = 'Ib'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var])
dataset_trans[var] = scaler.transform(dataset_trans[var])
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [20]:
#BW 
var = 'BW'
sns.distplot(dataset[var])
plt.show()
res = stats.probplot(dataset[var], plot=plt)
plt.show()

#log-transformation and standard scaling
dataset_trans[var] = np.log(dataset[var])
scaler = StandardScaler().fit(dataset_trans[var])
dataset_trans[var] = scaler.transform(dataset_trans[var])
sns.distplot(dataset_trans[var])
plt.show()
res = stats.probplot(dataset_trans[var], plot=plt)
plt.show()



In [21]:
#ROC
var = 'ROC'
enc = LabelEncoder()
a = enc.fit_transform(np.asarray(dataset_trans[var]))
n = np.shape(enc.classes_)[0]
b = np.zeros([np.shape(a)[0],n])
for i in np.arange(np.shape(a)[0]):
    b[i,a[i]] = 1
    
columns = ["" for x in range(n)]
for i in np.arange(n):
    columns[i] = var + str(enc.classes_[i])
d = pd.DataFrame(data=b,columns=columns)
del dataset_trans[var]
dataset_trans = pd.concat([d, dataset_trans],axis=1)

In [22]:
#var_class
var = 'var_class'
enc = LabelEncoder()
a = enc.fit_transform(np.asarray(dataset_trans[var]))
n = np.shape(enc.classes_)[0]
b = np.zeros([np.shape(a)[0],n])
for i in np.arange(np.shape(a)[0]):
    b[i,a[i]] = 1
    
columns = ["" for x in range(n)]
for i in np.arange(n):
    columns[i] = var + str(enc.classes_[i])
d = pd.DataFrame(data=b,columns=columns)
del dataset_trans[var]
dataset_trans = pd.concat([d, dataset_trans],axis=1)

In [23]:
#scatterplot
sns.set()
sns.pairplot(dataset_trans, size = 2.5)
plt.show();



In [24]:
#correlation matrix
corrmat = dataset_trans.corr()
sns.heatmap(corrmat, vmax=.8, square=True)
plt.show()



In [26]:
#Target correlation matrix
k = 10 #number of variables for heatmap
cols = corrmat.nlargest(k, 'Y')['Y'].index
cm = np.corrcoef(dataset_trans[cols].values.T)
sns.set(font_scale=1.25)
hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 10}, yticklabels=cols.values, xticklabels=cols.values)
plt.show()



In [30]:
dataset_trans


Out[30]:
var_class1 var_class4 var_class5 var_class6 var_class7 ROC-2 ROC-1 ROC1 ROC2 G_c min_risk CR CF u2ss Vmx kp3 Gt Ib BW Y
0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 -0.003086 -0.724219 -0.939255 0.703325 -0.142312 -0.778155 0.160792 0.575951 0.083377 90
1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.838816 0.367086 0.148202 -0.377239 0.003725 -0.286428 1.142868 0.214296 0.420162 90
2 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.122196 0.040469 0.039855 0.064605 1.437880 1.092596 -1.922954 -0.173284 -0.222051 0
3 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 -0.021344 -0.320590 1.018966 0.526725 1.569870 0.280758 1.325976 -0.225919 -2.217111 90
4 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.060542 -0.724219 -1.222438 0.731913 -0.078926 -1.435269 0.025798 2.177880 0.407807 90
5 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.527620 -0.320590 -0.422446 0.230259 0.501800 0.213381 0.243142 1.040811 0.155654 0
6 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.502310 -1.181817 -1.078562 1.348268 1.643383 -0.732168 -1.518356 0.340528 -0.881206 90
7 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -1.267528 -0.724219 -0.673246 0.956989 0.038015 -0.369248 0.113350 -0.181441 -0.664983 0
8 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.137600 -0.724219 -1.222438 0.622160 -1.699792 0.064083 -0.484685 1.494613 0.563290 0
9 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.946827 0.939562 0.653104 -0.989585 -0.077874 0.973800 -0.519543 -0.717414 0.708865 0
10 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.455122 -1.181817 -1.525165 1.292016 1.506494 -0.441265 -0.012733 1.290048 -0.082816 50
11 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.250434 0.040469 0.653104 -0.091800 0.500363 -0.929471 -1.244132 0.014103 -0.755702 0
12 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.559481 0.367086 1.105856 -0.293736 -0.070970 -0.109126 -0.508444 -1.650976 -0.930919 90
13 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.049455 -0.320590 0.839847 0.145238 -0.457926 0.521952 0.124074 -1.268177 -1.327385 90
14 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.328960 0.939562 1.105856 -0.896549 -0.420541 -0.543362 1.868797 -1.418937 0.003291 90
15 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.068727 -0.724219 0.039855 0.694372 0.947301 0.119419 0.045803 0.595409 -1.176998 10
16 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 -0.311873 -1.710074 -1.525165 1.629790 1.333680 0.668143 -0.900926 2.575203 -0.712843 90
17 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -1.285617 1.193522 0.930321 -1.133020 -0.539434 1.891912 -0.150565 -0.153521 0.638472 0
18 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.695993 -0.724219 -1.222438 0.715773 -1.167495 -1.111187 0.249227 0.175455 0.399607 50
19 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.140146 -2.334870 -2.022430 2.089831 0.968051 -1.880187 -0.325496 0.840725 -0.871818 50
20 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.693620 0.367086 0.039855 -0.222879 1.257459 -0.704178 -1.649425 0.812707 0.237152 90
21 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.109790 0.367086 0.930321 -0.453278 -0.425260 -0.166509 -0.421475 -0.770062 -0.506956 0
22 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.842432 0.939562 0.653104 -0.969825 -0.388933 -0.023198 -0.696432 -0.635060 0.730823 0
23 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.737508 -1.710074 -1.525165 1.807725 0.070266 0.205990 0.439892 1.320419 -0.944125 90
24 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.516994 -1.181817 -2.022430 1.345918 -2.535820 1.155203 1.961889 1.405262 0.548979 90
25 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.773371 0.367086 -1.078562 -0.320720 -0.533843 -0.541689 0.064250 1.128334 2.014960 90
26 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.389105 0.665265 -0.939255 -0.605012 -0.273154 -0.482657 0.188283 1.085205 2.251729 90
27 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.511637 0.040469 -1.078562 -0.053062 0.017062 -0.597240 0.969625 1.088498 1.553413 90
28 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.182352 -0.724219 -1.850337 0.829044 -1.046726 0.235582 -0.107091 2.167312 1.040566 0
29 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 -0.250777 2.415808 1.274639 -2.406369 -0.422643 -0.076984 1.281448 -0.875541 2.248929 90
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2266 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.078051 1.651119 1.437169 -1.615514 -0.678711 0.298695 1.280010 -1.416493 0.715681 -160
2267 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 1.144227 -2.334870 -1.222438 2.104701 -0.517236 -2.532240 0.962310 0.475085 -1.897630 -160
2268 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.146230 0.367086 0.253937 -0.388460 1.612358 0.728731 -0.399403 0.267491 0.342814 -160
2269 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.234908 -0.320590 -0.546054 0.275200 -1.788539 -0.157722 -0.476772 -0.409560 0.289004 -100
2270 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.870380 -0.320590 0.253937 0.307216 -0.303030 1.044796 0.711857 0.189161 -0.797168 -160
2271 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.384597 -0.320590 0.039855 0.469099 0.384723 1.654966 0.438387 0.981733 -0.769649 -160
2272 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.115950 0.040469 -1.078562 -0.073846 -0.253496 0.266821 0.379577 0.700790 1.484810 -160
2273 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.670282 -0.320590 -0.071234 0.461797 -0.667126 1.047610 -1.161834 0.128064 -0.596011 -120
2274 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 -1.246721 0.665265 1.274639 -0.542053 0.726048 -0.222571 -0.804032 -1.208837 -0.810164 -160
2275 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.170999 -0.320590 -0.804235 0.336593 -0.226601 -0.294524 -0.320789 -0.132089 0.421627 0
2276 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.085440 -0.320590 -1.371191 0.191274 -1.528402 -0.262137 -0.110193 0.691745 1.477699 -160
2277 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 -0.787046 0.040469 0.253937 0.047062 0.838422 -0.451533 1.566848 -0.080280 -0.406239 -160
2278 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.345769 0.367086 1.105856 -0.460809 0.322279 0.047065 -0.456213 -1.304803 -0.716888 0
2279 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 2.130645 -0.320590 -0.422446 0.420472 -0.288766 -0.274444 0.985708 0.964951 -0.166223 -160
2280 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.723266 1.429953 0.839847 -1.472623 0.240196 0.374789 0.583847 -0.102675 1.329842 0
2281 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 -1.444113 0.040469 1.105856 -0.191165 1.534048 2.044783 -1.434825 -0.707272 -1.103616 0
2282 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.034675 -0.724219 -0.185210 0.688943 0.533090 -2.271325 -1.498274 -1.095455 -0.847403 -160
2283 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -1.272670 1.858873 1.593893 -1.901100 0.665699 -1.089140 -0.850800 -0.601348 0.969194 -160
2284 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 1.419242 0.040469 1.356656 -0.058904 0.980408 0.753172 -0.579908 -0.783217 -1.684001 -160
2285 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.390086 -0.724219 0.556665 0.915373 2.001617 -1.202329 -0.508444 -0.041434 -2.205863 -160
2286 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.146033 -1.710074 -1.078562 1.620742 0.289574 0.671541 1.466037 0.412673 -1.226174 0
2287 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.920565 0.040469 1.018966 -0.051312 -0.662848 -0.587414 0.751863 -1.183568 -1.249578 -160
2288 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 1.885321 -0.320590 -0.071234 0.399753 1.254806 1.893836 -1.503292 0.248006 -0.608372 -160
2289 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 -0.120776 0.665265 0.148202 -0.545745 -0.929069 1.245867 0.202013 -0.013600 0.674883 -160
2290 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 -0.802272 -1.181817 -1.525165 1.289628 -0.493520 0.179313 -0.605385 0.847097 -0.195443 -120
2291 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 -0.559025 0.939562 -0.546054 -1.048885 0.097300 1.065042 2.161759 1.417728 2.472454 -160
2292 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 -0.541209 0.367086 0.556665 -0.313483 -0.077690 0.241142 -1.163476 -0.336566 -0.245489 0
2293 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.193594 1.858873 2.033040 -1.812940 -0.474366 1.408141 -0.150565 -2.055186 0.318760 0
2294 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.693934 -0.320590 0.357185 0.205673 1.184708 -0.855776 -0.947942 -0.003974 -0.748978 -160
2295 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 -1.032755 0.665265 -0.546054 -0.531334 -1.236334 -1.502792 1.941088 -0.834829 1.533261 -60

2296 rows × 20 columns


In [31]:
dataset_trans.to_csv('dataset_trans.csv',sep=',',index=False)

In [ ]: