In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
In [4]:
# Load the dataset from 'voice.csv' (path is relative to the current working
# directory) into a pandas DataFrame named `data`.
data = pd.read_csv('voice.csv')
In [6]:
# Explore the data: column dtypes first, then summary statistics.
# print() with a single argument behaves the same on Python 2 and Python 3,
# unlike the Python 2-only `print x` statement.
print(data.dtypes)
# Leave describe() as the cell's last expression so Jupyter renders it as a
# rich table (Out[...]) instead of plain printed text.
data.describe()
meanfreq float64
sd float64
median float64
Q25 float64
Q75 float64
IQR float64
skew float64
kurt float64
sp.ent float64
sfm float64
mode float64
centroid float64
meanfun float64
minfun float64
maxfun float64
meandom float64
mindom float64
maxdom float64
dfrange float64
modindx float64
label object
dtype: object
Out[6]:
meanfreq
sd
median
Q25
Q75
IQR
skew
kurt
sp.ent
sfm
mode
centroid
meanfun
minfun
maxfun
meandom
mindom
maxdom
dfrange
modindx
count
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
3168.000000
mean
0.180907
0.057126
0.185621
0.140456
0.224765
0.084309
3.140168
36.568461
0.895127
0.408216
0.165282
0.180907
0.142807
0.036802
0.258842
0.829211
0.052647
5.047277
4.994630
0.173752
std
0.029918
0.016652
0.036360
0.048680
0.023639
0.042783
4.240529
134.928661
0.044980
0.177521
0.077203
0.029918
0.032304
0.019220
0.030077
0.525205
0.063299
3.521157
3.520039
0.119454
min
0.039363
0.018363
0.010975
0.000229
0.042946
0.014558
0.141735
2.068455
0.738651
0.036876
0.000000
0.039363
0.055565
0.009775
0.103093
0.007812
0.004883
0.007812
0.000000
0.000000
25%
0.163662
0.041954
0.169593
0.111087
0.208747
0.042560
1.649569
5.669547
0.861811
0.258041
0.118016
0.163662
0.116998
0.018223
0.253968
0.419828
0.007812
2.070312
2.044922
0.099766
50%
0.184838
0.059155
0.190032
0.140286
0.225684
0.094280
2.197101
8.318463
0.901767
0.396335
0.186599
0.184838
0.140519
0.046110
0.271186
0.765795
0.023438
4.992188
4.945312
0.139357
75%
0.199146
0.067020
0.210618
0.175939
0.243660
0.114175
2.931694
13.648905
0.928713
0.533676
0.221104
0.199146
0.169581
0.047904
0.277457
1.177166
0.070312
7.007812
6.992188
0.209183
max
0.251124
0.115273
0.261224
0.247347
0.273469
0.252225
34.725453
1309.612887
0.981997
0.842936
0.280000
0.251124
0.237636
0.204082
0.279114
2.957682
0.458984
21.867188
21.843750
0.932374
In [30]:
# Split the data into training and test sets (40% of the rows held out).
#
# train_test_split must be imported before it is called; the original cell had
# this import commented out, so the call below raised NameError.
# NOTE(review): if this import fails with "ImportError: cannot import name
# rankdata", the error is raised inside scikit-learn's own modules, which
# presumably means a broken or mixed-version installation -- reinstall or
# upgrade scikit-learn and confirm.
from sklearn.model_selection import KFold, train_test_split

# Features are every column from 'meanfreq' through 'modindx' (label-based
# slice, both endpoints included); the target is the 'label' column.
# .loc replaces .ix, which is deprecated and was removed in pandas 1.0.
# random_state is fixed so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data.loc[:, 'meanfreq':'modindx'],
    data['label'],
    test_size=0.4,
    random_state=0,
)
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-30-4ea7ea34ac9b> in <module>()
1 #from sklearn import Dec
2 #from sklearn.model_selection import train_test_split
----> 3 from sklearn.model_selection import KFold
4 X = ["a", "b", "c", "d"]
5 kf = KFold(n_splits=2)
/Volumes/Toshiba/Applications/anaconda/lib/python2.7/site-packages/sklearn/model_selection/__init__.py in <module>()
21 from ._validation import validation_curve
22
---> 23 from ._search import GridSearchCV
24 from ._search import RandomizedSearchCV
25 from ._search import ParameterGrid
/Volumes/Toshiba/Applications/anaconda/lib/python2.7/site-packages/sklearn/model_selection/_search.py in <module>()
30 from ..utils import check_random_state
31 from ..utils.fixes import sp_version
---> 32 from ..utils.fixes import rankdata
33 from ..utils.fixes import MaskedArray
34 from ..utils.random import sample_without_replacement
ImportError: cannot import name rankdata
In [ ]:
Content source: diegocavalca/Studies
Similar notebooks: