In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [4]:
# Read 'voice.csv' (a path relative to the kernel's working directory) into
# the DataFrame that the rest of the notebook refers to as `data`.
data = pd.read_csv(filepath_or_buffer='voice.csv')

In [6]:
# Exploring data
# print is called as a function with a single argument: this emits the same
# text under Python 2 and is also valid syntax under Python 3, where the bare
# `print expr` statement form is a SyntaxError.

# Storage type of every column.
print(data.dtypes)

# count / mean / std / min / quartiles / max for the numeric columns.
print(data.describe())


meanfreq    float64
sd          float64
median      float64
Q25         float64
Q75         float64
IQR         float64
skew        float64
kurt        float64
sp.ent      float64
sfm         float64
mode        float64
centroid    float64
meanfun     float64
minfun      float64
maxfun      float64
meandom     float64
mindom      float64
maxdom      float64
dfrange     float64
modindx     float64
label        object
dtype: object
Out[6]:
meanfreq sd median Q25 Q75 IQR skew kurt sp.ent sfm mode centroid meanfun minfun maxfun meandom mindom maxdom dfrange modindx
count 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000 3168.000000
mean 0.180907 0.057126 0.185621 0.140456 0.224765 0.084309 3.140168 36.568461 0.895127 0.408216 0.165282 0.180907 0.142807 0.036802 0.258842 0.829211 0.052647 5.047277 4.994630 0.173752
std 0.029918 0.016652 0.036360 0.048680 0.023639 0.042783 4.240529 134.928661 0.044980 0.177521 0.077203 0.029918 0.032304 0.019220 0.030077 0.525205 0.063299 3.521157 3.520039 0.119454
min 0.039363 0.018363 0.010975 0.000229 0.042946 0.014558 0.141735 2.068455 0.738651 0.036876 0.000000 0.039363 0.055565 0.009775 0.103093 0.007812 0.004883 0.007812 0.000000 0.000000
25% 0.163662 0.041954 0.169593 0.111087 0.208747 0.042560 1.649569 5.669547 0.861811 0.258041 0.118016 0.163662 0.116998 0.018223 0.253968 0.419828 0.007812 2.070312 2.044922 0.099766
50% 0.184838 0.059155 0.190032 0.140286 0.225684 0.094280 2.197101 8.318463 0.901767 0.396335 0.186599 0.184838 0.140519 0.046110 0.271186 0.765795 0.023438 4.992188 4.945312 0.139357
75% 0.199146 0.067020 0.210618 0.175939 0.243660 0.114175 2.931694 13.648905 0.928713 0.533676 0.221104 0.199146 0.169581 0.047904 0.277457 1.177166 0.070312 7.007812 6.992188 0.209183
max 0.251124 0.115273 0.261224 0.247347 0.273469 0.252225 34.725453 1309.612887 0.981997 0.842936 0.280000 0.251124 0.237636 0.204082 0.279114 2.957682 0.458984 21.867188 21.843750 0.932374

In [30]:
# Split the data into a training partition and a held-out test partition.
#
# train_test_split is imported together with KFold so that this cell runs on
# a fresh kernel; calling it without the import raises NameError.
from sklearn.model_selection import KFold, train_test_split

# Features: every column from 'meanfreq' through 'modindx' (a label slice
# includes both endpoints). Target: the 'label' column.
# .loc is used for the label-based slice because the .ix indexer is
# deprecated and was removed in pandas 1.0.
# test_size=0.4 sends 40% of the rows to the test partition; random_state=0
# fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data.loc[:, 'meanfreq':'modindx'],
    data.label,
    test_size=0.4,
    random_state=0,
)

# Sanity check: the two partitions together account for every input row.
assert len(X_train) + len(X_test) == len(data)


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-30-4ea7ea34ac9b> in <module>()
      1 #from sklearn import Dec
      2 #from sklearn.model_selection import train_test_split
----> 3 from sklearn.model_selection import KFold
      4 X = ["a", "b", "c", "d"]
      5 kf = KFold(n_splits=2)

/Volumes/Toshiba/Applications/anaconda/lib/python2.7/site-packages/sklearn/model_selection/__init__.py in <module>()
     21 from ._validation import validation_curve
     22 
---> 23 from ._search import GridSearchCV
     24 from ._search import RandomizedSearchCV
     25 from ._search import ParameterGrid

/Volumes/Toshiba/Applications/anaconda/lib/python2.7/site-packages/sklearn/model_selection/_search.py in <module>()
     30 from ..utils import check_random_state
     31 from ..utils.fixes import sp_version
---> 32 from ..utils.fixes import rankdata
     33 from ..utils.fixes import MaskedArray
     34 from ..utils.random import sample_without_replacement

ImportError: cannot import name rankdata

In [ ]: