notebook.community

Edit and run



In [2]:

    
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd



In [4]:

    
# Load the voice dataset from the working directory into a DataFrame.
# The recorded outputs further down show 3168 rows, 20 float64 columns and a
# string-typed `label` column.
data = pd.read_csv(filepath_or_buffer='voice.csv')



In [6]:

    
# Exploring data: column dtypes first, then summary statistics.
# The parenthesised print form is valid on both Python 2 and Python 3.
print(data.dtypes)

# Keep describe() as the cell's last expression so the notebook renders it as
# a rich table (this is what the recorded Out[6] shows) rather than plain text.
data.describe()









    



meanfreq    float64
sd          float64
median      float64
Q25         float64
Q75         float64
IQR         float64
skew        float64
kurt        float64
sp.ent      float64
sfm         float64
mode        float64
centroid    float64
meanfun     float64
minfun      float64
maxfun      float64
meandom     float64
mindom      float64
maxdom      float64
dfrange     float64
modindx     float64
label        object
dtype: object






    Out[6]:






  
    
      
      meanfreq
      sd
      median
      Q25
      Q75
      IQR
      skew
      kurt
      sp.ent
      sfm
      mode
      centroid
      meanfun
      minfun
      maxfun
      meandom
      mindom
      maxdom
      dfrange
      modindx
    
  
  
    
      count
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
      3168.000000
    
    
      mean
      0.180907
      0.057126
      0.185621
      0.140456
      0.224765
      0.084309
      3.140168
      36.568461
      0.895127
      0.408216
      0.165282
      0.180907
      0.142807
      0.036802
      0.258842
      0.829211
      0.052647
      5.047277
      4.994630
      0.173752
    
    
      std
      0.029918
      0.016652
      0.036360
      0.048680
      0.023639
      0.042783
      4.240529
      134.928661
      0.044980
      0.177521
      0.077203
      0.029918
      0.032304
      0.019220
      0.030077
      0.525205
      0.063299
      3.521157
      3.520039
      0.119454
    
    
      min
      0.039363
      0.018363
      0.010975
      0.000229
      0.042946
      0.014558
      0.141735
      2.068455
      0.738651
      0.036876
      0.000000
      0.039363
      0.055565
      0.009775
      0.103093
      0.007812
      0.004883
      0.007812
      0.000000
      0.000000
    
    
      25%
      0.163662
      0.041954
      0.169593
      0.111087
      0.208747
      0.042560
      1.649569
      5.669547
      0.861811
      0.258041
      0.118016
      0.163662
      0.116998
      0.018223
      0.253968
      0.419828
      0.007812
      2.070312
      2.044922
      0.099766
    
    
      50%
      0.184838
      0.059155
      0.190032
      0.140286
      0.225684
      0.094280
      2.197101
      8.318463
      0.901767
      0.396335
      0.186599
      0.184838
      0.140519
      0.046110
      0.271186
      0.765795
      0.023438
      4.992188
      4.945312
      0.139357
    
    
      75%
      0.199146
      0.067020
      0.210618
      0.175939
      0.243660
      0.114175
      2.931694
      13.648905
      0.928713
      0.533676
      0.221104
      0.199146
      0.169581
      0.047904
      0.277457
      1.177166
      0.070312
      7.007812
      6.992188
      0.209183
    
    
      max
      0.251124
      0.115273
      0.261224
      0.247347
      0.273469
      0.252225
      34.725453
      1309.612887
      0.981997
      0.842936
      0.280000
      0.251124
      0.237636
      0.204082
      0.279114
      2.957682
      0.458984
      21.867188
      21.843750
      0.932374



In [30]:

    
# Hold-out split of the voice data for model evaluation.
#
# train_test_split has to be imported explicitly: the call below previously
# raised NameError because its import line was commented out.
# NOTE(review): the recorded traceback for this cell shows the sklearn import
# itself failing with "ImportError: cannot import name rankdata" -- this looks
# like a scikit-learn/scipy installation mismatch rather than a notebook bug;
# confirm by upgrading both packages in the environment.
from sklearn.model_selection import KFold, train_test_split

# Features: every column from 'meanfreq' through 'modindx' (label slices
# include both endpoints). Target: the 'label' column.
# .loc replaces the deprecated .ix indexer (removed in pandas 1.0) and gives
# the same result for this label-based slice.
X_train, X_test, y_train, y_test = train_test_split(
    data.loc[:, 'meanfreq':'modindx'],
    data.label,
    test_size=0.4,    # hold out 40% of the rows for testing
    random_state=0,   # fixed seed so the split is reproducible
)









    



---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-30-4ea7ea34ac9b> in <module>()
      1 #from sklearn import Dec
      2 #from sklearn.model_selection import train_test_split
----> 3 from sklearn.model_selection import KFold
      4 X = ["a", "b", "c", "d"]
      5 kf = KFold(n_splits=2)

/Volumes/Toshiba/Applications/anaconda/lib/python2.7/site-packages/sklearn/model_selection/__init__.py in <module>()
     21 from ._validation import validation_curve
     22 
---> 23 from ._search import GridSearchCV
     24 from ._search import RandomizedSearchCV
     25 from ._search import ParameterGrid

/Volumes/Toshiba/Applications/anaconda/lib/python2.7/site-packages/sklearn/model_selection/_search.py in <module>()
     30 from ..utils import check_random_state
     31 from ..utils.fixes import sp_version
---> 32 from ..utils.fixes import rankdata
     33 from ..utils.fixes import MaskedArray
     34 from ..utils.random import sample_without_replacement

ImportError: cannot import name rankdata



In [ ]:

	meanfreq	sd	median	Q25	Q75	IQR	skew	kurt	sp.ent	sfm	mode	centroid	meanfun	minfun	maxfun	meandom	mindom	maxdom	dfrange	modindx
count	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000	3168.000000
mean	0.180907	0.057126	0.185621	0.140456	0.224765	0.084309	3.140168	36.568461	0.895127	0.408216	0.165282	0.180907	0.142807	0.036802	0.258842	0.829211	0.052647	5.047277	4.994630	0.173752
std	0.029918	0.016652	0.036360	0.048680	0.023639	0.042783	4.240529	134.928661	0.044980	0.177521	0.077203	0.029918	0.032304	0.019220	0.030077	0.525205	0.063299	3.521157	3.520039	0.119454
min	0.039363	0.018363	0.010975	0.000229	0.042946	0.014558	0.141735	2.068455	0.738651	0.036876	0.000000	0.039363	0.055565	0.009775	0.103093	0.007812	0.004883	0.007812	0.000000	0.000000
25%	0.163662	0.041954	0.169593	0.111087	0.208747	0.042560	1.649569	5.669547	0.861811	0.258041	0.118016	0.163662	0.116998	0.018223	0.253968	0.419828	0.007812	2.070312	2.044922	0.099766
50%	0.184838	0.059155	0.190032	0.140286	0.225684	0.094280	2.197101	8.318463	0.901767	0.396335	0.186599	0.184838	0.140519	0.046110	0.271186	0.765795	0.023438	4.992188	4.945312	0.139357
75%	0.199146	0.067020	0.210618	0.175939	0.243660	0.114175	2.931694	13.648905	0.928713	0.533676	0.221104	0.199146	0.169581	0.047904	0.277457	1.177166	0.070312	7.007812	6.992188	0.209183
max	0.251124	0.115273	0.261224	0.247347	0.273469	0.252225	34.725453	1309.612887	0.981997	0.842936	0.280000	0.251124	0.237636	0.204082	0.279114	2.957682	0.458984	21.867188	21.843750	0.932374