notebook.community

Edit and run



In [1]:

    
import pandas as pd
import os as os
import numpy as np



In [2]:

    
adult=pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",header=None)



In [3]:

    
adult.columns=["age ",
"workclass ",
"fnlwgt",
"education ",
"education-num",
"marital-status",
"occupation",
"relationship",
"race",
"sex",
"capital-gain",
"capital-loss",
"hours-per-week",
"native-country",
"income",
]



In [5]:

    
adult.head()









    Out[5]:







  
    
      
      age
      workclass
      fnlwgt
      education
      education-num
      marital-status
      occupation
      relationship
      race
      sex
      capital-gain
      capital-loss
      hours-per-week
      native-country
      income
    
  
  
    
      0
      39
      State-gov
      77516
      Bachelors
      13
      Never-married
      Adm-clerical
      Not-in-family
      White
      Male
      2174
      0
      40
      United-States
      <=50K
    
    
      1
      50
      Self-emp-not-inc
      83311
      Bachelors
      13
      Married-civ-spouse
      Exec-managerial
      Husband
      White
      Male
      0
      0
      13
      United-States
      <=50K
    
    
      2
      38
      Private
      215646
      HS-grad
      9
      Divorced
      Handlers-cleaners
      Not-in-family
      White
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      3
      53
      Private
      234721
      11th
      7
      Married-civ-spouse
      Handlers-cleaners
      Husband
      Black
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      4
      28
      Private
      338409
      Bachelors
      13
      Married-civ-spouse
      Prof-specialty
      Wife
      Black
      Female
      0
      0
      40
      Cuba
      <=50K



In [12]:

    
adult.iloc[:2,:3]









    Out[12]:







  
    
      
      age
      workclass
      fnlwgt
    
  
  
    
      0
      39
      State-gov
      77516
    
    
      1
      50
      Self-emp-not-inc
      83311



In [13]:

    
adult.iloc[1:2,2:3]



In [14]:

    
adult.iloc[4,]#index









    Out[14]:





age                                28
workclass                     Private
fnlwgt                         338409
education                   Bachelors
education-num                      13
marital-status     Married-civ-spouse
occupation             Prof-specialty
relationship                     Wife
race                            Black
sex                            Female
capital-gain                        0
capital-loss                        0
hours-per-week                     40
native-country                   Cuba
income                          <=50K
Name: 4, dtype: object



In [15]:

    
adult.iloc[3:4,:]









    Out[15]:







  
    
      
      age
      workclass
      fnlwgt
      education
      education-num
      marital-status
      occupation
      relationship
      race
      sex
      capital-gain
      capital-loss
      hours-per-week
      native-country
      income
    
  
  
    
      3
      53
      Private
      234721
      11th
      7
      Married-civ-spouse
      Handlers-cleaners
      Husband
      Black
      Male
      0
      0
      40
      United-States
      <=50K



In [20]:

    
adult.drop?



In [21]:

    
adult2=adult.drop('capital-loss',1)



In [22]:

    
adult2.head()









    Out[22]:







  
    
      
      age
      workclass
      fnlwgt
      education
      education-num
      marital-status
      occupation
      relationship
      race
      sex
      capital-gain
      hours-per-week
      native-country
      income
    
  
  
    
      0
      39
      State-gov
      77516
      Bachelors
      13
      Never-married
      Adm-clerical
      Not-in-family
      White
      Male
      2174
      40
      United-States
      <=50K
    
    
      1
      50
      Self-emp-not-inc
      83311
      Bachelors
      13
      Married-civ-spouse
      Exec-managerial
      Husband
      White
      Male
      0
      13
      United-States
      <=50K
    
    
      2
      38
      Private
      215646
      HS-grad
      9
      Divorced
      Handlers-cleaners
      Not-in-family
      White
      Male
      0
      40
      United-States
      <=50K
    
    
      3
      53
      Private
      234721
      11th
      7
      Married-civ-spouse
      Handlers-cleaners
      Husband
      Black
      Male
      0
      40
      United-States
      <=50K
    
    
      4
      28
      Private
      338409
      Bachelors
      13
      Married-civ-spouse
      Prof-specialty
      Wife
      Black
      Female
      0
      40
      Cuba
      <=50K



In [25]:

    
del adult2["capital-gain"]



In [27]:

    
adult2.head()









    Out[27]:







  
    
      
      age
      workclass
      fnlwgt
      education
      education-num
      marital-status
      occupation
      relationship
      race
      sex
      hours-per-week
      native-country
      income
    
  
  
    
      0
      39
      State-gov
      77516
      Bachelors
      13
      Never-married
      Adm-clerical
      Not-in-family
      White
      Male
      40
      United-States
      <=50K
    
    
      1
      50
      Self-emp-not-inc
      83311
      Bachelors
      13
      Married-civ-spouse
      Exec-managerial
      Husband
      White
      Male
      13
      United-States
      <=50K
    
    
      2
      38
      Private
      215646
      HS-grad
      9
      Divorced
      Handlers-cleaners
      Not-in-family
      White
      Male
      40
      United-States
      <=50K
    
    
      3
      53
      Private
      234721
      11th
      7
      Married-civ-spouse
      Handlers-cleaners
      Husband
      Black
      Male
      40
      United-States
      <=50K
    
    
      4
      28
      Private
      338409
      Bachelors
      13
      Married-civ-spouse
      Prof-specialty
      Wife
      Black
      Female
      40
      Cuba
      <=50K



In [28]:

    
titanic=pd.read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/datasets/Titanic.csv")



In [29]:

    
titanic.columns









    Out[29]:





Index(['Unnamed: 0', 'Name', 'PClass', 'Age', 'Sex', 'Survived', 'SexCode'], dtype='object')



In [30]:

    
titanic=titanic.drop('Unnamed: 0',1)



In [31]:

    
titanic.head()









    Out[31]:







  
    
      
      Name
      PClass
      Age
      Sex
      Survived
      SexCode
    
  
  
    
      0
      Allen, Miss Elisabeth Walton
      1st
      29.00
      female
      1
      1
    
    
      1
      Allison, Miss Helen Loraine
      1st
      2.00
      female
      0
      1
    
    
      2
      Allison, Mr Hudson Joshua Creighton
      1st
      30.00
      male
      0
      0
    
    
      3
      Allison, Mrs Hudson JC (Bessie Waldo Daniels)
      1st
      25.00
      female
      0
      1
    
    
      4
      Allison, Master Hudson Trevor
      1st
      0.92
      male
      1
      0



In [32]:

    
import IPython



In [33]:

    
print (IPython.sys_info())









    



{'commit_hash': '5a894b9',
 'commit_source': 'installation',
 'default_encoding': 'cp1252',
 'ipython_path': 'C:\\Users\\KOGENTIX\\Anaconda3\\lib\\site-packages\\IPython',
 'ipython_version': '5.3.0',
 'os_name': 'nt',
 'platform': 'Windows-10-10.0.15063-SP0',
 'sys_executable': 'C:\\Users\\KOGENTIX\\Anaconda3\\python.exe',
 'sys_platform': 'win32',
 'sys_version': '3.6.1 |Anaconda 4.4.0 (64-bit)| (default, May 11 2017, '
                '13:25:24) [MSC v.1900 64 bit (AMD64)]'}



In [34]:

    
!pip install version_information









    



Collecting version_information
  Downloading version_information-1.0.3.tar.gz
Building wheels for collected packages: version-information
  Running setup.py bdist_wheel for version-information: started
  Running setup.py bdist_wheel for version-information: finished with status 'done'
  Stored in directory: C:\Users\KOGENTIX\AppData\Local\pip\Cache\wheels\4b\4c\f7\4d99d7820a507d8ae55204fcc00d66cdabf596d4b01228e7bd
Successfully built version-information
Installing collected packages: version-information
Successfully installed version-information-1.0.3



In [35]:

    
%load_ext version_information



In [36]:

    
%version_information









    Out[36]:




Software Version
Python 3.6.1 64bit [MSC v.1900 64 bit (AMD64)]
IPython 5.3.0
OS Windows 10 10.0.15063 SP0
Tue Dec 12 14:13:34 2017 SE Asia Standard Time



In [37]:

    
!pip freeze









    



alabaster==0.7.10
anaconda-client==1.6.3
anaconda-navigator==1.6.2
anaconda-project==0.6.0
asn1crypto==0.22.0
astroid==1.4.9
astropy==1.3.2
Babel==2.4.0
backports.shutil-get-terminal-size==1.0.0
bcrypt==3.1.3
beautifulsoup4==4.6.0
bitarray==0.8.1
blaze==0.10.1
bleach==1.5.0
bokeh==0.12.5
boto==2.46.1
Bottleneck==1.2.1
brewer2mpl==1.4.1
cffi==1.10.0
chardet==3.0.3
click==6.7
cloudpickle==0.2.2
clyent==1.2.2
cm-api==16.0.0
colorama==0.3.9
comtypes==1.1.2
conda==4.3.22
contextlib2==0.5.5
cryptography==1.8.1
cycler==0.10.0
Cython==0.25.2
cytoolz==0.8.2
dask==0.14.3
datashape==0.5.4
decorator==4.0.11
distributed==1.16.3
docutils==0.13.1
entrypoints==0.2.2
et-xmlfile==1.0.1
fastcache==1.0.2
findspark==1.1.0
Flask==0.12.2
Flask-Cors==3.0.2
future==0.16.0
gevent==1.2.1
ggplot==0.11.5
greenlet==0.4.12
h5py==2.7.0
HeapDict==1.0.0
html5lib==0.999
idna==2.5
imagesize==0.7.1
ipykernel==4.6.1
ipython==5.3.0
ipython-genutils==0.2.0
ipywidgets==6.0.0
isort==4.2.5
itsdangerous==0.24
jdcal==1.3
jedi==0.10.2
Jinja2==2.9.6
jsonschema==2.6.0
jupyter==1.0.0
jupyter-client==5.0.1
jupyter-console==5.1.0
jupyter-core==4.3.0
kmodes==0.7
lazy-object-proxy==1.2.2
llvmlite==0.18.0
locket==0.2.0
lxml==3.7.3
MarkupSafe==0.23
matplotlib==2.0.2
menuinst==1.4.7
mistune==0.7.4
mpmath==0.19
msgpack-python==0.4.8
multipledispatch==0.4.9
navigator-updater==0.1.0
nbconvert==5.1.1
nbformat==4.3.0
networkx==1.11
nltk==3.2.3
nose==1.3.7
notebook==5.0.0
numba==0.33.0
numexpr==2.6.2
numpy==1.12.1
numpydoc==0.6.0
odo==0.5.0
olefile==0.44
openpyxl==2.4.7
packaging==16.8
pandas==0.20.1
pandasql==0.7.3
pandocfilters==1.4.1
paramiko==2.2.1
partd==0.3.8
path.py==10.3.1
pathlib2==2.2.1
patsy==0.4.1
pep8==1.7.0
pickleshare==0.7.4
Pillow==4.1.1
ply==3.10
prompt-toolkit==1.0.14
psutil==5.2.2
py==1.4.33
py4j==0.10.4
pyasn1==0.3.1
pycosat==0.6.2
pycparser==2.17
pycrypto==2.6.1
pycurl==7.43.0
pyflakes==1.5.0
Pygments==2.2.0
PyHive==0.4.0
pyhs2==0.6.0
pylint==1.6.4
PyNaCl==1.1.2
pyodbc==4.0.16
pyOpenSSL==17.0.0
pyparsing==2.1.4
pyspark==2.2.0
pytest==3.0.7
python-dateutil==2.6.0
pytz==2017.2
PyWavelets==0.5.2
pywin32==220
PyYAML==3.12
pyzmq==16.0.2
QtAwesome==0.4.4
qtconsole==4.3.0
QtPy==1.2.1
requests==2.14.2
rope-py3k==0.9.4.post1
rpy2==2.8.5
sasl==0.2.1
scikit-image==0.13.0
scikit-learn==0.18.1
scipy==0.19.0
seaborn==0.7.1
simplegeneric==0.8.1
singledispatch==3.4.0.3
six==1.10.0
snowballstemmer==1.2.1
sortedcollections==0.5.3
sortedcontainers==1.5.7
sphinx==1.5.6
spyder==3.1.4
SQLAlchemy==1.1.9
statsmodels==0.8.0
sympy==1.0
tables==3.2.2
tblib==1.3.2
testpath==0.3
thrift==0.10.0
thrift-sasl==0.2.1
thriftpy==0.3.9
toolz==0.8.2
tornado==4.5.1
traitlets==4.3.2
unicodecsv==0.14.1
version-information==1.0.3
wcwidth==0.1.7
Werkzeug==0.12.2
widgetsnbextension==2.0.0
win-unicode-console==0.5
wrapt==1.10.10
xlrd==1.0.0
XlsxWriter==0.9.6
xlwings==0.10.4
xlwt==1.2.0
zict==0.1.2



In [38]:

    
titanic.columns









    Out[38]:





Index(['Name', 'PClass', 'Age', 'Sex', 'Survived', 'SexCode'], dtype='object')



In [40]:

    
titanic.head(10)









    Out[40]:







  
    
      
      Name
      PClass
      Age
      Sex
      Survived
      SexCode
    
  
  
    
      0
      Allen, Miss Elisabeth Walton
      1st
      29.00
      female
      1
      1
    
    
      1
      Allison, Miss Helen Loraine
      1st
      2.00
      female
      0
      1
    
    
      2
      Allison, Mr Hudson Joshua Creighton
      1st
      30.00
      male
      0
      0
    
    
      3
      Allison, Mrs Hudson JC (Bessie Waldo Daniels)
      1st
      25.00
      female
      0
      1
    
    
      4
      Allison, Master Hudson Trevor
      1st
      0.92
      male
      1
      0
    
    
      5
      Anderson, Mr Harry
      1st
      47.00
      male
      1
      0
    
    
      6
      Andrews, Miss Kornelia Theodosia
      1st
      63.00
      female
      1
      1
    
    
      7
      Andrews, Mr Thomas, jr
      1st
      39.00
      male
      0
      0
    
    
      8
      Appleton, Mrs Edward Dale (Charlotte Lamson)
      1st
      58.00
      female
      1
      1
    
    
      9
      Artagaveytia, Mr Ramon
      1st
      71.00
      male
      0
      0



In [41]:

    
titanic.tail()









    Out[41]:







  
    
      
      Name
      PClass
      Age
      Sex
      Survived
      SexCode
    
  
  
    
      1308
      Zakarian, Mr Artun
      3rd
      27.0
      male
      0
      0
    
    
      1309
      Zakarian, Mr Maprieder
      3rd
      26.0
      male
      0
      0
    
    
      1310
      Zenni, Mr Philip
      3rd
      22.0
      male
      0
      0
    
    
      1311
      Lievens, Mr Rene
      3rd
      24.0
      male
      0
      0
    
    
      1312
      Zimmerman, Leo
      3rd
      29.0
      male
      0
      0



In [42]:

    
titanic.dtypes









    Out[42]:





Name         object
PClass       object
Age         float64
Sex          object
Survived      int64
SexCode       int64
dtype: object



In [43]:

    
titanic.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1313 entries, 0 to 1312
Data columns (total 6 columns):
Name        1313 non-null object
PClass      1313 non-null object
Age         756 non-null float64
Sex         1313 non-null object
Survived    1313 non-null int64
SexCode     1313 non-null int64
dtypes: float64(1), int64(2), object(3)
memory usage: 61.6+ KB



In [44]:

    
type(titanic)









    Out[44]:





pandas.core.frame.DataFrame



In [47]:

    
titanic2=titanic.iloc[:,1:6]



In [48]:

    
titanic2.columns









    Out[48]:





Index(['PClass', 'Age', 'Sex', 'Survived', 'SexCode'], dtype='object')



In [49]:

    
del titanic2['Sex']



In [61]:

    
list1=[1,2,4,5]



In [62]:

    
titanic2.columns









    Out[62]:





Index(['PClass', 'Age', 'Survived', 'SexCode'], dtype='object')



In [63]:

    
titanic3=titanic.iloc[:,list1]



In [64]:

    
titanic3.columns









    Out[64]:





Index(['PClass', 'Age', 'Survived', 'SexCode'], dtype='object')



In [65]:

    
titanic4=titanic[['PClass', 'Age', 'Survived', 'SexCode']]



In [66]:

    
titanic4.columns









    Out[66]:





Index(['PClass', 'Age', 'Survived', 'SexCode'], dtype='object')



In [67]:

    
titanic.ix[20:28]









    



C:\Users\KOGENTIX\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  """Entry point for launching an IPython kernel.






    Out[67]:







  
    
      
      Name
      PClass
      Age
      Sex
      Survived
      SexCode
    
  
  
    
      20
      Behr, Mr Karl Howell
      1st
      26.0
      male
      1
      0
    
    
      21
      Birnbaum, Mr Jakob
      1st
      25.0
      male
      0
      0
    
    
      22
      Bishop, Mr Dickinson H
      1st
      25.0
      male
      1
      0
    
    
      23
      Bishop, Mrs Dickinson H (Helen Walton)
      1st
      19.0
      female
      1
      1
    
    
      24
      Bjornstrm-Steffansson, Mr Mauritz Hakan
      1st
      28.0
      male
      1
      0
    
    
      25
      Blackwell, Mr Stephen Weart
      1st
      45.0
      male
      0
      0
    
    
      26
      Blank, Mr Henry
      1st
      39.0
      male
      1
      0
    
    
      27
      Bonnell, Miss Caroline
      1st
      30.0
      female
      1
      1
    
    
      28
      Bonnell, Miss Elizabeth
      1st
      58.0
      female
      1
      1



In [68]:

    
titanic.iloc[20:28,:]









    Out[68]:







  
    
      
      Name
      PClass
      Age
      Sex
      Survived
      SexCode
    
  
  
    
      20
      Behr, Mr Karl Howell
      1st
      26.0
      male
      1
      0
    
    
      21
      Birnbaum, Mr Jakob
      1st
      25.0
      male
      0
      0
    
    
      22
      Bishop, Mr Dickinson H
      1st
      25.0
      male
      1
      0
    
    
      23
      Bishop, Mrs Dickinson H (Helen Walton)
      1st
      19.0
      female
      1
      1
    
    
      24
      Bjornstrm-Steffansson, Mr Mauritz Hakan
      1st
      28.0
      male
      1
      0
    
    
      25
      Blackwell, Mr Stephen Weart
      1st
      45.0
      male
      0
      0
    
    
      26
      Blank, Mr Henry
      1st
      39.0
      male
      1
      0
    
    
      27
      Bonnell, Miss Caroline
      1st
      30.0
      female
      1
      1



In [71]:

    
titanic.Age.mean()









    Out[71]:





30.397989417989415



In [72]:

    
import numpy as np



In [74]:

    
adult.index









    Out[74]:





RangeIndex(start=0, stop=32561, step=1)



In [73]:

    
adult.index.values









    Out[73]:





array([    0,     1,     2, ..., 32558, 32559, 32560], dtype=int64)



In [75]:

    
len(adult)









    Out[75]:





32561



In [78]:

    
0.001*len(adult)









    Out[78]:





32.561



In [79]:

    
round(0.001*len(adult))









    Out[79]:





33



In [81]:

    
rows = np.random.choice(adult.index.values, round(0.001*len(adult)))
print(rows)









    



[16132  5411 11189 12552  1920  6733 31595 10530   278 23958  9603  8259
  3089 16182  6404  7994 19078 28333 14027 32534 14665 31239 32336 11534
 13507  7887 29310 13020 31124 16488  9102 17185  3422]



In [83]:

    
adult.iloc[rows,:]









    Out[83]:







  
    
      
      age
      workclass
      fnlwgt
      education
      education-num
      marital-status
      occupation
      relationship
      race
      sex
      capital-gain
      capital-loss
      hours-per-week
      native-country
      income
    
  
  
    
      16132
      47
      Private
      344916
      Assoc-acdm
      12
      Divorced
      Transport-moving
      Not-in-family
      Black
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      5411
      71
      Local-gov
      337064
      Masters
      14
      Widowed
      Prof-specialty
      Not-in-family
      White
      Female
      0
      0
      40
      United-States
      <=50K
    
    
      11189
      59
      Private
      46466
      HS-grad
      9
      Married-civ-spouse
      Transport-moving
      Husband
      White
      Male
      0
      0
      40
      United-States
      >50K
    
    
      12552
      51
      Private
      193720
      HS-grad
      9
      Married-civ-spouse
      Craft-repair
      Husband
      White
      Male
      0
      0
      42
      United-States
      <=50K
    
    
      1920
      32
      Private
      120426
      HS-grad
      9
      Separated
      Adm-clerical
      Unmarried
      White
      Female
      0
      0
      40
      United-States
      <=50K
    
    
      6733
      21
      ?
      152328
      Some-college
      10
      Never-married
      ?
      Own-child
      White
      Male
      0
      0
      20
      United-States
      <=50K
    
    
      31595
      27
      Private
      278617
      Some-college
      10
      Never-married
      Craft-repair
      Not-in-family
      White
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      10530
      67
      Local-gov
      233681
      Assoc-acdm
      12
      Married-civ-spouse
      Exec-managerial
      Husband
      White
      Male
      0
      0
      35
      United-States
      <=50K
    
    
      278
      25
      Private
      193787
      Some-college
      10
      Never-married
      Tech-support
      Own-child
      White
      Female
      0
      0
      40
      United-States
      <=50K
    
    
      23958
      56
      Private
      265086
      HS-grad
      9
      Married-civ-spouse
      Craft-repair
      Husband
      White
      Male
      0
      0
      50
      United-States
      >50K
    
    
      9603
      36
      Private
      272944
      HS-grad
      9
      Never-married
      Transport-moving
      Not-in-family
      White
      Male
      0
      0
      45
      United-States
      <=50K
    
    
      8259
      23
      Private
      195767
      HS-grad
      9
      Never-married
      Craft-repair
      Not-in-family
      White
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      3089
      51
      Self-emp-not-inc
      145409
      Bachelors
      13
      Married-civ-spouse
      Sales
      Husband
      White
      Male
      15024
      0
      50
      United-States
      >50K
    
    
      16182
      56
      Private
      145574
      HS-grad
      9
      Married-civ-spouse
      Craft-repair
      Husband
      White
      Male
      0
      0
      40
      United-States
      >50K
    
    
      6404
      20
      Self-emp-inc
      95997
      HS-grad
      9
      Never-married
      Farming-fishing
      Own-child
      White
      Male
      0
      0
      70
      United-States
      <=50K
    
    
      7994
      37
      Private
      178136
      HS-grad
      9
      Married-civ-spouse
      Machine-op-inspct
      Husband
      Black
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      19078
      60
      Local-gov
      259803
      Bachelors
      13
      Married-civ-spouse
      Prof-specialty
      Wife
      White
      Female
      0
      0
      45
      United-States
      >50K
    
    
      28333
      61
      Private
      29059
      HS-grad
      9
      Divorced
      Sales
      Unmarried
      White
      Female
      0
      2754
      25
      United-States
      <=50K
    
    
      14027
      34
      Private
      35644
      Some-college
      10
      Married-civ-spouse
      Craft-repair
      Husband
      White
      Male
      0
      0
      40
      United-States
      >50K
    
    
      32534
      37
      Private
      179137
      Some-college
      10
      Divorced
      Adm-clerical
      Unmarried
      White
      Female
      0
      0
      39
      United-States
      <=50K
    
    
      14665
      40
      Private
      26892
      Bachelors
      13
      Married-civ-spouse
      Adm-clerical
      Husband
      White
      Male
      0
      0
      40
      United-States
      >50K
    
    
      31239
      51
      Private
      99987
      10th
      6
      Separated
      Machine-op-inspct
      Unmarried
      Black
      Female
      0
      0
      40
      United-States
      <=50K
    
    
      32336
      32
      Private
      172415
      HS-grad
      9
      Never-married
      Other-service
      Unmarried
      Black
      Female
      0
      0
      40
      United-States
      <=50K
    
    
      11534
      25
      Private
      35854
      Some-college
      10
      Married-spouse-absent
      Sales
      Unmarried
      White
      Female
      0
      0
      40
      United-States
      <=50K
    
    
      13507
      34
      Private
      236543
      HS-grad
      9
      Never-married
      Craft-repair
      Own-child
      White
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      7887
      25
      Private
      34803
      Bachelors
      13
      Never-married
      Exec-managerial
      Own-child
      White
      Female
      0
      0
      20
      United-States
      <=50K
    
    
      29310
      22
      ?
      313786
      HS-grad
      9
      Divorced
      ?
      Other-relative
      Black
      Female
      0
      0
      40
      United-States
      <=50K
    
    
      13020
      34
      Self-emp-not-inc
      137223
      10th
      6
      Never-married
      Other-service
      Own-child
      White
      Female
      0
      0
      40
      United-States
      <=50K
    
    
      31124
      63
      Private
      163708
      9th
      5
      Widowed
      Other-service
      Not-in-family
      White
      Female
      0
      0
      20
      United-States
      <=50K
    
    
      16488
      33
      ?
      119918
      Bachelors
      13
      Never-married
      ?
      Not-in-family
      Black
      Male
      0
      0
      45
      ?
      <=50K
    
    
      9102
      68
      Private
      201732
      Some-college
      10
      Divorced
      Adm-clerical
      Unmarried
      White
      Female
      0
      0
      40
      United-States
      <=50K
    
    
      17185
      22
      Private
      202871
      Assoc-voc
      11
      Married-civ-spouse
      Other-service
      Husband
      White
      Male
      0
      0
      44
      United-States
      <=50K
    
    
      3422
      40
      Private
      168936
      Assoc-voc
      11
      Divorced
      Other-service
      Not-in-family
      White
      Female
      0
      0
      32
      United-States
      <=50K



In [84]:

    
adultsm=adult.iloc[rows,:]



In [85]:

    
diamonds=pd.read_csv("C:\\Users\\KOGENTIX\\Desktop\\training\\BigDiamonds.csv\\BigDiamonds.csv")



In [86]:

    
diamonds.head()









    Out[86]:







  
    
      
      Unnamed: 0
      carat
      cut
      color
      clarity
      table
      depth
      cert
      measurements
      price
      x
      y
      z
    
  
  
    
      0
      1
      0.25
      V.Good
      K
      I1
      59.0
      63.7
      GIA
      3.96 x 3.95 x 2.52
      NaN
      3.96
      3.95
      2.52
    
    
      1
      2
      0.23
      Good
      G
      I1
      61.0
      58.1
      GIA
      4.00 x 4.05 x 2.30
      NaN
      4.00
      4.05
      2.30
    
    
      2
      3
      0.34
      Good
      J
      I2
      58.0
      58.7
      GIA
      4.56 x 4.53 x 2.67
      NaN
      4.56
      4.53
      2.67
    
    
      3
      4
      0.21
      V.Good
      D
      I1
      60.0
      60.6
      GIA
      3.80 x 3.82 x 2.31
      NaN
      3.80
      3.82
      2.31
    
    
      4
      5
      0.31
      V.Good
      K
      I1
      59.0
      62.2
      EGL
      4.35 x 4.26 x 2.68
      NaN
      4.35
      4.26
      2.68



In [87]:

    
diamonds.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 598024 entries, 0 to 598023
Data columns (total 13 columns):
Unnamed: 0      598024 non-null int64
carat           598024 non-null float64
cut             598024 non-null object
color           598024 non-null object
clarity         598024 non-null object
table           598024 non-null float64
depth           598024 non-null float64
cert            598024 non-null object
measurements    597978 non-null object
price           597311 non-null float64
x               596209 non-null float64
y               596172 non-null float64
z               595480 non-null float64
dtypes: float64(7), int64(1), object(5)
memory usage: 59.3+ MB



In [88]:

    
diamonds= diamonds.dropna(how='any')



In [89]:

    
diamonds.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 593784 entries, 493 to 598023
Data columns (total 13 columns):
Unnamed: 0      593784 non-null int64
carat           593784 non-null float64
cut             593784 non-null object
color           593784 non-null object
clarity         593784 non-null object
table           593784 non-null float64
depth           593784 non-null float64
cert            593784 non-null object
measurements    593784 non-null object
price           593784 non-null float64
x               593784 non-null float64
y               593784 non-null float64
z               593784 non-null float64
dtypes: float64(7), int64(1), object(5)
memory usage: 63.4+ MB



In [90]:

    
adult.describe()









    Out[90]:







  
    
      
      age
      fnlwgt
      education-num
      capital-gain
      capital-loss
      hours-per-week
    
  
  
    
      count
      32561.000000
      3.256100e+04
      32561.000000
      32561.000000
      32561.000000
      32561.000000
    
    
      mean
      38.581647
      1.897784e+05
      10.080679
      1077.648844
      87.303830
      40.437456
    
    
      std
      13.640433
      1.055500e+05
      2.572720
      7385.292085
      402.960219
      12.347429
    
    
      min
      17.000000
      1.228500e+04
      1.000000
      0.000000
      0.000000
      1.000000
    
    
      25%
      28.000000
      1.178270e+05
      9.000000
      0.000000
      0.000000
      40.000000
    
    
      50%
      37.000000
      1.783560e+05
      10.000000
      0.000000
      0.000000
      40.000000
    
    
      75%
      48.000000
      2.370510e+05
      12.000000
      0.000000
      0.000000
      45.000000
    
    
      max
      90.000000
      1.484705e+06
      16.000000
      99999.000000
      4356.000000
      99.000000



In [91]:

    
titanic.describe()









    Out[91]:







  
    
      
      Age
      Survived
      SexCode
    
  
  
    
      count
      756.000000
      1313.000000
      1313.000000
    
    
      mean
      30.397989
      0.342727
      0.351866
    
    
      std
      14.259049
      0.474802
      0.477734
    
    
      min
      0.170000
      0.000000
      0.000000
    
    
      25%
      21.000000
      0.000000
      0.000000
    
    
      50%
      28.000000
      0.000000
      0.000000
    
    
      75%
      39.000000
      1.000000
      1.000000
    
    
      max
      71.000000
      1.000000
      1.000000



In [93]:

    
diamonds.describe()









    Out[93]:







  
    
      
      Unnamed: 0
      carat
      table
      depth
      price
      x
      y
      z
    
  
  
    
      count
      593784.000000
      593784.000000
      593784.000000
      593784.000000
      593784.000000
      593784.000000
      593784.000000
      593784.000000
    
    
      mean
      299220.966754
      1.072593
      57.658755
      61.091980
      8755.808723
      5.991952
      6.200535
      4.036075
    
    
      std
      172625.362546
      0.813113
      4.827985
      7.487465
      13022.108651
      1.530444
      1.485081
      1.240932
    
    
      min
      494.000000
      0.200000
      0.000000
      0.000000
      300.000000
      0.150000
      1.000000
      0.040000
    
    
      25%
      149637.750000
      0.500000
      56.000000
      61.000000
      1218.000000
      4.740000
      4.970000
      3.120000
    
    
      50%
      299311.500000
      0.900000
      58.000000
      62.000000
      3503.000000
      5.780000
      6.050000
      3.860000
    
    
      75%
      448775.250000
      1.500000
      59.000000
      62.700000
      11186.000000
      6.970000
      7.230000
      4.610000
    
    
      max
      598024.000000
      9.250000
      75.900000
      81.300000
      99990.000000
      13.890000
      13.890000
      13.180000



In [94]:

    
diamonds.price.describe()









    Out[94]:





count    593784.000000
mean       8755.808723
std       13022.108651
min         300.000000
25%        1218.000000
50%        3503.000000
75%       11186.000000
max       99990.000000
Name: price, dtype: float64



In [95]:

    
diamonds.ppc=diamonds.price/diamonds.carat



In [96]:

    
diamonds.ppc.describe()









    Out[96]:





count    593784.000000
mean       5788.585161
std        4570.993823
min         525.000000
25%        2666.666667
50%        4172.857143
75%        7437.198068
max       49519.402985
dtype: float64



In [98]:

    
diamonds=diamonds.drop('Unnamed: 0',1)



In [99]:

    
diamonds.corr()



In [101]:

    
diamonds.shape









    Out[101]:





(593784, 12)



In [102]:

    
adult2=adult.copy()



In [107]:

    
! pip install pandasql









    



Requirement already satisfied: pandasql in c:\users\kogentix\anaconda3\lib\site-packages
Requirement already satisfied: numpy in c:\users\kogentix\anaconda3\lib\site-packages (from pandasql)
Requirement already satisfied: sqlalchemy in c:\users\kogentix\anaconda3\lib\site-packages (from pandasql)
Requirement already satisfied: pandas in c:\users\kogentix\anaconda3\lib\site-packages (from pandasql)
Requirement already satisfied: python-dateutil>=2 in c:\users\kogentix\anaconda3\lib\site-packages (from pandas->pandasql)
Requirement already satisfied: pytz>=2011k in c:\users\kogentix\anaconda3\lib\site-packages (from pandas->pandasql)
Requirement already satisfied: six>=1.5 in c:\users\kogentix\anaconda3\lib\site-packages (from python-dateutil>=2->pandas->pandasql)



In [109]:

    
from pandasql import sqldf
pysqldf = lambda q: sqldf(q, globals())



In [110]:

    
import pandas as pd



In [111]:

    
mycars=pd.read_csv("http://vincentarelbundock.github.io/Rdatasets/csv/datasets/mtcars.csv")



In [112]:

    
mycars.head()









    Out[112]:







  
    
      
      Unnamed: 0
      mpg
      cyl
      disp
      hp
      drat
      wt
      qsec
      vs
      am
      gear
      carb
    
  
  
    
      0
      Mazda RX4
      21.0
      6
      160.0
      110
      3.90
      2.620
      16.46
      0
      1
      4
      4
    
    
      1
      Mazda RX4 Wag
      21.0
      6
      160.0
      110
      3.90
      2.875
      17.02
      0
      1
      4
      4
    
    
      2
      Datsun 710
      22.8
      4
      108.0
      93
      3.85
      2.320
      18.61
      1
      1
      4
      1
    
    
      3
      Hornet 4 Drive
      21.4
      6
      258.0
      110
      3.08
      3.215
      19.44
      1
      0
      3
      1
    
    
      4
      Hornet Sportabout
      18.7
      8
      360.0
      175
      3.15
      3.440
      17.02
      0
      0
      3
      2



In [113]:

    
mycars.columns= ['brand','mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs',
       'am', 'gear', 'carb']



In [114]:

    
pysqldf("SELECT * FROM mycars LIMIT 10;")









    Out[114]:







  
    
      
      brand
      mpg
      cyl
      disp
      hp
      drat
      wt
      qsec
      vs
      am
      gear
      carb
    
  
  
    
      0
      Mazda RX4
      21.0
      6
      160.0
      110
      3.90
      2.620
      16.46
      0
      1
      4
      4
    
    
      1
      Mazda RX4 Wag
      21.0
      6
      160.0
      110
      3.90
      2.875
      17.02
      0
      1
      4
      4
    
    
      2
      Datsun 710
      22.8
      4
      108.0
      93
      3.85
      2.320
      18.61
      1
      1
      4
      1
    
    
      3
      Hornet 4 Drive
      21.4
      6
      258.0
      110
      3.08
      3.215
      19.44
      1
      0
      3
      1
    
    
      4
      Hornet Sportabout
      18.7
      8
      360.0
      175
      3.15
      3.440
      17.02
      0
      0
      3
      2
    
    
      5
      Valiant
      18.1
      6
      225.0
      105
      2.76
      3.460
      20.22
      1
      0
      3
      1
    
    
      6
      Duster 360
      14.3
      8
      360.0
      245
      3.21
      3.570
      15.84
      0
      0
      3
      4
    
    
      7
      Merc 240D
      24.4
      4
      146.7
      62
      3.69
      3.190
      20.00
      1
      0
      4
      2
    
    
      8
      Merc 230
      22.8
      4
      140.8
      95
      3.92
      3.150
      22.90
      1
      0
      4
      2
    
    
      9
      Merc 280
      19.2
      6
      167.6
      123
      3.92
      3.440
      18.30
      1
      0
      4
      4



In [115]:

    
pysqldf("SELECT * from mycars where gear >3")









    Out[115]:







  
    
      
      brand
      mpg
      cyl
      disp
      hp
      drat
      wt
      qsec
      vs
      am
      gear
      carb
    
  
  
    
      0
      Mazda RX4
      21.0
      6
      160.0
      110
      3.90
      2.620
      16.46
      0
      1
      4
      4
    
    
      1
      Mazda RX4 Wag
      21.0
      6
      160.0
      110
      3.90
      2.875
      17.02
      0
      1
      4
      4
    
    
      2
      Datsun 710
      22.8
      4
      108.0
      93
      3.85
      2.320
      18.61
      1
      1
      4
      1
    
    
      3
      Merc 240D
      24.4
      4
      146.7
      62
      3.69
      3.190
      20.00
      1
      0
      4
      2
    
    
      4
      Merc 230
      22.8
      4
      140.8
      95
      3.92
      3.150
      22.90
      1
      0
      4
      2
    
    
      5
      Merc 280
      19.2
      6
      167.6
      123
      3.92
      3.440
      18.30
      1
      0
      4
      4
    
    
      6
      Merc 280C
      17.8
      6
      167.6
      123
      3.92
      3.440
      18.90
      1
      0
      4
      4
    
    
      7
      Fiat 128
      32.4
      4
      78.7
      66
      4.08
      2.200
      19.47
      1
      1
      4
      1
    
    
      8
      Honda Civic
      30.4
      4
      75.7
      52
      4.93
      1.615
      18.52
      1
      1
      4
      2
    
    
      9
      Toyota Corolla
      33.9
      4
      71.1
      65
      4.22
      1.835
      19.90
      1
      1
      4
      1
    
    
      10
      Fiat X1-9
      27.3
      4
      79.0
      66
      4.08
      1.935
      18.90
      1
      1
      4
      1
    
    
      11
      Porsche 914-2
      26.0
      4
      120.3
      91
      4.43
      2.140
      16.70
      0
      1
      5
      2
    
    
      12
      Lotus Europa
      30.4
      4
      95.1
      113
      3.77
      1.513
      16.90
      1
      1
      5
      2
    
    
      13
      Ford Pantera L
      15.8
      8
      351.0
      264
      4.22
      3.170
      14.50
      0
      1
      5
      4
    
    
      14
      Ferrari Dino
      19.7
      6
      145.0
      175
      3.62
      2.770
      15.50
      0
      1
      5
      6
    
    
      15
      Maserati Bora
      15.0
      8
      301.0
      335
      3.54
      3.570
      14.60
      0
      1
      5
      8
    
    
      16
      Volvo 142E
      21.4
      4
      121.0
      109
      4.11
      2.780
      18.60
      1
      1
      4
      2



In [117]:

    
pysqldf("SELECT avg(mpg),gear from mycars group by gear ")



In [119]:

    
np.arange(0.1,1,0.1)









    Out[119]:





array([ 0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9])



In [120]:

    
diamonds.quantile(np.arange(0.1,1,0.1))



In [121]:

    
titanic.columns









    Out[121]:





Index(['Name', 'PClass', 'Age', 'Sex', 'Survived', 'SexCode'], dtype='object')



In [125]:

    
titanic.PClass.unique()









    Out[125]:





array(['1st', '2nd', '*', '3rd'], dtype=object)



In [126]:

    
titanic.Survived.unique()









    Out[126]:





array([1, 0], dtype=int64)



In [127]:

    
titanic.SexCode.unique()









    Out[127]:





array([1, 0], dtype=int64)



In [128]:

    
titanic.PClass.value_counts()









    Out[128]:





3rd    711
1st    322
2nd    279
*        1
Name: PClass, dtype: int64



In [129]:

    
titanic.Survived.value_counts()









    Out[129]:





0    863
1    450
Name: Survived, dtype: int64



In [132]:

    
titanic.SexCode.value_counts()









    Out[132]:





0    851
1    462
Name: SexCode, dtype: int64



In [133]:

    
pd.crosstab(titanic.SexCode,titanic.PClass)



In [134]:

    
pd.crosstab(titanic.Sex,titanic.PClass)



In [135]:

    
pd.crosstab(titanic.Sex,titanic.Survived)



In [136]:

    
pd.crosstab(titanic.PClass,titanic.Survived)



In [138]:

    
pd.crosstab(titanic.Sex,[titanic.PClass,titanic.Survived])



In [140]:

    
x=titanic.groupby(['Survived'])



In [141]:

    
type(x)









    Out[141]:





pandas.core.groupby.DataFrameGroupBy



In [142]:

    
x









    Out[142]:





<pandas.core.groupby.DataFrameGroupBy object at 0x000002289F6CFCF8>



In [143]:

    
x.describe()



In [144]:

    
z=titanic.groupby(['Survived','Sex'])



In [145]:

    
z.Age









    Out[145]:





<pandas.core.groupby.SeriesGroupBy object at 0x000002289F750BE0>



In [146]:

    
z.Age.mean()









    Out[146]:





Survived  Sex   
0         female    24.901408
          male      32.320780
1         female    30.867143
          male      25.951875
Name: Age, dtype: float64



In [147]:

    
z.Age.mean().reset_index()



In [148]:

    
p=z.Age.mean().reset_index()



In [150]:

    
p.pivot(index='Survived',columns='Sex',values="Age")



In [151]:

    
q=p.pivot(index='Survived',columns='Sex',values="Age")



In [152]:

    
q



In [153]:

    
q.transpose()



In [154]:

    
iris=pd.read_csv("https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/datasets/iris.csv")



In [165]:

    
iris=iris.drop('Unnamed: 0',1)



In [166]:

    
len(iris)









    Out[166]:





150



In [167]:

    
a=0.8*len(iris)



In [168]:

    
a









    Out[168]:





120.0



In [169]:

    
np.arange(0,a)









    Out[169]:





array([   0.,    1.,    2.,    3.,    4.,    5.,    6.,    7.,    8.,
          9.,   10.,   11.,   12.,   13.,   14.,   15.,   16.,   17.,
         18.,   19.,   20.,   21.,   22.,   23.,   24.,   25.,   26.,
         27.,   28.,   29.,   30.,   31.,   32.,   33.,   34.,   35.,
         36.,   37.,   38.,   39.,   40.,   41.,   42.,   43.,   44.,
         45.,   46.,   47.,   48.,   49.,   50.,   51.,   52.,   53.,
         54.,   55.,   56.,   57.,   58.,   59.,   60.,   61.,   62.,
         63.,   64.,   65.,   66.,   67.,   68.,   69.,   70.,   71.,
         72.,   73.,   74.,   75.,   76.,   77.,   78.,   79.,   80.,
         81.,   82.,   83.,   84.,   85.,   86.,   87.,   88.,   89.,
         90.,   91.,   92.,   93.,   94.,   95.,   96.,   97.,   98.,
         99.,  100.,  101.,  102.,  103.,  104.,  105.,  106.,  107.,
        108.,  109.,  110.,  111.,  112.,  113.,  114.,  115.,  116.,
        117.,  118.,  119.])



In [170]:

    
a=int(0.8*len(iris))



In [171]:

    
a









    Out[171]:





120



In [172]:

    
np.arange(0,a)









    Out[172]:





array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119])



In [173]:

    
b=np.arange(0,a)



In [174]:

    
iris.iloc[b,:]









    Out[174]:







  
    
      
      Sepal.Length
      Sepal.Width
      Petal.Length
      Petal.Width
      Species
    
  
  
    
      0
      5.1
      3.5
      1.4
      0.2
      setosa
    
    
      1
      4.9
      3.0
      1.4
      0.2
      setosa
    
    
      2
      4.7
      3.2
      1.3
      0.2
      setosa
    
    
      3
      4.6
      3.1
      1.5
      0.2
      setosa
    
    
      4
      5.0
      3.6
      1.4
      0.2
      setosa
    
    
      5
      5.4
      3.9
      1.7
      0.4
      setosa
    
    
      6
      4.6
      3.4
      1.4
      0.3
      setosa
    
    
      7
      5.0
      3.4
      1.5
      0.2
      setosa
    
    
      8
      4.4
      2.9
      1.4
      0.2
      setosa
    
    
      9
      4.9
      3.1
      1.5
      0.1
      setosa
    
    
      10
      5.4
      3.7
      1.5
      0.2
      setosa
    
    
      11
      4.8
      3.4
      1.6
      0.2
      setosa
    
    
      12
      4.8
      3.0
      1.4
      0.1
      setosa
    
    
      13
      4.3
      3.0
      1.1
      0.1
      setosa
    
    
      14
      5.8
      4.0
      1.2
      0.2
      setosa
    
    
      15
      5.7
      4.4
      1.5
      0.4
      setosa
    
    
      16
      5.4
      3.9
      1.3
      0.4
      setosa
    
    
      17
      5.1
      3.5
      1.4
      0.3
      setosa
    
    
      18
      5.7
      3.8
      1.7
      0.3
      setosa
    
    
      19
      5.1
      3.8
      1.5
      0.3
      setosa
    
    
      20
      5.4
      3.4
      1.7
      0.2
      setosa
    
    
      21
      5.1
      3.7
      1.5
      0.4
      setosa
    
    
      22
      4.6
      3.6
      1.0
      0.2
      setosa
    
    
      23
      5.1
      3.3
      1.7
      0.5
      setosa
    
    
      24
      4.8
      3.4
      1.9
      0.2
      setosa
    
    
      25
      5.0
      3.0
      1.6
      0.2
      setosa
    
    
      26
      5.0
      3.4
      1.6
      0.4
      setosa
    
    
      27
      5.2
      3.5
      1.5
      0.2
      setosa
    
    
      28
      5.2
      3.4
      1.4
      0.2
      setosa
    
    
      29
      4.7
      3.2
      1.6
      0.2
      setosa
    
    
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      90
      5.5
      2.6
      4.4
      1.2
      versicolor
    
    
      91
      6.1
      3.0
      4.6
      1.4
      versicolor
    
    
      92
      5.8
      2.6
      4.0
      1.2
      versicolor
    
    
      93
      5.0
      2.3
      3.3
      1.0
      versicolor
    
    
      94
      5.6
      2.7
      4.2
      1.3
      versicolor
    
    
      95
      5.7
      3.0
      4.2
      1.2
      versicolor
    
    
      96
      5.7
      2.9
      4.2
      1.3
      versicolor
    
    
      97
      6.2
      2.9
      4.3
      1.3
      versicolor
    
    
      98
      5.1
      2.5
      3.0
      1.1
      versicolor
    
    
      99
      5.7
      2.8
      4.1
      1.3
      versicolor
    
    
      100
      6.3
      3.3
      6.0
      2.5
      virginica
    
    
      101
      5.8
      2.7
      5.1
      1.9
      virginica
    
    
      102
      7.1
      3.0
      5.9
      2.1
      virginica
    
    
      103
      6.3
      2.9
      5.6
      1.8
      virginica
    
    
      104
      6.5
      3.0
      5.8
      2.2
      virginica
    
    
      105
      7.6
      3.0
      6.6
      2.1
      virginica
    
    
      106
      4.9
      2.5
      4.5
      1.7
      virginica
    
    
      107
      7.3
      2.9
      6.3
      1.8
      virginica
    
    
      108
      6.7
      2.5
      5.8
      1.8
      virginica
    
    
      109
      7.2
      3.6
      6.1
      2.5
      virginica
    
    
      110
      6.5
      3.2
      5.1
      2.0
      virginica
    
    
      111
      6.4
      2.7
      5.3
      1.9
      virginica
    
    
      112
      6.8
      3.0
      5.5
      2.1
      virginica
    
    
      113
      5.7
      2.5
      5.0
      2.0
      virginica
    
    
      114
      5.8
      2.8
      5.1
      2.4
      virginica
    
    
      115
      6.4
      3.2
      5.3
      2.3
      virginica
    
    
      116
      6.5
      3.0
      5.5
      1.8
      virginica
    
    
      117
      7.7
      3.8
      6.7
      2.2
      virginica
    
    
      118
      7.7
      2.6
      6.9
      2.3
      virginica
    
    
      119
      6.0
      2.2
      5.0
      1.5
      virginica
    
  

120 rows × 5 columns



In [184]:

    
test1=iris.iloc[b,:]



In [180]:

    
np.arange(a,len(iris))









    Out[180]:





array([120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
       133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
       146, 147, 148, 149])



In [181]:

    
c=np.arange(a,len(iris))



In [183]:

    
iris.iloc[c,:]









    Out[183]:







  
    
      
      Sepal.Length
      Sepal.Width
      Petal.Length
      Petal.Width
      Species
    
  
  
    
      120
      6.9
      3.2
      5.7
      2.3
      virginica
    
    
      121
      5.6
      2.8
      4.9
      2.0
      virginica
    
    
      122
      7.7
      2.8
      6.7
      2.0
      virginica
    
    
      123
      6.3
      2.7
      4.9
      1.8
      virginica
    
    
      124
      6.7
      3.3
      5.7
      2.1
      virginica
    
    
      125
      7.2
      3.2
      6.0
      1.8
      virginica
    
    
      126
      6.2
      2.8
      4.8
      1.8
      virginica
    
    
      127
      6.1
      3.0
      4.9
      1.8
      virginica
    
    
      128
      6.4
      2.8
      5.6
      2.1
      virginica
    
    
      129
      7.2
      3.0
      5.8
      1.6
      virginica
    
    
      130
      7.4
      2.8
      6.1
      1.9
      virginica
    
    
      131
      7.9
      3.8
      6.4
      2.0
      virginica
    
    
      132
      6.4
      2.8
      5.6
      2.2
      virginica
    
    
      133
      6.3
      2.8
      5.1
      1.5
      virginica
    
    
      134
      6.1
      2.6
      5.6
      1.4
      virginica
    
    
      135
      7.7
      3.0
      6.1
      2.3
      virginica
    
    
      136
      6.3
      3.4
      5.6
      2.4
      virginica
    
    
      137
      6.4
      3.1
      5.5
      1.8
      virginica
    
    
      138
      6.0
      3.0
      4.8
      1.8
      virginica
    
    
      139
      6.9
      3.1
      5.4
      2.1
      virginica
    
    
      140
      6.7
      3.1
      5.6
      2.4
      virginica
    
    
      141
      6.9
      3.1
      5.1
      2.3
      virginica
    
    
      142
      5.8
      2.7
      5.1
      1.9
      virginica
    
    
      143
      6.8
      3.2
      5.9
      2.3
      virginica
    
    
      144
      6.7
      3.3
      5.7
      2.5
      virginica
    
    
      145
      6.7
      3.0
      5.2
      2.3
      virginica
    
    
      146
      6.3
      2.5
      5.0
      1.9
      virginica
    
    
      147
      6.5
      3.0
      5.2
      2.0
      virginica
    
    
      148
      6.2
      3.4
      5.4
      2.3
      virginica
    
    
      149
      5.9
      3.0
      5.1
      1.8
      virginica



In [185]:

    
control1=iris.iloc[c,:]



In [188]:

    
rowsi = np.random.choice(iris.index.values, round(0.8*len(iris)),replace=False)
print(rowsi)









    



[147 112   8 139  99 119 110 109 135 144  60  89 138 117  83  38 101  66
  78  27   6  16 126 106 134  29  36  40  80  39  76  85  69  86 125  17
  82   4 132 118  62  93 121  22  35  63  53  30 127  67   5  32  28  48
  34 115   9  14  56  33 131  81 137 149 103 116  52  43  77 129  46  44
  21  96  75  68  25  92 122 102   1 124 108  57  97  50  94  73  90  64
 148 140   7 141 120  23  58  79 111 130  31 136  18  88  12  59  19 113
  26  42  70  71  91  20 143  72  95 107  49  74]



In [187]:

    
#np.random.choice?



In [189]:

    
test2=iris.iloc[rowsi,:]



In [190]:

    
test2









    Out[190]:







  
    
      
      Sepal.Length
      Sepal.Width
      Petal.Length
      Petal.Width
      Species
    
  
  
    
      147
      6.5
      3.0
      5.2
      2.0
      virginica
    
    
      112
      6.8
      3.0
      5.5
      2.1
      virginica
    
    
      8
      4.4
      2.9
      1.4
      0.2
      setosa
    
    
      139
      6.9
      3.1
      5.4
      2.1
      virginica
    
    
      99
      5.7
      2.8
      4.1
      1.3
      versicolor
    
    
      119
      6.0
      2.2
      5.0
      1.5
      virginica
    
    
      110
      6.5
      3.2
      5.1
      2.0
      virginica
    
    
      109
      7.2
      3.6
      6.1
      2.5
      virginica
    
    
      135
      7.7
      3.0
      6.1
      2.3
      virginica
    
    
      144
      6.7
      3.3
      5.7
      2.5
      virginica
    
    
      60
      5.0
      2.0
      3.5
      1.0
      versicolor
    
    
      89
      5.5
      2.5
      4.0
      1.3
      versicolor
    
    
      138
      6.0
      3.0
      4.8
      1.8
      virginica
    
    
      117
      7.7
      3.8
      6.7
      2.2
      virginica
    
    
      83
      6.0
      2.7
      5.1
      1.6
      versicolor
    
    
      38
      4.4
      3.0
      1.3
      0.2
      setosa
    
    
      101
      5.8
      2.7
      5.1
      1.9
      virginica
    
    
      66
      5.6
      3.0
      4.5
      1.5
      versicolor
    
    
      78
      6.0
      2.9
      4.5
      1.5
      versicolor
    
    
      27
      5.2
      3.5
      1.5
      0.2
      setosa
    
    
      6
      4.6
      3.4
      1.4
      0.3
      setosa
    
    
      16
      5.4
      3.9
      1.3
      0.4
      setosa
    
    
      126
      6.2
      2.8
      4.8
      1.8
      virginica
    
    
      106
      4.9
      2.5
      4.5
      1.7
      virginica
    
    
      134
      6.1
      2.6
      5.6
      1.4
      virginica
    
    
      29
      4.7
      3.2
      1.6
      0.2
      setosa
    
    
      36
      5.5
      3.5
      1.3
      0.2
      setosa
    
    
      40
      5.0
      3.5
      1.3
      0.3
      setosa
    
    
      80
      5.5
      2.4
      3.8
      1.1
      versicolor
    
    
      39
      5.1
      3.4
      1.5
      0.2
      setosa
    
    
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      148
      6.2
      3.4
      5.4
      2.3
      virginica
    
    
      140
      6.7
      3.1
      5.6
      2.4
      virginica
    
    
      7
      5.0
      3.4
      1.5
      0.2
      setosa
    
    
      141
      6.9
      3.1
      5.1
      2.3
      virginica
    
    
      120
      6.9
      3.2
      5.7
      2.3
      virginica
    
    
      23
      5.1
      3.3
      1.7
      0.5
      setosa
    
    
      58
      6.6
      2.9
      4.6
      1.3
      versicolor
    
    
      79
      5.7
      2.6
      3.5
      1.0
      versicolor
    
    
      111
      6.4
      2.7
      5.3
      1.9
      virginica
    
    
      130
      7.4
      2.8
      6.1
      1.9
      virginica
    
    
      31
      5.4
      3.4
      1.5
      0.4
      setosa
    
    
      136
      6.3
      3.4
      5.6
      2.4
      virginica
    
    
      18
      5.7
      3.8
      1.7
      0.3
      setosa
    
    
      88
      5.6
      3.0
      4.1
      1.3
      versicolor
    
    
      12
      4.8
      3.0
      1.4
      0.1
      setosa
    
    
      59
      5.2
      2.7
      3.9
      1.4
      versicolor
    
    
      19
      5.1
      3.8
      1.5
      0.3
      setosa
    
    
      113
      5.7
      2.5
      5.0
      2.0
      virginica
    
    
      26
      5.0
      3.4
      1.6
      0.4
      setosa
    
    
      42
      4.4
      3.2
      1.3
      0.2
      setosa
    
    
      70
      5.9
      3.2
      4.8
      1.8
      versicolor
    
    
      71
      6.1
      2.8
      4.0
      1.3
      versicolor
    
    
      91
      6.1
      3.0
      4.6
      1.4
      versicolor
    
    
      20
      5.4
      3.4
      1.7
      0.2
      setosa
    
    
      143
      6.8
      3.2
      5.9
      2.3
      virginica
    
    
      72
      6.3
      2.5
      4.9
      1.5
      versicolor
    
    
      95
      5.7
      3.0
      4.2
      1.2
      versicolor
    
    
      107
      7.3
      2.9
      6.3
      1.8
      virginica
    
    
      49
      5.0
      3.3
      1.4
      0.2
      setosa
    
    
      74
      6.4
      2.9
      4.3
      1.3
      versicolor
    
  

120 rows × 5 columns



In [191]:

    
rowsi









    Out[191]:





array([147, 112,   8, 139,  99, 119, 110, 109, 135, 144,  60,  89, 138,
       117,  83,  38, 101,  66,  78,  27,   6,  16, 126, 106, 134,  29,
        36,  40,  80,  39,  76,  85,  69,  86, 125,  17,  82,   4, 132,
       118,  62,  93, 121,  22,  35,  63,  53,  30, 127,  67,   5,  32,
        28,  48,  34, 115,   9,  14,  56,  33, 131,  81, 137, 149, 103,
       116,  52,  43,  77, 129,  46,  44,  21,  96,  75,  68,  25,  92,
       122, 102,   1, 124, 108,  57,  97,  50,  94,  73,  90,  64, 148,
       140,   7, 141, 120,  23,  58,  79, 111, 130,  31, 136,  18,  88,
        12,  59,  19, 113,  26,  42,  70,  71,  91,  20, 143,  72,  95,
       107,  49,  74], dtype=int64)



In [192]:

    
indices = np.random.permutation(len(iris))
indices









    Out[192]:





array([  9,  29, 130, 101,  74,  14, 139, 108,   5,  21,  88,  58, 122,
        27,  31,  32,  33, 107,  13, 129, 140,  43,  38, 125,  12, 141,
        80,   8,  16,  36,  75,   1,  93,  10,  82,  56, 119, 105,  67,
       114,  55,  57,  30, 137,  59,  34,  73,  91,  24, 149, 120,  17,
        90, 138,  35, 144, 136, 115,  99,  72,  79,  18,   4,  98,  70,
       112,  62,  15,  52, 121,  49, 146, 117, 110, 148, 133,  41,  26,
       106,  84,  89,  44,  94, 104, 118,  77,  78,  42,  47,  51,  68,
       132,   6,  69,  19,  86,   2, 143,  50, 103, 124, 127,  60,   3,
       128, 102,  39,  85, 126,  28,  96,  45,  71,  81, 116,  48, 135,
       123,  83,  25, 145,  22,  65,   7,  40, 142,  20,  46,  64,  54,
       111,  37,  66,   0,  97, 100, 113,  53,  87, 147,  76,  11, 109,
        63, 131, 134,  95,  61,  92,  23])



In [196]:

    
indices[0:120]









    Out[196]:





array([  9,  29, 130, 101,  74,  14, 139, 108,   5,  21,  88,  58, 122,
        27,  31,  32,  33, 107,  13, 129, 140,  43,  38, 125,  12, 141,
        80,   8,  16,  36,  75,   1,  93,  10,  82,  56, 119, 105,  67,
       114,  55,  57,  30, 137,  59,  34,  73,  91,  24, 149, 120,  17,
        90, 138,  35, 144, 136, 115,  99,  72,  79,  18,   4,  98,  70,
       112,  62,  15,  52, 121,  49, 146, 117, 110, 148, 133,  41,  26,
       106,  84,  89,  44,  94, 104, 118,  77,  78,  42,  47,  51,  68,
       132,   6,  69,  19,  86,   2, 143,  50, 103, 124, 127,  60,   3,
       128, 102,  39,  85, 126,  28,  96,  45,  71,  81, 116,  48, 135,
       123,  83,  25])



In [195]:

    
indices[120:150]









    Out[195]:





array([145,  22,  65,   7,  40, 142,  20,  46,  64,  54, 111,  37,  66,
         0,  97, 100, 113,  53,  87, 147,  76,  11, 109,  63, 131, 134,
        95,  61,  92,  23])



In [197]:

    
from sklearn.linear_model import LogisticRegression



In [198]:

    
from sklearn import datasets



In [199]:

    
iris = datasets.load_iris()



In [200]:

    
type(iris)









    Out[200]:





sklearn.datasets.base.Bunch



In [201]:

    
x,y=iris.data,iris.target



In [202]:

    
x









    Out[202]:





array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  3. ,  1.4,  0.2],
       [ 4.7,  3.2,  1.3,  0.2],
       [ 4.6,  3.1,  1.5,  0.2],
       [ 5. ,  3.6,  1.4,  0.2],
       [ 5.4,  3.9,  1.7,  0.4],
       [ 4.6,  3.4,  1.4,  0.3],
       [ 5. ,  3.4,  1.5,  0.2],
       [ 4.4,  2.9,  1.4,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 5.4,  3.7,  1.5,  0.2],
       [ 4.8,  3.4,  1.6,  0.2],
       [ 4.8,  3. ,  1.4,  0.1],
       [ 4.3,  3. ,  1.1,  0.1],
       [ 5.8,  4. ,  1.2,  0.2],
       [ 5.7,  4.4,  1.5,  0.4],
       [ 5.4,  3.9,  1.3,  0.4],
       [ 5.1,  3.5,  1.4,  0.3],
       [ 5.7,  3.8,  1.7,  0.3],
       [ 5.1,  3.8,  1.5,  0.3],
       [ 5.4,  3.4,  1.7,  0.2],
       [ 5.1,  3.7,  1.5,  0.4],
       [ 4.6,  3.6,  1. ,  0.2],
       [ 5.1,  3.3,  1.7,  0.5],
       [ 4.8,  3.4,  1.9,  0.2],
       [ 5. ,  3. ,  1.6,  0.2],
       [ 5. ,  3.4,  1.6,  0.4],
       [ 5.2,  3.5,  1.5,  0.2],
       [ 5.2,  3.4,  1.4,  0.2],
       [ 4.7,  3.2,  1.6,  0.2],
       [ 4.8,  3.1,  1.6,  0.2],
       [ 5.4,  3.4,  1.5,  0.4],
       [ 5.2,  4.1,  1.5,  0.1],
       [ 5.5,  4.2,  1.4,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 5. ,  3.2,  1.2,  0.2],
       [ 5.5,  3.5,  1.3,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 4.4,  3. ,  1.3,  0.2],
       [ 5.1,  3.4,  1.5,  0.2],
       [ 5. ,  3.5,  1.3,  0.3],
       [ 4.5,  2.3,  1.3,  0.3],
       [ 4.4,  3.2,  1.3,  0.2],
       [ 5. ,  3.5,  1.6,  0.6],
       [ 5.1,  3.8,  1.9,  0.4],
       [ 4.8,  3. ,  1.4,  0.3],
       [ 5.1,  3.8,  1.6,  0.2],
       [ 4.6,  3.2,  1.4,  0.2],
       [ 5.3,  3.7,  1.5,  0.2],
       [ 5. ,  3.3,  1.4,  0.2],
       [ 7. ,  3.2,  4.7,  1.4],
       [ 6.4,  3.2,  4.5,  1.5],
       [ 6.9,  3.1,  4.9,  1.5],
       [ 5.5,  2.3,  4. ,  1.3],
       [ 6.5,  2.8,  4.6,  1.5],
       [ 5.7,  2.8,  4.5,  1.3],
       [ 6.3,  3.3,  4.7,  1.6],
       [ 4.9,  2.4,  3.3,  1. ],
       [ 6.6,  2.9,  4.6,  1.3],
       [ 5.2,  2.7,  3.9,  1.4],
       [ 5. ,  2. ,  3.5,  1. ],
       [ 5.9,  3. ,  4.2,  1.5],
       [ 6. ,  2.2,  4. ,  1. ],
       [ 6.1,  2.9,  4.7,  1.4],
       [ 5.6,  2.9,  3.6,  1.3],
       [ 6.7,  3.1,  4.4,  1.4],
       [ 5.6,  3. ,  4.5,  1.5],
       [ 5.8,  2.7,  4.1,  1. ],
       [ 6.2,  2.2,  4.5,  1.5],
       [ 5.6,  2.5,  3.9,  1.1],
       [ 5.9,  3.2,  4.8,  1.8],
       [ 6.1,  2.8,  4. ,  1.3],
       [ 6.3,  2.5,  4.9,  1.5],
       [ 6.1,  2.8,  4.7,  1.2],
       [ 6.4,  2.9,  4.3,  1.3],
       [ 6.6,  3. ,  4.4,  1.4],
       [ 6.8,  2.8,  4.8,  1.4],
       [ 6.7,  3. ,  5. ,  1.7],
       [ 6. ,  2.9,  4.5,  1.5],
       [ 5.7,  2.6,  3.5,  1. ],
       [ 5.5,  2.4,  3.8,  1.1],
       [ 5.5,  2.4,  3.7,  1. ],
       [ 5.8,  2.7,  3.9,  1.2],
       [ 6. ,  2.7,  5.1,  1.6],
       [ 5.4,  3. ,  4.5,  1.5],
       [ 6. ,  3.4,  4.5,  1.6],
       [ 6.7,  3.1,  4.7,  1.5],
       [ 6.3,  2.3,  4.4,  1.3],
       [ 5.6,  3. ,  4.1,  1.3],
       [ 5.5,  2.5,  4. ,  1.3],
       [ 5.5,  2.6,  4.4,  1.2],
       [ 6.1,  3. ,  4.6,  1.4],
       [ 5.8,  2.6,  4. ,  1.2],
       [ 5. ,  2.3,  3.3,  1. ],
       [ 5.6,  2.7,  4.2,  1.3],
       [ 5.7,  3. ,  4.2,  1.2],
       [ 5.7,  2.9,  4.2,  1.3],
       [ 6.2,  2.9,  4.3,  1.3],
       [ 5.1,  2.5,  3. ,  1.1],
       [ 5.7,  2.8,  4.1,  1.3],
       [ 6.3,  3.3,  6. ,  2.5],
       [ 5.8,  2.7,  5.1,  1.9],
       [ 7.1,  3. ,  5.9,  2.1],
       [ 6.3,  2.9,  5.6,  1.8],
       [ 6.5,  3. ,  5.8,  2.2],
       [ 7.6,  3. ,  6.6,  2.1],
       [ 4.9,  2.5,  4.5,  1.7],
       [ 7.3,  2.9,  6.3,  1.8],
       [ 6.7,  2.5,  5.8,  1.8],
       [ 7.2,  3.6,  6.1,  2.5],
       [ 6.5,  3.2,  5.1,  2. ],
       [ 6.4,  2.7,  5.3,  1.9],
       [ 6.8,  3. ,  5.5,  2.1],
       [ 5.7,  2.5,  5. ,  2. ],
       [ 5.8,  2.8,  5.1,  2.4],
       [ 6.4,  3.2,  5.3,  2.3],
       [ 6.5,  3. ,  5.5,  1.8],
       [ 7.7,  3.8,  6.7,  2.2],
       [ 7.7,  2.6,  6.9,  2.3],
       [ 6. ,  2.2,  5. ,  1.5],
       [ 6.9,  3.2,  5.7,  2.3],
       [ 5.6,  2.8,  4.9,  2. ],
       [ 7.7,  2.8,  6.7,  2. ],
       [ 6.3,  2.7,  4.9,  1.8],
       [ 6.7,  3.3,  5.7,  2.1],
       [ 7.2,  3.2,  6. ,  1.8],
       [ 6.2,  2.8,  4.8,  1.8],
       [ 6.1,  3. ,  4.9,  1.8],
       [ 6.4,  2.8,  5.6,  2.1],
       [ 7.2,  3. ,  5.8,  1.6],
       [ 7.4,  2.8,  6.1,  1.9],
       [ 7.9,  3.8,  6.4,  2. ],
       [ 6.4,  2.8,  5.6,  2.2],
       [ 6.3,  2.8,  5.1,  1.5],
       [ 6.1,  2.6,  5.6,  1.4],
       [ 7.7,  3. ,  6.1,  2.3],
       [ 6.3,  3.4,  5.6,  2.4],
       [ 6.4,  3.1,  5.5,  1.8],
       [ 6. ,  3. ,  4.8,  1.8],
       [ 6.9,  3.1,  5.4,  2.1],
       [ 6.7,  3.1,  5.6,  2.4],
       [ 6.9,  3.1,  5.1,  2.3],
       [ 5.8,  2.7,  5.1,  1.9],
       [ 6.8,  3.2,  5.9,  2.3],
       [ 6.7,  3.3,  5.7,  2.5],
       [ 6.7,  3. ,  5.2,  2.3],
       [ 6.3,  2.5,  5. ,  1.9],
       [ 6.5,  3. ,  5.2,  2. ],
       [ 6.2,  3.4,  5.4,  2.3],
       [ 5.9,  3. ,  5.1,  1.8]])



In [203]:

    
y









    Out[203]:





array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])



In [205]:

    
from sklearn.cross_validation import train_test_split









    



C:\Users\KOGENTIX\Anaconda3\lib\site-packages\sklearn\cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)



In [206]:

    
X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.8)



In [ ]:

	carat	table	depth	price	x	y	z
carat	1.000000	0.037631	0.008883	0.856340	0.859864	0.960857	0.791658
table	0.037631	1.000000	0.423914	0.023266	0.028462	0.045617	0.031170
depth	0.008883	0.423914	1.000000	-0.002129	-0.003632	0.007346	0.031961
price	0.856340	0.023266	-0.002129	1.000000	0.719537	0.796746	0.645191
x	0.859864	0.028462	-0.003632	0.719537	1.000000	0.893783	0.482109
y	0.960857	0.045617	0.007346	0.796746	0.893783	1.000000	0.819880
z	0.791658	0.031170	0.031961	0.645191	0.482109	0.819880	1.000000

	avg(mpg)	gear
0	16.106667	3
1	24.533333	4
2	21.380000	5

	carat	table	depth	price	x	y	z
0.1	0.31	55.0	59.7	734.0	4.29	4.38	2.71
0.2	0.40	56.0	60.7	994.0	4.58	4.75	2.96
0.3	0.51	57.0	61.3	1510.0	4.95	5.15	3.22
0.4	0.70	57.0	61.7	2240.0	5.37	5.64	3.55
0.5	0.90	58.0	62.0	3503.0	5.78	6.05	3.86
0.6	1.01	58.0	62.3	5294.0	6.28	6.44	4.04
0.7	1.22	59.0	62.6	8362.0	6.64	6.87	4.44
0.8	1.57	60.0	63.0	14460.0	7.34	7.49	4.92
0.9	2.06	61.0	63.9	22211.0	8.12	8.21	5.60

	age	workclass	fnlwgt	education	education-num	marital-status	occupation	relationship	race	sex	capital-gain	hours-per-week	native-country	income
0	39	State-gov	77516	Bachelors	13	Never-married	Adm-clerical	Not-in-family	White	Male	2174	40	United-States	<=50K
1	50	Self-emp-not-inc	83311	Bachelors	13	Married-civ-spouse	Exec-managerial	Husband	White	Male	0	13	United-States	<=50K
2	38	Private	215646	HS-grad	9	Divorced	Handlers-cleaners	Not-in-family	White	Male	0	40	United-States	<=50K
3	53	Private	234721	11th	7	Married-civ-spouse	Handlers-cleaners	Husband	Black	Male	0	40	United-States	<=50K
4	28	Private	338409	Bachelors	13	Married-civ-spouse	Prof-specialty	Wife	Black	Female	0	40	Cuba	<=50K

	Name	PClass	Age	Sex	Survived	SexCode
0	Allen, Miss Elisabeth Walton	1st	29.00	female	1	1
1	Allison, Miss Helen Loraine	1st	2.00	female	0	1
2	Allison, Mr Hudson Joshua Creighton	1st	30.00	male	0	0
3	Allison, Mrs Hudson JC (Bessie Waldo Daniels)	1st	25.00	female	0	1
4	Allison, Master Hudson Trevor	1st	0.92	male	1	0

Software	Version
Python	3.6.1 64bit [MSC v.1900 64 bit (AMD64)]
IPython	5.3.0
OS	Windows 10 10.0.15063 SP0
Tue Dec 12 14:13:34 2017 SE Asia Standard Time

	Name	PClass	Age	Sex
1308	Zakarian, Mr Artun	3rd	27.0	male
1309	Zakarian, Mr Maprieder	3rd	26.0	male
1310	Zenni, Mr Philip	3rd	22.0	male
1311	Lievens, Mr Rene	3rd	24.0	male
1312	Zimmerman, Leo	3rd	29.0	male

	Name	PClass	Age	Sex	Survived	SexCode
20	Behr, Mr Karl Howell	1st	26.0	male	1	0
21	Birnbaum, Mr Jakob	1st	25.0	male	0	0
22	Bishop, Mr Dickinson H	1st	25.0	male	1	0
23	Bishop, Mrs Dickinson H (Helen Walton)	1st	19.0	female	1	1
24	Bjornstrm-Steffansson, Mr Mauritz Hakan	1st	28.0	male	1	0
25	Blackwell, Mr Stephen Weart	1st	45.0	male	0	0
26	Blank, Mr Henry	1st	39.0	male	1	0
27	Bonnell, Miss Caroline	1st	30.0	female	1	1
28	Bonnell, Miss Elizabeth	1st	58.0	female	1	1

	age	workclass	fnlwgt	education	education-num	marital-status	occupation	relationship	race	sex	capital-gain	capital-loss	hours-per-week	native-country	income
16132	47	Private	344916	Assoc-acdm	12	Divorced	Transport-moving	Not-in-family	Black	Male	0	0	40	United-States	<=50K
5411	71	Local-gov	337064	Masters	14	Widowed	Prof-specialty	Not-in-family	White	Female	0	0	40	United-States	<=50K
11189	59	Private	46466	HS-grad	9	Married-civ-spouse	Transport-moving	Husband	White	Male	0	0	40	United-States	>50K
12552	51	Private	193720	HS-grad	9	Married-civ-spouse	Craft-repair	Husband	White	Male	0	0	42	United-States	<=50K
1920	32	Private	120426	HS-grad	9	Separated	Adm-clerical	Unmarried	White	Female	0	0	40	United-States	<=50K
6733	21	?	152328	Some-college	10	Never-married	?	Own-child	White	Male	0	0	20	United-States	<=50K
31595	27	Private	278617	Some-college	10	Never-married	Craft-repair	Not-in-family	White	Male	0	0	40	United-States	<=50K
10530	67	Local-gov	233681	Assoc-acdm	12	Married-civ-spouse	Exec-managerial	Husband	White	Male	0	0	35	United-States	<=50K
278	25	Private	193787	Some-college	10	Never-married	Tech-support	Own-child	White	Female	0	0	40	United-States	<=50K
23958	56	Private	265086	HS-grad	9	Married-civ-spouse	Craft-repair	Husband	White	Male	0	0	50	United-States	>50K
9603	36	Private	272944	HS-grad	9	Never-married	Transport-moving	Not-in-family	White	Male	0	0	45	United-States	<=50K
8259	23	Private	195767	HS-grad	9	Never-married	Craft-repair	Not-in-family	White	Male	0	0	40	United-States	<=50K
3089	51	Self-emp-not-inc	145409	Bachelors	13	Married-civ-spouse	Sales	Husband	White	Male	15024	0	50	United-States	>50K
16182	56	Private	145574	HS-grad	9	Married-civ-spouse	Craft-repair	Husband	White	Male	0	0	40	United-States	>50K
6404	20	Self-emp-inc	95997	HS-grad	9	Never-married	Farming-fishing	Own-child	White	Male	0	0	70	United-States	<=50K
7994	37	Private	178136	HS-grad	9	Married-civ-spouse	Machine-op-inspct	Husband	Black	Male	0	0	40	United-States	<=50K
19078	60	Local-gov	259803	Bachelors	13	Married-civ-spouse	Prof-specialty	Wife	White	Female	0	0	45	United-States	>50K
28333	61	Private	29059	HS-grad	9	Divorced	Sales	Unmarried	White	Female	0	2754	25	United-States	<=50K
14027	34	Private	35644	Some-college	10	Married-civ-spouse	Craft-repair	Husband	White	Male	0	0	40	United-States	>50K
32534	37	Private	179137	Some-college	10	Divorced	Adm-clerical	Unmarried	White	Female	0	0	39	United-States	<=50K
14665	40	Private	26892	Bachelors	13	Married-civ-spouse	Adm-clerical	Husband	White	Male	0	0	40	United-States	>50K
31239	51	Private	99987	10th	6	Separated	Machine-op-inspct	Unmarried	Black	Female	0	0	40	United-States	<=50K
32336	32	Private	172415	HS-grad	9	Never-married	Other-service	Unmarried	Black	Female	0	0	40	United-States	<=50K
11534	25	Private	35854	Some-college	10	Married-spouse-absent	Sales	Unmarried	White	Female	0	0	40	United-States	<=50K
13507	34	Private	236543	HS-grad	9	Never-married	Craft-repair	Own-child	White	Male	0	0	40	United-States	<=50K
7887	25	Private	34803	Bachelors	13	Never-married	Exec-managerial	Own-child	White	Female	0	0	20	United-States	<=50K
29310	22	?	313786	HS-grad	9	Divorced	?	Other-relative	Black	Female	0	0	40	United-States	<=50K
13020	34	Self-emp-not-inc	137223	10th	6	Never-married	Other-service	Own-child	White	Female	0	0	40	United-States	<=50K
31124	63	Private	163708	9th	5	Widowed	Other-service	Not-in-family	White	Female	0	0	20	United-States	<=50K
16488	33	?	119918	Bachelors	13	Never-married	?	Not-in-family	Black	Male	0	0	45	?	<=50K
9102	68	Private	201732	Some-college	10	Divorced	Adm-clerical	Unmarried	White	Female	0	0	40	United-States	<=50K
17185	22	Private	202871	Assoc-voc	11	Married-civ-spouse	Other-service	Husband	White	Male	0	0	44	United-States	<=50K
3422	40	Private	168936	Assoc-voc	11	Divorced	Other-service	Not-in-family	White	Female	0	0	32	United-States	<=50K

	Unnamed: 0	carat	cut	color	clarity	table	depth	cert	measurements	price	x	y	z
0	1	0.25	V.Good	K	I1	59.0	63.7	GIA	3.96 x 3.95 x 2.52	NaN	3.96	3.95	2.52
1	2	0.23	Good	G	I1	61.0	58.1	GIA	4.00 x 4.05 x 2.30	NaN	4.00	4.05	2.30
2	3	0.34	Good	J	I2	58.0	58.7	GIA	4.56 x 4.53 x 2.67	NaN	4.56	4.53	2.67
3	4	0.21	V.Good	D	I1	60.0	60.6	GIA	3.80 x 3.82 x 2.31	NaN	3.80	3.82	2.31
4	5	0.31	V.Good	K	I1	59.0	62.2	EGL	4.35 x 4.26 x 2.68	NaN	4.35	4.26	2.68

	age	fnlwgt	education-num	capital-gain	capital-loss	hours-per-week
count	32561.000000	3.256100e+04	32561.000000	32561.000000	32561.000000	32561.000000
mean	38.581647	1.897784e+05	10.080679	1077.648844	87.303830	40.437456
std	13.640433	1.055500e+05	2.572720	7385.292085	402.960219	12.347429
min	17.000000	1.228500e+04	1.000000	0.000000	0.000000	1.000000
25%	28.000000	1.178270e+05	9.000000	0.000000	0.000000	40.000000
50%	37.000000	1.783560e+05	10.000000	0.000000	0.000000	40.000000
75%	48.000000	2.370510e+05	12.000000	0.000000	0.000000	45.000000
max	90.000000	1.484705e+06	16.000000	99999.000000	4356.000000	99.000000

	Age	Survived	SexCode
count	756.000000	1313.000000	1313.000000
mean	30.397989	0.342727	0.351866
std	14.259049	0.474802	0.477734
min	0.170000	0.000000	0.000000
25%	21.000000	0.000000	0.000000
50%	28.000000	0.000000	0.000000
75%	39.000000	1.000000	1.000000
max	71.000000	1.000000	1.000000

	Unnamed: 0	carat	table	depth	price	x	y	z
count	593784.000000	593784.000000	593784.000000	593784.000000	593784.000000	593784.000000	593784.000000	593784.000000
mean	299220.966754	1.072593	57.658755	61.091980	8755.808723	5.991952	6.200535	4.036075
std	172625.362546	0.813113	4.827985	7.487465	13022.108651	1.530444	1.485081	1.240932
min	494.000000	0.200000	0.000000	0.000000	300.000000	0.150000	1.000000	0.040000
25%	149637.750000	0.500000	56.000000	61.000000	1218.000000	4.740000	4.970000	3.120000
50%	299311.500000	0.900000	58.000000	62.000000	3503.000000	5.780000	6.050000	3.860000
75%	448775.250000	1.500000	59.000000	62.700000	11186.000000	6.970000	7.230000	4.610000
max	598024.000000	9.250000	75.900000	81.300000	99990.000000	13.890000	13.890000	13.180000

	Unnamed: 0	mpg	cyl	disp	hp	drat	wt	qsec	vs	am	gear	carb
0	Mazda RX4	21.0	6	160.0	110	3.90	2.620	16.46	0	1	4	4
1	Mazda RX4 Wag	21.0	6	160.0	110	3.90	2.875	17.02	0	1	4	4
2	Datsun 710	22.8	4	108.0	93	3.85	2.320	18.61	1	1	4	1
3	Hornet 4 Drive	21.4	6	258.0	110	3.08	3.215	19.44	1	0	3	1
4	Hornet Sportabout	18.7	8	360.0	175	3.15	3.440	17.02	0	0	3	2

	Survived	Sex	Age
0	0	female	24.901408
1	0	male	32.320780
2	1	female	30.867143
3	1	male	25.951875

	Sepal.Length	Sepal.Width	Petal.Length	Petal.Width	Species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa
5	5.4	3.9	1.7	0.4	setosa
6	4.6	3.4	1.4	0.3	setosa
7	5.0	3.4	1.5	0.2	setosa
8	4.4	2.9	1.4	0.2	setosa
9	4.9	3.1	1.5	0.1	setosa
10	5.4	3.7	1.5	0.2	setosa
11	4.8	3.4	1.6	0.2	setosa
12	4.8	3.0	1.4	0.1	setosa
13	4.3	3.0	1.1	0.1	setosa
14	5.8	4.0	1.2	0.2	setosa
15	5.7	4.4	1.5	0.4	setosa
16	5.4	3.9	1.3	0.4	setosa
17	5.1	3.5	1.4	0.3	setosa
18	5.7	3.8	1.7	0.3	setosa
19	5.1	3.8	1.5	0.3	setosa
20	5.4	3.4	1.7	0.2	setosa
21	5.1	3.7	1.5	0.4	setosa
22	4.6	3.6	1.0	0.2	setosa
23	5.1	3.3	1.7	0.5	setosa
24	4.8	3.4	1.9	0.2	setosa
25	5.0	3.0	1.6	0.2	setosa
26	5.0	3.4	1.6	0.4	setosa
27	5.2	3.5	1.5	0.2	setosa
28	5.2	3.4	1.4	0.2	setosa
29	4.7	3.2	1.6	0.2	setosa
...	...	...	...	...	...
90	5.5	2.6	4.4	1.2	versicolor
91	6.1	3.0	4.6	1.4	versicolor
92	5.8	2.6	4.0	1.2	versicolor
93	5.0	2.3	3.3	1.0	versicolor
94	5.6	2.7	4.2	1.3	versicolor
95	5.7	3.0	4.2	1.2	versicolor
96	5.7	2.9	4.2	1.3	versicolor
97	6.2	2.9	4.3	1.3	versicolor
98	5.1	2.5	3.0	1.1	versicolor
99	5.7	2.8	4.1	1.3	versicolor
100	6.3	3.3	6.0	2.5	virginica
101	5.8	2.7	5.1	1.9	virginica
102	7.1	3.0	5.9	2.1	virginica
103	6.3	2.9	5.6	1.8	virginica
104	6.5	3.0	5.8	2.2	virginica
105	7.6	3.0	6.6	2.1	virginica
106	4.9	2.5	4.5	1.7	virginica
107	7.3	2.9	6.3	1.8	virginica
108	6.7	2.5	5.8	1.8	virginica
109	7.2	3.6	6.1	2.5	virginica
110	6.5	3.2	5.1	2.0	virginica
111	6.4	2.7	5.3	1.9	virginica
112	6.8	3.0	5.5	2.1	virginica
113	5.7	2.5	5.0	2.0	virginica
114	5.8	2.8	5.1	2.4	virginica
115	6.4	3.2	5.3	2.3	virginica
116	6.5	3.0	5.5	1.8	virginica
117	7.7	3.8	6.7	2.2	virginica
118	7.7	2.6	6.9	2.3	virginica
119	6.0	2.2	5.0	1.5	virginica