In [3]:
#comments in Python
'''multiple lines of comments are being shown here'''


Out[3]:
'multiple lines of comments are being shown here'

Important

this is a markdown and not a code window


In [1]:
2+3+5


Out[1]:
10

In [2]:
66-3-(-4)


Out[2]:
67

In [3]:
32*3


Out[3]:
96

In [4]:
2**3


Out[4]:
8

In [5]:
2^3


Out[5]:
1

In [6]:
43/3


Out[6]:
14.333333333333334

In [7]:
43//3


Out[7]:
14

In [8]:
43%3


Out[8]:
1

In [9]:
import math as mt

In [10]:
mt.exp(2)


Out[10]:
7.38905609893065

In [11]:
mt.log(10)


Out[11]:
2.302585092994046

In [12]:
mt.exp(1)


Out[12]:
2.718281828459045

In [14]:
mt.log(8,2)


Out[14]:
3.0

In [15]:
mt.sqrt(1000)


Out[15]:
31.622776601683793

In [17]:
import numpy as np

In [18]:
np.std([23,45,67,78])


Out[18]:
21.123150806638673

In [20]:
dir(mt)


Out[20]:
['__doc__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'acos',
 'acosh',
 'asin',
 'asinh',
 'atan',
 'atan2',
 'atanh',
 'ceil',
 'copysign',
 'cos',
 'cosh',
 'degrees',
 'e',
 'erf',
 'erfc',
 'exp',
 'expm1',
 'fabs',
 'factorial',
 'floor',
 'fmod',
 'frexp',
 'fsum',
 'gamma',
 'gcd',
 'hypot',
 'inf',
 'isclose',
 'isfinite',
 'isinf',
 'isnan',
 'ldexp',
 'lgamma',
 'log',
 'log10',
 'log1p',
 'log2',
 'modf',
 'nan',
 'pi',
 'pow',
 'radians',
 'sin',
 'sinh',
 'sqrt',
 'tan',
 'tanh',
 'trunc']

In [21]:
type(1)


Out[21]:
int

In [22]:
type("Ajay")


Out[22]:
str

In [24]:
type([23,45,67])


Out[24]:
list

In [25]:
a=[23,45,67]

In [32]:
len(a)


Out[32]:
3

In [31]:
np.std(a)


Out[31]:
17.962924780409974

In [28]:
np.var(a)


Out[28]:
322.66666666666669

In [30]:
123456789123456789*9999999999999999


Out[30]:
1234567891234567766543210876543211

In [35]:
np.random??

In [37]:
from random import randrange,randint

In [39]:
print(randint(0,90))


78

In [42]:
randrange(1000)


Out[42]:
286

In [46]:
for x in range(0,10):
    print(randrange(10000000000000000))


2472965195555081
6352816454724336
4809973335770632
5246909950815852
6348106781629098
2586909203145681
2509370301745813
4082241628288070
7691514263873286
8069700113941950

In [51]:
def mynewfunction(x,y):
    taxes=((x-1000000)*0.35+100000-min(y,100000))
    print(taxes)

In [53]:
mynewfunction(2200000,300000)


420000.0

In [54]:
import os as os

In [57]:
os??

In [62]:
for x in range(0,30,6):
    print(x)


0
6
12
18
24

In [63]:
def mynewfunction(x,y):
    z=x**3+3*x*y+20*y
    print(z)

In [65]:
for x in range(0,30,6):
    mynewfunction(x,10)


200
596
2288
6572
14744

In [1]:
import os as os

In [2]:
os.getcwd()


Out[2]:
'C:\\Users\\Dell'

In [3]:
os.listdir()


Out[3]:
['.bash_history',
 '.git',
 '.gitconfig',
 '.gitignore',
 '.idlerc',
 '.ipynb_checkpoints',
 '.ipython',
 '.jupyter',
 '.matplotlib',
 '.spyder-py3',
 '.ssh',
 '.VirtualBox',
 'Anaconda3',
 'AppData',
 'Application Data',
 'Contacts',
 'Cookies',
 'data munging again.ipynb',
 'data wrangling titanic dataset.ipynb',
 'Desktop',
 'Documents',
 'Downloads',
 'Dropbox',
 'Favorites',
 'home',
 'IntelGraphicsProfiles',
 'introductory python.ipynb',
 'Links',
 'Local Settings',
 'month_ridership.png',
 'multiple file concat in pandas.ipynb',
 'Music',
 'My Documents',
 'NetHood',
 'new notebook.ipynb',
 'nltk.ipynb',
 'NTUSER.DAT',
 'ntuser.dat.LOG1',
 'ntuser.dat.LOG2',
 'NTUSER.DAT{016888bd-6c6f-11de-8d1d-001e0bcde3ec}.TM.blf',
 'NTUSER.DAT{016888bd-6c6f-11de-8d1d-001e0bcde3ec}.TMContainer00000000000000000001.regtrans-ms',
 'NTUSER.DAT{016888bd-6c6f-11de-8d1d-001e0bcde3ec}.TMContainer00000000000000000002.regtrans-ms',
 'ntuser.ini',
 'pandas 11.ipynb',
 'pandas analysis 1.ipynb',
 'pandas data manipulation.ipynb',
 'Pictures',
 'PrintHood',
 'Rdatasets',
 'Recent',
 'rforanalytics',
 'Saved Games',
 'Searches',
 'SendTo',
 'Start Menu',
 'Templates',
 'test web scraping.ipynb',
 'time series.ipynb',
 'Untitled.ipynb',
 'untitled.txt',
 'Untitled1.ipynb',
 'untitled1.txt',
 'Untitled2.ipynb',
 'Untitled3.ipynb',
 'Untitled4.ipynb',
 'Untitled5.ipynb',
 'Videos',
 'VirtualBox VMs',
 'Web Scraping Yelp with Beautiful Soup.ipynb']

In [4]:
os.chdir('C:\\Users\\Dell')

In [7]:
mystring='Hello World'

In [8]:
mystring


Out[8]:
'Hello World'

In [9]:
mystring[1]


Out[9]:
'e'

In [10]:
mystring[0]


Out[10]:
'H'

In [11]:
print(mystring)


Hello World

In [12]:
type(mystring)


Out[12]:
str

In [13]:
len(mystring)


Out[13]:
11

In [15]:
newstring2='Aye aye me heartie\'s'

In [16]:
newstring3="Aye aye me heartie's"

In [18]:
10*newstring3


Out[18]:
"Aye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie's"

In [30]:
ne1= "'Ajay','Vijay','Anita','Ankit'"

In [32]:
type(ne1)


Out[32]:
str

In [31]:
str(ne1)


Out[31]:
"'Ajay','Vijay','Anita','Ankit'"

In [33]:
ne1[1]


Out[33]:
'A'

In [26]:
ne2= ['Ajay','Vijay','Anita','Ankit']

In [27]:
str(ne2)


Out[27]:
"['Ajay', 'Vijay', 'Anita', 'Ankit']"

In [29]:
ne2[1]


Out[29]:
'Vijay'

In [34]:
myname1='Ajay'
myname2='John'

In [35]:
message= "Hi I am %s howdy"

In [37]:
message %myname1


Out[37]:
'Hi I am Ajay howdy'

In [36]:
message %myname2


Out[36]:
'Hi I am John howdy'

In [38]:
ne2


Out[38]:
['Ajay', 'Vijay', 'Anita', 'Ankit']

In [40]:
ne2.append('Anna')

In [41]:
ne2


Out[41]:
['Ajay', 'Vijay', 'Anita', 'Ankit', 'Anna']

In [42]:
del ne2[0]

In [43]:
ne2


Out[43]:
['Vijay', 'Anita', 'Ankit', 'Anna']

In [44]:
ne3=('Sachin','Dhoni','Gavaskar','Kapil')

In [46]:
dir(ne3)


Out[46]:
['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'count',
 'index']

In [52]:
favourite_movie=['micky mouse,steamboat willie', 'vijay,slumdog millionaire', 'john,passion of christ', 'donald,arthur']

In [53]:
type(favourite_movie)


Out[53]:
list

In [51]:
favourite_movie2={'micky mouse:steamboat willie', 'vijay:slumdog millionaire', 'john:passion of christ', 'donald:arthur'}

In [54]:
type(favourite_movie2)


Out[54]:
set

In [55]:
favourite_movie3={'micky mouse':'steamboat willie', 'vijay':'slumdog millionaire', 'john':'passion of christ', 'donald':'arthur'}

In [56]:
type(favourite_movie3)


Out[56]:
dict

In [57]:
favourite_movie3['micky mouse']


Out[57]:
'steamboat willie'

In [58]:
import re

In [59]:
names =["Anna", "Anne", "Annaporna","Shubham","Aruna"]

In [60]:
for name in names:
    print(re.search(r'(An)',name))


<_sre.SRE_Match object; span=(0, 2), match='An'>
<_sre.SRE_Match object; span=(0, 2), match='An'>
<_sre.SRE_Match object; span=(0, 2), match='An'>
None
None

In [61]:
for name in names:
    print(re.search(r'(A)',name))


<_sre.SRE_Match object; span=(0, 1), match='A'>
<_sre.SRE_Match object; span=(0, 1), match='A'>
<_sre.SRE_Match object; span=(0, 1), match='A'>
None
<_sre.SRE_Match object; span=(0, 1), match='A'>

In [62]:
for name in names:
    print(re.search(r'(a)',name))


<_sre.SRE_Match object; span=(3, 4), match='a'>
None
<_sre.SRE_Match object; span=(3, 4), match='a'>
<_sre.SRE_Match object; span=(5, 6), match='a'>
<_sre.SRE_Match object; span=(4, 5), match='a'>

In [63]:
for name in names:
    print(bool(re.search(r'(a)',name)))


True
False
True
True
True

In [64]:
import numpy as np

In [74]:
numlist=["$10000","$20,000","30,000",40000,"50000   "]

In [75]:
for i,value in enumerate(numlist):
    print(i)       
    print(value)


0
$10000
1
$20,000
2
30,000
3
40000
4
50000   

In [76]:
for i,value in enumerate(numlist):
 
    numlist[i]=re.sub(r"([$,])","",str(value))
    numlist[i]=int(numlist[i])

In [77]:
numlist


Out[77]:
[10000, 20000, 30000, 40000, 50000]

In [68]:
np.mean(numlist)


Out[68]:
30000.0

In [78]:
from datetime import datetime

In [82]:
datetime.now()


Out[82]:
datetime.datetime(2017, 4, 15, 14, 35, 5, 932765)

In [80]:
date_obj=datetime.strptime("15/August/2007","%d/%B/%Y")

In [81]:
date_obj


Out[81]:
datetime.datetime(2007, 8, 15, 0, 0)

In [85]:
a=date_obj-datetime.now()

In [86]:
a.days


Out[86]:
-3532

In [88]:
a.seconds


Out[88]:
33861

In [89]:
os.getcwd()


Out[89]:
'C:\\Users\\Dell'

In [93]:
import IPython 
print (IPython.sys_info())


{'commit_hash': '5c9c918',
 'commit_source': 'installation',
 'default_encoding': 'cp1252',
 'ipython_path': 'C:\\Users\\Dell\\Anaconda3\\lib\\site-packages\\IPython',
 'ipython_version': '5.1.0',
 'os_name': 'nt',
 'platform': 'Windows-7-6.1.7600-SP0',
 'sys_executable': 'C:\\Users\\Dell\\Anaconda3\\python.exe',
 'sys_platform': 'win32',
 'sys_version': '3.5.2 |Anaconda custom (64-bit)| (default, Jul  5 2016, '
                '11:41:13) [MSC v.1900 64 bit (AMD64)]'}

In [94]:
%load_ext version_information
%version_information


Out[94]:
SoftwareVersion
Python3.5.2 64bit [MSC v.1900 64 bit (AMD64)]
IPython5.1.0
OSWindows 7 6.1.7600 SP0
Sat Apr 15 14:49:54 2017 India Standard Time

In [90]:
os.chdir('C:\\Users\\Dell\\Downloads')

In [91]:
os.listdir()


Out[91]:
['140749_2017.pdf',
 '2011-F01-0700-Rev4-MDDS.XLSX',
 '20150817143155.pdf',
 '20160111060911.pdf',
 '20170214052225.pdf',
 '7z1604-x64.exe',
 '7z1604.exe',
 '861415_10151432783238421_2124270505_o (1).jpg',
 '861415_10151432783238421_2124270505_o.jpg',
 'AirPassengers.csv',
 'ajayo.jpg',
 'Alison Python  Invoice   - Sheet1.pdf',
 'Alison SAS  Invoice   - Sheet1.pdf',
 'All+CSV+Files+in+a+Folder.ipynb',
 'Allison Interview Jones Invoice   - Sheet1.pdf',
 'Anaconda3-4.2.0-Windows-x86_64.exe',
 'apachehttpd.exe',
 'April invoice adaptive analytics   - Sheet1.pdf',
 'Assignment14_BusinessAnalytics (1).docx',
 'Assignment14_BusinessAnalytics.docx',
 'Assignment15_BusinessAnalytics.docx',
 'Assignment16_BusinessAnalytics (1).docx',
 'Assignment16_BusinessAnalytics (2).docx',
 'Assignment16_BusinessAnalytics.docx',
 'aug ust 2008.JPG',
 'avast_free_antivirus_setup_online.exe',
 'avinash_ltv.zip',
 'BigDiamonds.csv',
 'BigDiamonds.csv (1).zip',
 'BigDiamonds.csv (2)',
 'BigDiamonds.csv (2).zip',
 'BigDiamonds.csv (3).zip',
 'BigDiamonds.csv.zip',
 'Boston (1).csv',
 'Boston.csv',
 'CAM- Ajay Ohri (1).pdf',
 'CAM- Ajay Ohri.pdf',
 'camtasia.exe',
 'ccFraud.csv',
 'Certificate of Incorporation - U74999DL2015PTC282030 (26 June 2015).pdf',
 'CHAP1-6PythonforRUsersAnapproachforDataScience.docx',
 'chapter+3+_+spark.html',
 'chi+square+test.ipynb',
 'chromeinstall-8u111.exe',
 'Cisco_WebEx_Add-On.exe',
 'class2.csv',
 'Collabera Invoice (1).pdf',
 'Collabera Invoice.pdf',
 'Collectcent Invoice.pdf',
 'college degrees.pdf',
 'DAP 1.pdf',
 'DAP 1.pptx',
 'DAP 6 RDBMS and SQL.pdf',
 'DAP 6 RDBMS and SQL.pptx',
 'Data Analysis (1).7z',
 'Data Analysis (1).rar',
 'Data Analysis.rar',
 'Data Viz.pptx',
 'data+exploration.ipynb',
 'data+manipulation.ipynb',
 'data+munging+again.ipynb',
 'data+wrangling+titanic+dataset.ipynb',
 'data1.csv',
 'datasets.csv',
 'Decision Trees.pdf',
 'DecisionStatsOfferLetter.docx',
 'DecisionStatsRelievingLetter.docx',
 'descriptive+stats+in+Python.ipynb',
 'desktop.ini',
 'Diamond (1).csv',
 'Diamond (2).csv',
 'Diamond (3).csv',
 'Diamond (4).csv',
 'Diamond (5).csv',
 'Diamond (6).csv',
 'Diamond (7).csv',
 'Diamond (8).csv',
 'Diamond.csv',
 'DropboxInstaller.exe',
 'edb_npgsql.exe',
 'edb_pgjdbc.exe',
 'edb_psqlodbc.exe',
 'edb_psqlodbc.exe-20170203172812',
 'edb_psqlodbc.exe-20170307203617',
 'final invoice edureka  - Sheet1.pdf',
 'FinalPythonforRUsersAnapproachforDataScience (1).docx',
 'FinalPythonforRUsersAnapproachforDataScience (2).docx',
 'FinalPythonforRUsersAnapproachforDataScience (3).docx',
 'FinalPythonforRUsersAnapproachforDataScience (4).docx',
 'FinalPythonforRUsersAnapproachforDataScience.docx',
 'final_webinar (1).pdf',
 'final_webinar.pdf',
 'Git-2.11.0-64-bit.exe',
 'Git-2.12.0-64-bit.exe',
 'GitHubSetup (1).exe',
 'GitHubSetup (2).exe',
 'GitHubSetup.exe',
 'GOMAUDIOGLOBALSETUP.EXE',
 'Hdma.csv',
 'Hedonic.csv',
 'HP Downloads',
 'HPSupportSolutionsFramework-12.5.32.203.exe',
 'image.png',
 'IMS PROSCHOOL Workshop.pptx.pdf',
 'IMS PROSCHOOL Workshop.pptx.pptx',
 'internship.docx',
 'Introduction to SAS (1).pdf',
 'Introduction to SAS Part 1 (1).pdf',
 'Introduction to SAS Part 1.pdf',
 'Introduction to SAS.pdf',
 'Invoice for Digital Vidya.pdf',
 'Invoice for Weekendr.pdf',
 'Invoice format - Ajay Ohri CONTATA (1).xls',
 'Invoice format - Ajay Ohri CONTATA.xls',
 'invoice rapid miner.pdf',
 'Invoice trafla format.docx',
 'iris2 (1).ipynb',
 'iris2 (2).ipynb',
 'iris2.ipynb',
 'January invoice Indicus  .pdf',
 'June AV   Invoice   - Sheet1.pdf',
 'Lecture 6 - KNN & Naive Bayes.ppt',
 'Local Disk (C) - Shortcut.lnk',
 'logistic regression - script for ppt.R',
 'logistic_regression_-_script_for_ppt.html',
 'March invoice Indicus   - Sheet1.pdf',
 'mongodb-win32-x86_64-2008plus-ssl-3.4.2-signed.msi',
 'mongodb-win32-x86_64-3.4.2-signed.msi',
 'mortDefault',
 'mortDefault.zip',
 'mtcarslm.R',
 'multiple+file+concat+in+pandas (1).ipynb',
 'multiple+file+concat+in+pandas.ipynb',
 'my+first+class+in+python.ipynb',
 'nltk.ipynb',
 'notebook-Copy1.html',
 'Offer Letter - Ajay Ohri (1).pdf',
 'Offer Letter - Ajay Ohri.pdf',
 'Other Data Mining  Methods (1).pdf',
 'Other Data Mining  Methods.pdf',
 'output1 (1).xls',
 'output1 (2).xls',
 'output1.xls',
 'pandas+11.ipynb',
 'pandas+analysis+1.ipynb',
 'pandas+data+manipulation.ipynb',
 'passport image.pdf',
 'Pawconinvoice2016.pdf',
 'Pawconinvoice2017 (1).pdf',
 'Pawconinvoice2017 (2).pdf',
 'Pawconinvoice2017 (3).pdf',
 'Pawconinvoice2017.pdf',
 'Payslip Feb 2016 - Sheet1.pdf',
 'Payslip Feb 2016.pdf',
 'Payslip Format Decisionstats - Sheet1.pdf',
 'Payslip Jan 2016 - Sheet1.pdf',
 'Payslip Jan 2016.pdf',
 'Payslip March 2016 - Sheet1.pdf',
 'Payslip March 2016.pdf',
 'pgd.csv',
 'postgresql-9.6.1-1-windows-x64.exe',
 'Program 1-results.rtf',
 'protein.csv',
 'python+with+postgres (1).ipynb',
 'python+with+postgres.ipynb',
 'Python.docx',
 'R-3.3.2-win.exe',
 'R-3.3.3-win.exe',
 'RCertificationExam.pdf',
 'reg+model.ipynb',
 'Revision -  Business Analytics (1).pdf',
 'Revision -  Business Analytics.pdf',
 'RidingMowers.csv',
 'rsconnect',
 'RStudio-1.0.136.exe',
 'Salary Slip, Feb 2016.pdf',
 'Salary Slip, Jan 2016.pdf',
 'Salary Slip, March 2016 (1).pdf',
 'Salary Slip, March 2016 (2).pdf',
 'Salary Slip, March 2016.pdf',
 'sales-of-shampoo-over-a-three-ye.csv',
 'SAS part 2.pdf',
 'SAS Part 3.pdf',
 'sas-university-edition-107140.pdf',
 'Scan0095.pdf',
 'Scanned Invoice for Collabera.pdf',
 'Screenshot 2017-01-23 12.36.55.png',
 'September invoice adaptive analytics   - Sheet1.pdf',
 'Sollers January.pdf',
 'sqlalchemy.ipynb',
 'stackoverflow-dump-analysis.html',
 'Sunstone.pdf',
 'Tableau.pdf',
 'TableauPublicDesktop-64bit-10-1-3.exe',
 'TableauPublicDesktop-64bit-10-1-4.exe',
 'telecom.csv',
 'TelecomServiceProviderCaseStudy.pdf',
 'test+web+scraping.ipynb',
 'Text Mining (1).pdf',
 'Text Mining.pdf',
 'third.sas7bdat',
 'Time Series  Forecasting (1).pdf',
 'Time Series  Forecasting.pdf',
 'ts.html',
 'ts.R',
 'Unconfirmed 373974.crdownload',
 'Unconfirmed 376991.crdownload',
 'Unconfirmed 950045.crdownload',
 'VirtualBox-5.1.8-111374-Win (1).exe',
 'VirtualBox-5.1.8-111374-Win.exe',
 'Web+Scraping+Yelp+with+Beautiful+Soup.ipynb',
 'Webinar for Business Analytics.pdf',
 'WhatsApp Image 2017-02-18 at 08.42.55 (1).jpeg',
 'WhatsApp Image 2017-02-18 at 08.42.55.jpeg']

In [99]:
import glob as glob

In [100]:
path = os.getcwd()
extension = 'csv'
os.chdir(path)
result = [i for i in glob.glob('*.{}'.format(extension))]
print(result)


['AirPassengers.csv', 'BigDiamonds.csv', 'Boston (1).csv', 'Boston.csv', 'ccFraud.csv', 'class2.csv', 'data1.csv', 'datasets.csv', 'Diamond (1).csv', 'Diamond (2).csv', 'Diamond (3).csv', 'Diamond (4).csv', 'Diamond (5).csv', 'Diamond (6).csv', 'Diamond (7).csv', 'Diamond (8).csv', 'Diamond.csv', 'Hdma.csv', 'Hedonic.csv', 'pgd.csv', 'protein.csv', 'RidingMowers.csv', 'sales-of-shampoo-over-a-three-ye.csv', 'telecom.csv']

In [95]:
import pandas as pd

In [97]:
fraud=pd.read_csv('ccFraud.csv')

In [104]:
mtcars=pd.read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/datasets/mtcars.csv")

In [101]:
smalldiamonds=pd.read_csv("C:\\Users\\Dell\\Desktop\\Diamond (8).csv")

In [111]:
fraud.columns


Out[111]:
Index(['custID', 'gender', 'state', 'cardholder', 'balance', 'numTrans',
       'numIntlTrans', 'creditLine', 'fraudRisk'],
      dtype='object')

In [110]:
fraud.shape


Out[110]:
(10000000, 9)

In [120]:
len(fraud)


Out[120]:
10000000

In [121]:
len(fraud.columns)


Out[121]:
9

In [109]:
fraud.dtypes


Out[109]:
custID          int64
gender          int64
state           int64
cardholder      int64
balance         int64
numTrans        int64
numIntlTrans    int64
creditLine      int64
fraudRisk       int64
dtype: object

In [102]:
fraud.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000000 entries, 0 to 9999999
Data columns (total 9 columns):
custID          int64
gender          int64
state           int64
cardholder      int64
balance         int64
numTrans        int64
numIntlTrans    int64
creditLine      int64
fraudRisk       int64
dtypes: int64(9)
memory usage: 686.6 MB

In [105]:
mtcars.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 12 columns):
Unnamed: 0    32 non-null object
mpg           32 non-null float64
cyl           32 non-null int64
disp          32 non-null float64
hp            32 non-null int64
drat          32 non-null float64
wt            32 non-null float64
qsec          32 non-null float64
vs            32 non-null int64
am            32 non-null int64
gear          32 non-null int64
carb          32 non-null int64
dtypes: float64(5), int64(6), object(1)
memory usage: 3.1+ KB

In [106]:
smalldiamonds.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 308 entries, 0 to 307
Data columns (total 6 columns):
Unnamed: 0       308 non-null int64
carat            308 non-null float64
colour           308 non-null object
clarity          308 non-null object
certification    308 non-null object
price            308 non-null int64
dtypes: float64(1), int64(2), object(3)
memory usage: 14.5+ KB

In [108]:
fraud.head()


Out[108]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
0 1 1 35 1 3000 4 14 2 0
1 2 2 2 1 0 9 0 18 0
2 3 2 2 1 0 27 9 16 0
3 4 1 15 1 0 12 0 5 0
4 5 1 46 1 0 11 16 7 0

In [112]:
fraud.tail()


Out[112]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
9999995 9999996 1 37 1 0 10 0 9 0
9999996 9999997 1 16 1 0 33 2 4 0
9999997 9999998 1 24 1 9000 38 0 8 0
9999998 9999999 1 28 1 7000 20 19 6 0
9999999 10000000 1 23 1 0 13 0 7 0

In [113]:
fraud2=fraud.copy()

In [115]:
fraud.describe()


Out[115]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
count 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07
mean 5.000000e+06 1.382177e+00 2.466127e+01 1.030004e+00 4.109920e+03 2.893519e+01 4.047190e+00 9.134469e+00 5.960140e-02
std 2.886751e+06 4.859195e-01 1.497012e+01 1.705991e-01 3.996847e+03 2.655378e+01 8.602970e+00 9.641974e+00 2.367469e-01
min 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00
25% 2.500001e+06 1.000000e+00 1.000000e+01 1.000000e+00 0.000000e+00 1.000000e+01 0.000000e+00 4.000000e+00 0.000000e+00
50% 5.000000e+06 1.000000e+00 2.400000e+01 1.000000e+00 3.706000e+03 1.900000e+01 0.000000e+00 6.000000e+00 0.000000e+00
75% 7.500000e+06 2.000000e+00 3.800000e+01 1.000000e+00 6.000000e+03 3.900000e+01 4.000000e+00 1.100000e+01 0.000000e+00
max 1.000000e+07 2.000000e+00 5.100000e+01 2.000000e+00 4.148500e+04 1.000000e+02 6.000000e+01 7.500000e+01 1.000000e+00

In [116]:
fraud.gender.describe()


Out[116]:
count    1.000000e+07
mean     1.382177e+00
std      4.859195e-01
min      1.000000e+00
25%      1.000000e+00
50%      1.000000e+00
75%      2.000000e+00
max      2.000000e+00
Name: gender, dtype: float64

In [117]:
mtcars.head()


Out[117]:
Unnamed: 0 mpg cyl disp hp drat wt qsec vs am gear carb
0 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
1 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
2 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
3 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
4 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2

In [118]:
mtcars=mtcars.drop("Unnamed: 0",1)

In [119]:
mtcars.head()


Out[119]:
mpg cyl disp hp drat wt qsec vs am gear carb
0 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
1 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
2 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
3 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
4 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2

In [124]:
import IPython
print (IPython.sys_info())


{'commit_hash': '5c9c918',
 'commit_source': 'installation',
 'default_encoding': 'cp1252',
 'ipython_path': 'C:\\Users\\Dell\\Anaconda3\\lib\\site-packages\\IPython',
 'ipython_version': '5.1.0',
 'os_name': 'nt',
 'platform': 'Windows-7-6.1.7600-SP0',
 'sys_executable': 'C:\\Users\\Dell\\Anaconda3\\python.exe',
 'sys_platform': 'win32',
 'sys_version': '3.5.2 |Anaconda custom (64-bit)| (default, Jul  5 2016, '
                '11:41:13) [MSC v.1900 64 bit (AMD64)]'}

In [125]:
!pip install version_information
%load_ext version_information
%version_information


!pip freeze


Requirement already satisfied: version_information in c:\users\dell\anaconda3\lib\site-packages
The version_information extension is already loaded. To reload it, use:
  %reload_ext version_information
alabaster==0.7.9
anaconda-clean==1.0
anaconda-client==1.5.1
anaconda-navigator==1.3.1
argcomplete==1.0.0
astroid==1.4.7
astropy==1.2.1
Babel==2.3.4
backports.shutil-get-terminal-size==1.0.0
beautifulsoup4==4.5.1
bitarray==0.8.1
blaze==0.10.1
bokeh==0.12.2
boto==2.42.0
Bottleneck==1.1.0
brewer2mpl==1.4.1
cffi==1.7.0
chest==0.2.3
click==6.6
cloudpickle==0.2.1
clyent==1.2.2
colorama==0.3.7
comtypes==1.1.2
conda==4.3.9
conda-build==2.0.2
configobj==5.0.6
contextlib2==0.5.3
cryptography==1.5
cycler==0.10.0
Cython==0.24.1
cytoolz==0.8.0
dask==0.11.0
datashape==0.5.2
decorator==4.0.10
dill==0.2.5
docutils==0.12
dynd===c328ab7
et-xmlfile==1.0.1
fastcache==1.0.2
filelock==2.0.6
Flask==0.11.1
Flask-Cors==2.1.2
gevent==1.1.2
ggplot==0.11.5
greenlet==0.4.10
h5py==2.6.0
HeapDict==1.0.0
idna==2.1
imagesize==0.7.1
ipykernel==4.5.0
ipython==5.1.0
ipython-genutils==0.1.0
ipywidgets==5.2.2
itsdangerous==0.24
jdcal==1.2
jedi==0.9.0
Jinja2==2.8
jsonschema==2.5.1
jupyter==1.0.0
jupyter-client==4.4.0
jupyter-console==5.0.0
jupyter-core==4.2.0
lazy-object-proxy==1.2.1
llvmlite==0.13.0
locket==0.2.0
lxml==3.6.4
MarkupSafe==0.23
matplotlib==1.5.3
menuinst==1.4.1
mistune==0.7.3
mpmath==0.19
multipledispatch==0.4.8
nb-anacondacloud==1.2.0
nb-conda==2.0.0
nb-conda-kernels==2.0.0
nbconvert==4.2.0
nbformat==4.1.0
nbpresent==3.0.2
networkx==1.11
nltk==3.2.1
nose==1.3.7
notebook==4.2.3
numba==0.28.1
numexpr==2.6.1
numpy==1.11.1
odo==0.5.0
openpyxl==2.3.2
pandas==0.18.1
pandasql==0.7.3
partd==0.3.6
path.py==0.0.0
pathlib2==2.1.0
patsy==0.4.1
pep8==1.7.0
pickleshare==0.7.4
Pillow==3.3.1
pkginfo==1.3.2
ply==3.9
prompt-toolkit==1.0.3
psutil==4.3.1
psycopg2==2.6.2
py==1.4.31
pyasn1==0.1.9
pycosat==0.6.1
pycparser==2.14
pycrypto==2.6.1
pycurl==7.43.0
pyflakes==1.3.0
Pygments==2.1.3
pylint==1.5.4
pyodbc==3.0.10
pyOpenSSL==16.2.0
pyparsing==2.1.4
pytest==2.9.2
python-dateutil==2.5.3
pytz==2016.6.1
pywin32==220
PyYAML==3.12
pyzmq==15.4.0
QtAwesome==0.3.3
qtconsole==4.2.1
QtPy==1.1.2
requests==2.12.4
rope-py3k==0.9.4.post1
ruamel-yaml===-VERSION
scikit-image==0.12.3
scikit-learn==0.17.1
scipy==0.18.1
seaborn==0.7.1
simplegeneric==0.8.1
singledispatch==3.4.0.3
six==1.10.0
snowballstemmer==1.2.1
sockjs-tornado==1.0.3
sphinx==1.4.6
spyder==3.0.0
SQLAlchemy==1.0.13
statsmodels==0.6.1
sympy==1.0
tables==3.2.2
toolz==0.8.0
tornado==4.4.1
traitlets==4.3.0
unicodecsv==0.14.1
urllib3==1.20
version-information==1.0.3
wcwidth==0.1.7
Werkzeug==0.11.11
widgetsnbextension==1.2.6
win-unicode-console==0.5
wrapt==1.10.6
xlrd==1.0.0
XlsxWriter==0.9.3
xlwings==0.10.0
xlwt==1.1.2

In [127]:
!pip install guppy


Collecting guppy
  Downloading guppy-0.1.10.tar.gz (484kB)
Building wheels for collected packages: guppy
  Running setup.py bdist_wheel for guppy: started
  Running setup.py bdist_wheel for guppy: finished with status 'error'
  Complete output from command c:\users\dell\anaconda3\python.exe -u -c "import setuptools, tokenize;__file__='C:\\Users\\Dell\\AppData\\Local\\Temp\\pip-build-d3t4jj4u\\guppy\\setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" bdist_wheel -d C:\Users\Dell\AppData\Local\Temp\tmppr8koym2pip-wheel- --python-tag cp35:
  running bdist_wheel
  running build
  running build_py
  creating build
  creating build\lib.win-amd64-3.5
  creating build\lib.win-amd64-3.5\guppy
  copying guppy\__init__.py -> build\lib.win-amd64-3.5\guppy
  creating build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\__init__.py -> build\lib.win-amd64-3.5\guppy\doc
  creating build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\Cat.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\cmd.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\Code.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\Compat.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\etc.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\ExecfileWithModuleInfo.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\FSA.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\Glue.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\Help.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\IterPermute.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\KanExtension.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\KnuthBendix.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\OutputHandling.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\RE.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\RE_Rect.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\textView.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\tkcursors.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\Unpack.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\xterm.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\__init__.py -> build\lib.win-amd64-3.5\guppy\etc
  creating build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Document.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\DottedTree.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Exceptions.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\FileIO.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Filer.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Gsml.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Help.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Html.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Latex.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Main.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\SpecNodes.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Tester.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Text.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\XHTML.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\__init__.py -> build\lib.win-amd64-3.5\guppy\gsl
  creating build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\AbstractAlgebra.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Classifiers.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Console.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Doc.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\ImpSet.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Monitor.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\OutputHandling.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Part.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Path.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\pbhelp.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Prof.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\RefPat.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Remote.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\RemoteConstants.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\RM.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Spec.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Target.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\UniSet.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Use.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\View.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\__init__.py -> build\lib.win-amd64-3.5\guppy\heapy
  creating build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\support.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_all.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_Classifiers.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_dependencies.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_ER.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_heapyc.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_menuleak.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_OutputHandling.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_Part.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_Path.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_RefPat.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_RetaGraph.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_sf.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_Spec.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_UniSet.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_View.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\__init__.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  creating build\lib.win-amd64-3.5\guppy\sets
  copying guppy\sets\test.py -> build\lib.win-amd64-3.5\guppy\sets
  copying guppy\sets\__init__.py -> build\lib.win-amd64-3.5\guppy\sets
  copying guppy\doc\docexample.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\gsl.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\gslexample.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\guppy.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\heapyc.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\heapy_RootState.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\heapy_tutorial.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\heapy_UniSet.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\heapy_Use.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\index.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\ProfileBrowser.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\sets.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\pbscreen.jpg -> build\lib.win-amd64-3.5\guppy\doc
  running build_ext
  building 'guppy.sets.setsc' extension
  error: Microsoft Visual C++ 14.0 is required. Get it with "Microsoft Visual C++ Build Tools": http://landinghub.visualstudio.com/visual-cpp-build-tools
  
  ----------------------------------------
  Running setup.py clean for guppy
Failed to build guppy
Installing collected packages: guppy
  Running setup.py install for guppy: started
    Running setup.py install for guppy: finished with status 'error'
    Complete output from command c:\users\dell\anaconda3\python.exe -u -c "import setuptools, tokenize;__file__='C:\\Users\\Dell\\AppData\\Local\\Temp\\pip-build-d3t4jj4u\\guppy\\setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" install --record C:\Users\Dell\AppData\Local\Temp\pip-_nlam_7o-record\install-record.txt --single-version-externally-managed --compile:
    running install
    running build
    running build_py
    creating build
    creating build\lib.win-amd64-3.5
    creating build\lib.win-amd64-3.5\guppy
    copying guppy\__init__.py -> build\lib.win-amd64-3.5\guppy
    creating build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\__init__.py -> build\lib.win-amd64-3.5\guppy\doc
    creating build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\Cat.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\cmd.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\Code.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\Compat.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\etc.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\ExecfileWithModuleInfo.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\FSA.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\Glue.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\Help.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\IterPermute.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\KanExtension.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\KnuthBendix.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\OutputHandling.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\RE.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\RE_Rect.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\textView.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\tkcursors.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\Unpack.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\xterm.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\__init__.py -> build\lib.win-amd64-3.5\guppy\etc
    creating build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Document.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\DottedTree.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Exceptions.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\FileIO.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Filer.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Gsml.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Help.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Html.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Latex.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Main.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\SpecNodes.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Tester.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Text.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\XHTML.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\__init__.py -> build\lib.win-amd64-3.5\guppy\gsl
    creating build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\AbstractAlgebra.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Classifiers.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Console.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Doc.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\ImpSet.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Monitor.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\OutputHandling.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Part.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Path.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\pbhelp.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Prof.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\RefPat.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Remote.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\RemoteConstants.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\RM.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Spec.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Target.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\UniSet.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Use.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\View.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\__init__.py -> build\lib.win-amd64-3.5\guppy\heapy
    creating build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\support.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_all.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_Classifiers.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_dependencies.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_ER.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_heapyc.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_menuleak.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_OutputHandling.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_Part.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_Path.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_RefPat.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_RetaGraph.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_sf.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_Spec.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_UniSet.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_View.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\__init__.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    creating build\lib.win-amd64-3.5\guppy\sets
    copying guppy\sets\test.py -> build\lib.win-amd64-3.5\guppy\sets
    copying guppy\sets\__init__.py -> build\lib.win-amd64-3.5\guppy\sets
    copying guppy\doc\docexample.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\gsl.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\gslexample.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\guppy.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\heapyc.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\heapy_RootState.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\heapy_tutorial.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\heapy_UniSet.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\heapy_Use.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\index.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\ProfileBrowser.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\sets.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\pbscreen.jpg -> build\lib.win-amd64-3.5\guppy\doc
    running build_ext
    building 'guppy.sets.setsc' extension
    error: Microsoft Visual C++ 14.0 is required. Get it with "Microsoft Visual C++ Build Tools": http://landinghub.visualstudio.com/visual-cpp-build-tools
    
    ----------------------------------------
  Failed building wheel for guppy
Command "c:\users\dell\anaconda3\python.exe -u -c "import setuptools, tokenize;__file__='C:\\Users\\Dell\\AppData\\Local\\Temp\\pip-build-d3t4jj4u\\guppy\\setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" install --record C:\Users\Dell\AppData\Local\Temp\pip-_nlam_7o-record\install-record.txt --single-version-externally-managed --compile" failed with error code 1 in C:\Users\Dell\AppData\Local\Temp\pip-build-d3t4jj4u\guppy\

In [128]:
fraud.head()


Out[128]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
0 1 1 35 1 3000 4 14 2 0
1 2 2 2 1 0 9 0 18 0
2 3 2 2 1 0 27 9 16 0
3 4 1 15 1 0 12 0 5 0
4 5 1 46 1 0 11 16 7 0

In [135]:
fraud.head().gender


Out[135]:
0    1
1    2
2    2
3    1
4    1
Name: gender, dtype: int64

In [133]:
fraud.gender.head()


Out[133]:
0    1
1    2
2    2
3    1
4    1
Name: gender, dtype: int64

In [132]:
fraud['gender'].head()


Out[132]:
0    1
1    2
2    2
3    1
4    1
Name: gender, dtype: int64

In [131]:
fraud[['gender','state','balance']].head()


Out[131]:
gender state balance
0 1 35 3000
1 2 2 0
2 2 2 0
3 1 15 0
4 1 46 0

In [136]:
fraud.ix[10:20]


Out[136]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
10 11 1 46 1 4601 54 0 4 0
11 12 1 10 1 3000 20 0 2 0
12 13 1 6 1 0 45 2 4 0
13 14 2 38 1 9000 41 3 8 0
14 15 1 27 1 5227 60 0 17 0
15 16 1 44 1 0 22 0 5 0
16 17 2 18 1 13970 20 0 13 0
17 18 1 35 1 3113 13 6 8 0
18 19 1 5 1 9000 20 2 8 0
19 20 2 31 1 1860 21 10 8 0
20 21 1 39 1 4000 24 0 3 0

In [137]:
fraud.iloc[:,:]


Out[137]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
0 1 1 35 1 3000 4 14 2 0
1 2 2 2 1 0 9 0 18 0
2 3 2 2 1 0 27 9 16 0
3 4 1 15 1 0 12 0 5 0
4 5 1 46 1 0 11 16 7 0
5 6 2 44 2 5546 21 0 13 0
6 7 1 3 1 2000 41 0 1 0
7 8 1 10 1 6016 20 3 6 0
8 9 2 32 1 2428 4 10 22 0
9 10 1 23 1 0 18 56 5 0
10 11 1 46 1 4601 54 0 4 0
11 12 1 10 1 3000 20 0 2 0
12 13 1 6 1 0 45 2 4 0
13 14 2 38 1 9000 41 3 8 0
14 15 1 27 1 5227 60 0 17 0
15 16 1 44 1 0 22 0 5 0
16 17 2 18 1 13970 20 0 13 0
17 18 1 35 1 3113 13 6 8 0
18 19 1 5 1 9000 20 2 8 0
19 20 2 31 1 1860 21 10 8 0
20 21 1 39 1 4000 24 0 3 0
21 22 1 34 1 0 22 0 3 0
22 23 1 5 1 0 7 0 11 0
23 24 2 21 1 0 15 0 3 0
24 25 1 25 1 0 12 0 65 0
25 26 2 29 1 5000 4 9 4 0
26 27 1 38 1 4000 21 5 3 0
27 28 1 9 1 12000 20 0 11 0
28 29 2 20 1 0 19 0 2 0
29 30 2 49 1 5192 84 0 13 1
... ... ... ... ... ... ... ... ... ...
9999970 9999971 1 10 1 0 12 0 11 0
9999971 9999972 2 40 1 3683 31 3 7 0
9999972 9999973 1 46 1 5000 69 0 4 0
9999973 9999974 1 10 2 0 36 31 14 0
9999974 9999975 1 25 1 0 18 0 3 0
9999975 9999976 2 48 1 4210 23 0 8 0
9999976 9999977 2 4 1 0 14 0 7 0
9999977 9999978 1 35 1 8000 24 0 7 0
9999978 9999979 1 44 1 5000 12 0 4 0
9999979 9999980 1 6 1 7669 5 3 7 0
9999980 9999981 1 10 1 13889 54 0 13 0
9999981 9999982 1 33 1 0 23 26 18 0
9999982 9999983 1 44 1 3000 14 0 2 0
9999983 9999984 2 13 1 0 2 0 5 0
9999984 9999985 1 39 1 7000 30 0 6 0
9999985 9999986 2 45 2 3000 4 0 2 0
9999986 9999987 1 23 1 7000 59 0 6 0
9999987 9999988 2 24 1 6000 46 0 5 0
9999988 9999989 1 18 1 15000 72 0 14 1
9999989 9999990 2 4 1 12000 17 7 11 0
9999990 9999991 1 16 2 1561 8 0 5 0
9999991 9999992 2 36 1 5217 6 0 5 0
9999992 9999993 1 38 1 0 7 0 2 0
9999993 9999994 2 43 1 2607 6 0 5 0
9999994 9999995 1 16 2 17376 3 0 20 0
9999995 9999996 1 37 1 0 10 0 9 0
9999996 9999997 1 16 1 0 33 2 4 0
9999997 9999998 1 24 1 9000 38 0 8 0
9999998 9999999 1 28 1 7000 20 19 6 0
9999999 10000000 1 23 1 0 13 0 7 0

10000000 rows × 9 columns


In [139]:
fraud.iloc[10:20,1:4]


Out[139]:
gender state cardholder
10 1 46 1
11 1 10 1
12 1 6 1
13 2 38 1
14 1 27 1
15 1 44 1
16 2 18 1
17 1 35 1
18 1 5 1
19 2 31 1

In [140]:
fraud.describe()


Out[140]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
count 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07
mean 5.000000e+06 1.382177e+00 2.466127e+01 1.030004e+00 4.109920e+03 2.893519e+01 4.047190e+00 9.134469e+00 5.960140e-02
std 2.886751e+06 4.859195e-01 1.497012e+01 1.705991e-01 3.996847e+03 2.655378e+01 8.602970e+00 9.641974e+00 2.367469e-01
min 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00
25% 2.500001e+06 1.000000e+00 1.000000e+01 1.000000e+00 0.000000e+00 1.000000e+01 0.000000e+00 4.000000e+00 0.000000e+00
50% 5.000000e+06 1.000000e+00 2.400000e+01 1.000000e+00 3.706000e+03 1.900000e+01 0.000000e+00 6.000000e+00 0.000000e+00
75% 7.500000e+06 2.000000e+00 3.800000e+01 1.000000e+00 6.000000e+03 3.900000e+01 4.000000e+00 1.100000e+01 0.000000e+00
max 1.000000e+07 2.000000e+00 5.100000e+01 2.000000e+00 4.148500e+04 1.000000e+02 6.000000e+01 7.500000e+01 1.000000e+00

In [141]:
fraud.gender.value_counts()


Out[141]:
1    6178231
2    3821769
Name: gender, dtype: int64

In [142]:
fraud.state.value_counts()


Out[142]:
5     1216069
44     812638
10     608630
35     608575
39     405892
15     404720
36     364531
23     304553
11     303984
29     303833
32     284428
46     252812
43     203827
16     203143
25     203045
48     202972
4      202776
21     202444
20     201918
49     182557
24     182201
6      171774
2      162574
41     152253
19     151715
18     142170
37     122191
38     121846
7      121802
13     111775
26     101829
3      101740
45      91375
34      91326
17      91127
33      81332
50      61385
14      60992
28      60617
12      50438
22      40819
31      40563
9       30333
40      30233
27      30131
51      20691
8       20603
42      20449
30      20215
1       20137
47      20017
Name: state, dtype: int64

In [143]:
fraud.fraudRisk.value_counts()


Out[143]:
0    9403986
1     596014
Name: fraudRisk, dtype: int64

In [144]:
pd.crosstab(fraud.fraudRisk,fraud.gender)


Out[144]:
gender 1 2
fraudRisk
0 5853053 3550933
1 325178 270836

In [145]:
pd.crosstab(fraud.fraudRisk,fraud.gender,margins=True)


Out[145]:
gender 1 2 All
fraudRisk
0 5853053 3550933 9403986
1 325178 270836 596014
All 6178231 3821769 10000000

In [148]:
np.random.choice(100,10)


Out[148]:
array([36, 51, 88, 10, 86,  2, 92, 45, 22, 33])

In [150]:
a=len(fraud)

In [151]:
b=0.0001

In [152]:
a*b


Out[152]:
10.0

In [154]:
fraud.ix[np.random.choice(len(fraud),a*b)]


C:\Users\Dell\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  if __name__ == '__main__':
Out[154]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
1213704 1213705 1 41 1 6000 34 0 5 0
9188257 9188258 1 4 1 0 6 0 6 0
7040966 7040967 1 32 1 0 15 0 15 0
9106986 9106987 1 36 1 2000 28 0 1 0
2441776 2441777 1 5 1 10000 39 5 9 0
6415126 6415127 2 39 1 0 10 2 6 0
6300659 6300660 1 11 1 8000 4 17 7 0
952480 952481 1 6 1 6000 10 0 5 0
4407853 4407854 1 41 1 0 3 0 3 0
3029728 3029729 1 9 1 4000 32 0 3 0

In [156]:
! pip install pandasql


Requirement already satisfied: pandasql in c:\users\dell\anaconda3\lib\site-packages
Requirement already satisfied: pandas in c:\users\dell\anaconda3\lib\site-packages (from pandasql)
Requirement already satisfied: numpy in c:\users\dell\anaconda3\lib\site-packages (from pandasql)
Requirement already satisfied: sqlalchemy in c:\users\dell\anaconda3\lib\site-packages (from pandasql)
Requirement already satisfied: python-dateutil>=2 in c:\users\dell\anaconda3\lib\site-packages (from pandas->pandasql)
Requirement already satisfied: pytz>=2011k in c:\users\dell\anaconda3\lib\site-packages (from pandas->pandasql)
Requirement already satisfied: six>=1.5 in c:\users\dell\anaconda3\lib\site-packages (from python-dateutil>=2->pandas->pandasql)

In [163]:
from pandasql import sqldf
pysqldf = lambda q: sqldf(q, globals())

In [157]:
mtcars.head()


Out[157]:
mpg cyl disp hp drat wt qsec vs am gear carb
0 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
1 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
2 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
3 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
4 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2

In [164]:
pysqldf("SELECT * FROM mtcars  LIMIT 10;")


Out[164]:
mpg cyl disp hp drat wt qsec vs am gear carb
0 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
1 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
2 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
3 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
4 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
5 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
6 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
7 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
8 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
9 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4

In [165]:
pysqldf("SELECT * FROM mtcars  WHERE gear > 4;")


Out[165]:
mpg cyl disp hp drat wt qsec vs am gear carb
0 26.0 4 120.3 91 4.43 2.140 16.7 0 1 5 2
1 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2
2 15.8 8 351.0 264 4.22 3.170 14.5 0 1 5 4
3 19.7 6 145.0 175 3.62 2.770 15.5 0 1 5 6
4 15.0 8 301.0 335 3.54 3.570 14.6 0 1 5 8

In [166]:
pysqldf("SELECT AVG(mpg),gear FROM mtcars group by gear  ;")


Out[166]:
AVG(mpg) gear
0 16.106667 3
1 24.533333 4
2 21.380000 5

In [167]:
mtcars.mpg.mean()


Out[167]:
20.090624999999996

In [169]:
g1=pd.groupby(mtcars,mtcars.gear)

In [170]:
g1.mean()


Out[170]:
mpg cyl disp hp drat wt qsec vs am carb
gear
3 16.106667 7.466667 326.300000 176.133333 3.132667 3.892600 17.692 0.200000 0.000000 2.666667
4 24.533333 4.666667 123.016667 89.500000 4.043333 2.616667 18.965 0.833333 0.666667 2.333333
5 21.380000 6.000000 202.480000 195.600000 3.916000 2.632600 15.640 0.200000 1.000000 4.400000

In [171]:
mtcars.gear.value_counts()


Out[171]:
3    15
4    12
5     5
Name: gear, dtype: int64

In [173]:
mtcars.cyl.unique()


Out[173]:
array([6, 4, 8], dtype=int64)

In [174]:
pd.crosstab(mtcars.gear,mtcars.cyl)


Out[174]:
cyl 4 6 8
gear
3 1 2 12
4 8 4 0
5 2 1 2

In [175]:
mtcars.pivot_table(index='gear', columns='cyl', values='mpg', fill_value=0)


Out[175]:
cyl 4 6 8
gear
3 21.500 19.75 15.05
4 26.925 19.75 0.00
5 28.200 19.70 15.40

In [176]:
fraud.head()


Out[176]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
0 1 1 35 1 3000 4 14 2 0
1 2 2 2 1 0 9 0 18 0
2 3 2 2 1 0 27 9 16 0
3 4 1 15 1 0 12 0 5 0
4 5 1 46 1 0 11 16 7 0

In [181]:
del fraud['custID']

In [182]:
fraud.head()


Out[182]:
gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
0 1 35 1 3000 4 14 2 0
1 2 2 1 0 9 0 18 0
2 2 2 1 0 27 9 16 0
3 1 15 1 0 12 0 5 0
4 1 46 1 0 11 16 7 0

In [183]:
fraud3=fraud

In [186]:
del fraud['state']

In [187]:
fraud3.head()


Out[187]:
gender cardholder balance numTrans numIntlTrans creditLine fraudRisk
0 1 1 3000 4 14 2 0
1 2 1 0 9 0 18 0
2 2 1 0 27 9 16 0
3 1 1 0 12 0 5 0
4 1 1 0 11 16 7 0

In [190]:
wine=pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",header=None)

In [191]:
wine.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 14 columns):
0     178 non-null int64
1     178 non-null float64
2     178 non-null float64
3     178 non-null float64
4     178 non-null float64
5     178 non-null int64
6     178 non-null float64
7     178 non-null float64
8     178 non-null float64
9     178 non-null float64
10    178 non-null float64
11    178 non-null float64
12    178 non-null float64
13    178 non-null int64
dtypes: float64(11), int64(3)
memory usage: 19.5 KB

In [200]:
wine.columns=['WineClass','Alcohol','Malic acid','Ash','Alcalinity of ash','Magnesium','Total phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins','Color intensity','Hue','OD280/OD315 of diluted wines','Proline']

In [201]:
wine.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 14 columns):
WineClass                       178 non-null int64
Alcohol                         178 non-null float64
Malic acid                      178 non-null float64
Ash                             178 non-null float64
Alcalinity of ash               178 non-null float64
Magnesium                       178 non-null int64
Total phenols                   178 non-null float64
Flavanoids                      178 non-null float64
Nonflavanoid phenols            178 non-null float64
Proanthocyanins                 178 non-null float64
Color intensity                 178 non-null float64
Hue                             178 non-null float64
OD280/OD315 of diluted wines    178 non-null float64
Proline                         178 non-null int64
dtypes: float64(11), int64(3)
memory usage: 19.5 KB

In [202]:
wine.head()


Out[202]:
WineClass Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue OD280/OD315 of diluted wines Proline
0 1 14.23 1.71 2.43 15.6 127 2.80 3.06 0.28 2.29 5.64 1.04 3.92 1065
1 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 1050
2 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 1185
3 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 1480
4 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 735

In [204]:
wine.WineClass.value_counts()


Out[204]:
2    71
1    59
3    48
Name: WineClass, dtype: int64

In [205]:
classby=pd.groupby(wine,wine.WineClass)

In [206]:
classby.mean()


Out[206]:
Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue OD280/OD315 of diluted wines Proline
WineClass
1 13.744746 2.010678 2.455593 17.037288 106.338983 2.840169 2.982373 0.290000 1.899322 5.528305 1.062034 3.157797 1115.711864
2 12.278732 1.932676 2.244789 20.238028 94.549296 2.258873 2.080845 0.363662 1.630282 3.086620 1.056282 2.785352 519.507042
3 13.153750 3.333750 2.437083 21.416667 99.312500 1.678750 0.781458 0.447500 1.153542 7.396250 0.682708 1.683542 629.895833

In [207]:
wine.describe()


Out[207]:
WineClass Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue OD280/OD315 of diluted wines Proline
count 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000
mean 1.938202 13.000618 2.336348 2.366517 19.494944 99.741573 2.295112 2.029270 0.361854 1.590899 5.058090 0.957449 2.611685 746.893258
std 0.775035 0.811827 1.117146 0.274344 3.339564 14.282484 0.625851 0.998859 0.124453 0.572359 2.318286 0.228572 0.709990 314.907474
min 1.000000 11.030000 0.740000 1.360000 10.600000 70.000000 0.980000 0.340000 0.130000 0.410000 1.280000 0.480000 1.270000 278.000000
25% 1.000000 12.362500 1.602500 2.210000 17.200000 88.000000 1.742500 1.205000 0.270000 1.250000 3.220000 0.782500 1.937500 500.500000
50% 2.000000 13.050000 1.865000 2.360000 19.500000 98.000000 2.355000 2.135000 0.340000 1.555000 4.690000 0.965000 2.780000 673.500000
75% 3.000000 13.677500 3.082500 2.557500 21.500000 107.000000 2.800000 2.875000 0.437500 1.950000 6.200000 1.120000 3.170000 985.000000
max 3.000000 14.830000 5.800000 3.230000 30.000000 162.000000 3.880000 5.080000 0.660000 3.580000 13.000000 1.710000 4.000000 1680.000000

In [212]:
wine.Ash.describe()


Out[212]:
count    178.000000
mean       2.366517
std        0.274344
min        1.360000
25%        2.210000
50%        2.360000
75%        2.557500
max        3.230000
Name: Ash, dtype: float64

In [ ]: