In [3]:

    
#comments in Python
'''multiple lines of comments are being shown here'''









    Out[3]:





'multiple lines of comments are being shown here'

Important

this is a markdown and not a code window



In [1]:

    
2+3+5









    Out[1]:





10



In [2]:

    
66-3-(-4)









    Out[2]:





67



In [3]:

    
32*3









    Out[3]:





96



In [4]:

    
2**3









    Out[4]:





8



In [5]:

    
2^3









    Out[5]:





1



In [6]:

    
43/3









    Out[6]:





14.333333333333334



In [7]:

    
43//3









    Out[7]:





14



In [8]:

    
43%3









    Out[8]:





1



In [9]:

    
import math as mt



In [10]:

    
mt.exp(2)









    Out[10]:





7.38905609893065



In [11]:

    
mt.log(10)









    Out[11]:





2.302585092994046



In [12]:

    
mt.exp(1)









    Out[12]:





2.718281828459045



In [14]:

    
mt.log(8,2)









    Out[14]:





3.0



In [15]:

    
mt.sqrt(1000)









    Out[15]:





31.622776601683793



In [17]:

    
import numpy as np



In [18]:

    
np.std([23,45,67,78])









    Out[18]:





21.123150806638673



In [20]:

    
dir(mt)









    Out[20]:





['__doc__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'acos',
 'acosh',
 'asin',
 'asinh',
 'atan',
 'atan2',
 'atanh',
 'ceil',
 'copysign',
 'cos',
 'cosh',
 'degrees',
 'e',
 'erf',
 'erfc',
 'exp',
 'expm1',
 'fabs',
 'factorial',
 'floor',
 'fmod',
 'frexp',
 'fsum',
 'gamma',
 'gcd',
 'hypot',
 'inf',
 'isclose',
 'isfinite',
 'isinf',
 'isnan',
 'ldexp',
 'lgamma',
 'log',
 'log10',
 'log1p',
 'log2',
 'modf',
 'nan',
 'pi',
 'pow',
 'radians',
 'sin',
 'sinh',
 'sqrt',
 'tan',
 'tanh',
 'trunc']



In [21]:

    
type(1)









    Out[21]:





int



In [22]:

    
type("Ajay")









    Out[22]:





str



In [24]:

    
type([23,45,67])









    Out[24]:





list



In [25]:

    
a=[23,45,67]



In [32]:

    
len(a)









    Out[32]:





3



In [31]:

    
np.std(a)









    Out[31]:





17.962924780409974



In [28]:

    
np.var(a)









    Out[28]:





322.66666666666669



In [30]:

    
123456789123456789*9999999999999999









    Out[30]:





1234567891234567766543210876543211



In [35]:

    
np.random??



In [37]:

    
from random import randrange,randint



In [39]:

    
print(randint(0,90))



In [42]:

    
randrange(1000)









    Out[42]:





286



In [46]:

    
for x in range(0,10):
    print(randrange(10000000000000000))









    



2472965195555081
6352816454724336
4809973335770632
5246909950815852
6348106781629098
2586909203145681
2509370301745813
4082241628288070
7691514263873286
8069700113941950



In [51]:

    
def mynewfunction(x,y):
    taxes=((x-1000000)*0.35+100000-min(y,100000))
    print(taxes)



In [53]:

    
mynewfunction(2200000,300000)



In [54]:

    
import os as os



In [57]:

    
os??



In [62]:

    
for x in range(0,30,6):
    print(x)



In [63]:

    
def mynewfunction(x,y):
    z=x**3+3*x*y+20*y
    print(z)



In [65]:

    
for x in range(0,30,6):
    mynewfunction(x,10)



In [1]:

    
import os as os



In [2]:

    
os.getcwd()









    Out[2]:





'C:\\Users\\Dell'



In [3]:

    
os.listdir()









    Out[3]:





['.bash_history',
 '.git',
 '.gitconfig',
 '.gitignore',
 '.idlerc',
 '.ipynb_checkpoints',
 '.ipython',
 '.jupyter',
 '.matplotlib',
 '.spyder-py3',
 '.ssh',
 '.VirtualBox',
 'Anaconda3',
 'AppData',
 'Application Data',
 'Contacts',
 'Cookies',
 'data munging again.ipynb',
 'data wrangling titanic dataset.ipynb',
 'Desktop',
 'Documents',
 'Downloads',
 'Dropbox',
 'Favorites',
 'home',
 'IntelGraphicsProfiles',
 'introductory python.ipynb',
 'Links',
 'Local Settings',
 'month_ridership.png',
 'multiple file concat in pandas.ipynb',
 'Music',
 'My Documents',
 'NetHood',
 'new notebook.ipynb',
 'nltk.ipynb',
 'NTUSER.DAT',
 'ntuser.dat.LOG1',
 'ntuser.dat.LOG2',
 'NTUSER.DAT{016888bd-6c6f-11de-8d1d-001e0bcde3ec}.TM.blf',
 'NTUSER.DAT{016888bd-6c6f-11de-8d1d-001e0bcde3ec}.TMContainer00000000000000000001.regtrans-ms',
 'NTUSER.DAT{016888bd-6c6f-11de-8d1d-001e0bcde3ec}.TMContainer00000000000000000002.regtrans-ms',
 'ntuser.ini',
 'pandas 11.ipynb',
 'pandas analysis 1.ipynb',
 'pandas data manipulation.ipynb',
 'Pictures',
 'PrintHood',
 'Rdatasets',
 'Recent',
 'rforanalytics',
 'Saved Games',
 'Searches',
 'SendTo',
 'Start Menu',
 'Templates',
 'test web scraping.ipynb',
 'time series.ipynb',
 'Untitled.ipynb',
 'untitled.txt',
 'Untitled1.ipynb',
 'untitled1.txt',
 'Untitled2.ipynb',
 'Untitled3.ipynb',
 'Untitled4.ipynb',
 'Untitled5.ipynb',
 'Videos',
 'VirtualBox VMs',
 'Web Scraping Yelp with Beautiful Soup.ipynb']



In [4]:

    
os.chdir('C:\\Users\\Dell')



In [7]:

    
mystring='Hello World'



In [8]:

    
mystring









    Out[8]:





'Hello World'



In [9]:

    
mystring[1]









    Out[9]:





'e'



In [10]:

    
mystring[0]









    Out[10]:





'H'



In [11]:

    
print(mystring)









    



Hello World



In [12]:

    
type(mystring)









    Out[12]:





str



In [13]:

    
len(mystring)









    Out[13]:





11



In [15]:

    
newstring2='Aye aye me heartie\'s'



In [16]:

    
newstring3="Aye aye me heartie's"



In [18]:

    
10*newstring3









    Out[18]:





"Aye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie's"



In [30]:

    
ne1= "'Ajay','Vijay','Anita','Ankit'"



In [32]:

    
type(ne1)









    Out[32]:





str



In [31]:

    
str(ne1)









    Out[31]:





"'Ajay','Vijay','Anita','Ankit'"



In [33]:

    
ne1[1]









    Out[33]:





'A'



In [26]:

    
ne2= ['Ajay','Vijay','Anita','Ankit']



In [27]:

    
str(ne2)









    Out[27]:





"['Ajay', 'Vijay', 'Anita', 'Ankit']"



In [29]:

    
ne2[1]









    Out[29]:





'Vijay'



In [34]:

    
myname1='Ajay'
myname2='John'



In [35]:

    
message= "Hi I am %s howdy"



In [37]:

    
message %myname1









    Out[37]:





'Hi I am Ajay howdy'



In [36]:

    
message %myname2









    Out[36]:





'Hi I am John howdy'



In [38]:

    
ne2









    Out[38]:





['Ajay', 'Vijay', 'Anita', 'Ankit']



In [40]:

    
ne2.append('Anna')



In [41]:

    
ne2









    Out[41]:





['Ajay', 'Vijay', 'Anita', 'Ankit', 'Anna']



In [42]:

    
del ne2[0]



In [43]:

    
ne2









    Out[43]:





['Vijay', 'Anita', 'Ankit', 'Anna']



In [44]:

    
ne3=('Sachin','Dhoni','Gavaskar','Kapil')



In [46]:

    
dir(ne3)









    Out[46]:





['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'count',
 'index']



In [52]:

    
favourite_movie=['micky mouse,steamboat willie', 'vijay,slumdog millionaire', 'john,passion of christ', 'donald,arthur']



In [53]:

    
type(favourite_movie)









    Out[53]:





list



In [51]:

    
favourite_movie2={'micky mouse:steamboat willie', 'vijay:slumdog millionaire', 'john:passion of christ', 'donald:arthur'}



In [54]:

    
type(favourite_movie2)









    Out[54]:





set



In [55]:

    
favourite_movie3={'micky mouse':'steamboat willie', 'vijay':'slumdog millionaire', 'john':'passion of christ', 'donald':'arthur'}



In [56]:

    
type(favourite_movie3)









    Out[56]:





dict



In [57]:

    
favourite_movie3['micky mouse']









    Out[57]:





'steamboat willie'



In [58]:

    
import re



In [59]:

    
names =["Anna", "Anne", "Annaporna","Shubham","Aruna"]



In [60]:

    
for name in names:
    print(re.search(r'(An)',name))









    



<_sre.SRE_Match object; span=(0, 2), match='An'>
<_sre.SRE_Match object; span=(0, 2), match='An'>
<_sre.SRE_Match object; span=(0, 2), match='An'>
None
None



In [61]:

    
for name in names:
    print(re.search(r'(A)',name))









    



<_sre.SRE_Match object; span=(0, 1), match='A'>
<_sre.SRE_Match object; span=(0, 1), match='A'>
<_sre.SRE_Match object; span=(0, 1), match='A'>
None
<_sre.SRE_Match object; span=(0, 1), match='A'>



In [62]:

    
for name in names:
    print(re.search(r'(a)',name))









    



<_sre.SRE_Match object; span=(3, 4), match='a'>
None
<_sre.SRE_Match object; span=(3, 4), match='a'>
<_sre.SRE_Match object; span=(5, 6), match='a'>
<_sre.SRE_Match object; span=(4, 5), match='a'>



In [63]:

    
for name in names:
    print(bool(re.search(r'(a)',name)))









    



True
False
True
True
True



In [64]:

    
import numpy as np



In [74]:

    
numlist=["$10000","$20,000","30,000",40000,"50000   "]



In [75]:

    
for i,value in enumerate(numlist):
    print(i)       
    print(value)



In [76]:

    
for i,value in enumerate(numlist):
 
    numlist[i]=re.sub(r"([$,])","",str(value))
    numlist[i]=int(numlist[i])



In [77]:

    
numlist









    Out[77]:





[10000, 20000, 30000, 40000, 50000]



In [68]:

    
np.mean(numlist)









    Out[68]:





30000.0



In [78]:

    
from datetime import datetime



In [82]:

    
datetime.now()









    Out[82]:





datetime.datetime(2017, 4, 15, 14, 35, 5, 932765)



In [80]:

    
date_obj=datetime.strptime("15/August/2007","%d/%B/%Y")



In [81]:

    
date_obj









    Out[81]:





datetime.datetime(2007, 8, 15, 0, 0)



In [85]:

    
a=date_obj-datetime.now()



In [86]:

    
a.days









    Out[86]:





-3532



In [88]:

    
a.seconds









    Out[88]:





33861



In [89]:

    
os.getcwd()









    Out[89]:





'C:\\Users\\Dell'



In [93]:

    
import IPython 
print (IPython.sys_info())









    



{'commit_hash': '5c9c918',
 'commit_source': 'installation',
 'default_encoding': 'cp1252',
 'ipython_path': 'C:\\Users\\Dell\\Anaconda3\\lib\\site-packages\\IPython',
 'ipython_version': '5.1.0',
 'os_name': 'nt',
 'platform': 'Windows-7-6.1.7600-SP0',
 'sys_executable': 'C:\\Users\\Dell\\Anaconda3\\python.exe',
 'sys_platform': 'win32',
 'sys_version': '3.5.2 |Anaconda custom (64-bit)| (default, Jul  5 2016, '
                '11:41:13) [MSC v.1900 64 bit (AMD64)]'}



In [94]:

    
%load_ext version_information
%version_information









    Out[94]:




Software Version
Python 3.5.2 64bit [MSC v.1900 64 bit (AMD64)]
IPython 5.1.0
OS Windows 7 6.1.7600 SP0
Sat Apr 15 14:49:54 2017 India Standard Time



In [90]:

    
os.chdir('C:\\Users\\Dell\\Downloads')



In [91]:

    
os.listdir()









    Out[91]:





['140749_2017.pdf',
 '2011-F01-0700-Rev4-MDDS.XLSX',
 '20150817143155.pdf',
 '20160111060911.pdf',
 '20170214052225.pdf',
 '7z1604-x64.exe',
 '7z1604.exe',
 '861415_10151432783238421_2124270505_o (1).jpg',
 '861415_10151432783238421_2124270505_o.jpg',
 'AirPassengers.csv',
 'ajayo.jpg',
 'Alison Python  Invoice   - Sheet1.pdf',
 'Alison SAS  Invoice   - Sheet1.pdf',
 'All+CSV+Files+in+a+Folder.ipynb',
 'Allison Interview Jones Invoice   - Sheet1.pdf',
 'Anaconda3-4.2.0-Windows-x86_64.exe',
 'apachehttpd.exe',
 'April invoice adaptive analytics   - Sheet1.pdf',
 'Assignment14_BusinessAnalytics (1).docx',
 'Assignment14_BusinessAnalytics.docx',
 'Assignment15_BusinessAnalytics.docx',
 'Assignment16_BusinessAnalytics (1).docx',
 'Assignment16_BusinessAnalytics (2).docx',
 'Assignment16_BusinessAnalytics.docx',
 'aug ust 2008.JPG',
 'avast_free_antivirus_setup_online.exe',
 'avinash_ltv.zip',
 'BigDiamonds.csv',
 'BigDiamonds.csv (1).zip',
 'BigDiamonds.csv (2)',
 'BigDiamonds.csv (2).zip',
 'BigDiamonds.csv (3).zip',
 'BigDiamonds.csv.zip',
 'Boston (1).csv',
 'Boston.csv',
 'CAM- Ajay Ohri (1).pdf',
 'CAM- Ajay Ohri.pdf',
 'camtasia.exe',
 'ccFraud.csv',
 'Certificate of Incorporation - U74999DL2015PTC282030 (26 June 2015).pdf',
 'CHAP1-6PythonforRUsersAnapproachforDataScience.docx',
 'chapter+3+_+spark.html',
 'chi+square+test.ipynb',
 'chromeinstall-8u111.exe',
 'Cisco_WebEx_Add-On.exe',
 'class2.csv',
 'Collabera Invoice (1).pdf',
 'Collabera Invoice.pdf',
 'Collectcent Invoice.pdf',
 'college degrees.pdf',
 'DAP 1.pdf',
 'DAP 1.pptx',
 'DAP 6 RDBMS and SQL.pdf',
 'DAP 6 RDBMS and SQL.pptx',
 'Data Analysis (1).7z',
 'Data Analysis (1).rar',
 'Data Analysis.rar',
 'Data Viz.pptx',
 'data+exploration.ipynb',
 'data+manipulation.ipynb',
 'data+munging+again.ipynb',
 'data+wrangling+titanic+dataset.ipynb',
 'data1.csv',
 'datasets.csv',
 'Decision Trees.pdf',
 'DecisionStatsOfferLetter.docx',
 'DecisionStatsRelievingLetter.docx',
 'descriptive+stats+in+Python.ipynb',
 'desktop.ini',
 'Diamond (1).csv',
 'Diamond (2).csv',
 'Diamond (3).csv',
 'Diamond (4).csv',
 'Diamond (5).csv',
 'Diamond (6).csv',
 'Diamond (7).csv',
 'Diamond (8).csv',
 'Diamond.csv',
 'DropboxInstaller.exe',
 'edb_npgsql.exe',
 'edb_pgjdbc.exe',
 'edb_psqlodbc.exe',
 'edb_psqlodbc.exe-20170203172812',
 'edb_psqlodbc.exe-20170307203617',
 'final invoice edureka  - Sheet1.pdf',
 'FinalPythonforRUsersAnapproachforDataScience (1).docx',
 'FinalPythonforRUsersAnapproachforDataScience (2).docx',
 'FinalPythonforRUsersAnapproachforDataScience (3).docx',
 'FinalPythonforRUsersAnapproachforDataScience (4).docx',
 'FinalPythonforRUsersAnapproachforDataScience.docx',
 'final_webinar (1).pdf',
 'final_webinar.pdf',
 'Git-2.11.0-64-bit.exe',
 'Git-2.12.0-64-bit.exe',
 'GitHubSetup (1).exe',
 'GitHubSetup (2).exe',
 'GitHubSetup.exe',
 'GOMAUDIOGLOBALSETUP.EXE',
 'Hdma.csv',
 'Hedonic.csv',
 'HP Downloads',
 'HPSupportSolutionsFramework-12.5.32.203.exe',
 'image.png',
 'IMS PROSCHOOL Workshop.pptx.pdf',
 'IMS PROSCHOOL Workshop.pptx.pptx',
 'internship.docx',
 'Introduction to SAS (1).pdf',
 'Introduction to SAS Part 1 (1).pdf',
 'Introduction to SAS Part 1.pdf',
 'Introduction to SAS.pdf',
 'Invoice for Digital Vidya.pdf',
 'Invoice for Weekendr.pdf',
 'Invoice format - Ajay Ohri CONTATA (1).xls',
 'Invoice format - Ajay Ohri CONTATA.xls',
 'invoice rapid miner.pdf',
 'Invoice trafla format.docx',
 'iris2 (1).ipynb',
 'iris2 (2).ipynb',
 'iris2.ipynb',
 'January invoice Indicus  .pdf',
 'June AV   Invoice   - Sheet1.pdf',
 'Lecture 6 - KNN & Naive Bayes.ppt',
 'Local Disk (C) - Shortcut.lnk',
 'logistic regression - script for ppt.R',
 'logistic_regression_-_script_for_ppt.html',
 'March invoice Indicus   - Sheet1.pdf',
 'mongodb-win32-x86_64-2008plus-ssl-3.4.2-signed.msi',
 'mongodb-win32-x86_64-3.4.2-signed.msi',
 'mortDefault',
 'mortDefault.zip',
 'mtcarslm.R',
 'multiple+file+concat+in+pandas (1).ipynb',
 'multiple+file+concat+in+pandas.ipynb',
 'my+first+class+in+python.ipynb',
 'nltk.ipynb',
 'notebook-Copy1.html',
 'Offer Letter - Ajay Ohri (1).pdf',
 'Offer Letter - Ajay Ohri.pdf',
 'Other Data Mining  Methods (1).pdf',
 'Other Data Mining  Methods.pdf',
 'output1 (1).xls',
 'output1 (2).xls',
 'output1.xls',
 'pandas+11.ipynb',
 'pandas+analysis+1.ipynb',
 'pandas+data+manipulation.ipynb',
 'passport image.pdf',
 'Pawconinvoice2016.pdf',
 'Pawconinvoice2017 (1).pdf',
 'Pawconinvoice2017 (2).pdf',
 'Pawconinvoice2017 (3).pdf',
 'Pawconinvoice2017.pdf',
 'Payslip Feb 2016 - Sheet1.pdf',
 'Payslip Feb 2016.pdf',
 'Payslip Format Decisionstats - Sheet1.pdf',
 'Payslip Jan 2016 - Sheet1.pdf',
 'Payslip Jan 2016.pdf',
 'Payslip March 2016 - Sheet1.pdf',
 'Payslip March 2016.pdf',
 'pgd.csv',
 'postgresql-9.6.1-1-windows-x64.exe',
 'Program 1-results.rtf',
 'protein.csv',
 'python+with+postgres (1).ipynb',
 'python+with+postgres.ipynb',
 'Python.docx',
 'R-3.3.2-win.exe',
 'R-3.3.3-win.exe',
 'RCertificationExam.pdf',
 'reg+model.ipynb',
 'Revision -  Business Analytics (1).pdf',
 'Revision -  Business Analytics.pdf',
 'RidingMowers.csv',
 'rsconnect',
 'RStudio-1.0.136.exe',
 'Salary Slip, Feb 2016.pdf',
 'Salary Slip, Jan 2016.pdf',
 'Salary Slip, March 2016 (1).pdf',
 'Salary Slip, March 2016 (2).pdf',
 'Salary Slip, March 2016.pdf',
 'sales-of-shampoo-over-a-three-ye.csv',
 'SAS part 2.pdf',
 'SAS Part 3.pdf',
 'sas-university-edition-107140.pdf',
 'Scan0095.pdf',
 'Scanned Invoice for Collabera.pdf',
 'Screenshot 2017-01-23 12.36.55.png',
 'September invoice adaptive analytics   - Sheet1.pdf',
 'Sollers January.pdf',
 'sqlalchemy.ipynb',
 'stackoverflow-dump-analysis.html',
 'Sunstone.pdf',
 'Tableau.pdf',
 'TableauPublicDesktop-64bit-10-1-3.exe',
 'TableauPublicDesktop-64bit-10-1-4.exe',
 'telecom.csv',
 'TelecomServiceProviderCaseStudy.pdf',
 'test+web+scraping.ipynb',
 'Text Mining (1).pdf',
 'Text Mining.pdf',
 'third.sas7bdat',
 'Time Series  Forecasting (1).pdf',
 'Time Series  Forecasting.pdf',
 'ts.html',
 'ts.R',
 'Unconfirmed 373974.crdownload',
 'Unconfirmed 376991.crdownload',
 'Unconfirmed 950045.crdownload',
 'VirtualBox-5.1.8-111374-Win (1).exe',
 'VirtualBox-5.1.8-111374-Win.exe',
 'Web+Scraping+Yelp+with+Beautiful+Soup.ipynb',
 'Webinar for Business Analytics.pdf',
 'WhatsApp Image 2017-02-18 at 08.42.55 (1).jpeg',
 'WhatsApp Image 2017-02-18 at 08.42.55.jpeg']



In [99]:

    
import glob as glob



In [100]:

    
path = os.getcwd()
extension = 'csv'
os.chdir(path)
result = [i for i in glob.glob('*.{}'.format(extension))]
print(result)









    



['AirPassengers.csv', 'BigDiamonds.csv', 'Boston (1).csv', 'Boston.csv', 'ccFraud.csv', 'class2.csv', 'data1.csv', 'datasets.csv', 'Diamond (1).csv', 'Diamond (2).csv', 'Diamond (3).csv', 'Diamond (4).csv', 'Diamond (5).csv', 'Diamond (6).csv', 'Diamond (7).csv', 'Diamond (8).csv', 'Diamond.csv', 'Hdma.csv', 'Hedonic.csv', 'pgd.csv', 'protein.csv', 'RidingMowers.csv', 'sales-of-shampoo-over-a-three-ye.csv', 'telecom.csv']



In [95]:

    
import pandas as pd



In [97]:

    
fraud=pd.read_csv('ccFraud.csv')



In [104]:

    
mtcars=pd.read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/datasets/mtcars.csv")



In [101]:

    
smalldiamonds=pd.read_csv("C:\\Users\\Dell\\Desktop\\Diamond (8).csv")



In [111]:

    
fraud.columns









    Out[111]:





Index(['custID', 'gender', 'state', 'cardholder', 'balance', 'numTrans',
       'numIntlTrans', 'creditLine', 'fraudRisk'],
      dtype='object')



In [110]:

    
fraud.shape









    Out[110]:





(10000000, 9)



In [120]:

    
len(fraud)









    Out[120]:





10000000



In [121]:

    
len(fraud.columns)









    Out[121]:





9



In [109]:

    
fraud.dtypes









    Out[109]:





custID          int64
gender          int64
state           int64
cardholder      int64
balance         int64
numTrans        int64
numIntlTrans    int64
creditLine      int64
fraudRisk       int64
dtype: object



In [102]:

    
fraud.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000000 entries, 0 to 9999999
Data columns (total 9 columns):
custID          int64
gender          int64
state           int64
cardholder      int64
balance         int64
numTrans        int64
numIntlTrans    int64
creditLine      int64
fraudRisk       int64
dtypes: int64(9)
memory usage: 686.6 MB



In [105]:

    
mtcars.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 12 columns):
Unnamed: 0    32 non-null object
mpg           32 non-null float64
cyl           32 non-null int64
disp          32 non-null float64
hp            32 non-null int64
drat          32 non-null float64
wt            32 non-null float64
qsec          32 non-null float64
vs            32 non-null int64
am            32 non-null int64
gear          32 non-null int64
carb          32 non-null int64
dtypes: float64(5), int64(6), object(1)
memory usage: 3.1+ KB



In [106]:

    
smalldiamonds.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 308 entries, 0 to 307
Data columns (total 6 columns):
Unnamed: 0       308 non-null int64
carat            308 non-null float64
colour           308 non-null object
clarity          308 non-null object
certification    308 non-null object
price            308 non-null int64
dtypes: float64(1), int64(2), object(3)
memory usage: 14.5+ KB



In [108]:

    
fraud.head()









    Out[108]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      0
      1
      1
      35
      1
      3000
      4
      14
      2
      0
    
    
      1
      2
      2
      2
      1
      0
      9
      0
      18
      0
    
    
      2
      3
      2
      2
      1
      0
      27
      9
      16
      0
    
    
      3
      4
      1
      15
      1
      0
      12
      0
      5
      0
    
    
      4
      5
      1
      46
      1
      0
      11
      16
      7
      0



In [112]:

    
fraud.tail()









    Out[112]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      9999995
      9999996
      1
      37
      1
      0
      10
      0
      9
      0
    
    
      9999996
      9999997
      1
      16
      1
      0
      33
      2
      4
      0
    
    
      9999997
      9999998
      1
      24
      1
      9000
      38
      0
      8
      0
    
    
      9999998
      9999999
      1
      28
      1
      7000
      20
      19
      6
      0
    
    
      9999999
      10000000
      1
      23
      1
      0
      13
      0
      7
      0



In [113]:

    
fraud2=fraud.copy()



In [115]:

    
fraud.describe()









    Out[115]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      count
      1.000000e+07
      1.000000e+07
      1.000000e+07
      1.000000e+07
      1.000000e+07
      1.000000e+07
      1.000000e+07
      1.000000e+07
      1.000000e+07
    
    
      mean
      5.000000e+06
      1.382177e+00
      2.466127e+01
      1.030004e+00
      4.109920e+03
      2.893519e+01
      4.047190e+00
      9.134469e+00
      5.960140e-02
    
    
      std
      2.886751e+06
      4.859195e-01
      1.497012e+01
      1.705991e-01
      3.996847e+03
      2.655378e+01
      8.602970e+00
      9.641974e+00
      2.367469e-01
    
    
      min
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      0.000000e+00
      0.000000e+00
      0.000000e+00
      1.000000e+00
      0.000000e+00
    
    
      25%
      2.500001e+06
      1.000000e+00
      1.000000e+01
      1.000000e+00
      0.000000e+00
      1.000000e+01
      0.000000e+00
      4.000000e+00
      0.000000e+00
    
    
      50%
      5.000000e+06
      1.000000e+00
      2.400000e+01
      1.000000e+00
      3.706000e+03
      1.900000e+01
      0.000000e+00
      6.000000e+00
      0.000000e+00
    
    
      75%
      7.500000e+06
      2.000000e+00
      3.800000e+01
      1.000000e+00
      6.000000e+03
      3.900000e+01
      4.000000e+00
      1.100000e+01
      0.000000e+00
    
    
      max
      1.000000e+07
      2.000000e+00
      5.100000e+01
      2.000000e+00
      4.148500e+04
      1.000000e+02
      6.000000e+01
      7.500000e+01
      1.000000e+00



In [116]:

    
fraud.gender.describe()









    Out[116]:





count    1.000000e+07
mean     1.382177e+00
std      4.859195e-01
min      1.000000e+00
25%      1.000000e+00
50%      1.000000e+00
75%      2.000000e+00
max      2.000000e+00
Name: gender, dtype: float64



In [117]:

    
mtcars.head()









    Out[117]:






  
    
      
      Unnamed: 0
      mpg
      cyl
      disp
      hp
      drat
      wt
      qsec
      vs
      am
      gear
      carb
    
  
  
    
      0
      Mazda RX4
      21.0
      6
      160.0
      110
      3.90
      2.620
      16.46
      0
      1
      4
      4
    
    
      1
      Mazda RX4 Wag
      21.0
      6
      160.0
      110
      3.90
      2.875
      17.02
      0
      1
      4
      4
    
    
      2
      Datsun 710
      22.8
      4
      108.0
      93
      3.85
      2.320
      18.61
      1
      1
      4
      1
    
    
      3
      Hornet 4 Drive
      21.4
      6
      258.0
      110
      3.08
      3.215
      19.44
      1
      0
      3
      1
    
    
      4
      Hornet Sportabout
      18.7
      8
      360.0
      175
      3.15
      3.440
      17.02
      0
      0
      3
      2



In [118]:

    
mtcars=mtcars.drop("Unnamed: 0",1)



In [119]:

    
mtcars.head()



In [124]:

    
import IPython
print (IPython.sys_info())









    



{'commit_hash': '5c9c918',
 'commit_source': 'installation',
 'default_encoding': 'cp1252',
 'ipython_path': 'C:\\Users\\Dell\\Anaconda3\\lib\\site-packages\\IPython',
 'ipython_version': '5.1.0',
 'os_name': 'nt',
 'platform': 'Windows-7-6.1.7600-SP0',
 'sys_executable': 'C:\\Users\\Dell\\Anaconda3\\python.exe',
 'sys_platform': 'win32',
 'sys_version': '3.5.2 |Anaconda custom (64-bit)| (default, Jul  5 2016, '
                '11:41:13) [MSC v.1900 64 bit (AMD64)]'}



In [125]:

    
!pip install version_information
%load_ext version_information
%version_information


!pip freeze









    



Requirement already satisfied: version_information in c:\users\dell\anaconda3\lib\site-packages
The version_information extension is already loaded. To reload it, use:
  %reload_ext version_information
alabaster==0.7.9
anaconda-clean==1.0
anaconda-client==1.5.1
anaconda-navigator==1.3.1
argcomplete==1.0.0
astroid==1.4.7
astropy==1.2.1
Babel==2.3.4
backports.shutil-get-terminal-size==1.0.0
beautifulsoup4==4.5.1
bitarray==0.8.1
blaze==0.10.1
bokeh==0.12.2
boto==2.42.0
Bottleneck==1.1.0
brewer2mpl==1.4.1
cffi==1.7.0
chest==0.2.3
click==6.6
cloudpickle==0.2.1
clyent==1.2.2
colorama==0.3.7
comtypes==1.1.2
conda==4.3.9
conda-build==2.0.2
configobj==5.0.6
contextlib2==0.5.3
cryptography==1.5
cycler==0.10.0
Cython==0.24.1
cytoolz==0.8.0
dask==0.11.0
datashape==0.5.2
decorator==4.0.10
dill==0.2.5
docutils==0.12
dynd===c328ab7
et-xmlfile==1.0.1
fastcache==1.0.2
filelock==2.0.6
Flask==0.11.1
Flask-Cors==2.1.2
gevent==1.1.2
ggplot==0.11.5
greenlet==0.4.10
h5py==2.6.0
HeapDict==1.0.0
idna==2.1
imagesize==0.7.1
ipykernel==4.5.0
ipython==5.1.0
ipython-genutils==0.1.0
ipywidgets==5.2.2
itsdangerous==0.24
jdcal==1.2
jedi==0.9.0
Jinja2==2.8
jsonschema==2.5.1
jupyter==1.0.0
jupyter-client==4.4.0
jupyter-console==5.0.0
jupyter-core==4.2.0
lazy-object-proxy==1.2.1
llvmlite==0.13.0
locket==0.2.0
lxml==3.6.4
MarkupSafe==0.23
matplotlib==1.5.3
menuinst==1.4.1
mistune==0.7.3
mpmath==0.19
multipledispatch==0.4.8
nb-anacondacloud==1.2.0
nb-conda==2.0.0
nb-conda-kernels==2.0.0
nbconvert==4.2.0
nbformat==4.1.0
nbpresent==3.0.2
networkx==1.11
nltk==3.2.1
nose==1.3.7
notebook==4.2.3
numba==0.28.1
numexpr==2.6.1
numpy==1.11.1
odo==0.5.0
openpyxl==2.3.2
pandas==0.18.1
pandasql==0.7.3
partd==0.3.6
path.py==0.0.0
pathlib2==2.1.0
patsy==0.4.1
pep8==1.7.0
pickleshare==0.7.4
Pillow==3.3.1
pkginfo==1.3.2
ply==3.9
prompt-toolkit==1.0.3
psutil==4.3.1
psycopg2==2.6.2
py==1.4.31
pyasn1==0.1.9
pycosat==0.6.1
pycparser==2.14
pycrypto==2.6.1
pycurl==7.43.0
pyflakes==1.3.0
Pygments==2.1.3
pylint==1.5.4
pyodbc==3.0.10
pyOpenSSL==16.2.0
pyparsing==2.1.4
pytest==2.9.2
python-dateutil==2.5.3
pytz==2016.6.1
pywin32==220
PyYAML==3.12
pyzmq==15.4.0
QtAwesome==0.3.3
qtconsole==4.2.1
QtPy==1.1.2
requests==2.12.4
rope-py3k==0.9.4.post1
ruamel-yaml===-VERSION
scikit-image==0.12.3
scikit-learn==0.17.1
scipy==0.18.1
seaborn==0.7.1
simplegeneric==0.8.1
singledispatch==3.4.0.3
six==1.10.0
snowballstemmer==1.2.1
sockjs-tornado==1.0.3
sphinx==1.4.6
spyder==3.0.0
SQLAlchemy==1.0.13
statsmodels==0.6.1
sympy==1.0
tables==3.2.2
toolz==0.8.0
tornado==4.4.1
traitlets==4.3.0
unicodecsv==0.14.1
urllib3==1.20
version-information==1.0.3
wcwidth==0.1.7
Werkzeug==0.11.11
widgetsnbextension==1.2.6
win-unicode-console==0.5
wrapt==1.10.6
xlrd==1.0.0
XlsxWriter==0.9.3
xlwings==0.10.0
xlwt==1.1.2



In [127]:

    
!pip install guppy









    



Collecting guppy
  Downloading guppy-0.1.10.tar.gz (484kB)
Building wheels for collected packages: guppy
  Running setup.py bdist_wheel for guppy: started
  Running setup.py bdist_wheel for guppy: finished with status 'error'
  Complete output from command c:\users\dell\anaconda3\python.exe -u -c "import setuptools, tokenize;__file__='C:\\Users\\Dell\\AppData\\Local\\Temp\\pip-build-d3t4jj4u\\guppy\\setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" bdist_wheel -d C:\Users\Dell\AppData\Local\Temp\tmppr8koym2pip-wheel- --python-tag cp35:
  running bdist_wheel
  running build
  running build_py
  creating build
  creating build\lib.win-amd64-3.5
  creating build\lib.win-amd64-3.5\guppy
  copying guppy\__init__.py -> build\lib.win-amd64-3.5\guppy
  creating build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\__init__.py -> build\lib.win-amd64-3.5\guppy\doc
  creating build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\Cat.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\cmd.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\Code.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\Compat.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\etc.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\ExecfileWithModuleInfo.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\FSA.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\Glue.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\Help.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\IterPermute.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\KanExtension.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\KnuthBendix.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\OutputHandling.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\RE.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\RE_Rect.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\textView.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\tkcursors.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\Unpack.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\xterm.py -> build\lib.win-amd64-3.5\guppy\etc
  copying guppy\etc\__init__.py -> build\lib.win-amd64-3.5\guppy\etc
  creating build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Document.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\DottedTree.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Exceptions.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\FileIO.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Filer.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Gsml.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Help.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Html.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Latex.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Main.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\SpecNodes.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Tester.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\Text.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\XHTML.py -> build\lib.win-amd64-3.5\guppy\gsl
  copying guppy\gsl\__init__.py -> build\lib.win-amd64-3.5\guppy\gsl
  creating build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\AbstractAlgebra.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Classifiers.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Console.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Doc.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\ImpSet.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Monitor.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\OutputHandling.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Part.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Path.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\pbhelp.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Prof.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\RefPat.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Remote.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\RemoteConstants.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\RM.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Spec.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Target.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\UniSet.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\Use.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\View.py -> build\lib.win-amd64-3.5\guppy\heapy
  copying guppy\heapy\__init__.py -> build\lib.win-amd64-3.5\guppy\heapy
  creating build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\support.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_all.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_Classifiers.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_dependencies.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_ER.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_heapyc.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_menuleak.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_OutputHandling.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_Part.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_Path.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_RefPat.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_RetaGraph.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_sf.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_Spec.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_UniSet.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\test_View.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  copying guppy\heapy\test\__init__.py -> build\lib.win-amd64-3.5\guppy\heapy\test
  creating build\lib.win-amd64-3.5\guppy\sets
  copying guppy\sets\test.py -> build\lib.win-amd64-3.5\guppy\sets
  copying guppy\sets\__init__.py -> build\lib.win-amd64-3.5\guppy\sets
  copying guppy\doc\docexample.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\gsl.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\gslexample.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\guppy.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\heapyc.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\heapy_RootState.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\heapy_tutorial.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\heapy_UniSet.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\heapy_Use.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\index.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\ProfileBrowser.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\sets.html -> build\lib.win-amd64-3.5\guppy\doc
  copying guppy\doc\pbscreen.jpg -> build\lib.win-amd64-3.5\guppy\doc
  running build_ext
  building 'guppy.sets.setsc' extension
  error: Microsoft Visual C++ 14.0 is required. Get it with "Microsoft Visual C++ Build Tools": http://landinghub.visualstudio.com/visual-cpp-build-tools
  
  ----------------------------------------
  Running setup.py clean for guppy
Failed to build guppy
Installing collected packages: guppy
  Running setup.py install for guppy: started
    Running setup.py install for guppy: finished with status 'error'
    Complete output from command c:\users\dell\anaconda3\python.exe -u -c "import setuptools, tokenize;__file__='C:\\Users\\Dell\\AppData\\Local\\Temp\\pip-build-d3t4jj4u\\guppy\\setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" install --record C:\Users\Dell\AppData\Local\Temp\pip-_nlam_7o-record\install-record.txt --single-version-externally-managed --compile:
    running install
    running build
    running build_py
    creating build
    creating build\lib.win-amd64-3.5
    creating build\lib.win-amd64-3.5\guppy
    copying guppy\__init__.py -> build\lib.win-amd64-3.5\guppy
    creating build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\__init__.py -> build\lib.win-amd64-3.5\guppy\doc
    creating build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\Cat.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\cmd.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\Code.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\Compat.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\etc.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\ExecfileWithModuleInfo.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\FSA.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\Glue.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\Help.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\IterPermute.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\KanExtension.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\KnuthBendix.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\OutputHandling.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\RE.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\RE_Rect.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\textView.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\tkcursors.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\Unpack.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\xterm.py -> build\lib.win-amd64-3.5\guppy\etc
    copying guppy\etc\__init__.py -> build\lib.win-amd64-3.5\guppy\etc
    creating build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Document.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\DottedTree.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Exceptions.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\FileIO.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Filer.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Gsml.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Help.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Html.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Latex.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Main.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\SpecNodes.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Tester.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\Text.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\XHTML.py -> build\lib.win-amd64-3.5\guppy\gsl
    copying guppy\gsl\__init__.py -> build\lib.win-amd64-3.5\guppy\gsl
    creating build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\AbstractAlgebra.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Classifiers.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Console.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Doc.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\ImpSet.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Monitor.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\OutputHandling.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Part.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Path.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\pbhelp.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Prof.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\RefPat.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Remote.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\RemoteConstants.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\RM.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Spec.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Target.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\UniSet.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\Use.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\View.py -> build\lib.win-amd64-3.5\guppy\heapy
    copying guppy\heapy\__init__.py -> build\lib.win-amd64-3.5\guppy\heapy
    creating build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\support.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_all.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_Classifiers.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_dependencies.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_ER.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_heapyc.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_menuleak.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_OutputHandling.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_Part.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_Path.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_RefPat.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_RetaGraph.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_sf.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_Spec.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_UniSet.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\test_View.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    copying guppy\heapy\test\__init__.py -> build\lib.win-amd64-3.5\guppy\heapy\test
    creating build\lib.win-amd64-3.5\guppy\sets
    copying guppy\sets\test.py -> build\lib.win-amd64-3.5\guppy\sets
    copying guppy\sets\__init__.py -> build\lib.win-amd64-3.5\guppy\sets
    copying guppy\doc\docexample.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\gsl.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\gslexample.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\guppy.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\heapyc.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\heapy_RootState.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\heapy_tutorial.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\heapy_UniSet.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\heapy_Use.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\index.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\ProfileBrowser.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\sets.html -> build\lib.win-amd64-3.5\guppy\doc
    copying guppy\doc\pbscreen.jpg -> build\lib.win-amd64-3.5\guppy\doc
    running build_ext
    building 'guppy.sets.setsc' extension
    error: Microsoft Visual C++ 14.0 is required. Get it with "Microsoft Visual C++ Build Tools": http://landinghub.visualstudio.com/visual-cpp-build-tools
    
    ----------------------------------------






    



  Failed building wheel for guppy
Command "c:\users\dell\anaconda3\python.exe -u -c "import setuptools, tokenize;__file__='C:\\Users\\Dell\\AppData\\Local\\Temp\\pip-build-d3t4jj4u\\guppy\\setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" install --record C:\Users\Dell\AppData\Local\Temp\pip-_nlam_7o-record\install-record.txt --single-version-externally-managed --compile" failed with error code 1 in C:\Users\Dell\AppData\Local\Temp\pip-build-d3t4jj4u\guppy\



In [128]:

    
fraud.head()









    Out[128]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      0
      1
      1
      35
      1
      3000
      4
      14
      2
      0
    
    
      1
      2
      2
      2
      1
      0
      9
      0
      18
      0
    
    
      2
      3
      2
      2
      1
      0
      27
      9
      16
      0
    
    
      3
      4
      1
      15
      1
      0
      12
      0
      5
      0
    
    
      4
      5
      1
      46
      1
      0
      11
      16
      7
      0



In [135]:

    
fraud.head().gender









    Out[135]:





0    1
1    2
2    2
3    1
4    1
Name: gender, dtype: int64



In [133]:

    
fraud.gender.head()









    Out[133]:





0    1
1    2
2    2
3    1
4    1
Name: gender, dtype: int64



In [132]:

    
fraud['gender'].head()









    Out[132]:





0    1
1    2
2    2
3    1
4    1
Name: gender, dtype: int64



In [131]:

    
fraud[['gender','state','balance']].head()



In [136]:

    
fraud.ix[10:20]









    Out[136]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      10
      11
      1
      46
      1
      4601
      54
      0
      4
      0
    
    
      11
      12
      1
      10
      1
      3000
      20
      0
      2
      0
    
    
      12
      13
      1
      6
      1
      0
      45
      2
      4
      0
    
    
      13
      14
      2
      38
      1
      9000
      41
      3
      8
      0
    
    
      14
      15
      1
      27
      1
      5227
      60
      0
      17
      0
    
    
      15
      16
      1
      44
      1
      0
      22
      0
      5
      0
    
    
      16
      17
      2
      18
      1
      13970
      20
      0
      13
      0
    
    
      17
      18
      1
      35
      1
      3113
      13
      6
      8
      0
    
    
      18
      19
      1
      5
      1
      9000
      20
      2
      8
      0
    
    
      19
      20
      2
      31
      1
      1860
      21
      10
      8
      0
    
    
      20
      21
      1
      39
      1
      4000
      24
      0
      3
      0



In [137]:

    
fraud.iloc[:,:]









    Out[137]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      0
      1
      1
      35
      1
      3000
      4
      14
      2
      0
    
    
      1
      2
      2
      2
      1
      0
      9
      0
      18
      0
    
    
      2
      3
      2
      2
      1
      0
      27
      9
      16
      0
    
    
      3
      4
      1
      15
      1
      0
      12
      0
      5
      0
    
    
      4
      5
      1
      46
      1
      0
      11
      16
      7
      0
    
    
      5
      6
      2
      44
      2
      5546
      21
      0
      13
      0
    
    
      6
      7
      1
      3
      1
      2000
      41
      0
      1
      0
    
    
      7
      8
      1
      10
      1
      6016
      20
      3
      6
      0
    
    
      8
      9
      2
      32
      1
      2428
      4
      10
      22
      0
    
    
      9
      10
      1
      23
      1
      0
      18
      56
      5
      0
    
    
      10
      11
      1
      46
      1
      4601
      54
      0
      4
      0
    
    
      11
      12
      1
      10
      1
      3000
      20
      0
      2
      0
    
    
      12
      13
      1
      6
      1
      0
      45
      2
      4
      0
    
    
      13
      14
      2
      38
      1
      9000
      41
      3
      8
      0
    
    
      14
      15
      1
      27
      1
      5227
      60
      0
      17
      0
    
    
      15
      16
      1
      44
      1
      0
      22
      0
      5
      0
    
    
      16
      17
      2
      18
      1
      13970
      20
      0
      13
      0
    
    
      17
      18
      1
      35
      1
      3113
      13
      6
      8
      0
    
    
      18
      19
      1
      5
      1
      9000
      20
      2
      8
      0
    
    
      19
      20
      2
      31
      1
      1860
      21
      10
      8
      0
    
    
      20
      21
      1
      39
      1
      4000
      24
      0
      3
      0
    
    
      21
      22
      1
      34
      1
      0
      22
      0
      3
      0
    
    
      22
      23
      1
      5
      1
      0
      7
      0
      11
      0
    
    
      23
      24
      2
      21
      1
      0
      15
      0
      3
      0
    
    
      24
      25
      1
      25
      1
      0
      12
      0
      65
      0
    
    
      25
      26
      2
      29
      1
      5000
      4
      9
      4
      0
    
    
      26
      27
      1
      38
      1
      4000
      21
      5
      3
      0
    
    
      27
      28
      1
      9
      1
      12000
      20
      0
      11
      0
    
    
      28
      29
      2
      20
      1
      0
      19
      0
      2
      0
    
    
      29
      30
      2
      49
      1
      5192
      84
      0
      13
      1
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      9999970
      9999971
      1
      10
      1
      0
      12
      0
      11
      0
    
    
      9999971
      9999972
      2
      40
      1
      3683
      31
      3
      7
      0
    
    
      9999972
      9999973
      1
      46
      1
      5000
      69
      0
      4
      0
    
    
      9999973
      9999974
      1
      10
      2
      0
      36
      31
      14
      0
    
    
      9999974
      9999975
      1
      25
      1
      0
      18
      0
      3
      0
    
    
      9999975
      9999976
      2
      48
      1
      4210
      23
      0
      8
      0
    
    
      9999976
      9999977
      2
      4
      1
      0
      14
      0
      7
      0
    
    
      9999977
      9999978
      1
      35
      1
      8000
      24
      0
      7
      0
    
    
      9999978
      9999979
      1
      44
      1
      5000
      12
      0
      4
      0
    
    
      9999979
      9999980
      1
      6
      1
      7669
      5
      3
      7
      0
    
    
      9999980
      9999981
      1
      10
      1
      13889
      54
      0
      13
      0
    
    
      9999981
      9999982
      1
      33
      1
      0
      23
      26
      18
      0
    
    
      9999982
      9999983
      1
      44
      1
      3000
      14
      0
      2
      0
    
    
      9999983
      9999984
      2
      13
      1
      0
      2
      0
      5
      0
    
    
      9999984
      9999985
      1
      39
      1
      7000
      30
      0
      6
      0
    
    
      9999985
      9999986
      2
      45
      2
      3000
      4
      0
      2
      0
    
    
      9999986
      9999987
      1
      23
      1
      7000
      59
      0
      6
      0
    
    
      9999987
      9999988
      2
      24
      1
      6000
      46
      0
      5
      0
    
    
      9999988
      9999989
      1
      18
      1
      15000
      72
      0
      14
      1
    
    
      9999989
      9999990
      2
      4
      1
      12000
      17
      7
      11
      0
    
    
      9999990
      9999991
      1
      16
      2
      1561
      8
      0
      5
      0
    
    
      9999991
      9999992
      2
      36
      1
      5217
      6
      0
      5
      0
    
    
      9999992
      9999993
      1
      38
      1
      0
      7
      0
      2
      0
    
    
      9999993
      9999994
      2
      43
      1
      2607
      6
      0
      5
      0
    
    
      9999994
      9999995
      1
      16
      2
      17376
      3
      0
      20
      0
    
    
      9999995
      9999996
      1
      37
      1
      0
      10
      0
      9
      0
    
    
      9999996
      9999997
      1
      16
      1
      0
      33
      2
      4
      0
    
    
      9999997
      9999998
      1
      24
      1
      9000
      38
      0
      8
      0
    
    
      9999998
      9999999
      1
      28
      1
      7000
      20
      19
      6
      0
    
    
      9999999
      10000000
      1
      23
      1
      0
      13
      0
      7
      0
    
  

10000000 rows × 9 columns



In [139]:

    
fraud.iloc[10:20,1:4]



In [140]:

    
fraud.describe()









    Out[140]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      count
      1.000000e+07
      1.000000e+07
      1.000000e+07
      1.000000e+07
      1.000000e+07
      1.000000e+07
      1.000000e+07
      1.000000e+07
      1.000000e+07
    
    
      mean
      5.000000e+06
      1.382177e+00
      2.466127e+01
      1.030004e+00
      4.109920e+03
      2.893519e+01
      4.047190e+00
      9.134469e+00
      5.960140e-02
    
    
      std
      2.886751e+06
      4.859195e-01
      1.497012e+01
      1.705991e-01
      3.996847e+03
      2.655378e+01
      8.602970e+00
      9.641974e+00
      2.367469e-01
    
    
      min
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      0.000000e+00
      0.000000e+00
      0.000000e+00
      1.000000e+00
      0.000000e+00
    
    
      25%
      2.500001e+06
      1.000000e+00
      1.000000e+01
      1.000000e+00
      0.000000e+00
      1.000000e+01
      0.000000e+00
      4.000000e+00
      0.000000e+00
    
    
      50%
      5.000000e+06
      1.000000e+00
      2.400000e+01
      1.000000e+00
      3.706000e+03
      1.900000e+01
      0.000000e+00
      6.000000e+00
      0.000000e+00
    
    
      75%
      7.500000e+06
      2.000000e+00
      3.800000e+01
      1.000000e+00
      6.000000e+03
      3.900000e+01
      4.000000e+00
      1.100000e+01
      0.000000e+00
    
    
      max
      1.000000e+07
      2.000000e+00
      5.100000e+01
      2.000000e+00
      4.148500e+04
      1.000000e+02
      6.000000e+01
      7.500000e+01
      1.000000e+00



In [141]:

    
fraud.gender.value_counts()









    Out[141]:





1    6178231
2    3821769
Name: gender, dtype: int64



In [142]:

    
fraud.state.value_counts()









    Out[142]:





5     1216069
44     812638
10     608630
35     608575
39     405892
15     404720
36     364531
23     304553
11     303984
29     303833
32     284428
46     252812
43     203827
16     203143
25     203045
48     202972
4      202776
21     202444
20     201918
49     182557
24     182201
6      171774
2      162574
41     152253
19     151715
18     142170
37     122191
38     121846
7      121802
13     111775
26     101829
3      101740
45      91375
34      91326
17      91127
33      81332
50      61385
14      60992
28      60617
12      50438
22      40819
31      40563
9       30333
40      30233
27      30131
51      20691
8       20603
42      20449
30      20215
1       20137
47      20017
Name: state, dtype: int64



In [143]:

    
fraud.fraudRisk.value_counts()









    Out[143]:





0    9403986
1     596014
Name: fraudRisk, dtype: int64



In [144]:

    
pd.crosstab(fraud.fraudRisk,fraud.gender)



In [145]:

    
pd.crosstab(fraud.fraudRisk,fraud.gender,margins=True)



In [148]:

    
np.random.choice(100,10)









    Out[148]:





array([36, 51, 88, 10, 86,  2, 92, 45, 22, 33])



In [150]:

    
a=len(fraud)



In [151]:

    
b=0.0001



In [152]:

    
a*b









    Out[152]:





10.0



In [154]:

    
fraud.ix[np.random.choice(len(fraud),a*b)]









    



C:\Users\Dell\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  if __name__ == '__main__':






    Out[154]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      1213704
      1213705
      1
      41
      1
      6000
      34
      0
      5
      0
    
    
      9188257
      9188258
      1
      4
      1
      0
      6
      0
      6
      0
    
    
      7040966
      7040967
      1
      32
      1
      0
      15
      0
      15
      0
    
    
      9106986
      9106987
      1
      36
      1
      2000
      28
      0
      1
      0
    
    
      2441776
      2441777
      1
      5
      1
      10000
      39
      5
      9
      0
    
    
      6415126
      6415127
      2
      39
      1
      0
      10
      2
      6
      0
    
    
      6300659
      6300660
      1
      11
      1
      8000
      4
      17
      7
      0
    
    
      952480
      952481
      1
      6
      1
      6000
      10
      0
      5
      0
    
    
      4407853
      4407854
      1
      41
      1
      0
      3
      0
      3
      0
    
    
      3029728
      3029729
      1
      9
      1
      4000
      32
      0
      3
      0



In [156]:

    
! pip install pandasql









    



Requirement already satisfied: pandasql in c:\users\dell\anaconda3\lib\site-packages
Requirement already satisfied: pandas in c:\users\dell\anaconda3\lib\site-packages (from pandasql)
Requirement already satisfied: numpy in c:\users\dell\anaconda3\lib\site-packages (from pandasql)
Requirement already satisfied: sqlalchemy in c:\users\dell\anaconda3\lib\site-packages (from pandasql)
Requirement already satisfied: python-dateutil>=2 in c:\users\dell\anaconda3\lib\site-packages (from pandas->pandasql)
Requirement already satisfied: pytz>=2011k in c:\users\dell\anaconda3\lib\site-packages (from pandas->pandasql)
Requirement already satisfied: six>=1.5 in c:\users\dell\anaconda3\lib\site-packages (from python-dateutil>=2->pandas->pandasql)



In [163]:

    
from pandasql import sqldf
pysqldf = lambda q: sqldf(q, globals())



In [157]:

    
mtcars.head()



In [164]:

    
pysqldf("SELECT * FROM mtcars  LIMIT 10;")



In [165]:

    
pysqldf("SELECT * FROM mtcars  WHERE gear > 4;")



In [166]:

    
pysqldf("SELECT AVG(mpg),gear FROM mtcars group by gear  ;")



In [167]:

    
mtcars.mpg.mean()









    Out[167]:





20.090624999999996



In [169]:

    
g1=pd.groupby(mtcars,mtcars.gear)



In [170]:

    
g1.mean()



In [171]:

    
mtcars.gear.value_counts()









    Out[171]:





3    15
4    12
5     5
Name: gear, dtype: int64



In [173]:

    
mtcars.cyl.unique()









    Out[173]:





array([6, 4, 8], dtype=int64)



In [174]:

    
pd.crosstab(mtcars.gear,mtcars.cyl)



In [175]:

    
mtcars.pivot_table(index='gear', columns='cyl', values='mpg', fill_value=0)



In [176]:

    
fraud.head()









    Out[176]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      0
      1
      1
      35
      1
      3000
      4
      14
      2
      0
    
    
      1
      2
      2
      2
      1
      0
      9
      0
      18
      0
    
    
      2
      3
      2
      2
      1
      0
      27
      9
      16
      0
    
    
      3
      4
      1
      15
      1
      0
      12
      0
      5
      0
    
    
      4
      5
      1
      46
      1
      0
      11
      16
      7
      0



In [181]:

    
del fraud['custID']



In [182]:

    
fraud.head()









    Out[182]:






  
    
      
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      0
      1
      35
      1
      3000
      4
      14
      2
      0
    
    
      1
      2
      2
      1
      0
      9
      0
      18
      0
    
    
      2
      2
      2
      1
      0
      27
      9
      16
      0
    
    
      3
      1
      15
      1
      0
      12
      0
      5
      0
    
    
      4
      1
      46
      1
      0
      11
      16
      7
      0



In [183]:

    
fraud3=fraud



In [186]:

    
del fraud['state']



In [187]:

    
fraud3.head()









    Out[187]:






  
    
      
      gender
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      0
      1
      1
      3000
      4
      14
      2
      0
    
    
      1
      2
      1
      0
      9
      0
      18
      0
    
    
      2
      2
      1
      0
      27
      9
      16
      0
    
    
      3
      1
      1
      0
      12
      0
      5
      0
    
    
      4
      1
      1
      0
      11
      16
      7
      0



In [190]:

    
wine=pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",header=None)



In [191]:

    
wine.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 14 columns):
0     178 non-null int64
1     178 non-null float64
2     178 non-null float64
3     178 non-null float64
4     178 non-null float64
5     178 non-null int64
6     178 non-null float64
7     178 non-null float64
8     178 non-null float64
9     178 non-null float64
10    178 non-null float64
11    178 non-null float64
12    178 non-null float64
13    178 non-null int64
dtypes: float64(11), int64(3)
memory usage: 19.5 KB



In [200]:

    
wine.columns=['WineClass','Alcohol','Malic acid','Ash','Alcalinity of ash','Magnesium','Total phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins','Color intensity','Hue','OD280/OD315 of diluted wines','Proline']



In [201]:

    
wine.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 14 columns):
WineClass                       178 non-null int64
Alcohol                         178 non-null float64
Malic acid                      178 non-null float64
Ash                             178 non-null float64
Alcalinity of ash               178 non-null float64
Magnesium                       178 non-null int64
Total phenols                   178 non-null float64
Flavanoids                      178 non-null float64
Nonflavanoid phenols            178 non-null float64
Proanthocyanins                 178 non-null float64
Color intensity                 178 non-null float64
Hue                             178 non-null float64
OD280/OD315 of diluted wines    178 non-null float64
Proline                         178 non-null int64
dtypes: float64(11), int64(3)
memory usage: 19.5 KB



In [202]:

    
wine.head()









    Out[202]:






  
    
      
      WineClass
      Alcohol
      Malic acid
      Ash
      Alcalinity of ash
      Magnesium
      Total phenols
      Flavanoids
      Nonflavanoid phenols
      Proanthocyanins
      Color intensity
      Hue
      OD280/OD315 of diluted wines
      Proline
    
  
  
    
      0
      1
      14.23
      1.71
      2.43
      15.6
      127
      2.80
      3.06
      0.28
      2.29
      5.64
      1.04
      3.92
      1065
    
    
      1
      1
      13.20
      1.78
      2.14
      11.2
      100
      2.65
      2.76
      0.26
      1.28
      4.38
      1.05
      3.40
      1050
    
    
      2
      1
      13.16
      2.36
      2.67
      18.6
      101
      2.80
      3.24
      0.30
      2.81
      5.68
      1.03
      3.17
      1185
    
    
      3
      1
      14.37
      1.95
      2.50
      16.8
      113
      3.85
      3.49
      0.24
      2.18
      7.80
      0.86
      3.45
      1480
    
    
      4
      1
      13.24
      2.59
      2.87
      21.0
      118
      2.80
      2.69
      0.39
      1.82
      4.32
      1.04
      2.93
      735



In [204]:

    
wine.WineClass.value_counts()









    Out[204]:





2    71
1    59
3    48
Name: WineClass, dtype: int64



In [205]:

    
classby=pd.groupby(wine,wine.WineClass)



In [206]:

    
classby.mean()









    Out[206]:






  
    
      
      Alcohol
      Malic acid
      Ash
      Alcalinity of ash
      Magnesium
      Total phenols
      Flavanoids
      Nonflavanoid phenols
      Proanthocyanins
      Color intensity
      Hue
      OD280/OD315 of diluted wines
      Proline
    
    
      WineClass
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      13.744746
      2.010678
      2.455593
      17.037288
      106.338983
      2.840169
      2.982373
      0.290000
      1.899322
      5.528305
      1.062034
      3.157797
      1115.711864
    
    
      2
      12.278732
      1.932676
      2.244789
      20.238028
      94.549296
      2.258873
      2.080845
      0.363662
      1.630282
      3.086620
      1.056282
      2.785352
      519.507042
    
    
      3
      13.153750
      3.333750
      2.437083
      21.416667
      99.312500
      1.678750
      0.781458
      0.447500
      1.153542
      7.396250
      0.682708
      1.683542
      629.895833



In [207]:

    
wine.describe()









    Out[207]:






  
    
      
      WineClass
      Alcohol
      Malic acid
      Ash
      Alcalinity of ash
      Magnesium
      Total phenols
      Flavanoids
      Nonflavanoid phenols
      Proanthocyanins
      Color intensity
      Hue
      OD280/OD315 of diluted wines
      Proline
    
  
  
    
      count
      178.000000
      178.000000
      178.000000
      178.000000
      178.000000
      178.000000
      178.000000
      178.000000
      178.000000
      178.000000
      178.000000
      178.000000
      178.000000
      178.000000
    
    
      mean
      1.938202
      13.000618
      2.336348
      2.366517
      19.494944
      99.741573
      2.295112
      2.029270
      0.361854
      1.590899
      5.058090
      0.957449
      2.611685
      746.893258
    
    
      std
      0.775035
      0.811827
      1.117146
      0.274344
      3.339564
      14.282484
      0.625851
      0.998859
      0.124453
      0.572359
      2.318286
      0.228572
      0.709990
      314.907474
    
    
      min
      1.000000
      11.030000
      0.740000
      1.360000
      10.600000
      70.000000
      0.980000
      0.340000
      0.130000
      0.410000
      1.280000
      0.480000
      1.270000
      278.000000
    
    
      25%
      1.000000
      12.362500
      1.602500
      2.210000
      17.200000
      88.000000
      1.742500
      1.205000
      0.270000
      1.250000
      3.220000
      0.782500
      1.937500
      500.500000
    
    
      50%
      2.000000
      13.050000
      1.865000
      2.360000
      19.500000
      98.000000
      2.355000
      2.135000
      0.340000
      1.555000
      4.690000
      0.965000
      2.780000
      673.500000
    
    
      75%
      3.000000
      13.677500
      3.082500
      2.557500
      21.500000
      107.000000
      2.800000
      2.875000
      0.437500
      1.950000
      6.200000
      1.120000
      3.170000
      985.000000
    
    
      max
      3.000000
      14.830000
      5.800000
      3.230000
      30.000000
      162.000000
      3.880000
      5.080000
      0.660000
      3.580000
      13.000000
      1.710000
      4.000000
      1680.000000



In [212]:

    
wine.Ash.describe()









    Out[212]:





count    178.000000
mean       2.366517
std        0.274344
min        1.360000
25%        2.210000
50%        2.360000
75%        2.557500
max        3.230000
Name: Ash, dtype: float64



In [ ]:

gender	1	2
fraudRisk
0	5853053	3550933
1	325178	270836

	mpg	cyl	disp	hp	drat	wt	qsec	vs	am	gear	carb
0	26.0	4	120.3	91	4.43	2.140	16.7	0	1	5	2
1	30.4	4	95.1	113	3.77	1.513	16.9	1	1	5	2
2	15.8	8	351.0	264	4.22	3.170	14.5	0	1	5	4
3	19.7	6	145.0	175	3.62	2.770	15.5	0	1	5	6
4	15.0	8	301.0	335	3.54	3.570	14.6	0	1	5	8

	AVG(mpg)	gear
0	16.106667	3
1	24.533333	4
2	21.380000	5

	mpg	cyl	disp	hp	drat	wt	qsec	vs	am	carb
gear
3	16.106667	7.466667	326.300000	176.133333	3.132667	3.892600	17.692	0.200000	0.000000	2.666667
4	24.533333	4.666667	123.016667	89.500000	4.043333	2.616667	18.965	0.833333	0.666667	2.333333
5	21.380000	6.000000	202.480000	195.600000	3.916000	2.632600	15.640	0.200000	1.000000	4.400000

cyl	4	6	8
gear
3	21.500	19.75	15.05
4	26.925	19.75	0.00
5	28.200	19.70	15.40

Software	Version
Python	3.5.2 64bit [MSC v.1900 64 bit (AMD64)]
IPython	5.1.0
OS	Windows 7 6.1.7600 SP0
Sat Apr 15 14:49:54 2017 India Standard Time

	custID	gender	state	cardholder	balance	numTrans	numIntlTrans	creditLine
0	1	1	35	1	3000	4	14	2
1	2	2	2	1	0	9	0	18
2	3	2	2	1	0	27	9	16
3	4	1	15	1	0	12	0	5
4	5	1	46	1	0	11	16	7

	custID	gender	state	cardholder	balance	numTrans	numIntlTrans	creditLine
9999995	9999996	1	37	1	0	10	0	9
9999996	9999997	1	16	1	0	33	2	4
9999997	9999998	1	24	1	9000	38	0	8
9999998	9999999	1	28	1	7000	20	19	6
9999999	10000000	1	23	1	0	13	0	7

	custID	gender	state	cardholder	balance	numTrans	numIntlTrans	creditLine	fraudRisk
count	1.000000e+07	1.000000e+07	1.000000e+07	1.000000e+07	1.000000e+07	1.000000e+07	1.000000e+07	1.000000e+07	1.000000e+07
mean	5.000000e+06	1.382177e+00	2.466127e+01	1.030004e+00	4.109920e+03	2.893519e+01	4.047190e+00	9.134469e+00	5.960140e-02
std	2.886751e+06	4.859195e-01	1.497012e+01	1.705991e-01	3.996847e+03	2.655378e+01	8.602970e+00	9.641974e+00	2.367469e-01
min	1.000000e+00	1.000000e+00	1.000000e+00	1.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	1.000000e+00	0.000000e+00
25%	2.500001e+06	1.000000e+00	1.000000e+01	1.000000e+00	0.000000e+00	1.000000e+01	0.000000e+00	4.000000e+00	0.000000e+00
50%	5.000000e+06	1.000000e+00	2.400000e+01	1.000000e+00	3.706000e+03	1.900000e+01	0.000000e+00	6.000000e+00	0.000000e+00
75%	7.500000e+06	2.000000e+00	3.800000e+01	1.000000e+00	6.000000e+03	3.900000e+01	4.000000e+00	1.100000e+01	0.000000e+00
max	1.000000e+07	2.000000e+00	5.100000e+01	2.000000e+00	4.148500e+04	1.000000e+02	6.000000e+01	7.500000e+01	1.000000e+00

	Unnamed: 0	mpg	cyl	disp	hp	drat	wt	qsec	vs	am	gear	carb
0	Mazda RX4	21.0	6	160.0	110	3.90	2.620	16.46	0	1	4	4
1	Mazda RX4 Wag	21.0	6	160.0	110	3.90	2.875	17.02	0	1	4	4
2	Datsun 710	22.8	4	108.0	93	3.85	2.320	18.61	1	1	4	1
3	Hornet 4 Drive	21.4	6	258.0	110	3.08	3.215	19.44	1	0	3	1
4	Hornet Sportabout	18.7	8	360.0	175	3.15	3.440	17.02	0	0	3	2

	custID	gender	state	cardholder	balance	numTrans	numIntlTrans	creditLine
10	11	1	46	1	4601	54	0	4
11	12	1	10	1	3000	20	0	2
12	13	1	6	1	0	45	2	4
13	14	2	38	1	9000	41	3	8
14	15	1	27	1	5227	60	0	17
15	16	1	44	1	0	22	0	5
16	17	2	18	1	13970	20	0	13
17	18	1	35	1	3113	13	6	8
18	19	1	5	1	9000	20	2	8
19	20	2	31	1	1860	21	10	8
20	21	1	39	1	4000	24	0	3

	custID	gender	state	cardholder	balance	numTrans	numIntlTrans	creditLine	fraudRisk
0	1	1	35	1	3000	4	14	2	0
1	2	2	2	1	0	9	0	18	0
2	3	2	2	1	0	27	9	16	0
3	4	1	15	1	0	12	0	5	0
4	5	1	46	1	0	11	16	7	0
5	6	2	44	2	5546	21	0	13	0
6	7	1	3	1	2000	41	0	1	0
7	8	1	10	1	6016	20	3	6	0
8	9	2	32	1	2428	4	10	22	0
9	10	1	23	1	0	18	56	5	0
10	11	1	46	1	4601	54	0	4	0
11	12	1	10	1	3000	20	0	2	0
12	13	1	6	1	0	45	2	4	0
13	14	2	38	1	9000	41	3	8	0
14	15	1	27	1	5227	60	0	17	0
15	16	1	44	1	0	22	0	5	0
16	17	2	18	1	13970	20	0	13	0
17	18	1	35	1	3113	13	6	8	0
18	19	1	5	1	9000	20	2	8	0
19	20	2	31	1	1860	21	10	8	0
20	21	1	39	1	4000	24	0	3	0
21	22	1	34	1	0	22	0	3	0
22	23	1	5	1	0	7	0	11	0
23	24	2	21	1	0	15	0	3	0
24	25	1	25	1	0	12	0	65	0
25	26	2	29	1	5000	4	9	4	0
26	27	1	38	1	4000	21	5	3	0
27	28	1	9	1	12000	20	0	11	0
28	29	2	20	1	0	19	0	2	0
29	30	2	49	1	5192	84	0	13	1
...	...	...	...	...	...	...	...	...	...
9999970	9999971	1	10	1	0	12	0	11	0
9999971	9999972	2	40	1	3683	31	3	7	0
9999972	9999973	1	46	1	5000	69	0	4	0
9999973	9999974	1	10	2	0	36	31	14	0
9999974	9999975	1	25	1	0	18	0	3	0
9999975	9999976	2	48	1	4210	23	0	8	0
9999976	9999977	2	4	1	0	14	0	7	0
9999977	9999978	1	35	1	8000	24	0	7	0
9999978	9999979	1	44	1	5000	12	0	4	0
9999979	9999980	1	6	1	7669	5	3	7	0
9999980	9999981	1	10	1	13889	54	0	13	0
9999981	9999982	1	33	1	0	23	26	18	0
9999982	9999983	1	44	1	3000	14	0	2	0
9999983	9999984	2	13	1	0	2	0	5	0
9999984	9999985	1	39	1	7000	30	0	6	0
9999985	9999986	2	45	2	3000	4	0	2	0
9999986	9999987	1	23	1	7000	59	0	6	0
9999987	9999988	2	24	1	6000	46	0	5	0
9999988	9999989	1	18	1	15000	72	0	14	1
9999989	9999990	2	4	1	12000	17	7	11	0
9999990	9999991	1	16	2	1561	8	0	5	0
9999991	9999992	2	36	1	5217	6	0	5	0
9999992	9999993	1	38	1	0	7	0	2	0
9999993	9999994	2	43	1	2607	6	0	5	0
9999994	9999995	1	16	2	17376	3	0	20	0
9999995	9999996	1	37	1	0	10	0	9	0
9999996	9999997	1	16	1	0	33	2	4	0
9999997	9999998	1	24	1	9000	38	0	8	0
9999998	9999999	1	28	1	7000	20	19	6	0
9999999	10000000	1	23	1	0	13	0	7	0

	custID	gender	state	cardholder	balance	numTrans	numIntlTrans	creditLine
1213704	1213705	1	41	1	6000	34	0	5
9188257	9188258	1	4	1	0	6	0	6
7040966	7040967	1	32	1	0	15	0	15
9106986	9106987	1	36	1	2000	28	0	1
2441776	2441777	1	5	1	10000	39	5	9
6415126	6415127	2	39	1	0	10	2	6
6300659	6300660	1	11	1	8000	4	17	7
952480	952481	1	6	1	6000	10	0	5
4407853	4407854	1	41	1	0	3	0	3
3029728	3029729	1	9	1	4000	32	0	3

	WineClass	Alcohol	Malic acid	Ash	Alcalinity of ash	Magnesium	Total phenols	Flavanoids	Nonflavanoid phenols	Proanthocyanins	Color intensity	Hue	OD280/OD315 of diluted wines	Proline
0	1	14.23	1.71	2.43	15.6	127	2.80	3.06	0.28	2.29	5.64	1.04	3.92	1065
1	1	13.20	1.78	2.14	11.2	100	2.65	2.76	0.26	1.28	4.38	1.05	3.40	1050
2	1	13.16	2.36	2.67	18.6	101	2.80	3.24	0.30	2.81	5.68	1.03	3.17	1185
3	1	14.37	1.95	2.50	16.8	113	3.85	3.49	0.24	2.18	7.80	0.86	3.45	1480
4	1	13.24	2.59	2.87	21.0	118	2.80	2.69	0.39	1.82	4.32	1.04	2.93	735

	Alcohol	Malic acid	Ash	Alcalinity of ash	Magnesium	Total phenols	Flavanoids	Nonflavanoid phenols	Proanthocyanins	Color intensity	Hue	OD280/OD315 of diluted wines	Proline
WineClass
1	13.744746	2.010678	2.455593	17.037288	106.338983	2.840169	2.982373	0.290000	1.899322	5.528305	1.062034	3.157797	1115.711864
2	12.278732	1.932676	2.244789	20.238028	94.549296	2.258873	2.080845	0.363662	1.630282	3.086620	1.056282	2.785352	519.507042
3	13.153750	3.333750	2.437083	21.416667	99.312500	1.678750	0.781458	0.447500	1.153542	7.396250	0.682708	1.683542	629.895833

	WineClass	Alcohol	Malic acid	Ash	Alcalinity of ash	Magnesium	Total phenols	Flavanoids	Nonflavanoid phenols	Proanthocyanins	Color intensity	Hue	OD280/OD315 of diluted wines	Proline
count	178.000000	178.000000	178.000000	178.000000	178.000000	178.000000	178.000000	178.000000	178.000000	178.000000	178.000000	178.000000	178.000000	178.000000
mean	1.938202	13.000618	2.336348	2.366517	19.494944	99.741573	2.295112	2.029270	0.361854	1.590899	5.058090	0.957449	2.611685	746.893258
std	0.775035	0.811827	1.117146	0.274344	3.339564	14.282484	0.625851	0.998859	0.124453	0.572359	2.318286	0.228572	0.709990	314.907474
min	1.000000	11.030000	0.740000	1.360000	10.600000	70.000000	0.980000	0.340000	0.130000	0.410000	1.280000	0.480000	1.270000	278.000000
25%	1.000000	12.362500	1.602500	2.210000	17.200000	88.000000	1.742500	1.205000	0.270000	1.250000	3.220000	0.782500	1.937500	500.500000
50%	2.000000	13.050000	1.865000	2.360000	19.500000	98.000000	2.355000	2.135000	0.340000	1.555000	4.690000	0.965000	2.780000	673.500000
75%	3.000000	13.677500	3.082500	2.557500	21.500000	107.000000	2.800000	2.875000	0.437500	1.950000	6.200000	1.120000	3.170000	985.000000
max	3.000000	14.830000	5.800000	3.230000	30.000000	162.000000	3.880000	5.080000	0.660000	3.580000	13.000000	1.710000	4.000000	1680.000000