data from https://packages.revolutionanalytics.com/datasets/



In [47]:

    
import pandas as pd

Data Import #Hint wil take some time

fraud=pd.read_csv("https://packages.revolutionanalytics.com/datasets/ccFraud.csv")



In [48]:

    
import os as os



In [49]:

    
os.getcwd()









    Out[49]:





'C:\\Users\\Dell\\Downloads'



In [50]:

    
os.chdir('C:\\Users\\Dell\\Downloads')



In [51]:

    
os.listdir()









    Out[51]:





['140749_2017.pdf',
 '2011-F01-0700-Rev4-MDDS.XLSX',
 '20150817143155.pdf',
 '20160111060911.pdf',
 '20170214052225.pdf',
 '861415_10151432783238421_2124270505_o (1).jpg',
 '861415_10151432783238421_2124270505_o.jpg',
 'AirPassengers.csv',
 'ajayo.jpg',
 'Alison Python  Invoice   - Sheet1.pdf',
 'Alison SAS  Invoice   - Sheet1.pdf',
 'Allison Interview Jones Invoice   - Sheet1.pdf',
 'Anaconda3-4.2.0-Windows-x86_64.exe',
 'apachehttpd.exe',
 'April invoice adaptive analytics   - Sheet1.pdf',
 'Assignment14_BusinessAnalytics (1).docx',
 'Assignment14_BusinessAnalytics.docx',
 'Assignment15_BusinessAnalytics.docx',
 'Assignment16_BusinessAnalytics (1).docx',
 'Assignment16_BusinessAnalytics (2).docx',
 'Assignment16_BusinessAnalytics.docx',
 'aug ust 2008.JPG',
 'avast_free_antivirus_setup_online.exe',
 'avinash_ltv.zip',
 'BigDiamonds.csv',
 'BigDiamonds.csv (1).zip',
 'BigDiamonds.csv (2)',
 'BigDiamonds.csv (2).zip',
 'BigDiamonds.csv.zip',
 'Boston (1).csv',
 'Boston.csv',
 'CAM- Ajay Ohri (1).pdf',
 'CAM- Ajay Ohri.pdf',
 'camtasia.exe',
 'ccFraud.csv',
 'Certificate of Incorporation - U74999DL2015PTC282030 (26 June 2015).pdf',
 'CHAP1-6PythonforRUsersAnapproachforDataScience.docx',
 'chapter+3+_+spark.html',
 'chi+square+test.ipynb',
 'chromeinstall-8u111.exe',
 'Cisco_WebEx_Add-On.exe',
 'class2.csv',
 'Collabera Invoice (1).pdf',
 'Collabera Invoice.pdf',
 'Collectcent Invoice.pdf',
 'college degrees.pdf',
 'DAP 1.pdf',
 'DAP 1.pptx',
 'DAP 6 RDBMS and SQL.pdf',
 'DAP 6 RDBMS and SQL.pptx',
 'data+exploration.ipynb',
 'data+manipulation.ipynb',
 'data1.csv',
 'datasets.csv',
 'Decision Trees.pdf',
 'DecisionStatsOfferLetter.docx',
 'DecisionStatsRelievingLetter.docx',
 'descriptive+stats+in+Python.ipynb',
 'desktop.ini',
 'Diamond (1).csv',
 'Diamond (2).csv',
 'Diamond (3).csv',
 'Diamond (4).csv',
 'Diamond (5).csv',
 'Diamond (6).csv',
 'Diamond.csv',
 'DropboxInstaller.exe',
 'edb_npgsql.exe',
 'edb_pgjdbc.exe',
 'edb_psqlodbc.exe',
 'edb_psqlodbc.exe-20170203172812',
 'edb_psqlodbc.exe-20170307203617',
 'final invoice edureka  - Sheet1.pdf',
 'FinalPythonforRUsersAnapproachforDataScience (1).docx',
 'FinalPythonforRUsersAnapproachforDataScience (2).docx',
 'FinalPythonforRUsersAnapproachforDataScience (3).docx',
 'FinalPythonforRUsersAnapproachforDataScience (4).docx',
 'FinalPythonforRUsersAnapproachforDataScience.docx',
 'final_webinar (1).pdf',
 'final_webinar.pdf',
 'Git-2.11.0-64-bit.exe',
 'Git-2.12.0-64-bit.exe',
 'GitHubSetup (1).exe',
 'GitHubSetup (2).exe',
 'GitHubSetup.exe',
 'GOMAUDIOGLOBALSETUP.EXE',
 'HP Downloads',
 'HPSupportSolutionsFramework-12.5.32.203.exe',
 'image.png',
 'IMS PROSCHOOL Workshop.pptx.pdf',
 'IMS PROSCHOOL Workshop.pptx.pptx',
 'Introduction to SAS (1).pdf',
 'Introduction to SAS Part 1 (1).pdf',
 'Introduction to SAS Part 1.pdf',
 'Introduction to SAS.pdf',
 'Invoice for Digital Vidya.pdf',
 'Invoice for Weekendr.pdf',
 'Invoice format - Ajay Ohri CONTATA (1).xls',
 'Invoice format - Ajay Ohri CONTATA.xls',
 'invoice rapid miner.pdf',
 'iris2 (1).ipynb',
 'iris2 (2).ipynb',
 'iris2.ipynb',
 'January invoice Indicus  .pdf',
 'June AV   Invoice   - Sheet1.pdf',
 'Lecture 6 - KNN & Naive Bayes.ppt',
 'Local Disk (C) - Shortcut.lnk',
 'logistic regression - script for ppt.R',
 'logistic_regression_-_script_for_ppt.html',
 'March invoice Indicus   - Sheet1.pdf',
 'mongodb-win32-x86_64-2008plus-ssl-3.4.2-signed.msi',
 'mongodb-win32-x86_64-3.4.2-signed.msi',
 'mtcarslm.R',
 'nltk.ipynb',
 'notebook-Copy1.html',
 'Offer Letter - Ajay Ohri (1).pdf',
 'Offer Letter - Ajay Ohri.pdf',
 'Other Data Mining  Methods (1).pdf',
 'Other Data Mining  Methods.pdf',
 'output1 (1).xls',
 'output1 (2).xls',
 'output1.xls',
 'passport image.pdf',
 'Pawconinvoice2016.pdf',
 'Pawconinvoice2017 (1).pdf',
 'Pawconinvoice2017 (2).pdf',
 'Pawconinvoice2017 (3).pdf',
 'Pawconinvoice2017.pdf',
 'Payslip Feb 2016 - Sheet1.pdf',
 'Payslip Feb 2016.pdf',
 'Payslip Format Decisionstats - Sheet1.pdf',
 'Payslip Jan 2016 - Sheet1.pdf',
 'Payslip Jan 2016.pdf',
 'Payslip March 2016 - Sheet1.pdf',
 'Payslip March 2016.pdf',
 'pgd.csv',
 'postgresql-9.6.1-1-windows-x64.exe',
 'Program 1-results.rtf',
 'protein.csv',
 'python+with+postgres (1).ipynb',
 'python+with+postgres.ipynb',
 'R-3.3.2-win.exe',
 'R-3.3.3-win.exe',
 'RCertificationExam.pdf',
 'reg+model.ipynb',
 'Revision -  Business Analytics (1).pdf',
 'Revision -  Business Analytics.pdf',
 'RidingMowers.csv',
 'rsconnect',
 'RStudio-1.0.136.exe',
 'Salary Slip, Feb 2016.pdf',
 'Salary Slip, Jan 2016.pdf',
 'Salary Slip, March 2016 (1).pdf',
 'Salary Slip, March 2016 (2).pdf',
 'Salary Slip, March 2016.pdf',
 'sales-of-shampoo-over-a-three-ye.csv',
 'SAS part 2.pdf',
 'SAS Part 3.pdf',
 'sas-university-edition-107140.pdf',
 'Scan0095.pdf',
 'Scanned Invoice for Collabera.pdf',
 'Screenshot 2017-01-23 12.36.55.png',
 'September invoice adaptive analytics   - Sheet1.pdf',
 'Sollers January.pdf',
 'sqlalchemy.ipynb',
 'stackoverflow-dump-analysis.html',
 'Sunstone.pdf',
 'Tableau.pdf',
 'TableauPublicDesktop-64bit-10-1-3.exe',
 'TableauPublicDesktop-64bit-10-1-4.exe',
 'telecom.csv',
 'TelecomServiceProviderCaseStudy.pdf',
 'Text Mining (1).pdf',
 'Text Mining.pdf',
 'third.sas7bdat',
 'Time Series  Forecasting (1).pdf',
 'Time Series  Forecasting.pdf',
 'ts.html',
 'ts.R',
 'Unconfirmed 373974.crdownload',
 'Unconfirmed 376562.crdownload',
 'Unconfirmed 376991.crdownload',
 'Unconfirmed 930917.crdownload',
 'Unconfirmed 950045.crdownload',
 'unvbasicvapp__9411008__ova__en__sp0__1.ova.crdownload',
 'VirtualBox-5.1.8-111374-Win (1).exe',
 'VirtualBox-5.1.8-111374-Win.exe',
 'Webinar for Business Analytics.pdf',
 'WhatsApp Image 2017-02-18 at 08.42.55 (1).jpeg',
 'WhatsApp Image 2017-02-18 at 08.42.55.jpeg']



In [52]:

    
fraud=pd.read_csv("ccFraud.csv")



In [53]:

    
fraud2=fraud



In [54]:

    
fraud3=fraud.copy()



In [55]:

    
type(fraud)









    Out[55]:





pandas.core.frame.DataFrame



In [56]:

    
fraud.columns









    Out[56]:





Index(['custID', 'gender', 'state', 'cardholder', 'balance', 'numTrans',
       'numIntlTrans', 'creditLine', 'fraudRisk'],
      dtype='object')



In [57]:

    
fraud.dtypes









    Out[57]:





custID          int64
gender          int64
state           int64
cardholder      int64
balance         int64
numTrans        int64
numIntlTrans    int64
creditLine      int64
fraudRisk       int64
dtype: object



In [58]:

    
fraud.shape









    Out[58]:





(10000000, 9)



In [59]:

    
fraud.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000000 entries, 0 to 9999999
Data columns (total 9 columns):
custID          int64
gender          int64
state           int64
cardholder      int64
balance         int64
numTrans        int64
numIntlTrans    int64
creditLine      int64
fraudRisk       int64
dtypes: int64(9)
memory usage: 686.6 MB



In [60]:

    
len(fraud)









    Out[60]:





10000000



In [61]:

    
len(fraud.columns)









    Out[61]:





9



In [62]:

    
fraud.head()









    Out[62]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      0
      1
      1
      35
      1
      3000
      4
      14
      2
      0
    
    
      1
      2
      2
      2
      1
      0
      9
      0
      18
      0
    
    
      2
      3
      2
      2
      1
      0
      27
      9
      16
      0
    
    
      3
      4
      1
      15
      1
      0
      12
      0
      5
      0
    
    
      4
      5
      1
      46
      1
      0
      11
      16
      7
      0



In [63]:

    
fraud.tail()









    Out[63]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      9999995
      9999996
      1
      37
      1
      0
      10
      0
      9
      0
    
    
      9999996
      9999997
      1
      16
      1
      0
      33
      2
      4
      0
    
    
      9999997
      9999998
      1
      24
      1
      9000
      38
      0
      8
      0
    
    
      9999998
      9999999
      1
      28
      1
      7000
      20
      19
      6
      0
    
    
      9999999
      10000000
      1
      23
      1
      0
      13
      0
      7
      0



In [64]:

    
fraud.ix[0:10]









    Out[64]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      0
      1
      1
      35
      1
      3000
      4
      14
      2
      0
    
    
      1
      2
      2
      2
      1
      0
      9
      0
      18
      0
    
    
      2
      3
      2
      2
      1
      0
      27
      9
      16
      0
    
    
      3
      4
      1
      15
      1
      0
      12
      0
      5
      0
    
    
      4
      5
      1
      46
      1
      0
      11
      16
      7
      0
    
    
      5
      6
      2
      44
      2
      5546
      21
      0
      13
      0
    
    
      6
      7
      1
      3
      1
      2000
      41
      0
      1
      0
    
    
      7
      8
      1
      10
      1
      6016
      20
      3
      6
      0
    
    
      8
      9
      2
      32
      1
      2428
      4
      10
      22
      0
    
    
      9
      10
      1
      23
      1
      0
      18
      56
      5
      0
    
    
      10
      11
      1
      46
      1
      4601
      54
      0
      4
      0



In [65]:

    
fraud[['gender','state','cardholder']]









    Out[65]:






  
    
      
      gender
      state
      cardholder
    
  
  
    
      0
      1
      35
      1
    
    
      1
      2
      2
      1
    
    
      2
      2
      2
      1
    
    
      3
      1
      15
      1
    
    
      4
      1
      46
      1
    
    
      5
      2
      44
      2
    
    
      6
      1
      3
      1
    
    
      7
      1
      10
      1
    
    
      8
      2
      32
      1
    
    
      9
      1
      23
      1
    
    
      10
      1
      46
      1
    
    
      11
      1
      10
      1
    
    
      12
      1
      6
      1
    
    
      13
      2
      38
      1
    
    
      14
      1
      27
      1
    
    
      15
      1
      44
      1
    
    
      16
      2
      18
      1
    
    
      17
      1
      35
      1
    
    
      18
      1
      5
      1
    
    
      19
      2
      31
      1
    
    
      20
      1
      39
      1
    
    
      21
      1
      34
      1
    
    
      22
      1
      5
      1
    
    
      23
      2
      21
      1
    
    
      24
      1
      25
      1
    
    
      25
      2
      29
      1
    
    
      26
      1
      38
      1
    
    
      27
      1
      9
      1
    
    
      28
      2
      20
      1
    
    
      29
      2
      49
      1
    
    
      ...
      ...
      ...
      ...
    
    
      9999970
      1
      10
      1
    
    
      9999971
      2
      40
      1
    
    
      9999972
      1
      46
      1
    
    
      9999973
      1
      10
      2
    
    
      9999974
      1
      25
      1
    
    
      9999975
      2
      48
      1
    
    
      9999976
      2
      4
      1
    
    
      9999977
      1
      35
      1
    
    
      9999978
      1
      44
      1
    
    
      9999979
      1
      6
      1
    
    
      9999980
      1
      10
      1
    
    
      9999981
      1
      33
      1
    
    
      9999982
      1
      44
      1
    
    
      9999983
      2
      13
      1
    
    
      9999984
      1
      39
      1
    
    
      9999985
      2
      45
      2
    
    
      9999986
      1
      23
      1
    
    
      9999987
      2
      24
      1
    
    
      9999988
      1
      18
      1
    
    
      9999989
      2
      4
      1
    
    
      9999990
      1
      16
      2
    
    
      9999991
      2
      36
      1
    
    
      9999992
      1
      38
      1
    
    
      9999993
      2
      43
      1
    
    
      9999994
      1
      16
      2
    
    
      9999995
      1
      37
      1
    
    
      9999996
      1
      16
      1
    
    
      9999997
      1
      24
      1
    
    
      9999998
      1
      28
      1
    
    
      9999999
      1
      23
      1
    
  

10000000 rows × 3 columns



In [66]:

    
fraud.iloc[:,1:3]









    Out[66]:






  
    
      
      gender
      state
    
  
  
    
      0
      1
      35
    
    
      1
      2
      2
    
    
      2
      2
      2
    
    
      3
      1
      15
    
    
      4
      1
      46
    
    
      5
      2
      44
    
    
      6
      1
      3
    
    
      7
      1
      10
    
    
      8
      2
      32
    
    
      9
      1
      23
    
    
      10
      1
      46
    
    
      11
      1
      10
    
    
      12
      1
      6
    
    
      13
      2
      38
    
    
      14
      1
      27
    
    
      15
      1
      44
    
    
      16
      2
      18
    
    
      17
      1
      35
    
    
      18
      1
      5
    
    
      19
      2
      31
    
    
      20
      1
      39
    
    
      21
      1
      34
    
    
      22
      1
      5
    
    
      23
      2
      21
    
    
      24
      1
      25
    
    
      25
      2
      29
    
    
      26
      1
      38
    
    
      27
      1
      9
    
    
      28
      2
      20
    
    
      29
      2
      49
    
    
      ...
      ...
      ...
    
    
      9999970
      1
      10
    
    
      9999971
      2
      40
    
    
      9999972
      1
      46
    
    
      9999973
      1
      10
    
    
      9999974
      1
      25
    
    
      9999975
      2
      48
    
    
      9999976
      2
      4
    
    
      9999977
      1
      35
    
    
      9999978
      1
      44
    
    
      9999979
      1
      6
    
    
      9999980
      1
      10
    
    
      9999981
      1
      33
    
    
      9999982
      1
      44
    
    
      9999983
      2
      13
    
    
      9999984
      1
      39
    
    
      9999985
      2
      45
    
    
      9999986
      1
      23
    
    
      9999987
      2
      24
    
    
      9999988
      1
      18
    
    
      9999989
      2
      4
    
    
      9999990
      1
      16
    
    
      9999991
      2
      36
    
    
      9999992
      1
      38
    
    
      9999993
      2
      43
    
    
      9999994
      1
      16
    
    
      9999995
      1
      37
    
    
      9999996
      1
      16
    
    
      9999997
      1
      24
    
    
      9999998
      1
      28
    
    
      9999999
      1
      23
    
  

10000000 rows × 2 columns



In [67]:

    
fraud.iloc[0:10,1:3]



In [68]:

    
import numpy as np



In [69]:

    
np.random.choice(10,2)









    Out[69]:





array([1, 0])



In [70]:

    
np.random.choice(len(fraud),0.000001*len(fraud))









    



C:\Users\Dell\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  if __name__ == '__main__':






    Out[70]:





array([7100177, 4058088, 2302568, 9002606, 3792183,  888579, 6465822,
       3062360, 3663548, 5578048])



In [71]:

    
b=np.random.choice(len(fraud),0.000001*len(fraud))









    



C:\Users\Dell\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  if __name__ == '__main__':



In [72]:

    
fraud.ix[b]









    Out[72]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      7461768
      7461769
      1
      21
      1
      3000
      15
      2
      2
      0
    
    
      5089359
      5089360
      1
      5
      1
      8000
      80
      35
      7
      1
    
    
      8548436
      8548437
      1
      5
      1
      5000
      3
      0
      4
      0
    
    
      3997580
      3997581
      1
      32
      1
      10000
      47
      1
      9
      0
    
    
      1776937
      1776938
      1
      43
      1
      3000
      100
      0
      2
      0
    
    
      3590544
      3590545
      2
      5
      1
      4000
      52
      15
      3
      0
    
    
      1824320
      1824321
      1
      35
      1
      1118
      87
      8
      5
      0
    
    
      906910
      906911
      1
      10
      1
      6000
      6
      0
      5
      0
    
    
      2265882
      2265883
      1
      5
      1
      1426
      17
      0
      8
      0
    
    
      7983346
      7983347
      2
      13
      1
      3000
      43
      8
      2
      0



In [73]:

    
d=fraud.ix[b]



In [74]:

    
d.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 7461768 to 7983346
Data columns (total 9 columns):
custID          10 non-null int64
gender          10 non-null int64
state           10 non-null int64
cardholder      10 non-null int64
balance         10 non-null int64
numTrans        10 non-null int64
numIntlTrans    10 non-null int64
creditLine      10 non-null int64
fraudRisk       10 non-null int64
dtypes: int64(9)
memory usage: 800.0 bytes



In [75]:

    
del d



In [76]:

    
del fraud['gender']



In [77]:

    
fraud.columns









    Out[77]:





Index(['custID', 'state', 'cardholder', 'balance', 'numTrans', 'numIntlTrans',
       'creditLine', 'fraudRisk'],
      dtype='object')



In [78]:

    
fraud.drop(fraud.columns[[1, 5]], axis=1, inplace=True)



In [79]:

    
fraud.columns









    Out[79]:





Index(['custID', 'cardholder', 'balance', 'numTrans', 'creditLine',
       'fraudRisk'],
      dtype='object')



In [80]:

    
fraud.drop(fraud.columns[[1,2,3]], axis=1, inplace=True)



In [81]:

    
fraud.columns









    Out[81]:





Index(['custID', 'creditLine', 'fraudRisk'], dtype='object')



In [82]:

    
fraud.head()



In [83]:

    
fraud.drop(fraud.index[[1,3]])









    Out[83]:






  
    
      
      custID
      creditLine
      fraudRisk
    
  
  
    
      0
      1
      2
      0
    
    
      2
      3
      16
      0
    
    
      4
      5
      7
      0
    
    
      5
      6
      13
      0
    
    
      6
      7
      1
      0
    
    
      7
      8
      6
      0
    
    
      8
      9
      22
      0
    
    
      9
      10
      5
      0
    
    
      10
      11
      4
      0
    
    
      11
      12
      2
      0
    
    
      12
      13
      4
      0
    
    
      13
      14
      8
      0
    
    
      14
      15
      17
      0
    
    
      15
      16
      5
      0
    
    
      16
      17
      13
      0
    
    
      17
      18
      8
      0
    
    
      18
      19
      8
      0
    
    
      19
      20
      8
      0
    
    
      20
      21
      3
      0
    
    
      21
      22
      3
      0
    
    
      22
      23
      11
      0
    
    
      23
      24
      3
      0
    
    
      24
      25
      65
      0
    
    
      25
      26
      4
      0
    
    
      26
      27
      3
      0
    
    
      27
      28
      11
      0
    
    
      28
      29
      2
      0
    
    
      29
      30
      13
      1
    
    
      30
      31
      4
      0
    
    
      31
      32
      5
      0
    
    
      ...
      ...
      ...
      ...
    
    
      9999970
      9999971
      11
      0
    
    
      9999971
      9999972
      7
      0
    
    
      9999972
      9999973
      4
      0
    
    
      9999973
      9999974
      14
      0
    
    
      9999974
      9999975
      3
      0
    
    
      9999975
      9999976
      8
      0
    
    
      9999976
      9999977
      7
      0
    
    
      9999977
      9999978
      7
      0
    
    
      9999978
      9999979
      4
      0
    
    
      9999979
      9999980
      7
      0
    
    
      9999980
      9999981
      13
      0
    
    
      9999981
      9999982
      18
      0
    
    
      9999982
      9999983
      2
      0
    
    
      9999983
      9999984
      5
      0
    
    
      9999984
      9999985
      6
      0
    
    
      9999985
      9999986
      2
      0
    
    
      9999986
      9999987
      6
      0
    
    
      9999987
      9999988
      5
      0
    
    
      9999988
      9999989
      14
      1
    
    
      9999989
      9999990
      11
      0
    
    
      9999990
      9999991
      5
      0
    
    
      9999991
      9999992
      5
      0
    
    
      9999992
      9999993
      2
      0
    
    
      9999993
      9999994
      5
      0
    
    
      9999994
      9999995
      20
      0
    
    
      9999995
      9999996
      9
      0
    
    
      9999996
      9999997
      4
      0
    
    
      9999997
      9999998
      8
      0
    
    
      9999998
      9999999
      6
      0
    
    
      9999999
      10000000
      7
      0
    
  

9999998 rows × 3 columns



In [84]:

    
pd.Series(range(1,101))









    Out[84]:





0       1
1       2
2       3
3       4
4       5
5       6
6       7
7       8
8       9
9      10
10     11
11     12
12     13
13     14
14     15
15     16
16     17
17     18
18     19
19     20
20     21
21     22
22     23
23     24
24     25
25     26
26     27
27     28
28     29
29     30
     ... 
70     71
71     72
72     73
73     74
74     75
75     76
76     77
77     78
78     79
79     80
80     81
81     82
82     83
83     84
84     85
85     86
86     87
87     88
88     89
89     90
90     91
91     92
92     93
93     94
94     95
95     96
96     97
97     98
98     99
99    100
dtype: int32



In [85]:

    
s=pd.Series(range(1,101))



In [86]:

    
fraud.drop(fraud.index[[s]])









    Out[86]:






  
    
      
      custID
      creditLine
      fraudRisk
    
  
  
    
      0
      1
      2
      0
    
    
      101
      102
      10
      0
    
    
      102
      103
      28
      0
    
    
      103
      104
      22
      0
    
    
      104
      105
      10
      0
    
    
      105
      106
      4
      0
    
    
      106
      107
      11
      0
    
    
      107
      108
      6
      0
    
    
      108
      109
      4
      0
    
    
      109
      110
      4
      0
    
    
      110
      111
      10
      1
    
    
      111
      112
      4
      0
    
    
      112
      113
      3
      0
    
    
      113
      114
      5
      0
    
    
      114
      115
      4
      0
    
    
      115
      116
      2
      0
    
    
      116
      117
      4
      0
    
    
      117
      118
      13
      0
    
    
      118
      119
      9
      0
    
    
      119
      120
      8
      0
    
    
      120
      121
      3
      0
    
    
      121
      122
      11
      0
    
    
      122
      123
      5
      0
    
    
      123
      124
      4
      0
    
    
      124
      125
      13
      0
    
    
      125
      126
      4
      0
    
    
      126
      127
      5
      0
    
    
      127
      128
      6
      0
    
    
      128
      129
      3
      0
    
    
      129
      130
      2
      0
    
    
      ...
      ...
      ...
      ...
    
    
      9999970
      9999971
      11
      0
    
    
      9999971
      9999972
      7
      0
    
    
      9999972
      9999973
      4
      0
    
    
      9999973
      9999974
      14
      0
    
    
      9999974
      9999975
      3
      0
    
    
      9999975
      9999976
      8
      0
    
    
      9999976
      9999977
      7
      0
    
    
      9999977
      9999978
      7
      0
    
    
      9999978
      9999979
      4
      0
    
    
      9999979
      9999980
      7
      0
    
    
      9999980
      9999981
      13
      0
    
    
      9999981
      9999982
      18
      0
    
    
      9999982
      9999983
      2
      0
    
    
      9999983
      9999984
      5
      0
    
    
      9999984
      9999985
      6
      0
    
    
      9999985
      9999986
      2
      0
    
    
      9999986
      9999987
      6
      0
    
    
      9999987
      9999988
      5
      0
    
    
      9999988
      9999989
      14
      1
    
    
      9999989
      9999990
      11
      0
    
    
      9999990
      9999991
      5
      0
    
    
      9999991
      9999992
      5
      0
    
    
      9999992
      9999993
      2
      0
    
    
      9999993
      9999994
      5
      0
    
    
      9999994
      9999995
      20
      0
    
    
      9999995
      9999996
      9
      0
    
    
      9999996
      9999997
      4
      0
    
    
      9999997
      9999998
      8
      0
    
    
      9999998
      9999999
      6
      0
    
    
      9999999
      10000000
      7
      0
    
  

9999900 rows × 3 columns



In [87]:

    
fraud.query('fraudRisk==0').head(6)



In [88]:

    
fraud.loc[fraud.fraudRisk == 0,'creditLine'] = -1; fraud









    Out[88]:






  
    
      
      custID
      creditLine
      fraudRisk
    
  
  
    
      0
      1
      -1
      0
    
    
      1
      2
      -1
      0
    
    
      2
      3
      -1
      0
    
    
      3
      4
      -1
      0
    
    
      4
      5
      -1
      0
    
    
      5
      6
      -1
      0
    
    
      6
      7
      -1
      0
    
    
      7
      8
      -1
      0
    
    
      8
      9
      -1
      0
    
    
      9
      10
      -1
      0
    
    
      10
      11
      -1
      0
    
    
      11
      12
      -1
      0
    
    
      12
      13
      -1
      0
    
    
      13
      14
      -1
      0
    
    
      14
      15
      -1
      0
    
    
      15
      16
      -1
      0
    
    
      16
      17
      -1
      0
    
    
      17
      18
      -1
      0
    
    
      18
      19
      -1
      0
    
    
      19
      20
      -1
      0
    
    
      20
      21
      -1
      0
    
    
      21
      22
      -1
      0
    
    
      22
      23
      -1
      0
    
    
      23
      24
      -1
      0
    
    
      24
      25
      -1
      0
    
    
      25
      26
      -1
      0
    
    
      26
      27
      -1
      0
    
    
      27
      28
      -1
      0
    
    
      28
      29
      -1
      0
    
    
      29
      30
      13
      1
    
    
      ...
      ...
      ...
      ...
    
    
      9999970
      9999971
      -1
      0
    
    
      9999971
      9999972
      -1
      0
    
    
      9999972
      9999973
      -1
      0
    
    
      9999973
      9999974
      -1
      0
    
    
      9999974
      9999975
      -1
      0
    
    
      9999975
      9999976
      -1
      0
    
    
      9999976
      9999977
      -1
      0
    
    
      9999977
      9999978
      -1
      0
    
    
      9999978
      9999979
      -1
      0
    
    
      9999979
      9999980
      -1
      0
    
    
      9999980
      9999981
      -1
      0
    
    
      9999981
      9999982
      -1
      0
    
    
      9999982
      9999983
      -1
      0
    
    
      9999983
      9999984
      -1
      0
    
    
      9999984
      9999985
      -1
      0
    
    
      9999985
      9999986
      -1
      0
    
    
      9999986
      9999987
      -1
      0
    
    
      9999987
      9999988
      -1
      0
    
    
      9999988
      9999989
      14
      1
    
    
      9999989
      9999990
      -1
      0
    
    
      9999990
      9999991
      -1
      0
    
    
      9999991
      9999992
      -1
      0
    
    
      9999992
      9999993
      -1
      0
    
    
      9999993
      9999994
      -1
      0
    
    
      9999994
      9999995
      -1
      0
    
    
      9999995
      9999996
      -1
      0
    
    
      9999996
      9999997
      -1
      0
    
    
      9999997
      9999998
      -1
      0
    
    
      9999998
      9999999
      -1
      0
    
    
      9999999
      10000000
      -1
      0
    
  

10000000 rows × 3 columns



In [89]:

    
fraud2.head()



In [90]:

    
fraud3.head()









    Out[90]:






  
    
      
      custID
      gender
      state
      cardholder
      balance
      numTrans
      numIntlTrans
      creditLine
      fraudRisk
    
  
  
    
      0
      1
      1
      35
      1
      3000
      4
      14
      2
      0
    
    
      1
      2
      2
      2
      1
      0
      9
      0
      18
      0
    
    
      2
      3
      2
      2
      1
      0
      27
      9
      16
      0
    
    
      3
      4
      1
      15
      1
      0
      12
      0
      5
      0
    
    
      4
      5
      1
      46
      1
      0
      11
      16
      7
      0



In [ ]:

	custID	gender	state	cardholder	balance	numTrans	numIntlTrans	creditLine
0	1	1	35	1	3000	4	14	2
1	2	2	2	1	0	9	0	18
2	3	2	2	1	0	27	9	16
3	4	1	15	1	0	12	0	5
4	5	1	46	1	0	11	16	7

	custID	gender	state	cardholder	balance	numTrans	numIntlTrans	creditLine
9999995	9999996	1	37	1	0	10	0	9
9999996	9999997	1	16	1	0	33	2	4
9999997	9999998	1	24	1	9000	38	0	8
9999998	9999999	1	28	1	7000	20	19	6
9999999	10000000	1	23	1	0	13	0	7

	custID	gender	state	cardholder	balance	numTrans	numIntlTrans	creditLine
0	1	1	35	1	3000	4	14	2
1	2	2	2	1	0	9	0	18
2	3	2	2	1	0	27	9	16
3	4	1	15	1	0	12	0	5
4	5	1	46	1	0	11	16	7
5	6	2	44	2	5546	21	0	13
6	7	1	3	1	2000	41	0	1
7	8	1	10	1	6016	20	3	6
8	9	2	32	1	2428	4	10	22
9	10	1	23	1	0	18	56	5
10	11	1	46	1	4601	54	0	4

	gender	state	cardholder
0	1	35	1
1	2	2	1
2	2	2	1
3	1	15	1
4	1	46	1
5	2	44	2
6	1	3	1
7	1	10	1
8	2	32	1
9	1	23	1
10	1	46	1
11	1	10	1
12	1	6	1
13	2	38	1
14	1	27	1
15	1	44	1
16	2	18	1
17	1	35	1
18	1	5	1
19	2	31	1
20	1	39	1
21	1	34	1
22	1	5	1
23	2	21	1
24	1	25	1
25	2	29	1
26	1	38	1
27	1	9	1
28	2	20	1
29	2	49	1
...	...	...	...
9999970	1	10	1
9999971	2	40	1
9999972	1	46	1
9999973	1	10	2
9999974	1	25	1
9999975	2	48	1
9999976	2	4	1
9999977	1	35	1
9999978	1	44	1
9999979	1	6	1
9999980	1	10	1
9999981	1	33	1
9999982	1	44	1
9999983	2	13	1
9999984	1	39	1
9999985	2	45	2
9999986	1	23	1
9999987	2	24	1
9999988	1	18	1
9999989	2	4	1
9999990	1	16	2
9999991	2	36	1
9999992	1	38	1
9999993	2	43	1
9999994	1	16	2
9999995	1	37	1
9999996	1	16	1
9999997	1	24	1
9999998	1	28	1
9999999	1	23	1

	custID	gender	state	cardholder	balance	numTrans	numIntlTrans	creditLine	fraudRisk
7461768	7461769	1	21	1	3000	15	2	2	0
5089359	5089360	1	5	1	8000	80	35	7	1
8548436	8548437	1	5	1	5000	3	0	4	0
3997580	3997581	1	32	1	10000	47	1	9	0
1776937	1776938	1	43	1	3000	100	0	2	0
3590544	3590545	2	5	1	4000	52	15	3	0
1824320	1824321	1	35	1	1118	87	8	5	0
906910	906911	1	10	1	6000	6	0	5	0
2265882	2265883	1	5	1	1426	17	0	8	0
7983346	7983347	2	13	1	3000	43	8	2	0