In [5]:
import os
import glob

In [6]:
os.getcwd()


Out[6]:
'C:\\Users\\Dell'

In [7]:
path = 'C:\\Users\\Dell\\Downloads'

In [8]:
extension = 'csv'
os.chdir(path)

In [9]:
result = [i for i in glob.glob('*.{}'.format(extension))]
print(result)


['AirPassengers.csv', 'BigDiamonds.csv', 'Boston (1).csv', 'Boston.csv', 'ccFraud.csv', 'class2.csv', 'data1.csv', 'datasets.csv', 'Diamond (1).csv', 'Diamond (2).csv', 'Diamond (3).csv', 'Diamond (4).csv', 'Diamond (5).csv', 'Diamond (6).csv', 'Diamond.csv', 'Hdma.csv', 'Hedonic.csv', 'pgd.csv', 'protein.csv', 'RidingMowers.csv', 'sales-of-shampoo-over-a-three-ye.csv', 'telecom.csv']

In [10]:
import pandas as pd

In [12]:
os.listdir()


Out[12]:
['140749_2017.pdf',
 '2011-F01-0700-Rev4-MDDS.XLSX',
 '20150817143155.pdf',
 '20160111060911.pdf',
 '20170214052225.pdf',
 '861415_10151432783238421_2124270505_o (1).jpg',
 '861415_10151432783238421_2124270505_o.jpg',
 'AirPassengers.csv',
 'ajayo.jpg',
 'Alison Python  Invoice   - Sheet1.pdf',
 'Alison SAS  Invoice   - Sheet1.pdf',
 'All+CSV+Files+in+a+Folder.ipynb',
 'Allison Interview Jones Invoice   - Sheet1.pdf',
 'Anaconda3-4.2.0-Windows-x86_64.exe',
 'apachehttpd.exe',
 'April invoice adaptive analytics   - Sheet1.pdf',
 'Assignment14_BusinessAnalytics (1).docx',
 'Assignment14_BusinessAnalytics.docx',
 'Assignment15_BusinessAnalytics.docx',
 'Assignment16_BusinessAnalytics (1).docx',
 'Assignment16_BusinessAnalytics (2).docx',
 'Assignment16_BusinessAnalytics.docx',
 'aug ust 2008.JPG',
 'avast_free_antivirus_setup_online.exe',
 'avinash_ltv.zip',
 'BigDiamonds.csv',
 'BigDiamonds.csv (1).zip',
 'BigDiamonds.csv (2)',
 'BigDiamonds.csv (2).zip',
 'BigDiamonds.csv.zip',
 'Boston (1).csv',
 'Boston.csv',
 'CAM- Ajay Ohri (1).pdf',
 'CAM- Ajay Ohri.pdf',
 'camtasia.exe',
 'ccFraud.csv',
 'Certificate of Incorporation - U74999DL2015PTC282030 (26 June 2015).pdf',
 'CHAP1-6PythonforRUsersAnapproachforDataScience.docx',
 'chapter+3+_+spark.html',
 'chi+square+test.ipynb',
 'chromeinstall-8u111.exe',
 'Cisco_WebEx_Add-On.exe',
 'class2.csv',
 'Collabera Invoice (1).pdf',
 'Collabera Invoice.pdf',
 'Collectcent Invoice.pdf',
 'college degrees.pdf',
 'DAP 1.pdf',
 'DAP 1.pptx',
 'DAP 6 RDBMS and SQL.pdf',
 'DAP 6 RDBMS and SQL.pptx',
 'data+exploration.ipynb',
 'data+manipulation.ipynb',
 'data1.csv',
 'datasets.csv',
 'Decision Trees.pdf',
 'DecisionStatsOfferLetter.docx',
 'DecisionStatsRelievingLetter.docx',
 'descriptive+stats+in+Python.ipynb',
 'desktop.ini',
 'Diamond (1).csv',
 'Diamond (2).csv',
 'Diamond (3).csv',
 'Diamond (4).csv',
 'Diamond (5).csv',
 'Diamond (6).csv',
 'Diamond.csv',
 'DropboxInstaller.exe',
 'edb_npgsql.exe',
 'edb_pgjdbc.exe',
 'edb_psqlodbc.exe',
 'edb_psqlodbc.exe-20170203172812',
 'edb_psqlodbc.exe-20170307203617',
 'final invoice edureka  - Sheet1.pdf',
 'FinalPythonforRUsersAnapproachforDataScience (1).docx',
 'FinalPythonforRUsersAnapproachforDataScience (2).docx',
 'FinalPythonforRUsersAnapproachforDataScience (3).docx',
 'FinalPythonforRUsersAnapproachforDataScience (4).docx',
 'FinalPythonforRUsersAnapproachforDataScience.docx',
 'final_webinar (1).pdf',
 'final_webinar.pdf',
 'Git-2.11.0-64-bit.exe',
 'Git-2.12.0-64-bit.exe',
 'GitHubSetup (1).exe',
 'GitHubSetup (2).exe',
 'GitHubSetup.exe',
 'GOMAUDIOGLOBALSETUP.EXE',
 'Hdma.csv',
 'Hedonic.csv',
 'HP Downloads',
 'HPSupportSolutionsFramework-12.5.32.203.exe',
 'image.png',
 'IMS PROSCHOOL Workshop.pptx.pdf',
 'IMS PROSCHOOL Workshop.pptx.pptx',
 'internship.docx',
 'Introduction to SAS (1).pdf',
 'Introduction to SAS Part 1 (1).pdf',
 'Introduction to SAS Part 1.pdf',
 'Introduction to SAS.pdf',
 'Invoice for Digital Vidya.pdf',
 'Invoice for Weekendr.pdf',
 'Invoice format - Ajay Ohri CONTATA (1).xls',
 'Invoice format - Ajay Ohri CONTATA.xls',
 'invoice rapid miner.pdf',
 'Invoice trafla format.docx',
 'iris2 (1).ipynb',
 'iris2 (2).ipynb',
 'iris2.ipynb',
 'January invoice Indicus  .pdf',
 'June AV   Invoice   - Sheet1.pdf',
 'Lecture 6 - KNN & Naive Bayes.ppt',
 'Local Disk (C) - Shortcut.lnk',
 'logistic regression - script for ppt.R',
 'logistic_regression_-_script_for_ppt.html',
 'March invoice Indicus   - Sheet1.pdf',
 'mongodb-win32-x86_64-2008plus-ssl-3.4.2-signed.msi',
 'mongodb-win32-x86_64-3.4.2-signed.msi',
 'mtcarslm.R',
 'nltk.ipynb',
 'notebook-Copy1.html',
 'Offer Letter - Ajay Ohri (1).pdf',
 'Offer Letter - Ajay Ohri.pdf',
 'Other Data Mining  Methods (1).pdf',
 'Other Data Mining  Methods.pdf',
 'output1 (1).xls',
 'output1 (2).xls',
 'output1.xls',
 'pandas+11.ipynb',
 'pandas+analysis+1.ipynb',
 'pandas+data+manipulation.ipynb',
 'passport image.pdf',
 'Pawconinvoice2016.pdf',
 'Pawconinvoice2017 (1).pdf',
 'Pawconinvoice2017 (2).pdf',
 'Pawconinvoice2017 (3).pdf',
 'Pawconinvoice2017.pdf',
 'Payslip Feb 2016 - Sheet1.pdf',
 'Payslip Feb 2016.pdf',
 'Payslip Format Decisionstats - Sheet1.pdf',
 'Payslip Jan 2016 - Sheet1.pdf',
 'Payslip Jan 2016.pdf',
 'Payslip March 2016 - Sheet1.pdf',
 'Payslip March 2016.pdf',
 'pgd.csv',
 'postgresql-9.6.1-1-windows-x64.exe',
 'Program 1-results.rtf',
 'protein.csv',
 'python+with+postgres (1).ipynb',
 'python+with+postgres.ipynb',
 'Python.docx',
 'R-3.3.2-win.exe',
 'R-3.3.3-win.exe',
 'RCertificationExam.pdf',
 'reg+model.ipynb',
 'Revision -  Business Analytics (1).pdf',
 'Revision -  Business Analytics.pdf',
 'RidingMowers.csv',
 'rsconnect',
 'RStudio-1.0.136.exe',
 'Salary Slip, Feb 2016.pdf',
 'Salary Slip, Jan 2016.pdf',
 'Salary Slip, March 2016 (1).pdf',
 'Salary Slip, March 2016 (2).pdf',
 'Salary Slip, March 2016.pdf',
 'sales-of-shampoo-over-a-three-ye.csv',
 'SAS part 2.pdf',
 'SAS Part 3.pdf',
 'sas-university-edition-107140.pdf',
 'Scan0095.pdf',
 'Scanned Invoice for Collabera.pdf',
 'Screenshot 2017-01-23 12.36.55.png',
 'September invoice adaptive analytics   - Sheet1.pdf',
 'Sollers January.pdf',
 'sqlalchemy.ipynb',
 'stackoverflow-dump-analysis.html',
 'Sunstone.pdf',
 'Tableau.pdf',
 'TableauPublicDesktop-64bit-10-1-3.exe',
 'TableauPublicDesktop-64bit-10-1-4.exe',
 'telecom.csv',
 'TelecomServiceProviderCaseStudy.pdf',
 'Text Mining (1).pdf',
 'Text Mining.pdf',
 'third.sas7bdat',
 'Time Series  Forecasting (1).pdf',
 'Time Series  Forecasting.pdf',
 'ts.html',
 'ts.R',
 'Unconfirmed 373974.crdownload',
 'Unconfirmed 376562.crdownload',
 'Unconfirmed 376991.crdownload',
 'Unconfirmed 930917.crdownload',
 'Unconfirmed 950045.crdownload',
 'unvbasicvapp__9411008__ova__en__sp0__1.ova.crdownload',
 'VirtualBox-5.1.8-111374-Win (1).exe',
 'VirtualBox-5.1.8-111374-Win.exe',
 'Web+Scraping+Yelp+with+Beautiful+Soup.ipynb',
 'Webinar for Business Analytics.pdf',
 'WhatsApp Image 2017-02-18 at 08.42.55 (1).jpeg',
 'WhatsApp Image 2017-02-18 at 08.42.55.jpeg']

In [15]:
diamonds=pd.read_csv("C:\\Users\\Dell\\Downloads\\BigDiamonds.csv\\BigDiamonds.csv")

In [38]:
type(diamonds)


Out[38]:
pandas.core.frame.DataFrame

In [37]:
len(diamonds)


Out[37]:
598024

In [36]:
diamonds.columns


Out[36]:
Index(['carat', 'cut', 'color', 'clarity', 'table', 'depth', 'cert',
       'measurements', 'price', 'x', 'y', 'z'],
      dtype='object')

In [35]:
diamonds.shape


Out[35]:
(598024, 12)

In [19]:
diamonds.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 598024 entries, 0 to 598023
Data columns (total 13 columns):
Unnamed: 0      598024 non-null int64
carat           598024 non-null float64
cut             598024 non-null object
color           598024 non-null object
clarity         598024 non-null object
table           598024 non-null float64
depth           598024 non-null float64
cert            598024 non-null object
measurements    597978 non-null object
price           597311 non-null float64
x               596209 non-null float64
y               596172 non-null float64
z               595480 non-null float64
dtypes: float64(7), int64(1), object(5)
memory usage: 59.3+ MB

In [16]:
diamonds.head()


Out[16]:
Unnamed: 0 carat cut color clarity table depth cert measurements price x y z
0 1 0.25 V.Good K I1 59.0 63.7 GIA 3.96 x 3.95 x 2.52 NaN 3.96 3.95 2.52
1 2 0.23 Good G I1 61.0 58.1 GIA 4.00 x 4.05 x 2.30 NaN 4.00 4.05 2.30
2 3 0.34 Good J I2 58.0 58.7 GIA 4.56 x 4.53 x 2.67 NaN 4.56 4.53 2.67
3 4 0.21 V.Good D I1 60.0 60.6 GIA 3.80 x 3.82 x 2.31 NaN 3.80 3.82 2.31
4 5 0.31 V.Good K I1 59.0 62.2 EGL 4.35 x 4.26 x 2.68 NaN 4.35 4.26 2.68

In [17]:
diamonds2=diamonds.copy()

In [53]:
pd.value_counts(diamonds3.cut)


Out[53]:
Ideal     369448
V.Good    168896
Good       59680
Name: cut, dtype: int64

In [18]:
diamonds.describe()


C:\Users\Dell\Anaconda3\lib\site-packages\numpy\lib\function_base.py:3834: RuntimeWarning: Invalid value encountered in percentile
  RuntimeWarning)
Out[18]:
Unnamed: 0 carat table depth price x y z
count 598024.000000 598024.000000 598024.000000 598024.000000 597311.000000 596209.000000 596172.000000 595480.000000
mean 299012.500000 1.071297 57.631077 61.063683 8753.017974 5.990771 6.198671 4.033430
std 172634.803028 0.812696 4.996892 7.604342 13017.567760 1.530936 1.485891 1.240951
min 1.000000 0.200000 0.000000 0.000000 300.000000 0.150000 1.000000 0.040000
25% 149506.750000 0.500000 56.000000 61.000000 NaN NaN NaN NaN
50% 299012.500000 0.900000 58.000000 62.100000 NaN NaN NaN NaN
75% 448518.250000 1.500000 59.000000 62.700000 NaN NaN NaN NaN
max 598024.000000 9.250000 75.900000 81.300000 99990.000000 13.890000 13.890000 13.180000

In [20]:
diamonds = diamonds.notnull() * 1

In [21]:
diamonds.head()


Out[21]:
Unnamed: 0 carat cut color clarity table depth cert measurements price x y z
0 1 1 1 1 1 1 1 1 1 0 1 1 1
1 1 1 1 1 1 1 1 1 1 0 1 1 1
2 1 1 1 1 1 1 1 1 1 0 1 1 1
3 1 1 1 1 1 1 1 1 1 0 1 1 1
4 1 1 1 1 1 1 1 1 1 0 1 1 1

In [22]:
diamonds=diamonds.drop('Unnamed: 0',1)

In [24]:
diamonds.head()


Out[24]:
carat cut color clarity table depth cert measurements price x y z
0 1 1 1 1 1 1 1 1 0 1 1 1
1 1 1 1 1 1 1 1 1 0 1 1 1
2 1 1 1 1 1 1 1 1 0 1 1 1
3 1 1 1 1 1 1 1 1 0 1 1 1
4 1 1 1 1 1 1 1 1 0 1 1 1

In [25]:
diamonds2.head()


Out[25]:
Unnamed: 0 carat cut color clarity table depth cert measurements price x y z
0 1 0.25 V.Good K I1 59.0 63.7 GIA 3.96 x 3.95 x 2.52 NaN 3.96 3.95 2.52
1 2 0.23 Good G I1 61.0 58.1 GIA 4.00 x 4.05 x 2.30 NaN 4.00 4.05 2.30
2 3 0.34 Good J I2 58.0 58.7 GIA 4.56 x 4.53 x 2.67 NaN 4.56 4.53 2.67
3 4 0.21 V.Good D I1 60.0 60.6 GIA 3.80 x 3.82 x 2.31 NaN 3.80 3.82 2.31
4 5 0.31 V.Good K I1 59.0 62.2 EGL 4.35 x 4.26 x 2.68 NaN 4.35 4.26 2.68

In [26]:
diamonds3=diamonds2.copy()

In [28]:
diamonds2.fillna("AJAY").head()


Out[28]:
Unnamed: 0 carat cut color clarity table depth cert measurements price x y z
0 1 0.25 V.Good K I1 59.0 63.7 GIA 3.96 x 3.95 x 2.52 AJAY 3.96 3.95 2.52
1 2 0.23 Good G I1 61.0 58.1 GIA 4.00 x 4.05 x 2.30 AJAY 4 4.05 2.3
2 3 0.34 Good J I2 58.0 58.7 GIA 4.56 x 4.53 x 2.67 AJAY 4.56 4.53 2.67
3 4 0.21 V.Good D I1 60.0 60.6 GIA 3.80 x 3.82 x 2.31 AJAY 3.8 3.82 2.31
4 5 0.31 V.Good K I1 59.0 62.2 EGL 4.35 x 4.26 x 2.68 AJAY 4.35 4.26 2.68

In [31]:
diamonds2=diamonds2.dropna(how="any")

In [32]:
diamonds2.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 593784 entries, 493 to 598023
Data columns (total 13 columns):
Unnamed: 0      593784 non-null int64
carat           593784 non-null float64
cut             593784 non-null object
color           593784 non-null object
clarity         593784 non-null object
table           593784 non-null float64
depth           593784 non-null float64
cert            593784 non-null object
measurements    593784 non-null object
price           593784 non-null float64
x               593784 non-null float64
y               593784 non-null float64
z               593784 non-null float64
dtypes: float64(7), int64(1), object(5)
memory usage: 63.4+ MB

In [44]:
data=diamonds3.values
data


Out[44]:
array([[1, 0.25, 'V.Good', ..., 3.96, 3.95, 2.52],
       [2, 0.23, 'Good', ..., 4.0, 4.05, 2.3],
       [3, 0.34, 'Good', ..., 4.56, 4.53, 2.67],
       ..., 
       [598022, 3.43, 'Ideal', ..., 9.66, 9.61, 6.05],
       [598023, 3.01, 'V.Good', ..., 9.15, 9.19, 5.77],
       [598024, 4.13, 'Ideal', ..., 10.27, 10.19, 6.4]], dtype=object)

In [48]:
diamonds3.columns


Out[48]:
Index(['Unnamed: 0', 'carat', 'cut', 'color', 'clarity', 'table', 'depth',
       'cert', 'measurements', 'price', 'x', 'y', 'z'],
      dtype='object')

In [47]:
g=pd.DataFrame(data=data[0:,0:],    # values
              index=range(0,len(data)),    # 1st column as index
              columns=diamonds3.columns[0:])  # 1st row as the column names

In [49]:
g.head()


Out[49]:
Unnamed: 0 carat cut color clarity table depth cert measurements price x y z
0 1 0.25 V.Good K I1 59 63.7 GIA 3.96 x 3.95 x 2.52 NaN 3.96 3.95 2.52
1 2 0.23 Good G I1 61 58.1 GIA 4.00 x 4.05 x 2.30 NaN 4 4.05 2.3
2 3 0.34 Good J I2 58 58.7 GIA 4.56 x 4.53 x 2.67 NaN 4.56 4.53 2.67
3 4 0.21 V.Good D I1 60 60.6 GIA 3.80 x 3.82 x 2.31 NaN 3.8 3.82 2.31
4 5 0.31 V.Good K I1 59 62.2 EGL 4.35 x 4.26 x 2.68 NaN 4.35 4.26 2.68

In [55]:
diamonds3.iloc[2:5,:]


Out[55]:
Unnamed: 0 carat cut color clarity table depth cert measurements price x y z
2 3 0.34 Good J I2 58.0 58.7 GIA 4.56 x 4.53 x 2.67 NaN 4.56 4.53 2.67
3 4 0.21 V.Good D I1 60.0 60.6 GIA 3.80 x 3.82 x 2.31 NaN 3.80 3.82 2.31
4 5 0.31 V.Good K I1 59.0 62.2 EGL 4.35 x 4.26 x 2.68 NaN 4.35 4.26 2.68

In [56]:
diamonds3.iloc[:,2:5]


Out[56]:
cut color clarity
0 V.Good K I1
1 Good G I1
2 Good J I2
3 V.Good D I1
4 V.Good K I1
5 Good G SI2
6 Good G SI2
7 V.Good D I1
8 V.Good K SI2
9 Good F SI1
10 V.Good F I1
11 V.Good G SI2
12 V.Good D SI2
13 Good G SI2
14 V.Good F I1
15 V.Good J SI2
16 Good E VS2
17 V.Good G I1
18 V.Good F I1
19 Good E SI1
20 V.Good K I1
21 Good D VS2
22 Ideal E I1
23 Good D SI2
24 V.Good E SI2
25 Good G SI2
26 Good E SI1
27 V.Good H SI1
28 Ideal F I1
29 Good D I1
... ... ... ...
597994 Ideal I VS2
597995 Ideal J VS1
597996 Good F VVS2
597997 V.Good I VS1
597998 Ideal D VS2
597999 Ideal D VVS2
598000 Ideal F VS2
598001 V.Good E VVS2
598002 Ideal G VS2
598003 Ideal E VS2
598004 Ideal E VS2
598005 Ideal F VVS2
598006 Ideal H SI2
598007 Ideal F VVS1
598008 Ideal H IF
598009 Good J VS2
598010 V.Good H SI1
598011 V.Good D VS1
598012 Ideal I VS2
598013 Ideal E IF
598014 Ideal I SI1
598015 Ideal H SI1
598016 Ideal D VS2
598017 Good E VS1
598018 Ideal D VS2
598019 Ideal E VVS2
598020 V.Good I VVS2
598021 Ideal F VS2
598022 V.Good E VS1
598023 Ideal H IF

598024 rows × 3 columns


In [59]:
diamonds3[['cut','color','clarity']].head()


Out[59]:
cut color clarity
0 V.Good K I1
1 Good G I1
2 Good J I2
3 V.Good D I1
4 V.Good K I1

In [60]:
diamonds3.ix[20:40]


Out[60]:
Unnamed: 0 carat cut color clarity table depth cert measurements price x y z
20 21 0.31 V.Good K I1 60.0 59.2 GIA 4.45 x 4.50 x 2.65 NaN 4.45 4.50 2.65
21 22 0.22 Good D VS2 61.0 63.7 GIA 3.77 x 3.73 x 2.39 NaN 3.77 3.73 2.39
22 23 0.21 Ideal E I1 59.0 61.5 GIA 3.80 x 3.82 x 2.34 NaN 3.80 3.82 2.34
23 24 0.21 Good D SI2 62.0 64.4 IGI 3.73 x 3.78 x 2.42 NaN 3.73 3.78 2.42
24 25 0.20 V.Good E SI2 58.0 62.5 IGI 3.71 x 3.75 x 2.34 NaN 3.71 3.75 2.34
25 26 0.20 Good G SI2 60.0 64.4 GIA 3.67 x 3.74 x 2.38 NaN 3.67 3.74 2.38
26 27 0.20 Good E SI1 61.0 59.3 GIA 3.81 x 3.79 x 2.25 NaN 3.81 3.79 2.25
27 28 0.20 V.Good H SI1 60.5 63.9 IGI 3.62 x 3.69 x 2.34 NaN 3.62 3.69 2.34
28 29 0.23 Ideal F I1 60.0 60.6 GIA 3.98 x 4.00 x 2.42 NaN 3.98 4.00 2.42
29 30 0.23 Good D I1 65.0 62.4 GIA 3.87 x 3.91 x 2.43 NaN 3.87 3.91 2.43
30 31 0.20 Ideal F SI2 58.0 60.9 IGI 3.79 x 3.82 x 2.32 NaN 3.79 3.82 2.32
31 32 0.20 V.Good E SI2 59.0 62.0 GIA 3.73 x 3.75 x 2.32 NaN 3.73 3.75 2.32
32 33 0.26 Ideal F I1 60.0 60.3 GIA 4.13 x 4.14 x 2.50 NaN 4.13 4.14 2.50
33 34 0.25 Good H I1 61.0 62.3 GIA 3.98 x 4.01 x 2.49 NaN 3.98 4.01 2.49
34 35 0.27 V.Good J I1 60.0 60.4 GIA 4.21 x 4.23 x 2.55 NaN 4.21 4.23 2.55
35 36 0.22 V.Good E SI1 58.0 63.8 GIA 3.85 x 3.83 x 2.45 NaN 3.85 3.83 2.45
36 37 0.22 Good E SI1 60.0 62.6 GIA 3.81 x 3.79 x 2.38 NaN 3.81 3.79 2.38
37 38 0.26 Ideal E I1 57.0 61.9 GIA 4.08 x 4.10 x 2.53 NaN 4.08 4.10 2.53
38 39 0.21 Good G SI1 60.0 64.0 IGI 3.68 x 3.76 x 2.38 NaN 3.68 3.76 2.38
39 40 0.20 Good E SI1 66.0 58.0 GIA 3.86 x 3.88 x 2.24 NaN 3.86 3.88 2.24
40 41 0.24 Ideal E I1 56.0 62.3 GIA 4.02 x 4.04 x 2.51 NaN 4.02 4.04 2.51

In [64]:
diamonds3.corr()


Out[64]:
Unnamed: 0 carat table depth price x y z
Unnamed: 0 1.000000 0.823737 0.022406 0.020020 0.709190 0.825588 0.922538 0.761033
carat 0.823737 1.000000 0.036533 0.009846 0.856328 0.860246 0.960807 0.792051
table 0.022406 0.036533 1.000000 0.448772 0.023378 0.027504 0.044542 0.030344
depth 0.020020 0.009846 0.448772 1.000000 -0.001006 -0.003279 0.007669 0.031801
price 0.709190 0.856328 0.023378 -0.001006 1.000000 0.719778 0.796765 0.645317
x 0.825588 0.860246 0.027504 -0.003279 0.719778 1.000000 0.894203 0.483102
y 0.922538 0.960807 0.044542 0.007669 0.796765 0.894203 1.000000 0.820211
z 0.761033 0.792051 0.030344 0.031801 0.645317 0.483102 0.820211 1.000000

In [65]:
diamonds3.head()


Out[65]:
Unnamed: 0 carat cut color clarity table depth cert measurements price x y z
0 1 0.25 V.Good K I1 59.0 63.7 GIA 3.96 x 3.95 x 2.52 NaN 3.96 3.95 2.52
1 2 0.23 Good G I1 61.0 58.1 GIA 4.00 x 4.05 x 2.30 NaN 4.00 4.05 2.30
2 3 0.34 Good J I2 58.0 58.7 GIA 4.56 x 4.53 x 2.67 NaN 4.56 4.53 2.67
3 4 0.21 V.Good D I1 60.0 60.6 GIA 3.80 x 3.82 x 2.31 NaN 3.80 3.82 2.31
4 5 0.31 V.Good K I1 59.0 62.2 EGL 4.35 x 4.26 x 2.68 NaN 4.35 4.26 2.68

In [67]:
diamonds3.drop(diamonds3.index[[1,3]]).head()


Out[67]:
Unnamed: 0 carat cut color clarity table depth cert measurements price x y z
0 1 0.25 V.Good K I1 59.0 63.7 GIA 3.96 x 3.95 x 2.52 NaN 3.96 3.95 2.52
2 3 0.34 Good J I2 58.0 58.7 GIA 4.56 x 4.53 x 2.67 NaN 4.56 4.53 2.67
4 5 0.31 V.Good K I1 59.0 62.2 EGL 4.35 x 4.26 x 2.68 NaN 4.35 4.26 2.68
5 6 0.20 Good G SI2 60.0 64.4 GIA 3.74 x 3.67 x 2.38 NaN 3.74 3.67 2.38
6 7 0.20 Good G SI2 63.0 62.6 GIA 3.72 x 3.65 x 2.31 NaN 3.72 3.65 2.31

In [68]:
s=pd.Series(range(0,100))

In [70]:
type(s)


Out[70]:
pandas.core.series.Series

In [71]:
diamonds3.drop(diamonds3.index[[s]]).head()


Out[71]:
Unnamed: 0 carat cut color clarity table depth cert measurements price x y z
100 101 0.25 Ideal D I1 59.0 60.0 GIA 4.08 x 4.10 x 2.45 NaN 4.08 4.10 2.45
101 102 0.24 Good F VVS2 64.0 63.6 GIA 3.89 x 3.82 x 2.45 NaN 3.89 3.82 2.45
102 103 0.23 Ideal J SI1 59.0 62.4 EGL 2.46 x 3.9 x 3.98 NaN 2.46 3.90 3.98
103 104 0.21 Good I SI2 0.0 0.0 OTHER 0.00-0.00 x 0.00 NaN NaN NaN NaN
104 105 0.21 V.Good I VS2 66.0 60.0 IGI 3.83 x 3.85 x 2.31 NaN 3.83 3.85 2.31

In [75]:
del diamonds

In [79]:
diamonds3.query('carat >.50 and price >3000')


Out[79]:
Unnamed: 0 carat cut color clarity table depth cert measurements price x y z
277140 277141 0.70 V.Good G VS2 60.0 62.2 GIA 5.70 x 5.68 x 3.54 3001.0 5.70 5.68 3.54
277141 277142 0.61 Ideal F IF 58.0 62.4 GIA 5.42 x 5.38 x 3.37 3001.0 5.42 5.38 3.37
277142 277143 0.85 Ideal J VS2 58.0 61.3 GIA 6.08 x 6.06 x 3.72 3001.0 6.08 6.06 3.72
277143 277144 0.61 Ideal G IF 57.0 60.8 GIA 5.50 x 5.53 x 3.35 3001.0 5.50 5.53 3.35
277144 277145 0.70 Good E VVS2 59.0 63.2 GIA 5.54 x 5.56 x 3.51 3001.0 5.54 5.56 3.51
277145 277146 0.91 Good I SI1 56.0 63.5 GIA 6.04 x 6.10 x 3.85 3001.0 6.04 6.10 3.85
277146 277147 0.71 Good E VVS2 57.0 66.1 GIA 5.48 x 5.52 x 3.63 3001.0 5.48 5.52 3.63
277147 277148 1.00 Good E SI2 56.0 64.4 EGL USA 6.07 x 6.21 x 3.96 3001.0 6.07 6.21 3.96
277148 277149 1.11 V.Good I I1 56.0 64.3 GIA 6.51 x 6.58 x 4.21 3001.0 6.51 6.58 4.21
277149 277150 0.70 V.Good G VVS2 58.0 63.9 GIA 5.57 x 5.59 x 3.57 3001.0 5.57 5.59 3.57
277150 277151 0.70 V.Good G VVS2 59.0 64.5 GIA 5.53 x 5.54 x 3.57 3001.0 5.53 5.54 3.57
277151 277152 0.70 V.Good G VVS2 58.0 63.5 GIA 5.56 x 5.63 x 0.00 3001.0 5.56 5.63 NaN
277152 277153 0.80 V.Good E SI2 59.0 60.8 GIA 5.97 x 5.93 x 3.62 3001.0 5.97 5.93 3.62
277153 277154 0.80 V.Good E SI2 55.0 62.9 GIA 5.92 x 5.88 x 3.71 3001.0 5.92 5.88 3.71
277154 277155 0.85 V.Good H SI1 59.0 59.3 GIA 6.19 x 6.12 x 3.65 3001.0 6.19 6.12 3.65
277155 277156 1.03 Good G I2 56.0 63.9 EGL 6.38 x 6.33 x 4.06 3001.0 6.38 6.33 4.06
277156 277157 1.08 V.Good G SI2 58.0 61.7 EGL 6.54 x 6.52 x 4.03 3001.0 6.54 6.52 4.03
277157 277158 1.08 V.Good I SI1 58.0 61.1 EGL 6.61 x 6.58 x 4.03 3001.0 6.61 6.58 4.03
277158 277159 0.80 V.Good G SI1 58.0 63.3 GIA 5.81 x 5.90 x 3.70 3001.0 5.81 5.90 3.70
277159 277160 0.79 V.Good G SI1 57.0 63.8 GIA 5.80 x 5.89 x 3.73 3001.0 5.80 5.89 3.73
277160 277161 0.81 Ideal F VS1 59.0 61.2 OTHER 5.98 x 6.02 x 3.67 3001.0 5.98 6.02 3.67
277161 277162 1.01 V.Good F I1 59.5 63.3 IGI 6.25 x 6.30 x 3.97 3001.0 6.25 6.30 3.97
277162 277163 0.80 Ideal G SI1 57.0 62.7 GIA 5.90 x 5.94 x 3.71 3001.0 5.90 5.94 3.71
277163 277164 0.93 V.Good G SI2 57.0 64.9 EGL USA 6.07 x 6.09 x 3.94 3001.0 6.07 6.09 3.94
277164 277165 0.80 V.Good G SI1 61.0 61.2 GIA 5.86 x 5.92 x 3.61 3001.0 5.86 5.92 3.61
277165 277166 0.93 V.Good H SI1 55.0 64.4 EGL ISRAEL 6.12 x 6.15 x 3.95 3001.0 6.12 6.15 3.95
277166 277167 0.93 V.Good H SI1 55.0 64.4 EGL ISRAEL 6.12 x 6.15 x 3.95 3001.0 6.12 6.15 3.95
277167 277168 0.80 Ideal G SI1 58.0 62.0 GIA 5.91 x 5.96 x 3.68 3001.0 5.91 5.96 3.68
277168 277169 0.61 Ideal E VVS2 57.0 61.8 GIA 5.43 x 5.44 x 3.36 3002.0 5.43 5.44 3.36
277169 277170 0.80 V.Good G SI2 55.0 62.3 GIA 5.94 x 5.92 x 3.69 3002.0 5.94 5.92 3.69
... ... ... ... ... ... ... ... ... ... ... ... ... ...
597994 597995 4.65 Ideal I VS2 56.0 61.2 GIA 10.72 x 10.80 x 6.59 99626.0 10.72 10.80 6.59
597995 597996 5.75 Ideal J VS1 58.0 62.7 IGI 11.34 x 11.29 x 7.1 99630.0 11.34 11.29 7.10
597996 597997 3.01 Good F VVS2 62.0 62.5 GIA 9.15 x 9.12 x 5.71 99630.0 9.15 9.12 5.71
597997 597998 5.05 V.Good I VS1 58.0 0.0 HRD 10.8 x 10.76 x 6.88 99640.0 10.80 10.76 6.88
597998 597999 3.54 Ideal D VS2 61.0 59.1 GIA 9.95 x 10.00 x 5.89 99656.0 9.95 10.00 5.89
597999 598000 2.71 Ideal D VVS2 56.0 62.7 GIA 8.93 x 8.9 x 5.59 99660.0 8.93 8.90 5.59
598000 598001 3.65 Ideal F VS2 59.0 61.0 GIA 9.87 x 9.94 x 6.04 99669.0 9.87 9.94 6.04
598001 598002 3.01 V.Good E VVS2 57.0 63.4 GIA 9.16 x 9.09 x 5.78 99700.0 9.16 9.09 5.78
598002 598003 3.86 Ideal G VS2 60.0 62.3 GIA 9.92 x 10.06 x 6.22 99711.0 9.92 10.06 6.22
598003 598004 3.04 Ideal E VS2 58.0 61.2 GIA 9.38 x 9.33 x 5.73 99730.0 9.38 9.33 5.73
598004 598005 3.04 Ideal E VS2 56.0 62.0 GIA 9.35 x 9.31 x 5.79 99730.0 9.35 9.31 5.79
598005 598006 3.07 Ideal F VVS2 58.0 59.5 GIA 9.45 x 9.51 x 5.64 99776.0 9.45 9.51 5.64
598006 598007 5.33 Ideal H SI2 59.0 61.1 GIA 6.87 x 11.22 x 11.27 99778.0 6.87 11.22 11.27
598007 598008 3.56 Ideal F VVS1 61.2 58.0 GIA 9.82 x 9.87 x 6.02 99780.0 9.82 9.87 6.02
598008 598009 3.43 Ideal H IF 60.0 59.5 GIA 5.88 x 9.86 x 9.88 99802.0 5.88 9.86 9.88
598009 598010 5.02 Good J VS2 57.0 62.3 GIA 10.77 x 10.84 x 6.73 99806.0 10.77 10.84 6.73
598010 598011 5.01 V.Good H SI1 57.0 59.6 GIA 11.11 x 11.17 x 6.64 99810.0 11.11 11.17 6.64
598011 598012 3.05 V.Good D VS1 60.0 60.1 GIA 9.4 x 9.34 x 5.63 99870.0 9.40 9.34 5.63
598012 598013 5.59 Ideal I VS2 61.0 60.4 HRD 11.52 x 11.57 x 6.97 99890.0 11.52 11.57 6.97
598013 598014 2.57 Ideal E IF 59.0 60.9 GIA 8.82 x 8.88 x 5.39 99896.0 8.82 8.88 5.39
598014 598015 5.24 Ideal I SI1 60.0 59.5 GIA 11.35 x 11.43 x 6.78 99910.0 11.35 11.43 6.78
598015 598016 5.03 Ideal H SI1 58.0 62.2 HRD 6.82 x 10.94 x 10.98 99913.0 6.82 10.94 10.98
598016 598017 3.05 Ideal D VS2 59.0 61.3 GIA 5.73 x 9.33 x 9.36 99916.0 5.73 9.33 9.36
598017 598018 3.01 Good E VS1 61.0 62.6 GIA 9.16 x 9.25 x 5.76 99920.0 9.16 9.25 5.76
598018 598019 3.01 Ideal D VS2 58.0 62.0 GIA 9.25 x 9.2 x 5.72 99920.0 9.25 9.20 5.72
598019 598020 3.02 Ideal E VVS2 58.0 59.8 HRD 9.43 x 9.51 x 5.66 99930.0 9.43 9.51 5.66
598020 598021 5.01 V.Good I VVS2 63.5 61.5 IGI 10.78 x 10.89 x 6.68 99942.0 10.78 10.89 6.68
598021 598022 3.43 Ideal F VS2 54.0 62.7 GIA 9.66 x 9.61 x 6.05 99960.0 9.66 9.61 6.05
598022 598023 3.01 V.Good E VS1 58.0 62.9 GIA 9.15 x 9.19 x 5.77 99966.0 9.15 9.19 5.77
598023 598024 4.13 Ideal H IF 56.0 62.5 IGI 10.27 x 10.19 x 6.4 99990.0 10.27 10.19 6.40

320592 rows × 13 columns


In [80]:
del diamonds3["Unnamed: 0"]

In [82]:
diamonds3.query('price >5000')


Out[82]:
carat cut color clarity table depth cert measurements price x y z
350904 1.03 Ideal J VS2 60.0 62.0 GIA 6.43 x 6.48 x 4.00 5001.0 6.43 6.48 4.00
350905 1.01 V.Good H SI2 59.0 63.0 GIA 6.32 x 6.35 x 3.99 5001.0 6.32 6.35 3.99
350906 1.04 Ideal G SI2 61.0 59.8 GIA 6.57 x 6.59 x 3.94 5001.0 6.57 6.59 3.94
350907 0.90 Ideal G VS2 56.0 62.8 GIA 6.17 x 6.14 x 3.87 5001.0 6.17 6.14 3.87
350908 1.05 Ideal I SI1 59.0 60.3 GIA 6.60 x 6.57 x 3.97 5001.0 6.60 6.57 3.97
350909 1.09 V.Good D VS2 63.0 61.8 EGL 6.66 x 6.55 x 4.08 5001.0 6.66 6.55 4.08
350910 1.27 Ideal G SI2 55.0 62.1 EGL 6.96 x 6.91 x 4.31 5001.0 6.96 6.91 4.31
350911 1.40 Good G SI2 59.0 63.7 EGL 6.95 x 6.89 x 4.44 5001.0 6.95 6.89 4.44
350912 1.00 Good G SI1 58.0 65.7 GIA 6.15 x 6.08 x 4.02 5002.0 6.15 6.08 4.02
350913 0.92 V.Good F VS2 58.0 63.3 GIA 6.16 x 6.18 x 3.91 5002.0 6.16 6.18 3.91
350914 1.00 Good H SI2 59.0 65.2 GIA 6.17 x 6.19 x 4.03 5002.0 6.17 6.19 4.03
350915 0.79 Ideal E VS1 59.0 59.0 GIA 6.07 x 6.08 x 3.58 5002.0 6.07 6.08 3.58
350916 1.00 Good H SI2 61.0 64.0 GIA 6.19 x 6.22 x 3.97 5002.0 6.19 6.22 3.97
350917 1.02 Ideal J SI1 59.0 61.9 GIA 6.42 x 6.45 x 3.99 5002.0 6.42 6.45 3.99
350918 1.00 Good E SI1 52.0 65.8 GIA 6.15 x 6.12 x 4.03 5002.0 6.15 6.12 4.03
350919 1.08 V.Good I SI1 56.0 63.5 GIA 6.50 x 6.53 x 4.14 5002.0 6.50 6.53 4.14
350920 1.23 V.Good F SI2 57.0 63.0 EGL USA 6.79 x 6.84 x 4.29 5002.0 6.79 6.84 4.29
350921 1.50 Good K VS2 58.0 62.5 EGL USA 7.27 x 7.32 x 4.56 5002.0 7.27 7.32 4.56
350922 1.00 Good H VVS1 53.0 64.6 HRD 6.25 x 6.29 x 4.05 5002.0 6.25 6.29 4.05
350923 1.00 V.Good H SI1 60.0 62.0 GIA 6.40 x 6.46 x 3.99 5002.0 6.40 6.46 3.99
350924 1.23 V.Good I VS2 62.0 59.3 EGL USA 6.94 x 7.00 x 4.13 5002.0 6.94 7.00 4.13
350925 1.08 V.Good J SI2 57.0 62.9 GIA 6.47 x 6.52 x 4.08 5002.0 6.47 6.52 4.08
350926 2.67 V.Good K I1 59.0 63.7 EGL 8.65 x 8.72 x 5.53 5002.0 8.65 8.72 5.53
350927 1.03 Good D SI1 60.0 57.8 EGL 6.61 x 6.58 x 3.81 5002.0 6.61 6.58 3.81
350928 1.20 V.Good G SI1 59.0 63.0 EGL 6.71 x 6.68 x 4.22 5002.0 6.71 6.68 4.22
350929 1.35 V.Good I VVS2 60.0 59.4 EGL 7.21 x 7.10 x 4.25 5002.0 7.21 7.10 4.25
350930 1.01 Ideal I SI1 57.0 62.7 GIA 6.39 x 6.43 x 4.02 5002.0 6.39 6.43 4.02
350931 1.01 Good G SI1 61.0 65.8 OTHER 6.18 x 6.26 x 4.10 5002.0 6.18 6.26 4.10
350932 1.30 V.Good G I1 60.0 63.3 GIA 6.85 x 6.86 x 4.34 5002.0 6.85 6.86 4.34
350933 1.02 Good F SI1 61.0 64.3 GIA 6.27 x 6.30 x 4.04 5002.0 6.27 6.30 4.04
... ... ... ... ... ... ... ... ... ... ... ... ...
597994 4.65 Ideal I VS2 56.0 61.2 GIA 10.72 x 10.80 x 6.59 99626.0 10.72 10.80 6.59
597995 5.75 Ideal J VS1 58.0 62.7 IGI 11.34 x 11.29 x 7.1 99630.0 11.34 11.29 7.10
597996 3.01 Good F VVS2 62.0 62.5 GIA 9.15 x 9.12 x 5.71 99630.0 9.15 9.12 5.71
597997 5.05 V.Good I VS1 58.0 0.0 HRD 10.8 x 10.76 x 6.88 99640.0 10.80 10.76 6.88
597998 3.54 Ideal D VS2 61.0 59.1 GIA 9.95 x 10.00 x 5.89 99656.0 9.95 10.00 5.89
597999 2.71 Ideal D VVS2 56.0 62.7 GIA 8.93 x 8.9 x 5.59 99660.0 8.93 8.90 5.59
598000 3.65 Ideal F VS2 59.0 61.0 GIA 9.87 x 9.94 x 6.04 99669.0 9.87 9.94 6.04
598001 3.01 V.Good E VVS2 57.0 63.4 GIA 9.16 x 9.09 x 5.78 99700.0 9.16 9.09 5.78
598002 3.86 Ideal G VS2 60.0 62.3 GIA 9.92 x 10.06 x 6.22 99711.0 9.92 10.06 6.22
598003 3.04 Ideal E VS2 58.0 61.2 GIA 9.38 x 9.33 x 5.73 99730.0 9.38 9.33 5.73
598004 3.04 Ideal E VS2 56.0 62.0 GIA 9.35 x 9.31 x 5.79 99730.0 9.35 9.31 5.79
598005 3.07 Ideal F VVS2 58.0 59.5 GIA 9.45 x 9.51 x 5.64 99776.0 9.45 9.51 5.64
598006 5.33 Ideal H SI2 59.0 61.1 GIA 6.87 x 11.22 x 11.27 99778.0 6.87 11.22 11.27
598007 3.56 Ideal F VVS1 61.2 58.0 GIA 9.82 x 9.87 x 6.02 99780.0 9.82 9.87 6.02
598008 3.43 Ideal H IF 60.0 59.5 GIA 5.88 x 9.86 x 9.88 99802.0 5.88 9.86 9.88
598009 5.02 Good J VS2 57.0 62.3 GIA 10.77 x 10.84 x 6.73 99806.0 10.77 10.84 6.73
598010 5.01 V.Good H SI1 57.0 59.6 GIA 11.11 x 11.17 x 6.64 99810.0 11.11 11.17 6.64
598011 3.05 V.Good D VS1 60.0 60.1 GIA 9.4 x 9.34 x 5.63 99870.0 9.40 9.34 5.63
598012 5.59 Ideal I VS2 61.0 60.4 HRD 11.52 x 11.57 x 6.97 99890.0 11.52 11.57 6.97
598013 2.57 Ideal E IF 59.0 60.9 GIA 8.82 x 8.88 x 5.39 99896.0 8.82 8.88 5.39
598014 5.24 Ideal I SI1 60.0 59.5 GIA 11.35 x 11.43 x 6.78 99910.0 11.35 11.43 6.78
598015 5.03 Ideal H SI1 58.0 62.2 HRD 6.82 x 10.94 x 10.98 99913.0 6.82 10.94 10.98
598016 3.05 Ideal D VS2 59.0 61.3 GIA 5.73 x 9.33 x 9.36 99916.0 5.73 9.33 9.36
598017 3.01 Good E VS1 61.0 62.6 GIA 9.16 x 9.25 x 5.76 99920.0 9.16 9.25 5.76
598018 3.01 Ideal D VS2 58.0 62.0 GIA 9.25 x 9.2 x 5.72 99920.0 9.25 9.20 5.72
598019 3.02 Ideal E VVS2 58.0 59.8 HRD 9.43 x 9.51 x 5.66 99930.0 9.43 9.51 5.66
598020 5.01 V.Good I VVS2 63.5 61.5 IGI 10.78 x 10.89 x 6.68 99942.0 10.78 10.89 6.68
598021 3.43 Ideal F VS2 54.0 62.7 GIA 9.66 x 9.61 x 6.05 99960.0 9.66 9.61 6.05
598022 3.01 V.Good E VS1 58.0 62.9 GIA 9.15 x 9.19 x 5.77 99966.0 9.15 9.19 5.77
598023 4.13 Ideal H IF 56.0 62.5 IGI 10.27 x 10.19 x 6.4 99990.0 10.27 10.19 6.40

247060 rows × 12 columns


In [84]:
diamonds2.query('color=="J" or price >4000')


Out[84]:
Unnamed: 0 carat cut color clarity table depth cert measurements price x y z
495 496 0.26 Good J VS2 56.5 64.1 IGI 4.01 x 4.05 x 2.58 300.0 4.01 4.05 2.58
536 537 0.29 V.Good J SI1 62.0 59.4 GIA 4.31 x 4.33 x 2.56 303.0 4.31 4.33 2.56
554 555 0.33 Good J SI2 64.0 58.4 GIA 4.50 x 4.54 x 2.64 305.0 4.50 4.54 2.64
576 577 0.23 Ideal J VS1 57.0 61.1 GIA 3.92 x 3.94 x 2.40 307.0 3.92 3.94 2.40
590 591 0.20 Ideal J VS2 57.0 62.0 GIA 3.73 x 3.75 x 2.32 308.0 3.73 3.75 2.32
601 602 0.49 Good J I1 66.0 65.7 GIA 4.97 x 4.82 x 3.21 309.0 4.97 4.82 3.21
603 604 0.20 V.Good J IF 57.0 63.8 GIA 3.66 x 3.68 x 2.34 310.0 3.66 3.68 2.34
604 605 0.20 V.Good J IF 60.0 60.4 GIA 3.75 x 3.77 x 2.27 310.0 3.75 3.77 2.27
636 637 0.20 Ideal J VVS1 60.0 61.0 GIA 3.78 x 3.81 x 2.32 314.0 3.78 3.81 2.32
638 639 0.25 Ideal J VS2 58.0 61.4 GIA 4.04 x 4.06 x 2.49 314.0 4.04 4.06 2.49
649 650 0.37 Good J I1 69.0 56.5 GIA 4.73 x 4.80 x 2.69 316.0 4.73 4.80 2.69
668 669 0.22 Ideal J VVS1 55.0 62.4 IGI 3.85 x 3.87 x 2.41 318.0 3.85 3.87 2.41
669 670 0.22 Ideal J VVS2 56.0 62.1 IGI 3.89 x 3.91 x 2.42 318.0 3.89 3.91 2.42
683 684 0.20 Ideal J IF 54.0 61.3 IGI 3.84 x 3.86 x 2.36 320.0 3.84 3.86 2.36
685 686 0.20 Ideal J VVS2 59.0 61.0 GIA 3.77 x 3.79 x 2.31 320.0 3.77 3.79 2.31
738 739 0.20 Ideal J VVS1 56.0 61.8 GIA 3.75 x 3.78 x 2.33 323.0 3.75 3.78 2.33
741 742 0.20 Ideal J VVS2 56.0 62.3 GIA 3.71 x 3.74 x 2.32 323.0 3.71 3.74 2.32
747 748 0.25 V.Good J VS1 57.0 61.3 GIA 4.04 x 4.05 x 2.48 323.0 4.04 4.05 2.48
754 755 0.34 Good J SI2 57.0 63.0 IGI 4.40 x 4.43 x 2.78 323.0 4.40 4.43 2.78
758 759 0.48 Good J I2 59.0 66.1 GIA 4.83 x 4.76 x 3.17 323.0 4.83 4.76 3.17
770 771 0.21 Ideal J IF 56.0 61.6 IGI 3.85 x 3.88 x 2.38 325.0 3.85 3.88 2.38
781 782 0.41 Good J I1 58.0 61.5 GIA 4.81 x 4.74 x 2.94 325.0 4.81 4.74 2.94
797 798 0.23 V.Good J VVS2 63.0 60.5 IGI 3.92 x 3.95 x 2.38 327.0 3.92 3.95 2.38
804 805 0.21 Ideal J VVS1 57.0 62.5 GIA 3.80 x 3.81 x 2.38 328.0 3.80 3.81 2.38
823 824 0.38 V.Good J I1 61.0 62.5 GIA 4.57 x 4.58 x 2.86 329.0 4.57 4.58 2.86
827 828 0.50 Good J I2 56.0 68.5 EGL 4.94 x 4.78 x 3.33 329.0 4.94 4.78 3.33
833 834 0.37 Good J SI1 55.0 57.7 GIA 4.75 x 4.62 x 2.70 330.0 4.75 4.62 2.70
839 840 0.30 Good J SI1 68.0 60.5 GIA 4.29 x 4.33 x 2.61 330.0 4.29 4.33 2.61
847 848 0.41 Good J I1 58.0 61.5 GIA 4.74 x 4.81 x 2.94 330.0 4.74 4.81 2.94
852 853 0.22 V.Good J VVS2 59.0 61.5 GIA 3.9 x 3.86 x 2.39 330.0 3.90 3.86 2.39
... ... ... ... ... ... ... ... ... ... ... ... ... ...
597994 597995 4.65 Ideal I VS2 56.0 61.2 GIA 10.72 x 10.80 x 6.59 99626.0 10.72 10.80 6.59
597995 597996 5.75 Ideal J VS1 58.0 62.7 IGI 11.34 x 11.29 x 7.1 99630.0 11.34 11.29 7.10
597996 597997 3.01 Good F VVS2 62.0 62.5 GIA 9.15 x 9.12 x 5.71 99630.0 9.15 9.12 5.71
597997 597998 5.05 V.Good I VS1 58.0 0.0 HRD 10.8 x 10.76 x 6.88 99640.0 10.80 10.76 6.88
597998 597999 3.54 Ideal D VS2 61.0 59.1 GIA 9.95 x 10.00 x 5.89 99656.0 9.95 10.00 5.89
597999 598000 2.71 Ideal D VVS2 56.0 62.7 GIA 8.93 x 8.9 x 5.59 99660.0 8.93 8.90 5.59
598000 598001 3.65 Ideal F VS2 59.0 61.0 GIA 9.87 x 9.94 x 6.04 99669.0 9.87 9.94 6.04
598001 598002 3.01 V.Good E VVS2 57.0 63.4 GIA 9.16 x 9.09 x 5.78 99700.0 9.16 9.09 5.78
598002 598003 3.86 Ideal G VS2 60.0 62.3 GIA 9.92 x 10.06 x 6.22 99711.0 9.92 10.06 6.22
598003 598004 3.04 Ideal E VS2 58.0 61.2 GIA 9.38 x 9.33 x 5.73 99730.0 9.38 9.33 5.73
598004 598005 3.04 Ideal E VS2 56.0 62.0 GIA 9.35 x 9.31 x 5.79 99730.0 9.35 9.31 5.79
598005 598006 3.07 Ideal F VVS2 58.0 59.5 GIA 9.45 x 9.51 x 5.64 99776.0 9.45 9.51 5.64
598006 598007 5.33 Ideal H SI2 59.0 61.1 GIA 6.87 x 11.22 x 11.27 99778.0 6.87 11.22 11.27
598007 598008 3.56 Ideal F VVS1 61.2 58.0 GIA 9.82 x 9.87 x 6.02 99780.0 9.82 9.87 6.02
598008 598009 3.43 Ideal H IF 60.0 59.5 GIA 5.88 x 9.86 x 9.88 99802.0 5.88 9.86 9.88
598009 598010 5.02 Good J VS2 57.0 62.3 GIA 10.77 x 10.84 x 6.73 99806.0 10.77 10.84 6.73
598010 598011 5.01 V.Good H SI1 57.0 59.6 GIA 11.11 x 11.17 x 6.64 99810.0 11.11 11.17 6.64
598011 598012 3.05 V.Good D VS1 60.0 60.1 GIA 9.4 x 9.34 x 5.63 99870.0 9.40 9.34 5.63
598012 598013 5.59 Ideal I VS2 61.0 60.4 HRD 11.52 x 11.57 x 6.97 99890.0 11.52 11.57 6.97
598013 598014 2.57 Ideal E IF 59.0 60.9 GIA 8.82 x 8.88 x 5.39 99896.0 8.82 8.88 5.39
598014 598015 5.24 Ideal I SI1 60.0 59.5 GIA 11.35 x 11.43 x 6.78 99910.0 11.35 11.43 6.78
598015 598016 5.03 Ideal H SI1 58.0 62.2 HRD 6.82 x 10.94 x 10.98 99913.0 6.82 10.94 10.98
598016 598017 3.05 Ideal D VS2 59.0 61.3 GIA 5.73 x 9.33 x 9.36 99916.0 5.73 9.33 9.36
598017 598018 3.01 Good E VS1 61.0 62.6 GIA 9.16 x 9.25 x 5.76 99920.0 9.16 9.25 5.76
598018 598019 3.01 Ideal D VS2 58.0 62.0 GIA 9.25 x 9.2 x 5.72 99920.0 9.25 9.20 5.72
598019 598020 3.02 Ideal E VVS2 58.0 59.8 HRD 9.43 x 9.51 x 5.66 99930.0 9.43 9.51 5.66
598020 598021 5.01 V.Good I VVS2 63.5 61.5 IGI 10.78 x 10.89 x 6.68 99942.0 10.78 10.89 6.68
598021 598022 3.43 Ideal F VS2 54.0 62.7 GIA 9.66 x 9.61 x 6.05 99960.0 9.66 9.61 6.05
598022 598023 3.01 V.Good E VS1 58.0 62.9 GIA 9.15 x 9.19 x 5.77 99966.0 9.15 9.19 5.77
598023 598024 4.13 Ideal H IF 56.0 62.5 IGI 10.27 x 10.19 x 6.4 99990.0 10.27 10.19 6.40

299941 rows × 13 columns


In [86]:
diamonds3['newvar']=1

In [87]:
diamonds3.head()


Out[87]:
carat cut color clarity table depth cert measurements price x y z newvar
0 0.25 V.Good K I1 59.0 63.7 GIA 3.96 x 3.95 x 2.52 NaN 3.96 3.95 2.52 1
1 0.23 Good G I1 61.0 58.1 GIA 4.00 x 4.05 x 2.30 NaN 4.00 4.05 2.30 1
2 0.34 Good J I2 58.0 58.7 GIA 4.56 x 4.53 x 2.67 NaN 4.56 4.53 2.67 1
3 0.21 V.Good D I1 60.0 60.6 GIA 3.80 x 3.82 x 2.31 NaN 3.80 3.82 2.31 1
4 0.31 V.Good K I1 59.0 62.2 EGL 4.35 x 4.26 x 2.68 NaN 4.35 4.26 2.68 1

In [89]:
diamonds3.loc[diamonds3.price>=5000,'newvar']="Expensive"

In [92]:
diamonds3.query('price >5000').head()


Out[92]:
carat cut color clarity table depth cert measurements price x y z newvar
350904 1.03 Ideal J VS2 60.0 62.0 GIA 6.43 x 6.48 x 4.00 5001.0 6.43 6.48 4.00 Expensive
350905 1.01 V.Good H SI2 59.0 63.0 GIA 6.32 x 6.35 x 3.99 5001.0 6.32 6.35 3.99 Expensive
350906 1.04 Ideal G SI2 61.0 59.8 GIA 6.57 x 6.59 x 3.94 5001.0 6.57 6.59 3.94 Expensive
350907 0.90 Ideal G VS2 56.0 62.8 GIA 6.17 x 6.14 x 3.87 5001.0 6.17 6.14 3.87 Expensive
350908 1.05 Ideal I SI1 59.0 60.3 GIA 6.60 x 6.57 x 3.97 5001.0 6.60 6.57 3.97 Expensive

In [94]:
diamonds3['ppc']=diamonds3.price/diamonds3.carat

In [95]:
diamonds3.head()


Out[95]:
carat cut color clarity table depth cert measurements price x y z newvar ppc
0 0.25 V.Good K I1 59.0 63.7 GIA 3.96 x 3.95 x 2.52 NaN 3.96 3.95 2.52 1 NaN
1 0.23 Good G I1 61.0 58.1 GIA 4.00 x 4.05 x 2.30 NaN 4.00 4.05 2.30 1 NaN
2 0.34 Good J I2 58.0 58.7 GIA 4.56 x 4.53 x 2.67 NaN 4.56 4.53 2.67 1 NaN
3 0.21 V.Good D I1 60.0 60.6 GIA 3.80 x 3.82 x 2.31 NaN 3.80 3.82 2.31 1 NaN
4 0.31 V.Good K I1 59.0 62.2 EGL 4.35 x 4.26 x 2.68 NaN 4.35 4.26 2.68 1 NaN

In [96]:
diamonds4=diamonds3.copy()

In [97]:
diamonds3=diamonds3.dropna(how='any')

In [98]:
diamonds3.head()


Out[98]:
carat cut color clarity table depth cert measurements price x y z newvar ppc
493 0.24 V.Good G SI1 61.0 58.9 GIA 4.09 x 4.10 x 2.41 300.0 4.09 4.10 2.41 1 1250.000000
494 0.31 V.Good K SI2 59.0 60.2 GIA 4.40 x 4.42 x 2.65 300.0 4.40 4.42 2.65 1 967.741935
495 0.26 Good J VS2 56.5 64.1 IGI 4.01 x 4.05 x 2.58 300.0 4.01 4.05 2.58 1 1153.846154
496 0.24 Ideal G SI1 55.0 61.3 GIA 4.01 x 4.03 x 2.47 300.0 4.01 4.03 2.47 1 1250.000000
497 0.30 Good H I1 57.0 62.2 GIA 4.21 x 4.24 x 2.63 300.0 4.21 4.24 2.63 1 1000.000000

In [99]:
os.listdir()


Out[99]:
['140749_2017.pdf',
 '2011-F01-0700-Rev4-MDDS.XLSX',
 '20150817143155.pdf',
 '20160111060911.pdf',
 '20170214052225.pdf',
 '861415_10151432783238421_2124270505_o (1).jpg',
 '861415_10151432783238421_2124270505_o.jpg',
 'AirPassengers.csv',
 'ajayo.jpg',
 'Alison Python  Invoice   - Sheet1.pdf',
 'Alison SAS  Invoice   - Sheet1.pdf',
 'All+CSV+Files+in+a+Folder.ipynb',
 'Allison Interview Jones Invoice   - Sheet1.pdf',
 'Anaconda3-4.2.0-Windows-x86_64.exe',
 'apachehttpd.exe',
 'April invoice adaptive analytics   - Sheet1.pdf',
 'Assignment14_BusinessAnalytics (1).docx',
 'Assignment14_BusinessAnalytics.docx',
 'Assignment15_BusinessAnalytics.docx',
 'Assignment16_BusinessAnalytics (1).docx',
 'Assignment16_BusinessAnalytics (2).docx',
 'Assignment16_BusinessAnalytics.docx',
 'aug ust 2008.JPG',
 'avast_free_antivirus_setup_online.exe',
 'avinash_ltv.zip',
 'BigDiamonds.csv',
 'BigDiamonds.csv (1).zip',
 'BigDiamonds.csv (2)',
 'BigDiamonds.csv (2).zip',
 'BigDiamonds.csv.zip',
 'Boston (1).csv',
 'Boston.csv',
 'CAM- Ajay Ohri (1).pdf',
 'CAM- Ajay Ohri.pdf',
 'camtasia.exe',
 'ccFraud.csv',
 'Certificate of Incorporation - U74999DL2015PTC282030 (26 June 2015).pdf',
 'CHAP1-6PythonforRUsersAnapproachforDataScience.docx',
 'chapter+3+_+spark.html',
 'chi+square+test.ipynb',
 'chromeinstall-8u111.exe',
 'Cisco_WebEx_Add-On.exe',
 'class2.csv',
 'Collabera Invoice (1).pdf',
 'Collabera Invoice.pdf',
 'Collectcent Invoice.pdf',
 'college degrees.pdf',
 'DAP 1.pdf',
 'DAP 1.pptx',
 'DAP 6 RDBMS and SQL.pdf',
 'DAP 6 RDBMS and SQL.pptx',
 'data+exploration.ipynb',
 'data+manipulation.ipynb',
 'data1.csv',
 'datasets.csv',
 'Decision Trees.pdf',
 'DecisionStatsOfferLetter.docx',
 'DecisionStatsRelievingLetter.docx',
 'descriptive+stats+in+Python.ipynb',
 'desktop.ini',
 'Diamond (1).csv',
 'Diamond (2).csv',
 'Diamond (3).csv',
 'Diamond (4).csv',
 'Diamond (5).csv',
 'Diamond (6).csv',
 'Diamond.csv',
 'DropboxInstaller.exe',
 'edb_npgsql.exe',
 'edb_pgjdbc.exe',
 'edb_psqlodbc.exe',
 'edb_psqlodbc.exe-20170203172812',
 'edb_psqlodbc.exe-20170307203617',
 'final invoice edureka  - Sheet1.pdf',
 'FinalPythonforRUsersAnapproachforDataScience (1).docx',
 'FinalPythonforRUsersAnapproachforDataScience (2).docx',
 'FinalPythonforRUsersAnapproachforDataScience (3).docx',
 'FinalPythonforRUsersAnapproachforDataScience (4).docx',
 'FinalPythonforRUsersAnapproachforDataScience.docx',
 'final_webinar (1).pdf',
 'final_webinar.pdf',
 'Git-2.11.0-64-bit.exe',
 'Git-2.12.0-64-bit.exe',
 'GitHubSetup (1).exe',
 'GitHubSetup (2).exe',
 'GitHubSetup.exe',
 'GOMAUDIOGLOBALSETUP.EXE',
 'Hdma.csv',
 'Hedonic.csv',
 'HP Downloads',
 'HPSupportSolutionsFramework-12.5.32.203.exe',
 'image.png',
 'IMS PROSCHOOL Workshop.pptx.pdf',
 'IMS PROSCHOOL Workshop.pptx.pptx',
 'internship.docx',
 'Introduction to SAS (1).pdf',
 'Introduction to SAS Part 1 (1).pdf',
 'Introduction to SAS Part 1.pdf',
 'Introduction to SAS.pdf',
 'Invoice for Digital Vidya.pdf',
 'Invoice for Weekendr.pdf',
 'Invoice format - Ajay Ohri CONTATA (1).xls',
 'Invoice format - Ajay Ohri CONTATA.xls',
 'invoice rapid miner.pdf',
 'Invoice trafla format.docx',
 'iris2 (1).ipynb',
 'iris2 (2).ipynb',
 'iris2.ipynb',
 'January invoice Indicus  .pdf',
 'June AV   Invoice   - Sheet1.pdf',
 'Lecture 6 - KNN & Naive Bayes.ppt',
 'Local Disk (C) - Shortcut.lnk',
 'logistic regression - script for ppt.R',
 'logistic_regression_-_script_for_ppt.html',
 'March invoice Indicus   - Sheet1.pdf',
 'mongodb-win32-x86_64-2008plus-ssl-3.4.2-signed.msi',
 'mongodb-win32-x86_64-3.4.2-signed.msi',
 'mtcarslm.R',
 'nltk.ipynb',
 'notebook-Copy1.html',
 'Offer Letter - Ajay Ohri (1).pdf',
 'Offer Letter - Ajay Ohri.pdf',
 'Other Data Mining  Methods (1).pdf',
 'Other Data Mining  Methods.pdf',
 'output1 (1).xls',
 'output1 (2).xls',
 'output1.xls',
 'pandas+11.ipynb',
 'pandas+analysis+1.ipynb',
 'pandas+data+manipulation.ipynb',
 'passport image.pdf',
 'Pawconinvoice2016.pdf',
 'Pawconinvoice2017 (1).pdf',
 'Pawconinvoice2017 (2).pdf',
 'Pawconinvoice2017 (3).pdf',
 'Pawconinvoice2017.pdf',
 'Payslip Feb 2016 - Sheet1.pdf',
 'Payslip Feb 2016.pdf',
 'Payslip Format Decisionstats - Sheet1.pdf',
 'Payslip Jan 2016 - Sheet1.pdf',
 'Payslip Jan 2016.pdf',
 'Payslip March 2016 - Sheet1.pdf',
 'Payslip March 2016.pdf',
 'pgd.csv',
 'postgresql-9.6.1-1-windows-x64.exe',
 'Program 1-results.rtf',
 'protein.csv',
 'python+with+postgres (1).ipynb',
 'python+with+postgres.ipynb',
 'Python.docx',
 'R-3.3.2-win.exe',
 'R-3.3.3-win.exe',
 'RCertificationExam.pdf',
 'reg+model.ipynb',
 'Revision -  Business Analytics (1).pdf',
 'Revision -  Business Analytics.pdf',
 'RidingMowers.csv',
 'rsconnect',
 'RStudio-1.0.136.exe',
 'Salary Slip, Feb 2016.pdf',
 'Salary Slip, Jan 2016.pdf',
 'Salary Slip, March 2016 (1).pdf',
 'Salary Slip, March 2016 (2).pdf',
 'Salary Slip, March 2016.pdf',
 'sales-of-shampoo-over-a-three-ye.csv',
 'SAS part 2.pdf',
 'SAS Part 3.pdf',
 'sas-university-edition-107140.pdf',
 'Scan0095.pdf',
 'Scanned Invoice for Collabera.pdf',
 'Screenshot 2017-01-23 12.36.55.png',
 'September invoice adaptive analytics   - Sheet1.pdf',
 'Sollers January.pdf',
 'sqlalchemy.ipynb',
 'stackoverflow-dump-analysis.html',
 'Sunstone.pdf',
 'Tableau.pdf',
 'TableauPublicDesktop-64bit-10-1-3.exe',
 'TableauPublicDesktop-64bit-10-1-4.exe',
 'telecom.csv',
 'TelecomServiceProviderCaseStudy.pdf',
 'Text Mining (1).pdf',
 'Text Mining.pdf',
 'third.sas7bdat',
 'Time Series  Forecasting (1).pdf',
 'Time Series  Forecasting.pdf',
 'ts.html',
 'ts.R',
 'Unconfirmed 373974.crdownload',
 'Unconfirmed 376562.crdownload',
 'Unconfirmed 376991.crdownload',
 'Unconfirmed 930917.crdownload',
 'Unconfirmed 950045.crdownload',
 'unvbasicvapp__9411008__ova__en__sp0__1.ova.crdownload',
 'VirtualBox-5.1.8-111374-Win (1).exe',
 'VirtualBox-5.1.8-111374-Win.exe',
 'Web+Scraping+Yelp+with+Beautiful+Soup.ipynb',
 'Webinar for Business Analytics.pdf',
 'WhatsApp Image 2017-02-18 at 08.42.55 (1).jpeg',
 'WhatsApp Image 2017-02-18 at 08.42.55.jpeg']

In [100]:
result = [i for i in glob.glob('*.{}'.format(extension))]
print(result)


['AirPassengers.csv', 'BigDiamonds.csv', 'Boston (1).csv', 'Boston.csv', 'ccFraud.csv', 'class2.csv', 'data1.csv', 'datasets.csv', 'Diamond (1).csv', 'Diamond (2).csv', 'Diamond (3).csv', 'Diamond (4).csv', 'Diamond (5).csv', 'Diamond (6).csv', 'Diamond.csv', 'Hdma.csv', 'Hedonic.csv', 'pgd.csv', 'protein.csv', 'RidingMowers.csv', 'sales-of-shampoo-over-a-three-ye.csv', 'telecom.csv']

In [101]:
f=pd.read_csv('ccFraud.csv')

In [103]:
f.dtypes


Out[103]:
custID          int64
gender          int64
state           int64
cardholder      int64
balance         int64
numTrans        int64
numIntlTrans    int64
creditLine      int64
fraudRisk       int64
dtype: object

In [104]:
f.index


Out[104]:
RangeIndex(start=0, stop=10000000, step=1)

In [105]:
f.columns


Out[105]:
Index(['custID', 'gender', 'state', 'cardholder', 'balance', 'numTrans',
       'numIntlTrans', 'creditLine', 'fraudRisk'],
      dtype='object')

In [106]:
f.values


Out[106]:
array([[       1,        1,       35, ...,       14,        2,        0],
       [       2,        2,        2, ...,        0,       18,        0],
       [       3,        2,        2, ...,        9,       16,        0],
       ..., 
       [ 9999998,        1,       24, ...,        0,        8,        0],
       [ 9999999,        1,       28, ...,       19,        6,        0],
       [10000000,        1,       23, ...,        0,        7,        0]], dtype=int64)

In [108]:
f.describe()


Out[108]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
count 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07
mean 5.000000e+06 1.382177e+00 2.466127e+01 1.030004e+00 4.109920e+03 2.893519e+01 4.047190e+00 9.134469e+00 5.960140e-02
std 2.886751e+06 4.859195e-01 1.497012e+01 1.705991e-01 3.996847e+03 2.655378e+01 8.602970e+00 9.641974e+00 2.367469e-01
min 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00
25% 2.500001e+06 1.000000e+00 1.000000e+01 1.000000e+00 0.000000e+00 1.000000e+01 0.000000e+00 4.000000e+00 0.000000e+00
50% 5.000000e+06 1.000000e+00 2.400000e+01 1.000000e+00 3.706000e+03 1.900000e+01 0.000000e+00 6.000000e+00 0.000000e+00
75% 7.500000e+06 2.000000e+00 3.800000e+01 1.000000e+00 6.000000e+03 3.900000e+01 4.000000e+00 1.100000e+01 0.000000e+00
max 1.000000e+07 2.000000e+00 5.100000e+01 2.000000e+00 4.148500e+04 1.000000e+02 6.000000e+01 7.500000e+01 1.000000e+00

In [109]:
f.T


Out[109]:
0 1 2 3 4 5 6 7 8 9 ... 9999990 9999991 9999992 9999993 9999994 9999995 9999996 9999997 9999998 9999999
custID 1 2 3 4 5 6 7 8 9 10 ... 9999991 9999992 9999993 9999994 9999995 9999996 9999997 9999998 9999999 10000000
gender 1 2 2 1 1 2 1 1 2 1 ... 1 2 1 2 1 1 1 1 1 1
state 35 2 2 15 46 44 3 10 32 23 ... 16 36 38 43 16 37 16 24 28 23
cardholder 1 1 1 1 1 2 1 1 1 1 ... 2 1 1 1 2 1 1 1 1 1
balance 3000 0 0 0 0 5546 2000 6016 2428 0 ... 1561 5217 0 2607 17376 0 0 9000 7000 0
numTrans 4 9 27 12 11 21 41 20 4 18 ... 8 6 7 6 3 10 33 38 20 13
numIntlTrans 14 0 9 0 16 0 0 3 10 56 ... 0 0 0 0 0 0 2 0 19 0
creditLine 2 18 16 5 7 13 1 6 22 5 ... 5 5 2 5 20 9 4 8 6 7
fraudRisk 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

9 rows × 10000000 columns


In [110]:
f.sort(columns='balance')


C:\Users\Dell\Anaconda3\lib\site-packages\ipykernel\__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':
Out[110]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
9999999 10000000 1 23 1 0 13 0 7 0
2581279 2581280 1 37 2 0 34 0 28 0
6743525 6743526 1 49 1 0 35 0 27 0
6743524 6743525 1 44 1 0 13 16 13 0
6743522 6743523 2 5 1 0 6 5 12 0
6743519 6743520 1 44 1 0 8 0 13 0
6743517 6743518 2 46 1 0 6 0 9 0
2581289 2581290 2 24 1 0 24 0 26 0
6743501 6743502 1 44 1 0 20 0 5 0
6743497 6743498 2 35 1 0 24 0 7 0
2581293 2581294 1 15 1 0 10 1 1 0
6743496 6743497 2 44 1 0 24 0 8 0
6743486 6743487 1 39 1 0 5 8 3 0
2581298 2581299 2 5 1 0 12 0 58 0
6743484 6743485 1 10 1 0 44 9 8 0
6743483 6743484 2 7 1 0 4 4 3 0
2581303 2581304 1 29 1 0 10 4 5 0
6743482 6743483 1 39 1 0 27 0 7 0
2581305 2581306 1 2 1 0 15 0 5 0
6743479 6743480 1 46 1 0 38 0 5 0
2581309 2581310 2 5 1 0 22 0 23 0
2581311 2581312 1 29 1 0 25 0 4 0
6743527 6743528 1 5 1 0 9 0 8 0
6743476 6743477 2 5 1 0 26 0 10 0
6743528 6743529 1 29 1 0 31 2 2 0
6743533 6743534 2 35 1 0 9 0 5 0
2581234 2581235 2 29 1 0 15 13 11 0
2581238 2581239 1 3 1 0 65 0 24 0
6743568 6743569 1 5 1 0 100 1 12 0
6743567 6743568 2 23 1 0 14 7 3 0
... ... ... ... ... ... ... ... ... ...
8395955 8395956 1 5 1 34871 99 1 38 1
2006812 2006813 2 5 1 34898 16 10 38 1
3803760 3803761 2 46 1 34938 37 5 37 1
5174944 5174945 1 5 1 34941 45 0 57 1
9368966 9368967 2 46 1 35000 10 0 34 1
753353 753354 1 17 1 35039 100 4 39 1
962369 962370 1 46 1 35074 30 0 47 1
8826733 8826734 1 44 1 35079 27 0 35 1
2896280 2896281 2 12 1 35233 64 6 35 1
7061678 7061679 1 18 1 35239 13 7 75 1
8070316 8070317 1 49 1 35382 14 0 53 1
1017215 1017216 2 3 1 35447 9 4 40 1
4631316 4631317 1 48 1 35868 18 0 39 1
5690915 5690916 1 32 1 35957 28 12 51 1
887966 887967 1 5 1 35982 70 4 43 1
8641170 8641171 1 7 1 36000 9 11 35 1
6885917 6885918 1 39 1 36065 8 0 64 1
1092996 1092997 2 44 1 36099 59 0 37 1
8797380 8797381 1 36 1 36153 17 5 37 1
2302929 2302930 2 2 1 36499 43 2 41 1
1211347 1211348 2 26 1 36534 30 0 53 1
2619291 2619292 2 5 1 36567 48 3 59 1
7829649 7829650 2 40 1 36592 11 3 71 1
7530764 7530765 1 5 1 36671 8 0 75 1
9123139 9123140 1 3 1 37000 10 1 36 1
7202753 7202754 1 51 1 37557 10 0 40 1
162444 162445 2 39 1 39554 6 16 52 1
471477 471478 1 35 1 39725 61 0 41 1
9957408 9957409 2 48 1 39987 84 1 56 1
3086051 3086052 1 10 1 41485 35 0 56 1

10000000 rows × 9 columns


In [112]:
f.sort_index(axis=0, ascending=False)


Out[112]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
9999999 10000000 1 23 1 0 13 0 7 0
9999998 9999999 1 28 1 7000 20 19 6 0
9999997 9999998 1 24 1 9000 38 0 8 0
9999996 9999997 1 16 1 0 33 2 4 0
9999995 9999996 1 37 1 0 10 0 9 0
9999994 9999995 1 16 2 17376 3 0 20 0
9999993 9999994 2 43 1 2607 6 0 5 0
9999992 9999993 1 38 1 0 7 0 2 0
9999991 9999992 2 36 1 5217 6 0 5 0
9999990 9999991 1 16 2 1561 8 0 5 0
9999989 9999990 2 4 1 12000 17 7 11 0
9999988 9999989 1 18 1 15000 72 0 14 1
9999987 9999988 2 24 1 6000 46 0 5 0
9999986 9999987 1 23 1 7000 59 0 6 0
9999985 9999986 2 45 2 3000 4 0 2 0
9999984 9999985 1 39 1 7000 30 0 6 0
9999983 9999984 2 13 1 0 2 0 5 0
9999982 9999983 1 44 1 3000 14 0 2 0
9999981 9999982 1 33 1 0 23 26 18 0
9999980 9999981 1 10 1 13889 54 0 13 0
9999979 9999980 1 6 1 7669 5 3 7 0
9999978 9999979 1 44 1 5000 12 0 4 0
9999977 9999978 1 35 1 8000 24 0 7 0
9999976 9999977 2 4 1 0 14 0 7 0
9999975 9999976 2 48 1 4210 23 0 8 0
9999974 9999975 1 25 1 0 18 0 3 0
9999973 9999974 1 10 2 0 36 31 14 0
9999972 9999973 1 46 1 5000 69 0 4 0
9999971 9999972 2 40 1 3683 31 3 7 0
9999970 9999971 1 10 1 0 12 0 11 0
... ... ... ... ... ... ... ... ... ...
29 30 2 49 1 5192 84 0 13 1
28 29 2 20 1 0 19 0 2 0
27 28 1 9 1 12000 20 0 11 0
26 27 1 38 1 4000 21 5 3 0
25 26 2 29 1 5000 4 9 4 0
24 25 1 25 1 0 12 0 65 0
23 24 2 21 1 0 15 0 3 0
22 23 1 5 1 0 7 0 11 0
21 22 1 34 1 0 22 0 3 0
20 21 1 39 1 4000 24 0 3 0
19 20 2 31 1 1860 21 10 8 0
18 19 1 5 1 9000 20 2 8 0
17 18 1 35 1 3113 13 6 8 0
16 17 2 18 1 13970 20 0 13 0
15 16 1 44 1 0 22 0 5 0
14 15 1 27 1 5227 60 0 17 0
13 14 2 38 1 9000 41 3 8 0
12 13 1 6 1 0 45 2 4 0
11 12 1 10 1 3000 20 0 2 0
10 11 1 46 1 4601 54 0 4 0
9 10 1 23 1 0 18 56 5 0
8 9 2 32 1 2428 4 10 22 0
7 8 1 10 1 6016 20 3 6 0
6 7 1 3 1 2000 41 0 1 0
5 6 2 44 2 5546 21 0 13 0
4 5 1 46 1 0 11 16 7 0
3 4 1 15 1 0 12 0 5 0
2 3 2 2 1 0 27 9 16 0
1 2 2 2 1 0 9 0 18 0
0 1 1 35 1 3000 4 14 2 0

10000000 rows × 9 columns


In [113]:
f.sort_index(axis=1)


Out[113]:
balance cardholder creditLine custID fraudRisk gender numIntlTrans numTrans state
0 3000 1 2 1 0 1 14 4 35
1 0 1 18 2 0 2 0 9 2
2 0 1 16 3 0 2 9 27 2
3 0 1 5 4 0 1 0 12 15
4 0 1 7 5 0 1 16 11 46
5 5546 2 13 6 0 2 0 21 44
6 2000 1 1 7 0 1 0 41 3
7 6016 1 6 8 0 1 3 20 10
8 2428 1 22 9 0 2 10 4 32
9 0 1 5 10 0 1 56 18 23
10 4601 1 4 11 0 1 0 54 46
11 3000 1 2 12 0 1 0 20 10
12 0 1 4 13 0 1 2 45 6
13 9000 1 8 14 0 2 3 41 38
14 5227 1 17 15 0 1 0 60 27
15 0 1 5 16 0 1 0 22 44
16 13970 1 13 17 0 2 0 20 18
17 3113 1 8 18 0 1 6 13 35
18 9000 1 8 19 0 1 2 20 5
19 1860 1 8 20 0 2 10 21 31
20 4000 1 3 21 0 1 0 24 39
21 0 1 3 22 0 1 0 22 34
22 0 1 11 23 0 1 0 7 5
23 0 1 3 24 0 2 0 15 21
24 0 1 65 25 0 1 0 12 25
25 5000 1 4 26 0 2 9 4 29
26 4000 1 3 27 0 1 5 21 38
27 12000 1 11 28 0 1 0 20 9
28 0 1 2 29 0 2 0 19 20
29 5192 1 13 30 1 2 0 84 49
... ... ... ... ... ... ... ... ... ...
9999970 0 1 11 9999971 0 1 0 12 10
9999971 3683 1 7 9999972 0 2 3 31 40
9999972 5000 1 4 9999973 0 1 0 69 46
9999973 0 2 14 9999974 0 1 31 36 10
9999974 0 1 3 9999975 0 1 0 18 25
9999975 4210 1 8 9999976 0 2 0 23 48
9999976 0 1 7 9999977 0 2 0 14 4
9999977 8000 1 7 9999978 0 1 0 24 35
9999978 5000 1 4 9999979 0 1 0 12 44
9999979 7669 1 7 9999980 0 1 3 5 6
9999980 13889 1 13 9999981 0 1 0 54 10
9999981 0 1 18 9999982 0 1 26 23 33
9999982 3000 1 2 9999983 0 1 0 14 44
9999983 0 1 5 9999984 0 2 0 2 13
9999984 7000 1 6 9999985 0 1 0 30 39
9999985 3000 2 2 9999986 0 2 0 4 45
9999986 7000 1 6 9999987 0 1 0 59 23
9999987 6000 1 5 9999988 0 2 0 46 24
9999988 15000 1 14 9999989 1 1 0 72 18
9999989 12000 1 11 9999990 0 2 7 17 4
9999990 1561 2 5 9999991 0 1 0 8 16
9999991 5217 1 5 9999992 0 2 0 6 36
9999992 0 1 2 9999993 0 1 0 7 38
9999993 2607 1 5 9999994 0 2 0 6 43
9999994 17376 2 20 9999995 0 1 0 3 16
9999995 0 1 9 9999996 0 1 0 10 37
9999996 0 1 4 9999997 0 1 2 33 16
9999997 9000 1 8 9999998 0 1 0 38 24
9999998 7000 1 6 9999999 0 1 19 20 28
9999999 0 1 7 10000000 0 1 0 13 23

10000000 rows × 9 columns


In [115]:
f.head()


Out[115]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
0 1 1 35 1 3000 4 14 2 0
1 2 2 2 1 0 9 0 18 0
2 3 2 2 1 0 27 9 16 0
3 4 1 15 1 0 12 0 5 0
4 5 1 46 1 0 11 16 7 0

In [116]:
f.tail(2)


Out[116]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
9999998 9999999 1 28 1 7000 20 19 6 0
9999999 10000000 1 23 1 0 13 0 7 0

In [117]:
f['balance']


Out[117]:
0           3000
1              0
2              0
3              0
4              0
5           5546
6           2000
7           6016
8           2428
9              0
10          4601
11          3000
12             0
13          9000
14          5227
15             0
16         13970
17          3113
18          9000
19          1860
20          4000
21             0
22             0
23             0
24             0
25          5000
26          4000
27         12000
28             0
29          5192
           ...  
9999970        0
9999971     3683
9999972     5000
9999973        0
9999974        0
9999975     4210
9999976        0
9999977     8000
9999978     5000
9999979     7669
9999980    13889
9999981        0
9999982     3000
9999983        0
9999984     7000
9999985     3000
9999986     7000
9999987     6000
9999988    15000
9999989    12000
9999990     1561
9999991     5217
9999992        0
9999993     2607
9999994    17376
9999995        0
9999996        0
9999997     9000
9999998     7000
9999999        0
Name: balance, dtype: int64

In [118]:
f[1:3]


Out[118]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
1 2 2 2 1 0 9 0 18 0
2 3 2 2 1 0 27 9 16 0

In [121]:
f.loc[:,['balance' , 'gender' ]]


Out[121]:
balance gender
0 3000 1
1 0 2
2 0 2
3 0 1
4 0 1
5 5546 2
6 2000 1
7 6016 1
8 2428 2
9 0 1
10 4601 1
11 3000 1
12 0 1
13 9000 2
14 5227 1
15 0 1
16 13970 2
17 3113 1
18 9000 1
19 1860 2
20 4000 1
21 0 1
22 0 1
23 0 2
24 0 1
25 5000 2
26 4000 1
27 12000 1
28 0 2
29 5192 2
... ... ...
9999970 0 1
9999971 3683 2
9999972 5000 1
9999973 0 1
9999974 0 1
9999975 4210 2
9999976 0 2
9999977 8000 1
9999978 5000 1
9999979 7669 1
9999980 13889 1
9999981 0 1
9999982 3000 1
9999983 0 2
9999984 7000 1
9999985 3000 2
9999986 7000 1
9999987 6000 2
9999988 15000 1
9999989 12000 2
9999990 1561 1
9999991 5217 2
9999992 0 1
9999993 2607 2
9999994 17376 1
9999995 0 1
9999996 0 1
9999997 9000 1
9999998 7000 1
9999999 0 1

10000000 rows × 2 columns


In [122]:
f[['balance' , 'gender' ]]


Out[122]:
balance gender
0 3000 1
1 0 2
2 0 2
3 0 1
4 0 1
5 5546 2
6 2000 1
7 6016 1
8 2428 2
9 0 1
10 4601 1
11 3000 1
12 0 1
13 9000 2
14 5227 1
15 0 1
16 13970 2
17 3113 1
18 9000 1
19 1860 2
20 4000 1
21 0 1
22 0 1
23 0 2
24 0 1
25 5000 2
26 4000 1
27 12000 1
28 0 2
29 5192 2
... ... ...
9999970 0 1
9999971 3683 2
9999972 5000 1
9999973 0 1
9999974 0 1
9999975 4210 2
9999976 0 2
9999977 8000 1
9999978 5000 1
9999979 7669 1
9999980 13889 1
9999981 0 1
9999982 3000 1
9999983 0 2
9999984 7000 1
9999985 3000 2
9999986 7000 1
9999987 6000 2
9999988 15000 1
9999989 12000 2
9999990 1561 1
9999991 5217 2
9999992 0 1
9999993 2607 2
9999994 17376 1
9999995 0 1
9999996 0 1
9999997 9000 1
9999998 7000 1
9999999 0 1

10000000 rows × 2 columns


In [125]:
f[f['balance'] > 3000]


Out[125]:
custID gender state cardholder balance numTrans numIntlTrans creditLine fraudRisk
5 6 2 44 2 5546 21 0 13 0
7 8 1 10 1 6016 20 3 6 0
10 11 1 46 1 4601 54 0 4 0
13 14 2 38 1 9000 41 3 8 0
14 15 1 27 1 5227 60 0 17 0
16 17 2 18 1 13970 20 0 13 0
17 18 1 35 1 3113 13 6 8 0
18 19 1 5 1 9000 20 2 8 0
20 21 1 39 1 4000 24 0 3 0
25 26 2 29 1 5000 4 9 4 0
26 27 1 38 1 4000 21 5 3 0
27 28 1 9 1 12000 20 0 11 0
29 30 2 49 1 5192 84 0 13 1
31 32 1 31 1 6000 8 0 5 0
32 33 2 48 1 4899 49 0 10 0
33 34 2 20 1 7000 43 2 6 0
35 36 1 20 1 10257 23 0 13 0
36 37 1 36 1 5000 24 0 4 0
38 39 1 5 1 5000 14 0 4 0
39 40 2 46 1 9000 7 0 8 0
41 42 1 35 1 4973 13 0 12 0
43 44 2 11 1 17656 16 3 25 1
47 48 1 3 1 11090 41 0 13 0
49 50 2 5 1 10826 4 0 20 0
53 54 2 5 1 8000 22 0 7 0
54 55 1 10 1 6000 52 0 5 0
55 56 2 43 2 9000 5 0 8 0
56 57 2 10 1 8000 30 60 7 1
59 60 2 23 2 11025 14 15 16 0
60 61 2 19 2 4000 1 2 3 0
... ... ... ... ... ... ... ... ... ...
9999933 9999934 1 44 1 4000 13 0 3 0
9999936 9999937 2 10 1 8000 56 0 7 0
9999937 9999938 2 7 1 7000 40 0 6 0
9999940 9999941 1 35 1 6000 30 26 5 0
9999944 9999945 1 35 1 7000 97 0 6 0
9999946 9999947 1 5 1 6000 2 6 5 0
9999948 9999949 1 2 1 9395 57 0 13 0
9999949 9999950 2 21 1 4000 10 0 3 0
9999950 9999951 2 35 1 7468 35 0 16 0
9999953 9999954 1 5 1 6729 35 0 6 0
9999954 9999955 2 29 1 4877 7 0 17 0
9999955 9999956 2 5 1 3770 98 0 23 0
9999960 9999961 1 35 1 4000 27 0 3 0
9999968 9999969 1 41 1 3938 24 9 16 0
9999971 9999972 2 40 1 3683 31 3 7 0
9999972 9999973 1 46 1 5000 69 0 4 0
9999975 9999976 2 48 1 4210 23 0 8 0
9999977 9999978 1 35 1 8000 24 0 7 0
9999978 9999979 1 44 1 5000 12 0 4 0
9999979 9999980 1 6 1 7669 5 3 7 0
9999980 9999981 1 10 1 13889 54 0 13 0
9999984 9999985 1 39 1 7000 30 0 6 0
9999986 9999987 1 23 1 7000 59 0 6 0
9999987 9999988 2 24 1 6000 46 0 5 0
9999988 9999989 1 18 1 15000 72 0 14 1
9999989 9999990 2 4 1 12000 17 7 11 0
9999991 9999992 2 36 1 5217 6 0 5 0
9999994 9999995 1 16 2 17376 3 0 20 0
9999997 9999998 1 24 1 9000 38 0 8 0
9999998 9999999 1 28 1 7000 20 19 6 0

5304124 rows × 9 columns


In [ ]: