In [5]:
import os
import glob
In [6]:
# Show the kernel's current working directory (it is changed by os.chdir below).
os.getcwd()
Out[6]:
'C:\\Users\\Dell'
In [7]:
# Folder holding the CSV files explored in this notebook.  Built from the
# current user's home directory rather than a hard-coded profile path, so the
# notebook is not tied to one machine; for a user whose home directory is
# C:\Users\Dell this yields the same 'C:\\Users\\Dell\\Downloads' as before.
path = os.path.join(os.path.expanduser('~'), 'Downloads')
In [8]:
# File extension used to build the glob pattern in the next cell.
extension = 'csv'
# Make `path` the working directory so relative patterns and names resolve there.
os.chdir(path)
In [9]:
# Collect every entry in the working directory whose name matches "*.<extension>".
pattern = '*.{}'.format(extension)
result = glob.glob(pattern)
print(result)
['AirPassengers.csv', 'BigDiamonds.csv', 'Boston (1).csv', 'Boston.csv', 'ccFraud.csv', 'class2.csv', 'data1.csv', 'datasets.csv', 'Diamond (1).csv', 'Diamond (2).csv', 'Diamond (3).csv', 'Diamond (4).csv', 'Diamond (5).csv', 'Diamond (6).csv', 'Diamond.csv', 'Hdma.csv', 'Hedonic.csv', 'pgd.csv', 'protein.csv', 'RidingMowers.csv', 'sales-of-shampoo-over-a-three-ye.csv', 'telecom.csv']
In [10]:
import pandas as pd
In [12]:
# List every entry of the current working directory (no argument means ".").
# NOTE(review): the saved output below exposes personal file names (invoices,
# payslips, an ID scan); consider clearing it before sharing the notebook.
os.listdir()
Out[12]:
['140749_2017.pdf',
'2011-F01-0700-Rev4-MDDS.XLSX',
'20150817143155.pdf',
'20160111060911.pdf',
'20170214052225.pdf',
'861415_10151432783238421_2124270505_o (1).jpg',
'861415_10151432783238421_2124270505_o.jpg',
'AirPassengers.csv',
'ajayo.jpg',
'Alison Python Invoice - Sheet1.pdf',
'Alison SAS Invoice - Sheet1.pdf',
'All+CSV+Files+in+a+Folder.ipynb',
'Allison Interview Jones Invoice - Sheet1.pdf',
'Anaconda3-4.2.0-Windows-x86_64.exe',
'apachehttpd.exe',
'April invoice adaptive analytics - Sheet1.pdf',
'Assignment14_BusinessAnalytics (1).docx',
'Assignment14_BusinessAnalytics.docx',
'Assignment15_BusinessAnalytics.docx',
'Assignment16_BusinessAnalytics (1).docx',
'Assignment16_BusinessAnalytics (2).docx',
'Assignment16_BusinessAnalytics.docx',
'aug ust 2008.JPG',
'avast_free_antivirus_setup_online.exe',
'avinash_ltv.zip',
'BigDiamonds.csv',
'BigDiamonds.csv (1).zip',
'BigDiamonds.csv (2)',
'BigDiamonds.csv (2).zip',
'BigDiamonds.csv.zip',
'Boston (1).csv',
'Boston.csv',
'CAM- Ajay Ohri (1).pdf',
'CAM- Ajay Ohri.pdf',
'camtasia.exe',
'ccFraud.csv',
'Certificate of Incorporation - U74999DL2015PTC282030 (26 June 2015).pdf',
'CHAP1-6PythonforRUsersAnapproachforDataScience.docx',
'chapter+3+_+spark.html',
'chi+square+test.ipynb',
'chromeinstall-8u111.exe',
'Cisco_WebEx_Add-On.exe',
'class2.csv',
'Collabera Invoice (1).pdf',
'Collabera Invoice.pdf',
'Collectcent Invoice.pdf',
'college degrees.pdf',
'DAP 1.pdf',
'DAP 1.pptx',
'DAP 6 RDBMS and SQL.pdf',
'DAP 6 RDBMS and SQL.pptx',
'data+exploration.ipynb',
'data+manipulation.ipynb',
'data1.csv',
'datasets.csv',
'Decision Trees.pdf',
'DecisionStatsOfferLetter.docx',
'DecisionStatsRelievingLetter.docx',
'descriptive+stats+in+Python.ipynb',
'desktop.ini',
'Diamond (1).csv',
'Diamond (2).csv',
'Diamond (3).csv',
'Diamond (4).csv',
'Diamond (5).csv',
'Diamond (6).csv',
'Diamond.csv',
'DropboxInstaller.exe',
'edb_npgsql.exe',
'edb_pgjdbc.exe',
'edb_psqlodbc.exe',
'edb_psqlodbc.exe-20170203172812',
'edb_psqlodbc.exe-20170307203617',
'final invoice edureka - Sheet1.pdf',
'FinalPythonforRUsersAnapproachforDataScience (1).docx',
'FinalPythonforRUsersAnapproachforDataScience (2).docx',
'FinalPythonforRUsersAnapproachforDataScience (3).docx',
'FinalPythonforRUsersAnapproachforDataScience (4).docx',
'FinalPythonforRUsersAnapproachforDataScience.docx',
'final_webinar (1).pdf',
'final_webinar.pdf',
'Git-2.11.0-64-bit.exe',
'Git-2.12.0-64-bit.exe',
'GitHubSetup (1).exe',
'GitHubSetup (2).exe',
'GitHubSetup.exe',
'GOMAUDIOGLOBALSETUP.EXE',
'Hdma.csv',
'Hedonic.csv',
'HP Downloads',
'HPSupportSolutionsFramework-12.5.32.203.exe',
'image.png',
'IMS PROSCHOOL Workshop.pptx.pdf',
'IMS PROSCHOOL Workshop.pptx.pptx',
'internship.docx',
'Introduction to SAS (1).pdf',
'Introduction to SAS Part 1 (1).pdf',
'Introduction to SAS Part 1.pdf',
'Introduction to SAS.pdf',
'Invoice for Digital Vidya.pdf',
'Invoice for Weekendr.pdf',
'Invoice format - Ajay Ohri CONTATA (1).xls',
'Invoice format - Ajay Ohri CONTATA.xls',
'invoice rapid miner.pdf',
'Invoice trafla format.docx',
'iris2 (1).ipynb',
'iris2 (2).ipynb',
'iris2.ipynb',
'January invoice Indicus .pdf',
'June AV Invoice - Sheet1.pdf',
'Lecture 6 - KNN & Naive Bayes.ppt',
'Local Disk (C) - Shortcut.lnk',
'logistic regression - script for ppt.R',
'logistic_regression_-_script_for_ppt.html',
'March invoice Indicus - Sheet1.pdf',
'mongodb-win32-x86_64-2008plus-ssl-3.4.2-signed.msi',
'mongodb-win32-x86_64-3.4.2-signed.msi',
'mtcarslm.R',
'nltk.ipynb',
'notebook-Copy1.html',
'Offer Letter - Ajay Ohri (1).pdf',
'Offer Letter - Ajay Ohri.pdf',
'Other Data Mining Methods (1).pdf',
'Other Data Mining Methods.pdf',
'output1 (1).xls',
'output1 (2).xls',
'output1.xls',
'pandas+11.ipynb',
'pandas+analysis+1.ipynb',
'pandas+data+manipulation.ipynb',
'passport image.pdf',
'Pawconinvoice2016.pdf',
'Pawconinvoice2017 (1).pdf',
'Pawconinvoice2017 (2).pdf',
'Pawconinvoice2017 (3).pdf',
'Pawconinvoice2017.pdf',
'Payslip Feb 2016 - Sheet1.pdf',
'Payslip Feb 2016.pdf',
'Payslip Format Decisionstats - Sheet1.pdf',
'Payslip Jan 2016 - Sheet1.pdf',
'Payslip Jan 2016.pdf',
'Payslip March 2016 - Sheet1.pdf',
'Payslip March 2016.pdf',
'pgd.csv',
'postgresql-9.6.1-1-windows-x64.exe',
'Program 1-results.rtf',
'protein.csv',
'python+with+postgres (1).ipynb',
'python+with+postgres.ipynb',
'Python.docx',
'R-3.3.2-win.exe',
'R-3.3.3-win.exe',
'RCertificationExam.pdf',
'reg+model.ipynb',
'Revision - Business Analytics (1).pdf',
'Revision - Business Analytics.pdf',
'RidingMowers.csv',
'rsconnect',
'RStudio-1.0.136.exe',
'Salary Slip, Feb 2016.pdf',
'Salary Slip, Jan 2016.pdf',
'Salary Slip, March 2016 (1).pdf',
'Salary Slip, March 2016 (2).pdf',
'Salary Slip, March 2016.pdf',
'sales-of-shampoo-over-a-three-ye.csv',
'SAS part 2.pdf',
'SAS Part 3.pdf',
'sas-university-edition-107140.pdf',
'Scan0095.pdf',
'Scanned Invoice for Collabera.pdf',
'Screenshot 2017-01-23 12.36.55.png',
'September invoice adaptive analytics - Sheet1.pdf',
'Sollers January.pdf',
'sqlalchemy.ipynb',
'stackoverflow-dump-analysis.html',
'Sunstone.pdf',
'Tableau.pdf',
'TableauPublicDesktop-64bit-10-1-3.exe',
'TableauPublicDesktop-64bit-10-1-4.exe',
'telecom.csv',
'TelecomServiceProviderCaseStudy.pdf',
'Text Mining (1).pdf',
'Text Mining.pdf',
'third.sas7bdat',
'Time Series Forecasting (1).pdf',
'Time Series Forecasting.pdf',
'ts.html',
'ts.R',
'Unconfirmed 373974.crdownload',
'Unconfirmed 376562.crdownload',
'Unconfirmed 376991.crdownload',
'Unconfirmed 930917.crdownload',
'Unconfirmed 950045.crdownload',
'unvbasicvapp__9411008__ova__en__sp0__1.ova.crdownload',
'VirtualBox-5.1.8-111374-Win (1).exe',
'VirtualBox-5.1.8-111374-Win.exe',
'Web+Scraping+Yelp+with+Beautiful+Soup.ipynb',
'Webinar for Business Analytics.pdf',
'WhatsApp Image 2017-02-18 at 08.42.55 (1).jpeg',
'WhatsApp Image 2017-02-18 at 08.42.55.jpeg']
In [15]:
# Load the BigDiamonds data set.  The location is built from `path` (the
# Downloads folder defined earlier) instead of repeating the absolute path;
# the file sits inside a folder that is itself named "BigDiamonds.csv".
diamonds = pd.read_csv(os.path.join(path, "BigDiamonds.csv", "BigDiamonds.csv"))
In [38]:
# Confirm what kind of object `diamonds` is.
type(diamonds)
Out[38]:
pandas.core.frame.DataFrame
In [37]:
# Number of rows in `diamonds`.
diamonds.shape[0]
Out[37]:
598024
In [36]:
# Column labels of `diamonds`.
# NOTE(review): the saved output lists 12 labels without 'Unnamed: 0', while the
# info() output further down reports 13 columns; presumably this cell was last
# run after the later cell that drops 'Unnamed: 0' - re-run in order to confirm.
diamonds.columns
Out[36]:
Index(['carat', 'cut', 'color', 'clarity', 'table', 'depth', 'cert',
'measurements', 'price', 'x', 'y', 'z'],
dtype='object')
In [35]:
# (rows, columns) of `diamonds`.
# NOTE(review): the saved output reports 12 columns although info() below lists
# 13; presumably recorded after 'Unnamed: 0' was dropped - re-run in order to confirm.
diamonds.shape
Out[35]:
(598024, 12)
In [19]:
# Per-column dtype and non-null count, plus memory usage.
diamonds.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 598024 entries, 0 to 598023
Data columns (total 13 columns):
Unnamed: 0 598024 non-null int64
carat 598024 non-null float64
cut 598024 non-null object
color 598024 non-null object
clarity 598024 non-null object
table 598024 non-null float64
depth 598024 non-null float64
cert 598024 non-null object
measurements 597978 non-null object
price 597311 non-null float64
x 596209 non-null float64
y 596172 non-null float64
z 595480 non-null float64
dtypes: float64(7), int64(1), object(5)
memory usage: 59.3+ MB
In [16]:
# Preview the first five rows of the freshly loaded frame.
diamonds.head(n=5)
Out[16]:
Unnamed: 0
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
0
1
0.25
V.Good
K
I1
59.0
63.7
GIA
3.96 x 3.95 x 2.52
NaN
3.96
3.95
2.52
1
2
0.23
Good
G
I1
61.0
58.1
GIA
4.00 x 4.05 x 2.30
NaN
4.00
4.05
2.30
2
3
0.34
Good
J
I2
58.0
58.7
GIA
4.56 x 4.53 x 2.67
NaN
4.56
4.53
2.67
3
4
0.21
V.Good
D
I1
60.0
60.6
GIA
3.80 x 3.82 x 2.31
NaN
3.80
3.82
2.31
4
5
0.31
V.Good
K
I1
59.0
62.2
EGL
4.35 x 4.26 x 2.68
NaN
4.35
4.26
2.68
In [17]:
# Keep an independent (deep) copy so later changes to `diamonds` leave it untouched.
diamonds2 = diamonds.copy(deep=True)
In [53]:
# Frequency of each `cut` grade.
# Uses `diamonds2` (the full copy made in the previous cell) because `diamonds3`
# is only created further down, so the old reference failed with NameError on
# a top-to-bottom run.  Series.value_counts replaces the deprecated top-level
# pd.value_counts helper.
diamonds2['cut'].value_counts()
Out[53]:
Ideal 369448
V.Good 168896
Good 59680
Name: cut, dtype: int64
In [18]:
# Summary statistics for the numeric columns.
# NOTE(review): the recorded run emitted a numpy RuntimeWarning and shows NaN
# quartiles for price/x/y/z - the columns info() reports as containing nulls.
diamonds.describe()
C:\Users\Dell\Anaconda3\lib\site-packages\numpy\lib\function_base.py:3834: RuntimeWarning: Invalid value encountered in percentile
RuntimeWarning)
Out[18]:
Unnamed: 0
carat
table
depth
price
x
y
z
count
598024.000000
598024.000000
598024.000000
598024.000000
597311.000000
596209.000000
596172.000000
595480.000000
mean
299012.500000
1.071297
57.631077
61.063683
8753.017974
5.990771
6.198671
4.033430
std
172634.803028
0.812696
4.996892
7.604342
13017.567760
1.530936
1.485891
1.240951
min
1.000000
0.200000
0.000000
0.000000
300.000000
0.150000
1.000000
0.040000
25%
149506.750000
0.500000
56.000000
61.000000
NaN
NaN
NaN
NaN
50%
299012.500000
0.900000
58.000000
62.100000
NaN
NaN
NaN
NaN
75%
448518.250000
1.500000
59.000000
62.700000
NaN
NaN
NaN
NaN
max
598024.000000
9.250000
75.900000
81.300000
99990.000000
13.890000
13.890000
13.180000
In [20]:
# Turn the frame into a 1/0 indicator of present/missing values.
# The name `diamonds` is rebound here, so the raw values are no longer
# reachable through it (diamonds2 still holds the earlier copy).
diamonds = diamonds.notnull().mul(1)
In [21]:
# Preview the first five rows of the missing-value indicator frame.
diamonds.head(n=5)
Out[21]:
Unnamed: 0
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
0
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
2
1
1
1
1
1
1
1
1
1
0
1
1
1
3
1
1
1
1
1
1
1
1
1
0
1
1
1
4
1
1
1
1
1
1
1
1
1
0
1
1
1
In [22]:
# Remove the 'Unnamed: 0' column.
# `axis` is passed by keyword because recent pandas no longer accepts it
# positionally; errors='ignore' keeps the cell re-runnable once the column is
# already gone (previously a second run raised KeyError).
diamonds = diamonds.drop('Unnamed: 0', axis=1, errors='ignore')
In [24]:
# Check the first five rows now that 'Unnamed: 0' has been removed.
diamonds.head(n=5)
Out[24]:
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
0
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
2
1
1
1
1
1
1
1
1
0
1
1
1
3
1
1
1
1
1
1
1
1
0
1
1
1
4
1
1
1
1
1
1
1
1
0
1
1
1
In [25]:
# The copy taken earlier still carries the original values.
diamonds2.head(n=5)
Out[25]:
Unnamed: 0
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
0
1
0.25
V.Good
K
I1
59.0
63.7
GIA
3.96 x 3.95 x 2.52
NaN
3.96
3.95
2.52
1
2
0.23
Good
G
I1
61.0
58.1
GIA
4.00 x 4.05 x 2.30
NaN
4.00
4.05
2.30
2
3
0.34
Good
J
I2
58.0
58.7
GIA
4.56 x 4.53 x 2.67
NaN
4.56
4.53
2.67
3
4
0.21
V.Good
D
I1
60.0
60.6
GIA
3.80 x 3.82 x 2.31
NaN
3.80
3.82
2.31
4
5
0.31
V.Good
K
I1
59.0
62.2
EGL
4.35 x 4.26 x 2.68
NaN
4.35
4.26
2.68
In [26]:
# Second independent (deep) copy, taken before rows with missing values are dropped from diamonds2.
diamonds3 = diamonds2.copy(deep=True)
In [28]:
# Show what replacing every missing value with a placeholder string looks like.
# The filled frame is only previewed; diamonds2 itself is not modified.
placeholder_filled = diamonds2.fillna("AJAY")
placeholder_filled.head()
Out[28]:
Unnamed: 0
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
0
1
0.25
V.Good
K
I1
59.0
63.7
GIA
3.96 x 3.95 x 2.52
AJAY
3.96
3.95
2.52
1
2
0.23
Good
G
I1
61.0
58.1
GIA
4.00 x 4.05 x 2.30
AJAY
4
4.05
2.3
2
3
0.34
Good
J
I2
58.0
58.7
GIA
4.56 x 4.53 x 2.67
AJAY
4.56
4.53
2.67
3
4
0.21
V.Good
D
I1
60.0
60.6
GIA
3.80 x 3.82 x 2.31
AJAY
3.8
3.82
2.31
4
5
0.31
V.Good
K
I1
59.0
62.2
EGL
4.35 x 4.26 x 2.68
AJAY
4.35
4.26
2.68
In [31]:
# Discard every row that has at least one missing value.
diamonds2 = diamonds2.dropna(axis=0, how="any")
In [32]:
# Re-check non-null counts after dropna: every column should now report the same count.
diamonds2.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 593784 entries, 493 to 598023
Data columns (total 13 columns):
Unnamed: 0 593784 non-null int64
carat 593784 non-null float64
cut 593784 non-null object
color 593784 non-null object
clarity 593784 non-null object
table 593784 non-null float64
depth 593784 non-null float64
cert 593784 non-null object
measurements 593784 non-null object
price 593784 non-null float64
x 593784 non-null float64
y 593784 non-null float64
z 593784 non-null float64
dtypes: float64(7), int64(1), object(5)
memory usage: 63.4+ MB
In [44]:
# Extract the frame's contents as a NumPy array and display it.
data=diamonds3.values
data
Out[44]:
array([[1, 0.25, 'V.Good', ..., 3.96, 3.95, 2.52],
[2, 0.23, 'Good', ..., 4.0, 4.05, 2.3],
[3, 0.34, 'Good', ..., 4.56, 4.53, 2.67],
...,
[598022, 3.43, 'Ideal', ..., 9.66, 9.61, 6.05],
[598023, 3.01, 'V.Good', ..., 9.15, 9.19, 5.77],
[598024, 4.13, 'Ideal', ..., 10.27, 10.19, 6.4]], dtype=object)
In [48]:
# Column labels of diamonds3 (reused below when rebuilding a frame from `data`).
diamonds3.columns
Out[48]:
Index(['Unnamed: 0', 'carat', 'cut', 'color', 'clarity', 'table', 'depth',
'cert', 'measurements', 'price', 'x', 'y', 'z'],
dtype='object')
In [47]:
# Rebuild a DataFrame from the raw array `data`.
# All rows and columns of `data` are used (the former [0:, 0:] and [0:] slices
# selected everything), the rows get a fresh 0..n-1 integer index, and the
# column labels are taken from diamonds3.  The earlier inline comments
# ("1st column as index", "1st row as the column names") did not describe
# what the code does and have been replaced.
g = pd.DataFrame(
    data=data,
    index=range(len(data)),
    columns=diamonds3.columns,
)
In [49]:
# Preview the first five rows of the rebuilt frame.
g.head(n=5)
Out[49]:
Unnamed: 0
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
0
1
0.25
V.Good
K
I1
59
63.7
GIA
3.96 x 3.95 x 2.52
NaN
3.96
3.95
2.52
1
2
0.23
Good
G
I1
61
58.1
GIA
4.00 x 4.05 x 2.30
NaN
4
4.05
2.3
2
3
0.34
Good
J
I2
58
58.7
GIA
4.56 x 4.53 x 2.67
NaN
4.56
4.53
2.67
3
4
0.21
V.Good
D
I1
60
60.6
GIA
3.80 x 3.82 x 2.31
NaN
3.8
3.82
2.31
4
5
0.31
V.Good
K
I1
59
62.2
EGL
4.35 x 4.26 x 2.68
NaN
4.35
4.26
2.68
In [55]:
# Positional row slice: rows at positions 2, 3 and 4, all columns.
diamonds3.iloc[2:5]
Out[55]:
Unnamed: 0
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
2
3
0.34
Good
J
I2
58.0
58.7
GIA
4.56 x 4.53 x 2.67
NaN
4.56
4.53
2.67
3
4
0.21
V.Good
D
I1
60.0
60.6
GIA
3.80 x 3.82 x 2.31
NaN
3.80
3.82
2.31
4
5
0.31
V.Good
K
I1
59.0
62.2
EGL
4.35 x 4.26 x 2.68
NaN
4.35
4.26
2.68
In [56]:
# All rows, restricted to the columns at positions 2, 3 and 4.
diamonds3[diamonds3.columns[2:5]]
Out[56]:
cut
color
clarity
0
V.Good
K
I1
1
Good
G
I1
2
Good
J
I2
3
V.Good
D
I1
4
V.Good
K
I1
5
Good
G
SI2
6
Good
G
SI2
7
V.Good
D
I1
8
V.Good
K
SI2
9
Good
F
SI1
10
V.Good
F
I1
11
V.Good
G
SI2
12
V.Good
D
SI2
13
Good
G
SI2
14
V.Good
F
I1
15
V.Good
J
SI2
16
Good
E
VS2
17
V.Good
G
I1
18
V.Good
F
I1
19
Good
E
SI1
20
V.Good
K
I1
21
Good
D
VS2
22
Ideal
E
I1
23
Good
D
SI2
24
V.Good
E
SI2
25
Good
G
SI2
26
Good
E
SI1
27
V.Good
H
SI1
28
Ideal
F
I1
29
Good
D
I1
...
...
...
...
597994
Ideal
I
VS2
597995
Ideal
J
VS1
597996
Good
F
VVS2
597997
V.Good
I
VS1
597998
Ideal
D
VS2
597999
Ideal
D
VVS2
598000
Ideal
F
VS2
598001
V.Good
E
VVS2
598002
Ideal
G
VS2
598003
Ideal
E
VS2
598004
Ideal
E
VS2
598005
Ideal
F
VVS2
598006
Ideal
H
SI2
598007
Ideal
F
VVS1
598008
Ideal
H
IF
598009
Good
J
VS2
598010
V.Good
H
SI1
598011
V.Good
D
VS1
598012
Ideal
I
VS2
598013
Ideal
E
IF
598014
Ideal
I
SI1
598015
Ideal
H
SI1
598016
Ideal
D
VS2
598017
Good
E
VS1
598018
Ideal
D
VS2
598019
Ideal
E
VVS2
598020
V.Good
I
VVS2
598021
Ideal
F
VS2
598022
V.Good
E
VS1
598023
Ideal
H
IF
598024 rows × 3 columns
In [59]:
# Select columns by label and preview the first rows.
selected = ['cut', 'color', 'clarity']
diamonds3[selected].head()
Out[59]:
cut
color
clarity
0
V.Good
K
I1
1
Good
G
I1
2
Good
J
I2
3
V.Good
D
I1
4
V.Good
K
I1
In [60]:
# Rows labelled 20 through 40 (label slices include both end points).
# `.ix` has been removed from pandas; `.loc` is the label-based indexer and
# selects the same rows on this integer index.
diamonds3.loc[20:40]
Out[60]:
Unnamed: 0
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
20
21
0.31
V.Good
K
I1
60.0
59.2
GIA
4.45 x 4.50 x 2.65
NaN
4.45
4.50
2.65
21
22
0.22
Good
D
VS2
61.0
63.7
GIA
3.77 x 3.73 x 2.39
NaN
3.77
3.73
2.39
22
23
0.21
Ideal
E
I1
59.0
61.5
GIA
3.80 x 3.82 x 2.34
NaN
3.80
3.82
2.34
23
24
0.21
Good
D
SI2
62.0
64.4
IGI
3.73 x 3.78 x 2.42
NaN
3.73
3.78
2.42
24
25
0.20
V.Good
E
SI2
58.0
62.5
IGI
3.71 x 3.75 x 2.34
NaN
3.71
3.75
2.34
25
26
0.20
Good
G
SI2
60.0
64.4
GIA
3.67 x 3.74 x 2.38
NaN
3.67
3.74
2.38
26
27
0.20
Good
E
SI1
61.0
59.3
GIA
3.81 x 3.79 x 2.25
NaN
3.81
3.79
2.25
27
28
0.20
V.Good
H
SI1
60.5
63.9
IGI
3.62 x 3.69 x 2.34
NaN
3.62
3.69
2.34
28
29
0.23
Ideal
F
I1
60.0
60.6
GIA
3.98 x 4.00 x 2.42
NaN
3.98
4.00
2.42
29
30
0.23
Good
D
I1
65.0
62.4
GIA
3.87 x 3.91 x 2.43
NaN
3.87
3.91
2.43
30
31
0.20
Ideal
F
SI2
58.0
60.9
IGI
3.79 x 3.82 x 2.32
NaN
3.79
3.82
2.32
31
32
0.20
V.Good
E
SI2
59.0
62.0
GIA
3.73 x 3.75 x 2.32
NaN
3.73
3.75
2.32
32
33
0.26
Ideal
F
I1
60.0
60.3
GIA
4.13 x 4.14 x 2.50
NaN
4.13
4.14
2.50
33
34
0.25
Good
H
I1
61.0
62.3
GIA
3.98 x 4.01 x 2.49
NaN
3.98
4.01
2.49
34
35
0.27
V.Good
J
I1
60.0
60.4
GIA
4.21 x 4.23 x 2.55
NaN
4.21
4.23
2.55
35
36
0.22
V.Good
E
SI1
58.0
63.8
GIA
3.85 x 3.83 x 2.45
NaN
3.85
3.83
2.45
36
37
0.22
Good
E
SI1
60.0
62.6
GIA
3.81 x 3.79 x 2.38
NaN
3.81
3.79
2.38
37
38
0.26
Ideal
E
I1
57.0
61.9
GIA
4.08 x 4.10 x 2.53
NaN
4.08
4.10
2.53
38
39
0.21
Good
G
SI1
60.0
64.0
IGI
3.68 x 3.76 x 2.38
NaN
3.68
3.76
2.38
39
40
0.20
Good
E
SI1
66.0
58.0
GIA
3.86 x 3.88 x 2.24
NaN
3.86
3.88
2.24
40
41
0.24
Ideal
E
I1
56.0
62.3
GIA
4.02 x 4.04 x 2.51
NaN
4.02
4.04
2.51
In [64]:
# Pairwise correlation of the numeric columns.
# The text columns (cut, color, clarity, ...) are excluded explicitly: older
# pandas skipped them silently, but recent versions raise when corr() meets
# non-numeric data.
diamonds3.select_dtypes(include=['number']).corr()
Out[64]:
Unnamed: 0
carat
table
depth
price
x
y
z
Unnamed: 0
1.000000
0.823737
0.022406
0.020020
0.709190
0.825588
0.922538
0.761033
carat
0.823737
1.000000
0.036533
0.009846
0.856328
0.860246
0.960807
0.792051
table
0.022406
0.036533
1.000000
0.448772
0.023378
0.027504
0.044542
0.030344
depth
0.020020
0.009846
0.448772
1.000000
-0.001006
-0.003279
0.007669
0.031801
price
0.709190
0.856328
0.023378
-0.001006
1.000000
0.719778
0.796765
0.645317
x
0.825588
0.860246
0.027504
-0.003279
0.719778
1.000000
0.894203
0.483102
y
0.922538
0.960807
0.044542
0.007669
0.796765
0.894203
1.000000
0.820211
z
0.761033
0.792051
0.030344
0.031801
0.645317
0.483102
0.820211
1.000000
In [65]:
# Preview the first five rows of diamonds3.
diamonds3.head(n=5)
Out[65]:
Unnamed: 0
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
0
1
0.25
V.Good
K
I1
59.0
63.7
GIA
3.96 x 3.95 x 2.52
NaN
3.96
3.95
2.52
1
2
0.23
Good
G
I1
61.0
58.1
GIA
4.00 x 4.05 x 2.30
NaN
4.00
4.05
2.30
2
3
0.34
Good
J
I2
58.0
58.7
GIA
4.56 x 4.53 x 2.67
NaN
4.56
4.53
2.67
3
4
0.21
V.Good
D
I1
60.0
60.6
GIA
3.80 x 3.82 x 2.31
NaN
3.80
3.82
2.31
4
5
0.31
V.Good
K
I1
59.0
62.2
EGL
4.35 x 4.26 x 2.68
NaN
4.35
4.26
2.68
In [67]:
# Drop the rows at positions 1 and 3 (looked up as index labels) and preview
# the result; diamonds3 itself is left unchanged.
rows_to_drop = diamonds3.index[[1, 3]]
diamonds3.drop(rows_to_drop).head()
Out[67]:
Unnamed: 0
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
0
1
0.25
V.Good
K
I1
59.0
63.7
GIA
3.96 x 3.95 x 2.52
NaN
3.96
3.95
2.52
2
3
0.34
Good
J
I2
58.0
58.7
GIA
4.56 x 4.53 x 2.67
NaN
4.56
4.53
2.67
4
5
0.31
V.Good
K
I1
59.0
62.2
EGL
4.35 x 4.26 x 2.68
NaN
4.35
4.26
2.68
5
6
0.20
Good
G
SI2
60.0
64.4
GIA
3.74 x 3.67 x 2.38
NaN
3.74
3.67
2.38
6
7
0.20
Good
G
SI2
63.0
62.6
GIA
3.72 x 3.65 x 2.31
NaN
3.72
3.65
2.31
In [68]:
# Series holding the integers 0..99, used below as row positions to drop.
s = pd.Series(range(100))
In [70]:
# Confirm that `s` is a pandas Series.
type(s)
Out[70]:
pandas.core.series.Series
In [71]:
# Drop the rows whose positions are listed in `s` (0..99) and preview the rest;
# diamonds3 itself is left unchanged.
# The positions are passed as a plain integer array.  The previous form,
# index[[s]], wrapped the Series in a list and only worked through a legacy
# NumPy indexing quirk.
diamonds3.drop(diamonds3.index[s.values]).head()
Out[71]:
Unnamed: 0
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
100
101
0.25
Ideal
D
I1
59.0
60.0
GIA
4.08 x 4.10 x 2.45
NaN
4.08
4.10
2.45
101
102
0.24
Good
F
VVS2
64.0
63.6
GIA
3.89 x 3.82 x 2.45
NaN
3.89
3.82
2.45
102
103
0.23
Ideal
J
SI1
59.0
62.4
EGL
2.46 x 3.9 x 3.98
NaN
2.46
3.90
3.98
103
104
0.21
Good
I
SI2
0.0
0.0
OTHER
0.00-0.00 x 0.00
NaN
NaN
NaN
NaN
104
105
0.21
V.Good
I
VS2
66.0
60.0
IGI
3.83 x 3.85 x 2.31
NaN
3.83
3.85
2.31
In [75]:
# Remove the name `diamonds` from the namespace; the copies diamonds2 and
# diamonds3 are separate objects and remain available.
# NOTE(review): re-running this cell raises NameError because the name is already gone.
del diamonds
In [79]:
# Rows with carat above 0.50 and price above 3000, written as boolean masks.
carat_mask = diamonds3['carat'] > .50
price_mask = diamonds3['price'] > 3000
diamonds3[carat_mask & price_mask]
Out[79]:
Unnamed: 0
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
277140
277141
0.70
V.Good
G
VS2
60.0
62.2
GIA
5.70 x 5.68 x 3.54
3001.0
5.70
5.68
3.54
277141
277142
0.61
Ideal
F
IF
58.0
62.4
GIA
5.42 x 5.38 x 3.37
3001.0
5.42
5.38
3.37
277142
277143
0.85
Ideal
J
VS2
58.0
61.3
GIA
6.08 x 6.06 x 3.72
3001.0
6.08
6.06
3.72
277143
277144
0.61
Ideal
G
IF
57.0
60.8
GIA
5.50 x 5.53 x 3.35
3001.0
5.50
5.53
3.35
277144
277145
0.70
Good
E
VVS2
59.0
63.2
GIA
5.54 x 5.56 x 3.51
3001.0
5.54
5.56
3.51
277145
277146
0.91
Good
I
SI1
56.0
63.5
GIA
6.04 x 6.10 x 3.85
3001.0
6.04
6.10
3.85
277146
277147
0.71
Good
E
VVS2
57.0
66.1
GIA
5.48 x 5.52 x 3.63
3001.0
5.48
5.52
3.63
277147
277148
1.00
Good
E
SI2
56.0
64.4
EGL USA
6.07 x 6.21 x 3.96
3001.0
6.07
6.21
3.96
277148
277149
1.11
V.Good
I
I1
56.0
64.3
GIA
6.51 x 6.58 x 4.21
3001.0
6.51
6.58
4.21
277149
277150
0.70
V.Good
G
VVS2
58.0
63.9
GIA
5.57 x 5.59 x 3.57
3001.0
5.57
5.59
3.57
277150
277151
0.70
V.Good
G
VVS2
59.0
64.5
GIA
5.53 x 5.54 x 3.57
3001.0
5.53
5.54
3.57
277151
277152
0.70
V.Good
G
VVS2
58.0
63.5
GIA
5.56 x 5.63 x 0.00
3001.0
5.56
5.63
NaN
277152
277153
0.80
V.Good
E
SI2
59.0
60.8
GIA
5.97 x 5.93 x 3.62
3001.0
5.97
5.93
3.62
277153
277154
0.80
V.Good
E
SI2
55.0
62.9
GIA
5.92 x 5.88 x 3.71
3001.0
5.92
5.88
3.71
277154
277155
0.85
V.Good
H
SI1
59.0
59.3
GIA
6.19 x 6.12 x 3.65
3001.0
6.19
6.12
3.65
277155
277156
1.03
Good
G
I2
56.0
63.9
EGL
6.38 x 6.33 x 4.06
3001.0
6.38
6.33
4.06
277156
277157
1.08
V.Good
G
SI2
58.0
61.7
EGL
6.54 x 6.52 x 4.03
3001.0
6.54
6.52
4.03
277157
277158
1.08
V.Good
I
SI1
58.0
61.1
EGL
6.61 x 6.58 x 4.03
3001.0
6.61
6.58
4.03
277158
277159
0.80
V.Good
G
SI1
58.0
63.3
GIA
5.81 x 5.90 x 3.70
3001.0
5.81
5.90
3.70
277159
277160
0.79
V.Good
G
SI1
57.0
63.8
GIA
5.80 x 5.89 x 3.73
3001.0
5.80
5.89
3.73
277160
277161
0.81
Ideal
F
VS1
59.0
61.2
OTHER
5.98 x 6.02 x 3.67
3001.0
5.98
6.02
3.67
277161
277162
1.01
V.Good
F
I1
59.5
63.3
IGI
6.25 x 6.30 x 3.97
3001.0
6.25
6.30
3.97
277162
277163
0.80
Ideal
G
SI1
57.0
62.7
GIA
5.90 x 5.94 x 3.71
3001.0
5.90
5.94
3.71
277163
277164
0.93
V.Good
G
SI2
57.0
64.9
EGL USA
6.07 x 6.09 x 3.94
3001.0
6.07
6.09
3.94
277164
277165
0.80
V.Good
G
SI1
61.0
61.2
GIA
5.86 x 5.92 x 3.61
3001.0
5.86
5.92
3.61
277165
277166
0.93
V.Good
H
SI1
55.0
64.4
EGL ISRAEL
6.12 x 6.15 x 3.95
3001.0
6.12
6.15
3.95
277166
277167
0.93
V.Good
H
SI1
55.0
64.4
EGL ISRAEL
6.12 x 6.15 x 3.95
3001.0
6.12
6.15
3.95
277167
277168
0.80
Ideal
G
SI1
58.0
62.0
GIA
5.91 x 5.96 x 3.68
3001.0
5.91
5.96
3.68
277168
277169
0.61
Ideal
E
VVS2
57.0
61.8
GIA
5.43 x 5.44 x 3.36
3002.0
5.43
5.44
3.36
277169
277170
0.80
V.Good
G
SI2
55.0
62.3
GIA
5.94 x 5.92 x 3.69
3002.0
5.94
5.92
3.69
...
...
...
...
...
...
...
...
...
...
...
...
...
...
597994
597995
4.65
Ideal
I
VS2
56.0
61.2
GIA
10.72 x 10.80 x 6.59
99626.0
10.72
10.80
6.59
597995
597996
5.75
Ideal
J
VS1
58.0
62.7
IGI
11.34 x 11.29 x 7.1
99630.0
11.34
11.29
7.10
597996
597997
3.01
Good
F
VVS2
62.0
62.5
GIA
9.15 x 9.12 x 5.71
99630.0
9.15
9.12
5.71
597997
597998
5.05
V.Good
I
VS1
58.0
0.0
HRD
10.8 x 10.76 x 6.88
99640.0
10.80
10.76
6.88
597998
597999
3.54
Ideal
D
VS2
61.0
59.1
GIA
9.95 x 10.00 x 5.89
99656.0
9.95
10.00
5.89
597999
598000
2.71
Ideal
D
VVS2
56.0
62.7
GIA
8.93 x 8.9 x 5.59
99660.0
8.93
8.90
5.59
598000
598001
3.65
Ideal
F
VS2
59.0
61.0
GIA
9.87 x 9.94 x 6.04
99669.0
9.87
9.94
6.04
598001
598002
3.01
V.Good
E
VVS2
57.0
63.4
GIA
9.16 x 9.09 x 5.78
99700.0
9.16
9.09
5.78
598002
598003
3.86
Ideal
G
VS2
60.0
62.3
GIA
9.92 x 10.06 x 6.22
99711.0
9.92
10.06
6.22
598003
598004
3.04
Ideal
E
VS2
58.0
61.2
GIA
9.38 x 9.33 x 5.73
99730.0
9.38
9.33
5.73
598004
598005
3.04
Ideal
E
VS2
56.0
62.0
GIA
9.35 x 9.31 x 5.79
99730.0
9.35
9.31
5.79
598005
598006
3.07
Ideal
F
VVS2
58.0
59.5
GIA
9.45 x 9.51 x 5.64
99776.0
9.45
9.51
5.64
598006
598007
5.33
Ideal
H
SI2
59.0
61.1
GIA
6.87 x 11.22 x 11.27
99778.0
6.87
11.22
11.27
598007
598008
3.56
Ideal
F
VVS1
61.2
58.0
GIA
9.82 x 9.87 x 6.02
99780.0
9.82
9.87
6.02
598008
598009
3.43
Ideal
H
IF
60.0
59.5
GIA
5.88 x 9.86 x 9.88
99802.0
5.88
9.86
9.88
598009
598010
5.02
Good
J
VS2
57.0
62.3
GIA
10.77 x 10.84 x 6.73
99806.0
10.77
10.84
6.73
598010
598011
5.01
V.Good
H
SI1
57.0
59.6
GIA
11.11 x 11.17 x 6.64
99810.0
11.11
11.17
6.64
598011
598012
3.05
V.Good
D
VS1
60.0
60.1
GIA
9.4 x 9.34 x 5.63
99870.0
9.40
9.34
5.63
598012
598013
5.59
Ideal
I
VS2
61.0
60.4
HRD
11.52 x 11.57 x 6.97
99890.0
11.52
11.57
6.97
598013
598014
2.57
Ideal
E
IF
59.0
60.9
GIA
8.82 x 8.88 x 5.39
99896.0
8.82
8.88
5.39
598014
598015
5.24
Ideal
I
SI1
60.0
59.5
GIA
11.35 x 11.43 x 6.78
99910.0
11.35
11.43
6.78
598015
598016
5.03
Ideal
H
SI1
58.0
62.2
HRD
6.82 x 10.94 x 10.98
99913.0
6.82
10.94
10.98
598016
598017
3.05
Ideal
D
VS2
59.0
61.3
GIA
5.73 x 9.33 x 9.36
99916.0
5.73
9.33
9.36
598017
598018
3.01
Good
E
VS1
61.0
62.6
GIA
9.16 x 9.25 x 5.76
99920.0
9.16
9.25
5.76
598018
598019
3.01
Ideal
D
VS2
58.0
62.0
GIA
9.25 x 9.2 x 5.72
99920.0
9.25
9.20
5.72
598019
598020
3.02
Ideal
E
VVS2
58.0
59.8
HRD
9.43 x 9.51 x 5.66
99930.0
9.43
9.51
5.66
598020
598021
5.01
V.Good
I
VVS2
63.5
61.5
IGI
10.78 x 10.89 x 6.68
99942.0
10.78
10.89
6.68
598021
598022
3.43
Ideal
F
VS2
54.0
62.7
GIA
9.66 x 9.61 x 6.05
99960.0
9.66
9.61
6.05
598022
598023
3.01
V.Good
E
VS1
58.0
62.9
GIA
9.15 x 9.19 x 5.77
99966.0
9.15
9.19
5.77
598023
598024
4.13
Ideal
H
IF
56.0
62.5
IGI
10.27 x 10.19 x 6.4
99990.0
10.27
10.19
6.40
320592 rows × 13 columns
In [80]:
# Remove the 'Unnamed: 0' column from diamonds3.
# drop(..., errors='ignore') makes the cell safe to re-run: the earlier `del`
# form raised KeyError once the column had already been removed.
diamonds3 = diamonds3.drop("Unnamed: 0", axis=1, errors="ignore")
In [82]:
# Rows priced above 5000, selected with a boolean mask.
diamonds3[diamonds3['price'] > 5000]
Out[82]:
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
350904
1.03
Ideal
J
VS2
60.0
62.0
GIA
6.43 x 6.48 x 4.00
5001.0
6.43
6.48
4.00
350905
1.01
V.Good
H
SI2
59.0
63.0
GIA
6.32 x 6.35 x 3.99
5001.0
6.32
6.35
3.99
350906
1.04
Ideal
G
SI2
61.0
59.8
GIA
6.57 x 6.59 x 3.94
5001.0
6.57
6.59
3.94
350907
0.90
Ideal
G
VS2
56.0
62.8
GIA
6.17 x 6.14 x 3.87
5001.0
6.17
6.14
3.87
350908
1.05
Ideal
I
SI1
59.0
60.3
GIA
6.60 x 6.57 x 3.97
5001.0
6.60
6.57
3.97
350909
1.09
V.Good
D
VS2
63.0
61.8
EGL
6.66 x 6.55 x 4.08
5001.0
6.66
6.55
4.08
350910
1.27
Ideal
G
SI2
55.0
62.1
EGL
6.96 x 6.91 x 4.31
5001.0
6.96
6.91
4.31
350911
1.40
Good
G
SI2
59.0
63.7
EGL
6.95 x 6.89 x 4.44
5001.0
6.95
6.89
4.44
350912
1.00
Good
G
SI1
58.0
65.7
GIA
6.15 x 6.08 x 4.02
5002.0
6.15
6.08
4.02
350913
0.92
V.Good
F
VS2
58.0
63.3
GIA
6.16 x 6.18 x 3.91
5002.0
6.16
6.18
3.91
350914
1.00
Good
H
SI2
59.0
65.2
GIA
6.17 x 6.19 x 4.03
5002.0
6.17
6.19
4.03
350915
0.79
Ideal
E
VS1
59.0
59.0
GIA
6.07 x 6.08 x 3.58
5002.0
6.07
6.08
3.58
350916
1.00
Good
H
SI2
61.0
64.0
GIA
6.19 x 6.22 x 3.97
5002.0
6.19
6.22
3.97
350917
1.02
Ideal
J
SI1
59.0
61.9
GIA
6.42 x 6.45 x 3.99
5002.0
6.42
6.45
3.99
350918
1.00
Good
E
SI1
52.0
65.8
GIA
6.15 x 6.12 x 4.03
5002.0
6.15
6.12
4.03
350919
1.08
V.Good
I
SI1
56.0
63.5
GIA
6.50 x 6.53 x 4.14
5002.0
6.50
6.53
4.14
350920
1.23
V.Good
F
SI2
57.0
63.0
EGL USA
6.79 x 6.84 x 4.29
5002.0
6.79
6.84
4.29
350921
1.50
Good
K
VS2
58.0
62.5
EGL USA
7.27 x 7.32 x 4.56
5002.0
7.27
7.32
4.56
350922
1.00
Good
H
VVS1
53.0
64.6
HRD
6.25 x 6.29 x 4.05
5002.0
6.25
6.29
4.05
350923
1.00
V.Good
H
SI1
60.0
62.0
GIA
6.40 x 6.46 x 3.99
5002.0
6.40
6.46
3.99
350924
1.23
V.Good
I
VS2
62.0
59.3
EGL USA
6.94 x 7.00 x 4.13
5002.0
6.94
7.00
4.13
350925
1.08
V.Good
J
SI2
57.0
62.9
GIA
6.47 x 6.52 x 4.08
5002.0
6.47
6.52
4.08
350926
2.67
V.Good
K
I1
59.0
63.7
EGL
8.65 x 8.72 x 5.53
5002.0
8.65
8.72
5.53
350927
1.03
Good
D
SI1
60.0
57.8
EGL
6.61 x 6.58 x 3.81
5002.0
6.61
6.58
3.81
350928
1.20
V.Good
G
SI1
59.0
63.0
EGL
6.71 x 6.68 x 4.22
5002.0
6.71
6.68
4.22
350929
1.35
V.Good
I
VVS2
60.0
59.4
EGL
7.21 x 7.10 x 4.25
5002.0
7.21
7.10
4.25
350930
1.01
Ideal
I
SI1
57.0
62.7
GIA
6.39 x 6.43 x 4.02
5002.0
6.39
6.43
4.02
350931
1.01
Good
G
SI1
61.0
65.8
OTHER
6.18 x 6.26 x 4.10
5002.0
6.18
6.26
4.10
350932
1.30
V.Good
G
I1
60.0
63.3
GIA
6.85 x 6.86 x 4.34
5002.0
6.85
6.86
4.34
350933
1.02
Good
F
SI1
61.0
64.3
GIA
6.27 x 6.30 x 4.04
5002.0
6.27
6.30
4.04
...
...
...
...
...
...
...
...
...
...
...
...
...
597994
4.65
Ideal
I
VS2
56.0
61.2
GIA
10.72 x 10.80 x 6.59
99626.0
10.72
10.80
6.59
597995
5.75
Ideal
J
VS1
58.0
62.7
IGI
11.34 x 11.29 x 7.1
99630.0
11.34
11.29
7.10
597996
3.01
Good
F
VVS2
62.0
62.5
GIA
9.15 x 9.12 x 5.71
99630.0
9.15
9.12
5.71
597997
5.05
V.Good
I
VS1
58.0
0.0
HRD
10.8 x 10.76 x 6.88
99640.0
10.80
10.76
6.88
597998
3.54
Ideal
D
VS2
61.0
59.1
GIA
9.95 x 10.00 x 5.89
99656.0
9.95
10.00
5.89
597999
2.71
Ideal
D
VVS2
56.0
62.7
GIA
8.93 x 8.9 x 5.59
99660.0
8.93
8.90
5.59
598000
3.65
Ideal
F
VS2
59.0
61.0
GIA
9.87 x 9.94 x 6.04
99669.0
9.87
9.94
6.04
598001
3.01
V.Good
E
VVS2
57.0
63.4
GIA
9.16 x 9.09 x 5.78
99700.0
9.16
9.09
5.78
598002
3.86
Ideal
G
VS2
60.0
62.3
GIA
9.92 x 10.06 x 6.22
99711.0
9.92
10.06
6.22
598003
3.04
Ideal
E
VS2
58.0
61.2
GIA
9.38 x 9.33 x 5.73
99730.0
9.38
9.33
5.73
598004
3.04
Ideal
E
VS2
56.0
62.0
GIA
9.35 x 9.31 x 5.79
99730.0
9.35
9.31
5.79
598005
3.07
Ideal
F
VVS2
58.0
59.5
GIA
9.45 x 9.51 x 5.64
99776.0
9.45
9.51
5.64
598006
5.33
Ideal
H
SI2
59.0
61.1
GIA
6.87 x 11.22 x 11.27
99778.0
6.87
11.22
11.27
598007
3.56
Ideal
F
VVS1
61.2
58.0
GIA
9.82 x 9.87 x 6.02
99780.0
9.82
9.87
6.02
598008
3.43
Ideal
H
IF
60.0
59.5
GIA
5.88 x 9.86 x 9.88
99802.0
5.88
9.86
9.88
598009
5.02
Good
J
VS2
57.0
62.3
GIA
10.77 x 10.84 x 6.73
99806.0
10.77
10.84
6.73
598010
5.01
V.Good
H
SI1
57.0
59.6
GIA
11.11 x 11.17 x 6.64
99810.0
11.11
11.17
6.64
598011
3.05
V.Good
D
VS1
60.0
60.1
GIA
9.4 x 9.34 x 5.63
99870.0
9.40
9.34
5.63
598012
5.59
Ideal
I
VS2
61.0
60.4
HRD
11.52 x 11.57 x 6.97
99890.0
11.52
11.57
6.97
598013
2.57
Ideal
E
IF
59.0
60.9
GIA
8.82 x 8.88 x 5.39
99896.0
8.82
8.88
5.39
598014
5.24
Ideal
I
SI1
60.0
59.5
GIA
11.35 x 11.43 x 6.78
99910.0
11.35
11.43
6.78
598015
5.03
Ideal
H
SI1
58.0
62.2
HRD
6.82 x 10.94 x 10.98
99913.0
6.82
10.94
10.98
598016
3.05
Ideal
D
VS2
59.0
61.3
GIA
5.73 x 9.33 x 9.36
99916.0
5.73
9.33
9.36
598017
3.01
Good
E
VS1
61.0
62.6
GIA
9.16 x 9.25 x 5.76
99920.0
9.16
9.25
5.76
598018
3.01
Ideal
D
VS2
58.0
62.0
GIA
9.25 x 9.2 x 5.72
99920.0
9.25
9.20
5.72
598019
3.02
Ideal
E
VVS2
58.0
59.8
HRD
9.43 x 9.51 x 5.66
99930.0
9.43
9.51
5.66
598020
5.01
V.Good
I
VVS2
63.5
61.5
IGI
10.78 x 10.89 x 6.68
99942.0
10.78
10.89
6.68
598021
3.43
Ideal
F
VS2
54.0
62.7
GIA
9.66 x 9.61 x 6.05
99960.0
9.66
9.61
6.05
598022
3.01
V.Good
E
VS1
58.0
62.9
GIA
9.15 x 9.19 x 5.77
99966.0
9.15
9.19
5.77
598023
4.13
Ideal
H
IF
56.0
62.5
IGI
10.27 x 10.19 x 6.4
99990.0
10.27
10.19
6.40
247060 rows × 12 columns
In [84]:
# Rows whose color is "J" or whose price exceeds 4000 (either condition is enough).
color_mask = diamonds2['color'] == "J"
price_mask = diamonds2['price'] > 4000
diamonds2[color_mask | price_mask]
Out[84]:
Unnamed: 0
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
495
496
0.26
Good
J
VS2
56.5
64.1
IGI
4.01 x 4.05 x 2.58
300.0
4.01
4.05
2.58
536
537
0.29
V.Good
J
SI1
62.0
59.4
GIA
4.31 x 4.33 x 2.56
303.0
4.31
4.33
2.56
554
555
0.33
Good
J
SI2
64.0
58.4
GIA
4.50 x 4.54 x 2.64
305.0
4.50
4.54
2.64
576
577
0.23
Ideal
J
VS1
57.0
61.1
GIA
3.92 x 3.94 x 2.40
307.0
3.92
3.94
2.40
590
591
0.20
Ideal
J
VS2
57.0
62.0
GIA
3.73 x 3.75 x 2.32
308.0
3.73
3.75
2.32
601
602
0.49
Good
J
I1
66.0
65.7
GIA
4.97 x 4.82 x 3.21
309.0
4.97
4.82
3.21
603
604
0.20
V.Good
J
IF
57.0
63.8
GIA
3.66 x 3.68 x 2.34
310.0
3.66
3.68
2.34
604
605
0.20
V.Good
J
IF
60.0
60.4
GIA
3.75 x 3.77 x 2.27
310.0
3.75
3.77
2.27
636
637
0.20
Ideal
J
VVS1
60.0
61.0
GIA
3.78 x 3.81 x 2.32
314.0
3.78
3.81
2.32
638
639
0.25
Ideal
J
VS2
58.0
61.4
GIA
4.04 x 4.06 x 2.49
314.0
4.04
4.06
2.49
649
650
0.37
Good
J
I1
69.0
56.5
GIA
4.73 x 4.80 x 2.69
316.0
4.73
4.80
2.69
668
669
0.22
Ideal
J
VVS1
55.0
62.4
IGI
3.85 x 3.87 x 2.41
318.0
3.85
3.87
2.41
669
670
0.22
Ideal
J
VVS2
56.0
62.1
IGI
3.89 x 3.91 x 2.42
318.0
3.89
3.91
2.42
683
684
0.20
Ideal
J
IF
54.0
61.3
IGI
3.84 x 3.86 x 2.36
320.0
3.84
3.86
2.36
685
686
0.20
Ideal
J
VVS2
59.0
61.0
GIA
3.77 x 3.79 x 2.31
320.0
3.77
3.79
2.31
738
739
0.20
Ideal
J
VVS1
56.0
61.8
GIA
3.75 x 3.78 x 2.33
323.0
3.75
3.78
2.33
741
742
0.20
Ideal
J
VVS2
56.0
62.3
GIA
3.71 x 3.74 x 2.32
323.0
3.71
3.74
2.32
747
748
0.25
V.Good
J
VS1
57.0
61.3
GIA
4.04 x 4.05 x 2.48
323.0
4.04
4.05
2.48
754
755
0.34
Good
J
SI2
57.0
63.0
IGI
4.40 x 4.43 x 2.78
323.0
4.40
4.43
2.78
758
759
0.48
Good
J
I2
59.0
66.1
GIA
4.83 x 4.76 x 3.17
323.0
4.83
4.76
3.17
770
771
0.21
Ideal
J
IF
56.0
61.6
IGI
3.85 x 3.88 x 2.38
325.0
3.85
3.88
2.38
781
782
0.41
Good
J
I1
58.0
61.5
GIA
4.81 x 4.74 x 2.94
325.0
4.81
4.74
2.94
797
798
0.23
V.Good
J
VVS2
63.0
60.5
IGI
3.92 x 3.95 x 2.38
327.0
3.92
3.95
2.38
804
805
0.21
Ideal
J
VVS1
57.0
62.5
GIA
3.80 x 3.81 x 2.38
328.0
3.80
3.81
2.38
823
824
0.38
V.Good
J
I1
61.0
62.5
GIA
4.57 x 4.58 x 2.86
329.0
4.57
4.58
2.86
827
828
0.50
Good
J
I2
56.0
68.5
EGL
4.94 x 4.78 x 3.33
329.0
4.94
4.78
3.33
833
834
0.37
Good
J
SI1
55.0
57.7
GIA
4.75 x 4.62 x 2.70
330.0
4.75
4.62
2.70
839
840
0.30
Good
J
SI1
68.0
60.5
GIA
4.29 x 4.33 x 2.61
330.0
4.29
4.33
2.61
847
848
0.41
Good
J
I1
58.0
61.5
GIA
4.74 x 4.81 x 2.94
330.0
4.74
4.81
2.94
852
853
0.22
V.Good
J
VVS2
59.0
61.5
GIA
3.9 x 3.86 x 2.39
330.0
3.90
3.86
2.39
...
...
...
...
...
...
...
...
...
...
...
...
...
...
597994
597995
4.65
Ideal
I
VS2
56.0
61.2
GIA
10.72 x 10.80 x 6.59
99626.0
10.72
10.80
6.59
597995
597996
5.75
Ideal
J
VS1
58.0
62.7
IGI
11.34 x 11.29 x 7.1
99630.0
11.34
11.29
7.10
597996
597997
3.01
Good
F
VVS2
62.0
62.5
GIA
9.15 x 9.12 x 5.71
99630.0
9.15
9.12
5.71
597997
597998
5.05
V.Good
I
VS1
58.0
0.0
HRD
10.8 x 10.76 x 6.88
99640.0
10.80
10.76
6.88
597998
597999
3.54
Ideal
D
VS2
61.0
59.1
GIA
9.95 x 10.00 x 5.89
99656.0
9.95
10.00
5.89
597999
598000
2.71
Ideal
D
VVS2
56.0
62.7
GIA
8.93 x 8.9 x 5.59
99660.0
8.93
8.90
5.59
598000
598001
3.65
Ideal
F
VS2
59.0
61.0
GIA
9.87 x 9.94 x 6.04
99669.0
9.87
9.94
6.04
598001
598002
3.01
V.Good
E
VVS2
57.0
63.4
GIA
9.16 x 9.09 x 5.78
99700.0
9.16
9.09
5.78
598002
598003
3.86
Ideal
G
VS2
60.0
62.3
GIA
9.92 x 10.06 x 6.22
99711.0
9.92
10.06
6.22
598003
598004
3.04
Ideal
E
VS2
58.0
61.2
GIA
9.38 x 9.33 x 5.73
99730.0
9.38
9.33
5.73
598004
598005
3.04
Ideal
E
VS2
56.0
62.0
GIA
9.35 x 9.31 x 5.79
99730.0
9.35
9.31
5.79
598005
598006
3.07
Ideal
F
VVS2
58.0
59.5
GIA
9.45 x 9.51 x 5.64
99776.0
9.45
9.51
5.64
598006
598007
5.33
Ideal
H
SI2
59.0
61.1
GIA
6.87 x 11.22 x 11.27
99778.0
6.87
11.22
11.27
598007
598008
3.56
Ideal
F
VVS1
61.2
58.0
GIA
9.82 x 9.87 x 6.02
99780.0
9.82
9.87
6.02
598008
598009
3.43
Ideal
H
IF
60.0
59.5
GIA
5.88 x 9.86 x 9.88
99802.0
5.88
9.86
9.88
598009
598010
5.02
Good
J
VS2
57.0
62.3
GIA
10.77 x 10.84 x 6.73
99806.0
10.77
10.84
6.73
598010
598011
5.01
V.Good
H
SI1
57.0
59.6
GIA
11.11 x 11.17 x 6.64
99810.0
11.11
11.17
6.64
598011
598012
3.05
V.Good
D
VS1
60.0
60.1
GIA
9.4 x 9.34 x 5.63
99870.0
9.40
9.34
5.63
598012
598013
5.59
Ideal
I
VS2
61.0
60.4
HRD
11.52 x 11.57 x 6.97
99890.0
11.52
11.57
6.97
598013
598014
2.57
Ideal
E
IF
59.0
60.9
GIA
8.82 x 8.88 x 5.39
99896.0
8.82
8.88
5.39
598014
598015
5.24
Ideal
I
SI1
60.0
59.5
GIA
11.35 x 11.43 x 6.78
99910.0
11.35
11.43
6.78
598015
598016
5.03
Ideal
H
SI1
58.0
62.2
HRD
6.82 x 10.94 x 10.98
99913.0
6.82
10.94
10.98
598016
598017
3.05
Ideal
D
VS2
59.0
61.3
GIA
5.73 x 9.33 x 9.36
99916.0
5.73
9.33
9.36
598017
598018
3.01
Good
E
VS1
61.0
62.6
GIA
9.16 x 9.25 x 5.76
99920.0
9.16
9.25
5.76
598018
598019
3.01
Ideal
D
VS2
58.0
62.0
GIA
9.25 x 9.2 x 5.72
99920.0
9.25
9.20
5.72
598019
598020
3.02
Ideal
E
VVS2
58.0
59.8
HRD
9.43 x 9.51 x 5.66
99930.0
9.43
9.51
5.66
598020
598021
5.01
V.Good
I
VVS2
63.5
61.5
IGI
10.78 x 10.89 x 6.68
99942.0
10.78
10.89
6.68
598021
598022
3.43
Ideal
F
VS2
54.0
62.7
GIA
9.66 x 9.61 x 6.05
99960.0
9.66
9.61
6.05
598022
598023
3.01
V.Good
E
VS1
58.0
62.9
GIA
9.15 x 9.19 x 5.77
99966.0
9.15
9.19
5.77
598023
598024
4.13
Ideal
H
IF
56.0
62.5
IGI
10.27 x 10.19 x 6.4
99990.0
10.27
10.19
6.40
299941 rows × 13 columns
In [86]:
# Add (or overwrite) a 'newvar' column holding the integer 1 on every row.
# It acts as the default value before selected rows are relabelled further down.
diamonds3['newvar']=1
In [87]:
# Preview the first five rows to confirm the new 'newvar' column is present.
diamonds3.head(n=5)
Out[87]:
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
newvar
0
0.25
V.Good
K
I1
59.0
63.7
GIA
3.96 x 3.95 x 2.52
NaN
3.96
3.95
2.52
1
1
0.23
Good
G
I1
61.0
58.1
GIA
4.00 x 4.05 x 2.30
NaN
4.00
4.05
2.30
1
2
0.34
Good
J
I2
58.0
58.7
GIA
4.56 x 4.53 x 2.67
NaN
4.56
4.53
2.67
1
3
0.21
V.Good
D
I1
60.0
60.6
GIA
3.80 x 3.82 x 2.31
NaN
3.80
3.82
2.31
1
4
0.31
V.Good
K
I1
59.0
62.2
EGL
4.35 x 4.26 x 2.68
NaN
4.35
4.26
2.68
1
In [89]:
# Relabel every diamond priced at 5000 or more as "Expensive".
# Rows with a missing price compare False and keep their previous 'newvar' value,
# so the column ends up holding both the integer 1 and the string label.
diamonds3.loc[diamonds3['price'].ge(5000), 'newvar'] = "Expensive"
In [92]:
# Show the first five rows whose price is strictly greater than 5000.
# NOTE(review): the labelling cell above uses `>= 5000`, whereas this filter uses
# `> 5000`, so rows priced exactly 5000 are labelled "Expensive" but not listed here.
# Confirm which boundary is intended.
diamonds3.query('price >5000').head()
Out[92]:
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
newvar
350904
1.03
Ideal
J
VS2
60.0
62.0
GIA
6.43 x 6.48 x 4.00
5001.0
6.43
6.48
4.00
Expensive
350905
1.01
V.Good
H
SI2
59.0
63.0
GIA
6.32 x 6.35 x 3.99
5001.0
6.32
6.35
3.99
Expensive
350906
1.04
Ideal
G
SI2
61.0
59.8
GIA
6.57 x 6.59 x 3.94
5001.0
6.57
6.59
3.94
Expensive
350907
0.90
Ideal
G
VS2
56.0
62.8
GIA
6.17 x 6.14 x 3.87
5001.0
6.17
6.14
3.87
Expensive
350908
1.05
Ideal
I
SI1
59.0
60.3
GIA
6.60 x 6.57 x 3.97
5001.0
6.60
6.57
3.97
Expensive
In [94]:
# Price per carat: element-wise price divided by carat, stored as column 'ppc'.
# Rows with a missing price produce NaN in 'ppc'.
diamonds3['ppc'] = diamonds3['price'].div(diamonds3['carat'])
In [95]:
# Preview the first five rows, now including the derived 'ppc' column.
diamonds3.head(n=5)
Out[95]:
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
newvar
ppc
0
0.25
V.Good
K
I1
59.0
63.7
GIA
3.96 x 3.95 x 2.52
NaN
3.96
3.95
2.52
1
NaN
1
0.23
Good
G
I1
61.0
58.1
GIA
4.00 x 4.05 x 2.30
NaN
4.00
4.05
2.30
1
NaN
2
0.34
Good
J
I2
58.0
58.7
GIA
4.56 x 4.53 x 2.67
NaN
4.56
4.53
2.67
1
NaN
3
0.21
V.Good
D
I1
60.0
60.6
GIA
3.80 x 3.82 x 2.31
NaN
3.80
3.82
2.31
1
NaN
4
0.31
V.Good
K
I1
59.0
62.2
EGL
4.35 x 4.26 x 2.68
NaN
4.35
4.26
2.68
1
NaN
In [96]:
# Keep an independent (deep) copy of the frame before rows with missing values
# are dropped from diamonds3, so the unfiltered data stays available.
diamonds4 = diamonds3.copy(deep=True)
In [97]:
# Discard every row that has at least one missing value (dropna's default is
# how='any' on rows). diamonds3 is rebound to the filtered frame; the
# pre-filter data remains in diamonds4.
diamonds3 = diamonds3.dropna()
In [98]:
# Preview the first five rows that survived the missing-value filter.
diamonds3.head(n=5)
Out[98]:
carat
cut
color
clarity
table
depth
cert
measurements
price
x
y
z
newvar
ppc
493
0.24
V.Good
G
SI1
61.0
58.9
GIA
4.09 x 4.10 x 2.41
300.0
4.09
4.10
2.41
1
1250.000000
494
0.31
V.Good
K
SI2
59.0
60.2
GIA
4.40 x 4.42 x 2.65
300.0
4.40
4.42
2.65
1
967.741935
495
0.26
Good
J
VS2
56.5
64.1
IGI
4.01 x 4.05 x 2.58
300.0
4.01
4.05
2.58
1
1153.846154
496
0.24
Ideal
G
SI1
55.0
61.3
GIA
4.01 x 4.03 x 2.47
300.0
4.01
4.03
2.47
1
1250.000000
497
0.30
Good
H
I1
57.0
62.2
GIA
4.21 x 4.24 x 2.63
300.0
4.21
4.24
2.63
1
1000.000000
In [99]:
# List every entry in the current working directory.
# NOTE(review): the rendered output of this cell exposes personal file names
# (payslips, invoices, identity documents); consider clearing it before sharing.
os.listdir('.')
Out[99]:
['140749_2017.pdf',
'2011-F01-0700-Rev4-MDDS.XLSX',
'20150817143155.pdf',
'20160111060911.pdf',
'20170214052225.pdf',
'861415_10151432783238421_2124270505_o (1).jpg',
'861415_10151432783238421_2124270505_o.jpg',
'AirPassengers.csv',
'ajayo.jpg',
'Alison Python Invoice - Sheet1.pdf',
'Alison SAS Invoice - Sheet1.pdf',
'All+CSV+Files+in+a+Folder.ipynb',
'Allison Interview Jones Invoice - Sheet1.pdf',
'Anaconda3-4.2.0-Windows-x86_64.exe',
'apachehttpd.exe',
'April invoice adaptive analytics - Sheet1.pdf',
'Assignment14_BusinessAnalytics (1).docx',
'Assignment14_BusinessAnalytics.docx',
'Assignment15_BusinessAnalytics.docx',
'Assignment16_BusinessAnalytics (1).docx',
'Assignment16_BusinessAnalytics (2).docx',
'Assignment16_BusinessAnalytics.docx',
'aug ust 2008.JPG',
'avast_free_antivirus_setup_online.exe',
'avinash_ltv.zip',
'BigDiamonds.csv',
'BigDiamonds.csv (1).zip',
'BigDiamonds.csv (2)',
'BigDiamonds.csv (2).zip',
'BigDiamonds.csv.zip',
'Boston (1).csv',
'Boston.csv',
'CAM- Ajay Ohri (1).pdf',
'CAM- Ajay Ohri.pdf',
'camtasia.exe',
'ccFraud.csv',
'Certificate of Incorporation - U74999DL2015PTC282030 (26 June 2015).pdf',
'CHAP1-6PythonforRUsersAnapproachforDataScience.docx',
'chapter+3+_+spark.html',
'chi+square+test.ipynb',
'chromeinstall-8u111.exe',
'Cisco_WebEx_Add-On.exe',
'class2.csv',
'Collabera Invoice (1).pdf',
'Collabera Invoice.pdf',
'Collectcent Invoice.pdf',
'college degrees.pdf',
'DAP 1.pdf',
'DAP 1.pptx',
'DAP 6 RDBMS and SQL.pdf',
'DAP 6 RDBMS and SQL.pptx',
'data+exploration.ipynb',
'data+manipulation.ipynb',
'data1.csv',
'datasets.csv',
'Decision Trees.pdf',
'DecisionStatsOfferLetter.docx',
'DecisionStatsRelievingLetter.docx',
'descriptive+stats+in+Python.ipynb',
'desktop.ini',
'Diamond (1).csv',
'Diamond (2).csv',
'Diamond (3).csv',
'Diamond (4).csv',
'Diamond (5).csv',
'Diamond (6).csv',
'Diamond.csv',
'DropboxInstaller.exe',
'edb_npgsql.exe',
'edb_pgjdbc.exe',
'edb_psqlodbc.exe',
'edb_psqlodbc.exe-20170203172812',
'edb_psqlodbc.exe-20170307203617',
'final invoice edureka - Sheet1.pdf',
'FinalPythonforRUsersAnapproachforDataScience (1).docx',
'FinalPythonforRUsersAnapproachforDataScience (2).docx',
'FinalPythonforRUsersAnapproachforDataScience (3).docx',
'FinalPythonforRUsersAnapproachforDataScience (4).docx',
'FinalPythonforRUsersAnapproachforDataScience.docx',
'final_webinar (1).pdf',
'final_webinar.pdf',
'Git-2.11.0-64-bit.exe',
'Git-2.12.0-64-bit.exe',
'GitHubSetup (1).exe',
'GitHubSetup (2).exe',
'GitHubSetup.exe',
'GOMAUDIOGLOBALSETUP.EXE',
'Hdma.csv',
'Hedonic.csv',
'HP Downloads',
'HPSupportSolutionsFramework-12.5.32.203.exe',
'image.png',
'IMS PROSCHOOL Workshop.pptx.pdf',
'IMS PROSCHOOL Workshop.pptx.pptx',
'internship.docx',
'Introduction to SAS (1).pdf',
'Introduction to SAS Part 1 (1).pdf',
'Introduction to SAS Part 1.pdf',
'Introduction to SAS.pdf',
'Invoice for Digital Vidya.pdf',
'Invoice for Weekendr.pdf',
'Invoice format - Ajay Ohri CONTATA (1).xls',
'Invoice format - Ajay Ohri CONTATA.xls',
'invoice rapid miner.pdf',
'Invoice trafla format.docx',
'iris2 (1).ipynb',
'iris2 (2).ipynb',
'iris2.ipynb',
'January invoice Indicus .pdf',
'June AV Invoice - Sheet1.pdf',
'Lecture 6 - KNN & Naive Bayes.ppt',
'Local Disk (C) - Shortcut.lnk',
'logistic regression - script for ppt.R',
'logistic_regression_-_script_for_ppt.html',
'March invoice Indicus - Sheet1.pdf',
'mongodb-win32-x86_64-2008plus-ssl-3.4.2-signed.msi',
'mongodb-win32-x86_64-3.4.2-signed.msi',
'mtcarslm.R',
'nltk.ipynb',
'notebook-Copy1.html',
'Offer Letter - Ajay Ohri (1).pdf',
'Offer Letter - Ajay Ohri.pdf',
'Other Data Mining Methods (1).pdf',
'Other Data Mining Methods.pdf',
'output1 (1).xls',
'output1 (2).xls',
'output1.xls',
'pandas+11.ipynb',
'pandas+analysis+1.ipynb',
'pandas+data+manipulation.ipynb',
'passport image.pdf',
'Pawconinvoice2016.pdf',
'Pawconinvoice2017 (1).pdf',
'Pawconinvoice2017 (2).pdf',
'Pawconinvoice2017 (3).pdf',
'Pawconinvoice2017.pdf',
'Payslip Feb 2016 - Sheet1.pdf',
'Payslip Feb 2016.pdf',
'Payslip Format Decisionstats - Sheet1.pdf',
'Payslip Jan 2016 - Sheet1.pdf',
'Payslip Jan 2016.pdf',
'Payslip March 2016 - Sheet1.pdf',
'Payslip March 2016.pdf',
'pgd.csv',
'postgresql-9.6.1-1-windows-x64.exe',
'Program 1-results.rtf',
'protein.csv',
'python+with+postgres (1).ipynb',
'python+with+postgres.ipynb',
'Python.docx',
'R-3.3.2-win.exe',
'R-3.3.3-win.exe',
'RCertificationExam.pdf',
'reg+model.ipynb',
'Revision - Business Analytics (1).pdf',
'Revision - Business Analytics.pdf',
'RidingMowers.csv',
'rsconnect',
'RStudio-1.0.136.exe',
'Salary Slip, Feb 2016.pdf',
'Salary Slip, Jan 2016.pdf',
'Salary Slip, March 2016 (1).pdf',
'Salary Slip, March 2016 (2).pdf',
'Salary Slip, March 2016.pdf',
'sales-of-shampoo-over-a-three-ye.csv',
'SAS part 2.pdf',
'SAS Part 3.pdf',
'sas-university-edition-107140.pdf',
'Scan0095.pdf',
'Scanned Invoice for Collabera.pdf',
'Screenshot 2017-01-23 12.36.55.png',
'September invoice adaptive analytics - Sheet1.pdf',
'Sollers January.pdf',
'sqlalchemy.ipynb',
'stackoverflow-dump-analysis.html',
'Sunstone.pdf',
'Tableau.pdf',
'TableauPublicDesktop-64bit-10-1-3.exe',
'TableauPublicDesktop-64bit-10-1-4.exe',
'telecom.csv',
'TelecomServiceProviderCaseStudy.pdf',
'Text Mining (1).pdf',
'Text Mining.pdf',
'third.sas7bdat',
'Time Series Forecasting (1).pdf',
'Time Series Forecasting.pdf',
'ts.html',
'ts.R',
'Unconfirmed 373974.crdownload',
'Unconfirmed 376562.crdownload',
'Unconfirmed 376991.crdownload',
'Unconfirmed 930917.crdownload',
'Unconfirmed 950045.crdownload',
'unvbasicvapp__9411008__ova__en__sp0__1.ova.crdownload',
'VirtualBox-5.1.8-111374-Win (1).exe',
'VirtualBox-5.1.8-111374-Win.exe',
'Web+Scraping+Yelp+with+Beautiful+Soup.ipynb',
'Webinar for Business Analytics.pdf',
'WhatsApp Image 2017-02-18 at 08.42.55 (1).jpeg',
'WhatsApp Image 2017-02-18 at 08.42.55.jpeg']
In [100]:
# Collect the names of all files in the working directory that carry the
# configured extension. glob.glob already returns a list, so no comprehension
# is needed to materialise it.
result = glob.glob('*.' + extension)
print(result)
['AirPassengers.csv', 'BigDiamonds.csv', 'Boston (1).csv', 'Boston.csv', 'ccFraud.csv', 'class2.csv', 'data1.csv', 'datasets.csv', 'Diamond (1).csv', 'Diamond (2).csv', 'Diamond (3).csv', 'Diamond (4).csv', 'Diamond (5).csv', 'Diamond (6).csv', 'Diamond.csv', 'Hdma.csv', 'Hedonic.csv', 'pgd.csv', 'protein.csv', 'RidingMowers.csv', 'sales-of-shampoo-over-a-three-ye.csv', 'telecom.csv']
In [101]:
# Load the credit-card fraud dataset into the DataFrame `f`.
# The bare file name is resolved against the current working directory, so this
# relies on the earlier os.chdir(path) cell having been run.
f=pd.read_csv('ccFraud.csv')
Credit for this part: http://www.cs.tufts.edu/comp/150VAN/demos/DataWrangling.pdf. The data comes from https://packages.revolutionanalytics.com/datasets/ccFraud.csv
In [103]:
# Show the data type pandas inferred for each column.
f.dtypes
Out[103]:
custID int64
gender int64
state int64
cardholder int64
balance int64
numTrans int64
numIntlTrans int64
creditLine int64
fraudRisk int64
dtype: object
In [104]:
# Show the row index (row labels) of the frame.
f.index
Out[104]:
RangeIndex(start=0, stop=10000000, step=1)
In [105]:
# Show the column labels of the frame.
f.columns
Out[105]:
Index(['custID', 'gender', 'state', 'cardholder', 'balance', 'numTrans',
'numIntlTrans', 'creditLine', 'fraudRisk'],
dtype='object')
In [106]:
# Expose the frame's data as a NumPy array (row and column labels are dropped).
f.values
Out[106]:
array([[ 1, 1, 35, ..., 14, 2, 0],
[ 2, 2, 2, ..., 0, 18, 0],
[ 3, 2, 2, ..., 9, 16, 0],
...,
[ 9999998, 1, 24, ..., 0, 8, 0],
[ 9999999, 1, 28, ..., 19, 6, 0],
[10000000, 1, 23, ..., 0, 7, 0]], dtype=int64)
In [108]:
# Summary statistics per numeric column: count, mean, std, min, quartiles, max.
f.describe()
Out[108]:
custID
gender
state
cardholder
balance
numTrans
numIntlTrans
creditLine
fraudRisk
count
1.000000e+07
1.000000e+07
1.000000e+07
1.000000e+07
1.000000e+07
1.000000e+07
1.000000e+07
1.000000e+07
1.000000e+07
mean
5.000000e+06
1.382177e+00
2.466127e+01
1.030004e+00
4.109920e+03
2.893519e+01
4.047190e+00
9.134469e+00
5.960140e-02
std
2.886751e+06
4.859195e-01
1.497012e+01
1.705991e-01
3.996847e+03
2.655378e+01
8.602970e+00
9.641974e+00
2.367469e-01
min
1.000000e+00
1.000000e+00
1.000000e+00
1.000000e+00
0.000000e+00
0.000000e+00
0.000000e+00
1.000000e+00
0.000000e+00
25%
2.500001e+06
1.000000e+00
1.000000e+01
1.000000e+00
0.000000e+00
1.000000e+01
0.000000e+00
4.000000e+00
0.000000e+00
50%
5.000000e+06
1.000000e+00
2.400000e+01
1.000000e+00
3.706000e+03
1.900000e+01
0.000000e+00
6.000000e+00
0.000000e+00
75%
7.500000e+06
2.000000e+00
3.800000e+01
1.000000e+00
6.000000e+03
3.900000e+01
4.000000e+00
1.100000e+01
0.000000e+00
max
1.000000e+07
2.000000e+00
5.100000e+01
2.000000e+00
4.148500e+04
1.000000e+02
6.000000e+01
7.500000e+01
1.000000e+00
In [109]:
# Transpose the frame so that the original columns become rows.
# NOTE(review): on this dataset the result has one column per original row,
# which makes it very wide; transposing a small slice is usually more readable.
f.transpose()
Out[109]:
0
1
2
3
4
5
6
7
8
9
...
9999990
9999991
9999992
9999993
9999994
9999995
9999996
9999997
9999998
9999999
custID
1
2
3
4
5
6
7
8
9
10
...
9999991
9999992
9999993
9999994
9999995
9999996
9999997
9999998
9999999
10000000
gender
1
2
2
1
1
2
1
1
2
1
...
1
2
1
2
1
1
1
1
1
1
state
35
2
2
15
46
44
3
10
32
23
...
16
36
38
43
16
37
16
24
28
23
cardholder
1
1
1
1
1
2
1
1
1
1
...
2
1
1
1
2
1
1
1
1
1
balance
3000
0
0
0
0
5546
2000
6016
2428
0
...
1561
5217
0
2607
17376
0
0
9000
7000
0
numTrans
4
9
27
12
11
21
41
20
4
18
...
8
6
7
6
3
10
33
38
20
13
numIntlTrans
14
0
9
0
16
0
0
3
10
56
...
0
0
0
0
0
0
2
0
19
0
creditLine
2
18
16
5
7
13
1
6
22
5
...
5
5
2
5
20
9
4
8
6
7
fraudRisk
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
9 rows × 10000000 columns
In [110]:
# Return the rows ordered by ascending 'balance'.
# DataFrame.sort(columns=...) is deprecated (pandas' own FutureWarning points to
# sort_values(by=...)) and is absent from later pandas releases, so the
# supported method is used instead. The result is a new frame; `f` is unchanged.
f.sort_values(by='balance')
Out[110]:
custID
gender
state
cardholder
balance
numTrans
numIntlTrans
creditLine
fraudRisk
9999999
10000000
1
23
1
0
13
0
7
0
2581279
2581280
1
37
2
0
34
0
28
0
6743525
6743526
1
49
1
0
35
0
27
0
6743524
6743525
1
44
1
0
13
16
13
0
6743522
6743523
2
5
1
0
6
5
12
0
6743519
6743520
1
44
1
0
8
0
13
0
6743517
6743518
2
46
1
0
6
0
9
0
2581289
2581290
2
24
1
0
24
0
26
0
6743501
6743502
1
44
1
0
20
0
5
0
6743497
6743498
2
35
1
0
24
0
7
0
2581293
2581294
1
15
1
0
10
1
1
0
6743496
6743497
2
44
1
0
24
0
8
0
6743486
6743487
1
39
1
0
5
8
3
0
2581298
2581299
2
5
1
0
12
0
58
0
6743484
6743485
1
10
1
0
44
9
8
0
6743483
6743484
2
7
1
0
4
4
3
0
2581303
2581304
1
29
1
0
10
4
5
0
6743482
6743483
1
39
1
0
27
0
7
0
2581305
2581306
1
2
1
0
15
0
5
0
6743479
6743480
1
46
1
0
38
0
5
0
2581309
2581310
2
5
1
0
22
0
23
0
2581311
2581312
1
29
1
0
25
0
4
0
6743527
6743528
1
5
1
0
9
0
8
0
6743476
6743477
2
5
1
0
26
0
10
0
6743528
6743529
1
29
1
0
31
2
2
0
6743533
6743534
2
35
1
0
9
0
5
0
2581234
2581235
2
29
1
0
15
13
11
0
2581238
2581239
1
3
1
0
65
0
24
0
6743568
6743569
1
5
1
0
100
1
12
0
6743567
6743568
2
23
1
0
14
7
3
0
...
...
...
...
...
...
...
...
...
...
8395955
8395956
1
5
1
34871
99
1
38
1
2006812
2006813
2
5
1
34898
16
10
38
1
3803760
3803761
2
46
1
34938
37
5
37
1
5174944
5174945
1
5
1
34941
45
0
57
1
9368966
9368967
2
46
1
35000
10
0
34
1
753353
753354
1
17
1
35039
100
4
39
1
962369
962370
1
46
1
35074
30
0
47
1
8826733
8826734
1
44
1
35079
27
0
35
1
2896280
2896281
2
12
1
35233
64
6
35
1
7061678
7061679
1
18
1
35239
13
7
75
1
8070316
8070317
1
49
1
35382
14
0
53
1
1017215
1017216
2
3
1
35447
9
4
40
1
4631316
4631317
1
48
1
35868
18
0
39
1
5690915
5690916
1
32
1
35957
28
12
51
1
887966
887967
1
5
1
35982
70
4
43
1
8641170
8641171
1
7
1
36000
9
11
35
1
6885917
6885918
1
39
1
36065
8
0
64
1
1092996
1092997
2
44
1
36099
59
0
37
1
8797380
8797381
1
36
1
36153
17
5
37
1
2302929
2302930
2
2
1
36499
43
2
41
1
1211347
1211348
2
26
1
36534
30
0
53
1
2619291
2619292
2
5
1
36567
48
3
59
1
7829649
7829650
2
40
1
36592
11
3
71
1
7530764
7530765
1
5
1
36671
8
0
75
1
9123139
9123140
1
3
1
37000
10
1
36
1
7202753
7202754
1
51
1
37557
10
0
40
1
162444
162445
2
39
1
39554
6
16
52
1
471477
471478
1
35
1
39725
61
0
41
1
9957408
9957409
2
48
1
39987
84
1
56
1
3086051
3086052
1
10
1
41485
35
0
56
1
10000000 rows × 9 columns
In [112]:
# Order the rows by their index labels, highest label first.
# Rows are the default axis for sort_index, so only the direction is specified.
f.sort_index(ascending=False)
Out[112]:
custID
gender
state
cardholder
balance
numTrans
numIntlTrans
creditLine
fraudRisk
9999999
10000000
1
23
1
0
13
0
7
0
9999998
9999999
1
28
1
7000
20
19
6
0
9999997
9999998
1
24
1
9000
38
0
8
0
9999996
9999997
1
16
1
0
33
2
4
0
9999995
9999996
1
37
1
0
10
0
9
0
9999994
9999995
1
16
2
17376
3
0
20
0
9999993
9999994
2
43
1
2607
6
0
5
0
9999992
9999993
1
38
1
0
7
0
2
0
9999991
9999992
2
36
1
5217
6
0
5
0
9999990
9999991
1
16
2
1561
8
0
5
0
9999989
9999990
2
4
1
12000
17
7
11
0
9999988
9999989
1
18
1
15000
72
0
14
1
9999987
9999988
2
24
1
6000
46
0
5
0
9999986
9999987
1
23
1
7000
59
0
6
0
9999985
9999986
2
45
2
3000
4
0
2
0
9999984
9999985
1
39
1
7000
30
0
6
0
9999983
9999984
2
13
1
0
2
0
5
0
9999982
9999983
1
44
1
3000
14
0
2
0
9999981
9999982
1
33
1
0
23
26
18
0
9999980
9999981
1
10
1
13889
54
0
13
0
9999979
9999980
1
6
1
7669
5
3
7
0
9999978
9999979
1
44
1
5000
12
0
4
0
9999977
9999978
1
35
1
8000
24
0
7
0
9999976
9999977
2
4
1
0
14
0
7
0
9999975
9999976
2
48
1
4210
23
0
8
0
9999974
9999975
1
25
1
0
18
0
3
0
9999973
9999974
1
10
2
0
36
31
14
0
9999972
9999973
1
46
1
5000
69
0
4
0
9999971
9999972
2
40
1
3683
31
3
7
0
9999970
9999971
1
10
1
0
12
0
11
0
...
...
...
...
...
...
...
...
...
...
29
30
2
49
1
5192
84
0
13
1
28
29
2
20
1
0
19
0
2
0
27
28
1
9
1
12000
20
0
11
0
26
27
1
38
1
4000
21
5
3
0
25
26
2
29
1
5000
4
9
4
0
24
25
1
25
1
0
12
0
65
0
23
24
2
21
1
0
15
0
3
0
22
23
1
5
1
0
7
0
11
0
21
22
1
34
1
0
22
0
3
0
20
21
1
39
1
4000
24
0
3
0
19
20
2
31
1
1860
21
10
8
0
18
19
1
5
1
9000
20
2
8
0
17
18
1
35
1
3113
13
6
8
0
16
17
2
18
1
13970
20
0
13
0
15
16
1
44
1
0
22
0
5
0
14
15
1
27
1
5227
60
0
17
0
13
14
2
38
1
9000
41
3
8
0
12
13
1
6
1
0
45
2
4
0
11
12
1
10
1
3000
20
0
2
0
10
11
1
46
1
4601
54
0
4
0
9
10
1
23
1
0
18
56
5
0
8
9
2
32
1
2428
4
10
22
0
7
8
1
10
1
6016
20
3
6
0
6
7
1
3
1
2000
41
0
1
0
5
6
2
44
2
5546
21
0
13
0
4
5
1
46
1
0
11
16
7
0
3
4
1
15
1
0
12
0
5
0
2
3
2
2
1
0
27
9
16
0
1
2
2
2
1
0
9
0
18
0
0
1
1
35
1
3000
4
14
2
0
10000000 rows × 9 columns
In [113]:
# Reorder the columns by their labels in ascending order; row order is untouched.
f.sort_index(axis='columns')
Out[113]:
balance
cardholder
creditLine
custID
fraudRisk
gender
numIntlTrans
numTrans
state
0
3000
1
2
1
0
1
14
4
35
1
0
1
18
2
0
2
0
9
2
2
0
1
16
3
0
2
9
27
2
3
0
1
5
4
0
1
0
12
15
4
0
1
7
5
0
1
16
11
46
5
5546
2
13
6
0
2
0
21
44
6
2000
1
1
7
0
1
0
41
3
7
6016
1
6
8
0
1
3
20
10
8
2428
1
22
9
0
2
10
4
32
9
0
1
5
10
0
1
56
18
23
10
4601
1
4
11
0
1
0
54
46
11
3000
1
2
12
0
1
0
20
10
12
0
1
4
13
0
1
2
45
6
13
9000
1
8
14
0
2
3
41
38
14
5227
1
17
15
0
1
0
60
27
15
0
1
5
16
0
1
0
22
44
16
13970
1
13
17
0
2
0
20
18
17
3113
1
8
18
0
1
6
13
35
18
9000
1
8
19
0
1
2
20
5
19
1860
1
8
20
0
2
10
21
31
20
4000
1
3
21
0
1
0
24
39
21
0
1
3
22
0
1
0
22
34
22
0
1
11
23
0
1
0
7
5
23
0
1
3
24
0
2
0
15
21
24
0
1
65
25
0
1
0
12
25
25
5000
1
4
26
0
2
9
4
29
26
4000
1
3
27
0
1
5
21
38
27
12000
1
11
28
0
1
0
20
9
28
0
1
2
29
0
2
0
19
20
29
5192
1
13
30
1
2
0
84
49
...
...
...
...
...
...
...
...
...
...
9999970
0
1
11
9999971
0
1
0
12
10
9999971
3683
1
7
9999972
0
2
3
31
40
9999972
5000
1
4
9999973
0
1
0
69
46
9999973
0
2
14
9999974
0
1
31
36
10
9999974
0
1
3
9999975
0
1
0
18
25
9999975
4210
1
8
9999976
0
2
0
23
48
9999976
0
1
7
9999977
0
2
0
14
4
9999977
8000
1
7
9999978
0
1
0
24
35
9999978
5000
1
4
9999979
0
1
0
12
44
9999979
7669
1
7
9999980
0
1
3
5
6
9999980
13889
1
13
9999981
0
1
0
54
10
9999981
0
1
18
9999982
0
1
26
23
33
9999982
3000
1
2
9999983
0
1
0
14
44
9999983
0
1
5
9999984
0
2
0
2
13
9999984
7000
1
6
9999985
0
1
0
30
39
9999985
3000
2
2
9999986
0
2
0
4
45
9999986
7000
1
6
9999987
0
1
0
59
23
9999987
6000
1
5
9999988
0
2
0
46
24
9999988
15000
1
14
9999989
1
1
0
72
18
9999989
12000
1
11
9999990
0
2
7
17
4
9999990
1561
2
5
9999991
0
1
0
8
16
9999991
5217
1
5
9999992
0
2
0
6
36
9999992
0
1
2
9999993
0
1
0
7
38
9999993
2607
1
5
9999994
0
2
0
6
43
9999994
17376
2
20
9999995
0
1
0
3
16
9999995
0
1
9
9999996
0
1
0
10
37
9999996
0
1
4
9999997
0
1
2
33
16
9999997
9000
1
8
9999998
0
1
0
38
24
9999998
7000
1
6
9999999
0
1
19
20
28
9999999
0
1
7
10000000
0
1
0
13
23
10000000 rows × 9 columns
In [115]:
# Preview the first five rows of the fraud dataset.
f.head(n=5)
Out[115]:
custID
gender
state
cardholder
balance
numTrans
numIntlTrans
creditLine
fraudRisk
0
1
1
35
1
3000
4
14
2
0
1
2
2
2
1
0
9
0
18
0
2
3
2
2
1
0
27
9
16
0
3
4
1
15
1
0
12
0
5
0
4
5
1
46
1
0
11
16
7
0
In [116]:
# Show the last two rows of the frame via positional slicing.
f.iloc[-2:]
Out[116]:
custID
gender
state
cardholder
balance
numTrans
numIntlTrans
creditLine
fraudRisk
9999998
9999999
1
28
1
7000
20
19
6
0
9999999
10000000
1
23
1
0
13
0
7
0
In [117]:
# Select the single column 'balance' as a Series (all rows).
f.loc[:, 'balance']
Out[117]:
0 3000
1 0
2 0
3 0
4 0
5 5546
6 2000
7 6016
8 2428
9 0
10 4601
11 3000
12 0
13 9000
14 5227
15 0
16 13970
17 3113
18 9000
19 1860
20 4000
21 0
22 0
23 0
24 0
25 5000
26 4000
27 12000
28 0
29 5192
...
9999970 0
9999971 3683
9999972 5000
9999973 0
9999974 0
9999975 4210
9999976 0
9999977 8000
9999978 5000
9999979 7669
9999980 13889
9999981 0
9999982 3000
9999983 0
9999984 7000
9999985 3000
9999986 7000
9999987 6000
9999988 15000
9999989 12000
9999990 1561
9999991 5217
9999992 0
9999993 2607
9999994 17376
9999995 0
9999996 0
9999997 9000
9999998 7000
9999999 0
Name: balance, dtype: int64
In [118]:
# Select rows by position: start at position 1, stop before position 3.
f.iloc[1:3]
Out[118]:
custID
gender
state
cardholder
balance
numTrans
numIntlTrans
creditLine
fraudRisk
1
2
2
2
1
0
9
0
18
0
2
3
2
2
1
0
27
9
16
0
In [121]:
# Label-based selection: all rows (`:`), restricted to the 'balance' and
# 'gender' columns, returned in that order.
f.loc[:,['balance' , 'gender' ]]
Out[121]:
balance
gender
0
3000
1
1
0
2
2
0
2
3
0
1
4
0
1
5
5546
2
6
2000
1
7
6016
1
8
2428
2
9
0
1
10
4601
1
11
3000
1
12
0
1
13
9000
2
14
5227
1
15
0
1
16
13970
2
17
3113
1
18
9000
1
19
1860
2
20
4000
1
21
0
1
22
0
1
23
0
2
24
0
1
25
5000
2
26
4000
1
27
12000
1
28
0
2
29
5192
2
...
...
...
9999970
0
1
9999971
3683
2
9999972
5000
1
9999973
0
1
9999974
0
1
9999975
4210
2
9999976
0
2
9999977
8000
1
9999978
5000
1
9999979
7669
1
9999980
13889
1
9999981
0
1
9999982
3000
1
9999983
0
2
9999984
7000
1
9999985
3000
2
9999986
7000
1
9999987
6000
2
9999988
15000
1
9999989
12000
2
9999990
1561
1
9999991
5217
2
9999992
0
1
9999993
2607
2
9999994
17376
1
9999995
0
1
9999996
0
1
9999997
9000
1
9999998
7000
1
9999999
0
1
10000000 rows × 2 columns
In [122]:
# Passing a list of column names inside the brackets returns a DataFrame
# containing just those columns, in the order listed.
f[['balance' , 'gender' ]]
Out[122]:
balance
gender
0
3000
1
1
0
2
2
0
2
3
0
1
4
0
1
5
5546
2
6
2000
1
7
6016
1
8
2428
2
9
0
1
10
4601
1
11
3000
1
12
0
1
13
9000
2
14
5227
1
15
0
1
16
13970
2
17
3113
1
18
9000
1
19
1860
2
20
4000
1
21
0
1
22
0
1
23
0
2
24
0
1
25
5000
2
26
4000
1
27
12000
1
28
0
2
29
5192
2
...
...
...
9999970
0
1
9999971
3683
2
9999972
5000
1
9999973
0
1
9999974
0
1
9999975
4210
2
9999976
0
2
9999977
8000
1
9999978
5000
1
9999979
7669
1
9999980
13889
1
9999981
0
1
9999982
3000
1
9999983
0
2
9999984
7000
1
9999985
3000
2
9999986
7000
1
9999987
6000
2
9999988
15000
1
9999989
12000
2
9999990
1561
1
9999991
5217
2
9999992
0
1
9999993
2607
2
9999994
17376
1
9999995
0
1
9999996
0
1
9999997
9000
1
9999998
7000
1
9999999
0
1
10000000 rows × 2 columns
In [125]:
# Boolean filter: keep only the rows whose balance is strictly above 3000.
f.loc[f['balance'].gt(3000)]
Out[125]:
custID
gender
state
cardholder
balance
numTrans
numIntlTrans
creditLine
fraudRisk
5
6
2
44
2
5546
21
0
13
0
7
8
1
10
1
6016
20
3
6
0
10
11
1
46
1
4601
54
0
4
0
13
14
2
38
1
9000
41
3
8
0
14
15
1
27
1
5227
60
0
17
0
16
17
2
18
1
13970
20
0
13
0
17
18
1
35
1
3113
13
6
8
0
18
19
1
5
1
9000
20
2
8
0
20
21
1
39
1
4000
24
0
3
0
25
26
2
29
1
5000
4
9
4
0
26
27
1
38
1
4000
21
5
3
0
27
28
1
9
1
12000
20
0
11
0
29
30
2
49
1
5192
84
0
13
1
31
32
1
31
1
6000
8
0
5
0
32
33
2
48
1
4899
49
0
10
0
33
34
2
20
1
7000
43
2
6
0
35
36
1
20
1
10257
23
0
13
0
36
37
1
36
1
5000
24
0
4
0
38
39
1
5
1
5000
14
0
4
0
39
40
2
46
1
9000
7
0
8
0
41
42
1
35
1
4973
13
0
12
0
43
44
2
11
1
17656
16
3
25
1
47
48
1
3
1
11090
41
0
13
0
49
50
2
5
1
10826
4
0
20
0
53
54
2
5
1
8000
22
0
7
0
54
55
1
10
1
6000
52
0
5
0
55
56
2
43
2
9000
5
0
8
0
56
57
2
10
1
8000
30
60
7
1
59
60
2
23
2
11025
14
15
16
0
60
61
2
19
2
4000
1
2
3
0
...
...
...
...
...
...
...
...
...
...
9999933
9999934
1
44
1
4000
13
0
3
0
9999936
9999937
2
10
1
8000
56
0
7
0
9999937
9999938
2
7
1
7000
40
0
6
0
9999940
9999941
1
35
1
6000
30
26
5
0
9999944
9999945
1
35
1
7000
97
0
6
0
9999946
9999947
1
5
1
6000
2
6
5
0
9999948
9999949
1
2
1
9395
57
0
13
0
9999949
9999950
2
21
1
4000
10
0
3
0
9999950
9999951
2
35
1
7468
35
0
16
0
9999953
9999954
1
5
1
6729
35
0
6
0
9999954
9999955
2
29
1
4877
7
0
17
0
9999955
9999956
2
5
1
3770
98
0
23
0
9999960
9999961
1
35
1
4000
27
0
3
0
9999968
9999969
1
41
1
3938
24
9
16
0
9999971
9999972
2
40
1
3683
31
3
7
0
9999972
9999973
1
46
1
5000
69
0
4
0
9999975
9999976
2
48
1
4210
23
0
8
0
9999977
9999978
1
35
1
8000
24
0
7
0
9999978
9999979
1
44
1
5000
12
0
4
0
9999979
9999980
1
6
1
7669
5
3
7
0
9999980
9999981
1
10
1
13889
54
0
13
0
9999984
9999985
1
39
1
7000
30
0
6
0
9999986
9999987
1
23
1
7000
59
0
6
0
9999987
9999988
2
24
1
6000
46
0
5
0
9999988
9999989
1
18
1
15000
72
0
14
1
9999989
9999990
2
4
1
12000
17
7
11
0
9999991
9999992
2
36
1
5217
6
0
5
0
9999994
9999995
1
16
2
17376
3
0
20
0
9999997
9999998
1
24
1
9000
38
0
8
0
9999998
9999999
1
28
1
7000
20
19
6
0
5304124 rows × 9 columns
In [ ]:
Content source: decisionstats/pythonfordatascience
Similar notebooks: