notebook.community

Edit and run



In [3]:

    
import os
import glob



In [4]:

    
os.getcwd()









    Out[4]:





'C:\\Users\\Dell'



In [5]:

    
path = 'C:\\Users\\Dell\\Downloads'



In [6]:

    
extension = 'csv'
os.chdir(path)



In [7]:

    
# Collect every file in the current working directory whose name ends with the
# chosen extension. glob.glob already returns a list, so no comprehension is
# needed to copy it.
result = glob.glob('*.{}'.format(extension))
print(result)









    



['AirPassengers.csv', 'BigDiamonds.csv', 'Boston (1).csv', 'Boston.csv', 'ccFraud.csv', 'class2.csv', 'data1.csv', 'datasets.csv', 'Diamond (1).csv', 'Diamond (2).csv', 'Diamond (3).csv', 'Diamond (4).csv', 'Diamond (5).csv', 'Diamond (6).csv', 'Diamond (7).csv', 'Diamond (8).csv', 'Diamond.csv', 'Hdma.csv', 'Hedonic.csv', 'pgd.csv', 'protein.csv', 'RidingMowers.csv', 'sales-of-shampoo-over-a-three-ye.csv', 'telecom.csv']



In [8]:

    
import pandas as pd



In [40]:

    
iris=pd.read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris.csv")



In [45]:

    
iris.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
Sepal.Length    150 non-null float64
Sepal.Width     150 non-null float64
Petal.Length    150 non-null float64
Petal.Width     150 non-null float64
Species         150 non-null object
dtypes: float64(4), object(1)
memory usage: 5.9+ KB



In [104]:

    
import seaborn as sns
%matplotlib inline



In [46]:

    
import matplotlib.pyplot as plt 
%matplotlib inline



In [49]:

    
plt.bar(iris['Sepal.Length'],iris['Sepal.Width'],label="bar1",color='r')









    Out[49]:





<Container object of 150 artists>



In [50]:

    
plt.bar(iris['Petal.Length'],iris['Petal.Width'],label="bar1",color='g')









    Out[50]:





<Container object of 150 artists>



In [52]:

    
fig=plt.figure()









    





<matplotlib.figure.Figure at 0xd110f60>



In [53]:

    
ax1=fig.add_subplot(1,2,1)
ax2=fig.add_subplot(1,2,2)



In [58]:

    
ax1.boxplot(iris['Sepal.Length'])
ax1.set_xlabel('Sepal.Length')
# `fig` was created in an earlier cell. With the inline backend a figure is only
# rendered automatically by the cell that creates it, so plt.show() draws nothing
# here; ending the cell with the figure object displays the updated figure.
fig



In [59]:

    
ax2.boxplot(iris['Petal.Length'])
ax2.set_xlabel('Petal.Length')
# `fig` was created in an earlier cell. With the inline backend a figure is only
# rendered automatically by the cell that creates it, so plt.show() draws nothing
# here; ending the cell with the figure object displays the updated figure.
fig



In [60]:

    
plt.boxplot(iris['Petal.Length'])









    Out[60]:





{'boxes': [<matplotlib.lines.Line2D at 0xd434320>],
 'caps': [<matplotlib.lines.Line2D at 0xd43add8>,
  <matplotlib.lines.Line2D at 0xd43af60>],
 'fliers': [<matplotlib.lines.Line2D at 0xd43ffd0>],
 'means': [],
 'medians': [<matplotlib.lines.Line2D at 0xd43f7f0>],
 'whiskers': [<matplotlib.lines.Line2D at 0xd434cf8>,
  <matplotlib.lines.Line2D at 0xd434eb8>]}



In [61]:

    
plt.hist(iris['Sepal.Length'])









    Out[61]:





(array([  9.,  23.,  14.,  27.,  16.,  26.,  18.,   6.,   5.,   6.]),
 array([ 4.3 ,  4.66,  5.02,  5.38,  5.74,  6.1 ,  6.46,  6.82,  7.18,
         7.54,  7.9 ]),
 <a list of 10 Patch objects>)



In [64]:

    
plt.scatter(iris['Petal.Length'],iris['Sepal.Length'])









    Out[64]:





<matplotlib.collections.PathCollection at 0x15efc668>



In [ ]:



In [67]:

    
# Number of rows per species. The Series method replaces the top-level
# pd.value_counts helper, which is deprecated in recent pandas releases.
slices = iris.Species.value_counts()
print(slices)









    



virginica     50
setosa        50
versicolor    50
Name: Species, dtype: int64



In [71]:

    
labels=pd.Series(iris.Species.unique())
print(labels)









    



0        setosa
1    versicolor
2     virginica
dtype: object



In [ ]:

    
colors=['r','y','g']



In [83]:

    
# Take the wedge labels from the counts themselves so every label is guaranteed
# to match its slice. A hand-typed label list silently mislabels wedges whenever
# value_counts() returns the species in a different order.
species_counts = iris.Species.value_counts()
plt.pie(species_counts, labels=species_counts.index, colors=['r','y','g'], autopct='%1.1f%%')









    Out[83]:





([<matplotlib.patches.Wedge at 0x19c3fb70>,
  <matplotlib.patches.Wedge at 0x19c3cda0>,
  <matplotlib.patches.Wedge at 0x19ca6f98>],
 [<matplotlib.text.Text at 0x19c3c3c8>,
  <matplotlib.text.Text at 0x19ca65c0>,
  <matplotlib.text.Text at 0x19cb37b8>],
 [<matplotlib.text.Text at 0x19c3c860>,
  <matplotlib.text.Text at 0x19ca6a58>,
  <matplotlib.text.Text at 0x19cb3c50>])



In [84]:

    
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
np.random.seed(sum(map(ord, "aesthetics")))



In [9]:

    
os.listdir()









    Out[9]:





['140749_2017.pdf',
 '2011-F01-0700-Rev4-MDDS.XLSX',
 '20150817143155.pdf',
 '20160111060911.pdf',
 '20170214052225.pdf',
 '7z1604-x64.exe',
 '7z1604.exe',
 '861415_10151432783238421_2124270505_o (1).jpg',
 '861415_10151432783238421_2124270505_o.jpg',
 'AirPassengers.csv',
 'ajayo.jpg',
 'Alison Python  Invoice   - Sheet1.pdf',
 'Alison SAS  Invoice   - Sheet1.pdf',
 'All+CSV+Files+in+a+Folder.ipynb',
 'Allison Interview Jones Invoice   - Sheet1.pdf',
 'Anaconda3-4.2.0-Windows-x86_64.exe',
 'anscombe+dataset.ipynb',
 'apachehttpd.exe',
 'April invoice adaptive analytics   - Sheet1.pdf',
 'Assignment14_BusinessAnalytics (1).docx',
 'Assignment14_BusinessAnalytics.docx',
 'Assignment15_BusinessAnalytics.docx',
 'Assignment16_BusinessAnalytics (1).docx',
 'Assignment16_BusinessAnalytics (2).docx',
 'Assignment16_BusinessAnalytics.docx',
 'aug ust 2008.JPG',
 'avast_free_antivirus_setup_online.exe',
 'avinash_ltv.zip',
 'BigDiamonds.csv',
 'BigDiamonds.csv (1).zip',
 'BigDiamonds.csv (2)',
 'BigDiamonds.csv (2).zip',
 'BigDiamonds.csv (3).zip',
 'BigDiamonds.csv.zip',
 'Boston (1).csv',
 'Boston.csv',
 'CAM- Ajay Ohri (1).pdf',
 'CAM- Ajay Ohri.pdf',
 'camtasia.exe',
 'ccFraud.csv',
 'Certificate of Incorporation - U74999DL2015PTC282030 (26 June 2015).pdf',
 'CHAP1-6PythonforRUsersAnapproachforDataScience.docx',
 'chapter+3+_+spark.html',
 'chi+square+test.ipynb',
 'chromeinstall-8u111.exe',
 'Cisco_WebEx_Add-On.exe',
 'class2.csv',
 'Collabera Invoice (1).pdf',
 'Collabera Invoice.pdf',
 'Collectcent Invoice.pdf',
 'college degrees.pdf',
 'DAP 1.pdf',
 'DAP 1.pptx',
 'DAP 6 RDBMS and SQL.pdf',
 'DAP 6 RDBMS and SQL.pptx',
 'Data Analysis (1).7z',
 'Data Analysis (1).rar',
 'Data Analysis (2).rar',
 'Data Analysis (3).rar',
 'Data Analysis.rar',
 'Data Viz.pptx',
 'data+exploration.ipynb',
 'data+manipulation.ipynb',
 'data+munging+again.ipynb',
 'data+wrangling+titanic+dataset.ipynb',
 'data1.csv',
 'datasets.csv',
 'Decision Trees.pdf',
 'DecisionStatsOfferLetter.docx',
 'DecisionStatsRelievingLetter.docx',
 'descriptive+stats+in+Python.ipynb',
 'desktop.ini',
 'Diamond (1).csv',
 'Diamond (2).csv',
 'Diamond (3).csv',
 'Diamond (4).csv',
 'Diamond (5).csv',
 'Diamond (6).csv',
 'Diamond (7).csv',
 'Diamond (8).csv',
 'Diamond.csv',
 'DolbyVoiceClient.msi',
 'DropboxInstaller.exe',
 'edb_npgsql.exe',
 'edb_pgjdbc.exe',
 'edb_psqlodbc.exe',
 'edb_psqlodbc.exe-20170203172812',
 'edb_psqlodbc.exe-20170307203617',
 'final invoice edureka  - Sheet1.pdf',
 'FinalPythonforRUsersAnapproachforDataScience (1).docx',
 'FinalPythonforRUsersAnapproachforDataScience (2).docx',
 'FinalPythonforRUsersAnapproachforDataScience (3).docx',
 'FinalPythonforRUsersAnapproachforDataScience (4).docx',
 'FinalPythonforRUsersAnapproachforDataScience.docx',
 'final_webinar (1).pdf',
 'final_webinar.pdf',
 'Git-2.11.0-64-bit.exe',
 'Git-2.12.0-64-bit.exe',
 'GitHubSetup (1).exe',
 'GitHubSetup (2).exe',
 'GitHubSetup.exe',
 'GOMAUDIOGLOBALSETUP.EXE',
 'Hdma.csv',
 'Hedonic.csv',
 'HP Downloads',
 'HPSupportSolutionsFramework-12.5.32.203.exe',
 'image.png',
 'IMS PROSCHOOL Workshop.pptx.pdf',
 'IMS PROSCHOOL Workshop.pptx.pptx',
 'internship.docx',
 'Introduction to SAS (1).pdf',
 'Introduction to SAS Part 1 (1).pdf',
 'Introduction to SAS Part 1.pdf',
 'Introduction to SAS.pdf',
 'introductory+python.ipynb',
 'Invoice for Digital Vidya.pdf',
 'Invoice for Weekendr.pdf',
 'Invoice format - Ajay Ohri CONTATA (1).xls',
 'Invoice format - Ajay Ohri CONTATA.xls',
 'invoice rapid miner.pdf',
 'Invoice trafla format.docx',
 'iris2 (1).ipynb',
 'iris2 (2).ipynb',
 'iris2.ipynb',
 'January invoice Indicus  .pdf',
 'June AV   Invoice   - Sheet1.pdf',
 'Lecture 6 - KNN & Naive Bayes.ppt',
 'Local Disk (C) - Shortcut.lnk',
 'logistic regression - script for ppt.R',
 'logistic_regression_-_script_for_ppt.html',
 'lyncentry.exe',
 'March invoice Indicus   - Sheet1.pdf',
 'matplotlib+cars.ipynb',
 'matplotlib+line+graph.ipynb',
 'mongodb-win32-x86_64-2008plus-ssl-3.4.2-signed.msi',
 'mongodb-win32-x86_64-3.4.2-signed.msi',
 'mortDefault',
 'mortDefault.zip',
 'mtcarslm.R',
 'multiple+file+concat+in+pandas (1).ipynb',
 'multiple+file+concat+in+pandas.ipynb',
 'my+first+class+in+python.ipynb',
 'nltk.ipynb',
 'notebook-Copy1.html',
 'Offer Letter - Ajay Ohri (1).pdf',
 'Offer Letter - Ajay Ohri.pdf',
 'Other Data Mining  Methods (1).pdf',
 'Other Data Mining  Methods.pdf',
 'output1 (1).xls',
 'output1 (2).xls',
 'output1.xls',
 'pandas+11.ipynb',
 'pandas+analysis+1.ipynb',
 'pandas+data+manipulation.ipynb',
 'passport image.pdf',
 'Pawconinvoice2016.pdf',
 'Pawconinvoice2017 (1).pdf',
 'Pawconinvoice2017 (2).pdf',
 'Pawconinvoice2017 (3).pdf',
 'Pawconinvoice2017.pdf',
 'Payslip Feb 2016 - Sheet1.pdf',
 'Payslip Feb 2016.pdf',
 'Payslip Format Decisionstats - Sheet1.pdf',
 'Payslip Jan 2016 - Sheet1.pdf',
 'Payslip Jan 2016.pdf',
 'Payslip March 2016 - Sheet1.pdf',
 'Payslip March 2016.pdf',
 'pgd.csv',
 'postgresql-9.6.1-1-windows-x64.exe',
 'Program 1-results.rtf',
 'protein.csv',
 'python+with+postgres (1).ipynb',
 'python+with+postgres.ipynb',
 'Python.docx',
 'R-3.3.2-win.exe',
 'R-3.3.3-win.exe',
 'RCertificationExam.pdf',
 'reg+model.ipynb',
 'Revision -  Business Analytics (1).pdf',
 'Revision -  Business Analytics.pdf',
 'RidingMowers.csv',
 'rsconnect',
 'RStudio-1.0.136.exe',
 'Salary Slip, Feb 2016.pdf',
 'Salary Slip, Jan 2016.pdf',
 'Salary Slip, March 2016 (1).pdf',
 'Salary Slip, March 2016 (2).pdf',
 'Salary Slip, March 2016.pdf',
 'sales-of-shampoo-over-a-three-ye.csv',
 'SAS part 2.pdf',
 'SAS Part 3.pdf',
 'sas-university-edition-107140.pdf',
 'Scan0095.pdf',
 'Scanned Invoice for Collabera.pdf',
 'Screenshot 2017-01-23 12.36.55.png',
 'September invoice adaptive analytics   - Sheet1.pdf',
 'simple+matplot+graph.ipynb',
 'Sollers January.pdf',
 'sqlalchemy.ipynb',
 'stackoverflow-dump-analysis.html',
 'Sunstone.pdf',
 'Tableau.pdf',
 'TableauPublicDesktop-64bit-10-1-3.exe',
 'TableauPublicDesktop-64bit-10-1-4.exe',
 'telecom.csv',
 'TelecomServiceProviderCaseStudy.pdf',
 'test+web+scraping.ipynb',
 'Text Mining (1).pdf',
 'Text Mining.pdf',
 'third.sas7bdat',
 'Time Series  Forecasting (1).pdf',
 'Time Series  Forecasting.pdf',
 'ts.html',
 'ts.R',
 'Unconfirmed 373974.crdownload',
 'Unconfirmed 376991.crdownload',
 'Unconfirmed 950045.crdownload',
 'uTorrent.exe',
 'VirtualBox-5.1.8-111374-Win (1).exe',
 'VirtualBox-5.1.8-111374-Win.exe',
 'visualcppbuildtools_full.exe',
 'Web+Scraping+Yelp+with+Beautiful+Soup.ipynb',
 'Webinar for Business Analytics.pdf',
 'WhatsApp Image 2017-02-18 at 08.42.55 (1).jpeg',
 'WhatsApp Image 2017-02-18 at 08.42.55.jpeg']



In [85]:

    
def sinplot(flip=1):
    """Plot six sine curves on the current pyplot axes.

    The curves share 100 x-values evenly spaced over [0, 14]. Curve ``k``
    (k = 1..6) is shifted by ``k * 0.5`` and scaled by ``(7 - k) * flip``,
    so the amplitude shrinks from 6 down to 1.

    Parameters
    ----------
    flip : number, default 1
        Multiplier applied to every curve; a negative value mirrors the
        curves about the x-axis.
    """
    xs = np.linspace(0, 14, 100)
    step = 1
    while step < 7:
        shifted = np.sin(xs + step * .5)
        plt.plot(xs, shifted * (7 - step) * flip)
        step += 1



In [86]:

    
sinplot()



In [87]:

    
sns.set_style("white")



In [88]:

    
sinplot()



In [89]:

    
sns.set_style("ticks")
sinplot()



In [91]:

    
sns.palplot(sns.color_palette())



In [92]:

    
sns.palplot(sns.color_palette("hls",8))



In [93]:

    
sns.palplot(sns.color_palette("BuGn", 10))



In [94]:

    
sinplot()



In [95]:

    
diamonds=pd.read_csv("C:\\Users\\Dell\\Downloads\\BigDiamonds.csv\\BigDiamonds.csv")



In [96]:

    
type(diamonds)









    Out[96]:





pandas.core.frame.DataFrame



In [97]:

    
len(diamonds)









    Out[97]:





598024



In [98]:

    
diamonds.columns









    Out[98]:





Index(['Unnamed: 0', 'carat', 'cut', 'color', 'clarity', 'table', 'depth',
       'cert', 'measurements', 'price', 'x', 'y', 'z'],
      dtype='object')



In [99]:

    
diamonds.shape









    Out[99]:





(598024, 13)



In [15]:

    
diamonds.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 598024 entries, 0 to 598023
Data columns (total 13 columns):
Unnamed: 0      598024 non-null int64
carat           598024 non-null float64
cut             598024 non-null object
color           598024 non-null object
clarity         598024 non-null object
table           598024 non-null float64
depth           598024 non-null float64
cert            598024 non-null object
measurements    597978 non-null object
price           597311 non-null float64
x               596209 non-null float64
y               596172 non-null float64
z               595480 non-null float64
dtypes: float64(7), int64(1), object(5)
memory usage: 59.3+ MB



In [100]:

    
diamonds.head()









    Out[100]:






  
    
      
      Unnamed: 0
      carat
      cut
      color
      clarity
      table
      depth
      cert
      measurements
      price
      x
      y
      z
    
  
  
    
      0
      1
      0.25
      V.Good
      K
      I1
      59.0
      63.7
      GIA
      3.96 x 3.95 x 2.52
      NaN
      3.96
      3.95
      2.52
    
    
      1
      2
      0.23
      Good
      G
      I1
      61.0
      58.1
      GIA
      4.00 x 4.05 x 2.30
      NaN
      4.00
      4.05
      2.30
    
    
      2
      3
      0.34
      Good
      J
      I2
      58.0
      58.7
      GIA
      4.56 x 4.53 x 2.67
      NaN
      4.56
      4.53
      2.67
    
    
      3
      4
      0.21
      V.Good
      D
      I1
      60.0
      60.6
      GIA
      3.80 x 3.82 x 2.31
      NaN
      3.80
      3.82
      2.31
    
    
      4
      5
      0.31
      V.Good
      K
      I1
      59.0
      62.2
      EGL
      4.35 x 4.26 x 2.68
      NaN
      4.35
      4.26
      2.68



In [101]:

    
diamonds2=diamonds.copy()



In [102]:

    
# Frequency of each cut grade. The Series method replaces the deprecated
# top-level pd.value_counts helper.
diamonds2['cut'].value_counts()









    Out[102]:





Ideal     369448
V.Good    168896
Good       59680
Name: cut, dtype: int64



In [103]:

    
diamonds.describe()









    



C:\Users\Dell\Anaconda3\lib\site-packages\numpy\lib\function_base.py:3834: RuntimeWarning: Invalid value encountered in percentile
  RuntimeWarning)






    Out[103]:






  
    
      
      Unnamed: 0
      carat
      table
      depth
      price
      x
      y
      z
    
  
  
    
      count
      598024.000000
      598024.000000
      598024.000000
      598024.000000
      597311.000000
      596209.000000
      596172.000000
      595480.000000
    
    
      mean
      299012.500000
      1.071297
      57.631077
      61.063683
      8753.017974
      5.990771
      6.198671
      4.033430
    
    
      std
      172634.803028
      0.812696
      4.996892
      7.604342
      13017.567760
      1.530936
      1.485891
      1.240951
    
    
      min
      1.000000
      0.200000
      0.000000
      0.000000
      300.000000
      0.150000
      1.000000
      0.040000
    
    
      25%
      149506.750000
      0.500000
      56.000000
      61.000000
      NaN
      NaN
      NaN
      NaN
    
    
      50%
      299012.500000
      0.900000
      58.000000
      62.100000
      NaN
      NaN
      NaN
      NaN
    
    
      75%
      448518.250000
      1.500000
      59.000000
      62.700000
      NaN
      NaN
      NaN
      NaN
    
    
      max
      598024.000000
      9.250000
      75.900000
      81.300000
      99990.000000
      13.890000
      13.890000
      13.180000



In [107]:

    
# Remove the leftover CSV row-number column. The axis is named via `columns=`
# because the positional axis argument is no longer accepted by current pandas,
# and errors="ignore" keeps the cell re-runnable once the column is gone.
diamonds = diamonds.drop(columns="Unnamed: 0", errors="ignore")



In [105]:

    
diamonds=diamonds.dropna(how="any")



In [108]:

    
sns.distplot(diamonds.price, bins=20, kde=True, rug=False)









    



C:\Users\Dell\Anaconda3\lib\site-packages\statsmodels\nonparametric\kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j






    Out[108]:





<matplotlib.axes._subplots.AxesSubplot at 0x201290b8>



In [110]:

    
sns.distplot(diamonds.price, bins=20, kde=False, rug=False)









    Out[110]:





<matplotlib.axes._subplots.AxesSubplot at 0xb4632b0>



In [111]:

    
sns.boxplot(x="color", y="price", data=diamonds)









    Out[111]:





<matplotlib.axes._subplots.AxesSubplot at 0xb521a90>



In [112]:

    
# Joint distribution of price (x) against carat (y). The variables are passed
# by keyword because current seaborn no longer accepts them positionally.
sns.jointplot(x='price', y='carat', data=diamonds2)









    Out[112]:





<seaborn.axisgrid.JointGrid at 0x19f420b8>



In [113]:

    
# One box-plot panel per cut grade, showing price by colour.
# `factorplot` was renamed to `catplot` and its `size` argument to `height`
# in newer seaborn releases; the old spellings are deprecated or removed.
sns.catplot(x="color", y="price",
            col="cut", data=diamonds, kind="box", height=4, aspect=.5);



In [114]:

    
from ggplot import *



In [116]:

    
# NOTE(review): `p` is assigned in the cell numbered In [115], which appears
# further down in this notebook. On a fresh kernel that cell must run first,
# otherwise this line raises NameError.
p + geom_point()









    












    Out[116]:





<ggplot: (27091425)>



In [117]:

    
p + geom_point() +facet_grid('cut')









    












    Out[117]:





<ggplot: (27091425)>



In [118]:

    
p = ggplot(aes(x='price', y='carat',color="cut"), data=diamonds)
p + geom_point()









    












    Out[118]:





<ggplot: (-9223372036842762997)>



In [115]:

    
p = ggplot(aes(x='price', y='carat'), data=diamonds)
p









    












    Out[115]:





<ggplot: (27091425)>



In [39]:

    
p = ggplot(aes(x='price', y='carat',color="clarity"), data=diamonds)
p + geom_point()









    












    Out[39]:





<ggplot: (36002880)>



In [ ]:

    
# Build an indicator frame of the same shape: notnull() yields a boolean mask
# and multiplying by 1 converts it to 1 (value present) / 0 (value missing).
# NOTE(review): this rebinds `diamonds`, so the actual measurements are no
# longer reachable through this name after the cell runs.
diamonds = diamonds.notnull() * 1



In [ ]:

    
diamonds.head()



In [ ]:

    
# Make sure the CSV row-number column is gone. An earlier cell already drops it,
# so errors='ignore' avoids an exception when it is absent; `columns=` replaces
# the positional axis argument that current pandas no longer accepts.
diamonds = diamonds.drop(columns='Unnamed: 0', errors='ignore')



In [ ]:

    
diamonds.head()



In [ ]:

    
diamonds2.head()



In [ ]:

    
diamonds3=diamonds2.copy()



In [ ]:

    
diamonds2.fillna("AJAY").head()



In [ ]:

    
diamonds2=diamonds2.dropna(how="any")



In [ ]:

    
diamonds2.info()



In [ ]:

    
data=diamonds3.values
data



In [ ]:

    
diamonds3.columns



In [ ]:

    
# Rebuild a DataFrame from the raw value array: rows are labelled
# 0 .. len(data) - 1 and the column names are copied from diamonds3.
# (The previous `[0:, 0:]` / `[0:]` slices selected everything, so they are dropped.)
g = pd.DataFrame(data=data,
                 index=range(len(data)),
                 columns=diamonds3.columns)



In [ ]:

    
g.head()



In [ ]:

    
diamonds3.iloc[2:5,:]



In [ ]:

    
diamonds3.iloc[:,2:5]



In [ ]:

    
diamonds3[['cut','color','clarity']].head()



In [ ]:

    
# Rows labelled 20 through 40 (inclusive). `.ix` was removed from pandas;
# `.loc` performs the same label-based slice on this integer index.
diamonds3.loc[20:40]



In [ ]:

    
# Pairwise correlations of the numeric columns only. Without numeric_only=True,
# pandas 2.x tries to include the text columns (cut, color, ...) and raises.
diamonds3.corr(numeric_only=True)



In [ ]:

    
diamonds3.head()



In [ ]:

    
diamonds3.drop(diamonds3.index[[1,3]]).head()



In [ ]:

    
# Integer positions 0..99 as a Series.
s = pd.Series(range(100))



In [ ]:

    
type(s)



In [ ]:

    
# Drop the rows at the positions held in `s`. The positions are passed as a
# plain array: wrapping the Series in an extra list (`index[[s]]`) is read as a
# 2-D indexer by current numpy/pandas and fails.
diamonds3.drop(diamonds3.index[s.values]).head()



In [ ]:

    
del diamonds



In [ ]:

    
diamonds3.query('carat >.50 and price >3000')



In [ ]:

    
del diamonds3["Unnamed: 0"]



In [ ]:

    
diamonds3.query('price >5000')



In [ ]:

    
diamonds2.query('color=="J" or price >4000')



In [ ]:

    
diamonds3['newvar']=1



In [ ]:

    
diamonds3.head()



In [ ]:

    
diamonds3.loc[diamonds3.price>=5000,'newvar']="Expensive"



In [ ]:

    
diamonds3.query('price >5000').head()



In [ ]:

    
diamonds3['ppc']=diamonds3.price/diamonds3.carat



In [ ]:

    
diamonds3.head()



In [ ]:

    
diamonds4=diamonds3.copy()



In [ ]:

    
diamonds3=diamonds3.dropna(how='any')



In [ ]:

    
diamonds3.head()



In [ ]:

    
os.listdir()



In [ ]:

    
# List the files in the current working directory that carry the chosen
# extension. glob.glob already returns a list, so it is used directly.
result = glob.glob('*.{}'.format(extension))
print(result)



In [ ]:

    
f=pd.read_csv('ccFraud.csv')

Credit for this part: http://www.cs.tufts.edu/comp/150VAN/demos/DataWrangling.pdf. The data come from https://packages.revolutionanalytics.com/datasets/ccFraud.csv



In [ ]:

    
f.dtypes



In [ ]:

    
f.index



In [ ]:

    
f.columns



In [ ]:

    
f.values



In [ ]:

    
f.describe()



In [ ]:

    
f.T



In [ ]:

    
# Order the rows by account balance. DataFrame.sort() was removed from pandas;
# sort_values(by=...) is its replacement.
f.sort_values(by='balance')



In [ ]:

    
f.sort_index(axis=0, ascending=False)



In [ ]:

    
f.sort_index(axis=1)



In [ ]:

    
f.head()



In [ ]:

    
f.tail(2)



In [ ]:

    
f['balance']



In [ ]:

    
f[1:3]



In [ ]:

    
f.loc[:,['balance' , 'gender' ]]



In [ ]:

    
f[['balance' , 'gender' ]]



In [ ]:

    
f[f['balance'] > 3000]



In [ ]:

	Unnamed: 0	carat	cut	color	clarity	table	depth	cert	measurements	price	x	y	z
0	1	0.25	V.Good	K	I1	59.0	63.7	GIA	3.96 x 3.95 x 2.52	NaN	3.96	3.95	2.52
1	2	0.23	Good	G	I1	61.0	58.1	GIA	4.00 x 4.05 x 2.30	NaN	4.00	4.05	2.30
2	3	0.34	Good	J	I2	58.0	58.7	GIA	4.56 x 4.53 x 2.67	NaN	4.56	4.53	2.67
3	4	0.21	V.Good	D	I1	60.0	60.6	GIA	3.80 x 3.82 x 2.31	NaN	3.80	3.82	2.31
4	5	0.31	V.Good	K	I1	59.0	62.2	EGL	4.35 x 4.26 x 2.68	NaN	4.35	4.26	2.68

	Unnamed: 0	carat	table	depth	price	x	y	z
count	598024.000000	598024.000000	598024.000000	598024.000000	597311.000000	596209.000000	596172.000000	595480.000000
mean	299012.500000	1.071297	57.631077	61.063683	8753.017974	5.990771	6.198671	4.033430
std	172634.803028	0.812696	4.996892	7.604342	13017.567760	1.530936	1.485891	1.240951
min	1.000000	0.200000	0.000000	0.000000	300.000000	0.150000	1.000000	0.040000
25%	149506.750000	0.500000	56.000000	61.000000	NaN	NaN	NaN	NaN
50%	299012.500000	0.900000	58.000000	62.100000	NaN	NaN	NaN	NaN
75%	448518.250000	1.500000	59.000000	62.700000	NaN	NaN	NaN	NaN
max	598024.000000	9.250000	75.900000	81.300000	99990.000000	13.890000	13.890000	13.180000