In [1]:
# Import libraries
import pandas as pd
import sys
In [2]:
# Report the interpreter and pandas versions this notebook is running on.
# print is called as a function so the cell also runs on Python 3; with a
# single parenthesised argument the same lines still work on Python 2.
print('Python version ' + sys.version)
print('Pandas version: ' + pd.__version__)
In [3]:
# Our small data set: the integers 0 through 9
d = list(range(10))
# Build a dataframe from the list (one row per value)
df = pd.DataFrame(d)
df
Out[3]:
In [4]:
# Let's change the name of the column.
# Assigning a list to df.columns replaces the column labels; the frame has a
# single column at this point, so the list holds exactly one name.
df.columns = ['Rev']
df
Out[4]:
In [5]:
# Let's add a column.
# Assigning a scalar to a column label that does not exist yet creates the
# column and fills every row with that value.
df['NewCol'] = 5
df
Out[5]:
In [6]:
# Let's modify our new column: add 1 to every value in it.
# Note: the cell reads and then overwrites 'NewCol', so each re-run of this
# cell adds another 1.
df['NewCol'] = df['NewCol'] + 1
df
Out[6]:
In [7]:
# We can delete columns.
# `del` removes the column from df itself, so re-running this cell without
# first re-creating 'NewCol' raises a KeyError.
del df['NewCol']
df
Out[7]:
In [8]:
# Let's add a couple of columns:
#   'test' is filled with the constant 3,
#   'col'  is populated from the existing 'Rev' column.
df['test'] = 3
df['col'] = df['Rev']
df
Out[8]:
In [9]:
# If we wanted, we could replace the index labels: one letter, 'a' through 'j', per row
i = list('abcdefghij')
df.index = i
df
Out[9]:
We can now start to select pieces of the dataframe using `loc` (label based) and `iloc` (integer position based).
In [10]:
# Select the row whose index label is 'a'
df.loc['a']
Out[10]:
In [11]:
# df.loc[inclusive:inclusive]
# Label-based slice: both the start label 'a' and the stop label 'd' are included
df.loc['a':'d']
Out[11]:
In [12]:
# df.iloc[inclusive:exclusive]
# Note: .iloc is strictly integer position based. It is available from [version 0.11.0](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#v0-11-0-april-22-2013)
# Here 0:3 selects the rows at positions 0, 1 and 2; position 3 is excluded.
df.iloc[0:3]
Out[12]:
We can also select using the column name.
In [13]:
# Select a single column by its name
df['Rev']
Out[13]:
In [14]:
# Select several columns at once by passing a list of column names
df[['Rev', 'test']]
Out[14]:
In [15]:
# df['ColumnName'][inclusive:exclusive]
# Select the column first, then take an integer slice of it: 0:3 keeps the
# first three values and excludes the one at position 3.
df['Rev'][0:3]
Out[15]:
In [16]:
# Open-ended slice: everything in 'col' from position 5 onward
df['col'][5:]
Out[16]:
In [17]:
# Select two columns, then keep only the first three rows of the result
df[['col', 'test']][:3]
Out[17]:
There are also some handy functions to select the top and bottom records of a dataframe.
In [18]:
# Select top N number of records (default = 5)
# Called without an argument, so the default N is used
df.head()
Out[18]:
In [19]:
# Select bottom N number of records (default = 5)
# Called without an argument, so the default N is used
df.tail()
Out[19]:
Author: David Rojas