Watch Me Code 1: Pandas Basics

  • Series
  • DataFrame
  • Select columns, Select rows with Boolean indexing

In [7]:
import pandas as pd

In [8]:
# A Series is a labelled 1-D column of values; `name` gives it a label.
fruits = pd.Series(
    data=['Apple', 'Banana', 'Cherry', 'Orange'],
    name="Fruit",
)
fruits


Out[8]:
0     Apple
1    Banana
2    Cherry
3    Orange
Name: Fruit, dtype: object

In [9]:
# An unnamed Series of integer quantities, one per fruit (default 0..3 index).
qtys = pd.Series(data=[5, 7, 2, 9])
qtys


Out[9]:
0    5
1    7
2    2
3    9
dtype: int64

In [11]:
# Build a DataFrame from a dict of columns: two Series plus a plain list.
# The dict keys become the column labels. Column order is pinned with
# `columns=` so the frame always displays as Fruit, Price, Qty, regardless
# of how the installed pandas version orders dict keys.
inventory = pd.DataFrame(
    {'Fruit': fruits, 'Qty': qtys, 'Price': [2.99, 1.99, 3.99, 2.99]},
    columns=['Fruit', 'Price', 'Qty'],
)
inventory


Out[11]:
Fruit Price Qty
0 Apple 2.99 5
1 Banana 1.99 7
2 Cherry 3.99 2
3 Orange 2.99 9

In [12]:
# Column selection: indexing with a single column label returns that column
# as a Series.
inventory['Fruit']


Out[12]:
0     Apple
1    Banana
2    Cherry
3    Orange
Name: Fruit, dtype: object

In [13]:
# Indexing with a *list* of labels (even a one-item list) returns a DataFrame
# rather than a Series.
inventory[['Fruit']]


Out[13]:
Fruit
0 Apple
1 Banana
2 Cherry
3 Orange

In [14]:
# Two labels in the list -> a DataFrame with just those two columns.
inventory[['Fruit', 'Price']]


Out[14]:
Fruit Price
0 Apple 2.99
1 Banana 1.99
2 Cherry 3.99
3 Orange 2.99

In [15]:
# Boolean index: comparing a column to a value yields one True/False per row.
inventory['Qty'] > 5


Out[15]:
0    False
1     True
2    False
3     True
Name: Qty, dtype: bool

In [16]:
# Indexing the DataFrame with that boolean Series keeps only the True rows.
inventory[inventory['Qty'] > 5]


Out[16]:
Fruit Price Qty
1 Banana 1.99 7
3 Orange 2.99 9

In [20]:
# Combine the row filter and the column selection in a single .loc call
# (rows where Qty > 5, columns Fruit and Price) instead of chaining two
# [] lookups, which first builds a throwaway two-column frame.
fruit_and_price_over5 = inventory.loc[inventory['Qty'] > 5, ['Fruit', 'Price']]
fruit_and_price_over5


Out[20]:
Fruit Price
1 Banana 1.99
3 Orange 2.99

In [19]:
# Same result, one step at a time: name the filtered rows first,
# then pick the columns from that intermediate frame.
large_qty = inventory[inventory['Qty'] > 5]
fruit_and_price_over5 = large_qty[['Fruit', 'Price']]
fruit_and_price_over5


Out[19]:
Fruit Price
1 Banana 1.99
3 Orange 2.99

In [ ]: