Pandas Crash Course

We'll use NumPy a lot more than pandas, but here is a quick taste of pandas in case you haven't seen it before.


In [1]:
import pandas as pd

In [2]:
# Load the CSV file into a pandas DataFrame named `df`.
# The path is relative, so salaries.csv must sit in the notebook's working directory.
df = pd.read_csv('salaries.csv')

In [3]:
# A bare expression on the last line of a cell is displayed as the cell's output;
# here that shows the whole (three-row) DataFrame.
df


Out[3]:
Name Salary Age
0 John 50000 34
1 Sally 120000 45
2 Alyssa 80000 27

In [4]:
# Get a specific column from the DataFrame.
# Indexing with a single column label returns that column as a Series.
df['Name']


Out[4]:
0      John
1     Sally
2    Alyssa
Name: Name, dtype: object

In [5]:
# Same single-label selection, this time for the numeric Salary column.
df['Salary']


Out[5]:
0     50000
1    120000
2     80000
Name: Salary, dtype: int64

In [6]:
# Indexing with a *list* of labels (note the double brackets) selects several
# columns at once and returns a DataFrame rather than a Series.
df[['Name', 'Salary']]


Out[6]:
Name Salary
0 John 50000
1 Sally 120000
2 Alyssa 80000

In [7]:
# Select the Age column; the cells below compute its mean and filter on it.
df['Age']


Out[7]:
0    34
1    45
2    27
Name: Age, dtype: int64

In [8]:
# Calculate the mean of the Age column
df['Age'].mean()


Out[8]:
35.333333333333336

In [9]:
# Boolean mask: comparing a column with a value is applied element-wise and
# yields a Series of True/False, one entry per row.
df['Age'] > 30


Out[9]:
0     True
1     True
2    False
Name: Age, dtype: bool

In [20]:
# Keep the boolean mask in a variable so it can be reused to filter the DataFrame.
# NOTE(review): this assignment is valid and produces no output. The SyntaxError
# recorded under this cell comes from an earlier, different version of the cell
# (see the code quoted in the traceback) - Restart & Run All to clear it.
age_filter = df['Age'] > 30


  File "<ipython-input-20-53532bac93c9>", line 1
    type([(age_filter = df['Age'] > 30])
                      ^
SyntaxError: invalid syntax

In [11]:
# Indexing the DataFrame with the saved boolean mask keeps only the rows
# where the mask is True.
df[age_filter]


Out[11]:
Name Salary Age
0 John 50000 34
1 Sally 120000 45

In [21]:
# Filter the DataFrame in a single step by building the mask inline;
# only the rows that satisfy the condition (Age > 30) are returned.
df[df['Age'] > 30]


Out[21]:
Name Salary Age
0 John 50000 34
1 Sally 120000 45