In [1]:
# 1. import pandas as pd / pd.read_csv() the simple.csv. Get top 3 entries via .head() and assign to df.
import pandas as pd
# Read the CSV and keep only the first three rows as the working frame.
df = (
    pd.read_csv('data/simple.csv')
    .head(3)
)
In [2]:
# 2. Define a function that takes an input, prints .name attribute of input, prints type(input) and prints the input.
def print_info(x):
    """Print the ``name`` attribute, the type, and the value of ``x``.

    Written to be passed to ``DataFrame.apply``, which hands the function one
    Series at a time, but any object exposing a ``name`` attribute works.

    Args:
        x: Object to describe. Must have a ``name`` attribute.

    Returns:
        None. The function only prints.

    Raises:
        AttributeError: If ``x`` has no ``name`` attribute.
    """
    print(x.name)
    print(type(x))
    print(x)


# Bare name as the cell's last expression: shows the function object's repr.
print_info
Out[2]:
In [5]:
# 3. Use the dataframe.apply() method, and supply the function (with no parenthesis) and axis=1
# axis=1 passes print_info one row at a time (each row arrives as a Series).
# Binding the result to a name keeps the cell from displaying it.
output = df.apply(func=print_info, axis=1)
In [6]:
# 4. Now do it with axis=0. Which one of these applies the function by row? Which by column? What is the significance of .name?
# axis=0 passes print_info one column at a time (each column arrives as a Series).
# As the last expression of the cell, the returned Series is displayed.
df.apply(func=print_info, axis=0)
Out[6]:
In [7]:
# 5. Write a function for a row series that gets the date and count and prints "The Count was X on Date X."
def get_count(row):
    """Print a sentence reporting the count recorded on a row's date.

    Args:
        row: Mapping-like object (for example a DataFrame row Series) that
            supports ``row['Date']`` and ``row['Count']``.

    Returns:
        None. The sentence is printed, not returned.

    Raises:
        KeyError: If ``row`` lacks a ``'Date'`` or ``'Count'`` entry.
    """
    date = row['Date']
    count = row['Count']
    print("The count was {} on date {}.".format(count, date))


# Bare name as the cell's last expression: shows the function object's repr.
get_count
Out[7]:
In [8]:
# 6. Write a function that returns a string. Apply this function to your dataframe. What is the result?
def get_count_string(row):
    """Return a sentence reporting the count recorded on a row's date.

    Unlike a printing function, this one returns its text, so
    ``DataFrame.apply`` can collect the per-row results.

    Args:
        row: Mapping-like object (for example a DataFrame row Series) that
            supports ``row['Date']`` and ``row['Count']``.

    Returns:
        str: ``"The count was <count> on date <date>."``

    Raises:
        KeyError: If ``row`` lacks a ``'Date'`` or ``'Count'`` entry.
    """
    return "The count was {} on date {}.".format(row['Count'], row['Date'])
# Apply once and reuse the result; the original ran the same row-wise apply
# twice, once for print() and once for type().
count_strings = df.apply(get_count_string, axis=1)
print(count_strings)
type(count_strings)
Out[8]:
In [10]:
# 7. Use your new function, and assign the returned series to a new column, df['New Column 1']
# Build the per-row sentences first, then attach them to the frame as a column.
row_sentences = df.apply(get_count_string, axis=1)
df['New Column 1'] = row_sentences
df
Out[10]:
In [11]:
# 8. Write a function that takes a count and adds 5. Use map() with your function on your df['Count']
def add_to_count(cell_value):
    """Return ``cell_value`` increased by 5.

    Args:
        cell_value: A single value that supports ``+ 5``, such as one entry
            of a numeric column.

    Returns:
        The result of ``cell_value + 5``.
    """
    return cell_value + 5
# Series.map calls add_to_count on each value of the column and returns a new
# Series; df itself is not modified here.
counts = df['Count']
counts.map(add_to_count)
Out[11]:
In [13]:
# 9. This returns a series with each value individually altered. Assign the result to df['New Column 2']
# Compute the shifted counts first, then store them on the frame as a column.
raised_counts = df['Count'].map(add_to_count)
df['New Column 2'] = raised_counts
df
Out[13]: