In [183]:
# Import a library called Pandas and assign it to the alias "pd." This is a standard convention.
# Import a library called Numpy and assign it to the alias "np."
import pandas as pd
import numpy as np
In [221]:
# Read the sample CSV file into a pandas DataFrame (a spreadsheet-like table) named df.
df = pd.read_csv(filepath_or_buffer='resources/sample_data.csv')
In [222]:
# Display the dataframe as loaded; a bare expression on the last line of a cell is rendered as the cell's output.
df
Out[222]:
In [223]:
# Lookup table of item renames: each key is an existing item name,
# each value is the name that should replace it.
item_name_changes = {
    'European Pear': 'Continental Fruit',
    'Red Apple': 'Pomme Rouge',
}
In [224]:
# Rename the entries of the Item column using the item_name_changes mapping.
# The result is assigned back to the column instead of calling
# df['Item'].replace(..., inplace=True): an in-place call on a selected column is
# chained assignment, which recent pandas versions warn about and which leaves df
# unchanged when Copy-on-Write is enabled.
df['Item'] = df['Item'].replace(item_name_changes)
In [225]:
# Create a function that converts a weight in pounds (x) to kilograms.
# The value is returned unrounded; no formatting is applied here.
def convert_lb_to_kg(x, kg_per_lb=0.453):
    """Convert a weight from pounds to kilograms.

    Args:
        x: Weight in pounds (a number, or any object that supports
            multiplication by a float).
        kg_per_lb: Kilograms per pound. Defaults to 0.453, the rounded
            factor this notebook uses (the exact value is 0.45359237).

    Returns:
        The weight in kilograms, unrounded.
    """
    return x * kg_per_lb
In [226]:
# Build the 'Amt Picked (kg)' column by passing every 'Amt Picked (lbs)' value through convert_lb_to_kg.
df['Amt Picked (kg)'] = df['Amt Picked (lbs)'].map(convert_lb_to_kg)
In [227]:
def convert_dollars_per_lb_to_euro_per_kg(x, eur_per_usd=0.95, kg_per_lb=0.453):
    """Convert a price in dollars per pound to euros per kilogram.

    The dollar amount is first multiplied by the exchange rate, then divided
    by the kilograms-per-pound factor to express the price per kilogram.

    Args:
        x: Price in dollars per pound (a number, or any object that supports
            multiplication and division by a float).
        eur_per_usd: Euros per dollar. Defaults to 0.95, the fixed rate this
            notebook uses.
        kg_per_lb: Kilograms per pound. Defaults to 0.453, the rounded
            factor this notebook uses.

    Returns:
        The price in euros per kilogram, unrounded.
    """
    return (x * eur_per_usd) / kg_per_lb
In [228]:
# Build the 'Sale Value (€/kg)' column by converting every 'Sale Value ($/lb)' value.
df['Sale Value (€/kg)'] = df['Sale Value ($/lb)'].map(convert_dollars_per_lb_to_euro_per_kg)
In [229]:
# Amount earned per row: kilograms picked multiplied by the sale value in euros per kilogram.
df['Amt Earned (€)'] = df['Amt Picked (kg)'].mul(df['Sale Value (€/kg)'])
In [230]:
# Create a new column holding the running (cumulative) total earned by each person, in row order.
# Note that Lucas' cumulative total increases after factoring in the Pomme Rouge sale.
# The groupby's own cumsum() is used instead of transform(np.cumsum): passing NumPy
# callables to transform is deprecated in recent pandas versions.
df['Cumulative Amt Earned Per Person (€)'] = df.groupby('Name')['Amt Earned (€)'].cumsum()
In [231]:
# Create a new column with the total 'Time (hrs)' per 'Product Category', summed over all rows
# in that category and repeated on each of those rows.
# The string alias 'sum' is used instead of np.sum: passing NumPy callables to transform
# is deprecated in recent pandas versions.
df['Collective Time Harvesting This Category (hrs)'] = df.groupby('Product Category')['Time (hrs)'].transform('sum')
In [232]:
# Format the listed columns for display: each value becomes a string with a thousands
# separator and one decimal place (e.g. 1234.56 -> '1,234.6').
# After this cell these columns hold strings, not numbers, so any further arithmetic
# must happen before it.
# Series.map is applied column by column instead of DataFrame.applymap, which is
# deprecated in recent pandas versions.
numeric_columns = ['Amt Picked (kg)', 'Sale Value (€/kg)', 'Amt Earned (€)', 'Cumulative Amt Earned Per Person (€)']
df[numeric_columns] = df[numeric_columns].apply(lambda column: column.map('{:,.1f}'.format))
In [233]:
# Remove the original pound- and dollar-based columns now that the converted columns exist.
df.drop(columns=['Amt Picked (lbs)', 'Sale Value ($/lb)'], inplace=True)
In [234]:
# Display the final dataframe; a bare expression on the last line of a cell is rendered as the cell's output.
df
Out[234]:
In [ ]: