In [1]:
%matplotlib inline
import pandas as pd
In [2]:
from IPython.core.display import HTML
css = open('style-table.css').read() + open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))
Out[2]:
In [3]:
sales1 = pd.read_csv('sales1.csv')
sales1
Out[3]:
In [4]:
sales2 = pd.read_csv('sales2.csv')
sales2.fillna('')
Out[4]:
In [5]:
# Challenge: first combine these sales together into a single dataframe,
# then compute how much money consumers spent on each book in each currency.
In [6]:
df1 = sales1.rename(columns={
'Book title': 'title',
'Number sold': 'number',
'Sales price': 'their_price',
'Royalty paid': 'we_got',
})
df1['currency'] = 'USD'
df1
Out[6]:
In [7]:
s = sales2.copy()
t = sales2['Title']
t = t.where(t.str.endswith(')')).str.split().str[-1].str.strip('()')
s['currency'] = t.fillna(method='bfill')
s = s[s['List price'].notnull()]
df2 = s.rename(columns={
'Units sold': 'number',
'Title': 'title',
'List price': 'their_price',
'Royalty': 'we_got',
})
s.head()
Out[7]:
In [8]:
df = pd.concat([df1, df2])
df
Out[8]:
In [9]:
# First try: differences in case and punctuation
# make some books look like two different entries.
t = df
t = t.assign(total_paid=t.their_price * t.number)
t.groupby(['title', 'currency'])[['total_paid']].sum()
Out[9]:
In [10]:
# Second try: make book titles match by fixing differences
# in casing and punctuation.
t = df
t = t.assign(total_paid=t.their_price * t.number)
t = t.assign(title_fixed=t['title'].str.lower().str.replace("’", "'"))
t.groupby(['title_fixed', 'currency'])[['total_paid']].sum()
Out[10]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: