In [1]:
%matplotlib inline
import pandas as pd

In [2]:
from IPython.core.display import HTML
css = open('style-table.css').read() + open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))


Out[2]:

In [3]:
sales1 = pd.read_csv('sales1.csv')
sales1


Out[3]:
Book title Number sold Sales price Royalty paid
0 The Bricklayer’s Bible 8 2.99 0.55
1 Swimrand 2 1.99 0.35
2 Pining For The Fisheries of Yore 28 2.99 0.55
3 The Duck Goes Here 34 2.99 0.55
4 The Tower Commission Report 4 11.50 4.25

In [4]:
sales2 = pd.read_csv('sales2.csv')
sales2.fillna('')


Out[4]:
Title Units sold List price Royalty
0
1 Sales report for Q4
2 E-Book Reader US Store
3 Pining for the Fisheries of Yore 80 3.5 14.98
4 Swimrand 1 2.99 0.14
5 The Bricklayer's Bible 17 3.5 5.15
6 The Duck Goes Here 34 2.99 5.78
7 The Tower Commission Report 4 9.5 6.2
8 US royalties (USD) 32.25
9
10
11 Sales report for Q4
12 E-Book Reader UK Store
13 Pining for the Fisheries of Yore 47 2.99 11.98
14 The Bricklayer's Bible 17 2.99 3.5
15 The Tower Commission Report 4 6.5 4.8
16 UK royalties (GBP) 20.28
17
18
19 Sales report for Q4
20 E-Book Reader France Store
21 Swimrand 8 1.99 0.88
22 The Duck Goes Here 12 1.99 1.5
23 France royalties (EUR) 2.38

In [5]:
# Challenge: first combine these sales together into a single dataframe,
# then compute how much money consumers spent on each book in each currency.

In [6]:
df1 = sales1.rename(columns={
    'Book title': 'title',
    'Number sold': 'number',
    'Sales price': 'their_price',
    'Royalty paid': 'we_got',
    })
df1['currency'] = 'USD'
df1


Out[6]:
title number their_price we_got currency
0 The Bricklayer’s Bible 8 2.99 0.55 USD
1 Swimrand 2 1.99 0.35 USD
2 Pining For The Fisheries of Yore 28 2.99 0.55 USD
3 The Duck Goes Here 34 2.99 0.55 USD
4 The Tower Commission Report 4 11.50 4.25 USD

In [7]:
s = sales2.copy()
t = sales2['Title']
t = t.where(t.str.endswith(')')).str.split().str[-1].str.strip('()')
s['currency'] = t.fillna(method='bfill')
s = s[s['List price'].notnull()]
df2 = s.rename(columns={
    'Units sold': 'number',
    'Title': 'title',
    'List price': 'their_price',
    'Royalty': 'we_got',
    })
s.head()


Out[7]:
Title Units sold List price Royalty currency
3 Pining for the Fisheries of Yore 80.0 3.50 14.98 USD
4 Swimrand 1.0 2.99 0.14 USD
5 The Bricklayer's Bible 17.0 3.50 5.15 USD
6 The Duck Goes Here 34.0 2.99 5.78 USD
7 The Tower Commission Report 4.0 9.50 6.20 USD

In [8]:
df = pd.concat([df1, df2])
df


Out[8]:
title number their_price we_got currency
0 The Bricklayer’s Bible 8.0 2.99 0.55 USD
1 Swimrand 2.0 1.99 0.35 USD
2 Pining For The Fisheries of Yore 28.0 2.99 0.55 USD
3 The Duck Goes Here 34.0 2.99 0.55 USD
4 The Tower Commission Report 4.0 11.50 4.25 USD
3 Pining for the Fisheries of Yore 80.0 3.50 14.98 USD
4 Swimrand 1.0 2.99 0.14 USD
5 The Bricklayer's Bible 17.0 3.50 5.15 USD
6 The Duck Goes Here 34.0 2.99 5.78 USD
7 The Tower Commission Report 4.0 9.50 6.20 USD
13 Pining for the Fisheries of Yore 47.0 2.99 11.98 GBP
14 The Bricklayer's Bible 17.0 2.99 3.50 GBP
15 The Tower Commission Report 4.0 6.50 4.80 GBP
21 Swimrand 8.0 1.99 0.88 EUR
22 The Duck Goes Here 12.0 1.99 1.50 EUR

In [9]:
# First try: differences in case and punctuation
# make some books look like two different entries.

t = df
t = t.assign(total_paid=t.their_price * t.number)
t.groupby(['title', 'currency'])[['total_paid']].sum()


Out[9]:
total_paid
title currency
Pining For The Fisheries of Yore USD 83.72
Pining for the Fisheries of Yore GBP 140.53
USD 280.00
Swimrand EUR 15.92
USD 6.97
The Bricklayer's Bible GBP 50.83
USD 59.50
The Bricklayer’s Bible USD 23.92
The Duck Goes Here EUR 23.88
USD 203.32
The Tower Commission Report GBP 26.00
USD 84.00

In [10]:
# Second try: make book titles match by fixing differences
# in casing and punctuation.

t = df
t = t.assign(total_paid=t.their_price * t.number)
t = t.assign(title_fixed=t['title'].str.lower().str.replace("’", "'"))
t.groupby(['title_fixed', 'currency'])[['total_paid']].sum()


Out[10]:
total_paid
title_fixed currency
pining for the fisheries of yore GBP 140.53
USD 363.72
swimrand EUR 15.92
USD 6.97
the bricklayer's bible GBP 50.83
USD 83.42
the duck goes here EUR 23.88
USD 203.32
the tower commission report GBP 26.00
USD 84.00

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: