notebook.community

Edit and run



In [1]:

    
%matplotlib inline
import pandas as pd



In [2]:

    
from IPython.core.display import HTML
css = open('style-table.css').read() + open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))









    Out[2]:



In [3]:

    
sales1 = pd.read_csv('sales1.csv')
sales1









    Out[3]:







  
    
      
      Book title
      Number sold
      Sales price
      Royalty paid
    
  
  
    
      0
      The Bricklayer’s Bible
      8
      2.99
      0.55
    
    
      1
      Swimrand
      2
      1.99
      0.35
    
    
      2
      Pining For The Fisheries of Yore
      28
      2.99
      0.55
    
    
      3
      The Duck Goes Here
      34
      2.99
      0.55
    
    
      4
      The Tower Commission Report
      4
      11.50
      4.25



In [4]:

    
sales2 = pd.read_csv('sales2.csv')
sales2.fillna('')









    Out[4]:







  
    
      
      Title
      Units sold
      List price
      Royalty
    
  
  
    
      0
      
      
      
      
    
    
      1
      Sales report for Q4
      
      
      
    
    
      2
      E-Book Reader US Store
      
      
      
    
    
      3
      Pining for the Fisheries of Yore
      80
      3.5
      14.98
    
    
      4
      Swimrand
      1
      2.99
      0.14
    
    
      5
      The Bricklayer's Bible
      17
      3.5
      5.15
    
    
      6
      The Duck Goes Here
      34
      2.99
      5.78
    
    
      7
      The Tower Commission Report
      4
      9.5
      6.2
    
    
      8
      US royalties (USD)
      
      
      32.25
    
    
      9
      
      
      
      
    
    
      10
      
      
      
      
    
    
      11
      Sales report for Q4
      
      
      
    
    
      12
      E-Book Reader UK Store
      
      
      
    
    
      13
      Pining for the Fisheries of Yore
      47
      2.99
      11.98
    
    
      14
      The Bricklayer's Bible
      17
      2.99
      3.5
    
    
      15
      The Tower Commission Report
      4
      6.5
      4.8
    
    
      16
      UK royalties (GBP)
      
      
      20.28
    
    
      17
      
      
      
      
    
    
      18
      
      
      
      
    
    
      19
      Sales report for Q4
      
      
      
    
    
      20
      E-Book Reader France Store
      
      
      
    
    
      21
      Swimrand
      8
      1.99
      0.88
    
    
      22
      The Duck Goes Here
      12
      1.99
      1.5
    
    
      23
      France royalties (EUR)
      
      
      2.38



In [5]:

    
# Challenge: first combine these sales together into a single dataframe,
# then compute how much money consumers spent on each book in each currency.



In [6]:

    
df1 = sales1.rename(columns={
    'Book title': 'title',
    'Number sold': 'number',
    'Sales price': 'their_price',
    'Royalty paid': 'we_got',
    })
df1['currency'] = 'USD'
df1









    Out[6]:







  
    
      
      title
      number
      their_price
      we_got
      currency
    
  
  
    
      0
      The Bricklayer’s Bible
      8
      2.99
      0.55
      USD
    
    
      1
      Swimrand
      2
      1.99
      0.35
      USD
    
    
      2
      Pining For The Fisheries of Yore
      28
      2.99
      0.55
      USD
    
    
      3
      The Duck Goes Here
      34
      2.99
      0.55
      USD
    
    
      4
      The Tower Commission Report
      4
      11.50
      4.25
      USD



In [7]:

    
s = sales2.copy()
t = sales2['Title']
t = t.where(t.str.endswith(')')).str.split().str[-1].str.strip('()')
s['currency'] = t.fillna(method='bfill')
s = s[s['List price'].notnull()]
df2 = s.rename(columns={
    'Units sold': 'number',
    'Title': 'title',
    'List price': 'their_price',
    'Royalty': 'we_got',
    })
s.head()









    Out[7]:







  
    
      
      Title
      Units sold
      List price
      Royalty
      currency
    
  
  
    
      3
      Pining for the Fisheries of Yore
      80.0
      3.50
      14.98
      USD
    
    
      4
      Swimrand
      1.0
      2.99
      0.14
      USD
    
    
      5
      The Bricklayer's Bible
      17.0
      3.50
      5.15
      USD
    
    
      6
      The Duck Goes Here
      34.0
      2.99
      5.78
      USD
    
    
      7
      The Tower Commission Report
      4.0
      9.50
      6.20
      USD



In [8]:

    
df = pd.concat([df1, df2])
df









    Out[8]:







  
    
      
      title
      number
      their_price
      we_got
      currency
    
  
  
    
      0
      The Bricklayer’s Bible
      8.0
      2.99
      0.55
      USD
    
    
      1
      Swimrand
      2.0
      1.99
      0.35
      USD
    
    
      2
      Pining For The Fisheries of Yore
      28.0
      2.99
      0.55
      USD
    
    
      3
      The Duck Goes Here
      34.0
      2.99
      0.55
      USD
    
    
      4
      The Tower Commission Report
      4.0
      11.50
      4.25
      USD
    
    
      3
      Pining for the Fisheries of Yore
      80.0
      3.50
      14.98
      USD
    
    
      4
      Swimrand
      1.0
      2.99
      0.14
      USD
    
    
      5
      The Bricklayer's Bible
      17.0
      3.50
      5.15
      USD
    
    
      6
      The Duck Goes Here
      34.0
      2.99
      5.78
      USD
    
    
      7
      The Tower Commission Report
      4.0
      9.50
      6.20
      USD
    
    
      13
      Pining for the Fisheries of Yore
      47.0
      2.99
      11.98
      GBP
    
    
      14
      The Bricklayer's Bible
      17.0
      2.99
      3.50
      GBP
    
    
      15
      The Tower Commission Report
      4.0
      6.50
      4.80
      GBP
    
    
      21
      Swimrand
      8.0
      1.99
      0.88
      EUR
    
    
      22
      The Duck Goes Here
      12.0
      1.99
      1.50
      EUR



In [9]:

    
# First try: differences in case and punctuation
# make some books look like two different entries.

t = df
t = t.assign(total_paid=t.their_price * t.number)
t.groupby(['title', 'currency'])[['total_paid']].sum()









    Out[9]:







  
    
      
      
      total_paid
    
    
      title
      currency
      
    
  
  
    
      Pining For The Fisheries of Yore
      USD
      83.72
    
    
      Pining for the Fisheries of Yore
      GBP
      140.53
    
    
      USD
      280.00
    
    
      Swimrand
      EUR
      15.92
    
    
      USD
      6.97
    
    
      The Bricklayer's Bible
      GBP
      50.83
    
    
      USD
      59.50
    
    
      The Bricklayer’s Bible
      USD
      23.92
    
    
      The Duck Goes Here
      EUR
      23.88
    
    
      USD
      203.32
    
    
      The Tower Commission Report
      GBP
      26.00
    
    
      USD
      84.00



In [10]:

    
# Second try: make book titles match by fixing differences
# in casing and punctuation.

t = df
t = t.assign(total_paid=t.their_price * t.number)
t = t.assign(title_fixed=t['title'].str.lower().str.replace("’", "'"))
t.groupby(['title_fixed', 'currency'])[['total_paid']].sum()









    Out[10]:







  
    
      
      
      total_paid
    
    
      title_fixed
      currency
      
    
  
  
    
      pining for the fisheries of yore
      GBP
      140.53
    
    
      USD
      363.72
    
    
      swimrand
      EUR
      15.92
    
    
      USD
      6.97
    
    
      the bricklayer's bible
      GBP
      50.83
    
    
      USD
      83.42
    
    
      the duck goes here
      EUR
      23.88
    
    
      USD
      203.32
    
    
      the tower commission report
      GBP
      26.00
    
    
      USD
      84.00



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:

	Book title	Number sold	Sales price	Royalty paid
0	The Bricklayer’s Bible	8	2.99	0.55
1	Swimrand	2	1.99	0.35
2	Pining For The Fisheries of Yore	28	2.99	0.55
3	The Duck Goes Here	34	2.99	0.55
4	The Tower Commission Report	4	11.50	4.25

	Title	Units sold	List price	Royalty
0
1	Sales report for Q4
2	E-Book Reader US Store
3	Pining for the Fisheries of Yore	80	3.5	14.98
4	Swimrand	1	2.99	0.14
5	The Bricklayer's Bible	17	3.5	5.15
6	The Duck Goes Here	34	2.99	5.78
7	The Tower Commission Report	4	9.5	6.2
8	US royalties (USD)			32.25
9
10
11	Sales report for Q4
12	E-Book Reader UK Store
13	Pining for the Fisheries of Yore	47	2.99	11.98
14	The Bricklayer's Bible	17	2.99	3.5
15	The Tower Commission Report	4	6.5	4.8
16	UK royalties (GBP)			20.28
17
18
19	Sales report for Q4
20	E-Book Reader France Store
21	Swimrand	8	1.99	0.88
22	The Duck Goes Here	12	1.99	1.5
23	France royalties (EUR)			2.38

	Title	Units sold	List price	Royalty	currency
3	Pining for the Fisheries of Yore	80.0	3.50	14.98	USD
4	Swimrand	1.0	2.99	0.14	USD
5	The Bricklayer's Bible	17.0	3.50	5.15	USD
6	The Duck Goes Here	34.0	2.99	5.78	USD
7	The Tower Commission Report	4.0	9.50	6.20	USD

		total_paid
title	currency
Pining For The Fisheries of Yore	USD	83.72
Pining for the Fisheries of Yore	GBP	140.53
Pining for the Fisheries of Yore	USD	280.00
Swimrand	EUR	15.92
Swimrand	USD	6.97
The Bricklayer's Bible	GBP	50.83
The Bricklayer's Bible	USD	59.50
The Bricklayer’s Bible	USD	23.92
The Duck Goes Here	EUR	23.88
The Duck Goes Here	USD	203.32
The Tower Commission Report	GBP	26.00
The Tower Commission Report	USD	84.00

		total_paid
title_fixed	currency
pining for the fisheries of yore	GBP	140.53
pining for the fisheries of yore	USD	363.72
swimrand	EUR	15.92
swimrand	USD	6.97
the bricklayer's bible	GBP	50.83
the bricklayer's bible	USD	83.42
the duck goes here	EUR	23.88
the duck goes here	USD	203.32
the tower commission report	GBP	26.00
the tower commission report	USD	84.00