In [90]:
# First, we'll import some libraries of Python functions that we'll need to run this code
import pandas as pd
import sqlite3
import xlrd as xl

# Select an excel file saved in the same folder as this SQL_Practice file. 
path = ('book_db.xlsx')

# if this .sqlite db doesn't already exists, this will create it
# if the .sqlite db *does* already exist, this establishes the desired connection
con = sqlite3.connect("book_db.sqlite")

# this pulls out the names of the sheets in the workbook. We'll use these to name the different tables in the SQLite database that we'll create
table_names = xl.open_workbook(path).sheet_names()

# this loop makes it possible to use any other .xls sheet, since the sheet names aren't called out specifically
for table in table_names:
    df = pd.read_excel(path, sheetname='{}'.format(table))
    con.execute("DROP TABLE IF EXISTS {}".format(table))
    pd.io.sql.to_sql(df, "{}".format(table), con, index=False)

# now the spreadsheets are in tables in a mini database!

# Finally, a little function to make it easy to run queries on this mini-database
def run(query):
    results = pd.read_sql("{}".format(query), con)
    return results

Some notes about common web metrics:

  • Rank: Position that your ad appears in relation to other advertisements (refers to paid search ads).
  • CTR (Click-Through-Rate): Rate at which users click on an ad after they see it.
    • If 10 people see your advertisement and 1 person clicks on it, your CTR is 10%.
  • CPC (Cost Per Click): How much you spent each time a user clicked on your advertisement. With online advertising, you frequently only pay the site displaying your ad (or the ad network, like Google) when a user clicks on your advertisement.
  • RPC (Revenue per Click): The average revenue you make each time a user clicks on your advertisement.
    • If you get 10 clicks and make one sale worth 10 dollars, then your RPC is 1 dollar.
  • Conversion: The completion of a desired action. This might be a sale, a signup or registration, participation in a survey, a video view, etc.
  • Conversion Rate: The rate at which users go from clicking to actually completing the desired action.
    • If you get 10 clicks and 1 person completes the purchase, your conversion rate is 10%.
  • COS (Cost of Sale): How much you spent to get 1 customer to convert. Don't confuse with CPA (cost per acquisition), which usually only refers to when you only pay your ad publisher when a customer makes a purchase)
    • If you get 10 clicks, spend 50 cents per click, and 1 customer makes a purchase, your COS is 5 dollars
  • Customer Lifetime Value: The total revenue a customer has generated for you. Often this takes into account returns and user-support costs.
  • RPM (Revenue Per Thousand Impressions: Total revenue for every 1000 times the ad was displayed

In [91]:
run('''
PRAGMA TABLE_INFO(transactions)
''')


Out[91]:
cid name type notnull dflt_value pk
0 0 ID INTEGER 0 None 0
1 1 UserID INTEGER 0 None 0
2 2 BookID INTEGER 0 None 0
3 3 Rank INTEGER 0 None 0
4 4 Clicks REAL 0 None 0
5 5 Spend REAL 0 None 0
6 6 Purchases REAL 0 None 0

In [112]:
run('''
SELECT  
    author,
    COUNT(DISTINCT(title)) as unique_titles,
    SUM(CASE WHEN gender = 'F' THEN price*purchases end) AS female_revenue,
    SUM(CASE WHEN gender = 'M' THEN price*purchases end) AS male_revenue
FROM
    books B
    JOIN transactions T ON B.id = T.bookid
    JOIN users U on U.id = T.userID
    JOIN authors A on A.id = B.AuthorID
GROUP BY author
ORDER BY female_revenue + male_revenue DESC
LIMIT 10
    ''')


Out[112]:
Author unique_titles female_revenue male_revenue
0 John Steinbeck 6 2663.87 2621.23
1 Toni Morrison 4 1342.33 1663.94
2 William Faulkner 4 1224.74 1509.65
3 Henry James 4 1385.66 1309.74
4 Kurt Vonnegut 3 1193.23 1134.89
5 D.H. Lawrence 3 1124.68 1024.91
6 Charlotte Bronte 2 961.52 780.30
7 Theodore Dreiser 2 982.02 686.58
8 Virginia Woolf 3 877.68 763.38
9 E.M. Forster 2 650.96 787.16

In [114]:
run('''
SELECT 
    title, 
    author,
    SUM(clicks)/COUNT(*) as CTR,
    SUM(spend)/SUM(clicks) as CPC,
    SUM(price*purchases)/SUM(clicks) as RPC,
    SUM(purchases) as Conversions, 
    SUM(purchases)/SUM(clicks) as Conversion_Rate,
    SUM(spend)/SUM(purchases) as COS,
    SUM(price*purchases)/COUNT(*)*1000 RPM
FROM
    books B
    JOIN transactions T ON B.id = T.bookid
    JOIN users U on U.id = T.userID
    JOIN authors A on A.id = B.AuthorID
GROUP BY title
ORDER BY RPM DESC
LIMIT 10
    ''')


Out[114]:
Title Author CTR CPC RPC Conversions Conversion_Rate COS RPM
0 The Way of the Flesh Samuel Butler 0.254215 0.496633 6.942092 47 0.239796 2.071064 1764.785992
1 Travels with Charley John Steinbeck 0.261348 0.477842 6.649158 43 0.226316 2.111395 1737.744154
2 The Ambassadors Henry James 0.255405 0.517407 6.513704 43 0.227513 2.274186 1663.635135
3 The Pearl John Steinbeck 0.264980 0.521859 5.930050 44 0.221106 2.360227 1571.344874
4 As I Lay Dying William Faulkner 0.273361 0.523010 5.659184 40 0.204082 2.562750 1547.001395
5 A Passage to India E.M. Forster 0.264516 0.555561 5.448585 52 0.253659 2.190192 1441.238710
6 I, Claudius Robert Graves 0.251323 0.477895 5.545263 40 0.210526 2.270000 1393.650794
7 Wuthering Heights Emily Bronte 0.255784 0.511558 5.132161 35 0.175879 2.908571 1312.724936
8 An American Tragedy Theodore Dreiser 0.233202 0.523842 5.595254 36 0.203390 2.575556 1304.822134
9 East of Eden John Steinbeck 0.271540 0.515288 4.725000 39 0.187500 2.748205 1283.028721

In [ ]: