Imports


In [1]:
import pandas as pd
pd.set_option('display.float_format', '{:.2f}'.format)
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook

Constants


In [2]:
total_budget = 50000000
gold_price = 1500 # dollars per ounce

Data Loading

- Costs Data


In [3]:
costs_data = pd.read_csv("data/costs_data.csv")
costs_data.head()


Out[3]:
gold_amount low med high
0 0 0 0 0
1 2000 964000 882000 768000
2 4000 1868000 1734000 1359000
3 6000 2729000 2565000 1861000
4 8000 3559000 3380000 2323000

- Elevation Data


In [4]:
elevation_data = pd.read_csv("data/elevation_data.csv")
elevation_data.head()


Out[4]:
parcel_id Easting Northing elevation
0 1 0.50 0.50 434.65
1 2 1.50 0.50 709.21
2 3 2.50 0.50 1000.25
3 4 3.50 0.50 296.81
4 5 4.50 0.50 412.29

- Sample Data

  • Parcels that have already been auctioned and exploited (see the gold_available column)
  • Why are the Northing and Eastig shifted by .5?

In [5]:
sample_data = pd.read_csv("data/sample_data.csv")
sample_data.head()


Out[5]:
parcel_id Easting Northing Pyerite Mexallon Tritanium Megacyte Nocxium Isogen Veldspar Plagioclase Hedbergite Spudumain Gneiss Arkonor Mercoxit Bistot Crokite gold_available
0 4 3.50 0.50 133.27 108.26 95.88 129.70 111.15 138.74 109.82 148.30 130.35 145.60 114.18 105.52 107.60 94.12 108.02 3901.52
1 9 8.50 0.50 144.45 110.17 97.84 104.66 104.43 123.08 104.58 117.52 128.96 126.25 119.67 116.79 109.63 130.86 113.13 17523.53
2 11 10.50 0.50 145.70 110.63 116.20 158.60 118.84 109.34 98.57 148.77 129.75 149.73 119.89 103.67 114.77 119.50 106.39 4537.82
3 14 13.50 0.50 126.26 116.73 120.17 116.64 127.24 103.68 117.29 101.20 191.69 178.01 138.92 101.05 107.52 104.59 105.25 21601.11
4 17 16.50 0.50 107.38 98.94 120.78 98.77 119.90 121.31 121.41 167.34 158.55 178.47 131.63 109.36 101.88 111.56 104.27 21186.86

- Auction Parcels

  • The objective is to figure out on which of these to bid and how much

In [6]:
auction_parcels = pd.read_csv("data/auction_parcels.csv")
auction_parcels.head()


Out[6]:
parcel_id Easting Northing elevation Arkonor Mercoxit Bistot Crokite
0 515 64.50 3.50 23.19 107.21 101.98 105.71 102.50
1 914 13.50 6.50 784.99 102.44 104.95 108.83 109.35
2 1538 37.50 10.50 1258.73 103.50 111.47 105.55 98.07
3 1790 139.50 11.50 474.18 102.07 109.97 105.21 104.42
4 2416 15.50 16.50 1584.86 114.48 116.99 113.14 108.81

Fixed costs calculator


In [7]:
def fixed_cost(row): 
    if row["elevation"]  < 0: 
        return 4000000.0 
    elif row["elevation"] > 0 and row["elevation"]  <= 500: 
        return 3000000.0
    else:
        return 8000000.0

Variable costs calculator

  • Use a simple linear regression

In [8]:
from sklearn import linear_model

regressions = {}
for elevation in ["low", "med", "high"]:
    regression = linear_model.LinearRegression()
    regression.fit(costs_data.loc[:, ["gold_amount"]], costs_data.loc[:,elevation])
    regressions[elevation] = regression

def variable_cost(row):
    if row["elevation"] < 0: 
        return regressions['low'].predict(row["elevation"])[0]
    elif row["elevation"] >= 0 and row["elevation"] <= 700: 
        return regressions['med'].predict(row["elevation"])[0]
    else:
        return regressions['high'].predict(row["elevation"])[0]

Estimate the average amount of gold under each biddable parcel

  • Naive approach: average the known quantities of gold in a circle of a given radius around each parcel
  • Use the smallest circle radius for which the gold amount estimations start to increase monotonically
    • run the algorithm for radius in range(1, 10) and eyeball a decent value

In [9]:
plt.gca().set_autoscale_on(False)
plt.axis([0.0, 150.0, 0.0, 150.0])
plt.scatter(sample_data.Easting, sample_data.Northing, s=1, color='y', label='Sample Data')
plt.scatter(auction_parcels.Easting, auction_parcels.Northing, marker='x', color='r', label='Auction Parcels')
plt.scatter(auction_parcels.Easting, auction_parcels.Northing, s=1000, facecolors='none', edgecolors='b', label='Radius')
lgnd = plt.legend(scatterpoints=1, fontsize=10)
lgnd.legendHandles[0]._sizes = [50]
lgnd.legendHandles[1]._sizes = [50]
lgnd.legendHandles[2]._sizes = [50]
plt.xlabel('Easting')
plt.ylabel('Northing')


Out[9]:
<matplotlib.text.Text at 0x11f0bb128>

In [12]:
def estimate_gold(radius):
    gold_estimations = []
    for idx_ap, ap in auction_parcels.iterrows():
        sum = 0
        count = 0
        for idx_sd, sd in sample_data.iterrows():
            if (radius >= np.linalg.norm(np.array([sd['Easting'], sd['Northing']]) - np.array([ap['Easting'], ap['Northing']]))):
                sum += sd['gold_available']
                count += 1
        sum = sum / count if count > 0 else 0
        estimated_gold_column = 'estimated_gold_r{:d}'.format(radius)
        included_samples_column = 'included_samples_r{:d}'.format(radius)
        gold_estimations.append({'parcel_id': ap['parcel_id'], estimated_gold_column: sum, included_samples_column: count})
    return gold_estimations

In [13]:
gold_estimations = auction_parcels.loc[:,['parcel_id']]
for radius in range(1, 10):
    gold_estimations = gold_estimations.merge(pd.DataFrame(estimate_gold(radius)), on='parcel_id')
    
gold_estimations


Out[13]:
parcel_id estimated_gold_r1 included_samples_r1 estimated_gold_r2 included_samples_r2 estimated_gold_r3 included_samples_r3 estimated_gold_r4 included_samples_r4 estimated_gold_r5 included_samples_r5 estimated_gold_r6 included_samples_r6 estimated_gold_r7 included_samples_r7 estimated_gold_r8 included_samples_r8 estimated_gold_r9 included_samples_r9
0 515 8430.11 1 13168.56 4 16190.58 6 16494.78 9 15915.63 15 15393.42 22 15690.17 30 14702.80 36 13949.57 44
1 914 24700.44 1 14531.39 4 15582.80 6 14144.52 11 13606.70 16 14756.13 18 14600.23 26 13677.95 35 13725.51 46
2 1538 9408.78 1 10292.52 4 15627.27 7 13899.16 11 13216.88 16 13283.81 20 11782.28 28 11652.88 40 11685.16 52
3 1790 17702.74 1 17702.74 1 8560.63 4 9413.39 8 8775.94 13 10102.08 18 11188.92 30 12018.55 40 11946.98 53
4 2416 20295.74 1 15225.96 4 15832.77 8 14422.47 13 14549.92 15 16149.23 22 15528.10 30 15456.95 42 14541.80 49
5 3332 11883.47 1 14105.59 4 13876.81 8 13151.41 11 13767.57 20 12742.37 29 12177.57 39 11300.35 50 10915.65 62
6 3697 9593.29 2 11602.23 6 9605.71 9 10314.71 12 11013.78 19 11581.40 23 10860.98 31 10890.04 39 11560.53 54
7 4006 0.00 0 10881.76 1 8551.74 3 11547.55 7 11566.03 17 10878.92 23 11765.73 29 11889.88 35 12261.89 46
8 4125 18885.74 1 18885.74 1 11337.85 4 9482.24 9 9874.56 16 11334.35 28 12214.98 39 12536.73 52 12852.20 59
9 4577 0.00 0 12185.13 2 12886.61 5 11929.91 9 10974.56 19 11997.18 26 11992.95 33 11848.74 39 12555.18 53
10 5712 5193.65 1 8333.71 2 13476.74 9 13066.80 14 12602.24 24 11864.83 33 12385.42 39 11746.96 47 11527.23 62
11 6558 8575.01 1 13952.57 3 16602.40 6 17440.18 9 16334.51 17 15171.77 24 13834.79 32 12626.15 41 12413.54 47
12 6875 0.00 0 17552.33 5 17099.39 6 17583.48 9 15118.79 20 14461.38 27 14157.99 37 13292.32 54 12941.55 69
13 7146 0.00 0 8995.40 1 9426.45 4 11570.95 9 12006.31 13 10679.03 21 9960.22 26 9997.27 42 10460.56 57
14 7837 0.00 0 19739.79 4 19300.19 8 16436.14 14 15436.35 23 15676.52 31 15850.96 40 15300.21 49 14733.66 65
15 8081 14386.85 1 15065.42 2 10852.93 4 14885.84 10 12012.31 20 12439.73 25 12434.70 31 12251.65 40 13032.14 53
16 8712 14715.66 1 8325.18 4 12411.88 8 12710.68 13 12177.76 17 11540.38 28 10798.30 38 10041.78 48 10051.36 66
17 9543 12041.05 1 9456.90 2 9456.90 2 14019.35 5 12602.12 7 14015.23 10 13409.58 17 12111.87 28 11397.55 43
18 10226 16378.87 2 17323.31 4 14444.09 7 14960.13 15 16363.82 22 15124.18 32 14589.90 39 14194.01 50 14317.09 63
19 10495 2824.63 1 12666.69 3 11753.48 7 10636.51 9 10484.87 19 9573.00 32 10377.03 39 10655.54 45 10658.39 52
20 10905 0.00 0 12374.90 1 11739.21 6 11245.30 8 11245.82 16 12951.64 25 12942.75 33 13378.39 48 13062.80 59
21 11489 18646.39 2 15262.67 4 15386.50 9 14034.00 14 13851.70 22 12457.65 32 12643.86 42 12101.53 51 12231.40 63
22 11614 23263.07 1 18384.65 5 16338.16 10 16745.00 14 16709.58 20 16078.32 25 15087.06 34 15045.76 44 14581.09 59
23 12221 15610.93 2 12042.25 3 14644.98 5 11423.80 11 12480.41 20 12806.55 27 13770.32 38 13240.68 53 13305.27 66
24 12810 24363.53 1 18432.28 5 18371.89 8 17417.16 13 16525.03 19 14782.62 26 14457.40 34 15006.16 48 15080.84 61
25 13071 0.00 0 11680.13 3 13581.79 6 14219.37 12 12148.56 18 12195.63 22 12589.94 29 12839.39 38 12911.31 54
26 13249 17198.29 3 13376.57 5 15210.67 9 14899.06 14 14082.58 23 14155.12 29 14055.76 37 13151.40 49 13147.61 61
27 13683 13642.30 2 10923.61 5 9589.21 8 10378.09 15 10606.50 22 10465.96 32 10991.65 42 11031.04 49 10919.08 62
28 13905 4010.68 2 6102.22 3 6880.99 4 7276.81 6 9397.68 18 9356.68 24 10227.44 35 9957.70 47 10609.57 59
29 13917 10208.89 1 13967.21 3 12278.85 6 12213.47 8 11305.80 13 12112.97 22 11457.64 28 11533.83 43 11468.75 53
30 14154 13062.91 2 13176.25 5 10862.17 8 10337.61 11 10783.29 17 12329.49 23 13203.29 32 13373.29 41 14252.67 59
31 14655 8006.35 1 13545.07 3 11830.74 8 10892.63 12 11987.49 19 11518.96 24 11630.49 32 11137.09 40 11007.90 51
32 15178 7941.19 1 11818.04 2 14665.91 6 14807.15 8 12516.64 13 13051.92 22 13041.19 29 13757.32 42 13977.70 54
33 15815 9106.73 1 7052.70 2 13490.72 8 13287.48 14 13295.25 25 13129.91 31 13731.22 41 13487.18 51 12528.99 62
34 16067 8765.81 1 13655.87 3 14320.17 7 10738.04 13 12393.21 23 11330.58 29 11369.19 39 11358.75 48 11071.00 63
35 16475 7026.04 1 7026.04 1 14639.28 4 12725.63 9 12272.12 16 12882.06 23 13717.47 33 12372.88 45 12149.73 60
36 16838 16870.87 1 14426.68 5 14638.62 7 13724.14 13 12460.74 20 12201.22 26 12584.00 31 12619.73 40 12710.42 52
37 16925 0.00 0 13928.91 3 13928.91 3 11520.83 9 11155.94 15 12397.33 22 12051.44 28 11794.80 39 12414.00 56
38 17186 19567.32 1 10644.22 3 13948.12 7 15290.86 10 12485.37 17 13235.55 23 12688.84 35 12443.94 42 12539.47 56
39 18992 0.00 0 0.00 0 10048.37 6 12454.73 11 12456.28 16 12012.98 24 12154.38 30 12961.83 46 13206.15 67
40 19114 13982.31 4 15869.71 6 16367.82 9 15461.55 15 13451.29 22 13808.44 27 13568.65 34 13330.16 42 12408.12 51
41 19448 11748.63 2 12795.55 6 12380.15 11 13549.23 17 13119.07 24 14417.87 33 14358.51 37 14081.31 44 13951.20 57
42 19984 8342.01 2 9877.28 3 14719.17 9 13761.18 16 13854.34 19 14489.19 26 13721.34 29 13756.10 46 13861.37 64
43 20194 21796.66 2 20825.18 3 16484.74 8 16703.68 12 16222.13 23 15527.95 32 15074.19 43 14141.60 52 13405.10 66
44 20402 0.00 0 8746.90 3 8935.74 7 9374.12 11 9865.72 18 10680.25 21 11402.74 27 11479.26 32 11765.73 39
45 20433 11773.88 1 12806.13 4 10986.68 8 11465.31 11 13880.85 20 13938.22 28 14059.97 42 13727.89 47 14029.10 60
46 21042 18286.45 1 12156.74 2 10237.95 6 11557.07 10 11425.36 18 12325.97 25 12007.14 26 11787.42 38 11724.08 49
47 21278 10691.90 1 7983.94 2 10562.88 8 9768.98 12 11322.58 20 11375.23 24 12278.95 30 11036.24 44 10851.61 55
48 22007 24623.63 1 21470.23 2 12394.77 6 12126.96 10 13462.50 14 13718.75 22 13743.44 32 13785.29 36 13916.64 44
49 22264 11374.72 2 11376.56 5 9531.60 7 8926.35 13 10405.79 18 9972.85 22 11403.01 29 11295.43 33 11349.10 35

Radius = 3 seems to fit the bill

Compute the total extraction costs and estimated profit


In [16]:
total_costs = gold_estimations.loc[:, ['parcel_id', 'estimated_gold_r3']]

total_costs = total_costs.merge(elevation_data.loc[:, ['parcel_id', 'elevation']], on='parcel_id')

total_costs['total_cost'] = total_costs.apply(lambda row: fixed_cost(row), axis = 1) + total_costs.apply(lambda row: variable_cost(row), axis = 1)

total_costs['estimated_profit'] = total_costs.apply(lambda row: gold_price * row['estimated_gold_r3'] - row['total_cost'], axis = 1)

total_costs.sort_values(by=['estimated_profit'])


Out[16]:
parcel_id estimated_gold_r3 elevation total_cost estimated_profit
7 4006 8551.74 1128.92 8548726.75 4278886.02
28 13905 6880.99 -334.67 4048496.39 6272989.40
39 18992 10048.37 709.52 8446713.16 6625847.41
47 21278 10562.88 656.77 8358826.14 7485493.61
30 14154 10862.17 933.84 8501277.14 7791977.14
45 20433 10986.68 784.49 8464949.67 8015076.45
20 10905 11739.21 583.50 8329169.47 9279646.85
3 1790 8560.63 474.18 3284920.47 9556019.52
13 7146 9426.45 -157.97 4120855.66 10018817.00
16 8712 12411.88 1007.17 8519111.23 10098713.70
49 22264 9531.60 -128.66 4132857.18 10164537.36
44 20402 8935.74 248.42 3193545.91 10210063.12
17 9543 9456.90 41.61 3109840.19 11075511.99
27 13683 9589.21 195.77 3172237.90 11211575.70
6 3697 9605.71 152.18 3154594.40 11253970.36
33 15815 13490.72 1596.11 8662360.65 11573715.43
46 21042 10237.95 466.96 3281999.50 12074921.04
15 8081 10852.93 -501.93 3980003.81 12299390.01
37 16925 13928.91 1051.53 8529901.71 12363458.76
34 16067 14320.17 1304.02 8591314.48 12888933.55
18 10226 14444.09 1092.87 8539956.57 13126184.94
8 4125 11337.85 -770.37 3870077.37 13136701.16
23 12221 14644.98 634.89 8349969.44 13617503.08
42 19984 14719.17 528.30 8306827.47 13771934.56
29 13917 12278.85 -181.76 4111114.85 14307162.11
19 10495 11753.48 432.31 3267975.14 14362247.01
31 14655 11830.74 407.39 3257890.32 14488221.64
2 1538 15627.27 1258.73 8580298.84 14860608.32
1 914 15582.80 784.99 8465070.21 14909126.30
4 2416 15832.77 1584.86 8659624.62 15089533.22
9 4577 12886.61 -198.78 4104144.37 15225775.42
41 19448 12380.15 314.58 3220326.34 15349899.71
48 22007 12394.77 185.78 3168195.01 15423953.37
11 6558 16602.40 885.70 8489566.65 16414035.37
38 17186 13948.12 -178.90 4112286.05 16809890.88
10 5712 13476.74 169.53 3161614.98 17053498.88
25 13071 13581.79 499.42 3295138.36 17077549.27
12 6875 17099.39 798.47 8468350.09 17180740.61
5 3332 13876.81 209.17 3177662.26 17637547.46
35 16475 14639.28 460.39 3279339.79 18679585.96
36 16838 14638.62 328.22 3225844.99 18732081.90
32 15178 14665.91 286.29 3208876.19 18789989.48
26 13249 15210.67 -492.42 3983900.23 18832108.37
21 11489 15386.50 -390.65 4025575.76 19054177.21
14 7837 19300.19 684.70 8370127.52 20580162.17
0 515 16190.58 23.19 3102385.84 21183485.72
40 19114 16367.82 496.51 3293960.39 21257774.74
22 11614 16338.16 266.58 3200895.25 21306340.94
43 20194 16484.74 413.19 3260236.74 21466877.59
24 12810 18371.89 -296.70 4064047.21 23493789.95

Select top 5 most promising parcels that also match Kevin's predictions


In [17]:
# Parcel IDs from Kevin: [7837, 19114, 20194, 11489,10905,1790,13249,14154,12810,11614,12221]

selected_parcels = [19114, 20194, 11489, 11614, 12810]

selected_total_costs = total_costs.loc[total_costs.parcel_id.isin(selected_parcels), :].sort_values(by=['estimated_profit'])

selected_total_costs


Out[17]:
parcel_id estimated_gold_r3 elevation total_cost estimated_profit
21 11489 15386.50 -390.65 4025575.76 19054177.21
40 19114 16367.82 496.51 3293960.39 21257774.74
22 11614 16338.16 266.58 3200895.25 21306340.94
43 20194 16484.74 413.19 3260236.74 21466877.59
24 12810 18371.89 -296.70 4064047.21 23493789.95

Remove the total costs from the available budget


In [20]:
total_cost = selected_total_costs.total_cost.sum()

bid_money = total_budget - total_cost

bid_money


Out[20]:
32155284.65558866

Place bids using an empiric "Gauss Distribution"

  • Offer 7 million for the middle 3 parcels
  • Divide the rest evenly for the remaining 2

In [58]:
bids = selected_total_costs.loc[:, ['parcel_id']]

max_bid = 7000000
remaining_money = bid_money - (max_bid * 3)

bids['bid_amount'] = pd.Series([remaining_money / 2, max_bid, max_bid, max_bid, remaining_money / 2]).values

bids


Out[58]:
parcel_id bid_amount
21 11489 5577642.33
40 19114 7000000.00
22 11614 7000000.00
43 20194 7000000.00
24 12810 5577642.33

In [62]:
bids.to_csv("kevin_mihai.csv", cols=['parcel_id', 'bid_amount'], index=False)