In [1]:
import pandas as pd
pd.set_option('display.float_format', '{:.2f}'.format)
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook
In [2]:
total_budget = 50000000
gold_price = 1500 # dollars per ounce
In [3]:
costs_data = pd.read_csv("data/costs_data.csv")
costs_data.head()
Out[3]:
In [4]:
elevation_data = pd.read_csv("data/elevation_data.csv")
elevation_data.head()
Out[4]:
In [5]:
sample_data = pd.read_csv("data/sample_data.csv")
sample_data.head()
Out[5]:
In [6]:
auction_parcels = pd.read_csv("data/auction_parcels.csv")
auction_parcels.head()
Out[6]:
In [7]:
def fixed_cost(row):
if row["elevation"] < 0:
return 4000000.0
elif row["elevation"] > 0 and row["elevation"] <= 500:
return 3000000.0
else:
return 8000000.0
In [8]:
from sklearn import linear_model
regressions = {}
for elevation in ["low", "med", "high"]:
regression = linear_model.LinearRegression()
regression.fit(costs_data.loc[:, ["gold_amount"]], costs_data.loc[:,elevation])
regressions[elevation] = regression
def variable_cost(row):
if row["elevation"] < 0:
return regressions['low'].predict(row["elevation"])[0]
elif row["elevation"] >= 0 and row["elevation"] <= 700:
return regressions['med'].predict(row["elevation"])[0]
else:
return regressions['high'].predict(row["elevation"])[0]
for radius in range(1, 10)
and eyeball a decent value
In [9]:
plt.gca().set_autoscale_on(False)
plt.axis([0.0, 150.0, 0.0, 150.0])
plt.scatter(sample_data.Easting, sample_data.Northing, s=1, color='y', label='Sample Data')
plt.scatter(auction_parcels.Easting, auction_parcels.Northing, marker='x', color='r', label='Auction Parcels')
plt.scatter(auction_parcels.Easting, auction_parcels.Northing, s=1000, facecolors='none', edgecolors='b', label='Radius')
lgnd = plt.legend(scatterpoints=1, fontsize=10)
lgnd.legendHandles[0]._sizes = [50]
lgnd.legendHandles[1]._sizes = [50]
lgnd.legendHandles[2]._sizes = [50]
plt.xlabel('Easting')
plt.ylabel('Northing')
Out[9]:
In [12]:
def estimate_gold(radius):
gold_estimations = []
for idx_ap, ap in auction_parcels.iterrows():
sum = 0
count = 0
for idx_sd, sd in sample_data.iterrows():
if (radius >= np.linalg.norm(np.array([sd['Easting'], sd['Northing']]) - np.array([ap['Easting'], ap['Northing']]))):
sum += sd['gold_available']
count += 1
sum = sum / count if count > 0 else 0
estimated_gold_column = 'estimated_gold_r{:d}'.format(radius)
included_samples_column = 'included_samples_r{:d}'.format(radius)
gold_estimations.append({'parcel_id': ap['parcel_id'], estimated_gold_column: sum, included_samples_column: count})
return gold_estimations
In [13]:
gold_estimations = auction_parcels.loc[:,['parcel_id']]
for radius in range(1, 10):
gold_estimations = gold_estimations.merge(pd.DataFrame(estimate_gold(radius)), on='parcel_id')
gold_estimations
Out[13]:
In [16]:
total_costs = gold_estimations.loc[:, ['parcel_id', 'estimated_gold_r3']]
total_costs = total_costs.merge(elevation_data.loc[:, ['parcel_id', 'elevation']], on='parcel_id')
total_costs['total_cost'] = total_costs.apply(lambda row: fixed_cost(row), axis = 1) + total_costs.apply(lambda row: variable_cost(row), axis = 1)
total_costs['estimated_profit'] = total_costs.apply(lambda row: gold_price * row['estimated_gold_r3'] - row['total_cost'], axis = 1)
total_costs.sort_values(by=['estimated_profit'])
Out[16]:
In [17]:
# Parcel IDs from Kevin: [7837, 19114, 20194, 11489,10905,1790,13249,14154,12810,11614,12221]
selected_parcels = [19114, 20194, 11489, 11614, 12810]
selected_total_costs = total_costs.loc[total_costs.parcel_id.isin(selected_parcels), :].sort_values(by=['estimated_profit'])
selected_total_costs
Out[17]:
In [20]:
total_cost = selected_total_costs.total_cost.sum()
bid_money = total_budget - total_cost
bid_money
Out[20]:
In [58]:
bids = selected_total_costs.loc[:, ['parcel_id']]
max_bid = 7000000
remaining_money = bid_money - (max_bid * 3)
bids['bid_amount'] = pd.Series([remaining_money / 2, max_bid, max_bid, max_bid, remaining_money / 2]).values
bids
Out[58]:
In [62]:
bids.to_csv("kevin_mihai.csv", cols=['parcel_id', 'bid_amount'], index=False)