In [ ]:
import numpy as np
import pandas as pd
import os

In [ ]:
# Reload imported modules automatically before each cell executes, so edits to
# the helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [ ]:
# Display settings: allow up to 999 columns and 999 rows to be rendered
# before pandas truncates a frame in cell output.
pd.options.display.max_columns = 999
pd.options.display.max_rows = 999

In [ ]:
from etl_helpers import *
from model_helpers import *

Read in player results and salaries


In [ ]:
# Load the three raw inputs. import_data is provided by one of the star-imported
# helper modules (etl_helpers / model_helpers); its implementation is not shown
# in this notebook. The result is unpacked into three objects.
df_dict, df_salaries, df_points = import_data()

In [ ]:
# Preview the first rows of df_dict.
df_dict.head()

In [ ]:
# Preview the first rows of the salary data.
df_salaries.head()

In [ ]:
# Summary statistics of the points data.
df_points.describe()

Clean and merge data


In [ ]:
# Combine the three raw inputs into the single working frame `df` that every
# later cell uses. clean_and_merge is a star-imported helper; what cleaning and
# which join keys it applies cannot be seen from this notebook.
df = clean_and_merge(df_dict=df_dict, df_salaries=df_salaries, df_points=df_points)

In [ ]:
# Preview the first rows of the merged frame.
df.head()

In [ ]:
# Check the column dtypes of the merged frame.
df.dtypes

In [ ]:
# Summary statistics of the merged frame.
df.describe()

Create features


In [ ]:
# Average 'DK points' per GID, as a one-column frame indexed by GID.
meanpoints = df.groupby('GID')[['DK points']].mean()

In [ ]:
# Attach each GID's average to every row of that GID. Both frames carry a
# 'DK points' column, so the joined-in one is suffixed to 'DK points_mean'.
# NOTE(review): re-running this cell without re-creating df first will try to
# join a second 'DK points_mean' column — run from the top after changes.
df = (
    df.set_index('GID')
      .join(meanpoints, rsuffix='_mean')
      .reset_index()
)

In [ ]:
# Rescale the per-GID average by salary, overwriting the column in place:
# from here on 'DK points_mean' holds average points per unit of salary.
# NOTE(review): not idempotent — running this cell twice divides by Salary
# twice. A Salary of 0 would yield inf/NaN; confirm none exist in the data.
df['DK points_mean'] = df['DK points_mean'].div(df['Salary'])

In [ ]:
# Preview the frame with the new 'DK points_mean' feature column.
df.head()

a) Pick a single team


In [ ]:
# Distinct values of the Week column present in df.
weeks = df['Week'].unique()
weeks

In [ ]:
# Keep only rows of the latest week (largest Week value); df is overwritten,
# so all following cells see the current week only.
df = df.loc[df['Week'].eq(weeks.max())]

In [ ]:
# Draw a single team from the current-week frame using the star-imported
# single_draw helper. The seed is fixed at 0 — presumably so the draw is
# repeatable; confirm against the helper's implementation.
team = single_draw(df=df, seed=0)

In [ ]:
# Display the drawn team. It is used below as a collection of GIDs
# (it is passed to df.GID.isin and to the performance/salary helpers).
team

In [ ]:
# Score the drawn team: pass the team, the data frame and the name of the
# column holding the performance measure to the get_performance helper.
# 'DK points_mean' is the salary-scaled average created in "Create features".
get_performance(team=team, df=df, point_col='DK points_mean')

In [ ]:
# Salary of the drawn team: pass the team and the data frame to the get_salary
# helper. No salary column name is passed here, so the helper must locate it
# itself (implementation not shown in this notebook).
get_salary(team=team, df=df)

In [ ]:
# Roster details for the single team drawn in this section.
# Uses `team` (assigned by single_draw above). The previous reference to
# `bestteam` raised NameError on a fresh Restart & Run All, because `bestteam`
# is only assigned later, in section b).
df.loc[df.GID.isin(team), ['GID', 'Team', 'Name', 'Position', 'Salary', 'DK points', 'DK points_mean']]

In [ ]:


In [ ]:

b) Draw random teams


In [ ]:
# Distinct values of the Week column present in df. If df was already
# restricted to the latest week in an earlier section, this holds one value.
weeks = df['Week'].unique()
weeks

In [ ]:
# Keep only rows of the latest week (largest Week value). When the same
# restriction has already been applied earlier, this leaves df unchanged.
df = df.loc[df['Week'].eq(weeks.max())]

In [ ]:
# Draw n=100 random teams with the star-imported multi_draw helper, scoring
# each on point_col. Per the cells below, the result is expected to be ordered
# best-first, with each entry holding (sum of point_col, random seed).
# NOTE(review): ordering and entry layout come from the helper — confirm there.
bestlist = multi_draw(df=df, n=100, point_col='DK points_mean')

In [ ]:
# Check the dimensions of the draw results (performances and random seeds).
np.shape(bestlist)

In [ ]:
# Re-create the best team's GIDs by replaying single_draw with the seed stored
# in the first entry of bestlist (index 1 of that entry).
# NOTE(review): assumes bestlist[0] is the best-scoring draw and that the same
# seed reproduces the same team — both depend on the helper implementations.
bestteam = single_draw(df=df, seed=bestlist[0][1])

In [ ]:
# Score the best random team on the salary-scaled 'DK points_mean' column.
get_performance(team=bestteam, df=df, point_col='DK points_mean')

In [ ]:
# Salary of the best random team, computed by the get_salary helper from the
# team and the data frame.
get_salary(team=bestteam, df=df)

In [ ]:
# Roster details for the best team found by random drawing.
df.loc[
    df['GID'].isin(bestteam),
    ['GID', 'Team', 'Name', 'Position', 'Salary', 'DK points', 'DK points_mean'],
]

In [ ]:


In [ ]:

c) Pick optimal team


In [ ]:
# Distinct values of the Week column present in df. If df was already
# restricted to the latest week in an earlier section, this holds one value.
weeks = df['Week'].unique()
weeks

In [ ]:
# Keep only rows of the latest week (largest Week value). When the same
# restriction has already been applied earlier, this leaves df unchanged.
df = df.loc[df['Week'].eq(weeks.max())]

In [ ]:
# GIDs to exclude before optimising; rows with these GIDs are dropped from df.
# NOTE(review): the IDs are hard-coded and, going by the variable name, are
# injured players — confirm the list is current before each run.
InjuredPlayers = [5559, 4494, 1518]
df = df.loc[~df['GID'].isin(InjuredPlayers)]

In [ ]:


In [ ]:
# Pick the optimal team with the star-imported optimal_draw helper (described
# here as a linear-programming optimisation; implementation not shown).
# NOTE(review): this call uses the keyword `points_col`, while multi_draw and
# get_performance use `point_col` — confirm it matches optimal_draw's signature.
optimalteam = optimal_draw(df=df, points_col='DK points_mean')

In [ ]:
# Score the optimised team on the salary-scaled 'DK points_mean' column.
get_performance(team=optimalteam, df=df, point_col='DK points_mean')

In [ ]:
# Salary of the optimised team, computed by the get_salary helper from the
# team and the data frame.
get_salary(team=optimalteam, df=df)

In [ ]:
# Roster details for the optimised team.
df.loc[
    df['GID'].isin(optimalteam),
    ['GID', 'Team', 'Name', 'Position', 'Salary', 'DK points', 'DK points_mean'],
]

In [ ]:


In [ ]: