Replication

This notebook replicates part of the analysis done for this project. Not all of the project's code is included, but there is enough to reproduce and visualize some of the results.



In [ ]:

    
#imports
import datetime
import glob
import operator
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
from sklearn import preprocessing



In [ ]:

    
#global variables
# Seasons used by the analysis, as 'YYYY-YY' labels ('2011-12' is not in the list).
# start_dates[i] and end_dates[i] are the date bounds paired with seasons[i]; they use the
# 'MON DD, YYYY' format that the date helpers below parse.
seasons = ['2005-06', '2006-07', '2007-08', '2008-09', '2009-10', '2010-11', '2012-13', '2013-14', '2014-15']
start_dates = ['NOV 01, 2005', 'OCT 31, 2006', 'OCT 30, 2007', 'OCT 28, 2008', 'OCT 27, 2009', 'OCT 26, 2010', 'OCT 30, 2012', 'OCT 29, 2013', 'OCT 28, 2014']
end_dates = ['APR 19, 2006', 'APR 18, 2007', 'APR 16, 2008', 'APR 15, 2009', 'APR 14, 2010', 'APR 13, 2011', 'APR 17, 2013', 'APR 16, 2014', 'APR 15, 2015']

# Regression model passed to the evaluation and simulation cells further down.
model = linear_model.LinearRegression(normalize=True)

# Three-letter month abbreviations; list position + 1 is the month number.
months = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
# Numbers of the months that have 30 days (read by date_add / date_sub).
months_30 = [4, 6, 9, 11]

Date Manipulation



In [ ]:

    
def month_to_number(month):
    """Return the 1-based month number for an abbreviation such as 'FEB'.

    The lookup is done in the module-level `months` list, so an unknown
    abbreviation raises ValueError (from list.index).
    """
    zero_based = months.index(month)
    return zero_based + 1

def number_to_month(number):
    """Return the three-letter abbreviation (e.g. 'FEB') for a month number.

    Args:
        number: month number, 1 (JAN) to 12 (DEC).

    Raises:
        ValueError: if number is outside 1..12.
    """
    if number <= 0 or number > 12:
        # Raise a regular exception rather than calling exit(): that helper is
        # meant for the interactive shell and should not be used to report a
        # bad argument from inside a function.
        raise ValueError('This is not a month number: %r' % (number,))

    return months[number - 1]

def date_to_string(year, month, day):
    """Format numeric year, month and day as 'MON DD, YYYY' (e.g. 'FEB 03, 2015').

    Days below 10 are prefixed with a single '0' so the day always fills the
    two-character slot that the parsing helpers slice out.
    """
    if day < 10:
        day_text = '0' + str(day)
    else:
        day_text = str(day)

    return ' '.join([number_to_month(month), day_text + ',', str(year)])

def date_before(date1, date2):
    """Return True when date1 falls on or before date2.

    Both arguments use the 'FEB 10, 2015' layout: month abbreviation in
    characters 0-2, day in characters 4-5 and year from character 8 on.
    """
    def as_key(text):
        # (year, month, day) tuples compare in chronological order
        return int(text[8:]), month_to_number(text[:3]), int(text[4:6])

    return as_key(date1) <= as_key(date2)

def date_in(date, start, end):
    """Return True when start <= date <= end (both bounds inclusive).

    All three dates use the 'FEB 10, 2015' format expected by date_before.
    """
    if not date_before(date, end):
        return False

    return date_before(start, date)

def is_leap_year(year):
    """Return True when `year` is a leap year in the Gregorian calendar."""
    if year % 400 == 0:
        return True

    if year % 100 == 0:
        return False

    return year % 4 == 0

def date_add(date, days):
    """Return the date that lies `days` days after `date`.

    Args:
        date: date string in the 'FEB 10, 2015' format.
        days: number of days to add. Any integer is accepted: the shift may
            cross several month or year boundaries, and a negative value
            moves backwards.

    Returns:
        The shifted date, in the same string format.
    """
    year, month, day = int(date[8:]), month_to_number(date[:3]), int(date[4:6])

    # The calendar arithmetic is delegated to datetime so that shifts longer
    # than one month (or negative shifts) still produce a valid date.
    shifted = datetime.date(year, month, day) + datetime.timedelta(days=days)

    return date_to_string(shifted.year, shifted.month, shifted.day)

def date_sub(date, days):
    """Return the date that lies `days` days before `date`.

    Args:
        date: date string in the 'FEB 10, 2015' format.
        days: number of days to subtract. Any integer is accepted: the shift
            may cross several month or year boundaries, and a negative value
            moves forwards.

    Returns:
        The shifted date, in the same string format.
    """
    year, month, day = int(date[8:]), month_to_number(date[:3]), int(date[4:6])

    # The calendar arithmetic is delegated to datetime so that shifts longer
    # than one month still produce a valid date.
    shifted = datetime.date(year, month, day) - datetime.timedelta(days=days)

    return date_to_string(shifted.year, shifted.month, shifted.day)

def get_games(player, start, end):
    """Return the games of `player` played between `start` and `end` (inclusive).

    `player` is a record whose 'stats' entry is a list of game rows; the date of
    a row is read from index 1. Dates follow the "FEB 10, 2015" format.
    """
    return [row for row in player['stats'] if date_in(row[1], start, end)]

def get_all_games(season, start, end):
    """Return the games of every player of `season` played between two dates.

    Every '*.pkl' file under data/<season>/player_stats is loaded and filtered
    with get_games; the matching rows of all players are concatenated.

    Args:
        season: season label used as directory name (e.g. '2006-07').
        start, end: inclusive date bounds in the "FEB 10, 2015" format.

    Returns:
        A flat list of game rows.
    """
    pattern = os.path.join('data', season, 'player_stats', '*.pkl')
    all_games = []

    for path in glob.glob(pattern):
        # The path returned by glob is opened directly and the handle is closed
        # as soon as the record is read.
        # NOTE: pickle.load can execute arbitrary code; only use it on the
        # project's own data files.
        with open(path, 'rb') as handle:
            player = pickle.load(handle)

        all_games += get_games(player, start, end)

    return all_games

def get_fantasy(game, PTS = 1, BLK = 1, STL = 1, AST = 1, REB = 1, FGM = 1, FTM = 1, FGA = -1, FTA = -1, TOV = -1):
    """Return the fantasy score of one game row.

    Each keyword is the weight applied to one column of `game`; the defaults
    reward PTS, BLK, STL, AST, REB, FGM and FTM and penalise FGA, FTA and TOV.
    """
    # (weight, column index in the game row), kept in the summation order
    weighted_columns = (
        (PTS, 22), (BLK, 19), (STL, 18), (AST, 17), (REB, 16),
        (FGM, 5), (FTM, 11), (FGA, 6), (FTA, 12), (TOV, 20),
    )
    terms = [weight*game[column] for weight, column in weighted_columns]

    return sum(terms[1:], terms[0])

def get_games_num(player, start, end):
    """Return the indices of the first and last game played between two dates.

    `first` is the index of the first row of player['stats'] whose date (row
    index 1) lies in [start, end]; `last` is the index just before the next row
    that falls outside that range, or the final index if every later row is
    inside it. (-1, -1) is returned when no game lies in the range.
    """
    stats = player['stats']

    first = None
    for index, game in enumerate(stats):
        if date_in(game[1], start, end):
            first = index
            break

    #in case no games are played in that period of time
    if first is None:
        return (-1, -1)

    last = len(stats) - 1
    for index in range(first, len(stats)):
        if not date_in(stats[index][1], start, end):
            last = index - 1
            break

    return first, last

Stats Functions



In [ ]:

    
def _summarize_games(matches, profile):
    """Average the numeric columns of a non-empty list of game rows.

    Columns 4 and up of every row are averaged, the three percentage columns are
    recomputed from the averaged made/attempted columns, and the win rate over
    `matches` followed by the `profile` values is appended.
    """
    summary = [float(sum(column))/float(len(column)) for column in zip(*[match[4:] for match in matches])]

    #Ensuring Percentages are correct (using average as default value)
    for pct, default in zip([3, 6, 9], [0.45, 0.35, 0.75]):
        summary[pct] = default if summary[pct - 1] == 0 else summary[pct - 2]/summary[pct - 1]

    won = float([match[3] for match in matches].count('W'))
    summary.append(won/len(matches))
    summary.extend(profile)

    return summary

def average(player, end = -1, start = 0):
    """Return the averaged stats (all, home and away) of a player over games [start, end).

    Args:
        player: player record with 'stats' (list of game rows), 'experience',
            'age', 'height' ('feet-inches' string) and 'weight'.
        end: index one past the last game to average; -1 (default) means all
            games but the last one. Values above the number of games fall back
            to all available games, and 0 yields an all-zero stat vector.
        start: index of the first game to average. A negative value is treated
            as 0; a value >= end falls back to all available games.

    Returns:
        A tuple (averaged, home_avg, away_avg). Each non-empty list holds the
        averaged stat columns followed by win rate, experience, age, height (in
        inches) and weight. Rows whose matchup string (row index 2) has '@' at
        position 4 form the `home` group and all other rows the `away` group;
        a group without games is returned as [].
        NOTE(review): '@' usually marks a road game in box scores, so the two
        labels may be swapped. player_features reads them with the same
        convention, so they are left as they are - confirm before renaming.
    """
    games_num = len(player['stats'])

    experience = player['experience']
    age = player['age']
    height_parts = player['height'].split('-')
    # feet-inches to inches: a foot is 12 inches. The previous factor of 6 made
    # a 7-0 player come out shorter than a 6-10 player.
    height = 12*int(height_parts[0]) + int(height_parts[1])
    weight = int(player['weight'])
    profile = [experience, age, height, weight]

    if end == 0:
        # No game to average: zero stats and win rate plus the profile values.
        # Three independent lists are returned so that a caller mutating one of
        # them does not change the others.
        return [0.]*21 + profile, [0.]*21 + profile, [0.]*21 + profile

    if end == -1:
        # max() keeps a player without any game from recursing forever
        return average(player, max(games_num - 1, 0))

    if end > games_num:
        print("not enough games, returned average of all available games (%d)" % games_num)
        return average(player, games_num)

    if start >= end:
        print("start must be smaller then end, returned average of all available games (%d)" % games_num)
        return average(player, games_num)

    if start < 0:
        return average(player, end)

    window = player['stats'][start:end]

    # The win rate is taken over the games of the window itself, so that it stays
    # correct when start > 0 (e.g. for the "last n games" features).
    averaged = _summarize_games(window, profile)

    home = [match for match in window if match[2][4] == '@']
    away = [match for match in window if match[2][4] != '@']

    # Each group gets its own percentage correction and win rate; [] signals
    # that the group has no game in the window.
    home_avg = _summarize_games(home, profile) if len(home) != 0 else []
    away_avg = _summarize_games(away, profile) if len(away) != 0 else []

    return averaged, home_avg, away_avg

#print average('2011-12', '201149')

def compute_fantasy(player, game_number = -1,
                    PTS = 1, BLK = 1, STL = 1, AST = 1, REB = 1, FGM = 1, FTM = 1, FGA = -1, FTA = -1, TOV = -1):
    """Return the fantasy points of `player` for his game number `game_number`.

    Args:
        player: player record whose 'stats' entry is the list of game rows.
        game_number: index of the game; -1 (default) selects the last game. An
            index past the end prints a notice and also selects the last game.
        PTS ... TOV: scoring weights, forwarded to get_fantasy (ESPN values by
            default).

    Raises:
        IndexError: if the player has no game at all.
    """
    games_num = len(player['stats'])

    if games_num == 0:
        raise IndexError("player has no recorded games")

    # The last game is at index games_num - 1. Both fallbacks used to recurse
    # with games_num, which is itself out of range, and so never terminated.
    if game_number == -1:
        game_number = games_num - 1

    elif game_number >= games_num:
        print("This game does not exist, returned last game played instead")
        game_number = games_num - 1

    return get_fantasy(player['stats'][game_number],
                       PTS=PTS, BLK=BLK, STL=STL, AST=AST, REB=REB,
                       FGM=FGM, FTM=FTM, FGA=FGA, FTA=FTA, TOV=TOV)

First Model



In [ ]:

    
def player_features(season, playerID, binary_pos = False, include_loc = False, num_last_games = 0, start = 1, end = -1):
    """Build the sliding feature matrix and targets for one player of one season.

    For every game index i in [start, end) one row is produced from
    average(player, i); its target is the fantasy score of game i + 1.

    Args:
        season: season label used as directory name (e.g. '2006-07').
        playerID: name (without '.pkl') of the player's file in
            data/<season>/player_stats.
        binary_pos: prepend a 7-slot one-hot encoding of player['position'].
        include_loc: append the averages of the location group ('@' at position
            4 of the matchup string or not) that game i + 1 belongs to; the row
            is skipped when that group has no game yet.
        num_last_games: if > 0, append the averages of the last n games.
        start: first game index to produce a row for.
        end: one past the last game index; -1 means len(stats) - 1.

    Returns:
        (X, y) as numpy arrays. Each row of X ends with the game index i and the
        mean fantasy score of games 0..i. Both arrays are empty when the player
        has no usable game.
    """
    averages = []
    next_match_points = []
    points = []
    path = os.path.join('data', season, 'player_stats', playerID + '.pkl')
    # NOTE: pickle.load can execute arbitrary code; only use it on the project's
    # own data files.
    with open(path, 'rb') as handle:
        player = pickle.load(handle)

    # A player without any game has nothing to average or to predict.
    if len(player['stats']) == 0:
        return np.array([]), np.array([])

    if end == -1:
        end = len(player['stats']) - 1

    for j in range(start):
        points.append(compute_fantasy(player, j))

    for i in range(start, end):
        # Recorded before any `continue` below, so the running mean keeps
        # covering games 0..i even when the row for game i is skipped.
        points.append(compute_fantasy(player, i))

        overall, home, away = average(player, i)

        row = list(overall)

        if binary_pos:
            positions = ['Center', 'Forward', 'Center-Forward', 'Guard', 'Forward-Guard', 'Forward-Center', 'Guard-Forward']
            one_hot = [0, 0, 0, 0, 0, 0, 0]
            one_hot[positions.index(player["position"])] += 1

            row = one_hot + row

        if include_loc:
            #test if next game is home or away
            location = home if player['stats'][i + 1][2][4] == '@' else away

            #To make sure features exist
            if location == []:
                continue

            row += location

        if num_last_games > 0:
            row += average(player, i, i - num_last_games)[0]

        row.append(i)
        row.append(np.mean(points))
        averages.append(row)
        next_match_points.append(compute_fantasy(player, i + 1))

    X = np.array(averages)
    y = np.array(next_match_points)

    return X, y

def season_features(season, binary_pos = False, include_loc = False, num_last_games = 0, best_players = 0):
    """Build, save and return the sliding feature matrix of a whole season.

    The matrices of all players (see player_features) are concatenated and
    pickled to data/<season>/averages/<name>_X.pkl and <name>_y.pkl, where
    <name> is 'slide' with a 'B' prefix (binary_pos), a '_loc' suffix
    (include_loc) and a '_<n>' suffix (num_last_games), in that order.

    Args:
        season: season label used as directory name (e.g. '2006-07').
        binary_pos, include_loc, num_last_games: forwarded to player_features.
        best_players: if not 0, only the first `best_players` entries returned
            by get_fantasies are used. get_fantasies is not defined in this
            section and must be available for that mode to work.

    Returns:
        (Xf, yf), the concatenated feature matrix and target vector.

    Raises:
        ValueError: if no player of the season produced any feature row.
    """
    Xs = []
    ys = []

    if best_players == 0:
        pattern = os.path.join('data', season, 'player_stats', '*.pkl')
        # the player ID is the file name without its extension
        player_ids = [os.path.splitext(os.path.basename(path))[0] for path in glob.glob(pattern)]

    else:
        best = get_fantasies(season, 'OCT 20, ' + season[:4], 'DEC 15, ' + season[:4])
        player_ids = [entry[0] for entry in best[:best_players]]

    for playerID in player_ids:
        #print "Dealing with {}".format(playerID)
        X, y = player_features(season, playerID, binary_pos, include_loc, num_last_games)

        if X.shape != (0,):
            Xs.append(X)
            ys.append(y)

    if not Xs:
        raise ValueError("no feature rows could be built for season %s" % season)

    Xf = np.concatenate(Xs)
    yf = np.concatenate(ys)

    filename = 'slide'

    if binary_pos:
        filename = 'B' + filename

    if include_loc:
        filename += '_loc'

    if num_last_games > 0:
        filename += '_' + str(num_last_games)

    out_dir = os.path.join('data', season, 'averages')
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # Opening with 'wb' replaces any earlier file, so no prior removal is needed;
    # the with-block guarantees the data is flushed and the handle closed.
    for suffix, array in (('_X.pkl', Xf), ('_y.pkl', yf)):
        with open(os.path.join(out_dir, filename + suffix), 'wb') as handle:
            pickle.dump(array, handle)

    return Xf, yf

def error(model, X, y):
    """Return the mean and the maximum absolute prediction error of `model`.

    Args:
        model: fitted estimator exposing predict(X).
        X: feature matrix, one row per sample.
        y: target values, one per row of X.

    Returns:
        A tuple (average absolute error, largest absolute error).

    Raises:
        ValueError: if there is no sample to evaluate.
    """
    predictions = np.asarray(model.predict(X)).ravel()

    if predictions.shape[0] == 0:
        raise ValueError("cannot compute prediction error on an empty sample set")

    abs_errors = np.abs(predictions - np.asarray(y).ravel())

    return abs_errors.sum()/predictions.shape[0], abs_errors.max()

def ABOF_error(seasons, model, degree = 0, binary_pos = False, include_loc = False, num_last_games = 0):
    """Leave-one-season-out error of `model` on the saved sliding features.

    For every season the model is fitted on all other seasons and evaluated on
    that season with error(). The feature files are the ones written by
    season_features for the same binary_pos / include_loc / num_last_games
    options.

    Args:
        seasons: list of season labels; at least two are needed.
        model: estimator with fit / predict; it is refitted in place each fold.
        degree: if > 0, features are expanded with PolynomialFeatures(degree).
        binary_pos, include_loc, num_last_games: select the feature files.

    Returns:
        (average error, average of the per-season maximum errors).

    Raises:
        ValueError: if fewer than two seasons are given.
    """
    if len(seasons) < 2:
        raise ValueError("at least two seasons are needed to train on one and test on another")

    filename = 'slide'

    if binary_pos:
        filename = 'B' + filename

    if include_loc:
        filename += '_loc'

    if num_last_games > 0:
        filename += '_' + str(num_last_games)

    Xs = []
    ys = []

    for season in seasons:
        #print season
        base = os.path.join('data', season, 'averages', filename)

        # NOTE: pickle.load can execute arbitrary code; only use it on files
        # produced by season_features.
        with open(base + '_X.pkl', 'rb') as handle:
            X = pickle.load(handle)

        with open(base + '_y.pkl', 'rb') as handle:
            y = pickle.load(handle)

        if degree > 0:
            X = preprocessing.PolynomialFeatures(degree).fit_transform(X)

        Xs.append(X)
        ys.append(y)

    avg_error = 0.
    avg_max = 0.

    for i, season in enumerate(seasons):
        print("Testing on season %s (Training on the rest)" % season)
        # every season but the i-th one is used for training
        trainX = np.concatenate(Xs[:i] + Xs[i + 1:])
        trainy = np.concatenate(ys[:i] + ys[i + 1:])

        print(trainX.shape)

        model.fit(trainX, trainy)
        #print model.coef_
        err = error(model, Xs[i], ys[i])
        avg_error += err[0]
        avg_max += err[1]

        print("error for this season is %s" % (err,))

    result = avg_error/len(seasons), avg_max/len(seasons)
    print("Average error and Averaged max error over all seasons is %s" % (result,))

    print(filename)
    return result

def compute_and_results(seasons, model, degree=0, binary_pos=False, include_loc=False, num_last_games=0, best_players = 0):
    """Recompute the sliding features of every season, then report the errors.

    Each season label is printed before its features are built with
    season_features; ABOF_error is run once all seasons are done. Nothing is
    returned.
    """
    for label in seasons:
        print(label)
        season_features(label,
                        binary_pos=binary_pos,
                        include_loc=include_loc,
                        num_last_games=num_last_games,
                        best_players=best_players)

    ABOF_error(seasons, model,
               degree=degree,
               binary_pos=binary_pos,
               include_loc=include_loc,
               num_last_games=num_last_games)



In [ ]:

    
# Runs the first model: rebuilds the sliding features of every season, then prints the
# leave-one-season-out errors.
# binary_pos : if True, adds a one-hot vector encoding the player's position to the features
# include_loc : if True, adds the location (home/away) averages to the features
# num_last_games : if not 0, also uses the average of the given number of last games as features
# best_players : if not 0, only uses the given number of best players to train/test

compute_and_results(seasons, model, binary_pos=False, include_loc=False, num_last_games=0, best_players=0)

Simulation



In [ ]:

    
def week_feature(player, start_date, end_date, season_start, binary_pos = False, num_last_games = 0):
    """Build the feature vector used to predict a player's score over one week.

    start_date and end_date bound the week to be predicted; only games played
    before the week feed the features.

    Returns:
        (features, score). `score` is the summed fantasy score of the games of
        the week, or -100. when the player has no game in it. `features` is []
        when no game of the week has an index above 0; otherwise it holds the
        averaged stats, the optional position / last-games features, then
        first game index - 1, the mean fantasy score between season_start and
        the day before start_date, and the number of games in the week.
    """
    first_game, _ = get_games_num(player, start_date, end_date)

    week_games = get_games(player, start_date, end_date)

    #score = -100 means no games were played by the player the following week and the feature will not be used
    if week_games != []:
        score = 0.
    else:
        score = -100.

    if not first_game > 0:
        return [], score

    features = average(player, first_game - 1)[0]

    if binary_pos:
        positions = ['Center', 'Forward', 'Center-Forward', 'Guard', 'Forward-Guard', 'Forward-Center', 'Guard-Forward']
        slot = positions.index(player["position"])
        one_hot = [0]*7
        one_hot[slot] += 1

        features = one_hot + features

    if num_last_games > 0:
        features += average(player, first_game - 1, first_game - 1 - num_last_games)[0]

    for played in week_games:
        score += get_fantasy(played)

    features.append(first_game - 1)

    # mean fantasy score of the games played since the start of the season
    earlier_games = get_games(player, season_start, date_sub(start_date, 1))
    mean_points = sum((get_fantasy(played) for played in earlier_games), 0.)

    if len(earlier_games) != 0:
        mean_points = mean_points/len(earlier_games)

    features.append(mean_points)
    features.append(len(week_games))

    return features, score

def week_features(season, start_date, end_date, step, binary_pos = False, num_last_games = 0, best_players = 0):
    """Build the weekly feature matrix of a whole season.

    For every player, windows of `step` + 1 days are slid from start_date +
    step towards end_date, and week_feature is evaluated on each window. Rows
    with no features or with the -100 "no game" score are dropped.

    Args:
        season: season label used as directory name (e.g. '2006-07').
        start_date, end_date: season bounds in the "FEB 10, 2015" format.
        step: window length minus one, in days.
        binary_pos, num_last_games: forwarded to week_feature.
        best_players: if not 0, only the first `best_players` entries returned
            by get_fantasies are used. get_fantasies is not defined in this
            section and must be available for that mode to work.

    Returns:
        (Xf, yf) as numpy arrays.

    Raises:
        ValueError: if no usable row could be built.
    """
    Xs = []
    ys = []
    stats_dir = os.path.join('data', season, 'player_stats')

    if best_players == 0:
        # the player ID is the file name without its extension
        player_ids = [os.path.splitext(os.path.basename(path))[0]
                      for path in glob.glob(os.path.join(stats_dir, '*.pkl'))]

    else:
        best = get_fantasies(season, 'OCT 20, ' + season[:4], 'DEC 15, ' + season[:4])
        player_ids = [entry[0] for entry in best[:best_players]]

    for playerID in player_ids:
        # NOTE: pickle.load can execute arbitrary code; only use it on the
        # project's own data files.
        with open(os.path.join(stats_dir, playerID + '.pkl'), 'rb') as handle:
            player = pickle.load(handle)

        curr_date = date_add(start_date, step)
        curr_end_date = date_add(curr_date, step)
        while date_before(date_add(curr_end_date, step + 1), end_date):
            X, y = week_feature(player, curr_date, curr_end_date, start_date, binary_pos, num_last_games)

            #make sure some games are played the next week
            if X != [] and y != -100:
                Xs.append(X)
                ys.append(y)

            curr_date = date_add(curr_date, step + 1)
            curr_end_date = date_add(curr_end_date, step + 1)

    if not Xs:
        raise ValueError("no weekly feature rows could be built for season %s" % season)

    Xf = np.reshape(Xs, (len(Xs), len(Xs[0])))
    yf = np.reshape(ys, len(ys))

    return Xf, yf


class week_simul:
    """Week-by-week simulation of fantasy score prediction over one season.

    The training set starts with the weekly features of every entry of the
    module-level `seasons` list that begins before `season`. Each simulated week
    is predicted with a model fitted on the current training set, and the rows
    of that week are then added to the training set.
    """

    def __init__(self, season, start_date, end_date, model, days = 6, players_num = 0, binary_pos = False, num_last_games = 0, best_players = 0, predict_num = 0):
        """Build the initial training data and the test data of the first week.

        Args:
            season: season to simulate (e.g. '2006-07').
            start_date, end_date: bounds of the season, "FEB 10, 2015" format.
            model: estimator with fit / predict; it is refitted every week.
            days: window length minus one (6 gives 7-day weeks).
            players_num: if not 0, only the first `players_num` entries returned
                by get_fantasies are simulated. get_fantasies is not defined in
                this section and must be available for that mode to work.
            binary_pos, num_last_games: forwarded to week_feature.
            best_players: forwarded to week_features for the training seasons.
            predict_num: number of players listed by predict().

        Raises:
            ValueError: if no earlier season is available for training.
        """
        print("Initializing attributes")
        self.season = season
        self.curr_date = start_date
        self.prev_seasons = []
        self.model = model
        self.days = days
        self.players_num = players_num
        self.binary_pos = binary_pos
        self.num_last_games = num_last_games
        self.end_date = end_date
        self.week = 0
        self.start_date = start_date
        self.poly = preprocessing.PolynomialFeatures(2)
        self.predict_num = predict_num
        self.week_avg_errors = []
        self.week_max_errors = []

        if self.players_num == 0:
            self.players = glob.glob('data' + os.sep + self.season + os.sep + 'player_stats' + os.sep + "*.pkl")

        else:
            print("Gathering list of best players")
            best = get_fantasies(self.season, 'OCT 20, ' + self.season[:4], 'DEC 15, ' + self.season[:4])
            self.players = [entry[0] for entry in best[:self.players_num]]

        #ensuring training data will only be in the past
        year = int(self.season[:4])
        for s, s_start, s_end in zip(seasons, start_dates, end_dates):
            if int(s[:4]) < year:
                self.prev_seasons.append((s, s_start, s_end))

        if not self.prev_seasons:
            raise ValueError("no season before %s is available to build training data" % self.season)

        #creating initial training data
        Xs = []
        ys = []

        print("Building training data")
        for prev in self.prev_seasons:
            print(prev)
            X, y = week_features(prev[0], prev[1], prev[2], self.days, self.binary_pos, self.num_last_games, best_players)
            X = self.poly.fit_transform(X)

            Xs.append(X)
            ys.append(y)

        self.trainX, self.trainy = np.concatenate(Xs), np.concatenate(ys)
        print("%s %s" % (self.trainX.shape, self.trainy.shape))
        print("%s %s" % (self.trainy.min(), self.trainy.max()))

        print("Building initial test data")
        # the first window is skipped: it has no earlier games to build features from
        next_date = date_add(self.curr_date, self.days)
        self.curr_date = date_add(next_date, 1)
        self.week += 1
        self.update_testing()

    def _load_player(self, entry):
        """Load the player record behind one entry of self.players.

        Entries are file paths when players_num is 0 and player IDs otherwise.
        """
        if self.players_num == 0:
            playerID = os.path.splitext(os.path.basename(entry))[0]
        else:
            playerID = entry

        path = 'data' + os.sep + self.season + os.sep + 'player_stats' + os.sep + playerID + '.pkl'

        # NOTE: pickle.load can execute arbitrary code; only use it on the
        # project's own data files.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def debug(self):
        """Print the main attributes of the simulation."""
        print(self.season)
        print(self.curr_date)
        print(self.prev_seasons)
        print(self.model)
        print(self.days)
        print(len(self.players))
        print("%s %s" % (self.trainX.shape, self.trainy.shape))
        print("%s %s" % (self.testX.shape, self.testy.shape))

    def update_testing(self):
        """Rebuild testX / testy for the window starting at curr_date.

        The test set is left untouched when no player has a usable row in the
        window.
        """
        Xs = []
        ys = []
        next_date = date_add(self.curr_date, self.days)
        for entry in self.players:
            player = self._load_player(entry)
            X, y = week_feature(player, self.curr_date, next_date, self.start_date, self.binary_pos, self.num_last_games)

            if X != [] and y != -100:
                Xs.append(X)
                ys.append(y)

        if len(Xs) != 0:
            self.testX, self.testy = np.reshape(Xs, (len(Xs), len(Xs[0]))), np.reshape(ys, len(ys))
            self.testX = self.poly.fit_transform(self.testX)


    #predicts values and updates training and testing set for the next days
    def simulate(self):
        """Fit on the training set, score the current week and move to the next.

        Returns:
            The (average error, max error) tuple of the week.
        """
        next_date = date_add(self.curr_date, self.days)
        self.model.fit(self.trainX, self.trainy)
        errors = error(self.model, self.testX, self.testy)

        print("Average and max errors on predictions from {} to {} is {}".format(self.curr_date, next_date, errors))

        self.week_avg_errors.append(errors[0])
        self.week_max_errors.append(errors[1])

        self.trainX, self.trainy = np.concatenate((self.trainX, self.testX)), np.concatenate((self.trainy, self.testy))

        self.curr_date = date_add(next_date, 1)
        self.week += 1

        print("Average error per week so far is {}, {}".format(np.mean(self.week_avg_errors), np.mean(self.week_max_errors)))

        if date_before(self.curr_date, self.end_date):
            self.update_testing()

        else:
            print("Season has ended, user should stop simulating")

        return errors

    #produces list of best players and compares to best doable.
    def predict(self):
        """Predict the best players of the current week and move to the next.

        The `predict_num` players with the highest predicted score are compared
        with the `predict_num` players that actually scored best.

        Returns:
            Best achievable score minus the real score of the predicted players.
        """
        next_date = date_add(self.curr_date, self.days)
        self.model.fit(self.trainX, self.trainy)

        predicted = dict()
        scores = dict()

        Xs = []
        ys = []

        for entry in self.players:
            player = self._load_player(entry)
            X, y = week_feature(player, self.curr_date, next_date, self.start_date, self.binary_pos, self.num_last_games)

            if X != [] and y != -100:
                X = np.reshape(X, (1, len(X)))
                X = self.poly.fit_transform(X)
                # Uses the instance's own model (not the notebook-level one) and
                # keeps the single prediction as a scalar.
                predicted[player['name']] = self.model.predict(X)[0]
                scores[player['name']] = y

                #to update training
                Xs.append(X)
                ys.append(y)

        #testX and y are not really used here but useful to update training
        # (skipped when no player has a usable row this week)
        if len(Xs) != 0:
            self.testX, self.testy = np.reshape(Xs, (len(Xs), Xs[0].shape[1])), np.reshape(ys, len(ys))
            self.trainX, self.trainy = np.concatenate((self.trainX, self.testX)), np.concatenate((self.trainy, self.testy))

        best_predicted = sorted(predicted.items(), key=operator.itemgetter(1), reverse = True)[:self.predict_num]
        best_real = sorted(scores.items(), key=operator.itemgetter(1), reverse = True)[:self.predict_num]
        predicted_real = []
        for candidate in best_predicted:
            name = candidate[0]
            predicted_real.append((name, scores[name]))

        print("Predictions from {} to {} are as follows : \n".format(self.curr_date, next_date))

        print('players predicted :\n')
        print(best_predicted)
        print('\nTheir real score :\n')
        print(predicted_real)
        print('\nActual best players :\n')
        print(best_real)

        predicted_score = sum(x[1] for x in best_predicted)
        best_score = sum(x[1] for x in best_real)
        actual_score = sum(x[1] for x in predicted_real)

        # predicted_score is a plain number, so this also works when the lists
        # are empty (e.g. predict_num == 0)
        print("\nPredicted score : {}".format(predicted_score))
        print("Actual score : {}".format(actual_score))
        print("best possible score : {}".format(best_score))

        self.curr_date = date_add(next_date, 1)
        self.week += 1

        if not date_before(self.curr_date, self.end_date):
            print("Season has ended, user should stop simulating")

        return best_score - actual_score

    def full_prediction(self):
        """Run predict() until the end of the season and plot the weekly loss."""
        errors = []
        weeks = []

        while date_before(self.curr_date, self.end_date):
            errors.append(self.predict())
            weeks.append(self.week)

        plt.plot(weeks, errors)
        plt.show()

        print("Total error for the {} season is {} which averages to {} a week".format(self.season, sum(errors), np.mean(errors)))


    def full_simulation(self, playerID = None):
        """Run simulate() until the end of the season and plot the weekly errors.

        Args:
            playerID: optional ID of a player whose weekly predictions are also
                plotted, next to two baselines built from his earlier weekly
                scores (the previous week's score and their running mean).
        """
        errors = []
        weeks = []

        if playerID:
            Xs = []
            ys = []
            with open('data' + os.sep + self.season + os.sep + 'player_stats' + os.sep + playerID + '.pkl', 'rb') as handle:
                player = pickle.load(handle)
            # score of the first window, used as the first baseline value
            games = get_games(player, self.start_date, date_add(self.start_date, self.days))
            score = 0
            for game in games:
                score += get_fantasy(game)

        while date_before(self.curr_date, self.end_date):
            if playerID:
                X, y = week_feature(player, self.curr_date, date_add(self.curr_date, self.days), self.start_date, self.binary_pos, self.num_last_games)

                if X != [] and y != -100:
                    Xs.append(X)
                    ys.append(y)

            errors.append(self.simulate())
            weeks.append(self.week)

        # the player plots need at least one usable week
        if playerID and len(Xs) != 0:
            playerX, playery = np.reshape(Xs, (len(Xs), len(Xs[0]))), np.reshape(ys, len(ys))
            playerX = self.poly.fit_transform(playerX)
            predicts = self.model.predict(playerX)
            # baseline: score of the previous week
            bs = (np.insert(playery, 0, score))[:-1]
            # baseline: running mean of the previous weeks
            bs_avg = np.zeros(len(playery))
            for i in range(len(playery)):
                bs_avg[i] = np.mean(bs[:i + 1])
            games = range(len(playery))
            plt.plot(games, predicts, 'ro', games, playery, 'o', games, bs, 'go', games, bs_avg, 'yo')
            plt.show()
            plt.clf()
            plt.plot(games, abs(predicts - playery), 'r', games, abs(bs - playery), 'g', games, abs(bs_avg - playery), 'y')
            plt.show()
            plt.clf()

        plt.plot(weeks, errors)
        plt.show()



In [ ]:

    
#to test simulation
#days is the length of a simulated 'week' minus one: a window runs from its first day to
#date_add(first day, days), both included, so a real 7-day week needs days=6
#the argument of full_simulation is the ID of the player whose predictions should be plotted
#(his data is read from data/2006-07/player_stats/<ID>.pkl)

test = week_simul('2006-07', 'OCT 31, 2006', 'APR 18, 2007', model, days=6)
test.full_simulation('708')



In [ ]:

    
#to test prediction
#days is the length of a simulated 'week' minus one: a window runs from its first day to
#date_add(first day, days), both included, so a real 7-day week needs days=6
#predict_num is the number of best players listed for every week

test = week_simul('2006-07', 'OCT 31, 2006', 'APR 18, 2007', model, days=6, predict_num=20)
test.full_prediction()



In [ ]: