notebook.community

Edit and run



In [ ]:

    
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import datetime
from pprint import pprint as pp
import os
from timeit import Timer
import random
import dateutil
import pytz
import pylab as pl
from IPython.display import display, display_pretty, Javascript, HTML
from pandas_highcharts.core import serialize
from pandas_highcharts.display import display_charts
import scipy.stats as stats
import time

## Suppresses pandas' SettingWithCopyWarning for the rest of the notebook
pd.options.mode.chained_assignment = None

# display(HTML('<script src="//code.highcharts.com/stock/highstock.js"></script><script src="//code.highcharts.com/highcharts-more.js"></script><script src="//code.highcharts.com/modules/exporting.js"></script>'))

player_name = "Jacob Singh"

matches = pd.read_csv(os.path.join('../data/', 'all_but_champ/match_log.csv'))

# Keep only the matches the player took part in. The explicit copy makes the
# column assignment below act on an independent frame instead of a slice.
is_player1 = matches['player1-name'] == player_name
is_player2 = matches['player2-name'] == player_name
player_matches = matches[is_player1 | is_player2].copy()

# Take the ELO from whichever side the player was on. The previous
# `frame['ELO'][mask] = ...` chained assignment does not write through under
# pandas copy-on-write; a single vectorised select avoids it. Every remaining
# row has the player on side 1 or side 2, so the `else` branch is side 1.
player_matches['ELO'] = np.where(
    player_matches['player2-name'] == player_name,
    player_matches['player2_ELO'],
    player_matches['player1_ELO'],
)
player_matches = player_matches[['match-completed-at', 'name', 'winner','loser', 'winner_elo_in', 'loser_elo', 'ELO']]
player_matches.columns=['Date', 'Tournament', 'Winner', 'Loser', 'Winner ELO', 'Loser ELO', 'Player ELO']

players = pd.read_csv(os.path.join('../data/', 'all_but_champ/players_elo.csv'))

# Only players with more than 100 xp (this already implies xp > 0).
players = players[players["xp"] > 100]

# Sorted ELO values and the density of a normal curve centred on 1500 whose
# spread is the population standard deviation of those values.
h = sorted(players["ELO"].tolist())
dist = stats.norm.pdf(h, 1500, np.std(h))

df = pd.DataFrame({"ELO": h, "fd": dist})
df = df.set_index("ELO")

# fit = stats.norm.pdf(h, np.mean(h), np.std(h))

# pl.plot(h,fit)

# pl.hist(h,normed=True)      #use this to draw histogram of your data

# pl.show()                   #you may also need to add this



In [ ]:

    
import pandas as pd


# Load the match log before anything reads it, so the output of this cell does
# not depend on whichever `matches` frame an earlier cell left in the kernel.
matches = pd.read_csv('../data/all_but_champ/match_log.csv')
#tournaments = matches["tournament_type"].unique()

# Every logged match of one specific tournament.
pp(matches[matches["name"] == "Solar Quest ABG Tournament, Mercury"])

# Highest round number per tournament name, then the matches played in that
# round (presumably the final -- confirm against how rounds are numbered).
tournaments = matches.groupby(["name"])['match-round'].max()
for name, max_round in tournaments.items():
    final = matches[(matches["name"] == name) & (matches["match-round"] == max_round)]
    pp(name)
    pp(final)



In [ ]:

    
### DO NOT DELETE - Success by match length

# Read the match log and attach a "player1(player2)" label to every row,
# then preview the first few rows.
matches = pd.read_csv('../data/all_but_champ/match_log.csv').assign(
    foo=lambda log: log["player1-name"] + "(" + log["player2-name"] + ")"
)
matches.head()

# matches["length"] = matches["player1-score"] + matches["player2-score"]
# travis_winner = matches[matches["winner"] == "Travis Roberts"]
# travis_loser = matches[matches["loser"] == "Travis Roberts"]

# travis_winner["elo_change"] = matches["winner_elo_change"]
# travis_loser["elo_change"] = matches["loser_elo_change"]

# travis_winner = travis_winner[["length", "elo_change"]]
# travis_loser = travis_loser[["length", "elo_change"]]

# lengths = pd.concat([travis_winner, travis_loser])

# out = lengths.groupby('length')["elo_change"].mean()

# print(out)
# plt.bar()
# plt.show()



In [ ]:

    
# This cell plots with pyplot; import it here because no earlier cell does.
import matplotlib.pyplot as plt

# Read in the log. The completion timestamp is parsed as a date so sorting and
# the x axis are chronological (assumes the column holds parseable dates --
# TODO confirm; pandas leaves it as text otherwise).
matches = pd.read_csv('../data/all_but_champ/match_log.csv', parse_dates=["match-completed-at"])

# Add a column for match length
matches["length"] = matches["player1-score"] + matches["player2-score"]

# Records where Trav won / lost, each carrying his own ELO for that match in a
# common "elo" column. `assign` returns new frames, so no slice is mutated.
travis_winner = matches[matches["winner"] == "Travis Roberts"].assign(
    elo=lambda wins: wins["winner_elo"]
)
travis_loser = matches[matches["loser"] == "Travis Roberts"].assign(
    elo=lambda losses: losses["loser_elo"]
)

# Put the winning and losing records together, in chronological order.
# `set_index` returns a new frame, so its result has to be kept.
df = (
    pd.concat([travis_winner, travis_loser])
    .sort_values("match-completed-at")
    .set_index("match-completed-at")
)

# Rolling mean of the ELO over a 20 match window (the first 19 entries are NaN
# because the window is not full yet).
x = df["elo"].rolling(20).mean()

# Plot it!
fig, ax = plt.subplots()
ax.plot(x.index, x)
ax.set_title("Travis Roberts - ELO, 20 match rolling mean")
ax.set_xlabel("match-completed-at")
ax.set_ylabel("ELO")
plt.show()



In [76]:

    
def get_player_matches_df(matches, player_name):
    """Build a display-ready, date-sorted log of one player's matches.

    Parameters
    ----------
    matches : pandas.DataFrame
        Match log. Must contain the columns ``match-completed-at``, ``name``,
        ``winner``, ``loser``, ``winner_elo_in``, ``winner_elo``,
        ``loser_elo_in`` and ``loser_elo``. The frame is not modified.
    player_name : str
        Compared for exact equality against ``winner`` and ``loser``.

    Returns
    -------
    pandas.DataFrame
        One row per match the player won or lost, sorted by
        ``match-completed-at`` and keeping the original index labels, with the
        columns ``Date``, ``Tournament``, ``Winner``, ``Loser``,
        ``Winner ELO (result)``, ``Loser ELO (result)`` and ``Player ELO``.
        The two "(result)" columns are strings of the form
        ``"<*_elo_in> (<*_elo>)"`` with two decimals; ``Player ELO`` is
        ``winner_elo`` for wins and ``loser_elo`` for losses. A player with no
        matches yields an empty frame with the same columns.
    """
    float_format = "{0:.2f}".format

    def elo_display(elo_in, elo_out):
        # Built element by element so that an empty selection simply gives an
        # empty column instead of failing on "empty float Series + str".
        return [
            float_format(before) + " (" + float_format(after) + ")"
            for before, after in zip(elo_in, elo_out)
        ]

    won = matches[matches["winner"] == player_name]
    lost = matches[matches["loser"] == player_name]

    # `assign` returns new frames, so nothing is written onto a filtered slice.
    won = won.assign(player_elo=won["winner_elo"])
    lost = lost.assign(player_elo=lost["loser_elo"])

    player_matches = pd.concat([won, lost]).sort_values("match-completed-at")
    player_matches = player_matches.assign(
        winner_elo_display=elo_display(player_matches["winner_elo_in"], player_matches["winner_elo"]),
        loser_elo_display=elo_display(player_matches["loser_elo_in"], player_matches["loser_elo"]),
    )

    player_matches = player_matches[['match-completed-at', 'name', 'winner', 'loser', 'winner_elo_display', 'loser_elo_display', 'player_elo']]
    player_matches.columns = ['Date', 'Tournament', 'Winner', 'Loser', 'Winner ELO (result)', 'Loser ELO (result)', 'Player ELO']

    return player_matches

matches = pd.read_csv('../data/all_but_champ/match_log.csv')
player = "Jacob Singh"
player_matches = get_player_matches_df(matches, player)

# get_player_matches_df returns Winner/Loser columns but no "opponent", "W" or
# "L", so derive them here: the opponent is the other name in the row, and each
# row counts as exactly one win or one loss for `player`.
is_win = player_matches["Winner"] == player
head_to_head = pd.DataFrame({
    "opponent": player_matches["Loser"].where(is_win, player_matches["Winner"]),
    "W": is_win.astype(int),
    "L": (~is_win).astype(int),
})

# Wins and losses per opponent, most frequently played opponents first.
pt = pd.pivot_table(head_to_head, index=["opponent"], values=["W", "L"], aggfunc="sum")
pt["Total"] = pt["W"] + pt["L"]
pt = pt.sort_values("Total", ascending=False)
pt.head()


# player_matches[player_matches["Winner"] == player]["other_player"] = player_matches[player_matches["Winner"] == player]["Loser"]
# player_matches[player_matches["Loser"] == player]["other_player"] = player_matches[player_matches["Winner"] == player]["Winner"]
# player_matches.head()
# pd.pivot_table(player_matches, values=["Winner"], index="" )









    Out[76]:






  
    
      
      L
      W
      Total
    
    
      opponent
      
      
      
    
  
  
    
      Paul Whitter
      4
      3
      7
    
    
      Ryan Westman
      4
      2
      6
    
    
      Lorenda James
      2
      3
      5
    
    
      Ahmed Sayed
      1
      3
      4
    
    
      Ron Carkeet
      0
      4
      4



In [ ]:

    
import numpy as np
import matplotlib.pyplot as plt
# The norm module has a pdf function (pdf stands for probability density function)
from scipy.stats import norm
import pandas as pd
players = pd.read_csv(
    os.path.join('../data/', 'all_but_champ/players_elo.csv')
)

# Sample standard deviation of all ELO ratings, and the ratings themselves in
# ascending order; the sorted ratings are the x values of the curve.
stddev = players["ELO"].std()
points = sorted(players["ELO"].tolist())

# norm.pdf turns the ratings into densities of a normal distribution centred
# on 1500 with the spread observed above.
probabilities = norm.pdf(points, loc=1500, scale=stddev)

# First row whose player_name is "Jacob Singh".
jacob_elo = players.loc[players["player_name"] == "Jacob Singh"].iloc[0]
print(jacob_elo)

# How many standard deviations that player's ELO lies from the mean rating.
stddevs = (jacob_elo["ELO"] - np.mean(points)) / stddev
print(stddevs)

# Ratings on the x axis, densities on the y axis: the bell curve.
plt.plot(points, probabilities)

# Vertical marker the same number of standard deviations away from the
# curve's centre (1500).
plt.axvline(x=1500 + stddevs * stddev, color="b")
plt.show()

	L	W	Total
opponent
Paul Whitter	4	3	7
Ryan Westman	4	2	6
Lorenda James	2	3	5
Ahmed Sayed	1	3	4
Ron Carkeet	0	4	4