In [3]:
import datetime
import json
import os
from collections import Counter
from time import strptime

import arrow
import pandas
In [7]:
def _parse_tournament_date(raw):
    """Turn a ``DD-Mon-YY`` string such as ``'05-Jan-14'`` into a UTC datetime.

    The two-digit year is always placed in the 2000s and the month is an
    abbreviated month name resolved through ``time.strptime`` with ``%b``.
    """
    parts = raw.split('-')
    return datetime.datetime(
        int('20' + str(parts[2])),
        strptime(parts[1], '%b').tm_mon,
        int(parts[0]),
        # Midnight UTC, the same instant arrow.get(year, month, day) yields.
        tzinfo=datetime.timezone.utc,
    )


def tendata(dirc, filename):
    """Clean one raw match CSV and write the result to the working directory.

    Steps:
      1. read ``filename`` from the directory ``dirc``;
      2. discard the last two rows of the file;
      3. parse every ``Tournament_Date`` value into a UTC datetime;
      4. remove the column at position 3 and append the parsed values as a
         new ``date`` column;
      5. write the frame to ``filename`` relative to the current working
         directory, with ``index_label=False``.

    Parameters
    ----------
    dirc : str
        Directory holding the raw file. A trailing path separator is
        optional.
    filename : str
        Name of the CSV inside ``dirc``; also used as the output file name.

    Returns
    -------
    None

    Notes
    -----
    If the current working directory is ``dirc`` the input file is
    overwritten, and a second run would then trim two more rows and remove a
    different column.
    """
    raw_matches = pandas.read_csv(os.path.join(dirc, filename))

    # Work on a trimmed copy instead of mutating the loaded frame in place.
    matches = raw_matches.iloc[:-2]

    parsed_dates = [_parse_tournament_date(raw) for raw in matches.Tournament_Date]

    # The column is removed by position, not by name -- presumably this is
    # the raw Tournament_Date text column; TODO confirm against the file layout.
    dropped_column = matches.columns[3]
    cleaned = matches.drop(columns=dropped_column).assign(date=parsed_dates)

    cleaned.to_csv(filename, index_label=False)
In [8]:
# Clean the ATP match file; tendata writes the result to the current working directory.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
tendata('/mnt/c/Users/luke/Downloads/datathon/Datathon/', 'ATP_matches.csv')
In [ ]:
In [9]:
# Clean the WTA match file; tendata writes the result to the current working directory.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
tendata('/mnt/c/Users/luke/Downloads/datathon/Datathon/', 'WTA_matches.csv')
In [10]:
# Load the match table used by every search helper, indexed by its ``date`` column.
# NOTE(review): no parse_dates is given, so the index holds plain strings; the file
# read here (test123.csv) is not one of the names tendata writes -- confirm its provenance.
dateten = pandas.read_csv('/mnt/c/Users/luke/Documents/test123.csv', index_col='date')
In [11]:
# Distinct values found in the Court_Surface column.
surfacechoice = set(dateten['Court_Surface'])
In [12]:
# Display the set of court surfaces.
surfacechoice
Out[12]:
In [14]:
# Load the women's dummy submission file (latin-1 encoded). The probability loop
# in this notebook reads the first two columns of each row as the two player names.
# NOTE(review): absolute, machine-specific path.
sampfil = pandas.read_csv('/mnt/c/Users/luke/Downloads/datathon/Datathon/women_dummy_submission_file.csv', encoding='latin-1')
In [60]:
# Container for the head-to-head win probabilities; starts out empty.
windict = {}
In [ ]:
In [62]:
# Compute a head-to-head win probability for every pairing in the submission
# template. Each result is appended to a column list, so windict ends up with
# one entry per pairing instead of only the last one processed.
# NOTE: winlosecheck is defined in a later cell and must be run before this one.
player1_names = []
player2_names = []
win_probabilities = []
for sampf in sampfil.values:
    player1, player2 = sampf[0], sampf[1]
    playerstat = winlosecheck(player1, player2)
    totgame = playerstat['playerwins'] + playerstat['playerlosers']
    if totgame:
        # NOTE(review): floor division keeps whole-number percentages as in the
        # original cell; switch to `/` if fractional percentages are wanted.
        probability = 100 * float(playerstat['playerwins']) // float(totgame)
    else:
        # The two players never met: no estimate (serialised as JSON null).
        probability = None
    player1_names.append(player1)
    player2_names.append(player2)
    win_probabilities.append(probability)

# Rebuild the dict from scratch so re-running the cell does not duplicate rows.
windict.clear()
windict.update({'player_1_win_probability': win_probabilities,
                'player1': player1_names,
                'player2': player2_names})
In [ ]:
# Persist the head-to-head probabilities as JSON in the working directory.
# Relies on the `json` import in the notebook's import cell.
with open('winprob.json', 'w') as winwr:
    json.dump(windict, winwr)
In [ ]:
In [19]:
# Maps a year to the list of that year's match winners; starts out empty.
yrwins = {}
In [20]:
def winnersearch(nameofwinner):
    """Return the rows of ``dateten`` whose ``Winner`` is exactly ``nameofwinner``.

    The comparison is a plain equality test. The previous ``str.match`` call
    treated the name as a regular expression anchored only at the start, so a
    lookup could also return players whose name merely begins with the
    requested one, and names containing regex metacharacters could fail.

    Parameters
    ----------
    nameofwinner : str
        Full player name as it appears in the ``Winner`` column.

    Returns
    -------
    pandas.DataFrame
        The matching rows; empty when the player has no recorded wins.
    """
    return dateten[dateten['Winner'] == nameofwinner]
def losersearch(nameofloser):
    """Return the rows of ``dateten`` whose ``Loser`` is exactly ``nameofloser``.

    Uses an equality test rather than ``str.match``, which is a regex match
    anchored only at the start of the value and would therefore also accept
    longer names sharing the same prefix.

    Parameters
    ----------
    nameofloser : str
        Full player name as it appears in the ``Loser`` column.

    Returns
    -------
    pandas.DataFrame
        The matching rows; empty when the player has no recorded losses.
    """
    return dateten[dateten['Loser'] == nameofloser]
def filtersurface(nameofsurface):
    """Return the rows of ``dateten`` whose court surface matches a pattern.

    ``nameofsurface`` is passed to ``Series.str.match``, so it is interpreted
    as a regular expression anchored at the start of each ``Court_Surface``
    value.
    """
    on_surface = dateten['Court_Surface'].str.match(nameofsurface)
    return dateten[on_surface]
def playersearch(player, anonplayer):
    """Return the matches in which ``player`` beat ``anonplayer``.

    The winner's matches come from ``winnersearch``; they are then narrowed
    to rows whose ``Loser`` equals ``anonplayer`` exactly. Equality replaces
    the earlier ``str.match`` filter, which was a regex prefix match and
    could count opponents whose name only starts with ``anonplayer``.

    Parameters
    ----------
    player : str
        Name of the winning player.
    anonplayer : str
        Name of the losing player.

    Returns
    -------
    pandas.DataFrame
        One row per match ``player`` won against ``anonplayer``.
    """
    winfrm = winnersearch(player)
    return winfrm[winfrm['Loser'] == anonplayer]
In [21]:
# TODO: write a function that takes two players and a court surface and returns the win chance.
In [22]:
# NOTE: this cell redefines playersearch, which is also defined in the previous
# code cell together with the other search helpers; this later definition is
# the one in effect after a top-to-bottom run.
def playersearch(player, anonplayer):
    """Return the matches in which ``player`` beat ``anonplayer``.

    The winner's matches come from ``winnersearch``; they are then narrowed
    to rows whose ``Loser`` equals ``anonplayer`` exactly. Equality replaces
    the earlier ``str.match`` filter, which was a regex prefix match and
    could count opponents whose name only starts with ``anonplayer``.

    Parameters
    ----------
    player : str
        Name of the winning player.
    anonplayer : str
        Name of the losing player.

    Returns
    -------
    pandas.DataFrame
        One row per match ``player`` won against ``anonplayer``.
    """
    winfrm = winnersearch(player)
    return winfrm[winfrm['Loser'] == anonplayer]
In [23]:
def winlosecheck(player, anonplayer):
    """Count the head-to-head results between two players.

    Returns a dict with two integer entries:
      * ``'playerwins'``   -- matches ``player`` won against ``anonplayer``;
      * ``'playerlosers'`` -- matches ``anonplayer`` won against ``player``.
    Both counts are the row counts of the corresponding ``playersearch``
    results.
    """
    head_to_head = {}
    head_to_head['playerwins'] = playersearch(player, anonplayer).shape[0]
    head_to_head['playerlosers'] = playersearch(anonplayer, player).shape[0]
    return head_to_head
In [24]:
# Head-to-head counts for one example pairing.
playerstat = winlosecheck('Ana Ivanovic', 'Serena Williams')
In [25]:
# Matches the first player of the pairing won against the second.
playerstat['playerwins']
Out[25]:
In [27]:
# Matches the second player of the pairing won against the first.
playerstat['playerlosers']
Out[27]:
In [28]:
# Total number of meetings between the two players.
totgame = playerstat['playerwins'] + playerstat['playerlosers']
In [29]:
# Display the number of meetings.
totgame
Out[29]:
In [39]:
# Percentage of the meetings won by the first player.
# Raises ZeroDivisionError when the two players never met (totgame == 0).
print(100 * float(playerstat['playerwins'])/float(totgame))
In [ ]:
In [32]:
# Meetings per win for the first player (the inverse of the win fraction).
# NOTE(review): divides by the win count, so it fails when the first player has no wins.
float(totgame / playerstat['playerwins'])
Out[32]:
In [ ]:
In [ ]:
In [80]:
# Number of matches Serena Williams won against Andrea Petkovic.
playersearch('Serena Williams', 'Andrea Petkovic').shape[0]
Out[80]:
In [78]:
# The reverse pairing: rows for matches Andrea Petkovic won against Serena Williams.
playersearch('Andrea Petkovic', 'Serena Williams')
Out[78]:
In [ ]:
In [48]:
# First rows of the matches won by Serena Williams.
winnersearch('Serena Williams').head()
Out[48]:
In [ ]:
In [49]:
# First rows of the matches whose Court_Surface matches 'Grass'.
filtersurface('Grass').head()
Out[49]:
In [50]:
# First rows of the matches won by Qiang Wang.
winnersearch('Qiang Wang').head()
Out[50]:
In [51]:
# First rows of the matches lost by Qiang Wang.
losersearch('Qiang Wang').head()
Out[51]:
In [ ]:
In [52]:
# Inline version of the Winner-column filter: first rows where Winner matches 'Timea Babos'.
dateten[dateten['Winner'].str.match('Timea Babos')].head()
Out[52]:
In [53]:
# Collect the list of match winners for each season from 2014 through 2018.
#
# dateten is loaded without parse_dates, so its ``date`` index holds strings.
# The previous string-bounded .loc slice therefore compared labels as text,
# and un-padded bounds such as '2014-1-1 01:00:00' do not order like
# timestamps. The index is parsed once here and rows are selected by calendar
# year instead (whole year, evaluated in UTC).
# Assumes every index value is a parseable timestamp -- TODO confirm.
match_years = pandas.to_datetime(dateten.index, utc=True).year
for yearsw in range(2014, 2019):
    print(yearsw)
    curyear = dateten[match_years == yearsw]
    yrwins.update({yearsw: list(curyear.Winner)})
In [54]:
# Quick check of the month-abbreviation lookup that tendata relies on.
# NOTE: strptime is already imported in the notebook's first cell, so this
# repeated import is redundant.
from time import strptime
strptime('Feb','%b').tm_mon
Out[54]:
In [55]:
# Loser column as a plain list, one entry per row of dateten.
lislose =list(dateten.Loser)
In [56]:
# Winner column as a plain list, one entry per row of dateten.
liswin = list(dateten.Winner)
In [57]:
# NOTE(review): `allwiner` is not defined anywhere in the visible notebook, and
# this assignment replaces the `liswin` built from dateten.Winner in the
# previous cell -- confirm which source is intended.
liswin = list(allwiner)
In [58]:
# Distinct names appearing in liswin.
winset = set(liswin)
In [ ]:
# Container for the per-player win/loss history; starts out empty.
winhisd = {}
In [ ]:
In [ ]:
# Build a win/loss summary for every player in winset.
# Each player's record is stored under the player's own name, so earlier
# players are no longer overwritten by later ones. Appearances are tallied
# once up front rather than re-counting both lists for every player.
win_counts = Counter(liswin)
loss_counts = Counter(lislose)
for wins in winset:
    games_won = win_counts[wins]
    games_lost = loss_counts[wins]
    totgame = games_won + games_lost
    winhisd[wins] = {'name': wins,
                     'winhist': games_won,
                     'losehist': games_lost,
                     'totalgame': totgame,
                     'winpercent': (games_won / totgame) * 100}
#print(lislose.count(wins))
In [ ]:
# Persist the per-player win/loss history as JSON in the working directory.
# (The previous `winh.wrie(...)` call was a typo and raised AttributeError.)
# Relies on the `json` import in the notebook's import cell.
with open('winhistory.json', 'w') as winh:
    json.dump(winhisd, winh)