In [1]:
import pymongo
connection = pymongo.MongoClient("mongodb://localhost")
db = connection.tnt
In [32]:
def runvalue(player):
p = db.statistics.find_one({ '_id': player })
average = p['BattingAverage']
sr = p['BattingSR']
try:
balls = 100.0 * average / sr
except:
return 0
ballvalue = balls * avgscore / 300
return average - ballvalue
In [30]:
def featuresum(player):
p = db.scorelist.find_one({ '_id': player })
scores = p['Scores']
milestones = [score for score in scores if score>=50]
milestone50s = [score for score in milestones if score < 100]
milestone100s = [score for score in milestones if score >= 100]
milestoneValue = []
for score in milestones:
if score < 75:
milestoneValue.append(0.4)
elif score < 90:
milestoneValue.append(0.8)
elif score < 100:
milestoneValue.append(0.9)
elif score < 125:
milestoneValue.append(1.0)
elif score < 150:
milestoneValue.append(1.3)
elif score < 175:
milestoneValue.append(1.7)
else:
milestoneValue.append(2.0)
feature = sum(milestoneValue) + runvalue(player)
try:
feature += (5.0 * len(milestone50s) / len(scores))
except:
pass
try:
feature += (10.0 * len(milestone100s) / len(milestones))
except:
pass
return feature
In [6]:
totals = db.odi.aggregate([{
"$group": { "_id": 1, "Total runs": { "$sum": "$Runs Batsman" }, "Total Balls": { "$sum": 1 } } }
])['result'][0]
avgscore = 300.0 * totals['Total runs'] / totals['Total Balls']
In [38]:
playerFeatureSums = [(player['_id'], featuresum(player['_id'])) for player in db.players.find({'Balls faced': {'$gte': 400}})]
In [34]:
import operator
In [39]:
sorted(playerFeatureSums, key=operator.itemgetter(1))[-10:]
Out[39]:
In [48]:
def stdstats(player):
p = db.statistics.find_one({ '_id': player })
avg = 10.0 * (p['BattingAverage'] / 50.0)
sr = 10.0 * (p['BattingSR'] / 120.0)
ph = 10.0 * (p['BattingPinchHitting'] / 15.93)
return avg + sr + ph
In [44]:
playerStdStats = [(player['_id'], stdstats(player['_id'])) for player in db.players.find({'Balls faced': {'$gte': 400}})]
In [45]:
sorted(playerStdStats, key=operator.itemgetter(1))[-10:]
Out[45]:
In [49]:
playerTotalStats = [(player['_id'], featuresum(player['_id']) + stdstats(player['_id']))
for player in db.players.find({'Balls faced': {'$gte': 400}})]
In [53]:
sorted(playerTotalStats, key=operator.itemgetter(1))[-20:]
Out[53]:
In [54]:
indTotalStats = [(player['_id'], featuresum(player['_id']) + stdstats(player['_id']))
for player in db.players.find({'Balls faced': {'$gte': 200}, 'Team': 'India'})]
In [55]:
sorted(indTotalStats, key=operator.itemgetter(1))[-10:]
Out[55]:
In [99]:
ballsStart = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$lte": 15 } } },
{ "$group": { "_id": 1, "Balls": { "$sum": 1 } } }
])['result'][0]['Balls']
ballsMid = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$gte": 15, "$lte": 40 } } },
{ "$group": { "_id": 1, "Balls": { "$sum": 1 } } }
])['result'][0]['Balls']
ballsEnd = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$gte": 40 } } },
{ "$group": { "_id": 1, "Balls": { "$sum": 1 } } }
])['result'][0]['Balls']
dotStart = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$lte": 15 }, "Total Runs": 0 } },
{ "$group": { "_id": 1, "Dots": { "$sum": 1 } } }
])['result'][0]['Dots']
dotMid = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$gte": 15, "$lte": 40 }, "Total Runs": 0 } },
{ "$group": { "_id": 1, "Dots": { "$sum": 1 } } }
])['result'][0]['Dots']
dotEnd = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$gte": 40 }, "Total Runs": 0 } },
{ "$group": { "_id": 1, "Dots": { "$sum": 1 } } }
])['result'][0]['Dots']
bndStart = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$lte": 15 }, "Total Runs": { "$gte": 4 } } },
{ "$group": { "_id": 1, "Boundaries": { "$sum": 1 } } }
])['result'][0]['Boundaries']
bndMid = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$gte": 15, "$lte": 40 }, "Total Runs": { "$gte": 4 } } },
{ "$group": { "_id": 1, "Boundaries": { "$sum": 1 } } }
])['result'][0]['Boundaries']
bndEnd = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$gte": 40 }, "Total Runs": { "$gte": 4 } } },
{ "$group": { "_id": 1, "Boundaries": { "$sum": 1 } } }
])['result'][0]['Boundaries']
runsStart = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$lte": 15 } } },
{ "$group": { "_id": 1, "Runs": { "$sum": "$Total Runs" } } }
])['result'][0]['Runs']
runsMid = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$gte": 15, "$lte": 40 } } },
{ "$group": { "_id": 1, "Runs": { "$sum": "$Total Runs" } } }
])['result'][0]['Runs']
runsEnd = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$gte": 40 } } },
{ "$group": { "_id": 1, "Runs": { "$sum": "$Total Runs" } } }
])['result'][0]['Runs']
wickStart = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$lte": 15 }, "Wicket Kind":
{ "$in": ["bowled", "caught", "caught and bowled", "lbw", "stumped", "hit wicket"] } } },
{ "$group": { "_id": 1, "Wickets": { "$sum": 1 } } }
])['result'][0]['Wickets']
wickMid = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$gte": 15, "$lte": 40 }, "Wicket Kind":
{ "$in": ["bowled", "caught", "caught and bowled", "lbw", "stumped", "hit wicket"] } } },
{ "$group": { "_id": 1, "Wickets": { "$sum": 1 } } }
])['result'][0]['Wickets']
wickEnd = 1.0 * db.odi.aggregate([
{ "$match": { "Over": { "$gte": 40 }, "Wicket Kind":
{ "$in": ["bowled", "caught", "caught and bowled", "lbw", "stumped", "hit wicket"] } } },
{ "$group": { "_id": 1, "Wickets": { "$sum": 1 } } }
])['result'][0]['Wickets']
In [100]:
avgDotStart = dotStart / ballsStart
avgDotMid = dotMid / ballsMid
avgDotEnd = dotEnd / ballsEnd
avgBndStart = bndStart / ballsStart
avgBndMid = bndMid / ballsMid
avgBndEnd = bndEnd / ballsEnd
avgEcoStart = runsStart / ballsStart
avgEcoMid = runsMid / ballsMid
avgEcoEnd = runsEnd / ballsEnd
avgWickStart = wickStart / ballsStart
avgWickMid = wickMid / ballsMid
avgWickEnd = wickEnd / ballsEnd
In [326]:
def bowlStats(player):
try:
ballsStart = db.ballsStart.find_one({ '_id': player })['Balls']
except TypeError:
ballsStart = []
try:
ballsMid = db.ballsMid.find_one({ '_id': player })['Balls']
except TypeError:
ballsMid = []
try:
ballsEnd = db.ballsEnd.find_one({ '_id': player })['Balls']
except TypeError:
ballsEnd = []
try:
wicketsStart = db.wicketsStart.find_one({ '_id': player })['Wickets']
except TypeError:
wicketsStart = 0
try:
wicketsMid = db.wicketsMid.find_one({ '_id': player })['Wickets']
except TypeError:
wicketsMid = 0
try:
wicketsEnd = db.wicketsEnd.find_one({ '_id': player })['Wickets']
except TypeError:
wicketsEnd = 0
totalStart = len(ballsStart)
totalMid = len(ballsMid)
totalEnd = len(ballsEnd)
dotsBowledStart = ballsStart.count(0)
dotsBowledMid = ballsMid.count(0)
dotsBowledEnd = ballsEnd.count(0)
runsConcededStart = sum(ballsStart)
runsConcededMid = sum(ballsMid)
runsConcededEnd = sum(ballsEnd)
boundariesStart = sum([ballsStart.count(b) for b in [4, 5, 6, 7]])
boundariesMid = sum([ballsMid.count(b) for b in [4, 5, 6, 7]])
boundariesEnd = sum([ballsEnd.count(b) for b in [4, 5, 6, 7]])
if totalStart > 240:
dScoreStart = (1.0 * dotsBowledStart / totalStart) / avgDotStart
if dScoreStart < 1.0:
dScoreStart = -45.0 * (1.0 - dScoreStart)
else:
dScoreStart = 45.0 * (dScoreStart - 1.0)
bScoreStart = (1.0 * boundariesStart / totalStart) / avgBndStart
if bScoreStart < 1.0:
bScoreStart = -30.0 * (1.0 - bScoreStart)
else:
bScoreStart = 30.0 * (bScoreStart - 1.0)
eScoreStart = (1.0 * runsConcededStart / totalStart) / avgEcoStart
if eScoreStart < 1.0:
eScoreStart = -30.0 * (1.0 - eScoreStart)
else:
eScoreStart = 30.0 * (eScoreStart - 1.0)
wScoreStart = (1.0 * wicketsStart / totalStart) / avgWickStart
if wScoreStart < 1.0:
wScoreStart = -45.0 * (1.0 - wScoreStart)
else:
wScoreStart = 45.0 * (wScoreStart - 1.0)
else:
dScoreStart = bScoreStart = eScoreStart = wScoreStart = 0
if totalMid > 360:
dScoreMid = (1.0 * dotsBowledMid / totalMid) / avgDotMid
if dScoreMid < 1.0:
dScoreMid = -24.0 * (1.0 - dScoreMid)
else:
dScoreMid = 15.0 * (dScoreMid - 1.0)
bScoreMid = (1.0 * boundariesMid / totalMid) / avgBndMid
if bScoreMid < 1.0:
bScoreMid = -15.0 * (1.0 - bScoreMid)
else:
bScoreMid = 12.0 * (bScoreMid - 1.0)
eScoreMid = (1.0 * runsConcededMid / totalMid) / avgEcoMid
if eScoreMid < 1.0:
eScoreMid = -15.0 * (1.0 - eScoreMid)
else:
eScoreMid = 15.0 * (eScoreMid - 1.0)
wScoreMid = (1.0 * wicketsMid / totalMid) / avgWickMid
if wScoreMid < 1.0:
wScoreMid = -30.0 * (1.0 - wScoreMid)
else:
wScoreMid = 30.0 * (wScoreMid - 1.0)
else:
dScoreMid = bScoreMid = eScoreMid = wScoreMid = 0
if totalEnd > 120:
dScoreEnd = (1.0 * dotsBowledEnd / totalEnd) / avgDotEnd
if dScoreEnd < 1.0:
dScoreEnd = -75.0 * (1.0 - dScoreEnd)
else:
dScoreEnd = 75.0 * (dScoreEnd - 1.0)
bScoreEnd = (1.0 * boundariesEnd / totalEnd) / avgBndEnd
if bScoreEnd < 1.0:
bScoreEnd = -45.0 * (1.0 - bScoreEnd)
else:
bScoreEnd = 45.0 * (bScoreEnd - 1.0)
eScoreEnd = (1.0 * runsConcededEnd / totalEnd) / avgEcoEnd
if eScoreEnd < 1.0:
eScoreEnd = -30.0 * (1.0 - eScoreEnd)
else:
eScoreEnd = 45.0 * (eScoreEnd - 1.0)
wScoreEnd = (1.0 * wicketsEnd / totalEnd) / avgWickEnd
if wScoreEnd < 1.0:
wScoreEnd = -30.0 * (1.0 - wScoreEnd)
else:
wScoreEnd = 30.0 * (wScoreEnd - 1.0)
else:
dScoreEnd = bScoreEnd = eScoreEnd = wScoreEnd = 0
p = db.statistics.find_one({ '_id': player })
wicketsper10 = p['BowlingWicketsPer10'] / 2.0
if wicketsper10 > (1.1 / 2.0):
wicketsper10 = 75.0 * wicketsper10
else:
wicketsper10 = -37.5 * wicketsper10
overs100runs = p['BowlingOvers100Runs'] / 25.0
if overs100runs > (19.0 / 25.0):
overs100runs = 60.0 * overs100runs
else:
overs100runs = -30.0 * overs100runs
overs10boundaries = p['BowlingOvers10Boundaries'] / 27.0
if overs10boundaries > (14.5 / 25):
overs10boundaries = 45.0 * overs10boundaries
else:
overs10boundaries = -22.5 * overs10boundaries
oversextras = p['BowlingOversExtras'] / 15.0
if oversextras > (3.7 / 15.0):
oversextras = 15.0 * oversextras
else:
oversextras = -7.5 * oversextras
# print wicketsper10, overs100runs, overs10boundaries, oversextras, dScoreStart, dScoreMid, dScoreEnd, bScoreStart, bScoreMid, bScoreEnd, eScoreStart, eScoreMid, eScoreEnd, wScoreStart, wScoreMid, wScoreEnd
# return wicketsper10 + overs100runs + overs10boundaries + oversextras + dScoreStart + dScoreMid + dScoreEnd + bScoreStart + bScoreMid + bScoreEnd + eScoreStart + eScoreMid + eScoreEnd + wScoreStart + wScoreMid + wScoreEnd
return (wicketsper10 + overs100runs + overs10boundaries + oversextras) + ((dScoreStart + dScoreMid + dScoreEnd) / 3.0) + ((bScoreStart + bScoreMid + bScoreEnd) / 3.0) + ((eScoreStart + eScoreMid + eScoreEnd) / 3.0) + ((wScoreStart + wScoreMid + wScoreEnd) / 3.0)
In [327]:
def indBowlStats(player):
try:
ballsStart = db.ballsStart.find_one({ '_id': player })['Balls']
except TypeError:
ballsStart = []
try:
ballsMid = db.ballsMid.find_one({ '_id': player })['Balls']
except TypeError:
ballsMid = []
try:
ballsEnd = db.ballsEnd.find_one({ '_id': player })['Balls']
except TypeError:
ballsEnd = []
try:
wicketsStart = db.wicketsStart.find_one({ '_id': player })['Wickets']
except TypeError:
wicketsStart = 0
try:
wicketsMid = db.wicketsMid.find_one({ '_id': player })['Wickets']
except TypeError:
wicketsMid = 0
try:
wicketsEnd = db.wicketsEnd.find_one({ '_id': player })['Wickets']
except TypeError:
wicketsEnd = 0
totalStart = len(ballsStart)
totalMid = len(ballsMid)
totalEnd = len(ballsEnd)
dotsBowledStart = ballsStart.count(0)
dotsBowledMid = ballsMid.count(0)
dotsBowledEnd = ballsEnd.count(0)
runsConcededStart = sum(ballsStart)
runsConcededMid = sum(ballsMid)
runsConcededEnd = sum(ballsEnd)
boundariesStart = sum([ballsStart.count(b) for b in [4, 5, 6, 7]])
boundariesMid = sum([ballsMid.count(b) for b in [4, 5, 6, 7]])
boundariesEnd = sum([ballsEnd.count(b) for b in [4, 5, 6, 7]])
if totalStart > 180:
dScoreStart = (1.0 * dotsBowledStart / totalStart) / avgDotStart
if dScoreStart < 1.0:
dScoreStart = -45.0 * (1.0 - dScoreStart)
else:
dScoreStart = 45.0 * (dScoreStart - 1.0)
bScoreStart = (1.0 * boundariesStart / totalStart) / avgBndStart
if bScoreStart < 1.0:
bScoreStart = -30.0 * (1.0 - bScoreStart)
else:
bScoreStart = 30.0 * (bScoreStart - 1.0)
eScoreStart = (1.0 * runsConcededStart / totalStart) / avgEcoStart
if eScoreStart < 1.0:
eScoreStart = -30.0 * (1.0 - eScoreStart)
else:
eScoreStart = 30.0 * (eScoreStart - 1.0)
wScoreStart = (1.0 * wicketsStart / totalStart) / avgWickStart
if wScoreStart < 1.0:
wScoreStart = -45.0 * (1.0 - wScoreStart)
else:
wScoreStart = 45.0 * (wScoreStart - 1.0)
else:
dScoreStart = bScoreStart = eScoreStart = wScoreStart = 0
if totalMid > 240:
dScoreMid = (1.0 * dotsBowledMid / totalMid) / avgDotMid
if dScoreMid < 1.0:
dScoreMid = -24.0 * (1.0 - dScoreMid)
else:
dScoreMid = 15.0 * (dScoreMid - 1.0)
bScoreMid = (1.0 * boundariesMid / totalMid) / avgBndMid
if bScoreMid < 1.0:
bScoreMid = -15.0 * (1.0 - bScoreMid)
else:
bScoreMid = 12.0 * (bScoreMid - 1.0)
eScoreMid = (1.0 * runsConcededMid / totalMid) / avgEcoMid
if eScoreMid < 1.0:
eScoreMid = -15.0 * (1.0 - eScoreMid)
else:
eScoreMid = 15.0 * (eScoreMid - 1.0)
wScoreMid = (1.0 * wicketsMid / totalMid) / avgWickMid
if wScoreMid < 1.0:
wScoreMid = -30.0 * (1.0 - wScoreMid)
else:
wScoreMid = 30.0 * (wScoreMid - 1.0)
else:
dScoreMid = bScoreMid = eScoreMid = wScoreMid = 0
if totalEnd > 120:
dScoreEnd = (1.0 * dotsBowledEnd / totalEnd) / avgDotEnd
if dScoreEnd < 1.0:
dScoreEnd = -75.0 * (1.0 - dScoreEnd)
else:
dScoreEnd = 75.0 * (dScoreEnd - 1.0)
bScoreEnd = (1.0 * boundariesEnd / totalEnd) / avgBndEnd
if bScoreEnd < 1.0:
bScoreEnd = -45.0 * (1.0 - bScoreEnd)
else:
bScoreEnd = 45.0 * (bScoreEnd - 1.0)
eScoreEnd = (1.0 * runsConcededEnd / totalEnd) / avgEcoEnd
if eScoreEnd < 1.0:
eScoreEnd = -30.0 * (1.0 - eScoreEnd)
else:
eScoreEnd = 45.0 * (eScoreEnd - 1.0)
wScoreEnd = (1.0 * wicketsEnd / totalEnd) / avgWickEnd
if wScoreEnd < 1.0:
wScoreEnd = -30.0 * (1.0 - wScoreEnd)
else:
wScoreEnd = 30.0 * (wScoreEnd - 1.0)
else:
dScoreEnd = bScoreEnd = eScoreEnd = wScoreEnd = 0
p = db.statistics.find_one({ '_id': player })
wicketsper10 = p['BowlingWicketsPer10'] / 2.0
if wicketsper10 > (1.1 / 2.0):
wicketsper10 = 75.0 * wicketsper10
else:
wicketsper10 = -37.5 * wicketsper10
overs100runs = p['BowlingOvers100Runs'] / 25.0
if overs100runs > (19.0 / 25.0):
overs100runs = 60.0 * overs100runs
else:
overs100runs = -30.0 * overs100runs
overs10boundaries = p['BowlingOvers10Boundaries'] / 27.0
if overs10boundaries > (14.5 / 25):
overs10boundaries = 45.0 * overs10boundaries
else:
overs10boundaries = -22.5 * overs10boundaries
oversextras = p['BowlingOversExtras'] / 15.0
if oversextras > (3.7 / 15.0):
oversextras = 15.0 * oversextras
else:
oversextras = -7.5 * oversextras
# print wicketsper10, overs100runs, overs10boundaries, oversextras, dScoreStart, dScoreMid, dScoreEnd, bScoreStart, bScoreMid, bScoreEnd, eScoreStart, eScoreMid, eScoreEnd, wScoreStart, wScoreMid, wScoreEnd
# return wicketsper10 + overs100runs + overs10boundaries + oversextras + dScoreStart + dScoreMid + dScoreEnd + bScoreStart + bScoreMid + bScoreEnd + eScoreStart + eScoreMid + eScoreEnd + wScoreStart + wScoreMid + wScoreEnd
return (wicketsper10 + overs100runs + overs10boundaries + oversextras) + ((dScoreStart + dScoreMid + dScoreEnd) / 3.0) + ((bScoreStart + bScoreMid + bScoreEnd) / 3.0) + ((eScoreStart + eScoreMid + eScoreEnd) / 3.0) + ((wScoreStart + wScoreMid + wScoreEnd) / 3.0)
In [328]:
bowlers = [(player['_id'], bowlStats(player['_id'])) for player in db.players.find({'Balls bowled': {"$gte": 1250}})]
In [329]:
sorted(bowlers, key=operator.itemgetter(1))[-10:]
Out[329]:
In [390]:
indBowlers = [(player['_id'], indBowlStats(player['_id'])) for player in db.players.find({'Balls bowled': {"$gte": 750}, 'Team': 'India'})]
In [391]:
sorted(indBowlers, key=operator.itemgetter(1))[-10:]
Out[391]:
In [392]:
allrounders = [(player['_id'], (50.0 * (featuresum(player['_id']) + stdstats(player['_id'])) / 40.0) + (50.0 * bowlStats(player['_id'])) / 140.0)
for player in db.players.find({'Balls faced': {'$gte': 500}, 'Balls bowled': {'$gte': 500}})]
In [393]:
sorted(allrounders, key=operator.itemgetter(1))[-10:]
Out[393]:
In [363]:
arPicks = ["JP Faulkner", "Shahid Afridi", "AD Russell"]
arPickValues = [(player, (50.0 * (featuresum(player) + stdstats(player)) / 40.0), (50.0 * bowlStats(player) / 140.0))
for player in arPicks]
In [364]:
arPickValues
Out[364]:
In [357]:
teamDraft = allstars[:]
teamDraft.remove('Shahid Afridi')
teamDraft
Out[357]:
In [365]:
battingIndex = sum([(50.0 * (featuresum(player) + stdstats(player)) / 40.0) for player in teamDraft])
bowlingIndex = sum([(50.0 * bowlStats(player) / 140.0) for player in teamDraft])
In [366]:
print battingIndex, bowlingIndex
In [370]:
print 'With Faulkner'
print battingIndex + arPickValues[0][1], bowlingIndex + arPickValues[0][2]
print (battingIndex + arPickValues[0][1]) * (bowlingIndex + arPickValues[0][2])
print 'With Afridi'
print battingIndex + arPickValues[1][1], bowlingIndex + arPickValues[1][2]
print (battingIndex + arPickValues[1][1]) * (bowlingIndex + arPickValues[1][2])
print 'With Russell'
print battingIndex + arPickValues[2][1], bowlingIndex + arPickValues[2][2]
print (battingIndex + arPickValues[2][1]) * (bowlingIndex + arPickValues[2][2])
In [376]:
indAllrounders = [(player['_id'], (50.0 * (featuresum(player['_id']) + stdstats(player['_id'])) / 40.0) + (50.0 * bowlStats(player['_id'])) / 140.0)
for player in db.players.find({'Balls faced': {'$gte': 200}, 'Balls bowled': {'$gte': 200}, 'Team': 'India'})]
In [377]:
sorted(indAllrounders, key=operator.itemgetter(1))[-10:]
Out[377]:
In [372]:
allstars = [
"SR Watson",
"HM Amla",
"V Kohli",
"KC Sangakkara",
"AB de Villiers",
"MS Dhoni",
"Shahid Afridi",
"JC Tredwell",
"DW Steyn",
"Saeed Ajmal",
"JM Anderson"
]
In [373]:
indiaXI = [
"V Sehwag",
"S Dhawan",
"V Kohli",
"RG Sharma",
"SK Raina",
"MS Dhoni",
"RA Jadeja",
"R Ashwin",
"B Kumar",
"A Mishra",
"Mohammed Shami"
]
In [ ]: