In [1]:
import pymongo
# Connect to the MongoDB server on localhost and select the `tnt` database;
# every later cell reads its collections through `db`.
connection = pymongo.MongoClient("mongodb://localhost")
db = connection.tnt

In [32]:
def runvalue(player):
    """Return a batsman's average minus the runs scored at the overall rate.

    Reads ``BattingAverage`` and ``BattingSR`` from ``db.statistics``.
    ``100.0 * average / sr`` is the number of balls faced per dismissal;
    multiplying by ``avgscore / 300`` (the module-level runs-per-300-balls
    figure converted to runs per ball) gives the runs produced over that many
    balls at the overall scoring rate.

    player -- the ``_id`` of the player's document in ``db.statistics``.

    Returns 0 when the balls-per-dismissal figure cannot be computed (strike
    rate of zero, or a non-numeric average / strike rate).
    """
    p = db.statistics.find_one({ '_id': player })
    average = p['BattingAverage']
    sr = p['BattingSR']
    try:
        balls = 100.0 * average / sr
    except (ZeroDivisionError, TypeError):
        # Only the expected arithmetic failures are swallowed; anything else
        # (e.g. KeyboardInterrupt) now propagates instead of being hidden.
        return 0
    ballvalue = balls * avgscore / 300
    return average - ballvalue

In [30]:
def featuresum(player):
    """Return the milestone-based batting feature score for ``player``.

    Reads the player's list of innings scores from ``db.scorelist`` and adds:
      * a weight for every score of 50 or more (see ``bands`` below),
      * ``runvalue(player)``,
      * 5.0 x (share of all innings that were 50-99),
      * 10.0 x (share of 50+ innings that reached 100).

    A ratio term is skipped when its denominator would be zero (no innings,
    or no score of 50 or more).

    player -- the ``_id`` of the player's document in ``db.scorelist``.
    """
    p = db.scorelist.find_one({ '_id': player })
    scores = p['Scores']
    milestones = [score for score in scores if score >= 50]
    milestone50s = [score for score in milestones if score < 100]
    milestone100s = [score for score in milestones if score >= 100]

    # (exclusive upper bound, weight) pairs in ascending order; scores of 175
    # or more fall through to the top weight.
    bands = ((75, 0.4), (90, 0.8), (100, 0.9), (125, 1.0), (150, 1.3), (175, 1.7))
    topWeight = 2.0

    def milestoneWeight(score):
        for upper, weight in bands:
            if score < upper:
                return weight
        return topWeight

    feature = sum([milestoneWeight(score) for score in milestones]) + runvalue(player)
    # Explicit emptiness checks replace the former bare `except: pass`, which
    # existed only to skip a division by zero.
    if len(scores) > 0:
        feature += (5.0 * len(milestone50s) / len(scores))
    if len(milestones) > 0:
        feature += (10.0 * len(milestone100s) / len(milestones))
    return feature

In [6]:
# Sum the batsman runs and count the documents across the whole `odi`
# collection in a single group.
totalsReply = db.odi.aggregate([{
    "$group": { "_id": 1, "Total runs": { "$sum": "$Runs Batsman" }, "Total Balls": { "$sum": 1 } } }
])
# PyMongo 2.x returns a dict with a 'result' list; PyMongo 3+ returns a cursor.
# Accept both so the cell does not depend on the installed driver version.
totalsRows = totalsReply['result'] if isinstance(totalsReply, dict) else list(totalsReply)
totals = totalsRows[0]
# Runs scored per 300 documents (presumably deliveries, i.e. one 50-over
# innings -- confirm against the collection schema).
avgscore = 300.0 * totals['Total runs'] / totals['Total Balls']

In [38]:
# (player id, feature score) for every player with at least 400 balls faced.
playerFeatureSums = []
for player in db.players.find({'Balls faced': {'$gte': 400}}):
    playerFeatureSums.append((player['_id'], featuresum(player['_id'])))

In [34]:
import operator

In [39]:
# Ten highest feature scores, in ascending order (best last).
sorted(playerFeatureSums, key=lambda entry: entry[1])[-10:]


Out[39]:
[(u'Mohammad Hafeez', 19.390081397722494),
 (u'JP Faulkner', 20.017972167634387),
 (u'GJ Bailey', 20.305453483158733),
 (u'Q de Kock', 20.392601673765814),
 (u'SR Watson', 21.205695287929704),
 (u'HM Amla', 23.660905358346817),
 (u'MS Dhoni', 26.29333666313373),
 (u'KC Sangakkara', 27.817220014066795),
 (u'AB de Villiers', 31.66922560671017),
 (u'V Kohli', 39.86857936819784)]

In [48]:
def stdstats(player):
    """Return the sum of three batting statistics, each scaled to a benchmark.

    Every statistic read from ``db.statistics`` is divided by its benchmark
    value and multiplied by 10.0, so a player exactly on a benchmark
    contributes 10 points for that statistic.

    player -- the ``_id`` of the player's document in ``db.statistics``.
    """
    record = db.statistics.find_one({ '_id': player })
    benchmarks = (
        ('BattingAverage', 50.0),
        ('BattingSR', 120.0),
        ('BattingPinchHitting', 15.93),
    )
    avgPoints, srPoints, phPoints = [
        10.0 * (record[field] / benchmark) for field, benchmark in benchmarks
    ]
    return avgPoints + srPoints + phPoints

In [44]:
# (player id, benchmark-scaled batting score) for players with 400+ balls faced.
playerStdStats = []
for player in db.players.find({'Balls faced': {'$gte': 400}}):
    playerStdStats.append((player['_id'], stdstats(player['_id'])))

In [45]:
# Ten highest benchmark-scaled batting scores, in ascending order (best last).
sorted(playerStdStats, key=lambda entry: entry[1])[-10:]


Out[45]:
[(u'SR Watson', 20.86067248729732),
 (u'Q de Kock', 21.099119635086307),
 (u'GJ Bailey', 21.18176670310305),
 (u'JC Buttler', 21.95111410387132),
 (u'V Kohli', 22.5558216928728),
 (u'GJ Maxwell', 22.647018204645324),
 (u'AD Russell', 22.84155844665644),
 (u'JP Faulkner', 23.391258134428625),
 (u'AB de Villiers', 23.904656021312185),
 (u'MS Dhoni', 26.764143340791176)]

In [49]:
# Combined batting score (feature score + benchmark-scaled score) for every
# player with at least 400 balls faced.
playerTotalStats = []
for player in db.players.find({'Balls faced': {'$gte': 400}}):
    playerTotalStats.append(
        (player['_id'], featuresum(player['_id']) + stdstats(player['_id'])))

In [53]:
# Twenty highest combined batting scores, in ascending order (best last).
sorted(playerTotalStats, key=lambda entry: entry[1])[-20:]


Out[53]:
[(u'Shahid Afridi', 38.24883833406406),
 (u'MJ Guptill', 38.421970638255395),
 (u'BB McCullum', 38.99436528396403),
 (u'Mohammad Hafeez', 39.33791588056667),
 (u'TM Dilshan', 39.39183758392811),
 (u'V Sehwag', 40.122763362754604),
 (u'LRPL Taylor', 40.473030878172494),
 (u'S Dhawan', 40.80353327051276),
 (u'AD Russell', 42.78846054972185),
 (u'GJ Maxwell', 44.535433159135245),
 (u'GJ Bailey', 44.57918295383348),
 (u'Q de Kock', 45.10924288098444),
 (u'JC Buttler', 45.29720931273451),
 (u'SR Watson', 46.66864331595946),
 (u'HM Amla', 47.25817058879925),
 (u'JP Faulkner', 48.06051860240416),
 (u'KC Sangakkara', 49.34612587743932),
 (u'MS Dhoni', 56.626581822014984),
 (u'AB de Villiers', 59.05101296982872),
 (u'V Kohli', 65.7153969607395)]

In [54]:
# Combined batting score for India players with at least 200 balls faced.
indTotalStats = []
for player in db.players.find({'Balls faced': {'$gte': 200}, 'Team': 'India'}):
    indTotalStats.append(
        (player['_id'], featuresum(player['_id']) + stdstats(player['_id'])))

In [55]:
# Ten highest combined batting scores among India players (best last).
sorted(indTotalStats, key=lambda entry: entry[1])[-10:]


Out[55]:
[(u'MK Tiwary', 25.031615457031037),
 (u'RA Jadeja', 26.869255386629256),
 (u'SR Tendulkar', 27.505769111923733),
 (u'G Gambhir', 30.420333603509228),
 (u'SK Raina', 35.07664465188411),
 (u'RG Sharma', 36.935918480111205),
 (u'V Sehwag', 40.122763362754604),
 (u'S Dhawan', 40.80353327051276),
 (u'MS Dhoni', 56.626581822014984),
 (u'V Kohli', 65.7153969607395)]

In [99]:
def _odiTotal(match, summed=1):
    """Return, as a float, one $sum over the `odi` documents matching `match`.

    match  -- the $match filter document.
    summed -- the $sum operand: 1 counts documents, a "$field" string adds up
              that field.

    Raises IndexError when no document matches (the group stage yields no row).
    """
    reply = db.odi.aggregate([
        { "$match": match },
        { "$group": { "_id": 1, "Total": { "$sum": summed } } }
    ])
    # PyMongo 2.x returns a dict with a 'result' list; PyMongo 3+ returns a cursor.
    rows = reply['result'] if isinstance(reply, dict) else list(reply)
    return 1.0 * rows[0]['Total']


# "Over" filters for the start, middle and end phases of an innings.
# NOTE(review): the bounds overlap -- over 15 matches both start and middle,
# over 40 matches both middle and end. Left unchanged to keep the recorded
# results; confirm whether exclusive bounds were intended.
_PHASE_OVERS = (
    { "$lte": 15 },
    { "$gte": 15, "$lte": 40 },
    { "$gte": 40 },
)


def _phaseTotals(extra=None, summed=1):
    """Return [start, middle, end] totals for one statistic.

    extra  -- additional $match criteria combined with the phase's over filter.
    summed -- forwarded to _odiTotal as the $sum operand.
    """
    totals = []
    for overs in _PHASE_OVERS:
        match = { "Over": overs }
        match.update(extra or {})
        totals.append(_odiTotal(match, summed))
    return totals


# Documents (presumably deliveries) in each phase.
ballsStart, ballsMid, ballsEnd = _phaseTotals()

# Documents with no runs at all.
dotStart, dotMid, dotEnd = _phaseTotals({ "Total Runs": 0 })

# Documents worth four or more runs.
bndStart, bndMid, bndEnd = _phaseTotals({ "Total Runs": { "$gte": 4 } })

# Total runs in each phase.
runsStart, runsMid, runsEnd = _phaseTotals(summed="$Total Runs")

# Wickets of the listed kinds only.
wickStart, wickMid, wickEnd = _phaseTotals({ "Wicket Kind":
    { "$in": ["bowled", "caught", "caught and bowled", "lbw", "stumped", "hit wicket"] } })

In [100]:
# Baseline per-ball rates for each phase of the innings: every phase total is
# divided by the number of balls counted in that same phase.

# Dot balls per ball.
avgDotStart, avgDotMid, avgDotEnd = (
    dotStart / ballsStart, dotMid / ballsMid, dotEnd / ballsEnd)

# Boundaries per ball.
avgBndStart, avgBndMid, avgBndEnd = (
    bndStart / ballsStart, bndMid / ballsMid, bndEnd / ballsEnd)

# Runs per ball.
avgEcoStart, avgEcoMid, avgEcoEnd = (
    runsStart / ballsStart, runsMid / ballsMid, runsEnd / ballsEnd)

# Wickets per ball.
avgWickStart, avgWickMid, avgWickEnd = (
    wickStart / ballsStart, wickMid / ballsMid, wickEnd / ballsEnd)

In [326]:
def bowlStats(player, minBallsStart=240, minBallsMid=360, minBallsEnd=120):
    """Return a bowling score built from per-phase ball data and career stats.

    For each innings phase (start, middle, end) the player's dot-ball,
    boundary, run and wicket rates per ball are divided by the module-level
    baseline for that phase (``avgDot*``, ``avgBnd*``, ``avgEco*``,
    ``avgWick*``) and converted into a weighted deviation from 1.0. A phase
    contributes 0 for all four measures unless the player has bowled more
    than the phase's minimum number of balls. Each measure is averaged over
    the three phases and added to four scaled career statistics read from
    ``db.statistics``.

    player        -- the player's ``_id`` in the per-phase collections and in
                     ``db.statistics``.
    minBallsStart -- balls that must be exceeded for the start phase to count.
    minBallsMid   -- same, for the middle phase.
    minBallsEnd   -- same, for the end phase.

    Players with no document in a per-phase collection are treated as having
    bowled no balls / taken no wickets in that phase.
    """
    def stored(collection, field, default):
        # A missing document means "no data for this phase", not an error.
        doc = collection.find_one({ '_id': player })
        if doc is None:
            return default
        return doc[field]

    def relative(count, total, baseline, belowWeight, aboveWeight):
        # Player's per-ball rate as a multiple of the baseline, scored by how
        # far it sits below or above 1.0 (the two sides can weigh differently).
        ratio = (1.0 * count / total) / baseline
        if ratio < 1.0:
            return -belowWeight * (1.0 - ratio)
        return aboveWeight * (ratio - 1.0)

    def phaseScores(balls, wickets, minBalls, baselines, weights):
        # Returns (dot, boundary, economy, wicket) scores for one phase.
        total = len(balls)
        counts = (
            balls.count(0),                                     # dot balls
            sum([balls.count(runs) for runs in [4, 5, 6, 7]]),  # boundaries
            sum(balls),                                         # runs conceded
            wickets,
        )
        if total <= minBalls:
            return (0, 0, 0, 0)
        return tuple([relative(count, total, baseline, below, above)
                      for count, baseline, (below, above)
                      in zip(counts, baselines, weights)])

    def scaled(value, divisor, cutoff, reward, penalty):
        # Career statistic divided by `divisor`, then multiplied by `reward`
        # when above `cutoff` and by `penalty` otherwise.
        value = value / divisor
        if value > cutoff:
            return reward * value
        return penalty * value

    ballsByPhase = [stored(collection, 'Balls', [])
                    for collection in (db.ballsStart, db.ballsMid, db.ballsEnd)]
    wicketsByPhase = [stored(collection, 'Wickets', 0)
                      for collection in (db.wicketsStart, db.wicketsMid, db.wicketsEnd)]

    # Per phase: minimum sample, baselines and (below, above) weights, each in
    # (dot, boundary, economy, wicket) order.
    # NOTE(review): a boundary or run rate *above* the baseline scores
    # positively here, which rewards conceding more -- confirm the intended sign.
    phaseSettings = [
        (minBallsStart, (avgDotStart, avgBndStart, avgEcoStart, avgWickStart),
         ((45.0, 45.0), (30.0, 30.0), (30.0, 30.0), (45.0, 45.0))),
        (minBallsMid, (avgDotMid, avgBndMid, avgEcoMid, avgWickMid),
         ((24.0, 15.0), (15.0, 12.0), (15.0, 15.0), (30.0, 30.0))),
        (minBallsEnd, (avgDotEnd, avgBndEnd, avgEcoEnd, avgWickEnd),
         ((75.0, 75.0), (45.0, 45.0), (30.0, 45.0), (30.0, 30.0))),
    ]
    startScores, midScores, endScores = [
        phaseScores(balls, wickets, minBalls, baselines, weights)
        for balls, wickets, (minBalls, baselines, weights)
        in zip(ballsByPhase, wicketsByPhase, phaseSettings)]

    # Average each measure over the three phases.
    dotScore, boundaryScore, economyScore, wicketScore = [
        (first + second + third) / 3.0
        for first, second, third in zip(startScores, midScores, endScores)]

    p = db.statistics.find_one({ '_id': player })
    wicketsper10 = scaled(p['BowlingWicketsPer10'], 2.0, 1.1 / 2.0, 75.0, -37.5)
    overs100runs = scaled(p['BowlingOvers100Runs'], 25.0, 19.0 / 25.0, 60.0, -30.0)
    # NOTE(review): the value is divided by 27.0 but the cutoff uses 25 --
    # kept as in the original; confirm which divisor was intended.
    overs10boundaries = scaled(p['BowlingOvers10Boundaries'], 27.0, 14.5 / 25, 45.0, -22.5)
    oversextras = scaled(p['BowlingOversExtras'], 15.0, 3.7 / 15.0, 15.0, -7.5)

    return ((wicketsper10 + overs100runs + overs10boundaries + oversextras)
            + dotScore + boundaryScore + economyScore + wicketScore)

In [327]:
def indBowlStats(player, minBallsStart=180, minBallsMid=240, minBallsEnd=120):
    """Return a bowling score using lower per-phase sample-size minimums.

    For each innings phase (start, middle, end) the player's dot-ball,
    boundary, run and wicket rates per ball are divided by the module-level
    baseline for that phase (``avgDot*``, ``avgBnd*``, ``avgEco*``,
    ``avgWick*``) and converted into a weighted deviation from 1.0. A phase
    contributes 0 for all four measures unless the player has bowled more
    than the phase's minimum number of balls. Each measure is averaged over
    the three phases and added to four scaled career statistics read from
    ``db.statistics``.

    player        -- the player's ``_id`` in the per-phase collections and in
                     ``db.statistics``.
    minBallsStart -- balls that must be exceeded for the start phase to count.
    minBallsMid   -- same, for the middle phase.
    minBallsEnd   -- same, for the end phase.

    Players with no document in a per-phase collection are treated as having
    bowled no balls / taken no wickets in that phase.
    """
    def stored(collection, field, default):
        # A missing document means "no data for this phase", not an error.
        doc = collection.find_one({ '_id': player })
        if doc is None:
            return default
        return doc[field]

    def relative(count, total, baseline, belowWeight, aboveWeight):
        # Player's per-ball rate as a multiple of the baseline, scored by how
        # far it sits below or above 1.0 (the two sides can weigh differently).
        ratio = (1.0 * count / total) / baseline
        if ratio < 1.0:
            return -belowWeight * (1.0 - ratio)
        return aboveWeight * (ratio - 1.0)

    def phaseScores(balls, wickets, minBalls, baselines, weights):
        # Returns (dot, boundary, economy, wicket) scores for one phase.
        total = len(balls)
        counts = (
            balls.count(0),                                     # dot balls
            sum([balls.count(runs) for runs in [4, 5, 6, 7]]),  # boundaries
            sum(balls),                                         # runs conceded
            wickets,
        )
        if total <= minBalls:
            return (0, 0, 0, 0)
        return tuple([relative(count, total, baseline, below, above)
                      for count, baseline, (below, above)
                      in zip(counts, baselines, weights)])

    def scaled(value, divisor, cutoff, reward, penalty):
        # Career statistic divided by `divisor`, then multiplied by `reward`
        # when above `cutoff` and by `penalty` otherwise.
        value = value / divisor
        if value > cutoff:
            return reward * value
        return penalty * value

    ballsByPhase = [stored(collection, 'Balls', [])
                    for collection in (db.ballsStart, db.ballsMid, db.ballsEnd)]
    wicketsByPhase = [stored(collection, 'Wickets', 0)
                      for collection in (db.wicketsStart, db.wicketsMid, db.wicketsEnd)]

    # Per phase: minimum sample, baselines and (below, above) weights, each in
    # (dot, boundary, economy, wicket) order.
    # NOTE(review): a boundary or run rate *above* the baseline scores
    # positively here, which rewards conceding more -- confirm the intended sign.
    phaseSettings = [
        (minBallsStart, (avgDotStart, avgBndStart, avgEcoStart, avgWickStart),
         ((45.0, 45.0), (30.0, 30.0), (30.0, 30.0), (45.0, 45.0))),
        (minBallsMid, (avgDotMid, avgBndMid, avgEcoMid, avgWickMid),
         ((24.0, 15.0), (15.0, 12.0), (15.0, 15.0), (30.0, 30.0))),
        (minBallsEnd, (avgDotEnd, avgBndEnd, avgEcoEnd, avgWickEnd),
         ((75.0, 75.0), (45.0, 45.0), (30.0, 45.0), (30.0, 30.0))),
    ]
    startScores, midScores, endScores = [
        phaseScores(balls, wickets, minBalls, baselines, weights)
        for balls, wickets, (minBalls, baselines, weights)
        in zip(ballsByPhase, wicketsByPhase, phaseSettings)]

    # Average each measure over the three phases.
    dotScore, boundaryScore, economyScore, wicketScore = [
        (first + second + third) / 3.0
        for first, second, third in zip(startScores, midScores, endScores)]

    p = db.statistics.find_one({ '_id': player })
    wicketsper10 = scaled(p['BowlingWicketsPer10'], 2.0, 1.1 / 2.0, 75.0, -37.5)
    overs100runs = scaled(p['BowlingOvers100Runs'], 25.0, 19.0 / 25.0, 60.0, -30.0)
    # NOTE(review): the value is divided by 27.0 but the cutoff uses 25 --
    # kept as in the original; confirm which divisor was intended.
    overs10boundaries = scaled(p['BowlingOvers10Boundaries'], 27.0, 14.5 / 25, 45.0, -22.5)
    oversextras = scaled(p['BowlingOversExtras'], 15.0, 3.7 / 15.0, 15.0, -7.5)

    return ((wicketsper10 + overs100runs + overs10boundaries + oversextras)
            + dotScore + boundaryScore + economyScore + wicketScore)

In [328]:
# (player id, bowling score) for every player with at least 1250 balls bowled.
bowlers = []
for player in db.players.find({'Balls bowled': {"$gte": 1250}}):
    bowlers.append((player['_id'], bowlStats(player['_id'])))

In [329]:
# Ten highest bowling scores, in ascending order (best last).
sorted(bowlers, key=lambda entry: entry[1])[-10:]


Out[329]:
[(u'SP Narine', 165.20184544827356),
 (u'GP Swann', 165.4578846004142),
 (u'RA Jadeja', 166.21056572574827),
 (u'R McLaren', 173.16118940625398),
 (u'M Morkel', 175.371088106162),
 (u'HMRKB Herath', 175.7311255552162),
 (u'Saeed Ajmal', 181.05339212598054),
 (u'DW Steyn', 183.76626454997597),
 (u'JM Anderson', 184.97882043115317),
 (u'JC Tredwell', 243.72363545201438)]

In [390]:
# (player id, bowling score) for India players with at least 750 balls bowled,
# scored with the lower sample-size minimums of indBowlStats.
indBowlers = []
for player in db.players.find({'Balls bowled': {"$gte": 750}, 'Team': 'India'}):
    indBowlers.append((player['_id'], indBowlStats(player['_id'])))

In [391]:
# Ten highest bowling scores among India players (best last).
sorted(indBowlers, key=lambda entry: entry[1])[-10:]


Out[391]:
[(u'I Sharma', 45.16832967302321),
 (u'UT Yadav', 46.07033740768722),
 (u'R Vinay Kumar', 49.91514457790841),
 (u'SK Raina', 80.2765174692456),
 (u'Mohammed Shami', 82.99416925815135),
 (u'B Kumar', 93.08949340740888),
 (u'P Kumar', 112.7989026754889),
 (u'R Ashwin', 130.9888969853051),
 (u'RA Jadeja', 166.21056572574827),
 (u'A Mishra', 210.0273156197658)]

In [392]:
# All-rounder score for players with 500+ balls faced and 500+ balls bowled:
# the combined batting score rescaled by 50/40 plus the bowling score rescaled
# by 50/140.
allrounders = []
for player in db.players.find({'Balls faced': {'$gte': 500}, 'Balls bowled': {'$gte': 500}}):
    battingPart = (50.0 * (featuresum(player['_id']) + stdstats(player['_id'])) / 40.0)
    bowlingPart = (50.0 * bowlStats(player['_id'])) / 140.0
    allrounders.append((player['_id'], battingPart + bowlingPart))

In [393]:
# Ten highest all-rounder scores, in ascending order (best last).
sorted(allrounders, key=lambda entry: entry[1])[-10:]


Out[393]:
[(u'DJ Bravo', 81.74188744357116),
 (u'Mahmudullah', 81.83049236624328),
 (u'AD Mathews', 82.97533992159283),
 (u'Mohammad Hafeez', 84.90462500729424),
 (u'RS Bopara', 85.12977239234912),
 (u'Shakib Al Hasan', 87.80805669131317),
 (u'RA Jadeja', 92.94748556391096),
 (u'AD Russell', 95.4150601340983),
 (u'Shahid Afridi', 97.85109207429593),
 (u'SR Watson', 107.34441518592311)]

In [363]:
# Candidate all-rounders and, for each, a
# (name, rescaled batting score, rescaled bowling score) tuple.
arPicks = ["JP Faulkner", "Shahid Afridi", "AD Russell"]
arPickValues = []
for player in arPicks:
    battingPart = (50.0 * (featuresum(player) + stdstats(player)) / 40.0)
    bowlingPart = (50.0 * bowlStats(player) / 140.0)
    arPickValues.append((player, battingPart, bowlingPart))

In [364]:
# Display the (name, batting, bowling) tuples of the candidate all-rounders.
arPickValues


Out[364]:
[('JP Faulkner', 60.07564825300519, 19.59374172199674),
 ('Shahid Afridi', 47.811047917580076, 50.04004415671586),
 ('AD Russell', 53.48557568715231, 41.92948444694599)]

In [357]:
# Draft side: a copy of the all-star XI with the all-rounder slot vacated.
# NOTE: `allstars` is only defined in a later cell of this notebook, so this
# cell raises NameError on a fresh top-to-bottom run unless that cell is
# executed first.
teamDraft = list(allstars)
teamDraft.remove('Shahid Afridi')
teamDraft


Out[357]:
['SR Watson',
 'HM Amla',
 'KC Sangakkara',
 'V Kohli',
 'AB de Villiers',
 'MS Dhoni',
 'JC Tredwell',
 'DW Steyn',
 'Saeed Ajmal',
 'JM Anderson']

In [365]:
# Team-level indices for the draft side: the rescaled batting and bowling
# scores of every drafted player, summed separately.
battingParts = []
for player in teamDraft:
    battingParts.append(50.0 * (featuresum(player) + stdstats(player)) / 40.0)
battingIndex = sum(battingParts)

bowlingParts = []
for player in teamDraft:
    bowlingParts.append(50.0 * bowlStats(player) / 140.0)
bowlingIndex = sum(bowlingParts)

In [366]:
# Show both team indices on one line, separated by a single space.
print('%s %s' % (battingIndex, bowlingIndex))


425.96208963 358.29936897

In [370]:
# For each candidate all-rounder (same order as arPickValues) show the team's
# batting and bowling indices with that player added, then their product.
for slot, heading in enumerate(('With Faulkner', 'With Afridi', 'With Russell')):
    battingWithPick = battingIndex + arPickValues[slot][1]
    bowlingWithPick = bowlingIndex + arPickValues[slot][2]
    print(heading)
    print('%s %s' % (battingWithPick, bowlingWithPick))
    print(battingWithPick * bowlingWithPick)


With Faulkner
486.037737883 377.893110692
183670.312682
With Afridi
473.773137547 408.339413127
193460.244941
With Russell
479.447665317 400.228853417
191888.789363

In [376]:
# All-rounder score for India players with 200+ balls faced and 200+ balls
# bowled.
# NOTE(review): this uses bowlStats, whereas the India bowlers list uses
# indBowlStats (lower sample-size minimums) -- confirm which was intended.
indAllrounders = []
for player in db.players.find({'Balls faced': {'$gte': 200}, 'Balls bowled': {'$gte': 200}, 'Team': 'India'}):
    battingPart = (50.0 * (featuresum(player['_id']) + stdstats(player['_id'])) / 40.0)
    bowlingPart = (50.0 * bowlStats(player['_id'])) / 140.0
    indAllrounders.append((player['_id'], battingPart + bowlingPart))

In [377]:
# Up to ten highest all-rounder scores among India players (best last).
sorted(indAllrounders, key=lambda entry: entry[1])[-10:]


Out[377]:
[(u'Yuvraj Singh', 42.72474410184754),
 (u'RG Sharma', 61.37166297164676),
 (u'R Ashwin', 69.06634581195773),
 (u'SK Raina', 72.51599062529999),
 (u'V Kohli', 84.22472583507295),
 (u'RA Jadeja', 92.94748556391096)]

In [372]:
# Hand-picked all-star XI. The earlier `teamDraft` cell copies this list, so
# this cell has to be executed before that one.
allstars = [
    "SR Watson",
    "HM Amla",
    "V Kohli",
    "KC Sangakkara",
    "AB de Villiers",
    "MS Dhoni",
    "Shahid Afridi",
    "JC Tredwell",
    "DW Steyn",
    "Saeed Ajmal",
    "JM Anderson"
]

In [373]:
# Hand-picked India XI (eleven player names).
indiaXI = [
    "V Sehwag",
    "S Dhawan",
    "V Kohli",
    "RG Sharma",
    "SK Raina",
    "MS Dhoni",
    "RA Jadeja",
    "R Ashwin",
    "B Kumar",
    "A Mishra",
    "Mohammed Shami"
]

In [ ]: