In [22]:
# -*- coding: utf-8 -*-
%matplotlib inline
import matplotlib.pyplot as plt
import random
import seaborn as sns
import numpy as np
from scipy import stats
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Fichier contenant la représentation vectoriel de tous les documents
data_file = 'data'
# Fichier contenant le ranking sans diversité
# Ranking effectué avec un modèle vectoriel
baseline_file = 'baseline'
# Fichier générer par notre moteur de recherche, permet de récupérer les identifiants des documents
dico_file = 'easyCLEF08_text_index'
# Fichiers contenant toutes les requêtes avec les documents pertinents asssociés
rel_file = 'relevants'

### Lecture des fichiers

def get_data(n, m, path):
    X = np.zeros((n, m))
    Y = np.zeros(n)
    i = 0
    for line in open(path):
        tmp = line.split()
        j = 0
        for x in tmp[:-1]: # y en fin
            X[i][j] = float(x) 
            j+= 1
        Y[i] = tmp[-1]
        i += 1
        if i == n:
            break
    return X, Y

def get_relevants(dico, path):
    Q = {}
    f = open(path)
    for line in open(path):
        tmp = line.split()
        n = len(tmp[:-1])
        #print "nbRel Query id " + tmp[-1] + " = " + str(n)
        aux = []
        for i in range(n):
            aux.append(dico[tmp[i]])
        Q[tmp[-1]] = np.array(aux)
    return Q

def get_ranking_baseline(dico, path):
    Q = {}
    for line in open(path):
        tmp = line.split()
        n = len(tmp[:-1])
        aux = []
        for i in range(n):
            aux.append(dico[tmp[i]])
        Q[tmp[-1]] = np.array(aux)
    return Q

def get_dico(path):
    dico = {}
    cpt = 0
    for line in open(path):
        tmp = line.split(':')
        dico[tmp[0]] = cpt
        cpt += 1
    return dico




# Tous les ranking sont constitués des indices des documents dans X

### Algorithmes de diversité

# Sélectionne 'n' documents aléatoirement dans la baseline
def ranking_div_alea(baseline, n):
    return random.sample(xrange(baseline.size), n)

# Sélection aléatoire 'n' documents en alternant les clusters
def ranking_div_kmeans(baseline, n, nbClusters):
    #reduced_data = PCA(n_components=10).fit_transform(X)
    kmeans = KMeans(init='k-means++', n_clusters=nbClusters)
    #kmeans.fit(reduced_data)
    kmeans.fit(X[baseline])

    # on attribue a chaque cluster l'indice du documents X
    matching = {}
    cpt = 0
    for c in kmeans.labels_:
        if matching.has_key(c): 
            matching[c].append(baseline[cpt])
        else:
            matching[c] = [baseline[cpt]]
        cpt += 1

    #print "clusters : " +  str(kmeans.cluster_centers_.shape)

    res = []
    nlabels = kmeans.n_clusters
    tab_nsamples = np.zeros(nlabels)
    i = 0
    while True:
        for k in range(nlabels): 
            if len(matching[k]) > tab_nsamples[k]: 
                tab_nsamples[k] += 1
                i += 1
            if i >= n:
                break
        if i >= n:
            break
    for k in range(nlabels):
        res += random.sample(np.array(matching[k]), int(tab_nsamples[k]))
    return np.array(res)

# Sélection 'n' documents dans les clusters en concervant le ranking de baseline
# cad la similarité à la requête 
# Les clusters sont parcourus par rapport à la distance de leurs centres à la requête
def ranking_div_kmeans2(baseline, n, nbClusters):
    #reduced_data = PCA(n_components=500).fit_transform(X)
    kmeans = KMeans(init='k-means++', n_clusters=nbClusters)
    #kmeans.fit(reduced_data)
    kmeans.fit(X[baseline])
    
    # on attribue a chaque cluster l'indice du documents X
    matching = {}
    cpt = 0
    for c in kmeans.labels_:
        if matching.has_key(c): 
            matching[c].append(baseline[cpt])
        else:
            matching[c] = [baseline[cpt]]
        cpt += 1

    #print "clusters : " +  str(kmeans.cluster_centers_.shape)

    res = []
    nlabels = kmeans.n_clusters
    tab_nsamples = np.zeros(nlabels)
    i = 0

    ordo_cluster = []

    req = X[baseline[0]]
    dist = np.array([((c - req)**2).sum() for c in kmeans.cluster_centers_])
    dist = np.argsort(dist)

    while True:
        for k in dist:
            if len(matching[k]) > tab_nsamples[k]: # FIXME : attention !     
                tab_nsamples[k] += 1
                i += 1
            if i >= n:
                break
        if i >= n:
            break
    for k in range(nlabels):
        res += matching[k][:int(tab_nsamples[k])]
    return np.array(res)


def p_n(ranking, rel, n):
    tmp = ranking[:n]
    return sum(np.array([x in rel for x in tmp])) / float(n)

def cr_n(ranking, rel, n):
    nb_topics_q = len(np.unique(Y[rel]))
    #print "Nombre sous thème de la query : " + str(nb_topics_q)
    return (len(np.unique(Y[ranking[:n]]))) / float(nb_topics_q)

In [19]:
# n est le nombre de documents à renvoyer
n = 20
print "n = " + str(n)
# Sur combien d'éxécutions on moyenne
mean = 3
nbClusters = 18 # doit être plus petit que 'n', et plus petit que le 'nb_topics'
debug = False

nbrDoc_baseline = 200 # doit être plus grand que 184 (nbRel max des queries)




# On a 2256 documents, chaque document est représenter par un vecteur de taille 2353, 
# avec comme dernière valeur sont topic
X, Y = get_data(2256, 2354, data_file) 
nb_topics = len(np.unique(Y))
print "Nombre topics (tous documents) : " + str(nb_topics)

dico = get_dico(dico_file)
baseline = get_ranking_baseline(dico, baseline_file)
relevants = get_relevants(dico, rel_file)


n = 20
Nombre topics (tous documents) : 23

In [5]:
# Exécution

cr_mean = 0
p_mean = 0
for q in relevants.keys():
    #print "#### Query " + q
    acc1 = 0
    acc2 = 0
    #print "nombre doc dans la baseline : " + str(baseline[q].shape[0])
    for i in range(mean):
        rel = relevants[q]
        ranking = baseline[q][:nbrDoc_baseline]
        #ranking = ranking_div_alea(baseline[q][:nbrDoc_baseline], n)
        #ranking = ranking_div_kmeans2(baseline[q][:nbrDoc_baseline], n, nbClusters)
        acc1 += cr_n(ranking, rel, n)
        acc2 += p_n(ranking, rel, n)
    #print "CR = " + str(acc1 / float(mean)) + "  P = " + str(acc2 / float(mean))
    cr_mean += acc1 / float(mean)
    p_mean += acc2 / float(mean)
    if debug:
        break
print "\n===> CR Query Mean = " + str(cr_mean / len(relevants.keys())) + ", P Query Mean = " + str(p_mean / len(relevants.keys()))


===> CR Query Mean = 0.770469425904, P Query Mean = 0.597435897436

In [26]:
def getCR_P(relevants,baseline,nbrDoc_baseline,mean,n,choiceRanking,nbClusters):
    cr_mean = 0
    p_mean = 0
    for q in relevants.keys():
        acc1 = 0
        acc2 = 0
        #print "nombre doc dans la baseline : " + str(baseline[q].shape[0])
        for i in range(mean):
            rel = relevants[q]
            if(choiceRanking==1):
                ranking = baseline[q][:nbrDoc_baseline]
            if(choiceRanking==2):
                ranking = ranking_div_alea(baseline[q][:nbrDoc_baseline], n)
            if(choiceRanking==3):
                ranking = ranking_div_kmeans2(baseline[q][:nbrDoc_baseline], n, nbClusters)
            acc1 += cr_n(ranking, rel, n)
            acc2 += p_n(ranking, rel, n)
        cr_mean += acc1 / float(mean)
        p_mean += acc2 / float(mean)
        if debug:
            break
    return cr_mean/len(relevants.keys()), p_mean/len(relevants.keys())

In [28]:
##parameters
n = 20
mean = 3
debug = False
nbrDoc_baseline = 200 # doit être plus grand que 184 (nbRel max des queries)


# On a 2256 documents, chaque document est représenter par un vecteur de taille 2353, 
# avec comme dernière valeur sont topic
X, Y = get_data(2256, 2354, data_file) 
nb_topics = len(np.unique(Y))
print "Nombre topics (tous documents) : " + str(nb_topics)

dico = get_dico(dico_file)
baseline = get_ranking_baseline(dico, baseline_file)
relevants = get_relevants(dico, rel_file)


tabCr=[]
tabP=[]
tabNbClusters=np.arange(1,19)
choiceRanking=3
for nbC in tabNbClusters:
    cr ,p = getCR_P(relevants,baseline,nbrDoc_baseline,mean,n,choiceRanking,nbC)
    tabCr.append(cr)
    tabP.append(p)
    
plt.plot(tabNbClusters,tabCr, label = "CR@20")
plt.plot(tabNbClusters,tabP, label = "P@20")
plt.xlabel("Nb Clusters")
plt.ylabel(u"Performance")
plt.title(u"CR et P en fonction du nombre de clusters")
plt.legend()
plt.show()


Nombre topics (tous documents) : 23

===> CR Query Mean = 0.770469425904, P Query Mean = 0.597435897436

===> CR Query Mean = 0.855239486761, P Query Mean = 0.412820512821

===> CR Query Mean = 0.884185232374, P Query Mean = 0.371794871795

===> CR Query Mean = 0.870751685244, P Query Mean = 0.320085470085

===> CR Query Mean = 0.918772143048, P Query Mean = 0.302136752137

===> CR Query Mean = 0.952585927224, P Query Mean = 0.32735042735

===> CR Query Mean = 0.971080221443, P Query Mean = 0.299145299145

===> CR Query Mean = 0.967381451077, P Query Mean = 0.32264957265

===> CR Query Mean = 1.00884547551, P Query Mean = 0.303846153846

===> CR Query Mean = 1.02486237595, P Query Mean = 0.266666666667

===> CR Query Mean = 1.03241075886, P Query Mean = 0.281196581197

===> CR Query Mean = 1.06565392471, P Query Mean = 0.282051282051

===> CR Query Mean = 1.09071980086, P Query Mean = 0.29188034188

===> CR Query Mean = 1.09004910817, P Query Mean = 0.287606837607

===> CR Query Mean = 1.11250313026, P Query Mean = 0.292735042735

===> CR Query Mean = 1.11657320136, P Query Mean = 0.290170940171

===> CR Query Mean = 1.122578857, P Query Mean = 0.282478632479

===> CR Query Mean = 1.13254303399, P Query Mean = 0.26452991453

In [13]:
print(np.arange(1,19))


[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]

In [ ]:
def sim_psi(u,q,ranking):
    
def sim_sigma(du,q,relevants):
    
    
def value(du,q,ranking,alpha,relevants):
    sim_s = sim_sigma(du,q,relevants)
    sim_p = sim_psi(u,q,ranking)
    return np.array([du,alpha*sim_sig-(1-alpha)*sim_p])
            
def gluttony(u,k,q,relevants,alpha=0.5):
    ranking=[]
    for i in k:
        #val sous forme np.array([np.array([idDoc,score]])
        val = np.array([value(du,q,ranking,alpha,relevants) for du in u])
        argmax = val[:,1].argmax()
        di=val[argmax][0]
        ranking.append(di)
        u=u.remove(di)

In [34]:
a=[np.array(['a',1]),np.array(['v',2])]

argmax = np.array(a)[:,1].argmax()


Out[34]:
1

In [37]:



Out[37]:
{'23994': 1121,
 '16079': 837,
 '23993': 1120,
 '22466': 1062,
 '4027': 139,
 '4022': 138,
 '35783': 1591,
 '35780': 1589,
 '22665': 1067,
 '11549': 671,
 '35788': 1592,
 '35789': 1593,
 '37531': 1795,
 '11384': 654,
 '10896': 619,
 '39102': 2034,
 '16671': 863,
 '19374': 970,
 '32656': 1517,
 '30449': 1317,
 '12707': 710,
 '37730': 1849,
 '12708': 711,
 '22004': 1049,
 '10965': 622,
 '6795': 317,
 '13606': 742,
 '37735': 1851,
 '13609': 743,
 '25664': 1177,
 '25661': 1176,
 '22669': 1068,
 '26485': 1201,
 '30667': 1344,
 '40357': 2228,
 '13546': 736,
 '13547': 737,
 '16709': 864,
 '17382': 893,
 '17387': 895,
 '17385': 894,
 '19575': 978,
 '16384': 849,
 '9529': 529,
 '22908': 1078,
 '7866': 397,
 '21615': 1044,
 '40213': 2201,
 '16819': 868,
 '4812': 226,
 '4815': 227,
 '17876': 908,
 '6379': 298,
 '11614': 675,
 '9413': 521,
 '10639': 599,
 '10636': 598,
 '26028': 1184,
 '6376': 297,
 '39694': 2141,
 '10140': 571,
 '39696': 2143,
 '37411': 1755,
 '39693': 2140,
 '20528': 1008,
 '11947': 688,
 '39698': 2144,
 '39699': 2145,
 '22874': 1076,
 '22875': 1077,
 '37798': 1873,
 '9630': 538,
 '37790': 1872,
 '8941': 491,
 '8940': 490,
 '8943': 492,
 '35983': 1633,
 '3513': 105,
 '16928': 878,
 '7263': 351,
 '6204': 267,
 '6206': 268,
 '12236': 697,
 '12231': 696,
 '36023': 1634,
 '6208': 269,
 '6209': 270,
 '14626': 779,
 '14627': 780,
 '14620': 778,
 '40186': 2192,
 '40187': 2193,
 '40188': 2194,
 '40189': 2195,
 '24154': 1126,
 '30413': 1313,
 '10097': 567,
 '10095': 566,
 '10094': 565,
 '10098': 568,
 '35880': 1624,
 '37319': 1712,
 '37313': 1710,
 '8834': 481,
 '37310': 1708,
 '37315': 1711,
 '37477': 1764,
 '31644': 1450,
 '21252': 1030,
 '15669': 814,
 '14242': 763,
 '30869': 1374,
 '37177': 1654,
 '37172': 1653,
 '37171': 1652,
 '37774': 1868,
 '37773': 1867,
 '37772': 1866,
 '8167': 430,
 '6973': 333,
 '15919': 826,
 '6979': 334,
 '40568': 2236,
 '40569': 2237,
 '27619': 1259,
 '27618': 1258,
 '39358': 2082,
 '27615': 1257,
 '26718': 1215,
 '14408': 766,
 '14409': 767,
 '26712': 1214,
 '8765': 478,
 '1579': 15,
 '8761': 475,
 '8763': 477,
 '8762': 476,
 '8549': 461,
 '8548': 460,
 '8016': 414,
 '26625': 1209,
 '26624': 1208,
 '24412': 1140,
 '26622': 1207,
 '26629': 1210,
 '34153': 1564,
 '34154': 1565,
 '40299': 2217,
 '34156': 1566,
 '34157': 1567,
 '23297': 1092,
 '3839': 130,
 '13721': 749,
 '10882': 618,
 '4036': 140,
 '31543': 1439,
 '19384': 971,
 '19385': 972,
 '19386': 973,
 '39179': 2045,
 '35844': 1613,
 '35847': 1615,
 '35846': 1614,
 '35840': 1610,
 '35843': 1612,
 '35842': 1611,
 '40517': 2233,
 '32824': 1536,
 '24707': 1144,
 '39174': 2044,
 '32646': 1515,
 '40515': 2232,
 '32648': 1516,
 '12711': 712,
 '2308': 53,
 '10973': 623,
 '10975': 624,
 '10977': 625,
 '4724': 220,
 '32798': 1532,
 '4720': 218,
 '4723': 219,
 '19705': 981,
 '19706': 982,
 '15009': 787,
 '25616': 1174,
 '17620': 903,
 '3741': 123,
 '2275': 45,
 '2277': 46,
 '16392': 850,
 '11599': 674,
 '39190': 2050,
 '17280': 888,
 '7856': 396,
 '25543': 1172,
 '16800': 867,
 '4531': 207,
 '4533': 208,
 '9402': 520,
 '7986': 410,
 '7987': 411,
 '12132': 692,
 '11629': 676,
 '12131': 691,
 '9334': 516,
 '9331': 515,
 '37788': 1870,
 '37789': 1871,
 '3525': 106,
 '9623': 536,
 '18945': 945,
 '9625': 537,
 '37376': 1734,
 '7214': 349,
 '17388': 896,
 '10715': 607,
 '12225': 695,
 '2699': 67,
 '6210': 271,
 '2695': 66,
 '2693': 65,
 '10719': 608,
 '40175': 2191,
 '40174': 2190,
 '40171': 2187,
 '40170': 2186,
 '40173': 2189,
 '6816': 318,
 '38630': 1987,
 '9755': 545,
 '38636': 1988,
 '20467': 1001,
 '37305': 1705,
 '37306': 1706,
 '37300': 1703,
 '1969': 32,
 '21420': 1040,
 '37303': 1704,
 '1965': 29,
 '1966': 30,
 '1967': 31,
 '1960': 28,
 '10081': 564,
 '12957': 715,
 '15672': 815,
 '20262': 990,
 '20260': 989,
 '38182': 1954,
 '17836': 906,
 '37167': 1650,
 '37169': 1651,
 '38740': 1990,
 '7060': 339,
 '35833': 1605,
 '37765': 1865,
 '37761': 1864,
 '30243': 1304,
 '30245': 1305,
 '30246': 1306,
 '15907': 825,
 '15905': 824,
 '40553': 2235,
 '15458': 807,
 '31112': 1390,
 '31111': 1389,
 '12489': 701,
 '8152': 429,
 '31116': 1392,
 '690': 3,
 '26709': 1213,
 '1543': 14,
 '39587': 2110,
 '39589': 2111,
 '14473': 773,
 '24398': 1139,
 '30334': 1307,
 '7199': 348,
 '8539': 459,
 '8028': 416,
 '19101': 949,
 '40427': 2230,
 '40426': 2229,
 '8023': 415,
 '26631': 1211,
 '32870': 1540,
 '15051': 789,
 '38167': 1951,
 '38166': 1950,
 '7683': 379,
 '7682': 378,
 '7681': 377,
 '35926': 1627,
 '39500': 2089,
 '39501': 2090,
 '4282': 168,
 '4283': 169,
 '39503': 2091,
 '4287': 172,
 '4284': 170,
 '4285': 171,
 '38194': 1957,
 '11364': 653,
 '4289': 174,
 '39238': 2056,
 '26555': 1204,
 '39169': 2042,
 '39848': 2162,
 '32813': 1534,
 '7536': 371,
 '32816': 1535,
 '7532': 370,
 '32672': 1520,
 '32670': 1519,
 '32677': 1522,
 '32676': 1521,
 '16652': 862,
 '16650': 861,
 '4759': 223,
 '19284': 962,
 '19287': 963,
 '37529': 1793,
 '6775': 316,
 '23866': 1115,
 '16952': 881,
 '32250': 1503,
 '24427': 1141,
 '9968': 562,
 '11297': 642,
 '3759': 124,
 '4631': 215,
 '22736': 1072,
 '38981': 2002,
 '2197': 42,
 '37192': 1660,
 '21207': 1021,
 '6689': 314,
 '11584': 673,
 '11582': 672,
 '37527': 1791,
 '9509': 527,
 '21382': 1038,
 '9501': 526,
 '16341': 848,
 '37204': 1666,
 '13290': 728,
 '7847': 394,
 '13293': 729,
 '37201': 1664,
 '7995': 412,
 '35647': 1578,
 '26043': 1185,
 '7998': 413,
 '9432': 523,
 '2731': 77,
 '10167': 572,
 '20095': 984,
 '21256': 1031,
 '13183': 725,
 '10363': 582,
 '6944': 331,
 '16904': 875,
 '37576': 1803,
 '6222': 272,
 '6225': 273,
 '8143': 428,
 '20479': 1002,
 '9769': 546,
 '10505': 591,
 '37371': 1730,
 '10634': 597,
 '24177': 1128,
 '37374': 1732,
 '9295': 514,
 '37379': 1736,
 '37378': 1735,
 '30433': 1316,
 '1972': 33,
 '30431': 1315,
 '33457': 1549,
 '8852': 482,
 '33459': 1550,
 '39695': 2142,
 '40302': 2220,
 '40303': 2221,
 '40300': 2218,
 '40301': 2219,
 '40304': 2222,
 '40305': 2223,
 '38198': 1959,
 '38199': 1960,
 '27109': 1233,
 '20277': 991,
 '38192': 1955,
 '38193': 1956,
 '26827': 1218,
 '38197': 1958,
 '37194': 1662,
 '37196': 1663,
 '37191': 1659,
 '37193': 1661,
 '3138': 87,
 '37753': 1862,
 '37752': 1861,
 '37751': 1860,
 '30508': 1334,
 '37208': 1667,
 '37755': 1863,
 '30502': 1329,
 '30501': 1328,
 '30500': 1327,
 '30507': 1333,
 '30506': 1332,
 '30505': 1331,
 '30504': 1330,
 '40542': 2234,
 '15930': 827,
 '30769': 1347,
 '30768': 1346,
 '11108': 631,
 '10387': 583,
 '14468': 772,
 '24385': 1138,
 '3035': 84,
 '39577': 2109,
 '39570': 2108,
 '31219': 1406,
 '15595': 809,
 '19031': 946,
 '30619': 1340,
 '30618': 1339,
 '8035': 417,
 '13856': 752,
 '13855': 751,
 '37202': 1665,
 '12853': 714,
 '9996': 563,
 '25966': 1183,
 '25960': 1182,
 '6514': 306,
 '15027': 788,
 '38174': 1953,
 '3812': 129,
 '16925': 877,
 '3811': 128,
 '22382': 1059,
 '23704': 1109,
 '23700': 1108,
 '8458': 455,
 '11358': 652,
 '4299': 181,
 '15330': 800,
 '39151': 2038,
 '4295': 177,
 '4294': 176,
 '4297': 179,
 '4296': 178,
 '4291': 175,
 '11356': 651,
 '11355': 650,
 '39158': 2039,
 '35867': 1617,
 '35866': 1616,
 '35869': 1619,
 '31217': 1405,
 '24766': 1148,
 '35682': 1581,
 '38247': 1980,
 '22039': 1050,
 '32666': 1518,
 '19615': 979,
 '40659': 2246,
 '38015': 1901,
 '19290': 964,
 '10999': 626,
 '13672': 746,
 '13673': 747,
 '23813': 1113,
 '4740': 221,
 '39023': 2006,
 '39027': 2009,
 '16007': 834,
 '39025': 2008,
 '16005': 833,
 '37337': 1717,
 '32248': 1501,
 '32249': 1502,
 '38014': 1900,
 '17354': 891,
 '2182': 40,
 '38997': 2004,
 '2219': 43,
 '23928': 1117,
 '38016': 1902,
 '31837': 1460,
 '16823': 869,
 '38011': 1897,
 '6343': 296,
 '6342': 295,
 '4511': 206,
 '11648': 677,
 '17198': 887,
 '9421': 522,
 '26050': 1186,
 '26057': 1187,
 '9351': 517,
 '2724': 76,
 '2720': 75,
 '10171': 573,
 '10175': 574,
 '7947': 403,
 '9199': 510,
 '20661': 1012,
 '37568': 1801,
 '37569': 1802,
 '16919': 876,
 '3788': 125,
 '9510': 528,
 '4991': 245,
 '6234': 275,
 '6231': 274,
 '20914': 1014,
 '11828': 684,
 '14855': 786,
 '20912': 1013,
 '26942': 1225,
 '18887': 941,
 '26944': 1226,
 '20918': 1015,
 '9778': 547,
 '8866': 484,
 '8867': 485,
 '8865': 483,
 '7306': 353,
 '37365': 1726,
 '24164': 1127,
 '37368': 1728,
 '37369': 1729,
 '9281': 513,
 '39714': 2151,
 '30426': 1314,
 '21403': 1039,
 '38117': 1936,
 '2858': 80,
 '37250': 1684,
 '10202': 576,
 '7043': 338,
 '3334': 100,
 '20282': 992,
 '39625': 2117,
 '4570': 213,
 '10195': 575,
 '39623': 2115,
 '32034': 1490,
 '37186': 1658,
 '37184': 1657,
 '37182': 1656,
 '39628': 2118,
 '37180': 1655,
 '37748': 1858,
 '37749': 1859,
 '37218': 1671,
 '39660': 2133,
 '37744': 1855,
 '32031': 1489,
 '18392': 926,
 '37747': 1857,
 '37212': 1668,
 '37213': 1669,
 '32030': 1488,
 '32480': 1513,
 '8608': 466,
 '30778': 1352,
 '32481': 1514,
 '30774': 1349,
 '30775': 1350,
 '30777': 1351,
 '8604': 465,
 '30773': 1348,
 '26720': 1216,
 '26721': 1217,
 '14451': 771,
 '39367': 2083,
 '34158': 1568,
 '32039': 1494,
 '3023': 83,
 '32038': 1493,
 '30353': 1308,
 '8738': 472,
 '30608': 1338,
 '30358': 1309,
 '3166': 90,
 '8284': 440,
 '37312': 1709,
 '31481': 1422,
 '14520': 774,
 '31487': 1424,
 '31486': 1423,
 '26614': 1206,
 '6501': 305,
 '6500': 304,
 '39498': 2087,
 '31172': 1393,
 '38146': 1945,
 '38144': 1944,
 '39490': 2085,
 '39497': 2086,
 '32479': 1512,
 '3803': 127,
 '3802': 126,
 '26531': 1203,
 '11347': 649,
 '11341': 648,
 '23717': 1110,
 '23241': 1090,
 '23243': 1091,
 '30976': 1385,
 '8193': 434,
 '19113': 951,
 '30975': 1384,
 '19115': 953,
 '19114': 952,
 '8194': 435,
 '30971': 1383,
 '15329': 799,
 '7559': 372,
 '35878': 1623,
 '23397': 1095,
 '31395': 1416,
 '31394': 1415,
 '23398': 1096,
 '31396': 1417,
 '35870': 1620,
 '35871': 1621,
 '22539': 1063,
 '35678': 1580,
 '13648': 744,
 '26443': 1199,
 '26442': 1198,
 '7176': 346,
 '39038': 2014,
 '39039': 2015,
 '9945': 558,
 '39036': 2012,
 '16034': 836,
 '16031': 835,
 '16740': 865,
 '24972': 1152,
 '24970': 1151,
 '39093': 2030,
 '32297': 1505,
 '39159': 2040,
 '25396': 1165,
 '4610': 214,
 '13506': 734,
 '13507': 735,
 '26190': 1193,
 '2223': 44,
 '7823': 393,
 '25570': 1173,
 '35868': 1618,
 '25214': 1158,
 '35683': 1582,
 '11659': 678,
 '26262': 1194,
 '37506': 1778,
 '2712': 70,
 '2711': 69,
 '2716': 72,
 '2717': 73,
 '2714': 71,
 '2719': 74,
 '37511': 1783,
 '37510': 1782,
 '37513': 1784,
 '37515': 1785,
 '7952': 405,
 '7951': 404,
 '37516': 1786,
 '37519': 1788,
 '7228': 350,
 '10344': 581,
 '1837': 24,
 '9566': 532,
 '6241': 276,
 '12273': 698,
 '6246': 278,
 '20456': 1000,
 '14844': 785,
 '18562': 933,
 '1955': 27,
 '39708': 2149,
 '3248': 94,
 '37359': 1725,
 '8876': 486,
 '37357': 1723,
 '37356': 1722,
 '37355': 1721,
 '37354': 1720,
 '37353': 1719,
 '37420': 1758,
 '26845': 1219,
 '7030': 335,
 '7034': 336,
 '7035': 337,
 '31884': 1484,
 '12495': 702,
 '20292': 994,
 '9019': 498,
 '30561': 1336,
 '30560': 1335,
 '1716': 21,
 '9382': 518,
 '14197': 762,
 '14194': 761,
 '37739': 1853,
 '37229': 1678,
 '37228': 1677,
 '15827': 823,
 '37731': 1850,
 '37222': 1673,
 '37220': 1672,
 '37227': 1676,
 '37226': 1675,
 '37737': 1852,
 '37224': 1674,
 '40215': 2202,
 '40216': 2203,
 '40217': 2204,
 '4749': 222,
 '1134': 6,
 '23052': 1085,
 '7783': 388,
 '7146': 342,
 '3015': 82,
 '40322': 2224,
 '37646': 1826,
 '40325': 2225,
 '40326': 2226,
 '37642': 1825,
 '40328': 2227,
 '30361': 1311,
 '30360': 1310,
 '30015': 1301,
 '30017': 1303,
 '30016': 1302,
 '30011': 1300,
 '8056': 419,
 '8054': 418,
 '31540': 1437,
 '31541': 1438,
 '23650': 1107,
 '8290': 441,
 '4298': 180,
 '4349': 197,
 '30382': 1312,
 '4347': 196,
 '8298': 442,
 '4344': 195,
 '4342': 194,
 '4341': 193,
 '31497': 1425,
 '11015': 627,
 '31499': 1426,
 '7495': 367,
 '39296': 2073,
 '24292': 1134,
 '38150': 1946,
 '38152': 1947,
 '32465': 1511,
 '11330': 646,
 '26524': 1202,
 '11338': 647,
 '39029': 2010,
 '35782': 1590,
 '24741': 1146,
 '7564': 373,
 '6401': 299,
 '32866': 1539,
 '24745': 1147,
 '32864': 1538,
 '8184': 433,
 '19108': 950,
 '8182': 432,
 '7764': 387,
 '35808': 1599,
 '31389': 1414,
 '35807': 1598,
 '3905': 136,
 '3901': 134,
 '3903': 135,
 '19322': 968,
 '19324': 969,
 '13651': 745,
 '22298': 1056,
 '35646': 1577,
 '35644': 1576,
 '11481': 663,
 '27354': 1239,
 '26450': 1200,
 '27108': 1232,
 '31536': 1433,
 '9958': 561,
 '39040': 2016,
 '9956': 560,
 '24630': 1142,
 '24631': 1143,
 '9952': 559,
 '32040': 1495,
 '32042': 1496,
 '16205': 844,
 '16209': 845,
 '32738': 1525,
 '19259': 959,
 '26186': 1192,
 '38978': 2001,
 '31838': 1461,
 '31839': 1462,
 '31835': 1458,
 '31836': 1459,
 '23908': 1116,
 '31830': 1456,
 '31832': 1457,
 '11794': 683,
 '16119': 842,
 '9275': 512,
 '7816': 392,
 '7815': 391,
 '33393': 1548,
 '13401': 732,
 '5096': 252,
 '13409': 733,
 '35706': 1584,
 '26273': 1195,
 '2702': 68,
 '37502': 1776,
 '7963': 406,
 '37500': 1774,
 '37501': 1775,
 '25211': 1156,
 '37507': 1779,
 '37504': 1777,
 '25212': 1157,
 '37508': 1780,
 '37509': 1781,
 '39184': 2048,
 '31192': 1403,
 '1825': 23,
 '4406': 198,
 '39186': 2049,
 '4979': 244,
 '6253': 279,
 '4975': 243,
 '11300': 643,
 '3270': 98,
 '4225': 164,
 '22990': 1083,
 '8880': 487,
 '33374': 1545,
 '33370': 1544,
 '33379': 1547,
 '27682': 1291,
 '33378': 1546,
 '9006': 497,
 '9004': 496,
 '9003': 495,
 '35834': 1606,
 '10221': 577,
 '10222': 578,
 '35835': 1607,
 '9396': 519,
 '21515': 1041,
 '20022': 983,
 '37239': 1680,
 '13135': 721,
 '35838': 1608,
 '35839': 1609,
 '37722': 1847,
 '37723': 1848,
 '37720': 1845,
 '37721': 1846,
 '39719': 2152,
 '40261': 2207,
 '40260': 2206,
 '39545': 2105,
 '39546': 2106,
 '39547': 2107,
 '39540': 2103,
 '20349': 997,
 '7795': 390,
 '7794': 389,
 '38019': 1903,
 '7157': 345,
 '7156': 344,
 '1580': 16,
 '1581': 17,
 '7153': 343,
 '38013': 1899,
 '38012': 1898,
 '37657': 1827,
 '40688': 2255,
 '37717': 1842,
 '23648': 1105,
 '23649': 1106,
 '8069': 421,
 '23644': 1102,
 '23646': 1103,
 '23647': 1104,
 '40665': 2251,
 '7484': 365,
 '40667': 2253,
 '7486': 366,
 '35795': 1597,
 '40663': 2249,
 '40662': 2248,
 '8260': 439,
 '4451': 201,
 '38129': 1940,
 '8662': 467,
 '8665': 468,
 '8666': 469,
 '31625': 1446,
 '31624': 1445,
 '17890': 909,
 '17891': 910,
 '31620': 1442,
 '31623': 1444,
 '31622': 1443,
 '4080': 152,
 '4082': 153,
 '4083': 154,
 '4084': 155,
 '4085': 156,
 '13797': 750,
 '4087': 157,
 '32453': 1508,
 '22606': 1065,
 '32457': 1510,
 '3864': 131,
 '37714': 1840,
 '7577': 374,
 '19138': 955,
 '19135': 954,
 '35819': 1604,
 '38231': 1974,
 '38230': 1973,
 '35812': 1601,
 '35811': 1600,
 '35816': 1602,
 '35817': 1603,
 '8316': 443,
 '11490': 664,
 '11491': 665,
 '35656': 1579,
 '13036': 718,
 '37488': 1767,
 '11258': 639,
 '40664': 2250,
 '9929': 555,
 '39058': 2021,
 '39059': 2022,
 '22199': 1054,
 '39052': 2018,
 '39053': 2019,
 '22194': 1053,
 '39057': 2020,
 '9926': 554,
 '22777': 1074,
 '19260': 960,
 '22772': 1073,
 '16767': 866,
 '39790': 2155,
 '4288': 173,
 '31849': 1468,
 '31848': 1467,
 '31841': 1463,
 '40666': 2252,
 '31844': 1464,
 '31847': 1466,
 '31846': 1465,
 '19317': 967,
 '10811': 617,
 '10810': 616,
 '16165': 843,
 '19311': 966,
 '8744': 474,
 '40661': 2247,
 '4561': 210,
 '4565': 211,
 '4569': 212,
 '16305': 846,
 '3640': 118,
 '11474': 662,
 '17362': 892,
 '37893': 1882,
 '5011': 247,
 '5012': 248,
 '4898': 233,
 '20615': 1010,
 '39543': 2104,
 ...}

In [ ]: