In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [4]:
dfraw = pd.read_csv('features.csv', sep=',')
dfraw
Out[4]:
In [4]:
columns = ["entity_score_dtop", "entity_score_dsuc", "cosine_sim_dtop", "cosine_sim_dsuc"]
dfraw[columns] = dfraw[columns].astype(float)
inf_replacement = -0.04
# map infinite deltas to a small negative sentinel so they land in their own
# histogram bin left of 0 (see the dtop/dsuc plots below);
# DataFrame.replace supersedes the deprecated applymap-based version
df = dfraw.replace(np.inf, inf_replacement)
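# (variant, not used here) to also catch negative infinities:
# df = dfraw.replace([np.inf, -np.inf], inf_replacement)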
df
Out[4]:
In [23]:
p_label = "Valid links"
n_label = "Invalid links"
p_color = "green"
n_color = "blue"
p_alpha = 0.8
n_alpha = 0.5
relative_frequency = r"$f$"
ls = "$ls$"
es = "$es$"
cs = "$cs$"
rank = r"$r$"
dtop = r"$\Delta top$"
dsuc = r"$\Delta succ$"
es_index = r"$_{es}$"
cs_index = r"$_{cs}$"
es_rank = rank + es_index
es_dtop = dtop + es_index
es_dsuc = dsuc + es_index
cs_rank = rank + cs_index
cs_dtop = dtop + cs_index
cs_dsuc = dsuc + cs_index
# https://stackoverflow.com/questions/12444716/how-do-i-set-the-figure-title-and-axes-labels-font-size-in-matplotlib
params = {'axes.labelsize': 'x-large',
          'axes.titlesize': 'x-large'}
plt.rcParams.update(params)
In [6]:
correct = df["correct"].to_numpy()
links = int(np.count_nonzero(correct))
no_links = len(correct) - links
(links, no_links)
Out[6]:
In [28]:
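# regenerate all figures and save them under plots/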
composite_features()
link_scores()
entity_scores()
entity_score_ranks()
entity_score_dtop()
entity_score_dsuc()
cosine_sims()
cosine_sim_ranks()
cosine_sim_dtop()
cosine_sim_dsuc()
"done"
Out[28]:
In [8]:
def composite_features():
    # scatter the two composite features against each other, colored by class
    X = df[['entity_score', 'cosine_sim']].to_numpy()
    y = df['correct'].to_numpy()
    class_names = ['Invalid link', 'Valid link']
    fig, axes = plt.subplots()
    colors = [n_color, p_color]
    for color, i, class_name in zip(colors, [0, 1], class_names):
        plt.scatter(X[y == i, 0], X[y == i, 1], color=color, alpha=0.2, label=class_name, marker=".")
    axes.set_xlim([0.0, 1.0])
    axes.set_ylim([0.0001, 1.0])
    axes.set_yscale('log')
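    # the y-axis lower bound stays just above 0: a log scale cannot include 0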
    #plt.title('Composite features')
    plt.xlabel(es)
    plt.ylabel(cs)
    plt.legend(loc='lower right', shadow=False, scatterpoints=1)
    plt.show()
    fig.savefig("plots/composite_features.png", bbox_inches="tight", dpi=600)
composite_features()
"done"
Out[8]:
In [13]:
def link_scores(logx=False):
    # split the link scores by class label
    mask = df['correct'].astype(bool)
    tp = df.loc[mask, 'link_score']
    tn = df.loc[~mask, 'link_score']
    # create bin boundaries
    number_bins = 30
    bins = np.arange(0.0, 1.0, 1.0 / number_bins)
    bins = np.append(bins, 1.0)
    if logx:
        bins = np.logspace(-1, 0, number_bins)
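        # log-spaced edges between 0.1 and 1 keep the bars visually
        # equal-width on the log x-axis configured below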
    # normalize histogram: show relative frequencies of classes
    p_weights = np.ones_like(tp) / float(len(tp))
    n_weights = np.ones_like(tn) / float(len(tn))
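    # weighting each sample by 1/N of its class makes the bar heights of each
    # class sum to 1 (unlike density=True, which also divides by bin width)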
    fig, ax = plt.subplots()
    ax.hist(tp, weights=p_weights, bins=bins, alpha=p_alpha, label=p_label, color=p_color)
    ax.hist(tn, weights=n_weights, bins=bins, alpha=n_alpha, label=n_label, color=n_color)
    if logx:
        ax.set_xlim([0.1, 1.0])
        ax.set_xscale('log')
    ax.set_ylim([0.0, 1.0])
    #plt.title('Distribution of link scores')
    plt.xlabel(ls)
    plt.ylabel(relative_frequency)
    plt.legend(loc='best')
    plt.show()
    fig.savefig("plots/link_scores.pdf", bbox_inches="tight")
link_scores(False)
In [14]:
def entity_scores(normalize=True):
    # https://stackoverflow.com/questions/6871201/plot-two-histograms-at-the-same-time-with-matplotlib
    # split the entity scores by class label
    mask = df['correct'].astype(bool)
    tp = df.loc[mask, 'entity_score']
    tn = df.loc[~mask, 'entity_score']
    # create bin boundaries
    number_bins = 30
    bins = np.arange(0.0, 1.0, 1.0 / number_bins)
    bins = np.append(bins, 1.0)
    # normalize histogram: show relative frequencies of classes
    if normalize:
        p_weights = np.ones_like(tp) / float(len(tp))
        n_weights = np.ones_like(tn) / float(len(tn))
    else:
        p_weights = [1] * len(tp)
        n_weights = [1] * len(tn)
    fig, ax = plt.subplots()
    ax.hist(tp, weights=p_weights, bins=bins, alpha=p_alpha, label=p_label, color=p_color)
    ax.hist(tn, weights=n_weights, bins=bins, alpha=n_alpha, label=n_label, color=n_color)
    if normalize:
        ax.set_ylim([0.0, 1.0])
    else:
        ax.set_yscale("log")
    #plt.title('Distribution of entity scores')
    plt.xlabel(es)
    plt.ylabel(relative_frequency)
    plt.legend(loc='best')
    plt.show()
    fig.savefig("plots/entity_scores.pdf", bbox_inches="tight")
entity_scores(True)
In [27]:
def cosine_sims(logx=True, logy=True):
    # split the cosine similarities by class label
    mask = df['correct'].astype(bool)
    tp = df.loc[mask, 'cosine_sim']
    tn = df.loc[~mask, 'cosine_sim']
    # create bin boundaries
    number_bins = 35
    bins = np.arange(0.0, 1.0, 1.0 / number_bins)
    bins = np.append(bins, 1.0)
    if logx:
        bins = np.logspace(-4, 0, number_bins)
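        # log-spaced edges from 1e-4 to 1, matching the x-limits set below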
    # normalize histogram: show relative frequencies of classes
    p_weights = np.ones_like(tp) / float(len(tp))
    n_weights = np.ones_like(tn) / float(len(tn))
    fig, ax = plt.subplots()
    ax.hist(tp, weights=p_weights, bins=bins, alpha=p_alpha, label=p_label, color=p_color)
    ax.hist(tn, weights=n_weights, bins=bins, alpha=n_alpha, label=n_label, color=n_color)
    if logx:
        ax.set_xlim([0.0001, 1.0])
        ax.set_xscale('log')
    if logy:
        ax.set_ylim([0.00001, 1.0])
        ax.set_yscale('log')
    else:
        ax.set_ylim([0.0, 1.0])
    #plt.title('Distribution of context scores')
    plt.xlabel(cs)
    plt.ylabel(relative_frequency)
    plt.legend(loc='best')
    plt.show()
    fig.savefig("plots/context_scores.pdf", bbox_inches="tight")
cosine_sims(True, True)
In [16]:
def entity_score_ranks():
    # split the entity-score ranks by class label
    mask = df['correct'].astype(bool)
    tp = df.loc[mask, 'entity_score_rank']
    tn = df.loc[~mask, 'entity_score_rank']
    # create bin boundaries
    number_bins = 29
    bins = list(range(1, number_bins + 2))
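    # integer edges 1..30 give one unit-wide bin per rank; align="left" in the
    # hist calls below centers each bar on its integer rank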
    # normalize histogram: show relative frequencies of classes
    p_weights = np.ones_like(tp) / float(len(tp))
    n_weights = np.ones_like(tn) / float(len(tn))
    fig, ax = plt.subplots()
    ax.hist(tp, weights=p_weights, bins=bins, alpha=p_alpha, label=p_label, align="left", color=p_color)
    ax.hist(tn, weights=n_weights, bins=bins, alpha=n_alpha, label=n_label, align="left", color=n_color)
    ticks = [1] + list(range(0, max(bins), 5))[1:] + [bins[-2]]
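    # x-ticks at rank 1, every fifth rank, and the last rank (29)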
    ax.set_xticks(ticks)
    ax.set_ylim([0.0, 1.0])
    #plt.title("Distribution of entity scores' ranks")
    plt.xlabel(es_rank)
    plt.ylabel(relative_frequency)
    plt.legend(loc='upper right')
    plt.show()
    fig.savefig("plots/entity_scores_ranks.pdf", bbox_inches="tight")
entity_score_ranks()
In [17]:
def cosine_sim_ranks():
    # split the context-score ranks by class label
    mask = df['correct'].astype(bool)
    tp = df.loc[mask, 'cosine_sim_rank']
    tn = df.loc[~mask, 'cosine_sim_rank']
    # create bin boundaries
    number_bins = 29
    bins = list(range(1, number_bins + 2))
    # normalize histogram: show relative frequencies of classes
    p_weights = np.ones_like(tp) / float(len(tp))
    n_weights = np.ones_like(tn) / float(len(tn))
    fig, ax = plt.subplots()
    ax.hist(tp, weights=p_weights, bins=bins, alpha=p_alpha, label=p_label, align="left", color=p_color)
    ax.hist(tn, weights=n_weights, bins=bins, alpha=n_alpha, label=n_label, align="left", color=n_color)
    ticks = [1] + list(range(0, max(bins), 5))[1:] + [bins[-2]]
    ax.set_xticks(ticks)
    ax.set_ylim([0.0, 1.0])
    #plt.title("Distribution of context scores' ranks")
    plt.xlabel(cs_rank)
    plt.ylabel(relative_frequency)
    plt.legend(loc='upper right')
    plt.show()
    fig.savefig("plots/context_scores_ranks.pdf", bbox_inches="tight")
cosine_sim_ranks()
In [18]:
def entity_score_dtop():
    # split the entity-score deltas to the top candidate by class label
    mask = df['correct'].astype(bool)
    tp = df.loc[mask, 'entity_score_dtop']
    tn = df.loc[~mask, 'entity_score_dtop']
    # create bin boundaries
    number_bins = 30
    bin_start, bin_end = 0.0, 1.0
    bin_width = (bin_end - bin_start) / number_bins
    bins = np.arange(bin_start, bin_end, bin_width)
    bins = np.append(bins, 1.0)
    space = 2
    bins = np.append([bin_start - space * bin_width, bin_start - bin_width], bins)
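    # the two extra bins left of 0 catch the inf_replacement sentinel (-0.04),
    # so "n/a" deltas show up as a separate bar instead of mixing into [0, 1/30)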
    # normalize histogram: show relative frequencies of classes
    p_weights = np.ones_like(tp) / float(len(tp))
    n_weights = np.ones_like(tn) / float(len(tn))
    fig, ax = plt.subplots()
    ax.hist(tp, weights=p_weights, bins=bins, alpha=p_alpha, label=p_label, color=p_color)
    ax.hist(tn, weights=n_weights, bins=bins, alpha=n_alpha, label=n_label, color=n_color)
    #ticks = [-space * bin_width] + list(np.arange(0.0, 1.0, 0.2))
    #tick_labels = list(ticks)
    #tick_labels[0] = "n/a"
    #ax.set_xticks(ticks)
    #ax.set_xticklabels(tick_labels)
    ax.set_xlim(-space * bin_width, 1.0)
    ax.set_ylim([0.0, 1.0])
    #plt.title(r"Distribution of entity scores' " + dtop)
    plt.xlabel(es_dtop)
    plt.ylabel(relative_frequency)
    plt.legend(loc='best')
    plt.show()
    fig.savefig("plots/entity_scores_dtop.pdf", bbox_inches="tight")
entity_score_dtop()
In [19]:
def cosine_sim_dtop():
    # split the context-score deltas to the top candidate by class label
    mask = df['correct'].astype(bool)
    tp = df.loc[mask, 'cosine_sim_dtop']
    tn = df.loc[~mask, 'cosine_sim_dtop']
    # create bin boundaries (two extra bins left of 0 for the -0.04 sentinel)
    number_bins = 30
    bin_start, bin_end = 0.0, 1.0
    bin_width = (bin_end - bin_start) / number_bins
    bins = np.arange(bin_start, bin_end, bin_width)
    bins = np.append(bins, 1.0)
    bins = np.append([bin_start - 2 * bin_width, bin_start - bin_width], bins)
    # normalize histogram: show relative frequencies of classes
    p_weights = np.ones_like(tp) / float(len(tp))
    n_weights = np.ones_like(tn) / float(len(tn))
    fig, ax = plt.subplots()
    ax.hist(tp, weights=p_weights, bins=bins, alpha=p_alpha, label=p_label, color=p_color)
    ax.hist(tn, weights=n_weights, bins=bins, alpha=n_alpha, label=n_label, color=n_color)
    ax.set_xlim(-2 * bin_width, 1.0)
    ax.set_ylim([0.0, 1.0])
    #plt.title(r"Distribution of context scores' " + dtop)
    plt.xlabel(cs_dtop)
    plt.ylabel(relative_frequency)
    plt.legend(loc='best')
    plt.show()
    fig.savefig("plots/context_scores_dtop.pdf", bbox_inches="tight")
cosine_sim_dtop()
In [20]:
def entity_score_dsuc():
    # split the entity-score deltas to the successor candidate by class label
    mask = df['correct'].astype(bool)
    tp = df.loc[mask, 'entity_score_dsuc']
    tn = df.loc[~mask, 'entity_score_dsuc']
    # create bin boundaries (two extra bins left of 0 for the -0.04 sentinel)
    number_bins = 30
    bin_start, bin_end = 0.0, 1.0
    bin_width = (bin_end - bin_start) / number_bins
    bins = np.arange(bin_start, bin_end, bin_width)
    bins = np.append(bins, 1.0)
    bins = np.append([bin_start - 2 * bin_width, bin_start - bin_width], bins)
    # normalize histogram: show relative frequencies of classes
    p_weights = np.ones_like(tp) / float(len(tp))
    n_weights = np.ones_like(tn) / float(len(tn))
    fig, ax = plt.subplots()
    ax.hist(tp, weights=p_weights, bins=bins, alpha=p_alpha, label=p_label, color=p_color)
    ax.hist(tn, weights=n_weights, bins=bins, alpha=n_alpha, label=n_label, color=n_color)
    ax.set_xlim(-2 * bin_width, 1.0)
    ax.set_ylim([0.0, 1.0])
    #plt.title(r"Distribution of entity scores' " + dsuc)
    plt.xlabel(es_dsuc)
    plt.ylabel(relative_frequency)
    plt.legend(loc='best')
    plt.show()
    fig.savefig("plots/entity_scores_dsuc.pdf", bbox_inches="tight")
entity_score_dsuc()
In [21]:
def cosine_sim_dsuc():
    # split the context-score deltas to the successor candidate by class label
    mask = df['correct'].astype(bool)
    tp = df.loc[mask, 'cosine_sim_dsuc']
    tn = df.loc[~mask, 'cosine_sim_dsuc']
    # create bin boundaries (two extra bins left of 0 for the -0.04 sentinel)
    number_bins = 30
    bin_start, bin_end = 0.0, 1.0
    bin_width = (bin_end - bin_start) / number_bins
    bins = np.arange(bin_start, bin_end, bin_width)
    bins = np.append(bins, 1.0)
    bins = np.append([bin_start - 2 * bin_width, bin_start - bin_width], bins)
    # normalize histogram: show relative frequencies of classes
    p_weights = np.ones_like(tp) / float(len(tp))
    n_weights = np.ones_like(tn) / float(len(tn))
    fig, ax = plt.subplots()
    ax.hist(tp, weights=p_weights, bins=bins, alpha=p_alpha, label=p_label, color=p_color)
    ax.hist(tn, weights=n_weights, bins=bins, alpha=n_alpha, label=n_label, color=n_color)
    ax.set_xlim(-2 * bin_width, 1.0)
    ax.set_ylim([0.0, 1.0])
    #plt.title(r"Distribution of context scores' " + dsuc)
    plt.xlabel(cs_dsuc)
    plt.ylabel(relative_frequency)
    plt.legend(loc='best')
    plt.show()
    fig.savefig("plots/context_scores_dsuc.pdf", bbox_inches="tight")
cosine_sim_dsuc()