Naive Linking

Jiarui Xu - jxu57@illinois.edu


In [17]:
import pickle
import pyprind

In [11]:
# Root data directory. This is a machine-specific absolute path; the trailing
# slash matters because paths are built from it by plain string concatenation.
data_folder = "/Volumes/backup/ccg_tweet_wikifier_data/"

In [12]:
# Read the pickled tweet corpus from the data folder.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
tweets_path = data_folder + "Tweet/NEEL_tweets(with_grams).pickle"
with open(tweets_path, "rb") as pickle_file:
    tweet_corpus = pickle.load(pickle_file)

In [14]:
# Sanity check: number of entries in the loaded corpus.
len(tweet_corpus)


Out[14]:
1286

In [15]:
# Entity-info dump under the shared data root. Derived from data_folder so the
# root only has to be changed in one place; the resulting path is unchanged.
entity_info_file = data_folder + "wikidata/entity_info.txt"

In [18]:
# Number of progress-bar iterations; the loop ticks the bar once per line, so
# this is presumably the line count of entity_info_file (TODO confirm).
ENTITY_INFO_LINE_COUNT = 20951710

bar = pyprind.ProgBar(ENTITY_INFO_LINE_COUNT, width = 70)

# Merged result: every line's evaluated value is folded into this one dict.
entity_info = {}

with open(entity_info_file, "rb") as f:
    for line in f:
        pline = line.strip()
        bar.update()

        # A blank line would make eval() raise SyntaxError and abort the
        # whole load, so skip it (the bar has already been ticked above).
        if not pline:
            continue

        # SECURITY: eval() executes whatever expression the line contains.
        # Only run this on a trusted file. If every line is a plain dict
        # literal, ast.literal_eval (on the decoded text) is the safe
        # replacement.
        info = eval(pline)
        entity_info.update(info)


0%                                                                  100%
[######################################################################] | ETA: 00:00:00

In [ ]: