In [9]:
import pickle
In [ ]:
# Load the pickled tweet corpus stored under `data_folder` (defined elsewhere
# in the notebook). The path is built by plain string concatenation, so
# `data_folder` must already end with a path separator.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
corpus_path = data_folder + "Tweet/NEEL_tweets(with_grams).pickle"
with open(corpus_path, "rb") as corpus_file:
    tweet_corpus = pickle.load(corpus_file)
In [5]:
# Collect the first tab-separated field of every row in the train/dev label
# file (treated here as the id), keeping file order and duplicates.
ids = []
with open("/Users/erichsu/Documents/research/ccg_tweet_wikification/data/v1-NEELOnly/label-trainDev.tsv", "rb") as f:
    for line in f:
        # The file is read as bytes, so the separator must be bytes as well:
        # splitting bytes on the str "\t" raises TypeError on Python 3.
        # Stripping the line ending first keeps a trailing newline out of the
        # id when a row has no tab at all.
        first_field = line.rstrip(b"\r\n").split(b"\t")[0]
        # Blank rows (or rows starting with a tab) carry no id; skip them
        # instead of recording an empty string.
        if first_field:
            # Decode so the ids are text on both Python 2 and Python 3.
            # NOTE(review): assumes the id column is UTF-8/ASCII -- confirm.
            ids.append(first_field.decode("utf-8"))
In [6]:
# Display the number of entries in `ids`; when `ids` is the list built from
# the label file, repeated ids are counted each time they occur.
len(ids)
Out[6]:
In [7]:
# Rebuild `ids` as the set of distinct first tab-separated fields found in the
# NEEL 2015 training tweet-id file (this replaces any earlier value of `ids`).
ids = set()
with open("/Volumes/backup/ccg_tweet_wikifier_data/microposts2015-neel_challenge_gs/NEEL2015-training-tweets-ids.tsv", "rb") as f:
    for line in f:
        # The file is read as bytes, so the separator must be bytes as well:
        # splitting bytes on the str "\t" raises TypeError on Python 3.
        # Stripping the line ending first keeps a trailing newline out of the
        # id when a row consists of a single column.
        first_field = line.rstrip(b"\r\n").split(b"\t")[0]
        # Blank rows (or rows starting with a tab) carry no id; skip them
        # instead of adding an empty string to the set.
        if first_field:
            # Decode so the ids are text on both Python 2 and Python 3.
            # NOTE(review): assumes the id column is UTF-8/ASCII -- confirm.
            ids.add(first_field.decode("utf-8"))
In [8]:
# Display the number of entries in `ids`; when `ids` is the set built from
# the training tweet-id file, this is the count of distinct ids.
len(ids)
Out[8]:
In [ ]: