In [9]:
import pickle

In [ ]:
# Deserialize the tweet corpus from the pickle stored under `data_folder`.
# NOTE(review): `data_folder` is not defined in the visible cells -- it must be
# set earlier in the notebook for this cell to run on a fresh kernel.
# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it comes from a trusted source.
corpus_path = data_folder + "Tweet/NEEL_tweets(with_grams).pickle"
with open(corpus_path, "rb") as corpus_file:
    tweet_corpus = pickle.load(corpus_file)

In [5]:
# Collect the first tab-separated field of every line in the trainDev label
# file, in file order (duplicates are kept because `ids` is a list).
# NOTE(review): the first column presumably holds tweet IDs -- confirm against
# the file format.
# NOTE(review): the path below is an absolute, machine-specific location.
ids = []

with open("/Users/erichsu/Documents/research/ccg_tweet_wikification/data/v1-NEELOnly/label-trainDev.tsv", "rb") as f:
    for line in f:
        # The file is opened in binary mode, so each line is bytes: split on a
        # bytes delimiter (a str delimiter raises TypeError on Python 3).
        # Stripping the line terminator first keeps "\n" out of the field when
        # a line contains no tab; only the first field is decoded to text.
        ids.append(line.rstrip(b"\r\n").split(b"\t", 1)[0].decode("utf-8"))

In [6]:
# Number of entries currently held in `ids`.
len(ids)


Out[6]:
2202

In [7]:
# Collect the distinct values of the first tab-separated field of every line
# in the NEEL2015 training tweet-ID file (a set, so duplicates collapse).
# This rebinds `ids`, replacing whatever it held before this cell ran.
# NOTE(review): the path below is an absolute, machine-specific location on an
# external volume.
ids = set()
with open("/Volumes/backup/ccg_tweet_wikifier_data/microposts2015-neel_challenge_gs/NEEL2015-training-tweets-ids.tsv", "rb") as f:
    for line in f:
        # The file is opened in binary mode, so each line is bytes: split on a
        # bytes delimiter (a str delimiter raises TypeError on Python 3).
        # Stripping the line terminator first keeps "\n" out of the field when
        # a line contains no tab; only the first field is decoded to text.
        ids.add(line.rstrip(b"\r\n").split(b"\t", 1)[0].decode("utf-8"))

In [8]:
# Number of entries currently held in `ids`.
len(ids)


Out[8]:
3498

In [ ]: