In [7]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sqlite3 import connect
In [3]:
# Location of the NIPS papers SQLite database, relative to the current working directory.
DB_PATH = '../data/nips-papers/database.sqlite'

# Open through a read-only URI. A plain connect(path) silently creates a new,
# empty database when the path is wrong, and the mistake would only show up
# later when the `papers` table cannot be found. With mode=ro a missing file
# raises sqlite3.OperationalError right here, and the dataset cannot be
# modified by accident.
con = connect('file:{}?mode=ro'.format(DB_PATH), uri=True)
In [4]:
# Fetch all three columns in a single query so that texts[i], titles[i] and
# years[i] always describe the same paper. Independent SELECTs without an
# ORDER BY carry no guarantee of returning rows in the same order.
rows = con.execute('select paper_text, title, year from papers;').fetchall()

texts = [row[0] for row in rows]
titles = [row[1] for row in rows]
# Unwrap the year from its row tuple so it formats as the bare value
# rather than as a one-element tuple.
years = [row[2] for row in rows]
In [5]:
from preprocessing import Preprocessor
In [8]:
# A single Preprocessor instance is shared by every preprocessing step below.
prepr = Preprocessor()

# Run each paper's full text through the preprocessor; tqdm reports progress.
texts_t = list(map(prepr.process, tqdm(texts)))
In [9]:
# Run each paper title through the shared preprocessor; tqdm reports progress.
titles_t = list(map(prepr.process, tqdm(titles)))
In [15]:
from importlib import reload
import ranking
# Re-execute the ranking module so that edits made to its source are picked
# up without restarting the kernel.
reload(ranking)
# Import the classes only after the reload so these names bind to the
# freshly reloaded definitions rather than to stale ones.
from ranking import BasicVSRanker, EnsembleRanker
In [16]:
# Build one BasicVSRanker per corpus, in order: first from the preprocessed
# paper texts, then from the preprocessed titles.
text_r, title_r = map(BasicVSRanker.from_tokenized, (texts_t, titles_t))
In [29]:
# Combine both rankers into a single ensemble. The number paired with each
# ranker is passed straight to add_ranker (presumably its weight in the
# ensemble -- confirm against ranking.EnsembleRanker).
ranker = EnsembleRanker()
for member, weight in ((text_r, 0.9), (title_r, 0.1)):
    ranker.add_ranker(member, weight)
In [30]:
# Print one "<year> <title>" line per result for a sample query.
# Each value yielded by get_best_matches is used as an index into the
# `years` and `titles` lists.
for i in ranker.get_best_matches('deep learning', 5):
    print("{} {}".format(years[i], titles[i]))
In [28]:
# NOTE(review): this cell repeats the query cell directly above it, and its
# execution count (28) is lower than that of the cell that builds `ranker`
# (29), so any saved output here came from an earlier ranker configuration.
# Consider deleting it once the notebook runs cleanly top to bottom.
#
# Print one "<year> <title>" line per result for a sample query.
for i in ranker.get_best_matches('deep learning', 5):
    print("{} {}".format(years[i], titles[i]))
In [ ]: