In [7]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sqlite3 import connect

In [3]:
con = connect('../data/nips-papers/database.sqlite')

In [4]:
texts = [x[0] for x in con.execute('select paper_text from papers;').fetchall()]
titles = [x[0] for x in con.execute('select title from papers;').fetchall()]
years = [x[0] for x in con.execute('select year from papers;').fetchall()]
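
The same three columns could also be pulled in a single round trip with pandas.read_sql (pandas is already imported above); this is just an equivalent sketch, with the column names taken from the queries above:

# Alternative: one query for all three columns, flattened into Python lists.
papers_df = pd.read_sql('select year, title, paper_text from papers;', con)
texts = papers_df['paper_text'].tolist()
titles = papers_df['title'].tolist()
years = papers_df['year'].tolist()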

In [5]:
from preprocessing import Preprocessor
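
The local preprocessing module is not included in this notebook. As a rough idea of the kind of normalisation it performs, a minimal Preprocessor might look like the sketch below (an assumption, not the actual implementation): lowercase the text, tokenize on alphabetic runs, and drop a handful of stopwords.

import re

class Preprocessor:
    # Hypothetical stand-in for the local preprocessing.Preprocessor.
    def __init__(self, stopwords=frozenset({'the', 'a', 'an', 'of', 'and', 'in', 'to', 'for'})):
        self.stopwords = stopwords
        self.token_re = re.compile(r'[a-z]+')

    def process(self, text):
        # Return a list of lowercased alphabetic tokens with stopwords removed.
        return [t for t in self.token_re.findall(text.lower()) if t not in self.stopwords]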

In [8]:
prepr = Preprocessor()
texts_t = [prepr.process(text) for text in tqdm(texts)]


100%|██████████████████████████████████████████████████████████████████████████████| 6560/6560 [08:54<00:00, 12.26it/s]

In [9]:
titles_t = [prepr.process(title) for title in tqdm(titles)]


100%|████████████████████████████████████████████████████████████████████████████| 6560/6560 [00:01<00:00, 4419.19it/s]

In [15]:
from importlib import reload
import ranking
reload(ranking)
from ranking import BasicVSRanker, EnsembleRanker
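
The ranking module is likewise local and not shown. A plausible reading of BasicVSRanker is a TF-IDF vector-space model over the tokenized documents; the sketch below is an assumption that only mirrors the from_tokenized constructor used here, with a hypothetical score method returning one relevance value per document.

import numpy as np
from collections import Counter

class BasicVSRanker:
    # Hypothetical TF-IDF vector-space ranker; from_tokenized mirrors the
    # notebook, the internals and the score method are assumptions.
    def __init__(self, doc_tfs, idf, vocab):
        self.doc_tfs = doc_tfs  # one term-frequency Counter per document
        self.idf = idf          # term -> inverse document frequency weight
        self.vocab = vocab

    @classmethod
    def from_tokenized(cls, docs):
        # docs: list of token lists, as produced by Preprocessor.process.
        doc_tfs = [Counter(d) for d in docs]
        df = Counter()
        for tf in doc_tfs:
            df.update(tf.keys())
        n = len(docs)
        idf = {t: np.log(n / (1 + c)) for t, c in df.items()}
        return cls(doc_tfs, idf, set(df))

    def score(self, query_tokens):
        # TF-IDF-weighted overlap between the query and each document,
        # normalised by the document vector length.
        q = Counter(t for t in query_tokens if t in self.vocab)
        scores = np.zeros(len(self.doc_tfs))
        for i, tf in enumerate(self.doc_tfs):
            num = sum(q[t] * tf[t] * self.idf[t] ** 2 for t in q)
            norm = np.sqrt(sum((tf[t] * self.idf[t]) ** 2 for t in tf)) or 1.0
            scores[i] = num / norm
        return scores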

In [16]:
text_r = BasicVSRanker.from_tokenized(texts_t)
title_r = BasicVSRanker.from_tokenized(titles_t)

In [29]:
ranker = EnsembleRanker()
ranker.add_ranker(text_r, 0.9)
ranker.add_ranker(title_r, 0.1)
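
EnsembleRanker is also only guessed at here: the sketch below combines the base rankers' scores as a weighted sum and returns the top-k document indices. The add_ranker and get_best_matches signatures mirror the notebook; everything else, including reusing the Preprocessor sketch above to tokenize the query, is an assumption.

class EnsembleRanker:
    # Hypothetical weighted ensemble over BasicVSRanker-style scorers.
    def __init__(self, preprocessor=None):
        self.rankers = []
        self.prepr = preprocessor or Preprocessor()  # Preprocessor sketch from above

    def add_ranker(self, ranker, weight):
        self.rankers.append((ranker, weight))

    def get_best_matches(self, query, k):
        # Weighted sum of each ranker's per-document scores, then top-k indices.
        q_tokens = self.prepr.process(query)
        total = sum(w * r.score(q_tokens) for r, w in self.rankers)
        return np.argsort(total)[::-1][:k]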

In [30]:
for i in ranker.get_best_matches('deep learning', 5):
    print("{} {}".format(years[i], titles[i]))


2014 Factoring Variations in Natural Images with Deep Gaussian Mixture Models
2010 Layer-wise analysis of deep networks with Gaussian kernels
2016 Deep Learning without Poor Local Minima
2014 Do Deep Nets Really Need to be Deep?
2016 A Probabilistic Framework for Deep Learning

