In [1]:
    
import pandas as pd
# Let pandas render up to 100 rows before truncating a displayed frame.
pd.options.display.max_rows = 100
    
In [2]:
    
from horacy import HoracyModel
# Construct a HoracyModel for 'model_4000/' and keep whatever .load() returns
# as the notebook-wide `model` used by every later cell.
# NOTE(review): 'model_4000/' is presumably a directory of saved model files,
# relative to the notebook's working directory — confirm.
model = HoracyModel('model_4000/').load()
    
    
In [25]:
    
# tf-idf lookup for 'ventil'. zip(*results) transposes what tfidf_query
# returns into rows (assumes three parallel sequences: ids, tokens and
# 'df' values — TODO confirm against HoracyModel.tfidf_query).
results = model.tfidf_query('ventil')
df = pd.DataFrame(
    list(zip(*results)),
    columns=['id', 'token', 'df'],
)
df
    
    Out[25]:
In [30]:
    
# tf-idf lookup for 'mechanical'. zip(*results) transposes what tfidf_query
# returns into rows (assumes three parallel sequences: ids, tokens and
# 'df' values — TODO confirm against HoracyModel.tfidf_query).
results = model.tfidf_query('mechanical')
df = pd.DataFrame(
    list(zip(*results)),
    columns=['id', 'token', 'df'],
)
df
    
    Out[30]:
In [8]:
    
def multi_query(*args):
    """Return the set of ids produced by every one of the given queries.

    Each argument is a string, either ``'query'`` or ``'query ~~ exclude'``.
    The text after the ``' ~~ '`` separator (an empty string when the
    separator is absent) is passed to ``model.inverted_query`` as its
    second argument.

    Returns:
        A set holding the intersection of the ids returned for all
        queries; an empty set when called with no arguments.
    """
    common = None
    for spec in args:
        query, _sep, exclude = spec.partition(' ~~ ')
        hits = model.inverted_query(query, exclude)
        if common is None:
            # The first query seeds the result; later ones only narrow it.
            common = set(hits)
        else:
            common.intersection_update(hits)
    return set() if common is None else common
def show_results(ids):
    """Print the result count, then the id and text of each document.

    Each document is fetched with ``model.get_doc`` and rendered with
    ``model.doc_to_text``; newline and carriage-return characters in the
    text are replaced by spaces so every document prints on one line.
    """
    print(f"RESULTS: {len(ids)}\n\n")
    for doc_id in ids:
        raw_text = model.doc_to_text(model.get_doc(doc_id))
        text = raw_text.replace('\n', ' ').replace('\r', ' ')
        print(f"id:{doc_id}\n{text}\n\n")
import re
def filter_results(ids, query):
    """Print each sentence of the given documents that matches ``query``.

    ``query`` is a regular-expression pattern, compiled once up front.
    For every id the document text is flattened to a single line
    (newlines and carriage returns become spaces) and split with
    ``model.text_to_sentences``; each sentence in which the pattern is
    found anywhere is printed as ``<id> <sentence>``.
    """
    pattern = re.compile(query)
    for doc_id in ids:
        raw_text = model.doc_to_text(model.get_doc(doc_id))
        flat_text = raw_text.replace('\n', ' ').replace('\r', ' ')
        matching = (
            sentence
            for sentence in model.text_to_sentences(flat_text)
            if pattern.search(sentence)
        )
        for sentence in matching:
            print(doc_id, sentence)
        
# Ids returned for both the 'mechanical' and the 'ventil' query (intersection).
ids = multi_query('mechanical','ventil')
# The pattern must be a raw string: in a plain literal '\b' is the backspace
# character (0x08), not the regex word-boundary anchor, so nothing would match.
filter_results(ids, r'\b(age|old|year)')
    
In [ ]: