In [1]:
from whoosh.index import create_in
from whoosh.fields import *
import io
from __future__ import print_function
from os.path import join, isdir
from os import listdir

In [2]:
data_path = './data'
index_path = './index'

In [3]:
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
ix = create_in(index_path,schema)
writer = ix.writer()

In [6]:
data_folders = listdir(data_path)
for folder in data_folders:
    if isdir(join(data_path, folder)):        
        files = listdir(join(data_path, folder))
        for fl in files:
            data_file_path = join(data_path, folder, fl)
            try:
                writer.add_document(title = fl[:fl.find('.txt')].decode(encoding='utf-8'),
                    path = data_file_path.decode(encoding='utf-8'), content = io.open(data_file_path, 'r',encoding='utf-8').read())          
            except Exception:
                print(data_file_path)
    print(folder, end=' ')
writer.commit()


I H M Р 5 F A 4 P metainfo.txt ./data/В/Встроенный язык программирования 1С
В А Т Щ Ч N И О Е E S G U 7 6 Н 9 Ш Z Й 8 М X З 1 Ё O У .NET Framework.txt Х С C W V Б J B П 3 ./data/Д/Дюна
Д Г Э Л 2 К K L T Q D Ц Ф Я Ж R ( Ю 

In [7]:
from whoosh.qparser import QueryParser

In [17]:
with ix.searcher() as searcher:
    query = QueryParser("content", ix.schema).parse(u'Литва')
    results = searcher.search(query)
    print(results[0]['path'])


./data/Л/Литва.txt

In [ ]: