Dependencies:
In [12]:
import nbformat
nbformat API documentation: https://nbformat.readthedocs.io/en/latest/api.html
In [84]:
from whoosh.index import create_in
from whoosh.fields import *
from whoosh.qparser import QueryParser
In [176]:
import os, fnmatch
import stat
import datetime
In [582]:
# Index schema: one Whoosh document per notebook. Every field is declared
# with stored=True so its value can be read back from a search hit.
schema = Schema(
    # Notebook content
    title=TEXT(stored=True),
    markdown=TEXT(stored=True),
    code=TEXT(stored=True),

    # Where the notebook lives and how it is labelled
    path=ID(stored=True),
    user=KEYWORD(stored=True),
    tags=KEYWORD(stored=True),

    # File-system metadata
    modified=DATETIME(stored=True),
    accessed=DATETIME(stored=True),
    size=NUMERIC(stored=True),
)
In [583]:
# create_in() expects the index directory to exist already, so make sure it
# does; otherwise this cell fails on a fresh checkout.
if not os.path.isdir("indexdir"):
    os.makedirs("indexdir")

ix = create_in("indexdir", schema)
# Single writer shared by the indexing cells below; it must be committed
# before the index can be searched.
writer = ix.writer()
In [584]:
def get_file_info(path):
    """Collect ownership, size and timestamp metadata for ``path``.

    Parameters
    ----------
    path : str
        File to inspect.

    Returns
    -------
    dict
        Always contains ``uid``, ``gid``, ``size``, ``atime``, ``mtime`` and
        ``ctime`` when ``os.stat`` succeeds. ``owner`` and ``complete_owner``
        are added only when the ``pwd`` database can resolve the uid.
        If the file cannot be stat'ed, a message is printed and an empty
        dict is returned.
    """
    info = dict()
    try:
        st = os.stat(path)
    except OSError:
        # os.stat raises OSError (IOError is only an alias on Python 3).
        print("Failed to get information about", path)
        return info

    info['uid'] = st.st_uid
    info['gid'] = st.st_gid
    info['size'] = st.st_size
    info['atime'] = st.st_atime
    info['mtime'] = st.st_mtime
    info['ctime'] = st.st_ctime

    try:
        import pwd  # not available on all platforms
        userinfo = pwd.getpwuid(st.st_uid)
    except (ImportError, KeyError):
        # Best effort: the numeric uid is still available in info['uid'].
        print("Failed to get the owner name for", path)
    else:
        info['owner'] = userinfo.pw_name
        info['complete_owner'] = userinfo.pw_gecos
    return info
In [585]:
def add_notebook(writer, path):
    """Queue one notebook file on ``writer`` as an index document.

    The notebook's markdown cells and code cells are concatenated into two
    separate text fields (each cell followed by a blank line), and the file's
    metadata from ``get_file_info`` is attached.

    Parameters
    ----------
    writer : whoosh index writer
        Writer obtained from ``ix.writer()``; the caller is responsible for
        committing it.
    path : str
        Path of the ``.ipynb`` file to index.
    """
    info = get_file_info(path)
    # Convert to notebook format v4 in memory so older notebooks also expose
    # a top-level 'cells' list (earlier formats nest cells differently).
    note = nbformat.read(path, as_version=4)

    markdown_parts = []
    code_parts = []
    for cell in note['cells']:
        if cell['cell_type'] == 'markdown':
            markdown_parts.append(cell['source'])
        elif cell['cell_type'] == 'code':
            code_parts.append(cell['source'])

    # File name without directory or final extension; interior dots are kept
    # ("a.b.ipynb" -> "a.b") and the platform's path separator is honoured.
    title = os.path.splitext(os.path.basename(path))[0]

    writer.add_document(
        title=title,
        path=path,
        markdown=''.join(part + '\n\n' for part in markdown_parts),
        code=''.join(part + '\n\n' for part in code_parts),
        # The owner name is missing when the pwd lookup failed; index an
        # empty user instead of raising KeyError.
        user=info.get('owner', ''),
        tags='',
        modified=datetime.datetime.fromtimestamp(info['mtime']),
        accessed=datetime.datetime.fromtimestamp(info['atime']),
        size=info['size']
    )
In [586]:
def find(path, pattern, antipattern):
    """Recursively list files under ``path`` whose name matches ``pattern``.

    Parameters
    ----------
    path : str
        Directory to walk.
    pattern : str
        ``fnmatch`` pattern a file name must match to be included.
    antipattern : str
        ``fnmatch`` pattern for exclusions. It is applied to file names and
        to directory names; matching directories are not descended into
        (this is what keeps ``.ipynb_checkpoints`` copies out of the result).

    Returns
    -------
    list of str
        Matching file paths, each joined onto the directory it was found in.
    """
    result = []
    for root, dirs, files in os.walk(path):
        # Prune in place so os.walk skips excluded directories entirely.
        dirs[:] = [d for d in dirs if not fnmatch.fnmatch(d, antipattern)]
        for name in files:
            if fnmatch.fnmatch(name, pattern) and not fnmatch.fnmatch(name, antipattern):
                result.append(os.path.join(root, name))
    return result
# Root directory to scan for notebooks. Defaults to the original author's
# project folder; set the NOTEBOOK_SEARCH_ROOT environment variable to index
# a different tree without editing this cell.
notebook_root = os.environ.get('NOTEBOOK_SEARCH_ROOT', '/Users/lukas/Projects')
file_path = find(notebook_root, '*.ipynb', '*.ipynb_checkpoints')
In [587]:
# Index every discovered notebook. A single unreadable or malformed file
# should not abort the whole run (and leave the writer uncommitted), so
# failures are reported and collected for later inspection.
failed_notebooks = []
for path in file_path:
    try:
        add_notebook(writer, path)
    except Exception as exc:  # deliberately broad: best-effort bulk indexing
        failed_notebooks.append(path)
        print("Skipping {}: {!r}".format(path, exc))
In [588]:
# Commit the documents queued on `writer` by the indexing loop above.
writer.commit()
In [589]:
from IPython.html import widgets
from IPython.display import display, clear_output
from IPython.core.display import HTML
from yattag import Doc
from hurry.filesize import size
In [590]:
# Inject a <style> block into the notebook page: collapse borders and add
# margins on table rows/cells, and centre borderless tables.
# (presumably `.rendered_html` is the class of rendered output areas — confirm)
display(HTML('''
<style>
.rendered_html tr, .rendered_html th, .rendered_html td {
border-collapse: collapse;
margin: 1em 2em;
}
.rendered_html table {
margin-left: auto;
margin-right: auto;
border: none;
border-collapse: collapse;
}
</style>
'''
))
In [591]:
def format_output(results):
    """Render search hits as a striped HTML table and display it.

    Each row shows the hit's position and a nested one-row table holding a
    link (title text, ``href`` set to the stored path) and the stored user.

    NOTE: a later cell defines another ``format_output``; after running the
    notebook top to bottom that later definition replaces this one.

    Parameters
    ----------
    results : iterable of hits
        Search results; each hit must provide 'path', 'title' and 'user'.
    """
    doc, tag, text = Doc().tagtext()
    doc.text('Number of hits: {}\n'.format(len(results)))
    with tag('table', klass='table table-striped'):
        with tag('thead'):
            with tag('tr'):
                with tag('th'):
                    doc.text('#')
                with tag('th'):
                    doc.text('Title')
        with tag('tbody'):
            for idx, result in enumerate(results):
                with tag('tr'):
                    with tag('td'):
                        doc.text(idx)
                    with tag('td'):
                        with tag('table'):
                            with tag('tbody'):
                                with tag('tr'):
                                    with tag('td', klass='col-md-6'):
                                        with tag('a', href=result['path']):
                                            doc.text(result['title'])
                                    with tag('td', klass='col-md-6'):
                                        # Leftover "asdf" debug placeholder removed.
                                        doc.text(' ({})'.format(result['user']))
    display(HTML(doc.getvalue()))
In [592]:
import whoosh.highlight as highlight
class BracketFormatter(highlight.Formatter):
    """Highlight formatter that wraps each matched term in ``<mark>`` tags."""

    def format_token(self, text, token, replace=False):
        """Return the HTML snippet for one matched token.

        Parameters
        ----------
        text : str
            The original text the token was found in.
        token : whoosh token
            The matched token.
        replace : bool
            Forwarded to ``highlight.get_text`` to choose between the token's
            own text and the corresponding slice of ``text``. Previously this
            argument was ignored and ``True`` was always passed.
        """
        # Use the get_text function to get the text corresponding to the
        # token
        tokentext = highlight.get_text(text, token, replace)
        # Return the text as you want it to appear in the highlighted
        # string
        return "<mark>%s</mark>" % tokentext
In [593]:
def format_output(results):
    """Render search hits as a grid of ``row`` / ``col-md-*`` divs and display it.

    For every hit four rows are emitted: index and title, path, a metadata
    line (user, size, modification time) and the highlighted excerpt from the
    'markdown' field, followed by a line break.

    Parameters
    ----------
    results : whoosh search results
        Hits must provide 'title', 'path', 'user', 'size' and 'modified' and
        support ``highlights()``.
    """
    doc, tag, text = Doc().tagtext()
    doc.text('Number of hits: {}\n'.format(len(results)))
    for idx, result in enumerate(results):
        with tag('div', klass='row'):
            with tag('div', klass='col-md-1'):
                doc.text(idx)
            with tag('div', klass='col-md-11'):
                with tag('strong'):
                    doc.text(result['title'])
        with tag('div', klass='row'):
            with tag('div', klass='col-md-1'):
                pass
            with tag('div', klass='col-md-11'):
                with tag('a'):
                    doc.text(result['path'])
        with tag('div', klass='row'):
            with tag('div', klass='col-md-1'):
                pass
            with tag('div', klass='col-md-2'):
                doc.text('User: {}'.format(result['user']))
            with tag('div', klass='col-md-2'):
                human_size = size(result['size'])
                # size() yields e.g. "5K" but "500B" for small files; only
                # append the unit letter when it is not already there.
                if not human_size.endswith('B'):
                    human_size += 'B'
                doc.text('Size: {}'.format(human_size))
            with tag('div', klass='col-md-7'):
                doc.text('Modified: {}'.format(result['modified']))
        with tag('div', klass='row'):
            with tag('div', klass='col-md-1'):
                pass
            with tag('div', klass='col-md-11'):
                # Highlight markup is inserted as-is (not escaped).
                doc.asis(result.highlights('markdown'))
        # <br> is a void element: emit a self-closing tag, not <br></br>.
        doc.stag('br')
    display(HTML(doc.getvalue()))
In [594]:
def search(query_string, field='markdown', limit=200):
    """Run a query against the index and display the formatted hits.

    Parameters
    ----------
    query_string : str
        Query text, parsed with ``QueryParser`` against the index schema.
    field : str
        Default field searched when the query does not name one.
    limit : int
        Maximum number of hits requested from the searcher (default 200,
        the value that was previously hard-coded).
    """
    with ix.searcher() as searcher:
        query = QueryParser(field, ix.schema).parse(query_string)
        results = searcher.search(query, limit=limit)
        results.formatter = BracketFormatter()
        # Excerpt sizing for the highlighted fragments.
        results.fragmenter.maxchars = 300
        results.fragmenter.surround = 50
        # Render while the searcher is still open: the results (and their
        # highlights) are read from it.
        format_output(results)
In [598]:
def button_callback(btn):
    """Click handler: wipe the previous output and search for the text-box value.

    ``btn`` is the widget that fired the event; it is not used. The query is
    read from the first child of the module-level ``container``.
    """
    clear_output()
    query_widget = container.children[0]
    search(query_string=query_widget.value)
# Build the search UI: a text box holding the query and a button that runs it.
# `Button` is used instead of the old `ButtonWidget` name so the cell matches
# the `Text` / `HBox` naming it already relies on.
button = widgets.Button(description="Click me!")
button.on_click(button_callback)
text_box = widgets.Text(value='exa*')
# The text box must stay the first child: button_callback reads
# container.children[0].value.
container = widgets.HBox(children=(text_box, button))
display(container)
In [ ]: