Dependencies:

  • whoosh
  • yattag
  • hurry.filesize

In [12]:
import nbformat

In [84]:
from whoosh.index import create_in
from whoosh.fields import *
from whoosh.qparser import QueryParser

In [176]:
import os, fnmatch
import stat
import datetime

In [582]:
schema = Schema(
    title=TEXT(stored=True),
    markdown=TEXT(stored=True),
    code=TEXT(stored=True),
    path=ID(stored=True),
    user=KEYWORD(stored=True),
    tags=KEYWORD(stored=True),
    modified=DATETIME(stored=True),
    accessed=DATETIME(stored=True),
    size=NUMERIC(stored=True)
)

In [583]:
ix = create_in("indexdir", schema)
writer = ix.writer()

In [584]:
def get_file_info(path):
    info = dict()
    try:
        st = os.stat(path)
        info['uid'] = st.st_uid
        info['gid'] = st.st_gid
        info['size'] = st.st_size
        info['atime'] = st.st_atime
        info['mtime'] = st.st_mtime
        info['ctime'] = st.st_ctime
    except IOError:
        print("Failed to get information about", file)
    else:
        try:
            import pwd # not available on all platforms
            userinfo = pwd.getpwuid(st[stat.ST_UID])
        except (ImportError, KeyError):
            print("Failed to get the owner name for", file)
        else:
            info['owner'] = userinfo.pw_name
            info['complete_owner'] = userinfo.pw_gecos
    return info

In [585]:
def add_notebook(writer, path):
    info = get_file_info(path)
    note = nbformat.read(path, nbformat.NO_CONVERT)
    
    markdown = ''
    code = ''
    tags = ''

    for cell in note['cells']:
        if cell['cell_type'] == 'markdown':
            markdown += cell['source']
            markdown += '\n\n'
        if cell['cell_type'] == 'code':
            code += cell['source']
            code += '\n\n'
    
    writer.add_document(
        title=''.join(path.split('/')[-1].split('.')[:-1]),
        path=path,
        markdown=markdown,
        code=code,
        user=info['owner'],
        tags=tags,
        modified=datetime.datetime.fromtimestamp(info['mtime']),
        accessed=datetime.datetime.fromtimestamp(info['atime']),
        size=info['size']
    )

In [586]:
def find(path, pattern, antipattern):
    result = []
    for root, dirs, files in os.walk(path):
        for name in files:
            if fnmatch.fnmatch(name, pattern) and not fnmatch.fnmatch(name, antipattern):
                result.append(os.path.join(root, name))
    return result
file_path = find('/Users/lukas/Projects', '*.ipynb', '*.ipynb_checkpoints')

In [587]:
for path in file_path:
    add_notebook(writer, path)

In [588]:
writer.commit()

In [589]:
from IPython.html import widgets
from IPython.display import display, clear_output
from IPython.core.display import HTML
from yattag import Doc
from hurry.filesize import size

In [590]:
display(HTML('''
<style>
.rendered_html tr, .rendered_html th, .rendered_html td {
border-collapse: collapse;
margin: 1em 2em;
}
.rendered_html table {
margin-left: auto;
margin-right: auto;
border: none;
border-collapse: collapse;
}
</style>
'''
))



In [591]:
def format_output(results):
    doc, tag, text = Doc().tagtext()
    doc.text('Number of hits: {}\n'.format(len(results)))
    with tag('table', klass='table table-striped'):
        with tag('thead'):
            with tag('tr'):
                with tag('th'):
                    doc.text('#')
                with tag('th'):
                    doc.text('Title')
        with tag('tbody'):
            for idx, result in enumerate(results):
                with tag('tr'):
                    with tag('td'):
                        doc.text(idx)
                    with tag('td'):
                        with tag('table'):
                            with tag('tbody'):
                                with tag('tr'):
                                    with tag('td', klass='col-md-6'):
                                        with tag('a', href=result['path']):
                                            doc.text(result['title'])
                                    with tag('td', klass='col-md-6'):
                                        doc.text(' asdf({})'.format(result['user']))
    display(HTML(doc.getvalue()))

In [592]:
import whoosh.highlight as highlight
class BracketFormatter(highlight.Formatter):
    def format_token(self, text, token, replace=False):
        # Use the get_text function to get the text corresponding to the
        # token
        tokentext = highlight.get_text(text, token, replace=True)

        # Return the text as you want it to appear in the highlighted
        # string
        return "<mark>%s</mark>" % tokentext

In [593]:
def format_output(results):
    doc, tag, text = Doc().tagtext()
    doc.text('Number of hits: {}\n'.format(len(results)))
    for idx, result in enumerate(results):
        with tag('div', klass='row'):
            with tag('div', klass='col-md-1'):
                doc.text(idx)
            with tag('div', klass='col-md-11'):
                with tag('strong'):
                    doc.text(result['title'])
        with tag('div', klass='row'):
            with tag('div', klass='col-md-1'):
                pass
            with tag('div', klass='col-md-11'):
                with tag('a'):
                    doc.text(result['path'])
        with tag('div', klass='row'):
            with tag('div', klass='col-md-1'):
                pass
            with tag('div', klass='col-md-2'):
                doc.text('User: {}'.format(result['user']))
            with tag('div', klass='col-md-2'):
                doc.text('Size: {}B'.format(size(result['size'])))
            with tag('div', klass='col-md-7'):
                doc.text('Modified: {}'.format(result['modified']))
        with tag('div', klass='row'):
            with tag('div', klass='col-md-1'):
                pass
            with tag('div', klass='col-md-11'):
                doc.asis(result.highlights('markdown'))
        with tag('br'):
            pass
    display(HTML(doc.getvalue()))

In [594]:
def search(query_string, field='markdown'):
    limit = 200
    with ix.searcher() as searcher:
        query = QueryParser(field, ix.schema).parse(query_string)
        results = searcher.search(query, limit=limit)
        brf = BracketFormatter()
        results.formatter = brf
        results.fragmenter.maxchars = 300
        results.fragmenter.surround = 50
        format_output(results)

In [598]:
def button_callback(btn):
    clear_output()
    search(query_string=container.children[0].value)

button = widgets.ButtonWidget(description="Click me!")
button.on_click(button_callback)
text_box = widgets.Text(value='exa*')
container = widgets.HBox(children=(text_box, button))
display(container)


Number of hits: 7
0
2015-09-01 Complex derivatives
User: lukas
Size: 4KB
Modified: 2015-09-02 08:32:30
are satisfied, the above equations yield an exact derviative. If not, the above equations contradict each...when far away from that critical point? ## Simple example \begin{align} f(z) &= |z|^2 & u(x, y) &= x^2 + y^2 & v(x, y) &= 0 & \frac{\partial u}{\partial...magnitudes and to compare phases. These are examples: - Exponential distribution to compare magnitudes

1
2015-10-01 First complex Chainer tests
User: lukas
Size: 105KB
Modified: 2015-10-02 10:44:27
example functions to generate training data # Define network architecture # Training Until now, just works with vanilla stochastic

2
2015-09-01 Complex derivatives-checkpoint
User: lukas
Size: 4KB
Modified: 2015-09-02 08:32:30
are satisfied, the above equations yield an exact derviative. If not, the above equations contradict each...when far away from that critical point? ## Simple example \begin{align} f(z) &= |z|^2 & u(x, y) &= x^2 + y^2 & v(x, y) &= 0 & \frac{\partial u}{\partial...magnitudes and to compare phases. These are examples: - Exponential distribution to compare magnitudes

3
2015-10-01 First complex Chainer tests-checkpoint
User: lukas
Size: 105KB
Modified: 2015-10-02 10:44:27
example functions to generate training data # Define network architecture # Training Until now, just works with vanilla stochastic

4
Einsum
User: lukas
Size: 11KB
Modified: 2015-10-02 10:43:56
Einsum examples - There are some examples here: http://docs.scipy.org/doc/numpy/reference/generated

5
MNIST_Latent_Autoencoder
User: lukas
Size: 604KB
Modified: 2015-10-02 10:43:56
factors of variation in deep networks This is an example of how to implement the autoencoder architecture from...and output data - plot cost evolution and one test example ## Define function to plot input images, output images

6
MNIST_Latent_Autoencoder
User: lukas
Size: 18KB
Modified: 2015-08-20 08:46:05
factors of variation in deep networks This is an example of how to implement the autoencoder architecture from...and output data - plot cost evolution and one test example ## Define function to plot input images, output images


In [ ]: