Parallel query mechanism:

* create a worker for every word (local)
* run each worker in parallel

  page=1
    round 1.1: [do], [re], [mi], [fa], [sol]
    round 1.2: [do, re], [re, do], [mi, fa], [fa, sol]
    ...
  page=2
    round 2.1: ...

  each worker keeps track of its page and round
* after each round, check the graph for a path and decide whether to order another round or not.
  Heuristic:
  - if no path exists, order another round or a page increase
  - if the average edge (substring) length is smaller than 3-4, order another round but not a page increase
  - else return the shortest-path playlist
* make sure to cache queries by term AND page

In [1]:
from gevent import monkey; monkey.patch_socket()
import spotimeta
* cache only possibly useful stuff — i.e. when the track name is a substring of the poem
* also cache by term — substrings repeat (e.g. "take me far, take me away")
* there could be multiple matches — perhaps all should be made available, maybe even tweaked by mood or genre or something

In [21]:
# In-process cache: maps normalized (lowercased) track names / terms
# to Spotify track metadata dicts.
cache = {}

def term_conditioner(term):
    """Normalize a search term so cache lookups are case-insensitive."""
    normalized = term.lower()
    return normalized

def query(term, s):
    """Search Spotify for `term` via spotimeta, caching tracks by name.

    Parameters:
      term -- candidate phrase (a run of consecutive poem words)
      s    -- the slice this phrase came from; passed through unchanged so
              the caller can map the result back to its word positions.

    Returns (has_results, exact_match, s):
      has_results -- True when the remote search returned any tracks
                     (always False on a cache hit: no remote call is made)
      exact_match -- cached track whose name equals the conditioned term,
                     or None
    """
    term = term_conditioner(term)
    has_results = False
    if term in cache:
        # Term already resolved locally — no network round-trip needed.
        print('cache hit')
    else:
        # Cache miss: query the Spotify metadata API and cache every
        # returned track under its lowercased name, so later queries for
        # any of those titles are served from the cache.
        response = spotimeta.search_track(term)
        result = response['result']
        if result:
            has_results = True
            for track in result:
                name = track['name'].lower()
                cache[name] = track
    exact_match = cache.get(term)
    return has_results, exact_match, s

In [22]:
def slicer(iterable, start):
    """Yield successively longer slices anchored at `start`.

    For a sequence of length n: slice(start, start+1),
    slice(start, start+2), ..., slice(start, n).
    """
    end = start + 1
    limit = len(iterable)
    while end <= limit:
        yield slice(start, end)
        end += 1

In [26]:
import gevent
import networkx as nx
from itertools import izip_longest

def investigate(words):
    """Build a DiGraph whose nodes are word-boundary indices 0..len(words).

    One gevent worker is spawned per candidate phrase per round. An edge
    (i, j) carries the track whose title exactly matches words[i:j], so a
    path from 0 to len(words) covers the whole text with track titles.
    """
    graph = nx.DiGraph()
    slicers = [slicer(words, i) for i in range(len(words))]
    rounds = izip_longest(*slicers)
    for current_round in rounds:
        jobs = []
        for s in current_round:
            # izip_longest pads exhausted slicers with None — skip those.
            if s is not None:
                term = ' '.join(words[s])
                jobs.append(gevent.spawn(query, term, s))
        # Wait for every worker before reading results: a greenlet's
        # .value is None until it has actually finished.
        gevent.joinall(jobs)
        for has_results, exact_match, s in (job.value for job in jobs):
            if exact_match:
                # The slice already has an exclusive stop (slice(start, stop)),
                # so the edge endpoint is s.stop — not s.stop + 1.
                graph.add_edge(s.start, s.stop, track=exact_match)
    return graph

In [27]:
# Sample lyric to cover with track titles.
words = "If I can't let it go out of my mind".split()

In [28]:
# Build the phrase graph (spawns one query worker per candidate substring).
graph = investigate(words)

cache hit
cache hit
cache hit
cache hit
cache hit
cache hit
cache hit
cache hit

In [6]:
from itertools import tee, izip

def pairwise(iterable):
    """Return consecutive overlapping pairs: s -> (s0,s1), (s1,s2), (s2,s3), ..."""
    left, right = tee(iterable)
    # Advance the second iterator by one so the streams are offset.
    next(right, None)
    return izip(left, right)

# If any edges were found, walk the shortest 0 -> len(words) path and print
# the Spotify URI of the track covering each edge.
# NOTE(review): nx.shortest_path raises NetworkXNoPath when the graph is
# non-empty but 0 and len(words) are disconnected — confirm that's acceptable.
if len(graph) > 0:
    path = nx.shortest_path(graph, 0, len(words))
    edges = pairwise(path)
    print [graph.edge[start][stop]['track']['href'] for start, stop in edges]

[u'spotify:track:7uKFB4W13N5WGZSmmr4bDl', u'spotify:track:5k7nFvA1srxjUfjo0zTxQC', u'spotify:track:2rilBmoyI8fqer3KTJnk9e']

In [7]:
from collections import deque

def bfs(graph, start, end):
    """Breadth-first search for the shortest path from `start` to `end`.

    `graph` needs only support `graph[node]` -> iterable of neighbors
    (a networkx graph or a plain dict of adjacency lists both work).

    Returns the path as a list [start, ..., end], or None when `end`
    is unreachable from `start`.
    """
    # parent maps each discovered node to the node we reached it from;
    # it doubles as the visited set.
    parent = {start: None}
    queue = deque([start])
    while queue:
        node = queue.popleft()
        # When we find the end node, backtrack through `parent`
        # to reconstruct the shortest path.
        if node == end:
            path = [end]
            while path[-1] != start:
                path.append(parent[path[-1]])
            path.reverse()
            return path
        for neighbor in graph[node]:
            # Only enqueue undiscovered nodes; the first discovery is
            # always via a shortest route in an unweighted graph.
            if neighbor not in parent:
                parent[neighbor] = node
                queue.append(neighbor)
    return None

In [8]:
# Shortest boundary path via the hand-rolled BFS (timed against networkx below).
path = bfs(graph, 0, len(words))

[0, 3, 6, 10]

In [9]:
%timeit bfs(graph, 0, len(words))

100000 loops, best of 3: 15.4 us per loop

In [10]:
%timeit nx.shortest_path(graph, 0, len(words))

100000 loops, best of 3: 17.3 us per loop