In [1]:
# coding: utf-8
# python2.7

from __future__ import division, print_function
from parsers import CitationWindowParser 
from context_parsing_functions import create_context_parsers
import subprocess
import re

# functions to make indexing param files
def index_collections(window_specs, index_dir, collection_dir):
    """Write index param files for every context parser, then build the indexes.

    One parser is obtained per entry produced by ``create_context_parsers``;
    for each of them the param files are written, and once all of them are
    written the collected index names are handed to ``build_indexes``.

    Args:
        window_specs: dict with lists of tuples, for example
            {'Sentence': [(0, 0)], 'Word': [(0, 0)]}.
        index_dir: forwarded unchanged to ``write_param_file``.
        collection_dir: forwarded unchanged to ``write_param_file``.
    """
    collected_names = []
    for context_parser in create_context_parsers(window_specs):
        name = context_parser.get_stringy_name()
        field = context_parser.get_field_name()
        write_param_file(field, name, index_dir, collection_dir)
        # remember the name so the index can be built after all files exist
        collected_names.append(name)
    build_indexes(collected_names)
        
def write_param_file(citation_field, index_name, index_dir, collection_dir):
    """Writes param files for building Indri indexes from each document collection.

    Four files are written, one for each of the variant codes 'ss', 'sn',
    'ns' and 'nn', into "<index_dir>Param_Files/". The path is built by plain
    string concatenation, so ``index_dir`` has to carry its own trailing
    separator.

    Args:
        citation_field: forwarded to ``param_text``.
        index_name: used in the file name and forwarded to ``param_text``.
        index_dir: prefix of the output path; also forwarded to ``param_text``.
        collection_dir: forwarded to ``param_text``.
    """
    # NOTE(review): the two letters presumably encode stopping / stemming on
    # ('s') or off ('n') -- confirm against the index-building script.
    for variant in ('ss', 'sn', 'ns', 'nn'):
        filename = "param-I.{}.{}.xml".format(index_name, variant)
        print("...writing index param file: {}".format(filename))
        destination = "{}Param_Files/{}".format(index_dir, filename)
        with open(destination, 'w') as handle:
            handle.write(
                param_text(citation_field, index_name, variant, index_dir, collection_dir)
            )

def param_text(citation_field, index_name, stop_stem, index_dir, collection_dir):
    """Return the text of an Indri index-building parameter file.

    The template is assembled from three pieces (opening, optional stemmer
    element, remainder) and filled in with a single ``format`` call.

    Args:
        citation_field: name placed in the last <field> element.
        index_name: used in both the <index> path and the corpus <path>.
        stop_stem: variant code appended to the <index> path. A krovetz
            <stemmer> element is emitted when the code matches the pattern
            '[ns]s' (e.g. 'ss' and 'ns', but not 'sn' or 'nn').
        index_dir: prefix of the <index> path; it is joined directly to
            "I.", so any path separator must already be part of the value.
        collection_dir: prefix of the corpus <path>; joined directly to
            "PF+PN+C.".

    Returns:
        The parameter file contents as a string.
    """
    opening = """<parameters>
          <index>{index_dir}I.{index_name}.{stop_stem}</index>
          <memory>2G</memory>
          <storeDocs>false</storeDocs>
"""
    # Only present for the stemmed variants.
    stemmer = """          <stemmer>
            <name>krovetz</name>
          </stemmer>
"""
    remainder = """          <field>
            <name>isearchdoc</name>
          </field>
          <field>
            <name>author</name>
          </field>
          <field>
           <name>category</name>
          </field>
          <field>
            <name>description</name>
          </field>
          <field>
            <name>docno</name>
          </field>
          <field>
            <name>documentlink</name>
          </field>
          <field>
            <name>fulltext</name>
          </field>
          <field>
            <name>subject</name>
          </field>
          <field>
            <name>title</name>
          </field>
          <field>
            <name>type</name>
          </field>
          <field>
            <name>venue</name>
          </field>
          <field>
            <name>citations</name>
          </field>
          <field>
            <name>{citation_field}</name>
          </field>
          <corpus>
            <path>{collection_dir}PF+PN+C.{index_name}/</path>
            <class>xml</class>
          </corpus>
        </parameters>
            """
    wants_stemmer = re.search('[ns]s', stop_stem) is not None
    template = opening + (stemmer if wants_stemmer else "") + remainder
    return template.format(
        index_dir=index_dir,
        index_name=index_name,
        stop_stem=stop_stem,
        citation_field=citation_field,
        collection_dir=collection_dir,
    )

# functions to build Indri indexes
def build_indexes(index_names):
    """Invoke ./build_indri_index.bash once for each index name.

    Args:
        index_names: iterable of index names; each one is converted with
            ``str`` and passed to the script as its single argument.

    The script's exit status is not inspected.
    """
    script = "./build_indri_index.bash"
    for name in index_names:
        command = [script, str(name)]
        subprocess.call(command)


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-1-5eb27a2d3d8e> in <module>()
      1 from __future__ import division, print_function
----> 2 from parsers import CitationWindowParser
      3 from context_parsing_functions import create_context_parsers
      4 from os import listdir, makedirs
      5 from os.path import isfile, exists

ImportError: No module named parsers

In [4]:
# Scratch check of the name-collecting pattern: one name in, one-element list out.
index_name = 'a'
index_names = [index_name]
index_names


Out[4]:
['a']

In [ ]: