Kurs IR-Grundlagen, Praktischer Teil 5

1.1 Indexing of documents and queries

Modules needed


In [5]:
import re
import os

Path and file definitions


In [15]:
# Identify the current platform ('posix' = Linux/macOS, 'nt' = Windows);
# used below to pick the matching set of corpus paths.
os.name


Out[15]:
'nt'

In [19]:
# Platform-dependent locations of the corpus, the queries, and the stopword
# list. NOTE(review): these are hardcoded absolute paths; a single
# configurable base directory would be more portable.
if os.name == 'posix':
    baseDir =  r'/home/hase/Documents/ZHAW/InfoEng/Lectures/Information_Retrieval/Exercises/PT_5_MiniRetrieve/'
    doc_path = r'/home/hase/Documents/ZHAW/InfoEng/Lectures/Information_Retrieval/Exercises/PT_5_MiniRetrieve/documents/'
    query_path = r'/home/hase/Documents/ZHAW/InfoEng/Lectures/Information_Retrieval/Exercises/PT_5_MiniRetrieve/queries/'
elif os.name == 'nt':
    # A raw string cannot end in a single backslash, so r'...\\' produced a
    # doubled trailing separator; use normal strings with escaped backslashes.
    baseDir = 'C:\\ZHAW\\IR\\PT_5_MiniRetrieve\\'
    doc_path = 'C:\\ZHAW\\IR\\PT_5_MiniRetrieve\\documents\\'
    # Bug fix: query_path was missing on Windows, leading to a NameError for
    # any later cell that uses it under os.name == 'nt'.
    query_path = 'C:\\ZHAW\\IR\\PT_5_MiniRetrieve\\queries\\'

# Stopword file name, resolved relative to baseDir where it is used.
STOPWORDS_PATH = 'stopwords.txt'

Read files


In [94]:
# First, read the entire document as a string
def readDoc(dir_path, file):
    path = dir_path + file
    with open(path, 'r') as f:
        string = f.read()
    return string

In [93]:
# Read document '1' from the corpus directory into a single string.
string = readDoc(doc_path, '1')
# Uncomment to inspect the raw text:
#string

Simple Tokenize


In [22]:
# Define regex to parse the string, and perform a simple tokenize
# Later will need a proper tokenize function to remove stopwords
# Regex used to break raw text into word tokens (split on any run of
# non-word characters). A later tokenizer additionally removes stopwords.
split_regex = r'\W+'

def simpleTokenize(string):
    """ A simple implementation of input string tokenization
    Args:
        string (str): input string
    Returns:
        list: a list of tokens
    """
    # Lowercase, split on non-word runs, then discard the empty strings
    # that re.split produces at the boundaries of the input.
    pieces = re.split(split_regex, string.lower())
    return [piece for piece in pieces if piece]

In [106]:
#print(simpleTokenize(string))

Remove Stopwords


In [85]:
# File with stopwords
stopfile = os.path.join(baseDir, STOPWORDS_PATH)
print(stopfile)
# Create list of stopwords
stopwords = []
with open(stopfile, 'r') as s:
    stopwords_string = s.read()
    stopwords = re.split(split_regex, stopwords_string)
    
type(stopwords), len(stopwords)


/home/hase/Documents/ZHAW/InfoEng/Lectures/Information_Retrieval/Exercises/PT_5_MiniRetrieve/stopwords.txt
Out[85]:
(list, 128)

In [54]:
def tokenize(string):
    """ An implementation of input string tokenization that excludes stopwords
    Args:
        string (str): input string
    Returns:
        list: a list of tokens without stopwords
    """
    # Keep only the tokens that are not in the module-level `stopwords`
    # collection; the comprehension replaces the previous explicit
    # loop with its non-idiomatic `continue`/`else` branches.
    return [token for token in simpleTokenize(string) if token not in stopwords]

In [105]:
#tokenize(string)

Inverted and Non-Inverted index

for each document 'doc' in the list of documents D:

    get tokens by tokenizing 'doc'
    for each token in tokens:
        inverted index dict {token_one: {doc containing token_one: frequency of token_one in that doc},
                             token_two: {doc containing token_two: frequency of token_two in that doc},
                             ...
                            }
        non-inverted index dict {doc_one: {token in doc_one: frequency of token in doc_one},
                                 doc_two: {token in doc_two: frequency of token in doc_two},
                                 ...
                                }

In [197]:
def noninvIndex(dir_path, num_files):
    """ A simple implementation of a non-inverted index,
    i.e. token frequencies found in each document.
    Args:
        dir_path (string): path where all the documents are stored
        num_files (string or int): upper bound on the numeric document names
            ('1', '2', ...). NOTE(review): range(1, n) stops at n-1, so the
            file named str(num_files) itself is never read -- callers pass
            <count>+1 (e.g. 10 indexes documents '1'..'9'). Kept as-is for
            backward compatibility with the existing calls below.
    Returns:
        docNoniIdx (dict): {doc_name: {token: frequency}} per document
    """
    # Dictionary to store the non-inverted index for all documents.
    docNoniIdx = {}
    for i in range(1, int(num_files)):
        file = str(i)
        # os.path.join tolerates a missing trailing separator in dir_path.
        path = os.path.join(dir_path, file)
        with open(path, 'r') as f:
            text = f.read()  # renamed from `string` to avoid shadowing the module-level variable
        # Simple tokenization for now; stopword removal would use tokenize().
        tokens = simpleTokenize(text)
        # Count token occurrences; dict.get replaces the explicit
        # `not in ... .keys()` membership test.
        freqs = {}
        for token in tokens:
            freqs[token] = freqs.get(token, 0) + 1
        docNoniIdx[file] = freqs
    return docNoniIdx

In [212]:
# Build the non-inverted index; with num_files=10 this reads documents '1'..'9'.
non_invIndex = noninvIndex(doc_path,10)

In [213]:
# Number of indexed documents (9 for num_files=10, see output below).
len(non_invIndex)


Out[213]:
9

In [214]:
# Display the full non-inverted index: one {token: frequency} dict per document.
non_invIndex


Out[214]:
{'1': {'a': 7,
  'aerodynamics': 1,
  'after': 1,
  'agree': 1,
  'an': 3,
  'and': 1,
  'angles': 1,
  'as': 1,
  'at': 2,
  'attack': 1,
  'basis': 1,
  'boundary': 1,
  'by': 1,
  'comparative': 1,
  'configuration': 1,
  'control': 1,
  'curves': 1,
  'destalling': 3,
  'determine': 1,
  'different': 3,
  'distribution': 1,
  'due': 2,
  'effect': 1,
  'effects': 1,
  'empirical': 1,
  'evaluation': 2,
  'evidence': 1,
  'experiment': 1,
  'experimental': 2,
  'flow': 1,
  'for': 2,
  'found': 1,
  'free': 1,
  'in': 4,
  'increase': 1,
  'increment': 2,
  'integrated': 1,
  'intended': 1,
  'investigation': 1,
  'layer': 1,
  'lift': 4,
  'loading': 1,
  'made': 2,
  'of': 10,
  'or': 1,
  'order': 1,
  'part': 2,
  'potential': 1,
  'problem': 1,
  'produced': 1,
  'propeller': 1,
  'ratios': 1,
  'remaining': 1,
  'results': 1,
  'showed': 1,
  'slipstream': 5,
  'span': 1,
  'spanwise': 1,
  'specific': 1,
  'stream': 1,
  'study': 1,
  'substantial': 1,
  'subtracting': 1,
  'supporting': 1,
  'that': 1,
  'the': 12,
  'theoretical': 1,
  'theory': 1,
  'this': 2,
  'to': 5,
  'together': 1,
  'treatments': 1,
  'velocity': 1,
  'was': 4,
  'well': 1,
  'were': 1,
  'wing': 3,
  'with': 2},
 '2': {'a': 9,
  'again': 1,
  'an': 2,
  'and': 2,
  'approximation': 1,
  'arises': 1,
  'as': 1,
  'be': 3,
  'been': 1,
  'being': 1,
  'between': 1,
  'body': 2,
  'boundary': 5,
  'by': 2,
  'can': 2,
  'classical': 1,
  'consequently': 1,
  'consider': 1,
  'considered': 1,
  'constant': 1,
  'curved': 1,
  'different': 1,
  'dimensional': 2,
  'discussed': 1,
  'discussion': 1,
  'edge': 1,
  'effects': 1,
  'emitting': 1,
  'exists': 1,
  'feature': 1,
  'ferri': 1,
  'flat': 3,
  'flow': 6,
  'fluid': 2,
  'for': 1,
  'free': 3,
  'from': 2,
  'has': 1,
  'have': 1,
  'here': 1,
  'high': 1,
  'hypersonic': 2,
  'in': 7,
  'incompressible': 2,
  'instance': 1,
  'investigated': 1,
  'inviscid': 3,
  'irrotational': 1,
  'is': 5,
  'it': 2,
  'layer': 5,
  'leading': 1,
  'libby': 1,
  'must': 1,
  'necessary': 1,
  'nose': 1,
  'novel': 1,
  'of': 6,
  'only': 1,
  'or': 1,
  'original': 1,
  'outside': 1,
  'paper': 1,
  'past': 4,
  'plate': 3,
  'possible': 1,
  'prandtl': 2,
  'present': 1,
  'problem': 4,
  'recently': 1,
  'region': 1,
  'restricted': 1,
  'rotational': 2,
  's': 2,
  'shear': 2,
  'shock': 2,
  'shown': 1,
  'simple': 2,
  'situation': 2,
  'small': 2,
  'somewhat': 1,
  'speed': 1,
  'steady': 1,
  'stream': 3,
  'study': 2,
  'such': 1,
  'that': 2,
  'the': 18,
  'there': 1,
  'this': 1,
  'to': 2,
  'treated': 1,
  'two': 2,
  'usually': 1,
  'viscosity': 2,
  'viscous': 2,
  'vorticity': 2,
  'wave': 2,
  'while': 1},
 '3': {'a': 1,
  'are': 1,
  'boundary': 2,
  'equations': 1,
  'flat': 1,
  'flow': 2,
  'for': 1,
  'gradient': 1,
  'in': 1,
  'incompressible': 1,
  'layer': 2,
  'no': 1,
  'past': 1,
  'plate': 1,
  'presented': 1,
  'pressure': 1,
  'shear': 1,
  'simple': 1,
  'steady': 1,
  'the': 2,
  'with': 1},
 '4': {'a': 4,
  'also': 1,
  'and': 1,
  'approximate': 1,
  'are': 1,
  'been': 1,
  'boundary': 5,
  'by': 1,
  'comparison': 1,
  'considered': 1,
  'dimensional': 1,
  'distribution': 1,
  'effect': 1,
  'equations': 1,
  'flat': 1,
  'flow': 3,
  'fluid': 1,
  'for': 3,
  'friction': 1,
  'has': 1,
  'in': 3,
  'incompressible': 2,
  'is': 1,
  'karman': 1,
  'laminar': 1,
  'layer': 5,
  'made': 1,
  'obtained': 1,
  'of': 4,
  'plate': 2,
  'pohlhausen': 1,
  'problem': 1,
  'shear': 2,
  'show': 1,
  'skin': 1,
  'solutions': 2,
  'steady': 1,
  'technique': 1,
  'the': 8,
  'thickness': 1,
  'to': 1,
  'two': 1,
  'uniform': 1,
  'velocity': 1,
  'vorticity': 1,
  'with': 1},
 '5': {'a': 4,
  'aerodynamic': 1,
  'analytic': 1,
  'are': 1,
  'at': 1,
  'composite': 1,
  'conduction': 2,
  'dimensional': 1,
  'double': 1,
  'during': 1,
  'example': 1,
  'exposed': 1,
  'for': 3,
  'heat': 4,
  'heating': 2,
  'in': 1,
  'input': 1,
  'internal': 1,
  'into': 1,
  'layer': 1,
  'linear': 1,
  'may': 1,
  'occur': 1,
  'of': 1,
  'one': 2,
  'presented': 1,
  'rate': 2,
  'slab': 1,
  'slabs': 1,
  'small': 1,
  'solutions': 1,
  'subjected': 1,
  'surface': 1,
  'the': 1,
  'this': 1,
  'time': 1,
  'to': 2,
  'transient': 2,
  'triangular': 1,
  'type': 1},
 '6': {'2': 1,
  'a': 4,
  'analytic': 1,
  'and': 2,
  'are': 1,
  'at': 3,
  'briefly': 1,
  'by': 1,
  'cases': 1,
  'contribution': 1,
  'dimensional': 1,
  'double': 1,
  'duration': 1,
  'face': 1,
  'flow': 1,
  'for': 3,
  'forum': 1,
  'gave': 1,
  'general': 1,
  'give': 1,
  'given': 1,
  'heat': 3,
  'here': 1,
  'his': 1,
  'how': 1,
  'i': 1,
  'in': 3,
  'incomplete': 1,
  'indicate': 1,
  'input': 2,
  'insulated': 1,
  'interface': 1,
  'is': 1,
  'it': 1,
  'layer': 1,
  'longer': 1,
  'method': 1,
  'multilayer': 1,
  'no': 1,
  'obtained': 1,
  'of': 2,
  'one': 2,
  'other': 1,
  'out': 1,
  'particular': 1,
  'point': 1,
  'problem': 1,
  'propose': 1,
  'rate': 1,
  'readers': 1,
  'recent': 1,
  'reference': 1,
  'resistance': 1,
  'slab': 2,
  'solution': 1,
  'solutions': 3,
  'temperature': 1,
  'than': 1,
  'that': 1,
  'the': 10,
  'thermal': 1,
  'this': 1,
  'three': 1,
  'times': 1,
  'to': 5,
  'transient': 1,
  'triangular': 1,
  'using': 1,
  'wassermann': 2,
  'were': 1,
  'with': 2},
 '7': {'1': 2,
  '12': 1,
  '2': 2,
  '3': 2,
  '4': 1,
  '5': 1,
  '67': 1,
  '71': 1,
  '90': 1,
  'a': 1,
  'after': 1,
  'and': 7,
  'appears': 1,
  'are': 1,
  'as': 1,
  'at': 4,
  'becoming': 1,
  'begins': 1,
  'boundary': 4,
  'breakdown': 2,
  'breaks': 1,
  'by': 2,
  'california': 1,
  'contaminates': 1,
  'contamination': 1,
  'controlled': 1,
  'dimensional': 3,
  'double': 1,
  'down': 1,
  'each': 1,
  'edge': 1,
  'effect': 3,
  'element': 1,
  'elements': 3,
  'ensuing': 1,
  'experiments': 1,
  'field': 2,
  'flow': 3,
  'fourth': 1,
  'from': 1,
  'has': 1,
  'height': 2,
  'in': 3,
  'inch': 1,
  'increasing': 1,
  'indicate': 1,
  'induced': 1,
  'initial': 1,
  'institute': 1,
  'investigate': 1,
  'is': 2,
  'jet': 1,
  'k': 1,
  'kinematic': 1,
  'laboratory': 1,
  'laminar': 2,
  'lateral': 1,
  'layer': 4,
  'little': 1,
  'local': 1,
  'mach': 1,
  'may': 1,
  'more': 1,
  'number': 3,
  'numbers': 1,
  'occurs': 1,
  'of': 14,
  'on': 3,
  'one': 1,
  'outer': 1,
  'per': 1,
  'performed': 1,
  'persist': 1,
  'position': 4,
  'power': 1,
  'propulsion': 1,
  'rather': 1,
  'relative': 1,
  'results': 1,
  'reynolds': 3,
  'roughness': 7,
  'row': 1,
  'size': 1,
  'spacing': 2,
  'speeds': 1,
  'spheres': 1,
  'spiral': 2,
  'still': 1,
  'strength': 1,
  'sublayer': 1,
  'such': 1,
  'suddenly': 1,
  'supersonic': 2,
  'surrounding': 1,
  'tained': 1,
  'technology': 1,
  'that': 2,
  'the': 24,
  'thickness': 1,
  'three': 3,
  'to': 4,
  'trailing': 1,
  'transition': 5,
  'trip': 3,
  'tunnel': 1,
  'turbulent': 2,
  'u': 1,
  'upon': 1,
  'v': 1,
  'varies': 1,
  'varying': 1,
  'velocity': 1,
  'violent': 1,
  'viscosity': 1,
  'viz': 1,
  'vortices': 2,
  'vorticity': 2,
  'were': 1,
  'when': 1,
  'where': 2,
  'wind': 1,
  'with': 1,
  'x': 1},
 '8': {'a': 5,
  'additional': 1,
  'after': 1,
  'an': 1,
  'and': 3,
  'appeared': 1,
  'as': 1,
  'at': 3,
  'available': 1,
  'based': 1,
  'basis': 1,
  'be': 2,
  'behave': 1,
  'between': 1,
  'boundary': 2,
  'bureau': 1,
  'conducted': 1,
  'could': 1,
  'course': 1,
  'data': 2,
  'desirable': 1,
  'dimensional': 5,
  'displacement': 1,
  'distance': 1,
  'dryden': 1,
  'during': 1,
  'edge': 1,
  'effect': 3,
  'element': 3,
  'elements': 3,
  'extend': 1,
  'flat': 1,
  'found': 1,
  'from': 1,
  'functional': 1,
  'h': 1,
  'height': 1,
  'higher': 1,
  'his': 2,
  'in': 3,
  'investigation': 1,
  'is': 3,
  'it': 1,
  'l': 1,
  'layer': 2,
  'leading': 1,
  'measurements': 1,
  'national': 1,
  'number': 1,
  'obtained': 2,
  'of': 15,
  'on': 6,
  'plate': 1,
  'position': 1,
  'primarily': 1,
  'range': 1,
  'reasonably': 1,
  'relation': 1,
  'represented': 1,
  'results': 1,
  'reynolds': 1,
  'roughness': 6,
  'row': 1,
  'same': 1,
  'see': 1,
  'some': 1,
  'standards': 1,
  'study': 1,
  'such': 1,
  'suggestion': 1,
  'terms': 1,
  'that': 1,
  'the': 17,
  'thickness': 1,
  'three': 2,
  'to': 4,
  'transition': 4,
  'trip': 1,
  'two': 3,
  'values': 1,
  'way': 1,
  'well': 1,
  'were': 2,
  'where': 1,
  'whether': 1,
  'wire': 1,
  'would': 1},
 '9': {'0': 2,
  '1': 1,
  '10': 6,
  '2': 3,
  '26': 1,
  '40': 1,
  '46': 1,
  '5': 9,
  '6': 1,
  '8': 2,
  '9': 1,
  'a': 9,
  'agreement': 2,
  'air': 3,
  'along': 1,
  'although': 1,
  'amount': 1,
  'an': 4,
  'and': 8,
  'angle': 1,
  'approximately': 1,
  'as': 4,
  'at': 9,
  'be': 2,
  'being': 2,
  'below': 1,
  'between': 1,
  'boundary': 6,
  'by': 4,
  'caused': 1,
  'coefficient': 1,
  'compared': 1,
  'complete': 1,
  'constant': 1,
  'contamination': 1,
  'degree': 1,
  'detection': 1,
  'developed': 1,
  'dimensional': 1,
  'direct': 1,
  'discussion': 1,
  'disturbances': 1,
  'downstream': 1,
  'earlier': 1,
  'edge': 1,
  'effect': 1,
  'effective': 1,
  'element': 1,
  'emphasis': 1,
  'extensively': 1,
  'far': 1,
  'flat': 3,
  'floating': 1,
  'flow': 3,
  'for': 5,
  'found': 3,
  'friction': 4,
  'fully': 1,
  'galcit': 1,
  'given': 1,
  'good': 2,
  'greater': 2,
  'hastening': 1,
  'head': 1,
  'hypersonic': 2,
  'in': 7,
  'incompressible': 1,
  'injected': 1,
  'injection': 2,
  'insulated': 2,
  'into': 1,
  'investigated': 1,
  'investigation': 2,
  'is': 1,
  'it': 1,
  'lacquer': 1,
  'laminar': 3,
  'layer': 6,
  'leading': 1,
  'least': 1,
  'local': 1,
  'low': 2,
  'lower': 1,
  'mach': 2,
  'made': 2,
  'means': 2,
  'measurements': 3,
  'much': 1,
  'nominal': 1,
  'not': 1,
  'number': 3,
  'numbers': 6,
  'obtained': 2,
  'occurred': 1,
  'of': 21,
  'on': 4,
  'only': 1,
  'onset': 1,
  'originated': 1,
  'over': 2,
  'phosphorescent': 1,
  'pitot': 1,
  'plate': 4,
  'possibility': 1,
  'qualitative': 1,
  'r': 1,
  'rake': 1,
  'range': 2,
  'regardless': 1,
  'results': 2,
  'reynolds': 7,
  'shear': 1,
  'sidewall': 1,
  'skin': 4,
  'speed': 1,
  'speeds': 1,
  'spread': 1,
  'stability': 1,
  'studies': 1,
  'surface': 1,
  'surveys': 1,
  'technique': 3,
  'than': 2,
  'that': 2,
  'the': 17,
  'these': 1,
  'this': 2,
  'to': 4,
  'total': 1,
  'transition': 5,
  'transverse': 1,
  'tunnel': 2,
  'turbulent': 4,
  'two': 1,
  'uniform': 1,
  'used': 2,
  'value': 2,
  'verified': 1,
  'was': 10,
  'were': 1,
  'wind': 1,
  'with': 4,
  'x': 5}}

In [206]:
def invIndex(dir_path, num_files):
    """ A simple implementation of an inverted index,
    i.e. for each token, the documents it occurs in and its frequency there.
    Args:
        dir_path (string): path where all the documents are stored
        num_files (string or int): upper bound on the numeric document names
            ('1', '2', ...). NOTE(review): as in noninvIndex, range(1, n)
            never reads the file named str(num_files); kept for backward
            compatibility with the existing calls below.
    Returns:
        dociIdx (dict): frequency of token in documents  dociIdx = {token:{'doc 1':freq, 'doc 2':freq}, 
                                                                    token_two:{'doc 1':freq}, ...} 
    """
    # Dictionary to store the inverted index for all documents.
    dociIdx = {}
    for i in range(1, int(num_files)):
        file = str(i)
        # os.path.join tolerates a missing trailing separator in dir_path.
        path = os.path.join(dir_path, file)
        with open(path, 'r') as f:
            text = f.read()  # renamed from `string` to avoid shadowing the module-level variable
        # Simple tokenization for now; stopword removal would use tokenize().
        tokens = simpleTokenize(text)
        # setdefault + get collapse the original three branches
        # (new token / known token, new doc / known doc) into one increment.
        for token in tokens:
            perDoc = dociIdx.setdefault(token, {})
            perDoc[file] = perDoc.get(file, 0) + 1
    return dociIdx

In [215]:
# Build the inverted index; with num_files=10 this reads documents '1'..'9'.
docinvIndex = invIndex(doc_path,10)

In [216]:
# Display the inverted index: {token: {doc: frequency}}.
docinvIndex


Out[216]:
{'0': {'9': 2},
 '1': {'7': 2, '9': 1},
 '10': {'9': 6},
 '12': {'7': 1},
 '2': {'6': 1, '7': 2, '9': 3},
 '26': {'9': 1},
 '3': {'7': 2},
 '4': {'7': 1},
 '40': {'9': 1},
 '46': {'9': 1},
 '5': {'7': 1, '9': 9},
 '6': {'9': 1},
 '67': {'7': 1},
 '71': {'7': 1},
 '8': {'9': 2},
 '9': {'9': 1},
 '90': {'7': 1},
 'a': {'1': 7, '2': 9, '3': 1, '4': 4, '5': 4, '6': 4, '7': 1, '8': 5, '9': 9},
 'additional': {'8': 1},
 'aerodynamic': {'5': 1},
 'aerodynamics': {'1': 1},
 'after': {'1': 1, '7': 1, '8': 1},
 'again': {'2': 1},
 'agree': {'1': 1},
 'agreement': {'9': 2},
 'air': {'9': 3},
 'along': {'9': 1},
 'also': {'4': 1},
 'although': {'9': 1},
 'amount': {'9': 1},
 'an': {'1': 3, '2': 2, '8': 1, '9': 4},
 'analytic': {'5': 1, '6': 1},
 'and': {'1': 1, '2': 2, '4': 1, '6': 2, '7': 7, '8': 3, '9': 8},
 'angle': {'9': 1},
 'angles': {'1': 1},
 'appeared': {'8': 1},
 'appears': {'7': 1},
 'approximate': {'4': 1},
 'approximately': {'9': 1},
 'approximation': {'2': 1},
 'are': {'3': 1, '4': 1, '5': 1, '6': 1, '7': 1},
 'arises': {'2': 1},
 'as': {'1': 1, '2': 1, '7': 1, '8': 1, '9': 4},
 'at': {'1': 2, '5': 1, '6': 3, '7': 4, '8': 3, '9': 9},
 'attack': {'1': 1},
 'available': {'8': 1},
 'based': {'8': 1},
 'basis': {'1': 1, '8': 1},
 'be': {'2': 3, '8': 2, '9': 2},
 'becoming': {'7': 1},
 'been': {'2': 1, '4': 1},
 'begins': {'7': 1},
 'behave': {'8': 1},
 'being': {'2': 1, '9': 2},
 'below': {'9': 1},
 'between': {'2': 1, '8': 1, '9': 1},
 'body': {'2': 2},
 'boundary': {'1': 1, '2': 5, '3': 2, '4': 5, '7': 4, '8': 2, '9': 6},
 'breakdown': {'7': 2},
 'breaks': {'7': 1},
 'briefly': {'6': 1},
 'bureau': {'8': 1},
 'by': {'1': 1, '2': 2, '4': 1, '6': 1, '7': 2, '9': 4},
 'california': {'7': 1},
 'can': {'2': 2},
 'cases': {'6': 1},
 'caused': {'9': 1},
 'classical': {'2': 1},
 'coefficient': {'9': 1},
 'comparative': {'1': 1},
 'compared': {'9': 1},
 'comparison': {'4': 1},
 'complete': {'9': 1},
 'composite': {'5': 1},
 'conducted': {'8': 1},
 'conduction': {'5': 2},
 'configuration': {'1': 1},
 'consequently': {'2': 1},
 'consider': {'2': 1},
 'considered': {'2': 1, '4': 1},
 'constant': {'2': 1, '9': 1},
 'contaminates': {'7': 1},
 'contamination': {'7': 1, '9': 1},
 'contribution': {'6': 1},
 'control': {'1': 1},
 'controlled': {'7': 1},
 'could': {'8': 1},
 'course': {'8': 1},
 'curved': {'2': 1},
 'curves': {'1': 1},
 'data': {'8': 2},
 'degree': {'9': 1},
 'desirable': {'8': 1},
 'destalling': {'1': 3},
 'detection': {'9': 1},
 'determine': {'1': 1},
 'developed': {'9': 1},
 'different': {'1': 3, '2': 1},
 'dimensional': {'2': 2, '4': 1, '5': 1, '6': 1, '7': 3, '8': 5, '9': 1},
 'direct': {'9': 1},
 'discussed': {'2': 1},
 'discussion': {'2': 1, '9': 1},
 'displacement': {'8': 1},
 'distance': {'8': 1},
 'distribution': {'1': 1, '4': 1},
 'disturbances': {'9': 1},
 'double': {'5': 1, '6': 1, '7': 1},
 'down': {'7': 1},
 'downstream': {'9': 1},
 'dryden': {'8': 1},
 'due': {'1': 2},
 'duration': {'6': 1},
 'during': {'5': 1, '8': 1},
 'each': {'7': 1},
 'earlier': {'9': 1},
 'edge': {'2': 1, '7': 1, '8': 1, '9': 1},
 'effect': {'1': 1, '4': 1, '7': 3, '8': 3, '9': 1},
 'effective': {'9': 1},
 'effects': {'1': 1, '2': 1},
 'element': {'7': 1, '8': 3, '9': 1},
 'elements': {'7': 3, '8': 3},
 'emitting': {'2': 1},
 'emphasis': {'9': 1},
 'empirical': {'1': 1},
 'ensuing': {'7': 1},
 'equations': {'3': 1, '4': 1},
 'evaluation': {'1': 2},
 'evidence': {'1': 1},
 'example': {'5': 1},
 'exists': {'2': 1},
 'experiment': {'1': 1},
 'experimental': {'1': 2},
 'experiments': {'7': 1},
 'exposed': {'5': 1},
 'extend': {'8': 1},
 'extensively': {'9': 1},
 'face': {'6': 1},
 'far': {'9': 1},
 'feature': {'2': 1},
 'ferri': {'2': 1},
 'field': {'7': 2},
 'flat': {'2': 3, '3': 1, '4': 1, '8': 1, '9': 3},
 'floating': {'9': 1},
 'flow': {'1': 1, '2': 6, '3': 2, '4': 3, '6': 1, '7': 3, '9': 3},
 'fluid': {'2': 2, '4': 1},
 'for': {'1': 2, '2': 1, '3': 1, '4': 3, '5': 3, '6': 3, '9': 5},
 'forum': {'6': 1},
 'found': {'1': 1, '8': 1, '9': 3},
 'fourth': {'7': 1},
 'free': {'1': 1, '2': 3},
 'friction': {'4': 1, '9': 4},
 'from': {'2': 2, '7': 1, '8': 1},
 'fully': {'9': 1},
 'functional': {'8': 1},
 'galcit': {'9': 1},
 'gave': {'6': 1},
 'general': {'6': 1},
 'give': {'6': 1},
 'given': {'6': 1, '9': 1},
 'good': {'9': 2},
 'gradient': {'3': 1},
 'greater': {'9': 2},
 'h': {'8': 1},
 'has': {'2': 1, '4': 1, '7': 1},
 'hastening': {'9': 1},
 'have': {'2': 1},
 'head': {'9': 1},
 'heat': {'5': 4, '6': 3},
 'heating': {'5': 2},
 'height': {'7': 2, '8': 1},
 'here': {'2': 1, '6': 1},
 'high': {'2': 1},
 'higher': {'8': 1},
 'his': {'6': 1, '8': 2},
 'how': {'6': 1},
 'hypersonic': {'2': 2, '9': 2},
 'i': {'6': 1},
 'in': {'1': 4,
  '2': 7,
  '3': 1,
  '4': 3,
  '5': 1,
  '6': 3,
  '7': 3,
  '8': 3,
  '9': 7},
 'inch': {'7': 1},
 'incomplete': {'6': 1},
 'incompressible': {'2': 2, '3': 1, '4': 2, '9': 1},
 'increase': {'1': 1},
 'increasing': {'7': 1},
 'increment': {'1': 2},
 'indicate': {'6': 1, '7': 1},
 'induced': {'7': 1},
 'initial': {'7': 1},
 'injected': {'9': 1},
 'injection': {'9': 2},
 'input': {'5': 1, '6': 2},
 'instance': {'2': 1},
 'institute': {'7': 1},
 'insulated': {'6': 1, '9': 2},
 'integrated': {'1': 1},
 'intended': {'1': 1},
 'interface': {'6': 1},
 'internal': {'5': 1},
 'into': {'5': 1, '9': 1},
 'investigate': {'7': 1},
 'investigated': {'2': 1, '9': 1},
 'investigation': {'1': 1, '8': 1, '9': 2},
 'inviscid': {'2': 3},
 'irrotational': {'2': 1},
 'is': {'2': 5, '4': 1, '6': 1, '7': 2, '8': 3, '9': 1},
 'it': {'2': 2, '6': 1, '8': 1, '9': 1},
 'jet': {'7': 1},
 'k': {'7': 1},
 'karman': {'4': 1},
 'kinematic': {'7': 1},
 'l': {'8': 1},
 'laboratory': {'7': 1},
 'lacquer': {'9': 1},
 'laminar': {'4': 1, '7': 2, '9': 3},
 'lateral': {'7': 1},
 'layer': {'1': 1,
  '2': 5,
  '3': 2,
  '4': 5,
  '5': 1,
  '6': 1,
  '7': 4,
  '8': 2,
  '9': 6},
 'leading': {'2': 1, '8': 1, '9': 1},
 'least': {'9': 1},
 'libby': {'2': 1},
 'lift': {'1': 4},
 'linear': {'5': 1},
 'little': {'7': 1},
 'loading': {'1': 1},
 'local': {'7': 1, '9': 1},
 'longer': {'6': 1},
 'low': {'9': 2},
 'lower': {'9': 1},
 'mach': {'7': 1, '9': 2},
 'made': {'1': 2, '4': 1, '9': 2},
 'may': {'5': 1, '7': 1},
 'means': {'9': 2},
 'measurements': {'8': 1, '9': 3},
 'method': {'6': 1},
 'more': {'7': 1},
 'much': {'9': 1},
 'multilayer': {'6': 1},
 'must': {'2': 1},
 'national': {'8': 1},
 'necessary': {'2': 1},
 'no': {'3': 1, '6': 1},
 'nominal': {'9': 1},
 'nose': {'2': 1},
 'not': {'9': 1},
 'novel': {'2': 1},
 'number': {'7': 3, '8': 1, '9': 3},
 'numbers': {'7': 1, '9': 6},
 'obtained': {'4': 1, '6': 1, '8': 2, '9': 2},
 'occur': {'5': 1},
 'occurred': {'9': 1},
 'occurs': {'7': 1},
 'of': {'1': 10, '2': 6, '4': 4, '5': 1, '6': 2, '7': 14, '8': 15, '9': 21},
 'on': {'7': 3, '8': 6, '9': 4},
 'one': {'5': 2, '6': 2, '7': 1},
 'only': {'2': 1, '9': 1},
 'onset': {'9': 1},
 'or': {'1': 1, '2': 1},
 'order': {'1': 1},
 'original': {'2': 1},
 'originated': {'9': 1},
 'other': {'6': 1},
 'out': {'6': 1},
 'outer': {'7': 1},
 'outside': {'2': 1},
 'over': {'9': 2},
 'paper': {'2': 1},
 'part': {'1': 2},
 'particular': {'6': 1},
 'past': {'2': 4, '3': 1},
 'per': {'7': 1},
 'performed': {'7': 1},
 'persist': {'7': 1},
 'phosphorescent': {'9': 1},
 'pitot': {'9': 1},
 'plate': {'2': 3, '3': 1, '4': 2, '8': 1, '9': 4},
 'pohlhausen': {'4': 1},
 'point': {'6': 1},
 'position': {'7': 4, '8': 1},
 'possibility': {'9': 1},
 'possible': {'2': 1},
 'potential': {'1': 1},
 'power': {'7': 1},
 'prandtl': {'2': 2},
 'present': {'2': 1},
 'presented': {'3': 1, '5': 1},
 'pressure': {'3': 1},
 'primarily': {'8': 1},
 'problem': {'1': 1, '2': 4, '4': 1, '6': 1},
 'produced': {'1': 1},
 'propeller': {'1': 1},
 'propose': {'6': 1},
 'propulsion': {'7': 1},
 'qualitative': {'9': 1},
 'r': {'9': 1},
 'rake': {'9': 1},
 'range': {'8': 1, '9': 2},
 'rate': {'5': 2, '6': 1},
 'rather': {'7': 1},
 'ratios': {'1': 1},
 'readers': {'6': 1},
 'reasonably': {'8': 1},
 'recent': {'6': 1},
 'recently': {'2': 1},
 'reference': {'6': 1},
 'regardless': {'9': 1},
 'region': {'2': 1},
 'relation': {'8': 1},
 'relative': {'7': 1},
 'remaining': {'1': 1},
 'represented': {'8': 1},
 'resistance': {'6': 1},
 'restricted': {'2': 1},
 'results': {'1': 1, '7': 1, '8': 1, '9': 2},
 'reynolds': {'7': 3, '8': 1, '9': 7},
 'rotational': {'2': 2},
 'roughness': {'7': 7, '8': 6},
 'row': {'7': 1, '8': 1},
 's': {'2': 2},
 'same': {'8': 1},
 'see': {'8': 1},
 'shear': {'2': 2, '3': 1, '4': 2, '9': 1},
 'shock': {'2': 2},
 'show': {'4': 1},
 'showed': {'1': 1},
 'shown': {'2': 1},
 'sidewall': {'9': 1},
 'simple': {'2': 2, '3': 1},
 'situation': {'2': 2},
 'size': {'7': 1},
 'skin': {'4': 1, '9': 4},
 'slab': {'5': 1, '6': 2},
 'slabs': {'5': 1},
 'slipstream': {'1': 5},
 'small': {'2': 2, '5': 1},
 'solution': {'6': 1},
 'solutions': {'4': 2, '5': 1, '6': 3},
 'some': {'8': 1},
 'somewhat': {'2': 1},
 'spacing': {'7': 2},
 'span': {'1': 1},
 'spanwise': {'1': 1},
 'specific': {'1': 1},
 'speed': {'2': 1, '9': 1},
 'speeds': {'7': 1, '9': 1},
 'spheres': {'7': 1},
 'spiral': {'7': 2},
 'spread': {'9': 1},
 'stability': {'9': 1},
 'standards': {'8': 1},
 'steady': {'2': 1, '3': 1, '4': 1},
 'still': {'7': 1},
 'stream': {'1': 1, '2': 3},
 'strength': {'7': 1},
 'studies': {'9': 1},
 'study': {'1': 1, '2': 2, '8': 1},
 'subjected': {'5': 1},
 'sublayer': {'7': 1},
 'substantial': {'1': 1},
 'subtracting': {'1': 1},
 'such': {'2': 1, '7': 1, '8': 1},
 'suddenly': {'7': 1},
 'suggestion': {'8': 1},
 'supersonic': {'7': 2},
 'supporting': {'1': 1},
 'surface': {'5': 1, '9': 1},
 'surrounding': {'7': 1},
 'surveys': {'9': 1},
 'tained': {'7': 1},
 'technique': {'4': 1, '9': 3},
 'technology': {'7': 1},
 'temperature': {'6': 1},
 'terms': {'8': 1},
 'than': {'6': 1, '9': 2},
 'that': {'1': 1, '2': 2, '6': 1, '7': 2, '8': 1, '9': 2},
 'the': {'1': 12,
  '2': 18,
  '3': 2,
  '4': 8,
  '5': 1,
  '6': 10,
  '7': 24,
  '8': 17,
  '9': 17},
 'theoretical': {'1': 1},
 'theory': {'1': 1},
 'there': {'2': 1},
 'thermal': {'6': 1},
 'these': {'9': 1},
 'thickness': {'4': 1, '7': 1, '8': 1},
 'this': {'1': 2, '2': 1, '5': 1, '6': 1, '9': 2},
 'three': {'6': 1, '7': 3, '8': 2},
 'time': {'5': 1},
 'times': {'6': 1},
 'to': {'1': 5, '2': 2, '4': 1, '5': 2, '6': 5, '7': 4, '8': 4, '9': 4},
 'together': {'1': 1},
 'total': {'9': 1},
 'trailing': {'7': 1},
 'transient': {'5': 2, '6': 1},
 'transition': {'7': 5, '8': 4, '9': 5},
 'transverse': {'9': 1},
 'treated': {'2': 1},
 'treatments': {'1': 1},
 'triangular': {'5': 1, '6': 1},
 'trip': {'7': 3, '8': 1},
 'tunnel': {'7': 1, '9': 2},
 'turbulent': {'7': 2, '9': 4},
 'two': {'2': 2, '4': 1, '8': 3, '9': 1},
 'type': {'5': 1},
 'u': {'7': 1},
 'uniform': {'4': 1, '9': 1},
 'upon': {'7': 1},
 'used': {'9': 2},
 'using': {'6': 1},
 'usually': {'2': 1},
 'v': {'7': 1},
 'value': {'9': 2},
 'values': {'8': 1},
 'varies': {'7': 1},
 'varying': {'7': 1},
 'velocity': {'1': 1, '4': 1, '7': 1},
 'verified': {'9': 1},
 'violent': {'7': 1},
 'viscosity': {'2': 2, '7': 1},
 'viscous': {'2': 2},
 'viz': {'7': 1},
 'vortices': {'7': 2},
 'vorticity': {'2': 2, '4': 1, '7': 2},
 'was': {'1': 4, '9': 10},
 'wassermann': {'6': 2},
 'wave': {'2': 2},
 'way': {'8': 1},
 'well': {'1': 1, '8': 1},
 'were': {'1': 1, '6': 1, '7': 1, '8': 2, '9': 1},
 'when': {'7': 1},
 'where': {'7': 2, '8': 1},
 'whether': {'8': 1},
 'while': {'2': 1},
 'wind': {'7': 1, '9': 1},
 'wing': {'1': 3},
 'wire': {'8': 1},
 'with': {'1': 2, '3': 1, '4': 1, '6': 2, '7': 1, '9': 4},
 'would': {'8': 1},
 'x': {'7': 1, '9': 5}}