In [27]:
%load_ext autoreload
%autoreload 2
In [36]:
from data.text import samples as sam
from latinpigsay import generalfunctions as gfunc
from latinpigsay.tmp.experiments import expfunctions as expfunc
from latinpigsay import latinpig as lp
from latinpigsay import piggyprint as pp
from latinpigsay.tmp.experiments import exp
from latinpigsay.contractions import contractions_parallel as contspara
from latinpigsay.contractions import find_contractions as findconts
import piglatin as pl
import requests
from itertools import islice, permutations, count, izip, imap, product, chain
import itertools
import re
import json
from xml.dom import minidom
import os
from os import path
import operator
from multiprocessing import Pool
In [29]:
import numpy
from data.text import samples as sam
from textblob import TextBlob as textb
import nltk
In [30]:
import time
import arrow
class Timer:
    def __init__(self):
        self.interval = 0
    def __enter__(self):
        self.start = arrow.now()
        return self
    def __exit__(self, *args):
        self.end = arrow.now()
        self.interval = self.end - self.start
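A quick sanity check of this context manager (a sketch; two arrow timestamps subtract to a datetime.timedelta, which is what interval ends up holding):

with Timer() as t:
    sum(xrange(1000000))  # stand-in workload
print t.interval  # a timedelta such as 0:00:00.045678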
In [5]:
print sam.acidtest
In [6]:
text = sam.paragraphs_og
In [7]:
tokens = nltk.word_tokenize(text)
In [8]:
#print tokens
In [9]:
#print ' '.join(tokens)
In [10]:
sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
sentences = nltk.parse.util.extract_test_sentences(sentences)
In [11]:
len(sentences)
Out[11]:
In [12]:
print sentences[12]
In [10]:
files = {1 : {'file1' : 'data/text/phrases_english.txt',
              'file2' : 'data/text/phrases_piglatin.txt',
              },
         2 : {'file1' : 'data/text/contractions.txt',
              'file2' : 'data/text/contractions-un.txt'
              },
         3 : {'file1' : 'data/contractions.txt',
              },
         }
print files[1]['file1']
In [11]:
class regexpreplacer(object):
    def __init__(self, patterns):
        #self.patternlist = patterns
        self.patterns = [(re.compile(regex), repl) for (regex, repl) in
                         patterns]
    def replace(self, text):
        s = text
        for (pattern, repl) in self.patterns:
            (s, count) = re.subn(pattern, repl, s)
        return s
    #def replace2(self, text):
    #    s = text
    #    for (pattern, repl) in self.patternlist:
    #        (s, count) = re.subn(pattern, repl, s, flags=re.IGNORECASE)
    #    return s
In [12]:
contractions = [(r"won't", "will not"),
                (r"can't", "cannot"),
                (r"i'm", "i am"),
                (r"I'm", "I am"),
                (r"ma'am", "madam"),
                (r"ain't", "is not"),
                (r"let's", "let us"),
                (r"Let's", "Let us"),
                (r"shan't", "shall not"),
                (r"where'd", "where did"),
                (r"y'all", "you all"),
                (r"o'clock", "of the clock"),  # hackish: keeps it from translating as "oway ' ockclay"
                (r"(\w+)'ll", r"\g<1> will"),
                (r"(\w+)n't", r"\g<1> not"),
                (r"(\w+)'ve", r"\g<1> have"),
                (r"(\w+)'s", r"\g<1> is"),
                (r"(\w+)'re", r"\g<1> are"),
                (r"(\w+)'d", r"\g<1> would"),
                ]
expander = regexpreplacer(contractions)
expander = regexpreplacer(contractions)
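Pattern order matters here: the one-off rules have to fire before the generic suffix rules, otherwise "won't" would come out as "wo not" via (\w+)n't. A quick check of the expander:

print expander.replace("I can't believe it's not butter")
# expected: I cannot believe it is not butter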
In [13]:
with open(files[2]['file1']) as f:
    conts = f.read()
with open(files[2]['file2']) as f:
    notconts = f.read()
In [14]:
contslist = conts.splitlines()
fixedcontslist = expander.replace(conts).splitlines()
notcontslist = notconts.splitlines()
In [15]:
padding = 20
n = 0
for cont, fcont, ncont in izip(contslist, fixedcontslist, notcontslist):
    print n, cont.ljust(padding), fcont.ljust(padding), ncont
    n += 1
In [16]:
padding = 20
n = 0
for cont, fcont, ncont in izip(contslist, fixedcontslist, notcontslist):
    print cont + '\t' + fcont
    n += 1
In [17]:
def testexpander():
    padding = 20
    n = 0
    for cont, fcont, ncont in izip(contslist, fixedcontslist, notcontslist):
        if fcont not in ncont:
            print n, cont.ljust(padding), fcont.ljust(padding), ncont
        n += 1
testexpander()
In [18]:
padding = 20
n = 0
print
for cont, fcont, ncont in izip(contslist, fixedcontslist, notcontslist):
    print n, exp.translator(fcont).returnstr.ljust(padding), \
        exp.translator(cont).returnstr.ljust(padding), \
        lp.translator(cont).returnstr.ljust(padding), \
        pl.translate(cont).ljust(padding), ncont
    n += 1
In [19]:
def testexpander():
    padding = 20
    n = 0
    for cont, fcont, ncont in izip(contslist, fixedcontslist, notcontslist):
        prefixed = exp.translator(fcont).returnstr
        testing = exp.translator(cont).returnstr
        if testing != prefixed:
            print n, prefixed, testing
        n += 1
testexpander()
In [20]:
n = 0
for word in contslist:
    m = re.match(r"[n]'[\w]+|[\w]+(?!')(?:[A-Za-mo-z](?='))?|(?<=\s)[\w](?=')|[^\s\w']", word)
    if m is None:
        print n, word
    n += 1
In [21]:
n = 0
for word in contslist:
    m = re.match(r"[n]'[\w]+|[\w]+(?!')(?:[A-Za-mo-z](?='))?|(?<=\s)[\w](?=')|[^\s\w']", word)
    if m is not None:
        print n, word
    n += 1
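For reference, here is what that alternation actually captures on a few inputs (a sketch; the optional class [A-Za-mo-z] deliberately omits 'n' so that "n't" stays attached, and "o'clock" matches nothing at all, which the m is None loop above surfaces):

pattern = r"[n]'[\w]+|[\w]+(?!')(?:[A-Za-mo-z](?='))?|(?<=\s)[\w](?=')|[^\s\w']"
for w in ("can't", "she'll", "o'clock"):
    m = re.match(pattern, w)
    print w, '->', m.group() if m else None
# can't -> ca, she'll -> she, o'clock -> None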
In [22]:
def fileline_gen(file_):
    with open(file_) as f:
        for line in f.read().splitlines():
            yield line
def fileline(file_):
    with open(file_) as f:
        return f.read().splitlines()
def fileword_gen(file_):
    with open(file_) as f:
        for word in re.findall(r'(?:\S+)|(?:\s+)', f.read()):
            yield word
def urlword_gen(url):
    f = requests.get(url, stream=True)
    for line in f.iter_lines(delimiter='\n'):
        #yield json.loads(line)
        yield line
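Note that fileword_gen yields the whitespace runs between words as well as the words themselves, so joining the stream reproduces the original text. A quick illustration of the pattern:

print re.findall(r'(?:\S+)|(?:\s+)', "don't  stop")
# -> ["don't", '  ', 'stop']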
In [23]:
contdatabase = [(line[0], ' '.join(line[1:]))
                for line in (line.split()
                             for line in fileline(files[3]['file1'])
                             )
                ]
justconts = [cont[0] for cont in contdatabase]
In [26]:
#justconts
In [24]:
padding = 20
n = 0
# NOTE: cleanlist is assumed to have been defined earlier in the session;
# it is not created anywhere in this notebook.
for cont, clean, db in izip(contslist, cleanlist, contdatabase):
    db1 = db[0]
    db2 = db[1]
    columns = ' '.join([str(n).rjust(2),
                        cont.ljust(13, "."),
                        clean.ljust(13, "."),
                        db1.ljust(13, "."),
                        db2.ljust(13, " "),
                        ])
    print columns
    n += 1
In [28]:
def countfreq(word, listtocount):
    assert type(listtocount) in (list, tuple)
    if type(listtocount[0]) is not list:
        countlist = [[w, 0] for w in listtocount]
    elif type(listtocount[0]) is list:
        countlist = listtocount
    for thing in countlist:
        if thing[0] in word:
            thing[1] += 1
    return countlist
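One caveat: thing[0] in word is a substring test, so nested contractions double-count, e.g. the "he'll" counter also fires on "she'll". A minimal demonstration:

print countfreq("she'll", [["he'll", 0], ["she'll", 0]])
# -> [["he'll", 1], ["she'll", 1]]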
In [29]:
def countfromlist(wordgen, listtocount):
    assert type(listtocount) in (list, tuple)
    if type(listtocount[0]) is not list:
        countlist = [[w, 0] for w in listtocount]
    elif type(listtocount[0]) is list:
        countlist = listtocount
    for word in wordgen:
        words = word.split()
        for w in words:
            for thing in countlist:
                if thing[0] in w:
                    thing[1] += 1
    return countlist
In [30]:
def countfromgenlist(genlist, listtocount):
    countlist = listtocount
    try:
        # .next() raises StopIteration once genlist is exhausted; the bare
        # try/finally lets the return below swallow that exception.
        gen = genlist.next()
        gengen = genlist
        countlist = countfromlist(gen, listtocount)
        countfromgenlist(gengen, countlist)
    finally:
        return countlist
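The recursion above leans on that return-inside-finally trick. An iterative equivalent (a sketch with the same in-place mutation semantics) avoids it:

def countfromgenlist_iter(genlist, listtocount):
    countlist = listtocount
    for gen in genlist:
        countlist = countfromlist(gen, countlist)
    return countlist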
In [31]:
genlist = [fileword_gen("data/text/largetext/alltext.txt"),
           fileword_gen("data/text/largetext/warandpeace.txt"),
           fileword_gen("data/text/largetext/misctext.txt"),
           fileword_gen("data/text/largetext/trustingthewatercure.txt"),
           ]
ggg = lambda: itertools.imap(fileword_gen,
                             (''.join(['data/text/largetext/', file])
                              for file in os.listdir('data/text/largetext/')
                              if file.endswith('.txt')))
derp = ggg()
fff = lambda: itertools.imap(fileline,
                             (''.join(['data/text/largetext/', file])
                              for file in os.listdir('data/text/largetext/')
                              if file.endswith('.txt')))
gengen = (gen for gen in genlist)
In [32]:
with Timer() as t:
    totals = countfromgenlist(fff(), justconts)
print t.interval
totals
Out[32]:
In [33]:
d = {}
for i in totals:
    d[i[0]] = i[1]
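Equivalently, since dict() accepts any iterable of two-item sequences:

d = dict(totals)  # same mapping, built in one pass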
In [40]:
#d
In [21]:
sorted_x = reversed(sorted(d.items(), key=operator.itemgetter(1)))
sortedgen = lambda : reversed(sorted(d.items(), key=operator.itemgetter(1)))
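reversed(sorted(...)) is equivalent to sorting in descending order in one call:

sorted_desc = sorted(d.items(), key=operator.itemgetter(1), reverse=True)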
In [8]:
#for i in sortedgen():
#    print i
In [34]:
def checkifin_iter(item, iterlist):
    for i in iterlist:
        if i[0] == item:
            return i[1] + ' --- !'
    return item
In [23]:
#for word in sortedgen():
#    print str(checkifin_iter(word[0], contdatabase)) + ','
In [24]:
#for word in fileword_gen("data/text/largetext/alltext.txt"):
#    print str(checkifin_iter(word, contdatabase)), '---', word
In [25]:
#for word in fileword_gen("data/text/largetext/alltext.txt"):
#    print word
In [2]:
contgen = lambda:(line.split('\t')[0] for line in fileline_gen('data/contractions.txt'))
contlist = [line.split('\t') for line in fileline_gen('data/contractions.txt')]
In [36]:
gen1 = contgen()
gen2 = contgen()
In [37]:
wordlist = [line for line in fileword_gen('data/contractions.txt')]
In [38]:
gencont_factory = lambda:islice(wordlist, 0, len(wordlist), 1)
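islice over the whole of wordlist is just a fresh iterator over the list; iter does the same thing with less ceremony (an equivalent definition):

gencont_factory = lambda: iter(wordlist)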
In [39]:
gencont = gencont_factory()
In [40]:
gencont.next()
Out[40]:
In [41]:
testl = []
for i in xrange(21):
    testl.append(i)
In [42]:
testl
Out[42]:
In [43]:
testl[0:10]
Out[43]:
In [44]:
filelist = [''.join(['data/text/testbatch/', file]) for file in os.listdir('data/text/testbatch/') if file.endswith('.txt')]
In [45]:
len(filelist)
Out[45]:
In [46]:
numberoffiles = len(filelist)
cores = 4
filespercore = numberoffiles / cores
[filespercore, filespercore*cores]
Out[46]:
In [47]:
def formbatches(listof, batches):
    numberof = len(listof)
    perbatch = numberof / batches
    offset = numberof - perbatch*batches
    # Give the remainder to the first batch so every item lands somewhere.
    start = 0
    end = perbatch + offset
    batchlist = []
    for batch in xrange(batches):
        batchlist.append(listof[start:end])
        start = end
        end += perbatch
    return batchlist
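A quick check of the corrected batching arithmetic (the remainder lands in the first batch):

print formbatches(range(10), 4)
# -> [[0, 1, 2, 3], [4, 5], [6, 7], [8, 9]]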
In [48]:
filebatches = formbatches(filelist, cores)
len(filebatches)
Out[48]:
In [49]:
l1 = len(filebatches[0])
l2 = len(filebatches[1])
l3 = len(filebatches[2])
l4 = len(filebatches[3])
print l1, l2, l3, l4
print l1 + l2 + l3 + l4
In [50]:
filegen_factory = lambda filelist: itertools.imap(fileline, filelist)
filebatch_gen = lambda filebatches: map(filegen_factory, filebatches)
In [51]:
#with Timer() as t:
#    totals = countfromgenlist(fff(), justconts)
#print t.interval
#totals
In [52]:
hyh = filebatch_gen(filebatches)
In [53]:
def countfromgenlist_parallel(filegens):
    return countfromgenlist(filegens, justconts)
In [63]:
def contsinparallel(filebatch_list):
    pool = Pool(processes=cores)
    results = pool.map(countfromgenlist_parallel, tuple(filebatch_list))
    product = results
    return product
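A caveat that may explain why the next cell never finished: pool.map pickles each task, and the imap objects produced by filebatch_gen are not picklable. A sketch that ships plain path lists to the workers instead (assuming the fork-based Unix start method, so justconts and cores are inherited by the children):

def countbatch_from_paths(pathbatch):
    # pathbatch is a plain list of file paths, which pickles fine
    return countfromgenlist((fileline(p) for p in pathbatch), justconts)

def contsinparallel_paths(pathbatches):
    pool = Pool(processes=cores)
    return pool.map(countbatch_from_paths, pathbatches)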
In [ ]:
with Timer() as t:
    listofresults = contsinparallel(filebatch_gen(filebatches))
print t.interval
In [56]:
print listofresults
In [57]:
countfromgenlist_parallel(filebatch_gen(filebatches)[0])
Out[57]:
In [64]:
filebatch_gen(filebatches)[0]
Out[64]:
In [65]:
print tuple(filebatch_gen(filebatches))
In [ ]:
map(countfromgenlist_parallel, filebatch_gen(filebatches))
In [35]:
contspara.contspara('data/text/testbatch/', 4)