In :from bs4 import BeautifulSoup # For processing XMLfrom BeautifulSoup import nltk import re
In :doc = open('garden-party.xml').read() soup = BeautifulSoup(doc, 'lxml')
In :segs = soup.find_all('seg')  # every <seg> element; find_all is the current name for the legacy findAll
In :text = "".join(seg.text + " " for seg in segs)  # text of every <seg>, each followed by one space
In :def cleanText(text): text = text.replace('\n', ' ') # change newlines to spaces text = text.replace('\t', ' ') # change tabs to spaces text = re.sub('\s+', ' ', text).strip() # remove redundant whitespace return text text = cleanText(text)
In :sents = nltk.sent_tokenize(text) # break the cleaned text up into sentences with NLTK's sentence tokenizer
In :len(sents) # how many sentences did the tokenizer find in total?
In :ands = [sent for sent in sents if re.search(r'^And\b', sent)]  # \b: whole word only, so e.g. "Andrew ..." is not counted
ands # sentences that start with the word "And"
Out:['And after all the weather was ideal.', 'And now she looked at the others, they were smiling too.', 'And what a beautiful morning!', "And she pointed to the lily-lawn with the hand that didn't hold the bread-and-butter.", 'And they were so lovely, with their broad, gleaming leaves, and their clusters of yellow fruit.', 'And Laura sat back.', 'And now there came a long, chuckling absurd sound.', 'And there were two tiny spots of sun, one on the inkpot, one on a silver photograph frame, playing too.', 'And I suddenly thought for once in my life I shall have enough canna lilies.', "And the children knew by her face that she hadn't got them.", 'And she said to Sadie firmly, "Tell cook I\'ll let her have them in ten minutes."', 'And—and, Jose, pacify cook if you do go into the kitchen, will you?', 'And he said to the cook, "He\'s left a wife and five little ones."', 'And Mrs. Sheridan turned round from her dressing-table.', 'And she held up her hand-mirror.', 'And it\'s not very sympathetic to spoil everybody\'s enjoyment as you\'re doing now."', 'And now she hoped her mother was right.', 'And somehow that seemed quite the best plan...', 'And she followed him into the hall.', 'And Laura, glowing, answered softly, "Have you had tea?', 'And the perfect afternoon slowly ripened, slowly faded, slowly its petals closed.', 'And they all of them sat down in the deserted marquee.', "And she's sure to have neighbours calling in and so on.", 'And, Laura!', 'And it seemed to her that kisses, voices, tinkling spoons, laughter, the smell of crushed grass were somehow inside her.', 'And the big hat with the velvet streamer—if only it was another hat!', 'And the poor face puckered up again.', 'And again she began, "You\'ll excuse her, miss, I\'m sure," and her face, swollen too, tried an oily smile.', "And this time she didn't wait for Em's sister."]
In :len(ands) # number of sentences that start with "And"
In :proportionOfAnds = (len(ands) / len(sents)) * 100 proportionOfAnds # share of all sentences that start with "And", as a percentage
Now let's find sentences that begin with "But."
In :buts = [sent for sent in sents if re.search(r'^But\b', sent)]  # \b: whole word only, so e.g. "Butter ..." is not counted
buts # sentences that start with the word "But"
Out:['But Meg could not possibly go and supervise the men.', 'But that sounded so fearfully affected that she was ashamed, and stammered like a little girl, "Oh—er—have you come—is it about the marquee?"', 'But she did quite follow him.', 'But the tall fellow interrupted.', 'But the air!', 'But at that moment Mrs. Sheridan joined them.', 'But at the word "Good-bye," and although the piano sounded more desperate than ever, her face broke into a brilliant, dreadfully unsympathetic smile.', 'But now Sadie interrupted them.', "But the back door was blocked by cook, Sadie, Godber's man and Hans.", "But Godber's man wasn't going to have his story snatched from under his very nose.", 'But Jose was still more amazed.', 'But since they were grown up, Laura and Laurie on their prowls sometimes walked through.', 'But still one must go everywhere; one must see everything.', 'But it all seemed blurred, unreal, like a picture in the newspaper.', 'But oh, these parties, these parties!', 'But to her horror the woman answered, "Walk in please, miss," and she was shut in the passage.', "But all the same you had to cry, and she couldn't go out of the room without saying something to him.", 'But Laurie—" She stopped, she looked at her brother.']
In :(len(buts) / len(sents)) * 100 # percentage of sentences that start with "But"
In [ ]: