In [12]:
from matplotlib import pyplot as plt
import re
from nltk.tokenize import regexp_tokenize
In [13]:
# Raw text of the scene used by the cells below. Each "\n" escape ends one line
# of the script (the scene header or one speaker turn).
# NOTE(review): besides the "\n" escapes, the literal also contains two physical
# line breaks (after "European swallow. " and after "guiding feathers?"). When
# the text is split on "\n", the first cuts one speech into two pieces and the
# second produces an empty line; the trailing "\n" produces one more empty line.
# Possibly an export artifact -- confirm against the original script.
scene_one='''SCENE 1: [wind] [clop clop clop] \nKING ARTHUR: Whoa there! [clop clop clop] \nSOLDIER #1: Halt! Who goes there?\nARTHUR: It is I, Arthur, son of Uther Pendragon, from the castle of Camelot. King of the Britons, defeator of the Saxons, sovereign of all England!\nSOLDIER #1: Pull the other one!\nARTHUR: I am, ... and this is my trusty servant Patsy. We have ridden the length and breadth of the land in search of knights who will join me in my court at Camelot. I must speak with your lord and master.\nSOLDIER #1: What? Ridden on a horse?\nARTHUR: Yes!\nSOLDIER #1: You're using coconuts!\nARTHUR: What?\nSOLDIER #1: You've got two empty halves of coconut and you're bangin' 'em together.\nARTHUR: So? We have ridden since the snows of winter covered this land, through the kingdom of Mercea, through--\nSOLDIER #1: Where'd you get the coconuts?\nARTHUR: We found them.\nSOLDIER #1: Found them? In Mercea? The coconut's tropical!\nARTHUR: What do you mean?\nSOLDIER #1: Well, this is a temperate zone.\nARTHUR: The swallow may fly south with the sun or the house martin or the plover may seek warmer climes in winter, yet these are not strangers to our land?\nSOLDIER #1: Are you suggesting coconuts migrate?\nARTHUR: Not at all. They could be carried.\nSOLDIER #1: What? A swallow carrying a coconut?\nARTHUR: It could grip it by the husk!\nSOLDIER #1: It's not a question of where he grips it! It's a simple question of weight ratios! A five ounce bird could not carry a one pound coconut.\nARTHUR: Well, it doesn't matter. Will you go and tell your master that Arthur from the Court of Camelot is here.\nSOLDIER #1: Listen. In order to maintain air-speed velocity, a swallow needs to beat its wings forty-three times every second, right?\nARTHUR: Please!\nSOLDIER #1: Am I right?\nARTHUR: I'm not interested!\nSOLDIER #2: It could be carried by an African swallow!\nSOLDIER #1: Oh, yeah, an African swallow maybe, but not a European swallow. 
That's my point.\nSOLDIER #2: Oh, yeah, I agree with that.\nARTHUR: Will you ask your master if he wants to join my court at Camelot?!\nSOLDIER #1: But then of course a-- African swallows are non-migratory.\nSOLDIER #2: Oh, yeah...\nSOLDIER #1: So they couldn't bring a coconut back anyway... [clop clop clop] \nSOLDIER #2: Wait a minute! Supposing two swallows carried it together?\nSOLDIER #1: No, they'd have to have it on a line.\nSOLDIER #2: Well, simple! They'd just use a strand of creeper!\nSOLDIER #1: What, held under the dorsal guiding feathers?
\nSOLDIER #2: Well, why not?\n'''
In [14]:
# Split the scene text into individual lines on newline characters.
lines = scene_one.split("\n")
# Remove the speaker label from every line.
# The pattern matches an upper-case word of two or more letters, optionally
# followed by one whitespace character, a "#<digit>" tag and/or a second
# upper-case word, and then a colon -- e.g. "ARTHUR:", "SOLDIER #1:" or
# "KING ARTHUR:". A header such as "SCENE 1:" is not matched (its digit has
# no leading "#"), so it stays in the text.
# Written as a raw string: "\s" and "\d" are invalid escape sequences in a
# normal string literal and trigger a warning on current Python versions.
pattern = r"[A-Z]{2,}(\s)?(#\d)?([A-Z]{2,})?:"
# re.sub replaces every match in the line (not only one at the start) with ''.
lines = [re.sub(pattern, '', line) for line in lines]
In [15]:
# Turn each cleaned line into its list of word tokens (runs of \w characters).
tokenized_lines = list(map(lambda text: regexp_tokenize(text, r'\w+'), lines))
In [17]:
# Number of word tokens found on each line, in the same order as the lines.
line_num_words = list(map(len, tokenized_lines))
In [19]:
# Histogram of how many word tokens each line of the scene contains.
# Uses the explicit figure/axes interface and labels the figure so it can be
# read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.hist(line_num_words)
ax.set_title("Distribution of words per line")
ax.set_xlabel("Number of words in a line")
ax.set_ylabel("Number of lines")
# Show the plot
plt.show()