In [ ]:
# Subject lines that contain a date, e.g. 12/01/99 (two digits / two digits / two digits).
# The pattern is a raw string so that \d reaches the regex engine untouched and
# Python does not warn about an invalid string escape.
[line for line in subjects if re.search(r"\d\d/\d\d/\d\d", line)]
In [ ]:
# Subject lines that contain four lowercase vowels in a row.
list(filter(lambda subject: re.search("[aeiou][aeiou][aeiou][aeiou]", subject), subjects))
In [ ]:
# Subject lines that contain "FW:" or "Fw:" anywhere in the text.
[subject for subject in subjects if re.search("F[wW]:", subject) is not None]
In [ ]:
# Subject lines that begin with "New York"; ^ anchors the match to the start
# of the line, and the first letter of each word may be upper- or lowercase.
[line for line in subjects if re.search("^[Nn]ew [Yy]ork", line)]
In [ ]:
# Subject lines that contain "oil" as a whole word; \b marks a word boundary
# on each side, so "oil" inside a longer word does not match.
list(filter(lambda subject: re.search(r"\boil\b", subject), subjects))
In [1]:
# In a normal string literal, \n is a newline escape, so this prints on two lines.
x = "this is \na test"
print(x)
In [2]:
# \t is a tab escape: the two halves of the text are separated by three tab characters.
x = "this is\t\t\tanother test"
print(x)
In [3]:
# The same characters written as a normal literal and as a raw literal:
# the normal one turns \n into a newline, the raw one keeps backslash + n.
normal, raw = "hello\nthere", r"hello\nthere"
print("normal:", normal)
print("raw:", raw)
* matches zero or more times
{n} matches exactly n times
{n,m} matches at least n times, but no more than m times
{n,} matches at least n times, with no upper limit
+ matches at least once (same as {1,})
? matches zero times or one time
[line for line in subjects if re.search(r"^R string matches regular expression if at the first line, you encounter .......
In [4]:
# Subject lines that mention cat, kitty or kitten as a whole word
# (the first letter may be upper- or lowercase).
[subject for subject in subjects if re.search(r"\b(?:[Cc]at|[kK]itty|[kK]itten)\b", subject) is not None]
In [6]:
# Read the whole subjects file into one string. The with-block closes the
# file handle as soon as the read finishes instead of leaving it open.
with open("enronsubjects.txt") as subjects_file:
    all_subjects = subjects_file.read()
In [ ]:
# First 1000 characters of the raw text.
all_subjects[:1000]
# Looking for domain names: a run of word characters followed by .com, .net or .org.
# re.search only tells us whether a line matches, so this keeps the whole matching line.
[line for line in subjects if re.search(r"\b\w+\.(?:com|net|org)\b", line)]
# re.findall returns the matched text itself rather than the lines that contain it:
#re.findall(r"\b\w+\.(?:com|net|org)\b", all_subjects)
# Analogy:
#"will you pass the pepper?" re.search "yes"
#"will you pass the pepper?" re.findall "yes, here it is" *passes pepper*