In [43]:
from urllib.request import urlopen
url_response = urlopen('http://www.py4inf.com/code/romeo.txt')
contents = str(url_response.read())
print(contents)
In [44]:
lines = contents.split('\\n')
print(lines)
In [47]:
# Collect every distinct word that appears in `lines`, case-insensitively.
# A set (rather than a list) de-duplicates automatically.
word_set = set()
for line in lines:
    # split() with no arguments splits on any run of whitespace
    # (spaces, tabs, newlines) and never yields empty strings.
    words = line.split()
    for word in words:
        # Lowercase first, otherwise an alphabetical sort puts capitalised
        # words ahead of lowercase ones and "The"/"the" count as different.
        word = word.lower()
        word_set.add(word)
print(word_set)
In [49]:
# Copy the unique words into a list and order them alphabetically.
# Despite its name, sorted_word_set is a list: sets have no ordering.
sorted_word_set = list(word_set)
sorted_word_set.sort()
print(sorted_word_set)
In [ ]:
In [ ]: