In [1]:
import sys
import re
In [2]:
# Matches one run of word characters (letters, digits, underscore).
# Written as a raw string so the backslash in \w reaches the regex engine
# untouched; in a plain string literal '\w' is an invalid escape sequence
# that newer Python versions warn about.
WORD_RE = re.compile(r'\w+')
In [4]:
# Build `index`: word -> list of (line number, column number) pairs, both
# 1-based, for every word found in zen.txt.
index = {}
with open('zen.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, start=1):
        for match in WORD_RE.finditer(line):
            word, column_no = match.group(0), match.start() + 1
            location = line_no, column_no
            # Deliberately long-winded handling of a missing key:
            occurrences = index.get(word, [])  # <1> existing list, or a fresh empty one
            occurrences.append(location)  # <2> record this occurrence
            index[word] = occurrences  # <3> store the list back (needed when it was fresh)
In [7]:
# <4> Show every word with its locations, ordered case-insensitively
# (words are compared by their upper-cased form).
for word, locations in sorted(index.items(), key=lambda entry: entry[0].upper()):
    print(word, locations)
In [8]:
# Rebuild `index` (word -> list of 1-based (line, column) pairs) from
# zen.txt, this time letting dict.setdefault handle missing keys.
index = {}
with open('zen.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, start=1):
        for match in WORD_RE.finditer(line):
            word, column_no = match.group(0), match.start() + 1
            location = line_no, column_no
            # <1> Get the list for `word` (inserting an empty list first if
            # the word is new) and append to it, all in one expression.
            index.setdefault(word, []).append(location)
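`my_dict.setdefault(key, []).append(new_value)`

gives the same result as

```python
if key not in my_dict:
    my_dict[key] = []
my_dict[key].append(new_value)
```

but `setdefault` looks the key up only once, whereas the longer form searches for it two or three times.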
In [12]:
from collections import defaultdict
In [14]:
# Rebuild `index` (word -> list of 1-based (line, column) pairs) from
# zen.txt using a defaultdict whose default_factory is the list constructor.
index = defaultdict(list)
with open('zen.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, start=1):
        for match in WORD_RE.finditer(line):
            word, column_no = match.group(0), match.start() + 1
            location = line_no, column_no
            # <1> A word seen for the first time gets an empty list from
            # default_factory, so the append always succeeds.
            index[word].append(location)
In [16]:
# A fresh, empty defaultdict whose default_factory is the list constructor.
dd = defaultdict(list)
In [17]:
# Subscripting a missing key calls default_factory, stores the new empty list
# under that key, and returns it.
dd['abc']
Out[17]:
In [18]:
dd.get('bbb')  # .get() returns None for a missing key; default_factory is not called and nothing is inserted
In [20]:
# Display dd: only the key created by subscripting ('abc') should be present;
# the .get('bbb') lookup did not add a key.
dd
Out[20]:
In [ ]: