example-code-2e/03-dict-set/index_default.py

27 lines
796 B
Python
Raw Normal View History

2014-10-14 19:26:55 +02:00
# adapted from Alex Martelli's example in "Re-learning Python"
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
# (slide 41) Ex: lines-by-word file index
2020-02-19 03:58:03 +01:00
# tag::INDEX_DEFAULT[]
2014-10-14 19:26:55 +02:00
"""Build an index mapping word -> list of occurrences"""
import collections
2021-05-21 23:56:12 +02:00
import re
import sys
2014-10-14 19:26:55 +02:00
2019-03-20 15:05:34 +01:00
# A "word" is any maximal run of word characters (letters, digits, underscore).
WORD_RE = re.compile(r'\w+')


def build_index(lines):
    """Map each word to the list of places where it occurs in *lines*.

    Args:
        lines: any iterable of strings, e.g. an open text file.

    Returns:
        A ``collections.defaultdict(list)`` whose keys are the words matched
        by ``WORD_RE`` (case is preserved) and whose values are lists of
        ``(line_no, column_no)`` tuples, both counted from 1, in the order
        the occurrences were encountered.
    """
    index = collections.defaultdict(list)  # <1>
    for line_no, line in enumerate(lines, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            # match.start() is 0-based; report 1-based columns.
            column_no = match.start() + 1
            location = (line_no, column_no)
            index[word].append(location)  # <2>
    return index


def print_index(index):
    """Print one ``word [locations]`` line per key of *index*."""
    # display in alphabetical order
    for word in sorted(index, key=str.upper):
        print(word, index[word])


def main():
    """Index the UTF-8 text file named by the first command-line argument.

    Exits with a usage message on stderr when no file name is given, instead
    of failing with an ``IndexError`` traceback.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: {} <text-file>'.format(sys.argv[0]))
    with open(sys.argv[1], encoding='utf-8') as fp:
        index = build_index(fp)
    print_index(index)


if __name__ == '__main__':
    main()
2020-02-19 03:58:03 +01:00
# end::INDEX_DEFAULT[]