example-code-2e/attic/dicts/index_alex.py
2019-03-20 22:05:34 +08:00

23 lines
586 B
Python

# adapted from Alex Martelli's example in "Re-learning Python"
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
# (slide 41) Ex: lines-by-word file index
"""Build a map word -> list-of-line-numbers"""
import sys
import re
NONWORD_RE = re.compile(r'\W+')
idx = {}
with open(sys.argv[1], encoding='utf-8') as fp:
for n, line in enumerate(fp, 1):
for word in NONWORD_RE.split(line):
if word.strip():
idx.setdefault(word, []).append(n)
# print in alphabetical order
for word in sorted(idx, key=str.upper):
print(word, idx[word])