27 lines
787 B
Python
27 lines
787 B
Python
|
# adapted from Alex Martelli's example in "Re-learning Python"
|
||
|
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
|
||
|
# (slide 41) Ex: lines-by-word file index
|
||
|
|
||
|
# BEGIN INDEX_DEFAULT
|
||
|
"""Build an index mapping word -> list of occurrences"""
|
||
|
|
||
|
import sys
|
||
|
import re
|
||
|
import collections
|
||
|
|
||
|
# Raw string: in a plain literal '\w' is an invalid escape sequence, which
# newer Python versions warn about at compile time.
WORD_RE = re.compile(r'\w+')


def build_index(lines):
    """Map each word to the list of places where it occurs.

    ``lines`` is any iterable of strings; an open text file works.

    Returns a ``collections.defaultdict(list)`` whose keys are the words
    matched by ``WORD_RE`` and whose values are lists of
    ``(line_no, column_no)`` tuples, both 1-based, in order of appearance.
    """
    index = collections.defaultdict(list)  # <1>
    for line_no, line in enumerate(lines, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start() + 1  # match.start() is 0-based
            location = (line_no, column_no)
            # A missing key gets a fresh list from the defaultdict, so no
            # explicit "first occurrence" check is needed.
            index[word].append(location)  # <2>
    return index


def main():
    """Index the UTF-8 text file named by ``sys.argv[1]`` and print it.

    Prints one line per distinct word: the word followed by its list of
    ``(line_no, column_no)`` locations. Exits with a usage message when no
    file name was given on the command line.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: {} <text-file>'.format(sys.argv[0]))

    with open(sys.argv[1], encoding='utf-8') as fp:
        index = build_index(fp)

    # print in alphabetical order
    for word in sorted(index, key=str.upper):
        print(word, index[word])


# Run only when executed as a script, so importing the module has no side
# effects beyond defining the names above.
if __name__ == '__main__':
    main()
|
||
|
# END INDEX_DEFAULT
|