update from O'Reilly repo

2021-03-22 12:24:21 -03:00
parent e1cd63aa04
commit 2f8bf06270
28 changed files with 470 additions and 125 deletions
--- a/22-async/mojifinder/charindex.py
+++ b/22-async/mojifinder/charindex.py
@@ -4,28 +4,28 @@
 Class ``InvertedIndex`` builds an inverted index mapping each word to
 the set of Unicode characters which contain that word in their names.

-Optional arguments to the constructor are ``first`` and ``last+1`` character
-codes to index, to make testing easier.
+Optional arguments to the constructor are ``first`` and ``last+1``
+character codes to index, to make testing easier. In the examples
+below, only the ASCII range was indexed.

-In the example below, only the ASCII range was indexed::
+The ``entries`` attribute is a ``defaultdict`` with uppercased single
+words as keys::

    >>> idx = InvertedIndex(32, 128)
+    >>> idx.entries['DOLLAR']
+    {'$'}
    >>> sorted(idx.entries['SIGN'])
    ['#', '$', '%', '+', '<', '=', '>']
-    >>> sorted(idx.entries['DIGIT'])
-    ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
-    >>> idx.entries['DIGIT'] & idx.entries['EIGHT']
-    {'8'}
-    >>> idx.search('digit')
-    ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
-    >>> idx.search('eight digit')
-    ['8']
-    >>> idx.search('a letter')
-    ['A', 'a']
-    >>> idx.search('a letter capital')
-    ['A']
-    >>> idx.search('borogove')
-    []
+    >>> idx.entries['A'] & idx.entries['SMALL']
+    {'a'}
+    >>> idx.entries['BRILLIG']
+    set()
+
+The ``.search()`` method takes a string, uppercases it, splits it into
+words, and returns the intersection of the entries for each word::
+
+    >>> idx.search('capital a')
+    {'A'}

 """

@@ -58,17 +58,16 @@ class InvertedIndex:
                    entries[word].add(char)
        self.entries = entries

-    def search(self, query: str) -> list[Char]:
+    def search(self, query: str) -> set[Char]:
        if words := list(tokenize(query)):
-            first = self.entries[words[0]]
-            result = first.intersection(*(self.entries[w] for w in words[1:]))
-            return sorted(result)
+            found = self.entries[words[0]]
+            return found.intersection(*(self.entries[w] for w in words[1:]))
        else:
-            return []
+            return set()


-def format_results(chars: list[Char]) -> Iterator[str]:
-    for char in chars:
+def format_results(chars: set[Char]) -> Iterator[str]:
+    for char in sorted(chars):
        name = unicodedata.name(char)
        code = ord(char)
        yield f'U+{code:04X}\t{char}\t{name}'
@@ -77,7 +76,7 @@ def format_results(chars: list[Char]) -> Iterator[str]:
 def main(words: list[str]) -> None:
    if not words:
        print('Please give one or more words to search.')
-        sys.exit()
+        sys.exit(2)  # command line usage error
    index = InvertedIndex()
    chars = index.search(' '.join(words))
    for line in format_results(chars):