Add files via upload

2020-06-22 16:52:41 -07:00 · 2020-06-22 16:52:41 -07:00 · 485f9345d6
commit 485f9345d6
parent 7cfaf87b41
3 changed files with 1738 additions and 0 deletions
--- a/ipynb/Portmantwo.ipynb
+++ b/ipynb/Portmantwo.ipynb
--- a/ipynb/natalie.txt
+++ b/ipynb/natalie.txt
--- a/ipynb/portman.py
+++ b/ipynb/portman.py
@ -0,0 +1,139 @@
+# Generate a portmantout word 
+# Peter Norvig
+# See https://github.com/norvig/pytudes/blob/master/ipynb/Portmantout.ipynb
+
+from collections import defaultdict, Counter
+from typing import List, Tuple, Set, Dict, Any
+
+# Type aliases and index constants shared by the functions below.
+Word = str # A word is a plain string.
+# `precompute` attaches .subwords, .bridges, .unused and .startswith attributes to a Wordset instance.
+class Wordset(set): """A set of words."""
+Step = Tuple[int, str] # An (overlap, word) pair.
+OVERLAP, WORD = 0, 1 # Indexes of the two parts of a Step.
+Path = List[Step] # A list of steps.
+# NOTE: Bridge is an ordinary tuple value (its last element is Ellipsis), not a `typing` construct;
+# it is not used as an annotation in the code shown here.
+Bridge = (int, Step,...) # An excess letter count and step(s), e.g. (1, (2, 'arrow')).
+EXCESS, STEPS = 0, slice(1, None) # Indexes of the two parts of a bridge.
+
+# Default word set, read from 'wordlist.asc' (whitespace-separated words) at import time.
+# The `with` block closes the file handle as soon as the contents have been read.
+with open('wordlist.asc') as _wordfile:
+    W = Wordset(_wordfile.read().split())
+
+def portman(P: Path) -> Word:
+    """Build the portmantout string S described by the path P.
+
+    Every (overlap, word) step contributes `word` minus its first `overlap`
+    letters; the contributions are concatenated in path order."""
+    pieces = []
+    for step in P:
+        skip, text = step
+        pieces.append(text[skip:])
+    return ''.join(pieces)
+
+def natalie(W: Wordset, start=None) -> Path:
+    """Return a portmantout path containing all words in W.
+
+    W is mutated: `precompute` attaches its cached tables and `used` removes
+    each visited word from `W.unused` / `W.startswith`.
+
+    start: optional first word of the path; when falsy, an arbitrary unused
+        word is chosen. `used` asserts that the word is a member of W.
+
+    Returns a list of (overlap, word) steps, or [] when there is no word to
+    start from (an empty word set and no `start`). Whatever `bridging_steps`
+    raises when it cannot find a bridge propagates to the caller."""
+    precompute(W)
+    word = start or first(W.unused)
+    if word is None:
+        # Nothing to cover: previously this fell through to `used(W, None)`,
+        # which failed its membership assertion.
+        return []
+    used(W, word)
+    P = [(0, word)]
+    while W.unused:
+        # Prefer a direct overlap with an unused word; otherwise bridge to one.
+        steps = unused_step(W, word) or bridging_steps(W, word)
+        # `word` is deliberately rebound so the next search starts from the last word added.
+        for (overlap, word) in steps:
+            P.append((overlap, word))
+            used(W, word)
+    return P
+
+def unused_step(W: Wordset, prev_word: Word) -> List[Step]:
+    """Find one unused word that starts with a suffix of prev_word.
+
+    Suffixes are tried in the order `suffixes` produces them. The result is a
+    one-element list [(overlap, unused_word)] where overlap is the length of
+    the matching suffix, or [] when no suffix starts an unused word."""
+    for tail in suffixes(prev_word):
+        candidates = W.startswith.get(tail)
+        if candidates:
+            # Any member will do; take whichever the set yields first.
+            return [(len(tail), next(iter(candidates)))]
+    return []
+
+def bridging_steps(W: Wordset, prev_word: Word) -> Tuple[Step, ...]:
+    """The steps from the shortest bridge that bridges
+    from a suffix of prev_word to a prefix of an unused word.
+
+    Candidates are the entries W.bridges[suf][pre] for every suffix `suf` of
+    prev_word that has bridges and every `pre` that still starts some unused
+    word; the smallest bridge tuple (least excess first) wins.
+
+    Returns the step(s) of that bridge as a tuple (the bridge minus its
+    leading excess count).
+    Raises ValueError when no candidate bridge exists."""
+    # Use .get() rather than indexing: W.startswith is a defaultdict, so
+    # W.startswith[pre] would insert an empty set for every prefix probed.
+    bridge = min((W.bridges[suf][pre]
+                  for suf in suffixes(prev_word) if suf in W.bridges
+                  for pre in W.bridges[suf] if W.startswith.get(pre)),
+                 default=None)
+    if bridge is None:
+        raise ValueError(f'no bridge from "{prev_word}" to any unused word')
+    return bridge[STEPS]
+
+def precompute(W):
+    """Attach the lookup tables used by `natalie` to W as attributes.
+
+    .subwords and .bridges depend only on the word set, so they are built the
+    first time and reused afterwards; .unused and .startswith are consumed
+    while a path is built, so they are rebuilt on every call."""
+    already_cached = hasattr(W, 'subwords') and hasattr(W, 'bridges')
+    if not already_cached:
+        W.subwords = subwords(W)
+        W.bridges = build_bridges(W)
+    # Words contained in other words never need a step of their own.
+    W.unused = W - W.subwords
+    W.startswith = compute_startswith(W.unused)
+    
+def used(W, word):
+    """Mark word as used: drop it from `W.unused` and from `W.startswith[pre]`
+    for each of its prefixes, deleting any prefix entry that becomes empty.
+
+    Asserts that word belongs to W; does nothing further if word is not
+    currently in `W.unused`."""
+    assert word in W, f'used "{word}", which is not in the word set'
+    if word not in W.unused:
+        return
+    W.unused.remove(word)
+    for head in prefixes(word):
+        starters = W.startswith[head]
+        starters.remove(word)
+        if not starters:
+            del W.startswith[head]
+                
+def first(iterable, default=None):
+    """Return the first item of iterable, or default if it yields nothing."""
+    for item in iterable:
+        return item
+    return default
+
+def multimap(pairs) -> Dict[Any, set]:
+    """Group (key, val) pairs into a defaultdict of the form {key: {val,...}}.
+
+    Because the result is a defaultdict(set), looking up a missing key yields
+    (and stores) an empty set."""
+    grouped = defaultdict(set)
+    for pair in pairs:
+        k, v = pair
+        grouped[k].add(v)
+    return grouped
+
+def compute_startswith(words) -> Dict[str, Set[Word]]:
+    """Map each proper prefix to the set of words that begin with it, e.g.
+    {'somet': {'something', 'sometimes'},...}."""
+    def prefix_pairs():
+        # One (prefix, word) pair per proper prefix of every word.
+        for w in words:
+            for head in prefixes(w):
+                yield head, w
+    return multimap(prefix_pairs())
+
+def subwords(W: Wordset) -> Set[str]:
+    """The members of W that occur as a proper substring of some other member."""
+    found = set()
+    for w in W:
+        # Keep only those substrings of w that are themselves words.
+        found.update(subparts(w) & W)
+    return found
+    
+def suffixes(word) -> List[str]:
+    """The non-empty proper suffixes of word, ordered from longest to shortest."""
+    longest = len(word) - 1
+    # Counting the suffix length downwards gives the longest-first order directly.
+    return [word[-size:] for size in range(longest, 0, -1)]
+
+def prefixes(word) -> List[str]:
+    """The non-empty proper prefixes of word, ordered from shortest to longest."""
+    heads = []
+    for cut in range(1, len(word)):
+        heads.append(word[:cut])
+    return heads
+
+def subparts(word) -> Set[str]:
+    """The set of non-empty substrings of word, excluding word itself."""
+    n = len(word)
+    parts = set()
+    for i in range(n):
+        # A substring starting at index 0 must stop short of the end,
+        # otherwise it would be the whole word.
+        stop = n if i == 0 else n + 1
+        for j in range(i + 1, stop):
+            parts.add(word[i:j])
+    return parts
+
+def splits(word) -> List[Tuple[int, str, str]]:
+    """List the (excess, pre, suf) tuples for word.
+
+    For each tuple, pre is word[:i], suf is word[i+excess:], and `excess` is
+    the number of letters between them; pre and suf are always non-empty.
+    Tuples are ordered by increasing excess, then by increasing length of pre."""
+    n = len(word)
+    triples = []
+    for excess in range(n - 1):
+        for i in range(1, n - excess):
+            triples.append((excess, word[:i], word[i + excess:]))
+    return triples
+
+def try_bridge(bridges, pre, suf, excess, word, step2=None):
+    """Record a bridge at bridges[pre][suf] unless one with no more excess is already stored.
+
+    The stored value is (excess, (len(pre), word)), followed by step2 when a
+    truthy second step is supplied."""
+    known = bridges[pre]
+    if suf in known and known[suf][EXCESS] <= excess:
+        return  # The existing bridge is at least as good; keep it.
+    steps = ((len(pre), word),)
+    if step2:
+        steps += (step2,)
+    known[suf] = (excess,) + steps
+
+def build_bridges(W: Wordset, maxlen=5, end='qujvz'):
+    """Build the table bridges[pre][suf] == (excess, (overlap, word)), e.g.
+    bridges['ar']['c'] == (0, (2, 'arc')).
+
+    Only short words are considered: at most `maxlen` letters, or one more
+    when the word's last letter is in `end`. One-word bridges come from every
+    split of a short word. Two-word bridges join word1 to a word2 that starts
+    with a suffix of word1, and are keyed by word1's first letter and word2's
+    last letter (skipped when those letters are equal). `try_bridge` decides
+    whether each candidate replaces what is already stored."""
+    bridges = defaultdict(dict)
+    shortwords = []
+    for w in W:
+        limit = maxlen + 1 if w[-1] in end else maxlen
+        if len(w) <= limit:
+            shortwords.append(w)
+    shortstartswith = compute_startswith(shortwords)
+    # One-word bridges
+    for word in shortwords:
+        for (excess, pre, suf) in splits(word):
+            try_bridge(bridges, pre, suf, excess, word)
+    # Two-word bridges
+    for word1 in shortwords:
+        A = word1[0]
+        for suf in suffixes(word1):
+            overlap = len(suf)
+            for word2 in shortstartswith[suf]:
+                B = word2[-1]
+                if A == B:
+                    continue
+                # Letters of the combined string other than A and B.
+                excess = len(word1) + len(word2) - overlap - 2
+                try_bridge(bridges, A, B, excess, word1, (overlap, word2))
+    return bridges
+
+if __name__ == "__main__":
+    # Script entry point: read the word list and print its portmantout string.
+    # The `with` block closes the file before the path computation starts.
+    with open('wordlist.asc') as wordfile:
+        W = Wordset(wordfile.read().split())
+    print(portman(natalie(W)))