Add files via upload
This commit is contained in:
parent
7cfaf87b41
commit
485f9345d6
1598
ipynb/Portmantwo.ipynb
Normal file
1598
ipynb/Portmantwo.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
1
ipynb/natalie.txt
Normal file
1
ipynb/natalie.txt
Normal file
File diff suppressed because one or more lines are too long
139
ipynb/portman.py
Normal file
139
ipynb/portman.py
Normal file
@ -0,0 +1,139 @@
|
||||
# Generate a portmantout word
|
||||
# Peter Norvig
|
||||
# See https://github.com/norvig/pytudes/blob/master/ipynb/Portmantout.ipynb
|
||||
|
||||
from collections import defaultdict, Counter
|
||||
from typing import List, Tuple, Set, Dict, Any
|
||||
|
||||
Word = str


class Wordset(set):
    """A set of words."""


# A Step is an (overlap, word) pair: `word` is appended to the path after
# dropping its first `overlap` letters (see `portman`).
Step = Tuple[int, str]
OVERLAP = 0  # Index of the overlap count within a Step.
WORD = 1     # Index of the word within a Step.

# A Path is a list of steps.
Path = List[Step]

# A Bridge is an excess letter count followed by one or more steps,
# e.g. (1, (2, 'arrow')).
Bridge = (int, Step, ...)
EXCESS = 0              # Index of the excess letter count within a Bridge.
STEPS = slice(1, None)  # Slice selecting the step(s) of a Bridge.
|
||||
|
||||
# Load the default word set: one Wordset entry per whitespace-separated token
# of 'wordlist.asc'. The `with` block closes the file as soon as it is read.
with open('wordlist.asc') as wordfile:
    W = Wordset(wordfile.read().split())
|
||||
|
||||
def portman(P: Path) -> Word:
    """Compute the portmantout string S from the path P.

    Each step contributes its word minus the first `overlap` letters, which
    the step records as already covered by the preceding word.
    """
    pieces = []
    for overlap, word in P:
        pieces.append(word[overlap:])
    return ''.join(pieces)
|
||||
|
||||
def natalie(W: Wordset, start=None) -> Path:
    """Return a portmantout path containing all words in W.

    The path is grown greedily: from the current word, step to an unused word
    that overlaps it if there is one, otherwise take the steps of the shortest
    bridge towards some unused word.

    W:     the word set; `precompute` attaches its working tables to it.
    start: optional first word (must be in W); by default an arbitrary
           unused word is chosen.

    Returns [] when there is no word to start from (empty W and no `start`).
    """
    precompute(W)
    word = start or first(W.unused)
    if word is None:
        # Nothing to cover; without this guard `used(W, None)` would trip its assertion.
        return []
    used(W, word)
    P = [(0, word)]
    while W.unused:
        steps = unused_step(W, word) or bridging_steps(W, word)
        # Rebinding `word` here is deliberate: after the loop it is the last
        # word on the path, which the next iteration extends from.
        for (overlap, word) in steps:
            P.append((overlap, word))
            used(W, word)
    return P
|
||||
|
||||
def unused_step(W: Wordset, prev_word: Word) -> List[Step]:
    """Return [(overlap, unused_word)] or [].

    Suffixes of prev_word are tried longest first, so the step returned has
    the largest overlap available among the unused words.
    """
    for suf in suffixes(prev_word):
        candidates = W.startswith.get(suf)
        if candidates:  # A missing key and an empty set both mean "no match".
            return [(len(suf), next(iter(candidates)))]
    return []
|
||||
|
||||
def bridging_steps(W: Wordset, prev_word: Word) -> List[Step]:
    """The steps from the shortest bridge that bridges
    from a suffix of prev_word to a prefix of an unused word.

    In `W.bridges[suf][pre]` the outer key is the start of the bridge (it must
    equal a suffix of prev_word) and the inner key is the end of the bridge
    (it must be a prefix of some unused word). Candidates are compared as
    tuples, so the one with the least excess wins.

    Returns the bridge's steps (a tuple slice of the bridge).
    Raises ValueError if no bridge leads from prev_word to any unused word.
    """
    # Use .get() so that probing for a prefix does not insert an empty set
    # into the W.startswith defaultdict as a side effect.
    candidates = (W.bridges[suf][pre]
                  for suf in suffixes(prev_word) if suf in W.bridges
                  for pre in W.bridges[suf] if W.startswith.get(pre))
    bridge = min(candidates, default=None)
    if bridge is None:
        raise ValueError(f'no bridge from "{prev_word}" to any unused word')
    return bridge[STEPS]
|
||||
|
||||
def precompute(W):
    """Attach the data structures that `natalie` needs to W.

    .subwords and .bridges are static, so they are computed only if W does
    not already carry both of them. .unused and .startswith change while a
    path is being built, so they are rebuilt on every call.
    """
    cached = hasattr(W, 'subwords') and hasattr(W, 'bridges')
    if not cached:
        W.subwords = subwords(W)
        W.bridges = build_bridges(W)
    # Words that are part of another word never need a step of their own.
    W.unused = W - W.subwords
    W.startswith = compute_startswith(W.unused)
|
||||
|
||||
def used(W, word):
    """Mark word as used: drop it from `W.unused` and from `W.startswith[pre]`
    for each of its prefixes, deleting any prefix entry that becomes empty.

    Does nothing if word is not currently in `W.unused`.
    """
    assert word in W, f'used "{word}", which is not in the word set'
    if word not in W.unused:
        return
    W.unused.remove(word)
    for pre in prefixes(word):
        bucket = W.startswith[pre]
        bucket.remove(word)
        if not bucket:
            del W.startswith[pre]
|
||||
|
||||
def first(iterable, default=None):
    """Return the first item of iterable, or default if it yields nothing."""
    for item in iterable:
        return item
    return default
|
||||
|
||||
def multimap(pairs) -> Dict[Any, set]:
    """Group (key, val) pairs into a dict of {key: {val,...}}.

    The result is a defaultdict(set), so looking up a missing key yields
    (and stores) an empty set.
    """
    grouped = defaultdict(set)
    for pair in pairs:
        key, val = pair
        grouped[key].add(val)
    return grouped
|
||||
|
||||
def compute_startswith(words) -> Dict[str, Set[Word]]:
    """Map each proper prefix to the set of words that start with it:
    {'somet': {'something', 'sometimes'},...}."""
    def prefix_pairs():
        for w in words:
            for pre in prefixes(w):
                yield pre, w

    return multimap(prefix_pairs())
|
||||
|
||||
def subwords(W: Wordset) -> Set[str]:
    """All the words in W that are a proper substring of some other word in W."""
    found = set()
    for w in W:
        found.update(subparts(w) & W)
    return found
|
||||
|
||||
def suffixes(word) -> List[str]:
    """All non-empty proper suffixes of word, longest first."""
    result = []
    for start in range(1, len(word)):
        result.append(word[start:])
    return result
|
||||
|
||||
def prefixes(word) -> List[str]:
    """All non-empty proper prefixes of word, shortest first."""
    result = []
    for end in range(1, len(word)):
        result.append(word[:end])
    return result
|
||||
|
||||
def subparts(word) -> Set[str]:
    """All non-empty proper substrings of word."""
    n = len(word)
    parts = set()
    for i in range(n):
        # From position 0 the slice must stop short of the end, otherwise it
        # would be the whole word; from any later position it may reach the end.
        stop = n + 1 if i > 0 else n
        for j in range(i + 1, stop):
            parts.add(word[i:j])
    return parts
|
||||
|
||||
def splits(word) -> List[Tuple[int, str, str]]:
    """A sequence of (excess, pre, suf) tuples.

    `pre` is a non-empty prefix of word and `suf` a non-empty suffix, with
    `excess` letters of word lying between them. Tuples come in order of
    increasing excess.
    """
    n = len(word)
    result = []
    for excess in range(n - 1):
        for i in range(1, n - excess):
            result.append((excess, word[:i], word[i + excess:]))
    return result
|
||||
|
||||
def try_bridge(bridges, pre, suf, excess, word, step2=None):
    """Record a bridge for bridges[pre][suf] unless one with no more excess is already stored.

    The stored bridge is (excess, (len(pre), word)), with step2 appended as a
    further step when it is given.
    """
    known = bridges[pre]
    if suf in known and known[suf][EXCESS] <= excess:
        return  # The existing bridge is at least as good; keep it.
    steps = [(len(pre), word)]
    if step2:
        steps.append(step2)
    known[suf] = (excess, *steps)
|
||||
|
||||
def build_bridges(W: Wordset, maxlen=5, end='qujvz'):
    """A table of bridges[pre][suf] == (excess, (overlap, word)), e.g.
    bridges['ar']['c'] == (0, (2, 'arc')).

    Only short words are used as bridge material: at most `maxlen` letters,
    or `maxlen` + 1 when the word's last letter is one of `end`.
    `excess` counts the letters of a bridge not covered by `pre` or `suf`.
    Two-word bridges are keyed by the first letter of the first word and the
    last letter of the second word.
    """
    def is_short(w):
        return len(w) <= maxlen + (w[-1] in end)

    bridges = defaultdict(dict)
    shortwords = [w for w in W if is_short(w)]
    shortstartswith = compute_startswith(shortwords)

    # One-word bridges
    for word in shortwords:
        for (excess, pre, suf) in splits(word):
            try_bridge(bridges, pre, suf, excess, word)

    # Two-word bridges
    for first_word in shortwords:
        A = first_word[0]
        for suf in suffixes(first_word):
            overlap = len(suf)
            for second_word in shortstartswith[suf]:
                B = second_word[-1]
                if A == B:
                    continue
                # Letters of the joined pair, minus the one-letter pre (A)
                # and the one-letter suf (B).
                excess = len(first_word) + len(second_word) - overlap - 2
                try_bridge(bridges, A, B, excess, first_word, (overlap, second_word))
    return bridges
|
||||
|
||||
if __name__ == "__main__":
    # Build a fresh word set from 'wordlist.asc' and print its portmantout.
    # The `with` block closes the file as soon as it has been read.
    with open('wordlist.asc') as wordfile:
        W = Wordset(wordfile.read().split())
    print(portman(natalie(W)))
|
Loading…
Reference in New Issue
Block a user