from collections import Counter, deque import re class PhraseDict(dict): """A dictionary of {letters: phrase}, such as {'donaldeknuth': 'Donald E. Knuth'}, with: .prefixes: Counter of {'pre': n} where n is the number of keys that start with 'pre' .suffixes: Counter of {'xes': n} where n is the number of keys that end with 'xes'""" def __init__(self, phrases): for phrase in phrases: phrase = phrase.strip() self[letters(phrase)] = phrase self.prefixes = Counter(x for p in self for x in prefixes(p)) self.suffixes = Counter(x for p in self for x in suffixes(p)) def prefixes(phrase): return [phrase[:i] for i in range(1, len(phrase) + 1)] def suffixes(phrase): return [phrase[-i:] for i in range(1, len(phrase) + 1)] def letters(phrase, sub=re.compile(r'[\W]+').sub): "Remove all the non-letters from phrase; return lowercase version." return sub('', phrase).lower() DICT = PhraseDict(open('npdict.txt')) class Panama: """Panama represents a palindrome, or a state in searching for one. It has .left and .right to hold the phrases that are chosen, and .L and .R to hold the current partial phrases in the middle (still working on these). Also, a .set of all complete phrases, and the .dict of allowable phrases to choose from.""" def __init__(self, left=['aman', 'aplan'], L='aca', R='', right=['acanal', 'panama'], dict=DICT): assert cat(left + [L]) == cat([R] + right)[::-1] self.left = list(left) # list of complete phrases on left self.L = L # an incomplete phrase on left self.R = R # an incomplete phrase on right self.right = deque(right) # deque of complete phrases on right self.dict = dict # a {letters: actual_phrase} mapping self.set = set(left + right) # a set of all complete phrases in palindrome self.best = [] # list of phrases in longest palindrome found self.Nshown = 0 # the number of phrases shown in the previous printout self.i = 0 # the number of steps taken in the search self.check() def __str__(self): return self.original_phrases(self.best) def original_phrases(self, phrases): return ', '.join(self.dict[phrase] for phrase in phrases) def search(self, steps=10**5): """Depth-first search for palindromes. From the current state, find all applicable actions. Do the first one, and put on the stack reminders to undo it and try the others, but first search deeper from the result of the first action.""" stack = [self.applicable_actions()] for self.i in range(steps): if not stack: return command = stack.pop() if isinstance(command, UndoCommand): self.undo(command) elif command: act = command.pop() self.do(act) self.check() stack.extend([command, UndoCommand(act), self.applicable_actions()]) def do(self, act): "Modify the current state by adding a letter, or finishing a phrase." if act == ',': # finish phrase on left self.set.add(self.L) self.left.append(self.L) self.L = '' elif act == ';': # finish phrase on right self.set.add(self.R) self.right.appendleft(self.R) self.R = '' else: # add a letter self.L = self.L + act self.R = act + self.R def undo(self, act): "Modify the current state by undoing an action that was previously done." if act == ',': # unfinish phrase on left assert self.L == '' self.L = self.left.pop() self.set.remove(self.L) elif act == ';': # unfinish phrase on right assert self.R == '' self.R = self.right.popleft() self.set.remove(self.R) else: # remove a letter self.L = self.L[:-1] self.R = self.R[1:] def check(self): "Check to see if current state is a palindrome, and if so, record it and maybe print." if not self.is_palindrome(): return N = len(self.left) + len(self.right) if N > len(self.best): self.best = self.left + list(self.right) if N - self.Nshown > 1000 or (N > 14000 and N - self.Nshown > 100) or N > 14500: self.Nshown = N print(self.report()) def report(self): N = len(self.best) nwords = N + sum(self.dict[p].count(' ') for p in self.best) nletters = sum(len(p) for p in self.best) return ('Pal: {:6,d} phrases, {:6,d} words, {:6,d} letters (at step {:,d})' .format(N, nwords, nletters, self.i+1)) def applicable_actions(self): L, R, D = self.L, self.R, self.dict actions = [] def score(A): return D.prefixes[L+A] * D.suffixes[A+R] if self.is_allowed(L): actions.append(',') if self.is_allowed(R): actions.append(';') for A in sorted(alphabet, key=score): if score(A) > 0: actions.append(A) return actions def is_allowed(self, phrase): return phrase in self.dict and phrase not in self.set def is_palindrome(self): "Is this a palindrome? (Does any extra .L or .R match the other side?)" return ((self.L == '' and self.left[-1].endswith(self.R)) or (self.R == '' and self.right[0].startswith(self.L))) alphabet = 'abcdefghijklmnopqrstuvwxyz' cat = ''.join UndoCommand = str DoCommand = list ################ Unit Tests def test1(): assert prefixes('hello') == ['h', 'he', 'hel', 'hell', 'hello'] assert suffixes('hello') == ['o', 'lo', 'llo', 'ello', 'hello'] assert letters('a man') == 'aman' assert letters('an elk') == 'anelk' assert letters('Mr. T') == 'mrt' assert letters('Donald E. Knuth') == 'donaldeknuth' assert len(DICT) == 125512 assert 'panama' in DICT assert 'aman' in DICT assert 'threemen' not in DICT assert DICT['acanal'] == 'a canal' return 'ok' def test2(): p1 = Panama() assert p1.is_palindrome() assert str(p1) == 'a man, a plan, a canal, Panama' p2 = Panama(['aman','aplan'], 'acadd','dd', ['acanal', 'panama']) assert not p2.is_palindrome() p3 = Panama(['maya'], '', '', ['ayam']) assert p3.is_palindrome() assert str(p3) == 'Maya, a yam' return 'ok' if __name__ == '__main__': p = Panama(); test1() test2() p.search(10**6) print(p.report()) print(str(p))