Add subdirectories

Add /ipynb/ and /py/ subdirectories to keep the home page neater.
2017-10-23 10:32:23 -07:00
parent ff96ec21ba
commit 88819c4cd0
61 changed files with 48 additions and 46 deletions
--- a/py/SET.py
+++ b/py/SET.py
@@ -0,0 +1,134 @@
+import random
+import collections 
+import itertools 
+
+"""
+Game of Set                (Peter Norvig 2010-2015)
+
+How often do sets appear when we deal an array of cards?
+How often in the course of playing out the game?
+
+Here are the data types we will use:
+
+    card:    A string, such as '3R=0', meaning "three red striped ovals".
+    deck:    A list of cards, initially of length 81.
+    layout:  A list of cards, initially of length 12.
+    set:     A tuple of 3 cards.
+    Tallies: A dict: {12: {True: 33, False: 1}} means a layout of size 12
+             tallied 33 sets and 1 non-set.
+"""
+
+#### Cards, dealing cards, and defining the notion of sets.
+
+# The full deck: one card for every combination of the four features.
+# Each card is a 4-character string: number, color, shade, symbol (in that order).
+CARDS = [number + color + shade + symbol 
+         for number in '123' 
+         for color  in 'RGP' 
+         for shade  in '@O=' 
+         for symbol in '0SD']
+
+def deal(n, deck):
+    """Remove n cards from the end of deck and return them as a list.
+
+    Cards are popped one at a time, so the result lists them in the order
+    they came off the deck, and deck itself ends up n cards shorter.
+    """
+    hand = []
+    for _ in range(n):
+        hand.append(deck.pop())
+    return hand
+
+def is_set(cards):
+    """Are these 3 cards a set?
+
+    Each of the 4 features (string positions 0 through 3) is checked in turn;
+    a feature that shows exactly 2 distinct values disqualifies the cards.
+    """
+    return all(len({card[f] for card in cards}) != 2
+               for f in range(4))
+
+def find_set(layout):
+    """Return the first 3-card set found in layout, or () if there is none.
+
+    Candidates are tried in the order itertools.combinations yields them;
+    the result is a tuple of 3 cards, so it is truthy exactly when a set exists.
+    """
+    candidates = itertools.combinations(layout, 3)
+    return next((trio for trio in candidates if is_set(trio)), ())
+
+#### Tallying set:no-set ratio
+
+def Tallies():
+    """Make an empty tallies table.
+
+    The table maps a layout size to a {True: sets, False: no-sets} dict of
+    counts; an entry of zeros is created the first time a size is looked up.
+    """
+    def fresh_counts():
+        return {True: 0, False: 0}
+    return collections.defaultdict(fresh_counts)
+
+def tally(tallies, layout):
+    """Look for a set in layout and count the outcome under the layout's size.
+
+    Returns whatever find_set returned: the set found, or () if there was none.
+    """
+    found = find_set(layout)
+    counts = tallies[len(layout)]
+    counts[bool(found)] += 1
+    return found
+            
+#### Three experiments
+
+def tally_initial_layout(N, sizes=(12, 15)):
+    """Shuffle the deck N times and tally set / no-set for each initial layout.
+
+    After every shuffle, the first `size` cards of the deck are tallied for
+    each size in sizes.  Returns the tallies table.
+    """
+    tallies = Tallies()
+    deck = list(CARDS)
+    for _ in range(N):
+        random.shuffle(deck)
+        layouts = [deck[:size] for size in sizes]
+        for layout in layouts:
+            tally(tallies, layout)
+    return tallies
+
+def tally_initial_layout_no_prior_sets(N, sizes=(12, 15)):
+    """Like tally_initial_layout, but skip layouts that already held a set.
+
+    For each of N shuffles and each size, the first `size` cards are tallied
+    only when the first `size - 3` cards contain no set.  Returns the tallies.
+    """
+    tallies = Tallies()
+    deck = list(CARDS)
+    for _ in range(N):
+        random.shuffle(deck)
+        for size in sizes:
+            if find_set(deck[:size-3]):
+                continue   # a set was already there before the last 3 cards
+            tally(tallies, deck[:size])
+    return tallies
+
+def tally_game_play(N):
+    """Play N games from a freshly shuffled deck, tallying every layout seen.
+
+    A game starts with 12 cards.  Each turn the layout is tallied, the cards
+    of the set found (if any) are removed, and 3 more cards are dealt when the
+    layout has dropped below 12 or held no set.  Tallying for a game stops as
+    soon as the deck is empty.  Returns the tallies table.
+    """
+    tallies = Tallies()
+    for _ in range(N):
+        deck = list(CARDS)
+        random.shuffle(deck)
+        layout = deal(12, deck)
+        while deck:
+            found = tally(tallies, layout)
+            # Take the set (if there was one) off the table.
+            for card in found:
+                layout.remove(card)
+            # Refill the table.
+            if not found or len(layout) < 12:
+                layout.extend(deal(3, deck))
+    return tallies
+
+def experiments(N):
+    """Print a tally table for the booklet's claimed odds and for each experiment.
+
+    N is the number of deals used by the two initial-layout experiments; the
+    game-play experiment runs N // 25 complete games.
+    """
+    # The booklet's odds are given as lists rather than dicts: show() indexes
+    # them with True/False, which act as 1/0, so [1, 33] reads as
+    # 33 sets to 1 no-set.
+    show({12: [1, 33], 15: [1, 2500]}, 
+         'the instruction booklet')
+    show(tally_initial_layout(N), 
+         'initial layout')
+    show(tally_game_play(N // 25), 
+         'game play')
+    show(tally_initial_layout_no_prior_sets(N), 
+         'initial layout, but no sets before dealing last 3 cards')
+
+
+def show(tallies, label):
+    """Print a table of set / no-set counts, one row per layout size.
+
+    tallies maps size to something indexable by True (sets) and False
+    (no-sets).  The last column is the set:no-set ratio rounded to an
+    integer, or 'inft' when there were no no-sets.
+    """
+    row = '{:4d} |{:7,d} |{:7,d} | {:4}:1'
+    print()
+    print('Size |  Sets  | NoSets | Set:NoSet ratio for', label)
+    print('-----+--------+--------+----------------')
+    for size in sorted(tallies):
+        counts = tallies[size]
+        sets, nosets = counts[True], counts[False]
+        if nosets == 0:
+            ratio = 'inft'
+        else:
+            ratio = int(round(float(sets) / nosets))
+        print(row.format(size, sets, nosets, ratio))
+
+def test():
+    """Sanity checks for the deck, is_set and find_set; prints a line on success."""
+    assert len(CARDS) == 81 == len(set(CARDS))
+    # Symbols are '0' (zero), 'S' and 'D'; the letter 'O' is a shade, so the
+    # first card here must end in zero to be a real card from CARDS.
+    assert is_set(('3R=0', '2R=S', '1R=D'))
+    assert not is_set(('3R=0', '2R=S', '1R@D'))
+    assert find_set(['1PO0', '2G=D', '3R=0', '2R=S', '1R=D']) == ('3R=0', '2R=S', '1R=D')
+    assert not find_set(['1PO0', '2G=D', '3R=0', '2R=S', '1R@D'])
+    # A 15-card layout that is expected to contain no set.
+    photo = '2P=0 3P=D 2R=0 3GO0 2POD 3R@D 2RO0 2ROS 1P@S 2P@0 3ROS 2GOD 2P@D 1GOD 3GOS'.split()
+    assert not find_set(photo)
+    # Pin down the order-insensitive contents of combinations().
+    assert set(itertools.combinations([1, 2, 3, 4], 3)) == {(1, 2, 3), (1, 2, 4), (1, 3, 4), (2, 3, 4)}
+    print('All tests pass.')
+
+# Run the self-test and then the experiments whenever this file is executed
+# (there is no __main__ guard, so importing it runs them too).
+test()
+experiments(100000)
--- a/py/beal.py
+++ b/py/beal.py
@@ -0,0 +1,159 @@
+"""Search for  counterexamples to Beal's conjecture
+See http://norvig.com/beal.html and http://www.bealconjecture.com"""
+
+from __future__  import division, print_function
+from math        import log
+from itertools   import combinations, product
+from collections import defaultdict
+try:
+    from math import gcd      # For Python 3.5 and up
+except ImportError:
+    from fractions import gcd # For older versions (works in 2.7 as well)
+
+def beal(max_A, max_x):
+    """Print every A ** x + B ** y == C ** z found with gcd(A, B) == 1.
+
+    A and B range over 1..max_A; x and y over the exponents returned by
+    exponents_upto(max_x).  C ** z must itself be one of the tabulated powers.
+    Nothing is returned; each hit is printed as it is found.
+    """
+    Apowers = make_Apowers(max_A, max_x)
+    Czroots = make_Czroots(Apowers)
+    for (A, B) in combinations(Apowers, 2):
+        if gcd(A, B) != 1:
+            continue
+        for Ax in Apowers[A]:
+            for By in Apowers[B]:
+                Cz = Ax + By
+                if Cz not in Czroots:
+                    continue
+                C = Czroots[Cz]
+                # Only the powers are stored, so recover the exponents.
+                x = exponent(Ax, A)
+                y = exponent(By, B)
+                z = exponent(Cz, C)
+                print('{} ** {} + {} ** {} == {} ** {} == {}'
+                      .format(A, x, B, y, C, z, C ** z))
+
+def make_Apowers(max_A, max_x):
+    """Return a dict {A: [A**3, A**4, ...], ...} for A from 1 to max_A.
+
+    The exponents are those of exponents_upto(max_x), except that A == 1
+    gets the single exponent 3 (every power of 1 is 1).
+    """
+    exponents = exponents_upto(max_x)
+    Apowers = {}
+    for A in range(1, max_A + 1):
+        usable = [3] if A == 1 else exponents
+        Apowers[A] = [A ** x for x in usable]
+    return Apowers
+
+def make_Czroots(Apowers):
+    """Invert Apowers into a dict {C**z: C}.
+
+    When the same power appears under several bases, the base visited last
+    while iterating Apowers is the one kept.
+    """
+    Czroots = {}
+    for C in Apowers:
+        for Cz in Apowers[C]:
+            Czroots[Cz] = C
+    return Czroots
+    
+def exponents_upto(max_x):
+    """Return all odd primes up to and including max_x, as well as 4.
+
+    The result is in increasing order: [] below 3, [3] for 3, and [3, 4]
+    followed by the odd primes from 5 through max_x otherwise.
+    """
+    exponents = [3, 4] if max_x >= 4 else [3] if max_x == 3 else []
+    # The range must reach max_x itself; stopping at max_x would silently
+    # drop it whenever max_x is an odd prime (e.g. 5 or 7).
+    for x in range(5, max_x + 1, 2):
+        # x is odd, so the 4 in the list never divides it; this is trial
+        # division by the odd primes found so far.
+        if not any(x % p == 0 for p in exponents):
+            exponents.append(x)
+    return exponents
+
+def exponent(Cz, C):
+    """Recover z such that C ** z == Cz, i.e. the rounded log of Cz base C.
+
+    exponent(1, 1) has no unique answer; 3 is returned by convention.
+    """
+    if Cz == C == 1:
+        return 3
+    return int(round(log(Cz, C)))
+
+##############################################################################
+
+def tests():
+    "Unit tests for the helper functions; returns 'tests pass' if every assert holds."
+    # Powers table: 1 gets only 1**3; the others get exponents 3, 4, 5, 7.
+    assert make_Apowers(6, 10) == {
+         1: [1],
+         2: [8, 16, 32, 128],
+         3: [27, 81, 243, 2187],
+         4: [64, 256, 1024, 16384],
+         5: [125, 625, 3125, 78125],
+         6: [216, 1296, 7776, 279936]}
+    
+    # Inverse table, from each power back to its base.
+    assert make_Czroots(make_Apowers(5, 8)) == {
+        1: 1, 8: 2, 16: 2, 27: 3, 32: 2, 64: 4, 81: 3,
+        125: 5, 128: 2, 243: 3, 256: 4, 625: 5, 1024: 4,
+        2187: 3, 3125: 5, 16384: 4, 78125: 5}
+    Czroots = make_Czroots(make_Apowers(100, 100))
+    assert 3 ** 3 + 6 ** 3 in Czroots
+    assert 99 ** 97 in Czroots
+    assert 101 ** 100 not in Czroots
+    assert Czroots[99 ** 97] == 99
+    
+    # Exponent recovery, including for very large powers.
+    assert exponent(10 ** 5, 10) == 5
+    assert exponent(7 ** 3, 7) == 3
+    assert exponent(1234 ** 999, 1234) == 999
+    assert exponent(12345 ** 6789, 12345) == 6789
+    assert exponent(3 ** 10000, 3) == 10000
+    assert exponent(1, 1) == 3
+    
+    # Allowed exponents: 3, 4 and the odd primes.
+    assert exponents_upto(2) == []
+    assert exponents_upto(3) == [3]
+    assert exponents_upto(4) == [3, 4]
+    assert exponents_upto(40) == [3, 4, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37]
+    assert exponents_upto(100) == [
+        3, 4, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 
+        67, 71, 73, 79, 83, 89, 97]
+    
+    # Whichever gcd was imported must agree on these values.
+    assert gcd(3, 6) == 3
+    assert gcd(3, 7) == 1
+    assert gcd(861591083269373931, 94815872265407) == 97
+    assert gcd(2*3*5*(7**10)*(11**12), 3*(7**5)*(11**13)*17) == 3*(7**5)*(11**12)
+    return 'tests pass'
+
+##############################################################################
+
+def beal_modp(max_A, max_x, p=2**31-1):
+    """Search as beal() does, but compare powers modulo p first.
+
+    Powers are tabulated mod p; when A**x + B**y matches some C**z mod p,
+    the equation is re-checked with exact integers before being printed.
+    A and B range over 1..max_A, x and y over exponents_upto(max_x).
+    """
+    assert p >= max_A
+    Apowers = make_Apowers_modp(max_A, max_x, p)
+    Czroots = make_Czroots_modp(Apowers)
+    for (A, B) in combinations(Apowers, 2):
+        if gcd(A, B) != 1:
+            continue
+        for (Axp, x), (Byp, y) in product(Apowers[A], Apowers[B]):
+            Czp = (Axp + Byp) % p
+            # Test membership first: indexing the defaultdict would insert.
+            if Czp not in Czroots:
+                continue
+            lhs = A ** x + B ** y
+            for (C, z) in Czroots[Czp]:
+                rhs = C ** z
+                if lhs == rhs:
+                    print('{} ** {} + {} ** {} == {} ** {} == {}'
+                          .format(A, x, B, y, C, z, rhs))
+                    
+
+def make_Apowers_modp(max_A, max_x, p):
+    """Return a dict {A: [(A**3 (mod p), 3), (A**4 (mod p), 4), ...]}.
+
+    A runs from 1 to max_A.  Exponents come from exponents_upto(max_x),
+    except that A == 1 gets only the exponent 3.
+    """
+    exponents = exponents_upto(max_x)
+    Apowers = {}
+    for A in range(1, max_A + 1):
+        usable = [3] if A == 1 else exponents
+        Apowers[A] = [(pow(A, x, p), x) for x in usable]
+    return Apowers
+
+def make_Czroots_modp(Apowers):
+    """Return a dict {C**z (mod p): [(C, z), ...]}.
+
+    Several (C, z) pairs can share a residue, so each value is the list of
+    all of them, in the order they occur in Apowers.
+    """
+    Czroots = defaultdict(list)
+    for A, powers in Apowers.items():
+        for (Axp, x) in powers:
+            Czroots[Axp].append((A, x))
+    return Czroots
+
+##############################################################################
+
+def simpsons(bases, powers):
+    """Return the (A, B, C, n) that comes closest to A ** n + B ** n == C ** n.
+
+    A, B range over all pairs drawn from bases and n over powers; C is the
+    integer nearest the nth root of the left-hand side.  "Closest" means the
+    smallest relative_error.
+    """
+    def equations():
+        for A, B in combinations(bases, 2):
+            for n in powers:
+                yield (A, B, iroot(A ** n + B ** n, n), n)
+    return min(equations(), key=relative_error)
+
+def iroot(i, n):
+    """Return the integer closest to the nth root of i.
+
+    The root is computed in floating point and then rounded.
+    """
+    root = i ** (1. / n)
+    return int(round(root))
+
+def relative_error(equation):
+    """Return |LHS - RHS| / RHS for the equation A ** n + B ** n == C ** n.
+
+    equation is an (A, B, C, n) tuple.
+    """
+    A, B, C, n = equation
+    RHS = C ** n
+    return abs(A ** n + B ** n - RHS) / RHS
+
+# Script entry point: run the unit tests, then each search in turn.
+if __name__ == '__main__':
+    print(tests())
+    print("Searching beal(500, 100)")
+    # beal() reports hits by printing them and has no return statement, so
+    # this print shows None once the search is done.
+    print(beal(500, 100))
+    print("Finding Simpson-esque near-solutions to Fermat's Equation")
+    # s prints the closest near-miss for the given bases and powers as A^n + B^n = C^n.
+    def s(b, p):  print('{0}^{3} + {1}^{3} = {2}^{3}'.format(*simpsons(b, p)))
+    s(range(1000, 2000), [11, 12, 13])
+    s(range(3000, 5000), [12])
+    print("Searching beal_modp(500, 100)")
+    # As with beal(), the value printed here is None.
+    print(beal_modp(500, 100))
+
+
--- a/py/docex.py
+++ b/py/docex.py
@@ -0,0 +1,238 @@
+"""A framework for running unit tests and examples, written in docstrings.
+
+This lets you write "Ex: sqrt(4) ==> 2; sqrt(-1) raises ValueError" in a
+docstring, and then execute the examples as unit tests.
+
+This functionality is similar to the doctest module.  The major
+differences between docex and doctest are:
+
+(1) Brevity.  With docex you write the one-line comment
+    "Ex: len('abc') ==> 3; len([]) ==> 0; len(5) raises TypeError"
+    With doctest you would need 9 lines for the same thing:
+    '''>>> len('abc')
+    3
+    >>> len([])
+    0
+    >>> len(5)
+    Traceback (most recent call last):
+      ...
+    TypeError: len() of unsized object
+    '''
+
+(2) Docex handles both examples and unit tests.
+    It took me a while to recognize this distinction: when I write
+    "sqrt(4) ==> 2" it has two purposes -- to serve as a unit test
+    and to serve as an example of how to use the sqrt function.
+    When I write "random.choice('abc')" it serves as an example of
+    how to use the choice function, but it is not a unit test.
+    docex lets you do both; doctest only supports tests.  Of course
+    you can coerce this into a test in doctest, with something like
+    >>> random.choice('abc') in 'abc'
+    True
+
+(3) Eval-based rather than string-comparison based.  The docex string
+    "dict(zip([1,4,9], [1,2,3])) ==> {1: 1, 4: 2, 9: 3}" works even
+    when a different version of Python decides to print the dict as
+    "{9: 3, 4: 2, 1: 1}" because docex evals the right-hand-side and
+    checks to see if it is equal.  That's good for dicts, it's good for
+    writing "1+1==2 ==> True" and having it work in versions of Python
+    where True prints as "1" rather than as "True", and so on,
+    but doctest has the edge if you want to compare against something
+    that doesn't have an eval-able output, or if you want to test 
+    printed output.
+
+(4) Doctest has many more features, and is better supported.
+    I wrote docex before doctest was an official part of Python, but
+    with the refactoring of doctest in Python 2.4, I decided to switch
+    my code over to doctest, even though I prefer the brevity of docex. 
+    I still offer docex for those who want it.
+
+From Python, when you want to test modules m1, m2, ... do:
+    docex.Docex([m1, m2, ...])
+From the shell, when you want to test files *.py, do:
+    python docex.py [log-file] *.py
+If log file ends in .htm or .html, it will be written in HTML.
+If log file is -, or if it is missing, then standard output is used.
+
+For each module, Docex looks at the __doc__ and _docex strings of the
+module itself, and of each member, and recursively for each member
+class.  If a line in a docstring starts with r'^\s*Ex: ' (a line with
+blanks, then 'Ex: '), then the remainder of the string after the colon
+is treated as examples. Each line of the examples should conform to
+one of the following formats:
+
+    (1) Blank line or a comment; these just get echoed verbatim to the log.
+    (2) Of the form example1 ; example2 ; ...
+    (3) Of the form 'x ==> y' for any expressions x and y.
+            x is evaled and assigned to _, then y is evaled.
+            If x != y, an error message is printed.
+    (4) Of the form 'x raises y', for any statement x and expression y.
+            First y is evaled to yield an exception type, then x is execed.
+            If x doesn't raise the right exception, an error msg is printed.
+    (5) Of the form 'statement'. Statement is execed for side effect.
+    (6) Of the form 'expression'. Expression is evaled for side effect. 
+"""
+
+import re, sys, types
+
+class Docex:
+    """A class to run test examples written in docstrings or in _docex.
+
+    Creating an instance runs every example found in the given modules and
+    writes a log to standard output (or to `out`).  The number of passed and
+    failed tests is kept in self.passed and self.failed, and the repr of the
+    instance summarizes them.
+    """
+
+    def __init__(self, modules=None, html=0, out=None,
+                 title='Docex Example Output'):
+        """Run the examples of each module in modules.
+
+        modules: an iterable of modules (default: everything in sys.modules).
+        html:    if true, log lines are HTML-escaped and wrapped in markup.
+        out:     an open file to log to in place of sys.stdout; it is closed
+                 when the run finishes, even if the run raises.
+        title:   heading written at the top of the log.
+        """
+        if modules is None:
+            modules = sys.modules.values()
+        self.passed = self.failed = 0
+        self.dictionary = {}    # namespace in which examples are evaluated
+        self.already_seen = {}  # ids of objects whose examples have been run
+        self.html = html
+        # Remember the current stream (which the caller may already have
+        # redirected) so that exactly that stream is put back afterwards.
+        saved_stdout = sys.stdout
+        try:
+            if out: sys.stdout = out
+            self.writeln(title, '<h1>', '</h1><pre>')
+            for module in modules:
+                # sys.modules can hold None placeholders, which have no vars().
+                if module is not None:
+                    self.run_module(module)
+            self.writeln(str(self), '</pre>\n<hr><h1>', '</h1>\n')
+        finally:
+            if out:
+                sys.stdout = saved_stdout
+                out.close()
+
+    def __repr__(self):
+        "Summarize the run: the count of failures (if any) and of passes."
+        if self.failed:
+            return ('<Test: #### failed %d, passed %d>'
+                    % (self.failed, self.passed))
+        else:
+            return '<Test: passed all %d>' % self.passed
+
+    def run_module(self, object):
+        """Run the docstrings, and then all members of the module."""
+        if not self.seen(object):
+            self.dictionary.update(vars(object)) # import module into self
+            name = object.__name__
+            self.writeln('## Module %s ' % name,
+             '\n</pre><a name=%s><h1>' % name,
+             '</h1><pre>')
+            self.run_docstring(object)
+            # Visit members in name order so that the log is reproducible.
+            for name in sorted(object.__dict__):
+                val = object.__dict__[name]
+                if isinstance(val, types.ClassType):
+                    self.run_class(val)
+                elif isinstance(val, types.ModuleType):
+                    pass   # sub-modules are run only if listed explicitly
+                elif not self.seen(val):
+                    self.run_docstring(val)
+
+    def run_class(self, object):
+        """Run the docstrings, and then all members of the class."""
+        if not self.seen(object):
+            self.run_docstring(object)
+            for name in sorted(object.__dict__):
+                self.run_docstring(object.__dict__[name])
+
+    def run_docstring(self, object, search=re.compile(r'(?m)^\s*Ex: ').search):
+        "Run the __doc__ and _docex attributes, if the object has them."
+        if hasattr(object, '__doc__'):
+            s = object.__doc__
+            if isinstance(s, str):
+                match = search(s)
+                # Everything after the first 'Ex: ' marker is example text.
+                if match: self.run_string(s[match.end():])
+        if hasattr(object, '_docex'):
+                self.run_string(object._docex)
+        
+    def run_string(self, teststr):
+        """Run a test string, printing inputs and results.
+
+        The string is split on newlines and then on '; '; each piece is a
+        comment, an 'x ==> y' test, an 'x raises y' test, or plain code."""
+        if not teststr: return
+        teststr = teststr.strip()
+        if teststr.find('\n') > -1:
+            for line in teststr.split('\n'):
+                self.run_string(line)
+        elif teststr == '' or teststr.startswith('#'):
+            self.writeln(teststr)
+        elif teststr.find('; ') > -1:
+            for substr in teststr.split('; '): self.run_string(substr)
+        elif teststr.find('==>') > -1:
+            teststr, result = teststr.split('==>')
+            self.evaluate(teststr, result)
+        elif teststr.find(' raises ') > -1:
+            teststr, exception = teststr.split(' raises ')
+            self.raises(teststr, exception)
+        else: ## Try to eval, but if it is a statement, exec
+            try:
+                self.evaluate(teststr)
+            except SyntaxError:
+                exec teststr in self.dictionary
+
+    def evaluate(self, teststr, resultstr=None):
+        "Eval teststr and check if resultstr (if given) evals to the same."
+        self.writeln('>>> ' +  teststr.strip())
+        result = eval(teststr, self.dictionary)
+        self.dictionary['_'] = result   # so later examples can refer to _
+        self.writeln(repr(result))
+        if resultstr is None:
+            return
+        elif result == eval(resultstr, self.dictionary):
+            self.passed += 1
+        else:
+            self.fail(teststr, resultstr)
+    
+    def raises(self, teststr, exceptionstr):
+        "Exec teststr and check that it raises the exception named by exceptionstr."
+        teststr = teststr.strip()
+        self.writeln('>>> ' + teststr)
+        except_class = eval(exceptionstr, self.dictionary)
+        try:
+            exec teststr in self.dictionary
+        except except_class:
+            self.writeln('# raises %s as expected' % exceptionstr)
+            self.passed += 1
+            return
+        except Exception:
+            # The wrong kind of exception is a failed test, not a reason to
+            # abandon the run; fall through and record the failure.
+            pass
+        self.fail(teststr, exceptionstr)
+
+    def fail(self, teststr, resultstr):
+        "Log that teststr did not produce resultstr, and count the failure."
+        self.writeln('###### ERROR, TEST FAILED: expected %s for %s' 
+                     % (resultstr, teststr),
+                     '<font color=red><b>', '</b></font>')
+        self.failed += 1
+
+    def writeln(self, s, before='', after=''):
+        "Write s, html escaped, and wrapped with html code before and after."
+        s = str(s)
+        if self.html:
+            # Escape '&' first so the entities added next are left alone.
+            s = s.replace('&','&amp;').replace('<','&lt;').replace('>','&gt;')
+            print '%s%s%s' % (before, s, after)
+        else:
+            print s
+
+    def seen(self, object):
+        """Return true if this object has been seen before.
+        In any case, record that we have seen it."""
+        result = id(object) in self.already_seen
+        self.already_seen[id(object)] = 1
+        return result
+
+def main(args):
+    """Run Docex.  args should be a list of python filenames.  
+    If the first arg is a non-python filename, it is taken as the
+    name of a log file to which output is written.  If it ends in
+    ".htm" or ".html", then the output is written as html.  If the
+    first arg is "-", then standard output is used as the log file.
+    With no args at all, this description is printed and nothing is run."""
+    import glob
+    if not args:
+        # There is no args[0] to inspect; explain the usage instead of failing.
+        print main.__doc__
+        return
+    out = None
+    html = 0
+    if args[0] != "-" and not args[0].endswith(".py"):
+        out = open(args[0], 'w')
+        if args[0].endswith(".html") or args[0].endswith(".htm"):
+            html = 1
+    modules = []
+    for arg in args:
+        # Each arg may be a glob pattern; only the .py matches are imported,
+        # which also skips the log-file argument.
+        for filename in glob.glob(arg):
+            if filename.endswith('.py'):
+                modules.append(__import__(filename[:-3]))
+    print Docex(modules, html=html, out=out)
+
+# When run as a script, treat the command-line arguments as [log-file] files...
+if __name__ == '__main__':
+    main(sys.argv[1:])
+
+
--- a/py/ibol.py
+++ b/py/ibol.py
@@ -0,0 +1,193 @@
+from collections import defaultdict
+
+def get_genomes(fname="byronbayseqs.fas.txt"):
+    """Return three parallel lists, (names, species, genomes), read from fname.
+
+    Each record in the file is a '>name' line followed by one genome line.
+    The species is the part of the name after the last '|'.  Line endings
+    may be CR (as in the original data), LF or CRLF.
+    """
+    import re
+    # Close the file promptly instead of leaving it to the garbage collector.
+    with open(fname) as f:
+        text = f.read()
+    # Normalize every line-ending convention to '\n' so that one pattern
+    # serves for all of them.
+    text = text.replace('\r\n', '\n').replace('\r', '\n')
+    names, species, genomes = [], [], []
+    for name, g in re.findall('>(.*?)\n([^\n]*)\n*', text):
+        names.append(name)
+        species.append(name.split('|')[-1])
+        genomes.append(g)
+    return names, species, genomes
+
+def get_neighbors(fname="editdistances.txt"):
+    """Return dict: neighbors[i][j] = neighbors[j][i] = d means i,j are d apart.
+
+    Each non-blank line of fname holds three integers 'i j d'.  Every index
+    from 0 to n-1 (n is the module-level genome count) gets an entry, even
+    if the file never mentions it.
+    """
+    ## Read the data pre-computed from the Java program
+    neighbors = dict((i, {}) for i in range(n))
+    # Close the file promptly instead of leaving it to the garbage collector.
+    with open(fname) as f:
+        for line in f:
+            if not line.strip():
+                continue   # tolerate blank lines, e.g. at the end of the file
+            i,j,d = map(int, line.split())
+            neighbors[i][j] = neighbors[j][i] = d
+    return neighbors
+        
+def cluster(neighbors, d, dc):
+    """Return a list of clusters (sets), covering every key of neighbors.
+
+    Within a cluster, each element is within d of another element and within
+    dc of every other element.  Clusters are grown greedily, in the iteration
+    order of neighbors."""
+    unclustered = set(neighbors) ## set of g's not yet clustered
+    clusters = []
+    for g in neighbors:
+        if g in unclustered:
+            clusters.append(closure(g, set(), unclustered, d, dc))
+    return clusters
+
+def closure(g, s, unclustered, d, dc):
+    """Accumulate in set s the transitive closure of 'near', starting at g.
+
+    g joins s only if it is still unclustered and is near(g, s, d, dc); its
+    neighbors (from the module-level neighbors dict) are then tried
+    recursively.  Elements added to s are removed from unclustered."""
+    if g in s or g not in unclustered or not near(g, s, d, dc):
+        return s
+    s.add(g)
+    unclustered.remove(g)
+    for g2 in neighbors[g]:
+        closure(g2, s, unclustered, d, dc)
+    return s
+
+def dist(i, j):
+    """Distance between two genomes, given by index.
+
+    A genome is 0 from itself; a pair with no recorded distance is taken to
+    be max_distance apart."""
+    if i == j:
+        return 0
+    lo, hi = min(i, j), max(i, j)
+    return neighbors[lo].get(hi, max_distance)
+
+def near(g, cluster, d, dc):
+    """Is g within d of some member of cluster, and within dc of every member?
+
+    For an empty cluster the distances are taken to be [0], so the answer
+    is whether both d and dc are non-negative."""
+    distances = [dist(g, g2) for g2 in cluster]
+    if not distances:
+        distances = [0]
+    return min(distances) <= d and max(distances) <= dc
+
+def diameter(cluster):
+    """The largest distance between two elements of the cluster.
+
+    An empty cluster has diameter 0."""
+    pairwise = [dist(i, j) for i in cluster for j in cluster]
+    return max(pairwise) if pairwise else 0
+
+def margin(cluster):
+    """The distance from a cluster to the nearest g2 outside this cluster.
+
+    Only recorded neighbor distances are considered; when no member has a
+    neighbor outside the cluster, the margin is max_distance."""
+    outside = [d for g in cluster
+               for g2, d in neighbors[g].items()
+               if g2 not in cluster]
+    return min(outside) if outside else max_distance
+
+################################################################ Analysis
+
+def pct(num, den):
+    """Return a string giving num as a percentage of den, to one decimal place.
+
+    den may be a number or anything with a length, in which case its length
+    is used.  When num equals den the result is ' 100%'."""
+    if '__len__' in dir(den):
+        den = len(den)
+    if num == den:
+        return ' 100%'
+    percent = num * 100.0 / den
+    return '%.1f%%' % percent
+
+def histo(items):
+    """Make a histogram from a sequence of items or (item, count) tuples.
+
+    A tuple adds its second element to the count of its first element; any
+    other item adds 1 to its own count.  Returns a defaultdict(int)."""
+    D = defaultdict(int)
+    for item in items:
+        if isinstance(item, tuple):
+            key, count = item[0], item[1]
+        else:
+            key, count = item, 1
+        D[key] += count
+    return D
+
+def showh(d):
+    """Show a histogram as 'key:count key:count ...', sorted by key.
+
+    d may be a dict, or any sequence acceptable to histo()."""
+    if not isinstance(d, dict):
+        d = histo(d)
+    pairs = sorted(d.items())
+    return ' '.join('%s:%s' % pair for pair in pairs)
+
+def greport(genomes):
+    """Print summary statistics for the genomes.
+
+    Reports how many there are (and how many are distinct), how many distinct
+    genomes carry more than one species name, the range of genome lengths,
+    and a count of each character used."""
+    distinct = set(genomes)
+    print "Number of genomes: %d (%d distinct)" % (len(genomes), len(distinct))
+    # G maps each genome string to the set of species names attached to it.
+    G = dict((g, set()) for g in genomes)
+    for i in range(n):
+        G[genomes[i]].add(species[i])
+    multi_named = [s for s in G.values() if len(s) > 1]
+    print "Multi-named genomes:", len(multi_named)
+    lens = [len(g) for g in genomes]
+    print "Genome lengths: min=%d, max=%d" % (min(lens), max(lens))
+    print "Character counts: ", showh(c for g in genomes for c in g)
+    
+def nreport(neighbors):
+    """Print two histograms over the neighbors table: the nearest-neighbor
+    distance of each genome, and the number of neighbor entries at each
+    distance.  A genome with no neighbors is counted under '>25'."""
+    NN, NumN = defaultdict(int), defaultdict(int) ## Nearest, Number of neighbors
+    for g in neighbors:
+        distances = neighbors[g].values()
+        nearest = min(distances or ['>25'])
+        NN[nearest] += 1
+        for d2 in distances:
+            NumN[d2] += 1
+    print
+    print "Nearest neighbor counts:", showh(NN)
+    print "Number of neighbors at each distance:", showh(NumN)
+
+def nspecies(c):
+    "The number of distinct species names among the genomes (indices) in c."
+    labels = set()
+    for g in c:
+        labels.add(species[g])
+    return len(labels)
+
+def showc(c):
+    """Return a one-line description of cluster c: its size (N), diameter (D),
+    margin (M), members, and a histogram of its species names."""
+    labels = showh(species[g] for g in c)
+    fields = (len(c), diameter(c), margin(c), list(c), labels)
+    return "N=%d, D=%d, M=%d: %s %s" % fields
+
+def creport(drange, dcrange):
+    """Print the clustering report.
+
+    drange:  the nearest-neighbor thresholds d to try (one table row each).
+    dcrange: the within-cluster thresholds dc to try (one table column each).
+
+    Prints a table of cluster counts for every (d, dc) pair, then details of
+    the clustering cluster(neighbors, 8, 15): sizes, margins, and the clusters
+    whose margin is at most 16.  Several further sections print only their
+    heading because their bodies are commented out.
+    """
+    def table(what, fn):
+        "Print heading `what`, then fn(clusters) for each d (row) and dc (column)."
+        print "\n" + what
+        print ' '*8, ' '.join([' '+pct(dc, glen) for dc in dcrange])
+        for d in drange:
+            print '%s (%2d)' % (pct(d, glen), d),
+            for dc in dcrange:
+                print '%5s' % fn(cluster(neighbors, d, dc)),
+            print
+    print '\nNearest neighbor must be closer than this percentage (places). '
+    print 'Each column: all genomes in cluster within this percentage of each other.'
+    table("Number of clusters", len)
+    cluster1 = cluster(neighbors, 8, 15) ## splits Cleora
+    print '\nNumber of clusters of different sizes:', showh(len(c) for c in cluster1)
+    # M counts clusters per margin value; T counts genomes per margin value.
+    M, T = defaultdict(int), defaultdict(int)
+    for c in cluster1:
+        M[margin(c)] += 1; T[margin(c)] += len(c)
+    for x in M: print '%d\t%d\t%d'% (x,M[x],T[x])
+    print '\nMargins', showh(M)
+    for c in cluster1:
+        if margin(c) <= 16:
+            print showc(c)
+    print '\nScatter plot of cluster diameter vs. margin.'
+    # The scatter-plot rows are currently disabled; only the heading prints.
+    for c in cluster1:
+        if diameter(c) > 0:
+            pass
+            #print '%d\t%d' % (diameter(c), margin(c))
+    print '\nDifference from cluster(neighbors, 11, 14):'
+    #table(lambda cl: pct(len(cluster1)-compare(cluster1, cl),max(len(cluster1),len(cl))))
+    print '\nNumber of clusters witth more than one species name:'
+    #table(lambda cl: sum(nspecies(c) > 1 for c in cl))
+    def pct_near_another(clusters, P=1.25):
+        "Percentage (of n) of neighbor pairs crossing a cluster boundary at under P*diameter."
+        total = 0
+        for c in clusters:
+            d = diameter(c)
+            for g in c:
+                for g2 in neighbors[g]:
+                    if g2 not in c and dist(g, g2) < P*d:
+                        total += 1
+        return pct(total, n)
+    # NOTE(review): f is never called (the map below is commented out), and it
+    # passes table() one argument where table() requires two.
+    def f(P):
+        print '\nPercent of individuals within %.2f*diameter of another cluster.'%P
+        table(lambda cl: pct_near_another(cl, P))
+    #map(f, [1.2, 1.33, 1.5])
+
+def sreport(species):
+    """Print the diameter of each species, treated as a cluster of its genomes.
+
+    Species whose diameter exceeds 14 are listed individually (a diameter
+    equal to glen is shown as '>25'); a histogram of all the diameters
+    follows."""
+    SS = defaultdict(int)
+    print
+    labels = set(species)
+    for s in labels:
+        c = [g for g in range(n) if species[g] == s]
+        d = diameter(c)
+        if d > 14:
+            if d==glen:
+                d = '>25'
+            print 'diameter %s for %s (%d elements)' % (d, s, len(c))
+        SS[d] += 1
+    print 'Diameters of %d labelled clusters: %s' % (len(labels), showh(SS))
+    
+def compare(cl1, cl2):
+    """Compare two lists of clusters (sets), returning a similarity score.
+
+    Every pair (c1, c2) scores 1 if the two are equal, 0.5 if one is a subset
+    of the other and their sizes differ by exactly 1, and 0 otherwise."""
+    score = 0
+    for c1 in cl1:
+        for c2 in cl2:
+            score += (c1==c2 or 0.5*(abs(len(c1)-len(c2))==1 and
+                                     (c1.issubset(c2) or c2.issubset(c1))))
+    return score
+
+def unit_tests():
+    "Check the loaded data and the clustering helpers; print a line if all pass."
+    # Every genome has the same length, glen.
+    assert set(len(g) for g in genomes) == set([glen])
+    # Clustering puts each genome in exactly one cluster.
+    clusters = cluster(neighbors, 11, 11)
+    assert sum(len(c) for c in clusters) == len(genomes)
+    assert len(set(g for c in clusters for g in c)) == len(genomes)
+    assert dist(17, 42) == dist(42, 17)
+    assert diameter(set()) == 0
+    assert diameter([17, 42]) == dist(17, 42)
+    assert pct(1, 2) == '50.0%'
+    print '\nAll tests pass.\n'
+
+    
+
+################################################################ Main body
+ 
+# Everything below runs at import time: load the data into module-level
+# globals (which the functions above read directly), print the reports, and
+# then run the unit tests.
+max_distance = 26 ## what dist() and margin() fall back on when no distance is recorded
+names, species, genomes = get_genomes() ## genomes = ['ACT...', ...]
+n = len(genomes)
+glen = len(genomes[0])
+neighbors = get_neighbors() ## neighbors[g] = {g2:d2, g3:d3, ...}
+greport(genomes)
+nreport(neighbors)
+creport(range(6, 15), [glen,16,15,14,13, 12, 11])
+#sreport(species)
+
+unit_tests()
--- a/py/lettercount.py
+++ b/py/lettercount.py
@@ -0,0 +1,443 @@
+"""
+Code to support http://norvig.com/mayzner.html
+Read files in the Google Books ngram format, and convert them to a simpler format.
+The original format looks like this:
+
+    word     \t year \t word_count \t book_count
+    word_POS \t year \t word_count \t book_count
+
+for example,
+
+    accreted_VERB	1846	7	4
+    accreted_VERB	1847	1	1
+    accreted_VERB	1848	1	1
+
+The function 'read_year_file' will convert a file of this form into a dict of
+{WORD: count} pairs, where the WORD is uppercased, and the count is the total
+over all years (you have the option to specify a starting year) and all
+capitalizations.  Then 'read_dict' and 'write_dict' convert between a dict and
+an external file format that looks like this:
+
+    ACCRETED	9
+
+"""
+
+from __future__ import division
+from collections import Counter, defaultdict
+
+#### Read files in Books-Ngram format; convert to a dict
+
+def read_year_file(filename, dic=None):
+    """Read a file of 'word \t year \t word_count \t book_count' lines into a dict
+    {WORD: totalcount}.  Any '_POS' suffix is stripped, words are uppercased, and
+    only all-alphabetic words are kept.  Counts are added into dic if one is
+    given (it is updated in place), otherwise into a fresh dict; the dict is
+    returned.  Raises ValueError on a line that does not have exactly 4 fields."""
+    if dic is None: dic = {}
+    # 'with' guarantees the file is closed even if a malformed line raises.
+    with open(filename) as lines:
+        for line in lines:
+            word, year, c1, c2 = line.split('\t')
+            if '_' in word:
+                word = word[:word.index('_')]   # 'accreted_VERB' => 'accreted'
+            if word.isalpha():
+                word = word.upper()
+                dic[word] = dic.get(word, 0) + int(c1)
+    return dic
+
+#### Read and write files of the form 'WORD \t count \n'
+
+def write_dict(dic, filename):
+    """Write a {word:count} dict as 'word \t count' lines in filename, sorted by word.
+    The file is overwritten.  Returns None."""
+    # 'with' closes the file even if a write fails part-way through.
+    with open(filename, 'w') as out:
+        for key in sorted(dic):
+            out.write('%s\t%s\n' % (key, dic[key]))
+        
+def read_dict(filename, sep='\t'):
+    """Read 'word \t count' lines from file and make them into a dict of {word:count}.
+    sep is the field separator.  Counts are converted with int().  Raises
+    ValueError on a line that does not split into exactly two fields."""
+    # 'with' closes the file as soon as the dict has been built.
+    with open(filename) as lines:
+        pairs = (line.split(sep) for line in lines)
+        return {word: int(count) for (word, count) in pairs}
+
+#### Convert a bunch of year files into dict file format.
+
+def convert_files(filenames, mincount=1e5):
+    """Convert each Books-Ngram year file into a dict file of its popular words.
+    For every filename: read it with read_year_file, delete the words whose
+    total count is below mincount, and write the rest with write_dict to a file
+    named 'WORD-' plus the uppercased last character of the filename.  A
+    timestamped progress line is printed before reading, after reading, and
+    after pruning."""
+    def report(filename, D, adj):
+        "Print the sum of D's counts and the number of entries in D, with the GMT time."
+        import time
+        entries = len(D)
+        total = sum(D.itervalues())
+        print '%s: %s %s words (%s tokens) at %s' % (
+            filename, adj, format(total, ',d'), format(entries, ',d'),
+            time.strftime("%H:%M:%S", time.gmtime()))
+    for name in filenames:
+        report(name, {}, 'starting')
+        counts = read_year_file(name)
+        report(name, counts, 'total')
+        # Collect the rare words first, so the dict is not changed while iterating.
+        rare = [word for word in counts if counts[word] < mincount]
+        for word in rare:
+            del counts[word]
+        write_dict(counts, 'WORD-' + name[-1].upper())
+        report(name, counts, 'popular')
+
+def load(filename='top-words.txt'):
+    """Read a 'word \t count' file and set three module globals:
+    D (the {word: count} dict), W (the number of entries in D) and
+    M (the sum of all the counts)."""
+    global D, W, M
+    D = read_dict(filename)
+    W, M = len(D), sum(D.values())
+    
+#### Compute letter counts and save as HTML files.
+
+def histogram(items):
+    "Return a Counter that maps each key to the sum of its vals, from (key, val) pairs."
+    totals = Counter()
+    for pair in items:
+        (key, val) = pair
+        totals[key] += val
+    return totals
+
+def end(name):
+    "Return the closing form of a tag name: 'td' => '/td'."
+    return '/' + name
+
+# Build an opening HTML tag: '<' + name + keywords(kwds) + '>'.
+# NOTE(review): no function named `keywords` is defined anywhere in this module,
+# so calling tag() raises NameError as written.
+def tag(name, **kwds): return '<' + name + keywords(kwds) + '>'
+
+def row(cells, **kwds):
+    # NOTE(review): looks unfinished -- cells and kwds are ignored, and the
+    # result is always just '<tr>'.
+    return '<tr>' + ''
+    
+def ngram_tables(dic, N, pos=[0, 1, 2, 3, 4, -5, -4, -3, -2, -1]):
+    """Return three dicts of letter N-grams of length N: counts, counts1, counts2.
+    counts is a dict of {'AB': 123} that counts how often 'AB' occurs.
+    counts1[i] is a dict of {'AB': 123} that counts how often 'AB' occurs at position i.
+    counts2[i][j] is a dict of {'AB': 123} that counts how often 'AB' occurs at position i."""
+    # NOTE(review): unfinished.  The body only allocates empty Counters and
+    # then falls off the end, so the function returns None, not the three
+    # tables the docstring promises.  It also reads the global D instead of
+    # the dic parameter, and never uses N or pos.
+    L = len(max(D, key=len))   # length of the longest key in the global D
+    counts = Counter()
+    counts1 = [Counter() for _ in range(L)]
+    counts2 = [[Counter() for i in range(L)]]   # a single row of L Counters
+
+def counter(pairs):
+    "Build a Counter from (value, count) pairs, adding up the counts of repeated values."
+    tally = Counter()
+    for pair in pairs:
+        (value, count) = pair
+        tally[value] += count
+    return tally
+
+def ngrams(word, N):
+    "Return a list of every length-N substring of word, left to right (empty if word is shorter than N)."
+    last_start = len(word) - N
+    return [word[start:start+N] for start in range(last_start + 1)]
+
+
+import glob
+#convert_files(glob.glob('book?'))
+              
+#DB = [[letter_counts() for length in range(length)] for length in range(maxlen)]
+      
+
+## Unused ???
+
+def letter_counts(wc):
+    """From a {word: count} dict wc, build a dict of {(s, i, L): count}, where s
+    is a 1- or 2-letter gram, i is the position where it starts, and L is the
+    length of the word it came from.  Each word adds its count to every piece
+    that pieces(word) yields."""
+    totals = defaultdict(int)
+    for (word, count) in wc.iteritems():
+        for piece in pieces(word):
+            totals[piece] += count
+    return totals
+
+def pieces(word):
+    """Yield the 1- and 2-letter grams of word as (s, i, L) tuples: s is the
+    gram, i its starting index, L the length of word.  At each index the single
+    letter comes first, then the 2-letter gram (if one fits)."""
+    L = len(word)
+    last = L - 1
+    for (i, letter) in enumerate(word):
+        yield (letter, i, L)
+        if i < last:
+            yield (word[i:i+2], i, L)
+
+def getcount(counts, s, pos, length):
+    """The count for letter sequence s (one or two letters) starting at position
+    pos of words of the given length, looked up as counts[s, pos, length].
+    The builtin function `all` acts as a wildcard: length == all sums over
+    every L in the global all_lengths; pos == all sums over range(length)."""
+    if length != all:
+        if pos != all:
+            return counts[s, pos, length]
+        return sum(getcount(counts, s, i, length) for i in range(length))
+    return sum(getcount(counts, s, pos, L) for L in all_lengths)
+
+
+# Progress markers printed at import time.  The computation they used to
+# bracket is commented out, so only the two markers are printed.
+print 'start'
+#wc = word_counts('count_100K.txt')
+#counts = letter_counts(wc)
+print 'end'
+
+
+
+def test():
+    # NOTE(review): stub -- builds a small sample {word: count} dict but
+    # asserts nothing and returns None.
+    D = {'the': 100, 'of': 70, 'and': 60, 'to': 50, 'a': 40}
+
+def num(ch):
+    "Map a letter to its index in the alphabet, ignoring case: 'a'/'A' => 0 ... 'z'/'Z' => 25."
+    lowercase = 'abcdefghijklmnopqrstuvwxyz'
+    return lowercase.index(ch.lower())
+    
+
+def stats(D, NS = (1, 2, 3, 4, 5, 6)):
+    """Print a table showing how many distinct letter n-grams have been seen as
+    more and more words are included.  Words of D are taken in sortedby(D)
+    order; a row is printed after every 5000 words and after the last word.
+    Each row has, for every n in NS, the number of distinct n-grams so far and
+    that number as a percentage of the 26**n possible n-grams."""
+    counts = {n: Counter() for n in NS}
+    print 'words ' + ' '.join('   %d-grams  ' % n for n in NS)
+    for (i, word) in enumerate(sortedby(D), 1):
+        for n in NS:
+            for ng in ngrams(word, n):
+                counts[n][ng] += 1
+        if i % 5000 == 0 or i == len(D):
+            # Trailing commas keep all the fields of a row on one output line.
+            print "%4dK" % (i/1000),
+            for n in NS:
+                c = len(counts[n])   # number of distinct n-grams seen so far
+                field = "%5d (%d%%)" % (c, int(round(c*100/(26**n))))
+                print '%12s' % field,
+            print   # end the row
+
+# The 26 uppercase letters in a fixed order (presumably most to least frequent -- confirm).
+letters = 'ETAOINSRHLDCUMFPGWYBVKXJQZ'
+# The same letters in alphabetical order.
+alphabet = ''.join(sorted(letters))
+
+from itertools import cycle, izip
+
+# One-character color codes; bar() uses a code as the base name of a '.jpg' image.
+colors = 'ygobp'
+
+def bar(text, color, count, N, pixels, height=16):
+    """Return the HTML for one bar: an image '<color>.jpg' scaled to a width of
+    round(pixels * count / N), with text placed over it.  A computed width
+    below 2 is replaced by 3.  The tooltip shows text, count as a percentage
+    of N, and the count itself."""
+    width = int(round(pixels * count / N))
+    if width < 2: width = 3
+    percent = count*100./N
+    title = '{}: {:.3f}%; {:,}'.format(text, percent, count)
+    offset = -width+2   # shift the label left, back over the image
+    template = '<span title="%s"><img src="%s.jpg" height=%d width=%d><span style="position:relative; left:%d; bottom:4">%s</span></span>'
+    return template % (title, color, height, width, offset, text)
+
+def letter_bar(LC, N=None, factor='', pixels=700):
+    """Return the HTML for a row of bars, one for each letter in the global
+    `letters`, in that order, cycling through the global `colors`.  LC maps
+    uppercase letters to counts; N is the total to scale by (default: the sum
+    of LC's values).  factor is accepted but not used."""
+    if N is None: N = sum(LC.values())
+    bars = []
+    for (L, color) in izip(letters, cycle(colors)):
+        bars.append(bar(L.lower(), color, LC[L], N, pixels))
+    return ''.join(bars)
+        
+
+def singleton(x):
+    "Return a new one-element list holding x."
+    return [x]
+
+# Letter positions: 0..6 and -7..-1 (presumably in the convention substr()
+# uses, where a negative position counts back from the end -- confirm).
+positions = [0, 1, 2, 3, 4, 5, 6, -7, -6, -5, -4, -3, -2, -1]
+
+def substr(word, pos, length):
+    """Return the piece of word with the given length that starts at pos (when
+    pos >= 0), or that ends at pos (when pos < 0, where -1 means the last
+    letter).  Return None if the piece does not fit inside word."""
+    W = len(word)
+    if pos >= 0:
+        start, stop = pos, pos+length
+        fits = (stop <= W)
+    else:
+        stop = W+pos+1
+        start = stop-length
+        fits = (abs(pos)+length-1 <= W)
+    if not fits:
+        return None
+    return word[start:stop]
+        
+def lettercount(D, pos):
+    """Return an HTML line ('<br>', a position label, then a letter_bar) for
+    the letters found at position pos of the words in the {word: count} dict D.
+    Words with no letter at pos are left out.  The letter counts are also
+    printed.  The label is pos itself when negative, else pos+1."""
+    LC = histogram((substr(w, pos, 1), D[w]) for w in D)
+    del LC[None]   # drop the entry for words that are too short
+    print LC
+    if isinstance(pos, tuple):
+        pos_name = str(pos)+'+'
+    elif pos < 0:
+        pos_name = pos
+    else:
+        pos_name = pos+1
+    return '\n<br>\n%-3s %s' % (pos_name, letter_bar(LC))
+
+def ngramcount(D, n=2):
+    """Return a Counter of letter n-grams over the {word: count} dict D: every
+    occurrence of an n-gram in a word adds that word's count."""
+    def weighted_ngrams():
+        for w in D:
+            for ng in ngrams(w, n):
+                yield (ng, D[w])
+    return histogram(weighted_ngrams())
+
+def twograms(D2):
+    """Return an HTML table of two-letter counts from the dict D2.  There is one
+    row per second letter B and one cell per first letter A, both running over
+    the global `alphabet`; each cell shows A+B, scaled by the total of D2."""
+    N = sum(D2.values())
+    lines = ['<table cellpadding=1 cellborder=1>']
+    for B in alphabet:
+        lines.append(tr([cell(A+B, D2, N) for A in alphabet]))
+    lines.append('</table>')
+    return '\n'.join(lines)
+
+# NOTE(review): dead code -- a second `def cell` directly below rebinds the
+# name, so this version is never the one that gets called.  (Its markup also
+# closes two </span> tags after opening only one.)
+def cell(text, D2, N, height=16, maxwidth=25, scale=27):
+    "Return a <td> holding an 'o.jpg' image scaled by text's count in D2, with text over it."
+    count = D2.get(text, 0)
+    width = int(round(maxwidth * count * scale * 1. / N))
+    if width < 1: width = 1
+    title = '{}: {:.3f}%; {:,}'.format(text, count*100./N, count)
+    return '<td title="%s"><img src="o.jpg" height=%d width=%d><span style="position:relative; left:%d; bottom:4">%s</span></span>' % (
+        title, height, width, -width+2, text)
+
+def cell(text, D2, N, height=16, maxwidth=25, scale=27):
+    """Return an HTML <td> for text, with 'o.jpg' as its background.  The width
+    is round(maxwidth * scale * count / N), but at least 1, where count is
+    D2's entry for text (0 if missing).  The tooltip shows text, count as a
+    percentage of N, and the count itself."""
+    count = D2.get(text, 0)
+    width = max(1, int(round(maxwidth * count * scale * 1. / N)))
+    percent = count*100./N
+    title = '{}: {:.3f}%; {:,}'.format(text, percent, count)
+    template = '<td title="%s" background="o.jpg" height=%d width=%d>%s'
+    return template % (title, height, width, text)
+
+def tr(cells):
+    "Return an HTML table row: '<tr>' followed by all the cell strings joined together."
+    body = ''.join(cells)
+    return '<tr>' + body
+
+def comma(n):
+    "Format the number n with commas as thousands separators: 1234567 => '1,234,567'."
+    return format(n, ',')
+
+def ngram_stats(D, n, k=5):
+    """Return one HTML table row summarizing the n-grams of the word dict D:
+    the number of distinct n-grams, the total of their counts, links to
+    counts-<n>.csv and counts-<n>.html, and the k most frequent n-grams."""
+    DN = ngramcount(D, n)
+    distinct = comma(len(DN))
+    total = comma(sum(DN.values()))
+    topk = ', '.join(sortedby(DN)[:k])
+    template = '<tr><td>%d-grams<td align=right>%s<td align=right>%s<td><a href="counts-%d.csv">counts-%d.csv</a><td><a href="counts-%d.html">counts-%d.html</a><td>%s'
+    return template % (n, distinct, total, n, n, n, n, topk)
+
+#### Tables
+
+def sortedby(D):
+    "Return the keys of dict D as a list, ordered from largest value to smallest."
+    def negated_value(key):
+        return -D[key]
+    return sorted(D, key=negated_value)
+
+# Wildcard marker used in column keys, e.g. (ANY, ANY) or (ANY, start, end).
+ANY = '*'
+
+# The word lengths 1..9 that get per-length columns (the default for columns()).
+wordlengths = range(1, 10)
+
+def col(*args):
+    "Make a column key: simply the tuple of the arguments."
+    return args
+
+def columns(n, wordlengths=wordlengths):
+    """Return the list of column keys for a table of n-gram counts, in order:
+    (ANY, ANY); then (k, ANY) for each word length k >= n; then (k, start, end)
+    for each such k and each start where an n-gram fits; then
+    (ANY, start, end) for each start in wordlengths; then (ANY, -k, -k+n-1),
+    from the largest k down, wherever the end is still negative."""
+    lengths = [k for k in wordlengths if k >= n]
+    result = [col(ANY, ANY)]
+    for k in lengths:
+        result.append(col(k, ANY))
+    for k in lengths:
+        for start in range(1, 2+k-n):
+            result.append(col(k, start, start+n-1))
+    for start in wordlengths:
+        result.append(col(ANY, start, start+n-1))
+    for k in reversed(lengths):
+        if -k+n-1 < 0:
+            result.append(col(ANY, -k, -k+n-1))
+    return result
+
+def colname(col):
+    "Return the heading for a column key: 'a/b' for a 2-tuple, 'a/start:end' for a 3-tuple."
+    if len(col) == 2:
+        return '%s/%s' % col
+    return '%s/%d:%d' % col
+
+def csvline(first, rest):
+    "Return one tab-separated line: the string first, then str() of each item in rest."
+    fields = [first]
+    fields.extend(map(str, rest))
+    return '\t'.join(fields)
+
+def makecsv(n, D=None):
+    """Count the letter n-grams of the {word: count} dict D by word length and
+    by position, write the counts to the file 'ngrams<n>.csv' (tab-separated,
+    one line per n-gram, most frequent first), and return them.
+
+    D defaults to the global D that load() sets.  It is looked up when the
+    function is called; a NameError is raised if it has not been set.
+    The result maps each n-gram to a dict keyed by column keys (see columns):
+    (ANY, ANY), (wordlength, ANY), (wordlength, start, end), (ANY, start, end),
+    and (ANY, -from_end, ...) for positions counted back from the end of the
+    word.  Positions are 1-based; only the first 9 and last 9 are recorded."""
+    if D is None:
+        # A default of `D=D` would be evaluated when this def statement runs,
+        # which is before load() can have created the global D; so look the
+        # global up at call time instead.
+        try:
+            D = globals()['D']
+        except KeyError:
+            raise NameError("global D is not defined; call load() first, or pass D")
+    cols = columns(n)
+    Dng = defaultdict(lambda: defaultdict(int))
+    for w in D:
+        N = D[w]
+        wlen = len(w)
+        # Enumerate the n-grams of the word and increment the counts for each one.
+        for (start, ng) in enumerate(ngrams(w, n), 1):
+            entry = Dng[ng]
+            entry[ANY, ANY] += N
+            entry[wlen, ANY] += N
+            if start <= 9:
+                entry[wlen, start, start+n-1] += N
+                entry[ANY, start, start+n-1] += N
+            from_end = wlen-start+1
+            if from_end <= 9:
+                entry[ANY, -from_end, -from_end+n-1] += N
+    # 'with' closes the output file even if a write fails.
+    with open('ngrams%d.csv' % n, 'w') as out:
+        print >> out, csvline('%d-gram' % n,  map(colname, cols))
+        for ng in sorted(Dng, key=lambda ng: -Dng[ng][(ANY, ANY)]):
+            print >> out, csvline(ng, [Dng[ng].get(c, 0) for c in cols])
+    return Dng
+
+### Tests
+
+"""
+>>> for w in words:
+    print '%-6s %6.2f B (%4.2f%%) <img src="s.jpg" height=12 width=%d>' % (w.lower(), D[w]/1e9, D[w]*100./N, int(round(D[w]*4000./N)))
+... 
+the     53.10 B (7.14%) <img src="s.jpg" height=12 width=286>
+of      30.97 B (4.16%) <img src="s.jpg" height=12 width=167>
+and     22.63 B (3.04%) <img src="s.jpg" height=12 width=122>
+to      19.35 B (2.60%) <img src="s.jpg" height=12 width=104>
+in      16.89 B (2.27%) <img src="s.jpg" height=12 width=91>
+a       15.31 B (2.06%) <img src="s.jpg" height=12 width=82>
+is       8.38 B (1.13%) <img src="s.jpg" height=12 width=45>
+that     8.00 B (1.08%) <img src="s.jpg" height=12 width=43>
+for      6.55 B (0.88%) <img src="s.jpg" height=12 width=35>
+it       5.74 B (0.77%) <img src="s.jpg" height=12 width=31>
+as       5.70 B (0.77%) <img src="s.jpg" height=12 width=31>
+was      5.50 B (0.74%) <img src="s.jpg" height=12 width=30>
+with     5.18 B (0.70%) <img src="s.jpg" height=12 width=28>
+be       4.82 B (0.65%) <img src="s.jpg" height=12 width=26>
+by       4.70 B (0.63%) <img src="s.jpg" height=12 width=25>
+on       4.59 B (0.62%) <img src="s.jpg" height=12 width=25>
+not      4.52 B (0.61%) <img src="s.jpg" height=12 width=24>
+he       4.11 B (0.55%) <img src="s.jpg" height=12 width=22>
+i        3.88 B (0.52%) <img src="s.jpg" height=12 width=21>
+this     3.83 B (0.51%) <img src="s.jpg" height=12 width=21>
+are      3.70 B (0.50%) <img src="s.jpg" height=12 width=20>
+or       3.67 B (0.49%) <img src="s.jpg" height=12 width=20>
+his      3.61 B (0.49%) <img src="s.jpg" height=12 width=19>
+from     3.47 B (0.47%) <img src="s.jpg" height=12 width=19>
+at       3.41 B (0.46%) <img src="s.jpg" height=12 width=18>
+which    3.14 B (0.42%) <img src="s.jpg" height=12 width=17>
+but      2.79 B (0.38%) <img src="s.jpg" height=12 width=15>
+have     2.78 B (0.37%) <img src="s.jpg" height=12 width=15>
+an       2.73 B (0.37%) <img src="s.jpg" height=12 width=15>
+had      2.62 B (0.35%) <img src="s.jpg" height=12 width=14>
+they     2.46 B (0.33%) <img src="s.jpg" height=12 width=13>
+you      2.34 B (0.31%) <img src="s.jpg" height=12 width=13>
+were     2.27 B (0.31%) <img src="s.jpg" height=12 width=12>
+their    2.15 B (0.29%) <img src="s.jpg" height=12 width=12>
+one      2.15 B (0.29%) <img src="s.jpg" height=12 width=12>
+all      2.06 B (0.28%) <img src="s.jpg" height=12 width=11>
+we       2.06 B (0.28%) <img src="s.jpg" height=12 width=11>
+can      1.67 B (0.22%) <img src="s.jpg" height=12 width=9>
+her      1.63 B (0.22%) <img src="s.jpg" height=12 width=9>
+has      1.63 B (0.22%) <img src="s.jpg" height=12 width=9>
+there    1.62 B (0.22%) <img src="s.jpg" height=12 width=9>
+been     1.62 B (0.22%) <img src="s.jpg" height=12 width=9>
+if       1.56 B (0.21%) <img src="s.jpg" height=12 width=8>
+more     1.55 B (0.21%) <img src="s.jpg" height=12 width=8>
+when     1.52 B (0.20%) <img src="s.jpg" height=12 width=8>
+will     1.49 B (0.20%) <img src="s.jpg" height=12 width=8>
+would    1.47 B (0.20%) <img src="s.jpg" height=12 width=8>
+who      1.46 B (0.20%) <img src="s.jpg" height=12 width=8>
+so       1.45 B (0.19%) <img src="s.jpg" height=12 width=8>
+no       1.40 B (0.19%) <img src="s.jpg" height=12 width=8>
+
+>>> for n in sorted(H):
+    print '%2d %9.2f M (%6.3f%%) <img src="s.jpg" height=12 width=%d> %d' % (n, H[n]/1e6, H[n]*100./NN, H[n]*3000./NN, n)
+... 
+ 1  22301.22 M ( 2.998%) <img src="s.jpg" height=12 width=89> 1
+ 2 131293.85 M (17.651%) <img src="s.jpg" height=12 width=529> 2
+ 3 152568.38 M (20.511%) <img src="s.jpg" height=12 width=615> 3
+ 4 109988.33 M (14.787%) <img src="s.jpg" height=12 width=443> 4
+ 5  79589.32 M (10.700%) <img src="s.jpg" height=12 width=320> 5
+ 6  62391.21 M ( 8.388%) <img src="s.jpg" height=12 width=251> 6
+ 7  59052.66 M ( 7.939%) <img src="s.jpg" height=12 width=238> 7
+ 8  44207.29 M ( 5.943%) <img src="s.jpg" height=12 width=178> 8
+ 9  33006.93 M ( 4.437%) <img src="s.jpg" height=12 width=133> 9
+10  22883.84 M ( 3.076%) <img src="s.jpg" height=12 width=92> 10
+11  13098.06 M ( 1.761%) <img src="s.jpg" height=12 width=52> 11
+12   7124.15 M ( 0.958%) <img src="s.jpg" height=12 width=28> 12
+13   3850.58 M ( 0.518%) <img src="s.jpg" height=12 width=15> 13
+14   1653.08 M ( 0.222%) <img src="s.jpg" height=12 width=6> 14
+15    565.24 M ( 0.076%) <img src="s.jpg" height=12 width=2> 15
+16    151.22 M ( 0.020%) <img src="s.jpg" height=12 width=0> 16
+17     72.81 M ( 0.010%) <img src="s.jpg" height=12 width=0> 17
+18     28.62 M ( 0.004%) <img src="s.jpg" height=12 width=0> 18
+19      8.51 M ( 0.001%) <img src="s.jpg" height=12 width=0> 19
+20      6.35 M ( 0.001%) <img src="s.jpg" height=12 width=0> 20
+21      0.13 M ( 0.000%) <img src="s.jpg" height=12 width=0> 21
+22      0.81 M ( 0.000%) <img src="s.jpg" height=12 width=0> 22
+23      0.32 M ( 0.000%) <img src="s.jpg" height=12 width=0> 23
+
+>>> NL = sum(LC.values())
+
+>>> for L in sorted(LC, key=lambda L: -LC[L]):
+    print '%s %8.1f B (%5.2f%%) <img src="s.jpg" height=12 width=%d>' % (L, LC[L]/1e9, LC[L]*100./NL, LC[L]*3000./NL)
+...
+E    445.2 B (12.49%) <img src="s.jpg" height=12 width=374>
+T    330.5 B ( 9.28%) <img src="s.jpg" height=12 width=278>
+A    286.5 B ( 8.04%) <img src="s.jpg" height=12 width=241>
+O    272.3 B ( 7.64%) <img src="s.jpg" height=12 width=229>
+I    269.7 B ( 7.57%) <img src="s.jpg" height=12 width=227>
+N    257.8 B ( 7.23%) <img src="s.jpg" height=12 width=217>
+S    232.1 B ( 6.51%) <img src="s.jpg" height=12 width=195>
+R    223.8 B ( 6.28%) <img src="s.jpg" height=12 width=188>
+H    180.1 B ( 5.05%) <img src="s.jpg" height=12 width=151>
+L    145.0 B ( 4.07%) <img src="s.jpg" height=12 width=122>
+D    136.0 B ( 3.82%) <img src="s.jpg" height=12 width=114>
+C    119.2 B ( 3.34%) <img src="s.jpg" height=12 width=100>
+U     97.3 B ( 2.73%) <img src="s.jpg" height=12 width=81>
+M     89.5 B ( 2.51%) <img src="s.jpg" height=12 width=75>
+F     85.6 B ( 2.40%) <img src="s.jpg" height=12 width=72>
+P     76.1 B ( 2.14%) <img src="s.jpg" height=12 width=64>
+G     66.6 B ( 1.87%) <img src="s.jpg" height=12 width=56>
+W     59.7 B ( 1.68%) <img src="s.jpg" height=12 width=50>
+Y     59.3 B ( 1.66%) <img src="s.jpg" height=12 width=49>
+B     52.9 B ( 1.48%) <img src="s.jpg" height=12 width=44>
+V     37.5 B ( 1.05%) <img src="s.jpg" height=12 width=31>
+K     19.3 B ( 0.54%) <img src="s.jpg" height=12 width=16>
+X      8.4 B ( 0.23%) <img src="s.jpg" height=12 width=7>
+J      5.7 B ( 0.16%) <img src="s.jpg" height=12 width=4>
+Q      4.3 B ( 0.12%) <img src="s.jpg" height=12 width=3>
+Z      3.2 B ( 0.09%) <img src="s.jpg" height=12 width=2>
+
+>>> D2 = ngramcount(D, 2)
+
+>>> for ng in sorted(D2, key=lambda L: -D2[L])[:50]: print '%s %8.1f B (%5.2f%%) <img src="o.jpg" height=12 width=%d>' % (ng, D2[ng]/1e9, D2[ng]*100./N2, D2[ng]*15000./N2)
+
+def doit(k=25):
+    counts = [sortedby(ngramcount(D, n))[:k] for n in range(2, 10)]
+    for i in range(k):
+        print ('     '.join(count[i] for count in counts)).lower()
+"""
--- a/py/lis.py
+++ b/py/lis.py
@@ -0,0 +1,145 @@
+################ Lispy: Scheme Interpreter in Python
+
+## (c) Peter Norvig, 2010-16; See http://norvig.com/lispy.html
+
+from __future__ import division
+import math
+import operator as op
+
+################ Types
+
+Symbol = str          # A Lisp Symbol is implemented as a Python str
+List   = list         # A Lisp List is implemented as a Python list
+Number = (int, float) # A Lisp Number is implemented as a Python int or float
+
+################ Parsing: parse, tokenize, and read_from_tokens
+
+def parse(program):
+    "Turn the text of one Scheme expression into its Python form: tokenize it, then read it."
+    tokens = tokenize(program)
+    return read_from_tokens(tokens)
+
+def tokenize(s):
+    "Split a string into a list of tokens; each parenthesis is a token of its own."
+    padded = s.replace('(', ' ( ').replace(')', ' ) ')
+    return padded.split()
+
+def read_from_tokens(tokens):
+    """Read one expression from a list of tokens, removing the tokens it uses.
+    A parenthesized group becomes a Python list of the expressions inside it;
+    any other token becomes an atom.  Raises SyntaxError if the tokens run out
+    (including inside an unclosed '(') or if a ')' appears where an expression
+    should start."""
+    if len(tokens) == 0:
+        raise SyntaxError('unexpected EOF while reading')
+    token = tokens.pop(0)
+    if '(' == token:
+        L = []
+        while True:
+            # Check for running out of tokens before looking at tokens[0]:
+            # a missing ')' is a syntax error, not an IndexError.
+            if len(tokens) == 0:
+                raise SyntaxError('unexpected EOF while reading')
+            if tokens[0] == ')':
+                break
+            L.append(read_from_tokens(tokens))
+        tokens.pop(0) # pop off ')'
+        return L
+    elif ')' == token:
+        raise SyntaxError('unexpected )')
+    else:
+        return atom(token)
+
+def atom(token):
+    "Convert a token to an int if possible, else to a float if possible, else to a Symbol."
+    for convert in (int, float):
+        try:
+            return convert(token)
+        except ValueError:
+            pass
+    return Symbol(token)
+
+################ Environments
+
+def standard_env():
+    """An environment with some Scheme standard procedures.
+    Returns a new Env (with no outer Env) holding everything in the math
+    module plus the table below.  The table is added second, so its entries
+    win over any math name that is spelled the same."""
+    env = Env()
+    env.update(vars(math)) # sin, cos, sqrt, pi, ...
+    env.update({
+        '+':op.add, '-':op.sub, '*':op.mul, '/':op.truediv, 
+        '>':op.gt, '<':op.lt, '>=':op.ge, '<=':op.le, '=':op.eq, 
+        'abs':     abs,
+        'append':  op.add,  
+        'apply':   apply,
+        'begin':   lambda *x: x[-1],    # the args are already evaluated; return the last
+        'car':     lambda x: x[0],
+        'cdr':     lambda x: x[1:], 
+        'cons':    lambda x,y: [x] + y,
+        'eq?':     op.is_, 
+        'equal?':  op.eq, 
+        'length':  len, 
+        'list':    lambda *x: list(x), 
+        'list?':   lambda x: isinstance(x,list), 
+        'map':     map,
+        'max':     max,
+        'min':     min,
+        'not':     op.not_,
+        'null?':   lambda x: x == [], 
+        'number?': lambda x: isinstance(x, Number),   
+        'procedure?': callable,
+        'round':   round,
+        'symbol?': lambda x: isinstance(x, Symbol),
+    })
+    return env
+
+class Env(dict):
+    "An environment: a dict of {'var':val} pairs, with an outer Env."
+    def __init__(self, parms=(), args=(), outer=None):
+        "Bind each name in parms to the matching value in args; outer is the enclosing Env (or None)."
+        self.update(zip(parms, args))
+        self.outer = outer
+    def find(self, var):
+        """Find the innermost Env where var appears.
+        Raises LookupError if var is not bound in this Env or any outer one."""
+        if var in self:
+            return self
+        elif self.outer is None:
+            # Without this check an unbound variable ends in None.find(var),
+            # an AttributeError that says nothing about the variable.
+            raise LookupError(var)
+        else:
+            return self.outer.find(var)
+
+# The global environment; it is the default environment of eval().
+global_env = standard_env()
+
+################ Interaction: A REPL
+
+def repl(prompt='lis.py> '):
+    """A prompt-read-eval-print loop: read a line with raw_input, parse and
+    eval it, and print the value as Lisp text unless it is None.  Loops forever."""
+    while True:
+        line = raw_input(prompt)
+        val = eval(parse(line))
+        if val is None:
+            continue
+        print(lispstr(val))
+
+def lispstr(exp):
+    "Return the Lisp text for a Python object: lists become '(a b ...)', anything else str(exp)."
+    if not isinstance(exp, List):
+        return str(exp)
+    items = [lispstr(item) for item in exp]
+    return '(' + ' '.join(items) + ')'
+
+################ Procedures
+
+class Procedure(object):
+    "A user-defined Scheme procedure: parameter names, a body, and the Env it was defined in."
+    def __init__(self, parms, body, env):
+        self.parms = parms
+        self.body = body
+        self.env = env
+    def __call__(self, *args): 
+        "Evaluate the body in a new Env that binds parms to args, inside the defining Env."
+        call_env = Env(self.parms, args, self.env)
+        return eval(self.body, call_env)
+
+################ eval
+
+def eval(x, env=global_env):
+    """Evaluate an expression in an environment.
+    x is a parsed expression: a Symbol, a constant, or a list.  env defaults
+    to global_env.  The special forms quote, if, define, set! and lambda are
+    recognized by the first element of a list; any other list is a procedure
+    call.  define and set! return None.  (This definition shadows Python's
+    builtin eval within this module.)"""
+    if isinstance(x, Symbol):      # variable reference
+        return env.find(x)[x]
+    elif not isinstance(x, List):  # constant literal
+        return x                
+    elif x[0] == 'quote':          # (quote exp)
+        (_, exp) = x
+        return exp
+    elif x[0] == 'if':             # (if test conseq alt)
+        (_, test, conseq, alt) = x     # all three parts are required
+        exp = (conseq if eval(test, env) else alt)
+        return eval(exp, env)
+    elif x[0] == 'define':         # (define var exp)
+        (_, var, exp) = x
+        env[var] = eval(exp, env)      # always binds in the current env
+    elif x[0] == 'set!':           # (set! var exp)
+        (_, var, exp) = x
+        env.find(var)[var] = eval(exp, env)   # rebinds in the env where var is defined
+    elif x[0] == 'lambda':         # (lambda (var...) body)
+        (_, parms, body) = x
+        return Procedure(parms, body, env)
+    else:                          # (proc arg...)
+        proc = eval(x[0], env)
+        args = [eval(exp, env) for exp in x[1:]]
+        return proc(*args)
--- a/py/lispy.py
+++ b/py/lispy.py
@@ -0,0 +1,318 @@
+################ Scheme Interpreter in Python
+
+## (c) Peter Norvig, 2010; See http://norvig.com/lispy2.html
+
+################ Symbol, Procedure, classes
+
+from __future__ import division
+import re, sys, StringIO
+
+class Symbol(str):
+    "A Scheme symbol: a str subclass, so that symbols can be told apart from Scheme strings."
+
+def Sym(s, symbol_table={}):
+    """Return the unique Symbol for the str s, creating it on first use.
+    The default symbol_table dict is shared across calls on purpose: it is
+    the table of interned symbols, so equal names give the identical object."""
+    try:
+        return symbol_table[s]
+    except KeyError:
+        symbol = symbol_table[s] = Symbol(s)
+        return symbol
+
+# The interned Symbols for the special forms.  eval and expand test for
+# them with `is`.
+_quote, _if, _set, _define, _lambda, _begin, _definemacro, = map(Sym, 
+"quote   if   set!  define   lambda   begin   define-macro".split())
+
+# The interned Symbols for the quasiquote family.
+_quasiquote, _unquote, _unquotesplicing = map(Sym,
+"quasiquote   unquote   unquote-splicing".split())
+
+class Procedure(object):
+    "A user-defined Scheme procedure: parameters, a body expression, and the defining Env."
+    def __init__(self, parms, exp, env):
+        self.parms = parms
+        self.exp = exp
+        self.env = env
+    def __call__(self, *args): 
+        "Evaluate the body in a new Env that binds parms to args, inside the defining Env."
+        call_env = Env(self.parms, args, self.env)
+        return eval(self.exp, call_env)
+
+################ parse, read, and user interaction
+
+def parse(inport):
+    """Read one expression from inport, then expand and error-check it as a
+    top-level form.  For backwards compatibility inport may also be a str,
+    which is wrapped in an InPort first."""
+    if isinstance(inport, str):
+        inport = InPort(StringIO.StringIO(inport))
+    expression = read(inport)
+    return expand(expression, toplevel=True)
+
+# The end-of-input marker.  It is made with Symbol() directly, not Sym(), so it
+# is not in the symbol table and is only ever recognized with `is`.
+eof_object = Symbol('#<eof-object>') # Note: uninterned; can't be read
+
+class InPort(object):
+    "An input port. Retains a line of chars."
+    # The tokenizer skips leading whitespace, then captures two groups:
+    #   group 1, one token: ',@' | one of ( ' ` , ) | a "string" literal |
+    #            a ';' comment to end of line | a run of non-delimiter chars
+    #            (possibly empty);
+    #   group 2, the rest of the line.
+    tokenizer = r"""\s*(,@|[('`,)]|"(?:[\\].|[^\\"])*"|;.*|[^\s('"`,;)]*)(.*)"""
+    def __init__(self, file):
+        "file is any object with readline() (and read(1), which readchar uses)."
+        self.file = file; self.line = ''
+    def next_token(self):
+        "Return the next token, reading new text into line buffer if needed."
+        while True:
+            if self.line == '': self.line = self.file.readline()
+            if self.line == '': return eof_object    # readline gave '': end of input
+            token, self.line = re.match(InPort.tokenizer, self.line).groups()
+            # Skip empty tokens and comments; keep looking.
+            if token != '' and not token.startswith(';'):
+                return token
+
+def readchar(inport):
+    """Read the next character from an input port: the first character of the
+    buffered line if there is one (removing it from the buffer), otherwise one
+    character from the underlying file, or eof_object if the file is exhausted."""
+    buffered = inport.line
+    if buffered == '':
+        return inport.file.read(1) or eof_object
+    inport.line = buffered[1:]
+    return buffered[0]
+
+def read(inport):
+    """Read a Scheme expression from an input port.
+    Returns eof_object if the port is already at end of input.  Raises
+    SyntaxError for a stray ')' or for end of input inside an expression."""
+    def read_ahead(token):
+        "Finish reading the expression that begins with token."
+        if '(' == token: 
+            L = []
+            while True:
+                token = inport.next_token()
+                if token == ')': return L
+                else: L.append(read_ahead(token))
+        elif ')' == token: raise SyntaxError('unexpected )')
+        # 'x, `x, ,x and ,@x read as a two-element list: [symbol, next expression]
+        elif token in quotes: return [quotes[token], read(inport)]
+        elif token is eof_object: raise SyntaxError('unexpected EOF in list')
+        else: return atom(token)
+    # body of read:
+    token1 = inport.next_token()
+    return eof_object if token1 is eof_object else read_ahead(token1)
+
+# Reader abbreviations: each prefix token maps to the symbol it stands for.
+quotes = {"'":_quote, "`":_quasiquote, ",":_unquote, ",@":_unquotesplicing}
+
+def atom(token):
+    """Convert a token to a value: '#t' and '#f' are booleans; a token that
+    starts with a double quote is a string (escapes decoded); otherwise try
+    int, then float, then complex (the first 'i' is read as 'j'); anything
+    else is an interned Symbol."""
+    if token == '#t': return True
+    elif token == '#f': return False
+    elif token[0] == '"': return token[1:-1].decode('string_escape')
+    for convert in (int, float):
+        try:
+            return convert(token)
+        except ValueError:
+            pass
+    try:
+        return complex(token.replace('i', 'j', 1))
+    except ValueError:
+        return Sym(token)
+
+def to_string(x):
+    """Return the Lisp text for a Python object: True/False become #t/#f,
+    a Symbol is returned as is, a str is double-quoted with escapes, a list
+    becomes '(a b ...)', and a complex number is written with 'i' for 'j'."""
+    if x is True:
+        return "#t"
+    if x is False:
+        return "#f"
+    if isa(x, Symbol):
+        return x
+    if isa(x, str):
+        escaped = x.encode('string_escape').replace('"',r'\"')
+        return '"%s"' % escaped
+    if isa(x, list):
+        return '('+' '.join(map(to_string, x))+')'
+    if isa(x, complex):
+        return str(x).replace('j', 'i')
+    return str(x)
+
+def load(filename):
+    """Eval every expression from a file.
+    Runs the repl over the file with no prompt and no output of values; the
+    file is closed when the repl returns (or raises)."""
+    with open(filename) as source:
+        repl(None, InPort(source), None)
+
+def repl(prompt='lispy> ', inport=InPort(sys.stdin), out=sys.stdout):
+    """A prompt-read-eval-print loop.
+    Reads expressions from inport until end of input, then returns.  The
+    prompt (if any) and the version banner go to stderr; each value that is
+    not None is written to out (if any).  Any Exception is reported with
+    print and the loop carries on.  Note that the default inport and out are
+    created once, when this def statement is executed."""
+    sys.stderr.write("Lispy version 2.0\n")
+    while True:
+        try:
+            if prompt: sys.stderr.write(prompt)
+            x = parse(inport)
+            if x is eof_object: return
+            val = eval(x)
+            if val is not None and out: print >> out, to_string(val)
+        except Exception as e:
+            # Report the error and keep going with the next expression.
+            print '%s: %s' % (type(e).__name__, e)
+
+################ Environment class
+
+class Env(dict):
+    "An environment: a dict of {'var':val} pairs, with an outer Env."
+    def __init__(self, parms=(), args=(), outer=None):
+        """Bind parms to args.  If parms is a single Symbol it is bound to the
+        list of all the args; otherwise parms and args are paired off, and a
+        TypeError is raised if their lengths differ."""
+        self.outer = outer
+        if isa(parms, Symbol): 
+            self[parms] = list(args)
+            return
+        if len(args) != len(parms):
+            raise TypeError('expected %s, given %s, ' 
+                            % (to_string(parms), to_string(args)))
+        self.update(zip(parms,args))
+    def find(self, var):
+        "Return the innermost Env in which var is bound; raise LookupError if there is none."
+        if var in self:
+            return self
+        if self.outer is None:
+            raise LookupError(var)
+        return self.outer.find(var)
+
+def is_pair(x):
+    "Is x a non-empty list?"
+    return x != [] and isa(x, list)
+def cons(x, y):
+    "Return a new list with x in front of the items of the list y."
+    return [x]+y
+
+def callcc(proc):
+    """Call proc with the current continuation (escape only).
+    proc gets a one-argument function; calling it makes callcc return that
+    argument at once.  It works by raising a RuntimeWarning made for this
+    call; any other RuntimeWarning that arrives is raised again."""
+    ball = RuntimeWarning("Sorry, can't continue this continuation any longer.")
+    def throw(retval):
+        ball.retval = retval
+        raise ball
+    try:
+        return proc(throw)
+    except RuntimeWarning as w:
+        if w is not ball:
+            raise w
+        return ball.retval
+
+def add_globals(self):
+    """Add some Scheme standard procedures.
+    self is the Env (or dict) to fill in; it is also returned.  The names
+    from math are added first, then cmath, then the table below, so a later
+    source wins wherever the same name appears twice."""
+    import math, cmath, operator as op
+    self.update(vars(math))
+    self.update(vars(cmath))
+    # NOTE(review): 'open-input-file' returns a plain Python file, but
+    # 'close-input-port' uses p.file, and 'read'/'read-char' use attributes
+    # that only InPort has; the input-port entries look inconsistent.
+    # NOTE(review): '/' is op.div, which the `from __future__ import division`
+    # at the top of the file presumably does not affect -- confirm that
+    # integer division is what is wanted here.
+    self.update({
+     '+':op.add, '-':op.sub, '*':op.mul, '/':op.div, 'not':op.not_, 
+     '>':op.gt, '<':op.lt, '>=':op.ge, '<=':op.le, '=':op.eq, 
+     'equal?':op.eq, 'eq?':op.is_, 'length':len, 'cons':cons,
+     'car':lambda x:x[0], 'cdr':lambda x:x[1:], 'append':op.add,  
+     'list':lambda *x:list(x), 'list?': lambda x:isa(x,list),
+     'null?':lambda x:x==[], 'symbol?':lambda x: isa(x, Symbol),
+     'boolean?':lambda x: isa(x, bool), 'pair?':is_pair, 
+     'port?': lambda x:isa(x,file), 'apply':lambda proc,l: proc(*l), 
+     'eval':lambda x: eval(expand(x)), 'load':lambda fn: load(fn), 'call/cc':callcc,
+     'open-input-file':open,'close-input-port':lambda p: p.file.close(), 
+     'open-output-file':lambda f:open(f,'w'), 'close-output-port':lambda p: p.close(),
+     'eof-object?':lambda x:x is eof_object, 'read-char':readchar,
+     'read':read, 'write':lambda x,port=sys.stdout:port.write(to_string(x)),
+     'display':lambda x,port=sys.stdout:port.write(x if isa(x,str) else to_string(x))})
+    return self
+
+# Short alias, used throughout this module.
+isa = isinstance
+
+# The global environment; it is the default environment of eval().
+global_env = add_globals(Env())
+
+################ eval (tail recursive)
+
+def eval(x, env=global_env):
+    """Evaluate an expression in an environment.
+    x is an expression as produced by expand; env defaults to global_env.
+    The function is a loop rather than a plain recursion: for `if`, `begin`
+    and calls of a user-defined Procedure, it replaces x (and, for calls,
+    env) and goes round again instead of calling itself, so those tail
+    positions use no extra Python stack.  (This definition shadows Python's
+    builtin eval within this module.)"""
+    while True:
+        if isa(x, Symbol):       # variable reference
+            return env.find(x)[x]
+        elif not isa(x, list):   # constant literal
+            return x                
+        elif x[0] is _quote:     # (quote exp)
+            (_, exp) = x
+            return exp
+        elif x[0] is _if:        # (if test conseq alt)
+            (_, test, conseq, alt) = x
+            x = (conseq if eval(test, env) else alt)   # loop to evaluate the chosen branch
+        elif x[0] is _set:       # (set! var exp)
+            (_, var, exp) = x
+            env.find(var)[var] = eval(exp, env)
+            return None
+        elif x[0] is _define:    # (define var exp)
+            (_, var, exp) = x
+            env[var] = eval(exp, env)
+            return None
+        elif x[0] is _lambda:    # (lambda (var*) exp)
+            (_, vars, exp) = x
+            return Procedure(vars, exp, env)
+        elif x[0] is _begin:     # (begin exp+)
+            for exp in x[1:-1]:
+                eval(exp, env)
+            x = x[-1]            # loop to evaluate the last expression
+        else:                    # (proc exp*)
+            exps = [eval(exp, env) for exp in x]
+            proc = exps.pop(0)
+            if isa(proc, Procedure):
+                # Loop to evaluate the body in a new Env, instead of calling proc.
+                x = proc.exp
+                env = Env(proc.parms, exps, proc.env)
+            else:
+                return proc(*exps)
+
+################ expand
+
+def expand(x, toplevel=False):
+    """Walk tree of x, making optimizations/fixes, and signaling SyntaxError.
+    Returns the expression in the form that eval expects: (if t c) gets a
+    None alternative, (define (f args) body...) becomes a define of a lambda,
+    a multi-expression lambda body is wrapped in begin, quasiquote forms are
+    rewritten by expand_quasiquote, and uses of macros in macro_table are
+    replaced by their expansions.  toplevel says whether x is a top-level
+    form; define-macro is allowed only there."""
+    require(x, x!=[])                    # () => Error
+    if not isa(x, list):                 # constant => unchanged
+        return x
+    elif x[0] is _quote:                 # (quote exp)
+        require(x, len(x)==2)
+        return x
+    elif x[0] is _if:                    
+        if len(x)==3: x = x + [None]     # (if t c) => (if t c None)
+        require(x, len(x)==4)
+        return map(expand, x)
+    elif x[0] is _set:                   
+        require(x, len(x)==3); 
+        var = x[1]                       # (set! non-var exp) => Error
+        require(x, isa(var, Symbol), "can set! only a symbol")
+        return [_set, var, expand(x[2])]
+    elif x[0] is _define or x[0] is _definemacro: 
+        require(x, len(x)>=3)            
+        _def, v, body = x[0], x[1], x[2:]
+        if isa(v, list) and v:           # (define (f args) body)
+            f, args = v[0], v[1:]        #  => (define f (lambda (args) body))
+            return expand([_def, f, [_lambda, args]+body])
+        else:
+            require(x, len(x)==3)        # (define non-var/list exp) => Error
+            require(x, isa(v, Symbol), "can define only a symbol")
+            exp = expand(x[2])
+            if _def is _definemacro:     
+                require(x, toplevel, "define-macro only allowed at top level")
+                # The macro's procedure is evaluated now, at expansion time.
+                proc = eval(exp)       
+                require(x, callable(proc), "macro must be a procedure")
+                macro_table[v] = proc    # (define-macro v proc)
+                return None              #  => None; add v:proc to macro_table
+            return [_define, v, exp]
+    elif x[0] is _begin:
+        if len(x)==1: return None        # (begin) => None
+        else: return [expand(xi, toplevel) for xi in x]
+    elif x[0] is _lambda:                # (lambda (x) e1 e2) 
+        require(x, len(x)>=3)            #  => (lambda (x) (begin e1 e2))
+        vars, body = x[1], x[2:]
+        require(x, (isa(vars, list) and all(isa(v, Symbol) for v in vars))
+                or isa(vars, Symbol), "illegal lambda argument list")
+        exp = body[0] if len(body) == 1 else [_begin] + body
+        return [_lambda, vars, expand(exp)]   
+    elif x[0] is _quasiquote:            # `x => expand_quasiquote(x)
+        require(x, len(x)==2)
+        return expand_quasiquote(x[1])
+    elif isa(x[0], Symbol) and x[0] in macro_table:
+        return expand(macro_table[x[0]](*x[1:]), toplevel) # (m arg...) 
+    else:                                #        => macroexpand if m isa macro
+        return map(expand, x)            # (f arg...) => expand each
+
+def require(x, predicate, msg="wrong length"):
+    "Raise a SyntaxError that shows expression x and msg, unless predicate is true."
+    if predicate:
+        return
+    raise SyntaxError(to_string(x)+': '+msg)
+
+# Interned Symbols used by expand_quasiquote (append, cons) and by the let macro (let).
+_append, _cons, _let = map(Sym, "append cons let".split())
+
+def expand_quasiquote(x):
+    """Expand `x => 'x; `,x => x; `(,@x y) => (append x y) """
+    if not is_pair(x):
+        return [_quote, x]
+    head = x[0]
+    require(x, head is not _unquotesplicing, "can't splice here")
+    if head is _unquote:
+        require(x, len(x)==2)
+        return x[1]
+    tail = expand_quasiquote
+    if is_pair(head) and head[0] is _unquotesplicing:
+        # `((unquote-splicing e) rest...) => (append e `(rest...))
+        require(head, len(head)==2)
+        return [_append, head[1], tail(x[1:])]
+    # Otherwise: `(first rest...) => (cons `first `(rest...))
+    return [_cons, expand_quasiquote(head), tail(x[1:])]
+
+def let(*args):
+    """The let macro: (let ((var val)...) body...) => ((lambda (var...) body...) val...).
+    args are the unevaluated parts of the let form after the word `let`.
+    Raises SyntaxError (through require) if there is no body, or if a binding
+    is not a two-element list that starts with a Symbol.  An empty binding
+    list is allowed: (let () body...) => ((lambda () body...))."""
+    args = list(args)
+    x = cons(_let, args)
+    require(x, len(args)>1)
+    bindings, body = args[0], args[1:]
+    require(x, all(isa(b, list) and len(b)==2 and isa(b[0], Symbol)
+                   for b in bindings), "illegal binding list")
+    # Take the names and values out one binding at a time; zip(*bindings)
+    # gives nothing to unpack when there are no bindings.
+    names = [b[0] for b in bindings]
+    vals = [b[1] for b in bindings]
+    return [[_lambda, names]+map(expand, body)] + map(expand, vals)
+
+# Maps each macro's Symbol to the Python callable that expands it.  expand()
+# looks macros up here, and define-macro adds entries.
+macro_table = {_let:let} ## More macros can go here
+
+# Define the macros that are written in Scheme.  This runs on import.
+eval(parse("""(begin
+
+(define-macro and (lambda args 
+   (if (null? args) #t
+       (if (= (length args) 1) (car args)
+           `(if ,(car args) (and ,@(cdr args)) #f)))))
+
+;; More macros can also go here
+
+)"""))
+
+# Start the interactive loop when the file is run as a script.
+if __name__ == '__main__':
+    repl()
+
--- a/py/lispytest.py
+++ b/py/lispytest.py
@@ -0,0 +1,121 @@
+
+################ Tests for lis.py and lispy.py
+
+lis_tests = [
+    ("(quote (testing 1 (2.0) -3.14e159))", ['testing', 1, [2.0], -3.14e159]),
+    ("(+ 2 2)", 4),
+    ("(+ (* 2 100) (* 1 10))", 210),
+    ("(if (> 6 5) (+ 1 1) (+ 2 2))", 2),
+    ("(if (< 6 5) (+ 1 1) (+ 2 2))", 4),
+    ("(define x 3)", None), ("x", 3), ("(+ x x)", 6),
+    ("(begin (define x 1) (set! x (+ x 1)) (+ x 1))", 3),
+    ("((lambda (x) (+ x x)) 5)", 10),
+    ("(define twice (lambda (x) (* 2 x)))", None), ("(twice 5)", 10),
+    ("(define compose (lambda (f g) (lambda (x) (f (g x)))))", None),
+    ("((compose list twice) 5)", [10]),
+    ("(define repeat (lambda (f) (compose f f)))", None),
+    ("((repeat twice) 5)", 20), ("((repeat (repeat twice)) 5)", 80),
+    ("(define fact (lambda (n) (if (<= n 1) 1 (* n (fact (- n 1))))))", None),
+    ("(fact 3)", 6),
+    ("(fact 50)", 30414093201713378043612608166064768844377641568960512000000000000),
+    ("(define abs (lambda (n) ((if (> n 0) + -) 0 n)))", None),
+    ("(list (abs -3) (abs 0) (abs 3))", [3, 0, 3]),
+    ("""(define combine (lambda (f)
+    (lambda (x y)
+      (if (null? x) (quote ())
+          (f (list (car x) (car y))
+             ((combine f) (cdr x) (cdr y)))))))""", None),
+    ("(define zip (combine cons))", None),
+    ("(zip (list 1 2 3 4) (list 5 6 7 8))", [[1, 5], [2, 6], [3, 7], [4, 8]]),
+    ("""(define riff-shuffle (lambda (deck) (begin
+    (define take (lambda (n seq) (if (<= n 0) (quote ()) (cons (car seq) (take (- n 1) (cdr seq))))))
+    (define drop (lambda (n seq) (if (<= n 0) seq (drop (- n 1) (cdr seq)))))
+    (define mid (lambda (seq) (/ (length seq) 2)))
+    ((combine append) (take (mid deck) deck) (drop (mid deck) deck)))))""", None),
+    ("(riff-shuffle (list 1 2 3 4 5 6 7 8))", [1, 5, 2, 6, 3, 7, 4, 8]),
+    ("((repeat riff-shuffle) (list 1 2 3 4 5 6 7 8))",  [1, 3, 5, 7, 2, 4, 6, 8]),
+    ("(riff-shuffle (riff-shuffle (riff-shuffle (list 1 2 3 4 5 6 7 8))))", [1,2,3,4,5,6,7,8]),
+    ]
+
+lispy_tests = [
+    ("()", SyntaxError), ("(set! x)", SyntaxError), 
+    ("(define 3 4)", SyntaxError),
+    ("(quote 1 2)", SyntaxError), ("(if 1 2 3 4)", SyntaxError), 
+    ("(lambda 3 3)", SyntaxError), ("(lambda (x))", SyntaxError),
+    ("""(if (= 1 2) (define-macro a 'a) 
+     (define-macro a 'b))""", SyntaxError),
+    ("(define (twice x) (* 2 x))", None), ("(twice 2)", 4),
+    ("(twice 2 2)", TypeError),
+    ("(define lyst (lambda items items))", None),
+    ("(lyst 1 2 3 (+ 2 2))", [1,2,3,4]),
+    ("(if 1 2)", 2),
+    ("(if (= 3 4) 2)", None),
+    ("(define ((account bal) amt) (set! bal (+ bal amt)) bal)", None),
+    ("(define a1 (account 100))", None),
+    ("(a1 0)", 100), ("(a1 10)", 110), ("(a1 10)", 120),
+    ("""(define (newton guess function derivative epsilon)
+    (define guess2 (- guess (/ (function guess) (derivative guess))))
+    (if (< (abs (- guess guess2)) epsilon) guess2
+        (newton guess2 function derivative epsilon)))""", None),
+    ("""(define (square-root a)
+    (newton 1 (lambda (x) (- (* x x) a)) (lambda (x) (* 2 x)) 1e-8))""", None),
+    ("(> (square-root 200.) 14.14213)", True),
+    ("(< (square-root 200.) 14.14215)", True),
+    ("(= (square-root 200.) (sqrt 200.))", True),
+    ("""(define (sum-squares-range start end)
+         (define (sumsq-acc start end acc)
+            (if (> start end) acc (sumsq-acc (+ start 1) end (+ (* start start) acc))))
+         (sumsq-acc start end 0))""", None),
+    ("(sum-squares-range 1 3000)", 9004500500), ## Tests tail recursion
+    ("(call/cc (lambda (throw) (+ 5 (* 10 (throw 1))))) ;; throw", 1),
+    ("(call/cc (lambda (throw) (+ 5 (* 10 1)))) ;; do not throw", 15),
+    ("""(call/cc (lambda (throw) 
+         (+ 5 (* 10 (call/cc (lambda (escape) (* 100 (escape 3)))))))) ; 1 level""", 35),
+    ("""(call/cc (lambda (throw) 
+         (+ 5 (* 10 (call/cc (lambda (escape) (* 100 (throw 3)))))))) ; 2 levels""", 3),
+    ("""(call/cc (lambda (throw) 
+         (+ 5 (* 10 (call/cc (lambda (escape) (* 100 1))))))) ; 0 levels""", 1005),
+    ("(* 1i 1i)", -1), ("(sqrt -1)", 1j),
+    ("(let ((a 1) (b 2)) (+ a b))", 3),
+    ("(let ((a 1) (b 2 3)) (+ a b))", SyntaxError),
+    ("(and 1 2 3)", 3), ("(and (> 2 1) 2 3)", 3), ("(and)", True),
+    ("(and (> 2 1) (> 2 3))", False),
+    ("(define-macro unless (lambda args `(if (not ,(car args)) (begin ,@(cdr args))))) ; test `", None),
+    ("(unless (= 2 (+ 1 1)) (display 2) 3 4)", None),
+    (r'(unless (= 4 (+ 1 1)) (display 2) (display "\n") 3 4)', 4),
+    ("(quote x)", 'x'), 
+    ("(quote (1 2 three))", [1, 2, 'three']), 
+    ("'x", 'x'),
+    ("'(one 2 3)", ['one', 2, 3]),
+    ("(define L (list 1 2 3))", None),
+    ("`(testing ,@L testing)", ['testing',1,2,3,'testing']),
+    ("`(testing ,L testing)", ['testing',[1,2,3],'testing']),
+    ("`,@L", SyntaxError),
+    ("""'(1 ;test comments '
+     ;skip this line
+     2 ; more ; comments ; ) )
+     3) ; final comment""", [1,2,3]),
+    ]
+
+def test(tests, name=''):
+    "For each (exp, expected) case, check that eval(parse(exp)) equals (or raises) expected."
+    fails = 0
+    for (x, expected) in tests:
+        try:
+            result = eval(parse(x))
+            print x, '=>', to_string(result)
+            ok = (result == expected)
+        except Exception as e:
+            print x, '=raises=>', type(e).__name__, e
+            ok = isinstance(expected, type) and issubclass(expected, Exception) and isinstance(e, expected)
+        if not ok:
+            fails += 1
+            print 'FAIL!!!  Expected', expected
+    print '%s %s: %d out of %d tests fail.' % ('*'*45, name, fails, len(tests))
+
+if __name__ == '__main__':
+    from lis import *
+    test(lis_tests, 'lis.py')
+    from lispy import *
+    test(lis_tests+lispy_tests, 'lispy.py')
+
--- a/py/pal.py
+++ b/py/pal.py
@@ -0,0 +1,154 @@
+import string, random, os, re, bisect
+
+"""Produce Panama-ish Palindromes. Copyright (C) 2002, Peter Norvig.
+See http://www.norvig.com/license.html and http://www.norvig.com/pal-alg.html"""
+
+def is_panama(p):
+    "Is p a palindrome that ends with 'Panama' and repeats no comma-separated phrase?"
+    if not (p.endswith('Panama') and is_palindrome(p)): return False
+    parts = [s.strip() for s in p.split(',')]
+    return len(parts) == len(dict.fromkeys(parts))
+
+def is_palindrome(phrase):
+    "Test if a phrase is a palindrome."
+    cphrase = canonical(phrase)
+    return cphrase == reverse(cphrase)
+
+def canonical(word, sub=re.compile('[^A-Za-z0-9]').sub):
+    "The canonical form for comparing: lowercase alphanumerics."
+    return sub('', word).lower()
+
+def read_dict(filename='npdict.txt'):
+    "Read the file into global variables _fw and _bw and _truename."
+    global _fw, _bw, _truename
+    _fw, _bw, _truename = [], [], {'': ''}
+    for word in open(filename).read().splitlines():
+        w = canonical(word)
+        _fw.append(w)
+        _bw.append(reverse(w))
+        _truename[w] = word
+    _fw.sort(); _bw.sort()
+    return len(_fw), len(_bw), len(_truename)
+
+def update(obj, **entries): obj.__dict__.update(entries); return obj
+
+class PalDict:
+    """A dictionary from which you can find canonical words that start or end
+    with a given canonical substring, and find the true name of a
+    canonical word."""
+    def __init__(self, fw=None, bw=None, truename=None):
+        update(self, fw=fw or _fw, bw=bw or _bw, truename=truename or _truename)
+
+    def startswith(self, prefix, k=100):
+        """Return up to k canonical words that start with prefix.
+        If there are more than k, choose from them at random."""
+        return k_startingwith(k, self.fw, prefix)
+
+    def endswith(self, suffix, k=100):
+        """Return up to k canonical words that end with suffix.
+        If there are more than k, choose from them at random.
+        Both the suffix and the word returned are reversed."""
+        return k_startingwith(k, self.bw, suffix)
+
+def k_startingwith(k, words, prefix):
+    """Choose up to k of the sorted words that start with prefix (random picks if > k)."""
+    start = bisect.bisect(words, prefix)  # bisect_right: a word equal to prefix is skipped
+    end = bisect.bisect(words, prefix + 'zzzz')  # presumably nothing sorts past prefix+'zzzz'
+    n = end - start
+    if k >= n:
+        results = words[start:end]
+        random.shuffle(results)
+    else: # Should really try to avoid duplicates (indexes are drawn independently)
+        results = [words[random.randrange(start, end)] for i in range(k)]
+    return results
+
+class Panama:
+    def __init__(self, L='A man, a plan', R='a canal, Panama', dict=None):
+        left = [canonical(w) for w in L.split(', ')]
+        right = [canonical(reverse(w)) for w in reverse(R.split(', '))]
+        update(self, left=left, right=right, dict=dict or PalDict(), best=0, 
+               seen={}, diff=len(''.join(left)) - len(''.join(right)))
+        for word in left + map(reverse, right):
+            self.seen[word] = 1
+
+    def missing(self, k=20):
+        """Return the substring that is missing, and candidate words."""
+        if self.diff >= 0: # Left is longer, missing on right
+            substr =  self.left[-1][-self.diff:]
+            return substr, self.dict.endswith(substr, k)
+        else: # Right is longer, missing on left
+            substr =  self.right[-1][self.diff:]
+            return substr, self.dict.startswith(substr, k)
+
+    def search(self, k=200):
+        "Search for palindromes; consider at most k words at each level."
+        self.stack = [self.missing(k)]
+        while self.stack:
+            substr, words = self.stack[-1]
+            if is_palindrome(substr):
+                self.report()
+            if words:
+                self.extend(words.pop(), k)
+            elif not self.backtrack():
+                return
+
+    def extend(self, word, k):
+        "Add a new word (unless we've already seen it)."
+        if self.diff >= 0: # Left is longer, add to right
+            fword = reverse(word)
+            if fword in self.seen: return
+            self.diff -= len(fword)
+            self.seen[fword] = 1
+            self.right.append(word)
+            self.stack.append(self.missing(k))
+        else: # Right is longer, add to left
+            if word in self.seen: return
+            self.diff += len(word)
+            self.seen[word] = 1
+            self.left.append(word)
+            self.stack.append(self.missing(k))
+
+    def backtrack(self):
+        "Remove the last word added; return 0 if can't backtrack"
+        if self.diff >= 0: # Left is longer, pop from left
+            if not self.left: return 0
+            word = self.left.pop()
+            self.diff -= len(word)
+            del self.seen[word]
+        else: # Right is longer, pop from right
+            if not self.right: return 0
+            word = self.right.pop()
+            self.diff += len(word)
+            del self.seen[reverse(word)]
+        self.stack.pop()
+        return 1
+
+    def report(self):
+        "Write current state to log file."
+        if len(self) > self.best + 200:
+            self.best = len(self)
+            print self.best
+            self.bestphrase = str(self)
+            assert is_panama(self.bestphrase)
+            f = open('pallog%d.txt' % os.getpid(), 'w')
+            f.write(self.bestphrase + '\n')
+            f.close()
+
+    def __len__(self):
+        return len(self.left) + len(self.right)
+
+    def __str__(self):
+        truename = self.dict.truename
+        lefts = [truename[w] for w in self.left]
+        rights = [truename[reverse(w)] for w in reverse(self.right[:])]
+        return ', '.join(lefts + ['*****'] + rights)
+
+def reverse(x):
+    "Reverse a string (giving a new string) or a list (in place; the same list is returned)."
+    if type(x) == str:
+        # Slicing gives the same result as joining the reversed characters.
+        return x[::-1]
+    x.reverse()
+    return x
+
+if __name__ == '__main__': read_dict(); p = Panama(); p.search()
--- a/py/pal2.py
+++ b/py/pal2.py
@@ -0,0 +1,262 @@
+import random, re, bisect, time
+
+"""Produce Panama-ish Palindromes. Copyright (C) 2002-2008, Peter Norvig."""
+
+################ Checking for Palindromes
+
+def is_panama(s):
+    "Test if string s is a Panama-ish palindrome."
+    return is_palindrome(s) and is_unique(phrases(s))
+
+def is_palindrome(s):
+    "Test if a string is a palindrome."
+    s1 = canonical(s)
+    return s1 == reversestr(s1)
+
+def phrases(s):
+    "Break a string s into comma-separated phrases."
+    return [phrase.strip() for phrase in s.split(',')]
+
+def canonical(word, sub=re.compile('''[-* \t\n\r.,;!?:()`"']''').sub):
+    "The canonical form for comparing: lowercase, no blanks or punctuation."
+    return sub('', word).lower()
+
+################ Utilities
+
+def reversestr(x):
+    "Reverse a string."
+    return x[::-1]
+
+def is_unique(seq):
+    "Return true if seq has no duplicate elements."
+    return len(seq) == len(set(seq))
+
+def update(obj, **entries):
+    "Change attributes of obj, according to the keyword args."
+    obj.__dict__.update(entries)
+    return obj
+
+################ Reading in a dictionary
+
+class PalDict:
+    """A dictionary from which you can find canonical words that start or end
+    with a given canonical substring, and find the true name of a
+    canonical word with d.truename[canonicalword]."""
+    
+    def __init__(self, k=1000, filename='npdict.txt'):
+        words, rwords, truename = [], [], {'': '', 'panama': 'Panama!'}
+        for tword in open(filename).read().splitlines():
+            word = canonical(tword)
+            words.append(word)
+            rwords.append(reversestr(word))
+            truename[word] = tword
+        words.sort()
+        rwords.sort()
+        update(self, k=k, words=words, rwords=rwords, truename=truename,
+               reversibles={}, rangek=range(k), tryharder=False)
+
+    def startswith(self, prefix):
+        """Return up to k canonical words that start with prefix.
+        If there are more than k, choose from them at random."""
+        return self._k_startingwith(self.words, prefix)
+
+    def endswith(self, rsuffix):
+        """Return up to k canonical words that end with the reversed suffix.
+        If you want words ending in 'ing', ask for d.endswith('gni').
+        If there are more than k, choose from them at random."""
+        return map(reversestr, self._k_startingwith(self.rwords, rsuffix))
+
+    def __contains__(self, word):
+        return word in self.truename
+
+    def reversible_words(self):
+        "Find words that have a reverse in the dict, like {'Camus': 'Sumac'}"
+        if not self.reversibles:
+            reversibles = self.reversibles
+            for rw in self.rwords:
+                if rw in self:
+                    w = reversestr(rw)
+                    if w != rw and w not in reversibles:
+                        reversibles[w] = rw
+            self.reversibles = reversibles
+        return self.reversibles
+
+    def _k_startingwith(self, words, prefix):
+        "Return up to self.k words (from the sorted list words) that start with prefix, shuffled."
+        start = bisect.bisect_left(words, prefix)
+        end = bisect.bisect(words, prefix + 'zzzz')
+        n = end - start
+        if self.k >= n: # get all the words that start with prefix
+            results = words[start:end]
+        else: # sample from words starting with prefix 
+            indexes = random.sample(xrange(start, end), self.k)
+            results = [words[i] for i in indexes]
+        random.shuffle(results)
+        ## Late in the game, also consider words that are prefixes of the prefix.
+        if self.tryharder:
+            for i in range(3, len(prefix)):
+                w = prefix[0:i]
+                if ((words is self.words and w in self.truename) or  # 'is': no list compare
+                    (words is self.rwords and reversestr(w) in self.truename)):
+                    results.append(w)
+        return results
+
+paldict = PalDict() 
+
+def anpdictshort():
+    "Find the words that are valid when every phrase must start with 'a'"
+    def segment(word):  return [s for s in word.split('a') if s]
+    def valid(word): return all(reversestr(s) in segments for s in segment(word))
+    words = map(canonical, file('anpdict.txt'))
+    segments = set(s for w in words for s in segment(canonical(w)))
+    valid_words = [paldict.truename[w] for w in words if valid(w)]
+    file('anpdict-short.txt', 'w').write('\n'.join(valid_words))
+
+################ Search for a palindrome
+
+class Panama:
+    def __init__(self, L='A man, a plan', R='a canal, Panama', dict=paldict):
+        "Start a search state from the phrases of L (left end) and R (right end)."
+        ## .left and .right hold lists of canonical words
+        ## .diff holds the number of unmatched characters: > 0 if on left, < 0 if on right
+        ## .stack holds (action, dir, substr, arg) tuples; see search()
+        update(self, left=[], right=[], best=0, seen={}, diff=0, stack=[],
+               used_reversibles=False, starttime=time.clock(), dict=dict)
+        for word in L.split(','):
+            self.add('left', canonical(word))
+        for rword in reversestr(R).split(','):  # visits R's phrases last-to-first
+            self.add('right', canonical(reversestr(rword)))
+        self.consider_candidates()
+        
+    def search(self, steps=50000000):
+        "Search for palindromes."
+        for _ in xrange(steps):
+            if not self.stack:
+                return 'done'
+            action, dir, substr, arg = self.stack[-1]
+            if action == 'added': # undo the last word added
+                self.remove(dir, arg)
+            elif action == 'trying' and arg: # try the next word if there is one
+                self.add(dir, arg.pop()) and self.consider_candidates()
+            elif action == 'trying' and not arg: # otherwise backtrack
+                self.stack.pop()
+            else:
+                raise ValueError(action)
+
+    def add(self, dir, word):
+        "add a word"
+        if word in self.seen:
+            return False
+        else:
+            getattr(self, dir).append(word)
+            self.diff += factor[dir] * len(word)
+            self.seen[word] = True
+            self.stack.append(('added', dir, '?', word))
+            return True
+
+    def remove(self, dir, word):
+        "remove a word"
+        oldword = getattr(self, dir).pop()
+        assert word == oldword
+        self.diff -= factor[dir] * len(word)
+        del self.seen[word]
+        self.stack.pop()
+        
+    def consider_candidates(self):
+        """Push a new state with a set of candidate words onto stack."""
+        if self.diff > 0: # Left is longer, consider adding on right
+            dir = 'right'
+            substr =  self.left[-1][-self.diff:]
+            candidates = self.dict.endswith(substr)
+        elif self.diff < 0: # Right is longer, consider adding on left
+            dir = 'left'
+            substr =  reversestr(self.right[-1][0:-self.diff])
+            candidates = self.dict.startswith(substr)
+        else: # Both sides are same size
+            dir = 'left'
+            if not self.used_reversibles:
+                self.report()
+                self.add_reversibles()
+            substr = ''
+            candidates = self.dict.startswith('')
+        if substr == reversestr(substr):
+            self.report()
+        self.stack.append(('trying', dir, substr, candidates))
+
+    def add_reversibles(self):
+        "Add in reversible words."
+        print 'using reversibles ...'
+        for (word, rword) in self.dict.reversible_words().items():
+            if word not in self.seen and rword not in self.seen:
+                self.add('left', word)
+                self.add('right', rword)
+        self.used_reversibles = True
+        self.stack = []
+        print '...done'
+                
+    def report(self):
+        "Report a new palindrome to log file (if it is sufficiently big)."
+        N = len(self)
+        if N > 13333:
+            self.dict.tryharder = True
+        if N > self.best and (N > 12500 or N > self.best+500):
+            self.best = len(self)
+            self.bestphrase = str(self)
+            print '%5d phrases (%5d words) in %3d seconds' % (
+                self.best, self.bestphrase.count(' ')+1, time.clock() - self.starttime)
+            assert is_panama(self.bestphrase)
+            f = open('pallog%d.txt' % (id(self) % 10000), 'w')
+            f.write(self.bestphrase + '\n')
+            f.close()
+
+    def __len__(self):
+        return len(self.left) + len(self.right)
+
+    def __str__(self):
+        truename = self.dict.truename
+        lefts = [truename[w] for w in self.left]
+        rights =[truename[w] for w in self.right]
+        return ', '.join(lefts + rights[::-1])
+
+factor = {'left': +1, 'right': -1}
+
+# Note that we only allow one truename per canonical name.  Occasionally
+# this means we miss a good word (as in "a node" vs. "an ode"), but there
+# are only 665 of these truename collisions, and most of them are of the
+# form "a mark-up" vs. "a markup" so it seemed better to disallow them.
+
+################ Unit Tests
+ 
+def tests(p=None):
+    "Run the unit tests, using p's dictionary (a fresh Panama is made if p is None)."
+    assert is_panama('A man, a plan, a canal, Panama.')
+    assert is_panama('''A (man),     a   plan,,;, a ```canal?'' -- Panama!''')
+    assert not is_panama('A man, a plan, a radar, a canal, Panama.')
+    assert is_palindrome('A man, a plan, a canal, Panama.')
+    assert is_palindrome('radar, radar? radar!')
+    assert not is_palindrome('radars')
+    assert phrases('A man, a plan, Panama') == ['A man', 'a plan', 'Panama']
+    assert canonical('A man, a plan, a canal, Panama') == 'amanaplanacanalpanama'
+    assert reversestr('foo') == 'oof'
+    assert is_unique([1, 2, 3]) and not is_unique([1, 2, 2])
+    d = (Panama() if p is None else p).dict  # default built on call, not at def time
+    def sameset(a, b): return set(a) == set(b)
+    assert 'panama' in d
+    assert d.words[0] in d
+    assert d.words[-1] in d
+    assert sameset(d.startswith('aword'), ['awording', 'awordbreak',
+        'awordiness', 'awordage', 'awordplay', 'awordlore', 'awordbook',
+        'awordlessness', 'aword', 'awordsmith'])
+    assert sameset(d.endswith('ytisob'), ['aglobosity', 'averbosity',
+        'asubglobosity', 'anonverbosity', 'agibbosity'])
+    d.tryharder = True
+    assert sameset(d.startswith('oklahoma'), ['oklahoma', 'okla'])
+    d.tryharder = False
+    assert d.startswith('oklahoma') == ['oklahoma']
+    assert d.startswith('fsfdsfdsfds') == []
+    print 'all tests pass'
+
+if __name__ == '__main__': 
+    p = Panama();
+    tests(p)
+    p.search()
--- a/py/pal3.py
+++ b/py/pal3.py
@@ -0,0 +1,170 @@
+from collections import Counter, deque
+import re
+
+class PhraseDict(dict):
+    """A dictionary of {letters: phrase}, such as {'donaldeknuth': 'Donald E. Knuth'}, with:
+    .prefixes: Counter of {'pre': n} where n is the number of keys that start with 'pre'
+    .suffixes: Counter of {'xes': n} where n is the number of keys that end with 'xes'"""
+    def __init__(self, phrases):
+        for phrase in phrases:
+            phrase = phrase.strip()
+            self[letters(phrase)] = phrase
+        self.prefixes = Counter(x for p in self for x in prefixes(p))
+        self.suffixes = Counter(x for p in self for x in suffixes(p))
+        
+def prefixes(phrase): return [phrase[:i] for i in range(1, len(phrase) + 1)]
+
+def suffixes(phrase): return [phrase[-i:] for i in range(1, len(phrase) + 1)]
+
+def letters(phrase, sub=re.compile(r'[\W]+').sub):
+    "Remove all non-word characters (anything but letters, digits and '_'); return lowercase version."
+    return sub('', phrase).lower()
+
+DICT = PhraseDict(open('npdict.txt'))
+
+class Panama:
+    """Panama represents a palindrome, or a state in searching for one.
+    It has .left and .right to hold the phrases that are chosen,
+    and .L and .R to hold the current partial phrases in the middle (still working on these).
+    Also, a .set of all complete phrases, and the .dict of allowable phrases to choose from."""
+    
+    def __init__(self, left=['aman', 'aplan'], L='aca', R='', right=['acanal', 'panama'], dict=DICT):
+        assert cat(left + [L]) == cat([R] + right)[::-1]
+        self.left   = list(left)        # list of complete phrases on left
+        self.L      = L                 # an incomplete phrase on left
+        self.R      = R                 # an incomplete phrase on right
+        self.right  = deque(right)      # deque of complete phrases on right
+        self.dict   = dict              # a {letters: actual_phrase} mapping
+        self.set    = set(left + right) # a set of all complete phrases in palindrome
+        self.best   = []                # list of phrases in longest palindrome found
+        self.Nshown = 0                 # the number of phrases shown in the previous printout
+        self.i      = 0                 # the number of steps taken in the search
+        self.check()
+
+    def __str__(self): return self.original_phrases(self.best)
+    
+    def original_phrases(self, phrases): return ', '.join(self.dict[phrase] for phrase in phrases)
+
+    def search(self, steps=10**5):
+        """Depth-first search for palindromes. From the current state, find all applicable actions.
+        Do the first one, and put on the stack reminders to undo it and try the others,
+        but first search deeper from the result of the first action."""
+        stack = [self.applicable_actions()]
+        for self.i in range(steps):
+            if not stack: 
+                return
+            command = stack.pop()
+            if isinstance(command, UndoCommand):
+                self.undo(command)
+            elif command:
+                act = command.pop()
+                self.do(act)
+                self.check()
+                stack.extend([command, UndoCommand(act), self.applicable_actions()])
+                
+    def do(self, act):
+        "Modify the current state by adding a letter, or finishing a phrase."
+        if act == ',': # finish phrase on left
+            self.set.add(self.L)
+            self.left.append(self.L)
+            self.L = ''
+        elif act == ';': # finish phrase on right
+            self.set.add(self.R)
+            self.right.appendleft(self.R)
+            self.R = ''
+        else: # add a letter
+            self.L = self.L + act 
+            self.R = act + self.R
+    
+    def undo(self, act):
+        "Modify the current state by undoing an action that was previously done."
+        if act == ',': # unfinish phrase on left
+            assert self.L == ''
+            self.L = self.left.pop()
+            self.set.remove(self.L)
+        elif act == ';': # unfinish phrase on right
+            assert self.R == ''
+            self.R = self.right.popleft()
+            self.set.remove(self.R)
+        else: # remove a letter
+            self.L = self.L[:-1]
+            self.R = self.R[1:]
+            
+    def check(self):
+        "Check to see if current state is a palindrome, and if so, record it and maybe print."
+        if not self.is_palindrome(): return
+        N = len(self.left) + len(self.right) 
+        if N > len(self.best):
+            self.best = self.left + list(self.right)
+            if N - self.Nshown > 1000 or (N > 14000 and N - self.Nshown > 100) or N > 14500:
+                self.Nshown = N
+                print(self.report())
+            
+    def report(self):
+        N = len(self.best)
+        nwords = N + sum(self.dict[p].count(' ') for p in self.best)
+        nletters = sum(len(p) for p in self.best)
+        return ('Pal: {:6,d} phrases, {:6,d} words, {:6,d} letters (at step {:,d})'
+                .format(N, nwords, nletters, self.i+1))
+        
+    def applicable_actions(self):
+        L, R, D = self.L, self.R, self.dict
+        actions = []
+
+        def score(A): return D.prefixes[L+A] * D.suffixes[A+R]
+        if self.is_allowed(L):
+            actions.append(',')
+        if self.is_allowed(R):
+            actions.append(';')
+        for A in sorted(alphabet, key=score):
+            if score(A) > 0:
+                actions.append(A)    
+
+        return actions
+ 
+    def is_allowed(self, phrase): return phrase in self.dict and phrase not in self.set
+        
+    def is_palindrome(self): 
+        "Is this a palindrome? (Does any extra .L or .R match the other side?)"
+        return ((self.L == '' and self.left[-1].endswith(self.R)) or 
+                (self.R == '' and self.right[0].startswith(self.L)))
+
+alphabet    = 'abcdefghijklmnopqrstuvwxyz'
+cat         = ''.join
+UndoCommand = str
+DoCommand   = list
+                      
+################ Unit Tests
+
+def test1():
+    assert prefixes('hello') == ['h', 'he', 'hel', 'hell', 'hello']
+    assert suffixes('hello') == ['o', 'lo', 'llo', 'ello', 'hello']
+    assert letters('a man') == 'aman'
+    assert letters('an elk') == 'anelk'
+    assert letters('Mr. T') == 'mrt'
+    assert letters('Donald E. Knuth') == 'donaldeknuth'
+    assert len(DICT) == 125512
+    assert 'panama' in DICT
+    assert 'aman' in DICT
+    assert 'threemen' not in DICT
+    assert DICT['acanal'] == 'a canal'
+    return 'ok'
+
+def test2():
+    p1 = Panama()
+    assert p1.is_palindrome()
+    assert str(p1) == 'a man, a plan, a canal, Panama'
+    p2 = Panama(['aman','aplan'], 'acadd','dd', ['acanal', 'panama'])
+    assert not p2.is_palindrome()
+    p3 = Panama(['maya'], '', '', ['ayam'])
+    assert p3.is_palindrome()
+    assert str(p3) == 'Maya, a yam'
+    return 'ok'
+
+if __name__ == '__main__': 
+    p = Panama();
+    test1()
+    test2()
+    p.search(10**6)
+    print(p.report())
+    print(str(p))
--- a/py/parse.py
+++ b/py/parse.py
@@ -0,0 +1,52 @@
+grammar = {
+  'Noun': ['stench', 'wumpus'],
+  'Verb': ['is', 'smell'],
+  'Adjective': ['dead', 'smelly'],
+  'Adverb': ['left', 'back'],
+  'Pronoun': ['me', 'you'],
+  'Name': ['John', 'Mary'],
+  'Article': ['the', 'a'],
+  'Preposition': ['to', 'in'],
+  'Conjunction': ['and', 'or'],
+  'Digit': ['0', '1'],
+  # Phrase rules: each alternative is a single symbol or a list of symbols.
+  'S': [['NP', 'VP'], ['S', 'Conjunction', 'S']],
+  'NP': ['Pronoun', 'Noun', ['Article', 'Noun'], ['Digit', 'Digit'],
+         ['NP', 'PP'], ['NP', 'RelClause']],
+  'VP': ['Verb', ['VP', 'NP'], ['VP', 'Adjective'], ['VP', 'PP'],
+         ['VP', 'Adverb']],
+  'PP': [['Preposition', 'NP']],
+  'RelClause': [['that', 'VP']]
+  }
+  
+
+def parse(forest, grammar):
+    "Bottom-up parse: reduce forest by grammar rules to a single 'S' tree, else return None."
+    if len(forest) == 1 and category(forest[0]) == 'S':
+        return forest[0]
+    for i in range(len(forest)):
+        for lhs in grammar.keys():
+            for rhs in map(mklist, grammar[lhs]):
+                n = len(rhs)
+                subsequence = forest[i:i+n]
+                if len(subsequence) == n and match(subsequence, rhs):  # skip short tail slices
+                    print subsequence, lhs, '=>', rhs
+                    forest2 = forest[:]
+                    forest2[i:i+n] = [(lhs, subsequence)]
+                    result = parse(forest2, grammar)
+                    if result is not None:
+                        return result
+    return None
+
+def mklist(x):
+    "Return x unchanged if it is a list; otherwise wrap it as the one-element list [x]."
+    return x if type(x) == list else [x]
+
+def match(forest, rhs):
+    "Return 1 if forest matches rhs item for item (by category or as a literal word), else 0."
+    if len(forest) != len(rhs): return 0   # a short slice can never match (was IndexError)
+    return int(all(category(f) == r or f == r for f, r in zip(forest, rhs)))
+
+def category(forest):
+    "Return the first item (the label) of a tuple node; any non-tuple counts as a 'word'."
+    return forest[0] if type(forest) == tuple else 'word'
--- a/py/py2html.py
+++ b/py/py2html.py
@@ -0,0 +1,110 @@
+"""Pretty-print Python code to colorized, hyperlinked html.
+
+In python, do:
+    py2html.convert_files(['file1.py', 'file2.py', ...]) 
+From the shell, do:
+    python py2html.py *.py"""
+
+import re, string, time, os
+
+
+## RE for a Python identifier
+id = r'[a-zA-Z_][a-zA-Z_0-9]*'
+## Back-references to groups 1-4, for use in re.sub replacement strings
+g1, g2, g3, g4 = r'\1', r'\2', r'\3', r'\4'
+
+def b(text):
+    "Wrap text in a bold element."
+    return '<b>%s</b>' % text
+
+def i(text):
+    "Wrap text in an italic element."
+    return '<i>%s</i>' % text
+
+def color(rgb, text):
+    "Wrap text in a font element whose color attribute is rgb."
+    return '<font color="%s">%s</font>' % (rgb, text)
+
+def link(url, anchor):
+    "Build a hyperlink to url whose visible text is anchor."
+    return '<a href="%s">%s</a>' % (url, anchor)
+def hilite(text, bg="ffff00"):
+    """Return `text` in bold on a `bg`-colored background, wrapped in a named
+    anchor (name == text) so other pages can link to it.
+
+    The anchor element is now closed; previously the '<a name=...>' tag was
+    left open, producing malformed HTML.
+    """
+    return '<b style="background-color:%s"><a name="%s">%s</a></b>' % (
+        bg, text, text)
+
+def modulelink(module, baseurl=''):
+    """Return a link to the HTML page for `module`: under `baseurl` when
+    module+'.py' is listed in the global local_files, otherwise under the
+    python.org library reference."""
+    is_local = (module + '.py') in local_files
+    prefix = baseurl if is_local else 'http://www.python.org/doc/current/lib/module-'
+    return link(prefix + module + '.html', module)
+
+def importer(m):
+    """re.sub callback: group 2 of match `m` is a comma-separated list of
+    module names (e.g. 'utils, math, re'); replace each name by its
+    modulelink(), keeping groups 1 and 3 unchanged around the result."""
+    prefix, names, suffix = m.group(1), m.group(2), m.group(3)
+    links = []
+    for name in names.split(','):
+        links.append(modulelink(name.strip()))
+    return prefix + ', '.join(links) + suffix
+
+def find1(regex, str):
+    "Return the first re.findall result for regex in str, or '&nbsp;' when there is none."
+    matches = re.findall(regex, str)
+    if matches:
+        return matches[0]
+    return '&nbsp;'
+
+def convert_files(filenames, local_filenames=None, tblfile='readme.htm'):
+    """Convert files of python code to colorized HTML.
+
+    For each name in `filenames`: read the file, strip trailing whitespace
+    from every line, apply each (pattern, repl) pair of the module-level
+    `replacements` list in order, wrap the result in
+    <<header(...)>><pre>...</pre><<footer>> and write it next to the source
+    with the last three characters of the name replaced by '.htm'.
+
+    `local_filenames` (default: `filenames`) is stored in the global
+    `local_files`, which modulelink() consults.
+
+    If `tblfile` is true, a summary row is collected per file and the span
+    from the first '<table border=1>' through the last '</table>' in
+    `tblfile` is rewritten in place with the new rows.
+
+    All files are opened with `with` so their handles are closed promptly
+    (the earlier version never closed them).
+    """
+    global local_files
+    local_files = local_filenames or filenames
+    summary_table = {}
+    for f in filenames:
+        with open(f) as source:
+            fulltext = '\n'.join([line.rstrip() for line in source.readlines()])
+        text = fulltext
+        for (pattern, repl) in replacements:
+            text = re.sub(pattern, repl, text)
+        text = '<<header("AIMA Python file: %s")>><pre>%s</pre><<footer>>' % (
+            f, text)
+        # NOTE(review): f[:-3] assumes the name ends in '.py' -- confirm.
+        with open(f[:-3]+'.htm', 'w') as out:
+            out.write(text)
+        if tblfile:
+            # Chapter tag, module name, line count and the text that follows
+            # the first triple-quote, grouped by chapter tag.
+            ch = find1(r'Chapters?\s+([^ \)"]*)', fulltext)
+            module = f.replace('.py','')
+            lines = fulltext.count('\n')
+            desc = find1(r'"""(.*)\n', fulltext).replace('"""', '')
+            summary_table.setdefault(ch,[]).append((module, lines, desc))
+    if tblfile:
+        totallines = 0
+        tbl = ["<tr><th>Chapter<th>Module<th>Files<th>Lines<th>Description"]
+        fmt = "<tr><td align=right>%s<th>%s<td>%s<td align=right>%s<td>%s" 
+        items = summary_table.items(); items.sort(num_cmp)
+        for (ch, entries) in items:
+            for (module, lines, desc) in entries:
+                totallines += lines
+                files = link(module+'.py', '.py')
+                if os.path.exists(module+'.txt'):
+                    files += ' ' + link(module+'.txt', '.txt')
+                tbl += [fmt % (ch, link(module+'.html', module), 
+                               files, lines, desc)]
+        tbl += [fmt % ('', '', '', totallines, ''), "</table>"]
+        ## Now read the tblfile, and replace the table span with tbl
+        with open(tblfile) as table_in:
+            old = table_in.read()
+        new = re.sub("(?s)(<table border=1>)(.*)(</table>)", 
+                     r'\1' + '\n'.join(tbl) + r'\3', old, 1)
+        with open(tblfile, 'w') as table_out:
+            table_out.write(new)
+
+def num_cmp(x, y):
+    """cmp-style comparison (-1, 0 or 1) of two sequences by their first item.
+
+    The first item is reduced to the integer value of its first run of
+    digits when it has one; otherwise it is compared as-is.  The result is
+    computed arithmetically instead of with the builtin cmp(), which exists
+    only in Python 2.
+    """
+    def num(x):
+        nums = re.findall('[0-9]+', x or '')
+        if nums: return int(nums[0])
+        return x
+    a, b = num(x[0]), num(y[0])
+    return (a > b) - (a < b)
+
+### Above is general (more or less); below is specific to my files.
+
+def comment(text):
+    "Render text in green italics."
+    greened = color("green", text)
+    return i(greened)
+
+# Ordered (pattern, replacement) pairs; convert_files() applies each one to
+# the whole file text with re.sub, in this order.  A replacement is either a
+# template string built from g1..g3 or a callable taking the match object.
+replacements = [
+    # Escape the HTML special characters; '&' goes first so the entities
+    # produced for '<' and '>' are not escaped again.
+    (r'&', '&amp;'),
+    (r'<', '&lt;'),
+    (r'>', '&gt;'),
+    # A line of '#' followed by ten or more '#'/'_' characters becomes a rule.
+    (r'(?ms)^#+[#_]{10,} *\n', '<hr>'),
+    # Single- or double-quoted strings get the comment() styling.
+    (r"""('[^']*?'|"[^"]*?")""", comment(g1)),
+    # Triple-quoted strings (may span lines) get the same styling.
+    (r'(?s)(""".*?"""|' + r"'''.*?''')", comment(g1)),
+    # '#' through end of line is colored.
+    (r'(#.*)', color("cc33cc", g1)),
+    # Names assigned at the start of a line are highlighted and anchored.
+    (r'(?m)(^[a-zA-Z][a-zA-Z_0-9, ]+)(\s+=\s+)', hilite(g1) + g2),
+    # 'def name' / 'class name': bold keyword, highlighted and anchored name.
+    (r'(?m)(^\s*)(def\s+)(%s)' % id, g1 + b(g2) + hilite(g3)),
+    (r'(?m)(^\s*)(class\s+)(%s)' % id, g1 + b(g2) + hilite(g3)),
+    # Module names in import statements become links via importer().
+    (r'(from\s+)([a-z]+)(\s+import)', importer),
+    (r'(import\s+)([a-z, ]+)(\s|\n|$|,)', importer),
+    ]
+
+if __name__ == '__main__':
+    import sys, glob
+    # Treat every command-line argument as a glob pattern and convert all
+    # the files the patterns expand to.
+    files = []
+    for arg in sys.argv[1:]:
+        files += glob.glob(arg)
+    convert_files(files)
+
+## ENHANCEMENTS:
+## Can get confused with """ and '''; not a problem in practice.
+## Maybe we should create an index 
+## Probably should switch to Doxygen
--- a/py/spell.py
+++ b/py/spell.py
@@ -0,0 +1,106 @@
+"""Spelling Corrector in Python 3; see http://norvig.com/spell-correct.html
+
+Copyright (c) 2007-2016 Peter Norvig
+MIT license: www.opensource.org/licenses/mit-license.php
+"""
+
+################ Spelling Corrector 
+
+import re
+from collections import Counter
+
+def words(text):
+    "Lowercase `text` and return its runs of word characters, in order."
+    lowered = text.lower()
+    return re.findall(r'\w+', lowered)
+
+# Word-frequency table, built once at import time from the corpus file.
+# The file is read inside `with` so its handle is closed right away.
+with open('big.txt') as _corpus:
+    WORDS = Counter(words(_corpus.read()))
+
+def P(word, N=sum(WORDS.values())):
+    "Count of `word` in WORDS divided by N (by default the total of all counts, fixed at definition time)."
+    occurrences = WORDS[word]
+    return occurrences / N
+
+def correction(word):
+    "Return the candidate for `word` with the highest P."
+    options = candidates(word)
+    return max(options, key=P)
+
+def candidates(word):
+    """Return the first non-empty choice among: the word itself if known,
+    known words one edit away, known words two edits away; failing all
+    three, the word itself in a list."""
+    exact = known([word])
+    if exact:
+        return exact
+    one_away = known(edits1(word))
+    if one_away:
+        return one_away
+    two_away = known(edits2(word))
+    if two_away:
+        return two_away
+    return [word]
+
+def known(words):
+    "Return, as a set, those members of `words` that are keys of WORDS."
+    return {w for w in words if w in WORDS}
+
+def edits1(word):
+    """Return the set of strings reachable from `word` by one deletion,
+    one adjacent transposition, one replacement or one insertion of a
+    lowercase ASCII letter."""
+    letters = 'abcdefghijklmnopqrstuvwxyz'
+    results = set()
+    for cut in range(len(word) + 1):
+        head, tail = word[:cut], word[cut:]
+        for c in letters:
+            results.add(head + c + tail)                      # insert
+        if tail:
+            results.add(head + tail[1:])                      # delete
+            for c in letters:
+                results.add(head + c + tail[1:])              # replace
+        if len(tail) > 1:
+            results.add(head + tail[1] + tail[0] + tail[2:])  # transpose
+    return results
+
+def edits2(word):
+    "Return a generator over every edits1 result of every edits1 result of `word` (duplicates included)."
+    first_round = edits1(word)
+    return (twice for once in first_round for twice in edits1(once))
+
+################ Test Code 
+
+def unit_tests():
+    "Assert expected corrections, tokenizer output and WORDS statistics; return a success message."
+    # (misspelling, expected correction), tagged with the edit involved.
+    correction_cases = [
+        ('speling', 'spelling'),                # insert
+        ('korrectud', 'corrected'),             # replace 2
+        ('bycycle', 'bicycle'),                 # replace
+        ('inconvient', 'inconvenient'),         # insert 2
+        ('arrainged', 'arranged'),              # delete
+        ('peotry', 'poetry'),                   # transpose
+        ('peotryy', 'poetry'),                  # transpose + delete
+        ('word', 'word'),                       # known
+        ('quintessential', 'quintessential'),   # unknown
+    ]
+    for wrong, right in correction_cases:
+        assert correction(wrong) == right
+    # Tokenizer.
+    assert words('This is a TEST.') == ['this', 'is', 'a', 'test']
+    expected_counts = Counter({'123': 1, 'a': 2, 'is': 2, 'test': 2, 'this': 2})
+    assert Counter(words('This is a test. 123; A TEST this is.')) == expected_counts
+    # Statistics of the WORDS table.
+    assert len(WORDS) == 32192
+    assert sum(WORDS.values()) == 1115504
+    top_ten = [('the', 79808), ('of', 40024), ('and', 38311), ('to', 28765),
+               ('in', 22020), ('a', 21124), ('that', 12512), ('he', 12401),
+               ('was', 11410), ('it', 10681)]
+    assert WORDS.most_common(10) == top_ten
+    assert WORDS['the'] == 79808
+    assert P('quintessential') == 0
+    assert 0.07 < P('the') < 0.08
+    return 'unit_tests pass'
+
+def spelltest(tests, verbose=False):
+    """Run correction(wrong) on all (right, wrong) pairs; report results.
+
+    Prints one summary line: fraction corrected to `right`, number of
+    pairs, fraction of misses whose `right` word is not in WORDS, and
+    pairs processed per second.  With verbose=True every miss is printed
+    as well.  Returns None.
+    """
+    import time
+    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
+    start = time.perf_counter()
+    good, unknown = 0, 0
+    n = len(tests)
+    for right, wrong in tests:
+        w = correction(wrong)
+        good += (w == right)
+        if w != right:
+            unknown += (right not in WORDS)
+            if verbose:
+                print('correction({}) => {} ({}); expected {} ({})'
+                      .format(wrong, w, WORDS[w], right, WORDS[right]))
+    dt = time.perf_counter() - start
+    # Guard the ratios so an empty test list or a zero-length timing
+    # interval is reported instead of raising ZeroDivisionError.
+    total = n or 1
+    rate = n / dt if dt > 0 else float('inf')
+    print('{:.0%} of {} correct ({:.0%} unknown) at {:.0f} words per second '
+          .format(good / total, n, unknown / total, rate))
+    
+def Testset(lines):
+    "Turn each 'right: wrong1 wrong2' line into ('right', 'wrong1'), ('right', 'wrong2'); return all pairs in one list."
+    pairs = []
+    for line in lines:
+        right, wrongs = line.split(':')
+        for wrong in wrongs.split():
+            pairs.append((right, wrong))
+    return pairs
+
+if __name__ == '__main__':
+    print(unit_tests())
+    # Score the corrector on both test sets, in order.
+    for testfile in ('spell-testset1.txt', 'spell-testset2.txt'):
+        spelltest(Testset(open(testfile)))
--- a/py/sudoku-easy50.txt
+++ b/py/sudoku-easy50.txt
@@ -0,0 +1,50 @@
+003020600900305001001806400008102900700000008006708200002609500800203009005010300
+200080300060070084030500209000105408000000000402706000301007040720040060004010003
+000000907000420180000705026100904000050000040000507009920108000034059000507000000
+030050040008010500460000012070502080000603000040109030250000098001020600080060020
+020810740700003100090002805009040087400208003160030200302700060005600008076051090
+100920000524010000000000070050008102000000000402700090060000000000030945000071006
+043080250600000000000001094900004070000608000010200003820500000000000005034090710
+480006902002008001900370060840010200003704100001060049020085007700900600609200018
+000900002050123400030000160908000000070000090000000205091000050007439020400007000
+001900003900700160030005007050000009004302600200000070600100030042007006500006800
+000125400008400000420800000030000095060902010510000060000003049000007200001298000
+062340750100005600570000040000094800400000006005830000030000091006400007059083260
+300000000005009000200504000020000700160000058704310600000890100000067080000005437
+630000000000500008005674000000020000003401020000000345000007004080300902947100080
+000020040008035000000070602031046970200000000000501203049000730000000010800004000
+361025900080960010400000057008000471000603000259000800740000005020018060005470329
+050807020600010090702540006070020301504000908103080070900076205060090003080103040
+080005000000003457000070809060400903007010500408007020901020000842300000000100080
+003502900000040000106000305900251008070408030800763001308000104000020000005104800
+000000000009805100051907420290401065000000000140508093026709580005103600000000000
+020030090000907000900208005004806500607000208003102900800605007000309000030020050
+005000006070009020000500107804150000000803000000092805907006000030400010200000600
+040000050001943600009000300600050002103000506800020007005000200002436700030000040
+004000000000030002390700080400009001209801307600200008010008053900040000000000800
+360020089000361000000000000803000602400603007607000108000000000000418000970030014
+500400060009000800640020000000001008208000501700500000000090084003000600060003002
+007256400400000005010030060000508000008060200000107000030070090200000004006312700
+000000000079050180800000007007306800450708096003502700700000005016030420000000000
+030000080009000500007509200700105008020090030900402001004207100002000800070000090
+200170603050000100000006079000040700000801000009050000310400000005000060906037002
+000000080800701040040020030374000900000030000005000321010060050050802006080000000
+000000085000210009960080100500800016000000000890006007009070052300054000480000000
+608070502050608070002000300500090006040302050800050003005000200010704090409060701
+050010040107000602000905000208030501040070020901080406000401000304000709020060010
+053000790009753400100000002090080010000907000080030070500000003007641200061000940
+006080300049070250000405000600317004007000800100826009000702000075040190003090600
+005080700700204005320000084060105040008000500070803010450000091600508007003010600
+000900800128006400070800060800430007500000009600079008090004010003600284001007000
+000080000270000054095000810009806400020403060006905100017000620460000038000090000
+000602000400050001085010620038206710000000000019407350026040530900020007000809000
+000900002050123400030000160908000000070000090000000205091000050007439020400007000
+380000000000400785009020300060090000800302009000040070001070500495006000000000092
+000158000002060800030000040027030510000000000046080790050000080004070100000325000
+010500200900001000002008030500030007008000500600080004040100700000700006003004050
+080000040000469000400000007005904600070608030008502100900000005000781000060000010
+904200007010000000000706500000800090020904060040002000001607000000000030300005702
+000700800006000031040002000024070000010030080000060290000800070860000500002006000
+001007090590080001030000080000005800050060020004100000080000030100020079020700400
+000003017015009008060000000100007000009000200000500004000000020500600340340200000
+300200000000107000706030500070009080900020004010800050009040301000702000000008006
--- a/py/sudoku-hardest.txt
+++ b/py/sudoku-hardest.txt
@@ -0,0 +1,11 @@
+85...24..72......9..4.........1.7..23.5...9...4...........8..7..17..........36.4.
+..53.....8......2..7..1.5..4....53...1..7...6..32...8..6.5....9..4....3......97..
+12..4......5.69.1...9...5.........7.7...52.9..3......2.9.6...5.4..9..8.1..3...9.4
+...57..3.1......2.7...234......8...4..7..4...49....6.5.42...3.....7..9....18.....
+7..1523........92....3.....1....47.8.......6............9...5.6.4.9.7...8....6.1.
+1....7.9..3..2...8..96..5....53..9...1..8...26....4...3......1..4......7..7...3..
+1...34.8....8..5....4.6..21.18......3..1.2..6......81.52..7.9....6..9....9.64...2
+...92......68.3...19..7...623..4.1....1...7....8.3..297...8..91...5.72......64...
+.6.5.4.3.1...9...8.........9...5...6.4.6.2.7.7...4...5.........4...8...1.5.2.3.4.
+7.....4...2..7..8...3..8.799..5..3...6..2..9...1.97..6...3..9...3..4..6...9..1.35
+....7..2.8.......6.1.2.5...9.54....8.........3....85.1...3.2.8.4.......9.7..6....
--- a/py/sudoku-top95.txt
+++ b/py/sudoku-top95.txt
@@ -0,0 +1,95 @@
+4.....8.5.3..........7......2.....6.....8.4......1.......6.3.7.5..2.....1.4......
+52...6.........7.13...........4..8..6......5...........418.........3..2...87.....
+6.....8.3.4.7.................5.4.7.3..2.....1.6.......2.....5.....8.6......1....
+48.3............71.2.......7.5....6....2..8.............1.76...3.....4......5....
+....14....3....2...7..........9...3.6.1.............8.2.....1.4....5.6.....7.8...
+......52..8.4......3...9...5.1...6..2..7........3.....6...1..........7.4.......3.
+6.2.5.........3.4..........43...8....1....2........7..5..27...........81...6.....
+.524.........7.1..............8.2...3.....6...9.5.....1.6.3...........897........
+6.2.5.........4.3..........43...8....1....2........7..5..27...........81...6.....
+.923.........8.1...........1.7.4...........658.........6.5.2...4.....7.....9.....
+6..3.2....5.....1..........7.26............543.........8.15........4.2........7..
+.6.5.1.9.1...9..539....7....4.8...7.......5.8.817.5.3.....5.2............76..8...
+..5...987.4..5...1..7......2...48....9.1.....6..2.....3..6..2.......9.7.......5..
+3.6.7...........518.........1.4.5...7.....6.....2......2.....4.....8.3.....5.....
+1.....3.8.7.4..............2.3.1...........958.........5.6...7.....8.2...4.......
+6..3.2....4.....1..........7.26............543.........8.15........4.2........7..
+....3..9....2....1.5.9..............1.2.8.4.6.8.5...2..75......4.1..6..3.....4.6.
+45.....3....8.1....9...........5..9.2..7.....8.........1..4..........7.2...6..8..
+.237....68...6.59.9.....7......4.97.3.7.96..2.........5..47.........2....8.......
+..84...3....3.....9....157479...8........7..514.....2...9.6...2.5....4......9..56
+.98.1....2......6.............3.2.5..84.........6.........4.8.93..5...........1..
+..247..58..............1.4.....2...9528.9.4....9...1.........3.3....75..685..2...
+4.....8.5.3..........7......2.....6.....5.4......1.......6.3.7.5..2.....1.9......
+.2.3......63.....58.......15....9.3....7........1....8.879..26......6.7...6..7..4
+1.....7.9.4...72..8.........7..1..6.3.......5.6..4..2.........8..53...7.7.2....46
+4.....3.....8.2......7........1...8734.......6........5...6........1.4...82......
+.......71.2.8........4.3...7...6..5....2..3..9........6...7.....8....4......5....
+6..3.2....4.....8..........7.26............543.........8.15........8.2........7..
+.47.8...1............6..7..6....357......5....1..6....28..4.....9.1...4.....2.69.
+......8.17..2........5.6......7...5..1....3...8.......5......2..4..8....6...3....
+38.6.......9.......2..3.51......5....3..1..6....4......17.5..8.......9.......7.32
+...5...........5.697.....2...48.2...25.1...3..8..3.........4.7..13.5..9..2...31..
+.2.......3.5.62..9.68...3...5..........64.8.2..47..9....3.....1.....6...17.43....
+.8..4....3......1........2...5...4.69..1..8..2...........3.9....6....5.....2.....
+..8.9.1...6.5...2......6....3.1.7.5.........9..4...3...5....2...7...3.8.2..7....4
+4.....5.8.3..........7......2.....6.....5.8......1.......6.3.7.5..2.....1.8......
+1.....3.8.6.4..............2.3.1...........958.........5.6...7.....8.2...4.......
+1....6.8..64..........4...7....9.6...7.4..5..5...7.1...5....32.3....8...4........
+249.6...3.3....2..8.......5.....6......2......1..4.82..9.5..7....4.....1.7...3...
+...8....9.873...4.6..7.......85..97...........43..75.......3....3...145.4....2..1
+...5.1....9....8...6.......4.1..........7..9........3.8.....1.5...2..4.....36....
+......8.16..2........7.5......6...2..1....3...8.......2......7..3..8....5...4....
+.476...5.8.3.....2.....9......8.5..6...1.....6.24......78...51...6....4..9...4..7
+.....7.95.....1...86..2.....2..73..85......6...3..49..3.5...41724................
+.4.5.....8...9..3..76.2.....146..........9..7.....36....1..4.5..6......3..71..2..
+.834.........7..5...........4.1.8..........27...3.....2.6.5....5.....8........1..
+..9.....3.....9...7.....5.6..65..4.....3......28......3..75.6..6...........12.3.8
+.26.39......6....19.....7.......4..9.5....2....85.....3..2..9..4....762.........4
+2.3.8....8..7...........1...6.5.7...4......3....1............82.5....6...1.......
+6..3.2....1.....5..........7.26............843.........8.15........8.2........7..
+1.....9...64..1.7..7..4.......3.....3.89..5....7....2.....6.7.9.....4.1....129.3.
+.........9......84.623...5....6...453...1...6...9...7....1.....4.5..2....3.8....9
+.2....5938..5..46.94..6...8..2.3.....6..8.73.7..2.........4.38..7....6..........5
+9.4..5...25.6..1..31......8.7...9...4..26......147....7.......2...3..8.6.4.....9.
+...52.....9...3..4......7...1.....4..8..453..6...1...87.2........8....32.4..8..1.
+53..2.9...24.3..5...9..........1.827...7.........981.............64....91.2.5.43.
+1....786...7..8.1.8..2....9........24...1......9..5...6.8..........5.9.......93.4
+....5...11......7..6.....8......4.....9.1.3.....596.2..8..62..7..7......3.5.7.2..
+.47.2....8....1....3....9.2.....5...6..81..5.....4.....7....3.4...9...1.4..27.8..
+......94.....9...53....5.7..8.4..1..463...........7.8.8..7.....7......28.5.26....
+.2......6....41.....78....1......7....37.....6..412....1..74..5..8.5..7......39..
+1.....3.8.6.4..............2.3.1...........758.........7.5...6.....8.2...4.......
+2....1.9..1..3.7..9..8...2.......85..6.4.........7...3.2.3...6....5.....1.9...2.5
+..7..8.....6.2.3...3......9.1..5..6.....1.....7.9....2........4.83..4...26....51.
+...36....85.......9.4..8........68.........17..9..45...1.5...6.4....9..2.....3...
+34.6.......7.......2..8.57......5....7..1..2....4......36.2..1.......9.......7.82
+......4.18..2........6.7......8...6..4....3...1.......6......2..5..1....7...3....
+.4..5..67...1...4....2.....1..8..3........2...6...........4..5.3.....8..2........
+.......4...2..4..1.7..5..9...3..7....4..6....6..1..8...2....1..85.9...6.....8...3
+8..7....4.5....6............3.97...8....43..5....2.9....6......2...6...7.71..83.2
+.8...4.5....7..3............1..85...6.....2......4....3.26............417........
+....7..8...6...5...2...3.61.1...7..2..8..534.2..9.......2......58...6.3.4...1....
+......8.16..2........7.5......6...2..1....3...8.......2......7..4..8....5...3....
+.2..........6....3.74.8.........3..2.8..4..1.6..5.........1.78.5....9..........4.
+.52..68.......7.2.......6....48..9..2..41......1.....8..61..38.....9...63..6..1.9
+....1.78.5....9..........4..2..........6....3.74.8.........3..2.8..4..1.6..5.....
+1.......3.6.3..7...7...5..121.7...9...7........8.1..2....8.64....9.2..6....4.....
+4...7.1....19.46.5.....1......7....2..2.3....847..6....14...8.6.2....3..6...9....
+......8.17..2........5.6......7...5..1....3...8.......5......2..3..8....6...4....
+963......1....8......2.5....4.8......1....7......3..257......3...9.2.4.7......9..
+15.3......7..4.2....4.72.....8.........9..1.8.1..8.79......38...........6....7423
+..........5724...98....947...9..3...5..9..12...3.1.9...6....25....56.....7......6
+....75....1..2.....4...3...5.....3.2...8...1.......6.....1..48.2........7........
+6.....7.3.4.8.................5.4.8.7..2.....1.3.......2.....5.....7.9......1....
+....6...4..6.3....1..4..5.77.....8.5...8.....6.8....9...2.9....4....32....97..1..
+.32.....58..3.....9.428...1...4...39...6...5.....1.....2...67.8.....4....95....6.
+...5.3.......6.7..5.8....1636..2.......4.1.......3...567....2.8..4.7.......2..5..
+.5.3.7.4.1.........3.......5.8.3.61....8..5.9.6..1........4...6...6927....2...9..
+..5..8..18......9.......78....4.....64....9......53..2.6.........138..5....9.714.
+..........72.6.1....51...82.8...13..4.........37.9..1.....238..5.4..9.........79.
+...658.....4......12............96.7...3..5....2.8...3..19..8..3.6.....4....473..
+.2.3.......6..8.9.83.5........2...8.7.9..5........6..4.......1...1...4.22..7..8.9
+.5..9....1.....6.....3.8.....8.4...9514.......3....2..........4.8...6..77..15..6.
+.....2.......7...17..3...9.8..7......2.89.6...13..6....9..5.824.....891..........
+3...8.......7....51..............36...2..4....7...........6.13..452...........8..
--- a/py/sudoku.py
+++ b/py/sudoku.py
@@ -0,0 +1,161 @@
+## Solve Every Sudoku Puzzle
+
+## See http://norvig.com/sudoku.html
+
+## Throughout this program we have:
+##   r is a row,    e.g. 'A'
+##   c is a column, e.g. '3'
+##   s is a square, e.g. 'A3'
+##   d is a digit,  e.g. '9'
+##   u is a unit,   e.g. ['A1','B1','C1','D1','E1','F1','G1','H1','I1']
+##   grid is a grid,e.g. 81 non-blank chars, e.g. starting with '.18...7...
+##   values is a dict of possible values, e.g. {'A1':'12349', 'A2':'8', ...}
+
+def cross(A, B):
+    "Return every a+b with a drawn from A and b from B, A varying slowest."
+    products = []
+    for a in A:
+        for b in B:
+            products.append(a + b)
+    return products
+
+digits   = '123456789'
+rows     = 'ABCDEFGHI'
+cols     = digits
+squares  = cross(rows, cols)
+
+## All 27 units, in order: the 9 columns, the 9 rows, the 9 boxes.
+unitlist = []
+unitlist.extend(cross(rows, c) for c in cols)
+unitlist.extend(cross(r, cols) for r in rows)
+unitlist.extend(cross(rs, cs) for rs in ('ABC','DEF','GHI') for cs in ('123','456','789'))
+
+## units[s]: the units containing square s.  peers[s]: every other square in those units.
+units = {s: [u for u in unitlist if s in u] for s in squares}
+peers = {s: set(sum(units[s], [])) - {s} for s in squares}
+
+################ Unit Tests ################
+
+def test():
+    "Check the sizes of the board tables and the exact units and peers of square C2."
+    assert len(squares) == 81
+    assert len(unitlist) == 27
+    for s in squares:
+        assert len(units[s]) == 3
+        assert len(peers[s]) == 20
+    column = 'A2 B2 C2 D2 E2 F2 G2 H2 I2'.split()
+    row    = 'C1 C2 C3 C4 C5 C6 C7 C8 C9'.split()
+    box    = 'A1 A2 A3 B1 B2 B3 C1 C2 C3'.split()
+    assert units['C2'] == [column, row, box]
+    assert peers['C2'] == set(column + row + box) - set(['C2'])
+    print('All tests pass.')
+
+################ Parse a Grid ################
+
+def parse_grid(grid):
+    """Build the {square: candidate digits} dict for `grid`.  Return False
+    as soon as a given digit cannot be assigned to its square."""
+    ## Every square starts with all digits possible.
+    values = {s: digits for s in squares}
+    for s, d in grid_values(grid).items():
+        if d not in digits:
+            continue  ## empty square ('0' or '.'): nothing to assign
+        if not assign(values, s, d):
+            return False
+    return values
+
+def grid_values(grid):
+    """Return {square: char} for the 81 digit, '0' or '.' characters found in
+    `grid`; every other character is ignored.  Asserts that exactly 81 remain."""
+    chars = []
+    for c in grid:
+        if c in digits or c in '0.':
+            chars.append(c)
+    if len(chars) != 81:
+        print(grid, chars, len(chars))
+    assert len(chars) == 81
+    return dict(zip(squares, chars))
+
+################ Constraint Propagation ################
+
+def assign(values, s, d):
+    """Reduce values[s] to d by eliminating each of its other digits in turn.
+    Return values, or False as soon as an elimination reports a contradiction."""
+    for d2 in values[s].replace(d, ''):
+        if not eliminate(values, s, d2):
+            return False
+    return values
+
+def eliminate(values, s, d):
+    """Remove digit d from values[s] (mutating `values` in place) and
+    propagate the two consequences described in the comments below.
+    Return values, except return False if a contradiction is detected."""
+    if d not in values[s]:
+        return values ## Already eliminated
+    values[s] = values[s].replace(d,'')
+    ## (1) If a square s is reduced to one value d2, then eliminate d2 from the peers.
+    if len(values[s]) == 0:
+        return False ## Contradiction: removed last value
+    elif len(values[s]) == 1:
+        d2 = values[s]
+        if not all(eliminate(values, s2, d2) for s2 in peers[s]):
+            return False
+    ## (2) If a unit u is reduced to only one place for a value d, then put it there.
+    for u in units[s]:
+        ## NOTE: the comprehension reuses the name s for its own loop
+        ## variable; units[s] above was already evaluated with the argument s.
+        dplaces = [s for s in u if d in values[s]]
+        if len(dplaces) == 0:
+            return False ## Contradiction: no place for this value
+        elif len(dplaces) == 1:
+            # d can only be in one place in unit; assign it there
+            if not assign(values, dplaces[0], d):
+                return False
+    return values
+
+################ Display as 2-D grid ################
+
+def display(values):
+    "Print values as a 9x9 grid with '|' and '-' separators between the 3x3 boxes."
+    width = 1 + max(len(values[s]) for s in squares)
+    line = '+'.join(['-' * (width * 3)] * 3)
+    for r in rows:
+        cells = []
+        for c in cols:
+            cell = values[r+c].center(width)
+            if c in '36':
+                cell += '|'
+            cells.append(cell)
+        print(''.join(cells))
+        if r in 'CF':
+            print(line)
+    print()
+
+################ Search ################
+
+def solve(grid):
+    "Parse `grid` and search the resulting candidate table for a solution."
+    values = parse_grid(grid)
+    return search(values)
+
+def search(values):
+    """Using depth-first search and propagation, try all possible values.
+
+    Return a values dict in which every square has exactly one digit, or
+    False when `values` is False or no choice of digit leads to a solution.
+    (Previously the no-solution case fell off the end and returned None,
+    which solved() did not recognise as a failure.)
+    """
+    if values is False:
+        return False ## Failed earlier
+    if all(len(values[s]) == 1 for s in squares):
+        return values ## Solved!
+    ## Choose the unfilled square s with the fewest possibilities
+    n,s = min((len(values[s]), s) for s in squares if len(values[s]) > 1)
+    for d in values[s]:
+        ## Each trial works on a copy so a failed branch leaves values intact.
+        result = search(assign(values.copy(), s, d))
+        if result: return result
+    return False ## Every digit for s led to a contradiction
+
+################ System test ################
+
+import time
+
+def solve_all(grids, name=''):
+    """Time a solve of every grid in `grids`; when there is more than one,
+    print how many were solved plus the average, rate and maximum times."""
+    outcomes = [time_solve(grid) for grid in grids]
+    times, results = zip(*outcomes)
+    N = len(results)
+    if N <= 1:
+        return
+    total = sum(times)
+    print("Solved %d of %d %s puzzles (avg %.2f secs (%d Hz), max %.2f secs)." % (
+        sum(results), N, name, total/N, N/total, max(times)))
+            
+def time_solve(grid):
+    "Solve `grid`; return (elapsed seconds, whether the result passes solved())."
+    ## time.clock() was removed in Python 3.8; perf_counter() replaces it.
+    start = time.perf_counter()
+    values = solve(grid)
+    t = time.perf_counter()-start
+    return (t, solved(values))
+
+def solved(values):
+    """A puzzle is solved if each unit is a permutation of the digits 1 to 9.
+
+    Any false `values` (the False that marks a contradiction, or None)
+    counts as unsolved instead of being indexed.
+    """
+    if not values:
+        return False
+    def unitsolved(unit): return set(values[s] for s in unit) == set(digits)
+    return all(unitsolved(unit) for unit in unitlist)
+
+
+## Sample puzzles as 81-character strings ('0' or '.' marks an empty square).
+## grid1 and grid2 are the first puzzles of sudoku-easy50.txt and sudoku-top95.txt.
+grid1  = '003020600900305001001806400008102900700000008006708200002609500800203009005010300'
+grid2  = '4.....8.5.3..........7......2.....6.....8.4......1.......6.3.7.5..2.....1.4......'
+hard1  = '.....6....59.....82....8....45........3........6..3.54...325..6..................'
+    
+if __name__ == '__main__':
+    test()
+    ## Each puzzle file holds one grid per line.
+    for filename, label in [("sudoku-easy50.txt", "easy"),
+                            ("sudoku-top95.txt", "hard"),
+                            ("sudoku-hardest.txt", "hardest")]:
+        solve_all(open(filename), label)
+    
--- a/py/testaccum.py
+++ b/py/testaccum.py
@@ -0,0 +1,73 @@
+from __future__ import division
+import re
+from accum import *
+
+
+# Matches "[acc: exp for x in it]" within a single line.
+acc_re = re.compile("[[](.+):(.+) for (.+) in (.+)[]]")
+
+def expand_accumulations(program_text):
+    """Rewrite each accumulation display "[acc: exp for x in it]" found in
+    program_text as the call "accumulation(acc, lambda x: (exp), it)".
+    This simulates a hypothetical Python interpreter that handles
+    accumulation displays natively.  It is deliberately crude: no matching
+    across lines, no nested displays, no multiple 'for' clauses and no
+    'if' clauses."""
+    def as_call(matchobj):
+        acc, exp, x, it = matchobj.group(1, 2, 3, 4)
+        return "accumulation(%s, lambda %s: (%s), %s)" % (acc, x, exp, it)
+    return acc_re.sub(as_call, program_text)
+
+def test1(acc_display, expected):
+    """Eval an accumulation display and see if it gets the expected answer.
+
+    Prints the display, expands it with expand_accumulations(), evaluates
+    the result with eval() and asserts it equals `expected`, then prints
+    the result.  Only pass trusted text: it is executed as code.
+    """
+    # Single-argument print(...) prints the same text under the Python 2
+    # statement and the Python 3 function.
+    print(acc_display)
+    result = eval(expand_accumulations(acc_display))
+    assert result == expected, ('Got %s; expected %s' % (result, expected))
+    print('    ==>  %s' % result)
+
+#### Initialize some data
+# These module-level names are referenced by the display strings in test(),
+# which test1() evaluates with eval().
+temp = [70, 70, 71, 74, 76, 76, 72, 76, 77, 77, 77, 78,
+        78, 79, 79, 79, 78, 80, 82, 83, 83, 81, 84, 83]  # 24 readings, indexed by hour in test()
+data = temp
+def f(x): return 2 * x
+votes = {'Arnie': 48, 'Gray': 45, 'Tom': 13, 'Cruz': 32, 'Peter': 3}
+candidates = votes.keys()
+
+def test():
+    """Print the sample data, then check each accumulation display against
+    an answer computed directly or written out by hand."""
+
+    # Single-argument print(...) prints the same text under the Python 2
+    # statement and the Python 3 function.
+    print('temp =  %s' % (temp,))
+    print('data = temp')
+    print('votes =  %s' % (votes,))
+    print('candidates =  %s' % (candidates,))
+    print('')
+
+    #### Test some accumulation displays
+    test1("[Max: temp[hour] for hour in range(24)]",
+          max([temp[hour] for hour in range(24)]))
+    test1("[Min: temp[hour] for hour in range(24)]",
+          min([temp[hour] for hour in range(24)]))
+    test1("[Sum: x*x for x in data]",
+          sum([x*x for x in data]))
+    test1("[Mean: f(x) for x in data]",
+          sum([f(x) for x in data])/len(data))
+    test1("[Median: f(x) for x in data]",
+          156.0)
+    test1("[Mode: f(x) for x in data]",
+          166)
+    test1("[Argmax: votes[c] for c in candidates]",
+          'Arnie')
+    test1("[Argmin: votes[c] for c in candidates]",
+          'Peter')
+    # The loop variable was misspelled 'four', so 'hour' only resolved
+    # through a variable leaked by the earlier Python 2 list comprehensions.
+    test1("[Some: temp[hour] > 75 for hour in range(24)]",
+          len([hour for hour in range(24) if temp[hour] > 75])>0)
+    test1("[Every: temp[hour] > 75 for hour in range(24)]",
+          len([h for h in range(24) if temp[h] > 75]) == 24)
+    test1("[Top(10): temp[hour] for hour in range(24)]",
+          [84, 83, 83, 83, 82, 81, 80, 79, 79, 79])
+    test1("[Join(', '): votes[c] for c in candidates]",
+                       ', '.join([str(votes[c]) for c in candidates]))
+    test1("[SortBy: abs(x) for x in (-2, -4, 3, 1)]",
+          [1, -2, 3, -4])
+    test1("[SortBy(reverse=True): abs(x) for x in (-2, -4, 3, 1)]",
+          [-4, 3, -2, 1])
+
+# Run the accumulation-display checks when executed as a script.
+if __name__ == "__main__":
+    test()
--- a/py/yaptu.py
+++ b/py/yaptu.py
@@ -0,0 +1,170 @@
+"""Yet Another Python Templating Utility, Version 1.2, by Alex Martelli.
+   http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52305
+   (Specialized to HTML and modified by Peter Norvig.)
+
+Copies input to output, with some substitutions. There are three types
+of substitutions: lexical, expression, and statement. 
+
+LEXICAL SUBSTITUTIONS:
+
+& < >
+        These characters, if surrounded by whitespace, are replaced by
+        the corresponding HTML entities: &amp;, &lt;, &gt;.
+
+EXPRESSION SUBSTITUTIONS:
+
+<<exp>>     
+        Replace <<exp>> by eval(exp), where exp is a Python expression.
+        The most common use is when exp is just a variable name.
+        Example: <<green>>
+        Special case 1: If exp starts with '/', replace '/' by '_'.
+        Example: <</green>> becomes <<_green>>
+        Special case 2: If exp evals to a callable, call it.
+        Example: <<random.random>> is the same as <<random.random()>>
+        Special case 3: If exp evals to None, replace it with ''.
+        Example: <<list.append(item)>> generates no text.
+
+STATEMENT SUBSTITUTIONS:
+
+All statement substitutions start with a #[ in column 1, and end with
+a #] in column 1 of a subsequent line.  Nesting is allowed, and
+works like you would expect. There are two variants:
+
+#[
+stmts
+#]
+        Any number of lines of Python stmts are executed.
+        The first line must be empty, except for the #[
+
+#[ stmt-header:
+lines
+#]
+        The lines are interpreted as HTML with embedded expressions,
+        and are sent to output, once for each execution of stmt-header.
+        stmt-header is usually a for or if; This is hard to explain, 
+        but easy to see with an example:
+
+        <table><tr><th> Number <th> Number squared
+        #[ for i in range(10):
+              <tr><td> <<i>> <td> <<i**2>>
+        #]        
+        </table>
+        
+        This produces one line of the table for each value of i in [0 .. 9].
+        If your compound statement has multiple stmt-headers, you use #| to
+        introduce the subsequent stmt-headers (such as else: or except:). 
+        Another example:
+
+        #[ if time.localtime()[6] in [5, 6]: 
+        Have a good weekend!
+        #| else:
+        Time for work.
+        #]
+"""
+
+import sys, re, os, os.path
+
+class Copier:
+    """Smart-copier (YAPTU) class.
+
+    Copies template lines to an output file, replacing each <<expr>> by
+    the value of expr and running the statement blocks that start with a
+    '#[' line.  Everything is evaluated in the `globals` dict handed to
+    the constructor; that dict also holds the template lines (key '_bl')
+    and a reference to copyblock (key '_cb').
+
+    NOTE: template text is passed to eval and exec, so only process
+    templates you trust.
+    """
+
+    def copyblock(self, i=0, last=None):
+        """Main copy method: process lines [i,last) of block.
+
+        i    -- index into self.globals['_bl'] of the first line to process.
+        last -- index one past the final line; None means the end of '_bl'.
+
+        Ordinary lines are written to self.outf after substitution.  A line
+        starting with '#[' opens a statement, which is run by self.execute;
+        a statement with a header calls back into this method (as _cb) to
+        copy the lines of its body.
+        """
+
+        def repl(match, self=self):
+            "Replace the match with its value as a Python expression."
+            expr = self.preproc(match.group(1), 'eval')
+            if self.verbose: print '=== eval{%s}' % expr,
+            try:
+                val = eval(expr, self.globals)
+            except:
+                # oops() reports the failure and re-raises, so val is
+                # always bound when execution continues below.
+                self.oops('eval', expr)
+            if callable(val): val = val()
+            # Identity test: an object with a custom __eq__ must not be
+            # mistaken for None.
+            if val is None: val = ''
+            if self.verbose: print '========>', val
+            return str(val)
+
+        block = self.globals['_bl']
+        if last is None: last = len(block)
+        while i < last:
+            line = block[i]
+            if line.startswith("#["):   # a statement starts at line block[i]
+                # i is the last line to _not_ process
+                stmt = line[2:].strip()
+                j = i+1   # look for 'finish' from here onwards
+                nest = 1  # count nesting levels of statements
+                # NOTE(review): when the '#[' line itself ends with '#]'
+                # this scan is skipped, so j == i+1 and the `i = j+1`
+                # below also skips the line that follows the one-line
+                # statement -- confirm that this is intended.
+                while j<last and not stmt.endswith("#]"):
+                    line = block[j]
+                    # first look for nested statements or 'finish' lines
+                    if line.startswith("#]"):    # found a statement-end
+                        nest = nest - 1
+                        if nest == 0: break  # j is first line to _not_ process
+                    elif line.startswith("#["):   # found a nested statement
+                        nest = nest + 1
+                    elif nest == 1 and line.startswith("#|"):
+                        # look for continuation only at this nesting
+                        nestat = line[2:].strip()
+                        # Close the clause so far with a callback that
+                        # copies lines [i+1,j), then start the next clause
+                        # with the header found on the '#|' line.
+                        stmt = '%s _cb(%s,%s)\n%s' % (stmt,i+1,j,nestat)
+                        i=j     # again, i is the last line to _not_ process
+                    j = j+1
+                if stmt == '': ## A multi-line python suite
+                    self.execute(''.join(block[i+1:j]))
+                    i = j+1
+                else:  ## The header of a for loop (etc.) is on this line
+                    self.execute("%s _cb(%s,%s)" % (stmt,i+1,j))
+                    i = j+1
+            else:       # normal line, just copy with substitution
+                self.outf.write(self.regex.sub(repl,self.preproc(line,'copy')))
+                i = i+1
+
+    def __init__(self, globals):
+        """Create a Copier.
+
+        globals -- dict used as the namespace for every eval and exec; the
+                   key '_cb' is set in it so statements can call copyblock.
+        Output goes to sys.stdout until copyfile replaces self.outf.
+        """
+        self.regex   = re.compile("<<(.*?)>>")
+        self.globals = globals
+        self.globals['_cb'] = self.copyblock
+        self.outf = sys.stdout
+        self.verbose = 0
+
+    def execute(self, stmt):
+        "Execute stmt (Python source text) in self.globals."
+        stmt = self.preproc(stmt, 'exec') + '\n'
+        if self.verbose:
+            print "******* executing {%s} in %s" % (stmt, self.globals.keys())
+        try:
+            exec stmt in self.globals
+        except:
+            self.oops('exec', stmt)
+
+    def oops(self, why, what):
+        """Print what failed and the current globals, then re-raise.
+
+        Must be called from inside an except clause: the bare raise
+        re-raises the exception that is being handled.
+        """
+        print 'Something went wrong in %sing {%s}' % (why, what)
+        print 'Globals:', self.globals.keys(), \
+            self.globals.get('SECTIONS', '???')
+        raise
+
+    def preproc(self, string, why, reg=re.compile(r"\s([<>&])\s"),
+                table={'&':' &amp; ', '<':' &lt; ', '>':' &gt; '}):
+        """Preprocess string before it is evaluated, executed or copied.
+
+        why -- 'exec' or 'eval': strip string and turn a leading '/' into
+               '_'; 'copy': replace &, < and > by HTML entities when they
+               are surrounded by whitespace.  Any other value returns None.
+        reg, table -- precomputed constants; not meant to be passed in.
+        """
+        # If it starts with '/', change to '_'
+        if why in ('exec', 'eval'):
+            string = string.strip()
+            # startswith (rather than string[0]) also copes with an empty
+            # string, as in <<>> or an empty #[ ... #] suite.
+            if string.startswith('/'):
+                string = '_' + string[1:]
+            return string
+        elif why == 'copy':
+            # Expand & < > into entities if surrounded by whitespace
+            return reg.sub(lambda match: table[match.group(1)], string)
+
+    def copyfile(self, filename, ext="html"):
+        """Convert filename.* to filename.ext, where ext defaults to html.
+
+        Sets the module global yaptu_filename to filename.  The output file
+        stays open as self.outf afterwards; it is flushed here so that the
+        generated text has been handed to the operating system on return.
+        """
+        global yaptu_filename
+        outname = re.sub('[.][a-zA-Z0-9]+?$', '', filename) + '.'+ext
+        print 'Transforming', filename, 'to', outname
+        inf = file(filename)
+        try:
+            self.globals['_bl'] = inf.readlines()
+        finally:
+            inf.close()     # do not leave the template file open
+        yaptu_filename = filename
+        outf = self.outf = file(outname, 'w')
+        try:
+            self.copyblock()
+        finally:
+            outf.flush()    # keep whatever was written, even on an error
+
+if __name__ == '__main__':
+    # Command-line use: every argument names a template file to transform,
+    # except '-v', which switches on verbose tracing for the files after it.
+    # The module's own globals are the template namespace, so the names
+    # bound here (copier, filename) are visible to template expressions.
+    copier = Copier(globals())
+    for filename in sys.argv[1:]:
+        if filename != '-v':
+            copier.copyfile(filename)
+        else:
+            copier.verbose = 1