Added files from norvig.com

Peter Norvig authored 2017-02-28 21:52:46 -08:00; committed by GitHub
parent 3b06853ded
commit 740e597a0d
14 changed files with 2479 additions and 0 deletions

SET.py (Normal file, 134 lines)
@@ -0,0 +1,134 @@
import random
import collections
import itertools
"""
Game of Set (Peter Norvig 2010-2015)
How often do sets appear when we deal an array of cards?
How often in the course of playing out the game?
Here are the data types we will use:
card: A string, such as '3R=0', meaning "three red striped ovals".
deck: A list of cards, initially of length 81.
layout: A list of cards, initially of length 12.
set: A tuple of 3 cards.
Tallies: A dict: {12: {True: 33, False: 1}} means a layout of size 12
tallied 33 sets and 1 non-set.
"""
#### Cards, dealing cards, and defining the notion of sets.
CARDS = [number + color + shade + symbol
for number in '123'
for color in 'RGP'
for shade in '@O='
for symbol in '0SD']
def deal(n, deck):
"Deal n cards from the deck."
return [deck.pop() for _ in range(n)]
def is_set(cards):
"Are these 3 cards a set? No if any feature has 2 values."
for f in range(4):
values = {card[f] for card in cards}
if len(values) == 2:
return False
return True
def find_set(layout):
"Return a set found from this layout, if there is one."
for cards in itertools.combinations(layout, 3):
if is_set(cards):
return cards
return ()
#### Tallying set:no-set ratio
def Tallies():
"A data structure to keep track, for each size, the number of sets and no-sets."
return collections.defaultdict(lambda: {True: 0, False: 0})
def tally(tallies, layout):
"Record that a set was found or not found in a layout of given size; return the set."
s = find_set(layout)
tallies[len(layout)][bool(s)] += 1
return s
#### Three experiments
def tally_initial_layout(N, sizes=(12, 15)):
"Record tallies for N initial deals."
tallies = Tallies()
deck = list(CARDS)
for deal in range(N):
random.shuffle(deck)
for size in sizes:
tally(tallies, deck[:size])
return tallies
def tally_initial_layout_no_prior_sets(N, sizes=(12, 15)):
"""Simulate N initial deals for each size, keeping tallies for Sets and NoSets,
but only when there was no set with 3 fewer cards."""
tallies = Tallies()
deck = list(CARDS)
for deal in range(N):
random.shuffle(deck)
for size in sizes:
if not find_set(deck[:size-3]):
tally(tallies, deck[:size])
return tallies
def tally_game_play(N):
"Record tallies for the play of N complete games."
tallies = Tallies()
for game in range(N):
deck = list(CARDS)
random.shuffle(deck)
layout = deal(12, deck)
while deck:
s = tally(tallies, layout)
# Pick up the cards in the set, if any
for card in s: layout.remove(card)
# Deal new cards
if len(layout) < 12 or not s:
layout += deal(3, deck)
return tallies
def experiments(N):
show({12: [1, 33], 15: [1, 2500]},
'the instruction booklet')
show(tally_initial_layout(N),
'initial layout')
show(tally_game_play(N // 25),
'game play')
show(tally_initial_layout_no_prior_sets(N),
'initial layout, but no sets before dealing last 3 cards')
def show(tallies, label):
"Print out the counts."
print()
print('Size | Sets | NoSets | Set:NoSet ratio for', label)
print('-----+--------+--------+----------------')
for size in sorted(tallies):
y, n = tallies[size][True], tallies[size][False]
ratio = ('inf' if n==0 else int(round(float(y)/n)))
print('{:4d} |{:7,d} |{:7,d} | {:4}:1'
.format(size, y, n, ratio))
def test():
assert len(CARDS) == 81 == len(set(CARDS))
assert is_set(('3R=0', '2R=S', '1R=D'))
assert not is_set(('3R=0', '2R=S', '1R@D'))
assert find_set(['1PO0', '2G=D', '3R=0', '2R=S', '1R=D']) == ('3R=0', '2R=S', '1R=D')
assert not find_set(['1PO0', '2G=D', '3R=0', '2R=S', '1R@D'])
photo = '2P=0 3P=D 2R=0 3GO0 2POD 3R@D 2RO0 2ROS 1P@S 2P@0 3ROS 2GOD 2P@D 1GOD 3GOS'.split()
assert not find_set(photo)
assert set(itertools.combinations([1, 2, 3, 4], 3)) == {(1, 2, 3), (1, 2, 4), (1, 3, 4), (2, 3, 4)}
print('All tests pass.')
test()
experiments(100000)

ibol.py (Normal file, 193 lines)
@@ -0,0 +1,193 @@
from collections import defaultdict
def get_genomes(fname="byronbayseqs.fas.txt"):
"Return a list of genomes, and a list of their corresponding names."
import re
names, species, genomes = [], [], []
for name, g in re.findall('>(.*?)\r([^\r]*)\r*', file(fname).read()):
names.append(name)
species.append(name.split('|')[-1])
genomes.append(g)
return names, species, genomes
def get_neighbors(fname="editdistances.txt"):
"Return dict: neighbors[i][j] = neighbors[j][i] = d means i,j are d apart."
## Read the data pre-computed from the Java program
neighbors = dict((i, {}) for i in range(n))
for line in file(fname):
i,j,d = map(int, line.split())
neighbors[i][j] = neighbors[j][i] = d
return neighbors
def cluster(neighbors, d, dc):
"""Return a list of clusters, each cluster element is within d of another
and within dc of every other cluster element."""
unclustered = set(neighbors) ## set of g's not yet clustered
return [closure(g, set(), unclustered, d, dc)
for g in neighbors if g in unclustered]
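# Illustration (a hypothetical 3-genome neighbor table, not taken from the data
# files; note that closure() and dist() below read the module-level `neighbors`,
# so this assumes that global holds the same table):
#   neighbors = {0: {1: 5, 2: 20}, 1: {0: 5, 2: 22}, 2: {0: 20, 1: 22}}
#   cluster(neighbors, d=8, dc=25)  ==>  [{0, 1}, {2}]
#   0 and 1 join because dist(0, 1) = 5 <= d; 2 stays alone because its nearest
#   neighbor is at distance 20 > d.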
def closure(g, s, unclustered, d, dc):
"Accumulate in set s the transitive closure of 'near', starting at g"
if g not in s and g in unclustered and near(g, s, d, dc):
s.add(g); unclustered.remove(g)
for g2 in neighbors[g]:
closure(g2, s, unclustered, d, dc)
return s
def dist(i, j):
"Distance between two genomes."
if i == j: return 0
return neighbors[min(i, j)].get(max(i, j), max_distance)
def near(g, cluster, d, dc):
"Is g within d of some member of c, and within dc of every member of c?"
distances = [dist(g, g2) for g2 in cluster] or [0]
return min(distances) <= d and max(distances) <= dc
def diameter(cluster):
"The largest distance between two elements of the cluster"
return max([dist(i, j) for i in cluster for j in cluster] or [0])
def margin(cluster):
"The distance from a cluster to the nearest g2 outside this cluster."
return min([d for g in cluster for g2,d in neighbors[g].items()
if g2 not in cluster] or [max_distance])
################################################################ Analysis
def pct(num, den):
"Return a string representing the percentage. "
if '__len__' in dir(den): den = len(den)
if num==den: return ' 100%'
return '%.1f%%' % (num*100.0/den)
def histo(items):
"Make a histogram from a sequence of items or (item, count) tuples."
D = defaultdict(int)
for item in items:
if isinstance(item, tuple): D[item[0]] += item[1]
else: D[item] += 1
return D
def showh(d):
"Show a histogram"
if not isinstance(d, dict): d = histo(d)
return ' '.join('%s:%s' % i for i in sorted(d.items()))
def greport(genomes):
print "Number of genomes: %d (%d distinct)" % (len(genomes), len(set(genomes)))
G = dict((g, set()) for g in genomes)
for i in range(n):
G[genomes[i]].add(species[i])
print "Multi-named genomes:", (
len([s for s in G.values() if len(s) > 1]))
lens = map(len, genomes)
print "Genome lengths: min=%d, max=%d" % (min(lens), max(lens))
print "Character counts: ", showh(c for g in genomes for c in g)
def nreport(neighbors):
NN, NumN = defaultdict(int), defaultdict(int) ## Nearest, Number of neighbors
for n in neighbors:
nn = min(neighbors[n].values() or ['>25'])
NN[nn] += 1
for d2 in neighbors[n].values():
NumN[d2] += 1
print
print "Nearest neighbor counts:", showh(NN)
print "Number of neighbors at each distance:", showh(NumN)
def nspecies(c): return len(set(species[g] for g in c))
def showc(c):
return "N=%d, D=%d, M=%d: %s %s" % (
len(c), diameter(c), margin(c), list(c), showh(species[g] for g in c))
def creport(drange, dcrange):
def table(what, fn):
print "\n" + what
print ' '*8, ' '.join([' '+pct(dc, glen) for dc in dcrange])
for d in drange:
print '%s (%2d)' % (pct(d, glen), d),
for dc in dcrange:
print '%5s' % fn(cluster(neighbors, d, dc)),
print
print '\nNearest neighbor must be closer than this percentage (places). '
print 'Each column: all genomes in cluster within this percentage of each other.'
table("Number of clusters", len)
cluster1 = cluster(neighbors, 8, 15) ## splits Cleora
print '\nNumber of clusters of different sizes:', showh(len(c) for c in cluster1)
M, T = defaultdict(int), defaultdict(int)
for c in cluster1:
M[margin(c)] += 1; T[margin(c)] += len(c)
for x in M: print '%d\t%d\t%d'% (x,M[x],T[x])
print '\nMargins', showh(M)
for c in cluster1:
if margin(c) <= 16:
print showc(c)
print '\nScatter plot of cluster diameter vs. margin.'
for c in cluster1:
if diameter(c) > 0:
pass
#print '%d\t%d' % (diameter(c), margin(c))
print '\nDifference from cluster(neighbors, 11, 14):'
#table(lambda cl: pct(len(cluster1)-compare(cluster1, cl),max(len(cluster1),len(cl))))
print '\nNumber of clusters with more than one species name:'
#table(lambda cl: sum(nspecies(c) > 1 for c in cl))
def pct_near_another(clusters, P=1.25):
total = 0
for c in clusters:
d = diameter(c)
for g in c:
for g2 in neighbors[g]:
if g2 not in c and dist(g, g2) < P*d:
total += 1
return pct(total, n)
def f(P):
print '\nPercent of individuals within %.2f*diameter of another cluster.'%P
table(lambda cl: pct_near_another(cl, P))
#map(f, [1.2, 1.33, 1.5])
def sreport(species):
SS = defaultdict(int)
print
for s in set(species):
c = [g for g in range(n) if species[g] == s]
d = diameter(c)
if d > 14:
if d==glen: d = '>25'
print 'diameter %s for %s (%d elements)' % (d, s, len(c))
SS[d] += 1
print 'Diameters of %d labelled clusters: %s' % (len(set(species)), showh(SS))
def compare(cl1, cl2):
"Compare two lists of clusters"
return sum(c1==c2 or 0.5*(abs(len(c1)-len(c2))==1 and
(c1.issubset(c2) or c2.issubset(c1)))
for c1 in cl1 for c2 in cl2)
def unit_tests():
assert set(len(g) for g in genomes) == set([glen])
clusters = cluster(neighbors, 11, 11)
assert sum(len(c) for c in clusters) == len(genomes)
assert len(set(g for c in clusters for g in c)) == len(genomes)
assert dist(17, 42) == dist(42, 17)
assert diameter(set()) == 0
assert diameter([17, 42]) == dist(17, 42)
assert pct(1, 2) == '50.0%'
print '\nAll tests pass.\n'
################################################################ Main body
max_distance = 26
names, species, genomes = get_genomes() ## genomes = ['ACT...', ...]
n = len(genomes)
glen = len(genomes[0])
neighbors = get_neighbors() ## neighbors[g] = {g2:d2, g3:d3, ...}
greport(genomes)
nreport(neighbors)
creport(range(6, 15), [glen,16,15,14,13, 12, 11])
#sreport(species)
unit_tests()

lettercount.py (Normal file, 440 lines)
@@ -0,0 +1,440 @@
"""
Read files in the Google Books ngram format, and convert them to a simpler format.
The original format looks like this:
word \t year \t word_count \t book_count
word_POS \t year \t word_count \t book_count
for example,
accreted_VERB 1846 7 4
accreted_VERB 1847 1 1
accreted_VERB 1848 1 1
The function 'read_year_file' will convert a file of this form into a dict of
{WORD: count} pairs, where the WORD is uppercased, and the count is the total
over all years (you have the option to specify a starting year) and all
capitalizations. Then 'read_dict' and 'write_dict' convert between a dict and
an external file format that looks like this:
ACCRETED 9
"""
from __future__ import division
from collections import Counter, defaultdict
#### Read files in Books-Ngram format; convert to a dict
def read_year_file(filename, dic=None):
"""Read a file of 'word year word_count book_count' lines and convert to a dict
{WORD: totalcount}. Uppercase all words, and only include all-alphabetic words."""
if dic is None: dic = {}
for line in file(filename):
word, year, c1, c2 = line.split('\t')
if '_' in word:
word = word[:word.index('_')]
if word.isalpha():
word = word.upper()
dic[word] = dic.get(word, 0) + int(c1)
return dic
#### Read and write files of the form 'WORD \t count \n'
def write_dict(dic, filename):
"Write a {word:count} dict as 'word \t count' lines in filename."
out = file(filename, 'w')
for key in sorted(dic):
out.write('%s\t%s\n' % (key, dic[key]))
return out.close()
def read_dict(filename, sep='\t'):
"Read 'word \t count' lines from file and make them into a dict of {word:count}."
pairs = (line.split(sep) for line in file(filename))
return {word: int(count) for (word, count) in pairs}
#### Convert a bunch of year files into dict file format.
def convert_files(filenames, mincount=1e5):
def report(filename, D, adj):
import time
N = len(D)
W = sum(v for v in D.itervalues())
print '%s: %s %s tokens (%s distinct words) at %s' % (
filename, adj, format(W, ',d'), format(N, ',d'),
time.strftime("%H:%M:%S", time.gmtime()))
for f in filenames:
report(f, {}, 'starting')
D = read_year_file(f)
report(f, D, 'total')
for key in list(D):
if D[key] < mincount:
del D[key]
write_dict(D, 'WORD-' + f[-1].upper())
report(f, D, 'popular')
def load():
global D, W, M
D = read_dict('top-words.txt')
W = len(D)
M = sum(D.values())
#### Compute letter counts and save as HTML files.
def histogram(items):
C = Counter()
for (key, val) in items:
C[key] += val
return C
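# For example: histogram([('A', 2), ('B', 1), ('A', 3)]) ==> Counter({'A': 5, 'B': 1})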
def end(name): return '/' + name
def tag(name, **kwds): return '<' + name + ''.join(' %s="%s"' % kv for kv in kwds.items()) + '>'
def row(cells, **kwds):
return '<tr>' + ''.join(cells)
def ngram_tables(dic, N, pos=[0, 1, 2, 3, 4, -5, -4, -3, -2, -1]):
"""Return three dicts of letter N-grams of length N: counts, counts1, counts2.
counts is a dict of {'AB': 123} that counts how often 'AB' occurs.
counts1[i] is a dict of {'AB': 123} that counts how often 'AB' occurs at position i.
counts2[i][j] is a dict of {'AB': 123} that counts how often 'AB' occurs at position i in words of length j."""
L = len(max(D, key=len))
counts = Counter()
counts1 = [Counter() for _ in range(L)]
counts2 = [[Counter() for i in range(L)]]
def counter(pairs):
"Make a Counter from an iterable of (value, count) pairs."
c = Counter()
for (value, count) in pairs:
c[value] += count
return c
def ngrams(word, N):
return [word[i:i+N] for i in range(len(word)+1-N)]
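# For example: ngrams('WORD', 2) ==> ['WO', 'OR', 'RD'] and ngrams('WORD', 4) ==> ['WORD']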
import glob
#convert_files(glob.glob('book?'))
#DB = [[letter_counts() for length in range(length)] for length in range(maxlen)]
## Unused ???
def letter_counts(wc):
"""From word_counts dictionary wc, Create a dictionary of {(s, i, L): count}
where s is a letter n-gram, i is the starting position, and L is the length
of the word in which it appears."""
result = defaultdict(int)
for (word, count) in wc.iteritems():
for p in pieces(word):
result[p] += count
return result
def pieces(word):
"Yield the 1- and 2-letter grams in (s, i, L) format."
L = len(word)
for i in range(L):
yield (word[i], i, L)
if i+1 < L:
yield (word[i:i+2], i, L)
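# For example: list(pieces('TO')) ==> [('T', 0, 2), ('TO', 0, 2), ('O', 1, 2)]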
def getcount(counts, s, pos, length):
"""The count for letter sequence s (one or two letters) starting at
position pos of words of length length. If any argument is all, sum over it."""
if length == all:
return sum(getcount(counts, s, pos, L) for L in all_lengths)
elif pos == all:
return sum(getcount(counts, s, i, length) for i in range(length))
else:
return counts[s, pos, length]
print 'start'
#wc = word_counts('count_100K.txt')
#counts = letter_counts(wc)
print 'end'
def test():
D = {'the': 100, 'of': 70, 'and': 60, 'to': 50, 'a': 40}
def num(ch):
"Translate 'a' or 'A' to 0, ... 'z' or 'Z' to 25."
return 'abcdefghijklmnopqrstuvwxyz'.index(ch.lower())
def stats(D, NS = (1, 2, 3, 4, 5, 6)):
counts = {n: Counter() for n in NS}
print 'words ' + ' '.join(' %d-grams ' % n for n in NS)
for (i, word) in enumerate(sortedby(D), 1):
for n in NS:
for ng in ngrams(word, n):
counts[n][ng] += 1
if i % 5000 == 0 or i == len(D):
print "%4dK" % (i/1000),
for n in NS:
c = len(counts[n])
field = "%5d (%d%%)" % (c, int(round(c*100/(26**n))))
print '%12s' % field,
print
letters = 'ETAOINSRHLDCUMFPGWYBVKXJQZ'
alphabet = ''.join(sorted(letters))
from itertools import cycle, izip
colors = 'ygobp'
def bar(text, color, count, N, pixels, height=16):
width = int(round(pixels * count / N))
if width < 2: width = 3
title = '{}: {:.3f}%; {:,}'.format(text, count*100./N, count)
return '<span title="%s"><img src="%s.jpg" height=%d width=%d><span style="position:relative; left:%d; bottom:4">%s</span></span>' % (
title, color, height, width, -width+2, text) # -int(width/2+5)
def letter_bar(LC, N=None, factor='', pixels=700):
if N is None: N = sum(LC.values())
#divisor = {'':1., 'K':1e3, 'M':1e6, 'B':1e9}[factor]
return ''.join(
bar(L.lower(), color, LC[L], N, pixels)
for (L, color) in izip(letters, cycle(colors)))
def singleton(x): return [x]
positions = [0, 1, 2, 3, 4, 5, 6, -7, -6, -5, -4, -3, -2, -1]
def substr(word, pos, length):
"""Return the substr of word of given length starting/ending at pos; or None."""
W = len(word)
if pos >= 0 and pos+length <= W:
return word[pos:pos+length]
elif pos < 0 and abs(pos)+length-1 <= W:
return word[W+pos+1-length:W+pos+1]
else:
return None
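# For example: substr('WORD', 0, 2) ==> 'WO'; substr('WORD', -1, 2) ==> 'RD'
# (a negative pos counts from the end of the word); substr('WORD', 3, 3) ==> None
# because the substring would run past the end.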
def lettercount(D, pos):
LC = histogram((substr(w, pos, 1), D[w]) for w in D)
del LC[None]
print LC
pos_name = (str(pos)+'+' if isinstance(pos, tuple) else
pos if pos < 0 else
pos+1)
return '\n<br>\n%-3s %s' % (pos_name, letter_bar(LC))
def ngramcount(D, n=2):
return histogram((ng, D[w]) for w in D for ng in ngrams(w, n))
def twograms(D2):
N = sum(D2.values())
header = '<table cellpadding=1 cellborder=1>'
rows = [tr([cell(A+B, D2, N) for A in alphabet]) for B in alphabet]
return '\n'.join([header] + rows + ['</table>'])
def cell(text, D2, N, height=16, maxwidth=25, scale=27):
count = D2.get(text, 0)
width = int(round(maxwidth * count * scale * 1. / N))
if width < 1: width = 1
title = '{}: {:.3f}%; {:,}'.format(text, count*100./N, count)
return '<td title="%s"><img src="o.jpg" height=%d width=%d><span style="position:relative; left:%d; bottom:4">%s</span></span>' % (
title, height, width, -width+2, text)
def cell(text, D2, N, height=16, maxwidth=25, scale=27):
count = D2.get(text, 0)
width = int(round(maxwidth * count * scale * 1. / N))
if width < 1: width = 1
title = '{}: {:.3f}%; {:,}'.format(text, count*100./N, count)
return '<td title="%s" background="o.jpg" height=%d width=%d>%s' % (
title, height, width, text)
def tr(cells):
return '<tr>' + ''.join(cells)
def comma(n): return '{:,}'.format(n)
def ngram_stats(D, n, k=5):
DN = ngramcount(D, n)
topk = ', '.join(sortedby(DN)[:k])
return '<tr><td>%d-grams<td align=right>%s<td align=right>%s<td><a href="counts-%d.csv">counts-%d.csv</a><td><a href="counts-%d.html">counts-%d.html</a><td>%s' % (
n, comma(len(DN)), comma(sum(DN.values())), n, n, n, n, topk)
#### Tables
def sortedby(D):
return sorted(D, key=lambda x: -D[x])
ANY = '*'
wordlengths = range(1, 10)
def col(*args): return args
def columns(n, wordlengths=wordlengths):
lengths = [k for k in wordlengths if k >= n]
return ([col(ANY, ANY)]
+ [col(k, ANY) for k in lengths]
+ [col(k, start, start+n-1) for k in lengths for start in range(1, 2+k-n)]
+ [col(ANY, start, start+n-1) for start in wordlengths]
+ [col(ANY, -k, -k+n-1) for k in reversed(lengths) if -k+n-1 < 0])
def colname(col):
fmt = '%s/%s' if (len(col) == 2) else '%s/%d:%d'
return fmt % col
def csvline(first, rest):
return '\t'.join([first] + map(str, rest))
def makecsv(n, D=D):
out = file('ngrams%d.csv' % n, 'w')
cols = columns(n)
Dng = defaultdict(lambda: defaultdict(int))
for w in D:
for (start, ng) in enumerate(ngrams(w, n), 1):
entry = Dng[ng]
N = D[w]
wlen = len(w)
entry[ANY, ANY] += N
entry[wlen, ANY] += N
if start <= 9:
entry[wlen, start, start+n-1] += N
entry[ANY, start, start+n-1] += N
from_end = wlen-start+1
if from_end <= 9:
entry[ANY, -from_end, -from_end+n-1] += N
# enumerate ngrams from word and increment counts for each one
print >> out, csvline('%d-gram' % n, map(colname, cols))
for ng in sorted(Dng, key=lambda ng: -Dng[ng][(ANY, ANY)]):
print >> out, csvline(ng, [Dng[ng].get(col, 0) for col in cols])
out.close()
return Dng
### Tests
"""
>>> for w in words:
print '%-6s %6.2f B (%4.2f%%) <img src="s.jpg" height=12 width=%d>' % (w.lower(), D[w]/1e9, D[w]*100./N, int(round(D[w]*4000./N)))
...
the 53.10 B (7.14%) <img src="s.jpg" height=12 width=286>
of 30.97 B (4.16%) <img src="s.jpg" height=12 width=167>
and 22.63 B (3.04%) <img src="s.jpg" height=12 width=122>
to 19.35 B (2.60%) <img src="s.jpg" height=12 width=104>
in 16.89 B (2.27%) <img src="s.jpg" height=12 width=91>
a 15.31 B (2.06%) <img src="s.jpg" height=12 width=82>
is 8.38 B (1.13%) <img src="s.jpg" height=12 width=45>
that 8.00 B (1.08%) <img src="s.jpg" height=12 width=43>
for 6.55 B (0.88%) <img src="s.jpg" height=12 width=35>
it 5.74 B (0.77%) <img src="s.jpg" height=12 width=31>
as 5.70 B (0.77%) <img src="s.jpg" height=12 width=31>
was 5.50 B (0.74%) <img src="s.jpg" height=12 width=30>
with 5.18 B (0.70%) <img src="s.jpg" height=12 width=28>
be 4.82 B (0.65%) <img src="s.jpg" height=12 width=26>
by 4.70 B (0.63%) <img src="s.jpg" height=12 width=25>
on 4.59 B (0.62%) <img src="s.jpg" height=12 width=25>
not 4.52 B (0.61%) <img src="s.jpg" height=12 width=24>
he 4.11 B (0.55%) <img src="s.jpg" height=12 width=22>
i 3.88 B (0.52%) <img src="s.jpg" height=12 width=21>
this 3.83 B (0.51%) <img src="s.jpg" height=12 width=21>
are 3.70 B (0.50%) <img src="s.jpg" height=12 width=20>
or 3.67 B (0.49%) <img src="s.jpg" height=12 width=20>
his 3.61 B (0.49%) <img src="s.jpg" height=12 width=19>
from 3.47 B (0.47%) <img src="s.jpg" height=12 width=19>
at 3.41 B (0.46%) <img src="s.jpg" height=12 width=18>
which 3.14 B (0.42%) <img src="s.jpg" height=12 width=17>
but 2.79 B (0.38%) <img src="s.jpg" height=12 width=15>
have 2.78 B (0.37%) <img src="s.jpg" height=12 width=15>
an 2.73 B (0.37%) <img src="s.jpg" height=12 width=15>
had 2.62 B (0.35%) <img src="s.jpg" height=12 width=14>
they 2.46 B (0.33%) <img src="s.jpg" height=12 width=13>
you 2.34 B (0.31%) <img src="s.jpg" height=12 width=13>
were 2.27 B (0.31%) <img src="s.jpg" height=12 width=12>
their 2.15 B (0.29%) <img src="s.jpg" height=12 width=12>
one 2.15 B (0.29%) <img src="s.jpg" height=12 width=12>
all 2.06 B (0.28%) <img src="s.jpg" height=12 width=11>
we 2.06 B (0.28%) <img src="s.jpg" height=12 width=11>
can 1.67 B (0.22%) <img src="s.jpg" height=12 width=9>
her 1.63 B (0.22%) <img src="s.jpg" height=12 width=9>
has 1.63 B (0.22%) <img src="s.jpg" height=12 width=9>
there 1.62 B (0.22%) <img src="s.jpg" height=12 width=9>
been 1.62 B (0.22%) <img src="s.jpg" height=12 width=9>
if 1.56 B (0.21%) <img src="s.jpg" height=12 width=8>
more 1.55 B (0.21%) <img src="s.jpg" height=12 width=8>
when 1.52 B (0.20%) <img src="s.jpg" height=12 width=8>
will 1.49 B (0.20%) <img src="s.jpg" height=12 width=8>
would 1.47 B (0.20%) <img src="s.jpg" height=12 width=8>
who 1.46 B (0.20%) <img src="s.jpg" height=12 width=8>
so 1.45 B (0.19%) <img src="s.jpg" height=12 width=8>
no 1.40 B (0.19%) <img src="s.jpg" height=12 width=8>
>>> for n in sorted(H):
print '%2d %9.2f M (%6.3f%%) <img src="s.jpg" height=12 width=%d> %d' % (n, H[n]/1e6, H[n]*100./NN, H[n]*3000./NN, n)
...
1 22301.22 M ( 2.998%) <img src="s.jpg" height=12 width=89> 1
2 131293.85 M (17.651%) <img src="s.jpg" height=12 width=529> 2
3 152568.38 M (20.511%) <img src="s.jpg" height=12 width=615> 3
4 109988.33 M (14.787%) <img src="s.jpg" height=12 width=443> 4
5 79589.32 M (10.700%) <img src="s.jpg" height=12 width=320> 5
6 62391.21 M ( 8.388%) <img src="s.jpg" height=12 width=251> 6
7 59052.66 M ( 7.939%) <img src="s.jpg" height=12 width=238> 7
8 44207.29 M ( 5.943%) <img src="s.jpg" height=12 width=178> 8
9 33006.93 M ( 4.437%) <img src="s.jpg" height=12 width=133> 9
10 22883.84 M ( 3.076%) <img src="s.jpg" height=12 width=92> 10
11 13098.06 M ( 1.761%) <img src="s.jpg" height=12 width=52> 11
12 7124.15 M ( 0.958%) <img src="s.jpg" height=12 width=28> 12
13 3850.58 M ( 0.518%) <img src="s.jpg" height=12 width=15> 13
14 1653.08 M ( 0.222%) <img src="s.jpg" height=12 width=6> 14
15 565.24 M ( 0.076%) <img src="s.jpg" height=12 width=2> 15
16 151.22 M ( 0.020%) <img src="s.jpg" height=12 width=0> 16
17 72.81 M ( 0.010%) <img src="s.jpg" height=12 width=0> 17
18 28.62 M ( 0.004%) <img src="s.jpg" height=12 width=0> 18
19 8.51 M ( 0.001%) <img src="s.jpg" height=12 width=0> 19
20 6.35 M ( 0.001%) <img src="s.jpg" height=12 width=0> 20
21 0.13 M ( 0.000%) <img src="s.jpg" height=12 width=0> 21
22 0.81 M ( 0.000%) <img src="s.jpg" height=12 width=0> 22
23 0.32 M ( 0.000%) <img src="s.jpg" height=12 width=0> 23
>>> NL = sum(LC.values())
>>> for L in sorted(LC, key=lambda L: -LC[L]):
print '%s %8.1f B (%5.2f%%) <img src="s.jpg" height=12 width=%d>' % (L, LC[L]/1e9, LC[L]*100./NL, LC[L]*3000./NL)
...
E 445.2 B (12.49%) <img src="s.jpg" height=12 width=374>
T 330.5 B ( 9.28%) <img src="s.jpg" height=12 width=278>
A 286.5 B ( 8.04%) <img src="s.jpg" height=12 width=241>
O 272.3 B ( 7.64%) <img src="s.jpg" height=12 width=229>
I 269.7 B ( 7.57%) <img src="s.jpg" height=12 width=227>
N 257.8 B ( 7.23%) <img src="s.jpg" height=12 width=217>
S 232.1 B ( 6.51%) <img src="s.jpg" height=12 width=195>
R 223.8 B ( 6.28%) <img src="s.jpg" height=12 width=188>
H 180.1 B ( 5.05%) <img src="s.jpg" height=12 width=151>
L 145.0 B ( 4.07%) <img src="s.jpg" height=12 width=122>
D 136.0 B ( 3.82%) <img src="s.jpg" height=12 width=114>
C 119.2 B ( 3.34%) <img src="s.jpg" height=12 width=100>
U 97.3 B ( 2.73%) <img src="s.jpg" height=12 width=81>
M 89.5 B ( 2.51%) <img src="s.jpg" height=12 width=75>
F 85.6 B ( 2.40%) <img src="s.jpg" height=12 width=72>
P 76.1 B ( 2.14%) <img src="s.jpg" height=12 width=64>
G 66.6 B ( 1.87%) <img src="s.jpg" height=12 width=56>
W 59.7 B ( 1.68%) <img src="s.jpg" height=12 width=50>
Y 59.3 B ( 1.66%) <img src="s.jpg" height=12 width=49>
B 52.9 B ( 1.48%) <img src="s.jpg" height=12 width=44>
V 37.5 B ( 1.05%) <img src="s.jpg" height=12 width=31>
K 19.3 B ( 0.54%) <img src="s.jpg" height=12 width=16>
X 8.4 B ( 0.23%) <img src="s.jpg" height=12 width=7>
J 5.7 B ( 0.16%) <img src="s.jpg" height=12 width=4>
Q 4.3 B ( 0.12%) <img src="s.jpg" height=12 width=3>
Z 3.2 B ( 0.09%) <img src="s.jpg" height=12 width=2>
>>> D2 = ngramcount(D, 2)
>>> for ng in sorted(D2, key=lambda L: -D2[L])[:50]: print '%s %8.1f B (%5.2f%%) <img src="o.jpg" height=12 width=%d>' % (ng, D2[ng]/1e9, D2[ng]*100./N2, D2[ng]*15000./N2)
def doit(k=25):
counts = [sortedby(ngramcount(D, n))[:k] for n in range(2, 10)]
for i in range(k):
print (' '.join(count[i] for count in counts)).lower()
"""

lis.py (Normal file, 145 lines)
@@ -0,0 +1,145 @@
################ Lispy: Scheme Interpreter in Python
## (c) Peter Norvig, 2010-16; See http://norvig.com/lispy.html
from __future__ import division
import math
import operator as op
################ Types
Symbol = str # A Lisp Symbol is implemented as a Python str
List = list # A Lisp List is implemented as a Python list
Number = (int, float) # A Lisp Number is implemented as a Python int or float
################ Parsing: parse, tokenize, and read_from_tokens
def parse(program):
"Read a Scheme expression from a string."
return read_from_tokens(tokenize(program))
def tokenize(s):
"Convert a string into a list of tokens."
return s.replace('(',' ( ').replace(')',' ) ').split()
def read_from_tokens(tokens):
"Read an expression from a sequence of tokens."
if len(tokens) == 0:
raise SyntaxError('unexpected EOF while reading')
token = tokens.pop(0)
if '(' == token:
L = []
while tokens[0] != ')':
L.append(read_from_tokens(tokens))
tokens.pop(0) # pop off ')'
return L
elif ')' == token:
raise SyntaxError('unexpected )')
else:
return atom(token)
def atom(token):
"Numbers become numbers; every other token is a symbol."
try: return int(token)
except ValueError:
try: return float(token)
except ValueError:
return Symbol(token)
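# For example: atom('2') ==> 2, atom('2.5') ==> 2.5, atom('x') ==> 'x', and
# parse('(+ 1 (* 2 3))') ==> ['+', 1, ['*', 2, 3]]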
################ Environments
def standard_env():
"An environment with some Scheme standard procedures."
env = Env()
env.update(vars(math)) # sin, cos, sqrt, pi, ...
env.update({
'+':op.add, '-':op.sub, '*':op.mul, '/':op.truediv,
'>':op.gt, '<':op.lt, '>=':op.ge, '<=':op.le, '=':op.eq,
'abs': abs,
'append': op.add,
'apply': apply,
'begin': lambda *x: x[-1],
'car': lambda x: x[0],
'cdr': lambda x: x[1:],
'cons': lambda x,y: [x] + y,
'eq?': op.is_,
'equal?': op.eq,
'length': len,
'list': lambda *x: list(x),
'list?': lambda x: isinstance(x,list),
'map': map,
'max': max,
'min': min,
'not': op.not_,
'null?': lambda x: x == [],
'number?': lambda x: isinstance(x, Number),
'procedure?': callable,
'round': round,
'symbol?': lambda x: isinstance(x, Symbol),
})
return env
class Env(dict):
"An environment: a dict of {'var':val} pairs, with an outer Env."
def __init__(self, parms=(), args=(), outer=None):
self.update(zip(parms, args))
self.outer = outer
def find(self, var):
"Find the innermost Env where var appears."
return self if (var in self) else self.outer.find(var)
global_env = standard_env()
################ Interaction: A REPL
def repl(prompt='lis.py> '):
"A prompt-read-eval-print loop."
while True:
val = eval(parse(raw_input(prompt)))
if val is not None:
print(lispstr(val))
def lispstr(exp):
"Convert a Python object back into a Lisp-readable string."
if isinstance(exp, List):
return '(' + ' '.join(map(lispstr, exp)) + ')'
else:
return str(exp)
################ Procedures
class Procedure(object):
"A user-defined Scheme procedure."
def __init__(self, parms, body, env):
self.parms, self.body, self.env = parms, body, env
def __call__(self, *args):
return eval(self.body, Env(self.parms, args, self.env))
################ eval
def eval(x, env=global_env):
"Evaluate an expression in an environment."
if isinstance(x, Symbol): # variable reference
return env.find(x)[x]
elif not isinstance(x, List): # constant literal
return x
elif x[0] == 'quote': # (quote exp)
(_, exp) = x
return exp
elif x[0] == 'if': # (if test conseq alt)
(_, test, conseq, alt) = x
exp = (conseq if eval(test, env) else alt)
return eval(exp, env)
elif x[0] == 'define': # (define var exp)
(_, var, exp) = x
env[var] = eval(exp, env)
elif x[0] == 'set!': # (set! var exp)
(_, var, exp) = x
env.find(var)[var] = eval(exp, env)
elif x[0] == 'lambda': # (lambda (var...) body)
(_, parms, body) = x
return Procedure(parms, body, env)
else: # (proc arg...)
proc = eval(x[0], env)
args = [eval(exp, env) for exp in x[1:]]
return proc(*args)
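# A small usage example (evaluated against the default global_env):
#   eval(parse('(define r 10)'))
#   eval(parse('(* pi (* r r))'))  ==>  314.1592653589793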

lispy.py (Normal file, 318 lines)
@@ -0,0 +1,318 @@
################ Scheme Interpreter in Python
## (c) Peter Norvig, 2010; See http://norvig.com/lispy2.html
################ Symbol, Procedure, classes
from __future__ import division
import re, sys, StringIO
class Symbol(str): pass
def Sym(s, symbol_table={}):
"Find or create unique Symbol entry for str s in symbol table."
if s not in symbol_table: symbol_table[s] = Symbol(s)
return symbol_table[s]
_quote, _if, _set, _define, _lambda, _begin, _definemacro, = map(Sym,
"quote if set! define lambda begin define-macro".split())
_quasiquote, _unquote, _unquotesplicing = map(Sym,
"quasiquote unquote unquote-splicing".split())
class Procedure(object):
"A user-defined Scheme procedure."
def __init__(self, parms, exp, env):
self.parms, self.exp, self.env = parms, exp, env
def __call__(self, *args):
return eval(self.exp, Env(self.parms, args, self.env))
################ parse, read, and user interaction
def parse(inport):
"Parse a program: read and expand/error-check it."
# Backwards compatibility: given a str, convert it to an InPort
if isinstance(inport, str): inport = InPort(StringIO.StringIO(inport))
return expand(read(inport), toplevel=True)
eof_object = Symbol('#<eof-object>') # Note: uninterned; can't be read
class InPort(object):
"An input port. Retains a line of chars."
tokenizer = r"""\s*(,@|[('`,)]|"(?:[\\].|[^\\"])*"|;.*|[^\s('"`,;)]*)(.*)"""
def __init__(self, file):
self.file = file; self.line = ''
def next_token(self):
"Return the next token, reading new text into line buffer if needed."
while True:
if self.line == '': self.line = self.file.readline()
if self.line == '': return eof_object
token, self.line = re.match(InPort.tokenizer, self.line).groups()
if token != '' and not token.startswith(';'):
return token
def readchar(inport):
"Read the next character from an input port."
if inport.line != '':
ch, inport.line = inport.line[0], inport.line[1:]
return ch
else:
return inport.file.read(1) or eof_object
def read(inport):
"Read a Scheme expression from an input port."
def read_ahead(token):
if '(' == token:
L = []
while True:
token = inport.next_token()
if token == ')': return L
else: L.append(read_ahead(token))
elif ')' == token: raise SyntaxError('unexpected )')
elif token in quotes: return [quotes[token], read(inport)]
elif token is eof_object: raise SyntaxError('unexpected EOF in list')
else: return atom(token)
# body of read:
token1 = inport.next_token()
return eof_object if token1 is eof_object else read_ahead(token1)
quotes = {"'":_quote, "`":_quasiquote, ",":_unquote, ",@":_unquotesplicing}
def atom(token):
'Numbers become numbers; #t and #f are booleans; "..." string; otherwise Symbol.'
if token == '#t': return True
elif token == '#f': return False
elif token[0] == '"': return token[1:-1].decode('string_escape')
try: return int(token)
except ValueError:
try: return float(token)
except ValueError:
try: return complex(token.replace('i', 'j', 1))
except ValueError:
return Sym(token)
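# For example: atom('#t') ==> True, atom('"hi"') ==> 'hi', atom('3') ==> 3,
# atom('3.5') ==> 3.5, atom('3i') ==> 3j, and atom('foo') ==> Sym('foo')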
def to_string(x):
"Convert a Python object back into a Lisp-readable string."
if x is True: return "#t"
elif x is False: return "#f"
elif isa(x, Symbol): return x
elif isa(x, str): return '"%s"' % x.encode('string_escape').replace('"',r'\"')
elif isa(x, list): return '('+' '.join(map(to_string, x))+')'
elif isa(x, complex): return str(x).replace('j', 'i')
else: return str(x)
def load(filename):
"Eval every expression from a file."
repl(None, InPort(open(filename)), None)
def repl(prompt='lispy> ', inport=InPort(sys.stdin), out=sys.stdout):
"A prompt-read-eval-print loop."
sys.stderr.write("Lispy version 2.0\n")
while True:
try:
if prompt: sys.stderr.write(prompt)
x = parse(inport)
if x is eof_object: return
val = eval(x)
if val is not None and out: print >> out, to_string(val)
except Exception as e:
print '%s: %s' % (type(e).__name__, e)
################ Environment class
class Env(dict):
"An environment: a dict of {'var':val} pairs, with an outer Env."
def __init__(self, parms=(), args=(), outer=None):
# Bind parm list to corresponding args, or single parm to list of args
self.outer = outer
if isa(parms, Symbol):
self.update({parms:list(args)})
else:
if len(args) != len(parms):
raise TypeError('expected %s, given %s, '
% (to_string(parms), to_string(args)))
self.update(zip(parms,args))
def find(self, var):
"Find the innermost Env where var appears."
if var in self: return self
elif self.outer is None: raise LookupError(var)
else: return self.outer.find(var)
def is_pair(x): return x != [] and isa(x, list)
def cons(x, y): return [x]+y
def callcc(proc):
"Call proc with current continuation; escape only"
ball = RuntimeWarning("Sorry, can't continue this continuation any longer.")
def throw(retval): ball.retval = retval; raise ball
try:
return proc(throw)
except RuntimeWarning as w:
if w is ball: return ball.retval
else: raise w
def add_globals(self):
"Add some Scheme standard procedures."
import math, cmath, operator as op
self.update(vars(math))
self.update(vars(cmath))
self.update({
'+':op.add, '-':op.sub, '*':op.mul, '/':op.div, 'not':op.not_,
'>':op.gt, '<':op.lt, '>=':op.ge, '<=':op.le, '=':op.eq,
'equal?':op.eq, 'eq?':op.is_, 'length':len, 'cons':cons,
'car':lambda x:x[0], 'cdr':lambda x:x[1:], 'append':op.add,
'list':lambda *x:list(x), 'list?': lambda x:isa(x,list),
'null?':lambda x:x==[], 'symbol?':lambda x: isa(x, Symbol),
'boolean?':lambda x: isa(x, bool), 'pair?':is_pair,
'port?': lambda x:isa(x,file), 'apply':lambda proc,l: proc(*l),
'eval':lambda x: eval(expand(x)), 'load':lambda fn: load(fn), 'call/cc':callcc,
'open-input-file':open,'close-input-port':lambda p: p.file.close(),
'open-output-file':lambda f:open(f,'w'), 'close-output-port':lambda p: p.close(),
'eof-object?':lambda x:x is eof_object, 'read-char':readchar,
'read':read, 'write':lambda x,port=sys.stdout:port.write(to_string(x)),
'display':lambda x,port=sys.stdout:port.write(x if isa(x,str) else to_string(x))})
return self
isa = isinstance
global_env = add_globals(Env())
################ eval (tail recursive)
def eval(x, env=global_env):
"Evaluate an expression in an environment."
while True:
if isa(x, Symbol): # variable reference
return env.find(x)[x]
elif not isa(x, list): # constant literal
return x
elif x[0] is _quote: # (quote exp)
(_, exp) = x
return exp
elif x[0] is _if: # (if test conseq alt)
(_, test, conseq, alt) = x
x = (conseq if eval(test, env) else alt)
elif x[0] is _set: # (set! var exp)
(_, var, exp) = x
env.find(var)[var] = eval(exp, env)
return None
elif x[0] is _define: # (define var exp)
(_, var, exp) = x
env[var] = eval(exp, env)
return None
elif x[0] is _lambda: # (lambda (var*) exp)
(_, vars, exp) = x
return Procedure(vars, exp, env)
elif x[0] is _begin: # (begin exp+)
for exp in x[1:-1]:
eval(exp, env)
x = x[-1]
else: # (proc exp*)
exps = [eval(exp, env) for exp in x]
proc = exps.pop(0)
if isa(proc, Procedure):
x = proc.exp
env = Env(proc.parms, exps, proc.env)
else:
return proc(*exps)
################ expand
def expand(x, toplevel=False):
"Walk tree of x, making optimizations/fixes, and signaling SyntaxError."
require(x, x!=[]) # () => Error
if not isa(x, list): # constant => unchanged
return x
elif x[0] is _quote: # (quote exp)
require(x, len(x)==2)
return x
elif x[0] is _if:
if len(x)==3: x = x + [None] # (if t c) => (if t c None)
require(x, len(x)==4)
return map(expand, x)
elif x[0] is _set:
require(x, len(x)==3);
var = x[1] # (set! non-var exp) => Error
require(x, isa(var, Symbol), "can set! only a symbol")
return [_set, var, expand(x[2])]
elif x[0] is _define or x[0] is _definemacro:
require(x, len(x)>=3)
_def, v, body = x[0], x[1], x[2:]
if isa(v, list) and v: # (define (f args) body)
f, args = v[0], v[1:] # => (define f (lambda (args) body))
return expand([_def, f, [_lambda, args]+body])
else:
require(x, len(x)==3) # (define non-var/list exp) => Error
require(x, isa(v, Symbol), "can define only a symbol")
exp = expand(x[2])
if _def is _definemacro:
require(x, toplevel, "define-macro only allowed at top level")
proc = eval(exp)
require(x, callable(proc), "macro must be a procedure")
macro_table[v] = proc # (define-macro v proc)
return None # => None; add v:proc to macro_table
return [_define, v, exp]
elif x[0] is _begin:
if len(x)==1: return None # (begin) => None
else: return [expand(xi, toplevel) for xi in x]
elif x[0] is _lambda: # (lambda (x) e1 e2)
require(x, len(x)>=3) # => (lambda (x) (begin e1 e2))
vars, body = x[1], x[2:]
require(x, (isa(vars, list) and all(isa(v, Symbol) for v in vars))
or isa(vars, Symbol), "illegal lambda argument list")
exp = body[0] if len(body) == 1 else [_begin] + body
return [_lambda, vars, expand(exp)]
elif x[0] is _quasiquote: # `x => expand_quasiquote(x)
require(x, len(x)==2)
return expand_quasiquote(x[1])
elif isa(x[0], Symbol) and x[0] in macro_table:
return expand(macro_table[x[0]](*x[1:]), toplevel) # (m arg...)
else: # => macroexpand if m isa macro
return map(expand, x) # (f arg...) => expand each
def require(x, predicate, msg="wrong length"):
"Signal a syntax error if predicate is false."
if not predicate: raise SyntaxError(to_string(x)+': '+msg)
_append, _cons, _let = map(Sym, "append cons let".split())
def expand_quasiquote(x):
"""Expand `x => 'x; `,x => x; `(,@x y) => (append x y) """
if not is_pair(x):
return [_quote, x]
require(x, x[0] is not _unquotesplicing, "can't splice here")
if x[0] is _unquote:
require(x, len(x)==2)
return x[1]
elif is_pair(x[0]) and x[0][0] is _unquotesplicing:
require(x[0], len(x[0])==2)
return [_append, x[0][1], expand_quasiquote(x[1:])]
else:
return [_cons, expand_quasiquote(x[0]), expand_quasiquote(x[1:])]
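# For example, `(1 ,x) reads as [_quasiquote, [1, [_unquote, 'x']]], and
# expand_quasiquote([1, [_unquote, 'x']]) produces, in Scheme notation,
# (cons (quote 1) (cons x (quote ()))).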
def let(*args):
args = list(args)
x = cons(_let, args)
require(x, len(args)>1)
bindings, body = args[0], args[1:]
require(x, all(isa(b, list) and len(b)==2 and isa(b[0], Symbol)
for b in bindings), "illegal binding list")
vars, vals = zip(*bindings)
return [[_lambda, list(vars)]+map(expand, body)] + map(expand, vals)
macro_table = {_let:let} ## More macros can go here
eval(parse("""(begin
(define-macro and (lambda args
(if (null? args) #t
(if (= (length args) 1) (car args)
`(if ,(car args) (and ,@(cdr args)) #f)))))
;; More macros can also go here
)"""))
if __name__ == '__main__':
repl()

lispytest.py (Normal file, 121 lines)
@@ -0,0 +1,121 @@
################ Tests for lis.py and lispy.py
lis_tests = [
("(quote (testing 1 (2.0) -3.14e159))", ['testing', 1, [2.0], -3.14e159]),
("(+ 2 2)", 4),
("(+ (* 2 100) (* 1 10))", 210),
("(if (> 6 5) (+ 1 1) (+ 2 2))", 2),
("(if (< 6 5) (+ 1 1) (+ 2 2))", 4),
("(define x 3)", None), ("x", 3), ("(+ x x)", 6),
("(begin (define x 1) (set! x (+ x 1)) (+ x 1))", 3),
("((lambda (x) (+ x x)) 5)", 10),
("(define twice (lambda (x) (* 2 x)))", None), ("(twice 5)", 10),
("(define compose (lambda (f g) (lambda (x) (f (g x)))))", None),
("((compose list twice) 5)", [10]),
("(define repeat (lambda (f) (compose f f)))", None),
("((repeat twice) 5)", 20), ("((repeat (repeat twice)) 5)", 80),
("(define fact (lambda (n) (if (<= n 1) 1 (* n (fact (- n 1))))))", None),
("(fact 3)", 6),
("(fact 50)", 30414093201713378043612608166064768844377641568960512000000000000),
("(define abs (lambda (n) ((if (> n 0) + -) 0 n)))", None),
("(list (abs -3) (abs 0) (abs 3))", [3, 0, 3]),
("""(define combine (lambda (f)
(lambda (x y)
(if (null? x) (quote ())
(f (list (car x) (car y))
((combine f) (cdr x) (cdr y)))))))""", None),
("(define zip (combine cons))", None),
("(zip (list 1 2 3 4) (list 5 6 7 8))", [[1, 5], [2, 6], [3, 7], [4, 8]]),
("""(define riff-shuffle (lambda (deck) (begin
(define take (lambda (n seq) (if (<= n 0) (quote ()) (cons (car seq) (take (- n 1) (cdr seq))))))
(define drop (lambda (n seq) (if (<= n 0) seq (drop (- n 1) (cdr seq)))))
(define mid (lambda (seq) (/ (length seq) 2)))
((combine append) (take (mid deck) deck) (drop (mid deck) deck)))))""", None),
("(riff-shuffle (list 1 2 3 4 5 6 7 8))", [1, 5, 2, 6, 3, 7, 4, 8]),
("((repeat riff-shuffle) (list 1 2 3 4 5 6 7 8))", [1, 3, 5, 7, 2, 4, 6, 8]),
("(riff-shuffle (riff-shuffle (riff-shuffle (list 1 2 3 4 5 6 7 8))))", [1,2,3,4,5,6,7,8]),
]
lispy_tests = [
("()", SyntaxError), ("(set! x)", SyntaxError),
("(define 3 4)", SyntaxError),
("(quote 1 2)", SyntaxError), ("(if 1 2 3 4)", SyntaxError),
("(lambda 3 3)", SyntaxError), ("(lambda (x))", SyntaxError),
("""(if (= 1 2) (define-macro a 'a)
(define-macro a 'b))""", SyntaxError),
("(define (twice x) (* 2 x))", None), ("(twice 2)", 4),
("(twice 2 2)", TypeError),
("(define lyst (lambda items items))", None),
("(lyst 1 2 3 (+ 2 2))", [1,2,3,4]),
("(if 1 2)", 2),
("(if (= 3 4) 2)", None),
("(define ((account bal) amt) (set! bal (+ bal amt)) bal)", None),
("(define a1 (account 100))", None),
("(a1 0)", 100), ("(a1 10)", 110), ("(a1 10)", 120),
("""(define (newton guess function derivative epsilon)
(define guess2 (- guess (/ (function guess) (derivative guess))))
(if (< (abs (- guess guess2)) epsilon) guess2
(newton guess2 function derivative epsilon)))""", None),
("""(define (square-root a)
(newton 1 (lambda (x) (- (* x x) a)) (lambda (x) (* 2 x)) 1e-8))""", None),
("(> (square-root 200.) 14.14213)", True),
("(< (square-root 200.) 14.14215)", True),
("(= (square-root 200.) (sqrt 200.))", True),
("""(define (sum-squares-range start end)
(define (sumsq-acc start end acc)
(if (> start end) acc (sumsq-acc (+ start 1) end (+ (* start start) acc))))
(sumsq-acc start end 0))""", None),
("(sum-squares-range 1 3000)", 9004500500), ## Tests tail recursion
("(call/cc (lambda (throw) (+ 5 (* 10 (throw 1))))) ;; throw", 1),
("(call/cc (lambda (throw) (+ 5 (* 10 1)))) ;; do not throw", 15),
("""(call/cc (lambda (throw)
(+ 5 (* 10 (call/cc (lambda (escape) (* 100 (escape 3)))))))) ; 1 level""", 35),
("""(call/cc (lambda (throw)
(+ 5 (* 10 (call/cc (lambda (escape) (* 100 (throw 3)))))))) ; 2 levels""", 3),
("""(call/cc (lambda (throw)
(+ 5 (* 10 (call/cc (lambda (escape) (* 100 1))))))) ; 0 levels""", 1005),
("(* 1i 1i)", -1), ("(sqrt -1)", 1j),
("(let ((a 1) (b 2)) (+ a b))", 3),
("(let ((a 1) (b 2 3)) (+ a b))", SyntaxError),
("(and 1 2 3)", 3), ("(and (> 2 1) 2 3)", 3), ("(and)", True),
("(and (> 2 1) (> 2 3))", False),
("(define-macro unless (lambda args `(if (not ,(car args)) (begin ,@(cdr args))))) ; test `", None),
("(unless (= 2 (+ 1 1)) (display 2) 3 4)", None),
(r'(unless (= 4 (+ 1 1)) (display 2) (display "\n") 3 4)', 4),
("(quote x)", 'x'),
("(quote (1 2 three))", [1, 2, 'three']),
("'x", 'x'),
("'(one 2 3)", ['one', 2, 3]),
("(define L (list 1 2 3))", None),
("`(testing ,@L testing)", ['testing',1,2,3,'testing']),
("`(testing ,L testing)", ['testing',[1,2,3],'testing']),
("`,@L", SyntaxError),
("""'(1 ;test comments '
;skip this line
2 ; more ; comments ; ) )
3) ; final comment""", [1,2,3]),
]
def test(tests, name=''):
"For each (exp, expected) test case, see if eval(parse(exp)) == expected."
fails = 0
for (x, expected) in tests:
try:
result = eval(parse(x))
print x, '=>', to_string(result)
ok = (result == expected)
except Exception as e:
print x, '=raises=>', type(e).__name__, e
ok = issubclass(expected, Exception) and isinstance(e, expected)
if not ok:
fails += 1
print 'FAIL!!! Expected', expected
print '%s %s: %d out of %d tests fail.' % ('*'*45, name, fails, len(tests))
if __name__ == '__main__':
from lis import *
test(lis_tests, 'lis.py')
from lispy import *
test(lis_tests+lispy_tests, 'lispy.py')

pal.py (Normal file, 154 lines)
@@ -0,0 +1,154 @@
import string, random, os, re, bisect
"""Produce Panama-ish Palindromes. Copyright (C) 2002, Peter Norvig.
See http://www.norvig.com/license.html and http://www.norvig.com/pal-alg.html"""
def is_panama(p):
"Test if p is a Panama-ish palindrome."
def is_unique(seq): return len(seq) == len(dict(zip(seq, seq)))
return (p.endswith('Panama') and is_palindrome(p)
and is_unique([s.strip() for s in p.split(',')]))
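# For example: is_panama('A man, a plan, a canal, Panama') ==> True, while
# is_panama('A man, a canal, Panama') ==> False (dropping 'a plan' breaks the palindrome).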
def is_palindrome(phrase):
"Test if a phrase is a palindrome."
cphrase = canonical(phrase)
return cphrase == reverse(cphrase)
def canonical(word, sub=re.compile('[^A-Za-z0-9]').sub):
"The canonical form for comparing: lowercase alphanumerics."
return sub('', word).lower()
def read_dict(filename='npdict.txt'):
"Read the file into global variables _fw and _bw and _truename."
global _fw, _bw, _truename
_fw, _bw, _truename = [], [], {'': ''}
for word in open(filename).read().splitlines():
w = canonical(word)
_fw.append(w)
_bw.append(reverse(w))
_truename[w] = word
_fw.sort(); _bw.sort()
return len(_fw), len(_bw), len(_truename)
def update(obj, **entries): obj.__dict__.update(entries); return obj
class PalDict:
"""A dictionary from which you can find canonical words that start or end
with a given canonical substring, and find the true name of a
canonical word."""
def __init__(self, fw=None, bw=None, truename=None):
update(self, fw=fw or _fw, bw=bw or _bw, truename=truename or _truename)
def startswith(self, prefix, k=100):
"""Return up to k canonical words that start with prefix.
If there are more than k, choose from them at random."""
return k_startingwith(k, self.fw, prefix)
def endswith(self, suffix, k=100):
"""Return up to k canonical words that end with suffix.
If there are more than k, choose from them at random.
Both the suffix and the word returned are reversed."""
return k_startingwith(k, self.bw, suffix)
def k_startingwith(k, words, prefix):
"""Choose up to k words that match the prefix (choose randomly if > k)."""
start = bisect.bisect(words, prefix)
end = bisect.bisect(words, prefix + 'zzzz')
n = end - start
if k >= n:
results = words[start:end]
random.shuffle(results)
else: # Should really try to avoid duplicates
results = [words[random.randrange(start, end)] for i in range(k)]
return results
class Panama:
def __init__(self, L='A man, a plan', R='a canal, Panama', dict=None):
left = [canonical(w) for w in L.split(', ')]
right = [canonical(reverse(w)) for w in reverse(R.split(', '))]
update(self, left=left, right=right, dict=dict or PalDict(), best=0,
seen={}, diff=len(''.join(left)) - len(''.join(right)))
for word in left + map(reverse, right):
self.seen[word] = 1
def missing(self, k=20):
"""Return the substring that is missing, and candidate words."""
if self.diff >= 0: # Left is longer, missing on right
substr = self.left[-1][-self.diff:]
return substr, self.dict.endswith(substr, k)
else: # Right is longer, missing on left
substr = self.right[-1][self.diff:]
return substr, self.dict.startswith(substr, k)
def search(self, k=200):
"Search for palindromes; consider at most k words at each level."
self.stack = [self.missing(k)]
while self.stack:
substr, words = self.stack[-1]
if is_palindrome(substr):
self.report()
if words:
self.extend(words.pop(), k)
elif not self.backtrack():
return
def extend(self, word, k):
"Add a new word (unless we've already seen it)."
if self.diff >= 0: # Left is longer, add to right
fword = reverse(word)
if fword in self.seen: return
self.diff -= len(fword)
self.seen[fword] = 1
self.right.append(word)
self.stack.append(self.missing(k))
else: # Right is longer, add to left
if word in self.seen: return
self.diff += len(word)
self.seen[word] = 1
self.left.append(word)
self.stack.append(self.missing(k))
def backtrack(self):
"Remove the last word added; return 0 if can't backtrack"
if self.diff >= 0: # Left is longer, pop from left
if not self.left: return 0
word = self.left.pop()
self.diff -= len(word)
del self.seen[word]
else: # Right is longer, pop from right
if not self.right: return 0
word = self.right.pop()
self.diff += len(word)
del self.seen[reverse(word)]
self.stack.pop()
return 1
def report(self):
"Write current state to log file."
if len(self) > self.best + 200:
self.best = len(self)
print self.best
self.bestphrase = str(self)
assert is_panama(self.bestphrase)
f = open('pallog%d.txt' % os.getpid(), 'w')
f.write(self.bestphrase + '\n')
f.close()
def __len__(self):
return len(self.left) + len(self.right)
def __str__(self):
truename = self.dict.truename
lefts = [truename[w] for w in self.left]
rights = [truename[reverse(w)] for w in reverse(self.right[:])]
return ', '.join(lefts + ['*****'] + rights)
def reverse(x):
"Reverse a list or string."
if type(x) == type(''):
return ''.join(reverse(list(x)))
else:
x.reverse()
return x
if __name__ == '__main__': read_dict(); p = Panama(); p.search()

pal2.py (Normal file, 262 lines)
@@ -0,0 +1,262 @@
import random, re, bisect, time
"""Produce Panama-ish Palindromes. Copyright (C) 2002-2008, Peter Norvig."""
################ Checking for Palindromes
def is_panama(s):
"Test if string s is a Panama-ish palindrome."
return is_palindrome(s) and is_unique(phrases(s))
def is_palindrome(s):
"Test if a string is a palindrome."
s1 = canonical(s)
return s1 == reversestr(s1)
def phrases(s):
"Break a string s into comma-separated phrases."
return [phrase.strip() for phrase in s.split(',')]
def canonical(word, sub=re.compile('''[-* \t\n\r.,;!?:()`"']''').sub):
"The canonical form for comparing: lowercase, no blanks or punctuation."
return sub('', word).lower()
################ Utilities
def reversestr(x):
"Reverse a string."
return x[::-1]
def is_unique(seq):
"Return true if seq has no duplicate elements."
return len(seq) == len(set(seq))
def update(obj, **entries):
"Change attributes of obj, according to the keyword args."
obj.__dict__.update(entries)
return obj
################ Reading in a dictionary
class PalDict:
"""A dictionary from which you can find canonical words that start or end
with a given canonical substring, and find the true name of a
canonical word with d.truename[canonicalword]."""
def __init__(self, k=1000, filename='npdict.txt'):
words, rwords, truename = [], [], {'': '', 'panama': 'Panama!'}
for tword in open(filename).read().splitlines():
word = canonical(tword)
words.append(word)
rwords.append(reversestr(word))
truename[word] = tword
words.sort()
rwords.sort()
update(self, k=k, words=words, rwords=rwords, truename=truename,
reversibles={}, rangek=range(k), tryharder=False)
def startswith(self, prefix):
"""Return up to k canonical words that start with prefix.
If there are more than k, choose from them at random."""
return self._k_startingwith(self.words, prefix)
def endswith(self, rsuffix):
"""Return up to k canonical words that end with the reversed suffix.
If you want words ending in 'ing', ask for d.endswith('gni').
If there are more than k, choose from them at random."""
return map(reversestr, self._k_startingwith(self.rwords, rsuffix))
def __contains__(self, word):
return word in self.truename
def reversible_words(self):
"Find words that have a reverse in the dict, like {'Camus': 'Sumac'}"
if not self.reversibles:
reversibles = self.reversibles
for rw in self.rwords:
if rw in self:
w = reversestr(rw)
if w != rw and w not in reversibles:
reversibles[w] = rw
self.reversibles = reversibles
return self.reversibles
def _k_startingwith(self, words, prefix):
start = bisect.bisect_left(words, prefix)
end = bisect.bisect(words, prefix + 'zzzz')
n = end - start
if self.k >= n: # get all the words that start with prefix
results = words[start:end]
else: # sample from words starting with prefix
indexes = random.sample(xrange(start, end), self.k)
results = [words[i] for i in indexes]
random.shuffle(results)
## Consider words that are prefixes of the prefix.
## This is very slow, so don't use it until late in the game.
if self.tryharder:
for i in range(3, len(prefix)):
w = prefix[0:i]
if ((words == self.words and w in self.truename) or
(words == self.rwords and reversestr(w) in self.truename)):
results.append(w)
return results
paldict = PalDict()
def anpdictshort():
"Find the words that are valid when every phrase must start with 'a'"
def segment(word): return [s for s in word.split('a') if s]
def valid(word): return all(reversestr(s) in segments for s in segment(word))
words = map(canonical, file('anpdict.txt'))
segments = set(s for w in words for s in segment(canonical(w)))
valid_words = [paldict.truename[w] for w in words if valid(w)]
file('anpdict-short.txt', 'w').write('\n'.join(valid_words))
################ Search for a palindrome
class Panama:
def __init__(self, L='A man, a plan', R='a canal, Panama', dict=paldict):
## .left and .right hold lists of canonical words
## .diff holds the number of characters that are not matched,
## positive for words on left, negative for right.
## .stack holds (action, side, substr, arg) tuples
update(self, left=[], right=[], best=0, seen={}, diff=0, stack=[],
used_reversibles=False, starttime=time.clock(), dict=dict)
for word in L.split(','):
self.add('left', canonical(word))
for rword in reversestr(R).split(','):
self.add('right', canonical(reversestr(rword)))
self.consider_candidates()
def search(self, steps=50000000):
"Search for palindromes."
for _ in xrange(steps):
if not self.stack:
return 'done'
action, dir, substr, arg = self.stack[-1]
if action == 'added': # undo the last word added
self.remove(dir, arg)
elif action == 'trying' and arg: # try the next word if there is one
self.add(dir, arg.pop()) and self.consider_candidates()
elif action == 'trying' and not arg: # otherwise backtrack
self.stack.pop()
else:
raise ValueError(action)
def add(self, dir, word):
"add a word"
if word in self.seen:
return False
else:
getattr(self, dir).append(word)
self.diff += factor[dir] * len(word)
self.seen[word] = True
self.stack.append(('added', dir, '?', word))
return True
def remove(self, dir, word):
"remove a word"
oldword = getattr(self, dir).pop()
assert word == oldword
self.diff -= factor[dir] * len(word)
del self.seen[word]
self.stack.pop()
def consider_candidates(self):
"""Push a new state with a set of candidate words onto stack."""
if self.diff > 0: # Left is longer, consider adding on right
dir = 'right'
substr = self.left[-1][-self.diff:]
candidates = self.dict.endswith(substr)
elif self.diff < 0: # Right is longer, consider adding on left
dir = 'left'
substr = reversestr(self.right[-1][0:-self.diff])
candidates = self.dict.startswith(substr)
else: # Both sides are same size
dir = 'left'
if not self.used_reversibles:
self.report()
self.add_reversibles()
substr = ''
candidates = self.dict.startswith('')
if substr == reversestr(substr):
self.report()
self.stack.append(('trying', dir, substr, candidates))
def add_reversibles(self):
"Add in reversible words."
print 'using reversibles ...'
for (word, rword) in self.dict.reversible_words().items():
if word not in self.seen and rword not in self.seen:
self.add('left', word)
self.add('right', rword)
self.used_reversibles = True
self.stack = []
print '...done'
def report(self):
"Report a new palindrome to log file (if it is sufficiently big)."
N = len(self)
if N > 13333:
self.dict.tryharder = True
if N > self.best and (N > 12500 or N > self.best+500):
self.best = len(self)
self.bestphrase = str(self)
print '%5d phrases (%5d words) in %3d seconds' % (
self.best, self.bestphrase.count(' ')+1, time.clock() - self.starttime)
assert is_panama(self.bestphrase)
f = open('pallog%d.txt' % (id(self) % 10000), 'w')
f.write(self.bestphrase + '\n')
f.close()
def __len__(self):
return len(self.left) + len(self.right)
def __str__(self):
truename = self.dict.truename
lefts = [truename[w] for w in self.left]
rights =[truename[w] for w in self.right]
return ', '.join(lefts + rights[::-1])
factor = {'left': +1, 'right': -1}
# Note that we only allow one truename per canonical name. Occasionally
# this means we miss a good word (as in "a node" vs. "an ode"), but there
# are only 665 of these truename collisions, and most of them are of the
# form "a mark-up" vs. "a markup" so it seemed better to disallow them.
################ Unit Tests
def tests(p=Panama()):
assert is_panama('A man, a plan, a canal, Panama.')
assert is_panama('''A (man), a plan,,;, a ```canal?'' -- Panama!''')
assert not is_panama('A man, a plan, a radar, a canal, Panama.')
assert is_palindrome('A man, a plan, a canal, Panama.')
assert is_palindrome('radar, radar? radar!')
assert not is_palindrome('radars')
assert phrases('A man, a plan, Panama') == ['A man', 'a plan', 'Panama']
assert canonical('A man, a plan, a canal, Panama') == 'amanaplanacanalpanama'
assert reversestr('foo') == 'oof'
assert is_unique([1, 2, 3])
assert not is_unique([1, 2, 2])
d = p.dict
def sameset(a, b): return set(a) == set(b)
assert 'panama' in d
assert d.words[0] in d
assert d.words[-1] in d
assert sameset(d.startswith('aword'), ['awording', 'awordbreak',
'awordiness', 'awordage', 'awordplay', 'awordlore', 'awordbook',
'awordlessness', 'aword', 'awordsmith'])
assert sameset(d.endswith('ytisob'), ['aglobosity', 'averbosity',
'asubglobosity', 'anonverbosity', 'agibbosity'])
d.tryharder = True
assert sameset(d.startswith('oklahoma'), ['oklahoma', 'okla'])
d.tryharder = False
assert d.startswith('oklahoma') == ['oklahoma']
assert d.startswith('fsfdsfdsfds') == []
print 'all tests pass'
if __name__ == '__main__':
    p = Panama()
tests(p)
p.search()

170
pal3.py Normal file
View File

@ -0,0 +1,170 @@
from collections import Counter, deque
import re
class PhraseDict(dict):
"""A dictionary of {letters: phrase}, such as {'donaldeknuth': 'Donald E. Knuth'}, with:
.prefixes: Counter of {'pre': n} where n is the number of keys that start with 'pre'
.suffixes: Counter of {'xes': n} where n is the number of keys that end with 'xes'"""
def __init__(self, phrases):
for phrase in phrases:
phrase = phrase.strip()
self[letters(phrase)] = phrase
self.prefixes = Counter(x for p in self for x in prefixes(p))
self.suffixes = Counter(x for p in self for x in suffixes(p))
def prefixes(phrase): return [phrase[:i] for i in range(1, len(phrase) + 1)]
def suffixes(phrase): return [phrase[-i:] for i in range(1, len(phrase) + 1)]
def letters(phrase, sub=re.compile(r'[\W]+').sub):
"Remove all the non-letters from phrase; return lowercase version."
return sub('', phrase).lower()
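# A small illustrative check of the three helpers above (added for exposition;
# the original file keeps its tests in test1() below):
assert prefixes('plan') == ['p', 'pl', 'pla', 'plan']
assert suffixes('plan') == ['n', 'an', 'lan', 'plan']
assert letters('A plan!') == 'aplan'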
DICT = PhraseDict(open('npdict.txt'))
class Panama:
"""Panama represents a palindrome, or a state in searching for one.
It has .left and .right to hold the phrases that are chosen,
and .L and .R to hold the current partial phrases in the middle (still working on these).
Also, a .set of all complete phrases, and the .dict of allowable phrases to choose from."""
def __init__(self, left=['aman', 'aplan'], L='aca', R='', right=['acanal', 'panama'], dict=DICT):
assert cat(left + [L]) == cat([R] + right)[::-1]
self.left = list(left) # list of complete phrases on left
self.L = L # an incomplete phrase on left
self.R = R # an incomplete phrase on right
self.right = deque(right) # deque of complete phrases on right
self.dict = dict # a {letters: actual_phrase} mapping
self.set = set(left + right) # a set of all complete phrases in palindrome
self.best = [] # list of phrases in longest palindrome found
self.Nshown = 0 # the number of phrases shown in the previous printout
self.i = 0 # the number of steps taken in the search
self.check()
def __str__(self): return self.original_phrases(self.best)
def original_phrases(self, phrases): return ', '.join(self.dict[phrase] for phrase in phrases)
def search(self, steps=10**5):
"""Depth-first search for palindromes. From the current state, find all applicable actions.
Do the first one, and put on the stack reminders to undo it and try the others,
but first search deeper from the result of the first action."""
stack = [self.applicable_actions()]
for self.i in range(steps):
if not stack:
return
command = stack.pop()
if isinstance(command, UndoCommand):
self.undo(command)
elif command:
act = command.pop()
self.do(act)
self.check()
stack.extend([command, UndoCommand(act), self.applicable_actions()])
def do(self, act):
"Modify the current state by adding a letter, or finishing a phrase."
if act == ',': # finish phrase on left
self.set.add(self.L)
self.left.append(self.L)
self.L = ''
elif act == ';': # finish phrase on right
self.set.add(self.R)
self.right.appendleft(self.R)
self.R = ''
else: # add a letter
self.L = self.L + act
self.R = act + self.R
def undo(self, act):
"Modify the current state by undoing an action that was previously done."
if act == ',': # unfinish phrase on left
assert self.L == ''
self.L = self.left.pop()
self.set.remove(self.L)
elif act == ';': # unfinish phrase on right
assert self.R == ''
self.R = self.right.popleft()
self.set.remove(self.R)
else: # remove a letter
self.L = self.L[:-1]
self.R = self.R[1:]
def check(self):
"Check to see if current state is a palindrome, and if so, record it and maybe print."
if not self.is_palindrome(): return
N = len(self.left) + len(self.right)
if N > len(self.best):
self.best = self.left + list(self.right)
if N - self.Nshown > 1000 or (N > 14000 and N - self.Nshown > 100) or N > 14500:
self.Nshown = N
print(self.report())
def report(self):
N = len(self.best)
nwords = N + sum(self.dict[p].count(' ') for p in self.best)
nletters = sum(len(p) for p in self.best)
return ('Pal: {:6,d} phrases, {:6,d} words, {:6,d} letters (at step {:,d})'
.format(N, nwords, nletters, self.i+1))
def applicable_actions(self):
L, R, D = self.L, self.R, self.dict
actions = []
def score(A): return D.prefixes[L+A] * D.suffixes[A+R]
if self.is_allowed(L):
actions.append(',')
if self.is_allowed(R):
actions.append(';')
for A in sorted(alphabet, key=score):
if score(A) > 0:
actions.append(A)
return actions
def is_allowed(self, phrase): return phrase in self.dict and phrase not in self.set
def is_palindrome(self):
"Is this a palindrome? (Does any extra .L or .R match the other side?)"
return ((self.L == '' and self.left[-1].endswith(self.R)) or
(self.R == '' and self.right[0].startswith(self.L)))
alphabet = 'abcdefghijklmnopqrstuvwxyz'
cat = ''.join
UndoCommand = str
DoCommand = list
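# Illustrative round trip (added for exposition; assumes the default DICT has loaded):
#     p = Panama()              # left = ['aman', 'aplan'], L = 'aca', R = ''
#     p.do('n'); p.do('a')      # now L == 'acana' and R == 'an'
#     p.undo('a'); p.undo('n')  # back to L == 'aca', R == ''
# An UndoCommand on the search stack is just the action string to reverse;
# a DoCommand is the (mutable) list of remaining actions still to try.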
################ Unit Tests
def test1():
assert prefixes('hello') == ['h', 'he', 'hel', 'hell', 'hello']
assert suffixes('hello') == ['o', 'lo', 'llo', 'ello', 'hello']
assert letters('a man') == 'aman'
assert letters('an elk') == 'anelk'
assert letters('Mr. T') == 'mrt'
assert letters('Donald E. Knuth') == 'donaldeknuth'
assert len(DICT) == 125512
assert 'panama' in DICT
assert 'aman' in DICT
assert 'threemen' not in DICT
assert DICT['acanal'] == 'a canal'
return 'ok'
def test2():
p1 = Panama()
assert p1.is_palindrome()
assert str(p1) == 'a man, a plan, a canal, Panama'
p2 = Panama(['aman','aplan'], 'acadd','dd', ['acanal', 'panama'])
assert not p2.is_palindrome()
p3 = Panama(['maya'], '', '', ['ayam'])
assert p3.is_palindrome()
assert str(p3) == 'Maya, a yam'
return 'ok'
if __name__ == '__main__':
    p = Panama()
test1()
test2()
p.search(10**6)
print(p.report())
print(str(p))

52
parse.py Normal file
View File

@ -0,0 +1,52 @@
grammar = {
'Noun': ['stench', 'wumpus'],
'Verb': ['is', 'smell'],
'Adjective': ['dead', 'smelly'],
'Adverb': ['left', 'back'],
'Pronoun': ['me', 'you'],
'Name': ['John', 'Mary'],
'Article': ['the', 'a'],
'Preposition': ['to', 'in'],
'Conjunction': ['and', 'or'],
'Digit': ['0', '1'],
    'S': [['NP', 'VP'], ['S', 'Conjunction', 'S']],
'NP': ['Pronoun', 'Noun', ['Article', 'Noun'], ['Digit', 'Digit'],
['NP', 'PP'], ['NP', 'RelClause']],
'VP': ['Verb', ['VP', 'NP'], ['VP', 'Adjective'], ['VP', 'PP'],
['VP', 'Adverb']],
'PP': [['Preposition', 'NP']],
'RelClause': [['that', 'VP']]
}
def parse(forest, grammar):
if len(forest) == 1 and category(forest[0]) == 'S':
return forest[0]
for i in range(len(forest)):
for lhs in grammar.keys():
for rhs in grammar[lhs]:
rhs = mklist(rhs)
n = len(rhs)
subsequence = forest[i:i+n]
if match(subsequence, rhs):
print subsequence, lhs, '=>', rhs
forest2 = forest[:]
forest2[i:i+n] = [(lhs, subsequence)]
result = parse(forest2, grammar)
if result != None:
return result
return None
def mklist(x):
if type(x) == type([]): return x
else: return [x]
def match(forest, rhs):
for i in range(len(rhs)):
if category(forest[i]) != rhs[i] and forest[i] != rhs[i]: return 0
return 1
def category(forest):
if type(forest) == type(()): return forest[0]
else: return 'word'
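# Minimal demo (added for illustration; not part of the original file). parse() does a
# naive bottom-up search and prints every reduction it tries before returning the first
# complete parse it finds, a nested tuple rooted at 'S'.
if __name__ == '__main__':
    print parse(['the', 'wumpus', 'is', 'dead'], grammar)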

110
py2html.py Normal file
View File

@ -0,0 +1,110 @@
"""Pretty-print Python code to colorized, hyperlinked html.
In Python, do:
py2html.convert_files(['file1.py', 'file2.py', ...])
From the shell, do:
python py2html.py *.py"""
import re, string, time, os
id = r'[a-zA-Z_][a-zA-Z_0-9]*' ## RE for a Python identifier
g1, g2, g3, g4 = r'\1 \2 \3 \4'.split() ## groups for re.matches
def b(text): return '<b>%s</b>' % text
def i(text): return '<i>%s</i>' % text
def color(rgb, text): return '<font color="%s">%s</font>' % (rgb, text)
def link(url, anchor): return '<a href="%s">%s</a>' % (url, anchor)
def hilite(text, bg="ffff00"):
return '<b style="background-color:%s"><a name="%s">%s</b>' % (
bg, text, text)
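## Illustrative examples of the helpers above (added; not in the original file):
##   b('def')                    => '<b>def</b>'
##   color('green', 'comment')   => '<font color="green">comment</font>'
##   link('spell.html', 'spell') => '<a href="spell.html">spell</a>'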
def modulelink(module, baseurl=''):
"""Hyperlink to a module, either locally or on python.org"""
if module+'.py' not in local_files:
baseurl = 'http://www.python.org/doc/current/lib/module-'
return link(baseurl+module+'.html', module)
def importer(m):
"Turn text such as 'utils, math, re' into a string of HTML links."
modules = [modulelink(mod.strip()) for mod in m.group(2).split(',')]
return (m.group(1) + ', '.join(modules) + m.group(3))
def find1(regex, str):
return (re.findall(regex, str) or ['&nbsp;'])[0]
def convert_files(filenames, local_filenames=None, tblfile='readme.htm'):
"Convert files of python code to colorized HTML."
global local_files
local_files = local_filenames or filenames
summary_table = {}
for f in filenames:
fulltext = '\n'.join(map(string.rstrip, open(f).readlines()))
text = fulltext
for (pattern, repl) in replacements:
text = re.sub(pattern, repl, text)
text = '<<header("AIMA Python file: %s")>><pre>%s</pre><<footer>>' % (
f, text)
open(f[:-3]+'.htm', 'w').write(text)
if tblfile:
ch = find1(r'Chapters?\s+([^ \)"]*)', fulltext)
module = f.replace('.py','')
lines = fulltext.count('\n')
desc = find1(r'"""(.*)\n', fulltext).replace('"""', '')
summary_table.setdefault(ch,[]).append((module, lines, desc))
if tblfile:
totallines = 0
tbl = ["<tr><th>Chapter<th>Module<th>Files<th>Lines<th>Description"]
fmt = "<tr><td align=right>%s<th>%s<td>%s<td align=right>%s<td>%s"
items = summary_table.items(); items.sort(num_cmp)
for (ch, entries) in items:
for (module, lines, desc) in entries:
totallines += lines
files = link(module+'.py', '.py')
if os.path.exists(module+'.txt'):
files += ' ' + link(module+'.txt', '.txt')
tbl += [fmt % (ch, link(module+'.html', module),
files, lines, desc)]
tbl += [fmt % ('', '', '', totallines, ''), "</table>"]
## Now read the tblfile, and replace the first table with tbl
old = open(tblfile).read()
new = re.sub("(?s)(<table border=1>)(.*)(</table>)",
r'\1' + '\n'.join(tbl) + r'\3', old, 1)
open(tblfile, 'w').write(new)
def num_cmp(x, y):
def num(x):
nums = re.findall('[0-9]+', x or '')
if nums: return int(nums[0])
return x
return cmp(num(x[0]), num(y[0]))
### Above is general (more or less); below is specific to my files.
def comment(text): return i(color("green", text))
replacements = [
(r'&', '&amp;'),
(r'<', '&lt;'),
(r'>', '&gt;'),
(r'(?ms)^#+[#_]{10,} *\n', '<hr>'),
(r"""('[^']*?'|"[^"]*?")""", comment(g1)),
(r'(?s)(""".*?"""|' + r"'''.*?''')", comment(g1)),
(r'(#.*)', color("cc33cc", g1)),
(r'(?m)(^[a-zA-Z][a-zA-Z_0-9, ]+)(\s+=\s+)', hilite(g1) + g2),
(r'(?m)(^\s*)(def\s+)(%s)' % id, g1 + b(g2) + hilite(g3)),
(r'(?m)(^\s*)(class\s+)(%s)' % id, g1 + b(g2) + hilite(g3)),
(r'(from\s+)([a-z]+)(\s+import)', importer),
(r'(import\s+)([a-z, ]+)(\s|\n|$|,)', importer),
]
if __name__ == '__main__':
import sys, glob
files = []
for arg in sys.argv[1:]:
files.extend(glob.glob(arg))
convert_files(files)
## ENHANCEMENTS:
## Can get confused with """ and '''; not a problem in practice.
## Maybe we should create an index
## Probably should switch to Doxygen

106
spell.py Normal file
View File

@ -0,0 +1,106 @@
"""Spelling Corrector in Python 3; see http://norvig.com/spell-correct.html
Copyright (c) 2007-2016 Peter Norvig
MIT license: www.opensource.org/licenses/mit-license.php
"""
################ Spelling Corrector
import re
from collections import Counter
def words(text): return re.findall(r'\w+', text.lower())
WORDS = Counter(words(open('big.txt').read()))
def P(word, N=sum(WORDS.values())):
"Probability of `word`."
return WORDS[word] / N
def correction(word):
"Most probable spelling correction for word."
return max(candidates(word), key=P)
def candidates(word):
"Generate possible spelling corrections for word."
return (known([word]) or known(edits1(word)) or known(edits2(word)) or [word])
def known(words):
"The subset of `words` that appear in the dictionary of WORDS."
return set(w for w in words if w in WORDS)
def edits1(word):
"All edits that are one edit away from `word`."
letters = 'abcdefghijklmnopqrstuvwxyz'
splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
deletes = [L + R[1:] for L, R in splits if R]
transposes = [L + R[1] + R[0] + R[2:] for L, R in splits if len(R)>1]
replaces = [L + c + R[1:] for L, R in splits if R for c in letters]
inserts = [L + c + R for L, R in splits for c in letters]
return set(deletes + transposes + replaces + inserts)
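# A quick count (added for exposition; it follows directly from the definition above):
# for a word of length n, edits1 builds n deletes, n-1 transposes, 26*n replaces and
# 26*(n+1) inserts, i.e. 54*n + 25 strings in all before set() removes duplicates.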
def edits2(word):
"All edits that are two edits away from `word`."
return (e2 for e1 in edits1(word) for e2 in edits1(e1))
################ Test Code
def unit_tests():
assert correction('speling') == 'spelling' # insert
assert correction('korrectud') == 'corrected' # replace 2
assert correction('bycycle') == 'bicycle' # replace
assert correction('inconvient') == 'inconvenient' # insert 2
assert correction('arrainged') == 'arranged' # delete
assert correction('peotry') =='poetry' # transpose
assert correction('peotryy') =='poetry' # transpose + delete
assert correction('word') == 'word' # known
assert correction('quintessential') == 'quintessential' # unknown
assert words('This is a TEST.') == ['this', 'is', 'a', 'test']
assert Counter(words('This is a test. 123; A TEST this is.')) == (
Counter({'123': 1, 'a': 2, 'is': 2, 'test': 2, 'this': 2}))
assert len(WORDS) == 32192
assert sum(WORDS.values()) == 1115504
assert WORDS.most_common(10) == [
('the', 79808),
('of', 40024),
('and', 38311),
('to', 28765),
('in', 22020),
('a', 21124),
('that', 12512),
('he', 12401),
('was', 11410),
('it', 10681)]
assert WORDS['the'] == 79808
assert P('quintessential') == 0
assert 0.07 < P('the') < 0.08
return 'unit_tests pass'
def spelltest(tests, verbose=False):
"Run correction(wrong) on all (right, wrong) pairs; report results."
import time
start = time.clock()
good, unknown = 0, 0
n = len(tests)
for right, wrong in tests:
w = correction(wrong)
good += (w == right)
if w != right:
unknown += (right not in WORDS)
if verbose:
print('correction({}) => {} ({}); expected {} ({})'
.format(wrong, w, WORDS[w], right, WORDS[right]))
dt = time.clock() - start
print('{:.0%} of {} correct ({:.0%} unknown) at {:.0f} words per second '
.format(good / n, n, unknown / n, n / dt))
def Testset(lines):
"Parse 'right: wrong1 wrong2' lines into [('right', 'wrong1'), ('right', 'wrong2')] pairs."
return [(right, wrong)
for (right, wrongs) in (line.split(':') for line in lines)
for wrong in wrongs.split()]
if __name__ == '__main__':
print(unit_tests())
spelltest(Testset(open('spell-testset1.txt')))
spelltest(Testset(open('spell-testset2.txt')))

201
sudoku.py Normal file
View File

@ -0,0 +1,201 @@
## Solve Every Sudoku Puzzle
## See http://norvig.com/sudoku.html
## Throughout this program we have:
## r is a row, e.g. 'A'
## c is a column, e.g. '3'
## s is a square, e.g. 'A3'
## d is a digit, e.g. '9'
## u is a unit, e.g. ['A1','B1','C1','D1','E1','F1','G1','H1','I1']
## grid is a grid, e.g. 81 non-blank chars, e.g. starting with '.18...7...
## values is a dict of possible values, e.g. {'A1':'12349', 'A2':'8', ...}
def cross(A, B):
"Cross product of elements in A and elements in B."
return [a+b for a in A for b in B]
digits = '123456789'
rows = 'ABCDEFGHI'
cols = digits
squares = cross(rows, cols)
unitlist = ([cross(rows, c) for c in cols] +
[cross(r, cols) for r in rows] +
[cross(rs, cs) for rs in ('ABC','DEF','GHI') for cs in ('123','456','789')])
units = dict((s, [u for u in unitlist if s in u])
for s in squares)
peers = dict((s, set(sum(units[s],[]))-set([s]))
for s in squares)
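## Worked example (added for exposition; it matches the unit tests below): every square
## belongs to exactly 3 units (its row, its column and its box) and has 20 distinct
## peers, so len(units['C2']) == 3 and len(peers['C2']) == 20.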
################ Unit Tests ################
def test():
"A set of tests that must pass."
assert len(squares) == 81
assert len(unitlist) == 27
assert all(len(units[s]) == 3 for s in squares)
assert all(len(peers[s]) == 20 for s in squares)
assert units['C2'] == [['A2', 'B2', 'C2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2'],
['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9'],
['A1', 'A2', 'A3', 'B1', 'B2', 'B3', 'C1', 'C2', 'C3']]
assert peers['C2'] == set(['A2', 'B2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2',
'C1', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9',
'A1', 'A3', 'B1', 'B3'])
print 'All tests pass.'
################ Parse a Grid ################
def parse_grid(grid):
"""Convert grid to a dict of possible values, {square: digits}, or
return False if a contradiction is detected."""
## To start, every square can be any digit; then assign values from the grid.
values = dict((s, digits) for s in squares)
for s,d in grid_values(grid).items():
if d in digits and not assign(values, s, d):
return False ## (Fail if we can't assign d to square s.)
return values
def grid_values(grid):
"Convert grid into a dict of {square: char} with '0' or '.' for empties."
chars = [c for c in grid if c in digits or c in '0.']
assert len(chars) == 81
return dict(zip(squares, chars))
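## Illustrative example (added): for grid1, defined near the bottom of this file,
## grid_values(grid1)['A3'] == '3' and grid_values(grid1)['A1'] == '0' (an empty square).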
################ Constraint Propagation ################
def assign(values, s, d):
"""Eliminate all the other values (except d) from values[s] and propagate.
Return values, except return False if a contradiction is detected."""
other_values = values[s].replace(d, '')
if all(eliminate(values, s, d2) for d2 in other_values):
return values
else:
return False
def eliminate(values, s, d):
"""Eliminate d from values[s]; propagate when values or places <= 2.
Return values, except return False if a contradiction is detected."""
if d not in values[s]:
return values ## Already eliminated
values[s] = values[s].replace(d,'')
## (1) If a square s is reduced to one value d2, then eliminate d2 from the peers.
if len(values[s]) == 0:
return False ## Contradiction: removed last value
elif len(values[s]) == 1:
d2 = values[s]
if not all(eliminate(values, s2, d2) for s2 in peers[s]):
return False
## (2) If a unit u is reduced to only one place for a value d, then put it there.
for u in units[s]:
dplaces = [s for s in u if d in values[s]]
if len(dplaces) == 0:
return False ## Contradiction: no place for this value
elif len(dplaces) == 1:
# d can only be in one place in unit; assign it there
if not assign(values, dplaces[0], d):
return False
return values
################ Display as 2-D grid ################
def display(values):
"Display these values as a 2-D grid."
width = 1+max(len(values[s]) for s in squares)
line = '+'.join(['-'*(width*3)]*3)
for r in rows:
print ''.join(values[r+c].center(width)+('|' if c in '36' else '')
for c in cols)
if r in 'CF': print line
print
################ Search ################
def solve(grid): return search(parse_grid(grid))
def search(values):
"Using depth-first search and propagation, try all possible values."
if values is False:
return False ## Failed earlier
if all(len(values[s]) == 1 for s in squares):
return values ## Solved!
    ## Choose the unfilled square s with the fewest possibilities
n,s = min((len(values[s]), s) for s in squares if len(values[s]) > 1)
return some(search(assign(values.copy(), s, d))
for d in values[s])
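## Note (added for exposition): the min() over (len(values[s]), s) pairs above is the
## "minimum remaining values" heuristic: a square left with '49' (2 possibilities)
## is tried before one left with '1379' (4 possibilities).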
################ Utilities ################
def some(seq):
"Return some element of seq that is true."
for e in seq:
if e: return e
return False
def from_file(filename, sep='\n'):
"Parse a file into a list of strings, separated by sep."
return file(filename).read().strip().split(sep)
def shuffled(seq):
"Return a randomly shuffled copy of the input sequence."
seq = list(seq)
random.shuffle(seq)
return seq
################ System test ################
import time, random
def solve_all(grids, name='', showif=0.0):
"""Attempt to solve a sequence of grids. Report results.
When showif is a number of seconds, display puzzles that take longer.
When showif is None, don't display any puzzles."""
def time_solve(grid):
start = time.clock()
values = solve(grid)
t = time.clock()-start
## Display puzzles that take long enough
if showif is not None and t > showif:
display(grid_values(grid))
if values: display(values)
print '(%.2f seconds)\n' % t
return (t, solved(values))
times, results = zip(*[time_solve(grid) for grid in grids])
N = len(grids)
if N > 1:
print "Solved %d of %d %s puzzles (avg %.2f secs (%d Hz), max %.2f secs)." % (
sum(results), N, name, sum(times)/N, N/sum(times), max(times))
def solved(values):
"A puzzle is solved if each unit is a permutation of the digits 1 to 9."
def unitsolved(unit): return set(values[s] for s in unit) == set(digits)
return values is not False and all(unitsolved(unit) for unit in unitlist)
def random_puzzle(N=17):
"""Make a random puzzle with N or more assignments. Restart on contradictions.
Note the resulting puzzle is not guaranteed to be solvable, but empirically
about 99.8% of them are solvable. Some have multiple solutions."""
values = dict((s, digits) for s in squares)
for s in shuffled(squares):
if not assign(values, s, random.choice(values[s])):
break
ds = [values[s] for s in squares if len(values[s]) == 1]
if len(ds) >= N and len(set(ds)) >= 8:
return ''.join(values[s] if len(values[s])==1 else '.' for s in squares)
return random_puzzle(N) ## Give up and make a new puzzle
grid1 = '003020600900305001001806400008102900700000008006708200002609500800203009005010300'
grid2 = '4.....8.5.3..........7......2.....6.....8.4......1.......6.3.7.5..2.....1.4......'
hard1 = '.....6....59.....82....8....45........3........6..3.54...325..6..................'
if __name__ == '__main__':
test()
solve_all(from_file("easy50.txt", '========'), "easy", None)
solve_all(from_file("top95.txt"), "hard", None)
solve_all(from_file("hardest.txt"), "hardest", None)
solve_all([random_puzzle() for _ in range(99)], "random", 100.0)
## References used:
## http://www.scanraid.com/BasicStrategies.htm
## http://www.sudokudragon.com/sudokustrategy.htm
## http://www.krazydad.com/blog/2005/09/29/an-index-of-sudoku-strategies/
## http://www2.warwick.ac.uk/fac/sci/moac/currentstudents/peter_cock/python/sudoku/

73
testaccum.py Normal file
View File

@ -0,0 +1,73 @@
from __future__ import division
import re
from accum import *
acc_re = re.compile("[[](.+):(.+) for (.+) in (.+)[]]")
def expand_accumulations(program_text):
"""Replace any accumulation displays in program_text with calls to
accumulation. Used to simulate a hypothetical Python interpreter that
    actually handles accumulation displays. This one is rather poor: it
    won't match across lines, it won't match nested accumulation displays,
    and it doesn't handle multiple 'for' clauses or 'if' clauses."""
def _(matchobj):
(acc, exp, x, it) = matchobj.groups()
return "accumulation(%s, lambda %s: (%s), %s)" % (acc, x, exp, it)
return acc_re.sub(_, program_text)
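# Illustrative example (added; not in the original file): the rewrite performed above
# turns "[Sum: x*x for x in data]" into a call of the form
# "accumulation(Sum, lambda x: ( x*x), data)" (whitespace aside), which test1() then evals.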
def test1(acc_display, expected):
"Eval an accumulation display and see if it gets the expected answer."
print acc_display
result = eval(expand_accumulations(acc_display))
assert result == expected, ('Got %s; expected %s' % (result, expected))
print ' ==> %s' % result
#### Initialize some data
temp = [70, 70, 71, 74, 76, 76, 72, 76, 77, 77, 77, 78,
78, 79, 79, 79, 78, 80, 82, 83, 83, 81, 84, 83]
data = temp
def f(x): return 2 * x
votes = {'Arnie': 48, 'Gray': 45, 'Tom': 13, 'Cruz': 32, 'Peter': 3}
candidates = votes.keys()
def test():
print 'temp = ', temp
print 'data = temp'
print 'votes = ', votes
print 'candidates = ', candidates
print
#### Test some accumulation displays
test1("[Max: temp[hour] for hour in range(24)]",
max([temp[hour] for hour in range(24)]))
test1("[Min: temp[hour] for hour in range(24)]",
min([temp[hour] for hour in range(24)]))
test1("[Sum: x*x for x in data]",
sum([x*x for x in data]))
test1("[Mean: f(x) for x in data]",
sum([f(x) for x in data])/len(data))
test1("[Median: f(x) for x in data]",
156.0)
test1("[Mode: f(x) for x in data]",
166)
test1("[Argmax: votes[c] for c in candidates]",
'Arnie')
test1("[Argmin: votes[c] for c in candidates]",
'Peter')
test1("[Some: temp[hour] > 75 for hour in range(24)]",
          len([hour for hour in range(24) if temp[hour] > 75]) > 0)
test1("[Every: temp[hour] > 75 for hour in range(24)]",
len([h for h in range(24) if temp[h] > 75]) == 24)
test1("[Top(10): temp[hour] for hour in range(24)]",
[84, 83, 83, 83, 82, 81, 80, 79, 79, 79])
test1("[Join(', '): votes[c] for c in candidates]",
', '.join([str(votes[c]) for c in candidates]))
test1("[SortBy: abs(x) for x in (-2, -4, 3, 1)]",
[1, -2, 3, -4])
test1("[SortBy(reverse=True): abs(x) for x in (-2, -4, 3, 1)]",
[-4, 3, -2, 1])
if __name__ == "__main__":
test()