Add subdirectories

Add /ipynb/ and /py/ subdirectories to keep the home page neater.
This commit is contained in:
Peter Norvig
2017-10-23 10:32:23 -07:00
parent ff96ec21ba
commit 88819c4cd0
61 changed files with 48 additions and 46 deletions

134
py/SET.py Normal file
View File

@@ -0,0 +1,134 @@
import random
import collections
import itertools
"""
Game of Set (Peter Norvig 2010-2015)
How often do sets appear when we deal an array of cards?
How often in the course of playing out the game?
Here are the data types we will use:
card: A string, such as '3R=0', meaning "three red striped ovals".
deck: A list of cards, initially of length 81.
layout: A list of cards, initially of length 12.
set: A tuple of 3 cards.
Tallies: A dict: {12: {True: 33, False: 1}} means a layout of size 12
tallied 33 sets and 1 non-set.
"""
#### Cards, dealing cards, and defining the notion of sets.
# Each card is a 4-character string with one character per feature, in the
# order number, color, shade, symbol.  Three values per feature gives the
# full deck of 3**4 == 81 distinct cards.
CARDS = [''.join(features)
         for features in itertools.product('123', 'RGP', '@O=', '0SD')]
def deal(n, deck):
    """Deal n cards from the deck.

    Cards are popped off the end of `deck` (which is mutated) and returned
    as a list in the order they were popped.
    """
    hand = []
    for _ in range(n):
        hand.append(deck.pop())
    return hand
def is_set(cards):
    """Are these 3 cards a set?

    For each of the 4 feature positions the cards must show either one
    value (all same) or three values (all different); a feature with
    exactly 2 distinct values disqualifies the cards.
    """
    return all(len({card[f] for card in cards}) != 2
               for f in range(4))
def find_set(layout):
    """Return a set found from this layout, if there is one.

    The result is the first 3-card combination (in itertools.combinations
    order) accepted by is_set, or the empty tuple when there is none.
    """
    candidates = itertools.combinations(layout, 3)
    return next((trio for trio in candidates if is_set(trio)), ())
#### Tallying set:no-set ratio
def Tallies():
    """Make the tally structure: {layout_size: {True: n_sets, False: n_nosets}}.

    Sizes are created on first access with both counts at zero.
    """
    def fresh_counts():
        return {True: 0, False: 0}

    return collections.defaultdict(fresh_counts)
def tally(tallies, layout):
    """Record whether a set was found in a layout of this size; return the set.

    Increments tallies[len(layout)][True] when find_set finds a set and
    tallies[len(layout)][False] otherwise.  Returns whatever find_set
    returned (a 3-tuple of cards, or the empty tuple).
    """
    found = find_set(layout)
    has_set = bool(found)
    tallies[len(layout)][has_set] += 1
    return found
#### Three experiments
def tally_initial_layout(N, sizes=(12, 15)):
    """Record tallies for N initial deals.

    For each of N shuffles of the full deck, tally the first `size` cards
    of the shuffled deck for every size in `sizes`.  Returns the Tallies.
    """
    tallies = Tallies()
    deck = list(CARDS)
    for _ in range(N):
        random.shuffle(deck)
        for size in sizes:
            layout = deck[:size]
            tally(tallies, layout)
    return tallies
def tally_initial_layout_no_prior_sets(N, sizes=(12, 15)):
    """Simulate N initial deals for each size, keeping tallies for Sets and
    NoSets, but only when there was no set with 3 fewer cards.

    A layout of `size` cards is tallied only if the first size-3 cards of
    the same shuffle contain no set.  Returns the Tallies.
    """
    tallies = Tallies()
    deck = list(CARDS)
    for _ in range(N):
        random.shuffle(deck)
        for size in sizes:
            if find_set(deck[:size - 3]):
                continue  # a set already existed before the last 3 cards
            tally(tallies, deck[:size])
    return tallies
def tally_game_play(N):
    """Record tallies for the play of N complete games.

    Each game starts from a freshly shuffled deck and a 12-card layout.
    While cards remain in the deck, the layout is tallied, the cards of any
    set found are removed, and 3 more cards are dealt whenever no set was
    found or the layout has dropped below 12 cards.
    """
    tallies = Tallies()
    for _ in range(N):
        deck = list(CARDS)
        random.shuffle(deck)
        layout = deal(12, deck)
        while deck:
            found = tally(tallies, layout)
            # Pick up the cards in the set, if any.
            for card in found:
                layout.remove(card)
            # Deal new cards.
            if not found or len(layout) < 12:
                layout.extend(deal(3, deck))
    return tallies
def experiments(N):
    """Run and print the three experiments, preceded by the booklet's odds.

    The booklet table uses [nosets, sets] lists; show() indexes them with
    False/True, i.e. positions 0/1.  Game play is run for N // 25 games.
    """
    booklet_odds = {12: [1, 33], 15: [1, 2500]}
    show(booklet_odds, 'the instruction booklet')

    initial = tally_initial_layout(N)
    show(initial, 'initial layout')

    played = tally_game_play(N // 25)
    show(played, 'game play')

    no_prior = tally_initial_layout_no_prior_sets(N)
    show(no_prior, 'initial layout, but no sets before dealing last 3 cards')
def show(tallies, label):
    """Print out the counts.

    Prints one row per layout size (ascending) with the number of sets,
    the number of no-sets, and the rounded Set:NoSet ratio ('inft' when
    there were no no-sets).
    """
    row_format = '{:4d} |{:7,d} |{:7,d} | {:4}:1'
    print()
    print('Size | Sets | NoSets | Set:NoSet ratio for', label)
    print('-----+--------+--------+----------------')
    for size in sorted(tallies):
        counts = tallies[size]
        sets, nosets = counts[True], counts[False]
        if nosets == 0:
            ratio = 'inft'
        else:
            ratio = int(round(float(sets) / nosets))
        print(row_format.format(size, sets, nosets, ratio))
def test():
    """Sanity checks for the deck, is_set, find_set and combinations."""
    # The deck has 81 cards, all distinct.
    assert len(CARDS) == 81 == len(set(CARDS))

    # is_set: every feature all-same or all-different.
    assert is_set(('3R=O', '2R=S', '1R=D'))
    assert not is_set(('3R=0', '2R=S', '1R@D'))

    # find_set returns the first qualifying triple, or a falsy value.
    with_set = ['1PO0', '2G=D', '3R=0', '2R=S', '1R=D']
    without_set = ['1PO0', '2G=D', '3R=0', '2R=S', '1R@D']
    assert find_set(with_set) == ('3R=0', '2R=S', '1R=D')
    assert not find_set(without_set)

    # A 15-card layout that contains no set.
    photo = '2P=0 3P=D 2R=0 3GO0 2POD 3R@D 2RO0 2ROS 1P@S 2P@0 3ROS 2GOD 2P@D 1GOD 3GOS'.split()
    assert not find_set(photo)

    # combinations yields each unordered triple exactly once.
    triples = set(itertools.combinations([1, 2, 3, 4], 3))
    assert triples == {(1, 2, 3), (1, 2, 4), (1, 3, 4), (2, 3, 4)}
    print('All tests pass.')
# Module-level script: run the self-checks, then the three experiments
# with N = 100000 (so tally_game_play is run for N // 25 = 4000 games).
test()
experiments(100000)

159
py/beal.py Normal file
View File

@@ -0,0 +1,159 @@
"""Search for counterexamples to Beal's conjecture
See http://norvig.com/beal.html and http://www.bealconjecture.com"""
from __future__ import division, print_function
from math import log
from itertools import combinations, product
from collections import defaultdict
try:
from math import gcd # For Python 3.5 and up
except ImportError:
from fractions import gcd # For older versions (works in 2.7 as well)
def beal(max_A, max_x):
    """See if any A ** x + B ** y equals some C ** z, with gcd(A, B) == 1.
    Consider any 1 <= A,B <= max_A and x,y <= max_x, with x,y prime or 4.

    Every hit is printed as an equation; nothing is returned.
    """
    Apowers = make_Apowers(max_A, max_x)
    Czroots = make_Czroots(Apowers)
    report = '{} ** {} + {} ** {} == {} ** {} == {}'
    for (A, B) in combinations(Apowers, 2):
        if gcd(A, B) != 1:
            continue  # only coprime bases can be counterexamples
        for (Ax, By) in product(Apowers[A], Apowers[B]):
            Cz = Ax + By
            if Cz not in Czroots:
                continue
            C = Czroots[Cz]
            x = exponent(Ax, A)
            y = exponent(By, B)
            z = exponent(Cz, C)
            print(report.format(A, x, B, y, C, z, C ** z))
def make_Apowers(max_A, max_x):
    """A dict of {A: [A**3, A**4, ...], ...} for A in 1..max_A.

    The exponents come from exponents_upto(max_x); A == 1 gets the single
    exponent 3, since every power of 1 is 1.
    """
    exponents = exponents_upto(max_x)
    Apowers = {}
    for A in range(1, max_A + 1):
        xs = [3] if A == 1 else exponents
        Apowers[A] = [A ** x for x in xs]
    return Apowers
def make_Czroots(Apowers):
    """Invert Apowers into a dict of {C**z: C}.

    If the same power value appears under several bases, the base visited
    last (in Apowers iteration order) is the one kept.
    """
    Czroots = {}
    for C in Apowers:
        for Cz in Apowers[C]:
            Czroots[Cz] = C
    return Czroots
def exponents_upto(max_x):
    """Return all odd primes up to max_x (inclusive), as well as 4.

    The list is in increasing order: [] for max_x < 3, [3] for max_x == 3,
    and [3, 4, 5, 7, 11, ...] otherwise.
    """
    exponents = [3, 4] if max_x >= 4 else [3] if max_x == 3 else []
    # The upper bound is max_x + 1 so that max_x itself is considered; a
    # bound of max_x would silently drop it when it is an odd prime.
    for x in range(5, max_x + 1, 2):
        # x is odd, so the 4 in the list never divides it; x is prime
        # exactly when no smaller odd prime divides it.
        if not any(x % p == 0 for p in exponents):
            exponents.append(x)
    return exponents
def exponent(Cz, C):
    """Recover z such that C ** z == Cz (or equivalently z = log Cz base C).
    For exponent(1, 1), arbitrarily choose to return 3.

    The logarithm is computed in floating point and rounded to the
    nearest integer.
    """
    if Cz == C == 1:
        return 3
    z = log(Cz, C)
    return int(round(z))
##############################################################################
def tests():
    """Run the module's sanity checks; return 'tests pass' if none fails."""
    # Power tables.
    expected_Apowers = {
        1: [1],
        2: [8, 16, 32, 128],
        3: [27, 81, 243, 2187],
        4: [64, 256, 1024, 16384],
        5: [125, 625, 3125, 78125],
        6: [216, 1296, 7776, 279936]}
    assert make_Apowers(6, 10) == expected_Apowers

    # Inverse table from power to base.
    expected_Czroots = {
        1: 1, 8: 2, 16: 2, 27: 3, 32: 2, 64: 4, 81: 3,
        125: 5, 128: 2, 243: 3, 256: 4, 625: 5, 1024: 4,
        2187: 3, 3125: 5, 16384: 4, 78125: 5}
    assert make_Czroots(make_Apowers(5, 8)) == expected_Czroots

    Czroots = make_Czroots(make_Apowers(100, 100))
    assert 3 ** 3 + 6 ** 3 in Czroots
    assert 99 ** 97 in Czroots
    assert 101 ** 100 not in Czroots
    assert Czroots[99 ** 97] == 99

    # Recovering exponents, including very large powers.
    assert exponent(10 ** 5, 10) == 5
    assert exponent(7 ** 3, 7) == 3
    assert exponent(1234 ** 999, 1234) == 999
    assert exponent(12345 ** 6789, 12345) == 6789
    assert exponent(3 ** 10000, 3) == 10000
    assert exponent(1, 1) == 3

    # Allowed exponents: odd primes and 4.
    assert exponents_upto(2) == []
    assert exponents_upto(3) == [3]
    assert exponents_upto(4) == [3, 4]
    assert exponents_upto(40) == [3, 4, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37]
    assert exponents_upto(100) == [
        3, 4, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61,
        67, 71, 73, 79, 83, 89, 97]

    # gcd, whichever module it was imported from.
    assert gcd(3, 6) == 3
    assert gcd(3, 7) == 1
    assert gcd(861591083269373931, 94815872265407) == 97
    assert gcd(2*3*5*(7**10)*(11**12), 3*(7**5)*(11**13)*17) == 3*(7**5)*(11**12)
    return 'tests pass'
##############################################################################
def beal_modp(max_A, max_x, p=2**31-1):
    """See if any A ** x + B ** y equals some C ** z (mod p), with gcd(A, B) == 1.
    If so, verify that the equation works without the (mod p).
    Consider any 1 <= A,B <= max_A and x,y <= max_x, with x,y prime or 4.

    Verified hits are printed as equations; nothing is returned.
    """
    assert p >= max_A
    Apowers = make_Apowers_modp(max_A, max_x, p)
    Czroots = make_Czroots_modp(Apowers)
    report = '{} ** {} + {} ** {} == {} ** {} == {}'
    for (A, B) in combinations(Apowers, 2):
        if gcd(A, B) != 1:
            continue
        for (Axp, x), (Byp, y) in product(Apowers[A], Apowers[B]):
            Czp = (Axp + Byp) % p
            if Czp not in Czroots:
                continue
            # A match mod p is only a candidate; confirm with exact integers.
            lhs = A ** x + B ** y
            for (C, z) in Czroots[Czp]:
                if lhs == C ** z:
                    print(report.format(A, x, B, y, C, z, C ** z))
def make_Apowers_modp(max_A, max_x, p):
    """A dict of {A: [(A**3 (mod p), 3), (A**4 (mod p), 4), ...]}.

    Same exponents as make_Apowers (only 3 for A == 1), but each power is
    reduced mod p and paired with its exponent.
    """
    exponents = exponents_upto(max_x)
    Apowers = {}
    for A in range(1, max_A + 1):
        xs = [3] if A == 1 else exponents
        Apowers[A] = [(pow(A, x, p), x) for x in xs]
    return Apowers
def make_Czroots_modp(Apowers):
    """A dict of {C**z (mod p): [(C, z),...]}.

    Several (base, exponent) pairs can share a residue, so each residue
    maps to the list of all pairs producing it.
    """
    Czroots = defaultdict(list)
    for (A, powers) in Apowers.items():
        for (Axp, x) in powers:
            bucket = Czroots[Axp]
            bucket.append((A, x))
    return Czroots
##############################################################################
def simpsons(bases, powers):
    """Find the integers (A, B, C, n) that come closest to solving
    Fermat's equation, A ** n + B ** n == C ** n.
    Let A, B range over all pairs of bases and n over all powers.

    C is chosen as the integer nearest the nth root of A**n + B**n, and
    candidates are ranked by relative_error.
    """
    def candidates():
        for A, B in combinations(bases, 2):
            for n in powers:
                C = iroot(A ** n + B ** n, n)
                yield (A, B, C, n)

    return min(candidates(), key=relative_error)
def iroot(i, n):
    """The integer closest to the nth root of i (computed in floating point)."""
    root = i ** (1. / n)
    return int(round(root))
def relative_error(equation):
    """Error between LHS and RHS of equation, relative to RHS.

    `equation` is an (A, B, C, n) tuple standing for A**n + B**n == C**n.
    """
    A, B, C, n = equation
    rhs = C ** n
    difference = (A ** n + B ** n) - rhs
    return abs(difference) / rhs
if __name__ == '__main__':
    def s(b, p):
        "Print the closest Fermat near-miss for bases b and powers p."
        A, B, C, n = simpsons(b, p)
        print('{0}^{3} + {1}^{3} = {2}^{3}'.format(A, B, C, n))

    # Self-checks first, then the exact search.
    print(tests())
    print("Searching beal(500, 100)")
    print(beal(500, 100))

    # Near-solutions to Fermat's equation.
    print("Finding Simpson-esque near-solutions to Fermat's Equation")
    s(range(1000, 2000), [11, 12, 13])
    s(range(3000, 5000), [12])

    # The same search, using arithmetic mod p.
    print("Searching beal_modp(500, 100)")
    print(beal_modp(500, 100))

238
py/docex.py Normal file
View File

@@ -0,0 +1,238 @@
"""A framework for running unit tests and examples, written in docstrings.
This lets you write "Ex: sqrt(4) ==> 2; sqrt(-1) raises ValueError" in a
docstring, and then execute the examples as unit tests.
This functionality is similar to the doctest module. The major
differences between docex and doctest are:
(1) Brevity. With docex you write the one-line comment
"Ex: len('abc') ==> 3; len([]) ==> 0; len(5) raises TypeError"
With doctest you would need 9 lines for the same thing:
'''>>> len('abc')
3
>>> len([])
0
>>> len(5)
Traceback (most recent call last):
...
TypeError: len() of unsized object
'''
(2) Docex handles both examples and unit tests.
It took me a while to recognize this distinction: when I write
"sqrt(4) ==> 2" it has two purposes -- to serve as a unit test
and to serve as an example of how to use the sqrt function.
When I write "random.choice('abc')" it serves as an example of
how to use the choice function, but it is not a unit test.
docex lets you do both; doctest only supports tests. Of course
you can coerce this into a test in doctest, with something like
>>> random.choice('abc') in 'abc'
True
(3) Eval-based rather than string-comparison based. The docex string
"dict(zip([1,4,9], [1,2,3])) ==> {1: 1, 4: 2, 9: 3}" works even
when a different version of Python decides to print the dict as
"{9: 3, 4: 2, 1: 1}" because docex evals the right-hand-side and
checks to see if it is equal. That's good for dicts, it's good for
writing "1+1==2 ==> True" and having it work in versions of Python
where True prints as "1" rather than as "True", and so on,
but doctest has the edge if you want to compare against something
that doesn't have an eval-able output, or if you want to test
printed output.
(4) Doctest has many more features, and is better supported.
I wrote docex before doctest was an official part of Python, but
with the refactoring of doctest in Python 2.4, I decided to switch
my code over to doctest, even though I prefer the brevity of docex.
I still offer docex for those who want it.
From Python, when you want to test modules m1, m2, ... do:
docex.Docex([m1, m2, ...])
From the shell, when you want to test files *.py, do:
python docex.py [log-file] *.py
If log file ends in .htm or .html, it will be written in HTML.
If log file is -, or if it is missing, then standard output is used.
For each module, Docex looks at the __doc__ and _docex strings of the
module itself, and of each member, and recursively for each member
class. If a line in a docstring starts with r'^\s*Ex: ' (a line with
blanks, then 'Ex: '), then the remainder of the string after the colon
is treated as examples. Each line of the examples should conform to
one of the following formats:
(1) Blank line or a comment; these just get echoed verbatim to the log.
(2) Of the form example1 ; example2 ; ...
(3) Of the form 'x ==> y' for any expressions x and y.
x is evaled and assigned to _, then y is evaled.
If x != y, an error message is printed.
(4) Of the form 'x raises y', for any statement x and expression y.
First y is evaled to yield an exception type, then x is execed.
If x doesn't raise the right exception, an error msg is printed.
(5) Of the form 'statement'. Statement is execed for side effect.
(6) Of the form 'expression'. Expression is evaled for side effect.
"""
import re, sys, types
class Docex:
    """A class to run test examples written in docstrings or in _docex.

    Constructing a Docex runs every example found in the given modules,
    writing a log to standard output (or to `out`).  Afterwards `passed`
    and `failed` hold the number of checked examples that succeeded or
    failed.  The code runs unchanged on Python 2 and Python 3.
    """

    # Everything that counts as a class: new-style classes everywhere, plus
    # old-style classes on Python 2 (types.ClassType does not exist on 3).
    _class_types = (type, getattr(types, 'ClassType', type))

    def __init__(self, modules=None, html=0, out=None,
                 title='Docex Example Output'):
        """Run the examples of `modules` (default: every loaded module).

        html:  if true, the log is written with HTML markup and escaping.
        out:   optional writable file object; while the examples run it
               replaces sys.stdout, and it is closed afterwards.
        title: heading written at the top of the log.
        """
        if modules is None:
            # Snapshot: sys.modules may change while examples are running.
            modules = list(sys.modules.values())
        self.passed = self.failed = 0
        self.dictionary = {}
        self.already_seen = {}
        self.html = html
        # Put back whatever stream was installed before (it need not be
        # sys.__stdout__, e.g. under a test runner or in an IDE).
        saved_stdout = sys.stdout
        try:
            if out:
                sys.stdout = out
            self.writeln(title, '<h1>', '</h1><pre>')
            for module in modules:
                self.run_module(module)
            self.writeln(str(self), '</pre>\n<hr><h1>', '</h1>\n')
        finally:
            if out:
                sys.stdout = saved_stdout
                out.close()

    def __repr__(self):
        if self.failed:
            return ('<Test: #### failed %d, passed %d>'
                    % (self.failed, self.passed))
        else:
            return '<Test: passed all %d>' % self.passed

    def run_module(self, object):
        """Run the docstrings, and then all members of the module."""
        if not self.seen(object):
            self.dictionary.update(vars(object))  # import module into self
            name = object.__name__
            self.writeln('## Module %s ' % name,
                         '\n</pre><a name=%s><h1>' % name,
                         '</h1><pre>')
            self.run_docstring(object)
            for name in sorted(object.__dict__):
                val = object.__dict__[name]
                if isinstance(val, self._class_types):
                    self.run_class(val)
                elif isinstance(val, types.ModuleType):
                    pass  # sub-modules run only if listed explicitly
                elif not self.seen(val):
                    self.run_docstring(val)

    def run_class(self, object):
        """Run the docstrings, and then all members of the class."""
        if not self.seen(object):
            self.run_docstring(object)
            for name in sorted(object.__dict__):
                self.run_docstring(object.__dict__[name])

    def run_docstring(self, object, search=re.compile(r'(?m)^\s*Ex: ').search):
        "Run the __doc__ and _docex attributes, if the object has them."
        if hasattr(object, '__doc__'):
            s = object.__doc__
            if isinstance(s, str):
                match = search(s)
                # Everything after the first 'Ex: ' marker is examples.
                if match:
                    self.run_string(s[match.end():])
        if hasattr(object, '_docex'):
            self.run_string(object._docex)

    def run_string(self, teststr):
        """Run a test string, printing inputs and results."""
        if not teststr:
            return
        teststr = teststr.strip()
        if '\n' in teststr:
            # An explicit loop: on Python 3 a bare map() is lazy and would
            # never run the examples.
            for line in teststr.split('\n'):
                self.run_string(line)
        elif teststr == '' or teststr.startswith('#'):
            self.writeln(teststr)
        elif '; ' in teststr:
            for substr in teststr.split('; '):
                self.run_string(substr)
        elif '==>' in teststr:
            teststr, result = teststr.split('==>', 1)
            self.evaluate(teststr, result)
        elif ' raises ' in teststr:
            # Split on the last ' raises ' so the statement may contain it.
            teststr, exception = teststr.rsplit(' raises ', 1)
            self.raises(teststr, exception)
        else:  ## Try to eval, but if it is a statement, exec
            try:
                self.evaluate(teststr)
            except SyntaxError:
                exec(teststr, self.dictionary)

    def evaluate(self, teststr, resultstr=None):
        "Eval teststr and check if resultstr (if given) evals to the same."
        self.writeln('>>> ' + teststr.strip())
        result = eval(teststr, self.dictionary)
        self.dictionary['_'] = result
        self.writeln(repr(result))
        if resultstr is None:
            return
        elif result == eval(resultstr, self.dictionary):
            self.passed += 1
        else:
            self.fail(teststr, resultstr)

    def raises(self, teststr, exceptionstr):
        "Exec teststr and check that it raises the exception named by exceptionstr."
        teststr = teststr.strip()
        self.writeln('>>> ' + teststr)
        except_class = eval(exceptionstr, self.dictionary)
        try:
            exec(teststr, self.dictionary)
        except except_class:
            self.writeln('# raises %s as expected' % exceptionstr)
            self.passed += 1
            return
        self.fail(teststr, exceptionstr)

    def fail(self, teststr, resultstr):
        "Log a failed example and count it."
        self.writeln('###### ERROR, TEST FAILED: expected %s for %s'
                     % (resultstr, teststr),
                     '<font color=red><b>', '</b></font>')
        self.failed += 1

    def writeln(self, s, before='', after=''):
        "Write s, html escaped, and wrapped with html code before and after."
        s = str(s)
        if self.html:
            s = s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
            print('%s%s%s' % (before, s, after))
        else:
            print(s)

    def seen(self, object):
        """Return true if this object has been seen before.
        In any case, record that we have seen it."""
        result = id(object) in self.already_seen
        self.already_seen[id(object)] = 1
        return result
def main(args):
    """Run Docex. args should be a list of python filenames.
    If the first arg is a non-python filename, it is taken as the
    name of a log file to which output is written. If it ends in
    ".htm" or ".html", then the output is written as html. If the
    first arg is "-", or if there are no args at all, then standard
    output is used as the log file."""
    import glob
    out = None
    html = 0
    # `args and ...` guards the no-argument case, which used to raise
    # IndexError.
    if args and args[0] != "-" and not args[0].endswith(".py"):
        out = open(args[0], 'w')
        if args[0].endswith((".html", ".htm")):
            html = 1
    modules = []
    for arg in args:
        for filename in glob.glob(arg):
            if filename.endswith('.py'):
                # Import by module name: the file name minus '.py'.
                modules.append(__import__(filename[:-3]))
    print(Docex(modules, html=html, out=out))


if __name__ == '__main__':
    main(sys.argv[1:])

193
py/ibol.py Normal file
View File

@@ -0,0 +1,193 @@
from collections import defaultdict
def get_genomes(fname="byronbayseqs.fas.txt"):
    """Return a list of names, a list of species, and a list of genomes.

    The file is a sequence of records of the form '>name<CR>genome<CR>'.
    The species of a record is the part of its name after the last '|'.
    The three returned lists are parallel.
    """
    import io
    import re
    # newline='' turns off newline translation, so the carriage returns
    # that delimit the records reach the regex intact; the `with` block
    # closes the file promptly.
    with io.open(fname, newline='') as f:
        text = f.read()
    names, species, genomes = [], [], []
    for name, g in re.findall('>(.*?)\r([^\r]*)\r*', text):
        names.append(name)
        species.append(name.split('|')[-1])
        genomes.append(g)
    return names, species, genomes
def get_neighbors(fname="editdistances.txt"):
    """Return dict: neighbors[i][j] = neighbors[j][i] = d means i,j are d apart.

    Each non-blank line of the file holds three integers 'i j d'.  Every
    genome index in range(n) (the module-level genome count) gets an
    entry, possibly empty.
    """
    ## Read the data pre-computed from the Java program
    neighbors = dict((i, {}) for i in range(n))
    with open(fname) as f:  # closed even if a line fails to parse
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines, e.g. at end of file
            i, j, d = map(int, line.split())
            neighbors[i][j] = neighbors[j][i] = d
    return neighbors
def cluster(neighbors, d, dc):
    """Return a list of clusters, each cluster element is within d of another
    and within dc of every other cluster element.

    Genomes are taken as seeds in the iteration order of `neighbors`; each
    still-unclustered seed grows one cluster via closure().
    """
    unclustered = set(neighbors)  ## set of g's not yet clustered
    clusters = []
    for g in neighbors:
        if g in unclustered:
            clusters.append(closure(g, set(), unclustered, d, dc))
    return clusters
def closure(g, s, unclustered, d, dc):
    """Accumulate in set s the transitive closure of 'near', starting at g.

    g joins s (and leaves `unclustered`) only if it is not already in s, is
    still unclustered, and is near the current members of s; its recorded
    neighbors (from the module-level `neighbors`) are then tried in turn.
    Returns s.
    """
    if g in s or g not in unclustered:
        return s
    if not near(g, s, d, dc):
        return s
    s.add(g)
    unclustered.remove(g)
    for g2 in neighbors[g]:
        closure(g2, s, unclustered, d, dc)
    return s
def dist(i, j):
    """Distance between two genomes.

    Zero for a genome and itself; otherwise the recorded distance, looked
    up under the smaller index, or max_distance if none is recorded.
    """
    if i == j:
        return 0
    lo, hi = min(i, j), max(i, j)
    return neighbors[lo].get(hi, max_distance)
def near(g, cluster, d, dc):
    """Is g within d of some member of cluster, and within dc of every member?

    An empty cluster is treated as being at distance 0 from g.
    """
    distances = [dist(g, member) for member in cluster]
    if not distances:
        distances = [0]
    closest, farthest = min(distances), max(distances)
    return closest <= d and farthest <= dc
def diameter(cluster):
    """The largest distance between two elements of the cluster (0 if empty)."""
    pairwise = [dist(i, j) for i in cluster for j in cluster]
    if not pairwise:
        return 0
    return max(pairwise)
def margin(cluster):
    """The distance from a cluster to the nearest g2 outside this cluster.

    Only recorded neighbor distances are considered; if no member has a
    recorded neighbor outside the cluster, the result is max_distance.
    """
    outside = []
    for g in cluster:
        for g2, d in neighbors[g].items():
            if g2 not in cluster:
                outside.append(d)
    if not outside:
        return max_distance
    return min(outside)
################################################################ Analysis
def pct(num, den):
    """Return a string representing the percentage num/den.

    `den` may be a number or a sized container (its length is used).
    Exactly 100 percent is rendered as ' 100%'; anything else gets one
    decimal place.
    """
    if '__len__' in dir(den):
        den = len(den)
    if num == den:
        return ' 100%'
    percent = num * 100.0 / den
    return '%.1f%%' % percent
def histo(items):
    """Make a histogram from a sequence of items or (item, count) tuples.

    A tuple adds its second element to the count of its first element;
    any other item adds 1 to its own count.  Returns a defaultdict(int).
    """
    D = defaultdict(int)
    for item in items:
        if isinstance(item, tuple):
            key, amount = item[0], item[1]
        else:
            key, amount = item, 1
        D[key] += amount
    return D
def showh(d):
    """Show a histogram as 'key:count key:count ...', sorted by key.

    Anything that is not already a dict is first tallied with histo().
    """
    if not isinstance(d, dict):
        d = histo(d)
    parts = ['%s:%s' % pair for pair in sorted(d.items())]
    return ' '.join(parts)
def greport(genomes):
    """Print summary statistics about the genomes.

    Reports the number of genomes (and of distinct ones), how many distinct
    genomes carry more than one species name, the range of genome lengths,
    and a histogram of the characters used.  Reads the module-level `n`
    and `species`.  Output is the same on Python 2 and 3.
    """
    print("Number of genomes: %d (%d distinct)" % (len(genomes), len(set(genomes))))
    G = dict((g, set()) for g in genomes)
    for i in range(n):
        G[genomes[i]].add(species[i])
    multi_named = len([s for s in G.values() if len(s) > 1])
    print("Multi-named genomes: %s" % multi_named)
    # A real list: a lazy map() would be used up by min() before max().
    lens = [len(g) for g in genomes]
    print("Genome lengths: min=%d, max=%d" % (min(lens), max(lens)))
    print("Character counts:  %s" % showh(c for g in genomes for c in g))
def nreport(neighbors):
    """Print histograms of nearest-neighbor distances and of all distances.

    A genome with no recorded neighbors is counted under the key '>25'.
    Output is the same on Python 2 and 3.
    """
    NN, NumN = defaultdict(int), defaultdict(int)  ## Nearest, Number of neighbors
    for g in neighbors:
        distances = list(neighbors[g].values())
        nn = min(distances or ['>25'])
        NN[nn] += 1
        for d2 in distances:
            NumN[d2] += 1
    print('')
    print("Nearest neighbor counts: %s" % showh(NN))
    print("Number of neighbors at each distance: %s" % showh(NumN))
def nspecies(c):
    "Number of distinct species names among the genomes of cluster c."
    names_in_cluster = set()
    for g in c:
        names_in_cluster.add(species[g])
    return len(names_in_cluster)
def showc(c):
    "One-line description of cluster c: size, diameter, margin, members, species."
    size = len(c)
    members = list(c)
    species_histogram = showh(species[g] for g in c)
    return "N=%d, D=%d, M=%d: %s %s" % (
        size, diameter(c), margin(c), members, species_histogram)
def creport(drange, dcrange):
    """Print the clustering report.

    drange:  nearest-neighbor thresholds d, one table row each.
    dcrange: whole-cluster thresholds dc, one table column each.

    Prints a table of cluster counts for every (d, dc), then details of
    the clustering cluster(neighbors, 8, 15): cluster sizes, margins, and
    the clusters whose margin is at most 16.  Reads the module-level
    `neighbors`, `glen` and `n`.  Output is the same on Python 2 and 3.
    """
    def table(what, fn):
        "Print a d-by-dc grid of fn(cluster(neighbors, d, dc)), titled `what`."
        print("\n" + what)
        print(' ' * 8 + ' ' + ' '.join([' ' + pct(dc, glen) for dc in dcrange]))
        for d in drange:
            cells = ['%s (%2d)' % (pct(d, glen), d)]
            for dc in dcrange:
                cells.append('%5s' % fn(cluster(neighbors, d, dc)))
            print(' '.join(cells))

    def pct_near_another(clusters, P=1.25):
        "Percentage (of n) of neighbor links that leave a cluster yet are shorter than P*diameter."
        total = 0
        for c in clusters:
            d = diameter(c)
            for g in c:
                for g2 in neighbors[g]:
                    if g2 not in c and dist(g, g2) < P * d:
                        total += 1
        return pct(total, n)

    def f(P):
        "Table of pct_near_another for multiplier P (currently not called)."
        # table() needs a title as well as a function; it used to be called
        # here with the function only, which would raise TypeError.
        table('Percent of individuals within %.2f*diameter of another cluster.' % P,
              lambda cl: pct_near_another(cl, P))

    print('\nNearest neighbor must be closer than this percentage (places). ')
    print('Each column: all genomes in cluster within this percentage of each other.')
    table("Number of clusters", len)
    cluster1 = cluster(neighbors, 8, 15)  ## splits Cleora
    print('\nNumber of clusters of different sizes: %s'
          % showh(len(c) for c in cluster1))
    M, T = defaultdict(int), defaultdict(int)  # per margin: clusters, genomes
    for c in cluster1:
        m = margin(c)
        M[m] += 1
        T[m] += len(c)
    for x in M:
        print('%d\t%d\t%d' % (x, M[x], T[x]))
    print('\nMargins %s' % showh(M))
    for c in cluster1:
        if margin(c) <= 16:
            print(showc(c))
    # The sections below print only their headings; their bodies are
    # disabled (kept as comments).
    print('\nScatter plot of cluster diameter vs. margin.')
    #for c in cluster1:
    #    if diameter(c) > 0: print '%d\t%d' % (diameter(c), margin(c))
    print('\nDifference from cluster(neighbors, 11, 14):')
    #table(lambda cl: pct(len(cluster1)-compare(cluster1, cl),max(len(cluster1),len(cl))))
    print('\nNumber of clusters witth more than one species name:')
    #table(lambda cl: sum(nspecies(c) > 1 for c in cl))
    #map(f, [1.2, 1.33, 1.5])
def sreport(species):
    """Print the diameter of each species treated as a cluster.

    For every distinct species name, gathers the genomes carrying that
    name and computes their diameter; species with diameter above 14 get
    a line each, and a histogram of all diameters is printed at the end.
    Reads the module-level `n` and `glen`.  Output is the same on
    Python 2 and 3.
    """
    # NOTE(review): nesting reconstructed from an unindented listing; the
    # tally SS[d] += 1 is assumed to apply to every species, not only the
    # ones that are printed -- confirm against the original file.
    SS = defaultdict(int)
    print('')
    for s in set(species):
        c = [g for g in range(n) if species[g] == s]
        d = diameter(c)
        if d > 14:
            if d == glen:
                d = '>25'
            print('diameter %s for %s (%d elements)' % (d, s, len(c)))
        SS[d] += 1
    print('Diameters of %d labelled clusters: %s' % (len(set(species)), showh(SS)))
def compare(cl1, cl2):
    """Compare two lists of clusters.

    Over all pairs (c1, c2): an identical pair scores 1; a pair whose sizes
    differ by exactly one, with one cluster a subset of the other, scores
    0.5; any other pair scores 0.  Returns the total.
    """
    def score(c1, c2):
        if c1 == c2:
            return True
        off_by_one = abs(len(c1) - len(c2)) == 1
        return 0.5 * (off_by_one and (c1.issubset(c2) or c2.issubset(c1)))

    return sum(score(c1, c2) for c1 in cl1 for c2 in cl2)
def unit_tests():
    """Sanity checks on the loaded data and the clustering functions.

    Relies on the module-level `genomes`, `glen` and `neighbors`.  Output
    is the same on Python 2 and 3.
    """
    # Every genome has the same length.
    assert set(len(g) for g in genomes) == set([glen])
    # A clustering puts each genome in exactly one cluster.
    clusters = cluster(neighbors, 11, 11)
    assert sum(len(c) for c in clusters) == len(genomes)
    assert len(set(g for c in clusters for g in c)) == len(genomes)
    # Distances are symmetric; diameters behave at the small end.
    assert dist(17, 42) == dist(42, 17)
    assert diameter(set()) == 0
    assert diameter([17, 42]) == dist(17, 42)
    assert pct(1, 2) == '50.0%'
    print('\nAll tests pass.\n')
################################################################ Main body
# Distance used by dist() and margin() when no distance is recorded.
max_distance = 26
names, species, genomes = get_genomes() ## genomes = ['ACT...', ...]
n = len(genomes)  # number of genomes; read as a global by the functions above
glen = len(genomes[0])  # length of the first genome (unit_tests checks all match)
neighbors = get_neighbors() ## neighbors[g] = {g2: d2, g3: d3, ...}
# Print the reports, then run the sanity checks.
greport(genomes)
nreport(neighbors)
creport(range(6, 15), [glen,16,15,14,13, 12, 11])
#sreport(species)
unit_tests()

443
py/lettercount.py Normal file
View File

@@ -0,0 +1,443 @@
"""
Code to support http://norvig.com/mayzner.html
Read files in the Google Books ngram format, and convert them to a simpler format.
The original format looks like this:
word \t year \t word_count \t book_count
word_POS \t year \t word_count \t book_count
for example,
accreted_VERB 1846 7 4
accreted_VERB 1847 1 1
accreted_VERB 1848 1 1
The function 'read_year_file' will convert a file of this form into a dict of
{WORD: count} pairs, where the WORD is uppercased, and the count is the total
over all years (you have the option to specify a starting year) and all
capitalizations. Then 'read_dict' and 'write_dict' convert between a dict and
an external file format that looks like this:
ACCRETED 9
"""
from __future__ import division
from collections import Counter, defaultdict
#### Read files in Books-Ngram format; convert to a dict
def read_year_file(filename, dic=None):
    """Read a file of 'word year word_count book_count' lines and convert to a dict
    {WORD: totalcount}. Uppercase all words, and only include all-alphabetic words.

    Fields are tab-separated.  A part-of-speech suffix ('word_POS') is
    dropped before the alphabetic test.  Counts are added into `dic` if
    one is given (so several files can be accumulated); the dict is
    returned.
    """
    if dic is None:
        dic = {}
    with open(filename) as f:  # closed even if a line is malformed
        for line in f:
            word, year, c1, c2 = line.split('\t')
            if '_' in word:
                word = word[:word.index('_')]
            if word.isalpha():
                word = word.upper()
                dic[word] = dic.get(word, 0) + int(c1)
    return dic
#### Read and write files of the form 'WORD \t count \n'
def write_dict(dic, filename):
    """Write a {word:count} dict as 'word \\t count' lines in filename.

    Lines are written in sorted key order.  Returns None.
    """
    with open(filename, 'w') as out:  # closed (and flushed) even on error
        for key in sorted(dic):
            out.write('%s\t%s\n' % (key, dic[key]))
def read_dict(filename, sep='\t'):
    """Read 'word \\t count' lines from file and make them into a dict of {word:count}.

    Each line must have exactly two fields separated by `sep`; the count
    is converted to int.
    """
    with open(filename) as f:  # closed as soon as the dict is built
        pairs = (line.split(sep) for line in f)
        return {word: int(count) for (word, count) in pairs}
#### Convert a bunch of year files into dict file format.
def convert_files(filenames, mincount=1e5):
    """Convert each year file into a 'WORD-X' dict file of popular words.

    For every file f: read it with read_year_file, drop the words whose
    total count is below `mincount`, and write the rest with write_dict to
    'WORD-' + the upper-cased last character of f.  Progress lines are
    printed along the way.  Runs on Python 2 and 3.
    """
    def report(filename, D, adj):
        "Print the size of D, labelled with adj and the current GMT time."
        import time
        N = len(D)
        W = sum(D.values())
        print('%s: %s %s words (%s tokens) at %s' % (
            filename, adj, format(W, ',d'), format(N, ',d'),
            time.strftime("%H:%M:%S", time.gmtime())))

    for f in filenames:
        report(f, {}, 'starting')
        D = read_year_file(f)
        report(f, D, 'total')
        for key in list(D):  # a copy of the keys, since D is mutated
            if D[key] < mincount:
                del D[key]
        write_dict(D, 'WORD-' + f[-1].upper())
        report(f, D, 'popular')
def load(filename='top-words.txt'):
    """Load file of 'word \\t count' lines into the globals D (a dict),
    W (length of D) and M (total number of words)."""
    global D, W, M
    table = read_dict(filename)
    D, W, M = table, len(table), sum(table.values())
#### Compute letter counts and save as HTML files.
def histogram(items):
    """Return a Counter giving, for each key, the sum of the vals paired
    with it in the (key, val) pairs of `items`."""
    totals = Counter()
    for pair in items:
        key, val = pair
        totals[key] = totals[key] + val
    return totals
def end(name):
    "The closing form of a tag name: 'td' becomes '/td'."
    closing = '/' + name
    return closing
def tag(name, **kwds):
    "An HTML tag: '<', the name, the rendered keyword attributes, '>'."
    # NOTE(review): `keywords` is not defined in the visible part of this
    # file -- confirm it exists before relying on tag().
    attributes = keywords(kwds)
    return '<' + name + attributes + '>'
def row(cells, **kwds):
    "Start of a table row; currently always '<tr>' (cells and kwds are ignored)."
    # NOTE(review): looks unfinished; tr() further down joins its cells.
    opening = '<tr>'
    return opening + ''
def ngram_tables(dic, N, pos=(0, 1, 2, 3, 4, -5, -4, -3, -2, -1)):
    """Set up three tables of letter N-grams of length N: counts, counts1, counts2.
    counts is a dict of {'AB': 123} that counts how often 'AB' occurs.
    counts1[i] is a dict of {'AB': 123} that counts how often 'AB' occurs at position i.
    counts2[i][j] is a dict of {'AB': 123} that counts how often 'AB' occurs at position i.

    NOTE(review): unfinished -- the tables are allocated but never filled
    or returned, so the function currently returns None.
    """
    # Size the tables from the argument `dic`; the body used to read the
    # global D, which only exists after load() has been called.
    L = len(max(dic, key=len))  # length of the longest word
    counts = Counter()
    counts1 = [Counter() for _ in range(L)]
    counts2 = [[Counter() for i in range(L)]]
def counter(pairs):
    "Make a Counter from an iterable of (value, count) pairs, summing the counts per value."
    tally = Counter()
    for pair in pairs:
        value, count = pair
        tally[value] = tally[value] + count
    return tally
def ngrams(word, N):
    "All length-N substrings of word, left to right ([] if word is shorter than N)."
    last_start = len(word) - N
    grams = []
    for i in range(last_start + 1):
        grams.append(word[i:i+N])
    return grams
import glob
#convert_files(glob.glob('book?'))
#DB = [[letter_counts() for length in range(length)] for length in range(maxlen)]
## Unused ???
def letter_counts(wc):
    """From word_counts dictionary wc, Create a dictionary of {(s, i, L): count}
    where s is a letter n-gram, i is the starting position, and L is the length
    of the word in which it appears.

    Every piece of a word (see pieces) is credited with the word's count.
    Returns a defaultdict(int).
    """
    result = defaultdict(int)
    # .items() exists on Python 2 and 3; .iteritems() is Python 2 only.
    for (word, count) in wc.items():
        for p in pieces(word):
            result[p] += count
    return result
def pieces(word):
    """Yield the 1- and 2-letter grams in (s, i, L) format.

    For each position i: the single letter there, then (unless i is the
    last position) the two letters starting there.  L is len(word).
    """
    L = len(word)
    for i, letter in enumerate(word):
        yield (letter, i, L)
        if i + 1 < L:
            yield (word[i:i+2], i, L)
def getcount(counts, s, pos, length):
    """The count for letter sequence s (one or two letters) starting at
    position pos of words of length length. If any argument is all, sum them up.

    The builtin function `all` serves as the wildcard for pos and length.
    """
    # NOTE(review): `all_lengths` is not defined in the visible part of
    # this file -- confirm before calling with length == all.
    if length == all:
        return sum(getcount(counts, s, pos, L) for L in all_lengths)
    if pos == all:
        return sum(getcount(counts, s, i, length) for i in range(length))
    return counts[s, pos, length]
# Progress markers around the (currently disabled) letter-count run.
# Parenthesized so the lines are valid on Python 2 and Python 3 alike.
print('start')
#wc = word_counts('count_100K.txt')
#counts = letter_counts(wc)
print('end')
def test():
    "Placeholder test: builds a tiny word-count dict and checks nothing yet."
    words = ['the', 'of', 'and', 'to', 'a']
    tallies = [100, 70, 60, 50, 40]
    D = dict(zip(words, tallies))
def num(ch):
    "Translate 'a' or 'A' to 0, ... 'z' or 'Z' to 25."
    lowercase = 'abcdefghijklmnopqrstuvwxyz'
    return lowercase.index(ch.lower())
def stats(D, NS=(1, 2, 3, 4, 5, 6)):
    """Print how many distinct n-grams have been seen as words are added.

    Words of D are taken in order of decreasing count.  After every 5000
    words, and after the last one, a row is printed giving, for each n in
    NS, the number of distinct n-grams seen so far and that number as a
    percentage of the 26**n possible ones.  Output is the same on
    Python 2 and 3.
    """
    counts = {n: Counter() for n in NS}
    print('words ' + ' '.join(' %d-grams ' % n for n in NS))
    total = len(D)
    for (i, word) in enumerate(sortedby(D), 1):
        for n in NS:
            for ng in ngrams(word, n):
                counts[n][ng] += 1
        if i % 5000 == 0 or i == total:
            # Build the whole row, then print it once; this yields the same
            # text as a chain of Python 2 trailing-comma print statements.
            fields = ["%4dK" % (i/1000)]
            for n in NS:
                c = len(counts[n])
                field = "%5d (%d%%)" % (c, int(round(c*100/(26**n))))
                fields.append('%12s' % field)
            print(' '.join(fields))
# The 26 capital letters in the order used for the bars, and in
# alphabetical order.
letters = 'ETAOINSRHLDCUMFPGWYBVKXJQZ'
alphabet = ''.join(sorted(letters))
from itertools import cycle
try:
    from itertools import izip  # Python 2
except ImportError:
    izip = zip  # Python 3: the builtin zip is already lazy
# One character per bar color; bar() uses it as the image name '<c>.jpg'.
colors = 'ygobp'
def bar(text, color, count, N, pixels, height=16):
    """HTML for one bar of a bar chart.

    The bar is the image '<color>.jpg' stretched to a width proportional
    to count/N (out of `pixels`; widths below 2 become 3), with `text`
    laid over it and a tooltip giving the percentage and the raw count.
    """
    width = int(round(pixels * count / N))
    if width < 2:
        width = 3
    percent = count * 100. / N
    title = '{}: {:.3f}%; {:,}'.format(text, percent, count)
    offset = -width + 2  # -int(width/2+5)
    template = '<span title="%s"><img src="%s.jpg" height=%d width=%d><span style="position:relative; left:%d; bottom:4">%s</span></span>'
    return template % (title, color, height, width, offset, text)
def letter_bar(LC, N=None, factor='', pixels=700):
    """HTML for a row of bars, one per letter, in the order of `letters`.

    LC maps each capital letter to its count; N defaults to the sum of
    all counts.  Colors are taken from `colors`, cycling.  `factor` is
    currently unused.
    """
    if N is None:
        N = sum(LC.values())
    #divisor = {'':1., 'K':1e3, 'M':1e6, 'B':1e9}[factor]
    bars = []
    for (L, color) in izip(letters, cycle(colors)):
        bars.append(bar(L.lower(), color, LC[L], N, pixels))
    return ''.join(bars)
def singleton(x):
    "A one-element list containing x."
    result = list()
    result.append(x)
    return result
# Letter positions: non-negative values count from the start of a word
# (0 = first letter), negative values from the end (-1 = last letter),
# in the sense used by substr().
positions = [0, 1, 2, 3, 4, 5, 6, -7, -6, -5, -4, -3, -2, -1]
def substr(word, pos, length):
    """Return the substr of word of given length starting/ending at pos; or None.

    For pos >= 0 the substring starts at index pos.  For pos < 0 it ends
    at the character that pos designates counting from the end (-1 is the
    last character).  None is returned when it would not fit in word.
    """
    W = len(word)
    if pos >= 0:
        if pos + length <= W:
            return word[pos:pos+length]
        return None
    if abs(pos) + length - 1 <= W:
        stop = W + pos + 1
        return word[stop-length:stop]
    return None
def lettercount(D, pos):
    """HTML line with a letter_bar of the letters found at position pos.

    Each word w of D contributes D[w] to the letter that substr(w, pos, 1)
    finds; words too short for pos are ignored.  The bar is labelled with
    the 1-based position for pos >= 0 and with pos itself for pos < 0.
    The letter counts are also printed.  Runs on Python 2 and 3.
    """
    LC = histogram((substr(w, pos, 1), D[w]) for w in D)
    del LC[None]  # words too short to have a letter at pos
    print(LC)
    pos_name = (str(pos)+'+' if isinstance(pos, tuple) else
                pos if pos < 0 else
                pos+1)
    return '\n<br>\n%-3s %s' % (pos_name, letter_bar(LC))
def ngramcount(D, n=2):
    "Counter of letter n-grams; every n-gram of word w is weighted by D[w]."
    def weighted_ngrams():
        for w in D:
            for ng in ngrams(w, n):
                yield (ng, D[w])

    return histogram(weighted_ngrams())
def twograms(D2):
    """HTML table of 2-gram counts.

    Row B, column A holds the cell for the 2-gram A+B, with A and B each
    running over `alphabet`.  D2 maps 2-grams to counts.
    """
    N = sum(D2.values())
    lines = ['<table cellpadding=1 cellborder=1>']
    for B in alphabet:
        cells = [cell(A+B, D2, N) for A in alphabet]
        lines.append(tr(cells))
    lines.append('</table>')
    return '\n'.join(lines)
def cell(text, D2, N, height=16, maxwidth=25, scale=27):
    """HTML table cell for the n-gram `text`, drawn as an image bar.

    The bar width is maxwidth * scale * count / N, rounded, at least 1.
    NOTE: a second definition of `cell` follows in this file and replaces
    this one when the module is loaded.
    """
    count = D2.get(text, 0)
    width = max(1, int(round(maxwidth * count * scale * 1. / N)))
    percent = count * 100. / N
    title = '{}: {:.3f}%; {:,}'.format(text, percent, count)
    template = '<td title="%s"><img src="o.jpg" height=%d width=%d><span style="position:relative; left:%d; bottom:4">%s</span></span>'
    return template % (title, height, width, -width+2, text)
def cell(text, D2, N, height=16, maxwidth=25, scale=27):
    """HTML table cell for the n-gram `text`, sized by its share of N.

    The cell uses 'o.jpg' as its background; its width is
    maxwidth * scale * count / N, rounded, at least 1.  The tooltip gives
    the percentage and the raw count.
    """
    count = D2.get(text, 0)
    width = max(1, int(round(maxwidth * count * scale * 1. / N)))
    percent = count * 100. / N
    title = '{}: {:.3f}%; {:,}'.format(text, percent, count)
    template = '<td title="%s" background="o.jpg" height=%d width=%d>%s'
    return template % (title, height, width, text)
def tr(cells):
    "An HTML table row: '<tr>' followed by the cells run together."
    pieces_of_row = ['<tr>']
    pieces_of_row.extend(cells)
    return ''.join(pieces_of_row)
def comma(n):
    "Format the number n with commas as thousands separators."
    return format(n, ',')
def ngram_stats(D, n, k=5):
    """HTML table row summarizing the n-grams of word-count dict D.

    Columns: the label, the number of distinct n-grams, the total n-gram
    count, links to the counts-n csv and html files, and the k most
    frequent n-grams.
    """
    DN = ngramcount(D, n)
    distinct = comma(len(DN))
    total = comma(sum(DN.values()))
    topk = ', '.join(sortedby(DN)[:k])
    template = '<tr><td>%d-grams<td align=right>%s<td align=right>%s<td><a href="counts-%d.csv">counts-%d.csv</a><td><a href="counts-%d.html">counts-%d.html</a><td>%s'
    return template % (n, distinct, total, n, n, n, n, topk)
#### Tables
def sortedby(D):
    "The keys of D, ordered from largest value to smallest (ties keep D's order)."
    def negated_value(key):
        return -D[key]

    return sorted(D, key=negated_value)
# Wildcard used in column keys, as in (ANY, ANY).
ANY = '*'
# Word lengths that get their own columns: 1 through 9.
wordlengths = range(1, 10)
def col(*args):
    "A column key: simply the tuple of the arguments."
    key = tuple(args)
    return key
def columns(n, wordlengths=wordlengths):
    """The list of column keys for the table of n-grams of length n.

    In order: (ANY, ANY); (k, ANY) for each word length k >= n;
    (k, start, end) for each such k and each 1-based start position;
    (ANY, start, end) for each start in wordlengths; and (ANY, -k, end)
    with negative positions, for k from the largest length down, as long
    as the end position is still negative.
    """
    lengths = [k for k in wordlengths if k >= n]
    result = [col(ANY, ANY)]
    result.extend(col(k, ANY) for k in lengths)
    for k in lengths:
        for start in range(1, 2+k-n):
            result.append(col(k, start, start+n-1))
    for start in wordlengths:
        result.append(col(ANY, start, start+n-1))
    for k in reversed(lengths):
        if -k+n-1 < 0:
            result.append(col(ANY, -k, -k+n-1))
    return result
def colname(col):
    "Format a column key: 'a/b' for a 2-tuple, 'a/start:end' for a 3-tuple."
    if len(col) == 2:
        return '%s/%s' % col
    return '%s/%d:%d' % col
def csvline(first, rest):
    """Return one line of output: `first`, then str() of each item of `rest`.

    Despite the name, fields are separated by tabs. The list is built with a
    comprehension rather than `[first] + map(...)`, which only works where map
    returns a list.
    """
    return '\t'.join([first] + [str(item) for item in rest])
def makecsv(n, D=D):
    """Tabulate positional counts of n-grams and write them to 'ngrams<n>.csv'.

    D maps each word to its count. For every n-gram of every word, the word's
    count is added under these keys:
      (ANY, ANY)            overall
      (wlen, ANY)           by word length
      (wlen, start, end)    by word length and position, for start <= 9
      (ANY, start, end)     by position from the front, for start <= 9
      (ANY, -from_end, ...) by position from the back, for from_end <= 9
    The file gets a header line, then one line per n-gram in decreasing order
    of overall count, with one field per column from columns(n).
    Returns the dict {ngram: {key: count}}.
    """
    Dng = defaultdict(lambda: defaultdict(int))
    for w in D:
        # These depend only on the word, so compute them once per word.
        N = D[w]
        wlen = len(w)
        # enumerate ngrams from word and increment counts for each one
        for (start, ng) in enumerate(ngrams(w, n), 1):
            entry = Dng[ng]
            entry[ANY, ANY] += N
            entry[wlen, ANY] += N
            if start <= 9:
                entry[wlen, start, start+n-1] += N
                entry[ANY, start, start+n-1] += N
            from_end = wlen-start+1
            if from_end <= 9:
                entry[ANY, -from_end, -from_end+n-1] += N
    cols = columns(n)
    # `with` closes the file even if formatting or writing a line fails.
    with open('ngrams%d.csv' % n, 'w') as out:
        out.write(csvline('%d-gram' % n, [colname(c) for c in cols]) + '\n')
        for ng in sorted(Dng, key=lambda ng: -Dng[ng][(ANY, ANY)]):
            out.write(csvline(ng, [Dng[ng].get(c, 0) for c in cols]) + '\n')
    return Dng
### Tests
"""
>>> for w in words:
print '%-6s %6.2f B (%4.2f%%) <img src="s.jpg" height=12 width=%d>' % (w.lower(), D[w]/1e9, D[w]*100./N, int(round(D[w]*4000./N)))
...
the 53.10 B (7.14%) <img src="s.jpg" height=12 width=286>
of 30.97 B (4.16%) <img src="s.jpg" height=12 width=167>
and 22.63 B (3.04%) <img src="s.jpg" height=12 width=122>
to 19.35 B (2.60%) <img src="s.jpg" height=12 width=104>
in 16.89 B (2.27%) <img src="s.jpg" height=12 width=91>
a 15.31 B (2.06%) <img src="s.jpg" height=12 width=82>
is 8.38 B (1.13%) <img src="s.jpg" height=12 width=45>
that 8.00 B (1.08%) <img src="s.jpg" height=12 width=43>
for 6.55 B (0.88%) <img src="s.jpg" height=12 width=35>
it 5.74 B (0.77%) <img src="s.jpg" height=12 width=31>
as 5.70 B (0.77%) <img src="s.jpg" height=12 width=31>
was 5.50 B (0.74%) <img src="s.jpg" height=12 width=30>
with 5.18 B (0.70%) <img src="s.jpg" height=12 width=28>
be 4.82 B (0.65%) <img src="s.jpg" height=12 width=26>
by 4.70 B (0.63%) <img src="s.jpg" height=12 width=25>
on 4.59 B (0.62%) <img src="s.jpg" height=12 width=25>
not 4.52 B (0.61%) <img src="s.jpg" height=12 width=24>
he 4.11 B (0.55%) <img src="s.jpg" height=12 width=22>
i 3.88 B (0.52%) <img src="s.jpg" height=12 width=21>
this 3.83 B (0.51%) <img src="s.jpg" height=12 width=21>
are 3.70 B (0.50%) <img src="s.jpg" height=12 width=20>
or 3.67 B (0.49%) <img src="s.jpg" height=12 width=20>
his 3.61 B (0.49%) <img src="s.jpg" height=12 width=19>
from 3.47 B (0.47%) <img src="s.jpg" height=12 width=19>
at 3.41 B (0.46%) <img src="s.jpg" height=12 width=18>
which 3.14 B (0.42%) <img src="s.jpg" height=12 width=17>
but 2.79 B (0.38%) <img src="s.jpg" height=12 width=15>
have 2.78 B (0.37%) <img src="s.jpg" height=12 width=15>
an 2.73 B (0.37%) <img src="s.jpg" height=12 width=15>
had 2.62 B (0.35%) <img src="s.jpg" height=12 width=14>
they 2.46 B (0.33%) <img src="s.jpg" height=12 width=13>
you 2.34 B (0.31%) <img src="s.jpg" height=12 width=13>
were 2.27 B (0.31%) <img src="s.jpg" height=12 width=12>
their 2.15 B (0.29%) <img src="s.jpg" height=12 width=12>
one 2.15 B (0.29%) <img src="s.jpg" height=12 width=12>
all 2.06 B (0.28%) <img src="s.jpg" height=12 width=11>
we 2.06 B (0.28%) <img src="s.jpg" height=12 width=11>
can 1.67 B (0.22%) <img src="s.jpg" height=12 width=9>
her 1.63 B (0.22%) <img src="s.jpg" height=12 width=9>
has 1.63 B (0.22%) <img src="s.jpg" height=12 width=9>
there 1.62 B (0.22%) <img src="s.jpg" height=12 width=9>
been 1.62 B (0.22%) <img src="s.jpg" height=12 width=9>
if 1.56 B (0.21%) <img src="s.jpg" height=12 width=8>
more 1.55 B (0.21%) <img src="s.jpg" height=12 width=8>
when 1.52 B (0.20%) <img src="s.jpg" height=12 width=8>
will 1.49 B (0.20%) <img src="s.jpg" height=12 width=8>
would 1.47 B (0.20%) <img src="s.jpg" height=12 width=8>
who 1.46 B (0.20%) <img src="s.jpg" height=12 width=8>
so 1.45 B (0.19%) <img src="s.jpg" height=12 width=8>
no 1.40 B (0.19%) <img src="s.jpg" height=12 width=8>
>>> for n in sorted(H):
print '%2d %9.2f M (%6.3f%%) <img src="s.jpg" height=12 width=%d> %d' % (n, H[n]/1e6, H[n]*100./NN, H[n]*3000./NN, n)
...
1 22301.22 M ( 2.998%) <img src="s.jpg" height=12 width=89> 1
2 131293.85 M (17.651%) <img src="s.jpg" height=12 width=529> 2
3 152568.38 M (20.511%) <img src="s.jpg" height=12 width=615> 3
4 109988.33 M (14.787%) <img src="s.jpg" height=12 width=443> 4
5 79589.32 M (10.700%) <img src="s.jpg" height=12 width=320> 5
6 62391.21 M ( 8.388%) <img src="s.jpg" height=12 width=251> 6
7 59052.66 M ( 7.939%) <img src="s.jpg" height=12 width=238> 7
8 44207.29 M ( 5.943%) <img src="s.jpg" height=12 width=178> 8
9 33006.93 M ( 4.437%) <img src="s.jpg" height=12 width=133> 9
10 22883.84 M ( 3.076%) <img src="s.jpg" height=12 width=92> 10
11 13098.06 M ( 1.761%) <img src="s.jpg" height=12 width=52> 11
12 7124.15 M ( 0.958%) <img src="s.jpg" height=12 width=28> 12
13 3850.58 M ( 0.518%) <img src="s.jpg" height=12 width=15> 13
14 1653.08 M ( 0.222%) <img src="s.jpg" height=12 width=6> 14
15 565.24 M ( 0.076%) <img src="s.jpg" height=12 width=2> 15
16 151.22 M ( 0.020%) <img src="s.jpg" height=12 width=0> 16
17 72.81 M ( 0.010%) <img src="s.jpg" height=12 width=0> 17
18 28.62 M ( 0.004%) <img src="s.jpg" height=12 width=0> 18
19 8.51 M ( 0.001%) <img src="s.jpg" height=12 width=0> 19
20 6.35 M ( 0.001%) <img src="s.jpg" height=12 width=0> 20
21 0.13 M ( 0.000%) <img src="s.jpg" height=12 width=0> 21
22 0.81 M ( 0.000%) <img src="s.jpg" height=12 width=0> 22
23 0.32 M ( 0.000%) <img src="s.jpg" height=12 width=0> 23
>>> NL = sum(LC.values())
>>> for L in sorted(LC, key=lambda L: -LC[L]):
print '%s %8.1f B (%5.2f%%) <img src="s.jpg" height=12 width=%d>' % (L, LC[L]/1e9, LC[L]*100./NL, LC[L]*3000./NL)
...
E 445.2 B (12.49%) <img src="s.jpg" height=12 width=374>
T 330.5 B ( 9.28%) <img src="s.jpg" height=12 width=278>
A 286.5 B ( 8.04%) <img src="s.jpg" height=12 width=241>
O 272.3 B ( 7.64%) <img src="s.jpg" height=12 width=229>
I 269.7 B ( 7.57%) <img src="s.jpg" height=12 width=227>
N 257.8 B ( 7.23%) <img src="s.jpg" height=12 width=217>
S 232.1 B ( 6.51%) <img src="s.jpg" height=12 width=195>
R 223.8 B ( 6.28%) <img src="s.jpg" height=12 width=188>
H 180.1 B ( 5.05%) <img src="s.jpg" height=12 width=151>
L 145.0 B ( 4.07%) <img src="s.jpg" height=12 width=122>
D 136.0 B ( 3.82%) <img src="s.jpg" height=12 width=114>
C 119.2 B ( 3.34%) <img src="s.jpg" height=12 width=100>
U 97.3 B ( 2.73%) <img src="s.jpg" height=12 width=81>
M 89.5 B ( 2.51%) <img src="s.jpg" height=12 width=75>
F 85.6 B ( 2.40%) <img src="s.jpg" height=12 width=72>
P 76.1 B ( 2.14%) <img src="s.jpg" height=12 width=64>
G 66.6 B ( 1.87%) <img src="s.jpg" height=12 width=56>
W 59.7 B ( 1.68%) <img src="s.jpg" height=12 width=50>
Y 59.3 B ( 1.66%) <img src="s.jpg" height=12 width=49>
B 52.9 B ( 1.48%) <img src="s.jpg" height=12 width=44>
V 37.5 B ( 1.05%) <img src="s.jpg" height=12 width=31>
K 19.3 B ( 0.54%) <img src="s.jpg" height=12 width=16>
X 8.4 B ( 0.23%) <img src="s.jpg" height=12 width=7>
J 5.7 B ( 0.16%) <img src="s.jpg" height=12 width=4>
Q 4.3 B ( 0.12%) <img src="s.jpg" height=12 width=3>
Z 3.2 B ( 0.09%) <img src="s.jpg" height=12 width=2>
>>> D2 = ngramcount(D, 2)
>>> for ng in sorted(D2, key=lambda L: -D2[L])[:50]: print '%s %8.1f B (%5.2f%%) <img src="o.jpg" height=12 width=%d>' % (ng, D2[ng]/1e9, D2[ng]*100./N2, D2[ng]*15000./N2)
def doit(k=25):
counts = [sortedby(ngramcount(D, n))[:k] for n in range(2, 10)]
for i in range(k):
print (' '.join(count[i] for count in counts)).lower()
"""

145
py/lis.py Normal file
View File

@@ -0,0 +1,145 @@
################ Lispy: Scheme Interpreter in Python
## (c) Peter Norvig, 2010-16; See http://norvig.com/lispy.html
from __future__ import division
import math
import operator as op
################ Types
Symbol = str # A Lisp Symbol is implemented as a Python str
List = list # A Lisp List is implemented as a Python list
Number = (int, float) # A Lisp Number is implemented as a Python int or float
################ Parsing: parse, tokenize, and read_from_tokens
def parse(program):
    "Turn the text of a Scheme expression into its nested-list representation."
    tokens = tokenize(program)
    return read_from_tokens(tokens)
def tokenize(s):
    "Split a string into tokens: each parenthesis, and each whitespace-separated word."
    padded = s
    for paren in '()':
        # Surround parentheses with spaces so that split() isolates them.
        padded = padded.replace(paren, ' %s ' % paren)
    return padded.split()
def read_from_tokens(tokens):
    """Read an expression from a sequence of tokens.

    Consumes tokens from the front of the list (the list is modified) and
    returns an atom or a nested list.
    Raises SyntaxError on a stray ')' or when the tokens run out, including
    in the middle of an unclosed list.
    """
    if len(tokens) == 0:
        raise SyntaxError('unexpected EOF while reading')
    token = tokens.pop(0)
    if '(' == token:
        L = []
        # Check for exhaustion before peeking, so that an unclosed '(' is
        # reported as a SyntaxError rather than an IndexError.
        while tokens and tokens[0] != ')':
            L.append(read_from_tokens(tokens))
        if not tokens:
            raise SyntaxError('unexpected EOF while reading')
        tokens.pop(0) # pop off ')'
        return L
    elif ')' == token:
        raise SyntaxError('unexpected )')
    else:
        return atom(token)
def atom(token):
    "Convert a token to an int if possible, else a float, else a Symbol."
    for convert in (int, float):
        try:
            return convert(token)
        except ValueError:
            continue
    return Symbol(token)
################ Environments
def standard_env():
    """An environment with some Scheme standard procedures.

    Returns a new Env holding everything in the math module plus the
    operators and list procedures below.
    """
    env = Env()
    env.update(vars(math)) # sin, cos, sqrt, pi, ...
    env.update({
        '+':op.add, '-':op.sub, '*':op.mul, '/':op.truediv,
        '>':op.gt, '<':op.lt, '>=':op.ge, '<=':op.le, '=':op.eq,
        'abs':     abs,
        'append':  op.add,
        # The `apply` builtin exists only on Python 2; this works on 2 and 3.
        'apply':   lambda proc, args=(): proc(*args),
        'begin':   lambda *x: x[-1],
        'car':     lambda x: x[0],
        'cdr':     lambda x: x[1:],
        'cons':    lambda x,y: [x] + y,
        'eq?':     op.is_,
        'equal?':  op.eq,
        'length':  len,
        'list':    lambda *x: list(x),
        'list?':   lambda x: isinstance(x,list),
        # Always give Scheme a real list, even where map returns an iterator.
        'map':     lambda *args: list(map(*args)),
        'max':     max,
        'min':     min,
        'not':     op.not_,
        'null?':   lambda x: x == [],
        'number?': lambda x: isinstance(x, Number),
        'procedure?': callable,
        'round':   round,
        'symbol?': lambda x: isinstance(x, Symbol),
    })
    return env
class Env(dict):
    "An environment: a dict of {'var':val} pairs, with an outer Env."
    def __init__(self, parms=(), args=(), outer=None):
        "Bind each parameter name to the corresponding argument; outer is the enclosing Env, or None."
        self.update(zip(parms, args))
        self.outer = outer
    def find(self, var):
        "Find the innermost Env where var appears; raise LookupError if it is bound nowhere."
        if var in self:
            return self
        # At the outermost Env, fail with the variable's name instead of an
        # AttributeError on None.
        if self.outer is None:
            raise LookupError(var)
        return self.outer.find(var)
global_env = standard_env()
################ Interaction: A REPL
def repl(prompt='lis.py> '):
    "A prompt-read-eval-print loop: read a line, evaluate it, print any non-None result."
    # raw_input exists only on Python 2; Python 3 calls the same thing input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    while True:
        val = eval(parse(read_line(prompt)))
        if val is not None:
            print(lispstr(val))
def lispstr(exp):
    "Render a Python object as Lisp text: lists become parenthesized, everything else uses str()."
    if not isinstance(exp, List):
        return str(exp)
    return '(' + ' '.join(lispstr(item) for item in exp) + ')'
################ Procedures
class Procedure(object):
    "A Scheme procedure created by lambda: parameters, body, and defining environment."
    def __init__(self, parms, body, env):
        self.parms = parms
        self.body = body
        self.env = env
    def __call__(self, *args):
        # Evaluate the body in a new Env that binds parms to args and whose
        # outer Env is the one the procedure was defined in.
        call_env = Env(self.parms, args, self.env)
        return eval(self.body, call_env)
################ eval
def eval(x, env=global_env):
    "Evaluate the expression x in the environment env and return its value."
    if isinstance(x, Symbol):       # variable reference
        return env.find(x)[x]
    if not isinstance(x, List):     # constant literal
        return x
    head = x[0]
    if head == 'quote':             # (quote exp)
        (_, exp) = x
        return exp
    if head == 'if':                # (if test conseq alt)
        (_, test, conseq, alt) = x
        branch = conseq if eval(test, env) else alt
        return eval(branch, env)
    if head == 'define':            # (define var exp)
        (_, var, exp) = x
        env[var] = eval(exp, env)
        return None
    if head == 'set!':              # (set! var exp)
        (_, var, exp) = x
        env.find(var)[var] = eval(exp, env)
        return None
    if head == 'lambda':            # (lambda (var...) body)
        (_, parms, body) = x
        return Procedure(parms, body, env)
    # (proc arg...): evaluate the operator, then the operands, then call.
    proc = eval(head, env)
    args = [eval(arg, env) for arg in x[1:]]
    return proc(*args)

318
py/lispy.py Normal file
View File

@@ -0,0 +1,318 @@
################ Scheme Interpreter in Python
## (c) Peter Norvig, 2010; See http://norvig.com/lispy2.html
################ Symbol, Procedure, classes
from __future__ import division
import re, sys, StringIO
class Symbol(str):
    "A Scheme symbol: a subclass of str, so that symbols can be told apart from plain strings."
def Sym(s, symbol_table={}):
    "Return the one Symbol for the str s, creating and recording it on first use."
    # The default dict is shared across calls on purpose: it is the intern table.
    try:
        return symbol_table[s]
    except KeyError:
        symbol = symbol_table[s] = Symbol(s)
        return symbol
_quote, _if, _set, _define, _lambda, _begin, _definemacro, = map(Sym,
"quote if set! define lambda begin define-macro".split())
_quasiquote, _unquote, _unquotesplicing = map(Sym,
"quasiquote unquote unquote-splicing".split())
class Procedure(object):
    "A Scheme procedure created by lambda: parameters, body expression, and defining environment."
    def __init__(self, parms, exp, env):
        self.parms = parms
        self.exp = exp
        self.env = env
    def __call__(self, *args):
        # Evaluate the body in a new Env binding parms to args, nested in
        # the environment where the procedure was defined.
        call_env = Env(self.parms, args, self.env)
        return eval(self.exp, call_env)
################ parse, read, and user interaction
def parse(inport):
    "Read one expression from inport (an InPort, or a str of program text) and expand it at top level."
    # Backwards compatibility: given a str, convert it to an InPort
    if isinstance(inport, str):
        inport = InPort(StringIO.StringIO(inport))
    tree = read(inport)
    return expand(tree, toplevel=True)
eof_object = Symbol('#<eof-object>') # Note: uninterned; can't be read
class InPort(object):
    "An input port: wraps a file-like object and buffers the unread part of the current line."
    tokenizer = r"""\s*(,@|[('`,)]|"(?:[\\].|[^\\"])*"|;.*|[^\s('"`,;)]*)(.*)"""
    def __init__(self, file):
        self.file = file
        self.line = ''
    def next_token(self):
        "Return the next token, refilling the line buffer as needed; return eof_object at end of file."
        while True:
            if self.line == '':
                self.line = self.file.readline()
                if self.line == '':
                    return eof_object
            # Split the buffer into the first token and the rest of the line.
            token, rest = re.match(InPort.tokenizer, self.line).groups()
            self.line = rest
            # Skip empty matches and ';' comments.
            if token == '' or token.startswith(';'):
                continue
            return token
def readchar(inport):
    "Return the next character from the port: from its line buffer if non-empty, else from its file; eof_object at end."
    if inport.line == '':
        return inport.file.read(1) or eof_object
    ch = inport.line[0]
    inport.line = inport.line[1:]
    return ch
def read(inport):
    "Read one Scheme expression from an input port; return eof_object if the port is exhausted."
    def read_ahead(token):
        "Build the expression that starts with the token already read."
        if token == '(':
            L = []
            token = inport.next_token()
            while token != ')':
                L.append(read_ahead(token))
                token = inport.next_token()
            return L
        if token == ')':
            raise SyntaxError('unexpected )')
        if token in quotes:
            # 'x, `x, ,x and ,@x read as (quote x), (quasiquote x), etc.
            return [quotes[token], read(inport)]
        if token is eof_object:
            raise SyntaxError('unexpected EOF in list')
        return atom(token)
    # body of read:
    token1 = inport.next_token()
    if token1 is eof_object:
        return eof_object
    return read_ahead(token1)
quotes = {"'":_quote, "`":_quasiquote, ",":_unquote, ",@":_unquotesplicing}
def atom(token):
    'Numbers become numbers; #t and #f are booleans; "..." string; otherwise Symbol.'
    if token == '#t':
        return True
    if token == '#f':
        return False
    if token[0] == '"':
        return token[1:-1].decode('string_escape')
    # Try the numeric readers from most to least specific.
    readers = (int, float, lambda text: complex(text.replace('i', 'j', 1)))
    for reader in readers:
        try:
            return reader(token)
        except ValueError:
            continue
    return Sym(token)
def to_string(x):
    "Render a Python object as Lisp-readable text."
    if x is True:
        return "#t"
    if x is False:
        return "#f"
    if isa(x, Symbol):
        return x
    if isa(x, str):
        # Strings are written back in double quotes, with escapes restored.
        return '"%s"' % x.encode('string_escape').replace('"',r'\"')
    if isa(x, list):
        return '('+' '.join(map(to_string, x))+')'
    if isa(x, complex):
        return str(x).replace('j', 'i')
    return str(x)
def load(filename):
    "Eval every expression from a file, with no prompt and no printed results."
    # `with` closes the file once the REPL reaches end of file (or fails).
    with open(filename) as source:
        repl(None, InPort(source), None)
def repl(prompt='lispy> ', inport=InPort(sys.stdin), out=sys.stdout):
    "A prompt-read-eval-print loop; runs until inport is exhausted."
    sys.stderr.write("Lispy version 2.0\n")
    while True:
        try:
            if prompt:
                sys.stderr.write(prompt)
            x = parse(inport)
            if x is eof_object:
                return
            val = eval(x)
            # Results go to `out`; None results and a false `out` print nothing.
            if val is not None and out:
                out.write(to_string(val) + '\n')
        except Exception as e:
            # Report the error and carry on with the next expression.
            print('%s: %s' % (type(e).__name__, e))
################ Environment class
class Env(dict):
    "An environment: a dict of {'var':val} pairs, with an outer Env."
    def __init__(self, parms=(), args=(), outer=None):
        """Bind parms to args inside the enclosing Env `outer`.

        If parms is a single Symbol it is bound to the list of all args;
        otherwise parms and args are paired up one to one.
        Raises TypeError if the two lengths differ.
        """
        # Bind parm list to corresponding args, or single parm to list of args
        self.outer = outer
        if isa(parms, Symbol):
            self.update({parms:list(args)})
        else:
            if len(args) != len(parms):
                # The message no longer ends with a dangling ", ".
                raise TypeError('expected %s, given %s'
                                % (to_string(parms), to_string(args)))
            self.update(zip(parms,args))
    def find(self, var):
        "Find the innermost Env where var appears; raise LookupError if there is none."
        if var in self: return self
        elif self.outer is None: raise LookupError(var)
        else: return self.outer.find(var)
def is_pair(x):
    "True if x is a non-empty list."
    return x != [] and isa(x, list)
def cons(x, y):
    "Return a new list consisting of x followed by the elements of the list y."
    first = [x]
    return first + y
def callcc(proc):
    "Call proc with current continuation; escape only"
    # The continuation is implemented by raising this one exception object
    # and catching it here; `is` tells it apart from other callcc's balls.
    ball = RuntimeWarning("Sorry, can't continue this continuation any longer.")
    def throw(retval):
        ball.retval = retval
        raise ball
    try:
        return proc(throw)
    except RuntimeWarning as w:
        if w is not ball:
            raise w
        return ball.retval
def add_globals(self):
    """Add some Scheme standard procedures to the Env `self` and return it.

    Everything in math and cmath is added first (cmath's names win where
    both define one), then the procedures listed below.
    """
    import math, cmath, operator as op
    self.update(vars(math))
    self.update(vars(cmath))
    self.update({
     # NOTE(review): op.div is classic division, so (/ 1 2) is 0 here even
     # with `from __future__ import division`; lis.py uses op.truediv.
     '+':op.add, '-':op.sub, '*':op.mul, '/':op.div, 'not':op.not_,
     '>':op.gt, '<':op.lt, '>=':op.ge, '<=':op.le, '=':op.eq,
     'equal?':op.eq, 'eq?':op.is_, 'length':len, 'cons':cons,
     'car':lambda x:x[0], 'cdr':lambda x:x[1:], 'append':op.add,
     'list':lambda *x:list(x), 'list?': lambda x:isa(x,list),
     'null?':lambda x:x==[], 'symbol?':lambda x: isa(x, Symbol),
     'boolean?':lambda x: isa(x, bool), 'pair?':is_pair,
     # Input ports are InPort objects (see open-input-file); output ports
     # are plain files.
     'port?': lambda x:isa(x,(file, InPort)), 'apply':lambda proc,l: proc(*l),
     'eval':lambda x: eval(expand(x)), 'load':lambda fn: load(fn), 'call/cc':callcc,
     # read, read-char and close-input-port all use the InPort interface
     # (next_token, .line, .file), so an input file is wrapped in an InPort.
     'open-input-file':lambda f: InPort(open(f)),
     'close-input-port':lambda p: p.file.close(),
     'open-output-file':lambda f:open(f,'w'), 'close-output-port':lambda p: p.close(),
     'eof-object?':lambda x:x is eof_object, 'read-char':readchar,
     'read':read, 'write':lambda x,port=sys.stdout:port.write(to_string(x)),
     'display':lambda x,port=sys.stdout:port.write(x if isa(x,str) else to_string(x))})
    return self
isa = isinstance
global_env = add_globals(Env())
################ eval (tail recursive)
def eval(x, env=global_env):
    "Evaluate the expression x in the environment env and return its value."
    # Forms in tail position (if, begin, calls to Procedures) rebind x and
    # env and go round the loop again instead of recursing.
    while True:
        if isa(x, Symbol):       # variable reference
            return env.find(x)[x]
        if not isa(x, list):     # constant literal
            return x
        head = x[0]
        if head is _quote:       # (quote exp)
            (_, exp) = x
            return exp
        elif head is _if:        # (if test conseq alt)
            (_, test, conseq, alt) = x
            x = conseq if eval(test, env) else alt
        elif head is _set:       # (set! var exp)
            (_, var, exp) = x
            env.find(var)[var] = eval(exp, env)
            return None
        elif head is _define:    # (define var exp)
            (_, var, exp) = x
            env[var] = eval(exp, env)
            return None
        elif head is _lambda:    # (lambda (var*) exp)
            (_, parms, exp) = x
            return Procedure(parms, exp, env)
        elif head is _begin:     # (begin exp+)
            for exp in x[1:-1]:
                eval(exp, env)
            x = x[-1]
        else:                    # (proc exp*)
            values = [eval(exp, env) for exp in x]
            proc, args = values[0], values[1:]
            if isa(proc, Procedure):
                x = proc.exp
                env = Env(proc.parms, args, proc.env)
            else:
                return proc(*args)
################ expand
def expand(x, toplevel=False):
    """Walk tree of x, making optimizations/fixes, and signaling SyntaxError.

    x: an expression as produced by read().
    toplevel: true when x is not nested in another form; define-macro is
      only accepted there.
    Returns the expanded expression, or None for (begin) and define-macro.
    Raises SyntaxError (via require) on malformed special forms.
    """
    require(x, x!=[])                    # () => Error
    if not isa(x, list):                 # constant => unchanged
        return x
    elif x[0] is _quote:                 # (quote exp)
        require(x, len(x)==2)
        return x
    elif x[0] is _if:
        if len(x)==3: x = x + [None]     # (if t c) => (if t c None)
        require(x, len(x)==4)
        return [expand(xi) for xi in x]
    elif x[0] is _set:
        require(x, len(x)==3)
        var = x[1]                       # (set! non-var exp) => Error
        require(x, isa(var, Symbol), "can set! only a symbol")
        return [_set, var, expand(x[2])]
    elif x[0] is _define or x[0] is _definemacro:
        require(x, len(x)>=3)
        _def, v, body = x[0], x[1], x[2:]
        if isa(v, list) and v:           # (define (f args) body)
            f, args = v[0], v[1:]        #  => (define f (lambda (args) body))
            # Keep the caller's toplevel flag: without it the rewritten
            # (define-macro (f args) body) was always rejected as nested.
            return expand([_def, f, [_lambda, args]+body], toplevel)
        else:
            require(x, len(x)==3)        # (define non-var/list exp) => Error
            require(x, isa(v, Symbol), "can define only a symbol")
            exp = expand(x[2])
            if _def is _definemacro:
                require(x, toplevel, "define-macro only allowed at top level")
                proc = eval(exp)
                require(x, callable(proc), "macro must be a procedure")
                macro_table[v] = proc    # (define-macro v proc)
                return None              #  => None; add v:proc to macro_table
            return [_define, v, exp]
    elif x[0] is _begin:
        if len(x)==1: return None        # (begin) => None
        else: return [expand(xi, toplevel) for xi in x]
    elif x[0] is _lambda:                # (lambda (x) e1 e2)
        require(x, len(x)>=3)            #  => (lambda (x) (begin e1 e2))
        vars, body = x[1], x[2:]
        require(x, (isa(vars, list) and all(isa(v, Symbol) for v in vars))
                or isa(vars, Symbol), "illegal lambda argument list")
        exp = body[0] if len(body) == 1 else [_begin] + body
        return [_lambda, vars, expand(exp)]
    elif x[0] is _quasiquote:            # `x => expand_quasiquote(x)
        require(x, len(x)==2)
        return expand_quasiquote(x[1])
    elif isa(x[0], Symbol) and x[0] in macro_table:
        return expand(macro_table[x[0]](*x[1:]), toplevel) # (m arg...)
    else:                                #        => macroexpand if m isa macro
        return [expand(xi) for xi in x]  # (f arg...) => expand each
def require(x, predicate, msg="wrong length"):
    "Raise SyntaxError, naming the expression x and msg, unless predicate is true."
    if predicate:
        return
    raise SyntaxError(to_string(x)+': '+msg)
_append, _cons, _let = map(Sym, "append cons let".split())
def expand_quasiquote(x):
    """Expand `x => 'x; `,x => x; `(,@x y) => (append x y) """
    if not is_pair(x):
        return [_quote, x]
    head, tail = x[0], x[1:]
    require(x, head is not _unquotesplicing, "can't splice here")
    if head is _unquote:
        require(x, len(x)==2)
        return x[1]
    if is_pair(head) and head[0] is _unquotesplicing:
        # (,@a . rest) => (append a <expansion of rest>)
        require(head, len(head)==2)
        return [_append, head[1], expand_quasiquote(tail)]
    # Otherwise rebuild the list one cons at a time.
    return [_cons, expand_quasiquote(head), expand_quasiquote(tail)]
def let(*args):
    """Macro for let: (let ((var val)...) body...) => ((lambda (var...) body...) val...).

    args: the binding list followed by one or more body expressions.
    Raises SyntaxError (via require) if there is no body or a binding is
    not a (symbol value) pair.
    """
    args = list(args)
    x = cons(_let, args)
    require(x, len(args)>1)
    bindings, body = args[0], args[1:]
    require(x, all(isa(b, list) and len(b)==2 and isa(b[0], Symbol)
                   for b in bindings), "illegal binding list")
    # zip(*[]) has nothing to unpack, so (let () body) is handled explicitly.
    names, vals = zip(*bindings) if bindings else ((), ())
    # Comprehensions rather than `list + map(...)`, which needs map to
    # return a list.
    function = [_lambda, list(names)] + [expand(e) for e in body]
    return [function] + [expand(v) for v in vals]
macro_table = {_let:let} ## More macros can go here
eval(parse("""(begin
(define-macro and (lambda args
(if (null? args) #t
(if (= (length args) 1) (car args)
`(if ,(car args) (and ,@(cdr args)) #f)))))
;; More macros can also go here
)"""))
if __name__ == '__main__':
repl()

121
py/lispytest.py Normal file
View File

@@ -0,0 +1,121 @@
################ Tests for lis.py and lispy.py
lis_tests = [
("(quote (testing 1 (2.0) -3.14e159))", ['testing', 1, [2.0], -3.14e159]),
("(+ 2 2)", 4),
("(+ (* 2 100) (* 1 10))", 210),
("(if (> 6 5) (+ 1 1) (+ 2 2))", 2),
("(if (< 6 5) (+ 1 1) (+ 2 2))", 4),
("(define x 3)", None), ("x", 3), ("(+ x x)", 6),
("(begin (define x 1) (set! x (+ x 1)) (+ x 1))", 3),
("((lambda (x) (+ x x)) 5)", 10),
("(define twice (lambda (x) (* 2 x)))", None), ("(twice 5)", 10),
("(define compose (lambda (f g) (lambda (x) (f (g x)))))", None),
("((compose list twice) 5)", [10]),
("(define repeat (lambda (f) (compose f f)))", None),
("((repeat twice) 5)", 20), ("((repeat (repeat twice)) 5)", 80),
("(define fact (lambda (n) (if (<= n 1) 1 (* n (fact (- n 1))))))", None),
("(fact 3)", 6),
("(fact 50)", 30414093201713378043612608166064768844377641568960512000000000000),
("(define abs (lambda (n) ((if (> n 0) + -) 0 n)))", None),
("(list (abs -3) (abs 0) (abs 3))", [3, 0, 3]),
("""(define combine (lambda (f)
(lambda (x y)
(if (null? x) (quote ())
(f (list (car x) (car y))
((combine f) (cdr x) (cdr y)))))))""", None),
("(define zip (combine cons))", None),
("(zip (list 1 2 3 4) (list 5 6 7 8))", [[1, 5], [2, 6], [3, 7], [4, 8]]),
("""(define riff-shuffle (lambda (deck) (begin
(define take (lambda (n seq) (if (<= n 0) (quote ()) (cons (car seq) (take (- n 1) (cdr seq))))))
(define drop (lambda (n seq) (if (<= n 0) seq (drop (- n 1) (cdr seq)))))
(define mid (lambda (seq) (/ (length seq) 2)))
((combine append) (take (mid deck) deck) (drop (mid deck) deck)))))""", None),
("(riff-shuffle (list 1 2 3 4 5 6 7 8))", [1, 5, 2, 6, 3, 7, 4, 8]),
("((repeat riff-shuffle) (list 1 2 3 4 5 6 7 8))", [1, 3, 5, 7, 2, 4, 6, 8]),
("(riff-shuffle (riff-shuffle (riff-shuffle (list 1 2 3 4 5 6 7 8))))", [1,2,3,4,5,6,7,8]),
]
lispy_tests = [
("()", SyntaxError), ("(set! x)", SyntaxError),
("(define 3 4)", SyntaxError),
("(quote 1 2)", SyntaxError), ("(if 1 2 3 4)", SyntaxError),
("(lambda 3 3)", SyntaxError), ("(lambda (x))", SyntaxError),
("""(if (= 1 2) (define-macro a 'a)
(define-macro a 'b))""", SyntaxError),
("(define (twice x) (* 2 x))", None), ("(twice 2)", 4),
("(twice 2 2)", TypeError),
("(define lyst (lambda items items))", None),
("(lyst 1 2 3 (+ 2 2))", [1,2,3,4]),
("(if 1 2)", 2),
("(if (= 3 4) 2)", None),
("(define ((account bal) amt) (set! bal (+ bal amt)) bal)", None),
("(define a1 (account 100))", None),
("(a1 0)", 100), ("(a1 10)", 110), ("(a1 10)", 120),
("""(define (newton guess function derivative epsilon)
(define guess2 (- guess (/ (function guess) (derivative guess))))
(if (< (abs (- guess guess2)) epsilon) guess2
(newton guess2 function derivative epsilon)))""", None),
("""(define (square-root a)
(newton 1 (lambda (x) (- (* x x) a)) (lambda (x) (* 2 x)) 1e-8))""", None),
("(> (square-root 200.) 14.14213)", True),
("(< (square-root 200.) 14.14215)", True),
("(= (square-root 200.) (sqrt 200.))", True),
("""(define (sum-squares-range start end)
(define (sumsq-acc start end acc)
(if (> start end) acc (sumsq-acc (+ start 1) end (+ (* start start) acc))))
(sumsq-acc start end 0))""", None),
("(sum-squares-range 1 3000)", 9004500500), ## Tests tail recursion
("(call/cc (lambda (throw) (+ 5 (* 10 (throw 1))))) ;; throw", 1),
("(call/cc (lambda (throw) (+ 5 (* 10 1)))) ;; do not throw", 15),
("""(call/cc (lambda (throw)
(+ 5 (* 10 (call/cc (lambda (escape) (* 100 (escape 3)))))))) ; 1 level""", 35),
("""(call/cc (lambda (throw)
(+ 5 (* 10 (call/cc (lambda (escape) (* 100 (throw 3)))))))) ; 2 levels""", 3),
("""(call/cc (lambda (throw)
(+ 5 (* 10 (call/cc (lambda (escape) (* 100 1))))))) ; 0 levels""", 1005),
("(* 1i 1i)", -1), ("(sqrt -1)", 1j),
("(let ((a 1) (b 2)) (+ a b))", 3),
("(let ((a 1) (b 2 3)) (+ a b))", SyntaxError),
("(and 1 2 3)", 3), ("(and (> 2 1) 2 3)", 3), ("(and)", True),
("(and (> 2 1) (> 2 3))", False),
("(define-macro unless (lambda args `(if (not ,(car args)) (begin ,@(cdr args))))) ; test `", None),
("(unless (= 2 (+ 1 1)) (display 2) 3 4)", None),
(r'(unless (= 4 (+ 1 1)) (display 2) (display "\n") 3 4)', 4),
("(quote x)", 'x'),
("(quote (1 2 three))", [1, 2, 'three']),
("'x", 'x'),
("'(one 2 3)", ['one', 2, 3]),
("(define L (list 1 2 3))", None),
("`(testing ,@L testing)", ['testing',1,2,3,'testing']),
("`(testing ,L testing)", ['testing',[1,2,3],'testing']),
("`,@L", SyntaxError),
("""'(1 ;test comments '
;skip this line
2 ; more ; comments ; ) )
3) ; final comment""", [1,2,3]),
]
def test(tests, name=''):
    """For each (exp, expected) test case, see if eval(parse(exp)) == expected.

    expected is either a value, or an exception class that evaluating exp
    should raise. Prints one line per case plus a summary, and returns the
    number of failing cases.
    """
    fails = 0
    for (x, expected) in tests:
        try:
            result = eval(parse(x))
            print('%s => %s' % (x, to_string(result)))
            ok = (result == expected)
        except Exception as e:
            print('%s =raises=> %s %s' % (x, type(e).__name__, e))
            # issubclass() raises TypeError when given a non-class, so check
            # first: a case expecting a plain value that raises is a failure,
            # not a crash of the whole run.
            ok = (isinstance(expected, type) and issubclass(expected, Exception)
                  and isinstance(e, expected))
        if not ok:
            fails += 1
            print('FAIL!!! Expected %s' % (expected,))
    print('%s %s: %d out of %d tests fail.' % ('*'*45, name, fails, len(tests)))
    return fails
if __name__ == '__main__':
from lis import *
test(lis_tests, 'lis.py')
from lispy import *
test(lis_tests+lispy_tests, 'lispy.py')

154
py/pal.py Normal file
View File

@@ -0,0 +1,154 @@
import string, random, os, re, bisect
"""Produce Panama-ish Palindromes. Copyright (C) 2002, Peter Norvig.
See http://www.norvig.com/license.html and http://www.norvig.com/pal-alg.html"""
def is_panama(p):
    "True if p ends with 'Panama', is a palindrome, and has no repeated comma-separated phrase."
    if not p.endswith('Panama'):
        return False
    if not is_palindrome(p):
        return False
    parts = [s.strip() for s in p.split(',')]
    return len(parts) == len(set(parts))
def is_palindrome(phrase):
    "True if the canonical form of phrase reads the same in both directions."
    letters = canonical(phrase)
    backwards = reverse(letters)
    return letters == backwards
def canonical(word, sub=re.compile('[^A-Za-z0-9]').sub):
    "Return word with every non-alphanumeric character removed, in lowercase."
    kept = sub('', word)
    return kept.lower()
def read_dict(filename='npdict.txt'):
    """Read the file into global variables _fw and _bw and _truename.

    Each line of the file is one word. _fw becomes the sorted list of
    canonical words, _bw the sorted list of reversed canonical words, and
    _truename maps each canonical word to the line it came from.
    Returns the lengths of the three collections.
    """
    global _fw, _bw, _truename
    _fw, _bw, _truename = [], [], {'': ''}
    # `with` closes the dictionary file; it used to be left open.
    with open(filename) as source:
        lines = source.read().splitlines()
    for word in lines:
        w = canonical(word)
        _fw.append(w)
        _bw.append(reverse(w))
        _truename[w] = word
    _fw.sort(); _bw.sort()
    return len(_fw), len(_bw), len(_truename)
def update(obj, **entries):
    "Set each keyword argument as an attribute in obj.__dict__; return obj."
    attributes = obj.__dict__
    attributes.update(entries)
    return obj
class PalDict:
    """Word lists for the palindrome search: look up canonical words that
    start or end with a given canonical substring, and map a canonical
    word back to its true name."""
    def __init__(self, fw=None, bw=None, truename=None):
        # Anything not supplied falls back to the globals set by read_dict().
        self.fw = fw or _fw
        self.bw = bw or _bw
        self.truename = truename or _truename
    def startswith(self, prefix, k=100):
        """Return up to k canonical words that start with prefix.
        If there are more than k, choose from them at random."""
        forward_words = self.fw
        return k_startingwith(k, forward_words, prefix)
    def endswith(self, suffix, k=100):
        """Return up to k canonical words that end with suffix.
        If there are more than k, choose from them at random.
        Both the suffix and the word returned are reversed."""
        backward_words = self.bw
        return k_startingwith(k, backward_words, suffix)
def k_startingwith(k, words, prefix):
    """Choose up to k words that match the prefix (choose randomly if > k).

    words: a sorted list of strings.
    Returns a new list in random order, never containing the same position
    of `words` twice.
    Candidates are the entries that sort after prefix and before
    prefix + 'zzzz'. bisect.bisect is bisect_right, so an entry exactly
    equal to prefix is not a candidate.
    """
    start = bisect.bisect(words, prefix)
    end = bisect.bisect(words, prefix + 'zzzz')
    n = end - start
    if k >= n:
        results = words[start:end]
        random.shuffle(results)
        return results
    # More candidates than wanted: draw k distinct positions, keeping the
    # order in which they were drawn.
    picked, results = set(), []
    while len(results) < k:
        i = random.randrange(start, end)
        if i not in picked:
            picked.add(i)
            results.append(words[i])
    return results
class Panama:
    """A depth-first search for long palindromes of the form
    'A man, a plan, ..., a canal, Panama'.

    Attributes:
      left:  canonical words on the left side, in reading order.
      right: words on the right side, each stored reversed, outermost first.
      diff:  letters on the left minus letters on the right.
      seen:  the forward-spelled words used so far (as dict keys).
      dict:  the PalDict that supplies candidate words.
      stack: one (missing substring, remaining candidates) pair per level.
      best:  the largest word count reported so far.
    """
    def __init__(self, L='A man, a plan', R='a canal, Panama', dict=None):
        left = [canonical(w) for w in L.split(', ')]
        right = [canonical(reverse(w)) for w in reverse(R.split(', '))]
        update(self, left=left, right=right, dict=dict or PalDict(), best=0,
               seen={}, diff=len(''.join(left)) - len(''.join(right)))
        # A comprehension rather than `left + map(...)`, which needs map to
        # return a list.
        for word in left + [reverse(w) for w in right]:
            self.seen[word] = 1
    def missing(self, k=20):
        """Return the substring that is missing, and candidate words."""
        if self.diff >= 0: # Left is longer, missing on right
            substr = self.left[-1][-self.diff:]
            return substr, self.dict.endswith(substr, k)
        else: # Right is longer, missing on left
            substr = self.right[-1][self.diff:]
            return substr, self.dict.startswith(substr, k)
    def search(self, k=200):
        "Search for palindromes; consider at most k words at each level."
        self.stack = [self.missing(k)]
        while self.stack:
            substr, words = self.stack[-1]
            if is_palindrome(substr):
                self.report()
            if words:
                self.extend(words.pop(), k)
            elif not self.backtrack():
                return
    def extend(self, word, k):
        "Add a new word (unless we've already seen it)."
        if self.diff >= 0: # Left is longer, add to right
            fword = reverse(word)
            if fword in self.seen: return
            self.diff -= len(fword)
            self.seen[fword] = 1
            self.right.append(word)
            self.stack.append(self.missing(k))
        else: # Right is longer, add to left
            if word in self.seen: return
            self.diff += len(word)
            self.seen[word] = 1
            self.left.append(word)
            self.stack.append(self.missing(k))
    def backtrack(self):
        "Remove the last word added; return 0 if can't backtrack"
        if self.diff >= 0: # Left is longer, pop from left
            if not self.left: return 0
            word = self.left.pop()
            self.diff -= len(word)
            del self.seen[word]
        else: # Right is longer, pop from right
            if not self.right: return 0
            word = self.right.pop()
            self.diff += len(word)
            del self.seen[reverse(word)]
        self.stack.pop()
        return 1
    def report(self):
        "Write current state to log file, if it beats the last report by more than 200 words."
        if len(self) > self.best + 200:
            self.best = len(self)
            print(self.best)
            self.bestphrase = str(self)
            assert is_panama(self.bestphrase)
            # `with` closes the log file even if the write fails.
            with open('pallog%d.txt' % os.getpid(), 'w') as f:
                f.write(self.bestphrase + '\n')
    def __len__(self):
        "The number of words on both sides."
        return len(self.left) + len(self.right)
    def __str__(self):
        "The true names of the words, with '*****' between the left and right sides."
        truename = self.dict.truename
        lefts = [truename[w] for w in self.left]
        rights = [truename[reverse(w)] for w in reverse(self.right[:])]
        return ', '.join(lefts + ['*****'] + rights)
def reverse(x):
    """Reverse a list or string.

    A string (or instance of a str subclass) yields a new reversed str.
    Anything else is reversed in place with its .reverse() method and the
    same object is returned.
    """
    if isinstance(x, str):
        return x[::-1]
    x.reverse()
    return x
if __name__ == '__main__': read_dict(); p = Panama(); p.search()

262
py/pal2.py Normal file
View File

@@ -0,0 +1,262 @@
import random, re, bisect, time
"""Produce Panama-ish Palindromes. Copyright (C) 2002-2008, Peter Norvig."""
################ Checking for Palindromes
def is_panama(s):
    "True if string s is a palindrome whose comma-separated phrases are all different."
    if not is_palindrome(s):
        return False
    return is_unique(phrases(s))
def is_palindrome(s):
    "True if the canonical form of s reads the same in both directions."
    letters = canonical(s)
    return reversestr(letters) == letters
def phrases(s):
    "Split s at commas and strip surrounding whitespace from each piece."
    result = []
    for piece in s.split(','):
        result.append(piece.strip())
    return result
def canonical(word, sub=re.compile('''[-* \t\n\r.,;!?:()`"']''').sub):
    "Form used for comparisons: blanks and the listed punctuation removed, then lowercased."
    stripped = sub('', word)
    return stripped.lower()
################ Utilities
def reversestr(x):
    "Return x with its characters in the opposite order."
    backwards = x[::-1]
    return backwards
def is_unique(seq):
    "True when no element of seq occurs more than once."
    distinct = set(seq)
    return len(distinct) == len(seq)
def update(obj, **entries):
    "Set attributes on obj from the keyword arguments; return obj."
    vars(obj).update(entries)   # writes straight into the instance __dict__
    return obj
################ Reading in a dictionary
class PalDict:
    """A dictionary from which you can find canonical words that start or end
    with a given canonical substring, and find the true name of a
    canonical word with d.truename[canonicalword]."""

    def __init__(self, k=1000, filename='npdict.txt'):
        """Read one phrase per line from filename.

        k is the most candidates that startswith/endswith return.
        """
        words, rwords, truename = [], [], {'': '', 'panama': 'Panama!'}
        with open(filename) as f:   # close the handle instead of leaking it
            lines = f.read().splitlines()
        for tword in lines:
            word = canonical(tword)
            words.append(word)
            rwords.append(reversestr(word))
            truename[word] = tword
        # Both lists are sorted so prefix lookups can use bisect.
        words.sort()
        rwords.sort()
        update(self, k=k, words=words, rwords=rwords, truename=truename,
               reversibles={}, rangek=range(k), tryharder=False)

    def startswith(self, prefix):
        """Return up to k canonical words that start with prefix.
        If there are more than k, choose from them at random."""
        return self._k_startingwith(self.words, prefix)

    def endswith(self, rsuffix):
        """Return up to k canonical words that end with the reversed suffix.
        If you want words ending in 'ing', ask for d.endswith('gni').
        If there are more than k, choose from them at random."""
        # Build a real list (not a lazy map): the search pops candidates off it.
        return [reversestr(w) for w in self._k_startingwith(self.rwords, rsuffix)]

    def __contains__(self, word):
        "True when word is a known canonical word."
        return word in self.truename

    def reversible_words(self):
        "Find words that have a reverse in the dict, like {'Camus': 'Sumac'}"
        if not self.reversibles:   # computed on first use, then reused
            reversibles = self.reversibles
            for rw in self.rwords:
                if rw in self:
                    w = reversestr(rw)
                    if w != rw and w not in reversibles:
                        reversibles[w] = rw
            self.reversibles = reversibles
        return self.reversibles

    def _k_startingwith(self, words, prefix):
        """Return up to k entries of the sorted list words that start with prefix.

        words must be self.words or self.rwords.
        """
        start = bisect.bisect_left(words, prefix)
        # prefix + 'zzzz' serves as an upper bound for words starting with prefix.
        end = bisect.bisect(words, prefix + 'zzzz')
        results = words[start:end]   # all the words that start with prefix
        if self.k < len(results):    # too many: sample k of them
            # Sampling the slice selects the same positions as sampling the
            # index range, and needs no Python-2-only xrange.
            results = random.sample(results, self.k)
        random.shuffle(results)
        ## Consider words that are prefixes of the prefix.
        ## This is very slow, so don't use it until late in the game.
        if self.tryharder:
            for i in range(3, len(prefix)):
                w = prefix[0:i]
                # Identity test: tells the two lists apart without comparing
                # every element as '==' would.
                if ((words is self.words and w in self.truename) or
                        (words is self.rwords and reversestr(w) in self.truename)):
                    results.append(w)
        return results
# Shared module-level dictionary, built at import time from PalDict's default
# file ('npdict.txt'); it is the default `dict` argument of Panama and is read
# by anpdictshort.
paldict = PalDict()
def anpdictshort():
    """Find the words that are valid when every phrase must start with 'a'.

    Reads 'anpdict.txt' and writes the true names of the valid words, one
    per line, to 'anpdict-short.txt'.
    """
    def segment(word):
        "The non-empty pieces of word between occurrences of 'a'."
        return [s for s in word.split('a') if s]

    def valid(word):
        "True when the reverse of every segment of word is itself a segment."
        return all(reversestr(s) in segments for s in segment(word))

    # A real list is needed because words is traversed twice below; a lazy
    # map would be exhausted after the first pass.
    with open('anpdict.txt') as infile:
        words = [canonical(line) for line in infile]
    segments = set(s for w in words for s in segment(canonical(w)))
    valid_words = [paldict.truename[w] for w in words if valid(w)]
    with open('anpdict-short.txt', 'w') as outfile:
        outfile.write('\n'.join(valid_words))
################ Search for a palindrome
class Panama:
    """State of a search for a long Panama-style palindrome.

    Words are added to a left list and a right list; .stack records what
    was done and which candidates remain, so search() can backtrack.
    """

    # time.clock was removed in Python 3.8; use it where it exists (keeping
    # the original timing behaviour) and fall back to perf_counter otherwise.
    _clock = staticmethod(getattr(time, 'clock', None) or time.perf_counter)

    def __init__(self, L='A man, a plan', R='a canal, Panama', dict=paldict):
        """Start from the comma-separated phrases L (left) and R (right)."""
        ## .left and .right hold lists of canonical words
        ## .diff holds the number of characters that are not matched,
        ##    positive for words on left, negative for right.
        ## .stack holds (action, side, substr, arg) tuples
        update(self, left=[], right=[], best=0, seen={}, diff=0, stack=[],
               used_reversibles=False, starttime=self._clock(), dict=dict)
        for word in L.split(','):
            self.add('left', canonical(word))
        # R is reversed before splitting, so its phrases are added last-first.
        for rword in reversestr(R).split(','):
            self.add('right', canonical(reversestr(rword)))
        self.consider_candidates()

    def search(self, steps=50000000):
        """Search for palindromes for at most `steps` iterations.

        Returns 'done' if the stack empties, or None when steps run out.
        """
        remaining = steps   # plain counter instead of the Python-2-only xrange
        while remaining > 0:
            remaining -= 1
            if not self.stack:
                return 'done'
            action, side, substr, arg = self.stack[-1]
            if action == 'added':                 # undo the last word added
                self.remove(side, arg)
            elif action == 'trying' and arg:      # try the next word if there is one
                if self.add(side, arg.pop()):
                    self.consider_candidates()
            elif action == 'trying' and not arg:  # otherwise backtrack
                self.stack.pop()
            else:
                raise ValueError(action)

    def add(self, dir, word):
        "Add word to side dir ('left' or 'right'); return False if it was already used."
        if word in self.seen:
            return False
        getattr(self, dir).append(word)
        self.diff += factor[dir] * len(word)
        self.seen[word] = True
        self.stack.append(('added', dir, '?', word))
        return True

    def remove(self, dir, word):
        "Remove word, which must be the last one added on side dir, and pop the stack."
        oldword = getattr(self, dir).pop()
        assert word == oldword
        self.diff -= factor[dir] * len(word)
        del self.seen[word]
        self.stack.pop()

    def consider_candidates(self):
        """Push a new state with a set of candidate words onto stack."""
        if self.diff > 0:    # Left is longer, consider adding on right
            side = 'right'
            substr = self.left[-1][-self.diff:]
            candidates = self.dict.endswith(substr)
        elif self.diff < 0:  # Right is longer, consider adding on left
            side = 'left'
            substr = reversestr(self.right[-1][0:-self.diff])
            candidates = self.dict.startswith(substr)
        else:                # Both sides are same size
            side = 'left'
            if not self.used_reversibles:
                self.report()
                self.add_reversibles()
            substr = ''
            candidates = self.dict.startswith('')
        # If the unmatched part reads the same both ways, the whole phrase is
        # a palindrome, so report it.
        if substr == reversestr(substr):
            self.report()
        self.stack.append(('trying', side, substr, candidates))

    def add_reversibles(self):
        "Add in reversible words."
        print('using reversibles ...')
        for (word, rword) in self.dict.reversible_words().items():
            if word not in self.seen and rword not in self.seen:
                self.add('left', word)
                self.add('right', rword)
        self.used_reversibles = True
        self.stack = []   # clearing the stack means search() cannot undo these words
        print('...done')

    def report(self):
        "Report a new palindrome to log file (if it is sufficiently big)."
        N = len(self)
        if N > 13333:
            self.dict.tryharder = True
        if N > self.best and (N > 12500 or N > self.best+500):
            self.best = len(self)
            self.bestphrase = str(self)
            print('%5d phrases (%5d words) in %3d seconds' % (
                self.best, self.bestphrase.count(' ')+1,
                self._clock() - self.starttime))
            assert is_panama(self.bestphrase)
            # 'with' closes the log even if the write fails.
            with open('pallog%d.txt' % (id(self) % 10000), 'w') as f:
                f.write(self.bestphrase + '\n')

    def __len__(self):
        "Number of words on both sides together."
        return len(self.left) + len(self.right)

    def __str__(self):
        "The phrase in reading order: left words, then right words last-added first."
        truename = self.dict.truename
        lefts = [truename[w] for w in self.left]
        rights = [truename[w] for w in self.right]
        return ', '.join(lefts + rights[::-1])
# Sign applied to a word's length when Panama.add/remove update .diff:
# words on the left count positive, words on the right negative.
factor = {'left': +1, 'right': -1}
# Note that we only allow one truename per canonical name. Occasionally
# this means we miss a good word (as in "a node" vs. "an ode"), but there
# are only 665 of these truename collisions, and most of them are of the
# form "a mark-up" vs. "a markup" so it seemed better to disallow them.
################ Unit Tests
def tests(p=None):
    """Run the unit tests; print a message when they all pass.

    p is a Panama whose .dict is checked.  When omitted, one is built at
    call time; a `Panama()` default would be built once, at definition time.
    """
    if p is None:
        p = Panama()
    assert is_panama('A man, a plan, a canal, Panama.')
    assert is_panama('''A (man), a plan,,;, a ```canal?'' -- Panama!''')
    assert not is_panama('A man, a plan, a radar, a canal, Panama.')
    assert is_palindrome('A man, a plan, a canal, Panama.')
    assert is_palindrome('radar, radar? radar!')
    assert not is_palindrome('radars')
    assert phrases('A man, a plan, Panama') == ['A man', 'a plan', 'Panama']
    assert canonical('A man, a plan, a canal, Panama') == 'amanaplanacanalpanama'
    assert reversestr('foo') == 'oof'
    assert is_unique([1, 2, 3])
    assert not is_unique([1, 2, 2])
    d = p.dict

    def sameset(a, b):
        "True when a and b hold the same elements, ignoring order."
        return set(a) == set(b)

    assert 'panama' in d
    assert d.words[0] in d
    assert d.words[-1] in d
    assert sameset(d.startswith('aword'), ['awording', 'awordbreak',
        'awordiness', 'awordage', 'awordplay', 'awordlore', 'awordbook',
        'awordlessness', 'aword', 'awordsmith'])
    assert sameset(d.endswith('ytisob'), ['aglobosity', 'averbosity',
        'asubglobosity', 'anonverbosity', 'agibbosity'])
    d.tryharder = True
    assert sameset(d.startswith('oklahoma'), ['oklahoma', 'okla'])
    d.tryharder = False
    assert d.startswith('oklahoma') == ['oklahoma']
    assert d.startswith('fsfdsfdsfds') == []
    print('all tests pass')
if __name__ == '__main__':
    # Build the starting state, check it with the unit tests, then search.
    p = Panama()
    tests(p)
    p.search()

170
py/pal3.py Normal file
View File

@@ -0,0 +1,170 @@
from collections import Counter, deque
import re
class PhraseDict(dict):
    """A dictionary of {letters: phrase}, such as {'donaldeknuth': 'Donald E. Knuth'}, with:
    .prefixes: Counter of {'pre': n} where n is the number of keys that start with 'pre'
    .suffixes: Counter of {'xes': n} where n is the number of keys that end with 'xes'"""

    def __init__(self, phrases):
        "Index each phrase (stripped of surrounding whitespace) under its letters() key."
        for raw in phrases:
            text = raw.strip()
            self[letters(text)] = text
        # Tally every prefix and suffix of every key.
        self.prefixes = Counter()
        for key in self:
            self.prefixes.update(prefixes(key))
        self.suffixes = Counter()
        for key in self:
            self.suffixes.update(suffixes(key))
def prefixes(phrase):
    "All non-empty leading slices of phrase, shortest first."
    found = []
    for size in range(1, len(phrase) + 1):
        found.append(phrase[:size])
    return found
def suffixes(phrase):
    "All non-empty trailing slices of phrase, shortest first."
    found = []
    for size in range(1, len(phrase) + 1):
        found.append(phrase[-size:])
    return found
def letters(phrase, sub=re.compile(r'[\W]+').sub):
    "Lowercase phrase after deleting every run of non-word (\\W) characters."
    kept = sub('', phrase)
    return kept.lower()
# The phrase dictionary shared by the search, loaded at import time.
# PhraseDict reads every line in its constructor, so the file can be closed
# as soon as construction finishes.
with open('npdict.txt') as _phrase_file:
    DICT = PhraseDict(_phrase_file)
class Panama:
    """A palindrome, or a state in the search for one.

    .left and .right hold the completed phrases on each side; .L and .R hold
    the partial phrases still being built in the middle.  .set contains
    every completed phrase, and .dict maps letters to the allowable phrases."""

    def __init__(self, left=['aman', 'aplan'], L='aca', R='', right=['acanal', 'panama'], dict=DICT):
        # The letters on the left must mirror the letters on the right.
        assert cat(left + [L]) == cat([R] + right)[::-1]
        self.dict = dict                # a {letters: actual_phrase} mapping
        self.left = list(left)          # complete phrases on the left
        self.right = deque(right)       # complete phrases on the right
        self.L = L                      # incomplete phrase on the left
        self.R = R                      # incomplete phrase on the right
        self.set = set(left + right)    # every complete phrase in use
        self.best = []                  # phrases of the longest palindrome found
        self.Nshown = 0                 # phrase count at the previous printout
        self.i = 0                      # number of search steps taken
        self.check()

    def __str__(self):
        return self.original_phrases(self.best)

    def original_phrases(self, phrases):
        "Comma-join the original spelling of each phrase key."
        originals = (self.dict[key] for key in phrases)
        return ', '.join(originals)

    def search(self, steps=10**5):
        """Depth-first search for palindromes, for at most `steps` iterations.

        The stack holds lists of untried actions and undo reminders.  Taking
        an action pushes back the remaining alternatives, a reminder to undo
        the action, and on top the actions applicable in the new state, so
        the search goes deeper first."""
        pending = [self.applicable_actions()]
        for self.i in range(steps):
            if not pending:
                return
            top = pending.pop()
            if isinstance(top, UndoCommand):
                self.undo(top)
                continue
            if not top:          # no alternatives left in this list
                continue
            act = top.pop()
            self.do(act)
            self.check()
            pending.append(top)
            pending.append(UndoCommand(act))
            pending.append(self.applicable_actions())

    def do(self, act):
        "Apply act: ',' closes the left phrase, ';' closes the right one, a letter extends both."
        if act == ',':
            self.set.add(self.L)
            self.left.append(self.L)
            self.L = ''
        elif act == ';':
            self.set.add(self.R)
            self.right.appendleft(self.R)
            self.R = ''
        else:
            # The letter goes at the end of L and the front of R, keeping the mirror.
            self.L, self.R = self.L + act, act + self.R

    def undo(self, act):
        "Reverse the effect of a previous do(act)."
        if act == ',':           # reopen the last phrase on the left
            assert self.L == ''
            self.L = self.left.pop()
            self.set.remove(self.L)
        elif act == ';':         # reopen the first phrase on the right
            assert self.R == ''
            self.R = self.right.popleft()
            self.set.remove(self.R)
        else:                    # drop the letter from both partial phrases
            self.L, self.R = self.L[:-1], self.R[1:]

    def check(self):
        "If the current state is a palindrome, record it when it is the longest, and maybe print."
        if not self.is_palindrome():
            return
        N = len(self.left) + len(self.right)
        if N <= len(self.best):
            return
        self.best = self.left + list(self.right)
        gain = N - self.Nshown
        if gain > 1000 or (N > 14000 and gain > 100) or N > 14500:
            self.Nshown = N
            print(self.report())

    def report(self):
        "One-line summary of the best palindrome: phrase, word and letter counts."
        N = len(self.best)
        # A phrase has one more word than it has spaces.
        nwords = N + sum(self.dict[p].count(' ') for p in self.best)
        nletters = sum(len(p) for p in self.best)
        template = 'Pal: {:6,d} phrases, {:6,d} words, {:6,d} letters (at step {:,d})'
        return template.format(N, nwords, nletters, self.i+1)

    def applicable_actions(self):
        "List the actions possible now; search() pops from the end, so the best letter is last."
        L, R, D = self.L, self.R, self.dict

        def score(A):
            "Number of keys starting with L+A times the number ending with A+R."
            return D.prefixes[L+A] * D.suffixes[A+R]

        actions = []
        if self.is_allowed(L):
            actions.append(',')
        if self.is_allowed(R):
            actions.append(';')
        ranked = sorted(alphabet, key=score)
        actions.extend(A for A in ranked if score(A) > 0)
        return actions

    def is_allowed(self, phrase):
        "A phrase may be completed if it is in the dictionary and not already used."
        return phrase in self.dict and phrase not in self.set

    def is_palindrome(self):
        "Is this a palindrome? (Does any extra .L or .R match the other side?)"
        if self.L == '' and self.left[-1].endswith(self.R):
            return True
        return self.R == '' and self.right[0].startswith(self.L)
# Letters that Panama.applicable_actions tries as single-letter actions.
alphabet = 'abcdefghijklmnopqrstuvwxyz'
# cat(list_of_strings) concatenates the strings.
cat = ''.join
# Stack entries in Panama.search are told apart by type: a str is a reminder
# to undo that action, a list is a set of actions still to try.
UndoCommand = str
DoCommand = list
################ Unit Tests
def test1():
    "Check the string helpers and the contents of the loaded DICT; return 'ok'."
    assert prefixes('hello') == ['h', 'he', 'hel', 'hell', 'hello']
    assert suffixes('hello') == ['o', 'lo', 'llo', 'ello', 'hello']
    for phrase, expected in [('a man', 'aman'),
                             ('an elk', 'anelk'),
                             ('Mr. T', 'mrt'),
                             ('Donald E. Knuth', 'donaldeknuth')]:
        assert letters(phrase) == expected
    assert len(DICT) == 125512
    for key in ('panama', 'aman'):
        assert key in DICT
    assert 'threemen' not in DICT
    assert DICT['acanal'] == 'a canal'
    return 'ok'
def test2():
    "Check Panama construction, is_palindrome and str() on three small states; return 'ok'."
    default_state = Panama()
    assert default_state.is_palindrome()
    assert str(default_state) == 'a man, a plan, a canal, Panama'

    # The middle 'acadd' + 'dd' mirrors correctly but is not yet a palindrome.
    unfinished = Panama(['aman', 'aplan'], 'acadd', 'dd', ['acanal', 'panama'])
    assert not unfinished.is_palindrome()

    tiny = Panama(['maya'], '', '', ['ayam'])
    assert tiny.is_palindrome()
    assert str(tiny) == 'Maya, a yam'
    return 'ok'
if __name__ == '__main__':
    # Build the default state, run the unit tests, search a million steps,
    # then print the summary line and the best palindrome found.
    p = Panama()
    test1()
    test2()
    p.search(10**6)
    print(p.report())
    print(str(p))

52
py/parse.py Normal file
View File

@@ -0,0 +1,52 @@
# Toy grammar: each key is a category, each value a list of alternatives.
# An alternative is a single symbol (a word or a category name) or a list of
# symbols that must appear in sequence.
grammar = {
    'Noun': ['stench', 'wumpus'],
    'Verb': ['is', 'smell'],
    'Adjective': ['dead', 'smelly'],
    'Adverb': ['left', 'back'],
    'Pronoun': ['me', 'you'],
    'Name': ['John', 'Mary'],
    'Article': ['the', 'a'],
    'Preposition': ['to', 'in'],
    'Conjunction': ['and', 'or'],
    'Digit': ['0', '1'],
    # This rule used to say 'Comjunction', which matches no category, so
    # conjoined sentences could never be parsed.
    'S': [['NP', 'VP'], ['S', 'Conjunction', 'S']],
    'NP': ['Pronoun', 'Noun', ['Article', 'Noun'], ['Digit', 'Digit'],
           ['NP', 'PP'], ['NP', 'RelClause']],
    'VP': ['Verb', ['VP', 'NP'], ['VP', 'Adjective'], ['VP', 'PP'],
           ['VP', 'Adverb']],
    'PP': [['Preposition', 'NP']],
    'RelClause': [['that', 'VP']]
    }
def parse(forest, grammar):
    """Parse forest (a list of words and (category, children) trees) bottom-up.

    Tries every rule at every position, replaces the matched span by a new
    (lhs, span) tree and recurses.  Returns the first tree found that spans
    the whole input with category 'S', or None if there is none.  Each
    reduction tried is printed as a trace.
    """
    if len(forest) == 1 and category(forest[0]) == 'S':
        return forest[0]
    for i in range(len(forest)):
        for lhs in grammar.keys():
            for rhs in grammar[lhs]:
                rhs = mklist(rhs)
                n = len(rhs)
                subsequence = forest[i:i+n]
                # Near the end of forest the slice can be shorter than rhs;
                # skip it rather than let match() index past the end.
                if len(subsequence) == n and match(subsequence, rhs):
                    print('%s %s => %s' % (subsequence, lhs, rhs))
                    forest2 = forest[:]
                    forest2[i:i+n] = [(lhs, subsequence)]
                    result = parse(forest2, grammar)
                    if result is not None:
                        return result
    return None
def mklist(x):
    "Return x unchanged if it is a list, otherwise a one-element list holding x."
    return x if type(x) is list else [x]
def match(forest, rhs):
    """Return 1 if the items of forest match the symbols of rhs one-for-one, else 0.

    An item matches a symbol when its category equals the symbol, or when
    the item is itself equal to the symbol (a literal word).
    """
    # A forest of the wrong length can never match; without this check a
    # forest shorter than rhs raised IndexError.
    if len(forest) != len(rhs):
        return 0
    for item, symbol in zip(forest, rhs):
        if category(item) != symbol and item != symbol:
            return 0
    return 1


def category(forest):
    "The category of a parsed tree (its first element), or 'word' for anything else."
    if isinstance(forest, tuple):
        return forest[0]
    return 'word'

110
py/py2html.py Normal file
View File

@@ -0,0 +1,110 @@
"""Pretty-print Python code to colorized, hyperlinked html.
In python, do:
py2html.convert_files(['file1.py', 'file2.py', ...])
From the shell, do:
python py2html.py *.py"""
import re, string, time, os
## NOTE: this module-level name shadows the builtin id() within this file.
id = r'[a-zA-Z_][a-zA-Z_0-9]*' ## RE for a Python identifier
g1, g2, g3, g4 = r'\1 \2 \3 \4'.split() ## backreferences '\1'..'\4', used in the replacement templates
def b(text):
    "Wrap text in an HTML <b> element."
    template = '<b>%s</b>'
    return template % text
def i(text):
    "Wrap text in an HTML <i> element."
    template = '<i>%s</i>'
    return template % text
def color(rgb, text):
    "Wrap text in a <font> element whose color attribute is rgb."
    template = '<font color="%s">%s</font>'
    return template % (rgb, text)
def link(url, anchor):
    "Build an <a> element that points at url and displays anchor."
    template = '<a href="%s">%s</a>'
    return template % (url, anchor)
def hilite(text, bg="ffff00"):
    """Return text in bold on a bg-coloured background, inside a named anchor.

    The anchor name is the text itself.  The <a> element is now closed; the
    original left it open.
    """
    return '<b style="background-color:%s"><a name="%s">%s</a></b>' % (
        bg, text, text)
def modulelink(module, baseurl=''):
    """Hyperlink to a module: under baseurl if its .py file is one of the
    local files, otherwise on python.org."""
    is_local = (module + '.py') in local_files
    if not is_local:
        baseurl = 'http://www.python.org/doc/current/lib/module-'
    target = baseurl + module + '.html'
    return link(target, module)
def importer(m):
    """re.sub callback: turn the module list in group 2 of match m (such as
    'utils, math, re') into HTML links, keeping groups 1 and 3 around it."""
    names = m.group(2).split(',')
    links = []
    for name in names:
        links.append(modulelink(name.strip()))
    return m.group(1) + ', '.join(links) + m.group(3)
def find1(regex, str):
    "Return the first re.findall result for regex in str, or '&nbsp;' if there is none."
    hits = re.findall(regex, str)
    if hits:
        return hits[0]
    return '&nbsp;'
def convert_files(filenames, local_filenames=None, tblfile='readme.htm'):
    """Convert files of python code to colorized HTML.

    filenames: the .py files to convert; each result is written next to its
        source with the last three characters of the name replaced by '.htm'.
    local_filenames: the files that modulelink treats as local (defaults to
        filenames).
    tblfile: an existing HTML file whose '<table border=1>' section is
        replaced by a summary of the converted files; pass a false value to
        skip the summary.
    """
    import functools   # local import: only needed for the cmp-style sort below
    global local_files
    local_files = local_filenames or filenames
    summary_table = {}
    for f in filenames:
        with open(f) as src:   # close each source file after reading it
            fulltext = '\n'.join([line.rstrip() for line in src.readlines()])
        text = fulltext
        # The replacements are applied in order, each to the previous result.
        for (pattern, repl) in replacements:
            text = re.sub(pattern, repl, text)
        text = '<<header("AIMA Python file: %s")>><pre>%s</pre><<footer>>' % (
            f, text)
        with open(f[:-3]+'.htm', 'w') as out:
            out.write(text)
        if tblfile:
            ch = find1(r'Chapters?\s+([^ \)"]*)', fulltext)
            module = f.replace('.py','')
            lines = fulltext.count('\n')
            desc = find1(r'"""(.*)\n', fulltext).replace('"""', '')
            summary_table.setdefault(ch,[]).append((module, lines, desc))
    if tblfile:
        totallines = 0
        tbl = ["<tr><th>Chapter<th>Module<th>Files<th>Lines<th>Description"]
        fmt = "<tr><td align=right>%s<th>%s<td>%s<td align=right>%s<td>%s"
        # cmp_to_key lets the cmp-style num_cmp drive sorted() on Python 2.7 and 3.
        items = sorted(summary_table.items(), key=functools.cmp_to_key(num_cmp))
        for (ch, entries) in items:
            for (module, lines, desc) in entries:
                totallines += lines
                files = link(module+'.py', '.py')
                if os.path.exists(module+'.txt'):
                    files += ' ' + link(module+'.txt', '.txt')
                tbl += [fmt % (ch, link(module+'.html', module),
                               files, lines, desc)]
        tbl += [fmt % ('', '', '', totallines, ''), "</table>"]
        ## Now read the tblfile, and replace the first table with tbl
        with open(tblfile) as tf:
            old = tf.read()
        body = '\n'.join(tbl)
        # A function replacement inserts body verbatim.  As a template string,
        # any backslash in a description would be read as an escape.
        new = re.sub("(?s)(<table border=1>)(.*)(</table>)",
                     lambda m: m.group(1) + body + m.group(3), old, count=1)
        with open(tblfile, 'w') as tf:
            tf.write(new)
def num_cmp(x, y):
    """cmp-style comparison (-1, 0 or 1) of two sequences by their first item.

    A first item containing digits is ranked by the first integer in it; one
    without digits is ranked as itself, after all numeric items.  This
    reproduces the Python 2 ordering (None < numbers < strings) without
    using cmp() or mixed-type comparison, neither of which exists in Python 3.
    """
    def sort_key(item):
        label = item[0]
        nums = re.findall('[0-9]+', label or '')
        if nums:
            return (1, int(nums[0]), '')
        if label is None:
            return (0, 0, '')
        return (2, 0, label)
    a, b = sort_key(x), sort_key(y)
    return (a > b) - (a < b)
### Above is general (more or less); below is specific to my files.
def comment(text):
    "Mark text up as green italics."
    green = color("green", text)
    return i(green)
## (pattern, replacement) pairs that convert_files applies with re.sub, in
## this order, each to the result of the one before.  A replacement is either
## a template string built from g1..g4 or the importer callback.
replacements = [
## Escape the HTML special characters before any markup is inserted.
(r'&', '&amp;'),
(r'<', '&lt;'),
(r'>', '&gt;'),
## A line of 10 or more #/_ characters after leading #'s becomes a rule.
(r'(?ms)^#+[#_]{10,} *\n', '<hr>'),
## Single- and double-quoted strings, then triple-quoted strings.
(r"""('[^']*?'|"[^"]*?")""", comment(g1)),
(r'(?s)(""".*?"""|' + r"'''.*?''')", comment(g1)),
## From '#' to the end of the line.
(r'(#.*)', color("cc33cc", g1)),
## Assignments that start in column 0, then def and class headers.
(r'(?m)(^[a-zA-Z][a-zA-Z_0-9, ]+)(\s+=\s+)', hilite(g1) + g2),
(r'(?m)(^\s*)(def\s+)(%s)' % id, g1 + b(g2) + hilite(g3)),
(r'(?m)(^\s*)(class\s+)(%s)' % id, g1 + b(g2) + hilite(g3)),
## Module names in import statements become links (see importer).
(r'(from\s+)([a-z]+)(\s+import)', importer),
(r'(import\s+)([a-z, ]+)(\s|\n|$|,)', importer),
]
if __name__ == '__main__':
    import sys, glob
    # Expand each command-line argument as a glob pattern, then convert all matches.
    files = [name for arg in sys.argv[1:] for name in glob.glob(arg)]
    convert_files(files)
## ENHANCEMENTS:
## Can get confused with """ and '''; not a problem in practice.
## Maybe we should create an index
## Probably should switch to Doxygen

106
py/spell.py Normal file
View File

@@ -0,0 +1,106 @@
"""Spelling Corrector in Python 3; see http://norvig.com/spell-correct.html
Copyright (c) 2007-2016 Peter Norvig
MIT license: www.opensource.org/licenses/mit-license.php
"""
################ Spelling Corrector
import re
from collections import Counter
def words(text):
    "List the runs of word characters (\\w+) in text, lowercased, in order."
    lowered = text.lower()
    return re.findall(r'\w+', lowered)
# Word -> occurrence count for the corpus in 'big.txt', built at import time.
# The 'with' block closes the corpus file as soon as it has been read.
with open('big.txt') as _corpus:
    WORDS = Counter(words(_corpus.read()))
def P(word, N=sum(WORDS.values())):
    "Relative frequency of `word`: its count in WORDS divided by N (by default the total count)."
    count = WORDS[word]
    return count / N
def correction(word):
    "Return the candidate correction for word that has the highest probability P."
    options = candidates(word)
    return max(options, key=P)
def candidates(word):
    """Possible corrections for word: the first non-empty one of the word itself
    if known, known words one edit away, known words two edits away; failing
    all of those, the word itself."""
    # Each generator runs only if the earlier ones found no known word.
    for generate in (lambda: [word], lambda: edits1(word), lambda: edits2(word)):
        found = known(generate())
        if found:
            return found
    return [word]
def known(words):
    "Return, as a set, those of `words` that are keys of WORDS."
    return {w for w in words if w in WORDS}
def edits1(word):
    "The set of strings one delete, transpose, replace or insert away from `word`."
    letters = 'abcdefghijklmnopqrstuvwxyz'
    # Every way of cutting word into a (head, tail) pair, including empty ends.
    cuts = [(word[:k], word[k:]) for k in range(len(word) + 1)]
    results = set()
    for head, tail in cuts:
        if tail:
            results.add(head + tail[1:])                            # delete
        if len(tail) > 1:
            results.add(head + tail[1] + tail[0] + tail[2:])        # transpose
        for c in letters:
            if tail:
                results.add(head + c + tail[1:])                    # replace
            results.add(head + c + tail)                            # insert
    return results
def edits2(word):
    "Lazily generate the strings reachable from `word` by two successive single edits."
    first_round = edits1(word)
    return (twice for once in first_round for twice in edits1(once))
################ Test Code
def unit_tests():
    "Assert expected results for correction, words, WORDS and P; return a success message."
    expected_corrections = [
        ('speling', 'spelling'),               # insert
        ('korrectud', 'corrected'),            # replace 2
        ('bycycle', 'bicycle'),                # replace
        ('inconvient', 'inconvenient'),        # insert 2
        ('arrainged', 'arranged'),             # delete
        ('peotry', 'poetry'),                  # transpose
        ('peotryy', 'poetry'),                 # transpose + delete
        ('word', 'word'),                      # known
        ('quintessential', 'quintessential'),  # unknown
    ]
    for wrong, right in expected_corrections:
        assert correction(wrong) == right
    assert words('This is a TEST.') == ['this', 'is', 'a', 'test']
    assert Counter(words('This is a test. 123; A TEST this is.')) == (
           Counter({'123': 1, 'a': 2, 'is': 2, 'test': 2, 'this': 2}))
    # These figures are tied to the exact contents of big.txt.
    assert len(WORDS) == 32192
    assert sum(WORDS.values()) == 1115504
    assert WORDS.most_common(10) == [
        ('the', 79808),
        ('of', 40024),
        ('and', 38311),
        ('to', 28765),
        ('in', 22020),
        ('a', 21124),
        ('that', 12512),
        ('he', 12401),
        ('was', 11410),
        ('it', 10681)]
    assert WORDS['the'] == 79808
    assert P('quintessential') == 0
    assert 0.07 < P('the') < 0.08
    return 'unit_tests pass'
def spelltest(tests, verbose=False):
    """Run correction(wrong) on all (right, wrong) pairs; report results.

    Prints the fraction corrected, the fraction of failures whose right
    answer is not in WORDS, and the throughput.  With verbose=True each
    failure is printed as well.
    """
    import time
    # time.clock() was removed in Python 3.8; perf_counter is its replacement.
    start = time.perf_counter()
    good, unknown = 0, 0
    n = len(tests)
    for right, wrong in tests:
        w = correction(wrong)
        good += (w == right)
        if w != right:
            unknown += (right not in WORDS)
            if verbose:
                print('correction({}) => {} ({}); expected {} ({})'
                      .format(wrong, w, WORDS[w], right, WORDS[right]))
    dt = time.perf_counter() - start
    # Guard the divisions: an empty test set or a zero elapsed time would
    # otherwise raise ZeroDivisionError.
    total = n or 1
    rate = n / dt if dt else float('inf')
    print('{:.0%} of {} correct ({:.0%} unknown) at {:.0f} words per second '
          .format(good / total, n, unknown / total, rate))
def Testset(lines):
    "Turn 'right: wrong1 wrong2' lines into [('right', 'wrong1'), ('right', 'wrong2')] pairs."
    pairs = []
    for line in lines:
        right, wrongs = line.split(':')
        for wrong in wrongs.split():
            pairs.append((right, wrong))
    return pairs
if __name__ == '__main__':
    print(unit_tests())
    # Run both test sets in order, closing each file once it has been parsed.
    for testset_name in ('spell-testset1.txt', 'spell-testset2.txt'):
        with open(testset_name) as testset_file:
            testset = Testset(testset_file)
        spelltest(testset)

50
py/sudoku-easy50.txt Normal file
View File

@@ -0,0 +1,50 @@
003020600900305001001806400008102900700000008006708200002609500800203009005010300
200080300060070084030500209000105408000000000402706000301007040720040060004010003
000000907000420180000705026100904000050000040000507009920108000034059000507000000
030050040008010500460000012070502080000603000040109030250000098001020600080060020
020810740700003100090002805009040087400208003160030200302700060005600008076051090
100920000524010000000000070050008102000000000402700090060000000000030945000071006
043080250600000000000001094900004070000608000010200003820500000000000005034090710
480006902002008001900370060840010200003704100001060049020085007700900600609200018
000900002050123400030000160908000000070000090000000205091000050007439020400007000
001900003900700160030005007050000009004302600200000070600100030042007006500006800
000125400008400000420800000030000095060902010510000060000003049000007200001298000
062340750100005600570000040000094800400000006005830000030000091006400007059083260
300000000005009000200504000020000700160000058704310600000890100000067080000005437
630000000000500008005674000000020000003401020000000345000007004080300902947100080
000020040008035000000070602031046970200000000000501203049000730000000010800004000
361025900080960010400000057008000471000603000259000800740000005020018060005470329
050807020600010090702540006070020301504000908103080070900076205060090003080103040
080005000000003457000070809060400903007010500408007020901020000842300000000100080
003502900000040000106000305900251008070408030800763001308000104000020000005104800
000000000009805100051907420290401065000000000140508093026709580005103600000000000
020030090000907000900208005004806500607000208003102900800605007000309000030020050
005000006070009020000500107804150000000803000000092805907006000030400010200000600
040000050001943600009000300600050002103000506800020007005000200002436700030000040
004000000000030002390700080400009001209801307600200008010008053900040000000000800
360020089000361000000000000803000602400603007607000108000000000000418000970030014
500400060009000800640020000000001008208000501700500000000090084003000600060003002
007256400400000005010030060000508000008060200000107000030070090200000004006312700
000000000079050180800000007007306800450708096003502700700000005016030420000000000
030000080009000500007509200700105008020090030900402001004207100002000800070000090
200170603050000100000006079000040700000801000009050000310400000005000060906037002
000000080800701040040020030374000900000030000005000321010060050050802006080000000
000000085000210009960080100500800016000000000890006007009070052300054000480000000
608070502050608070002000300500090006040302050800050003005000200010704090409060701
050010040107000602000905000208030501040070020901080406000401000304000709020060010
053000790009753400100000002090080010000907000080030070500000003007641200061000940
006080300049070250000405000600317004007000800100826009000702000075040190003090600
005080700700204005320000084060105040008000500070803010450000091600508007003010600
000900800128006400070800060800430007500000009600079008090004010003600284001007000
000080000270000054095000810009806400020403060006905100017000620460000038000090000
000602000400050001085010620038206710000000000019407350026040530900020007000809000
000900002050123400030000160908000000070000090000000205091000050007439020400007000
380000000000400785009020300060090000800302009000040070001070500495006000000000092
000158000002060800030000040027030510000000000046080790050000080004070100000325000
010500200900001000002008030500030007008000500600080004040100700000700006003004050
080000040000469000400000007005904600070608030008502100900000005000781000060000010
904200007010000000000706500000800090020904060040002000001607000000000030300005702
000700800006000031040002000024070000010030080000060290000800070860000500002006000
001007090590080001030000080000005800050060020004100000080000030100020079020700400
000003017015009008060000000100007000009000200000500004000000020500600340340200000
300200000000107000706030500070009080900020004010800050009040301000702000000008006

11
py/sudoku-hardest.txt Normal file
View File

@@ -0,0 +1,11 @@
85...24..72......9..4.........1.7..23.5...9...4...........8..7..17..........36.4.
..53.....8......2..7..1.5..4....53...1..7...6..32...8..6.5....9..4....3......97..
12..4......5.69.1...9...5.........7.7...52.9..3......2.9.6...5.4..9..8.1..3...9.4
...57..3.1......2.7...234......8...4..7..4...49....6.5.42...3.....7..9....18.....
7..1523........92....3.....1....47.8.......6............9...5.6.4.9.7...8....6.1.
1....7.9..3..2...8..96..5....53..9...1..8...26....4...3......1..4......7..7...3..
1...34.8....8..5....4.6..21.18......3..1.2..6......81.52..7.9....6..9....9.64...2
...92......68.3...19..7...623..4.1....1...7....8.3..297...8..91...5.72......64...
.6.5.4.3.1...9...8.........9...5...6.4.6.2.7.7...4...5.........4...8...1.5.2.3.4.
7.....4...2..7..8...3..8.799..5..3...6..2..9...1.97..6...3..9...3..4..6...9..1.35
....7..2.8.......6.1.2.5...9.54....8.........3....85.1...3.2.8.4.......9.7..6....

95
py/sudoku-top95.txt Normal file
View File

@@ -0,0 +1,95 @@
4.....8.5.3..........7......2.....6.....8.4......1.......6.3.7.5..2.....1.4......
52...6.........7.13...........4..8..6......5...........418.........3..2...87.....
6.....8.3.4.7.................5.4.7.3..2.....1.6.......2.....5.....8.6......1....
48.3............71.2.......7.5....6....2..8.............1.76...3.....4......5....
....14....3....2...7..........9...3.6.1.............8.2.....1.4....5.6.....7.8...
......52..8.4......3...9...5.1...6..2..7........3.....6...1..........7.4.......3.
6.2.5.........3.4..........43...8....1....2........7..5..27...........81...6.....
.524.........7.1..............8.2...3.....6...9.5.....1.6.3...........897........
6.2.5.........4.3..........43...8....1....2........7..5..27...........81...6.....
.923.........8.1...........1.7.4...........658.........6.5.2...4.....7.....9.....
6..3.2....5.....1..........7.26............543.........8.15........4.2........7..
.6.5.1.9.1...9..539....7....4.8...7.......5.8.817.5.3.....5.2............76..8...
..5...987.4..5...1..7......2...48....9.1.....6..2.....3..6..2.......9.7.......5..
3.6.7...........518.........1.4.5...7.....6.....2......2.....4.....8.3.....5.....
1.....3.8.7.4..............2.3.1...........958.........5.6...7.....8.2...4.......
6..3.2....4.....1..........7.26............543.........8.15........4.2........7..
....3..9....2....1.5.9..............1.2.8.4.6.8.5...2..75......4.1..6..3.....4.6.
45.....3....8.1....9...........5..9.2..7.....8.........1..4..........7.2...6..8..
.237....68...6.59.9.....7......4.97.3.7.96..2.........5..47.........2....8.......
..84...3....3.....9....157479...8........7..514.....2...9.6...2.5....4......9..56
.98.1....2......6.............3.2.5..84.........6.........4.8.93..5...........1..
..247..58..............1.4.....2...9528.9.4....9...1.........3.3....75..685..2...
4.....8.5.3..........7......2.....6.....5.4......1.......6.3.7.5..2.....1.9......
.2.3......63.....58.......15....9.3....7........1....8.879..26......6.7...6..7..4
1.....7.9.4...72..8.........7..1..6.3.......5.6..4..2.........8..53...7.7.2....46
4.....3.....8.2......7........1...8734.......6........5...6........1.4...82......
.......71.2.8........4.3...7...6..5....2..3..9........6...7.....8....4......5....
6..3.2....4.....8..........7.26............543.........8.15........8.2........7..
.47.8...1............6..7..6....357......5....1..6....28..4.....9.1...4.....2.69.
......8.17..2........5.6......7...5..1....3...8.......5......2..4..8....6...3....
38.6.......9.......2..3.51......5....3..1..6....4......17.5..8.......9.......7.32
...5...........5.697.....2...48.2...25.1...3..8..3.........4.7..13.5..9..2...31..
.2.......3.5.62..9.68...3...5..........64.8.2..47..9....3.....1.....6...17.43....
.8..4....3......1........2...5...4.69..1..8..2...........3.9....6....5.....2.....
..8.9.1...6.5...2......6....3.1.7.5.........9..4...3...5....2...7...3.8.2..7....4
4.....5.8.3..........7......2.....6.....5.8......1.......6.3.7.5..2.....1.8......
1.....3.8.6.4..............2.3.1...........958.........5.6...7.....8.2...4.......
1....6.8..64..........4...7....9.6...7.4..5..5...7.1...5....32.3....8...4........
249.6...3.3....2..8.......5.....6......2......1..4.82..9.5..7....4.....1.7...3...
...8....9.873...4.6..7.......85..97...........43..75.......3....3...145.4....2..1
...5.1....9....8...6.......4.1..........7..9........3.8.....1.5...2..4.....36....
......8.16..2........7.5......6...2..1....3...8.......2......7..3..8....5...4....
.476...5.8.3.....2.....9......8.5..6...1.....6.24......78...51...6....4..9...4..7
.....7.95.....1...86..2.....2..73..85......6...3..49..3.5...41724................
.4.5.....8...9..3..76.2.....146..........9..7.....36....1..4.5..6......3..71..2..
.834.........7..5...........4.1.8..........27...3.....2.6.5....5.....8........1..
..9.....3.....9...7.....5.6..65..4.....3......28......3..75.6..6...........12.3.8
.26.39......6....19.....7.......4..9.5....2....85.....3..2..9..4....762.........4
2.3.8....8..7...........1...6.5.7...4......3....1............82.5....6...1.......
6..3.2....1.....5..........7.26............843.........8.15........8.2........7..
1.....9...64..1.7..7..4.......3.....3.89..5....7....2.....6.7.9.....4.1....129.3.
.........9......84.623...5....6...453...1...6...9...7....1.....4.5..2....3.8....9
.2....5938..5..46.94..6...8..2.3.....6..8.73.7..2.........4.38..7....6..........5
9.4..5...25.6..1..31......8.7...9...4..26......147....7.......2...3..8.6.4.....9.
...52.....9...3..4......7...1.....4..8..453..6...1...87.2........8....32.4..8..1.
53..2.9...24.3..5...9..........1.827...7.........981.............64....91.2.5.43.
1....786...7..8.1.8..2....9........24...1......9..5...6.8..........5.9.......93.4
....5...11......7..6.....8......4.....9.1.3.....596.2..8..62..7..7......3.5.7.2..
.47.2....8....1....3....9.2.....5...6..81..5.....4.....7....3.4...9...1.4..27.8..
......94.....9...53....5.7..8.4..1..463...........7.8.8..7.....7......28.5.26....
.2......6....41.....78....1......7....37.....6..412....1..74..5..8.5..7......39..
1.....3.8.6.4..............2.3.1...........758.........7.5...6.....8.2...4.......
2....1.9..1..3.7..9..8...2.......85..6.4.........7...3.2.3...6....5.....1.9...2.5
..7..8.....6.2.3...3......9.1..5..6.....1.....7.9....2........4.83..4...26....51.
...36....85.......9.4..8........68.........17..9..45...1.5...6.4....9..2.....3...
34.6.......7.......2..8.57......5....7..1..2....4......36.2..1.......9.......7.82
......4.18..2........6.7......8...6..4....3...1.......6......2..5..1....7...3....
.4..5..67...1...4....2.....1..8..3........2...6...........4..5.3.....8..2........
.......4...2..4..1.7..5..9...3..7....4..6....6..1..8...2....1..85.9...6.....8...3
8..7....4.5....6............3.97...8....43..5....2.9....6......2...6...7.71..83.2
.8...4.5....7..3............1..85...6.....2......4....3.26............417........
....7..8...6...5...2...3.61.1...7..2..8..534.2..9.......2......58...6.3.4...1....
......8.16..2........7.5......6...2..1....3...8.......2......7..4..8....5...3....
.2..........6....3.74.8.........3..2.8..4..1.6..5.........1.78.5....9..........4.
.52..68.......7.2.......6....48..9..2..41......1.....8..61..38.....9...63..6..1.9
....1.78.5....9..........4..2..........6....3.74.8.........3..2.8..4..1.6..5.....
1.......3.6.3..7...7...5..121.7...9...7........8.1..2....8.64....9.2..6....4.....
4...7.1....19.46.5.....1......7....2..2.3....847..6....14...8.6.2....3..6...9....
......8.17..2........5.6......7...5..1....3...8.......5......2..3..8....6...4....
963......1....8......2.5....4.8......1....7......3..257......3...9.2.4.7......9..
15.3......7..4.2....4.72.....8.........9..1.8.1..8.79......38...........6....7423
..........5724...98....947...9..3...5..9..12...3.1.9...6....25....56.....7......6
....75....1..2.....4...3...5.....3.2...8...1.......6.....1..48.2........7........
6.....7.3.4.8.................5.4.8.7..2.....1.3.......2.....5.....7.9......1....
....6...4..6.3....1..4..5.77.....8.5...8.....6.8....9...2.9....4....32....97..1..
.32.....58..3.....9.428...1...4...39...6...5.....1.....2...67.8.....4....95....6.
...5.3.......6.7..5.8....1636..2.......4.1.......3...567....2.8..4.7.......2..5..
.5.3.7.4.1.........3.......5.8.3.61....8..5.9.6..1........4...6...6927....2...9..
..5..8..18......9.......78....4.....64....9......53..2.6.........138..5....9.714.
..........72.6.1....51...82.8...13..4.........37.9..1.....238..5.4..9.........79.
...658.....4......12............96.7...3..5....2.8...3..19..8..3.6.....4....473..
.2.3.......6..8.9.83.5........2...8.7.9..5........6..4.......1...1...4.22..7..8.9
.5..9....1.....6.....3.8.....8.4...9514.......3....2..........4.8...6..77..15..6.
.....2.......7...17..3...9.8..7......2.89.6...13..6....9..5.824.....891..........
3...8.......7....51..............36...2..4....7...........6.13..452...........8..

161
py/sudoku.py Normal file
View File

@@ -0,0 +1,161 @@
## Solve Every Sudoku Puzzle
## See http://norvig.com/sudoku.html
## Throughout this program we have:
## r is a row, e.g. 'A'
## c is a column, e.g. '3'
## s is a square, e.g. 'A3'
## d is a digit, e.g. '9'
## u is a unit, e.g. ['A1','B1','C1','D1','E1','F1','G1','H1','I1']
## grid is a grid, e.g. 81 non-blank chars, e.g. starting with '.18...7...'
## values is a dict of possible values, e.g. {'A1':'12349', 'A2':'8', ...}
def cross(A, B):
    """Return the list of a+b for every a in A paired with every b in B.

    The result is ordered with A varying slowest: all combinations for the
    first element of A come first, then those for the second, and so on.
    """
    combined = []
    for a in A:
        combined.extend(a + b for b in B)
    return combined
# Candidate digits for a square; column labels reuse the same characters.
digits = '123456789'
rows = 'ABCDEFGHI'
cols = digits

# Every square name, row letter followed by column digit ('A1' ... 'I9').
squares = cross(rows, cols)

# All units, in order: the columns, then the rows, then the 3x3 boxes.
unitlist = [cross(rows, c) for c in cols]
unitlist += [cross(r, cols) for r in rows]
unitlist += [cross(rs, cs)
             for rs in ('ABC', 'DEF', 'GHI')
             for cs in ('123', '456', '789')]

# units[s]: the units (in unitlist order) that contain square s.
units = {s: [u for u in unitlist if s in u] for s in squares}

# peers[s]: every square sharing a unit with s, excluding s itself.
peers = {s: set(sum(units[s], [])) - set([s]) for s in squares}
################ Unit Tests ################
def test():
    "Sanity-check the sizes and contents of squares, unitlist, units and peers."
    assert len(squares) == 81
    assert len(unitlist) == 27
    # Every square belongs to exactly 3 units ...
    for s in squares:
        assert len(units[s]) == 3
    # ... and has exactly 20 peers.
    for s in squares:
        assert len(peers[s]) == 20
    # Spot-check one square: its column, its row, then its box.
    expected_units = [
        'A2 B2 C2 D2 E2 F2 G2 H2 I2'.split(),
        'C1 C2 C3 C4 C5 C6 C7 C8 C9'.split(),
        'A1 A2 A3 B1 B2 B3 C1 C2 C3'.split(),
    ]
    assert units['C2'] == expected_units
    expected_peers = set(
        'A2 B2 D2 E2 F2 G2 H2 I2 '
        'C1 C3 C4 C5 C6 C7 C8 C9 '
        'A1 A3 B1 B3'.split())
    assert peers['C2'] == expected_peers
    print('All tests pass.')
################ Parse a Grid ################
def parse_grid(grid):
    """Turn a textual grid into a dict {square: string of possible digits}.

    Returns False instead if assigning one of the given digits leads to a
    contradiction."""
    # Start with every digit possible everywhere, then assign the givens.
    values = {s: digits for s in squares}
    for square, char in grid_values(grid).items():
        if char not in digits:
            continue  # An empty-square marker ('0' or '.'): nothing to assign.
        if not assign(values, square, char):
            return False  # The given digit cannot be placed on this square.
    return values
def grid_values(grid):
    """Map each square to its character in grid; '0' or '.' marks an empty square.

    Characters other than digits, '0' and '.' are ignored.  Exactly 81
    characters must remain, otherwise the offending input is printed and an
    AssertionError is raised."""
    chars = []
    for ch in grid:
        if ch in digits or ch in '0.':
            chars.append(ch)
    count = len(chars)
    if count != 81:
        print(grid, chars, count)
    assert count == 81
    return dict(zip(squares, chars))
################ Constraint Propagation ################
def assign(values, s, d):
    """Fix square s to digit d by eliminating every other candidate from it.

    Each elimination propagates.  Returns values, or False as soon as an
    elimination detects a contradiction."""
    for d2 in values[s].replace(d, ''):
        if not eliminate(values, s, d2):
            return False
    return values
def eliminate(values, s, d):
    """Remove digit d from the candidates of square s and propagate.

    Returns values, or False if a contradiction is detected."""
    if d not in values[s]:
        return values  # Already eliminated
    values[s] = values[s].replace(d, '')
    remaining = values[s]
    ## (1) If square s is down to a single value d2, remove d2 from its peers.
    if len(remaining) == 0:
        return False  # Contradiction: removed last value
    if len(remaining) == 1:
        d2 = remaining
        for s2 in peers[s]:
            if not eliminate(values, s2, d2):
                return False
    ## (2) If a unit has only one place left for d, then put it there.
    for u in units[s]:
        dplaces = [sq for sq in u if d in values[sq]]
        if not dplaces:
            return False  # Contradiction: no place for this value
        if len(dplaces) == 1 and not assign(values, dplaces[0], d):
            # d could only go in one place in the unit, and that failed
            return False
    return values
################ Display as 2-D grid ################
def display(values):
    "Print values as a 9x9 grid, with separators between the 3x3 boxes."
    # One column wider than the longest candidate string, so cells never touch.
    width = 1 + max(len(values[s]) for s in squares)
    divider = '+'.join(['-' * (width * 3)] * 3)
    for r in rows:
        cells = []
        for c in cols:
            cell = values[r + c].center(width)
            if c in '36':
                cell = cell + '|'  # Vertical box boundary after columns 3 and 6.
            cells.append(cell)
        print(''.join(cells))
        if r in 'CF':
            print(divider)  # Horizontal box boundary after rows C and F.
    print()
################ Search ################
def solve(grid):
    "Parse the textual grid, then search for a solution; return search()'s result."
    initial_values = parse_grid(grid)
    return search(initial_values)
def search(values):
    """Using depth-first search and propagation, try all possible values.

    values is a {square: candidate digits} dict, or False if an earlier step
    already failed.  Returns a solved values dict, or False when no assignment
    leads to a solution."""
    if values is False:
        return False  ## Failed earlier
    if all(len(values[s]) == 1 for s in squares):
        return values  ## Solved!
    ## Choose the unfilled square s with the fewest possibilities
    n, s = min((len(values[s]), s) for s in squares if len(values[s]) > 1)
    for d in values[s]:
        # Work on a copy so a failed branch leaves this level's values intact.
        result = search(assign(values.copy(), s, d))
        if result:
            return result
    # Every candidate for s failed.  Report it with the same False sentinel as
    # the other failure paths (previously this fell through and returned None,
    # which solved() does not treat as a failure).
    return False
################ System test ################
import time
def solve_all(grids, name=''):
    """Attempt to solve a sequence of grids. Report results.

    grids is any iterable of textual grids; name labels the puzzle set in the
    report.  A summary line is printed only when more than one grid was
    attempted.  Returns None."""
    outcomes = [time_solve(grid) for grid in grids]
    if not outcomes:
        return  # Nothing to solve; zip(*[]) could not be unpacked below.
    times, results = zip(*outcomes)
    N = len(results)
    if N > 1:
        print("Solved %d of %d %s puzzles (avg %.2f secs (%d Hz), max %.2f secs)." % (
            sum(results), N, name, sum(times)/N, N/sum(times), max(times)))
def time_solve(grid):
    """Solve grid and return (elapsed seconds, whether the result is solved)."""
    # time.clock() was removed in Python 3.8; use perf_counter where it exists
    # and fall back to clock only on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start = timer()
    values = solve(grid)
    t = timer() - start
    return (t, solved(values))
def solved(values):
    "Return True if every unit's values are exactly the digits 1 to 9, once each."
    if values is False:
        return False
    wanted = set(digits)
    for unit in unitlist:
        if set(values[s] for s in unit) != wanted:
            return False
    return True
# Sample puzzles as 81-character strings, read row by row.  grid1 marks empty
# squares with '0'; grid2 and hard1 mark them with '.'.
# NOTE(review): the names suggest increasing difficulty; not verified here.
grid1 = '003020600900305001001806400008102900700000008006708200002609500800203009005010300'
grid2 = '4.....8.5.3..........7......2.....6.....8.4......1.......6.3.7.5..2.....1.4......'
hard1 = '.....6....59.....82....8....45........3........6..3.54...325..6..................'
if __name__ == '__main__':
    # Run the unit tests, then time the solver on each puzzle file.
    test()
    for puzzle_file, label in (("sudoku-easy50.txt", "easy"),
                               ("sudoku-top95.txt", "hard"),
                               ("sudoku-hardest.txt", "hardest")):
        # Use a context manager so each file is closed once it has been read.
        with open(puzzle_file) as puzzles:
            solve_all(puzzles, label)

73
py/testaccum.py Normal file
View File

@@ -0,0 +1,73 @@
from __future__ import division
import re
from accum import *
# Matches one accumulation display, "[acc: exp for x in it]", on a single line.
# Written with escaped brackets in a raw string: the earlier "[[]" character
# class triggers a "possible nested set" FutureWarning on newer Pythons.
acc_re = re.compile(r"\[(.+):(.+) for (.+) in (.+)\]")

def expand_accumulations(program_text):
    """Replace any accumulation displays in program_text with calls to
    accumulation.  Used to simulate a hypothetical Python interpreter that
    actually handles accumulation displays.  This one is rather poor: it
    won't match across lines, it won't match nested accumulation displays,
    and it doesn't handle multiple 'for' clauses; nor 'if' clauses."""
    def _(matchobj):
        # Groups: accumulator, expression, loop variable, iterable.
        (acc, exp, x, it) = matchobj.groups()
        return "accumulation(%s, lambda %s: (%s), %s)" % (acc, x, exp, it)
    return acc_re.sub(_, program_text)
def test1(acc_display, expected):
    """Eval an accumulation display and see if it gets the expected answer.

    Prints the display, expands it with expand_accumulations, evaluates the
    result, and raises AssertionError if it differs from expected."""
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print(acc_display)
    # eval is applied only to the display strings passed in by this test file.
    result = eval(expand_accumulations(acc_display))
    assert result == expected, ('Got %s; expected %s' % (result, expected))
    # Wrap in a 1-tuple so a tuple-valued result is formatted, not unpacked.
    print(' ==> %s' % (result,))
#### Initialize some data
# 24 sample readings; the tests in this file index them as temp[hour].
temp = [
    70, 70, 71, 74, 76, 76, 72, 76, 77, 77, 77, 78,
    78, 79, 79, 79, 78, 80, 82, 83, 83, 81, 84, 83,
]
# A second name for the very same list object.
data = temp


def f(x):
    "Return twice x."
    return 2 * x


# Vote count per candidate name.
votes = {
    'Arnie': 48,
    'Gray': 45,
    'Tom': 13,
    'Cruz': 32,
    'Peter': 3,
}
candidates = votes.keys()
def test():
    """Print the sample data, then check one accumulation display of each kind.

    Each check goes through test1, which raises AssertionError on a mismatch."""
    # Single-argument print(...) calls behave the same on Python 2 and 3; the
    # doubled space reproduces what "print 'temp = ', temp" emitted.
    print('temp =  %s' % (temp,))
    print('data = temp')
    print('votes =  %s' % (votes,))
    print('candidates =  %s' % (candidates,))
    print('')
    #### Test some accumulation displays
    test1("[Max: temp[hour] for hour in range(24)]",
          max([temp[hour] for hour in range(24)]))
    test1("[Min: temp[hour] for hour in range(24)]",
          min([temp[hour] for hour in range(24)]))
    test1("[Sum: x*x for x in data]",
          sum([x*x for x in data]))
    test1("[Mean: f(x) for x in data]",
          sum([f(x) for x in data])/len(data))
    test1("[Median: f(x) for x in data]",
          156.0)
    test1("[Mode: f(x) for x in data]",
          166)
    test1("[Argmax: votes[c] for c in candidates]",
          'Arnie')
    test1("[Argmin: votes[c] for c in candidates]",
          'Peter')
    # The loop variable was misspelled 'four', which left 'hour' unbound here
    # (it only worked where list-comprehension variables leak into the scope).
    test1("[Some: temp[hour] > 75 for hour in range(24)]",
          len([hour for hour in range(24) if temp[hour] > 75]) > 0)
    test1("[Every: temp[hour] > 75 for hour in range(24)]",
          len([h for h in range(24) if temp[h] > 75]) == 24)
    test1("[Top(10): temp[hour] for hour in range(24)]",
          [84, 83, 83, 83, 82, 81, 80, 79, 79, 79])
    test1("[Join(', '): votes[c] for c in candidates]",
          ', '.join([str(votes[c]) for c in candidates]))
    test1("[SortBy: abs(x) for x in (-2, -4, 3, 1)]",
          [1, -2, 3, -4])
    test1("[SortBy(reverse=True): abs(x) for x in (-2, -4, 3, 1)]",
          [-4, 3, -2, 1])
# Run the accumulation-display checks when this file is executed as a script.
if __name__ == "__main__":
    test()

170
py/yaptu.py Normal file
View File

@@ -0,0 +1,170 @@
"""Yet Another Python Templating Utility, Version 1.2, by Alex Martelli.
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52305
(Specialized to HTML and modified by Peter Norvig.)
Copies input to output, with some substitutions. There are three types
of substitutions: lexical, expression, and statement.
LEXICAL SUBSTITUTIONS:
& < >
These characters, if surrounded by whitespace, are replaced by
the corresponding HTML entities: &amp;, &lt;, &gt;.
EXPRESSION SUBSTITUTIONS:
<<exp>>
Replace <<exp>> by eval(exp), where exp is a Python expression.
The most common use is when exp is just a variable name.
Example: <<green>>
Special case 1: If exp starts with '/', replace '/' by '_'.
Example: <</green>> becomes <<_green>>
Special case 2: If exp evals to a callable, call it.
Example: <<random.random>> is the same as <<random.random()>>
Special case 3: If exp evals to None, replace it with ''.
Example: <<list.append(item)>> generates no text.
STATEMENT SUBSTITUTIONS:
All statement substitutions start with a #[ in column 1, and end with
a #] in column 1 of a subsequent line. Nesting is allowed, and
works like you would expect. There are two variants:
#[
stmts
#]
Any number of lines of Python stmts are executed.
The first line must be empty, except for the #[
#[ stmt-header:
lines
#]
The lines are interpreted as HTML with embedded expressions,
and are sent to output, once for each execution of stmt-header.
stmt-header is usually a for or if; This is hard to explain,
but easy to see with an example:
<table><tr><th> Number <th> Number squared
#[ for i in range(10):
<tr><td> <<i>> <td> <<i**2>>
#]
</table>
This produces one line of the table for each value of i in [0 .. 9].
If your compound statement has multiple stmt-headers, you use #| to
introduce the subsequent stmt-headers (such as else: or except:).
Another example:
#[ if time.localtime()[6] in [5, 6]:
Have a good weekend!
#| else:
Time for work.
#]
"""
import sys, re, os, os.path
class Copier:
    """Smart-copier (YAPTU) class.

    Copies the template lines stored in globals['_bl'] to self.outf,
    replacing <<expr>> with the value of expr and running #[ ... #] statement
    blocks.  All evaluation happens in the globals dict given to __init__.
    """

    def copyblock(self, i=0, last=None):
        """Main copy method: process lines [i,last) of block.

        The block is self.globals['_bl']; last defaults to its length.
        Statement headers are executed with a trailing call to _cb (this
        method), so the lines they enclose are copied once per execution.
        """
        def repl(match):
            "Replace the match with its value as a Python expression."
            expr = self.preproc(match.group(1), 'eval')
            if self.verbose:
                # No newline: the '========>' line below continues this one.
                sys.stdout.write('=== eval{%s} ' % expr)
            try:
                val = eval(expr, self.globals)
            except Exception:
                self.oops('eval', expr)  # prints diagnostics, then re-raises
            if callable(val):
                val = val()
            if val is None:
                val = ''
            if self.verbose:
                print('========> %s' % (val,))
            return str(val)

        block = self.globals['_bl']
        if last is None:
            last = len(block)
        while i < last:
            line = block[i]
            if line.startswith("#["):   # a statement starts at line block[i]
                # i is the last line to _not_ process
                stmt = line[2:].strip()
                j = i+1   # look for 'finish' from here onwards
                nest = 1  # count nesting levels of statements
                while j < last and not stmt.endswith("#]"):
                    line = block[j]
                    # first look for nested statements or 'finish' lines
                    if line.startswith("#]"):    # found a statement-end
                        nest = nest - 1
                        if nest == 0:
                            break  # j is first line to _not_ process
                    elif line.startswith("#["):  # found a nested statement
                        nest = nest + 1
                    elif nest == 1 and line.startswith("#|"):
                        # look for continuation only at this nesting
                        nestat = line[2:].strip()
                        stmt = '%s _cb(%s,%s)\n%s' % (stmt, i+1, j, nestat)
                        i = j   # again, i is the last line to _not_ process
                    j = j+1
                if stmt == '':  ## A multi-line python suite
                    self.execute(''.join(block[i+1:j]))
                    i = j+1
                else:  ## The header of a for loop (etc.) is on this line
                    self.execute("%s _cb(%s,%s)" % (stmt, i+1, j))
                    i = j+1
            else:       # normal line, just copy with substitution
                self.outf.write(self.regex.sub(repl, self.preproc(line, 'copy')))
                i = i+1

    def __init__(self, globals):
        """Create a Copier that evaluates templates in the dict globals.

        The dict is modified: '_cb' is bound to self.copyblock so generated
        statements can call back into the copier.  Output defaults to stdout.
        """
        self.regex = re.compile("<<(.*?)>>")
        self.globals = globals
        self.globals['_cb'] = self.copyblock
        self.outf = sys.stdout
        self.verbose = 0

    def execute(self, stmt):
        "Execute the Python statement(s) stmt in self.globals."
        stmt = self.preproc(stmt, 'exec') + '\n'
        if self.verbose:
            print("******* executing {%s} in %s" % (stmt, list(self.globals.keys())))
        try:
            # The call form of exec is accepted by both Python 2 and Python 3.
            exec(stmt, self.globals)
        except Exception:
            self.oops('exec', stmt)  # prints diagnostics, then re-raises

    def oops(self, why, what):
        """Print diagnostics for a failed eval/exec and re-raise the exception.

        Must be called while an exception is being handled."""
        print('Something went wrong in %sing {%s}' % (why, what))
        print('Globals: %s %s' % (list(self.globals.keys()),
                                  self.globals.get('SECTIONS', '???')))
        raise

    def preproc(self, string, why, reg=re.compile(r"\s([<>&])\s"),
                table={'&':' &amp; ', '<':' &lt; ', '>':' &gt; '}):
        """Preprocess string before it is evaluated, executed or copied.

        why is 'eval' or 'exec' (strip whitespace and turn a leading '/' into
        '_') or 'copy' (expand & < > into HTML entities when surrounded by
        whitespace).  Any other value of why returns None.
        """
        if why in ('exec', 'eval'):
            string = string.strip()
            # startswith, not string[0]: an empty statement block or expression
            # would otherwise raise IndexError here.
            if string.startswith('/'):
                string = '_' + string[1:]
            return string
        elif why == 'copy':
            # Expand & < > into entities if surrounded by whitespace
            return reg.sub(lambda match: table[match.group(1)], string)

    def copyfile(self, filename, ext="html"):
        """Convert filename.* to filename.ext, where ext defaults to html.

        Also records the source name in the module global yaptu_filename.
        The output file is closed when copying finishes; self.outf is left
        referring to that (closed) file.
        """
        global yaptu_filename
        outname = re.sub('[.][a-zA-Z0-9]+?$', '', filename) + '.'+ext
        print('Transforming %s to %s' % (filename, outname))
        with open(filename) as infile:
            self.globals['_bl'] = infile.readlines()
        yaptu_filename = filename
        # Close (and so flush) the output even if a template statement fails.
        with open(outname, 'w') as outf:
            self.outf = outf
            self.copyblock()
if __name__ == '__main__':
    # The module globals double as the template namespace, so the loop
    # variable keeps its name: templates can see it.
    copier = Copier(globals())
    for filename in sys.argv[1:]:
        if filename != '-v':
            copier.copyfile(filename)
            continue
        # '-v' turns on verbose tracing for the files that follow it.
        copier.verbose = 1