Add subdirectories
Add /ipynb/ and /py/ subdirectories to keep the home page neater.
This commit is contained in:
134
py/SET.py
Normal file
134
py/SET.py
Normal file
@@ -0,0 +1,134 @@
|
||||
import random
|
||||
import collections
|
||||
import itertools
|
||||
|
||||
"""
|
||||
Game of Set (Peter Norvig 2010-2015)
|
||||
|
||||
How often do sets appear when we deal an array of cards?
|
||||
How often in the course of playing out the game?
|
||||
|
||||
Here are the data types we will use:
|
||||
|
||||
card: A string, such as '3R=0', meaning "three red striped ovals".
|
||||
deck: A list of cards, initially of length 81.
|
||||
layout: A list of cards, initially of length 12.
|
||||
set: A tuple of 3 cards.
|
||||
Tallies: A dict: {12: {True: 33, False: 1}} means a layout of size 12
|
||||
tallied 33 sets and 1 non-set.
|
||||
"""
|
||||
|
||||
#### Cards, dealing cards, and defining the notion of sets.
|
||||
|
||||
# Every card is a 4-character string: number, color, shade, symbol.
# The last feature (symbol) varies fastest.
CARDS = [''.join(features)
         for features in itertools.product('123', 'RGP', '@O=', '0SD')]
|
||||
|
||||
def deal(n, deck):
    "Remove n cards from the end of the deck and return them as a list."
    hand = []
    for _ in range(n):
        hand.append(deck.pop())
    return hand
|
||||
|
||||
def is_set(cards):
    "Do these 3 cards form a set? Yes unless some feature shows exactly 2 values."
    return all(len(set(card[f] for card in cards)) != 2
               for f in range(4))
|
||||
|
||||
def find_set(layout):
    "Return the first 3-card set among the layout's combinations, or () if there is none."
    triples = itertools.combinations(layout, 3)
    return next((triple for triple in triples if is_set(triple)), ())
|
||||
|
||||
#### Tallying set:no-set ratio
|
||||
|
||||
def Tallies():
    "Make an empty table {size: {True: count, False: count}}; new sizes start at zero."
    def fresh_counts():
        return {True: 0, False: 0}
    return collections.defaultdict(fresh_counts)
|
||||
|
||||
def tally(tallies, layout):
    "Look for a set in the layout, count the outcome under the layout's size, and return the set."
    found = find_set(layout)
    size = len(layout)
    tallies[size][bool(found)] += 1
    return found
|
||||
|
||||
#### Three experiments
|
||||
|
||||
def tally_initial_layout(N, sizes=(12, 15)):
    "Shuffle the deck N times; for each size, tally whether the first `size` cards hold a set."
    tallies = Tallies()
    deck = list(CARDS)
    for _ in range(N):
        random.shuffle(deck)
        for size in sizes:
            layout = deck[:size]
            tally(tallies, layout)
    return tallies
|
||||
|
||||
def tally_initial_layout_no_prior_sets(N, sizes=(12, 15)):
    """Shuffle the deck N times; for each size, tally the first `size` cards,
    but skip the deal when the first `size - 3` cards already contain a set."""
    tallies = Tallies()
    deck = list(CARDS)
    for _ in range(N):
        random.shuffle(deck)
        for size in sizes:
            if find_set(deck[:size-3]):
                continue
            tally(tallies, deck[:size])
    return tallies
|
||||
|
||||
def tally_game_play(N):
    "Play N games, tallying each layout seen while cards remain in the deck."
    tallies = Tallies()
    for _ in range(N):
        deck = list(CARDS)
        random.shuffle(deck)
        layout = deal(12, deck)
        while deck:
            found = tally(tallies, layout)
            # Take the found set (if any) off the table.
            for card in found:
                layout.remove(card)
            # Refill: after a set was taken from 12 cards, or when no set was found.
            needs_cards = (not found) or len(layout) < 12
            if needs_cards:
                layout.extend(deal(3, deck))
    return tallies
|
||||
|
||||
def experiments(N):
    "Print the instruction booklet's odds, then the tallies from the three experiments."
    # Lists work as tallies here because show() indexes with True/False, i.e. 1/0.
    booklet = {12: [1, 33], 15: [1, 2500]}
    show(booklet, 'the instruction booklet')
    initial = tally_initial_layout(N)
    show(initial, 'initial layout')
    played = tally_game_play(N // 25)
    show(played, 'game play')
    no_prior = tally_initial_layout_no_prior_sets(N)
    show(no_prior, 'initial layout, but no sets before dealing last 3 cards')
|
||||
|
||||
|
||||
def show(tallies, label):
    "Print a table of set and no-set counts, with their rounded ratio, for each size."
    print()
    print('Size | Sets | NoSets | Set:NoSet ratio for', label)
    print('-----+--------+--------+----------------')
    template = '{:4d} |{:7,d} |{:7,d} | {:4}:1'
    for size in sorted(tallies):
        counts = tallies[size]
        sets, nosets = counts[True], counts[False]
        if nosets == 0:
            ratio = 'inft'  # no no-sets were seen, so the ratio is unbounded
        else:
            ratio = int(round(float(sets) / nosets))
        print(template.format(size, sets, nosets, ratio))
|
||||
|
||||
def test():
    "Unit tests for the deck, is_set and find_set; prints a message if all pass."
    assert len(CARDS) == 81 == len(set(CARDS))
    # The symbol of the first card is the digit 0 (oval), matching CARDS.
    assert is_set(('3R=0', '2R=S', '1R=D'))
    assert not is_set(('3R=0', '2R=S', '1R@D'))
    assert find_set(['1PO0', '2G=D', '3R=0', '2R=S', '1R=D']) == ('3R=0', '2R=S', '1R=D')
    assert not find_set(['1PO0', '2G=D', '3R=0', '2R=S', '1R@D'])
    photo = '2P=0 3P=D 2R=0 3GO0 2POD 3R@D 2RO0 2ROS 1P@S 2P@0 3ROS 2GOD 2P@D 1GOD 3GOS'.split()
    # Guard against typos in the example layout: every card must be a real card.
    assert set(photo) <= set(CARDS)
    assert not find_set(photo)
    assert set(itertools.combinations([1, 2, 3, 4], 3)) == {(1, 2, 3), (1, 2, 4), (1, 3, 4), (2, 3, 4)}
    print('All tests pass.')
|
||||
|
||||
# Self-check first, then run the experiments (both happen whenever this file is executed).
test()
experiments(10 ** 5)
|
||||
159
py/beal.py
Normal file
159
py/beal.py
Normal file
@@ -0,0 +1,159 @@
|
||||
"""Search for counterexamples to Beal's conjecture
|
||||
See http://norvig.com/beal.html and http://www.bealconjecture.com"""
|
||||
|
||||
from __future__ import division, print_function
|
||||
from math import log
|
||||
from itertools import combinations, product
|
||||
from collections import defaultdict
|
||||
try:
|
||||
from math import gcd # For Python 3.6 and up
|
||||
except ImportError:
|
||||
from fractions import gcd # For older versions (works in 2.7 as well)
|
||||
|
||||
def beal(max_A, max_x):
    """Print every A ** x + B ** y that equals some C ** z, with gcd(A, B) == 1.
    A, B and C range over 1..max_A; x, y, z over exponents_upto(max_x)."""
    Apowers = make_Apowers(max_A, max_x)
    Czroots = make_Czroots(Apowers)
    for (A, B) in combinations(Apowers, 2):
        if gcd(A, B) != 1:
            continue
        for Ax in Apowers[A]:
            for By in Apowers[B]:
                Cz = Ax + By
                if Cz not in Czroots:
                    continue
                C = Czroots[Cz]
                # Recover the exponents from the powers for the report.
                x = exponent(Ax, A)
                y = exponent(By, B)
                z = exponent(Cz, C)
                print('{} ** {} + {} ** {} == {} ** {} == {}'
                      .format(A, x, B, y, C, z, C ** z))
|
||||
|
||||
def make_Apowers(max_A, max_x):
    "A dict of {A: [A**3, A**4, ...], ...} for A in 1..max_A; A == 1 gets the single power [1]."
    exponents = exponents_upto(max_x)
    Apowers = {}
    for A in range(1, max_A + 1):
        # Every power of 1 is 1, so one exponent is enough.
        xs = [3] if A == 1 else exponents
        Apowers[A] = [A ** x for x in xs]
    return Apowers
|
||||
|
||||
def make_Czroots(Apowers):
    "Invert Apowers into {C**z: C}; when two bases share a power, the later base wins."
    Czroots = {}
    for C in Apowers:
        for Cz in Apowers[C]:
            Czroots[Cz] = C
    return Czroots
|
||||
|
||||
def exponents_upto(max_x):
    "Return all odd primes up to and including max_x, as well as 4 (when max_x >= 4)."
    exponents = [3, 4] if max_x >= 4 else [3] if max_x == 3 else []
    # The stop value is max_x + 1 so that max_x itself is considered;
    # range(5, max_x, 2) would drop it (e.g. 5 for max_x == 5).
    for x in range(5, max_x + 1, 2):
        # x is odd, so the entry 4 never divides it; the rest acts as a prime sieve.
        if not any(x % p == 0 for p in exponents):
            exponents.append(x)
    return exponents
|
||||
|
||||
def exponent(Cz, C):
    """Recover z such that C ** z == Cz, as the rounded log of Cz to base C.
    exponent(1, 1) has no unique answer; it is defined to be 3."""
    if Cz == C == 1:
        return 3
    z = log(Cz, C)
    return int(round(z))
|
||||
|
||||
##############################################################################
|
||||
|
||||
def tests():
    "Check the helper functions against known values; return 'tests pass' if no assertion fails."
    expected_Apowers = {
        1: [1],
        2: [8, 16, 32, 128],
        3: [27, 81, 243, 2187],
        4: [64, 256, 1024, 16384],
        5: [125, 625, 3125, 78125],
        6: [216, 1296, 7776, 279936]}
    assert make_Apowers(6, 10) == expected_Apowers

    expected_Czroots = {
        1: 1, 8: 2, 16: 2, 27: 3, 32: 2, 64: 4, 81: 3,
        125: 5, 128: 2, 243: 3, 256: 4, 625: 5, 1024: 4,
        2187: 3, 3125: 5, 16384: 4, 78125: 5}
    assert make_Czroots(make_Apowers(5, 8)) == expected_Czroots

    Czroots = make_Czroots(make_Apowers(100, 100))
    assert 3 ** 3 + 6 ** 3 in Czroots
    assert 99 ** 97 in Czroots
    assert 101 ** 100 not in Czroots
    assert Czroots[99 ** 97] == 99

    # (power, base, expected exponent)
    for (Cz, C, z) in [(10 ** 5, 10, 5),
                       (7 ** 3, 7, 3),
                       (1234 ** 999, 1234, 999),
                       (12345 ** 6789, 12345, 6789),
                       (3 ** 10000, 3, 10000),
                       (1, 1, 3)]:
        assert exponent(Cz, C) == z

    assert exponents_upto(2) == []
    assert exponents_upto(3) == [3]
    assert exponents_upto(4) == [3, 4]
    assert exponents_upto(40) == [3, 4, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37]
    assert exponents_upto(100) == [
        3, 4, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61,
        67, 71, 73, 79, 83, 89, 97]

    assert gcd(3, 6) == 3
    assert gcd(3, 7) == 1
    assert gcd(861591083269373931, 94815872265407) == 97
    assert gcd(2*3*5*(7**10)*(11**12), 3*(7**5)*(11**13)*17) == 3*(7**5)*(11**12)
    return 'tests pass'
|
||||
|
||||
##############################################################################
|
||||
|
||||
def beal_modp(max_A, max_x, p=2**31-1):
    """Like beal, but compare powers modulo p first, then confirm each
    candidate A ** x + B ** y == C ** z with exact arithmetic before printing.
    A, B, C range over 1..max_A; exponents over exponents_upto(max_x)."""
    assert p >= max_A
    Apowers = make_Apowers_modp(max_A, max_x, p)
    Czroots = make_Czroots_modp(Apowers)
    for (A, B) in combinations(Apowers, 2):
        if gcd(A, B) != 1:
            continue
        for (Axp, x) in Apowers[A]:
            for (Byp, y) in Apowers[B]:
                Czp = (Axp + Byp) % p
                if Czp not in Czroots:
                    continue
                # A match mod p is only a candidate; check it exactly.
                lhs = A ** x + B ** y
                for (C, z) in Czroots[Czp]:
                    if C ** z == lhs:
                        print('{} ** {} + {} ** {} == {} ** {} == {}'
                              .format(A, x, B, y, C, z, C ** z))
|
||||
|
||||
|
||||
def make_Apowers_modp(max_A, max_x, p):
    "A dict of {A: [(A**3 (mod p), 3), (A**4 (mod p), 4), ...]} for A in 1..max_A."
    exponents = exponents_upto(max_x)
    Apowers = {}
    for A in range(1, max_A + 1):
        xs = [3] if A == 1 else exponents
        Apowers[A] = [(pow(A, x, p), x) for x in xs]
    return Apowers
|
||||
|
||||
def make_Czroots_modp(Apowers):
    "A dict of {C**z (mod p): [(C, z), ...]}, listing every base and exponent with that residue."
    roots = defaultdict(list)
    for (A, powers) in Apowers.items():
        for (Axp, x) in powers:
            roots[Axp].append((A, x))
    return roots
|
||||
|
||||
##############################################################################
|
||||
|
||||
def simpsons(bases, powers):
    """Return the (A, B, C, n) with the smallest relative error in Fermat's
    equation, A ** n + B ** n == C ** n, where A, B range over all pairs of
    bases, n over powers, and C is the integer nearest the nth root of the sum."""
    def candidates():
        for (A, B) in combinations(bases, 2):
            for n in powers:
                yield (A, B, iroot(A ** n + B ** n, n), n)
    return min(candidates(), key=relative_error)
|
||||
|
||||
def iroot(i, n):
    "The integer closest to the nth root of i (computed in floating point)."
    root = i ** (1./n)
    return int(round(root))
|
||||
|
||||
def relative_error(equation):
    "|LHS - RHS| / RHS for the equation (A, B, C, n), meaning A ** n + B ** n == C ** n."
    (A, B, C, n) = equation
    RHS = C ** n
    gap = abs(A ** n + B ** n - RHS)
    return gap / RHS
|
||||
|
||||
if __name__ == '__main__':
    # Script mode: self-test, then run each search in turn.
    print(tests())
    print("Searching beal(500, 100)")
    print(beal(500, 100))
    print("Finding Simpson-esque near-solutions to Fermat's Equation")

    def s(b, p):
        "Print the closest near-solution found by simpsons(b, p)."
        A, B, C, n = simpsons(b, p)
        print('{0}^{3} + {1}^{3} = {2}^{3}'.format(A, B, C, n))

    s(range(1000, 2000), [11, 12, 13])
    s(range(3000, 5000), [12])
    print("Searching beal_modp(500, 100)")
    print(beal_modp(500, 100))
|
||||
|
||||
|
||||
238
py/docex.py
Normal file
238
py/docex.py
Normal file
@@ -0,0 +1,238 @@
|
||||
"""A framework for running unit tests and examples, written in docstrings.
|
||||
|
||||
This lets you write "Ex: sqrt(4) ==> 2; sqrt(-1) raises ValueError" in a
|
||||
docstring, and then execute the examples as unit tests.
|
||||
|
||||
This functionality is similar to the doctest module. The major
|
||||
differences between docex and doctest are:
|
||||
|
||||
(1) Brevity. With docex you write the one-line comment
|
||||
"Ex: len('abc') ==> 3; len([]) ==> 0; len(5) raises TypeError"
|
||||
With doctest you would need 9 lines for the same thing:
|
||||
'''>>> len('abc')
|
||||
3
|
||||
>>> len([])
|
||||
0
|
||||
>>> len(5)
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: len() of unsized object
|
||||
'''
|
||||
|
||||
(2) Docex handles both examples and unit tests.
|
||||
It took me a while to recognize this distinction: when I write
|
||||
"sqrt(4) ==> 2" it has two purposes -- to serve as a unit test
|
||||
and to serve as an example of how to use the sqrt function.
|
||||
When I write "random.choice('abc')" it serves as an example of
|
||||
how to use the choice function, but it is not a unit test.
|
||||
docex lets you do both; doctest only supports tests. Of course
|
||||
you can coerce this into a test in doctest, with something like
|
||||
>>> random.choice('abc') in 'abc'
|
||||
True
|
||||
|
||||
(3) Eval-based rather than string-comparison based. The docex string
|
||||
"dict(zip([1,4,9], [1,2,3])) ==> {1: 1, 4: 2, 9: 3}" works even
|
||||
when a different version of Python decides to print the dict as
|
||||
"{9: 3, 4: 2, 1: 1}" because docex evals the right-hand-side and
|
||||
checks to see if it is equal. That's good for dicts, it's good for
|
||||
writing "1+1==2 ==> True" and having it work in versions of Python
|
||||
where True prints as "1" rather than as "True", and so on,
|
||||
but doctest has the edge if you want to compare against something
|
||||
that doesn't have an eval-able output, or if you want to test
|
||||
printed output.
|
||||
|
||||
(4) Doctest has many more features, and is better supported.
|
||||
I wrote docex before doctest was an official part of Python, but
|
||||
with the refactoring of doctest in Python 2.4, I decided to switch
|
||||
my code over to doctest, even though I prefer the brevity of docex.
|
||||
I still offer docex for those who want it.
|
||||
|
||||
From Python, when you want to test modules m1, m2, ... do:
|
||||
docex.Docex([m1, m2, ...])
|
||||
From the shell, when you want to test files *.py, do:
|
||||
python docex.py [log-file] *.py
|
||||
If log file ends in .htm or .html, it will be written in HTML.
|
||||
If log file is -, or if it is missing, then standard output is used.
|
||||
|
||||
For each module, Docex looks at the __doc__ and _docex strings of the
|
||||
module itself, and of each member, and recursively for each member
|
||||
class. If a line in a docstring starts with r'^\s*Ex: ' (a line with
|
||||
blanks, then 'Ex: '), then the remainder of the string after the colon
|
||||
is treated as examples. Each line of the examples should conform to
|
||||
one of the following formats:
|
||||
|
||||
(1) Blank line or a comment; these just get echoed verbatim to the log.
|
||||
(2) Of the form example1 ; example2 ; ...
|
||||
(3) Of the form 'x ==> y' for any expressions x and y.
|
||||
x is evaled and assigned to _, then y is evaled.
|
||||
If x != y, an error message is printed.
|
||||
(4) Of the form 'x raises y', for any statement x and expression y.
|
||||
First y is evaled to yield an exception type, then x is execed.
|
||||
If x doesn't raise the right exception, an error msg is printed.
|
||||
(5) Of the form 'statement'. Statement is execed for side effect.
|
||||
(6) Of the form 'expression'. Expression is evaled for side effect.
|
||||
"""
|
||||
|
||||
import re, sys, types
|
||||
|
||||
class Docex:
    """A class to run test examples written in docstrings or in _docex.

    Creating an instance runs every example found in the given modules and
    prints a log; afterwards self.passed and self.failed hold the counts.
    Examples are passed to eval/exec, so only run Docex on code you trust."""

    def __init__(self, modules=None, html=0, out=None,
                 title='Docex Example Output'):
        """Run the examples in modules (default: everything in sys.modules).
        If html is true the log is written as HTML. If out is given, output
        goes to that file object, which is closed when the run finishes."""
        if modules is None:
            # Take a snapshot: running examples may import more modules.
            modules = list(sys.modules.values())
        self.passed = self.failed = 0
        self.dictionary = {}     # namespace in which examples are evaluated
        self.already_seen = {}   # ids of objects whose examples were run
        self.html = html
        saved_stdout = sys.stdout
        try:
            if out: sys.stdout = out
            self.writeln(title, '<h1>', '</h1><pre>')
            for module in modules:
                self.run_module(module)
            self.writeln(str(self), '</pre>\n<hr><h1>', '</h1>\n')
        finally:
            if out:
                # Put back whatever stream was active before this run.
                sys.stdout = saved_stdout
                out.close()

    def __repr__(self):
        "Summarize the pass/fail counts."
        if self.failed:
            return ('<Test: #### failed %d, passed %d>'
                    % (self.failed, self.passed))
        else:
            return '<Test: passed all %d>' % self.passed

    def run_module(self, object):
        """Run the docstrings, and then all members of the module."""
        if not self.seen(object):
            self.dictionary.update(vars(object))  # import module into self
            name = object.__name__
            self.writeln('## Module %s ' % name,
                         '\n</pre><a name=%s><h1>' % name,
                         '</h1><pre>')
            self.run_docstring(object)
            # Old-style classes on Python 2; every class where ClassType is gone.
            class_type = getattr(types, 'ClassType', type)
            for name in sorted(object.__dict__.keys()):
                val = object.__dict__[name]
                if isinstance(val, class_type):
                    self.run_class(val)
                elif isinstance(val, types.ModuleType):
                    pass
                elif not self.seen(val):
                    self.run_docstring(val)

    def run_class(self, object):
        """Run the docstrings, and then all members of the class."""
        if not self.seen(object):
            self.run_docstring(object)
            for name in sorted(object.__dict__.keys()):
                self.run_docstring(object.__dict__[name])

    def run_docstring(self, object, search=re.compile(r'(?m)^\s*Ex: ').search):
        "Run the __doc__ and _docex attributes, if the object has them."
        if hasattr(object, '__doc__'):
            s = object.__doc__
            if isinstance(s, str):
                match = search(s)
                # Everything after the first 'Ex: ' marker is treated as examples.
                if match: self.run_string(s[match.end():])
        if hasattr(object, '_docex'):
            self.run_string(object._docex)

    def run_string(self, teststr):
        """Run a test string, printing inputs and results.
        Multi-line strings and 'a; b' lists are split and run piece by piece."""
        if not teststr: return
        teststr = teststr.strip()
        if teststr.find('\n') > -1:
            # An explicit loop: map() would be lazy, and so a no-op, on Python 3.
            for line in teststr.split('\n'):
                self.run_string(line)
        elif teststr == '' or teststr.startswith('#'):
            self.writeln(teststr)
        elif teststr.find('; ') > -1:
            for substr in teststr.split('; '): self.run_string(substr)
        elif teststr.find('==>') > -1:
            teststr, result = teststr.split('==>')
            self.evaluate(teststr, result)
        elif teststr.find(' raises ') > -1:
            teststr, exception = teststr.split(' raises ')
            self.raises(teststr, exception)
        else:  ## Try to eval, but if it is a statement, exec
            try:
                self.evaluate(teststr)
            except SyntaxError:
                exec(teststr, self.dictionary)

    def evaluate(self, teststr, resultstr=None):
        "Eval teststr and check if resultstr (if given) evals to the same."
        self.writeln('>>> ' + teststr.strip())
        result = eval(teststr, self.dictionary)
        self.dictionary['_'] = result
        self.writeln(repr(result))
        if resultstr is None:
            return
        elif result == eval(resultstr, self.dictionary):
            self.passed += 1
        else:
            self.fail(teststr, resultstr)

    def raises(self, teststr, exceptionstr):
        "Exec teststr and check that it raises the exception named by exceptionstr."
        teststr = teststr.strip()
        self.writeln('>>> ' + teststr)
        except_class = eval(exceptionstr, self.dictionary)
        try:
            exec(teststr, self.dictionary)
        except except_class:
            self.writeln('# raises %s as expected' % exceptionstr)
            self.passed += 1
            return
        self.fail(teststr, exceptionstr)

    def fail(self, teststr, resultstr):
        "Log a failed example and count it."
        self.writeln('###### ERROR, TEST FAILED: expected %s for %s'
                     % (resultstr, teststr),
                     '<font color=red><b>', '</b></font>')
        self.failed += 1

    def writeln(self, s, before='', after=''):
        "Write s, html escaped, and wrapped with html code before and after."
        s = str(s)
        if self.html:
            # '&' goes first, or it would re-escape the entities made for '<' and '>'.
            s = s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
            print('%s%s%s' % (before, s, after))
        else:
            print(s)

    def seen(self, object):
        """Return true if this object has been seen before.
        In any case, record that we have seen it."""
        result = id(object) in self.already_seen
        self.already_seen[id(object)] = 1
        return result
|
||||
|
||||
def main(args):
    """Run Docex. args should be a list of python filenames.
    If the first arg is a non-python filename, it is taken as the
    name of a log file to which output is written. If it ends in
    ".htm" or ".html", then the output is written as html. If the
    first arg is "-", then standard output is used as the log file.
    With no args at all, an empty run is reported on standard output."""
    import glob
    out = None
    html = 0
    # Guard on `args` so that an empty argument list does not raise IndexError.
    if args and args[0] != "-" and not args[0].endswith(".py"):
        out = open(args[0], 'w')
        if args[0].endswith((".html", ".htm")):
            html = 1
    modules = []
    for arg in args:
        for filename in glob.glob(arg):
            if filename.endswith('.py'):
                # Import by module name, i.e. the filename without '.py'.
                modules.append(__import__(filename[:-3]))
    print(Docex(modules, html=html, out=out))
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: pass everything after the script name.
    script_args = sys.argv[1:]
    main(script_args)
|
||||
|
||||
|
||||
193
py/ibol.py
Normal file
193
py/ibol.py
Normal file
@@ -0,0 +1,193 @@
|
||||
from collections import defaultdict
|
||||
|
||||
def get_genomes(fname="byronbayseqs.fas.txt"):
    """Return three parallel lists (names, species, genomes) read from fname.
    Each record is '>name', a carriage return, then the genome up to the next
    carriage return; the species is the part of the name after the last '|'."""
    import io
    import re
    names, species, genomes = [], [], []
    # newline='' keeps the '\r' separators untranslated; `with` closes the file.
    with io.open(fname, newline='') as f:
        text = f.read()
    for name, g in re.findall(r'>(.*?)\r([^\r]*)\r*', text):
        names.append(name)
        species.append(name.split('|')[-1])
        genomes.append(g)
    return names, species, genomes
|
||||
|
||||
def get_neighbors(fname="editdistances.txt"):
    """Return dict: neighbors[i][j] = neighbors[j][i] = d means i,j are d apart.
    Each line of fname holds three integers 'i j d'. Every genome index in
    range(n) gets an entry (n is the module-level genome count)."""
    ## Read the data pre-computed from the Java program
    neighbors = dict((i, {}) for i in range(n))
    with open(fname) as lines:  # closed even if a line fails to parse
        for line in lines:
            i, j, d = map(int, line.split())
            neighbors[i][j] = neighbors[j][i] = d
    return neighbors
|
||||
|
||||
def cluster(neighbors, d, dc):
    """Partition the genomes into a list of clusters (sets). Each member is
    within d of some other member and within dc of every other member."""
    unclustered = set(neighbors)  ## set of g's not yet clustered
    clusters = []
    for g in neighbors:
        # closure() removes everything it absorbs from unclustered.
        if g in unclustered:
            clusters.append(closure(g, set(), unclustered, d, dc))
    return clusters
|
||||
|
||||
def closure(g, s, unclustered, d, dc):
    "Grow set s with g, and recursively with g's neighbors, while each one is 'near' s; return s."
    if g in s or g not in unclustered or not near(g, s, d, dc):
        return s
    s.add(g)
    unclustered.remove(g)
    for g2 in neighbors[g]:
        closure(g2, s, unclustered, d, dc)
    return s
|
||||
|
||||
def dist(i, j):
    "Distance between genomes i and j; max_distance when no distance is recorded."
    if i == j:
        return 0
    lo, hi = min(i, j), max(i, j)
    return neighbors[lo].get(hi, max_distance)
|
||||
|
||||
def near(g, cluster, d, dc):
    "Is g within d of some member of cluster, and within dc of every member?"
    distances = [dist(g, g2) for g2 in cluster]
    if not distances:
        distances = [0]  # an empty cluster counts as distance 0
    return min(distances) <= d and max(distances) <= dc
|
||||
|
||||
def diameter(cluster):
    "The largest distance between two elements of the cluster (0 if it is empty)."
    spans = [dist(i, j) for i in cluster for j in cluster]
    return max(spans) if spans else 0
|
||||
|
||||
def margin(cluster):
    "The smallest recorded distance from a cluster member to a genome outside it, else max_distance."
    outside = [d for g in cluster
               for (g2, d) in neighbors[g].items()
               if g2 not in cluster]
    return min(outside) if outside else max_distance
|
||||
|
||||
################################################################ Analysis
|
||||
|
||||
def pct(num, den):
    "Format num/den as a percentage string; den may be a number or a sized collection."
    if '__len__' in dir(den):
        den = len(den)
    if num == den:
        return ' 100%'
    percent = num * 100.0 / den
    return '%.1f%%' % percent
|
||||
|
||||
def histo(items):
    "Count items into a defaultdict; a tuple item adds item[1] to the count for item[0]."
    D = defaultdict(int)
    for item in items:
        if isinstance(item, tuple):
            key, count = item[0], item[1]
        else:
            key, count = item, 1
        D[key] += count
    return D
|
||||
|
||||
def showh(d):
    "Format a histogram (a dict, or items to be counted by histo) as sorted 'key:count' pairs."
    hist = d if isinstance(d, dict) else histo(d)
    pairs = sorted(hist.items())
    return ' '.join('%s:%s' % pair for pair in pairs)
|
||||
|
||||
def greport(genomes):
    "Print summary statistics for the genomes: counts, multi-named genomes, lengths, characters."
    print("Number of genomes: %d (%d distinct)" % (len(genomes), len(set(genomes))))
    # Map each distinct genome to the set of species names it appears under.
    G = dict((g, set()) for g in genomes)
    for i in range(n):
        G[genomes[i]].add(species[i])
    multi_named = [s for s in G.values() if len(s) > 1]
    print(' '.join(["Multi-named genomes:", str(len(multi_named))]))
    lens = [len(g) for g in genomes]
    print("Genome lengths: min=%d, max=%d" % (min(lens), max(lens)))
    print(' '.join(["Character counts: ", showh(c for g in genomes for c in g)]))
|
||||
|
||||
def nreport(neighbors):
    "Print histograms of nearest-neighbor distances and of all neighbor distances."
    NN, NumN = defaultdict(int), defaultdict(int)  ## Nearest, Number of neighbors
    for g in neighbors:
        distances = neighbors[g].values()
        # A genome with no recorded neighbors is tallied under '>25'.
        nearest = min(distances or ['>25'])
        NN[nearest] += 1
        for d2 in distances:
            NumN[d2] += 1
    print('')
    print(' '.join(["Nearest neighbor counts:", showh(NN)]))
    print(' '.join(["Number of neighbors at each distance:", showh(NumN)]))
|
||||
|
||||
def nspecies(c):
    "Number of distinct species names among the genomes in cluster c."
    names_in_cluster = set()
    for g in c:
        names_in_cluster.add(species[g])
    return len(names_in_cluster)
|
||||
|
||||
def showc(c):
    "One-line summary of cluster c: size, diameter, margin, members, species histogram."
    size, span, gap = len(c), diameter(c), margin(c)
    members = list(c)
    labels = showh(species[g] for g in c)
    return "N=%d, D=%d, M=%d: %s %s" % (size, span, gap, members, labels)
|
||||
|
||||
def creport(drange, dcrange):
    """Print clustering reports: a table of cluster counts for each (d, dc)
    in drange x dcrange, then size, margin and member details for
    cluster(neighbors, 8, 15)."""
    def table(what, fn):
        "Print a titled grid of fn(clusters): one row per d, one column per dc."
        print("\n" + what)
        header = ' '.join([' '+pct(dc, glen) for dc in dcrange])
        print(' '.join([' '*8, header]))
        for d in drange:
            cells = ['%s (%2d)' % (pct(d, glen), d)]
            for dc in dcrange:
                cells.append('%5s' % fn(cluster(neighbors, d, dc)))
            print(' '.join(cells))
    print('\nNearest neighbor must be closer than this percentage (places). ')
    print('Each column: all genomes in cluster within this percentage of each other.')
    table("Number of clusters", len)
    cluster1 = cluster(neighbors, 8, 15)  ## splits Cleora
    print(' '.join(['\nNumber of clusters of different sizes:',
                    showh(len(c) for c in cluster1)]))
    # M: number of clusters per margin; T: number of genomes per margin.
    M, T = defaultdict(int), defaultdict(int)
    for c in cluster1:
        m = margin(c)
        M[m] += 1
        T[m] += len(c)
    for x in M:
        print('%d\t%d\t%d' % (x, M[x], T[x]))
    print(' '.join(['\nMargins', showh(M)]))
    for c in cluster1:
        if margin(c) <= 16:
            print(showc(c))
    print('\nScatter plot of cluster diameter vs. margin.')
    for c in cluster1:
        if diameter(c) > 0:
            pass
            #print '%d\t%d' % (diameter(c), margin(c))
    print('\nDifference from cluster(neighbors, 11, 14):')
    #table(lambda cl: pct(len(cluster1)-compare(cluster1, cl),max(len(cluster1),len(cl))))
    print('\nNumber of clusters witth more than one species name:')
    #table(lambda cl: sum(nspecies(c) > 1 for c in cl))
    def pct_near_another(clusters, P=1.25):
        "Percentage (of n) of member/outsider pairs closer than P times the cluster diameter."
        total = 0
        for c in clusters:
            d = diameter(c)
            for g in c:
                for g2 in neighbors[g]:
                    if g2 not in c and dist(g, g2) < P*d:
                        total += 1
        return pct(total, n)
    def f(P):
        # NOTE(review): never called (the map below is commented out), and
        # table() takes two arguments but is given one here.
        print('\nPercent of individuals within %.2f*diameter of another cluster.'%P)
        table(lambda cl: pct_near_another(cl, P))
    #map(f, [1.2, 1.33, 1.5])
|
||||
|
||||
def sreport(species):
    "Print the species whose genomes span a diameter over 14, then a histogram of all diameters."
    SS = defaultdict(int)
    print('')
    distinct = set(species)
    for s in distinct:
        c = [g for g in range(n) if species[g] == s]
        d = diameter(c)
        if d > 14:
            if d == glen:
                d = '>25'
            print('diameter %s for %s (%d elements)' % (d, s, len(c)))
        SS[d] += 1
    print('Diameters of %d labelled clusters: %s' % (len(set(species)), showh(SS)))
|
||||
|
||||
def compare(cl1, cl2):
    """Score the agreement of two lists of clusters (sets): 1 for each equal
    pair, 0.5 for each pair where one set is the other plus a single element."""
    score = 0
    for c1 in cl1:
        for c2 in cl2:
            if c1 == c2:
                score += 1
            else:
                off_by_one = abs(len(c1) - len(c2)) == 1
                nested = off_by_one and (c1.issubset(c2) or c2.issubset(c1))
                score += 0.5 * nested
    return score
|
||||
|
||||
def unit_tests():
    "Sanity checks on the loaded data and helper functions; prints a message on success."
    assert set(len(g) for g in genomes) == set([glen])
    clusters = cluster(neighbors, 11, 11)
    # Every genome must land in exactly one cluster.
    clustered = [g for c in clusters for g in c]
    assert len(clustered) == len(genomes)
    assert len(set(clustered)) == len(genomes)
    assert dist(17, 42) == dist(42, 17)
    assert diameter(set()) == 0
    assert diameter([17, 42]) == dist(17, 42)
    assert pct(1, 2) == '50.0%'
    print('\nAll tests pass.\n')
|
||||
|
||||
|
||||
|
||||
################################################################ Main body
|
||||
|
||||
max_distance = 26  ## used by dist() and margin() when no distance is recorded
names, species, genomes = get_genomes()  ## genomes = ['ACT...', ...]
n = len(genomes)
glen = len(genomes[0])
neighbors = get_neighbors()  ## neighbors[g] = {g2: d2, g3: d3, ...}
greport(genomes)
nreport(neighbors)
creport(range(6, 15), [glen] + list(range(16, 10, -1)))
#sreport(species)

unit_tests()
|
||||
443
py/lettercount.py
Normal file
443
py/lettercount.py
Normal file
@@ -0,0 +1,443 @@
|
||||
"""
|
||||
Code to support http://norvig.com/mayzner.html
|
||||
Read files in the Google Books ngram format, and convert them to a simpler format.
|
||||
The original format looks like this:
|
||||
|
||||
word \t year \t word_count \t book_count
|
||||
word_POS \t year \t word_count \t book_count
|
||||
|
||||
for example,
|
||||
|
||||
accreted_VERB 1846 7 4
|
||||
accreted_VERB 1847 1 1
|
||||
accreted_VERB 1848 1 1
|
||||
|
||||
The function 'read_year_file' will convert a file of this form into a dict of
|
||||
{WORD: count} pairs, where the WORD is uppercased, and the count is the total
|
||||
over all years (you have the option to specify a starting year) and all
|
||||
capitalizations. Then 'read_dict' and 'write_dict' convert between a dict and
|
||||
an external file format that looks like this:
|
||||
|
||||
ACCRETED 9
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import division
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
#### Read files in Books-Ngram format; convert to a dict
|
||||
|
||||
def read_year_file(filename, dic=None):
    """Read a file of 'word year word_count book_count' lines and convert to a dict
    {WORD: totalcount}. Uppercase all words, and only include all-alphabetic words.
    A '_POS' suffix is dropped from the word. If dic is given, counts are added
    to it and it is returned."""
    if dic is None: dic = {}
    with open(filename) as lines:  # closed even if a line is malformed
        for line in lines:
            word, year, c1, c2 = line.split('\t')
            if '_' in word:
                word = word[:word.index('_')]
            if word.isalpha():
                word = word.upper()
                dic[word] = dic.get(word, 0) + int(c1)
    return dic
|
||||
|
||||
#### Read and write files of the form 'WORD \t count \n'
|
||||
|
||||
def write_dict(dic, filename):
    "Write a {word:count} dict as 'word \t count' lines in filename, sorted by word."
    # `with` closes the file even if a write fails; the function returns None.
    with open(filename, 'w') as out:
        for key in sorted(dic):
            out.write('%s\t%s\n' % (key, dic[key]))
|
||||
|
||||
def read_dict(filename, sep='\t'):
    "Read 'word \t count' lines from file and make them into a dict of {word:count}."
    with open(filename) as lines:
        # Build the dict inside the `with`, while the file is still open.
        pairs = (line.split(sep) for line in lines)
        return {word: int(count) for (word, count) in pairs}
|
||||
|
||||
#### Convert a bunch of year files into dict file format.
|
||||
|
||||
def convert_files(filenames, mincount=1e5):
    """For each year file, total the word counts, drop words counted fewer than
    mincount times, and write the rest to 'WORD-' + the upper-cased last
    character of the filename. Progress is printed along the way."""
    def report(filename, D, adj):
        "Print a timestamped line with the sum of the counts in D and the number of entries."
        import time
        N = len(D)
        W = sum(D.values())
        stamp = time.strftime("%H:%M:%S", time.gmtime())
        print('%s: %s %s words (%s tokens) at %s' % (
            filename, adj, format(W, ',d'), format(N, ',d'), stamp))
    for f in filenames:
        report(f, {}, 'starting')
        D = read_year_file(f)
        report(f, D, 'total')
        rare = [key for key in D if D[key] < mincount]
        for key in rare:
            del D[key]
        write_dict(D, 'WORD-' + f[-1].upper())
        report(f, D, 'popular')
|
||||
|
||||
def load(filename='top-words.txt'):
    "Set globals from a 'word \t count' file: D (the dict), W (len of D), M (sum of the counts)."
    global D, W, M
    D = read_dict(filename)
    W, M = len(D), sum(D.values())
|
||||
|
||||
#### Compute letter counts and save as HTML files.
|
||||
|
||||
def histogram(items):
    "Return a Counter mapping each key to the sum of its vals over the (key, val) pairs."
    totals = Counter()
    for key, val in items:
        totals[key] += val
    return totals
|
||||
|
||||
def end(name):
    "Return name prefixed with '/', i.e. the name of the matching closing tag."
    return '/' + name
|
||||
|
||||
def tag(name, **kwds):
    "Return '<name...>', where '...' is whatever keywords(kwds) renders."
    # NOTE(review): `keywords` is not defined in the visible part of this
    # file; presumably it formats kwds as HTML attributes -- confirm.
    attributes = keywords(kwds)
    return '<' + name + attributes + '>'
|
||||
|
||||
def row(cells, **kwds):
    "Return the opening '<tr>' of a table row."
    # NOTE(review): cells and kwds are currently ignored; this looks like
    # an unfinished stub.
    return '<tr>'
|
||||
|
||||
def ngram_tables(dic, N, pos=(0, 1, 2, 3, 4, -5, -4, -3, -2, -1)):
    """Return three tables for letter N-grams: (counts, counts1, counts2).

    counts is a Counter meant to hold {'AB': 123} totals.
    counts1[i] is a Counter meant to hold the totals at position i.
    counts2 is a one-element list holding a list of Counters.

    NOTE(review): the tables are allocated but never filled, and N and pos
    are unused; the counting logic appears to be unfinished.
    """
    # Size the per-position tables by the longest word in `dic` (previously
    # the unrelated module-level D was consulted instead of the argument).
    L = len(max(dic, key=len))
    counts = Counter()
    counts1 = [Counter() for _ in range(L)]
    counts2 = [[Counter() for i in range(L)]]
    # Return the tables, as the docstring has always promised.
    return counts, counts1, counts2
|
||||
|
||||
def counter(pairs):
    "Build a Counter by adding count to the entry for value, for each (value, count) pair."
    tally = Counter()
    for value, count in pairs:
        tally[value] += count
    return tally
|
||||
|
||||
def ngrams(word, N):
    "Return the list of all length-N substrings of word, left to right."
    last_start = len(word) - N
    return [word[start:start + N] for start in range(last_start + 1)]
|
||||
|
||||
|
||||
import glob
|
||||
#convert_files(glob.glob('book?'))
|
||||
|
||||
#DB = [[letter_counts() for length in range(length)] for length in range(maxlen)]
|
||||
|
||||
|
||||
## Unused ???
|
||||
|
||||
def letter_counts(wc):
    """From the word-count dict wc, build a dict of {(s, i, L): count}.

    s is a letter n-gram, i is its starting position, and L is the length
    of the word in which it appears (the keys are produced by pieces()).
    Each word's count is added to every key that word yields.
    """
    result = defaultdict(int)
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for (word, count) in wc.items():
        for p in pieces(word):
            result[p] += count
    return result
|
||||
|
||||
def pieces(word):
    "Generate (s, i, L) for each 1-letter and 2-letter gram s at index i of word (length L)."
    L = len(word)
    for i, letter in enumerate(word):
        yield (letter, i, L)
        # A 2-gram starts here only if there is a following letter.
        if i + 1 < L:
            yield (word[i:i+2], i, L)
|
||||
|
||||
def getcount(counts, s, pos, length):
    """Return the count for letter sequence s at position pos in words of the given length.

    The builtin `all` serves as a wildcard: if length is all, sum over every
    L in all_lengths; otherwise if pos is all, sum over positions
    0..length-1.  With no wildcard this is simply counts[s, pos, length].
    """
    if length == all:
        return sum(getcount(counts, s, pos, L) for L in all_lengths)
    if pos == all:
        return sum(getcount(counts, s, i, length) for i in range(length))
    return counts[(s, pos, length)]
|
||||
|
||||
|
||||
# Progress markers around the (currently disabled) count computation.
# Written as calls so the module parses on both Python 2 and Python 3.
print('start')
#wc = word_counts('count_100K.txt')
#counts = letter_counts(wc)
print('end')
|
||||
|
||||
|
||||
|
||||
def test():
    "Bind a small sample word-count dict to a local name; nothing is checked or returned."
    D = dict([('the', 100), ('of', 70), ('and', 60), ('to', 50), ('a', 40)])
|
||||
|
||||
def num(ch):
    "Map a letter to its alphabet index: 'a' or 'A' -> 0, ..., 'z' or 'Z' -> 25."
    lowercase = 'abcdefghijklmnopqrstuvwxyz'
    return lowercase.index(ch.lower())
|
||||
|
||||
|
||||
def stats(D, NS = (1, 2, 3, 4, 5, 6)):
    """Print a running table of how many distinct n-grams the words of D contain.

    Words are visited in the order given by sortedby(D).  After every 5000
    words, and after the last word, one row is printed: the number of words
    seen so far (in thousands), then for each n in NS the number of distinct
    n-grams seen and that number as a percentage of 26**n.
    """
    counts = {n: Counter() for n in NS}
    print('words ' + ' '.join(' %d-grams ' % n for n in NS))
    for (i, word) in enumerate(sortedby(D), 1):
        for n in NS:
            for ng in ngrams(word, n):
                counts[n][ng] += 1
        if i % 5000 == 0 or i == len(D):
            # Build the whole row and print it once: the same text the
            # trailing-comma print statements produced, but valid on both
            # Python 2 and Python 3.
            fields = ["%4dK" % (i/1000)]
            for n in NS:
                c = len(counts[n])
                field = "%5d (%d%%)" % (c, int(round(c*100/(26**n))))
                fields.append('%12s' % field)
            print(' '.join(fields))
|
||||
|
||||
letters = 'ETAOINSRHLDCUMFPGWYBVKXJQZ'
|
||||
alphabet = ''.join(sorted(letters))
|
||||
|
||||
from itertools import cycle, izip
|
||||
|
||||
colors = 'ygobp'
|
||||
|
||||
def bar(text, color, count, N, pixels, height=16):
    """Return the HTML for one labelled bar of a bar chart.

    The bar is the image '<color>.jpg', height pixels tall and
    round(pixels * count / N) wide (widths below 2 become 3).  The label
    text is shifted left so that it sits on top of the bar, and the tooltip
    shows text, the percentage count/N, and count with thousands separators.
    """
    width = int(round(pixels * count / N))
    if width < 2:
        width = 3
    percent = count*100./N
    title = '{}: {:.3f}%; {:,}'.format(text, percent, count)
    offset = -width+2   # pull the label back over the image
    template = '<span title="%s"><img src="%s.jpg" height=%d width=%d><span style="position:relative; left:%d; bottom:4">%s</span></span>'
    return template % (title, color, height, width, offset, text)
|
||||
|
||||
def letter_bar(LC, N=None, factor='', pixels=700):
    """Return the concatenated HTML bars for every letter in `letters`.

    LC maps upper-case letters to counts; N defaults to the sum of those
    counts.  Bars follow the order of `letters`, labelled in lower case and
    coloured by cycling through `colors`.  `factor` is currently unused.
    """
    if N is None:
        N = sum(LC.values())
    #divisor = {'':1., 'K':1e3, 'M':1e6, 'B':1e9}[factor]
    segments = []
    for (L, color) in izip(letters, cycle(colors)):
        segments.append(bar(L.lower(), color, LC[L], N, pixels))
    return ''.join(segments)
|
||||
|
||||
|
||||
def singleton(x):
    "Return a new one-element list containing x."
    return [x]
|
||||
|
||||
positions = [0, 1, 2, 3, 4, 5, 6, -7, -6, -5, -4, -3, -2, -1]
|
||||
|
||||
def substr(word, pos, length):
    """Return the piece of word with the given length anchored at pos, or None.

    A non-negative pos is the index where the piece starts.  A negative pos
    is the index (counted from the end, -1 being the last letter) where the
    piece ends.  None is returned when the piece would not fit in word.
    """
    size = len(word)
    if pos >= 0:
        stop = pos + length
        return word[pos:stop] if stop <= size else None
    # Negative pos: the piece ends at index size+pos, inclusive.
    if abs(pos) + length - 1 > size:
        return None
    stop = size + pos + 1
    return word[stop - length:stop]
|
||||
|
||||
def lettercount(D, pos):
    """Return an HTML line charting how often each letter occurs at position pos.

    D maps words to counts.  Words too short to have a letter at pos make
    substr return None and are dropped from the tally.  The tally is also
    printed.  The label is pos itself for negative positions, pos+1
    (1-based) for non-negative ones, and str(pos)+'+' if pos is a tuple.
    """
    LC = histogram((substr(w, pos, 1), D[w]) for w in D)
    del LC[None]
    print(LC)   # call form: same output on Python 2, valid on Python 3
    if isinstance(pos, tuple):
        pos_name = str(pos)+'+'
    elif pos < 0:
        pos_name = pos
    else:
        pos_name = pos+1
    return '\n<br>\n%-3s %s' % (pos_name, letter_bar(LC))
|
||||
|
||||
def ngramcount(D, n=2):
    "Return a histogram of the n-grams of the words in D, each weighted by its word's count."
    weighted = ((gram, D[word]) for word in D for gram in ngrams(word, n))
    return histogram(weighted)
|
||||
|
||||
def twograms(D2):
    """Return an HTML table with one cell per two-letter combination.

    D2 maps 2-grams to counts.  Each row fixes the second letter B and runs
    over every first letter A in `alphabet`; cells are built by cell() and
    rows by tr().
    """
    N = sum(D2.values())
    lines = ['<table cellpadding=1 cellborder=1>']
    for B in alphabet:
        lines.append(tr([cell(A+B, D2, N) for A in alphabet]))
    lines.append('</table>')
    return '\n'.join(lines)
|
||||
|
||||
def cell(text, D2, N, height=16, maxwidth=25, scale=27):
    """Return the HTML '<td ...>' cell for the n-gram text.

    The count D2.get(text, 0) sets the cell width to
    round(maxwidth * scale * count / N), with a minimum of 1.  The cell uses
    'o.jpg' as its background and shows text, the percentage count/N and the
    count in its tooltip.
    """
    # An earlier definition of cell() (an <img>/<span> variant with
    # unbalanced closing tags) was immediately shadowed by this one and
    # never ran; only the live definition is kept.
    count = D2.get(text, 0)
    width = int(round(maxwidth * count * scale * 1. / N))
    if width < 1: width = 1
    title = '{}: {:.3f}%; {:,}'.format(text, count*100./N, count)
    return '<td title="%s" background="o.jpg" height=%d width=%d>%s' % (
        title, height, width, text)
|
||||
|
||||
def tr(cells):
    "Return one HTML table row: '<tr>' followed by the given cell strings."
    body = ''.join(cells)
    return '<tr>' + body
|
||||
|
||||
def comma(n):
    "Format the number n with commas as thousands separators."
    return format(n, ',')
|
||||
|
||||
def ngram_stats(D, n, k=5):
    """Return one HTML table row summarising the n-grams of the words in D.

    The row shows n, the number of distinct n-grams, the sum of their
    counts, links to 'counts-<n>.csv' and 'counts-<n>.html', and the k
    n-grams that sortedby() ranks first.
    """
    DN = ngramcount(D, n)
    distinct = comma(len(DN))
    total = comma(sum(DN.values()))
    topk = ', '.join(sortedby(DN)[:k])
    template = '<tr><td>%d-grams<td align=right>%s<td align=right>%s<td><a href="counts-%d.csv">counts-%d.csv</a><td><a href="counts-%d.html">counts-%d.html</a><td>%s'
    return template % (n, distinct, total, n, n, n, n, topk)
|
||||
|
||||
#### Tables
|
||||
|
||||
def sortedby(D):
    "Return the keys of D ordered from the largest value to the smallest."
    def descending(key):
        return -D[key]
    return sorted(D, key=descending)
|
||||
|
||||
ANY = '*'
|
||||
|
||||
wordlengths = range(1, 10)
|
||||
|
||||
def col(*args):
    "Return the column key for the given fields, i.e. the arguments as a tuple."
    return args
|
||||
|
||||
def columns(n, wordlengths=wordlengths):
    """Return the list of column keys used for the n-gram tables.

    In order: the overall total (ANY, ANY); one (k, ANY) per word length
    k >= n; one (k, start, end) per such length and 1-based start position;
    one (ANY, start, end) per value in wordlengths; and one
    (ANY, -k, end) per length k, longest first, whose end is still negative.
    """
    lengths = [k for k in wordlengths if k >= n]
    span = n - 1   # distance from an n-gram's first to its last position
    cols = [col(ANY, ANY)]
    cols.extend(col(k, ANY) for k in lengths)
    for k in lengths:
        for start in range(1, 2+k-n):
            cols.append(col(k, start, start+span))
    cols.extend(col(ANY, start, start+span) for start in wordlengths)
    for k in reversed(lengths):
        if span - k < 0:
            cols.append(col(ANY, -k, span - k))
    return cols
|
||||
|
||||
def colname(col):
    "Return the header text for a column key: 'a/b' for a pair, 'a/b:c' for a triple."
    if len(col) == 2:
        return '%s/%s' % col
    return '%s/%d:%d' % col
|
||||
|
||||
def csvline(first, rest):
    "Return one tab-separated line: first, followed by str() of each item in rest."
    # A list comprehension rather than `[first] + map(str, rest)`: on
    # Python 3 map() is not a list and cannot be added to one.
    return '\t'.join([first] + [str(item) for item in rest])
|
||||
|
||||
def makecsv(n, D=D):
    """Write 'ngrams<n>.csv', a tab-separated table of n-gram counts, and return the counts.

    D maps words to counts.  For every n-gram, counts are accumulated under
    these column keys: (ANY, ANY) overall; (wordlength, ANY); for the first
    9 start positions (wordlength, start, end) and (ANY, start, end); and,
    when the n-gram starts within 9 letters of the word's end,
    (ANY, -from_end, ...).  Rows are written in descending order of the
    overall count, with the columns given by columns(n).

    Returns the nested dict {ngram: {column_key: count}}.
    """
    cols = columns(n)
    Dng = defaultdict(lambda: defaultdict(int))
    for w in D:
        # These do not change per n-gram, so look them up once per word.
        N = D[w]
        wlen = len(w)
        # Enumerate the n-grams of the word and increment counts for each one.
        for (start, ng) in enumerate(ngrams(w, n), 1):
            entry = Dng[ng]
            entry[ANY, ANY] += N
            entry[wlen, ANY] += N
            if start <= 9:
                entry[wlen, start, start+n-1] += N
                entry[ANY, start, start+n-1] += N
            from_end = wlen-start+1
            if from_end <= 9:
                entry[ANY, -from_end, -from_end+n-1] += N
    # open()/write() replace the Python-2-only file() and `print >>`; the
    # with-block closes the file even if writing a row fails.
    with open('ngrams%d.csv' % n, 'w') as out:
        out.write(csvline('%d-gram' % n, [colname(c) for c in cols]) + '\n')
        for ng in sorted(Dng, key=lambda ng: -Dng[ng][(ANY, ANY)]):
            out.write(csvline(ng, [Dng[ng].get(c, 0) for c in cols]) + '\n')
    return Dng
|
||||
|
||||
### Tests
|
||||
|
||||
"""
|
||||
>>> for w in words:
|
||||
print '%-6s %6.2f B (%4.2f%%) <img src="s.jpg" height=12 width=%d>' % (w.lower(), D[w]/1e9, D[w]*100./N, int(round(D[w]*4000./N)))
|
||||
...
|
||||
the 53.10 B (7.14%) <img src="s.jpg" height=12 width=286>
|
||||
of 30.97 B (4.16%) <img src="s.jpg" height=12 width=167>
|
||||
and 22.63 B (3.04%) <img src="s.jpg" height=12 width=122>
|
||||
to 19.35 B (2.60%) <img src="s.jpg" height=12 width=104>
|
||||
in 16.89 B (2.27%) <img src="s.jpg" height=12 width=91>
|
||||
a 15.31 B (2.06%) <img src="s.jpg" height=12 width=82>
|
||||
is 8.38 B (1.13%) <img src="s.jpg" height=12 width=45>
|
||||
that 8.00 B (1.08%) <img src="s.jpg" height=12 width=43>
|
||||
for 6.55 B (0.88%) <img src="s.jpg" height=12 width=35>
|
||||
it 5.74 B (0.77%) <img src="s.jpg" height=12 width=31>
|
||||
as 5.70 B (0.77%) <img src="s.jpg" height=12 width=31>
|
||||
was 5.50 B (0.74%) <img src="s.jpg" height=12 width=30>
|
||||
with 5.18 B (0.70%) <img src="s.jpg" height=12 width=28>
|
||||
be 4.82 B (0.65%) <img src="s.jpg" height=12 width=26>
|
||||
by 4.70 B (0.63%) <img src="s.jpg" height=12 width=25>
|
||||
on 4.59 B (0.62%) <img src="s.jpg" height=12 width=25>
|
||||
not 4.52 B (0.61%) <img src="s.jpg" height=12 width=24>
|
||||
he 4.11 B (0.55%) <img src="s.jpg" height=12 width=22>
|
||||
i 3.88 B (0.52%) <img src="s.jpg" height=12 width=21>
|
||||
this 3.83 B (0.51%) <img src="s.jpg" height=12 width=21>
|
||||
are 3.70 B (0.50%) <img src="s.jpg" height=12 width=20>
|
||||
or 3.67 B (0.49%) <img src="s.jpg" height=12 width=20>
|
||||
his 3.61 B (0.49%) <img src="s.jpg" height=12 width=19>
|
||||
from 3.47 B (0.47%) <img src="s.jpg" height=12 width=19>
|
||||
at 3.41 B (0.46%) <img src="s.jpg" height=12 width=18>
|
||||
which 3.14 B (0.42%) <img src="s.jpg" height=12 width=17>
|
||||
but 2.79 B (0.38%) <img src="s.jpg" height=12 width=15>
|
||||
have 2.78 B (0.37%) <img src="s.jpg" height=12 width=15>
|
||||
an 2.73 B (0.37%) <img src="s.jpg" height=12 width=15>
|
||||
had 2.62 B (0.35%) <img src="s.jpg" height=12 width=14>
|
||||
they 2.46 B (0.33%) <img src="s.jpg" height=12 width=13>
|
||||
you 2.34 B (0.31%) <img src="s.jpg" height=12 width=13>
|
||||
were 2.27 B (0.31%) <img src="s.jpg" height=12 width=12>
|
||||
their 2.15 B (0.29%) <img src="s.jpg" height=12 width=12>
|
||||
one 2.15 B (0.29%) <img src="s.jpg" height=12 width=12>
|
||||
all 2.06 B (0.28%) <img src="s.jpg" height=12 width=11>
|
||||
we 2.06 B (0.28%) <img src="s.jpg" height=12 width=11>
|
||||
can 1.67 B (0.22%) <img src="s.jpg" height=12 width=9>
|
||||
her 1.63 B (0.22%) <img src="s.jpg" height=12 width=9>
|
||||
has 1.63 B (0.22%) <img src="s.jpg" height=12 width=9>
|
||||
there 1.62 B (0.22%) <img src="s.jpg" height=12 width=9>
|
||||
been 1.62 B (0.22%) <img src="s.jpg" height=12 width=9>
|
||||
if 1.56 B (0.21%) <img src="s.jpg" height=12 width=8>
|
||||
more 1.55 B (0.21%) <img src="s.jpg" height=12 width=8>
|
||||
when 1.52 B (0.20%) <img src="s.jpg" height=12 width=8>
|
||||
will 1.49 B (0.20%) <img src="s.jpg" height=12 width=8>
|
||||
would 1.47 B (0.20%) <img src="s.jpg" height=12 width=8>
|
||||
who 1.46 B (0.20%) <img src="s.jpg" height=12 width=8>
|
||||
so 1.45 B (0.19%) <img src="s.jpg" height=12 width=8>
|
||||
no 1.40 B (0.19%) <img src="s.jpg" height=12 width=8>
|
||||
|
||||
>>> for n in sorted(H):
|
||||
print '%2d %9.2f M (%6.3f%%) <img src="s.jpg" height=12 width=%d> %d' % (n, H[n]/1e6, H[n]*100./NN, H[n]*3000./NN, n)
|
||||
...
|
||||
1 22301.22 M ( 2.998%) <img src="s.jpg" height=12 width=89> 1
|
||||
2 131293.85 M (17.651%) <img src="s.jpg" height=12 width=529> 2
|
||||
3 152568.38 M (20.511%) <img src="s.jpg" height=12 width=615> 3
|
||||
4 109988.33 M (14.787%) <img src="s.jpg" height=12 width=443> 4
|
||||
5 79589.32 M (10.700%) <img src="s.jpg" height=12 width=320> 5
|
||||
6 62391.21 M ( 8.388%) <img src="s.jpg" height=12 width=251> 6
|
||||
7 59052.66 M ( 7.939%) <img src="s.jpg" height=12 width=238> 7
|
||||
8 44207.29 M ( 5.943%) <img src="s.jpg" height=12 width=178> 8
|
||||
9 33006.93 M ( 4.437%) <img src="s.jpg" height=12 width=133> 9
|
||||
10 22883.84 M ( 3.076%) <img src="s.jpg" height=12 width=92> 10
|
||||
11 13098.06 M ( 1.761%) <img src="s.jpg" height=12 width=52> 11
|
||||
12 7124.15 M ( 0.958%) <img src="s.jpg" height=12 width=28> 12
|
||||
13 3850.58 M ( 0.518%) <img src="s.jpg" height=12 width=15> 13
|
||||
14 1653.08 M ( 0.222%) <img src="s.jpg" height=12 width=6> 14
|
||||
15 565.24 M ( 0.076%) <img src="s.jpg" height=12 width=2> 15
|
||||
16 151.22 M ( 0.020%) <img src="s.jpg" height=12 width=0> 16
|
||||
17 72.81 M ( 0.010%) <img src="s.jpg" height=12 width=0> 17
|
||||
18 28.62 M ( 0.004%) <img src="s.jpg" height=12 width=0> 18
|
||||
19 8.51 M ( 0.001%) <img src="s.jpg" height=12 width=0> 19
|
||||
20 6.35 M ( 0.001%) <img src="s.jpg" height=12 width=0> 20
|
||||
21 0.13 M ( 0.000%) <img src="s.jpg" height=12 width=0> 21
|
||||
22 0.81 M ( 0.000%) <img src="s.jpg" height=12 width=0> 22
|
||||
23 0.32 M ( 0.000%) <img src="s.jpg" height=12 width=0> 23
|
||||
|
||||
>>> NL = sum(LC.values())
|
||||
|
||||
>>> for L in sorted(LC, key=lambda L: -LC[L]):
|
||||
print '%s %8.1f B (%5.2f%%) <img src="s.jpg" height=12 width=%d>' % (L, LC[L]/1e9, LC[L]*100./NL, LC[L]*3000./NL)
|
||||
...
|
||||
E 445.2 B (12.49%) <img src="s.jpg" height=12 width=374>
|
||||
T 330.5 B ( 9.28%) <img src="s.jpg" height=12 width=278>
|
||||
A 286.5 B ( 8.04%) <img src="s.jpg" height=12 width=241>
|
||||
O 272.3 B ( 7.64%) <img src="s.jpg" height=12 width=229>
|
||||
I 269.7 B ( 7.57%) <img src="s.jpg" height=12 width=227>
|
||||
N 257.8 B ( 7.23%) <img src="s.jpg" height=12 width=217>
|
||||
S 232.1 B ( 6.51%) <img src="s.jpg" height=12 width=195>
|
||||
R 223.8 B ( 6.28%) <img src="s.jpg" height=12 width=188>
|
||||
H 180.1 B ( 5.05%) <img src="s.jpg" height=12 width=151>
|
||||
L 145.0 B ( 4.07%) <img src="s.jpg" height=12 width=122>
|
||||
D 136.0 B ( 3.82%) <img src="s.jpg" height=12 width=114>
|
||||
C 119.2 B ( 3.34%) <img src="s.jpg" height=12 width=100>
|
||||
U 97.3 B ( 2.73%) <img src="s.jpg" height=12 width=81>
|
||||
M 89.5 B ( 2.51%) <img src="s.jpg" height=12 width=75>
|
||||
F 85.6 B ( 2.40%) <img src="s.jpg" height=12 width=72>
|
||||
P 76.1 B ( 2.14%) <img src="s.jpg" height=12 width=64>
|
||||
G 66.6 B ( 1.87%) <img src="s.jpg" height=12 width=56>
|
||||
W 59.7 B ( 1.68%) <img src="s.jpg" height=12 width=50>
|
||||
Y 59.3 B ( 1.66%) <img src="s.jpg" height=12 width=49>
|
||||
B 52.9 B ( 1.48%) <img src="s.jpg" height=12 width=44>
|
||||
V 37.5 B ( 1.05%) <img src="s.jpg" height=12 width=31>
|
||||
K 19.3 B ( 0.54%) <img src="s.jpg" height=12 width=16>
|
||||
X 8.4 B ( 0.23%) <img src="s.jpg" height=12 width=7>
|
||||
J 5.7 B ( 0.16%) <img src="s.jpg" height=12 width=4>
|
||||
Q 4.3 B ( 0.12%) <img src="s.jpg" height=12 width=3>
|
||||
Z 3.2 B ( 0.09%) <img src="s.jpg" height=12 width=2>
|
||||
|
||||
>>> D2 = ngramcount(D, 2)
|
||||
|
||||
>>> for ng in sorted(D2, key=lambda L: -D2[L])[:50]: print '%s %8.1f B (%5.2f%%) <img src="o.jpg" height=12 width=%d>' % (ng, D2[ng]/1e9, D2[ng]*100./N2, D2[ng]*15000./N2)
|
||||
|
||||
def doit(k=25):
|
||||
counts = [sortedby(ngramcount(D, n))[:k] for n in range(2, 10)]
|
||||
for i in range(k):
|
||||
print (' '.join(count[i] for count in counts)).lower()
|
||||
"""
|
||||
145
py/lis.py
Normal file
145
py/lis.py
Normal file
@@ -0,0 +1,145 @@
|
||||
################ Lispy: Scheme Interpreter in Python
|
||||
|
||||
## (c) Peter Norvig, 2010-16; See http://norvig.com/lispy.html
|
||||
|
||||
from __future__ import division
|
||||
import math
|
||||
import operator as op
|
||||
|
||||
################ Types
|
||||
|
||||
Symbol = str # A Lisp Symbol is implemented as a Python str
|
||||
List = list # A Lisp List is implemented as a Python list
|
||||
Number = (int, float) # A Lisp Number is implemented as a Python int or float
|
||||
|
||||
################ Parsing: parse, tokenize, and read_from_tokens
|
||||
|
||||
def parse(program):
    "Turn the text of a Scheme expression into its nested-list representation."
    tokens = tokenize(program)
    return read_from_tokens(tokens)
|
||||
|
||||
def tokenize(s):
    "Split a string into tokens: each parenthesis, and each whitespace-separated word."
    # Surround parentheses with spaces so split() isolates them.
    padded = s.replace('(',' ( ')
    padded = padded.replace(')',' ) ')
    return padded.split()
|
||||
|
||||
def read_from_tokens(tokens):
    """Read one expression from a list of tokens, consuming them from the front.

    Returns a nested list for a parenthesised form, or atom(token) otherwise.
    Raises SyntaxError on an unexpected ')' or when the tokens run out,
    including inside an unclosed '('.
    """
    if len(tokens) == 0:
        raise SyntaxError('unexpected EOF while reading')
    token = tokens.pop(0)
    if '(' == token:
        L = []
        # Stop when the tokens run out as well as on ')', so that an
        # unclosed list is reported as a SyntaxError, not an IndexError.
        while tokens and tokens[0] != ')':
            L.append(read_from_tokens(tokens))
        if not tokens:
            raise SyntaxError('unexpected EOF while reading')
        tokens.pop(0) # pop off ')'
        return L
    elif ')' == token:
        raise SyntaxError('unexpected )')
    else:
        return atom(token)
|
||||
|
||||
def atom(token):
    "Convert a token to an int if possible, else a float, else a Symbol."
    for convert in (int, float):
        try:
            return convert(token)
        except ValueError:
            pass
    return Symbol(token)
|
||||
|
||||
################ Environments
|
||||
|
||||
def standard_env():
    """Return a new Env holding some Scheme standard procedures.

    The environment contains everything in the math module plus the
    arithmetic, comparison, list and predicate procedures listed below.
    """
    env = Env()
    env.update(vars(math)) # sin, cos, sqrt, pi, ...
    env.update({
        '+':op.add, '-':op.sub, '*':op.mul, '/':op.truediv,
        '>':op.gt, '<':op.lt, '>=':op.ge, '<=':op.le, '=':op.eq,
        'abs':     abs,
        'append':  op.add,
        # The `apply` builtin exists only on Python 2; this form behaves
        # the same for (apply proc args) and works on Python 3 as well.
        'apply':   lambda proc, args: proc(*args),
        'begin':   lambda *x: x[-1],
        'car':     lambda x: x[0],
        'cdr':     lambda x: x[1:],
        'cons':    lambda x,y: [x] + y,
        'eq?':     op.is_,
        'equal?':  op.eq,
        'length':  len,
        'list':    lambda *x: list(x),
        'list?':   lambda x: isinstance(x,list),
        # Always hand back a list (a Lisp List is a Python list); on
        # Python 3 a bare map() would return an iterator instead.
        'map':     lambda *args: list(map(*args)),
        'max':     max,
        'min':     min,
        'not':     op.not_,
        'null?':   lambda x: x == [],
        'number?': lambda x: isinstance(x, Number),
        'procedure?': callable,
        'round':   round,
        'symbol?': lambda x: isinstance(x, Symbol),
    })
    return env
|
||||
|
||||
class Env(dict):
    "An environment: a dict of {'var':val} pairs, with an outer Env."
    def __init__(self, parms=(), args=(), outer=None):
        "Bind each name in parms to the corresponding value in args; remember outer."
        self.update(zip(parms, args))
        self.outer = outer

    def find(self, var):
        """Find the innermost Env where var appears.

        Raises LookupError(var) if var is bound in no enclosing environment.
        """
        if var in self:
            return self
        # Without this check an unbound variable failed with an obscure
        # "'NoneType' object has no attribute 'find'" AttributeError.
        if self.outer is None:
            raise LookupError(var)
        return self.outer.find(var)
|
||||
|
||||
global_env = standard_env()
|
||||
|
||||
################ Interaction: A REPL
|
||||
|
||||
def repl(prompt='lis.py> '):
    "Loop forever: read a line at the prompt, evaluate it, and print any non-None result."
    while True:
        line = raw_input(prompt)
        val = eval(parse(line))
        if val is None:
            continue
        print(lispstr(val))
|
||||
|
||||
def lispstr(exp):
    "Render a Python object as Lisp text: lists become '(a b ...)', anything else str(exp)."
    if not isinstance(exp, List):
        return str(exp)
    return '(' + ' '.join(lispstr(item) for item in exp) + ')'
|
||||
|
||||
################ Procedures
|
||||
|
||||
class Procedure(object):
    "A user-defined Scheme procedure."
    def __init__(self, parms, body, env):
        "Store the parameter names, the body expression, and the defining environment."
        self.parms = parms
        self.body = body
        self.env = env

    def __call__(self, *args):
        "Evaluate the body in a new Env that binds parms to args inside the defining env."
        frame = Env(self.parms, args, self.env)
        return eval(self.body, frame)
|
||||
|
||||
################ eval
|
||||
|
||||
def eval(x, env=global_env):
    """Evaluate the expression x in the environment env and return its value.

    Symbols are looked up in env; non-list values evaluate to themselves;
    the special forms quote, if, define, set! and lambda are handled here;
    any other list is a procedure call.  define and set! return None.
    """
    if isinstance(x, Symbol):          # variable reference
        return env.find(x)[x]
    if not isinstance(x, List):        # constant literal
        return x
    head = x[0]
    if head == 'quote':                # (quote exp)
        (_, exp) = x
        return exp
    if head == 'if':                   # (if test conseq alt)
        (_, test, conseq, alt) = x
        branch = conseq if eval(test, env) else alt
        return eval(branch, env)
    if head == 'define':               # (define var exp)
        (_, var, exp) = x
        env[var] = eval(exp, env)
        return None
    if head == 'set!':                 # (set! var exp)
        (_, var, exp) = x
        env.find(var)[var] = eval(exp, env)
        return None
    if head == 'lambda':               # (lambda (var...) body)
        (_, parms, body) = x
        return Procedure(parms, body, env)
    # (proc arg...)
    proc = eval(head, env)
    args = [eval(arg, env) for arg in x[1:]]
    return proc(*args)
|
||||
318
py/lispy.py
Normal file
318
py/lispy.py
Normal file
@@ -0,0 +1,318 @@
|
||||
################ Scheme Interpreter in Python
|
||||
|
||||
## (c) Peter Norvig, 2010; See http://norvig.com/lispy2.html
|
||||
|
||||
################ Symbol, Procedure, classes
|
||||
|
||||
from __future__ import division
|
||||
import re, sys, StringIO
|
||||
|
||||
class Symbol(str):
    "A Scheme symbol: a str subclass, so symbols can be told apart from plain strings."
|
||||
|
||||
def Sym(s, symbol_table={}):
    "Return the unique Symbol for the string s, creating and recording it on first use."
    # The default dict is shared across calls on purpose: it is the table
    # of interned symbols.
    try:
        return symbol_table[s]
    except KeyError:
        interned = symbol_table[s] = Symbol(s)
        return interned
|
||||
|
||||
_quote, _if, _set, _define, _lambda, _begin, _definemacro, = map(Sym,
|
||||
"quote if set! define lambda begin define-macro".split())
|
||||
|
||||
_quasiquote, _unquote, _unquotesplicing = map(Sym,
|
||||
"quasiquote unquote unquote-splicing".split())
|
||||
|
||||
class Procedure(object):
    "A user-defined Scheme procedure."
    def __init__(self, parms, exp, env):
        "Store the parameter list, the body expression, and the defining environment."
        self.parms = parms
        self.exp = exp
        self.env = env

    def __call__(self, *args):
        "Evaluate the body in a new Env that binds parms to args inside the defining env."
        frame = Env(self.parms, args, self.env)
        return eval(self.exp, frame)
|
||||
|
||||
################ parse, read, and user interaction
|
||||
|
||||
def parse(inport):
    """Read one expression from inport and return it expanded and error-checked.

    For backwards compatibility inport may also be a str, which is wrapped
    in an InPort over a StringIO first.
    """
    if isinstance(inport, str):
        inport = InPort(StringIO.StringIO(inport))
    expression = read(inport)
    return expand(expression, toplevel=True)
|
||||
|
||||
eof_object = Symbol('#<eof-object>') # Note: uninterned; can't be read
|
||||
|
||||
class InPort(object):
    "An input port. Retains a line of chars."
    # Group 1 is the next token (',@', a single special character, a string
    # literal, a ';' comment, or a run of ordinary characters); group 2 is
    # the rest of the line.
    tokenizer = r"""\s*(,@|[('`,)]|"(?:[\\].|[^\\"])*"|;.*|[^\s('"`,;)]*)(.*)"""

    def __init__(self, file):
        "Wrap the file-like object file; the line buffer starts out empty."
        self.file = file
        self.line = ''

    def next_token(self):
        "Return the next token, reading new text into the line buffer if needed; eof_object at end of input."
        while True:
            if self.line == '':
                self.line = self.file.readline()
            if self.line == '':
                return eof_object
            match = re.match(InPort.tokenizer, self.line)
            token, self.line = match.groups()
            # Skip empty matches and comments; keep scanning.
            if token == '' or token.startswith(';'):
                continue
            return token
|
||||
|
||||
def readchar(inport):
    "Return the next character from an input port, or eof_object when none is left."
    buffered = inport.line
    if buffered == '':
        # Nothing buffered: read straight from the underlying file.
        return inport.file.read(1) or eof_object
    inport.line = buffered[1:]
    return buffered[0]
|
||||
|
||||
def read(inport):
    """Read one Scheme expression from an input port.

    Returns eof_object if the port is already at end of input.  Raises
    SyntaxError on an unexpected ')' or on end of input inside a list.
    """
    def read_ahead(token):
        "Finish reading the expression that begins with the given token."
        if '(' == token:
            # Collect sub-expressions until the matching ')' token arrives.
            return [read_ahead(inner) for inner in iter(inport.next_token, ')')]
        if ')' == token:
            raise SyntaxError('unexpected )')
        if token in quotes:
            return [quotes[token], read(inport)]
        if token is eof_object:
            raise SyntaxError('unexpected EOF in list')
        return atom(token)
    # body of read:
    token1 = inport.next_token()
    if token1 is eof_object:
        return eof_object
    return read_ahead(token1)
|
||||
|
||||
quotes = {"'":_quote, "`":_quasiquote, ",":_unquote, ",@":_unquotesplicing}
|
||||
|
||||
def atom(token):
    """Convert a token into the Python value it denotes.

    '#t' and '#f' become True and False; a token starting with '"' becomes
    a string with its escapes decoded; otherwise int, float and complex
    (with the first 'i' read as 'j') are tried in turn, and anything else
    becomes a Symbol.
    """
    if token == '#t':
        return True
    if token == '#f':
        return False
    if token[0] == '"':
        return token[1:-1].decode('string_escape')
    for convert in (int, float):
        try:
            return convert(token)
        except ValueError:
            pass
    try:
        return complex(token.replace('i', 'j', 1))
    except ValueError:
        return Sym(token)
|
||||
|
||||
def to_string(x):
    """Convert a Python object back into a Lisp-readable string.

    Booleans become '#t'/'#f', Symbols are returned as they are, strings
    are quoted with escapes, lists are rendered recursively in parentheses,
    and complex numbers use 'i' in place of 'j'.
    """
    if x is True:
        return "#t"
    if x is False:
        return "#f"
    if isa(x, Symbol):
        return x
    if isa(x, str):
        return '"%s"' % x.encode('string_escape').replace('"',r'\"')
    if isa(x, list):
        return '('+' '.join(map(to_string, x))+')'
    if isa(x, complex):
        return str(x).replace('j', 'i')
    return str(x)
|
||||
|
||||
def load(filename):
    "Eval every expression from a file, with no prompt and no printing of results."
    # The with-block closes the file once repl returns (or raises);
    # previously the handle was never closed.
    with open(filename) as source:
        repl(None, InPort(source), None)
|
||||
|
||||
def repl(prompt='lispy> ', inport=InPort(sys.stdin), out=sys.stdout):
    """A prompt-read-eval-print loop.

    Writes a version banner and, if prompt is truthy, the prompt to stderr.
    Each expression parsed from inport is evaluated; a non-None value is
    written to out (when out is truthy) followed by a newline.  Any
    Exception is reported as 'TypeName: message' and the loop continues.
    Returns when inport is exhausted.
    """
    sys.stderr.write("Lispy version 2.0\n")
    while True:
        try:
            if prompt: sys.stderr.write(prompt)
            x = parse(inport)
            if x is eof_object: return
            val = eval(x)
            # write()/print() calls replace the Python-2-only `print >> out`
            # and print statement; the text produced is the same.
            if val is not None and out:
                out.write(to_string(val) + '\n')
        except Exception as e:
            print('%s: %s' % (type(e).__name__, e))
|
||||
|
||||
################ Environment class
|
||||
|
||||
class Env(dict):
    "An environment: a dict of {'var':val} pairs, with an outer Env."
    def __init__(self, parms=(), args=(), outer=None):
        """Bind parms to args inside the enclosing environment outer.

        If parms is a single Symbol it is bound to the list of all args;
        otherwise parms and args are paired up and must have equal length,
        else TypeError is raised.
        """
        self.outer = outer
        if isa(parms, Symbol):
            self[parms] = list(args)
            return
        if len(args) != len(parms):
            raise TypeError('expected %s, given %s, '
                            % (to_string(parms), to_string(args)))
        self.update(zip(parms,args))

    def find(self, var):
        "Find the innermost Env where var appears; raise LookupError(var) if there is none."
        env = self
        while var not in env:
            if env.outer is None:
                raise LookupError(var)
            env = env.outer
        return env
|
||||
|
||||
def is_pair(x):
    "True if x is a non-empty list."
    return x != [] and isa(x, list)
|
||||
def cons(x, y):
    "Return a new list consisting of x followed by the items of the list y."
    return [x] + y
|
||||
|
||||
def callcc(proc):
    """Call proc with the current continuation; escape only.

    proc receives a function `throw`; calling throw(v) while proc is still
    running aborts proc and makes callcc return v.  If proc returns
    normally, its result is returned.
    """
    ball = RuntimeWarning("Sorry, can't continue this continuation any longer.")

    def throw(retval):
        ball.retval = retval
        raise ball

    try:
        result = proc(throw)
    except RuntimeWarning as w:
        # Only catch our own ball; any other RuntimeWarning propagates.
        if w is not ball:
            raise w
        result = ball.retval
    return result
|
||||
|
||||
def add_globals(self):
    """Add some Scheme standard procedures to the environment self and return it.

    Everything in math and then cmath is added first (cmath entries
    override math entries of the same name), followed by the arithmetic,
    list, predicate, port and I/O procedures listed below.
    """
    import math, cmath, operator as op
    self.update(vars(math))
    self.update(vars(cmath))
    self.update({
     # '/' is true division, in line with this module's
     # `from __future__ import division` and with lis.py; op.div ignores
     # that import and exists only on Python 2.
     '+':op.add, '-':op.sub, '*':op.mul, '/':op.truediv, 'not':op.not_,
     '>':op.gt, '<':op.lt, '>=':op.ge, '<=':op.le, '=':op.eq,
     'equal?':op.eq, 'eq?':op.is_, 'length':len, 'cons':cons,
     'car':lambda x:x[0], 'cdr':lambda x:x[1:], 'append':op.add,
     'list':lambda *x:list(x), 'list?': lambda x:isa(x,list),
     'null?':lambda x:x==[], 'symbol?':lambda x: isa(x, Symbol),
     'boolean?':lambda x: isa(x, bool), 'pair?':is_pair,
     'port?': lambda x:isa(x,file), 'apply':lambda proc,l: proc(*l),
     'eval':lambda x: eval(expand(x)), 'load':lambda fn: load(fn), 'call/cc':callcc,
     'open-input-file':open,'close-input-port':lambda p: p.file.close(),
     'open-output-file':lambda f:open(f,'w'), 'close-output-port':lambda p: p.close(),
     'eof-object?':lambda x:x is eof_object, 'read-char':readchar,
     'read':read, 'write':lambda x,port=sys.stdout:port.write(to_string(x)),
     'display':lambda x,port=sys.stdout:port.write(x if isa(x,str) else to_string(x))})
    return self
|
||||
|
||||
isa = isinstance
|
||||
|
||||
global_env = add_globals(Env())
|
||||
|
||||
################ eval (tail recursive)
|
||||
|
||||
def eval(x, env=global_env):
    """Evaluate the expression x in the environment env and return its value.

    Runs as a loop so that the branches of `if`, the last form of `begin`,
    and the body of a called Procedure are evaluated by rebinding x (and
    env) instead of recursing.  set! and define return None.
    """
    while True:
        if isa(x, Symbol):             # variable reference
            return env.find(x)[x]
        if not isa(x, list):           # constant literal
            return x
        head = x[0]
        if head is _quote:             # (quote exp)
            (_, quoted) = x
            return quoted
        elif head is _if:              # (if test conseq alt)
            (_, test, conseq, alt) = x
            x = conseq if eval(test, env) else alt
        elif head is _set:             # (set! var exp)
            (_, var, exp) = x
            env.find(var)[var] = eval(exp, env)
            return None
        elif head is _define:          # (define var exp)
            (_, var, exp) = x
            env[var] = eval(exp, env)
            return None
        elif head is _lambda:          # (lambda (var*) exp)
            (_, vars, exp) = x
            return Procedure(vars, exp, env)
        elif head is _begin:           # (begin exp+)
            for exp in x[1:-1]:
                eval(exp, env)
            x = x[-1]
        else:                          # (proc exp*)
            exps = [eval(exp, env) for exp in x]
            proc = exps.pop(0)
            if not isa(proc, Procedure):
                return proc(*exps)
            # User-defined procedure: loop on its body in a fresh Env.
            x = proc.exp
            env = Env(proc.parms, exps, proc.env)
|
||||
|
||||
################ expand
|
||||
|
||||
def expand(x, toplevel=False):
    """Walk tree of x, making optimizations/fixes, and signaling SyntaxError.

    Returns the expanded expression: macros applied, (if t c) padded with a
    None alternative, (define (f args) body) rewritten as a lambda
    definition, multi-expression lambda bodies wrapped in begin, and
    quasiquote forms expanded.  A define-macro form (allowed only when
    toplevel is true) installs the macro in macro_table and expands to None.
    """
    require(x, x!=[])                    # () => Error
    if not isa(x, list):                 # constant => unchanged
        return x
    elif x[0] is _quote:                 # (quote exp)
        require(x, len(x)==2)
        return x
    elif x[0] is _if:
        if len(x)==3: x = x + [None]     # (if t c) => (if t c None)
        require(x, len(x)==4)
        # A real list, not map(): eval recognises forms with isa(x, list).
        return [expand(xi) for xi in x]
    elif x[0] is _set:
        require(x, len(x)==3)
        var = x[1]                       # (set! non-var exp) => Error
        require(x, isa(var, Symbol), "can set! only a symbol")
        return [_set, var, expand(x[2])]
    elif x[0] is _define or x[0] is _definemacro:
        require(x, len(x)>=3)
        _def, v, body = x[0], x[1], x[2:]
        if isa(v, list) and v:           # (define (f args) body)
            f, args = v[0], v[1:]        #  => (define f (lambda (args) body))
            # Pass toplevel on; otherwise the shorthand
            # (define-macro (m args) body) was always rejected as
            # "only allowed at top level".
            return expand([_def, f, [_lambda, args]+body], toplevel)
        else:
            require(x, len(x)==3)        # (define non-var/list exp) => Error
            require(x, isa(v, Symbol), "can define only a symbol")
            exp = expand(x[2])
            if _def is _definemacro:
                require(x, toplevel, "define-macro only allowed at top level")
                proc = eval(exp)
                require(x, callable(proc), "macro must be a procedure")
                macro_table[v] = proc    # (define-macro v proc)
                return None              #  => None; add v:proc to macro_table
            return [_define, v, exp]
    elif x[0] is _begin:
        if len(x)==1: return None        # (begin) => None
        else: return [expand(xi, toplevel) for xi in x]
    elif x[0] is _lambda:                # (lambda (x) e1 e2)
        require(x, len(x)>=3)            #  => (lambda (x) (begin e1 e2))
        vars, body = x[1], x[2:]
        require(x, (isa(vars, list) and all(isa(v, Symbol) for v in vars))
                or isa(vars, Symbol), "illegal lambda argument list")
        exp = body[0] if len(body) == 1 else [_begin] + body
        return [_lambda, vars, expand(exp)]
    elif x[0] is _quasiquote:            # `x => expand_quasiquote(x)
        require(x, len(x)==2)
        return expand_quasiquote(x[1])
    elif isa(x[0], Symbol) and x[0] in macro_table:
        return expand(macro_table[x[0]](*x[1:]), toplevel) # (m arg...)
    else:                                #        => macroexpand if m isa macro
        return [expand(xi) for xi in x]  # (f arg...) => expand each
|
||||
|
||||
def require(x, predicate, msg="wrong length"):
    "Raise SyntaxError('<x as Lisp text>: <msg>') unless predicate is true."
    if predicate:
        return
    raise SyntaxError(to_string(x)+': '+msg)
|
||||
|
||||
_append, _cons, _let = map(Sym, "append cons let".split())
|
||||
|
||||
def expand_quasiquote(x):
    """Expand `x => 'x; `,x => x; `(,@x y) => (append x y) """
    # Anything that is not a pair is simply quoted.
    if not is_pair(x):
        return [_quote, x]
    head, tail = x[0], x[1:]
    require(x, head is not _unquotesplicing, "can't splice here")
    if head is _unquote:
        # (unquote e) => e
        require(x, len(x)==2)
        return x[1]
    if is_pair(head) and head[0] is _unquotesplicing:
        # ((unquote-splicing e) rest...) => (append e <expanded rest>)
        require(head, len(head)==2)
        return [_append, head[1], expand_quasiquote(tail)]
    # Ordinary element: cons its expansion onto the expanded rest.
    return [_cons, expand_quasiquote(head), expand_quasiquote(tail)]
|
||||
|
||||
def let(*args):
    """Macro expander: (let ((v1 e1) ...) body...) => ((lambda (v1 ...) body...) e1 ...).

    args are the unevaluated forms after the `let` symbol: a binding list
    followed by at least one body expression.  Raises SyntaxError (through
    require) when there is no body or a binding is not a (symbol expr) pair.
    """
    args = list(args)
    x = cons(_let, args)  # rebuilt form, used only for error messages
    require(x, len(args)>1)
    bindings, body = args[0], args[1:]
    require(x, all(isa(b, list) and len(b)==2 and isa(b[0], Symbol)
                   for b in bindings), "illegal binding list")
    # Split the pairs by hand: unpacking zip(*bindings) into two names
    # fails when the binding list is empty, as in (let () body).
    names = [b[0] for b in bindings]
    values = [b[1] for b in bindings]
    # Build real lists so the concatenation does not depend on map()
    # returning a list.
    return ([[_lambda, names] + [expand(e) for e in body]]
            + [expand(v) for v in values])
|
||||
|
||||
macro_table = {_let:let} ## More macros can go here
|
||||
|
||||
# Macros defined in Scheme itself and loaded when the module is imported.
_SCHEME_MACROS = """(begin

(define-macro and (lambda args
   (if (null? args) #t
       (if (= (length args) 1) (car args)
           `(if ,(car args) (and ,@(cdr args)) #f)))))

;; More macros can also go here

)"""

eval(parse(_SCHEME_MACROS))
|
||||
|
||||
if __name__ == '__main__':
|
||||
repl()
|
||||
|
||||
121
py/lispytest.py
Normal file
121
py/lispytest.py
Normal file
@@ -0,0 +1,121 @@
|
||||
|
||||
################ Tests for lis.py and lispy.py
|
||||
|
||||
lis_tests = [
|
||||
("(quote (testing 1 (2.0) -3.14e159))", ['testing', 1, [2.0], -3.14e159]),
|
||||
("(+ 2 2)", 4),
|
||||
("(+ (* 2 100) (* 1 10))", 210),
|
||||
("(if (> 6 5) (+ 1 1) (+ 2 2))", 2),
|
||||
("(if (< 6 5) (+ 1 1) (+ 2 2))", 4),
|
||||
("(define x 3)", None), ("x", 3), ("(+ x x)", 6),
|
||||
("(begin (define x 1) (set! x (+ x 1)) (+ x 1))", 3),
|
||||
("((lambda (x) (+ x x)) 5)", 10),
|
||||
("(define twice (lambda (x) (* 2 x)))", None), ("(twice 5)", 10),
|
||||
("(define compose (lambda (f g) (lambda (x) (f (g x)))))", None),
|
||||
("((compose list twice) 5)", [10]),
|
||||
("(define repeat (lambda (f) (compose f f)))", None),
|
||||
("((repeat twice) 5)", 20), ("((repeat (repeat twice)) 5)", 80),
|
||||
("(define fact (lambda (n) (if (<= n 1) 1 (* n (fact (- n 1))))))", None),
|
||||
("(fact 3)", 6),
|
||||
("(fact 50)", 30414093201713378043612608166064768844377641568960512000000000000),
|
||||
("(define abs (lambda (n) ((if (> n 0) + -) 0 n)))", None),
|
||||
("(list (abs -3) (abs 0) (abs 3))", [3, 0, 3]),
|
||||
("""(define combine (lambda (f)
|
||||
(lambda (x y)
|
||||
(if (null? x) (quote ())
|
||||
(f (list (car x) (car y))
|
||||
((combine f) (cdr x) (cdr y)))))))""", None),
|
||||
("(define zip (combine cons))", None),
|
||||
("(zip (list 1 2 3 4) (list 5 6 7 8))", [[1, 5], [2, 6], [3, 7], [4, 8]]),
|
||||
("""(define riff-shuffle (lambda (deck) (begin
|
||||
(define take (lambda (n seq) (if (<= n 0) (quote ()) (cons (car seq) (take (- n 1) (cdr seq))))))
|
||||
(define drop (lambda (n seq) (if (<= n 0) seq (drop (- n 1) (cdr seq)))))
|
||||
(define mid (lambda (seq) (/ (length seq) 2)))
|
||||
((combine append) (take (mid deck) deck) (drop (mid deck) deck)))))""", None),
|
||||
("(riff-shuffle (list 1 2 3 4 5 6 7 8))", [1, 5, 2, 6, 3, 7, 4, 8]),
|
||||
("((repeat riff-shuffle) (list 1 2 3 4 5 6 7 8))", [1, 3, 5, 7, 2, 4, 6, 8]),
|
||||
("(riff-shuffle (riff-shuffle (riff-shuffle (list 1 2 3 4 5 6 7 8))))", [1,2,3,4,5,6,7,8]),
|
||||
]
|
||||
|
||||
lispy_tests = [
|
||||
("()", SyntaxError), ("(set! x)", SyntaxError),
|
||||
("(define 3 4)", SyntaxError),
|
||||
("(quote 1 2)", SyntaxError), ("(if 1 2 3 4)", SyntaxError),
|
||||
("(lambda 3 3)", SyntaxError), ("(lambda (x))", SyntaxError),
|
||||
("""(if (= 1 2) (define-macro a 'a)
|
||||
(define-macro a 'b))""", SyntaxError),
|
||||
("(define (twice x) (* 2 x))", None), ("(twice 2)", 4),
|
||||
("(twice 2 2)", TypeError),
|
||||
("(define lyst (lambda items items))", None),
|
||||
("(lyst 1 2 3 (+ 2 2))", [1,2,3,4]),
|
||||
("(if 1 2)", 2),
|
||||
("(if (= 3 4) 2)", None),
|
||||
("(define ((account bal) amt) (set! bal (+ bal amt)) bal)", None),
|
||||
("(define a1 (account 100))", None),
|
||||
("(a1 0)", 100), ("(a1 10)", 110), ("(a1 10)", 120),
|
||||
("""(define (newton guess function derivative epsilon)
|
||||
(define guess2 (- guess (/ (function guess) (derivative guess))))
|
||||
(if (< (abs (- guess guess2)) epsilon) guess2
|
||||
(newton guess2 function derivative epsilon)))""", None),
|
||||
("""(define (square-root a)
|
||||
(newton 1 (lambda (x) (- (* x x) a)) (lambda (x) (* 2 x)) 1e-8))""", None),
|
||||
("(> (square-root 200.) 14.14213)", True),
|
||||
("(< (square-root 200.) 14.14215)", True),
|
||||
("(= (square-root 200.) (sqrt 200.))", True),
|
||||
("""(define (sum-squares-range start end)
|
||||
(define (sumsq-acc start end acc)
|
||||
(if (> start end) acc (sumsq-acc (+ start 1) end (+ (* start start) acc))))
|
||||
(sumsq-acc start end 0))""", None),
|
||||
("(sum-squares-range 1 3000)", 9004500500), ## Tests tail recursion
|
||||
("(call/cc (lambda (throw) (+ 5 (* 10 (throw 1))))) ;; throw", 1),
|
||||
("(call/cc (lambda (throw) (+ 5 (* 10 1)))) ;; do not throw", 15),
|
||||
("""(call/cc (lambda (throw)
|
||||
(+ 5 (* 10 (call/cc (lambda (escape) (* 100 (escape 3)))))))) ; 1 level""", 35),
|
||||
("""(call/cc (lambda (throw)
|
||||
(+ 5 (* 10 (call/cc (lambda (escape) (* 100 (throw 3)))))))) ; 2 levels""", 3),
|
||||
("""(call/cc (lambda (throw)
|
||||
(+ 5 (* 10 (call/cc (lambda (escape) (* 100 1))))))) ; 0 levels""", 1005),
|
||||
("(* 1i 1i)", -1), ("(sqrt -1)", 1j),
|
||||
("(let ((a 1) (b 2)) (+ a b))", 3),
|
||||
("(let ((a 1) (b 2 3)) (+ a b))", SyntaxError),
|
||||
("(and 1 2 3)", 3), ("(and (> 2 1) 2 3)", 3), ("(and)", True),
|
||||
("(and (> 2 1) (> 2 3))", False),
|
||||
("(define-macro unless (lambda args `(if (not ,(car args)) (begin ,@(cdr args))))) ; test `", None),
|
||||
("(unless (= 2 (+ 1 1)) (display 2) 3 4)", None),
|
||||
(r'(unless (= 4 (+ 1 1)) (display 2) (display "\n") 3 4)', 4),
|
||||
("(quote x)", 'x'),
|
||||
("(quote (1 2 three))", [1, 2, 'three']),
|
||||
("'x", 'x'),
|
||||
("'(one 2 3)", ['one', 2, 3]),
|
||||
("(define L (list 1 2 3))", None),
|
||||
("`(testing ,@L testing)", ['testing',1,2,3,'testing']),
|
||||
("`(testing ,L testing)", ['testing',[1,2,3],'testing']),
|
||||
("`,@L", SyntaxError),
|
||||
("""'(1 ;test comments '
|
||||
;skip this line
|
||||
2 ; more ; comments ; ) )
|
||||
3) ; final comment""", [1,2,3]),
|
||||
]
|
||||
|
||||
def test(tests, name=''):
|
||||
"For each (exp, expected) test case, see if eval(parse(exp)) == expected."
|
||||
fails = 0
|
||||
for (x, expected) in tests:
|
||||
try:
|
||||
result = eval(parse(x))
|
||||
print x, '=>', to_string(result)
|
||||
ok = (result == expected)
|
||||
except Exception as e:
|
||||
print x, '=raises=>', type(e).__name__, e
|
||||
ok = issubclass(expected, Exception) and isinstance(e, expected)
|
||||
if not ok:
|
||||
fails += 1
|
||||
print 'FAIL!!! Expected', expected
|
||||
print '%s %s: %d out of %d tests fail.' % ('*'*45, name, fails, len(tests))
|
||||
|
||||
if __name__ == '__main__':
|
||||
from lis import *
|
||||
test(lis_tests, 'lis.py')
|
||||
from lispy import *
|
||||
test(lis_tests+lispy_tests, 'lispy.py')
|
||||
|
||||
154
py/pal.py
Normal file
154
py/pal.py
Normal file
@@ -0,0 +1,154 @@
|
||||
import string, random, os, re, bisect
|
||||
|
||||
"""Produce Panama-ish Palindromes. Copyright (C) 2002, Peter Norvig.
|
||||
See http://www.norvig.com/license.html and http://www.norvig.com/pal-alg.html"""
|
||||
|
||||
def is_panama(p):
    """Test if p is a Panama-ish palindrome.

    p must end with 'Panama', be a palindrome, and have no repeated
    comma-separated phrase (compared after stripping whitespace).
    """
    if not p.endswith('Panama'):
        return False
    if not is_palindrome(p):
        return False
    parts = [s.strip() for s in p.split(',')]
    return len(parts) == len(set(parts))
|
||||
|
||||
def is_palindrome(phrase):
    "Test if a phrase reads the same in both directions once canonicalized."
    canon = canonical(phrase)
    return reverse(canon) == canon
|
||||
|
||||
def canonical(word, sub=re.compile('[^A-Za-z0-9]').sub):
    """The canonical form for comparing: lowercase alphanumerics.

    sub is the bound substitution method of a precompiled pattern; by default
    it deletes every character that is not an ASCII letter or digit.
    """
    kept = sub('', word)
    return kept.lower()
|
||||
|
||||
def read_dict(filename='npdict.txt'):
    """Read the file into global variables _fw and _bw and _truename.

    _fw is the sorted list of canonical words, _bw the sorted list of their
    reversals, and _truename maps each canonical word back to the line it
    came from.  Returns the lengths of the three collections.
    """
    global _fw, _bw, _truename
    _fw, _bw, _truename = [], [], {'': ''}
    # Close the file as soon as it has been read instead of leaving the
    # handle for the garbage collector.
    with open(filename) as f:
        lines = f.read().splitlines()
    for word in lines:
        w = canonical(word)
        _fw.append(w)
        _bw.append(reverse(w))
        _truename[w] = word
    _fw.sort(); _bw.sort()
    return len(_fw), len(_bw), len(_truename)
|
||||
|
||||
def update(obj, **entries):
    "Set each keyword argument as an attribute in obj's __dict__; return obj."
    vars(obj).update(entries)
    return obj
|
||||
|
||||
class PalDict:
    """A dictionary from which you can find canonical words that start or end
    with a given canonical substring, and find the true name of a
    canonical word."""

    def __init__(self, fw=None, bw=None, truename=None):
        # Any argument left empty falls back to the module-level tables.
        self.fw = fw or _fw
        self.bw = bw or _bw
        self.truename = truename or _truename

    def startswith(self, prefix, k=100):
        """Return up to k canonical words that start with prefix.
        If there are more than k, choose from them at random."""
        forward_words = self.fw
        return k_startingwith(k, forward_words, prefix)

    def endswith(self, suffix, k=100):
        """Return up to k canonical words that end with suffix.
        If there are more than k, choose from them at random.
        Both the suffix and the word returned are reversed."""
        backward_words = self.bw
        return k_startingwith(k, backward_words, suffix)
|
||||
|
||||
def k_startingwith(k, words, prefix):
    """Choose up to k words that match the prefix (choose randomly if > k).

    words must be sorted.  Candidates are the entries strictly after prefix
    and up to prefix + 'zzzz', so a word equal to prefix itself is not
    returned.  The result is a new list in random order with no duplicates.
    """
    start = bisect.bisect(words, prefix)
    end = bisect.bisect(words, prefix + 'zzzz')
    n = end - start
    if k >= n:
        results = words[start:end]
        random.shuffle(results)
    else:
        # Sample positions without replacement so the same word is never
        # offered twice in one batch.
        results = [words[i] for i in random.sample(range(start, end), k)]
    return results
|
||||
|
||||
class Panama:
    """Depth-first search state for growing a Panama-style palindrome.

    .left   canonical words fixed on the left side
    .right  words fixed on the right side, stored reversed
    .diff   letters on the left minus letters on the right
    .seen   forward canonical words already used
    .stack  one (substr, candidates) pair per search level
    """

    def __init__(self, L='A man, a plan', R='a canal, Panama', dict=None):
        left = [canonical(w) for w in L.split(', ')]
        right = [canonical(reverse(w)) for w in reverse(R.split(', '))]
        update(self, left=left, right=right, dict=dict or PalDict(), best=0,
               seen={}, diff=len(''.join(left)) - len(''.join(right)))
        # Build the second operand as a list so the concatenation does not
        # depend on map() returning a list.
        for word in left + [reverse(w) for w in right]:
            self.seen[word] = 1

    def missing(self, k=20):
        """Return the substring that is missing, and candidate words."""
        # NOTE(review): when diff == 0 the slice [-0:] is the whole last
        # word -- confirm whether that state can be reached.
        if self.diff >= 0: # Left is longer, missing on right
            substr = self.left[-1][-self.diff:]
            return substr, self.dict.endswith(substr, k)
        else: # Right is longer, missing on left
            substr = self.right[-1][self.diff:]
            return substr, self.dict.startswith(substr, k)

    def search(self, k=200):
        "Search for palindromes; consider at most k words at each level."
        self.stack = [self.missing(k)]
        while self.stack:
            substr, words = self.stack[-1]
            if is_palindrome(substr):
                self.report()
            if words:
                self.extend(words.pop(), k)
            elif not self.backtrack():
                return

    def extend(self, word, k):
        "Add a new word (unless we've already seen it)."
        if self.diff >= 0: # Left is longer, add to right
            fword = reverse(word)
            if fword in self.seen: return
            self.diff -= len(fword)
            self.seen[fword] = 1
            self.right.append(word)
            self.stack.append(self.missing(k))
        else: # Right is longer, add to left
            if word in self.seen: return
            self.diff += len(word)
            self.seen[word] = 1
            self.left.append(word)
            self.stack.append(self.missing(k))

    def backtrack(self):
        "Remove the last word added; return 0 if can't backtrack"
        if self.diff >= 0: # Left is longer, pop from left
            if not self.left: return 0
            word = self.left.pop()
            self.diff -= len(word)
            del self.seen[word]
        else: # Right is longer, pop from right
            if not self.right: return 0
            word = self.right.pop()
            self.diff += len(word)
            del self.seen[reverse(word)]
        self.stack.pop()
        return 1

    def report(self):
        "Write current state to log file."
        # Only report when more than 200 words longer than the last report.
        if len(self) > self.best + 200:
            self.best = len(self)
            print(self.best)
            self.bestphrase = str(self)
            assert is_panama(self.bestphrase)
            # The with-block closes the log even if the write fails.
            with open('pallog%d.txt' % os.getpid(), 'w') as f:
                f.write(self.bestphrase + '\n')

    def __len__(self):
        return len(self.left) + len(self.right)

    def __str__(self):
        truename = self.dict.truename
        lefts = [truename[w] for w in self.left]
        rights = [truename[reverse(w)] for w in reverse(self.right[:])]
        return ', '.join(lefts + ['*****'] + rights)
|
||||
|
||||
def reverse(x):
    """Reverse a list or string.

    A string yields a new reversed string.  Anything else is reversed in
    place through its .reverse() method and the same object is returned.
    """
    if isinstance(x, str):
        # Slicing reverses directly, without a round trip through a list.
        return x[::-1]
    x.reverse()
    return x
|
||||
|
||||
if __name__ == '__main__': read_dict(); p = Panama(); p.search()
|
||||
262
py/pal2.py
Normal file
262
py/pal2.py
Normal file
@@ -0,0 +1,262 @@
|
||||
import random, re, bisect, time
|
||||
|
||||
"""Produce Panama-ish Palindromes. Copyright (C) 2002-2008, Peter Norvig."""
|
||||
|
||||
################ Checking for Palindromes
|
||||
|
||||
def is_panama(s):
    "Test if string s is a palindrome whose comma-separated phrases are all distinct."
    if not is_palindrome(s):
        return False
    return is_unique(phrases(s))
|
||||
|
||||
def is_palindrome(s):
    "Test if a string equals its own reversal once canonicalized."
    canon = canonical(s)
    return reversestr(canon) == canon
|
||||
|
||||
def phrases(s):
    "Split s at commas and return the pieces with surrounding whitespace removed."
    pieces = []
    for piece in s.split(','):
        pieces.append(piece.strip())
    return pieces
|
||||
|
||||
def canonical(word, sub=re.compile('''[-* \t\n\r.,;!?:()`"']''').sub):
    """The canonical form for comparing: lowercase, no blanks or punctuation.

    sub is the bound substitution method of a precompiled pattern listing
    the characters to delete.
    """
    stripped = sub('', word)
    return stripped.lower()
|
||||
|
||||
################ Utilities
|
||||
|
||||
def reversestr(x):
    "Return x with its characters in the opposite order."
    backwards = slice(None, None, -1)
    return x[backwards]
|
||||
|
||||
def is_unique(seq):
    "Return true if no element of seq occurs more than once."
    distinct = set(seq)
    return len(distinct) == len(seq)
|
||||
|
||||
def update(obj, **entries):
    "Store each keyword argument as an attribute in obj's __dict__; return obj."
    attributes = vars(obj)
    attributes.update(entries)
    return obj
|
||||
|
||||
################ Reading in a dictionary
|
||||
|
||||
class PalDict:
    """A dictionary from which you can find canonical words that start or end
    with a given canonical substring, and find the true name of a
    canonical word with d.truename[canonicalword]."""

    def __init__(self, k=1000, filename='npdict.txt'):
        words, rwords, truename = [], [], {'': '', 'panama': 'Panama!'}
        # Close the dictionary file as soon as it has been read.
        with open(filename) as f:
            lines = f.read().splitlines()
        for tword in lines:
            word = canonical(tword)
            words.append(word)
            rwords.append(reversestr(word))
            truename[word] = tword
        words.sort()
        rwords.sort()
        update(self, k=k, words=words, rwords=rwords, truename=truename,
               reversibles={}, rangek=list(range(k)), tryharder=False)

    def startswith(self, prefix):
        """Return up to k canonical words that start with prefix.
        If there are more than k, choose from them at random."""
        return self._k_startingwith(self.words, prefix)

    def endswith(self, rsuffix):
        """Return up to k canonical words that end with the reversed suffix.
        If you want words ending in 'ing', ask for d.endswith('gni').
        If there are more than k, choose from them at random."""
        # Always return a real list: the search pops candidates off it.
        return [reversestr(w) for w in self._k_startingwith(self.rwords, rsuffix)]

    def __contains__(self, word):
        return word in self.truename

    def reversible_words(self):
        "Find words that have a reverse in the dict, like {'Camus': 'Sumac'}"
        # Computed once, then cached in self.reversibles.
        if not self.reversibles:
            reversibles = self.reversibles
            for rw in self.rwords:
                if rw in self:
                    w = reversestr(rw)
                    if w != rw and w not in reversibles:
                        reversibles[w] = rw
            self.reversibles = reversibles
        return self.reversibles

    def _k_startingwith(self, words, prefix):
        """Return up to self.k entries of the sorted list words that start
        with prefix, in random order.  When self.tryharder is set, also
        append dictionary words that are proper prefixes (3+ letters) of
        prefix."""
        start = bisect.bisect_left(words, prefix)
        end = bisect.bisect(words, prefix + 'zzzz')
        n = end - start
        if self.k >= n: # get all the words that start with prefix
            results = words[start:end]
        else: # sample from words starting with prefix
            indexes = random.sample(range(start, end), self.k)
            results = [words[i] for i in indexes]
        random.shuffle(results)
        ## Consider words that are prefixes of the prefix.
        ## This is very slow, so don't use it until late in the game.
        if self.tryharder:
            # Decide once which table was passed in.  The identity test
            # short-circuits the usual case; == is kept as the fallback so
            # the result is the same as comparing inside the loop.
            forward = words is self.words or words == self.words
            backward = words is self.rwords or words == self.rwords
            for i in range(3, len(prefix)):
                w = prefix[0:i]
                if ((forward and w in self.truename) or
                    (backward and reversestr(w) in self.truename)):
                    results.append(w)
        return results
|
||||
|
||||
paldict = PalDict()
|
||||
|
||||
def anpdictshort():
    """Find the words that are valid when every phrase must start with 'a'.

    Reads 'anpdict.txt', keeps each word whose 'a'-separated segments all
    have their reversal among the segments of the whole list, and writes
    the true names of the kept words to 'anpdict-short.txt', one per line.
    """
    def segment(word): return [s for s in word.split('a') if s]
    def valid(word): return all(reversestr(s) in segments for s in segment(word))
    # Read into a real list: words is traversed twice below, and the
    # with-block closes the input file.
    with open('anpdict.txt') as infile:
        words = [canonical(line) for line in infile]
    segments = set(s for w in words for s in segment(canonical(w)))
    valid_words = [paldict.truename[w] for w in words if valid(w)]
    # Closing the output file guarantees the data is flushed to disk.
    with open('anpdict-short.txt', 'w') as outfile:
        outfile.write('\n'.join(valid_words))
|
||||
|
||||
################ Search for a palindrome
|
||||
|
||||
class Panama:
    """Backtracking search for a long Panama-style palindrome.

    Words are added alternately to the left and right ends so that the
    unmatched letters (.diff) stay covered by dictionary words.
    """

    # time.clock() is missing from newer Pythons; use perf_counter when the
    # interpreter has it and fall back to clock otherwise.
    _clock = staticmethod(getattr(time, 'perf_counter', None) or time.clock)

    def __init__(self, L='A man, a plan', R='a canal, Panama', dict=paldict):
        ## .left and .right hold lists of canonical words
        ## .diff holds the number of characters that are not matched,
        ##  positive for words on left, negative for right.
        ## .stack holds (action, side, substr, arg) tuples
        update(self, left=[], right=[], best=0, seen={}, diff=0, stack=[],
               used_reversibles=False, starttime=self._clock(), dict=dict)
        for word in L.split(','):
            self.add('left', canonical(word))
        for rword in reversestr(R).split(','):
            self.add('right', canonical(reversestr(rword)))
        self.consider_candidates()

    def search(self, steps=50000000):
        """Search for palindromes for at most steps iterations.

        Returns 'done' when the stack empties, None when steps run out."""
        # A countdown avoids building a huge list where range() is eager.
        remaining = steps
        while remaining > 0:
            remaining -= 1
            if not self.stack:
                return 'done'
            action, dir, substr, arg = self.stack[-1]
            if action == 'added': # undo the last word added
                self.remove(dir, arg)
            elif action == 'trying' and arg: # try the next word if there is one
                if self.add(dir, arg.pop()):
                    self.consider_candidates()
            elif action == 'trying' and not arg: # otherwise backtrack
                self.stack.pop()
            else:
                raise ValueError(action)

    def add(self, dir, word):
        "Add word on side dir; return False (changing nothing) if already used."
        if word in self.seen:
            return False
        else:
            getattr(self, dir).append(word)
            self.diff += factor[dir] * len(word)
            self.seen[word] = True
            self.stack.append(('added', dir, '?', word))
            return True

    def remove(self, dir, word):
        "Undo the most recent add of word on side dir."
        oldword = getattr(self, dir).pop()
        assert word == oldword
        self.diff -= factor[dir] * len(word)
        del self.seen[word]
        self.stack.pop()

    def consider_candidates(self):
        """Push a new state with a set of candidate words onto stack."""
        if self.diff > 0: # Left is longer, consider adding on right
            dir = 'right'
            substr = self.left[-1][-self.diff:]
            candidates = self.dict.endswith(substr)
        elif self.diff < 0: # Right is longer, consider adding on left
            dir = 'left'
            substr = reversestr(self.right[-1][0:-self.diff])
            candidates = self.dict.startswith(substr)
        else: # Both sides are same size
            dir = 'left'
            if not self.used_reversibles:
                self.report()
                self.add_reversibles()
            substr = ''
            candidates = self.dict.startswith('')
        # A palindromic leftover means the current state is a palindrome.
        if substr == reversestr(substr):
            self.report()
        self.stack.append(('trying', dir, substr, candidates))

    def add_reversibles(self):
        "Add in reversible words."
        print('using reversibles ...')
        for (word, rword) in self.dict.reversible_words().items():
            if word not in self.seen and rword not in self.seen:
                self.add('left', word)
                self.add('right', rword)
        self.used_reversibles = True
        # Emptying the stack makes the words added so far permanent:
        # nothing remains to undo them.
        self.stack = []
        print('...done')

    def report(self):
        "Report a new palindrome to log file (if it is sufficiently big)."
        N = len(self)
        if N > 13333:
            self.dict.tryharder = True
        if N > self.best and (N > 12500 or N > self.best+500):
            self.best = len(self)
            self.bestphrase = str(self)
            print('%5d phrases (%5d words) in %3d seconds' % (
                self.best, self.bestphrase.count(' ')+1,
                self._clock() - self.starttime))
            assert is_panama(self.bestphrase)
            # The with-block closes the log even if the write fails.
            with open('pallog%d.txt' % (id(self) % 10000), 'w') as f:
                f.write(self.bestphrase + '\n')

    def __len__(self):
        return len(self.left) + len(self.right)

    def __str__(self):
        truename = self.dict.truename
        lefts = [truename[w] for w in self.left]
        rights = [truename[w] for w in self.right]
        return ', '.join(lefts + rights[::-1])
|
||||
|
||||
factor = {'left': +1, 'right': -1}
|
||||
|
||||
# Note that we only allow one truename per canonical name. Occasionally
|
||||
# this means we miss a good word (as in "a node" vs. "an ode"), but there
|
||||
# are only 665 of these truename collisions, and most of them are of the
|
||||
# form "a mark-up" vs. "a markup" so it seemed better to disallow them.
|
||||
|
||||
################ Unit Tests
|
||||
|
||||
def tests(p=None):
    """Run the unit tests; print 'all tests pass' if no assertion fails.

    p is the Panama instance whose dictionary is examined.  When omitted, a
    fresh Panama() is built at call time, so that defining this function
    does not itself construct a search state.
    """
    if p is None:
        p = Panama()
    assert is_panama('A man, a plan, a canal, Panama.')
    assert is_panama('''A (man), a plan,,;, a ```canal?'' -- Panama!''')
    assert not is_panama('A man, a plan, a radar, a canal, Panama.')
    assert is_palindrome('A man, a plan, a canal, Panama.')
    assert is_palindrome('radar, radar? radar!')
    assert not is_palindrome('radars')
    assert phrases('A man, a plan, Panama') == ['A man', 'a plan', 'Panama']
    assert canonical('A man, a plan, a canal, Panama') == 'amanaplanacanalpanama'
    assert reversestr('foo') == 'oof'
    assert is_unique([1, 2, 3])
    assert not is_unique([1, 2, 2])
    d = p.dict
    def sameset(a, b): return set(a) == set(b)
    assert 'panama' in d
    assert d.words[0] in d
    assert d.words[-1] in d
    assert sameset(d.startswith('aword'), ['awording', 'awordbreak',
        'awordiness', 'awordage', 'awordplay', 'awordlore', 'awordbook',
        'awordlessness', 'aword', 'awordsmith'])
    assert sameset(d.endswith('ytisob'), ['aglobosity', 'averbosity',
        'asubglobosity', 'anonverbosity', 'agibbosity'])
    # tryharder also offers dictionary words that are prefixes of the query.
    d.tryharder = True
    assert sameset(d.startswith('oklahoma'), ['oklahoma', 'okla'])
    d.tryharder = False
    assert d.startswith('oklahoma') == ['oklahoma']
    assert d.startswith('fsfdsfdsfds') == []
    print('all tests pass')
|
||||
|
||||
if __name__ == '__main__':
|
||||
p = Panama();
|
||||
tests(p)
|
||||
p.search()
|
||||
170
py/pal3.py
Normal file
170
py/pal3.py
Normal file
@@ -0,0 +1,170 @@
|
||||
from collections import Counter, deque
|
||||
import re
|
||||
|
||||
class PhraseDict(dict):
    """A dictionary of {letters: phrase}, such as {'donaldeknuth': 'Donald E. Knuth'}, with:
    .prefixes: Counter of {'pre': n} where n is the number of keys that start with 'pre'
    .suffixes: Counter of {'xes': n} where n is the number of keys that end with 'xes'"""

    def __init__(self, phrases):
        # Key each stripped phrase by its letters-only form.
        for raw in phrases:
            trimmed = raw.strip()
            self[letters(trimmed)] = trimmed
        # Tally every prefix and every suffix of every key.
        prefix_counts, suffix_counts = Counter(), Counter()
        for key in self:
            prefix_counts.update(prefixes(key))
        for key in self:
            suffix_counts.update(suffixes(key))
        self.prefixes = prefix_counts
        self.suffixes = suffix_counts
|
||||
|
||||
def prefixes(phrase):
    "Return every non-empty prefix of phrase, shortest first."
    result = []
    for end in range(1, len(phrase) + 1):
        result.append(phrase[:end])
    return result
|
||||
|
||||
def suffixes(phrase):
    "Return every non-empty suffix of phrase, shortest first."
    total = len(phrase)
    result = []
    for size in range(1, total + 1):
        result.append(phrase[total - size:])
    return result
|
||||
|
||||
def letters(phrase, sub=re.compile(r'[\W]+').sub):
    """Delete every run of non-word characters from phrase; return it lowercased.

    The default pattern is \\W, so digits and underscores are kept along
    with letters.
    """
    kept = sub('', phrase)
    return kept.lower()
|
||||
|
||||
# Load the phrase dictionary at import time.  The with-block closes the file
# once PhraseDict has consumed its lines.
with open('npdict.txt') as _phrase_file:
    DICT = PhraseDict(_phrase_file)
|
||||
|
||||
class Panama:
    """Panama represents a palindrome, or a state in searching for one.
    It has .left and .right to hold the phrases that are chosen,
    and .L and .R to hold the current partial phrases in the middle (still working on these).
    Also, a .set of all complete phrases, and the .dict of allowable phrases to choose from."""

    def __init__(self, left=['aman', 'aplan'], L='aca', R='', right=['acanal', 'panama'], dict=DICT):
        # The list defaults are only read; both are copied below.
        assert cat(left + [L]) == cat([R] + right)[::-1]
        self.dict = dict               # a {letters: actual_phrase} mapping
        self.left = list(left)         # list of complete phrases on left
        self.right = deque(right)      # deque of complete phrases on right
        self.L = L                     # an incomplete phrase on left
        self.R = R                     # an incomplete phrase on right
        self.set = set(left + right)   # a set of all complete phrases in palindrome
        self.best = []                 # list of phrases in longest palindrome found
        self.Nshown = 0                # the number of phrases shown in the previous printout
        self.i = 0                     # the number of steps taken in the search
        self.check()

    def __str__(self):
        return self.original_phrases(self.best)

    def original_phrases(self, phrases):
        "Join the original spellings of the given letter-keys with ', '."
        return ', '.join(self.dict[phrase] for phrase in phrases)

    def search(self, steps=10**5):
        """Depth-first search for palindromes.  From the current state, find all applicable actions.
        Do the first one, and put on the stack reminders to undo it and try the others,
        but first search deeper from the result of the first action."""
        pending = [self.applicable_actions()]
        for self.i in range(steps):
            if not pending:
                return
            top = pending.pop()
            if isinstance(top, UndoCommand):
                self.undo(top)
                continue
            if not top:
                # An exhausted list of actions: nothing left at this level.
                continue
            act = top.pop()
            self.do(act)
            self.check()
            pending.append(top)               # remaining alternatives
            pending.append(UndoCommand(act))  # reminder to undo act
            pending.append(self.applicable_actions())

    def do(self, act):
        "Modify the current state by adding a letter, or finishing a phrase."
        if act == ',':   # finish phrase on left
            self.set.add(self.L)
            self.left.append(self.L)
            self.L = ''
            return
        if act == ';':   # finish phrase on right
            self.set.add(self.R)
            self.right.appendleft(self.R)
            self.R = ''
            return
        # add a letter to the inner end of both partial phrases
        self.L = self.L + act
        self.R = act + self.R

    def undo(self, act):
        "Modify the current state by undoing an action that was previously done."
        if act == ',':   # unfinish phrase on left
            assert self.L == ''
            self.L = self.left.pop()
            self.set.remove(self.L)
            return
        if act == ';':   # unfinish phrase on right
            assert self.R == ''
            self.R = self.right.popleft()
            self.set.remove(self.R)
            return
        # remove a letter
        self.L = self.L[:-1]
        self.R = self.R[1:]

    def check(self):
        "Check to see if current state is a palindrome, and if so, record it and maybe print."
        if not self.is_palindrome():
            return
        N = len(self.left) + len(self.right)
        if N <= len(self.best):
            return
        self.best = self.left + list(self.right)
        big_jump = N - self.Nshown > 1000
        late_jump = N > 14000 and N - self.Nshown > 100
        if big_jump or late_jump or N > 14500:
            self.Nshown = N
            print(self.report())

    def report(self):
        "Return a one-line summary of the best palindrome found so far."
        best = self.best
        N = len(best)
        nwords = N + sum(self.dict[p].count(' ') for p in best)
        nletters = sum(len(p) for p in best)
        template = 'Pal: {:6,d} phrases, {:6,d} words, {:6,d} letters (at step {:,d})'
        return template.format(N, nwords, nletters, self.i+1)

    def applicable_actions(self):
        "Return the actions possible now; letters come last, lowest score first."
        L, R, D = self.L, self.R, self.dict
        actions = []
        if self.is_allowed(L):
            actions.append(',')
        if self.is_allowed(R):
            actions.append(';')
        # Score a letter by how many keys could still complete both sides.
        scores = {}
        for A in alphabet:
            scores[A] = D.prefixes[L+A] * D.suffixes[A+R]
        ranked = sorted(alphabet, key=scores.get)
        actions.extend(A for A in ranked if scores[A] > 0)
        return actions

    def is_allowed(self, phrase):
        "A phrase may be finished if it is in the dict and not already used."
        return phrase in self.dict and phrase not in self.set

    def is_palindrome(self):
        "Is this a palindrome?  (Does any extra .L or .R match the other side?)"
        if self.L == '' and self.left[-1].endswith(self.R):
            return True
        return self.R == '' and self.right[0].startswith(self.L)
|
||||
|
||||
alphabet = 'abcdefghijklmnopqrstuvwxyz'
|
||||
cat = ''.join
|
||||
UndoCommand = str
|
||||
DoCommand = list
|
||||
|
||||
################ Unit Tests
|
||||
|
||||
def test1():
    "Check the string helpers and a few facts about DICT; return 'ok'."
    assert prefixes('hello') == ['h', 'he', 'hel', 'hell', 'hello']
    assert suffixes('hello') == ['o', 'lo', 'llo', 'ello', 'hello']
    letter_cases = [('a man', 'aman'),
                    ('an elk', 'anelk'),
                    ('Mr. T', 'mrt'),
                    ('Donald E. Knuth', 'donaldeknuth')]
    for phrase, expected in letter_cases:
        assert letters(phrase) == expected
    assert len(DICT) == 125512
    for key in ('panama', 'aman'):
        assert key in DICT
    assert 'threemen' not in DICT
    assert DICT['acanal'] == 'a canal'
    return 'ok'
|
||||
|
||||
def test2():
    "Check Panama construction, is_palindrome and str(); return 'ok'."
    default_state = Panama()
    assert default_state.is_palindrome()
    assert str(default_state) == 'a man, a plan, a canal, Panama'

    unbalanced = Panama(['aman','aplan'], 'acadd','dd', ['acanal', 'panama'])
    assert not unbalanced.is_palindrome()

    short = Panama(['maya'], '', '', ['ayam'])
    assert short.is_palindrome()
    assert str(short) == 'Maya, a yam'
    return 'ok'
|
||||
|
||||
if __name__ == '__main__':
|
||||
p = Panama();
|
||||
test1()
|
||||
test2()
|
||||
p.search(10**6)
|
||||
print(p.report())
|
||||
print(str(p))
|
||||
52
py/parse.py
Normal file
52
py/parse.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# Toy grammar: each key is a category; each value lists its alternatives.
# An alternative is a single symbol (a word or a category) or a list of
# symbols that must appear in sequence.
grammar = {
    # Lexicon
    'Noun': ['stench', 'wumpus'],
    'Verb': ['is', 'smell'],
    'Adjective': ['dead', 'smelly'],
    'Adverb': ['left', 'back'],
    'Pronoun': ['me', 'you'],
    'Name': ['John', 'Mary'],
    'Article': ['the', 'a'],
    'Preposition': ['to', 'in'],
    'Conjunction': ['and', 'or'],
    'Digit': ['0', '1'],

    # Phrase structure.  The conjunction rule must name the 'Conjunction'
    # category exactly, otherwise it can never match.
    'S': [['NP', 'VP'], ['S', 'Conjunction', 'S']],
    'NP': ['Pronoun', 'Noun', ['Article', 'Noun'], ['Digit', 'Digit'],
           ['NP', 'PP'], ['NP', 'RelClause']],
    'VP': ['Verb', ['VP', 'NP'], ['VP', 'Adjective'], ['VP', 'PP'],
           ['VP', 'Adverb']],
    'PP': [['Preposition', 'NP']],
    'RelClause': [['that', 'VP']]
    }
|
||||
|
||||
|
||||
def parse(forest, grammar):
    """Reduce forest to a single 'S' tree by exhaustive bottom-up search.

    forest is a list of words (strings) and trees, each tree a
    (category, children) tuple.  grammar maps a category to its
    alternatives.  Returns the first complete 'S' tree found, or None if no
    sequence of reductions yields one.  Each reduction tried is printed.
    """
    if len(forest) == 1 and category(forest[0]) == 'S':
        return forest[0]
    for i in range(len(forest)):
        for lhs in grammar:
            for rhs in grammar[lhs]:
                rhs = mklist(rhs)
                n = len(rhs)
                subsequence = forest[i:i+n]
                # Near the end of the forest the slice is shorter than the
                # rule; such a rule cannot apply, so skip it before matching.
                if len(subsequence) == n and match(subsequence, rhs):
                    print('%s %s => %s' % (subsequence, lhs, rhs))
                    # Work on a copy so that failed branches leave the
                    # caller's forest untouched.
                    forest2 = forest[:]
                    forest2[i:i+n] = [(lhs, subsequence)]
                    result = parse(forest2, grammar)
                    if result is not None:
                        return result
    return None
|
||||
|
||||
def mklist(x):
    "Return x itself if it is a list, otherwise a one-element list holding x."
    if isinstance(x, list):
        return x
    return [x]
|
||||
|
||||
def match(forest, rhs):
    """Return 1 if the items of forest match the symbols of rhs pairwise, else 0.

    An item matches a symbol when its category equals the symbol or when the
    item itself equals the symbol (a literal word).  A forest shorter than
    rhs cannot match; it used to raise IndexError once its leading items
    matched.
    """
    if len(forest) < len(rhs):
        return 0
    for item, symbol in zip(forest, rhs):
        if category(item) != symbol and item != symbol:
            return 0
    return 1
|
||||
|
||||
def category(forest):
    "Return the category (first element) of a tree tuple, or 'word' for anything else."
    return forest[0] if isinstance(forest, tuple) else 'word'
|
||||
110
py/py2html.py
Normal file
110
py/py2html.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""Pretty-print Python code to colorized, hyperlinked html.
|
||||
|
||||
In python, do:
|
||||
py2html.convert_files(['file1.py', 'file2.py', ...])
|
||||
From the shell, do:
|
||||
python py2html.py *.py"""
|
||||
|
||||
import re, string, time, os
|
||||
|
||||
|
||||
## RE for a Python identifier: a letter or underscore, then letters, digits or underscores
id = r'[a-zA-Z_][a-zA-Z_0-9]*'
## Backreferences to groups 1-4, used to build re.sub replacement templates
g1, g2, g3, g4 = [r'\%d' % n for n in (1, 2, 3, 4)]
|
||||
def b(text):
    "Wrap text in an HTML <b> element."
    template = '<b>%s</b>'
    return template % text
|
||||
def i(text):
    "Wrap text in an HTML <i> element."
    template = '<i>%s</i>'
    return template % text
|
||||
def color(rgb, text):
    "Wrap text in an HTML <font> element whose color attribute is rgb."
    template = '<font color="%s">%s</font>'
    return template % (rgb, text)
|
||||
def link(url, anchor):
    "Return an HTML hyperlink to url whose visible text is anchor."
    template = '<a href="%s">%s</a>'
    return template % (url, anchor)
|
||||
def hilite(text, bg="ffff00"):
    """Return text in bold on a bg-colored background, inside an anchor named text.

    The <a name=...> element is now closed; it was previously left open, so
    the generated markup was mis-nested.
    """
    return '<b style="background-color:%s"><a name="%s">%s</a></b>' % (
        bg, text, text)
|
||||
|
||||
def modulelink(module, baseurl=''):
    """Return a hyperlink for module.

    If module + '.py' is listed in the global local_files, the link is
    baseurl + module + '.html'; otherwise it points at python.org.
    """
    is_local = (module + '.py') in local_files
    if not is_local:
        baseurl = 'http://www.python.org/doc/current/lib/module-'
    target = baseurl + module + '.html'
    return link(target, module)
|
||||
|
||||
def importer(m):
    "Given a match whose group 2 is text like 'utils, math, re', replace each name by an HTML link."
    prefix, names, suffix = m.group(1), m.group(2), m.group(3)
    linked = []
    for name in names.split(','):
        linked.append(modulelink(name.strip()))
    return prefix + ', '.join(linked) + suffix
|
||||
|
||||
def find1(regex, str):
    "Return the first re.findall result for regex in str, or ' ' if there is none."
    matches = re.findall(regex, str)
    if matches:
        return matches[0]
    return ' '
|
||||
|
||||
def convert_files(filenames, local_filenames=None, tblfile='readme.htm'):
    """Convert files of python code to colorized HTML.

    filenames: the .py files to convert; each result is written next to its
        source with the last three characters of the name replaced by '.htm'.
    local_filenames: names treated as local when linking modules (sets the
        global local_files); defaults to filenames.
    tblfile: if true, an HTML file whose '<table border=1>' section is
        rewritten with one summary row per converted module.
    """
    # Local import: cmp_to_key lets the cmp-style num_cmp be used as a sort key.
    from functools import cmp_to_key
    global local_files
    local_files = local_filenames or filenames
    summary_table = {}
    for f in filenames:
        with open(f) as source:
            fulltext = '\n'.join(line.rstrip() for line in source)
        text = fulltext
        for (pattern, repl) in replacements:
            text = re.sub(pattern, repl, text)
        text = '<<header("AIMA Python file: %s")>><pre>%s</pre><<footer>>' % (
            f, text)
        with open(f[:-3]+'.htm', 'w') as out:
            out.write(text)
        if tblfile:
            ch = find1(r'Chapters?\s+([^ \)"]*)', fulltext)
            module = f.replace('.py','')
            lines = fulltext.count('\n')
            desc = find1(r'"""(.*)\n', fulltext).replace('"""', '')
            summary_table.setdefault(ch,[]).append((module, lines, desc))
    if tblfile:
        totallines = 0
        tbl = ["<tr><th>Chapter<th>Module<th>Files<th>Lines<th>Description"]
        fmt = "<tr><td align=right>%s<th>%s<td>%s<td align=right>%s<td>%s"
        items = sorted(summary_table.items(), key=cmp_to_key(num_cmp))
        for (ch, entries) in items:
            for (module, lines, desc) in entries:
                totallines += lines
                files = link(module+'.py', '.py')
                if os.path.exists(module+'.txt'):
                    files += ' ' + link(module+'.txt', '.txt')
                tbl += [fmt % (ch, link(module+'.html', module),
                               files, lines, desc)]
        # The closing </table> comes from group 3 of the match below, so it
        # is not appended here (it used to be emitted twice).
        tbl += [fmt % ('', '', '', totallines, '')]
        table_html = '\n'.join(tbl) + '\n'
        ## Now read the tblfile, and replace the table with tbl
        with open(tblfile) as table_in:
            old = table_in.read()
        # A function replacement inserts table_html literally; as a template
        # string, any backslash in a description would be read as an escape.
        # NOTE(review): '.*' is greedy, so the match runs to the LAST
        # </table> in the file -- confirm that tblfile holds a single table.
        new = re.sub("(?s)(<table border=1>)(.*)(</table>)",
                     lambda m: m.group(1) + table_html + m.group(3),
                     old, count=1)
        with open(tblfile, 'w') as table_out:
            table_out.write(new)
|
||||
|
||||
def num_cmp(x, y):
    """cmp-style comparison of two (chapter, entries) pairs by chapter.

    Returns -1, 0 or 1.  A chapter containing digits is compared by its
    first run of digits as an integer; one without digits by its own value.
    Written without the cmp builtin, and with an explicit rank for
    None < numbers < other strings, so the ordering is the same on Python 3
    as Python 2 gave.
    """
    def sort_key(pair):
        chapter = pair[0]
        nums = re.findall('[0-9]+', chapter or '')
        if nums:
            return (1, int(nums[0]))
        if chapter is None:
            return (0, 0)
        return (2, chapter)
    kx, ky = sort_key(x), sort_key(y)
    return (kx > ky) - (kx < ky)
|
||||
|
||||
### Above is general (more or less); below is specific to my files.
|
||||
|
||||
def comment(text):
    "Render text in green italics."
    green = color("green", text)
    return i(green)
|
||||
|
||||
# (pattern, replacement) pairs applied in order by convert_files via re.sub.
replacements = [
    # Escape HTML-special characters first; '&' goes before '<' and '>' so
    # the entities produced for those are not escaped again.  (These three
    # rules had degraded into no-ops that mapped each character to itself.)
    (r'&', '&amp;'),
    (r'<', '&lt;'),
    (r'>', '&gt;'),
    # A line of 10 or more '#'/'_' after a leading '#' becomes a rule.
    (r'(?ms)^#+[#_]{10,} *\n', '<hr>'),
    # Single- and double-quoted strings, then triple-quoted strings.
    (r"""('[^']*?'|"[^"]*?")""", comment(g1)),
    (r'(?s)(""".*?"""|' + r"'''.*?''')", comment(g1)),
    # '#' comments.
    (r'(#.*)', color("cc33cc", g1)),
    # Start-of-line assignments, then def and class names, are highlighted.
    (r'(?m)(^[a-zA-Z][a-zA-Z_0-9, ]+)(\s+=\s+)', hilite(g1) + g2),
    (r'(?m)(^\s*)(def\s+)(%s)' % id, g1 + b(g2) + hilite(g3)),
    (r'(?m)(^\s*)(class\s+)(%s)' % id, g1 + b(g2) + hilite(g3)),
    # Imported module names become links.
    (r'(from\s+)([a-z]+)(\s+import)', importer),
    (r'(import\s+)([a-z, ]+)(\s|\n|$|,)', importer),
    ]
|
||||
|
||||
if __name__ == '__main__':
    import sys, glob
    # Treat every command-line argument as a glob pattern and convert all matches.
    files = [name for arg in sys.argv[1:] for name in glob.glob(arg)]
    convert_files(files)
|
||||
|
||||
## ENHANCEMENTS:
|
||||
## Can get confused with """ and '''; not a problem in practice.
|
||||
## Maybe we should create an index
|
||||
## Probably should switch to Doxygen
|
||||
106
py/spell.py
Normal file
106
py/spell.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""Spelling Corrector in Python 3; see http://norvig.com/spell-correct.html
|
||||
|
||||
Copyright (c) 2007-2016 Peter Norvig
|
||||
MIT license: www.opensource.org/licenses/mit-license.php
|
||||
"""
|
||||
|
||||
################ Spelling Corrector
|
||||
|
||||
import re
|
||||
from collections import Counter
|
||||
|
||||
def words(text):
    "Lowercase `text` and return its runs of word characters, in order."
    lowered = text.lower()
    return re.findall(r'\w+', lowered)
|
||||
|
||||
# Word counts for the corpus big.txt; the file is closed as soon as it is read.
with open('big.txt') as _corpus:
    WORDS = Counter(words(_corpus.read()))
|
||||
|
||||
def P(word, N=sum(WORDS.values())):
    "Probability of `word`: its count in WORDS divided by N (total count, computed once at definition)."
    occurrences = WORDS[word]
    return occurrences / N
|
||||
|
||||
def correction(word):
    "Return the candidate for `word` that has the highest probability P."
    options = candidates(word)
    return max(options, key=P)
|
||||
|
||||
def candidates(word):
    "Known words at edit distance 0, else 1, else 2 from `word`; else [word] itself."
    exact = known([word])
    if exact:
        return exact
    one_away = known(edits1(word))
    if one_away:
        return one_away
    two_away = known(edits2(word))
    if two_away:
        return two_away
    return [word]
|
||||
|
||||
def known(words):
    "Return the set of those `words` that are keys of WORDS."
    found = set()
    for candidate in words:
        if candidate in WORDS:
            found.add(candidate)
    return found
|
||||
|
||||
def edits1(word):
    "The set of strings one delete, transpose, replace or insert away from `word`."
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    results = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]
        for ch in alphabet:
            results.add(head + ch + tail)                      # insert
        if not tail:
            continue
        results.add(head + tail[1:])                           # delete
        for ch in alphabet:
            results.add(head + ch + tail[1:])                  # replace
        if len(tail) > 1:
            results.add(head + tail[1] + tail[0] + tail[2:])   # transpose
    return results
|
||||
|
||||
def edits2(word):
    "Generate every string reachable by applying edits1 twice to `word` (may repeat)."
    first_pass = edits1(word)
    return (twice for once in first_pass for twice in edits1(once))
|
||||
|
||||
################ Test Code
|
||||
|
||||
def unit_tests():
    "Assert expected corrections, tokenization and corpus statistics; return a pass message."
    expected_corrections = [
        ('speling', 'spelling'),              # insert
        ('korrectud', 'corrected'),           # replace 2
        ('bycycle', 'bicycle'),               # replace
        ('inconvient', 'inconvenient'),       # insert 2
        ('arrainged', 'arranged'),            # delete
        ('peotry', 'poetry'),                 # transpose
        ('peotryy', 'poetry'),                # transpose + delete
        ('word', 'word'),                     # known
        ('quintessential', 'quintessential'), # unknown
    ]
    for misspelled, intended in expected_corrections:
        assert correction(misspelled) == intended

    # Tokenizing and counting.
    assert words('This is a TEST.') == ['this', 'is', 'a', 'test']
    assert Counter(words('This is a test. 123; A TEST this is.')) == (
           Counter({'123': 1, 'a': 2, 'is': 2, 'test': 2, 'this': 2}))

    # Statistics of the loaded corpus.
    assert len(WORDS) == 32192
    assert sum(WORDS.values()) == 1115504
    top_ten = [
        ('the', 79808),
        ('of', 40024),
        ('and', 38311),
        ('to', 28765),
        ('in', 22020),
        ('a', 21124),
        ('that', 12512),
        ('he', 12401),
        ('was', 11410),
        ('it', 10681)]
    assert WORDS.most_common(10) == top_ten
    assert WORDS['the'] == 79808
    assert P('quintessential') == 0
    assert 0.07 < P('the') < 0.08
    return 'unit_tests pass'
|
||||
|
||||
def spelltest(tests, verbose=False):
    """Run correction(wrong) on all (right, wrong) pairs; report results.

    tests: a list of (right, wrong) pairs.
    verbose: if true, also print each wrong correction with the WORDS counts
        of the answer given and the answer expected.
    Prints the fraction correct, the fraction of misses whose right answer
    is not in WORDS, and the throughput in words per second.
    """
    import time
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    start = time.perf_counter()
    good, unknown = 0, 0
    n = len(tests)
    for right, wrong in tests:
        w = correction(wrong)
        good += (w == right)
        if w != right:
            unknown += (right not in WORDS)
            if verbose:
                print('correction({}) => {} ({}); expected {} ({})'
                      .format(wrong, w, WORDS[w], right, WORDS[right]))
    dt = time.perf_counter() - start
    # Guard both divisions so an empty test set or a zero elapsed time
    # produces a report instead of ZeroDivisionError.
    denom = max(n, 1)
    rate = n / dt if dt > 0 else float('inf')
    print('{:.0%} of {} correct ({:.0%} unknown) at {:.0f} words per second '
          .format(good / denom, n, unknown / denom, rate))
|
||||
|
||||
def Testset(lines):
    """Parse 'right: wrong1 wrong2' lines into [('right', 'wrong1'), ('right', 'wrong2')] pairs.

    Blank (whitespace-only) lines are skipped, so a trailing empty line in a
    test file no longer breaks the unpacking.  Any other line that does not
    contain exactly one ':' still raises ValueError.
    """
    pairs = []
    for line in lines:
        if not line.strip():
            continue
        right, wrongs = line.split(':')
        pairs.extend((right, wrong) for wrong in wrongs.split())
    return pairs
|
||||
|
||||
if __name__ == '__main__':
    print(unit_tests())
    # Run both test sets in order, closing each file once it has been parsed.
    for testfile in ('spell-testset1.txt', 'spell-testset2.txt'):
        with open(testfile) as lines:
            spelltest(Testset(lines))
|
||||
50
py/sudoku-easy50.txt
Normal file
50
py/sudoku-easy50.txt
Normal file
@@ -0,0 +1,50 @@
|
||||
003020600900305001001806400008102900700000008006708200002609500800203009005010300
|
||||
200080300060070084030500209000105408000000000402706000301007040720040060004010003
|
||||
000000907000420180000705026100904000050000040000507009920108000034059000507000000
|
||||
030050040008010500460000012070502080000603000040109030250000098001020600080060020
|
||||
020810740700003100090002805009040087400208003160030200302700060005600008076051090
|
||||
100920000524010000000000070050008102000000000402700090060000000000030945000071006
|
||||
043080250600000000000001094900004070000608000010200003820500000000000005034090710
|
||||
480006902002008001900370060840010200003704100001060049020085007700900600609200018
|
||||
000900002050123400030000160908000000070000090000000205091000050007439020400007000
|
||||
001900003900700160030005007050000009004302600200000070600100030042007006500006800
|
||||
000125400008400000420800000030000095060902010510000060000003049000007200001298000
|
||||
062340750100005600570000040000094800400000006005830000030000091006400007059083260
|
||||
300000000005009000200504000020000700160000058704310600000890100000067080000005437
|
||||
630000000000500008005674000000020000003401020000000345000007004080300902947100080
|
||||
000020040008035000000070602031046970200000000000501203049000730000000010800004000
|
||||
361025900080960010400000057008000471000603000259000800740000005020018060005470329
|
||||
050807020600010090702540006070020301504000908103080070900076205060090003080103040
|
||||
080005000000003457000070809060400903007010500408007020901020000842300000000100080
|
||||
003502900000040000106000305900251008070408030800763001308000104000020000005104800
|
||||
000000000009805100051907420290401065000000000140508093026709580005103600000000000
|
||||
020030090000907000900208005004806500607000208003102900800605007000309000030020050
|
||||
005000006070009020000500107804150000000803000000092805907006000030400010200000600
|
||||
040000050001943600009000300600050002103000506800020007005000200002436700030000040
|
||||
004000000000030002390700080400009001209801307600200008010008053900040000000000800
|
||||
360020089000361000000000000803000602400603007607000108000000000000418000970030014
|
||||
500400060009000800640020000000001008208000501700500000000090084003000600060003002
|
||||
007256400400000005010030060000508000008060200000107000030070090200000004006312700
|
||||
000000000079050180800000007007306800450708096003502700700000005016030420000000000
|
||||
030000080009000500007509200700105008020090030900402001004207100002000800070000090
|
||||
200170603050000100000006079000040700000801000009050000310400000005000060906037002
|
||||
000000080800701040040020030374000900000030000005000321010060050050802006080000000
|
||||
000000085000210009960080100500800016000000000890006007009070052300054000480000000
|
||||
608070502050608070002000300500090006040302050800050003005000200010704090409060701
|
||||
050010040107000602000905000208030501040070020901080406000401000304000709020060010
|
||||
053000790009753400100000002090080010000907000080030070500000003007641200061000940
|
||||
006080300049070250000405000600317004007000800100826009000702000075040190003090600
|
||||
005080700700204005320000084060105040008000500070803010450000091600508007003010600
|
||||
000900800128006400070800060800430007500000009600079008090004010003600284001007000
|
||||
000080000270000054095000810009806400020403060006905100017000620460000038000090000
|
||||
000602000400050001085010620038206710000000000019407350026040530900020007000809000
|
||||
000900002050123400030000160908000000070000090000000205091000050007439020400007000
|
||||
380000000000400785009020300060090000800302009000040070001070500495006000000000092
|
||||
000158000002060800030000040027030510000000000046080790050000080004070100000325000
|
||||
010500200900001000002008030500030007008000500600080004040100700000700006003004050
|
||||
080000040000469000400000007005904600070608030008502100900000005000781000060000010
|
||||
904200007010000000000706500000800090020904060040002000001607000000000030300005702
|
||||
000700800006000031040002000024070000010030080000060290000800070860000500002006000
|
||||
001007090590080001030000080000005800050060020004100000080000030100020079020700400
|
||||
000003017015009008060000000100007000009000200000500004000000020500600340340200000
|
||||
300200000000107000706030500070009080900020004010800050009040301000702000000008006
|
||||
11
py/sudoku-hardest.txt
Normal file
11
py/sudoku-hardest.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
85...24..72......9..4.........1.7..23.5...9...4...........8..7..17..........36.4.
|
||||
..53.....8......2..7..1.5..4....53...1..7...6..32...8..6.5....9..4....3......97..
|
||||
12..4......5.69.1...9...5.........7.7...52.9..3......2.9.6...5.4..9..8.1..3...9.4
|
||||
...57..3.1......2.7...234......8...4..7..4...49....6.5.42...3.....7..9....18.....
|
||||
7..1523........92....3.....1....47.8.......6............9...5.6.4.9.7...8....6.1.
|
||||
1....7.9..3..2...8..96..5....53..9...1..8...26....4...3......1..4......7..7...3..
|
||||
1...34.8....8..5....4.6..21.18......3..1.2..6......81.52..7.9....6..9....9.64...2
|
||||
...92......68.3...19..7...623..4.1....1...7....8.3..297...8..91...5.72......64...
|
||||
.6.5.4.3.1...9...8.........9...5...6.4.6.2.7.7...4...5.........4...8...1.5.2.3.4.
|
||||
7.....4...2..7..8...3..8.799..5..3...6..2..9...1.97..6...3..9...3..4..6...9..1.35
|
||||
....7..2.8.......6.1.2.5...9.54....8.........3....85.1...3.2.8.4.......9.7..6....
|
||||
95
py/sudoku-top95.txt
Normal file
95
py/sudoku-top95.txt
Normal file
@@ -0,0 +1,95 @@
|
||||
4.....8.5.3..........7......2.....6.....8.4......1.......6.3.7.5..2.....1.4......
|
||||
52...6.........7.13...........4..8..6......5...........418.........3..2...87.....
|
||||
6.....8.3.4.7.................5.4.7.3..2.....1.6.......2.....5.....8.6......1....
|
||||
48.3............71.2.......7.5....6....2..8.............1.76...3.....4......5....
|
||||
....14....3....2...7..........9...3.6.1.............8.2.....1.4....5.6.....7.8...
|
||||
......52..8.4......3...9...5.1...6..2..7........3.....6...1..........7.4.......3.
|
||||
6.2.5.........3.4..........43...8....1....2........7..5..27...........81...6.....
|
||||
.524.........7.1..............8.2...3.....6...9.5.....1.6.3...........897........
|
||||
6.2.5.........4.3..........43...8....1....2........7..5..27...........81...6.....
|
||||
.923.........8.1...........1.7.4...........658.........6.5.2...4.....7.....9.....
|
||||
6..3.2....5.....1..........7.26............543.........8.15........4.2........7..
|
||||
.6.5.1.9.1...9..539....7....4.8...7.......5.8.817.5.3.....5.2............76..8...
|
||||
..5...987.4..5...1..7......2...48....9.1.....6..2.....3..6..2.......9.7.......5..
|
||||
3.6.7...........518.........1.4.5...7.....6.....2......2.....4.....8.3.....5.....
|
||||
1.....3.8.7.4..............2.3.1...........958.........5.6...7.....8.2...4.......
|
||||
6..3.2....4.....1..........7.26............543.........8.15........4.2........7..
|
||||
....3..9....2....1.5.9..............1.2.8.4.6.8.5...2..75......4.1..6..3.....4.6.
|
||||
45.....3....8.1....9...........5..9.2..7.....8.........1..4..........7.2...6..8..
|
||||
.237....68...6.59.9.....7......4.97.3.7.96..2.........5..47.........2....8.......
|
||||
..84...3....3.....9....157479...8........7..514.....2...9.6...2.5....4......9..56
|
||||
.98.1....2......6.............3.2.5..84.........6.........4.8.93..5...........1..
|
||||
..247..58..............1.4.....2...9528.9.4....9...1.........3.3....75..685..2...
|
||||
4.....8.5.3..........7......2.....6.....5.4......1.......6.3.7.5..2.....1.9......
|
||||
.2.3......63.....58.......15....9.3....7........1....8.879..26......6.7...6..7..4
|
||||
1.....7.9.4...72..8.........7..1..6.3.......5.6..4..2.........8..53...7.7.2....46
|
||||
4.....3.....8.2......7........1...8734.......6........5...6........1.4...82......
|
||||
.......71.2.8........4.3...7...6..5....2..3..9........6...7.....8....4......5....
|
||||
6..3.2....4.....8..........7.26............543.........8.15........8.2........7..
|
||||
.47.8...1............6..7..6....357......5....1..6....28..4.....9.1...4.....2.69.
|
||||
......8.17..2........5.6......7...5..1....3...8.......5......2..4..8....6...3....
|
||||
38.6.......9.......2..3.51......5....3..1..6....4......17.5..8.......9.......7.32
|
||||
...5...........5.697.....2...48.2...25.1...3..8..3.........4.7..13.5..9..2...31..
|
||||
.2.......3.5.62..9.68...3...5..........64.8.2..47..9....3.....1.....6...17.43....
|
||||
.8..4....3......1........2...5...4.69..1..8..2...........3.9....6....5.....2.....
|
||||
..8.9.1...6.5...2......6....3.1.7.5.........9..4...3...5....2...7...3.8.2..7....4
|
||||
4.....5.8.3..........7......2.....6.....5.8......1.......6.3.7.5..2.....1.8......
|
||||
1.....3.8.6.4..............2.3.1...........958.........5.6...7.....8.2...4.......
|
||||
1....6.8..64..........4...7....9.6...7.4..5..5...7.1...5....32.3....8...4........
|
||||
249.6...3.3....2..8.......5.....6......2......1..4.82..9.5..7....4.....1.7...3...
|
||||
...8....9.873...4.6..7.......85..97...........43..75.......3....3...145.4....2..1
|
||||
...5.1....9....8...6.......4.1..........7..9........3.8.....1.5...2..4.....36....
|
||||
......8.16..2........7.5......6...2..1....3...8.......2......7..3..8....5...4....
|
||||
.476...5.8.3.....2.....9......8.5..6...1.....6.24......78...51...6....4..9...4..7
|
||||
.....7.95.....1...86..2.....2..73..85......6...3..49..3.5...41724................
|
||||
.4.5.....8...9..3..76.2.....146..........9..7.....36....1..4.5..6......3..71..2..
|
||||
.834.........7..5...........4.1.8..........27...3.....2.6.5....5.....8........1..
|
||||
..9.....3.....9...7.....5.6..65..4.....3......28......3..75.6..6...........12.3.8
|
||||
.26.39......6....19.....7.......4..9.5....2....85.....3..2..9..4....762.........4
|
||||
2.3.8....8..7...........1...6.5.7...4......3....1............82.5....6...1.......
|
||||
6..3.2....1.....5..........7.26............843.........8.15........8.2........7..
|
||||
1.....9...64..1.7..7..4.......3.....3.89..5....7....2.....6.7.9.....4.1....129.3.
|
||||
.........9......84.623...5....6...453...1...6...9...7....1.....4.5..2....3.8....9
|
||||
.2....5938..5..46.94..6...8..2.3.....6..8.73.7..2.........4.38..7....6..........5
|
||||
9.4..5...25.6..1..31......8.7...9...4..26......147....7.......2...3..8.6.4.....9.
|
||||
...52.....9...3..4......7...1.....4..8..453..6...1...87.2........8....32.4..8..1.
|
||||
53..2.9...24.3..5...9..........1.827...7.........981.............64....91.2.5.43.
|
||||
1....786...7..8.1.8..2....9........24...1......9..5...6.8..........5.9.......93.4
|
||||
....5...11......7..6.....8......4.....9.1.3.....596.2..8..62..7..7......3.5.7.2..
|
||||
.47.2....8....1....3....9.2.....5...6..81..5.....4.....7....3.4...9...1.4..27.8..
|
||||
......94.....9...53....5.7..8.4..1..463...........7.8.8..7.....7......28.5.26....
|
||||
.2......6....41.....78....1......7....37.....6..412....1..74..5..8.5..7......39..
|
||||
1.....3.8.6.4..............2.3.1...........758.........7.5...6.....8.2...4.......
|
||||
2....1.9..1..3.7..9..8...2.......85..6.4.........7...3.2.3...6....5.....1.9...2.5
|
||||
..7..8.....6.2.3...3......9.1..5..6.....1.....7.9....2........4.83..4...26....51.
|
||||
...36....85.......9.4..8........68.........17..9..45...1.5...6.4....9..2.....3...
|
||||
34.6.......7.......2..8.57......5....7..1..2....4......36.2..1.......9.......7.82
|
||||
......4.18..2........6.7......8...6..4....3...1.......6......2..5..1....7...3....
|
||||
.4..5..67...1...4....2.....1..8..3........2...6...........4..5.3.....8..2........
|
||||
.......4...2..4..1.7..5..9...3..7....4..6....6..1..8...2....1..85.9...6.....8...3
|
||||
8..7....4.5....6............3.97...8....43..5....2.9....6......2...6...7.71..83.2
|
||||
.8...4.5....7..3............1..85...6.....2......4....3.26............417........
|
||||
....7..8...6...5...2...3.61.1...7..2..8..534.2..9.......2......58...6.3.4...1....
|
||||
......8.16..2........7.5......6...2..1....3...8.......2......7..4..8....5...3....
|
||||
.2..........6....3.74.8.........3..2.8..4..1.6..5.........1.78.5....9..........4.
|
||||
.52..68.......7.2.......6....48..9..2..41......1.....8..61..38.....9...63..6..1.9
|
||||
....1.78.5....9..........4..2..........6....3.74.8.........3..2.8..4..1.6..5.....
|
||||
1.......3.6.3..7...7...5..121.7...9...7........8.1..2....8.64....9.2..6....4.....
|
||||
4...7.1....19.46.5.....1......7....2..2.3....847..6....14...8.6.2....3..6...9....
|
||||
......8.17..2........5.6......7...5..1....3...8.......5......2..3..8....6...4....
|
||||
963......1....8......2.5....4.8......1....7......3..257......3...9.2.4.7......9..
|
||||
15.3......7..4.2....4.72.....8.........9..1.8.1..8.79......38...........6....7423
|
||||
..........5724...98....947...9..3...5..9..12...3.1.9...6....25....56.....7......6
|
||||
....75....1..2.....4...3...5.....3.2...8...1.......6.....1..48.2........7........
|
||||
6.....7.3.4.8.................5.4.8.7..2.....1.3.......2.....5.....7.9......1....
|
||||
....6...4..6.3....1..4..5.77.....8.5...8.....6.8....9...2.9....4....32....97..1..
|
||||
.32.....58..3.....9.428...1...4...39...6...5.....1.....2...67.8.....4....95....6.
|
||||
...5.3.......6.7..5.8....1636..2.......4.1.......3...567....2.8..4.7.......2..5..
|
||||
.5.3.7.4.1.........3.......5.8.3.61....8..5.9.6..1........4...6...6927....2...9..
|
||||
..5..8..18......9.......78....4.....64....9......53..2.6.........138..5....9.714.
|
||||
..........72.6.1....51...82.8...13..4.........37.9..1.....238..5.4..9.........79.
|
||||
...658.....4......12............96.7...3..5....2.8...3..19..8..3.6.....4....473..
|
||||
.2.3.......6..8.9.83.5........2...8.7.9..5........6..4.......1...1...4.22..7..8.9
|
||||
.5..9....1.....6.....3.8.....8.4...9514.......3....2..........4.8...6..77..15..6.
|
||||
.....2.......7...17..3...9.8..7......2.89.6...13..6....9..5.824.....891..........
|
||||
3...8.......7....51..............36...2..4....7...........6.13..452...........8..
|
||||
161
py/sudoku.py
Normal file
161
py/sudoku.py
Normal file
@@ -0,0 +1,161 @@
|
||||
## Solve Every Sudoku Puzzle
|
||||
|
||||
## See http://norvig.com/sudoku.html
|
||||
|
||||
## Throughout this program we have:
|
||||
## r is a row, e.g. 'A'
|
||||
## c is a column, e.g. '3'
|
||||
## s is a square, e.g. 'A3'
|
||||
## d is a digit, e.g. '9'
|
||||
## u is a unit, e.g. ['A1','B1','C1','D1','E1','F1','G1','H1','I1']
|
||||
##   grid is a grid, e.g. 81 non-blank chars starting with '.18...7...'
|
||||
## values is a dict of possible values, e.g. {'A1':'12349', 'A2':'8', ...}
|
||||
|
||||
def cross(A, B):
    "Return every a+b for a in A and b in B, with A varying slowest."
    pairs = []
    for a in A:
        for b in B:
            pairs.append(a + b)
    return pairs
|
||||
|
||||
digits   = '123456789'
rows     = 'ABCDEFGHI'
cols     = digits
squares  = cross(rows, cols)

## The 27 units: 9 columns, then 9 rows, then 9 boxes of 3x3.
unitlist = []
unitlist += [cross(rows, c) for c in cols]
unitlist += [cross(r, cols) for r in rows]
unitlist += [cross(rs, cs) for rs in ('ABC','DEF','GHI') for cs in ('123','456','789')]

## units[s]: the units containing square s.
units = {s: [u for u in unitlist if s in u] for s in squares}
## peers[s]: every other square sharing a unit with s.
peers = {s: set(sum(units[s], [])) - {s} for s in squares}
|
||||
|
||||
################ Unit Tests ################
|
||||
|
||||
def test():
    "Assert the expected sizes and contents of squares, unitlist, units and peers."
    assert len(squares) == 81
    assert len(unitlist) == 27
    for s in squares:
        assert len(units[s]) == 3
    for s in squares:
        assert len(peers[s]) == 20
    c2_units = [['A2', 'B2', 'C2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2'],
                ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9'],
                ['A1', 'A2', 'A3', 'B1', 'B2', 'B3', 'C1', 'C2', 'C3']]
    assert units['C2'] == c2_units
    c2_peers = {'A2', 'B2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2',
                'C1', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9',
                'A1', 'A3', 'B1', 'B3'}
    assert peers['C2'] == c2_peers
    print('All tests pass.')
|
||||
|
||||
################ Parse a Grid ################
|
||||
|
||||
def parse_grid(grid):
    """Convert grid to a dict of possible values, {square: digits}, or
    return False if a contradiction is detected."""
    ## Start with every digit possible everywhere, then assign the givens.
    values = {s: digits for s in squares}
    for s, d in grid_values(grid).items():
        if d not in digits:
            continue ## '0' or '.': an empty square, nothing to assign
        if not assign(values, s, d):
            return False ## (Fail if we can't assign d to square s.)
    return values
|
||||
|
||||
def grid_values(grid):
    "Convert grid into a dict of {square: char} with '0' or '.' for empties."
    ## Keep only digits and empty markers; any other character is ignored.
    chars = []
    for c in grid:
        if c in digits or c in '0.':
            chars.append(c)
    if len(chars) != 81:
        print(grid, chars, len(chars))
    assert len(chars) == 81
    return dict(zip(squares, chars))
|
||||
|
||||
################ Constraint Propagation ################
|
||||
|
||||
def assign(values, s, d):
    """Eliminate all the other values (except d) from values[s] and propagate.
    Return values, except return False if a contradiction is detected."""
    other_values = values[s].replace(d, '')
    for d2 in other_values:
        if not eliminate(values, s, d2):
            return False ## Stop at the first contradiction
    return values
|
||||
|
||||
def eliminate(values, s, d):
    """Eliminate d from values[s]; propagate when values or places <= 2.
    Return values, except return False if a contradiction is detected.

    values is modified in place.  Propagation recurses through eliminate()
    (for peers) and assign() (for units)."""
    if d not in values[s]:
        return values ## Already eliminated
    values[s] = values[s].replace(d,'')
    ## (1) If a square s is reduced to one value d2, then eliminate d2 from the peers.
    if len(values[s]) == 0:
        return False ## Contradiction: removed last value
    elif len(values[s]) == 1:
        d2 = values[s]
        ## all() over a generator stops at the first peer that fails.
        if not all(eliminate(values, s2, d2) for s2 in peers[s]):
            return False
    ## (2) If a unit u is reduced to only one place for a value d, then put it there.
    for u in units[s]:
        ## The squares of u that can still hold d.
        dplaces = [s for s in u if d in values[s]]
        if len(dplaces) == 0:
            return False ## Contradiction: no place for this value
        elif len(dplaces) == 1:
            # d can only be in one place in unit; assign it there
            if not assign(values, dplaces[0], d):
                return False
    return values
|
||||
|
||||
################ Display as 2-D grid ################
|
||||
|
||||
def display(values):
    "Print values as a 9x9 grid with '|' and '-' rules between the 3x3 boxes."
    width = 1 + max(len(values[s]) for s in squares)
    line = '+'.join(['-'*(width*3)]*3)
    for r in rows:
        cells = []
        for c in cols:
            cell = values[r+c].center(width)
            if c in '36':
                cell += '|' ## Vertical rule after columns 3 and 6
            cells.append(cell)
        print(''.join(cells))
        if r in 'CF':
            print(line) ## Horizontal rule after rows C and F
    print()
|
||||
|
||||
################ Search ################
|
||||
|
||||
def solve(grid):
    "Parse grid into possible values, then search for a solution."
    return search(parse_grid(grid))
|
||||
|
||||
def search(values):
    """Using depth-first search and propagation, try all possible values.
    Return a solved values dict, or False if no assignment works."""
    if values is False:
        return False ## Failed earlier
    if all(len(values[s]) == 1 for s in squares):
        return values ## Solved!
    ## Choose the unfilled square s with the fewest possibilities
    _, s = min((len(values[s]), s) for s in squares if len(values[s]) > 1)
    for d in values[s]:
        ## Each branch works on its own copy, so a failed branch leaves values untouched.
        result = search(assign(values.copy(), s, d))
        if result: return result
    ## Every digit for s led to a contradiction.  Return False explicitly:
    ## falling off the end returned None, which solved() does not treat as failure.
    return False
|
||||
|
||||
################ System test ################
|
||||
|
||||
import time
|
||||
|
||||
def solve_all(grids, name=''):
    """Attempt to solve a sequence of grids. Report results.

    grids: an iterable of grid strings (e.g. an open file, one grid per line).
    name: a label used in the printed summary.
    A summary line is printed only when more than one grid was given; an
    empty sequence is a no-op instead of failing to unpack zip(*[]).
    """
    outcomes = [time_solve(grid) for grid in grids]
    if not outcomes:
        return
    times, results = zip(*outcomes)
    N = len(results)
    if N > 1:
        print("Solved %d of %d %s puzzles (avg %.2f secs (%d Hz), max %.2f secs)." % (
            sum(results), N, name, sum(times)/N, N/sum(times), max(times)))
|
||||
|
||||
def time_solve(grid):
    "Solve grid; return (elapsed seconds, whether the result is a valid solution)."
    ## time.clock() was removed in Python 3.8; perf_counter() replaces it.
    start = time.perf_counter()
    values = solve(grid)
    t = time.perf_counter()-start
    return (t, solved(values))
|
||||
|
||||
def solved(values):
    """A puzzle is solved if each unit is a permutation of the digits 1 to 9.
    Any false value (False, None, or an empty dict) counts as unsolved."""
    if not values:
        return False ## Previously only False was caught; None raised TypeError below
    def unitsolved(unit): return set(values[s] for s in unit) == set(digits)
    return all(unitsolved(unit) for unit in unitlist)
|
||||
|
||||
|
||||
## Sample puzzles as 81-character strings; '0' or '.' marks an empty square.
## grid1 is the same string as the first line of sudoku-easy50.txt.
grid1 = '003020600900305001001806400008102900700000008006708200002609500800203009005010300'
## grid2 is the same string as the first line of sudoku-top95.txt.
grid2 = '4.....8.5.3..........7......2.....6.....8.4......1.......6.3.7.5..2.....1.4......'
hard1 = '.....6....59.....82....8....45........3........6..3.54...325..6..................'
|
||||
|
||||
if __name__ == '__main__':
    test()
    ## Solve each puzzle file in turn, closing it once its grids are solved.
    for filename, label in (("sudoku-easy50.txt", "easy"),
                            ("sudoku-top95.txt", "hard"),
                            ("sudoku-hardest.txt", "hardest")):
        with open(filename) as puzzle_file:
            solve_all(puzzle_file, label)
|
||||
|
||||
73
py/testaccum.py
Normal file
73
py/testaccum.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from __future__ import division
|
||||
import re
|
||||
from accum import *
|
||||
|
||||
|
||||
# Matches an accumulation display "[ACC: EXP for X in IT]" on a single line;
# groups are (ACC, EXP, X, IT).  The brackets are backslash-escaped in a raw
# string: the former "[[]" spelling draws a "possible nested set"
# FutureWarning from the re module on Python 3.7+.
acc_re = re.compile(r"\[(.+):(.+) for (.+) in (.+)\]")
|
||||
|
||||
def expand_accumulations(program_text):
    """Rewrite each accumulation display in program_text as a call to
    accumulation(), to simulate a hypothetical Python interpreter that
    handles accumulation displays directly.  This is a rough approximation:
    it won't match across lines, it won't match nested accumulation
    displays, and it handles neither multiple 'for' clauses nor 'if'
    clauses."""
    def to_call(matchobj):
        acc, exp, x, it = matchobj.groups()
        return "accumulation(%s, lambda %s: (%s), %s)" % (acc, x, exp, it)
    return acc_re.sub(to_call, program_text)
|
||||
|
||||
def test1(acc_display, expected):
    """Eval an accumulation display and see if it gets the expected answer.

    Prints the display, then the result; raises AssertionError if the result
    differs from expected.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(acc_display)
    # NOTE: eval() is acceptable here only because acc_display is always one
    # of this module's own test strings, never external input.
    result = eval(expand_accumulations(acc_display))
    assert result == expected, ('Got %s; expected %s' % (result, expected))
    print(' ==> %s' % result)
|
||||
|
||||
#### Initialize some data
|
||||
# Sample readings indexed by hour; test() reads indices 0 through 23.
temp = [
    70, 70, 71, 74, 76, 76, 72, 76, 77, 77, 77, 78,
    78, 79, 79, 79, 78, 80, 82, 83, 83, 81, 84, 83,
]
data = temp

def f(x):
    "Return twice x."
    return 2 * x

# Vote counts per candidate name.
votes = {
    'Arnie': 48,
    'Gray': 45,
    'Tom': 13,
    'Cruz': 32,
    'Peter': 3,
}
candidates = votes.keys()
|
||||
|
||||
def test():
    """Print the sample data, then check a series of accumulation displays.

    Every check goes through test1, which raises AssertionError on the
    first display whose value differs from the expected one.
    """
    # Each print receives one pre-formatted string so the line is valid as
    # both a Python 2 print statement and a Python 3 print() call.
    print('temp =  %s' % (temp,))
    print('data = temp')
    print('votes =  %s' % (votes,))
    print('candidates =  %s' % (candidates,))
    print('')

    #### Test some accumulation displays
    test1("[Max: temp[hour] for hour in range(24)]",
          max([temp[hour] for hour in range(24)]))
    test1("[Min: temp[hour] for hour in range(24)]",
          min([temp[hour] for hour in range(24)]))
    test1("[Sum: x*x for x in data]",
          sum([x*x for x in data]))
    test1("[Mean: f(x) for x in data]",
          sum([f(x) for x in data])/len(data))
    test1("[Median: f(x) for x in data]",
          156.0)
    test1("[Mode: f(x) for x in data]",
          166)
    test1("[Argmax: votes[c] for c in candidates]",
          'Arnie')
    test1("[Argmin: votes[c] for c in candidates]",
          'Peter')
    # The expected value used to iterate over a misspelt variable ("four")
    # while testing temp[hour]; that only ran because Python 2 leaks the
    # loop variable of the earlier list comprehensions into this scope.
    test1("[Some: temp[hour] > 75 for hour in range(24)]",
          any(temp[hour] > 75 for hour in range(24)))
    test1("[Every: temp[hour] > 75 for hour in range(24)]",
          all(temp[hour] > 75 for hour in range(24)))
    test1("[Top(10): temp[hour] for hour in range(24)]",
          [84, 83, 83, 83, 82, 81, 80, 79, 79, 79])
    test1("[Join(', '): votes[c] for c in candidates]",
          ', '.join([str(votes[c]) for c in candidates]))
    test1("[SortBy: abs(x) for x in (-2, -4, 3, 1)]",
          [1, -2, 3, -4])
    test1("[SortBy(reverse=True): abs(x) for x in (-2, -4, 3, 1)]",
          [-4, 3, -2, 1])


if __name__ == "__main__":
    test()
|
||||
170
py/yaptu.py
Normal file
170
py/yaptu.py
Normal file
@@ -0,0 +1,170 @@
|
||||
"""Yet Another Python Templating Utility, Version 1.2, by Alex Martelli.
|
||||
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52305
|
||||
(Specialized to HTML and modified by Peter Norvig.)
|
||||
|
||||
Copies input to output, with some substitutions. There are three types
|
||||
of substitutions: lexical, expression, and statement.
|
||||
|
||||
LEXICAL SUBSTITUTIONS:
|
||||
|
||||
& < >
|
||||
These characters, if surrounded by whitespace, are replaced by
|
||||
the corresponding HTML entities: &amp;, &lt;, &gt;.
|
||||
|
||||
EXPRESSION SUBSTITUTIONS:
|
||||
|
||||
<<exp>>
|
||||
Replace <<exp>> by eval(exp), where exp is a Python expression.
|
||||
The most common use is when exp is just a variable name.
|
||||
Example: <<green>>
|
||||
Special case 1: If exp starts with '/', replace '/' by '_'.
|
||||
Example: <</green>> becomes <<_green>>
|
||||
Special case 2: If exp evals to a callable, call it.
|
||||
Example: <<random.random>> is the same as <<random.random()>>
|
||||
Special case 3: If exp evals to None, replace it with ''.
|
||||
Example: <<list.append(item)>> generates no text.
|
||||
|
||||
STATEMENT SUBSTITUTIONS:
|
||||
|
||||
All statement substitutions start with a #[ in column 1, and end with
|
||||
a #] in column 1 of a subsequent line. Nesting is allowed, and
|
||||
works like you would expect. There are two variants:
|
||||
|
||||
#[
|
||||
stmts
|
||||
#]
|
||||
Any number of lines of Python stmts are executed.
|
||||
The first line must be empty, except for the #[
|
||||
|
||||
#[ stmt-header:
|
||||
lines
|
||||
#]
|
||||
The lines are interpreted as HTML with embedded expressions,
|
||||
and are sent to output, once for each execution of stmt-header.
|
||||
stmt-header is usually a for or if; This is hard to explain,
|
||||
but easy to see with an example:
|
||||
|
||||
<table><tr><th> Number <th> Number squared
|
||||
#[ for i in range(10):
|
||||
<tr><td> <<i>> <td> <<i**2>>
|
||||
#]
|
||||
</table>
|
||||
|
||||
This produces one line of the table for each value of i in [0 .. 9].
|
||||
If your compound statement has multiple stmt-headers, you use #| to
|
||||
introduce the subsequent stmt-headers (such as else: or except:).
|
||||
Another example:
|
||||
|
||||
#[ if time.localtime()[6] in [5, 6]:
|
||||
Have a good weekend!
|
||||
#| else:
|
||||
Time for work.
|
||||
#]
|
||||
"""
|
||||
|
||||
import sys, re, os, os.path
|
||||
|
||||
class Copier:
    """Smart-copier (YAPTU) class.

    A Copier takes template lines from globals['_bl'], writes ordinary
    lines to self.outf after lexical and <<expression>> substitution, and
    executes the statements found between '#[' and '#]' lines.

    Template expressions and statements are run with eval/exec in the
    globals dict given to the constructor, so a template can run arbitrary
    code: only process templates you trust.
    """

    def __init__(self, globals):
        """Create a Copier.

        globals is the namespace dict in which template code runs.  The
        copyblock method is stored in it under '_cb', which is the name the
        generated statements call to copy a nested range of lines.
        """
        self.regex = re.compile("<<(.*?)>>")
        self.globals = globals
        self.globals['_cb'] = self.copyblock
        self.outf = sys.stdout
        self.verbose = 0

    def copyblock(self, i=0, last=None):
        """Main copy method: process lines [i,last) of block.

        The block is the list of lines held in self.globals['_bl']; last
        defaults to the length of that list.  Output goes to self.outf.
        """

        def repl(match):
            "Replace the match with its value as a Python expression."
            expr = self.preproc(match.group(1), 'eval')
            if self.verbose:
                # No newline here: the value is printed on the same line.
                sys.stdout.write('=== eval{%s} ' % expr)
            try:
                val = eval(expr, self.globals)
            except Exception:
                self.oops('eval', expr)  # prints diagnostics and re-raises
            if callable(val):
                val = val()
            if val is None:
                val = ''
            if self.verbose:
                print('========> %s' % (val,))
            return str(val)

        block = self.globals['_bl']
        if last is None:
            last = len(block)
        while i < last:
            line = block[i]
            if line.startswith("#["):  # a statement starts at line block[i]
                # i is the last line to _not_ process
                stmt = line[2:].strip()
                j = i + 1  # look for 'finish' from here onwards
                nest = 1   # count nesting levels of statements
                # NOTE(review): when the '#[' line itself ends with '#]'
                # this loop is skipped with j == i+1, so the 'i = j + 1'
                # below also steps over the line that follows the one-line
                # statement.  Behaviour kept as found; confirm it is
                # intended.
                while j < last and not stmt.endswith("#]"):
                    line = block[j]
                    # first look for nested statements or 'finish' lines
                    if line.startswith("#]"):    # found a statement-end
                        nest = nest - 1
                        if nest == 0:
                            break  # j is first line to _not_ process
                    elif line.startswith("#["):  # found a nested statement
                        nest = nest + 1
                    elif nest == 1 and line.startswith("#|"):
                        # look for continuation only at this nesting
                        nestat = line[2:].strip()
                        stmt = '%s _cb(%s,%s)\n%s' % (stmt, i+1, j, nestat)
                        i = j  # again, i is the last line to _not_ process
                    j = j + 1
                if stmt == '':  ## A multi-line python suite
                    self.execute(''.join(block[i+1:j]))
                else:  ## The header of a for loop (etc.) is on this line
                    self.execute("%s _cb(%s,%s)" % (stmt, i+1, j))
                i = j + 1
            else:  # normal line, just copy with substitution
                self.outf.write(
                    self.regex.sub(repl, self.preproc(line, 'copy')))
                i = i + 1

    def execute(self, stmt):
        """Execute stmt, after preprocessing, in self.globals.

        If the statement raises, oops prints diagnostics and the exception
        is re-raised.
        """
        stmt = self.preproc(stmt, 'exec') + '\n'
        if self.verbose:
            print("******* executing {%s} in %s"
                  % (stmt, list(self.globals.keys())))
        try:
            # The call form of exec is accepted by Python 2 and Python 3.
            exec(stmt, self.globals)
        except Exception:
            self.oops('exec', stmt)

    def oops(self, why, what):
        """Print diagnostics for a failed eval/exec, then re-raise.

        why is 'eval' or 'exec' and what is the offending source text.
        Must be called from inside an except block: the bare raise
        re-raises the exception that is currently being handled.
        """
        print('Something went wrong in %sing {%s}' % (why, what))
        print('Globals: %s %s' % (list(self.globals.keys()),
                                  self.globals.get('SECTIONS', '???')))
        raise

    def preproc(self, string, why, reg=re.compile(r"\s([<>&])\s"),
                table={'&': ' &amp; ', '<': ' &lt; ', '>': ' &gt; '}):
        """Preprocess template text according to why.

        'exec' / 'eval': strip surrounding whitespace and, if the text
            starts with '/', change that '/' to '_'.  Empty text is
            returned as ''.
        'copy': expand & < > into HTML entities when they are surrounded by
            whitespace.  The match includes the whitespace on both sides,
            which is replaced by the single spaces stored in table.

        Any other value of why returns None.
        """
        if why in ('exec', 'eval'):
            string = string.strip()
            # startswith (rather than string[0]) is safe on empty text,
            # e.g. a '#[' line immediately followed by '#]'.
            if string.startswith('/'):
                string = '_' + string[1:]
            return string
        elif why == 'copy':
            return reg.sub(lambda match: table[match.group(1)], string)

    def copyfile(self, filename, ext="html"):
        """Convert filename.* to filename.ext, where ext defaults to html.

        Reads the template lines from filename, records the name in the
        module-level global yaptu_filename, and writes the expanded text to
        the output file.  The output file is closed when copying ends, even
        on error, and self.outf is then set back to the stream it held
        before the call.
        """
        global yaptu_filename
        outname = re.sub('[.][a-zA-Z0-9]+?$', '', filename) + '.' + ext
        print('Transforming %s to %s' % (filename, outname))
        with open(filename) as infile:
            self.globals['_bl'] = infile.readlines()
        yaptu_filename = filename
        previous_outf = self.outf
        outfile = open(outname, 'w')
        self.outf = outfile
        try:
            self.copyblock()
        finally:
            outfile.close()
            self.outf = previous_outf
|
||||
|
||||
if __name__ == '__main__':
    # Templates are evaluated in this module's namespace (globals() is
    # handed to the Copier), so the global names bound here are kept as
    # they were.
    copier = Copier(globals())
    for filename in sys.argv[1:]:
        if filename != '-v':
            copier.copyfile(filename)
        else:
            # '-v' switches on verbose tracing for the files after it.
            copier.verbose = 1
|
||||
Reference in New Issue
Block a user