updated from Atlas

This commit is contained in:
Luciano Ramalho
2015-04-01 22:48:56 -03:00
parent aab93699a4
commit 573e1a94c4
109 changed files with 5 additions and 6 deletions

View File

@@ -0,0 +1,20 @@
"""
>>> avg = make_averager()
>>> avg(10)
Traceback (most recent call last):
...
UnboundLocalError: local variable 'num_items' referenced before assignment
"""
def make_averager():
    """Return a running-average function that is deliberately broken.

    The inner function rebinds ``num_items`` and ``total`` without
    declaring them ``nonlocal``, so both are compiled as locals of
    ``averager`` and the first call raises ``UnboundLocalError``.
    """
    total = 0
    num_items = 0

    def averager(new_value):
        # Assigning here makes the names local; reading them first fails.
        num_items = num_items + 1
        total = total + new_value
        return total / num_items

    return averager

View File

@@ -0,0 +1,45 @@
"""
>>> avg = make_averager()
>>> other_avg = make_averager()
>>> avg(10)
10.0
>>> avg(11)
10.5
>>> avg(12)
11.0
>>> avg.__code__.co_varnames
('new_value',)
>>> avg.__code__.co_freevars
('num_items', 'total')
>>> avg.__closure__ # doctest: +ELLIPSIS
(<cell at 0x...: int object at 0x...>, <cell at 0x...: int object at 0x...>)
>>> avg.__closure__[0].cell_contents
3
>>> avg.__closure__[1].cell_contents
33
>>> other_avg(5)
5.0
>>> other_avg(10)
7.5
>>> other_avg(15)
10.0
"""
DEMO = """
>>> avg.__closure__
(<cell at 0x10fd24f78: int object at 0x10f6d3db0>,
<cell at 0x10fd24d38: int object at 0x10f6d4170>)
"""
def make_averager():
    """Return a closure that yields the running mean of the values passed to it.

    State lives in two free variables, ``num_items`` and ``total``, that
    the inner function rebinds through ``nonlocal``. Each call to
    ``make_averager`` creates independent state.
    """
    total = 0
    num_items = 0

    def averager(new_value):
        nonlocal total
        nonlocal num_items
        total += new_value
        num_items += 1
        return total / num_items

    return averager

View File

@@ -0,0 +1,39 @@
"""
>>> avg = make_averager()
>>> avg(10)
10.0
>>> avg(11)
10.5
>>> avg(12)
11.0
>>> avg.__code__.co_varnames
('new_value',)
>>> avg.__code__.co_freevars
('ns',)
>>> avg.__closure__ # doctest: +ELLIPSIS
(<cell at 0x...: Namespace object at 0x...>,)
>>> avg.__closure__[0].cell_contents.__dict__
{'total': 33, 'num_items': 3}
"""
DEMO = """
>>> avg.__closure__
(<cell at 0x108df5980: Namespace object at 0x108e06790>,)
"""
class Namespace(object):
    """Empty attribute holder; instances carry the averager's mutable state."""


def make_averager():
    """Return a running-mean closure whose state is stored on a ``Namespace``.

    The inner function only mutates attributes of the free variable
    ``ns``; it never rebinds ``ns`` itself, so no ``nonlocal`` is needed.
    """
    ns = Namespace()
    ns.num_items = 0
    ns.total = 0

    def averager(new_value):
        ns.total += new_value
        ns.num_items += 1
        # float() keeps the division true even where `/` truncates ints.
        return float(ns.total) / ns.num_items

    return averager

View File

@@ -0,0 +1,34 @@
"""
>>> import functools
>>> avg = functools.partial(averager, series=[])
>>> avg(10)
10.0
>>> avg(11)
10.5
>>> avg(12)
11.0
>>> avg.args
()
>>> avg.keywords
{'series': [10, 11, 12]}
>>> avg.func # doctest: +ELLIPSIS
<function averager at 0x...>
>>> avg.func.__code__.co_varnames
('new_value', 'series', 'total')
"""
DEMO = """
>>> avg.func
<function averager at 0x1010c5560>
>>> avg.func.__code__.co_varnames
('new_value',)
>>> avg.__code__.co_freevars
('num_items', 'total')
>>> avg.__closure__
"""
def averager(new_value, series):
    """Append ``new_value`` to ``series`` (in place) and return the new mean.

    ``series`` is mutated, so binding it once (e.g. with
    ``functools.partial``) turns this into a running-average function.
    """
    series.append(new_value)
    total = sum(series)
    count = len(series)
    return float(total) / count

View File

@@ -0,0 +1,28 @@
"""
>>> avg = make_averager()
>>> avg(10)
10.0
>>> avg(11)
10.5
>>> avg(12)
11.0
>>> avg.__code__.co_varnames
('new_value', 'total')
>>> avg.__code__.co_freevars
('series',)
>>> avg.__closure__ # doctest: +ELLIPSIS
(<cell at 0x...: list object at 0x...>,)
>>> avg.__closure__[0].cell_contents
[10, 11, 12]
"""
def make_averager():
    """Return a closure that records every value in a list and returns the mean.

    The list ``series`` is a free variable of the inner function; it is
    mutated, never rebound.
    """
    series = []

    def averager(new_value):
        series.append(new_value)
        total = sum(series)
        return float(total) / len(series)

    return averager

View File

@@ -0,0 +1,15 @@
# clockdeco.py
import time
def clock(func):
    """Decorator: print the elapsed time, call signature and result of each call.

    Only positional arguments are supported, and the wrapper does not
    copy the metadata of ``func`` (its ``__name__`` is ``'clocked'``).
    """
    def clocked(*args):
        started = time.time()
        result = func(*args)
        elapsed = time.time() - started
        shown_args = ', '.join(map(repr, args))
        report = '[%0.8fs] %s(%s) -> %r' % (
            elapsed, func.__name__, shown_args, result)
        print(report)
        return result
    return clocked

View File

@@ -0,0 +1,23 @@
# clockdeco2.py
import time
import functools
def clock(func):
    """Decorator: print elapsed time, arguments and result of each call.

    Positional arguments are shown first, then keyword arguments sorted
    by name. ``functools.wraps`` copies the metadata of ``func`` onto the
    wrapper.
    """
    @functools.wraps(func)
    def clocked(*args, **kwargs):
        started = time.time()
        result = func(*args, **kwargs)
        elapsed = time.time() - started
        pieces = [repr(arg) for arg in args]
        pieces.extend('%s=%r' % (key, value)
                      for key, value in sorted(kwargs.items()))
        arg_str = ', '.join(pieces)
        print('[%0.8fs] %s(%s) -> %r ' % (elapsed, func.__name__, arg_str, result))
        return result
    return clocked

View File

@@ -0,0 +1,31 @@
# clockdeco2_demo.py
"""
>>> pythagoras(3, 4) # doctest: +ELLIPSIS
[0.0...s] pythagoras(3, 4) -> 5.0
5.0
>>> pythagoras(9, h=15) # doctest: +ELLIPSIS
[0.0...s] pythagoras(9, h=15) -> 12.0
12.0
"""
import time
import math
from clockdeco2 import clock
@clock
def pythagoras(a, b=None, h=None):
    """Solve a right triangle given leg ``a`` and one other side.

    With ``h`` (hypotenuse) given, return the missing leg; otherwise
    return the hypotenuse for legs ``a`` and ``b``. ``h`` takes
    precedence when both are supplied.

    Raises TypeError when neither ``b`` nor ``h`` is provided.
    """
    if b is None and h is None:
        raise TypeError('must provide second leg (b) or hypotenuse (h)')
    if h is not None:
        return math.sqrt(h*h - a*a)
    return math.sqrt(a*a + b*b)
if __name__=='__main__':
print('*' * 40, 'Calling pythagoras(3, 4)')
pythagoras(3, 4)
print('*' * 40, 'Calling pythagoras(9, h=15)')
pythagoras(9, h=15)

View File

@@ -0,0 +1,95 @@
"""
>>> f_empty()
[0.0...s] f_empty() -> None
>>> f_args('spam', 3)
[0.0...s] f_args('spam', 3) -> 'spamspamspam'
'spamspamspam'
>>> snooze(1234)
[1...s] snooze(1234) -> None
>>> average(1, 2, 3)
[0.0...s] average(1, 2, 3) -> 2.0
2.0
>>> average(*range(10**3))
[0.0...s] average(0, 1, ..., 999) -> 499.5
499.5
>>> factorial(10)
[0.000...s] factorial(1) -> 1
[0.000...s] factorial(2) -> 2
[0.000...s] factorial(3) -> 6
[0.000...s] factorial(4) -> 24
[0.000...s] factorial(5) -> 120
[0.000...s] factorial(6) -> 720
[0.000...s] factorial(7) -> 5040
[0.000...s] factorial(8) -> 40320
[0.000...s] factorial(9) -> 362880
[0.000...s] factorial(10) -> 3628800
3628800
>>> fibonacci(1)
[0.000...s] fibonacci(1) -> 1
1
>>> fibonacci(5)
[0.000...s] fibonacci(1) -> 1
[0.000...s] fibonacci(0) -> 0
[0.000...s] fibonacci(1) -> 1
[0.000...s] fibonacci(2) -> 1
[0.000...s] fibonacci(3) -> 2
[0.000...s] fibonacci(0) -> 0
[0.000...s] fibonacci(1) -> 1
[0.000...s] fibonacci(2) -> 1
[0.000...s] fibonacci(1) -> 1
[0.000...s] fibonacci(0) -> 0
[0.000...s] fibonacci(1) -> 1
[0.000...s] fibonacci(2) -> 1
[0.000...s] fibonacci(3) -> 2
[0.000...s] fibonacci(4) -> 3
[0.000...s] fibonacci(5) -> 5
5
>>> f_kwargs(3, 5, d='spam', c='eggs')
[0.0...s] f_kwargs(3, 5, c='eggs', d='spam') -> 15
15
>>> f_args.__name__
'f_args'
>>> f_kwargs.__name__
'f_kwargs'
"""
import time
from clockdeco2 import clock
@clock
def f_empty():
    """Do nothing; exercises the decorator with no arguments and no result."""
    return None
@clock
def f_args(a, b):
    """Return ``a * b``; exercises the decorator with positional arguments."""
    product = a*b
    return product
@clock
def snooze(milis):
    """Sleep for ``milis`` milliseconds."""
    seconds = milis/1000
    time.sleep(seconds)
@clock
def average(*args):
    """Return the arithmetic mean of the positional arguments."""
    total = sum(args)
    return total / len(args)
@clock
def factorial(n):
    """Return n! recursively; any ``n`` below 2 yields 1."""
    if n < 2:
        return 1
    return n*factorial(n-1)
@clock
def fibonacci(n):
    """Return the n-th Fibonacci number by naive double recursion."""
    if n >= 2:
        # fibonacci(n-2) is evaluated first; that fixes the trace order.
        return fibonacci(n-2) + fibonacci(n-1)
    return n
@clock
def f_kwargs(a, b, c=1, d='eggs'):
    """Return ``a * b`` after a 1 ms pause; ``c`` and ``d`` are ignored.

    The extra parameters exist so the decorator can be exercised with
    keyword arguments.
    """
    time.sleep(0.001)
    return a*b
import doctest
doctest.testmod(optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)

View File

@@ -0,0 +1,22 @@
from clockdeco import clock
import time
@clock
def snooze(milis):
    """Pause for ``milis`` milliseconds."""
    delay = milis/1000
    time.sleep(delay)
@clock
def factorial(n):
    """Return n! recursively; any ``n`` below 2 yields 1."""
    if n < 2:
        return 1
    return n*factorial(n-1)
@clock
def fibonacci(n):
    """Return the n-th Fibonacci number by naive double recursion."""
    if n >= 2:
        # Left operand first: fibonacci(n-2) runs before fibonacci(n-1).
        return fibonacci(n-2) + fibonacci(n-1)
    return n
snooze(123)
print(factorial(6))
print(fibonacci(4))

View File

@@ -0,0 +1,20 @@
import functools
from clockdeco import clock
@functools.lru_cache()
@clock
def factorial(n):
    """Return n! recursively; results are memoized by ``lru_cache``.

    The cache wraps the clocked function, so a cache hit never reaches
    the clock wrapper.
    """
    if n < 2:
        return 1
    return n*factorial(n-1)
@functools.lru_cache()
@clock
def fibonacci(n):
    """Return the n-th Fibonacci number; results are memoized by ``lru_cache``."""
    if n >= 2:
        return fibonacci(n-2) + fibonacci(n-1)
    return n
print(factorial(6))
print(fibonacci(6))

View File

@@ -0,0 +1,82 @@
"""
>>> f_empty()
[0.0...s] f_empty() -> None
>>> f_args('spam', 3)
[0.0...s] f_args('spam', 3) -> 'spamspamspam'
'spamspamspam'
>>> snooze(1234)
[1...s] snooze(1234) -> None
>>> average(1, 2, 3)
[0.0...s] average(1, 2, 3) -> 2.0
2.0
>>> average(*range(10**3))
[0.0...s] average(0, 1, ..., 999) -> 499.5
499.5
>>> factorial(10)
[0.000...s] factorial(1) -> 1
[0.000...s] factorial(2) -> 2
[0.000...s] factorial(3) -> 6
[0.000...s] factorial(4) -> 24
[0.000...s] factorial(5) -> 120
[0.000...s] factorial(6) -> 720
[0.000...s] factorial(7) -> 5040
[0.000...s] factorial(8) -> 40320
[0.000...s] factorial(9) -> 362880
[0.000...s] factorial(10) -> 3628800
3628800
>>> fibonacci(1)
[0.000...s] fibonacci(1) -> 1
1
>>> fibonacci(5)
[0.000...s] fibonacci(1) -> 1
[0.000...s] fibonacci(0) -> 0
[0.000...s] fibonacci(1) -> 1
[0.000...s] fibonacci(2) -> 1
[0.000...s] fibonacci(3) -> 2
[0.000...s] fibonacci(0) -> 0
[0.000...s] fibonacci(1) -> 1
[0.000...s] fibonacci(2) -> 1
[0.000...s] fibonacci(1) -> 1
[0.000...s] fibonacci(0) -> 0
[0.000...s] fibonacci(1) -> 1
[0.000...s] fibonacci(2) -> 1
[0.000...s] fibonacci(3) -> 2
[0.000...s] fibonacci(4) -> 3
[0.000...s] fibonacci(5) -> 5
5
>>> f_args.__name__
'clocked'
"""
import time
from clockdeco import clock
@clock
def f_empty():
    """Do nothing; exercises the decorator with no arguments and no result."""
    return None
@clock
def f_args(a, b):
    """Return ``a * b``; exercises the decorator with positional arguments."""
    product = a*b
    return product
@clock
def snooze(milis):
    """Sleep for ``milis`` milliseconds."""
    seconds = milis/1000
    time.sleep(seconds)
@clock
def average(*args):
    """Return the arithmetic mean of the positional arguments."""
    total = sum(args)
    return total / len(args)
@clock
def factorial(n):
    """Return n! recursively; any ``n`` below 2 yields 1."""
    if n < 2:
        return 1
    return n*factorial(n-1)
@clock
def fibonacci(n):
    """Return the n-th Fibonacci number by naive double recursion."""
    if n >= 2:
        # fibonacci(n-2) is evaluated first; that fixes the trace order.
        return fibonacci(n-2) + fibonacci(n-1)
    return n
import doctest
doctest.testmod(optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)

View File

@@ -0,0 +1,50 @@
# currency.py
"""
>>> convert(1, 'BRL', 'USD')
0.4591
>>> convert(1, 'USD', 'BRL')
2.1784
>>> convert(1, 'EUR', 'USD')
1.3482
>>> convert(1, 'USD', 'EUR')
0.7417
>>> convert(1, 'EUR', 'BRL')
2.9369
>>> convert(1, 'BRL', 'EUR')
0.3405
>>> from functools import partial
>>> eur = partial(convert, cur_to='EUR')
>>> eur(1, 'USD')
0.7417
>>> eur(1, 'BRL')
0.3405
>>> eur2brl = partial(convert, cur_from='EUR', cur_to='BRL')
>>> eur2brl(100)
293.6864
>>> type(eur2brl)
<class 'functools.partial'>
"""
DEMO = """
>>> eur2brl.func
<function convert at 0x1010c5560>
>>> eur2brl.args, eur2brl.keywords
((), {'cur_from': 'EUR', 'cur_to': 'BRL'})
"""
# Units of each currency per one unit of the reference currency (USD).
rates = {
    'BRL': 2.17836,
    'CAD': 1.03615,
    'CNY': 6.10562,
    'EUR': 0.74173,
    'GBP': 0.62814,
    'INR': 61.8685,
    'JPY': 98.6002,
    'USD': 1.0,
}

reference = rates['USD']


def convert(amount, cur_from, cur_to):
    """Convert ``amount`` from ``cur_from`` to ``cur_to``, rounded to 4 places.

    The amount is first expressed in the reference currency, then in the
    target currency. Raises KeyError for a currency code not in ``rates``.
    """
    source_rate = rates[cur_from]
    ref_amount = reference / source_rate * amount
    target_amount = ref_amount * rates[cur_to]
    return round(target_amount, 4)

View File

@@ -0,0 +1,60 @@
# source: http://oeis.org/A000045
fibo_seq = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610,
987, 1597, 2584, 4181, 6765, 10946, 17711, 28657, 46368, 75025,
121393, 196418, 317811, 514229, 832040, 1346269, 2178309,
3524578, 5702887, 9227465, 14930352, 24157817, 39088169]
from functools import lru_cache
def fibonacci(n):
    """Return the n-th Fibonacci number by naive (exponential) recursion."""
    if n >= 2:
        return fibonacci(n-2) + fibonacci(n-1)
    return n
@lru_cache()
def fibonacci2(n):
    """Return the n-th Fibonacci number; recursion is memoized by ``lru_cache``."""
    if n >= 2:
        return fibonacci2(n-2) + fibonacci2(n-1)
    return n
def memoize(func):
    '''simplest memoizing decorator

    Results are cached in a dict keyed by the tuple of positional
    arguments, so the arguments must be hashable. Keyword arguments are
    not supported and the cache is never cleared.
    '''
    cache = {}

    def memoized(*args):
        try:
            return cache[args]
        except KeyError:
            # First call with these arguments: compute and remember.
            cache[args] = func(*args)
            return cache[args]

    return memoized
def test():
    """Check ``fibonacci`` against the first 31 reference values in ``fibo_seq``.

    Prints each index and expected value before asserting on it.
    """
    for i, expected in zip(range(31), fibo_seq):
        print(i, expected)
        assert fibonacci(i) == expected
def chronograph():
    """Time ``fibonacci(32)`` three ways, printing n, result and seconds for each.

    Runs, in order: the plain recursive ``fibonacci``, the
    ``lru_cache``-decorated ``fibonacci2``, and ``fibonacci`` after the
    global name has been rebound to its ``memoize``-wrapped version.
    The rebinding happens inside the third timed region and persists
    after this function returns.
    """
    global fibonacci
    from time import time

    n = 32

    started = time()
    res = fibonacci(n)
    finished = time()
    print(n, res, format(finished - started, '0.6f'))

    started = time()
    res = fibonacci2(n)
    finished = time()
    print(n, res, format(finished - started, '0.6f'))

    started = time()
    # Rebinding the global makes the recursive calls hit the memoized wrapper.
    fibonacci = memoize(fibonacci)
    res = fibonacci(n)
    finished = time()
    print(n, res, format(finished - started, '0.6f'))
if __name__=='__main__':
#test()
chronograph()

View File

@@ -0,0 +1,63 @@
"""
>>> f1(3)
>>> b = 8
>>> f1(3)
a = 3
b = 8
>>> f2(3)
Traceback (most recent call last):
...
UnboundLocalError: local variable 'b' referenced before assignment
>>> f3(3)
a = 3
b = 7
b = 6
>>> b = -5
>>> ff = f4()
>>> ff(3)
a = 3
b = 11
b = 6
>>> print('b =', b)
b = -5
"""
def f1(a):
    """Print the argument ``a`` and then the global ``b``.

    ``b`` is only read, so it is looked up in the module namespace at
    call time; a NameError is raised after ``a`` is printed if no global
    ``b`` exists yet.
    """
    print('a =', a)
    print('b =', b)
def f2(a):
    """Print ``a``, then fail with UnboundLocalError when reading ``b``.

    Because ``b`` is assigned further down, it is compiled as a local
    variable, so the read before the assignment cannot fall back to a
    global ``b``.
    """
    print('a =', a)
    print('b =', b)
    b = a * 10
    print('b =', b)
def f3(a):
    """Print ``a`` and the global ``b``, then rebind the global ``b`` to ``a * 10``.

    The ``global`` declaration makes both the read and the assignment
    refer to the module-level name; the new value is printed last.
    """
    global b
    print('a =', a)
    print('b =', b)
    b = a * 10
    print('b =', b)
def f3b(a):
    """Print ``a`` and the global ``b``, then rebind the global ``b`` to ``a * 10``.

    This function originally declared ``nonlocal b``. ``nonlocal`` can
    only name a variable bound in an enclosing *function* scope; at
    module level there is none, so the declaration was a compile-time
    SyntaxError that stopped the whole module from loading. ``global``
    is the declaration that reaches a module-level name.
    """
    global b
    print('a =', a)
    print('b =', b)
    b = a * 10
    print('b =', b)
def f4():
    """Return a closure that prints and rebinds an enclosed variable ``b``.

    ``b`` starts at 11 in this function's scope. The returned function
    prints its argument and the current ``b``, rebinds ``b`` to ``a * 2``
    through ``nonlocal`` and prints the new value. A global ``b`` is
    never touched.
    """
    b = 11

    def f5(a):
        nonlocal b
        print('a =', a)
        print('b =', b)
        b = a * 2
        print('b =', b)

    return f5
import doctest
doctest.testmod(optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)

View File

@@ -0,0 +1,27 @@
def d1(f):
    """Decorator: announce ``d1/wrapped`` and then call the zero-argument ``f``."""
    def wrapped():
        print('d1/wrapped')
        result = f()
        return result
    return wrapped
def d2(f):
    """Decorator: announce ``d2/wrapped`` and then call the zero-argument ``f``."""
    def wrapped():
        print('d2/wrapped')
        result = f()
        return result
    return wrapped
# Stacked decorators apply bottom-up: this is f = d1(d2(f)).
@d1
@d2
def f():
    """Print ``f``; wrapped by ``d2`` first and then by ``d1``."""
    print('f')


# Output order: d1/wrapped, d2/wrapped, f.
f()
def g():
    """Print ``g``; decorated by hand just below."""
    print('g')


# Manual equivalent of stacking @d1 over @d2 on the definition.
g = d1(d2(g))

g()

View File

@@ -0,0 +1,34 @@
"""
Dict performance test
"""
import timeit
SETUP = '''
import array
selected = array.array('d')
with open('selected.arr', 'rb') as fp:
selected.fromfile(fp, {size})
haystack = dict((n, n.as_integer_ratio()) for n in selected)
print('haystack: %10d' % len(haystack), end=' ')
needles = array.array('d')
with open('not_selected.arr', 'rb') as fp:
needles.fromfile(fp, 500)
needles.extend(selected[:500])
# print(' needles: %10d' % len(needles), end=' ')
'''
TEST = '''
found = 0
for n in needles:
if n in haystack:
found += 1
# print(' found: %10d' % found)
'''
MAX_EXPONENT = 7

# One table row per haystack size 10**3 .. 10**MAX_EXPONENT; each row reports
# the best of five single-run timings of TEST.
for exponent in range(3, MAX_EXPONENT + 1):
    size = 10**exponent
    timings = timeit.repeat(
        stmt=TEST,
        setup=SETUP.format(size=size),
        repeat=5,
        number=1,
    )
    print('|{:{}d}|{:f}'.format(size, MAX_EXPONENT + 1, min(timings)))

22
attic/dicts/index_alex.py Normal file
View File

@@ -0,0 +1,22 @@
# adapted from Alex Martelli's example in "Re-learning Python"
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
# (slide 41) Ex: lines-by-word file index
"""Build a map word -> list-of-line-numbers"""
import sys
import re
# Raw string: in a plain literal '\W' is an invalid escape sequence, which
# newer Python versions warn about.
NONWORD_RE = re.compile(r'\W+')

# Maps each word to the list of line numbers (1-based) where it occurs;
# a word appearing twice on a line gets that line number twice.
idx = {}
with open(sys.argv[1], encoding='utf-8') as fp:
    for n, line in enumerate(fp, 1):
        for word in NONWORD_RE.split(line):
            # split() yields empty strings at the line edges; skip them.
            if word.strip():
                idx.setdefault(word, []).append(n)

# print in alphabetical order
for word in sorted(idx, key=str.upper):
    print(word, idx[word])

View File

@@ -0,0 +1,48 @@
"""
Set performance test
"""
import timeit
SETUP = '''
import array
selected = array.array('d')
with open('selected.arr', 'rb') as fp:
selected.fromfile(fp, {size})
haystack = {type}(selected)
# print('haystack: %10d' % len(haystack), end=' ')
needles = array.array('d')
with open('not_selected.arr', 'rb') as fp:
needles.fromfile(fp, 500)
needles.extend(selected[:500])
needles = set(needles)
# print(' needles: %10d' % len(needles), end=' ')
'''
tests = [
('FOR_LOOP_TEST', '''
found = 0
for n in needles:
if n in haystack:
found += 1
assert found == 500
'''),
('SET_&_TEST', '''
found = len(needles & haystack)
assert found == 500
'''
)]
MAX_EXPONENT = 7

for collection_type in 'dict.fromkeys set list'.split():
    # The `&` test is run only for 'set'; the other types get the loop test alone.
    available_tests = tests if collection_type == 'set' else tests[:1]
    for test_name, test in available_tests:
        print('*' * 25, collection_type, test_name)
        for exponent in range(3, MAX_EXPONENT + 1):
            size = 10**exponent
            # Best of five single-run timings for this haystack size.
            timings = timeit.repeat(
                stmt=test,
                setup=SETUP.format(type=collection_type, size=size),
                repeat=5,
                number=1,
            )
            print('|{:{}d}|{:9.6f}'.format(size, MAX_EXPONENT + 1, min(timings)))

View File

@@ -0,0 +1,53 @@
"""StrKeyDict0 converts non-string keys to `str` on lookup
# BEGIN STRKEYDICT0_TESTS
Tests for item retrieval using `d[key]` notation::
>>> d = StrKeyDict0([('2', 'two'), ('4', 'four')])
>>> d['2']
'two'
>>> d[4]
'four'
>>> d[1]
Traceback (most recent call last):
...
KeyError: '1'
Tests for item retrieval using `d.get(key)` notation::
>>> d.get('2')
'two'
>>> d.get(4)
'four'
>>> d.get(1, 'N/A')
'N/A'
Tests for the `in` operator::
>>> 2 in d
True
>>> 1 in d
False
# END STRKEYDICT0_TESTS
"""
# BEGIN STRKEYDICT0
import collections
class StrKeyDict0(collections.UserDict):  # <1>
    """Mapping whose lookups fall back to the ``str`` form of a missing key.

    Keys are stored exactly as given; only ``d[key]`` misses and ``in``
    tests convert the key with ``str``.
    """

    def __contains__(self, key):
        return str(key) in self.data  # <3>

    def __missing__(self, key):
        if not isinstance(key, str):  # <2>
            return self[str(key)]  # <3>
        # A str key that is missing has no further fallback.
        raise KeyError(key)
# END STRKEYDICT0

View File

@@ -0,0 +1,95 @@
"""StrKeyDict always converts non-string keys to `str`
This is a variation of `strkeydict.StrKeyDict` implemented
as a `dict` built-in subclass (instead of a `UserDict` subclass)
Test for initializer: keys are converted to `str`.
>>> d = StrKeyDict([(2, 'two'), ('4', 'four')])
>>> sorted(d.keys())
['2', '4']
Tests for item retrieval using `d[key]` notation::
>>> d['2']
'two'
>>> d[4]
'four'
>>> d[1]
Traceback (most recent call last):
...
KeyError: '1'
Tests for item retrieval using `d.get(key)` notation::
>>> d.get('2')
'two'
>>> d.get(4)
'four'
>>> d.get(1, 'N/A')
'N/A'
Tests for the `in` operator::
>>> 2 in d
True
>>> 1 in d
False
Test for item assignment using non-string key::
>>> d[0] = 'zero'
>>> d['0']
'zero'
Tests for update using a `dict` or a sequence of pairs::
>>> d.update({6:'six', '8':'eight'})
>>> sorted(d.keys())
['0', '2', '4', '6', '8']
>>> d.update([(10, 'ten'), ('12', 'twelve')])
>>> sorted(d.keys())
['0', '10', '12', '2', '4', '6', '8']
>>> d.update([1, 3, 5])
Traceback (most recent call last):
...
TypeError: 'int' object is not iterable
"""
import collections.abc
class StrKeyDict(dict):
    """``dict`` subclass that stores every key as its ``str`` form.

    Construction, item assignment and ``update`` all convert keys with
    ``str``; lookups and ``in`` tests accept either the original key or
    its string form.
    """

    def __init__(self, iterable=None, **kwds):
        super().__init__()
        # Route all initial data through update() so keys get converted.
        self.update(iterable, **kwds)

    def __missing__(self, key):
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def __contains__(self, key):
        # Test against the keys view, not `self`, to avoid recursing here.
        keys = self.keys()
        return key in keys or str(key) in keys

    def __setitem__(self, key, item):
        super().__setitem__(str(key), item)

    def get(self, key, default=None):
        try:
            value = self[key]
        except KeyError:
            return default
        return value

    def update(self, iterable=None, **kwds):
        if iterable is not None:
            if isinstance(iterable, collections.abc.Mapping):
                iterable = iterable.items()
            # Assign item by item so __setitem__ converts each key.
            for key, value in iterable:
                self[key] = value
        if kwds:
            self.update(kwds)

View File

@@ -0,0 +1,300 @@
"""Unit tests for transformdict.py."""
import unittest
from test import support
from test import mapping_tests
import pickle
import copy
from functools import partial
from transformdict import TransformDict
def str_lower(s):
    """Return ``s.lower()``; a plain module-level function used as a transform."""
    lowered = s.lower()
    return lowered
class TransformDictTestBase(unittest.TestCase):
    """Shared helper for the TransformDict test cases."""

    def check_underlying_dict(self, d, expected):
        """
        Check for implementation details.

        ``d._data`` must equal ``expected``, ``d._original`` must have the
        same keys, and applying ``d._transform`` to each value of
        ``d._original`` must give back its key, in order.
        """
        self.assertEqual(d._data, expected)
        self.assertEqual(set(d._original), set(expected))
        originals = d._original
        transformed = [d._transform(v) for v in originals.values()]
        self.assertEqual(transformed, list(originals.keys()))
# Behavioural tests for TransformDict itself. Comments are used instead of
# docstrings on the test methods so unittest's verbose output is unchanged.
class TestTransformDict(TransformDictTestBase):

    # Constructor: argument validation, then each supported way of passing
    # initial data (pairs, dict, keywords, dict plus keywords).
    def test_init(self):
        with self.assertRaises(TypeError):
            TransformDict()
        with self.assertRaises(TypeError):
            # Too many positional args
            TransformDict(str.lower, {}, {})
        with self.assertRaises(TypeError):
            # Not a callable
            TransformDict(object())
        d = TransformDict(str.lower)
        self.check_underlying_dict(d, {})
        pairs = [('Bar', 1), ('Foo', 2)]
        d = TransformDict(str.lower, pairs)
        self.assertEqual(sorted(d.items()), pairs)
        self.check_underlying_dict(d, {'bar': 1, 'foo': 2})
        d = TransformDict(str.lower, dict(pairs))
        self.assertEqual(sorted(d.items()), pairs)
        self.check_underlying_dict(d, {'bar': 1, 'foo': 2})
        d = TransformDict(str.lower, **dict(pairs))
        self.assertEqual(sorted(d.items()), pairs)
        self.check_underlying_dict(d, {'bar': 1, 'foo': 2})
        d = TransformDict(str.lower, {'Bar': 1}, Foo=2)
        self.assertEqual(sorted(d.items()), pairs)
        self.check_underlying_dict(d, {'bar': 1, 'foo': 2})

    def test_transform_func(self):
        # Test the `transform_func` attribute
        d = TransformDict(str.lower)
        self.assertIs(d.transform_func, str.lower)
        # The attribute is read-only
        with self.assertRaises(AttributeError):
            d.transform_func = str.upper

    # Transforms other than str.lower: one that changes the key type and one
    # that is not idempotent.
    def test_various_transforms(self):
        d = TransformDict(lambda s: s.encode('utf-8'))
        d['Foo'] = 5
        self.assertEqual(d['Foo'], 5)
        self.check_underlying_dict(d, {b'Foo': 5})
        with self.assertRaises(AttributeError):
            # 'bytes' object has no attribute 'encode'
            d[b'Foo']
        # Another example
        d = TransformDict(str.swapcase)
        d['Foo'] = 5
        self.assertEqual(d['Foo'], 5)
        self.check_underlying_dict(d, {'fOO': 5})
        with self.assertRaises(KeyError):
            d['fOO']

    # NOTE: we mostly test the operations which are not inherited from
    # MutableMapping.

    # Item assignment and lookup under any spelling of the key.
    def test_setitem_getitem(self):
        d = TransformDict(str.lower)
        with self.assertRaises(KeyError):
            d['foo']
        d['Foo'] = 5
        self.assertEqual(d['foo'], 5)
        self.assertEqual(d['Foo'], 5)
        self.assertEqual(d['FOo'], 5)
        with self.assertRaises(KeyError):
            d['bar']
        self.check_underlying_dict(d, {'foo': 5})
        d['BAR'] = 6
        self.assertEqual(d['Bar'], 6)
        self.check_underlying_dict(d, {'foo': 5, 'bar': 6})
        # Overwriting
        d['foO'] = 7
        self.assertEqual(d['foo'], 7)
        self.assertEqual(d['Foo'], 7)
        self.assertEqual(d['FOo'], 7)
        self.check_underlying_dict(d, {'foo': 7, 'bar': 6})

    # Deleting by any spelling removes the entry; deleting again raises.
    def test_delitem(self):
        d = TransformDict(str.lower, Foo=5)
        d['baR'] = 3
        del d['fOO']
        with self.assertRaises(KeyError):
            del d['Foo']
        with self.assertRaises(KeyError):
            del d['foo']
        self.check_underlying_dict(d, {'bar': 3})

    # get() with and without a default, for present and absent keys.
    def test_get(self):
        d = TransformDict(str.lower)
        default = object()
        self.assertIs(d.get('foo'), None)
        self.assertIs(d.get('foo', default), default)
        d['Foo'] = 5
        self.assertEqual(d.get('foo'), 5)
        self.assertEqual(d.get('FOO'), 5)
        self.assertIs(d.get('bar'), None)
        self.check_underlying_dict(d, {'foo': 5})

    # getitem() returns the (original key, value) pair.
    def test_getitem(self):
        d = TransformDict(str.lower)
        d['Foo'] = 5
        self.assertEqual(d.getitem('foo'), ('Foo', 5))
        self.assertEqual(d.getitem('FOO'), ('Foo', 5))
        with self.assertRaises(KeyError):
            d.getitem('bar')

    # pop() with and without a default, using differently-cased keys.
    def test_pop(self):
        d = TransformDict(str.lower)
        default = object()
        with self.assertRaises(KeyError):
            d.pop('foo')
        self.assertIs(d.pop('foo', default), default)
        d['Foo'] = 5
        self.assertIn('foo', d)
        self.assertEqual(d.pop('foo'), 5)
        self.assertNotIn('foo', d)
        self.check_underlying_dict(d, {})
        d['Foo'] = 5
        self.assertIn('Foo', d)
        self.assertEqual(d.pop('FOO'), 5)
        self.assertNotIn('foo', d)
        self.check_underlying_dict(d, {})
        with self.assertRaises(KeyError):
            d.pop('foo')

    # clear() on an empty and on a populated dict.
    def test_clear(self):
        d = TransformDict(str.lower)
        d.clear()
        self.check_underlying_dict(d, {})
        d['Foo'] = 5
        d['baR'] = 3
        self.check_underlying_dict(d, {'foo': 5, 'bar': 3})
        d.clear()
        self.check_underlying_dict(d, {})

    # `in` must return real bools (assertIs) for every spelling of the key.
    def test_contains(self):
        d = TransformDict(str.lower)
        self.assertIs(False, 'foo' in d)
        d['Foo'] = 5
        self.assertIs(True, 'Foo' in d)
        self.assertIs(True, 'foo' in d)
        self.assertIs(True, 'FOO' in d)
        self.assertIs(False, 'bar' in d)

    # len() counts transformed keys, so re-spelled keys do not add entries.
    def test_len(self):
        d = TransformDict(str.lower)
        self.assertEqual(len(d), 0)
        d['Foo'] = 5
        self.assertEqual(len(d), 1)
        d['BAR'] = 6
        self.assertEqual(len(d), 2)
        d['foo'] = 7
        self.assertEqual(len(d), 2)
        d['baR'] = 3
        self.assertEqual(len(d), 2)
        del d['Bar']
        self.assertEqual(len(d), 1)

    # Iteration yields the original (untransformed) keys.
    def test_iter(self):
        d = TransformDict(str.lower)
        it = iter(d)
        with self.assertRaises(StopIteration):
            next(it)
        d['Foo'] = 5
        d['BAR'] = 6
        self.assertEqual(set(x for x in d), {'Foo', 'BAR'})

    # The first spelling of a key is kept until the entry is deleted and
    # re-created; setdefault() on an existing key does not replace it.
    def test_first_key_retained(self):
        d = TransformDict(str.lower, {'Foo': 5, 'BAR': 6})
        self.assertEqual(set(d), {'Foo', 'BAR'})
        d['foo'] = 7
        d['baR'] = 8
        d['quux'] = 9
        self.assertEqual(set(d), {'Foo', 'BAR', 'quux'})
        del d['foo']
        d['FOO'] = 9
        del d['bar']
        d.setdefault('Bar', 15)
        d.setdefault('BAR', 15)
        self.assertEqual(set(d), {'FOO', 'Bar', 'quux'})

    # repr() shows the transform function and a dict of original keys.
    def test_repr(self):
        d = TransformDict(str.lower)
        self.assertEqual(repr(d),
            "TransformDict(<method 'lower' of 'str' objects>, {})")
        d['Foo'] = 5
        self.assertEqual(repr(d),
            "TransformDict(<method 'lower' of 'str' objects>, {'Foo': 5})")

    # With unhashable original keys the expected repr uses a list of pairs.
    def test_repr_non_hashable_keys(self):
        d = TransformDict(id)
        self.assertEqual(repr(d),
            "TransformDict(<built-in function id>, {})")
        d[[1]] = 2
        self.assertEqual(repr(d),
            "TransformDict(<built-in function id>, [([1], 2)])")
# Runs the generic mapping protocol tests against TransformDict and adds
# copy / deepcopy / pickle checks. Subclasses override the `TransformDict`
# and `type2test` class attributes to re-run everything on another class.
class TransformDictMappingTests(TransformDictTestBase,
                                mapping_tests.BasicTestMappingProtocol):

    # Class under test, looked up through `self` by the copy checks below.
    TransformDict = TransformDict
    # Factory handed to the inherited mapping tests.
    type2test = partial(TransformDict, str.lower)

    # Helper: `copy_func(d)` must give a new dict of the same class and
    # transform whose values are shared with the original.
    def check_shallow_copy(self, copy_func):
        d = self.TransformDict(str_lower, {'Foo': []})
        e = copy_func(d)
        self.assertIs(e.__class__, self.TransformDict)
        self.assertIs(e._transform, str_lower)
        self.check_underlying_dict(e, {'foo': []})
        e['Bar'] = 6
        self.assertEqual(e['bar'], 6)
        with self.assertRaises(KeyError):
            d['bar']
        # Mutating the value through the copy is visible in the original.
        e['foo'].append(5)
        self.assertEqual(d['foo'], [5])
        self.assertEqual(set(e), {'Foo', 'Bar'})

    # Helper: like check_shallow_copy, but values must be copied too.
    def check_deep_copy(self, copy_func):
        d = self.TransformDict(str_lower, {'Foo': []})
        e = copy_func(d)
        self.assertIs(e.__class__, self.TransformDict)
        self.assertIs(e._transform, str_lower)
        self.check_underlying_dict(e, {'foo': []})
        e['Bar'] = 6
        self.assertEqual(e['bar'], 6)
        with self.assertRaises(KeyError):
            d['bar']
        # Mutating the copy's value must leave the original untouched.
        e['foo'].append(5)
        self.assertEqual(d['foo'], [])
        self.check_underlying_dict(e, {'foo': [5], 'bar': 6})
        self.assertEqual(set(e), {'Foo', 'Bar'})

    def test_copy(self):
        self.check_shallow_copy(lambda d: d.copy())

    def test_copy_copy(self):
        self.check_shallow_copy(copy.copy)

    # dict(d) keeps the original key spelling.
    def test_cast_as_dict(self):
        d = self.TransformDict(str.lower, {'Foo': 5})
        e = dict(d)
        self.assertEqual(e, {'Foo': 5})

    def test_copy_deepcopy(self):
        self.check_deep_copy(copy.deepcopy)

    # A pickle round trip must behave like a deep copy for every protocol.
    def test_pickling(self):
        def pickle_unpickle(obj, proto):
            data = pickle.dumps(obj, proto)
            return pickle.loads(data)
        for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
            with self.subTest(pickle_protocol=proto):
                self.check_deep_copy(partial(pickle_unpickle, proto=proto))
class MyTransformDict(TransformDict):
    """Trivial TransformDict subclass used to re-run the mapping tests."""
class TransformDictSubclassMappingTests(TransformDictMappingTests):
    """Re-run every mapping test with ``MyTransformDict`` as the class under test."""

    TransformDict = MyTransformDict
    type2test = partial(MyTransformDict, str.lower)
def test_main(verbose=None):
    """Run all three TransformDict test classes via ``support.run_unittest``.

    ``verbose`` is accepted but not used by this function.
    """
    support.run_unittest(
        TestTransformDict,
        TransformDictMappingTests,
        TransformDictSubclassMappingTests,
    )
if __name__ == "__main__":
test_main(verbose=True)

36
attic/functions/accgen.py Normal file
View File

@@ -0,0 +1,36 @@
"""
Accumulator generator examples
http://www.paulgraham.com/accgen.html
>>> f3 = foo(3)
>>> f3(2)
5
>>> f3(2)
7
>>> f3(2)
9
"""
class foo0:
    """Accumulator as a callable object: each call adds to ``n`` and returns it."""

    def __init__(self, n):
        self.n = n

    def __call__(self, i):
        self.n += i
        total = self.n
        return total
def foo0(n):
    """Accumulator that keeps its running sum in an attribute of the returned function.

    The inner function stores the sum as ``bar.s``, which avoids
    rebinding a closure variable.
    """
    def bar(i):
        bar.s += i
        total = bar.s
        return total

    bar.s = n
    return bar
def foo(n):
    """Accumulator generator: return a function that adds to ``n`` and returns the sum.

    The running total is the enclosing parameter ``n`` itself, rebound
    from the inner function through ``nonlocal``.
    """
    def bar(i):
        nonlocal n
        n += i
        return n

    return bar

View File

@@ -0,0 +1,25 @@
from collections import namedtuple
from operator import attrgetter

# (name, country code, population, (latitude, longitude))
metro_data = [
    ('Tokyo', 'JP', 36.933, (35.689722, 139.691667)),
    ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889)),
    ('Mexico City', 'MX', 20.142, (19.433333, -99.133333)),
    ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
    ('Sao Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
]

LatLong = namedtuple('LatLong', 'lat long')
Metropolis = namedtuple('Metropolis', 'name cc pop coord')

# Build nested named tuples, unpacking the coordinate pair of each record.
metro_areas = []
for name, cc, pop, (lat, long_) in metro_data:
    metro_areas.append(Metropolis(name, cc, pop, LatLong(lat, long_)))

# Bare expressions: they only display something in an interactive console.
metro_areas[0]
metro_areas[0].coord.lat

# attrgetter accepts dotted names to reach into nested attributes.
name_lat = attrgetter('name', 'coord.lat')
by_latitude = attrgetter('coord.lat')

for city in sorted(metro_areas, key=by_latitude):
    print(name_lat(city))

View File

@@ -0,0 +1,31 @@
>>> metro_data = [
... ('Tokyo', 'JP', 36.933, (35.689722, 139.691667)),
... ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889)),
... ('Mexico City', 'MX', 20.142, (19.433333, -99.133333)),
... ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
... ('Sao Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
... ]
# BEGIN ATTRGETTER_DEMO
>>> from collections import namedtuple
>>> LatLong = namedtuple('LatLong', 'lat long') # <1>
>>> Metropolis = namedtuple('Metropolis', 'name cc pop coord') # <2>
>>> metro_areas = [Metropolis(name, cc, pop, LatLong(lat, long_)) # <3>
... for name, cc, pop, (lat, long_) in metro_data]
>>> metro_areas[0]
Metropolis(name='Tokyo', cc='JP', pop=36.933, coord=LatLong(lat=35.689722, long=139.691667))
>>> metro_areas[0].coord.lat # <4>
35.689722
>>> from operator import attrgetter
>>> name_lat = attrgetter('name', 'coord.lat') # <5>
>>>
>>> for city in sorted(metro_areas, key=attrgetter('coord.lat')): # <6>
... print(name_lat(city)) # <7>
...
('Sao Paulo', -23.547778)
('Mexico City', 19.433333)
('Delhi NCR', 28.613889)
('Tokyo', 35.689722)
('New York-Newark', 40.808611)
# END ATTRGETTER_DEMO

5
attic/functions/hello.py Normal file
View File

@@ -0,0 +1,5 @@
import bobo
@bobo.query('/')
def hello(person):
    """Return a greeting for ``person``, bound to the ``/`` route.

    ``person`` is echoed back into the response, so it is HTML-escaped
    first; otherwise markup in the request would be reflected verbatim.
    """
    # Local import keeps this change self-contained.
    from html import escape
    return 'Hello %s!' % escape(person)

View File

@@ -0,0 +1,76 @@
"""StrKeyDict always converts non-string keys to `str`
Tests for item retrieval using `d[key]` notation::
>>> d = StrKeyDict([('2', 'two'), ('4', 'four')])
>>> d['2']
'two'
>>> d[4]
'four'
>>> d[1]
Traceback (most recent call last):
...
KeyError: '1'
Tests for the `in` operator::
>>> 2 in d
True
>>> 1 in d
False
Test for item assignment using non-string key::
>>> d[0] = 'zero'
>>> d['0']
'zero'
Tests for update using a `dict` or a sequence of pairs::
>>> d.update({6:'six', '8':'eight'})
>>> sorted(d.keys())
['0', '2', '4', '6', '8']
>>> d.update([(10, 'ten'), ('12', 'twelve')])
>>> sorted(d.keys())
['0', '10', '12', '2', '4', '6', '8']
>>> d.update([1, 3, 5])
Traceback (most recent call last):
...
TypeError: 'int' object is not iterable
"""
# BEGIN STRKEYDICT
import collections
import collections.abc
class StrKeyDict(collections.UserDict):  # <1>
    """Mapping that normalizes every key (by default with ``str``).

    Keys are normalized on assignment, on ``update`` and on construction;
    lookups and ``in`` tests accept either the raw or the normalized key.

    Parameters:
        args: optional mapping or iterable of ``(key, value)`` pairs with
            the initial data.
        normalize: one-argument callable applied to every key.
        **kwargs: additional initial items.
    """

    def __init__(self, args=None, normalize=str, **kwargs):
        # Set normalize first: UserDict.__init__ calls self.update(), which
        # goes through __setitem__ and therefore needs it.
        self.normalize = normalize
        # Pass the initial data itself, not `self` plus the unpacked pairs.
        super().__init__(args, **kwargs)

    def __missing__(self, key):  # <2>
        if self.normalize(key) == key:
            # Already normalized, so there is no other form to try.
            raise KeyError(key)
        return self[self.normalize(key)]

    def __contains__(self, key):
        return self.normalize(key) in self.data  # <3>

    def __setitem__(self, key, item):
        self.data[self.normalize(key)] = item   # <4>

    def update(self, iterable=None, **kwds):
        """Add items from a mapping or an iterable of pairs, then from ``kwds``."""
        if iterable is not None:
            if isinstance(iterable, collections.abc.Mapping):  # <5>
                pairs = iterable.items()
            else:
                pairs = ((k, v) for k, v in iterable)  # <6>
            for key, value in pairs:
                self[key] = value  # <7>
        if kwds:
            self.update(kwds)  # <8>
# END STRKEYDICT

View File

@@ -0,0 +1,20 @@
import random
import collections
# Number of random items to draw and display.
SIZE = 15

# First pass: draw SIZE values and sort them to get each value's final slot.
random.seed(1729)
target_list = [random.randrange(SIZE*2) for i in range(SIZE)]
target_list.sort()

# Second pass: the same seed makes the loop below draw the same values
# again, this time in their original (unsorted) order.
random.seed(1729)
# NOTE(review): the filler string may have had its whitespace collapsed
# in this copy of the file; confirm its width against the original.
display_list = [' '] * SIZE
# How many times each value has already been placed, so duplicates land in
# consecutive slots.
occurrences = collections.Counter()
for i in range(SIZE):
    new_item = random.randrange(SIZE*2)
    # index() gives the first slot holding this value in the sorted list.
    pos = target_list.index(new_item) + occurrences[new_item]
    occurrences[new_item] += 1
    display_list[pos] = '%2s, ' % new_item
    # NOTE(review): assumed to run once per placed item (inside the loop);
    # the nesting was lost in this copy of the file -- confirm.
    print('[' + ''.join(display_list) + ']')

View File

@@ -0,0 +1,29 @@
"""
>>> bisect_find([], 1)
-1
>>> import array
>>> import random
>>> SIZE = 10
>>> my_array = array.array('l', range(0, SIZE, 2))
>>> random.seed(42)
>>> for i in range(SIZE):
... print(i, bisect_find(my_array, i))
0 0
1 -1
2 1
3 -1
4 2
5 -1
6 3
7 -1
8 4
9 -1
"""
from bisect import bisect
def bisect_find(seq, item):
    """Return the index of ``item`` in the sorted sequence ``seq``, or -1.

    When ``item`` occurs several times, the index of the rightmost
    occurrence is returned. An empty sequence always yields -1.
    """
    if not seq:
        return -1
    # bisect() returns the insertion point to the right of any equal items.
    candidate = bisect(seq, item) - 1
    if seq[candidate] == item:
        return candidate
    return -1

View File

@@ -0,0 +1,29 @@
"""
>>> bisect_in([], 1)
False
>>> import array
>>> import random
>>> SIZE = 10
>>> my_array = array.array('l', range(0, SIZE, 2))
>>> random.seed(42)
>>> for i in range(SIZE):
... print(i, bisect_in(my_array, i))
0 True
1 False
2 True
3 False
4 True
5 False
6 True
7 False
8 True
9 False
"""
from bisect import bisect
def bisect_in(seq, item):
    """Return whether ``item`` occurs in the sorted sequence ``seq``.

    Uses binary search; an empty sequence always yields ``False``.
    """
    if not seq:
        return False
    # The element just left of the insertion point is the only candidate.
    candidate = bisect(seq, item) - 1
    return seq[candidate] == item

View File

@@ -0,0 +1,32 @@
"""
bisect_time.py
"""
import timeit
SETUP = '''
SIZE = 10**6
import array
import random
from bisect_find import bisect_find
random.seed(42)
haystack = [random.randrange(SIZE)*2 for i in range(SIZE)]
needles = [random.choice(haystack) + i % 2 for i in range(20)]
'''
BISECT = '''
print('bisect:', end=' ')
for n in needles:
print(bisect_find(haystack, n), end=' ')
print()
'''
SORT = '''
print(' in:', end=' ')
for n in needles:
print(int(n in haystack), end=' ')
print()
'''
# Report the best of seven single-run timings: bisect search, then `in`.
for statement in (BISECT, SORT):
    timings = timeit.Timer(statement, SETUP).repeat(7, 1)
    print(min(timings))

Binary file not shown.

View File

@@ -0,0 +1,10 @@
1 0 LOAD_NAME 0 (s)
3 LOAD_NAME 1 (a)
6 DUP_TOP_TWO
7 BINARY_SUBSCR <1>
8 LOAD_NAME 2 (b)
11 INPLACE_ADD <2>
12 ROT_THREE
13 STORE_SUBSCR <3>
14 LOAD_CONST 0 (None)
17 RETURN_VALUE

View File

@@ -0,0 +1,54 @@
>>> from frenchdeck2 import FrenchDeck2, Card
>>> beer_card = Card('7', 'diamonds')
>>> beer_card
Card(rank='7', suit='diamonds')
>>> deck = FrenchDeck2()
>>> len(deck)
52
>>> deck[:3]
[Card(rank='2', suit='spades'), Card(rank='3', suit='spades'), Card(rank='4', suit='spades')]
>>> deck[12::13]
[Card(rank='A', suit='spades'), Card(rank='A', suit='diamonds'), Card(rank='A', suit='clubs'), Card(rank='A', suit='hearts')]
>>> Card('Q', 'hearts') in deck
True
>>> Card('Z', 'clubs') in deck
False
>>> for card in deck: # doctest: +ELLIPSIS
... print(card)
Card(rank='2', suit='spades')
Card(rank='3', suit='spades')
Card(rank='4', suit='spades')
...
>>> for card in reversed(deck): # doctest: +ELLIPSIS
... print(card)
Card(rank='A', suit='hearts')
Card(rank='K', suit='hearts')
Card(rank='Q', suit='hearts')
...
>>> for n, card in enumerate(deck, 1): # doctest: +ELLIPSIS
... print(n, card)
1 Card(rank='2', suit='spades')
2 Card(rank='3', suit='spades')
3 Card(rank='4', suit='spades')
...
>>> def alt_color_rank(card):
... rank_value = FrenchDeck2.ranks.index(card.rank)
... suits = 'diamonds clubs hearts spades'.split()
... return rank_value * len(suits) + suits.index(card.suit)
Rank test:
>>> alt_color_rank(Card('2', 'diamonds'))
0
>>> alt_color_rank(Card('A', 'spades'))
51
>>> for card in sorted(deck, key=alt_color_rank): # doctest: +ELLIPSIS
... print(card)
Card(rank='2', suit='diamonds')
Card(rank='2', suit='clubs')
Card(rank='2', suit='hearts')
...
Card(rank='A', suit='clubs')
Card(rank='A', suit='hearts')
Card(rank='A', suit='spades')

View File

@@ -0,0 +1,26 @@
import collections
import collections.abc
Card = collections.namedtuple('Card', ['rank', 'suit'])


class FrenchDeck2(collections.abc.MutableSequence):
    """A mutable 52-card deck.

    Only the five abstract methods of MutableSequence are implemented
    here; the ABC supplies the rest (``in``, iteration, ``reversed``,
    ``append``, ``remove``, ...). The base class is taken from
    ``collections.abc`` because the alias in ``collections`` was removed
    in Python 3.10.
    """
    ranks = [str(n) for n in range(2, 11)] + list('JQKA')
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        # Suit-major order: all spades first, then diamonds, clubs, hearts.
        self._cards = [Card(rank, suit) for suit in self.suits
                       for rank in self.ranks]

    def __len__(self):
        return len(self._cards)

    def __getitem__(self, position):
        # Delegates to the list, so slices return plain lists of cards.
        return self._cards[position]

    def __setitem__(self, position, value):  # <1>
        self._cards[position] = value

    def __delitem__(self, position):  # <2>
        del self._cards[position]

    def insert(self, position, value):  # <3>
        self._cards.insert(position, value)

View File

@@ -0,0 +1,49 @@
FILENAME = 'metro_areas.txt'
class MetroArea:
    """Plain record describing one metropolitan area."""

    def __init__(self, name, country, pop, pop_change, area):
        self.name, self.country = name, country
        self.pop, self.pop_change = pop, pop_change
        self.area = area

    def __repr__(self):
        template = '{0.name}, {0.country} ({0.pop})'
        return template.format(self)

    def density(self):
        """Return population divided by area."""
        population, surface = self.pop, self.area
        return population / surface
def load():
    """Parse FILENAME into a list of metro-area tuples.

    Lines starting with '#' are skipped. Every other line is split on
    tabs into eight fields; the result holds one
    ``(name, country, pop, pop_change, area)`` tuple per line, with the
    three numeric fields converted to float (thousands separators are
    removed from the population first).
    """
    records = []
    with open(FILENAME, encoding='utf-8') as text:
        for line in text:
            if not line.startswith('#'):
                # Country Name Rank Population Yr_change % Area(km2) Pop/km2
                fields = line.split('\t')
                country, name, _, pop, pop_change, _, area, _ = fields
                record = (name,
                          country,
                          float(pop.replace(',', '')),
                          float(pop_change),
                          float(area))
                records.append(record)
    return records
def list(metro_areas):
    """Print the metro-area tuples as an aligned text table.

    `metro_areas` is an iterable of
    ``(name, country, pop, pop_change, area)`` tuples. The header has
    one column per field, with the same widths as the data rows.
    (The function name shadows the builtin; it is kept for callers.)
    """
    # Five placeholders for five labels, widths matching the row format.
    print('{:^18} {:2} {:>6} {:>4} {:>6}'.format(
        'name', 'cc', 'pop', 'chg', 'area'))
    for metro in metro_areas:
        print('{:18} {:2} {:6.0f} {:4.0f} {:6.0f}'.format(*metro))
def list_instances(metro_areas):
    """Build a MetroArea from every tuple, then print each instance."""
    instances = []
    for fields in metro_areas:
        instances.append(MetroArea(*fields))
    # Printing starts only after every instance was built successfully.
    for instance in instances:
        print(instance)
if __name__ == '__main__':
    # Script entry point: load the data file and print one MetroArea per
    # line. The disabled call below prints the raw tuples as a table.
    #list(load())
    list_instances(load())

View File

@@ -0,0 +1,29 @@
# THE 20 LARGEST WORLD METROPOLITAN AREAS, 2003: DATA FOR FORSTALL DEFINITIONS
# (Populations in thousands estimated for 1 July 2003.)
# CC Name Rank Population Yr_change % Area(km2) Pop/km2
JP Tokyo 1 32,450 213 0.66 8014 4049.2
KR Seoul 2 20,550 227 1.12 5076 4048.5
MX Mexico City 3 20,450 307 1.54 7346 2783.8
US New York 4 19,750 120 0.61 17884 1104.3
IN Mumbai (Bombay) 5 19,200 472 2.53 2350 8170.2
ID Jakarta 6 18,900 225 1.21 5100 3705.9
BR Sao Paulo 7 18,850 289 1.57 8479 2223.1
IN Delhi-New Delhi 8 18,600 686 3.86 3182 5845.4
JP Osaka-Kobe-Kyoto 9 17,375 28 0.16 6930 2507.2
CN Shanghai 10 16,650 335 2.07 5177 3216.1
PH Manila 11 16,300 461 2.96 2521 6465.7
CN Hong Kong-Shenzhen 12 15,800 797 5.42 3051 5178.6
US Los Angeles 13 15,250 205 1.38 10780 1414.7
IN Kolkata (Calcutta) 14 15,100 257 1.74 1785 8459.4
RU Moscow 15 15,000 103 0.69 14925 1005.0
EG Cairo 16 14,450 257 1.89 1600 9031.3
AR Buenos Aires 17 13,170 79 0.62 10888 1209.6
UK London 18 12,875 112 0.87 11391 1130.3
CN Beijing 19 12,500 301 2.49 6562 1904.9
PK Karachi 20 11,800 370 3.43 1100 10727.3
# Data source:
# WHICH ARE THE LARGEST? WHY PUBLISHED POPULATIONS
# FOR MAJOR WORLD URBAN AREAS VARY SO GREATLY
# Richard L. Forstall, Richard P. Greene, James B. Pick
# http://web.archive.org/web/20130114203922/http://www.uic.edu/cuppa/cityfutures/papers/webpapers/cityfuturespapers/session3_4/3_4whicharethe.pdf
# Country codes added by L. Ramalho

View File

@@ -0,0 +1,21 @@
# Fixed-width invoice text; the first line after the opening quotes is a
# column ruler, the following lines are the items.
# NOTE(review): the slices below assume the item lines are padded to the
# columns marked on the ruler -- confirm the spacing of this literal.
invoice = """
0.....6.................................40........52...55........
1909 Pimoroni PiBrella $17.50 3 $52.50
1489 6mm Tactile Switch x20 $4.95 2 $9.90
1510 Panavise Jr. - PV-201 $28.00 1 $28.00
1601 PiTFT Mini Kit 320x240 $34.95 1 $34.95
"""

# Field name -> slice of the line holding that field.
structure = dict(
    SKU = slice(0, 6),
    DESCRIPTION = slice(6, 40),
    UNIT_PRICE = slice(40, 52),
    QUANTITY = slice(52, 55),
    ITEM_TOTAL = slice(55, None),
)

# [2:] drops the empty string before the first newline and the ruler.
for line in invoice.split('\n')[2:]:
    if not line.strip():
        # The literal ends with a newline, so split() yields a final
        # empty string; it is not an invoice item.
        continue
    line_item = {field: line[chunk].strip()
                 for field, chunk in structure.items()}
    print(line_item)

View File

@@ -0,0 +1,75 @@
>>> from sentence import Sentence
>>> s = Sentence('The time has come')
>>> s
Sentence('The time has come')
>>> s[0]
'The'
>>> list(s)
['The', 'time', 'has', 'come']
>>> s = Sentence('"The time has come," the Walrus said,')
>>> s
Sentence('"The time ha... Walrus said,')
>>> s[0]
'The'
>>> s[1]
'time'
>>> for word in s:
... print(word)
The
time
has
come
the
Walrus
said
>>> list(s)
['The', 'time', 'has', 'come', 'the', 'Walrus', 'said']
>>> s[-2]
'Walrus'
>>> s[2:4]
['has', 'come']
>>> s[:4]
['The', 'time', 'has', 'come']
>>> s[4:]
['the', 'Walrus', 'said']
>>> s3 = Sentence('Pig and Pepper')
>>> it = iter(s3)
>>> it # doctest: +ELLIPSIS
<iterator object at 0x...>
>>> next(it)
'Pig'
>>> next(it)
'and'
>>> next(it)
'Pepper'
>>> next(it)
Traceback (most recent call last):
...
StopIteration
>>> list(it)
[]
>>> list(iter(s3))
['Pig', 'and', 'Pepper']
>>> s = Sentence('''The right of the people to be secure in
... their persons, houses, papers, and effects, against
... unreasonable searches and seizures, shall not be violated,''')
>>> s
Sentence('The right of... be violated,')
>>> list(s) # doctest: +ELLIPSIS
['The', 'right', 'of', 'the', 'people', ... 'not', 'be', 'violated']
>>> s = Sentence('Agora vou-me. Ou me vão?')
>>> s
Sentence('Agora vou-me. Ou me vão?')
>>> list(s)
['Agora', 'vou', 'me', 'Ou', 'me', 'vão']

View File

@@ -0,0 +1,24 @@
"""
Sentence: access words by index
"""
import re
import reprlib
RE_WORD = re.compile(r'\w+')  # raw string: '\w' is not a valid str escape


class Sentence:
    """A text exposed as a sequence of its words.

    Words are the maximal runs of ``\\w`` characters in `text`. Indexing
    and slicing are delegated to the word list, which also makes the
    object iterable through the legacy ``__getitem__`` protocol.
    """

    def __init__(self, text):
        self.text = text
        self.words = RE_WORD.findall(text)  # <1>

    def __getitem__(self, index):
        return self.words[index]  # <2>

    def __len__(self):  # <3>
        # len() calls __len__ with no extra argument; an additional
        # required parameter makes len(sentence) raise TypeError.
        return len(self.words)

    def __repr__(self):
        # reprlib.repr abbreviates long texts with '...'.
        return 'Sentence(%s)' % reprlib.repr(self.text)  # <4>

View File

@@ -0,0 +1,79 @@
>>> from sentence_slice import SentenceSlice
>>> s = SentenceSlice('the')
>>> s.tokens
['the']
>>> s.words
['the']
>>> s = SentenceSlice('the quick brown fox')
>>> s.tokens
['the', ' ', 'quick', ' ', 'brown', ' ', 'fox']
>>> s.words
['the', 'quick', 'brown', 'fox']
>>> s[0]
'the'
>>> s[1]
'quick'
>>> s[-1]
'fox'
>>> s[2:4]
SentenceSlice('brown fox')
>>> s[1:]
SentenceSlice('quick brown fox')
>>> s[:3]
SentenceSlice('the quick brown')
>>> s = SentenceSlice('"The time has come," the Walrus said,')
>>> s.tokens
['"', 'The', ' ', 'time', ' ', 'has', ' ', 'come', ',"', ' ', 'the', ' ', 'Walrus', ' ', 'said', ',']
>>> s.words
['The', 'time', 'has', 'come', 'the', 'Walrus', 'said']
>>> s[:3]
SentenceSlice('"The time has')
>>> s[:4]
SentenceSlice('"The time has come,"')
>>> s[4:]
SentenceSlice('the Walrus said,')
>>> s[1:5]
SentenceSlice('time has come," the')
>>> s[1:6]
SentenceSlice('time has come," the Walrus')
>>> s[1:7]
SentenceSlice('time has com... Walrus said,')
>>> s[1:8]
SentenceSlice('time has com... Walrus said,')
>>> s[6:]
SentenceSlice('said,')
>>> s[7:]
SentenceSlice('')
>>> s[8:]
SentenceSlice('')
>>> s[:-3]
SentenceSlice('"The time has come,"')
>>> s[-4:-2]
SentenceSlice('come," the')
>>> s[0:2]
SentenceSlice('"The time')
>>> s = SentenceSlice('''"The time has come," the Walrus said,
... "To talk of many things:"''')
>>> s.tokens
['"', 'The', ' ', 'time', ' ', 'has', ' ', 'come', ',"', ' ', 'the', ' ', 'Walrus', ' ', 'said', ',', '\n', '"', 'To', ' ', 'talk', ' ', 'of', ' ', 'many', ' ', 'things', ':"']
>>> s.words
['The', 'time', 'has', 'come', 'the', 'Walrus', 'said', 'To', 'talk', 'of', 'many', 'things']
>>> s = SentenceSlice('Agora vou-me. Ou me vão?')
>>> s.tokens
['Agora', ' ', 'vou', '-', 'me', '.', ' ', 'Ou', ' ', 'me', ' ', 'vão', '?']
>>> s.words
['Agora', 'vou', 'me', 'Ou', 'me', 'vão']
>>> s[1:]
SentenceSlice('vou-me. Ou me vão?')
>>> s[:2]
SentenceSlice('Agora vou-')
>>> s[2:]
SentenceSlice('-me. Ou me vão?')

View File

@@ -0,0 +1,66 @@
"""
SentenceSlice: access words by index, sub-sentences by slices
"""
import re
import reprlib
# Raw strings: '\w' and '\s' are regex escapes, not str escapes.
RE_TOKEN = re.compile(r'\w+|\s+|[^\w\s]+')
RE_WORD = re.compile(r'\w+')
RE_PUNCTUATION = re.compile(r'[^\w\s]+')


class SentenceSlice:
    """A text indexed by word, whose slices are sub-sentences.

    ``s[i]`` returns the i-th word as a str. ``s[a:b]`` returns a new
    SentenceSlice built from the original text between those words,
    including punctuation attached to the first and last word selected.
    Slices with a step are rejected with LookupError.
    """

    def __init__(self, text):
        self.text = text
        # tokens: words, whitespace runs and punctuation runs, in order
        self.tokens = RE_TOKEN.findall(text)
        self.words = [t for t in self.tokens if RE_WORD.match(t)]
        # word_index[k] is the position in tokens of the k-th word
        self.word_index = [i for i, t in enumerate(self.tokens)
                           if RE_WORD.match(t)]

    def __repr__(self):
        return 'SentenceSlice(%s)' % reprlib.repr(self.text)

    def __getitem__(self, position):
        if isinstance(position, slice):
            if position.step is not None:
                raise LookupError('slice step is not supported')
            start, stop = self._handle_defaults(position)
            start, stop = self._widen(start, stop)
            tokens = self.tokens[start:stop]
            return SentenceSlice(''.join(tokens))
        else:
            return self.words[position]

    def __len__(self):
        # len() passes no extra argument to __len__.
        return len(self.words)

    # helper functions -- implementation detail

    def _handle_defaults(self, position):
        """Translate a word slice into a (start, stop) range of tokens.

        Missing, negative and out-of-range bounds are normalized with
        slice.indices(), as list slicing does. An empty selection is
        reported as an empty range at the end of the token list.
        """
        first, last, _ = position.indices(len(self.word_index))
        if first >= last:  # nothing selected (also: no words at all)
            end = len(self.tokens)
            return end, end
        if position.start is None:  # missing: begin at the first token
            start = 0
        else:
            start = self.word_index[first]
        # stop right after the last word selected
        return start, self.word_index[last - 1] + 1

    def _widen(self, start, stop):
        """widen range of tokens to get punctuation to the left of
        start and to the right of stop"""
        if start < len(self.tokens):
            while (start > 0 and
                   RE_PUNCTUATION.match(self.tokens[start-1])):
                start -= 1
        while (stop < len(self.tokens) and
               RE_PUNCTUATION.match(self.tokens[stop])):
            stop += 1
        return start, stop

View File

@@ -0,0 +1,33 @@
"""
>>> sd = SliceDump()
>>> sd[1]
1
>>> sd[2:5]
slice(2, 5, None)
>>> sd[:2]
slice(None, 2, None)
>>> sd[7:]
slice(7, None, None)
>>> sd[:]
slice(None, None, None)
>>> sd[1:9:3]
slice(1, 9, 3)
>>> sd[1:9:3, 2:3]
(slice(1, 9, 3), slice(2, 3, None))
>>> s = sd[1:9:3]
>>> s.indices(20)
(1, 9, 3)
>>> s.indices(5)
(1, 5, 3)
>>> s.indices(1)
(1, 1, 3)
>>> s.indices(0)
(0, 0, 3)
"""
class SliceDump:
    """Helper for inspecting what the subscript syntax passes along."""

    def __getitem__(self, pos):
        # Return the index object unchanged: an int, a slice, or a tuple
        # of those when several comma-separated indexes are given.
        return pos

View File

@@ -0,0 +1,38 @@
"""
>>> s = SliceDemo()
>>> s[1]
__getitem__: 1
1
>>> s[2:5]
__getitem__: slice(2, 5, None)
[2, 3, 4]
>>> s[:2]
__getitem__: slice(None, 2, None)
[0, 1]
>>> s[7:]
__getitem__: slice(7, None, None)
[7, 8, 9]
>>> s[:]
__getitem__: slice(None, None, None)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>> s[1:9:3]
__getitem__: slice(1, 9, 3)
[1, 4, 7]
>>> s[1:9:3, 2:3]
__getitem__: (slice(1, 9, 3), slice(2, 3, None))
ERROR: list indices must be integers, not tuple
"""
class SliceDemo:
    """Ten-item container that logs every subscript it receives."""

    def __init__(self):
        self.items = [*range(10)]

    def __getitem__(self, pos):
        """Print `pos`, then return ``self.items[pos]``.

        A TypeError raised by the list (e.g. for a tuple index) is
        printed instead of propagated, and None is returned.
        """
        print('__getitem__:', pos)
        try:
            selected = self.items[pos]
        except TypeError as e:
            print('ERROR:', e)
            return None
        return selected

View File

@@ -0,0 +1,38 @@
"""
Extended slicing:
>>> s = SliceViewer()
>>> s[1]
1
>>> s[:]
slice(None, None, None)
>>> s[1:2]
slice(1, 2, None)
>>> s[1:2:3]
slice(1, 2, 3)
>>> s[1:2:3:4]
Traceback (most recent call last):
...
SyntaxError: invalid syntax
N-dimensional indexing:
>>> s[1, 2]
(1, 2)
N-dimensional slicing:
>>> s[1:3, 2]
(slice(1, 3, None), 2)
>>> s[1, :2:]
(1, slice(None, 2, None))
>>> s[:, :]
(slice(None, None, None), slice(None, None, None))
"""
class SliceViewer:
    """Helper that shows the object built by any subscript expression."""

    def __getitem__(self, position):
        # Hand back exactly what the interpreter passed in.
        return position

View File

@@ -0,0 +1,43 @@
"""string concatenation demos"""
from time import perf_counter
def load_lines():
    """Return the lines of 'war-and-peace.txt', repeated 100 times."""
    with open('war-and-peace.txt') as fp:
        lines = fp.readlines()
    return lines * 100  # replace with 200 or more for surprises!!!
def chrono(f):
    """Decorator that times a one-argument function.

    The wrapper calls `f`, prints f's name and the elapsed seconds,
    and returns whatever `f` returned.
    """
    def inner(lines):
        started = perf_counter()
        result = f(lines)
        finished = perf_counter()
        print('%15s: %fs' % (f.__name__, finished - started))
        return result
    return inner
@chrono
def iadd_joiner(lines):
    """Concatenate `lines` by repeated ``+=`` on a str (timed)."""
    # The += loop is the technique being measured; keep it as is.
    text = ''
    for line in lines:
        text += line
    return text
@chrono
def list_joiner(lines):
    """Concatenate `lines` by collecting parts in a list, then join (timed)."""
    # The append-then-join shape is the technique being measured.
    parts = []
    for line in lines:
        parts.append(line)
    return ''.join(parts)
@chrono
def genexp_joiner(lines):
    """Concatenate `lines` by joining a generator expression (timed)."""
    return ''.join(line for line in lines)
if __name__ == '__main__':
    # Run the three joiners on the same input and check that they
    # produced texts of identical length.
    lines = load_lines()
    print('joining %s lines' % len(lines))
    text0 = iadd_joiner(lines)
    text1 = list_joiner(lines)
    text2 = genexp_joiner(lines)
    sizes = (len(text0), len(text1), len(text2))
    assert sizes[0] == sizes[1] == sizes[2], repr(sizes)

173
attic/sequences/table.py Normal file
View File

@@ -0,0 +1,173 @@
"""
=============
Row tests
=============
>>> row = Row([1, 2, 3, 4])
>>> row[1]
2
>>> row[1:3]
Row([2, 3])
=============
Table tests
=============
Create an empty table
>>> t3x4 = Table.blank(3, 4)
>>> t3x4
Table(Row([None, None, None, None]),
Row([None, None, None, None]),
Row([None, None, None, None]))
>>> for i in range(3):
... for j in range(4):
... t3x4[i][j] = chr(65 + i * 4 + j)
...
>>> t3x4
Table(Row(['A', 'B', 'C', 'D']),
Row(['E', 'F', 'G', 'H']),
Row(['I', 'J', 'K', 'L']))
>>> t3x4[1]
Row(['E', 'F', 'G', 'H'])
>>> t3x4[1:]
Table(Row(['E', 'F', 'G', 'H']),
Row(['I', 'J', 'K', 'L']))
>>> t3x4[1][2]
'G'
>>> t3x4[1, 2]
'G'
Slicing returns a table, so index 2 below would be trying to get row index 2
of a table that has only rows 0 and 1:
>>> t3x4[1:][2]
Traceback (most recent call last):
...
IndexError: no row at index 2 of 2-row table
>>> t3x4[:, 2]
Table(Row(['C']),
Row(['G']),
Row(['K']))
>>> t3x4[1:, 2]
Table(Row(['G']),
Row(['K']))
>>> t3x4[1, 2:]
Row(['G', 'H'])
>>> t3x4[:, 1:3]
Table(Row(['B', 'C']),
Row(['F', 'G']),
Row(['J', 'K']))
>>> t3x4[:, :]
Table(Row(['A', 'B', 'C', 'D']),
Row(['E', 'F', 'G', 'H']),
Row(['I', 'J', 'K', 'L']))
>>> t3x4[:, :] == t3x4
True
===============
Error handling
===============
>>> t3x4[5]
Traceback (most recent call last):
...
IndexError: no row at index 5 of 3-row table
>>> t3x4[1,]
Traceback (most recent call last):
...
IndexError: index must be [i] or [i, j]
>>> t3x4[1, 2, 3]
Traceback (most recent call last):
...
IndexError: index must be [i] or [i, j]
>>> t3x4[10:, 2]
Traceback (most recent call last):
...
ValueError: Table must have at least one row.
>>> t3x4[1, 20:]
Traceback (most recent call last):
...
ValueError: Row must have at least one cell.
"""
import collections
class Row(collections.UserList):
    """A non-empty list of cells.

    Raises ValueError when built without cells. Slicing yields another
    Row; an integer index yields the cell value.
    """

    def __init__(self, cells):
        super().__init__(cells)
        if len(self) < 1:
            raise ValueError('Row must have at least one cell.')

    def __getitem__(self, position):
        if isinstance(position, slice):
            return Row(self.data[position])  # build sub-row
        else:
            return self.data[position]  # return cell value

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.data)


class Table(collections.UserList):
    """A table with rows, all of the same width

    Supports ``t[i]`` (row or sub-table) and ``t[i, j]`` (cell, sub-row
    or sub-table), where `i` and `j` are integers or slices. Raises
    ValueError when built without rows or with rows of unequal length.
    """

    def __init__(self, rows):
        super().__init__(Row(r) for r in rows)
        if len(self) < 1:
            raise ValueError('Table must have at least one row.')
        self.width = self.check_width()

    def check_width(self):
        """Return the common row length; ValueError if rows differ."""
        row_widths = {len(row) for row in self.data}
        if len(row_widths) > 1:
            raise ValueError('All rows must have equal length.')
        return row_widths.pop()

    @classmethod
    def blank(class_, rows, columns, filler=None):
        """Build a rows x columns table with every cell set to `filler`."""
        return class_([[filler] * columns for i in range(rows)])

    def __repr__(self):
        prefix = '%s(' % self.__class__.__name__
        indent = ' ' * len(prefix)
        rows = (',\n' + indent).join(
            repr(row) for row in self.data)
        return prefix + rows + ')'

    def _get_indexes(self, position):
        """Split a subscript into (i, j); j is None for ``t[i]``."""
        if isinstance(position, tuple):  # multiple indexes
            if len(position) == 2:  # two indexes: t[i, j]
                return position
            else:
                raise IndexError('index must be [i] or [i, j]')
        else:  # one index: t[i]
            return position, None

    def __getitem__(self, position):
        i, j = self._get_indexes(position)
        if isinstance(i, slice):
            if j is None:  # build sub-table w/ full rows
                return Table(self.data[i])
            if isinstance(j, slice):  # build sub-table w/ sub-rows
                return Table(cells[j] for cells in self.data[i])
            # Single column: cells[j] is a bare cell value, so wrap it
            # in a list to make a one-cell row. Passing the cell itself
            # fails for non-iterable cells and splits multi-character
            # strings into several cells.
            return Table([cells[j]] for cells in self.data[i])
        else:  # i is number
            try:
                row = self.data[i]
            except IndexError:
                msg = 'no row at index %r of %d-row table'
                # report the row index, not the whole (i, j) subscript
                raise IndexError(msg % (i, len(self)))
            if j is None:  # return row at table[i]
                return row
            else:
                return row[j]  # return row[j] or row[a:b]

View File

@@ -0,0 +1,14 @@
>>> coordinates = (-23.547778, -46.635833)
>>> lat, long_ = coordinates
>>> long_
-46.635833
>>> lat
-23.547778
>>> traveler_ids = [('USA', '311975855'), ('BRA', 'CE342567'), ('ESP', 'XDA205856')]
>>> for country, passport_no in sorted(traveler_ids):
... print('%s:%s' % (country, passport_no))
BRA:CE342567
ESP:XDA205856
USA:311975855

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
καφέ

View File

@@ -0,0 +1 @@
café

View File

@@ -0,0 +1,19 @@
import sys
from unicodedata import name, normalize
# List every named character whose casefold() differs from its lower(),
# then print how many there are out of all named characters.
changed = 0
assigned = 0

# sys.maxunicode is itself a valid code point, hence the + 1.
for i in range(sys.maxunicode + 1):
    char = chr(i)
    char_name = name(char, None)
    if char_name is None:  # no name: skip this code point
        continue
    cf = char.casefold()
    assigned += 1
    if cf != char.lower():
        cf_display = ' '.join(cf)
        cf_names = ';'.join(name(c) for c in cf)
        changed += 1
        print('%4d U+%04x' % (changed, i), char, cf_display, char_name + ' -> ' + cf_names, sep='\t')

print(changed, '/', assigned, '=', changed/assigned*100)

View File

@@ -0,0 +1,15 @@
import sys
import unicodedata
# Print the first named character found for each major Unicode category
# (the first letter of the two-letter category code).
categories = set()

# sys.maxunicode is itself a valid code point, hence the + 1.
for i in range(sys.maxunicode + 1):
    char = chr(i)
    name = unicodedata.name(char, None)
    if name is None:  # no name: skip this code point
        continue
    cat = unicodedata.category(char)
    if cat[0] not in categories:
        print('U+%04x' % i, char.center(6),
              cat, name, sep='\t')
        categories.add(cat[0])

View File

@@ -0,0 +1,23 @@
#!/usr/bin/env python
from unicodedata import name
import sys
# Search words come from the command line, or are prompted for.
if len(sys.argv) > 1:
    query = sys.argv[1:]
else:
    query = input('search words: ').split()

# Upper-cased so they can be compared with the words of each name.
query = [s.upper() for s in query]

count = 0
# sys.maxunicode is itself a valid code point, hence the + 1.
for i in range(20, sys.maxunicode + 1):
    car = chr(i)
    descr = name(car, None)
    if descr is None:  # no name: skip this code point
        continue
    words = descr.split()
    # A character matches when every query word is a word of its name.
    if all(word in words for word in query):
        # The template reads i, car and descr from the module namespace.
        print('{i:5d} {i:04x} {car:^5} {descr}'.format(**locals()))
        count += 1

print('{0} character(s) found'.format(count))

View File

@@ -0,0 +1,9 @@
import sys
import unicodedata
# Print every character in Unicode category 'Sc'.
# sys.maxunicode is itself a valid code point, hence the + 1.
for i in range(sys.maxunicode + 1):
    char = chr(i)
    if unicodedata.category(char) == 'Sc':
        name = unicodedata.name(char, None)
        print('U+%04x' % i, char.center(6),
              name, sep='\t')

View File

@@ -0,0 +1,54 @@
import unicodedata
encodings = 'ascii latin1 cp1252 cp437 gb2312 utf-8 utf-16le'.split()

# Display width factor per encoding: 1 for all but the last three,
# which get 2, 4 and 4. Each column is widths[encoding] * 2 wide.
widths = {encoding:1 for encoding in encodings[:-3]}
widths.update(zip(encodings[-3:], (2, 4, 4)))

# Sample characters, sorted by code point.
# NOTE(review): several entries reached this file as empty string
# literals. Two are restored below from their comments, as escapes. The
# other three had no identifying comment and are omitted, because
# ord('') raises TypeError -- re-add them once the intended characters
# are confirmed.
chars = sorted([
    'A',  # \u0041 : LATIN CAPITAL LETTER A
    '¿',  # \u00bf : INVERTED QUESTION MARK
    'Ã',  # \u00c3 : LATIN CAPITAL LETTER A WITH TILDE
    'á',  # \u00e1 : LATIN SMALL LETTER A WITH ACUTE
    'Ω',  # \u03a9 : GREEK CAPITAL LETTER OMEGA
    'µ',
    'Ц',
    '\u20ac',  # \u20ac : EURO SIGN
    '\u6c23',  # \u6c23 : CJK UNIFIED IDEOGRAPH-6C23
    '𝄞',  # \U0001d11e : MUSICAL SYMBOL G CLEF
])

callout1_code = 0x278a  # ➊ DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE

missing_mark = '*'  # shown where a character cannot be encoded


def list_chars():
    """Print each sample character as a source line with code and name."""
    for char in chars:
        print('%r, # \\u%04x : %s' % (char, ord(char), unicodedata.name(char)))


def show_encodings():
    """Print a table of the sample characters in every encoding.

    One row per character: the character, its code point, its bytes in
    hexadecimal under each encoding (or `missing_mark` when the
    encoding cannot represent it), and its Unicode name.
    """
    print(end='\t\t')
    for encoding in encodings:
        print(encoding.ljust(widths[encoding] * 2), end='\t')
    print()

    for lineno, char in enumerate(chars):
        codepoint = 'U+{:04X}'.format(ord(char))
        print(char, codepoint, sep='\t', end='\t')
        for encoding in encodings:
            try:
                octets = char.encode(encoding)  # avoid shadowing `bytes`
                dump = ' '.join('%02X' % byte for byte in octets)
            except UnicodeEncodeError:
                dump = missing_mark
            dump = dump.ljust(widths[encoding] * 2)
            print(dump, end='\t')
        # print(chr(callout1_code + lineno))
        print(unicodedata.name(char))
        # print()


#list_chars()
show_encodings()

View File

@@ -0,0 +1,7 @@
# Two assignments whose targets look alike, followed by a dump of the
# module's non-dunder globals. Each key pairs a name with the tuple of
# its characters, so the code points actually stored are visible.
# NOTE(review): whether the two identifiers differ in their underlying
# code points cannot be told from the rendered text -- compare with the
# script that generates this file.
café = 1
café = 2
names = {(name, tuple(name)):value
         for name, value in globals().items()
         if not name.startswith('__')}
print(names)

View File

@@ -0,0 +1,12 @@
# Source text of the generated script. This is a regular (non-raw)
# string, so \u0301 below becomes an actual U+0301 character in `src`.
src = """
café = 1
cafe\u0301 = 2
names = {(name, tuple(name)):value
for name, value in globals().items()
if not name.startswith('__')}
print(names)
"""
# Write the generated script next to this one, encoded as UTF-8.
with open('identifier_norm.py', 'tw', encoding='utf8') as out:
    out.write(src)

View File

@@ -0,0 +1,14 @@
import sys
from unicodedata import name, normalize
# List named characters whose NFC form is longer than one character.
# sys.maxunicode is itself a valid code point, hence the + 1.
for i in range(sys.maxunicode + 1):
    char = chr(i)
    char_name = name(char, None)
    if char_name is None:  # no name: skip this code point
        continue
    nfc = normalize('NFC', char)
    if nfc == char:  # unchanged by NFC: not interesting
        continue
    # NOTE(review): the print is kept inside this test so nfc_display is
    # always defined when used -- confirm against the original layout.
    if len(nfc) > 1:
        nfc_display = ' '.join(nfc)
        print('U+%04x' % i, char, nfc_display, char_name, sep='\t')

View File

@@ -0,0 +1,16 @@
import sys
from unicodedata import name, normalize
# List named characters that NFKC changes and whose NFKC and NFKD forms
# differ from each other.
# sys.maxunicode is itself a valid code point, hence the + 1.
for i in range(sys.maxunicode + 1):
    char = chr(i)
    char_name = name(char, None)
    if char_name is None:  # no name: skip this code point
        continue
    kc = normalize('NFKC', char)
    if kc == char:  # unchanged by NFKC: not interesting
        continue
    kd = normalize('NFKD', char)
    if kc != kd:
        kc_display = ' '.join(kc)
        kd_display = ' '.join(kd)
        print('U+%04x' % i, char, kc_display, kd_display, char_name, sep='\t')

View File

@@ -0,0 +1,15 @@
import sys
from unicodedata import name
# List named characters for which str.isdigit() or str.isnumeric() is
# true, flagged 'D' and/or 'N'.
# sys.maxunicode is itself a valid code point, hence the + 1.
for i in range(sys.maxunicode + 1):
    char = chr(i)
    try:
        char_name = name(char)
    except ValueError:  # no such name
        continue
    flags = []
    flags.append('D' if char.isdigit() else '')
    flags.append('N' if char.isnumeric() else '')
    if any(flags):
        # Empty flags stay in place so the columns line up.
        flags = '\t'.join(flags)
        print('U+%04x' % i, char, flags, char_name, sep='\t')

View File

@@ -0,0 +1,9 @@
U+0031 1 re_dig isdig isnum 1.00 DIGIT ONE
U+00b2 ² - isdig isnum 2.00 SUPERSCRIPT TWO
U+00bc ¼ - - isnum 0.25 VULGAR FRACTION ONE QUARTER
U+0969 ३ re_dig isdig isnum 3.00 DEVANAGARI DIGIT THREE
U+136b ፫ - isdig isnum 3.00 ETHIOPIC DIGIT THREE
U+216b Ⅻ - - isnum 12.00 ROMAN NUMERAL TWELVE
U+2466 ⑦ - isdig isnum 7.00 CIRCLED DIGIT SEVEN
U+2480 ⒀ - - isnum 13.00 PARENTHESIZED NUMBER THIRTEEN
U+3285 ㊅ - - isnum 6.00 CIRCLED IDEOGRAPH SIX

View File

@@ -0,0 +1,3 @@
# coding: cp1252
# The declaration above must stay on the first or second line: it tells
# Python which encoding to use when decoding this source file.
print('Olá, Mundo!')

View File

@@ -0,0 +1,16 @@
import sys
from unicodedata import name, normalize
# Count the named characters, and how many of them have a code point
# no greater than 0xFFFF.
total_count = 0
bmp_count = 0

# sys.maxunicode is itself a valid code point, hence the + 1.
for i in range(sys.maxunicode + 1):
    char = chr(i)
    char_name = name(char, None)
    if char_name is None:  # no name: skip this code point
        continue
    total_count += 1
    if i <= 0xffff:
        bmp_count += 1

# totals, then the share as a fraction and as a percentage
print(total_count, bmp_count, bmp_count/total_count, bmp_count/total_count*100)

View File

@@ -0,0 +1,26 @@
import locale
def check(sorted_list):
    """Return 'CORRECT' if `sorted_list` equals the reference `fruits`."""
    if fruits == sorted_list:
        return 'CORRECT'
    return 'WRONG'


# Reference list, written by hand in the expected order.
fruits = ['açaí', 'acerola', 'atemoia', 'cajá', 'caju']

print(locale.getlocale(locale.LC_COLLATE))
print('manual_sort ', fruits)

# 1) plain sort, no key
plain_sort = sorted(fruits)
print('plain_sort ', plain_sort, check(plain_sort))

# 2) locale-aware key under whatever collation locale is active
locale_sort1 = sorted(fruits, key=locale.strxfrm)
print('locale_sort1', locale_sort1, check(locale_sort1))

# 3) same key after switching the collation locale to pt_BR.UTF-8
locale.setlocale(locale.LC_COLLATE, 'pt_BR.UTF-8')
print('locale set to:', locale.getlocale(locale.LC_COLLATE))
locale_sort2 = sorted(fruits, key=locale.strxfrm)
print('locale_sort2', locale_sort2, check(locale_sort2))

View File

@@ -0,0 +1,18 @@
from pyuca import Collator
def check(sorted_list):
    """Return 'CORRECT' if `sorted_list` equals the reference `fruits`."""
    if fruits == sorted_list:
        return 'CORRECT'
    return 'WRONG'


# Reference list, written by hand in the expected order.
fruits = ['açaí', 'acerola', 'atemoia', 'cajá', 'caju']
print('manual_sort', fruits)

# plain sort, no key
plain_sort = sorted(fruits)
print('plain_sort ', plain_sort, check(plain_sort))

# sort with pyuca's Collator.sort_key as the key function
coll = Collator()
pyuca_sort = sorted(fruits, key=coll.sort_key)
print('pyuca_sort ', pyuca_sort, check(pyuca_sort))

View File

@@ -0,0 +1,11 @@
PS > pip install pyuca
Downloading/unpacking pyuca
Running setup.py (path:C:\Users\...) egg_info for package pyuca
Installing collected packages: pyuca
Running setup.py install for pyuca
Successfully installed pyuca
Cleaning up...
PS > python .\sorting_uca.py
manual_sort ['açaí', 'acaíba', 'acerola', 'cajá', 'caju']
plain_sort ['acaíba', 'acerola', 'açaí', 'caju', 'cajá'] WRONG
pyuca_sort ['açaí', 'acaíba', 'acerola', 'cajá', 'caju'] CORRECT

View File

@@ -0,0 +1,14 @@
# Walk all code points and report each place where the length of the
# character's repr (quotes removed) changes.
last_len = 0
last_repr = ''
lengths = set()  # every distinct repr length seen so far
for i in range(0x110000):
    r = repr(chr(i))[1:-1]  # repr without the surrounding quotes
    if len(r) != last_len:
        lengths.add(len(r))
        last_len = len(r)
        if i > 0:
            # finish the line left open by the previous report with the
            # repr of the last character of that run
            prev_repr = repr(chr(i-1))[1:-1]
            print('{}'.format(prev_repr))
        print('U+{:04x} {:{max_len}} ...'.format(i, r, max_len=max(lengths)), end=' ')
    # NOTE(review): last_repr is never read in this script; its nesting
    # level could not be recovered, loop level is assumed here.
    last_repr = r

View File

@@ -0,0 +1,50 @@
from itertools import groupby
def bare_repr(codepoint):
    """Return repr() of the character at `codepoint`, quotes removed."""
    quoted = repr(chr(codepoint))
    return quoted[1:-1]
def display(codepoint):
    """Print `codepoint` in U+ notation followed by its bare repr."""
    # NOTE(review): `lengths` is not defined in this script, so calling
    # this function raises NameError as written -- confirm where the
    # name is meant to come from.
    repstr = repr(chr(codepoint))[1:-1]
    column_width = max(lengths)
    print('U+{:04x} {:{max_len}}'.format(
        codepoint, repstr, max_len=column_width))
def repr_shape(codepoint):
    """Classify the repr of a code point as (length, shape).

    `shape` is 'GLYPH' for a one-character repr; otherwise it is the
    first two characters of the repr, which are also recorded in the
    module-level `escapes` set.
    """
    brepr = bare_repr(codepoint)
    size = len(brepr)
    if size == 1:
        return size, 'GLYPH'
    # NOTE(review): only non-glyph shapes are assumed to be recorded;
    # the original nesting of this call could not be recovered.
    shape = brepr[:2]
    escapes.add(shape)
    return size, shape
escapes = set()  # filled in by repr_shape() as the scan proceeds

# Group consecutive code points that share the same (length, shape).
group_gen = groupby((codepoint for codepoint in range(0x110000)), repr_shape)

for len_shape, group in group_gen:
    len_brepr, shape = len_shape
    group = list(group)
    cp_first = group[0]
    cp_last = group[-1]
    cp_mid = group[len(group)//2]
    is_glyph = shape == 'GLYPH'
    # Each run prints: size, first (and last) code point, shape, and for
    # glyph runs up to three sample characters.
    if len(group) == 1:
        glyph_sample = bare_repr(cp_first) if is_glyph else ''
        print('{:6d} U+{:04X} {:5} {}'.format(
            len(group), cp_first, shape, glyph_sample))
    elif len(group) == 2:
        if is_glyph:
            glyph_sample = bare_repr(cp_first) + ' ' + bare_repr(cp_last)
        else:
            glyph_sample = ''
        print('{:6d} U+{:04X} , U+{:04X} {:5} {}'.format(
            len(group), cp_first, cp_last, shape, glyph_sample))
    else:
        if is_glyph:
            glyph_sample = ' '.join([bare_repr(cp_first),
                                     bare_repr(cp_mid), bare_repr(cp_last)])
        else:
            glyph_sample = ''
        print('{:6d} U+{:04X}...U+{:04X} {:5} {}'.format(
            len(group), cp_first, cp_last, shape, glyph_sample))

print('escapes:', ' '.join(sorted(escapes, key=str.upper)))

View File

@@ -0,0 +1,22 @@
>>> s = 'naïve' <1>
>>> b = b'naïve' <2>
Traceback (most recent call last):
...
SyntaxError: bytes can only contain ASCII literal characters.
>>> b = bytes('naïve', 'iso8859-1') <3>
>>> b <4>
b'na\xefve'
>>> s <5>
'naïve'
>>> b == s.encode('iso8859-1') <6>
True
>>> s[2] <7>
'ï'
>>> b[2] <8>
239
>>> ord(s[2]) <9>
239
>>> s.upper() <10>
'NAÏVE'
>>> b.upper() <11>
b'NA\xefVE'