updated contents from Atlas repo

This commit is contained in:
Luciano Ramalho
2014-10-14 14:26:55 -03:00
parent 40688c038d
commit 981d5bc473
157 changed files with 71134 additions and 1 deletions

View File

@@ -0,0 +1,55 @@
"""
Container ``in`` operator performance test
"""
import sys
import timeit
# Source template run by ``timeit`` as the *setup* statement.
# Placeholders filled in with ``str.format``:
#   {size}            number of doubles to read from ``selected.arr``
#   {container_type}  expression naming the container under test; it is
#                     pasted into the code verbatim
#   {verbose}         ``True``/``False``; enables the progress output
# The statements nested under ``with``/``if``/``else`` must stay indented,
# otherwise ``timeit`` cannot compile the generated source.
SETUP = '''
import array
selected = array.array('d')
with open('selected.arr', 'rb') as fp:
    selected.fromfile(fp, {size})
if {container_type} is dict:
    haystack = dict.fromkeys(selected, 1)
else:
    haystack = {container_type}(selected)
if {verbose}:
    print(type(haystack), end=' ')
    print('haystack: %10d' % len(haystack), end=' ')
needles = array.array('d')
with open('not_selected.arr', 'rb') as fp:
    needles.fromfile(fp, 500)
needles.extend(selected[::{size}//500])
if {verbose}:
    print(' needles: %10d' % len(needles), end=' ')
'''

# Source template for the timed statement: count how many needles are
# found in the haystack using the ``in`` operator.
# Placeholder: {verbose} as above.
TEST = '''
found = 0
for n in needles:
    if n in haystack:
        found += 1
if {verbose}:
    print(' found: %10d' % found)
'''
def test(container_type, verbose):
    """Time the ``in`` operator for haystacks of growing size.

    For each size 10**3 .. 10**7 the SETUP/TEST templates are rendered and
    handed to ``timeit.repeat`` (5 repetitions of a single run); one table
    row ``|size|seconds`` is printed using the fastest repetition.

    container_type -- text substituted for ``{container_type}`` in SETUP
    verbose -- value substituted for ``{verbose}`` in both templates
    """
    MAX_EXPONENT = 7
    column_width = MAX_EXPONENT + 1
    # TEST does not depend on the size, so render it once up front
    stmt = TEST.format(verbose=verbose)
    for exponent in range(3, MAX_EXPONENT + 1):
        size = 10 ** exponent
        setup = SETUP.format(container_type=container_type,
                             size=size, verbose=verbose)
        timings = timeit.repeat(stmt=stmt, setup=setup, repeat=5, number=1)
        print('|{:{}d}|{:f}'.format(size, column_width, min(timings)))
if __name__ == '__main__':
    # an optional -v flag may appear anywhere on the command line
    verbose = '-v' in sys.argv
    if verbose:
        sys.argv.remove('-v')
    # exactly one argument must remain: the container type to benchmark
    if len(sys.argv) == 2:
        test(sys.argv[1], verbose)
    else:
        print('Usage: %s <container_type>' % sys.argv[0])

View File

@@ -0,0 +1,37 @@
"""
Generate data for container performance test
"""
import random
import array
MAX_EXPONENT = 7
HAYSTACK_LEN = 10 ** MAX_EXPONENT
NEEDLES_LEN = 10 ** (MAX_EXPONENT - 1)
SAMPLE_LEN = HAYSTACK_LEN + NEEDLES_LEN // 2

needles = array.array('d')  # NOTE(review): not used anywhere below


def _random_value():
    """Return a random float greater than or equal to 1.0.

    ``random.random()`` yields values in [0.0, 1.0), so dividing by it
    directly could raise ZeroDivisionError; ``1 - random.random()`` lies
    in (0.0, 1.0] and is always a safe divisor.
    """
    return 1 / (1 - random.random())


# a set keeps only distinct values, so it may come up short of SAMPLE_LEN
sample = {_random_value() for _ in range(SAMPLE_LEN)}
print('initial sample: %d elements' % len(sample))

# complete sample, in case duplicate random numbers were discarded
while len(sample) < SAMPLE_LEN:
    sample.add(_random_value())

print('complete sample: %d elements' % len(sample))

sample = array.array('d', sample)
random.shuffle(sample)

# the first NEEDLES_LEN // 2 shuffled values are held out of the haystack
not_selected = sample[:NEEDLES_LEN // 2]
print('not selected: %d samples' % len(not_selected))
print(' writing not_selected.arr')
with open('not_selected.arr', 'wb') as fp:
    not_selected.tofile(fp)

# everything else is written out as the haystack data
selected = sample[NEEDLES_LEN // 2:]
print('selected: %d samples' % len(selected))
print(' writing selected.arr')
with open('selected.arr', 'wb') as fp:
    selected.tofile(fp)

30
dicts/dialcodes.py Normal file
View File

@@ -0,0 +1,30 @@
# BEGIN DIALCODES
# dial codes of the top 10 most populous countries
DIAL_CODES = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]

# built from the pairs in the order they are listed above
d1 = dict(DIAL_CODES)  # <1>
print('d1:', d1.keys())

# built from the pairs sorted by dial code (default tuple ordering)
_by_code = sorted(DIAL_CODES)
d2 = dict(_by_code)  # <2>
print('d2:', d2.keys())

# built from the pairs sorted by country name
_by_country = sorted(DIAL_CODES, key=lambda pair: pair[1])
d3 = dict(_by_country)  # <3>
print('d3:', d3.keys())

# equality of dicts depends only on their key/value pairs
assert d1 == d2 == d3  # <4>
# END DIALCODES
"""
# BEGIN DIALCODES_OUTPUT
d1: dict_keys([880, 1, 86, 55, 7, 234, 91, 92, 62, 81])
d2: dict_keys([880, 1, 91, 86, 81, 55, 234, 7, 92, 62])
d3: dict_keys([880, 81, 1, 86, 55, 7, 234, 91, 92, 62])
# END DIALCODES_OUTPUT
"""

34
dicts/dict_perftest.py Normal file
View File

@@ -0,0 +1,34 @@
"""
Dict performance test
"""
import timeit
# Source template run by ``timeit`` as the *setup* statement.
# Placeholder filled in with ``str.format``:
#   {size}  number of doubles to read from ``selected.arr``
# The haystack maps each float to its ``as_integer_ratio()`` tuple; the
# needles are 500 values from ``not_selected.arr`` plus the first 500
# selected values.
# The ``with`` bodies must stay indented or ``timeit`` cannot compile this.
SETUP = '''
import array
selected = array.array('d')
with open('selected.arr', 'rb') as fp:
    selected.fromfile(fp, {size})
haystack = dict((n, n.as_integer_ratio()) for n in selected)
print('haystack: %10d' % len(haystack), end=' ')
needles = array.array('d')
with open('not_selected.arr', 'rb') as fp:
    needles.fromfile(fp, 500)
needles.extend(selected[:500])
# print(' needles: %10d' % len(needles), end=' ')
'''

# Timed statement: count the needles that are keys of the haystack.
TEST = '''
found = 0
for n in needles:
    if n in haystack:
        found += 1
# print(' found: %10d' % found)
'''
MAX_EXPONENT = 7

# one table row per haystack size, from 10**3 up to 10**MAX_EXPONENT
for exponent in range(3, MAX_EXPONENT + 1):
    size = 10 ** exponent
    # fastest of five single-run repetitions
    timings = timeit.repeat(stmt=TEST, setup=SETUP.format(size=size),
                            repeat=5, number=1)
    print('|{:{}d}|{:f}'.format(size, MAX_EXPONENT + 1, min(timings)))

20
dicts/hashdiff.py Normal file
View File

@@ -0,0 +1,20 @@
import sys
# number of binary digits needed to write sys.maxsize
MAX_BITS = sys.maxsize.bit_length()
# one more bit than MAX_BITS is reported as the build's word size
print('%s-bit Python build' % (MAX_BITS + 1))
def hash_diff(o1, o2):
    """Return a text report comparing the hashes of two objects.

    The report has four lines: ``repr(o1)`` with its hash in binary,
    a row carrying a ``!`` under every position where the two binary
    strings differ followed by the number of differences, ``repr(o2)``
    with its hash in binary, and a separator made of dashes.
    """
    # both hashes rendered in binary, zero-filled to MAX_BITS characters
    h1, h2 = ('{:>0{}b}'.format(hash(obj), MAX_BITS) for obj in (o1, o2))
    marks = [' ' if b1 == b2 else '!' for b1, b2 in zip(h1, h2)]
    diff = ''.join(marks)
    count = '!= {}'.format(marks.count('!'))
    # label column: wide enough for either repr, never narrower than 8
    width = max(8, len(repr(o1)), len(repr(o2)))
    sep = '-' * (width * 2 + MAX_BITS)
    layout = '{!r:{width}} {}\n{:{width}} {} {}\n{!r:{width}} {}\n{}'
    return layout.format(
        o1, h1, ' ' * width, diff, count, o2, h2, sep, width=width)
if __name__ == '__main__':
    # pairs of values whose hashes are compared, one report per pair
    demo_pairs = [
        (1, 1.0),
        (1.0, 1.0001),
        (1.0001, 1.0002),
        (1.0002, 1.0003),
    ]
    for left, right in demo_pairs:
        print(hash_diff(left, right))

25
dicts/index.py Normal file
View File

@@ -0,0 +1,25 @@
# adapted from Alex Martelli's example in "Re-learning Python"
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
# (slide 41) Ex: lines-by-word file index
# BEGIN INDEX
"""Build an index mapping word -> list of occurrences"""
import sys
import re
# raw string: '\w' is not a valid escape in an ordinary string literal
WORD_RE = re.compile(r'\w+')

# word -> list of (line_no, column_no) locations, both counted from 1
index = {}
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start()+1
            location = (line_no, column_no)
            index.setdefault(word, []).append(location)  # <1>

# print in alphabetical order
for word in sorted(index, key=str.upper):
    print(word, index[word])
# END INDEX

28
dicts/index0.py Normal file
View File

@@ -0,0 +1,28 @@
# adapted from Alex Martelli's example in "Re-learning Python"
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
# (slide 41) Ex: lines-by-word file index
# BEGIN INDEX0
"""Build an index mapping word -> list of occurrences"""
import sys
import re
# raw string: '\w' is not a valid escape in an ordinary string literal
WORD_RE = re.compile(r'\w+')

# word -> list of (line_no, column_no) locations, both counted from 1
index = {}
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start()+1
            location = (line_no, column_no)
            # this is ugly; coded like this to make a point
            occurrences = index.get(word, [])  # <1>
            occurrences.append(location)       # <2>
            index[word] = occurrences          # <3>

# print in alphabetical order
for word in sorted(index, key=str.upper):
    print(word, index[word])
# END INDEX0

22
dicts/index_alex.py Normal file
View File

@@ -0,0 +1,22 @@
# adapted from Alex Martelli's example in "Re-learning Python"
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
# (slide 41) Ex: lines-by-word file index
"""Build a map word -> list-of-line-numbers"""
import sys
import re
# raw string: '\W' is not a valid escape in an ordinary string literal
NONWORD_RE = re.compile(r'\W+')

# word -> list of line numbers (counted from 1) where the word occurs
idx = {}
with open(sys.argv[1], encoding='utf-8') as fp:
    for n, line in enumerate(fp, 1):
        for word in NONWORD_RE.split(line):
            # splitting can yield empty strings; skip them
            if word.strip():
                idx.setdefault(word, []).append(n)

# print in alphabetical order
for word in sorted(idx, key=str.upper):
    print(word, idx[word])

26
dicts/index_default.py Normal file
View File

@@ -0,0 +1,26 @@
# adapted from Alex Martelli's example in "Re-learning Python"
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
# (slide 41) Ex: lines-by-word file index
# BEGIN INDEX_DEFAULT
"""Build an index mapping word -> list of occurrences"""
import sys
import re
import collections
# raw string: '\w' is not a valid escape in an ordinary string literal
WORD_RE = re.compile(r'\w+')

# word -> list of (line_no, column_no) locations, both counted from 1;
# a missing word starts out with an empty list
index = collections.defaultdict(list)  # <1>
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start()+1
            location = (line_no, column_no)
            index[word].append(location)  # <2>

# print in alphabetical order
for word in sorted(index, key=str.upper):
    print(word, index[word])
# END INDEX_DEFAULT

48
dicts/set_perftest.py Normal file
View File

@@ -0,0 +1,48 @@
"""
Set performance test
"""
import timeit
# Source template run by ``timeit`` as the *setup* statement.
# Placeholders filled in with ``str.format``:
#   {size}  number of doubles to read from ``selected.arr``
#   {type}  callable used to build the haystack; pasted in verbatim
# The needles end up as a set built from 500 values read from
# ``not_selected.arr`` plus the first 500 selected values.
# The ``with`` bodies must stay indented or ``timeit`` cannot compile this.
SETUP = '''
import array
selected = array.array('d')
with open('selected.arr', 'rb') as fp:
    selected.fromfile(fp, {size})
haystack = {type}(selected)
# print('haystack: %10d' % len(haystack), end=' ')
needles = array.array('d')
with open('not_selected.arr', 'rb') as fp:
    needles.fromfile(fp, 500)
needles.extend(selected[:500])
needles = set(needles)
# print(' needles: %10d' % len(needles), end=' ')
'''

# (name, statement) pairs to be timed; each statement checks that exactly
# 500 needles were found in the haystack.
tests = [
    ('FOR_LOOP_TEST', '''
found = 0
for n in needles:
    if n in haystack:
        found += 1
assert found == 500
'''),
    ('SET_&_TEST', '''
found = len(needles & haystack)
assert found == 500
'''
    )]
MAX_EXPONENT = 7

for collection_type in 'dict.fromkeys set list'.split():
    # every statement is run against set haystacks, only the first otherwise
    available_tests = tests if collection_type == 'set' else tests[:1]
    for test_name, test in available_tests:
        print('*' * 25, collection_type, test_name)
        # one table row per haystack size, from 10**3 up to 10**MAX_EXPONENT
        for exponent in range(3, MAX_EXPONENT + 1):
            size = 10 ** exponent
            setup = SETUP.format(type=collection_type, size=size)
            # fastest of five single-run repetitions
            timings = timeit.repeat(stmt=test, setup=setup,
                                    repeat=5, number=1)
            print('|{:{}d}|{:9.6f}'.format(size, MAX_EXPONENT + 1,
                                           min(timings)))

72
dicts/strkeydict.py Normal file
View File

@@ -0,0 +1,72 @@
"""StrKeyDict always converts non-string keys to `str`
Tests for item retrieval using `d[key]` notation::
>>> d = StrKeyDict([('2', 'two'), ('4', 'four')])
>>> d['2']
'two'
>>> d[4]
'four'
>>> d[1]
Traceback (most recent call last):
...
KeyError: '1'
Tests for the `in` operator::
>>> 2 in d
True
>>> 1 in d
False
Test for item assignment using non-string key::
>>> d[0] = 'zero'
>>> d['0']
'zero'
Tests for update using a `dict` or a sequence of pairs::
>>> d.update({6:'six', '8':'eight'})
>>> sorted(d.keys())
['0', '2', '4', '6', '8']
>>> d.update([(10, 'ten'), ('12', 'twelve')])
>>> sorted(d.keys())
['0', '10', '12', '2', '4', '6', '8']
>>> d.update([1, 3, 5])
Traceback (most recent call last):
...
TypeError: 'int' object is not iterable
"""
# BEGIN STRKEYDICT
import collections
import collections.abc
class StrKeyDict(collections.UserDict):  # <1>
    """Mapping that stores every key as its ``str()`` form.

    Keys are converted on insertion, on membership tests, and (through
    ``__missing__``) on lookup, so ``d[4]`` and ``d['4']`` address the
    same item.
    """

    def __missing__(self, key):  # <2>
        """Retry a failed lookup with ``str(key)``.

        Raises KeyError when ``key`` is already a ``str``: there is
        nothing left to convert.
        """
        if isinstance(key, str):
            raise KeyError(key)
        return self[str(key)]

    def __contains__(self, key):
        """Return True if ``str(key)`` is one of the stored keys."""
        return str(key) in self.data  # <3>

    def __setitem__(self, key, item):
        """Store ``item`` under ``str(key)``."""
        self.data[str(key)] = item   # <4>

    def update(self, iterable=None, **kwds):
        """Add items from a mapping or an iterable of pairs, then ``kwds``.

        ``iterable`` may be a mapping, any object offering ``keys()`` and
        item access (the same protocol ``dict.update`` accepts), or an
        iterable of ``(key, value)`` pairs. Every item goes through
        ``__setitem__``, so all keys end up as ``str``.
        """
        if iterable is not None:
            if isinstance(iterable, collections.abc.Mapping):  # <5>
                pairs = iterable.items()
            elif hasattr(iterable, 'keys'):
                # mapping-like object not registered as a Mapping
                pairs = ((key, iterable[key]) for key in iterable.keys())
            else:
                pairs = iterable  # <6>
            for key, value in pairs:
                self[key] = value  # <7>
        if kwds:
            self.update(kwds)  # <8>
# END STRKEYDICT

39
dicts/strkeydict0.py Normal file
View File

@@ -0,0 +1,39 @@
"""StrKeyDict0 converts non-string keys to `str` on lookup
# BEGIN STRKEYDICT0_TESTS
Tests for item retrieval using `d[key]` notation::
>>> d = StrKeyDict0([('2', 'two'), ('4', 'four')])
>>> d['2']
'two'
>>> d[4]
'four'
>>> d[1]
Traceback (most recent call last):
...
KeyError: '1'
Tests for the `in` operator::
>>> 2 in d
True
>>> 1 in d
False
# END STRKEYDICT0_TESTS
"""
# BEGIN STRKEYDICT0
class StrKeyDict0(dict):  # <1>
    """``dict`` that retries failed lookups with the key converted to ``str``.

    Keys are stored exactly as given; only lookups (``d[key]``,
    ``d.get(key)`` and ``key in d``) fall back to ``str(key)``.
    """

    def __missing__(self, key):
        """Retry a failed ``d[key]`` with ``str(key)``.

        Raises KeyError when ``key`` is already a ``str``.
        """
        if isinstance(key, str):  # <2>
            raise KeyError(key)
        return self[str(key)]  # <3>

    def get(self, key, default=None):
        """Return ``self[key]`` if the lookup succeeds, else ``default``.

        ``dict.get`` never consults ``__missing__``; delegating to
        ``self[key]`` keeps ``get`` consistent with ``d[key]`` and ``in``.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def __contains__(self, key):
        """Return True if ``key`` or ``str(key)`` is a stored key."""
        # search self.keys(): ``key in self`` would call this method again
        return key in self.keys() or str(key) in self.keys()  # <4>
# END STRKEYDICT0