updated contents from Atlas repo
This commit is contained in:
55
dicts/container_perftest.py
Normal file
55
dicts/container_perftest.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""
|
||||
Container ``in`` operator performance test
|
||||
"""
|
||||
import sys
|
||||
import timeit
|
||||
|
||||
# Source template for the ``setup`` argument of ``timeit.repeat``.
# Placeholders filled in with ``str.format``:
#   {size}            number of doubles read from ``selected.arr``
#   {container_type}  expression naming the container class under test
#   {verbose}         ``True``/``False`` literal enabling progress output
# The needles are 500 values read from ``not_selected.arr`` plus a stride
# sample of the values loaded into the haystack.
SETUP = '''
import array
selected = array.array('d')
with open('selected.arr', 'rb') as fp:
    selected.fromfile(fp, {size})
if {container_type} is dict:
    haystack = dict.fromkeys(selected, 1)
else:
    haystack = {container_type}(selected)
if {verbose}:
    print(type(haystack), end=' ')
    print('haystack: %10d' % len(haystack), end=' ')
needles = array.array('d')
with open('not_selected.arr', 'rb') as fp:
    needles.fromfile(fp, 500)
needles.extend(selected[::{size}//500])
if {verbose}:
    print(' needles: %10d' % len(needles), end=' ')
'''
|
||||
|
||||
# Source template for the timed statement: counts how many needles are
# found in the haystack using the ``in`` operator.  ``{verbose}`` is
# replaced by a ``True``/``False`` literal that enables the final print.
TEST = '''
found = 0
for n in needles:
    if n in haystack:
        found += 1
if {verbose}:
    print(' found: %10d' % found)
'''
|
||||
|
||||
def test(container_type, verbose):
|
||||
MAX_EXPONENT = 7
|
||||
for n in range(3, MAX_EXPONENT + 1):
|
||||
size = 10**n
|
||||
setup = SETUP.format(container_type=container_type,
|
||||
size=size, verbose=verbose)
|
||||
test = TEST.format(verbose=verbose)
|
||||
tt = timeit.repeat(stmt=test, setup=setup, repeat=5, number=1)
|
||||
print('|{:{}d}|{:f}'.format(size, MAX_EXPONENT + 1, min(tt)))
|
||||
|
||||
if __name__ == '__main__':
    # Filter the optional -v flag into a new list instead of mutating
    # sys.argv in place.
    args = [arg for arg in sys.argv[1:] if arg != '-v']
    verbose = len(args) != len(sys.argv) - 1
    if len(args) != 1:
        print('Usage: %s [-v] <container_type>' % sys.argv[0])
    else:
        test(args[0], verbose)
|
||||
37
dicts/container_perftest_datagen.py
Normal file
37
dicts/container_perftest_datagen.py
Normal file
@@ -0,0 +1,37 @@
|
||||
"""
|
||||
Generate data for container performance test
|
||||
"""
|
||||
|
||||
import random
|
||||
import array
|
||||
|
||||
# Sizing: HAYSTACK_LEN values go to ``selected.arr`` and NEEDLES_LEN // 2
# additional values go to ``not_selected.arr``.
MAX_EXPONENT = 7
HAYSTACK_LEN = 10 ** MAX_EXPONENT
NEEDLES_LEN = 10 ** (MAX_EXPONENT - 1)
SAMPLE_LEN = HAYSTACK_LEN + NEEDLES_LEN // 2

needles = array.array('d')  # not used below; kept so module names are unchanged


def _random_value():
    """Return the reciprocal of a random float drawn from (0.0, 1.0].

    ``random.random()`` may return exactly 0.0, so its complement is used
    as the divisor to rule out ZeroDivisionError.
    """
    return 1 / (1.0 - random.random())


# a set discards duplicates, so it may end up short of SAMPLE_LEN
sample = {_random_value() for _ in range(SAMPLE_LEN)}
print('initial sample: %d elements' % len(sample))

# complete sample, in case duplicate random numbers were discarded
while len(sample) < SAMPLE_LEN:
    sample.add(_random_value())

print('complete sample: %d elements' % len(sample))

sample = array.array('d', sample)
random.shuffle(sample)

# the first NEEDLES_LEN // 2 values never enter the haystack
not_selected = sample[:NEEDLES_LEN // 2]
print('not selected: %d samples' % len(not_selected))
print(' writing not_selected.arr')
with open('not_selected.arr', 'wb') as fp:
    not_selected.tofile(fp)

# the remaining HAYSTACK_LEN values form the haystack data
selected = sample[NEEDLES_LEN // 2:]
print('selected: %d samples' % len(selected))
print(' writing selected.arr')
with open('selected.arr', 'wb') as fp:
    selected.tofile(fp)
|
||||
30
dicts/dialcodes.py
Normal file
30
dicts/dialcodes.py
Normal file
@@ -0,0 +1,30 @@
|
||||
# BEGIN DIALCODES
# dial codes of the top 10 most populous countries
DIAL_CODES = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]

# Build the same mapping from three differently ordered pair sequences.
d1 = dict(DIAL_CODES)  # <1>
print('d1:', d1.keys())
d2 = dict(sorted(DIAL_CODES))  # <2>
print('d2:', d2.keys())
d3 = dict(sorted(DIAL_CODES, key=lambda x: x[1]))  # <3>
print('d3:', d3.keys())
# dict equality ignores ordering, so all three compare equal
assert d1 == d2 == d3  # <4>
# END DIALCODES

# NOTE(review): the key orders recorded below match none of the three
# insertion orders, so the output was presumably captured on an interpreter
# whose dicts did not preserve insertion order; current Python versions
# print the keys in insertion order.
"""
# BEGIN DIALCODES_OUTPUT
d1: dict_keys([880, 1, 86, 55, 7, 234, 91, 92, 62, 81])
d2: dict_keys([880, 1, 91, 86, 81, 55, 234, 7, 92, 62])
d3: dict_keys([880, 81, 1, 86, 55, 7, 234, 91, 92, 62])
# END DIALCODES_OUTPUT
"""
|
||||
34
dicts/dict_perftest.py
Normal file
34
dicts/dict_perftest.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""
|
||||
Dict performance test
|
||||
"""
|
||||
|
||||
import timeit
|
||||
|
||||
# Source template for the ``setup`` argument of ``timeit.repeat``.
# ``{size}`` is the number of doubles read from ``selected.arr``; each one
# becomes a key of ``haystack`` mapped to its ``as_integer_ratio()`` pair.
# The needles are 500 values from ``not_selected.arr`` plus the first 500
# values loaded into the haystack.
SETUP = '''
import array
selected = array.array('d')
with open('selected.arr', 'rb') as fp:
    selected.fromfile(fp, {size})
haystack = dict((n, n.as_integer_ratio()) for n in selected)
print('haystack: %10d' % len(haystack), end=' ')
needles = array.array('d')
with open('not_selected.arr', 'rb') as fp:
    needles.fromfile(fp, 500)
needles.extend(selected[:500])
# print(' needles: %10d' % len(needles), end=' ')
'''
|
||||
|
||||
# Timed statement: count the needles that are keys of the haystack dict.
TEST = '''
found = 0
for n in needles:
    if n in haystack:
        found += 1
# print(' found: %10d' % found)
'''
|
||||
|
||||
# Run the lookup benchmark for haystacks of 10**3 .. 10**MAX_EXPONENT keys
# and print the best of five single-run timings for each size.
MAX_EXPONENT = 7
for n in range(3, MAX_EXPONENT + 1):
    size = 10**n
    setup = SETUP.format(size=size)
    tt = timeit.repeat(stmt=TEST, setup=setup, repeat=5, number=1)
    # column width MAX_EXPONENT + 1 fits the largest size printed
    print('|{:{}d}|{:f}'.format(size, MAX_EXPONENT + 1, min(tt)))
|
||||
20
dicts/hashdiff.py
Normal file
20
dicts/hashdiff.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import sys
|
||||
|
||||
# Number of binary digits in sys.maxsize; the build's word size is one more.
MAX_BITS = len(format(sys.maxsize, 'b'))
print('%s-bit Python build' % (MAX_BITS + 1))


def hash_diff(o1, o2):
    """Return a four-line report comparing the hashes of two objects.

    The lines are: repr of ``o1`` and its hash in binary; a row with ``!``
    under every position where the two binary strings differ, followed by
    the number of differences; repr of ``o2`` and its hash; a dashed
    separator.

    Hashes are zero-padded to MAX_BITS characters with sign-aware padding,
    so a negative hash renders as ``-000...101`` instead of having the
    minus sign buried after the padding zeros.
    """
    h1 = '{:0{}b}'.format(hash(o1), MAX_BITS)
    h2 = '{:0{}b}'.format(hash(o2), MAX_BITS)
    diff = ''.join('!' if b1 != b2 else ' ' for b1, b2 in zip(h1, h2))
    count = '!= {}'.format(diff.count('!'))
    # the label column fits the longer repr, with a minimum of 8 characters
    width = max(len(repr(o1)), len(repr(o2)), 8)
    sep = '-' * (width * 2 + MAX_BITS)
    return '{!r:{width}} {}\n{:{width}} {} {}\n{!r:{width}} {}\n{}'.format(
        o1, h1, ' ' * width, diff, count, o2, h2, sep, width=width)
|
||||
|
||||
if __name__ == '__main__':
    # Demo: an int/float pair that compare equal, then floats that differ
    # only slightly in value.
    for left, right in [(1, 1.0), (1.0, 1.0001),
                        (1.0001, 1.0002), (1.0002, 1.0003)]:
        print(hash_diff(left, right))
|
||||
25
dicts/index.py
Normal file
25
dicts/index.py
Normal file
@@ -0,0 +1,25 @@
|
||||
# adapted from Alex Martelli's example in "Re-learning Python"
|
||||
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
|
||||
# (slide 41) Ex: lines-by-word file index
|
||||
|
||||
# BEGIN INDEX
|
||||
"""Build an index mapping word -> list of occurrences"""
|
||||
|
||||
import sys
|
||||
import re
|
||||
|
||||
# raw string: '\w' in a plain literal is an invalid escape sequence
WORD_RE = re.compile(r'\w+')

# maps each word to a list of (line_no, column_no) pairs, both 1-based
index = {}
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start()+1
            location = (line_no, column_no)
            # fetch the list for word, creating it on first sight
            index.setdefault(word, []).append(location)  # <1>

# print in alphabetical order
for word in sorted(index, key=str.upper):
    print(word, index[word])
|
||||
# END INDEX
|
||||
28
dicts/index0.py
Normal file
28
dicts/index0.py
Normal file
@@ -0,0 +1,28 @@
|
||||
# adapted from Alex Martelli's example in "Re-learning Python"
|
||||
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
|
||||
# (slide 41) Ex: lines-by-word file index
|
||||
|
||||
# BEGIN INDEX0
|
||||
"""Build an index mapping word -> list of occurrences"""
|
||||
|
||||
import sys
|
||||
import re
|
||||
|
||||
# raw string: '\w' in a plain literal is an invalid escape sequence
WORD_RE = re.compile(r'\w+')

# maps each word to a list of (line_no, column_no) pairs, both 1-based
index = {}
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start()+1
            location = (line_no, column_no)
            # this is ugly; coded like this to make a point
            occurrences = index.get(word, [])  # <1>
            occurrences.append(location)       # <2>
            index[word] = occurrences          # <3>

# print in alphabetical order
for word in sorted(index, key=str.upper):
    print(word, index[word])
|
||||
# END INDEX0
|
||||
22
dicts/index_alex.py
Normal file
22
dicts/index_alex.py
Normal file
@@ -0,0 +1,22 @@
|
||||
# adapted from Alex Martelli's example in "Re-learning Python"
|
||||
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
|
||||
# (slide 41) Ex: lines-by-word file index
|
||||
|
||||
|
||||
"""Build a map word -> list-of-line-numbers"""
|
||||
|
||||
import sys
|
||||
import re
|
||||
|
||||
# raw string: '\W' in a plain literal is an invalid escape sequence
NONWORD_RE = re.compile(r'\W+')

# maps each word to the list of 1-based line numbers where it occurs
idx = {}
with open(sys.argv[1], encoding='utf-8') as fp:
    for n, line in enumerate(fp, 1):
        for word in NONWORD_RE.split(line):
            # splitting can yield empty strings at the line edges; skip them
            if word.strip():
                idx.setdefault(word, []).append(n)

# print in alphabetical order
for word in sorted(idx, key=str.upper):
    print(word, idx[word])
|
||||
26
dicts/index_default.py
Normal file
26
dicts/index_default.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# adapted from Alex Martelli's example in "Re-learning Python"
|
||||
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
|
||||
# (slide 41) Ex: lines-by-word file index
|
||||
|
||||
# BEGIN INDEX_DEFAULT
|
||||
"""Build an index mapping word -> list of occurrences"""
|
||||
|
||||
import sys
|
||||
import re
|
||||
import collections
|
||||
|
||||
# raw string: '\w' in a plain literal is an invalid escape sequence
WORD_RE = re.compile(r'\w+')

# missing words get an empty list automatically on first access
index = collections.defaultdict(list)  # <1>
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start()+1
            location = (line_no, column_no)
            index[word].append(location)  # <2>

# print in alphabetical order
for word in sorted(index, key=str.upper):
    print(word, index[word])
|
||||
# END INDEX_DEFAULT
|
||||
48
dicts/set_perftest.py
Normal file
48
dicts/set_perftest.py
Normal file
@@ -0,0 +1,48 @@
|
||||
"""
|
||||
Set performance test
|
||||
"""
|
||||
|
||||
import timeit
|
||||
|
||||
# Source template for the ``setup`` argument of ``timeit.repeat``.
#   {size}  number of doubles read from ``selected.arr``
#   {type}  expression called with the loaded values to build the haystack
# The needles are 500 values from ``not_selected.arr`` plus the first 500
# haystack values, converted to a set.
SETUP = '''
import array
selected = array.array('d')
with open('selected.arr', 'rb') as fp:
    selected.fromfile(fp, {size})
haystack = {type}(selected)
# print('haystack: %10d' % len(haystack), end=' ')
needles = array.array('d')
with open('not_selected.arr', 'rb') as fp:
    needles.fromfile(fp, 500)
needles.extend(selected[:500])
needles = set(needles)
# print(' needles: %10d' % len(needles), end=' ')
'''
|
||||
|
||||
# (name, statement) pairs to be timed.  Both statements assert that exactly
# 500 of the needles are present in the haystack.
tests = [
    ('FOR_LOOP_TEST', '''
found = 0
for n in needles:
    if n in haystack:
        found += 1
assert found == 500
'''),
    # the & operator needs set operands on both sides
    ('SET_&_TEST', '''
found = len(needles & haystack)
assert found == 500
'''),
]
|
||||
|
||||
# For every container type, time each applicable statement on haystacks of
# 10**3 .. 10**MAX_EXPONENT items and print the best of five single runs.
MAX_EXPONENT = 7
for collection_type in 'dict.fromkeys set list'.split():
    # only the set haystack is given the set-intersection statement
    available_tests = tests if collection_type == 'set' else tests[:1]
    for test_name, test in available_tests:
        print('*' * 25, collection_type, test_name)
        for n in range(3, MAX_EXPONENT + 1):
            size = 10**n
            setup = SETUP.format(type=collection_type, size=size)
            tt = timeit.repeat(stmt=test, setup=setup, repeat=5, number=1)
            print('|{:{}d}|{:9.6f}'.format(size, MAX_EXPONENT + 1, min(tt)))
|
||||
72
dicts/strkeydict.py
Normal file
72
dicts/strkeydict.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""StrKeyDict always converts non-string keys to `str`
|
||||
|
||||
Tests for item retrieval using `d[key]` notation::
|
||||
|
||||
>>> d = StrKeyDict([('2', 'two'), ('4', 'four')])
|
||||
>>> d['2']
|
||||
'two'
|
||||
>>> d[4]
|
||||
'four'
|
||||
>>> d[1]
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
KeyError: '1'
|
||||
|
||||
Tests for the `in` operator::
|
||||
|
||||
>>> 2 in d
|
||||
True
|
||||
>>> 1 in d
|
||||
False
|
||||
|
||||
Test for item assignment using non-string key::
|
||||
|
||||
>>> d[0] = 'zero'
|
||||
>>> d['0']
|
||||
'zero'
|
||||
|
||||
Tests for update using a `dict` or a sequence of pairs::
|
||||
|
||||
>>> d.update({6:'six', '8':'eight'})
|
||||
>>> sorted(d.keys())
|
||||
['0', '2', '4', '6', '8']
|
||||
>>> d.update([(10, 'ten'), ('12', 'twelve')])
|
||||
>>> sorted(d.keys())
|
||||
['0', '10', '12', '2', '4', '6', '8']
|
||||
>>> d.update([1, 3, 5])
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: 'int' object is not iterable
|
||||
|
||||
"""
|
||||
# BEGIN STRKEYDICT
|
||||
|
||||
import collections
|
||||
import collections.abc
|
||||
|
||||
|
||||
class StrKeyDict(collections.UserDict):  # <1>
    """Mapping that stores every key as a ``str``.

    Keys are converted with ``str()`` on assignment, deletion, membership
    tests and (through ``__missing__``) on lookup.
    """

    def __missing__(self, key):  # <2>
        """Retry a failed lookup with ``str(key)``.

        Raises KeyError when ``key`` is already a ``str``, which also ends
        the retry.
        """
        if isinstance(key, str):
            raise KeyError(key)
        return self[str(key)]

    def __contains__(self, key):
        """Return True when ``str(key)`` is a stored key."""
        return str(key) in self.data  # <3>

    def __setitem__(self, key, item):
        """Store ``item`` under ``str(key)``."""
        self.data[str(key)] = item  # <4>

    def __delitem__(self, key):
        """Delete the entry stored under ``str(key)``.

        Keeps ``del d[k]`` and ``d.pop(k)`` consistent with ``k in d`` and
        ``d[k]`` for non-string keys.  Raises KeyError when absent.
        """
        del self.data[str(key)]

    def update(self, iterable=None, **kwds):
        """Add items from a mapping or an iterable of pairs, then ``kwds``.

        Every item goes through ``__setitem__`` so keys are converted.
        Raises TypeError when an element of ``iterable`` is not a pair.
        """
        if iterable is not None:
            if isinstance(iterable, collections.abc.Mapping):  # <5>
                iterable = iterable.items()
            for key, value in iterable:
                self[key] = value  # <7>
        for key, value in kwds.items():  # <8>
            self[key] = value
|
||||
|
||||
# END STRKEYDICT
|
||||
39
dicts/strkeydict0.py
Normal file
39
dicts/strkeydict0.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""StrKeyDict0 converts non-string keys to `str` on lookup
|
||||
|
||||
# BEGIN STRKEYDICT0_TESTS
|
||||
|
||||
Tests for item retrieval using `d[key]` notation::
|
||||
|
||||
>>> d = StrKeyDict0([('2', 'two'), ('4', 'four')])
|
||||
>>> d['2']
|
||||
'two'
|
||||
>>> d[4]
|
||||
'four'
|
||||
>>> d[1]
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
KeyError: '1'
|
||||
|
||||
Tests for the `in` operator::
|
||||
|
||||
>>> 2 in d
|
||||
True
|
||||
>>> 1 in d
|
||||
False
|
||||
|
||||
# END STRKEYDICT0_TESTS
|
||||
"""
|
||||
|
||||
# BEGIN STRKEYDICT0
|
||||
|
||||
class StrKeyDict0(dict):  # <1>
    """dict that falls back to ``str(key)`` when a lookup fails."""

    def __missing__(self, key):
        """Retry a failed ``d[key]`` lookup with ``str(key)``.

        Raises KeyError when ``key`` is already a ``str``, which also ends
        the retry.
        """
        if isinstance(key, str):  # <2>
            raise KeyError(key)
        return self[str(key)]  # <3>

    def get(self, key, default=None):
        """Return ``self[key]``, or ``default`` when the lookup fails.

        ``dict.get`` does not consult ``__missing__``, so the lookup is
        delegated to ``__getitem__`` to keep ``d.get(k)`` consistent with
        ``d[k]``.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def __contains__(self, key):
        """Return True when ``key`` or ``str(key)`` is a stored key."""
        # search self.keys(): ``key in self`` would call this method again
        return key in self.keys() or str(key) in self.keys()  # <4>
|
||||
|
||||
# END STRKEYDICT0
|
||||
Reference in New Issue
Block a user