updated from Atlas

This commit is contained in:
Luciano Ramalho
2015-04-01 22:48:56 -03:00
parent aab93699a4
commit 573e1a94c4
109 changed files with 5 additions and 6 deletions

View File

@@ -0,0 +1,34 @@
"""
Dict performance test
"""
import timeit
# Setup snippet handed to timeit; it runs (untimed) before each timed run.
# It loads `{size}` doubles from 'selected.arr' into the dict `haystack`,
# then builds `needles` from 500 values read from 'not_selected.arr' plus
# the first 500 values of `selected`.
# NOTE: the bodies of the `with` statements must be indented, otherwise
# timeit fails to compile the snippet.
SETUP = '''
import array
selected = array.array('d')
with open('selected.arr', 'rb') as fp:
    selected.fromfile(fp, {size})
haystack = dict((n, n.as_integer_ratio()) for n in selected)
print('haystack: %10d' % len(haystack), end=' ')
needles = array.array('d')
with open('not_selected.arr', 'rb') as fp:
    needles.fromfile(fp, 500)
needles.extend(selected[:500])
# print(' needles: %10d' % len(needles), end=' ')
'''

# Timed statement: count how many needles are keys of the haystack.
TEST = '''
found = 0
for n in needles:
    if n in haystack:
        found += 1
# print(' found: %10d' % found)
'''

# Haystack sizes go from 10**3 up to 10**MAX_EXPONENT.
MAX_EXPONENT = 7


def main():
    """Time the membership test for each haystack size and print a table row.

    Each row shows the haystack size and the best (minimum) of 5 runs.
    Requires 'selected.arr' and 'not_selected.arr' in the current directory.
    """
    for n in range(3, MAX_EXPONENT + 1):
        size = 10**n
        setup = SETUP.format(size=size)
        tt = timeit.repeat(stmt=TEST, setup=setup, repeat=5, number=1)
        print('|{:{}d}|{:f}'.format(size, MAX_EXPONENT + 1, min(tt)))


if __name__ == '__main__':
    main()

22
attic/dicts/index_alex.py Normal file
View File

@@ -0,0 +1,22 @@
# adapted from Alex Martelli's example in "Re-learning Python"
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
# (slide 41) Ex: lines-by-word file index
"""Build a map word -> list-of-line-numbers"""
import sys
import re
# Raw string: in a plain literal '\W' is an invalid escape sequence, which
# newer Python versions warn about at compile time.
NONWORD_RE = re.compile(r'\W+')


def build_index(lines):
    """Return a dict mapping each word to the line numbers where it occurs.

    `lines` is any iterable of text lines (e.g. an open text file).
    Line numbers start at 1; a word occurring several times on one line
    contributes that line number once per occurrence.
    """
    idx = {}
    for n, line in enumerate(lines, 1):
        for word in NONWORD_RE.split(line):
            # split() yields empty strings at the edges of a line; skip them
            if word:
                idx.setdefault(word, []).append(n)
    return idx


def main():
    """Index the UTF-8 text file named by sys.argv[1] and print the index."""
    with open(sys.argv[1], encoding='utf-8') as fp:
        idx = build_index(fp)
    # print in alphabetical order
    for word in sorted(idx, key=str.upper):
        print(word, idx[word])


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,48 @@
"""
Set performance test
"""
import timeit
# Setup snippet handed to timeit; it runs (untimed) before each timed run.
# `{type}` is the callable used to build the haystack from the `{size}`
# doubles read from 'selected.arr'. `needles` ends up as a set built from
# 500 values of 'not_selected.arr' plus the first 500 selected values.
# NOTE: the bodies of the `with` statements must be indented, otherwise
# timeit fails to compile the snippet.
SETUP = '''
import array
selected = array.array('d')
with open('selected.arr', 'rb') as fp:
    selected.fromfile(fp, {size})
haystack = {type}(selected)
# print('haystack: %10d' % len(haystack), end=' ')
needles = array.array('d')
with open('not_selected.arr', 'rb') as fp:
    needles.fromfile(fp, 500)
needles.extend(selected[:500])
needles = set(needles)
# print(' needles: %10d' % len(needles), end=' ')
'''

# (name, timed statement) pairs. Both statements assert that exactly 500
# needles are found in the haystack.
tests = [
    ('FOR_LOOP_TEST', '''
found = 0
for n in needles:
    if n in haystack:
        found += 1
assert found == 500
'''),
    ('SET_&_TEST', '''
found = len(needles & haystack)
assert found == 500
'''
    )]

# Haystack sizes go from 10**3 up to 10**MAX_EXPONENT.
MAX_EXPONENT = 7


def main():
    """Time every applicable test for each collection type and size.

    The `&` test is only run when the haystack is a `set`; other
    collection types only get the for-loop test.
    Requires 'selected.arr' and 'not_selected.arr' in the current directory.
    """
    for collection_type in 'dict.fromkeys set list'.split():
        if collection_type == 'set':
            available_tests = tests
        else:
            available_tests = tests[:1]
        for test_name, test in available_tests:
            print('*' * 25, collection_type, test_name)
            for n in range(3, MAX_EXPONENT + 1):
                size = 10**n
                setup = SETUP.format(type=collection_type, size=size)
                tt = timeit.repeat(stmt=test, setup=setup, repeat=5, number=1)
                print('|{:{}d}|{:9.6f}'.format(size, MAX_EXPONENT + 1, min(tt)))


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,53 @@
"""StrKeyDict0 converts non-string keys to `str` on lookup
# BEGIN STRKEYDICT0_TESTS
Tests for item retrieval using `d[key]` notation::
>>> d = StrKeyDict0([('2', 'two'), ('4', 'four')])
>>> d['2']
'two'
>>> d[4]
'four'
>>> d[1]
Traceback (most recent call last):
...
KeyError: '1'
Tests for item retrieval using `d.get(key)` notation::
>>> d.get('2')
'two'
>>> d.get(4)
'four'
>>> d.get(1, 'N/A')
'N/A'
Tests for the `in` operator::
>>> 2 in d
True
>>> 1 in d
False
# END STRKEYDICT0_TESTS
"""
# BEGIN STRKEYDICT0
import collections
class StrKeyDict0(collections.UserDict):  # <1>
    """Mapping that retries failed lookups with the key converted to `str`."""

    def __missing__(self, key):
        """Handle a key not found by `d[key]`.

        A non-string key is looked up again as `str(key)`; a string key
        that is missing raises `KeyError`.
        """
        if not isinstance(key, str):  # <2>
            return self[str(key)]  # <3>
        raise KeyError(key)

    def __contains__(self, key):
        """Return True if `str(key)` is among the stored keys."""
        text_key = str(key)
        return text_key in self.data  # <3>
# END STRKEYDICT0

View File

@@ -0,0 +1,95 @@
"""StrKeyDict always converts non-string keys to `str`
This is a variation of `strkeydict.StrKeyDict` implemented
as a `dict` built-in subclass (instead of a `UserDict` subclass)
Test for initializer: keys are converted to `str`.
>>> d = StrKeyDict([(2, 'two'), ('4', 'four')])
>>> sorted(d.keys())
['2', '4']
Tests for item retrieval using `d[key]` notation::
>>> d['2']
'two'
>>> d[4]
'four'
>>> d[1]
Traceback (most recent call last):
...
KeyError: '1'
Tests for item retrieval using `d.get(key)` notation::
>>> d.get('2')
'two'
>>> d.get(4)
'four'
>>> d.get(1, 'N/A')
'N/A'
Tests for the `in` operator::
>>> 2 in d
True
>>> 1 in d
False
Test for item assignment using non-string key::
>>> d[0] = 'zero'
>>> d['0']
'zero'
Tests for update using a `dict` or a sequence of pairs::
>>> d.update({6:'six', '8':'eight'})
>>> sorted(d.keys())
['0', '2', '4', '6', '8']
>>> d.update([(10, 'ten'), ('12', 'twelve')])
>>> sorted(d.keys())
['0', '10', '12', '2', '4', '6', '8']
>>> d.update([1, 3, 5])
Traceback (most recent call last):
...
TypeError: 'int' object is not iterable
"""
import collections.abc
class StrKeyDict(dict):
    """`dict` subclass that stores every key as `str(key)`.

    Lookups, membership tests, assignment, deletion, `get`, `pop`,
    `setdefault` and `update` all accept non-string keys and treat them
    as their `str` form.
    """

    def __init__(self, iterable=None, **kwds):
        """Build from a mapping or an iterable of pairs, plus keyword items."""
        super().__init__()
        # Go through update() so every key passes through __setitem__.
        self.update(iterable, **kwds)

    def __missing__(self, key):
        """Retry a failed `d[key]` with `str(key)`; raise for missing strings."""
        if isinstance(key, str):
            raise KeyError(key)
        return self[str(key)]

    def __contains__(self, key):
        """Return True if `key` or `str(key)` is a stored key."""
        return key in self.keys() or str(key) in self.keys()

    def __setitem__(self, key, item):
        """Store `item` under `str(key)`."""
        super().__setitem__(str(key), item)

    def __delitem__(self, key):
        """Delete the item stored under `str(key)`.

        Raises `KeyError` (with the string form of the key) if absent.
        """
        super().__delitem__(str(key))

    def get(self, key, default=None):
        """Return `d[key]`, or `default` if the key is missing."""
        try:
            return self[key]
        except KeyError:
            return default

    def setdefault(self, key, default=None):
        """Return `d[key]`, first storing `default` if the key is missing.

        `dict.setdefault` does not call `__setitem__`, so without this
        override a non-string key would be stored unconverted.
        """
        if key not in self:
            self[key] = default
        return self[key]

    def pop(self, key, *args):
        """Remove `str(key)` and return its value.

        An optional second argument is returned instead of raising
        `KeyError` when the key is missing, as with `dict.pop`.
        """
        return super().pop(str(key), *args)

    def update(self, iterable=None, **kwds):
        """Add items from a mapping or an iterable of pairs, then from `kwds`.

        Raises `TypeError` if `iterable` yields something that cannot be
        unpacked into a (key, value) pair.
        """
        if iterable is not None:
            if isinstance(iterable, collections.abc.Mapping):
                pairs = iterable.items()
            else:
                pairs = ((k, v) for k, v in iterable)
            for key, value in pairs:
                self[key] = value
        if kwds:
            self.update(kwds)

View File

@@ -0,0 +1,300 @@
"""Unit tests for transformdict.py."""
import unittest
from test import support
from test import mapping_tests
import pickle
import copy
from functools import partial
from transformdict import TransformDict
def str_lower(s):
    """Return `s.lower()`; a module-level transform used by the copy tests."""
    lowered = s.lower()
    return lowered
class TransformDictTestBase(unittest.TestCase):
    """Shared helper for test cases that inspect TransformDict internals."""

    def check_underlying_dict(self, d, expected):
        """
        Check for implementation details.

        Asserts that `d._data` equals `expected`, that `d._original` has
        the same keys as `expected`, and that applying `d._transform` to
        each value of `d._original` gives back the corresponding key.
        """
        self.assertEqual(d._data, expected)
        originals = d._original
        self.assertEqual(set(originals), set(expected))
        retransformed = [d._transform(orig_key)
                         for orig_key in originals.values()]
        self.assertEqual(retransformed, list(originals.keys()))
class TestTransformDict(TransformDictTestBase):
    """Tests for construction, item access, mutation, iteration and repr."""

    def test_init(self):
        # Calling with no arguments must fail
        self.assertRaises(TypeError, TransformDict)
        # Too many positional args
        self.assertRaises(TypeError, TransformDict, str.lower, {}, {})
        # Not a callable
        self.assertRaises(TypeError, TransformDict, object())

        empty = TransformDict(str.lower)
        self.check_underlying_dict(empty, {})

        # Every supported way of passing initial items gives the same result
        pairs = [('Bar', 1), ('Foo', 2)]
        builders = (
            lambda: TransformDict(str.lower, pairs),
            lambda: TransformDict(str.lower, dict(pairs)),
            lambda: TransformDict(str.lower, **dict(pairs)),
            lambda: TransformDict(str.lower, {'Bar': 1}, Foo=2),
        )
        for build in builders:
            td = build()
            self.assertEqual(sorted(td.items()), pairs)
            self.check_underlying_dict(td, {'bar': 1, 'foo': 2})

    def test_transform_func(self):
        # Test the `transform_func` attribute
        td = TransformDict(str.lower)
        self.assertIs(td.transform_func, str.lower)
        # The attribute is read-only
        self.assertRaises(AttributeError,
                          setattr, td, 'transform_func', str.upper)

    def test_various_transforms(self):
        def to_utf8(s):
            return s.encode('utf-8')

        td = TransformDict(to_utf8)
        td['Foo'] = 5
        self.assertEqual(td['Foo'], 5)
        self.check_underlying_dict(td, {b'Foo': 5})
        with self.assertRaises(AttributeError):
            # 'bytes' object has no attribute 'encode'
            td[b'Foo']
        # Another example
        td = TransformDict(str.swapcase)
        td['Foo'] = 5
        self.assertEqual(td['Foo'], 5)
        self.check_underlying_dict(td, {'fOO': 5})
        with self.assertRaises(KeyError):
            td['fOO']

    # NOTE: we mostly test the operations which are not inherited from
    # MutableMapping.

    def test_setitem_getitem(self):
        spellings = ('foo', 'Foo', 'FOo')
        td = TransformDict(str.lower)
        with self.assertRaises(KeyError):
            td['foo']
        td['Foo'] = 5
        for key in spellings:
            self.assertEqual(td[key], 5)
        with self.assertRaises(KeyError):
            td['bar']
        self.check_underlying_dict(td, {'foo': 5})
        td['BAR'] = 6
        self.assertEqual(td['Bar'], 6)
        self.check_underlying_dict(td, {'foo': 5, 'bar': 6})
        # Overwriting
        td['foO'] = 7
        for key in spellings:
            self.assertEqual(td[key], 7)
        self.check_underlying_dict(td, {'foo': 7, 'bar': 6})

    def test_delitem(self):
        td = TransformDict(str.lower, Foo=5)
        td['baR'] = 3
        del td['fOO']
        # Once deleted, no spelling of the key can be deleted again
        for gone in ('Foo', 'foo'):
            with self.assertRaises(KeyError):
                del td[gone]
        self.check_underlying_dict(td, {'bar': 3})

    def test_get(self):
        td = TransformDict(str.lower)
        sentinel = object()
        self.assertIsNone(td.get('foo'))
        self.assertIs(td.get('foo', sentinel), sentinel)
        td['Foo'] = 5
        for key in ('foo', 'FOO'):
            self.assertEqual(td.get(key), 5)
        self.assertIsNone(td.get('bar'))
        self.check_underlying_dict(td, {'foo': 5})

    def test_getitem(self):
        td = TransformDict(str.lower)
        td['Foo'] = 5
        # getitem() returns the originally inserted key along with the value
        for key in ('foo', 'FOO'):
            self.assertEqual(td.getitem(key), ('Foo', 5))
        self.assertRaises(KeyError, td.getitem, 'bar')

    def test_pop(self):
        td = TransformDict(str.lower)
        sentinel = object()
        self.assertRaises(KeyError, td.pop, 'foo')
        self.assertIs(td.pop('foo', sentinel), sentinel)
        # (spelling checked for membership, spelling passed to pop)
        for present, popped in (('foo', 'foo'), ('Foo', 'FOO')):
            td['Foo'] = 5
            self.assertIn(present, td)
            self.assertEqual(td.pop(popped), 5)
            self.assertNotIn('foo', td)
            self.check_underlying_dict(td, {})
        self.assertRaises(KeyError, td.pop, 'foo')

    def test_clear(self):
        td = TransformDict(str.lower)
        td.clear()
        self.check_underlying_dict(td, {})
        td['Foo'] = 5
        td['baR'] = 3
        self.check_underlying_dict(td, {'foo': 5, 'bar': 3})
        td.clear()
        self.check_underlying_dict(td, {})

    def test_contains(self):
        td = TransformDict(str.lower)
        self.assertIs(False, 'foo' in td)
        td['Foo'] = 5
        for key in ('Foo', 'foo', 'FOO'):
            self.assertIs(True, key in td)
        self.assertIs(False, 'bar' in td)

    def test_len(self):
        td = TransformDict(str.lower)
        self.assertEqual(len(td), 0)
        # (key, value, expected length after the assignment)
        steps = [('Foo', 5, 1), ('BAR', 6, 2), ('foo', 7, 2), ('baR', 3, 2)]
        for key, value, expected_len in steps:
            td[key] = value
            self.assertEqual(len(td), expected_len)
        del td['Bar']
        self.assertEqual(len(td), 1)

    def test_iter(self):
        td = TransformDict(str.lower)
        self.assertRaises(StopIteration, next, iter(td))
        td['Foo'] = 5
        td['BAR'] = 6
        self.assertEqual({key for key in td}, {'Foo', 'BAR'})

    def test_first_key_retained(self):
        td = TransformDict(str.lower, {'Foo': 5, 'BAR': 6})
        self.assertEqual(set(td), {'Foo', 'BAR'})
        # Overwriting through another spelling keeps the first spelling
        td['foo'] = 7
        td['baR'] = 8
        td['quux'] = 9
        self.assertEqual(set(td), {'Foo', 'BAR', 'quux'})
        # After deletion, the next spelling inserted becomes the key
        del td['foo']
        td['FOO'] = 9
        del td['bar']
        td.setdefault('Bar', 15)
        td.setdefault('BAR', 15)
        self.assertEqual(set(td), {'FOO', 'Bar', 'quux'})

    def test_repr(self):
        td = TransformDict(str.lower)
        self.assertEqual(
            repr(td),
            "TransformDict(<method 'lower' of 'str' objects>, {})")
        td['Foo'] = 5
        self.assertEqual(
            repr(td),
            "TransformDict(<method 'lower' of 'str' objects>, {'Foo': 5})")

    def test_repr_non_hashable_keys(self):
        td = TransformDict(id)
        self.assertEqual(
            repr(td),
            "TransformDict(<built-in function id>, {})")
        td[[1]] = 2
        self.assertEqual(
            repr(td),
            "TransformDict(<built-in function id>, [([1], 2)])")
class TransformDictMappingTests(TransformDictTestBase,
                                mapping_tests.BasicTestMappingProtocol):
    """Mapping-protocol, copy and pickle tests for the class under test.

    Subclasses may rebind `TransformDict` and `type2test` to run the same
    tests against another class.
    """

    # Class under test, reached via `self.TransformDict` in the checks below
    TransformDict = TransformDict
    # Mapping factory; presumably consumed by the inherited mapping tests
    type2test = partial(TransformDict, str.lower)

    def check_shallow_copy(self, copy_func):
        """Check that `copy_func` makes an independent mapping sharing values."""
        source = self.TransformDict(str_lower, {'Foo': []})
        clone = copy_func(source)
        self.assertIs(clone.__class__, self.TransformDict)
        self.assertIs(clone._transform, str_lower)
        self.check_underlying_dict(clone, {'foo': []})
        # A key added to the copy does not appear in the source
        clone['Bar'] = 6
        self.assertEqual(clone['bar'], 6)
        with self.assertRaises(KeyError):
            source['bar']
        # The value objects are shared: mutating one is seen through the other
        clone['foo'].append(5)
        self.assertEqual(source['foo'], [5])
        self.assertEqual(set(clone), {'Foo', 'Bar'})

    def check_deep_copy(self, copy_func):
        """Check that `copy_func` makes a mapping with independent values."""
        source = self.TransformDict(str_lower, {'Foo': []})
        clone = copy_func(source)
        self.assertIs(clone.__class__, self.TransformDict)
        self.assertIs(clone._transform, str_lower)
        self.check_underlying_dict(clone, {'foo': []})
        # A key added to the copy does not appear in the source
        clone['Bar'] = 6
        self.assertEqual(clone['bar'], 6)
        with self.assertRaises(KeyError):
            source['bar']
        # The value objects are copied: the source's list stays empty
        clone['foo'].append(5)
        self.assertEqual(source['foo'], [])
        self.check_underlying_dict(clone, {'foo': [5], 'bar': 6})
        self.assertEqual(set(clone), {'Foo', 'Bar'})

    def test_copy(self):
        def via_method(mapping):
            return mapping.copy()

        self.check_shallow_copy(via_method)

    def test_copy_copy(self):
        self.check_shallow_copy(copy.copy)

    def test_cast_as_dict(self):
        source = self.TransformDict(str.lower, {'Foo': 5})
        as_dict = dict(source)
        self.assertEqual(as_dict, {'Foo': 5})

    def test_copy_deepcopy(self):
        self.check_deep_copy(copy.deepcopy)

    def test_pickling(self):
        def pickle_unpickle(obj, proto):
            # Round-trip `obj` through pickle using protocol `proto`
            return pickle.loads(pickle.dumps(obj, proto))

        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            with self.subTest(pickle_protocol=proto):
                round_trip = partial(pickle_unpickle, proto=proto)
                self.check_deep_copy(round_trip)
class MyTransformDict(TransformDict):
    """Subclass that adds nothing; used to run the tests on a subclass."""
class TransformDictSubclassMappingTests(TransformDictMappingTests):
    """Re-run the inherited mapping tests against `MyTransformDict`."""

    # Rebind both hooks so the inherited tests build MyTransformDict objects
    TransformDict = MyTransformDict
    type2test = partial(MyTransformDict, str.lower)
def test_main(verbose=None):
    """Run every test case class in this module via `support.run_unittest`.

    `verbose` is accepted but not used.
    """
    support.run_unittest(
        TestTransformDict,
        TransformDictMappingTests,
        TransformDictSubclassMappingTests,
    )


if __name__ == "__main__":
    test_main(verbose=True)