dynamic attributes, descriptors and first concurrency examples

This commit is contained in:
Luciano Ramalho 2015-01-17 22:40:40 -02:00
parent 0618105a47
commit dd1a53ff71
27 changed files with 1151 additions and 216 deletions

201
concurrency/charfinder.py Executable file
View File

@ -0,0 +1,201 @@
#!/usr/bin/env python3
"""
Unicode character finder utility:
find characters based on words in their official names.
This can be used from the command line, just pass words as arguments.
Here is the ``main`` function which makes it happen::
>>> main('rook') # doctest: +NORMALIZE_WHITESPACE
U+2656 WHITE CHESS ROOK
U+265C BLACK CHESS ROOK
(2 matches for 'rook')
>>> main('rook', 'black') # doctest: +NORMALIZE_WHITESPACE
U+265C BLACK CHESS ROOK
(1 match for 'rook black')
>>> main('white bishop') # doctest: +NORMALIZE_WHITESPACE
U+2657 WHITE CHESS BISHOP
(1 match for 'white bishop')
>>> main("jabberwocky's vest")
(No match for "jabberwocky's vest")
For exploring words that occur in the character names, there is the
``word_report`` function::
>>> index = UnicodeNameIndex(sample_chars)
>>> index.word_report()
3 SIGN
2 A
2 EURO
2 LATIN
2 LETTER
1 CAPITAL
1 CURRENCY
1 DOLLAR
1 SMALL
>>> index = UnicodeNameIndex()
>>> index.word_report(7)
13196 SYLLABLE
11735 HANGUL
7616 LETTER
2232 WITH
2180 SIGN
2122 SMALL
1709 CAPITAL
Note: character names starting with the string ``'CJK UNIFIED IDEOGRAPH'``
are indexed only under the words of that prefix. The rest of such a name is
not useful for searching, since the only unique part of the name is the
codepoint in hexadecimal.
"""
import sys
import re
import unicodedata
import pickle
import warnings
# Raw string: in a plain literal '\w' is an invalid escape sequence, which
# newer Python versions warn about. The compiled pattern is unchanged.
RE_WORD = re.compile(r'\w+')
# File holding the pickled index; opened relative to the working directory.
INDEX_NAME = 'charfinder_index.pickle'
# Only indexes with more words than this are written to INDEX_NAME.
MINIMUM_SAVE_LEN = 10000
# Names starting with this prefix are indexed under the prefix words only.
CJK_PREFIX = 'CJK UNIFIED IDEOGRAPH'
# Small character set used by the doctests and the unit tests.
sample_chars = [
    '$',  # DOLLAR SIGN
    'A',  # LATIN CAPITAL LETTER A
    'a',  # LATIN SMALL LETTER A
    '\u20a0',  # EURO-CURRENCY SIGN
    '\u20ac',  # EURO SIGN
]
def tokenize(text):
    """Yield each word found in ``text``, converted to uppercase."""
    yield from (found.group().upper() for found in RE_WORD.finditer(text))
class UnicodeNameIndex:
    """Inverted index from words in Unicode character names to code points.

    ``self.index`` is a dict mapping each uppercased word to the set of
    code points (ints) of the characters whose names contain that word.
    """

    def __init__(self, chars=None):
        """Create the index for ``chars``; see ``load`` for the details."""
        self.load(chars)

    def load(self, chars=None):
        """Populate ``self.index``.

        When ``chars`` is None, try to read the index from the
        ``INDEX_NAME`` pickle; if that is not possible, build it for all
        named characters. Otherwise build it for ``chars`` only.

        A freshly built index with more than ``MINIMUM_SAVE_LEN`` words
        is saved to ``INDEX_NAME``; failure to save only issues a warning.
        """
        self.index = None
        if chars is None:
            try:
                with open(INDEX_NAME, 'rb') as fp:
                    # NOTE: unpickling can execute arbitrary code, so
                    # INDEX_NAME must only be a file written by save().
                    self.index = pickle.load(fp)
            except OSError:
                pass  # no readable cache file: build the index below
            except (EOFError, pickle.UnpicklingError) as exc:
                # truncated or corrupt cache: rebuild instead of crashing
                warnings.warn('Could not load {!r}: {}'
                              .format(INDEX_NAME, exc))
        if self.index is None:
            self.build_index(chars)
            # An index that was just loaded is already on disk, so only
            # a freshly built one is worth writing.
            if len(self.index) > MINIMUM_SAVE_LEN:
                try:
                    self.save()
                except OSError as exc:
                    warnings.warn('Could not save {!r}: {}'
                                  .format(INDEX_NAME, exc))

    def save(self):
        """Pickle ``self.index`` to the ``INDEX_NAME`` file."""
        with open(INDEX_NAME, 'wb') as fp:
            pickle.dump(self.index, fp)

    def build_index(self, chars=None):
        """Build ``self.index`` from ``chars``.

        When ``chars`` is None, every code point from 32 up to and
        including ``sys.maxunicode`` is considered. Characters without a
        name are skipped. Names starting with ``CJK_PREFIX`` are indexed
        under the words of the prefix only.
        """
        if chars is None:
            # range() excludes its stop value: + 1 covers sys.maxunicode
            chars = (chr(i) for i in range(32, sys.maxunicode + 1))
        index = {}
        for char in chars:
            try:
                name = unicodedata.name(char)
            except ValueError:  # character has no name
                continue
            if name.startswith(CJK_PREFIX):
                name = CJK_PREFIX
            code = ord(char)
            for word in tokenize(name):
                index.setdefault(word, set()).add(code)
        self.index = index

    def __len__(self):
        """Return the number of distinct words in the index."""
        return len(self.index)

    def word_rank(self, top=None):
        """Return ``(count, word)`` pairs, most frequent words first.

        Ties are ordered alphabetically by word. When ``top`` is not
        None, only the first ``top`` pairs are returned.
        """
        res = [(len(self.index[key]), key) for key in self.index]
        res.sort(key=lambda item: (-item[0], item[1]))
        if top is not None:
            res = res[:top]
        return res

    def word_report(self, top=None):
        """
        Generate report with most frequent words
        >>> index = UnicodeNameIndex()
        >>> index.word_report(7)
        13196 SYLLABLE
        11735 HANGUL
        7616 LETTER
        2232 WITH
        2180 SIGN
        2122 SMALL
        1709 CAPITAL
        """
        for postings, key in self.word_rank(top):
            print('{:5} {}'.format(postings, key))

    def find_codes(self, query):
        """Yield, in ascending order, the code points matching ``query``.

        A code point matches when its name contains every word of
        ``query``. Nothing is yielded when ``query`` has no words or when
        any of its words is not in the index.
        """
        result_sets = []
        for word in tokenize(query):
            if word in self.index:
                result_sets.append(self.index[word])
            else:  # shortcut: no such word
                return
        if result_sets:
            # intersection() returns a new set. intersection_update() would
            # modify the set stored in self.index and corrupt the index
            # for every later query.
            matches = result_sets[0].intersection(*result_sets[1:])
            yield from sorted(matches)

    def describe(self, code):
        """Return a tab-separated line: ``U+XXXX``, the character, its name."""
        code_str = 'U+{:04X}'.format(code)
        char = chr(code)
        name = unicodedata.name(char)
        return '{:7}\t{}\t{}'.format(code_str, char, name)

    def find_descriptions(self, query):
        """Yield the ``describe`` line of each code point matching ``query``."""
        for code in self.find_codes(query):
            yield self.describe(code)
def main(*args):
    """Print each character matching the words in ``args``, then a summary.

    The arguments are joined with single spaces to form the query.
    """
    index = UnicodeNameIndex()
    query = ' '.join(args)
    counter = 0
    for counter, line in enumerate(index.find_descriptions(query), 1):
        print(line)
    if counter > 1:
        msg = '{} matches'.format(counter)
    elif counter:
        msg = '1 match'
    else:
        msg = 'No match'
    print('({} for {!r})'.format(msg, query))
if __name__ == '__main__':
    # Command-line use: every argument is a query word; none shows usage.
    if len(sys.argv) < 2:
        print('Usage: {} word1 [word2]...'.format(sys.argv[0]))
    else:
        main(*sys.argv[1:])

View File

@ -0,0 +1,69 @@
import asyncio
from aiohttp import web
from charfinder import UnicodeNameIndex
# Page skeleton; ``handle`` fills the ``query``, ``message`` and ``result``
# fields with ``str.format``.
TEMPLATE = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>title</title>
</head>
<body>
<form action="/">
<input type="search" name="query" value="{query}">
<input type="submit" value="find">
</form>
<p>{message}</p>
<hr>
<pre>
{result}
</pre>
</body>
</html>
'''
# Value passed as ``content_type`` of every response built by ``handle``.
CONTENT_TYPE = 'text/html; charset=UTF-8'
index = None # a UnicodeNameIndex instance
@asyncio.coroutine
def handle(request):
    """Render the search page for a ``GET /`` request.

    The ``query`` parameter of the request, when present and not empty,
    is looked up in the module-level ``index``; the matching character
    descriptions and a summary message are inserted in ``TEMPLATE``.
    Without a query, only a hint message is shown.

    Returns a ``web.Response`` with the rendered page.
    """
    import html  # local import: only this handler needs it

    query = request.GET.get('query', '')
    print('Query: {!r}'.format(query))
    if query:
        lines = list(index.find_descriptions(query))
        res = '\n'.join(lines)
        # "1 match", but "0 matches" and "2 matches"
        plural = '' if len(lines) == 1 else 'es'
        msg = '{} match{} for {!r}'.format(len(lines), plural, query)
    else:
        res = ''
        msg = 'Type words describing characters, e.g. chess.'
    # The query comes from the client, and the results can legitimately
    # contain characters such as '<' or '&': escape everything that is
    # inserted in the HTML page.
    text = TEMPLATE.format(query=html.escape(query),
                           result=html.escape(res),
                           message=html.escape(msg))
    return web.Response(content_type=CONTENT_TYPE, text=text)
@asyncio.coroutine
def init(loop):
    """Create the web application and start serving it on 127.0.0.1:8080.

    ``GET /`` requests are routed to ``handle``. The address actually
    bound is printed once the server is listening.
    """
    application = web.Application(loop=loop)
    application.router.add_route('GET', '/', handle)
    protocol_factory = application.make_handler()
    server = yield from loop.create_server(protocol_factory,
                                           '127.0.0.1', 8080)
    address = server.sockets[0].getsockname()
    print('Serving on {}. Hit CTRL-C to stop.'.format(address))
def main():
    """Start the server with ``init``, then keep the event loop running."""
    event_loop = asyncio.get_event_loop()
    startup = init(event_loop)
    event_loop.run_until_complete(startup)
    event_loop.run_forever()
# Script entry point: create the index shared by all requests, then serve.
if __name__ == '__main__':
index = UnicodeNameIndex()
main()

70
concurrency/tcp_charserver.py Executable file
View File

@ -0,0 +1,70 @@
#!/usr/bin/env python3
import asyncio
from charfinder import UnicodeNameIndex
# Line terminator appended by ``writeln`` to every line it sends.
CRLF = b'\r\n'
# Written to the client by ``handle_queries`` before each query is read.
PROMPT = b'?> '
index = None # a UnicodeNameIndex instance
def writeln(writer, arg):
    """Write ``arg`` to ``writer`` as encoded, CRLF-terminated lines.

    ``arg`` is either a single string, sent as one line, or an iterable
    of strings, each sent as a line of its own.
    """
    lines = [arg] if isinstance(arg, str) else arg
    writer.writelines(line.encode() + CRLF for line in lines)
@asyncio.coroutine
def handle_queries(reader, writer):
    """Serve one client: prompt, read a query, send the matches, repeat.

    The session ends when the client closes the connection, or when a
    line starts with a control character or cannot be decoded. A blank
    line is ignored and the prompt is sent again. The client socket is
    closed when the session ends.
    """
    while True:
        writer.write(PROMPT)
        yield from writer.drain()
        data = yield from reader.readline()
        if not data:  # readline() gives empty bytes at EOF: client is gone
            break
        try:
            query = data.decode().strip()
        except UnicodeDecodeError:
            query = '\x00'  # treated as a control character below
        if not query:
            # Blank line: nothing to search for. Without this check the
            # ord() call below raises TypeError on the empty string.
            continue
        if ord(query[0]) < 32:  # control character: end the session
            break
        client = writer.get_extra_info('peername')
        print('Received from {}: {}'.format(client, query))
        lines = list(index.find_descriptions(query))
        if lines:
            writeln(writer, lines)
            plural = 'es' if len(lines) > 1 else ''
            msg = '({} match{} for {!r})'.format(len(lines), plural, query)
            writeln(writer, msg)
            print('Sent: {} lines + total'.format(len(lines)))
        else:
            writeln(writer, '(No match for {!r})'.format(query))
            print('Sent: 1 line, no match')
        yield from writer.drain()
    print('Close the client socket')
    writer.close()
def main():
    """Run the TCP server on 127.0.0.1:8888 until CTRL-C is pressed.

    After the interruption the server is closed, the loop waits for the
    close to complete, and then the loop itself is closed.
    """
    loop = asyncio.get_event_loop()
    server = loop.run_until_complete(
        asyncio.start_server(handle_queries, '127.0.0.1', 8888, loop=loop))
    address = server.sockets[0].getsockname()
    print('Serving on {}. Hit CTRL-C to stop.'.format(address))
    try:
        loop.run_forever()
    except KeyboardInterrupt:  # CTRL+C pressed
        pass
    server.close()
    loop.run_until_complete(server.wait_closed())
    loop.close()
# Script entry point: create the index shared by all clients, then serve.
if __name__ == '__main__':
index = UnicodeNameIndex()
main()

View File

@ -0,0 +1,86 @@
import pytest
from charfinder import UnicodeNameIndex, tokenize, sample_chars
from unicodedata import name
@pytest.fixture
def sample_index():
    """Index built from ``sample_chars`` only, created anew for each test."""
    index = UnicodeNameIndex(sample_chars)
    return index
@pytest.fixture(scope="module")
def full_index():
    """Index created without arguments, shared by the tests of this module."""
    index = UnicodeNameIndex()
    return index
def test_tokenize():
    """tokenize() splits on non-word characters and uppercases the words."""
    cases = [
        ('', []),
        ('a b', ['A', 'B']),
        ('a-b', ['A', 'B']),
        ('abc', ['ABC']),
        ('café', ['CAFÉ']),
    ]
    for text, expected in cases:
        assert list(tokenize(text)) == expected
def test_index():
    """The names of the sample characters contain 9 distinct words."""
    index = UnicodeNameIndex(sample_chars)
    word_count = len(index)
    assert word_count == 9
def test_find_word_no_match(sample_index):
    """A word that is not in the index matches nothing."""
    found = list(sample_index.find_codes('qwertyuiop'))
    assert found == []
def test_find_word_1_match(sample_index):
    """'currency' occurs in the name of a single sample character."""
    found = []
    for code in sample_index.find_codes('currency'):
        found.append((code, name(chr(code))))
    assert found == [(8352, 'EURO-CURRENCY SIGN')]
def test_find_word_2_matches(sample_index):
    """'Euro' matches two sample characters, returned in code point order."""
    expected = [
        (8352, 'EURO-CURRENCY SIGN'),
        (8364, 'EURO SIGN'),
    ]
    found = []
    for code in sample_index.find_codes('Euro'):
        found.append((code, name(chr(code))))
    assert found == expected
def test_find_2_words_no_matches(sample_index):
    """Both words are indexed, but no sample character has both."""
    found = list(sample_index.find_codes('Euro letter'))
    assert found == []
def test_find_2_words_no_matches_because_one_not_found(sample_index):
    """One word missing from the index is enough to match nothing."""
    found = list(sample_index.find_codes('letter qwertyuiop'))
    assert found == []
def test_find_2_words_1_match(sample_index):
    """'sign dollar' matches exactly one sample character."""
    found = list(sample_index.find_codes('sign dollar'))
    match_count = len(found)
    assert match_count == 1
def test_find_2_words_2_matches(sample_index):
    """'latin letter' matches exactly two sample characters."""
    found = list(sample_index.find_codes('latin letter'))
    match_count = len(found)
    assert match_count == 2
def test_find_codes_many_matches_full(full_index):
    """'letter' matches more than 7000 characters in the full index."""
    found = list(full_index.find_codes('letter'))
    match_count = len(found)
    assert match_count > 7000
def test_find_1_word_1_match_full(full_index):
    """'registered' matches a single character in the full index."""
    found = []
    for code in full_index.find_codes('registered'):
        found.append((code, name(chr(code))))
    assert found == [(174, 'REGISTERED SIGN')]
def test_find_1_word_2_matches_full(full_index):
    """'rook' matches exactly two characters in the full index."""
    found = list(full_index.find_codes('rook'))
    match_count = len(found)
    assert match_count == 2
def test_find_3_words_no_matches_full(full_index):
    """No character in the full index matches 'no such character'."""
    found = list(full_index.find_codes('no such character'))
    assert found == []

View File

@ -15,8 +15,8 @@ But, without validation, these public attributes can cause trouble::
>>> raisins = LineItem('Golden raisins', 10, 6.95)
>>> raisins.subtotal()
69.5
>>> raisins.weight = -20
>>> raisins.subtotal()
>>> raisins.weight = -20 # garbage in...
>>> raisins.subtotal() # garbage out...
-139.0
# END LINEITEM_PROBLEM_V1

View File

@ -23,36 +23,47 @@ No change was made::
>>> raisins.weight
10
The values of the attributes managed by the properties are stored in
instance attributes, created in each ``LineItem`` instance::
# BEGIN LINEITEM_V2_PROP_DEMO
>>> nutmeg = LineItem('Moluccan nutmeg', 8, 13.95)
>>> nutmeg.weight, nutmeg.price # <1>
(8, 13.95)
>>> sorted(vars(nutmeg).items()) # <2>
[('description', 'Moluccan nutmeg'), ('price', 13.95), ('weight', 8)]
# END LINEITEM_V2_PROP_DEMO
"""
# BEGIN LINEITEM_V2_PROP
# BEGIN LINEITEM_V2_PROP_FACTORY_FUNCTION
def quantity(storage_name): # <1>
@property # <2>
def new_prop(self):
return self.__dict__[storage_name] # <3>
def qty_getter(instance): # <2>
return instance.__dict__[storage_name] # <3>
@new_prop.setter
def new_prop(self, value):
def qty_setter(instance, value): # <4>
if value > 0:
self.__dict__[storage_name] = value # <4>
instance.__dict__[storage_name] = value # <5>
else:
raise ValueError('value must be > 0')
return new_prop # <5>
return property(qty_getter, qty_setter) # <6>
# END LINEITEM_V2_PROP_FACTORY_FUNCTION
# BEGIN LINEITEM_V2_PROP_CLASS
class LineItem:
weight = quantity('weight') # <6>
price = quantity('price') # <7>
weight = quantity('weight') # <1>
price = quantity('price') # <2>
def __init__(self, description, weight, price):
self.description = description
self.weight = weight
self.weight = weight # <3>
self.price = price
def subtotal(self):
return self.weight * self.price
# END LINEITEM_V2_PROP
return self.weight * self.price # <4>
# END LINEITEM_V2_PROP_CLASS

View File

@ -29,11 +29,11 @@ instance::
>>> raisins = LineItem('Golden raisins', 10, 6.95)
>>> dir(raisins) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
['_Quantity_0', '_Quantity_1', '__class__', ...
['_Quantity:0', '_Quantity:1', '__class__', ...
'description', 'price', 'subtotal', 'weight']
>>> raisins._Quantity_0
>>> getattr(raisins, '_Quantity:0')
10
>>> raisins._Quantity_1
>>> getattr(raisins, '_Quantity:1')
6.95
"""
@ -47,7 +47,7 @@ class Quantity:
cls = self.__class__ # <2>
prefix = cls.__name__
index = cls.__counter
self.storage_name = '_{}_{}'.format(prefix, index) # <3>
self.storage_name = '_{}:{}'.format(prefix, index) # <3>
cls.__counter += 1 # <4>
def __get__(self, instance, owner): # <5>

View File

@ -29,11 +29,11 @@ instance::
>>> raisins = LineItem('Golden raisins', 10, 6.95)
>>> dir(raisins) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
['_Quantity_0', '_Quantity_1', '__class__', ...
['_Quantity:0', '_Quantity:1', '__class__', ...
'description', 'price', 'subtotal', 'weight']
>>> raisins._Quantity_0
>>> getattr(raisins, '_Quantity:0')
10
>>> raisins._Quantity_1
>>> getattr(raisins, '_Quantity:1')
6.95
If the descriptor is accessed in the class, the descriptor object is
@ -56,7 +56,7 @@ class Quantity:
cls = self.__class__
prefix = cls.__name__
index = cls.__counter
self.storage_name = '_{}_{}'.format(prefix, index)
self.storage_name = '_{}:{}'.format(prefix, index)
cls.__counter += 1
def __get__(self, instance, owner):

View File

@ -29,11 +29,11 @@ instance::
>>> raisins = LineItem('Golden raisins', 10, 6.95)
>>> dir(raisins) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
['_Quantity_0', '_Quantity_1', '__class__', ...
['_Quantity:0', '_Quantity:1', '__class__', ...
'description', 'price', 'subtotal', 'weight']
>>> raisins._Quantity_0
>>> getattr(raisins, '_Quantity:0')
10
>>> raisins._Quantity_1
>>> getattr(raisins, '_Quantity:1')
6.95
If the descriptor is accessed in the class, the descriptor object is

View File

@ -28,12 +28,12 @@ alternate attributes, created by the descriptors in each ``LineItem``
instance::
>>> raisins = LineItem('Golden raisins', 10, 6.95)
>>> sorted(dir(raisins)) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
[..., '_quantity_0', '_quantity_1', 'description',
'price', 'subtotal', 'weight']
>>> raisins._quantity_0
>>> dir(raisins) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
[... '_quantity:0', '_quantity:1', 'description',
'price', 'subtotal', 'weight']
>>> getattr(raisins, '_quantity:0')
10
>>> raisins._quantity_1
>>> getattr(raisins, '_quantity:1')
6.95
"""
@ -46,24 +46,22 @@ def quantity(): # <1>
except AttributeError:
quantity.counter = 0 # <3>
storage_name = '_{}_{}'.format('quantity', quantity.counter) # <4>
storage_name = '_{}:{}'.format('quantity', quantity.counter) # <4>
@property # <5>
def prop(self):
return getattr(self, storage_name)
def qty_getter(instance): # <5>
return getattr(instance, storage_name)
@prop.setter
def prop(self, value):
def qty_setter(instance, value):
if value > 0:
setattr(self, storage_name, value)
setattr(instance, storage_name, value)
else:
raise ValueError('value must be > 0')
return prop # <6>
return property(qty_getter, qty_setter)
# END LINEITEM_V4_PROP
class LineItem:
weight = quantity() # <7>
weight = quantity()
price = quantity()
def __init__(self, description, weight, price):
@ -73,4 +71,4 @@ class LineItem:
def subtotal(self):
return self.weight * self.price
# END LINEITEM_V4_PROP

View File

@ -29,12 +29,12 @@ instance::
>>> raisins = LineItem('Golden raisins', 10, 6.95)
>>> dir(raisins) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
['_NonBlank_0', '_Quantity_0', '_Quantity_1', '__class__', ...
['_NonBlank:0', '_Quantity:0', '_Quantity:1', '__class__', ...
'description', 'price', 'subtotal', 'weight']
>>> raisins._Quantity_0
>>> getattr(raisins, '_Quantity:0')
10
>>> raisins._Quantity_1
6.95
>>> getattr(raisins, '_NonBlank:0')
'Golden raisins'
If the descriptor is accessed in the class, the descriptor object is
returned:

View File

@ -0,0 +1,81 @@
"""
A line item for a bulk food order has description, weight and price fields::
>>> raisins = LineItem('Golden raisins', 10, 6.95)
>>> raisins.weight, raisins.description, raisins.price
(10, 'Golden raisins', 6.95)
A ``subtotal`` method gives the total price for that line item::
>>> raisins.subtotal()
69.5
The weight of a ``LineItem`` must be greater than 0::
>>> raisins.weight = -20
Traceback (most recent call last):
...
ValueError: value must be > 0; -20 is not valid.
No change was made::
>>> raisins.weight
10
The values of the attributes managed by the descriptors are stored in
alternate attributes, created by the descriptors in each ``LineItem``
instance::
>>> raisins = LineItem('Golden raisins', 10, 6.95)
>>> dir(raisins) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
['_Check:0', '_Check:1', '_Check:2', '__class__', ...
'description', 'price', 'subtotal', 'weight']
>>> [getattr(raisins, name) for name in dir(raisins) if name.startswith('_Check:')]
['Golden raisins', 10, 6.95]
If the descriptor is accessed in the class, the descriptor object is
returned:
>>> LineItem.price # doctest: +ELLIPSIS
<model_v6.Check object at 0x...>
>>> br_nuts = LineItem('Brazil nuts', 10, 34.95)
>>> br_nuts.price
34.95
The `NonBlank` descriptor prevents empty or blank strings from being used
for the description:
>>> br_nuts.description = ' '
Traceback (most recent call last):
...
ValueError: ' ' is not valid.
"""
# BEGIN LINEITEM_V5
import model_v6 as model # <1>
def gt_zero(x):
    '''value must be > 0'''
    # NOTE(review): the docstring above looks like runtime text (it appears
    # at the start of the ValueError message in the module doctest), so it
    # is kept verbatim - confirm against model.Check before rewording it.
    if x > 0:
        return x
    return model.INVALID
def non_blank(txt):
    # Return ``txt`` without leading and trailing whitespace, or
    # model.INVALID when nothing is left.
    # NOTE(review): no docstring on purpose - the module doctest shows an
    # error message with no prefix for this checker; confirm against
    # model.Check before adding one.
    stripped = txt.strip()
    if not stripped:
        return model.INVALID
    return stripped
class LineItem:
    """Line item of a bulk food order, with checked attributes.

    ``description`` is checked by ``non_blank``; ``weight`` and ``price``
    are checked by ``gt_zero``.
    """

    # The order of these three lines determines the ``_Check:N`` storage
    # names listed in the module doctest.
    description = model.Check(non_blank)  # <2>
    weight = model.Check(gt_zero)
    price = model.Check(gt_zero)

    def __init__(self, description, weight, price):
        # each assignment goes through the corresponding Check descriptor
        self.description = description
        self.weight = weight
        self.price = price

    def subtotal(self):
        """Return the total price of this line item: weight times price."""
        return self.weight * self.price
# END LINEITEM_V5

View File

@ -1,166 +1,199 @@
"""
Data descriptor (a.k.a. overriding or enforced descriptor):
Overriding descriptor (a.k.a. data descriptor or enforced descriptor):
>>> o = Model()
>>> o.data # doctest: +ELLIPSIS
DataDescriptor.__get__() invoked with args:
self = <descriptorkinds.DataDescriptor object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
owner = <class 'descriptorkinds.Model'>
>>> Model.data # doctest: +ELLIPSIS
DataDescriptor.__get__() invoked with args:
self = <descriptorkinds.DataDescriptor object at 0x...>
instance = None
owner = <class 'descriptorkinds.Model'>
# BEGIN DESCR_KINDS_DEMO1
>>> obj = Managed() # <1>
>>> obj.over # <2>
-> Overriding.__get__(<Overriding object>, <Managed object>, <class Managed>)
>>> Managed.over # <3>
-> Overriding.__get__(<Overriding object>, None, <class Managed>)
>>> obj.over = 7 # <4>
-> Overriding.__set__(<Overriding object>, <Managed object>, 7)
>>> obj.over # <5>
-> Overriding.__get__(<Overriding object>, <Managed object>, <class Managed>)
>>> obj.__dict__['over'] = 8 # <6>
>>> vars(obj) # <7>
{'over': 8}
>>> obj.over # <8>
-> Overriding.__get__(<Overriding object>, <Managed object>, <class Managed>)
A data descriptor cannot be shadowed by assigning to an instance:
# END DESCR_KINDS_DEMO1
>>> o.data = 7 # doctest: +ELLIPSIS
DataDescriptor.__set__() invoked with args:
self = <descriptorkinds.DataDescriptor object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
value = 7
>>> o.data # doctest: +ELLIPSIS
DataDescriptor.__get__() invoked with args:
self = <descriptorkinds.DataDescriptor object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
owner = <class 'descriptorkinds.Model'>
Overriding descriptor without ``__get__``:
(these tests are reproduced below without +ELLIPSIS directives for inclusion in the book;
look for DESCR_KINDS_DEMO2)
Not even by poking the attribute into the instance ``__dict__``:
>>> obj.over_no_get # doctest: +ELLIPSIS
<descriptorkinds.OverridingNoGet object at 0x...>
>>> Managed.over_no_get # doctest: +ELLIPSIS
<descriptorkinds.OverridingNoGet object at 0x...>
>>> obj.over_no_get = 7
-> OverridingNoGet.__set__(<OverridingNoGet object>, <Managed object>, 7)
>>> obj.over_no_get # doctest: +ELLIPSIS
<descriptorkinds.OverridingNoGet object at 0x...>
>>> obj.__dict__['over_no_get'] = 9
>>> obj.over_no_get
9
>>> obj.over_no_get = 7
-> OverridingNoGet.__set__(<OverridingNoGet object>, <Managed object>, 7)
>>> obj.over_no_get
9
>>> o.__dict__['data'] = 8
>>> o.data # doctest: +ELLIPSIS
DataDescriptor.__get__() invoked with args:
self = <descriptorkinds.DataDescriptor object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
owner = <class 'descriptorkinds.Model'>
Non-overriding descriptor (a.k.a. non-data descriptor or shadowable descriptor):
# BEGIN DESCR_KINDS_DEMO3
Data descriptor without ``__get__``:
>>> o.data_no_get # doctest: +ELLIPSIS
<descriptorkinds.DataDescriptorNoGet object at 0x...>
>>> Model.data_no_get # doctest: +ELLIPSIS
<descriptorkinds.DataDescriptorNoGet object at 0x...>
>>> o.data_no_get = 7 # doctest: +ELLIPSIS
DataDescriptorNoGet.__set__() invoked with args:
self = <descriptorkinds.DataDescriptorNoGet object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
value = 7
>>> o.data_no_get # doctest: +ELLIPSIS
<descriptorkinds.DataDescriptorNoGet object at 0x...>
Poking the attribute into the instance ``__dict__`` means you can read the new
value for the attribute, but setting it still triggers ``__set__``:
>>> o.__dict__['data_no_get'] = 8
>>> o.data_no_get
8
>>> o.data_no_get = 7 # doctest: +ELLIPSIS
DataDescriptorNoGet.__set__() invoked with args:
self = <descriptorkinds.DataDescriptorNoGet object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
value = 7
>>> o.data_no_get # doctest: +ELLIPSIS
8
Non-data descriptor (a.k.a. non-overriding or shadowable descriptor):
>>> o = Model()
>>> o.non_data # doctest: +ELLIPSIS
NonDataDescriptor.__get__() invoked with args:
self = <descriptorkinds.NonDataDescriptor object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
owner = <class 'descriptorkinds.Model'>
>>> Model.non_data # doctest: +ELLIPSIS
NonDataDescriptor.__get__() invoked with args:
self = <descriptorkinds.NonDataDescriptor object at 0x...>
instance = None
owner = <class 'descriptorkinds.Model'>
A non-data descriptor can be shadowed by assigning to an instance:
>>> o.non_data = 7
>>> o.non_data
7
Methods are non-data descriptors:
>>> o.spam # doctest: +ELLIPSIS
<bound method Model.spam of <descriptorkinds.Model object at 0x...>>
>>> Model.spam # doctest: +ELLIPSIS
<function Model.spam at 0x...>
>>> o.spam() # doctest: +ELLIPSIS
Model.spam() invoked with arg:
self = <descriptorkinds.Model object at 0x...>
>>> o.spam = 7
>>> o.spam
>>> obj = Managed()
>>> obj.non_over # <1>
-> NonOverriding.__get__(<NonOverriding object>, <Managed object>, <class Managed>)
>>> obj.non_over = 7 # <2>
>>> obj.non_over # <3>
7
>>> Managed.non_over # <4>
-> NonOverriding.__get__(<NonOverriding object>, None, <class Managed>)
>>> del obj.non_over # <5>
>>> obj.non_over # <6>
-> NonOverriding.__get__(<NonOverriding object>, <Managed object>, <class Managed>)
# END DESCR_KINDS_DEMO3
No descriptor type survives being overwritten on the class itself:
>>> Model.data = 1
>>> o.data
1
>>> Model.data_no_get = 2
>>> o.data_no_get
2
>>> Model.non_data = 3
>>> o.non_data
# BEGIN DESCR_KINDS_DEMO4
>>> obj = Managed() # <1>
>>> Managed.over = 1 # <2>
>>> Managed.over_no_get = 2
>>> Managed.non_over = 3
>>> obj.over, obj.over_no_get, obj.non_over # <3>
(1, 2, 3)
# END DESCR_KINDS_DEMO4
Methods are non-overriding descriptors:
>>> obj.spam # doctest: +ELLIPSIS
<bound method Managed.spam of <descriptorkinds.Managed object at 0x...>>
>>> Managed.spam # doctest: +ELLIPSIS
<function Managed.spam at 0x...>
>>> obj.spam()
-> Managed.spam(<Managed object>)
>>> Managed.spam()
Traceback (most recent call last):
...
TypeError: spam() missing 1 required positional argument: 'self'
>>> Managed.spam(obj)
-> Managed.spam(<Managed object>)
>>> Managed.spam.__get__(obj) # doctest: +ELLIPSIS
<bound method Managed.spam of <descriptorkinds.Managed object at 0x...>>
>>> obj.spam.__func__ is Managed.spam
True
>>> obj.spam = 7
>>> obj.spam
7
"""
"""
NOTE: These tests are here because I can't add callouts after +ELLIPSIS
directives and if doctest runs them without +ELLIPSIS I get test failures.
class DataDescriptor:
"a.k.a. overriding or enforced descriptor"
# BEGIN DESCR_KINDS_DEMO2
>>> obj.over_no_get # <1>
<__main__.OverridingNoGet object at 0x665bcc>
>>> Managed.over_no_get # <2>
<__main__.OverridingNoGet object at 0x665bcc>
>>> obj.over_no_get = 7 # <3>
-> OverridingNoGet.__set__(<OverridingNoGet object>, <Managed object>, 7)
>>> obj.over_no_get # <4>
<__main__.OverridingNoGet object at 0x665bcc>
>>> obj.__dict__['over_no_get'] = 9 # <5>
>>> obj.over_no_get # <6>
9
>>> obj.over_no_get = 7 # <7>
-> OverridingNoGet.__set__(<OverridingNoGet object>, <Managed object>, 7)
>>> obj.over_no_get # <8>
9
# END DESCR_KINDS_DEMO2
Methods are non-overriding descriptors:
# BEGIN DESCR_KINDS_DEMO5
>>> obj = Managed()
>>> obj.spam # <1>
<bound method Managed.spam of <descriptorkinds.Managed object at 0x74c80c>>
>>> Managed.spam # <2>
<function Managed.spam at 0x734734>
>>> obj.spam = 7 # <3>
>>> obj.spam
7
# END DESCR_KINDS_DEMO5
"""
# BEGIN DESCR_KINDS
### auxiliary functions for display only ###
def cls_name(obj_or_cls):
    """Return the name of ``obj_or_cls`` if it is a class, else of its class.

    Only the part of the name after the last dot, if any, is returned.
    """
    kind = type(obj_or_cls)
    cls = obj_or_cls if kind is type else kind
    return cls.__name__.rpartition('.')[-1]
def display(obj):
    """Return a short text for ``obj``, with no memory address in it.

    Classes are shown as ``<class Name>``, ``None`` and ints by their
    ``repr``, and anything else as ``<Name object>``.
    """
    kind = type(obj)
    if kind is type:
        return '<class {}>'.format(obj.__name__)
    if kind in [type(None), int]:
        return repr(obj)
    return '<{} object>'.format(cls_name(obj))
def print_args(name, *args):
    """Print a pseudo-call of the special method ``__name__`` with ``args``.

    The class named in the output is that of the first argument; every
    argument is shown as formatted by ``display``.
    """
    shown = [display(arg) for arg in args]
    owner_name = cls_name(args[0])
    print('-> {}.__{}__({})'.format(owner_name, name, ', '.join(shown)))
### essential classes for this example ###
class Overriding: # <1>
"""a.k.a. data descriptor or enforced descriptor"""
def __get__(self, instance, owner):
print('DataDescriptor.__get__() invoked with args:')
print(' self = ', self)
print(' instance = ', instance)
print(' owner = ', owner)
print_args('get', self, instance, owner) # <2>
def __set__(self, instance, value):
print('DataDescriptor.__set__() invoked with args:')
print(' self = ', self)
print(' instance = ', instance)
print(' value = ', value)
print_args('set', self, instance, value)
class DataDescriptorNoGet:
class OverridingNoGet: # <3>
"""an overriding descriptor without ``__get__``"""
def __set__(self, instance, value):
print('DataDescriptorNoGet.__set__() invoked with args:')
print(' self = ', self)
print(' instance = ', instance)
print(' value = ', value)
print_args('set', self, instance, value)
class NonDataDescriptor:
"a.k.a. non-overriding or shadowable descriptor"
class NonOverriding: # <4>
"""a.k.a. non-data or shadowable descriptor"""
def __get__(self, instance, owner):
print('NonDataDescriptor.__get__() invoked with args:')
print(' self = ', self)
print(' instance = ', instance)
print(' owner = ', owner)
print_args('get', self, instance, owner)
class Model:
data = DataDescriptor()
data_no_get = DataDescriptorNoGet()
non_data = NonDataDescriptor()
class Managed: # <5>
over = Overriding()
over_no_get = OverridingNoGet()
non_over = NonOverriding()
def spam(self):
print('Model.spam() invoked with arg:')
print(' self = ', self)
def spam(self): # <6>
print('-> Managed.spam({})'.format(display(self)))
# END DESCR_KINDS

View File

@ -0,0 +1,169 @@
"""
Overriding descriptor (a.k.a. data descriptor or enforced descriptor):
>>> obj = Model()
>>> obj.over # doctest: +ELLIPSIS
Overriding.__get__() invoked with args:
self = <descriptorkinds.Overriding object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
owner = <class 'descriptorkinds.Model'>
>>> Model.over # doctest: +ELLIPSIS
Overriding.__get__() invoked with args:
self = <descriptorkinds.Overriding object at 0x...>
instance = None
owner = <class 'descriptorkinds.Model'>
An overriding descriptor cannot be shadowed by assigning to an instance:
>>> obj = Model()
>>> obj.over = 7 # doctest: +ELLIPSIS
Overriding.__set__() invoked with args:
self = <descriptorkinds.Overriding object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
value = 7
>>> obj.over # doctest: +ELLIPSIS
Overriding.__get__() invoked with args:
self = <descriptorkinds.Overriding object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
owner = <class 'descriptorkinds.Model'>
Not even by poking the attribute into the instance ``__dict__``:
>>> obj.__dict__['over'] = 8
>>> obj.over # doctest: +ELLIPSIS
Overriding.__get__() invoked with args:
self = <descriptorkinds.Overriding object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
owner = <class 'descriptorkinds.Model'>
>>> vars(obj)
{'over': 8}
Overriding descriptor without ``__get__``:
>>> obj.over_no_get # doctest: +ELLIPSIS
<descriptorkinds.OverridingNoGet object at 0x...>
>>> Model.over_no_get # doctest: +ELLIPSIS
<descriptorkinds.OverridingNoGet object at 0x...>
>>> obj.over_no_get = 7 # doctest: +ELLIPSIS
OverridingNoGet.__set__() invoked with args:
self = <descriptorkinds.OverridingNoGet object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
value = 7
>>> obj.over_no_get # doctest: +ELLIPSIS
<descriptorkinds.OverridingNoGet object at 0x...>
Poking the attribute into the instance ``__dict__`` means you can read the new
value for the attribute, but setting it still triggers ``__set__``:
>>> obj.__dict__['over_no_get'] = 9
>>> obj.over_no_get
9
>>> obj.over_no_get = 7 # doctest: +ELLIPSIS
OverridingNoGet.__set__() invoked with args:
self = <descriptorkinds.OverridingNoGet object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
value = 7
>>> obj.over_no_get
9
Non-overriding descriptor (a.k.a. non-data descriptor or shadowable descriptor):
>>> obj = Model()
>>> obj.non_over # doctest: +ELLIPSIS
NonOverriding.__get__() invoked with args:
self = <descriptorkinds.NonOverriding object at 0x...>
instance = <descriptorkinds.Model object at 0x...>
owner = <class 'descriptorkinds.Model'>
>>> Model.non_over # doctest: +ELLIPSIS
NonOverriding.__get__() invoked with args:
self = <descriptorkinds.NonOverriding object at 0x...>
instance = None
owner = <class 'descriptorkinds.Model'>
A non-overriding descriptor can be shadowed by assigning to an instance:
>>> obj.non_over = 7
>>> obj.non_over
7
Methods are non-overriding descriptors:
>>> obj.spam # doctest: +ELLIPSIS
<bound method Model.spam of <descriptorkinds.Model object at 0x...>>
>>> Model.spam # doctest: +ELLIPSIS
<function Model.spam at 0x...>
>>> obj.spam() # doctest: +ELLIPSIS
Model.spam() invoked with arg:
self = <descriptorkinds.Model object at 0x...>
>>> obj.spam = 7
>>> obj.spam
7
No descriptor type survives being overwritten on the class itself:
>>> Model.over = 1
>>> obj.over
1
>>> Model.over_no_get = 2
>>> obj.over_no_get
2
>>> Model.non_over = 3
>>> obj.non_over
7
"""
# BEGIN DESCRIPTORKINDS
def print_args(name, *args):  # <1>
    """Print the arguments received by a ``__get__`` or ``__set__`` call.

    ``name`` is ``'get'`` or ``'set'``; the first item of ``args`` is the
    descriptor, whose class name is shown in the heading. The last
    argument is labeled ``value`` for ``'set'`` and ``owner`` otherwise.
    """
    last_label = 'value' if name == 'set' else 'owner'
    labels = ['self', 'instance', last_label]
    heading = '{}.__{}__() invoked with args:'
    print(heading.format(args[0].__class__.__name__, name))
    for label, arg in zip(labels, args):
        print(' {:8} = {}'.format(label, arg))
class Overriding:  # <2>
    """Descriptor with both ``__get__`` and ``__set__``.

    Also known as data descriptor or enforced descriptor. Both methods
    only report the call through ``print_args``; nothing is stored, and
    ``__get__`` returns None.
    """

    def __get__(self, instance, owner):
        print_args('get', self, instance, owner)  # <3>

    def __set__(self, instance, value):
        print_args('set', self, instance, value)
class OverridingNoGet:  # <4>
    """Overriding descriptor that implements ``__set__`` only.

    ``__set__`` only reports the call through ``print_args``; nothing is
    stored.
    """

    def __set__(self, instance, value):
        print_args('set', self, instance, value)
class NonOverriding:  # <5>
    """Descriptor that implements ``__get__`` only.

    Also known as non-data descriptor or shadowable descriptor.
    ``__get__`` only reports the call through ``print_args`` and returns
    None.
    """

    def __get__(self, instance, owner):
        print_args('get', self, instance, owner)
class Model:  # <6>
    """Class with one descriptor of each kind, plus an ordinary method."""

    over = Overriding()
    over_no_get = OverridingNoGet()
    non_over = NonOverriding()

    def spam(self):  # <7>
        """Print a report showing the instance this method was called on."""
        print('Model.spam() invoked with arg:')
        print(' self =', self)
#END DESCRIPTORKINDS

View File

@ -0,0 +1,41 @@
"""
# BEGIN FUNC_DESCRIPTOR_DEMO
>>> word = Text('forward')
>>> word # <1>
Text('forward')
>>> word.reverse() # <2>
Text('drawrof')
>>> Text.reverse(Text('backward')) # <3>
Text('drawkcab')
>>> type(Text.reverse), type(word.reverse) # <4>
(<class 'function'>, <class 'method'>)
>>> list(map(Text.reverse, ['repaid', (10, 20, 30), Text('stressed')])) # <5>
['diaper', (30, 20, 10), Text('desserts')]
>>> Text.reverse.__get__(word) # <6>
<bound method Text.reverse of Text('forward')>
>>> Text.reverse.__get__(None, Text) # <7>
<function Text.reverse at 0x101244e18>
>>> word.reverse # <8>
<bound method Text.reverse of Text('forward')>
>>> word.reverse.__self__ # <9>
Text('forward')
>>> word.reverse.__func__ is Text.reverse # <10>
True
# END FUNC_DESCRIPTOR_DEMO
"""
# BEGIN FUNC_DESCRIPTOR_EX
import collections
class Text(collections.UserString):
    """``UserString`` subclass with a constructor-style repr and a
    ``reverse`` method, used to show functions acting as descriptors."""

    def __repr__(self):
        return 'Text({!r})'.format(self.data)

    def reverse(self):
        """Return ``self`` sliced with a step of -1.

        Only slicing is used, so calling ``Text.reverse`` directly on any
        object that supports ``obj[::-1]`` also works.
        """
        return self[::-1]
# END FUNC_DESCRIPTOR_EX

View File

@ -6,7 +6,7 @@ class Quantity:
cls = self.__class__
prefix = cls.__name__
index = cls.__counter
self.storage_name = '_{}_{}'.format(prefix, index)
self.storage_name = '_{}:{}'.format(prefix, index)
cls.__counter += 1
def __get__(self, instance, owner):

View File

@ -9,7 +9,7 @@ class AutoStorage: # <1>
cls = self.__class__
prefix = cls.__name__
index = cls.__counter
self.storage_name = '_{}_{}'.format(prefix, index)
self.storage_name = '_{}:{}'.format(prefix, index)
cls.__counter += 1
def __get__(self, instance, owner):
@ -34,6 +34,7 @@ class Validated(abc.ABC, AutoStorage): # <3>
class Quantity(Validated): # <7>
"""a number greater than zero"""
def validate(self, instance, value):
if value <= 0:
@ -42,6 +43,7 @@ class Quantity(Validated): # <7>
class NonBlank(Validated):
"""a string with at least one non-space character"""
def validate(self, instance, value):
value = value.strip()

72
descriptors/model_v6.py Normal file
View File

@ -0,0 +1,72 @@
# BEGIN MODEL_V5
import abc
class AutoStorage:  # <1>
    """Descriptor that keeps the managed value in the instance, under an
    attribute name generated when the descriptor is created.

    The generated name has the form ``_<ClassName>:<index>``. It contains
    a colon, so it is not a valid identifier and is only read and written
    here through ``getattr``/``setattr``.
    """

    # Name-mangled to _AutoStorage__counter. The augmented assignment in
    # __init__ targets type(self), so a subclass gets its own counter the
    # first time one of its instances is created.
    __counter = 0

    def __init__(self):
        cls = self.__class__
        prefix = cls.__name__
        index = cls.__counter
        self.storage_name = '_{}:{}'.format(prefix, index)
        cls.__counter += 1

    def __get__(self, instance, owner):
        if instance is None:
            # accessed through the managed class: return the descriptor
            return self
        else:
            return getattr(instance, self.storage_name)

    def __set__(self, instance, value):
        setattr(instance, self.storage_name, value)  # <2>
class Validated(abc.ABC, AutoStorage):  # <3>
    """Abstract storage descriptor that passes every assigned value
    through ``validate`` before storing it."""

    def __set__(self, instance, value):
        # validate() may return a transformed value; that is what is stored
        value = self.validate(instance, value)  # <4>
        super().__set__(instance, value)  # <5>

    @abc.abstractmethod
    def validate(self, instance, value):  # <6>
        """return validated value or raise ValueError"""
# Sentinel a checker returns to reject a value; compared by identity.
INVALID = object()


class Check(Validated):
    """Validated descriptor that delegates validation to a callable.

    ``checker`` is called with the candidate value. It returns the value
    to store (possibly transformed) or ``INVALID`` to reject it. The
    checker's docstring, when present, prefixes the error message.
    """

    def __init__(self, checker):
        super().__init__()
        self.checker = checker
        if checker.__doc__ is None:
            doc = ''
        else:
            # self.message is a str.format() template, so braces in the
            # docstring are doubled to keep them from being read as
            # replacement fields
            escaped = checker.__doc__.replace('{', '{{').replace('}', '}}')
            doc = escaped + '; '
        self.message = doc + '{!r} is not valid.'

    def validate(self, instance, value):
        """Return the checker's result; raise ValueError on ``INVALID``."""
        result = self.checker(value)
        if result is INVALID:
            raise ValueError(self.message.format(value))
        return result
class Quantity(Validated):  # <7>
    """a number greater than zero"""

    def validate(self, instance, value):
        """Return ``value`` unchanged; raise ValueError unless it is > 0."""
        if value <= 0:
            raise ValueError('value must be > 0')
        return value
class NonBlank(Validated):
    """a string with at least one non-space character"""

    def validate(self, instance, value):
        """Return ``value`` stripped of surrounding whitespace; raise
        ValueError when nothing is left."""
        value = value.strip()
        if not value:
            raise ValueError('value cannot be empty or blank')
        return value  # <8>
# END MODEL_V5

View File

@ -1,4 +1,4 @@
@ -11,7 +11,7 @@
"serial": 33451,
"name": "Migrating to the Web Using Dart and Polymer - A Guide for Legacy OOP Developers",
"event_type": "40-minute conference session",
"time_start": "2014-07-23 17:00:00",
"time_stop": "2014-07-23 17:40:00",
"venue_serial": 1458,
@ -13304,4 +13304,4 @@

View File

@ -0,0 +1,23 @@
import shelve
from schedule2 import DB_NAME, CONFERENCE, load_db
from schedule2 import DbRecord, Event
# Everything runs inside the ``with`` block: records are fetched through
# the open shelf, so it must stay open until the last lookup.
with shelve.open(DB_NAME) as db:
    if CONFERENCE not in db:
        # shelf has no conference record yet: populate it
        load_db(db)

    DbRecord.set_db(db)
    event = DbRecord.fetch('event.33950')
    print(event)
    print(event.venue)
    print(event.venue.name)
    for spkr in event.speakers:
        print('{0.serial}: {0.name}'.format(spkr))

    # attribute read on the class instead of on an instance
    print(repr(Event.venue))

    event2 = DbRecord.fetch('event.33451')
    print(event2)
    print(event2.fetch)
    print(event2.venue)

View File

@ -9,14 +9,21 @@ explore1.py: Script to explore the OSCON schedule feed
357
>>> sorted(feed.Schedule.keys()) # <3>
['conferences', 'events', 'speakers', 'venues']
>>> feed.Schedule.speakers[-1].name # <4>
>>> for key, value in sorted(feed.Schedule.items()): # <4>
... print('{:3} {}'.format(len(value), key))
...
1 conferences
484 events
357 speakers
53 venues
>>> feed.Schedule.speakers[-1].name # <5>
'Carina C. Zona'
>>> talk = feed.Schedule.events[40] # <5>
>>> talk = feed.Schedule.events[40] # <6>
>>> talk.name
'There *Will* Be Bugs'
>>> talk.speakers # <6>
>>> talk.speakers # <7>
[3471, 5199]
>>> talk.flavor # <7>
>>> talk.flavor # <8>
Traceback (most recent call last):
...
KeyError: 'flavor'
@ -34,13 +41,13 @@ class FrozenJSON:
"""
def __init__(self, mapping):
self._data = dict(mapping) # <1>
self.__data = dict(mapping) # <1>
def __getattr__(self, name): # <2>
if hasattr(self._data, name):
return getattr(self._data, name) # <3>
if hasattr(self.__data, name):
return getattr(self.__data, name) # <3>
else:
return FrozenJSON.build(self._data[name]) # <4>
return FrozenJSON.build(self.__data[name]) # <4>
@classmethod
def build(cls, obj): # <5>

View File

@ -40,11 +40,11 @@ class FrozenJSON:
return arg
def __init__(self, mapping):
self._data = dict(mapping)
self.__data = dict(mapping)
def __getattr__(self, name):
if hasattr(self._data, name):
return getattr(self._data, name)
if hasattr(self.__data, name):
return getattr(self.__data, name)
else:
return FrozenJSON(self._data[name]) # <4>
return FrozenJSON(self.__data[name]) # <4>
# END EXPLORE2

View File

@ -10,10 +10,8 @@ schedule1.py: traversing OSCON schedule data
>>> speaker = db['speaker.3471'] # <4>
>>> type(speaker) # <5>
<class 'schedule1.Record'>
>>> speaker.name # <6>
'Anna Martelli Ravenscroft'
>>> speaker.twitter
'annaraven'
>>> speaker.name, speaker.twitter # <6>
('Anna Martelli Ravenscroft', 'annaraven')
>>> db.close() # <7>
# END SCHEDULE1_DEMO
@ -23,7 +21,7 @@ schedule1.py: traversing OSCON schedule data
# BEGIN SCHEDULE1
import warnings
import osconfeed
import osconfeed # <1>
DB_NAME = 'data/schedule1_db'
CONFERENCE = 'conference.115'
@ -31,17 +29,17 @@ CONFERENCE = 'conference.115'
class Record:
def __init__(self, **kwargs):
self.__dict__.update(kwargs) # <1>
self.__dict__.update(kwargs) # <2>
def load_db(db):
raw_data = osconfeed.load() # <2>
raw_data = osconfeed.load() # <3>
warnings.warn('loading ' + DB_NAME)
for collection, rec_list in raw_data['Schedule'].items(): # <3>
record_type = collection[:-1] # <4>
for collection, rec_list in raw_data['Schedule'].items(): # <4>
record_type = collection[:-1] # <5>
for record in rec_list:
key = '{}.{}'.format(record_type, record['serial']) # <5>
record['serial'] = key # <6>
db[key] = Record(**record) # <7>
key = '{}.{}'.format(record_type, record['serial']) # <6>
record['serial'] = key # <7>
db[key] = Record(**record) # <8>
# END SCHEDULE1

View File

@ -56,15 +56,15 @@ class MissingDatabaseError(RuntimeError):
class DbRecord(Record): # <2>
_db = None # <3>
__db = None # <3>
@staticmethod # <4>
def set_db(db):
DbRecord._db = db # <5>
DbRecord.__db = db # <5>
@staticmethod # <6>
def get_db():
return DbRecord._db
return DbRecord.__db
@classmethod # <7>
def fetch(cls, ident):
@ -93,22 +93,23 @@ class Event(DbRecord): # <1>
@property
def venue(self):
key = 'venue.{}'.format(self.venue_serial)
return self.fetch(key) # <2>
return self.__class__.fetch(key) # <2>
@property
def speakers(self):
if not hasattr(self, '_speaker_objs'): # <3>
spkr_serials = self.__dict__['speakers'] # <4>
self._speaker_objs = [self.fetch('speaker.{}'.format(key))
for key in spkr_serials] # <5>
return self._speaker_objs # <6>
fetch = self.__class__.fetch # <5>
self._speaker_objs = [fetch('speaker.{}'.format(key))
for key in spkr_serials] # <6>
return self._speaker_objs # <7>
def __repr__(self):
if hasattr(self, 'name'): # <7>
if hasattr(self, 'name'): # <8>
cls_name = self.__class__.__name__
return '<{} {!r}>'.format(cls_name, self.name)
else:
return super().__repr__() # <8>
return super().__repr__() # <9>
# END SCHEDULE2_EVENT
@ -127,5 +128,5 @@ def load_db(db):
for record in rec_list: # <7>
key = '{}.{}'.format(record_type, record['serial'])
record['serial'] = key
db[key] = factory(**record) # <>
db[key] = factory(**record) # <8>
# END SCHEDULE2_LOAD

View File

@ -0,0 +1,29 @@
"""
Alex Martelli, _Python in a Nutshell, 2e._ (O'Reilly, 2006), p. 101
==========================
Properties and inheritance
==========================
Properties are inherited normally, just like any other attribute.
However, there's a little trap for the unwary: the methods called
upon to access a property are those that are defined in the class
in which the property itself is defined, without intrinsic use of
further overriding that may happen in subclasses. For example:
"""
class B(object):
    """Base class exposing ``f`` both as a method and, through ``g``, as a
    read-only property."""

    def f(self):
        return 23

    # property() receives the function object f defined just above in this
    # class body
    g = property(f)
class C(B):
    """Subclass that overrides ``f`` only; the property ``g`` is inherited
    from ``B`` as is."""

    def f(self):
        return 42
# C overrides f, but the property g was built in B from B's own f
# function, so reading c.g does not go through C.f.
c = C()
print(c.g) # prints 23, not 42

44
metaprog/special_attrs.py Normal file
View File

@ -0,0 +1,44 @@
'''
4.13. Special Attributes
The implementation adds a few special read-only attributes to
several object types, where they are relevant.
Some of these are not reported by the dir() built-in function.
https://docs.python.org/3/library/stdtypes.html#special-attributes
'''
# Special attribute names to look for, grouped by where they are expected.
obj_attrs = {'__dict__', '__class__'}
cls_data_attrs = {
    '__slots__',
    '__bases__',
    '__name__',
    '__qualname__',
    '__mro__',
}
cls_methods = {'mro', '__subclasses__'}
cls_attrs = cls_data_attrs | cls_methods

# Sample objects to inspect: two classes, a bare object(), and one
# instance of each class.
an_object = object()


class EmptyClass():
    pass


an_instance = EmptyClass()


class EmptySlots():
    # empty __slots__: instances are created without a __dict__
    __slots__ = ()


a_slots_instance = EmptySlots()

objs = EmptyClass, EmptySlots, an_object, an_instance, a_slots_instance
for obj in objs:
print('-' * 60)
print(repr(obj), ':', type(obj))
dir_obj = set(dir(obj))
print('obj_attrs not listed:', sorted(obj_attrs - dir_obj))
print('all cls_attrs :', sorted(cls_attrs))
print('cls_attrs not listed:', sorted(cls_attrs - dir_obj))