final concurrency examples

This commit is contained in:
Luciano Ramalho 2015-03-13 18:24:31 -03:00
parent 39e87de5cd
commit 2d7a96742b
26 changed files with 1231 additions and 481 deletions

14
futures/callbackhell.js Normal file
View File

@ -0,0 +1,14 @@
// Callback-hell example: three sequential async fetches expressed as
// nested anonymous callbacks (contrast with the coroutine version).
// NOTE(review): fetch1..fetch3, step1..step3 and request1 are assumed to
// be defined elsewhere -- this is an illustrative snippet.
fetch1(request1, function (response1) {
    // phase 1
    var request2 = step1(response1);
    fetch2(request2, function (response2) {
        // phase 2
        var request3 = step2(response2);
        fetch3(request3, function (response3) {
            // phase 3
            step3(response3);
        });
    });
});

15
futures/callbackhell.py Normal file
View File

@ -0,0 +1,15 @@
# Callback-hell example in Python: each phase is a named callback passed
# to the next fetch (contrast with the nested-anonymous-function JS version).
# NOTE(review): request1 and the fetch*/step* functions are assumed to be
# defined elsewhere -- this is an illustrative snippet.

def phase1(response1):
    # phase 1: derive the second request from the first response
    request2 = step1(response1)
    fetch2(request2, phase2)


def phase2(response2):
    # phase 2
    request3 = step2(response2)
    fetch3(request3, phase3)


def phase3(response3):
    # phase 3 -- end of the chain, no further callback
    step3(response3)


fetch1(request1, phase1)  # kick off the chain

223
futures/charfinder/charfinder.py Executable file
View File

@ -0,0 +1,223 @@
#!/usr/bin/env python3
"""
Unicode character finder utility:
find characters based on words in their official names.
This can be used from the command line, just pass words as arguments.
Here is the ``main`` function which makes it happen::
>>> main('rook') # doctest: +NORMALIZE_WHITESPACE
U+2656 WHITE CHESS ROOK
U+265C BLACK CHESS ROOK
(2 matches for 'rook')
>>> main('rook', 'black') # doctest: +NORMALIZE_WHITESPACE
U+265C BLACK CHESS ROOK
(1 match for 'rook black')
>>> main('white bishop') # doctest: +NORMALIZE_WHITESPACE
U+2657 WHITE CHESS BISHOP
(1 match for 'white bishop')
>>> main("jabberwocky's vest")
(No match for "jabberwocky's vest")
For exploring words that occur in the character names, there is the
``word_report`` function::
>>> index = UnicodeNameIndex(sample_chars)
>>> index.word_report()
3 SIGN
2 A
2 EURO
2 LATIN
2 LETTER
1 CAPITAL
1 CURRENCY
1 DOLLAR
1 SMALL
>>> index = UnicodeNameIndex()
>>> index.word_report(10)
75821 CJK
75761 IDEOGRAPH
74656 UNIFIED
13196 SYLLABLE
11735 HANGUL
7616 LETTER
2232 WITH
2180 SIGN
2122 SMALL
1709 CAPITAL
Note: characters with names starting with 'CJK UNIFIED IDEOGRAPH'
are indexed with those three words only, excluding the hexadecimal
codepoint at the end of the name.
"""
import sys
import re
import unicodedata
import pickle
import warnings
import itertools
import functools
from collections import namedtuple
# Regular expressions as raw strings: '\w' and '\+' are not valid Python
# string escapes, so the originals triggered invalid-escape warnings
# (a SyntaxWarning on newer CPython).
RE_WORD = re.compile(r'\w+')
RE_UNICODE_NAME = re.compile('^[A-Z0-9 -]+$')
RE_CODEPOINT = re.compile(r'U\+([0-9A-F]{4,6})')

INDEX_NAME = 'charfinder_index.pickle'  # pickle cache for the built index
MINIMUM_SAVE_LEN = 10000  # only cache indexes with more entries than this
CJK_UNI_PREFIX = 'CJK UNIFIED IDEOGRAPH'
CJK_CMP_PREFIX = 'CJK COMPATIBILITY IDEOGRAPH'

# Tiny character set used by the doctests and the unit tests.
sample_chars = [
    '$',  # DOLLAR SIGN
    'A',  # LATIN CAPITAL LETTER A
    'a',  # LATIN SMALL LETTER A
    '\u20a0',  # EURO-CURRENCY SIGN
    '\u20ac',  # EURO SIGN
]

# code_str is the 'U+XXXX' form; char the character; name its official name.
CharDescription = namedtuple('CharDescription', 'code_str char name')

# count is the total number of matches; items an iterable of matched chars.
QueryResult = namedtuple('QueryResult', 'count items')
def tokenize(text):
    """Yield every word found in *text*, uppercased."""
    words = (found.group() for found in RE_WORD.finditer(text))
    yield from (word.upper() for word in words)
def query_type(text):
    """Classify *text* as 'CODEPOINT', 'NAME' or 'CHARACTERS'."""
    upper = text.upper()
    if 'U+' in upper:
        return 'CODEPOINT'
    if RE_UNICODE_NAME.match(upper):
        return 'NAME'
    return 'CHARACTERS'
class UnicodeNameIndex:
    """Inverted index mapping each word of a Unicode character name to the
    set of characters whose official name contains that word.

    With no ``chars`` argument the index covers every character known to
    ``unicodedata.name`` and, once built, is cached to INDEX_NAME with
    pickle so the next run avoids the slow rebuild.
    """

    def __init__(self, chars=None):
        self.load(chars)

    def load(self, chars=None):
        """Load the index from the pickle cache, or build (and cache) it."""
        self.index = None
        if chars is None:
            try:
                with open(INDEX_NAME, 'rb') as fp:
                    self.index = pickle.load(fp)
            except OSError:  # no cache file (or unreadable): rebuild below
                pass
        if self.index is None:
            self.build_index(chars)
            if len(self.index) > MINIMUM_SAVE_LEN:
                try:
                    self.save()
                except OSError as exc:
                    warnings.warn('Could not save {!r}: {}'
                                  .format(INDEX_NAME, exc))

    def save(self):
        """Pickle the index to INDEX_NAME."""
        with open(INDEX_NAME, 'wb') as fp:
            pickle.dump(self.index, fp)

    def build_index(self, chars=None):
        """Build the word -> set-of-chars mapping from ``chars``
        (default: every assigned codepoint from U+0020 to sys.maxunicode).
        """
        if chars is None:
            chars = (chr(i) for i in range(32, sys.maxunicode))
        index = {}
        for char in chars:
            try:
                name = unicodedata.name(char)
            except ValueError:  # unassigned codepoint: skip it
                continue
            # CJK ideographs are indexed by their common prefix only,
            # excluding the per-character hexadecimal suffix.
            if name.startswith(CJK_UNI_PREFIX):
                name = CJK_UNI_PREFIX
            elif name.startswith(CJK_CMP_PREFIX):
                name = CJK_CMP_PREFIX
            for word in tokenize(name):
                index.setdefault(word, set()).add(char)
        self.index = index

    def word_rank(self, top=None):
        """Return (occurrences, word) pairs, most frequent first, ties
        broken alphabetically; at most ``top`` entries when given.
        """
        res = [(len(self.index[key]), key) for key in self.index]
        res.sort(key=lambda item: (-item[0], item[1]))
        if top is not None:
            res = res[:top]
        return res

    def word_report(self, top=None):
        """Print the word_rank listing, one word per line."""
        for postings, key in self.word_rank(top):
            print('{:5} {}'.format(postings, key))

    def find_chars(self, query, start=0, stop=None):
        """Return QueryResult(count, iterator) of the characters whose
        names contain every word in ``query``; the sorted result is
        sliced with ``start``/``stop``.
        """
        stop = sys.maxsize if stop is None else stop
        result_sets = []
        for word in tokenize(query):
            chars = self.index.get(word)
            if chars is None:  # shortcut: no such word
                result_sets = []
                break
            result_sets.append(chars)
        if not result_sets:
            return QueryResult(0, ())
        result = functools.reduce(set.intersection, result_sets)
        result = sorted(result)  # must sort to support start, stop
        result_iter = itertools.islice(result, start, stop)
        return QueryResult(len(result),
                           (char for char in result_iter))

    def describe(self, char):
        """Return CharDescription('U+XXXX', char, official name)."""
        code_str = 'U+{:04X}'.format(ord(char))
        name = unicodedata.name(char)
        return CharDescription(code_str, char, name)

    def find_descriptions(self, query, start=0, stop=None):
        """Yield a CharDescription for each character matching ``query``."""
        for char in self.find_chars(query, start, stop).items:
            yield self.describe(char)

    def get_descriptions(self, chars):
        """Yield a CharDescription for each char in ``chars``."""
        for char in chars:
            yield self.describe(char)

    def describe_str(self, char):
        """Return one tab-separated text line describing ``char``."""
        return '{:7}\t{}\t{}'.format(*self.describe(char))

    def find_description_strs(self, query, start=0, stop=None):
        """Yield one formatted text line per character matching ``query``."""
        for char in self.find_chars(query, start, stop).items:
            yield self.describe_str(char)

    @staticmethod  # not an instance method due to concurrency
    def status(query, counter):
        """Return e.g. "2 matches for 'rook'" for a result count."""
        if counter == 0:
            msg = 'No match'
        elif counter == 1:
            msg = '1 match'
        else:
            msg = '{} matches'.format(counter)
        return '{} for {!r}'.format(msg, query)
def main(*args):
    """Print a U+XXXX description line for each character matching the
    query words, followed by a status summary."""
    index = UnicodeNameIndex()
    query = ' '.join(args)
    count = 0
    for count, line in enumerate(index.find_description_strs(query), 1):
        print(line)
    print('({})'.format(index.status(query, count)))
if __name__ == '__main__':
    # Script entry point: treat command-line arguments as query words.
    if len(sys.argv) > 1:
        main(*sys.argv[1:])
    else:
        print('Usage: {} word1 [word2]...'.format(sys.argv[0]))

View File

@ -0,0 +1,19 @@
<!DOCTYPE html>
<!-- Charfinder form/results template. Filled per request via str.format;
     the links placeholder is substituted once at server startup.
     (This comment deliberately contains no curly braces, which would
     break the str.format call.) -->
<html lang="en">
<head>
<meta charset="utf-8">
<title>Charfinder</title>
</head>
<body>
Examples: {links}
<p>
<form action="/">
<input type="search" name="query" value="{query}">
<input type="submit" value="find"> {message}
</form>
</p>
<table>
{result}
</table>
</body>
</html>

View File

@ -0,0 +1,71 @@
#!/usr/bin/env python3
import sys
import asyncio
from aiohttp import web
from charfinder import UnicodeNameIndex
TEMPLATE_NAME = 'http_charfinder.html'
CONTENT_TYPE = 'text/html; charset=UTF-8'
SAMPLE_WORDS = ('bismillah chess cat circled Malayalam digit'
' Roman face Ethiopic black mark symbol dot'
' operator Braille hexagram').split()
ROW_TPL = '<tr><td>{code_str}</td><th>{char}</th><td>{name}</td></tr>'
LINK_TPL = '<a href="/?query={0}" title="find &quot;{0}&quot;">{0}</a>'
LINKS_HTML = ', '.join(LINK_TPL.format(word) for word in
sorted(SAMPLE_WORDS, key=str.upper))
index = UnicodeNameIndex()
with open(TEMPLATE_NAME) as tpl:
template = tpl.read()
template = template.replace('{links}', LINKS_HTML)
# BEGIN HTTP_CHARFINDER_HOME
def home(request):  # <1>
    """Handle GET /: render the search form and a table of matches for
    the ``query`` URL parameter (empty query shows just the form)."""
    query = request.GET.get('query', '').strip()  # <2>
    # NOTE(review): request.GET is the old aiohttp API; newer aiohttp
    # releases expose this as request.query -- confirm installed version.
    print('Query: {!r}'.format(query))  # <3>
    if query:  # <4>
        descriptions = list(index.find_descriptions(query))
        res = '\n'.join(ROW_TPL.format(**vars(descr))
                        for descr in descriptions)
        msg = index.status(query, len(descriptions))
    else:
        descriptions = []
        res = ''
        msg = 'Enter words describing characters.'
    text = template.format(query=query, result=res, message=msg)
    print('Sending {} results'.format(len(descriptions)))
    return web.Response(content_type=CONTENT_TYPE, text=text)
# END HTTP_CHARFINDER_HOME
# BEGIN HTTP_CHARFINDER_SETUP
@asyncio.coroutine
def init(loop, address, port):  # <1>
    """Build the aiohttp application, start serving on (address, port)
    and return the bound socket name for display."""
    app = web.Application(loop=loop)  # <2>
    app.router.add_route('GET', '/', home)  # <3>
    handler = app.make_handler()  # <4>
    server = yield from loop.create_server(handler,
                                           address, port)  # <5>
    return server.sockets[0].getsockname()  # <6>
def main(address="127.0.0.1", port=8888):
    """Start the HTTP charfinder server and run until CTRL-C."""
    port = int(port)  # port may arrive as a string from sys.argv
    loop = asyncio.get_event_loop()
    host = loop.run_until_complete(init(loop, address, port))  # <7>
    print('Serving on {}. Hit CTRL-C to stop.'.format(host))
    try:
        loop.run_forever()  # <8>
    except KeyboardInterrupt:  # CTRL+C pressed
        pass
    print('Server shutting down.')
    loop.close()  # <9>
if __name__ == '__main__':
main(*sys.argv[1:])
# END HTTP_CHARFINDER_SETUP

View File

@ -0,0 +1,64 @@
#!/usr/bin/env python3
# BEGIN TCP_CHARFINDER_TOP
import sys
import asyncio
from charfinder import UnicodeNameIndex # <1>
CRLF = b'\r\n'
PROMPT = b'?> '
index = UnicodeNameIndex() # <2>
@asyncio.coroutine
def handle_queries(reader, writer):  # <3>
    """Serve one TCP client: prompt, read query lines, send back matching
    character descriptions. Loops until the client sends a control
    character (or undecodable bytes)."""
    while True:  # <4>
        writer.write(PROMPT)  # can't yield from! # <5>
        yield from writer.drain()  # must yield from! # <6>
        data = yield from reader.readline()  # <7>
        try:
            query = data.decode().strip()
        except UnicodeDecodeError:  # <8>
            # Treat undecodable input as a NUL control char: ends the session.
            query = '\x00'
        client = writer.get_extra_info('peername')  # <9>
        print('Received from {}: {!r}'.format(client, query))  # <10>
        if query:
            if ord(query[:1]) < 32:  # <11>
                break
            lines = list(index.find_description_strs(query))  # <12>
            if lines:
                writer.writelines(line.encode() + CRLF for line in lines)  # <13>
            writer.write(index.status(query, len(lines)).encode() + CRLF)  # <14>
            yield from writer.drain()  # <15>
            print('Sent {} results'.format(len(lines)))  # <16>
    print('Close the client socket')  # <17>
    writer.close()  # <18>
# END TCP_CHARFINDER_TOP
# BEGIN TCP_CHARFINDER_MAIN
def main(address='127.0.0.1', port=2323):  # <1>
    """Start the TCP charfinder server and run until CTRL-C, then shut
    the server down cleanly."""
    port = int(port)  # port may arrive as a string from sys.argv
    loop = asyncio.get_event_loop()
    server_coro = asyncio.start_server(handle_queries, address, port,
                                       loop=loop)  # <2>
    server = loop.run_until_complete(server_coro)  # <3>
    host = server.sockets[0].getsockname()  # <4>
    print('Serving on {}. Hit CTRL-C to stop.'.format(host))  # <5>
    try:
        loop.run_forever()  # <6>
    except KeyboardInterrupt:  # CTRL+C pressed
        pass
    print('Server shutting down.')
    server.close()  # <7>
    loop.run_until_complete(server.wait_closed())  # <8>
    loop.close()  # <9>
if __name__ == '__main__':
main(*sys.argv[1:]) # <10>
# END TCP_CHARFINDER_MAIN

View File

@ -0,0 +1,115 @@
"""Pytest suite for charfinder's UnicodeNameIndex, tokenize and query_type."""
import pytest

from charfinder import UnicodeNameIndex, tokenize, sample_chars, query_type
from unicodedata import name


@pytest.fixture
def sample_index():
    # Small index over the five sample_chars; cheap, rebuilt per test.
    return UnicodeNameIndex(sample_chars)


@pytest.fixture(scope="module")
def full_index():
    # Full Unicode index; expensive, so shared across the whole module.
    return UnicodeNameIndex()


def test_query_type():
    assert query_type('blue') == 'NAME'


def test_tokenize():
    assert list(tokenize('')) == []
    assert list(tokenize('a b')) == ['A', 'B']
    assert list(tokenize('a-b')) == ['A', 'B']
    assert list(tokenize('abc')) == ['ABC']
    assert list(tokenize('café')) == ['CAFÉ']


def test_index():
    # The five sample chars yield exactly nine distinct name words.
    sample_index = UnicodeNameIndex(sample_chars)
    assert len(sample_index.index) == 9


def test_find_word_no_match(sample_index):
    res = sample_index.find_chars('qwertyuiop')
    assert len(res.items) == 0


def test_find_word_1_match(sample_index):
    res = [(ord(char), name(char))
           for char in sample_index.find_chars('currency').items]
    assert res == [(8352, 'EURO-CURRENCY SIGN')]


def test_find_word_1_match_character_result(sample_index):
    res = [name(char) for char in
           sample_index.find_chars('currency').items]
    assert res == ['EURO-CURRENCY SIGN']


def test_find_word_2_matches(sample_index):
    res = [(ord(char), name(char))
           for char in sample_index.find_chars('Euro').items]
    assert res == [(8352, 'EURO-CURRENCY SIGN'),
                   (8364, 'EURO SIGN')]


def test_find_2_words_no_matches(sample_index):
    res = sample_index.find_chars('Euro letter')
    assert res.count == 0


def test_find_2_words_no_matches_because_one_not_found(sample_index):
    res = sample_index.find_chars('letter qwertyuiop')
    assert res.count == 0


def test_find_2_words_1_match(sample_index):
    res = sample_index.find_chars('sign dollar')
    assert res.count == 1


def test_find_2_words_2_matches(sample_index):
    res = sample_index.find_chars('latin letter')
    assert res.count == 2


def test_find_chars_many_matches_full(full_index):
    res = full_index.find_chars('letter')
    assert res.count > 7000


def test_find_1_word_1_match_full(full_index):
    res = [(ord(char), name(char))
           for char in full_index.find_chars('registered').items]
    assert res == [(174, 'REGISTERED SIGN')]


def test_find_1_word_2_matches_full(full_index):
    res = full_index.find_chars('rook')
    assert res.count == 2


def test_find_3_words_no_matches_full(full_index):
    res = full_index.find_chars('no such character')
    assert res.count == 0


def test_find_with_start(sample_index):
    res = [(ord(char), name(char))
           for char in sample_index.find_chars('sign', 1).items]
    assert res == [(8352, 'EURO-CURRENCY SIGN'), (8364, 'EURO SIGN')]


def test_find_with_stop(sample_index):
    res = [(ord(char), name(char))
           for char in sample_index.find_chars('sign', 0, 2).items]
    assert res == [(36, 'DOLLAR SIGN'), (8352, 'EURO-CURRENCY SIGN')]


def test_find_with_start_stop(sample_index):
    res = [(ord(char), name(char))
           for char in sample_index.find_chars('sign', 1, 2).items]
    assert res == [(8352, 'EURO-CURRENCY SIGN')]

View File

@ -0,0 +1,14 @@
@asyncio.coroutine
def three_phases():
    """Run the three fetch/step phases sequentially inside one coroutine.

    Each ``yield from`` suspends until the fetch completes, so the phases
    read top-to-bottom instead of nesting callbacks.
    NOTE: fetch1..fetch3, step1..step3, request1 and loop are assumed to
    be defined elsewhere -- this is an illustrative snippet.
    """
    response1 = yield from fetch1(request1)
    # phase 1
    request2 = step1(response1)
    response2 = yield from fetch2(request2)
    # phase 2
    request3 = step2(response2)
    response3 = yield from fetch3(request3)
    # phase 3
    step3(response3)

# BUG FIX: create_task requires a coroutine OBJECT, so the coroutine
# function must be called; the original passed the bare function.
loop.create_task(three_phases())

View File

@ -5,58 +5,59 @@ Sequential version
Sample run::
$ python3 flags.py
BD retrieved.
BR retrieved.
CD retrieved.
...
TR retrieved.
US retrieved.
VN retrieved.
BD BR CD CN DE EG ET FR ID IN IR JP MX NG PH PK RU TR US VN
20 flags downloaded in 10.16s
"""
# BEGIN FLAGS_PY
import os
import time
import sys
import requests
import requests # <1>
POP20_CC = ('CN IN US ID BR PK NG BD RU JP '
'MX PH VN ET EG DE IR TR CD FR').split()
'MX PH VN ET EG DE IR TR CD FR').split() # <2>
BASE_URL = 'http://python.pro.br/fluent/data/flags'
BASE_URL = 'http://flupy.org/data/flags' # <3>
DEST_DIR = 'downloads/'
DEST_DIR = 'downloads/' # <4>
def save_flag(img, filename):
def save_flag(img, filename): # <5>
path = os.path.join(DEST_DIR, filename)
with open(path, 'wb') as fp:
fp.write(img)
def get_flag(cc):
def get_flag(cc): # <6>
url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
res = requests.get(url)
return res.content
resp = requests.get(url)
return resp.content
def download_many(cc_list):
for cc in sorted(cc_list):
def show(text): # <7>
print(text, end=' ')
sys.stdout.flush()
def download_many(cc_list): # <8>
for cc in sorted(cc_list): # <9>
image = get_flag(cc)
print('{} retrieved.'.format(cc))
show(cc)
save_flag(image, cc.lower() + '.gif')
return len(cc_list)
def main(download_many):
def main(download_many): # <10>
t0 = time.time()
count = download_many(POP20_CC)
elapsed = time.time() - t0
msg = '{} flags downloaded in {:.2f}s'
msg = '\n{} flags downloaded in {:.2f}s'
print(msg.format(count, elapsed))
if __name__ == '__main__':
main(download_many)
main(download_many) # <11>
# END FLAGS_PY

View File

@ -1,112 +1,120 @@
"""Download flags of top 10 countries by population
"""Download flags of countries (with error handling).
asyncio version
Sample run::
$
$ python3 flags2_asyncio.py -s ERROR -e -m 200
ERROR site: http://localhost:8003/flags
Searching for 676 flags: from AA to ZZ
200 concurrent connections will be used.
--------------------
146 flags downloaded.
363 not found.
167 errors.
Elapsed time: 2.59s
"""
# BEGIN FLAGS2_ASYNCIO_TOP
import asyncio
from collections import namedtuple
from enum import Enum
import collections
import aiohttp
from aiohttp import web
import tqdm
from flag_utils import main, save_flag, Counts
from flags2_common import main, HTTPStatus, Result, save_flag
# default set low to avoid errors from remote site:
# default set low to avoid errors from remote site, such as
# 503 - Service Temporarily Unavailable
DEFAULT_CONCUR_REQ = 5
MAX_CONCUR_REQ = 1000
TIMEOUT = 120 # seconds
Status = Enum('Status', 'ok not_found error')
Result = namedtuple('Result', 'status data')
class FetchError(Exception): # <1>
def __init__(self, country_code):
self.country_code = country_code
@asyncio.coroutine
def get_flag(base_url, cc):
def get_flag(base_url, cc): # <2>
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
res = yield from aiohttp.request('GET', url)
if res.status == 200:
image = yield from res.read()
resp = yield from aiohttp.request('GET', url)
if resp.status == 200:
image = yield from resp.read()
return image
elif res.status == 404:
elif resp.status == 404:
raise web.HTTPNotFound()
else:
raise aiohttp.errors.HttpProcessingError(
code=res.status, message=res.reason, headers=res.headers)
raise aiohttp.HttpProcessingError(
code=resp.status, message=resp.reason,
headers=resp.headers)
@asyncio.coroutine
def download_one(cc, base_url, semaphore, verbose):
def download_one(cc, base_url, semaphore, verbose): # <3>
try:
with (yield from semaphore):
image = yield from get_flag(base_url, cc)
except web.HTTPNotFound:
status = Status.not_found
msg = ''
except aiohttp.errors.HttpProcessingError as exc:
status = Status.error
msg = '{} failed: {exc.code} - {exc.message}'
msg = msg.format(cc, exc=exc)
except aiohttp.errors.ClientError as exc:
try:
context = exc.__context__.__class__.__name__
except AttributeError:
# we chain all exceptions, you should get original exception from __cause__
context = '(unknown context)'
msg = '{} failed: {}'.format(cc, context)
status = Status.error
with (yield from semaphore): # <4>
image = yield from get_flag(base_url, cc) # <5>
except web.HTTPNotFound: # <6>
status = HTTPStatus.not_found
msg = 'not found'
except Exception as exc:
raise FetchError(cc) from exc # <7>
else:
save_flag(image, cc.lower() + '.gif')
status = Status.ok
save_flag(image, cc.lower() + '.gif') # <8>
status = HTTPStatus.ok
msg = 'OK'
if verbose and msg:
print(cc, msg)
return Result(status, cc)
# END FLAGS2_ASYNCIO_TOP
# BEGIN FLAGS2_ASYNCIO_DOWNLOAD_MANY
@asyncio.coroutine
def downloader_coro(cc_list, base_url, verbose, max_req):
semaphore = asyncio.Semaphore(max_req)
to_do = [download_one(cc, base_url, semaphore, verbose) for cc in cc_list]
results = []
to_do_iter = asyncio.as_completed(to_do)
def downloader_coro(cc_list, base_url, verbose, concur_req): # <1>
counter = collections.Counter()
semaphore = asyncio.Semaphore(concur_req) # <2>
to_do = [download_one(cc, base_url, semaphore, verbose)
for cc in sorted(cc_list)] # <3>
to_do_iter = asyncio.as_completed(to_do) # <4>
if not verbose:
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
for future in to_do_iter:
result = yield from future
results.append(result)
return results
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list)) # <5>
for future in to_do_iter: # <6>
try:
res = yield from future # <7>
except FetchError as exc: # <8>
country_code = exc.country_code # <9>
try:
error_msg = exc.__cause__.args[0] # <10>
except IndexError:
error_msg = exc.__cause__.__class__.__name__ # <11>
else:
error_msg = ''
status = res.status
if error_msg: # <12>
status = HTTPStatus.error
counter[status] += 1
if verbose and error_msg:
msg = '*** Error for {}: {}'
print(msg.format(country_code, error_msg))
return counter
def download_many(cc_list, base_url, verbose, max_req):
def download_many(cc_list, base_url, verbose, concur_req):
loop = asyncio.get_event_loop()
#loop.set_debug(True)
try:
coro = downloader_coro(cc_list, base_url, verbose, max_req)
done = loop.run_until_complete(coro)
except Exception as exc:
print('*' * 60)
print(exc)
print(vars(exc))
print('*' * 60)
counts = []
for status in Status:
counts.append(len([res for res in done
if res.status == status]))
loop.close()
coro = downloader_coro(cc_list, base_url, verbose, concur_req)
counts = loop.run_until_complete(coro) # <13>
loop.close() # <14>
return Counts(*counts)
return counts
if __name__ == '__main__':
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
# END FLAGS2_ASYNCIO_DOWNLOAD_MANY

View File

@ -1,100 +0,0 @@
"""Download flags of top 10 countries by population
asyncio version
Sample run::
$
"""
import asyncio
from collections import namedtuple
from enum import Enum
import aiohttp
from aiohttp import web
from flag_utils import main, save_flag, Counts
# default set low to avoid errors from remote site:
# 503 - Service Temporarily Unavailable
DEFAULT_CONCUR_REQ = 5
MAX_CONCUR_REQ = 1000
TIMEOUT = 120 # seconds
Status = Enum('Status', 'ok not_found error')
Result = namedtuple('Result', 'status data')
@asyncio.coroutine
def get_flag(base_url, cc):
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
res = yield from aiohttp.request('GET', url)
if res.status == 200:
image = yield from res.read()
return image
elif res.status == 404:
raise web.HTTPNotFound()
else:
raise aiohttp.errors.HttpProcessingError(
code=res.status, message=res.reason, headers=res.headers)
@asyncio.coroutine
def download_one(cc, base_url, semaphore, verbose):
try:
with (yield from semaphore):
image = yield from get_flag(base_url, cc)
except web.HTTPNotFound:
status = Status.not_found
msg = ''
except aiohttp.errors.HttpProcessingError as exc:
status = Status.error
msg = '{} failed: {exc.code} - {exc.message}'
msg = msg.format(cc, exc=exc)
except aiohttp.errors.ClientError as exc:
try:
context = exc.__context__.__class__.__name__
except AttributeError:
# we chain all exceptions, you should get original exception from __cause__
context = '(unknown context)'
msg = '{} failed: {}'.format(cc, context)
status = Status.error
else:
save_flag(image, cc.lower() + '.gif')
status = Status.ok
msg = 'OK'
if verbose and msg:
print(cc, msg)
return Result(status, cc)
def download_many(cc_list, base_url, verbose, max_req):
semaphore = asyncio.Semaphore(max_req)
to_do = [download_one(cc, base_url, semaphore, verbose) for cc in cc_list]
loop = asyncio.get_event_loop()
#loop.set_debug(True)
try:
done, pending = loop.run_until_complete(asyncio.wait(to_do, timeout=TIMEOUT))
except Exception as exc:
print('*' * 60)
print(exc)
print(vars(exc))
print('*' * 60)
counts = []
for status in Status:
counts.append(len([task for task in done
if task.result().status == status]))
for task in pending:
task.cancel()
loop.close()
return Counts(*counts)
if __name__ == '__main__':
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)

View File

@ -0,0 +1,112 @@
"""Download flags of countries (with error handling).
asyncio version using thread pool to save files
Sample run::
$
"""
import asyncio
import collections
import aiohttp
from aiohttp import web
import tqdm
from flags2_common import main, HTTPStatus, Result, save_flag
# default set low to avoid errors from remote site, such as
# 503 - Service Temporarily Unavailable
DEFAULT_CONCUR_REQ = 5
MAX_CONCUR_REQ = 1000
class FetchError(Exception):
    """Wraps a lower-level exception (via ``raise ... from``), recording
    which country code failed so callers can report it."""
    def __init__(self, country_code):
        self.country_code = country_code
@asyncio.coroutine
def get_flag(base_url, cc):
    """Fetch one flag GIF; return its bytes, or raise on HTTP errors.

    NOTE(review): ``aiohttp.request`` and ``aiohttp.HttpProcessingError``
    belong to the old (0.x) aiohttp API -- confirm installed version.
    """
    url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
    resp = yield from aiohttp.request('GET', url)
    if resp.status == 200:
        image = yield from resp.read()
        return image
    elif resp.status == 404:
        # Translated by download_one into a 'not found' result.
        raise web.HTTPNotFound()
    else:
        raise aiohttp.HttpProcessingError(
            code=resp.status, message=resp.reason,
            headers=resp.headers)
# BEGIN FLAGS2_ASYNCIO_EXECUTOR
@asyncio.coroutine
def download_one(cc, base_url, semaphore, verbose):
    """Download one flag, saving the file via the default thread-pool
    executor; return Result(status, cc)."""
    try:
        # Hold one semaphore slot for the duration of the request.
        with (yield from semaphore):
            image = yield from get_flag(base_url, cc)
    except web.HTTPNotFound:
        status = HTTPStatus.not_found
        msg = 'not found'
    except Exception as exc:
        # Chain so the caller can still report the original cause.
        raise FetchError(cc) from exc
    else:
        loop = asyncio.get_event_loop()  # <1>
        loop.run_in_executor(None,  # <2>
                             save_flag, image, cc.lower() + '.gif')  # <3>
        # NOTE(review): the executor future is not awaited -- the file save
        # is fire-and-forget, so a write error would pass silently.
        status = HTTPStatus.ok
        msg = 'OK'
    if verbose and msg:
        print(cc, msg)
    return Result(status, cc)
# END FLAGS2_ASYNCIO_EXECUTOR
@asyncio.coroutine
def downloader_coro(cc_list, base_url, verbose, concur_req):
    """Download every flag in cc_list, tallying outcomes in a Counter
    keyed by HTTPStatus; concurrency is capped at concur_req."""
    counter = collections.Counter()
    semaphore = asyncio.Semaphore(concur_req)  # caps concurrent requests
    to_do = [download_one(cc, base_url, semaphore, verbose)
             for cc in sorted(cc_list)]
    to_do_iter = asyncio.as_completed(to_do)  # yields futures as they finish
    if not verbose:
        to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))  # progress bar
    for future in to_do_iter:
        try:
            res = yield from future
        except FetchError as exc:
            country_code = exc.country_code
            try:
                error_msg = exc.__cause__.args[0]  # original exception message
            except IndexError:
                # Cause had no message: fall back to its class name.
                error_msg = exc.__cause__.__class__.__name__
        else:
            error_msg = ''
            status = res.status
        if error_msg:
            status = HTTPStatus.error
        counter[status] += 1
        if verbose and error_msg:
            msg = '*** Error for {}: {}'
            print(msg.format(country_code, error_msg))
    return counter
def download_many(cc_list, base_url, verbose, concur_req):
    """Drive the event loop over downloader_coro; return its Counter."""
    event_loop = asyncio.get_event_loop()
    counting_coro = downloader_coro(cc_list, base_url, verbose, concur_req)
    counts = event_loop.run_until_complete(counting_coro)
    event_loop.close()
    return counts
if __name__ == '__main__':
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)

View File

@ -11,9 +11,8 @@ from enum import Enum
Result = namedtuple('Result', 'status data')
Counts = namedtuple('Counts', 'ok not_found error')
Status = Enum('Status', 'ok not_found error')
HTTPStatus = Enum('Status', 'ok not_found error')
POP20_CC = ('CN IN US ID BR PK NG BD RU JP '
'MX PH VN ET EG DE IR TR CD FR').split()
@ -22,7 +21,7 @@ DEFAULT_CONCUR_REQ = 1
MAX_CONCUR_REQ = 1
SERVERS = {
'REMOTE': 'http://python.pro.br/fluent/data/flags',
'REMOTE': 'http://flupy.org/data/flags',
'LOCAL': 'http://localhost:8001/flags',
'DELAY': 'http://localhost:8002/flags',
'ERROR': 'http://localhost:8003/flags',
@ -53,17 +52,17 @@ def initial_report(cc_list, actual_req, server_label):
print(msg.format(actual_req, plural))
def final_report(cc_list, counts, start_time):
def final_report(cc_list, counter, start_time):
elapsed = time.time() - start_time
print('-' * 20)
msg = '{} flag{} downloaded.'
plural = 's' if counts.ok != 1 else ''
print(msg.format(counts.ok, plural))
if counts.not_found:
print(counts.not_found, 'not found.')
if counts.error:
plural = 's' if counts.error != 1 else ''
print('{} error{}.'.format(counts.error, plural))
plural = 's' if counter[HTTPStatus.ok] != 1 else ''
print(msg.format(counter[HTTPStatus.ok], plural))
if counter[HTTPStatus.not_found]:
print(counter[HTTPStatus.not_found], 'not found.')
if counter[HTTPStatus.error]:
plural = 's' if counter[HTTPStatus.error] != 1 else ''
print('{} error{}.'.format(counter[HTTPStatus.error], plural))
print('Elapsed time: {:.2f}s'.format(elapsed))
@ -144,5 +143,7 @@ def main(download_many, default_concur_req, max_concur_req):
initial_report(cc_list, actual_req, args.server)
base_url = SERVERS[args.server]
t0 = time.time()
counts = download_many(cc_list, base_url, args.verbose, actual_req)
final_report(cc_list, counts, t0)
counter = download_many(cc_list, base_url, args.verbose, actual_req)
assert sum(counter.values()) == len(cc_list), \
'some downloads are unaccounted for'
final_report(cc_list, counter, t0)

View File

@ -4,71 +4,84 @@ Sequential version
Sample run::
$
$ python3 flags2_sequential.py -s DELAY b
DELAY site: http://localhost:8002/flags
Searching for 26 flags: from BA to BZ
1 concurrent connection will be used.
--------------------
17 flags downloaded.
9 not found.
Elapsed time: 13.36s
"""
import collections
import requests
import tqdm
from flag_utils import main, save_flag, Counts, Status, Result
from flags2_common import main, save_flag, HTTPStatus, Result
DEFAULT_CONCUR_REQ = 1
MAX_CONCUR_REQ = 1
# BEGIN FLAGS2_BASIC_HTTP_FUNCTIONS
def get_flag(base_url, cc):
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
res = requests.get(url)
if res.status_code != 200:
res.raise_for_status()
return res.content
resp = requests.get(url)
if resp.status_code != 200: # <1>
resp.raise_for_status()
return resp.content
def download_one(cc, base_url, verbose=False):
try:
image = get_flag(base_url, cc)
except requests.exceptions.HTTPError as exc:
except requests.exceptions.HTTPError as exc: # <2>
res = exc.response
if res.status_code == 404:
status = Status.not_found
msg = ''
else:
status = Status.error
msg = 'error {res.status_code} - {res.reason}'
msg = msg.format(res=exc.response)
except requests.exceptions.ConnectionError as exc:
status = Status.error
msg = 'failed: {}'.format(cc, exc.args)
status = HTTPStatus.not_found # <3>
msg = 'not found'
else: # <4>
raise
else:
save_flag(image, cc.lower() + '.gif')
status = Status.ok
status = HTTPStatus.ok
msg = 'OK'
if verbose and msg:
if verbose: # <5>
print(cc, msg)
return Result(status, cc)
return Result(status, cc) # <6>
# END FLAGS2_BASIC_HTTP_FUNCTIONS
# BEGIN FLAGS2_DOWNLOAD_MANY_SEQUENTIAL
def download_many(cc_list, base_url, verbose, max_req):
counts = [0, 0, 0]
counter = collections.Counter() # <1>
cc_iter = sorted(cc_list) # <2>
if not verbose:
cc_iter = tqdm.tqdm(sorted(cc_list))
else:
cc_iter = sorted(cc_list)
for cc in cc_iter:
cc_iter = tqdm.tqdm(cc_iter) # <3>
for cc in cc_iter: # <4>
try:
res = download_one(cc, base_url, verbose)
except Exception as exc:
msg = 'Unexpected exception for {}: {!r}'
print(msg.format(cc, exc))
else:
counts[res.status.value-1] += 1
res = download_one(cc, base_url, verbose) # <5>
except requests.exceptions.HTTPError as exc: # <6>
error_msg = 'HTTP error {res.status_code} - {res.reason}'
error_msg = error_msg.format(res=exc.response)
except requests.exceptions.ConnectionError as exc: # <7>
error_msg = 'Connection error'
else: # <8>
error_msg = ''
status = res.status
return Counts(*counts)
if error_msg:
status = HTTPStatus.error # <9>
counter[status] += 1 # <10>
if verbose and error_msg: # <11>
print('*** Error for {}: {}'.format(cc, error_msg))
return counter # <12>
# END FLAGS2_DOWNLOAD_MANY_SEQUENTIAL
if __name__ == '__main__':
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)

View File

@ -1,42 +1,68 @@
"""Download flags of top 10 countries by population
"""Download flags of countries (with error handling).
ThreadPool version
Sample run::
$
$ python3 flags2_threadpool.py -s ERROR -e
ERROR site: http://localhost:8003/flags
Searching for 676 flags: from AA to ZZ
30 concurrent connections will be used.
--------------------
150 flags downloaded.
361 not found.
165 errors.
Elapsed time: 7.46s
"""
# BEGIN FLAGS2_THREADPOOL
import collections
from concurrent import futures
import tqdm
import requests
import tqdm # <1>
from flag_utils import main, Counts
from flags2_sequential import get_flag, download_one
from flags2_common import main, HTTPStatus # <2>
from flags2_sequential import download_one # <3>
DEFAULT_CONCUR_REQ = 30
MAX_CONCUR_REQ = 1000
DEFAULT_CONCUR_REQ = 30 # <4>
MAX_CONCUR_REQ = 1000 # <5>
def download_many(cc_list, base_url, verbose, concur_req):
with futures.ThreadPoolExecutor(concur_req) as executor:
to_do = [executor.submit(download_one, cc, base_url, verbose)
for cc in sorted(cc_list)]
counts = [0, 0, 0]
to_do_iter = futures.as_completed(to_do)
counter = collections.Counter()
with futures.ThreadPoolExecutor(concur_req) as executor: # <6>
to_do_map = {} # <7>
for cc in sorted(cc_list): # <8>
future = executor.submit(download_one,
cc, base_url, verbose) # <9>
to_do_map[future] = cc # <10>
to_do_iter = futures.as_completed(to_do_map) # <11>
if not verbose:
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
for future in to_do_iter:
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list)) # <12>
for future in to_do_iter: # <13>
try:
res = future.result()
except Exception as exc:
print('*** Unexpected exception:', exc)
res = future.result() # <14>
except requests.exceptions.HTTPError as exc: # <15>
error_msg = 'HTTP {res.status_code} - {res.reason}'
error_msg = error_msg.format(res=exc.response)
except requests.exceptions.ConnectionError as exc:
error_msg = 'Connection error'
else:
counts[res.status.value-1] += 1
error_msg = ''
status = res.status
return Counts(*counts)
if error_msg:
status = HTTPStatus.error
counter[status] += 1
if verbose and error_msg:
cc = to_do_map[future] # <16>
print('*** Error for {}: {}'.format(cc, error_msg))
return counter
if __name__ == '__main__':
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
# END FLAGS2_THREADPOOL

View File

@ -1,122 +1,132 @@
"""Download flags of top 10 countries by population
"""Download flags of countries (with error handling).
asyncio version
asyncio version using thread pool to save files
Sample run::
$ python3 pop10_asyncio1.py
CN retrieved.
US retrieved.
BR retrieved.
NG retrieved.
PK retrieved.
RU retrieved.
ID retrieved.
IN retrieved.
BD retrieved.
JP retrieved.
10 flags downloaded in 0.45s
$
"""
import asyncio
from collections import namedtuple
from enum import Enum
import collections
import aiohttp
from aiohttp import web
import tqdm
from flags_sequential2 import BASE_URL
from flags_sequential2 import save_flag, main, Counts
from flags2_common import main, HTTPStatus, Result, save_flag
MAX_TASKS = 100 if 'localhost' in BASE_URL else 5
TIMEOUT = 120 # seconds
Status = Enum('Status', 'ok not_found error')
Result = namedtuple('Result', 'status data')
# default set low to avoid errors from remote site, such as
# 503 - Service Temporarily Unavailable
DEFAULT_CONCUR_REQ = 5
MAX_CONCUR_REQ = 1000
class FetchError(Exception):
def __init__(self, country_code):
self.country_code = country_code
# BEGIN FLAGS3_ASYNCIO
@asyncio.coroutine
def http_get(url):
res = yield from aiohttp.request('GET', url)
if res.status == 200:
ctype = res.headers.get('Content-type', '').lower()
if 'json' in ctype or url.endswith('json'):
data = yield from res.json()
data = yield from res.json() # <1>
else:
data = yield from res.read()
data = yield from res.read() # <2>
return data
elif res.status == 404:
raise web.HTTPNotFound()
else:
raise aiohttp.errors.HttpProcessingError(
code=res.status, message=res.reason, headers=res.headers)
code=res.status, message=res.reason,
headers=res.headers)
@asyncio.coroutine
def get_flag(cc):
url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
return (yield from http_get(url))
@asyncio.coroutine
def get_country(cc):
url = '{}/{cc}/metadata.json'.format(BASE_URL, cc=cc.lower())
metadata = yield from http_get(url)
def get_country(base_url, cc):
url = '{}/{cc}/metadata.json'.format(base_url, cc=cc.lower())
metadata = yield from http_get(url) # <3>
return metadata['country']
@asyncio.coroutine
def download_one(cc, semaphore):
def get_flag(base_url, cc):
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
return (yield from http_get(url)) # <4>
@asyncio.coroutine
def download_one(cc, base_url, semaphore, verbose):
try:
with (yield from semaphore): # <5>
image = yield from get_flag(base_url, cc)
with (yield from semaphore):
image = yield from get_flag(cc)
with (yield from semaphore):
country = yield from get_country(cc)
country = yield from get_country(base_url, cc)
except web.HTTPNotFound:
status = Status.not_found
except aiohttp.errors.HttpProcessingError as exc:
msg = '{} failed: {exc.code} - {exc.message}'
print(msg.format(cc, exc=exc))
status = Status.error
except aiohttp.errors.ClientResponseError as exc:
try:
context = exc.__context__.__class__.__name__
except AttributeError:
context = '(unknown context)'
msg = '{} failed: {}'
print(msg.format(cc, context))
status = Status.error
else:
print('{} retrieved.'.format(cc.upper()))
country = country.replace(' ', '_')
save_flag(image, '{}-{}.gif'.format(country, cc))
status = Status.ok
return Result(status, cc)
def download_many(cc_list):
semaphore = asyncio.Semaphore(MAX_TASKS)
to_do = [download_one(cc, semaphore) for cc in cc_list]
loop = asyncio.get_event_loop()
#loop.set_debug(True)
try:
done, pending = loop.run_until_complete(asyncio.wait(to_do, timeout=TIMEOUT))
status = HTTPStatus.not_found
msg = 'not found'
except Exception as exc:
print('*' * 60)
print(exc)
print(vars(exc))
print('*' * 60)
counts = []
for status in Status:
counts.append(len([task for task in done
if task.result().status == status]))
for task in pending:
task.cancel()
raise FetchError(cc) from exc
else:
country = country.replace(' ', '_')
filename = '{}-{}.gif'.format(country, cc)
loop = asyncio.get_event_loop()
loop.run_in_executor(None, save_flag, image, filename)
status = HTTPStatus.ok
msg = 'OK'
if verbose and msg:
print(cc, msg)
return Result(status, cc)
# END FLAGS3_ASYNCIO
@asyncio.coroutine
def downloader_coro(cc_list, base_url, verbose, concur_req):
counter = collections.Counter()
semaphore = asyncio.Semaphore(concur_req)
to_do = [download_one(cc, base_url, semaphore, verbose)
for cc in sorted(cc_list)]
to_do_iter = asyncio.as_completed(to_do)
if not verbose:
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
for future in to_do_iter:
try:
res = yield from future
except FetchError as exc:
country_code = exc.country_code
try:
error_msg = exc.__cause__.args[0]
except IndexError:
error_msg = exc.__cause__.__class__.__name__
else:
error_msg = ''
status = res.status
if error_msg:
status = HTTPStatus.error
counter[status] += 1
if verbose and error_msg:
msg = '*** Error for {}: {}'
print(msg.format(country_code, error_msg))
return counter
def download_many(cc_list, base_url, verbose, concur_req):
loop = asyncio.get_event_loop()
coro = downloader_coro(cc_list, base_url, verbose, concur_req)
counts = loop.run_until_complete(coro)
loop.close()
return Counts(*counts)
return counts
if __name__ == '__main__':
main(download_many)
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)

View File

@ -1,77 +1,88 @@
"""Download flags of top 10 countries by population
"""Download flags and names of countries.
ThreadPool version
Sample run::
$ python3 pop10_threadpool1.py
BR retrieved.
PK retrieved.
BD retrieved.
JP retrieved.
CN retrieved.
IN retrieved.
RU retrieved.
NG retrieved.
US retrieved.
ID retrieved.
10 flags downloaded in 0.63s
"""
import collections
from concurrent import futures
from collections import namedtuple
from enum import Enum
import requests
import tqdm
from flags_sequential2 import BASE_URL
from flags_sequential2 import save_flag, get_flag, main, Counts
from flags2_common import main, save_flag, HTTPStatus, Result
from flags2_sequential import get_flag
MAX_WORKERS = 200
Status = Enum('Status', 'ok not_found error')
Result = namedtuple('Result', 'status data')
DEFAULT_CONCUR_REQ = 30
MAX_CONCUR_REQ = 1000
def get_country(cc):
url = '{}/{cc}/metadata.json'.format(BASE_URL, cc=cc.lower())
def get_country(base_url, cc):
url = '{}/{cc}/metadata.json'.format(base_url, cc=cc.lower())
res = requests.get(url)
if res.status_code != 200:
res.raise_for_status()
return res.json()['country']
def download_one(cc):
def download_one(cc, base_url, verbose=False):
try:
image = get_flag(cc)
country = get_country(cc)
image = get_flag(base_url, cc)
country = get_country(base_url, cc)
except requests.exceptions.HTTPError as exc:
res = exc.response
if res.status_code == 404:
status = Status.not_found
else:
msg = '{} failed: {res.status_code} - {res.reason}'
print(msg.format(cc, res=exc.response))
status = Status.error
status = HTTPStatus.not_found
msg = 'not found'
else: # <4>
raise
else:
print('{} retrieved.'.format(cc))
country = country.replace(' ', '_')
save_flag(image, '{}-{}.gif'.format(country, cc))
status = Status.ok
status = HTTPStatus.ok
msg = 'OK'
if verbose:
print(cc, msg)
return Result(status, cc)
def download_many(cc_list):
workers = min(len(cc_list), MAX_WORKERS)
with futures.ThreadPoolExecutor(workers) as executor:
res = executor.map(download_one, sorted(cc_list))
res = list(res)
counts = []
for status in Status:
counts.append(len([r for r in res if r.status == status]))
return Counts(*counts)
def download_many(cc_list, base_url, verbose, concur_req):
counter = collections.Counter()
with futures.ThreadPoolExecutor(concur_req) as executor:
to_do_map = {}
for cc in sorted(cc_list):
future = executor.submit(download_one,
cc, base_url, verbose)
to_do_map[future] = cc
to_do_iter = futures.as_completed(to_do_map)
if not verbose:
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
for future in to_do_iter:
try:
res = future.result()
except requests.exceptions.HTTPError as exc:
error_msg = 'HTTP {res.status_code} - {res.reason}'
error_msg = error_msg.format(res=exc.response)
except requests.exceptions.ConnectionError as exc:
error_msg = 'Connection error'
else:
error_msg = ''
status = res.status
if error_msg:
status = HTTPStatus.error
counter[status] += 1
if verbose and error_msg:
cc = to_do_map[future]
print('*** Error for {}: {}'.format(cc, error_msg))
return counter
if __name__ == '__main__':
main(download_many)
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)

View File

@ -0,0 +1,48 @@
"""Download flags of top 20 countries by population
asyncio + aiottp version
Sample run::
$ python3 flags_asyncio.py
EG VN IN TR RU ID US DE CN MX JP BD NG ET FR BR PH PK CD IR
20 flags downloaded in 1.07s
"""
# BEGIN FLAGS_ASYNCIO
import asyncio
import aiohttp # <1>
from flags import BASE_URL, save_flag, show, main # <2>
@asyncio.coroutine  # <3>
def get_flag(cc):
    """Coroutine: fetch the flag GIF for country code *cc* from BASE_URL.

    Returns the raw image bytes.
    """
    url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
    resp = yield from aiohttp.request('GET', url)  # <4> non-blocking HTTP GET (legacy aiohttp 0.x API)
    image = yield from resp.read()  # <5> read the body without blocking the event loop
    return image
@asyncio.coroutine
def download_one(cc):  # <6>
    """Coroutine: download one flag, echo its code and save it; return *cc*.

    NOTE(review): save_flag does blocking file I/O inside the event loop;
    tolerable in this demo, but a real app should hand it to an executor.
    """
    image = yield from get_flag(cc)  # <7> suspends here until get_flag completes
    show(cc)
    save_flag(image, cc.lower() + '.gif')
    return cc
def download_many(cc_list):
    """Run one download_one coroutine per country code; return how many finished."""
    loop = asyncio.get_event_loop()  # <8>
    coroutines = [download_one(code) for code in sorted(cc_list)]  # <9>
    all_done = asyncio.wait(coroutines)  # <10> coroutine that completes when every download does
    finished, _pending = loop.run_until_complete(all_done)  # <11>
    loop.close()  # <12>
    return len(finished)
if __name__ == '__main__':
main(download_many)
# END FLAGS_ASYNCIO

View File

@ -1,45 +0,0 @@
"""Download flags of top 20 countries by population
asyncio+aiottp version
Sample run::
$ python3 flags_asyncio0.py
EG retrieved.
BD retrieved.
JP retrieved.
...
CD retrieved.
PH retrieved.
ET retrieved.
20 flags downloaded in 1.05s
"""
import asyncio
import aiohttp
from flags import BASE_URL, save_flag, main
@asyncio.coroutine
def download_one(cc):
    """Coroutine: download one flag GIF, report progress and save it.

    Returns *cc* so the caller can tell which download completed.
    """
    url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
    res = yield from aiohttp.request('GET', url)
    image = yield from res.read()
    print('{} retrieved.'.format(cc))
    save_flag(image, cc.lower() + '.gif')  # NOTE(review): blocking file I/O inside the event loop
    return cc
def download_many(cc_list):
    """Download every flag concurrently; return the number of completed tasks."""
    pending = [download_one(code) for code in cc_list]
    event_loop = asyncio.get_event_loop()
    completed, _unfinished = event_loop.run_until_complete(asyncio.wait(pending))
    event_loop.close()
    return len(completed)
if __name__ == '__main__':
main(download_many)

View File

@ -1,51 +0,0 @@
"""Download flags of top 20 countries by population
asyncio+aiottp version
Sample run::
$ python3 flags_asyncio.py
NG retrieved.
FR retrieved.
IN retrieved.
...
EG retrieved.
DE retrieved.
IR retrieved.
20 flags downloaded in 1.08s
"""
import asyncio
import aiohttp
from flags import BASE_URL, save_flag, main
@asyncio.coroutine
def get_flag(cc):
    """Coroutine: fetch and return the flag GIF bytes for country code *cc*."""
    url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
    res = yield from aiohttp.request('GET', url)
    image = yield from res.read()
    return image
@asyncio.coroutine
def download_one(cc):
    """Coroutine: fetch one flag via get_flag, report and save it; return *cc*."""
    image = yield from get_flag(cc)
    print('{} retrieved.'.format(cc))
    save_flag(image, cc.lower() + '.gif')
    return cc
def download_many(cc_list):
    """Drive the event loop until every download_one coroutine finishes."""
    coros = [download_one(code) for code in cc_list]
    loop = asyncio.get_event_loop()
    done, _ = loop.run_until_complete(asyncio.wait(coros))
    loop.close()
    return len(done)
if __name__ == '__main__':
main(download_many)

View File

@ -1,6 +1,6 @@
"""Download flags of top 20 countries by population
ThreadPool version
ThreadPoolExecutor version
Sample run::
@ -15,28 +15,29 @@ Sample run::
20 flags downloaded in 0.93s
"""
# BEGIN FLAGS_THREADPOOL
from concurrent import futures
from flags import save_flag, get_flag, main
from flags import save_flag, get_flag, show, main # <1>
MAX_WORKERS = 100
MAX_WORKERS = 20 # <2>
def download_one(cc):
def download_one(cc): # <3>
image = get_flag(cc)
print('{} retrieved.'.format(cc.upper()))
show(cc)
save_flag(image, cc.lower() + '.gif')
return cc
def download_many(cc_list):
workers = min(len(cc_list), MAX_WORKERS)
with futures.ThreadPoolExecutor(workers) as executor:
res = executor.map(download_one, sorted(cc_list))
workers = min(MAX_WORKERS, len(cc_list)) # <4>
with futures.ThreadPoolExecutor(workers) as executor: # <5>
res = executor.map(download_one, sorted(cc_list)) # <6>
return len(list(res))
return len(list(res)) # <7>
if __name__ == '__main__':
main(download_many)
main(download_many) # <8>
# END FLAGS_THREADPOOL

View File

@ -0,0 +1,55 @@
"""Download flags of top 20 countries by population
ThreadPoolExecutor version 2, with ``as_completed``.
Sample run::
$ python3 flags_threadpool.py
BD retrieved.
EG retrieved.
CN retrieved.
...
PH retrieved.
US retrieved.
IR retrieved.
20 flags downloaded in 0.93s
"""
from concurrent import futures
from flags import save_flag, get_flag, show, main
MAX_WORKERS = 20
def download_one(cc):
    """Download a single flag image, echo the country code, save it; return *cc*."""
    saved_name = cc.lower() + '.gif'
    flag_bytes = get_flag(cc)
    show(cc)
    save_flag(flag_bytes, saved_name)
    return cc
# BEGIN FLAGS_THREADPOOL_AS_COMPLETED
def download_many(cc_list):
    """Schedule the first five downloads on a 3-worker pool and report each
    future as it completes; return the number of results collected.
    """
    sample = cc_list[:5]  # <1> keep the demo output short
    with futures.ThreadPoolExecutor(max_workers=3) as executor:  # <2>
        scheduled = []
        for code in sorted(sample):  # <3>
            fut = executor.submit(download_one, code)  # <4>
            scheduled.append(fut)  # <5>
            print('Scheduled for {}: {}'.format(code, fut))  # <6>
        completed = []
        for fut in futures.as_completed(scheduled):  # <7> yields futures as they finish
            outcome = fut.result()  # <8>
            print('{} result: {!r}'.format(fut, outcome))  # <9>
            completed.append(outcome)
    return len(completed)
# END FLAGS_THREADPOOL_AS_COMPLETED
if __name__ == '__main__':
main(download_many)

View File

@ -0,0 +1,34 @@
"""
Experiment with ``ThreadPoolExecutor.map``
"""
# BEGIN EXECUTOR_MAP
from time import sleep, strftime
from concurrent import futures
def display(*args):  # <1>
    """Print *args* prefixed with the current time as [HH:MM:SS]."""
    stamp = strftime('[%H:%M:%S]')
    print(stamp, end=' ')
    print(*args)
def loiter(n):  # <2>
    """Sleep for *n* seconds, announcing start and end (indented *n* tabs).

    Returns n * 10 so results are easy to tell apart in the demo output.
    """
    indent = '\t' * n
    display('{}loiter({}): doing nothing for {}s...'.format(indent, n, n))
    sleep(n)
    display('{}loiter({}): done.'.format(indent, n))
    return n * 10  # <3>
def main():
    """Demonstrate that ThreadPoolExecutor.map is lazy: it returns a generator
    immediately; consuming the generator is what blocks for results.
    """
    display('Script starting.')
    executor = futures.ThreadPoolExecutor(max_workers=3)  # <4> not shut down: demo relies on interpreter-exit join
    results = executor.map(loiter, range(5))  # <5> returns a generator right away
    display('results:', results)  # <6> prints the generator object, not the values
    display('Waiting for individual results:')
    for i, result in enumerate(results):  # <7> blocks here until each result is ready
        display('result {}: {}'.format(i, result))
main()
# END EXECUTOR_MAP

View File

@ -0,0 +1,31 @@
"""
Experiments with futures
"""
from time import sleep, strftime
from concurrent import futures
def display(*args):
    """Print *args* preceded by the current time as [HH:MM:SS]."""
    print(strftime('[%H:%M:%S]'), end=' ')
    print(*args)
def loiter(n):
    """Sleep *n* seconds, announcing start and end (indented by *n* tabs).

    Returns n * 10 so results are easy to tell apart in the demo output.
    """
    msg = '{}loiter({}): doing nothing for {}s...'
    display(msg.format('\t'*n, n, n))
    sleep(n)
    msg = '{}loiter({}): done.'
    display(msg.format('\t'*n, n))
    return n * 10
def demo_submit():
    """Submit five loiter jobs to a 3-thread pool, then display each result in
    submission order (future.result() blocks until that future is done).
    """
    pool = futures.ThreadPoolExecutor(3)
    submitted = [pool.submit(loiter, n) for n in range(5)]
    display('done?', [fut.done() for fut in submitted])
    display('Waiting for results...')
    results = (fut.result() for fut in submitted)
    for idx, value in enumerate(results):
        display('result[{}]: {}'.format(idx, value))
demo_submit()

41
futures/future_yield.py Normal file
View File

@ -0,0 +1,41 @@
"""
An experiment showing that ``asyncio.Future`` is an iterable (it
implements `__iter__`) designed to be used with ``yield from``.
Priming the future returns itself. After the result of the future
is set, next iteration produces the result as the ``value`` attribute
of ``StopIteration``.
Sample run::
$ python3 future_yield.py
a, future: <Future pending> 0x66514c
b, prime_res: <Future pending> 0x66514c
b, exc.value: 42
"""
import asyncio
@asyncio.coroutine
def a(future):
    """Coroutine that suspends on *future* and returns its result once set."""
    print('a, future:\t', future, hex(id(future)))
    res = yield from future
    return res
def b():
    """Drive coroutine ``a`` by hand: show that yielding from a pending Future
    first yields the Future itself, and that after the result is set the next
    resume ends with the value carried by ``StopIteration``.
    """
    future = asyncio.Future()
    coro = a(future)
    prime_res = next(coro)  # priming: the pending future is yielded back to us
    print('b, prime_res:\t', prime_res, hex(id(future)))
    # If next(coro) is called again before the result of
    # the future is set, we get:
    # AssertionError: yield from wasn't used with future
    #result = next(coro) # uncomment to see AssertionError
    future.set_result(42)
    try:
        next(coro)  # resuming after set_result finishes the coroutine
    except StopIteration as exc:
        print('b, exc.value:\t', exc.value)  # the coroutine's return value
b()

19
futures/future_yield2.py Normal file
View File

@ -0,0 +1,19 @@
@asyncio.coroutine
def a(future):
    """Coroutine that waits on *future* and returns its eventual result."""
    print('a, future:', future, hex(id(future)))
    res = yield from future
    return res
def b():
    """Prime coroutine ``a`` manually; the pending future is what it yields first."""
    future = asyncio.Future()
    coro = a(future)
    prime_result = next(coro)
    print('b, prime_result:', prime_result, hex(id(future)))
loop = asyncio.get_event_loop()
future = asyncio.Future()
print('future:', future, hex(id(future)))
tasks = [asyncio.async(a(future))]
res = loop.run_until_complete(b())