final concurrency examples
This commit is contained in:
14
futures/callbackhell.js
Normal file
14
futures/callbackhell.js
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
// Three dependent requests chained through nested callbacks: each response
// is transformed into the next request inside the previous callback.
fetch1(request1, function (firstResponse) {
  // phase 1
  fetch2(step1(firstResponse), function (secondResponse) {
    // phase 2
    fetch3(step2(secondResponse), function (thirdResponse) {
      // phase 3
      step3(thirdResponse);
    });
  });
});
|
||||||
15
futures/callbackhell.py
Normal file
15
futures/callbackhell.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
def phase1(response1):
    """Callback for ``fetch1``: turn its response into the second request."""
    fetch2(step1(response1), phase2)
|
||||||
|
|
||||||
|
|
||||||
|
def phase2(response2):
    """Callback for ``fetch2``: turn its response into the third request."""
    fetch3(step2(response2), phase3)
|
||||||
|
|
||||||
|
|
||||||
|
def phase3(response3):
    """Callback for ``fetch3``: hand the final response to ``step3``."""
    step3(response3)
|
||||||
|
|
||||||
|
|
||||||
|
fetch1(request1, phase1)
|
||||||
223
futures/charfinder/charfinder.py
Executable file
223
futures/charfinder/charfinder.py
Executable file
@@ -0,0 +1,223 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
"""
|
||||||
|
Unicode character finder utility:
|
||||||
|
find characters based on words in their official names.
|
||||||
|
|
||||||
|
This can be used from the command line, just pass words as arguments.
|
||||||
|
|
||||||
|
Here is the ``main`` function which makes it happen::
|
||||||
|
|
||||||
|
>>> main('rook') # doctest: +NORMALIZE_WHITESPACE
|
||||||
|
U+2656 ♖ WHITE CHESS ROOK
|
||||||
|
U+265C ♜ BLACK CHESS ROOK
|
||||||
|
(2 matches for 'rook')
|
||||||
|
>>> main('rook', 'black') # doctest: +NORMALIZE_WHITESPACE
|
||||||
|
U+265C ♜ BLACK CHESS ROOK
|
||||||
|
(1 match for 'rook black')
|
||||||
|
>>> main('white bishop') # doctest: +NORMALIZE_WHITESPACE
|
||||||
|
U+2657 ♗ WHITE CHESS BISHOP
|
||||||
|
(1 match for 'white bishop')
|
||||||
|
>>> main("jabberwocky's vest")
|
||||||
|
(No match for "jabberwocky's vest")
|
||||||
|
|
||||||
|
|
||||||
|
For exploring words that occur in the character names, there is the
|
||||||
|
``word_report`` function::
|
||||||
|
|
||||||
|
>>> index = UnicodeNameIndex(sample_chars)
|
||||||
|
>>> index.word_report()
|
||||||
|
3 SIGN
|
||||||
|
2 A
|
||||||
|
2 EURO
|
||||||
|
2 LATIN
|
||||||
|
2 LETTER
|
||||||
|
1 CAPITAL
|
||||||
|
1 CURRENCY
|
||||||
|
1 DOLLAR
|
||||||
|
1 SMALL
|
||||||
|
>>> index = UnicodeNameIndex()
|
||||||
|
>>> index.word_report(10)
|
||||||
|
75821 CJK
|
||||||
|
75761 IDEOGRAPH
|
||||||
|
74656 UNIFIED
|
||||||
|
13196 SYLLABLE
|
||||||
|
11735 HANGUL
|
||||||
|
7616 LETTER
|
||||||
|
2232 WITH
|
||||||
|
2180 SIGN
|
||||||
|
2122 SMALL
|
||||||
|
1709 CAPITAL
|
||||||
|
|
||||||
|
Note: characters with names starting with 'CJK UNIFIED IDEOGRAPH'
|
||||||
|
are indexed with those three words only, excluding the hexadecimal
|
||||||
|
codepoint at the end of the name.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
import unicodedata
|
||||||
|
import pickle
|
||||||
|
import warnings
|
||||||
|
import itertools
|
||||||
|
import functools
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
|
# Raw strings keep regex escapes such as \w and \+ out of Python's own
# string-escape processing, which warns about unknown escape sequences.
RE_WORD = re.compile(r'\w+')  # one run of word characters
RE_UNICODE_NAME = re.compile(r'^[A-Z0-9 -]+$')  # only chars used in names
RE_CODEPOINT = re.compile(r'U\+([0-9A-F]{4,6})')  # e.g. U+265C; group 1 = hex
|
||||||
|
|
||||||
|
INDEX_NAME = 'charfinder_index.pickle'
|
||||||
|
MINIMUM_SAVE_LEN = 10000
|
||||||
|
CJK_UNI_PREFIX = 'CJK UNIFIED IDEOGRAPH'
|
||||||
|
CJK_CMP_PREFIX = 'CJK COMPATIBILITY IDEOGRAPH'
|
||||||
|
|
||||||
|
sample_chars = [
|
||||||
|
'$', # DOLLAR SIGN
|
||||||
|
'A', # LATIN CAPITAL LETTER A
|
||||||
|
'a', # LATIN SMALL LETTER A
|
||||||
|
'\u20a0', # EURO-CURRENCY SIGN
|
||||||
|
'\u20ac', # EURO SIGN
|
||||||
|
]
|
||||||
|
|
||||||
|
CharDescription = namedtuple('CharDescription', 'code_str char name')
|
||||||
|
|
||||||
|
QueryResult = namedtuple('QueryResult', 'count items')
|
||||||
|
|
||||||
|
|
||||||
|
def tokenize(text):
    """Lazily yield every ``RE_WORD`` match found in ``text``, uppercased."""
    for word in RE_WORD.findall(text):
        yield word.upper()
|
||||||
|
|
||||||
|
|
||||||
|
def query_type(text):
    """Classify a query string as 'CODEPOINT', 'NAME' or 'CHARACTERS'.

    The comparison is case-insensitive: the text is uppercased first.
    """
    upper = text.upper()
    if 'U+' in upper:
        return 'CODEPOINT'
    if RE_UNICODE_NAME.match(upper):
        return 'NAME'
    return 'CHARACTERS'
|
||||||
|
|
||||||
|
|
||||||
|
class UnicodeNameIndex:
    """Inverted index from words in Unicode character names to characters.

    ``self.index`` is a dict mapping each uppercased word to the set of
    characters whose name (as reported by ``unicodedata.name``) contains it.
    """

    def __init__(self, chars=None):
        """Create the index; ``chars`` has the same meaning as in ``load``."""
        self.load(chars)

    def load(self, chars=None):
        """Populate ``self.index``.

        With ``chars=None`` the pickled index in ``INDEX_NAME`` is tried
        first; if it cannot be read, the index is built from scratch.
        With an iterable of characters, the index is always built from it.
        A freshly built index with more than ``MINIMUM_SAVE_LEN`` words is
        saved; failure to save only produces a warning.
        """
        self.index = None
        if chars is None:
            try:
                with open(INDEX_NAME, 'rb') as fp:
                    # NOTE(review): pickle.load can execute arbitrary code;
                    # this is only safe while INDEX_NAME is a file written
                    # by this program itself.
                    self.index = pickle.load(fp)
            except OSError:
                pass  # no readable cache file: rebuild below
            except (EOFError, pickle.UnpicklingError) as exc:
                # A truncated or corrupt cache must not prevent start-up.
                warnings.warn('Ignoring unreadable {!r}: {}'
                              .format(INDEX_NAME, exc))
        if self.index is None:
            self.build_index(chars)
            # Only a newly built index is written back to disk.
            if len(self.index) > MINIMUM_SAVE_LEN:
                try:
                    self.save()
                except OSError as exc:
                    warnings.warn('Could not save {!r}: {}'
                                  .format(INDEX_NAME, exc))

    def save(self):
        """Pickle ``self.index`` into the file named by ``INDEX_NAME``."""
        with open(INDEX_NAME, 'wb') as fp:
            pickle.dump(self.index, fp)

    def build_index(self, chars=None):
        """Build ``self.index`` from ``chars`` (default: U+0020 and up).

        Characters without a name are skipped. Names starting with
        ``CJK_UNI_PREFIX`` or ``CJK_CMP_PREFIX`` are indexed under the
        words of that prefix only.
        """
        if chars is None:
            # range() excludes its stop value, hence the + 1.
            chars = (chr(i) for i in range(32, sys.maxunicode + 1))
        index = {}
        for char in chars:
            try:
                name = unicodedata.name(char)
            except ValueError:  # character has no name
                continue
            if name.startswith(CJK_UNI_PREFIX):
                name = CJK_UNI_PREFIX
            elif name.startswith(CJK_CMP_PREFIX):
                name = CJK_CMP_PREFIX

            for word in tokenize(name):
                index.setdefault(word, set()).add(char)

        self.index = index

    def word_rank(self, top=None):
        """Return ``(count, word)`` pairs, most frequent first.

        Ties are ordered alphabetically by word. ``top`` limits the number
        of pairs returned; ``None`` returns all of them.
        """
        res = [(len(chars), word) for word, chars in self.index.items()]
        res.sort(key=lambda item: (-item[0], item[1]))
        if top is not None:
            res = res[:top]
        return res

    def word_report(self, top=None):
        """Print the ``word_rank`` result, one ``count word`` per line."""
        for postings, key in self.word_rank(top):
            print('{:5} {}'.format(postings, key))

    def find_chars(self, query, start=0, stop=None):
        """Find characters whose names contain every word of ``query``.

        Returns ``QueryResult(count, items)``: ``count`` is the total
        number of matches and ``items`` is a tuple with the matches from
        position ``start`` up to ``stop`` (exclusive) in sorted order.
        """
        result_sets = []
        for word in tokenize(query):
            chars = self.index.get(word)
            if chars is None:  # shortcut: no such word
                result_sets = []
                break
            result_sets.append(chars)

        if not result_sets:
            return QueryResult(0, ())

        result = functools.reduce(set.intersection, result_sets)
        result = sorted(result)  # must sort to support start, stop
        # A tuple in both branches, so callers can rely on len() and reuse.
        return QueryResult(len(result), tuple(result[start:stop]))

    def describe(self, char):
        """Return a ``CharDescription`` (code_str, char, name) for ``char``."""
        code_str = 'U+{:04X}'.format(ord(char))
        name = unicodedata.name(char)
        return CharDescription(code_str, char, name)

    def find_descriptions(self, query, start=0, stop=None):
        """Yield a ``CharDescription`` for each match of ``find_chars``."""
        for char in self.find_chars(query, start, stop).items:
            yield self.describe(char)

    def get_descriptions(self, chars):
        """Yield a ``CharDescription`` for each character in ``chars``."""
        for char in chars:
            yield self.describe(char)

    def describe_str(self, char):
        """Return the description of ``char`` as one tab-separated line."""
        return '{:7}\t{}\t{}'.format(*self.describe(char))

    def find_description_strs(self, query, start=0, stop=None):
        """Yield a ``describe_str`` line for each match of ``find_chars``."""
        for char in self.find_chars(query, start, stop).items:
            yield self.describe_str(char)

    @staticmethod  # not an instance method due to concurrency
    def status(query, counter):
        """Return a summary such as ``"2 matches for 'rook'"``."""
        if counter == 0:
            msg = 'No match'
        elif counter == 1:
            msg = '1 match'
        else:
            msg = '{} matches'.format(counter)
        return '{} for {!r}'.format(msg, query)
|
||||||
|
|
||||||
|
|
||||||
|
def main(*args):
    """Print every character matching the words in ``args``, then a summary.

    The arguments are joined with spaces to form a single query.
    """
    finder = UnicodeNameIndex()
    query = ' '.join(args)
    shown = 0
    for line in finder.find_description_strs(query):
        print(line)
        shown += 1
    print('({})'.format(finder.status(query, shown)))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: every argument is one query word.
    cli_words = sys.argv[1:]
    if cli_words:
        main(*cli_words)
    else:
        print('Usage: {} word1 [word2]...'.format(sys.argv[0]))
|
||||||
19
futures/charfinder/http_charfinder.html
Normal file
19
futures/charfinder/http_charfinder.html
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Charfinder</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
Examples: {links}
|
||||||
|
<p>
|
||||||
|
<form action="/">
|
||||||
|
<input type="search" name="query" value="{query}">
|
||||||
|
<input type="submit" value="find"> {message}
|
||||||
|
</form>
|
||||||
|
</p>
|
||||||
|
<table>
|
||||||
|
{result}
|
||||||
|
</table>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
71
futures/charfinder/http_charfinder.py
Executable file
71
futures/charfinder/http_charfinder.py
Executable file
@@ -0,0 +1,71 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import asyncio
|
||||||
|
from aiohttp import web
|
||||||
|
|
||||||
|
from charfinder import UnicodeNameIndex
|
||||||
|
|
||||||
|
TEMPLATE_NAME = 'http_charfinder.html'
CONTENT_TYPE = 'text/html; charset=UTF-8'
SAMPLE_WORDS = ('bismillah chess cat circled Malayalam digit'
                ' Roman face Ethiopic black mark symbol dot'
                ' operator Braille hexagram').split()

ROW_TPL = '<tr><td>{code_str}</td><th>{char}</th><td>{name}</td></tr>'
# The quotes shown around the word in the tooltip are written as &quot;
# entities: a literal " would end the title="..." attribute early.
LINK_TPL = '<a href="/?query={0}" title="find &quot;{0}&quot;">{0}</a>'
# Example links, sorted case-insensitively.
LINKS_HTML = ', '.join(LINK_TPL.format(word) for word in
                       sorted(SAMPLE_WORDS, key=str.upper))
|
||||||
|
|
||||||
|
|
||||||
|
index = UnicodeNameIndex()
|
||||||
|
with open(TEMPLATE_NAME) as tpl:
|
||||||
|
template = tpl.read()
|
||||||
|
template = template.replace('{links}', LINKS_HTML)
|
||||||
|
|
||||||
|
# BEGIN HTTP_CHARFINDER_HOME
|
||||||
|
def home(request):  # <1>
    """Handle ``GET /``: run the query, if any, and render the result page.

    Reads the ``query`` parameter from the request, looks up matching
    characters in the module-level ``index`` and fills the module-level
    ``template``. Everything derived from the request, as well as each
    matched character, is HTML-escaped before being placed in the page.
    """
    # Function-scope import so this handler needs no change to the
    # module's import block.
    from html import escape

    # NOTE(review): request.GET is the accessor used by the aiohttp version
    # this file was written for — confirm against the installed version.
    query = request.GET.get('query', '').strip()  # <2>
    print('Query: {!r}'.format(query))  # <3>
    if query:  # <4>
        descriptions = list(index.find_descriptions(query))
        # Fields are passed explicitly: vars() does not work on namedtuple
        # instances in current Python versions. The character itself may be
        # '<', '&' or '"', so it must be escaped too.
        res = '\n'.join(ROW_TPL.format(code_str=descr.code_str,
                                       char=escape(descr.char),
                                       name=escape(descr.name))
                        for descr in descriptions)
        msg = index.status(query, len(descriptions))
    else:
        descriptions = []
        res = ''
        msg = 'Enter words describing characters.'

    # The query is echoed inside value="..." and inside the status message.
    text = template.format(query=escape(query), result=res,
                           message=escape(msg))
    print('Sending {} results'.format(len(descriptions)))
    return web.Response(content_type=CONTENT_TYPE, text=text)
|
||||||
|
# END HTTP_CHARFINDER_HOME
|
||||||
|
|
||||||
|
|
||||||
|
# BEGIN HTTP_CHARFINDER_SETUP
|
||||||
|
@asyncio.coroutine
def init(loop, address, port):  # <1>
    """Coroutine: start the web server and return its bound socket address.

    Registers ``home`` as the handler for ``GET /`` and listens on
    ``address``:``port`` using ``loop``.
    """
    application = web.Application(loop=loop)  # <2>
    application.router.add_route('GET', '/', home)  # <3>
    request_handler = application.make_handler()  # <4>
    listener = yield from loop.create_server(request_handler,
                                             address, port)  # <5>
    first_socket = listener.sockets[0]
    return first_socket.getsockname()  # <6>
|
||||||
|
|
||||||
|
def main(address="127.0.0.1", port=8888):
    """Run the HTTP character finder until interrupted with CTRL-C.

    ``port`` may be a string (as received from the command line); it is
    converted with ``int``.
    """
    port = int(port)
    event_loop = asyncio.get_event_loop()
    bound_address = event_loop.run_until_complete(
        init(event_loop, address, port))  # <7>
    print('Serving on {}. Hit CTRL-C to stop.'.format(bound_address))
    try:
        event_loop.run_forever()  # <8>
    except KeyboardInterrupt:  # CTRL+C pressed
        pass
    print('Server shutting down.')
    event_loop.close()  # <9>
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main(*sys.argv[1:])
|
||||||
|
# END HTTP_CHARFINDER_SETUP
|
||||||
64
futures/charfinder/tcp_charfinder.py
Executable file
64
futures/charfinder/tcp_charfinder.py
Executable file
@@ -0,0 +1,64 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# BEGIN TCP_CHARFINDER_TOP
|
||||||
|
import sys
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
from charfinder import UnicodeNameIndex # <1>
|
||||||
|
|
||||||
|
CRLF = b'\r\n'
|
||||||
|
PROMPT = b'?> '
|
||||||
|
|
||||||
|
index = UnicodeNameIndex() # <2>
|
||||||
|
|
||||||
|
@asyncio.coroutine
def handle_queries(reader, writer):  # <3>
    """Coroutine serving one client: prompt, read a query, send matches.

    The session ends when the client disconnects or sends a line whose
    first character is a control character (code below 32); undecodable
    input is treated as such a line. The writer is closed on exit.
    """
    while True:  # <4>
        writer.write(PROMPT)  # can't yield from!  # <5>
        yield from writer.drain()  # must yield from!  # <6>
        data = yield from reader.readline()  # <7>
        if not data:
            # readline() gives b'' at end of stream, i.e. the client went
            # away; without this check the loop would keep re-prompting.
            break
        try:
            query = data.decode().strip()
        except UnicodeDecodeError:  # <8>
            query = '\x00'
        client = writer.get_extra_info('peername')  # <9>
        print('Received from {}: {!r}'.format(client, query))  # <10>
        if query:
            if ord(query[:1]) < 32:  # <11>
                break
            lines = list(index.find_description_strs(query))  # <12>
            if lines:
                writer.writelines(line.encode() + CRLF for line in lines)  # <13>
            writer.write(index.status(query, len(lines)).encode() + CRLF)  # <14>

            yield from writer.drain()  # <15>
            print('Sent {} results'.format(len(lines)))  # <16>

    print('Close the client socket')  # <17>
    writer.close()  # <18>
|
||||||
|
# END TCP_CHARFINDER_TOP
|
||||||
|
|
||||||
|
# BEGIN TCP_CHARFINDER_MAIN
|
||||||
|
def main(address='127.0.0.1', port=2323):  # <1>
    """Run the TCP character finder until interrupted with CTRL-C.

    ``port`` may be a string (as received from the command line); it is
    converted with ``int``. On shutdown the server is closed and awaited
    before the event loop is closed.
    """
    port = int(port)
    event_loop = asyncio.get_event_loop()
    start_coro = asyncio.start_server(handle_queries, address, port,
                                      loop=event_loop)  # <2>
    server = event_loop.run_until_complete(start_coro)  # <3>

    bound_address = server.sockets[0].getsockname()  # <4>
    print('Serving on {}. Hit CTRL-C to stop.'.format(bound_address))  # <5>
    try:
        event_loop.run_forever()  # <6>
    except KeyboardInterrupt:  # CTRL+C pressed
        pass

    print('Server shutting down.')
    server.close()  # <7>
    event_loop.run_until_complete(server.wait_closed())  # <8>
    event_loop.close()  # <9>
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main(*sys.argv[1:]) # <10>
|
||||||
|
# END TCP_CHARFINDER_MAIN
|
||||||
115
futures/charfinder/test_charfinder.py
Normal file
115
futures/charfinder/test_charfinder.py
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from charfinder import UnicodeNameIndex, tokenize, sample_chars, query_type
|
||||||
|
from unicodedata import name
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def sample_index():
|
||||||
|
return UnicodeNameIndex(sample_chars)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="module")
|
||||||
|
def full_index():
|
||||||
|
return UnicodeNameIndex()
|
||||||
|
|
||||||
|
|
||||||
|
def test_query_type():
|
||||||
|
assert query_type('blue') == 'NAME'
|
||||||
|
|
||||||
|
|
||||||
|
def test_tokenize():
|
||||||
|
assert list(tokenize('')) == []
|
||||||
|
assert list(tokenize('a b')) == ['A', 'B']
|
||||||
|
assert list(tokenize('a-b')) == ['A', 'B']
|
||||||
|
assert list(tokenize('abc')) == ['ABC']
|
||||||
|
assert list(tokenize('café')) == ['CAFÉ']
|
||||||
|
|
||||||
|
|
||||||
|
def test_index():
|
||||||
|
sample_index = UnicodeNameIndex(sample_chars)
|
||||||
|
assert len(sample_index.index) == 9
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_word_no_match(sample_index):
|
||||||
|
res = sample_index.find_chars('qwertyuiop')
|
||||||
|
assert len(res.items) == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_word_1_match(sample_index):
|
||||||
|
res = [(ord(char), name(char))
|
||||||
|
for char in sample_index.find_chars('currency').items]
|
||||||
|
assert res == [(8352, 'EURO-CURRENCY SIGN')]
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_word_1_match_character_result(sample_index):
|
||||||
|
res = [name(char) for char in
|
||||||
|
sample_index.find_chars('currency').items]
|
||||||
|
assert res == ['EURO-CURRENCY SIGN']
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_word_2_matches(sample_index):
|
||||||
|
res = [(ord(char), name(char))
|
||||||
|
for char in sample_index.find_chars('Euro').items]
|
||||||
|
assert res == [(8352, 'EURO-CURRENCY SIGN'),
|
||||||
|
(8364, 'EURO SIGN')]
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_2_words_no_matches(sample_index):
|
||||||
|
res = sample_index.find_chars('Euro letter')
|
||||||
|
assert res.count == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_2_words_no_matches_because_one_not_found(sample_index):
|
||||||
|
res = sample_index.find_chars('letter qwertyuiop')
|
||||||
|
assert res.count == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_2_words_1_match(sample_index):
|
||||||
|
res = sample_index.find_chars('sign dollar')
|
||||||
|
assert res.count == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_2_words_2_matches(sample_index):
|
||||||
|
res = sample_index.find_chars('latin letter')
|
||||||
|
assert res.count == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_chars_many_matches_full(full_index):
|
||||||
|
res = full_index.find_chars('letter')
|
||||||
|
assert res.count > 7000
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_1_word_1_match_full(full_index):
|
||||||
|
res = [(ord(char), name(char))
|
||||||
|
for char in full_index.find_chars('registered').items]
|
||||||
|
assert res == [(174, 'REGISTERED SIGN')]
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_1_word_2_matches_full(full_index):
|
||||||
|
res = full_index.find_chars('rook')
|
||||||
|
assert res.count == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_3_words_no_matches_full(full_index):
|
||||||
|
res = full_index.find_chars('no such character')
|
||||||
|
assert res.count == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_with_start(sample_index):
|
||||||
|
res = [(ord(char), name(char))
|
||||||
|
for char in sample_index.find_chars('sign', 1).items]
|
||||||
|
assert res == [(8352, 'EURO-CURRENCY SIGN'), (8364, 'EURO SIGN')]
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_with_stop(sample_index):
|
||||||
|
res = [(ord(char), name(char))
|
||||||
|
for char in sample_index.find_chars('sign', 0, 2).items]
|
||||||
|
assert res == [(36, 'DOLLAR SIGN'), (8352, 'EURO-CURRENCY SIGN')]
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_with_start_stop(sample_index):
|
||||||
|
res = [(ord(char), name(char))
|
||||||
|
for char in sample_index.find_chars('sign', 1, 2).items]
|
||||||
|
assert res == [(8352, 'EURO-CURRENCY SIGN')]
|
||||||
|
|
||||||
14
futures/coroutine_purgatory.py
Normal file
14
futures/coroutine_purgatory.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
@asyncio.coroutine
def three_phases():
    """Coroutine running three dependent fetches in sequence.

    Each response is turned into the next request; the last response is
    passed to ``step3``.
    """
    response1 = yield from fetch1(request1)
    # phase 1
    request2 = step1(response1)
    response2 = yield from fetch2(request2)
    # phase 2
    request3 = step2(response2)
    response3 = yield from fetch3(request3)
    # phase 3
    step3(response3)


# create_task() expects a coroutine object, so the coroutine function
# must be called here rather than passed by name.
loop.create_task(three_phases())
|
||||||
@@ -5,58 +5,59 @@ Sequential version
|
|||||||
Sample run::
|
Sample run::
|
||||||
|
|
||||||
$ python3 flags.py
|
$ python3 flags.py
|
||||||
BD retrieved.
|
BD BR CD CN DE EG ET FR ID IN IR JP MX NG PH PK RU TR US VN
|
||||||
BR retrieved.
|
|
||||||
CD retrieved.
|
|
||||||
...
|
|
||||||
TR retrieved.
|
|
||||||
US retrieved.
|
|
||||||
VN retrieved.
|
|
||||||
20 flags downloaded in 10.16s
|
20 flags downloaded in 10.16s
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
# BEGIN FLAGS_PY
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
import sys
|
||||||
|
|
||||||
import requests
|
import requests # <1>
|
||||||
|
|
||||||
POP20_CC = ('CN IN US ID BR PK NG BD RU JP '
|
POP20_CC = ('CN IN US ID BR PK NG BD RU JP '
|
||||||
'MX PH VN ET EG DE IR TR CD FR').split()
|
'MX PH VN ET EG DE IR TR CD FR').split() # <2>
|
||||||
|
|
||||||
BASE_URL = 'http://python.pro.br/fluent/data/flags'
|
BASE_URL = 'http://flupy.org/data/flags' # <3>
|
||||||
|
|
||||||
DEST_DIR = 'downloads/'
|
DEST_DIR = 'downloads/' # <4>
|
||||||
|
|
||||||
|
|
||||||
def save_flag(img, filename):
|
def save_flag(img, filename): # <5>
|
||||||
path = os.path.join(DEST_DIR, filename)
|
path = os.path.join(DEST_DIR, filename)
|
||||||
with open(path, 'wb') as fp:
|
with open(path, 'wb') as fp:
|
||||||
fp.write(img)
|
fp.write(img)
|
||||||
|
|
||||||
|
|
||||||
def get_flag(cc):
|
def get_flag(cc): # <6>
|
||||||
url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
|
url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
|
||||||
res = requests.get(url)
|
resp = requests.get(url)
|
||||||
return res.content
|
return resp.content
|
||||||
|
|
||||||
|
|
||||||
def download_many(cc_list):
|
def show(text): # <7>
|
||||||
for cc in sorted(cc_list):
|
print(text, end=' ')
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
|
||||||
|
def download_many(cc_list): # <8>
|
||||||
|
for cc in sorted(cc_list): # <9>
|
||||||
image = get_flag(cc)
|
image = get_flag(cc)
|
||||||
print('{} retrieved.'.format(cc))
|
show(cc)
|
||||||
save_flag(image, cc.lower() + '.gif')
|
save_flag(image, cc.lower() + '.gif')
|
||||||
|
|
||||||
return len(cc_list)
|
return len(cc_list)
|
||||||
|
|
||||||
|
|
||||||
def main(download_many):
|
def main(download_many): # <10>
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
count = download_many(POP20_CC)
|
count = download_many(POP20_CC)
|
||||||
elapsed = time.time() - t0
|
elapsed = time.time() - t0
|
||||||
msg = '{} flags downloaded in {:.2f}s'
|
msg = '\n{} flags downloaded in {:.2f}s'
|
||||||
print(msg.format(count, elapsed))
|
print(msg.format(count, elapsed))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(download_many)
|
main(download_many) # <11>
|
||||||
|
# END FLAGS_PY
|
||||||
|
|||||||
@@ -1,112 +1,120 @@
|
|||||||
"""Download flags of top 10 countries by population
|
"""Download flags of countries (with error handling).
|
||||||
|
|
||||||
asyncio version
|
asyncio version
|
||||||
|
|
||||||
Sample run::
|
Sample run::
|
||||||
|
|
||||||
$
|
$ python3 flags2_asyncio.py -s ERROR -e -m 200
|
||||||
|
ERROR site: http://localhost:8003/flags
|
||||||
|
Searching for 676 flags: from AA to ZZ
|
||||||
|
200 concurrent connections will be used.
|
||||||
|
--------------------
|
||||||
|
146 flags downloaded.
|
||||||
|
363 not found.
|
||||||
|
167 errors.
|
||||||
|
Elapsed time: 2.59s
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
# BEGIN FLAGS2_ASYNCIO_TOP
|
||||||
import asyncio
|
import asyncio
|
||||||
from collections import namedtuple
|
import collections
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
from aiohttp import web
|
from aiohttp import web
|
||||||
import tqdm
|
import tqdm
|
||||||
|
|
||||||
from flag_utils import main, save_flag, Counts
|
from flags2_common import main, HTTPStatus, Result, save_flag
|
||||||
|
|
||||||
# default set low to avoid errors from remote site:
|
# default set low to avoid errors from remote site, such as
|
||||||
# 503 - Service Temporarily Unavailable
|
# 503 - Service Temporarily Unavailable
|
||||||
DEFAULT_CONCUR_REQ = 5
|
DEFAULT_CONCUR_REQ = 5
|
||||||
MAX_CONCUR_REQ = 1000
|
MAX_CONCUR_REQ = 1000
|
||||||
|
|
||||||
TIMEOUT = 120 # seconds
|
|
||||||
|
|
||||||
Status = Enum('Status', 'ok not_found error')
|
class FetchError(Exception): # <1>
|
||||||
Result = namedtuple('Result', 'status data')
|
def __init__(self, country_code):
|
||||||
|
self.country_code = country_code
|
||||||
|
|
||||||
|
|
||||||
@asyncio.coroutine
|
@asyncio.coroutine
|
||||||
def get_flag(base_url, cc):
|
def get_flag(base_url, cc): # <2>
|
||||||
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
|
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
|
||||||
res = yield from aiohttp.request('GET', url)
|
resp = yield from aiohttp.request('GET', url)
|
||||||
if res.status == 200:
|
if resp.status == 200:
|
||||||
image = yield from res.read()
|
image = yield from resp.read()
|
||||||
return image
|
return image
|
||||||
elif res.status == 404:
|
elif resp.status == 404:
|
||||||
raise web.HTTPNotFound()
|
raise web.HTTPNotFound()
|
||||||
else:
|
else:
|
||||||
raise aiohttp.errors.HttpProcessingError(
|
raise aiohttp.HttpProcessingError(
|
||||||
code=res.status, message=res.reason, headers=res.headers)
|
code=resp.status, message=resp.reason,
|
||||||
|
headers=resp.headers)
|
||||||
|
|
||||||
|
|
||||||
@asyncio.coroutine
|
@asyncio.coroutine
|
||||||
def download_one(cc, base_url, semaphore, verbose):
|
def download_one(cc, base_url, semaphore, verbose): # <3>
|
||||||
try:
|
try:
|
||||||
with (yield from semaphore):
|
with (yield from semaphore): # <4>
|
||||||
image = yield from get_flag(base_url, cc)
|
image = yield from get_flag(base_url, cc) # <5>
|
||||||
except web.HTTPNotFound:
|
except web.HTTPNotFound: # <6>
|
||||||
status = Status.not_found
|
status = HTTPStatus.not_found
|
||||||
msg = ''
|
msg = 'not found'
|
||||||
except aiohttp.errors.HttpProcessingError as exc:
|
except Exception as exc:
|
||||||
status = Status.error
|
raise FetchError(cc) from exc # <7>
|
||||||
msg = '{} failed: {exc.code} - {exc.message}'
|
|
||||||
msg = msg.format(cc, exc=exc)
|
|
||||||
except aiohttp.errors.ClientError as exc:
|
|
||||||
try:
|
|
||||||
context = exc.__context__.__class__.__name__
|
|
||||||
except AttributeError:
|
|
||||||
# we chain all exceptions, you should get original exception from __cause__
|
|
||||||
context = '(unknown context)'
|
|
||||||
msg = '{} failed: {}'.format(cc, context)
|
|
||||||
status = Status.error
|
|
||||||
else:
|
else:
|
||||||
save_flag(image, cc.lower() + '.gif')
|
save_flag(image, cc.lower() + '.gif') # <8>
|
||||||
status = Status.ok
|
status = HTTPStatus.ok
|
||||||
msg = 'OK'
|
msg = 'OK'
|
||||||
|
|
||||||
if verbose and msg:
|
if verbose and msg:
|
||||||
print(cc, msg)
|
print(cc, msg)
|
||||||
|
|
||||||
return Result(status, cc)
|
return Result(status, cc)
|
||||||
|
# END FLAGS2_ASYNCIO_TOP
|
||||||
|
|
||||||
|
# BEGIN FLAGS2_ASYNCIO_DOWNLOAD_MANY
|
||||||
@asyncio.coroutine
|
@asyncio.coroutine
|
||||||
def downloader_coro(cc_list, base_url, verbose, max_req):
|
def downloader_coro(cc_list, base_url, verbose, concur_req): # <1>
|
||||||
semaphore = asyncio.Semaphore(max_req)
|
counter = collections.Counter()
|
||||||
to_do = [download_one(cc, base_url, semaphore, verbose) for cc in cc_list]
|
semaphore = asyncio.Semaphore(concur_req) # <2>
|
||||||
results = []
|
to_do = [download_one(cc, base_url, semaphore, verbose)
|
||||||
to_do_iter = asyncio.as_completed(to_do)
|
for cc in sorted(cc_list)] # <3>
|
||||||
|
|
||||||
|
to_do_iter = asyncio.as_completed(to_do) # <4>
|
||||||
if not verbose:
|
if not verbose:
|
||||||
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
|
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list)) # <5>
|
||||||
for future in to_do_iter:
|
for future in to_do_iter: # <6>
|
||||||
result = yield from future
|
try:
|
||||||
results.append(result)
|
res = yield from future # <7>
|
||||||
return results
|
except FetchError as exc: # <8>
|
||||||
|
country_code = exc.country_code # <9>
|
||||||
|
try:
|
||||||
|
error_msg = exc.__cause__.args[0] # <10>
|
||||||
|
except IndexError:
|
||||||
|
error_msg = exc.__cause__.__class__.__name__ # <11>
|
||||||
|
else:
|
||||||
|
error_msg = ''
|
||||||
|
status = res.status
|
||||||
|
|
||||||
|
if error_msg: # <12>
|
||||||
|
status = HTTPStatus.error
|
||||||
|
counter[status] += 1
|
||||||
|
if verbose and error_msg:
|
||||||
|
msg = '*** Error for {}: {}'
|
||||||
|
print(msg.format(country_code, error_msg))
|
||||||
|
|
||||||
|
return counter
|
||||||
|
|
||||||
|
|
||||||
def download_many(cc_list, base_url, verbose, max_req):
|
def download_many(cc_list, base_url, verbose, concur_req):
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
#loop.set_debug(True)
|
coro = downloader_coro(cc_list, base_url, verbose, concur_req)
|
||||||
try:
|
counts = loop.run_until_complete(coro) # <13>
|
||||||
coro = downloader_coro(cc_list, base_url, verbose, max_req)
|
loop.close() # <14>
|
||||||
done = loop.run_until_complete(coro)
|
|
||||||
except Exception as exc:
|
|
||||||
print('*' * 60)
|
|
||||||
print(exc)
|
|
||||||
print(vars(exc))
|
|
||||||
print('*' * 60)
|
|
||||||
counts = []
|
|
||||||
for status in Status:
|
|
||||||
counts.append(len([res for res in done
|
|
||||||
if res.status == status]))
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
return Counts(*counts)
|
return counts
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
|
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
|
||||||
|
# END FLAGS2_ASYNCIO_DOWNLOAD_MANY
|
||||||
|
|||||||
@@ -1,100 +0,0 @@
|
|||||||
"""Download flags of top 10 countries by population
|
|
||||||
|
|
||||||
asyncio version
|
|
||||||
|
|
||||||
Sample run::
|
|
||||||
|
|
||||||
$
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
from collections import namedtuple
|
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
from aiohttp import web
|
|
||||||
|
|
||||||
from flag_utils import main, save_flag, Counts
|
|
||||||
|
|
||||||
# default set low to avoid errors from remote site:
|
|
||||||
# 503 - Service Temporarily Unavailable
|
|
||||||
DEFAULT_CONCUR_REQ = 5
|
|
||||||
MAX_CONCUR_REQ = 1000
|
|
||||||
|
|
||||||
TIMEOUT = 120 # seconds
|
|
||||||
|
|
||||||
Status = Enum('Status', 'ok not_found error')
|
|
||||||
Result = namedtuple('Result', 'status data')
|
|
||||||
|
|
||||||
|
|
||||||
@asyncio.coroutine
|
|
||||||
def get_flag(base_url, cc):
|
|
||||||
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
|
|
||||||
res = yield from aiohttp.request('GET', url)
|
|
||||||
if res.status == 200:
|
|
||||||
image = yield from res.read()
|
|
||||||
return image
|
|
||||||
elif res.status == 404:
|
|
||||||
raise web.HTTPNotFound()
|
|
||||||
else:
|
|
||||||
raise aiohttp.errors.HttpProcessingError(
|
|
||||||
code=res.status, message=res.reason, headers=res.headers)
|
|
||||||
|
|
||||||
|
|
||||||
@asyncio.coroutine
|
|
||||||
def download_one(cc, base_url, semaphore, verbose):
|
|
||||||
try:
|
|
||||||
with (yield from semaphore):
|
|
||||||
image = yield from get_flag(base_url, cc)
|
|
||||||
except web.HTTPNotFound:
|
|
||||||
status = Status.not_found
|
|
||||||
msg = ''
|
|
||||||
except aiohttp.errors.HttpProcessingError as exc:
|
|
||||||
status = Status.error
|
|
||||||
msg = '{} failed: {exc.code} - {exc.message}'
|
|
||||||
msg = msg.format(cc, exc=exc)
|
|
||||||
except aiohttp.errors.ClientError as exc:
|
|
||||||
try:
|
|
||||||
context = exc.__context__.__class__.__name__
|
|
||||||
except AttributeError:
|
|
||||||
# we chain all exceptions, you should get original exception from __cause__
|
|
||||||
context = '(unknown context)'
|
|
||||||
msg = '{} failed: {}'.format(cc, context)
|
|
||||||
status = Status.error
|
|
||||||
else:
|
|
||||||
save_flag(image, cc.lower() + '.gif')
|
|
||||||
status = Status.ok
|
|
||||||
msg = 'OK'
|
|
||||||
|
|
||||||
if verbose and msg:
|
|
||||||
print(cc, msg)
|
|
||||||
|
|
||||||
return Result(status, cc)
|
|
||||||
|
|
||||||
|
|
||||||
def download_many(cc_list, base_url, verbose, max_req):
    """Download the flags for ``cc_list`` concurrently and count outcomes.

    At most ``max_req`` requests hold the semaphore at a time and the whole
    batch is given ``TIMEOUT`` seconds. Tasks still pending after the
    timeout are cancelled and not counted. Returns ``Counts`` built from
    the number of finished tasks per ``Status`` member, in ``Status`` order.
    """
    semaphore = asyncio.Semaphore(max_req)
    to_do = [download_one(cc, base_url, semaphore, verbose) for cc in cc_list]
    loop = asyncio.get_event_loop()
    # Start from empty sets so the tally below still works when the wait
    # itself fails (previously that path ended in a NameError).
    done, pending = set(), set()
    try:
        try:
            done, pending = loop.run_until_complete(
                asyncio.wait(to_do, timeout=TIMEOUT))
        except Exception as exc:
            print('*' * 60)
            print(exc)
            print(vars(exc))
            print('*' * 60)
        statuses = [task.result().status for task in done]
        counts = [statuses.count(status) for status in Status]
        for task in pending:
            task.cancel()
    finally:
        # Close the loop even when tallying raises
        loop.close()

    return Counts(*counts)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
|
|
||||||
112
futures/countries/flags2_asyncio_executor.py
Normal file
112
futures/countries/flags2_asyncio_executor.py
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
"""Download flags of countries (with error handling).
|
||||||
|
|
||||||
|
asyncio version using thread pool to save files
|
||||||
|
|
||||||
|
Sample run::
|
||||||
|
|
||||||
|
$
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import collections
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
from aiohttp import web
|
||||||
|
import tqdm
|
||||||
|
|
||||||
|
from flags2_common import main, HTTPStatus, Result, save_flag
|
||||||
|
|
||||||
|
# default set low to avoid errors from remote site, such as
|
||||||
|
# 503 - Service Temporarily Unavailable
|
||||||
|
DEFAULT_CONCUR_REQ = 5
|
||||||
|
MAX_CONCUR_REQ = 1000
|
||||||
|
|
||||||
|
|
||||||
|
class FetchError(Exception):
    """Error raised when fetching the flag of one country fails.

    The country code involved is kept in the ``country_code`` attribute.
    """

    def __init__(self, country_code):
        super().__init__(country_code)
        self.country_code = country_code
|
||||||
|
|
||||||
|
|
||||||
|
@asyncio.coroutine
def get_flag(base_url, cc):
    """Fetch the GIF flag for country code ``cc`` from ``base_url``.

    Returns the body of a 200 response as produced by ``resp.read()``.
    Raises ``web.HTTPNotFound`` for a 404 and
    ``aiohttp.HttpProcessingError`` (carrying the status code, reason and
    headers) for any other status.
    """
    code = cc.lower()
    url = '{}/{cc}/{cc}.gif'.format(base_url, cc=code)
    resp = yield from aiohttp.request('GET', url)
    if resp.status == 404:
        raise web.HTTPNotFound()
    if resp.status != 200:
        raise aiohttp.HttpProcessingError(
            code=resp.status, message=resp.reason,
            headers=resp.headers)
    return (yield from resp.read())
|
||||||
|
|
||||||
|
# BEGIN FLAGS2_ASYNCIO_EXECUTOR
|
||||||
|
@asyncio.coroutine
def download_one(cc, base_url, semaphore, verbose):
    """Download the flag for ``cc`` and save it using the loop's executor.

    The semaphore is held only while the HTTP request is in flight.
    Returns ``Result(status, cc)`` with ``HTTPStatus.ok`` or
    ``HTTPStatus.not_found``. Any other failure, while fetching or while
    saving, is raised as ``FetchError(cc)`` chained to the original
    exception. When ``verbose`` is true the outcome is printed.
    """
    try:
        with (yield from semaphore):
            image = yield from get_flag(base_url, cc)
    except web.HTTPNotFound:
        status = HTTPStatus.not_found
        msg = 'not found'
    except Exception as exc:
        raise FetchError(cc) from exc
    else:
        loop = asyncio.get_event_loop()  # <1>
        try:
            # Wait for the worker thread: an un-awaited future would drop
            # any error raised by save_flag and report OK too early.
            yield from loop.run_in_executor(None,  # <2>
                    save_flag, image, cc.lower() + '.gif')  # <3>
        except Exception as exc:
            raise FetchError(cc) from exc
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
|
||||||
|
# END FLAGS2_ASYNCIO_EXECUTOR
|
||||||
|
|
||||||
|
@asyncio.coroutine
def downloader_coro(cc_list, base_url, verbose, concur_req):
    """Run one ``download_one`` coroutine per country code and tally outcomes.

    At most ``concur_req`` requests hold the semaphore at a time. Results
    are consumed in completion order; a tqdm progress bar wraps the
    iteration unless ``verbose`` is true. Returns a ``collections.Counter``
    keyed by status.
    """
    counter = collections.Counter()
    semaphore = asyncio.Semaphore(concur_req)
    to_do = [download_one(cc, base_url, semaphore, verbose)
             for cc in sorted(cc_list)]

    to_do_iter = asyncio.as_completed(to_do)
    if not verbose:
        to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
    for future in to_do_iter:
        try:
            res = yield from future
        except FetchError as exc:
            # A FetchError always counts as an error, even when the message
            # of its cause is empty; never reuse a status from a prior pass.
            status = HTTPStatus.error
            country_code = exc.country_code
            try:
                error_msg = exc.__cause__.args[0]
            except IndexError:
                error_msg = exc.__cause__.__class__.__name__
            if verbose and error_msg:
                msg = '*** Error for {}: {}'
                print(msg.format(country_code, error_msg))
        else:
            status = res.status
        counter[status] += 1

    return counter
|
||||||
|
|
||||||
|
|
||||||
|
def download_many(cc_list, base_url, verbose, concur_req):
    """Drive ``downloader_coro`` to completion on the event loop.

    Returns whatever ``downloader_coro`` returns. The loop is closed
    afterwards, including when the coroutine raises.
    """
    loop = asyncio.get_event_loop()
    coro = downloader_coro(cc_list, base_url, verbose, concur_req)
    try:
        counts = loop.run_until_complete(coro)
    finally:
        loop.close()

    return counts
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
|
||||||
@@ -11,9 +11,8 @@ from enum import Enum
|
|||||||
|
|
||||||
|
|
||||||
Result = namedtuple('Result', 'status data')
|
Result = namedtuple('Result', 'status data')
|
||||||
Counts = namedtuple('Counts', 'ok not_found error')
|
|
||||||
|
|
||||||
Status = Enum('Status', 'ok not_found error')
|
HTTPStatus = Enum('Status', 'ok not_found error')
|
||||||
|
|
||||||
POP20_CC = ('CN IN US ID BR PK NG BD RU JP '
|
POP20_CC = ('CN IN US ID BR PK NG BD RU JP '
|
||||||
'MX PH VN ET EG DE IR TR CD FR').split()
|
'MX PH VN ET EG DE IR TR CD FR').split()
|
||||||
@@ -22,7 +21,7 @@ DEFAULT_CONCUR_REQ = 1
|
|||||||
MAX_CONCUR_REQ = 1
|
MAX_CONCUR_REQ = 1
|
||||||
|
|
||||||
SERVERS = {
|
SERVERS = {
|
||||||
'REMOTE': 'http://python.pro.br/fluent/data/flags',
|
'REMOTE': 'http://flupy.org/data/flags',
|
||||||
'LOCAL': 'http://localhost:8001/flags',
|
'LOCAL': 'http://localhost:8001/flags',
|
||||||
'DELAY': 'http://localhost:8002/flags',
|
'DELAY': 'http://localhost:8002/flags',
|
||||||
'ERROR': 'http://localhost:8003/flags',
|
'ERROR': 'http://localhost:8003/flags',
|
||||||
@@ -53,17 +52,17 @@ def initial_report(cc_list, actual_req, server_label):
|
|||||||
print(msg.format(actual_req, plural))
|
print(msg.format(actual_req, plural))
|
||||||
|
|
||||||
|
|
||||||
def final_report(cc_list, counts, start_time):
|
def final_report(cc_list, counter, start_time):
|
||||||
elapsed = time.time() - start_time
|
elapsed = time.time() - start_time
|
||||||
print('-' * 20)
|
print('-' * 20)
|
||||||
msg = '{} flag{} downloaded.'
|
msg = '{} flag{} downloaded.'
|
||||||
plural = 's' if counts.ok != 1 else ''
|
plural = 's' if counter[HTTPStatus.ok] != 1 else ''
|
||||||
print(msg.format(counts.ok, plural))
|
print(msg.format(counter[HTTPStatus.ok], plural))
|
||||||
if counts.not_found:
|
if counter[HTTPStatus.not_found]:
|
||||||
print(counts.not_found, 'not found.')
|
print(counter[HTTPStatus.not_found], 'not found.')
|
||||||
if counts.error:
|
if counter[HTTPStatus.error]:
|
||||||
plural = 's' if counts.error != 1 else ''
|
plural = 's' if counter[HTTPStatus.error] != 1 else ''
|
||||||
print('{} error{}.'.format(counts.error, plural))
|
print('{} error{}.'.format(counter[HTTPStatus.error], plural))
|
||||||
print('Elapsed time: {:.2f}s'.format(elapsed))
|
print('Elapsed time: {:.2f}s'.format(elapsed))
|
||||||
|
|
||||||
|
|
||||||
@@ -144,5 +143,7 @@ def main(download_many, default_concur_req, max_concur_req):
|
|||||||
initial_report(cc_list, actual_req, args.server)
|
initial_report(cc_list, actual_req, args.server)
|
||||||
base_url = SERVERS[args.server]
|
base_url = SERVERS[args.server]
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
counts = download_many(cc_list, base_url, args.verbose, actual_req)
|
counter = download_many(cc_list, base_url, args.verbose, actual_req)
|
||||||
final_report(cc_list, counts, t0)
|
assert sum(counter.values()) == len(cc_list), \
|
||||||
|
'some downloads are unaccounted for'
|
||||||
|
final_report(cc_list, counter, t0)
|
||||||
@@ -4,71 +4,84 @@ Sequential version
|
|||||||
|
|
||||||
Sample run::
|
Sample run::
|
||||||
|
|
||||||
$
|
$ python3 flags2_sequential.py -s DELAY b
|
||||||
|
DELAY site: http://localhost:8002/flags
|
||||||
|
Searching for 26 flags: from BA to BZ
|
||||||
|
1 concurrent connection will be used.
|
||||||
|
--------------------
|
||||||
|
17 flags downloaded.
|
||||||
|
9 not found.
|
||||||
|
Elapsed time: 13.36s
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import collections
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import tqdm
|
import tqdm
|
||||||
|
|
||||||
from flag_utils import main, save_flag, Counts, Status, Result
|
from flags2_common import main, save_flag, HTTPStatus, Result
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_CONCUR_REQ = 1
|
DEFAULT_CONCUR_REQ = 1
|
||||||
MAX_CONCUR_REQ = 1
|
MAX_CONCUR_REQ = 1
|
||||||
|
|
||||||
|
# BEGIN FLAGS2_BASIC_HTTP_FUNCTIONS
|
||||||
def get_flag(base_url, cc):
|
def get_flag(base_url, cc):
|
||||||
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
|
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
|
||||||
res = requests.get(url)
|
resp = requests.get(url)
|
||||||
if res.status_code != 200:
|
if resp.status_code != 200: # <1>
|
||||||
res.raise_for_status()
|
resp.raise_for_status()
|
||||||
return res.content
|
return resp.content
|
||||||
|
|
||||||
|
|
||||||
def download_one(cc, base_url, verbose=False):
|
def download_one(cc, base_url, verbose=False):
|
||||||
try:
|
try:
|
||||||
image = get_flag(base_url, cc)
|
image = get_flag(base_url, cc)
|
||||||
except requests.exceptions.HTTPError as exc:
|
except requests.exceptions.HTTPError as exc: # <2>
|
||||||
res = exc.response
|
res = exc.response
|
||||||
if res.status_code == 404:
|
if res.status_code == 404:
|
||||||
status = Status.not_found
|
status = HTTPStatus.not_found # <3>
|
||||||
msg = ''
|
msg = 'not found'
|
||||||
else:
|
else: # <4>
|
||||||
status = Status.error
|
raise
|
||||||
msg = 'error {res.status_code} - {res.reason}'
|
|
||||||
msg = msg.format(res=exc.response)
|
|
||||||
except requests.exceptions.ConnectionError as exc:
|
|
||||||
status = Status.error
|
|
||||||
msg = 'failed: {}'.format(cc, exc.args)
|
|
||||||
else:
|
else:
|
||||||
save_flag(image, cc.lower() + '.gif')
|
save_flag(image, cc.lower() + '.gif')
|
||||||
status = Status.ok
|
status = HTTPStatus.ok
|
||||||
msg = 'OK'
|
msg = 'OK'
|
||||||
|
|
||||||
if verbose and msg:
|
if verbose: # <5>
|
||||||
print(cc, msg)
|
print(cc, msg)
|
||||||
|
|
||||||
return Result(status, cc)
|
return Result(status, cc) # <6>
|
||||||
|
# END FLAGS2_BASIC_HTTP_FUNCTIONS
|
||||||
|
|
||||||
|
# BEGIN FLAGS2_DOWNLOAD_MANY_SEQUENTIAL
|
||||||
def download_many(cc_list, base_url, verbose, max_req):
|
def download_many(cc_list, base_url, verbose, max_req):
|
||||||
counts = [0, 0, 0]
|
counter = collections.Counter() # <1>
|
||||||
|
cc_iter = sorted(cc_list) # <2>
|
||||||
if not verbose:
|
if not verbose:
|
||||||
cc_iter = tqdm.tqdm(sorted(cc_list))
|
cc_iter = tqdm.tqdm(cc_iter) # <3>
|
||||||
else:
|
for cc in cc_iter: # <4>
|
||||||
cc_iter = sorted(cc_list)
|
|
||||||
for cc in cc_iter:
|
|
||||||
try:
|
try:
|
||||||
res = download_one(cc, base_url, verbose)
|
res = download_one(cc, base_url, verbose) # <5>
|
||||||
except Exception as exc:
|
except requests.exceptions.HTTPError as exc: # <6>
|
||||||
msg = 'Unexpected exception for {}: {!r}'
|
error_msg = 'HTTP error {res.status_code} - {res.reason}'
|
||||||
print(msg.format(cc, exc))
|
error_msg = error_msg.format(res=exc.response)
|
||||||
else:
|
except requests.exceptions.ConnectionError as exc: # <7>
|
||||||
counts[res.status.value-1] += 1
|
error_msg = 'Connection error'
|
||||||
|
else: # <8>
|
||||||
|
error_msg = ''
|
||||||
|
status = res.status
|
||||||
|
|
||||||
return Counts(*counts)
|
if error_msg:
|
||||||
|
status = HTTPStatus.error # <9>
|
||||||
|
counter[status] += 1 # <10>
|
||||||
|
if verbose and error_msg: # <11>
|
||||||
|
print('*** Error for {}: {}'.format(cc, error_msg))
|
||||||
|
|
||||||
|
return counter # <12>
|
||||||
|
# END FLAGS2_DOWNLOAD_MANY_SEQUENTIAL
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
|
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
|
||||||
|
|||||||
@@ -1,42 +1,68 @@
|
|||||||
"""Download flags of top 10 countries by population
|
"""Download flags of countries (with error handling).
|
||||||
|
|
||||||
ThreadPool version
|
ThreadPool version
|
||||||
|
|
||||||
Sample run::
|
Sample run::
|
||||||
|
|
||||||
$
|
$ python3 flags2_threadpool.py -s ERROR -e
|
||||||
|
ERROR site: http://localhost:8003/flags
|
||||||
|
Searching for 676 flags: from AA to ZZ
|
||||||
|
30 concurrent connections will be used.
|
||||||
|
--------------------
|
||||||
|
150 flags downloaded.
|
||||||
|
361 not found.
|
||||||
|
165 errors.
|
||||||
|
Elapsed time: 7.46s
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# BEGIN FLAGS2_THREADPOOL
|
||||||
|
import collections
|
||||||
from concurrent import futures
|
from concurrent import futures
|
||||||
|
|
||||||
import tqdm
|
import requests
|
||||||
|
import tqdm # <1>
|
||||||
|
|
||||||
from flag_utils import main, Counts
|
from flags2_common import main, HTTPStatus # <2>
|
||||||
from flags2_sequential import get_flag, download_one
|
from flags2_sequential import download_one # <3>
|
||||||
|
|
||||||
DEFAULT_CONCUR_REQ = 30
|
DEFAULT_CONCUR_REQ = 30 # <4>
|
||||||
MAX_CONCUR_REQ = 1000
|
MAX_CONCUR_REQ = 1000 # <5>
|
||||||
|
|
||||||
|
|
||||||
def download_many(cc_list, base_url, verbose, concur_req):
|
def download_many(cc_list, base_url, verbose, concur_req):
|
||||||
with futures.ThreadPoolExecutor(concur_req) as executor:
|
counter = collections.Counter()
|
||||||
to_do = [executor.submit(download_one, cc, base_url, verbose)
|
with futures.ThreadPoolExecutor(concur_req) as executor: # <6>
|
||||||
for cc in sorted(cc_list)]
|
to_do_map = {} # <7>
|
||||||
counts = [0, 0, 0]
|
for cc in sorted(cc_list): # <8>
|
||||||
to_do_iter = futures.as_completed(to_do)
|
future = executor.submit(download_one,
|
||||||
|
cc, base_url, verbose) # <9>
|
||||||
|
to_do_map[future] = cc # <10>
|
||||||
|
to_do_iter = futures.as_completed(to_do_map) # <11>
|
||||||
if not verbose:
|
if not verbose:
|
||||||
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
|
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list)) # <12>
|
||||||
for future in to_do_iter:
|
for future in to_do_iter: # <13>
|
||||||
try:
|
try:
|
||||||
res = future.result()
|
res = future.result() # <14>
|
||||||
except Exception as exc:
|
except requests.exceptions.HTTPError as exc: # <15>
|
||||||
print('*** Unexpected exception:', exc)
|
error_msg = 'HTTP {res.status_code} - {res.reason}'
|
||||||
|
error_msg = error_msg.format(res=exc.response)
|
||||||
|
except requests.exceptions.ConnectionError as exc:
|
||||||
|
error_msg = 'Connection error'
|
||||||
else:
|
else:
|
||||||
counts[res.status.value-1] += 1
|
error_msg = ''
|
||||||
|
status = res.status
|
||||||
|
|
||||||
return Counts(*counts)
|
if error_msg:
|
||||||
|
status = HTTPStatus.error
|
||||||
|
counter[status] += 1
|
||||||
|
if verbose and error_msg:
|
||||||
|
cc = to_do_map[future] # <16>
|
||||||
|
print('*** Error for {}: {}'.format(cc, error_msg))
|
||||||
|
|
||||||
|
return counter
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
|
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
|
||||||
|
# END FLAGS2_THREADPOOL
|
||||||
|
|||||||
@@ -1,122 +1,132 @@
|
|||||||
"""Download flags of top 10 countries by population
|
"""Download flags of countries (with error handling).
|
||||||
|
|
||||||
asyncio version
|
asyncio version using thread pool to save files
|
||||||
|
|
||||||
Sample run::
|
Sample run::
|
||||||
|
|
||||||
$ python3 pop10_asyncio1.py
|
$
|
||||||
CN retrieved.
|
|
||||||
US retrieved.
|
|
||||||
BR retrieved.
|
|
||||||
NG retrieved.
|
|
||||||
PK retrieved.
|
|
||||||
RU retrieved.
|
|
||||||
ID retrieved.
|
|
||||||
IN retrieved.
|
|
||||||
BD retrieved.
|
|
||||||
JP retrieved.
|
|
||||||
10 flags downloaded in 0.45s
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
from collections import namedtuple
|
import collections
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
from aiohttp import web
|
from aiohttp import web
|
||||||
|
import tqdm
|
||||||
|
|
||||||
from flags_sequential2 import BASE_URL
|
from flags2_common import main, HTTPStatus, Result, save_flag
|
||||||
from flags_sequential2 import save_flag, main, Counts
|
|
||||||
|
|
||||||
MAX_TASKS = 100 if 'localhost' in BASE_URL else 5
|
# default set low to avoid errors from remote site, such as
|
||||||
TIMEOUT = 120 # seconds
|
# 503 - Service Temporarily Unavailable
|
||||||
|
DEFAULT_CONCUR_REQ = 5
|
||||||
Status = Enum('Status', 'ok not_found error')
|
MAX_CONCUR_REQ = 1000
|
||||||
Result = namedtuple('Result', 'status data')
|
|
||||||
|
|
||||||
|
|
||||||
|
class FetchError(Exception):
|
||||||
|
def __init__(self, country_code):
|
||||||
|
self.country_code = country_code
|
||||||
|
|
||||||
|
# BEGIN FLAGS3_ASYNCIO
|
||||||
@asyncio.coroutine
|
@asyncio.coroutine
|
||||||
def http_get(url):
|
def http_get(url):
|
||||||
res = yield from aiohttp.request('GET', url)
|
res = yield from aiohttp.request('GET', url)
|
||||||
if res.status == 200:
|
if res.status == 200:
|
||||||
ctype = res.headers.get('Content-type', '').lower()
|
ctype = res.headers.get('Content-type', '').lower()
|
||||||
|
|
||||||
if 'json' in ctype or url.endswith('json'):
|
if 'json' in ctype or url.endswith('json'):
|
||||||
data = yield from res.json()
|
data = yield from res.json() # <1>
|
||||||
else:
|
else:
|
||||||
data = yield from res.read()
|
data = yield from res.read() # <2>
|
||||||
return data
|
return data
|
||||||
|
|
||||||
elif res.status == 404:
|
elif res.status == 404:
|
||||||
raise web.HTTPNotFound()
|
raise web.HTTPNotFound()
|
||||||
else:
|
else:
|
||||||
raise aiohttp.errors.HttpProcessingError(
|
raise aiohttp.errors.HttpProcessingError(
|
||||||
code=res.status, message=res.reason, headers=res.headers)
|
code=res.status, message=res.reason,
|
||||||
|
headers=res.headers)
|
||||||
|
|
||||||
|
|
||||||
@asyncio.coroutine
|
@asyncio.coroutine
|
||||||
def get_flag(cc):
|
def get_country(base_url, cc):
|
||||||
url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
|
url = '{}/{cc}/metadata.json'.format(base_url, cc=cc.lower())
|
||||||
return (yield from http_get(url))
|
metadata = yield from http_get(url) # <3>
|
||||||
|
|
||||||
|
|
||||||
@asyncio.coroutine
|
|
||||||
def get_country(cc):
|
|
||||||
url = '{}/{cc}/metadata.json'.format(BASE_URL, cc=cc.lower())
|
|
||||||
metadata = yield from http_get(url)
|
|
||||||
return metadata['country']
|
return metadata['country']
|
||||||
|
|
||||||
|
|
||||||
@asyncio.coroutine
|
@asyncio.coroutine
|
||||||
def download_one(cc, semaphore):
|
def get_flag(base_url, cc):
|
||||||
|
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
|
||||||
|
return (yield from http_get(url)) # <4>
|
||||||
|
|
||||||
|
|
||||||
|
@asyncio.coroutine
|
||||||
|
def download_one(cc, base_url, semaphore, verbose):
|
||||||
try:
|
try:
|
||||||
|
with (yield from semaphore): # <5>
|
||||||
|
image = yield from get_flag(base_url, cc)
|
||||||
with (yield from semaphore):
|
with (yield from semaphore):
|
||||||
image = yield from get_flag(cc)
|
country = yield from get_country(base_url, cc)
|
||||||
with (yield from semaphore):
|
|
||||||
country = yield from get_country(cc)
|
|
||||||
except web.HTTPNotFound:
|
except web.HTTPNotFound:
|
||||||
status = Status.not_found
|
status = HTTPStatus.not_found
|
||||||
except aiohttp.errors.HttpProcessingError as exc:
|
msg = 'not found'
|
||||||
msg = '{} failed: {exc.code} - {exc.message}'
|
|
||||||
print(msg.format(cc, exc=exc))
|
|
||||||
status = Status.error
|
|
||||||
except aiohttp.errors.ClientResponseError as exc:
|
|
||||||
try:
|
|
||||||
context = exc.__context__.__class__.__name__
|
|
||||||
except AttributeError:
|
|
||||||
context = '(unknown context)'
|
|
||||||
msg = '{} failed: {}'
|
|
||||||
print(msg.format(cc, context))
|
|
||||||
status = Status.error
|
|
||||||
else:
|
|
||||||
print('{} retrieved.'.format(cc.upper()))
|
|
||||||
country = country.replace(' ', '_')
|
|
||||||
save_flag(image, '{}-{}.gif'.format(country, cc))
|
|
||||||
status = Status.ok
|
|
||||||
return Result(status, cc)
|
|
||||||
|
|
||||||
|
|
||||||
def download_many(cc_list):
|
|
||||||
semaphore = asyncio.Semaphore(MAX_TASKS)
|
|
||||||
to_do = [download_one(cc, semaphore) for cc in cc_list]
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
#loop.set_debug(True)
|
|
||||||
try:
|
|
||||||
done, pending = loop.run_until_complete(asyncio.wait(to_do, timeout=TIMEOUT))
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
print('*' * 60)
|
raise FetchError(cc) from exc
|
||||||
print(exc)
|
else:
|
||||||
print(vars(exc))
|
country = country.replace(' ', '_')
|
||||||
print('*' * 60)
|
filename = '{}-{}.gif'.format(country, cc)
|
||||||
counts = []
|
loop = asyncio.get_event_loop()
|
||||||
for status in Status:
|
loop.run_in_executor(None, save_flag, image, filename)
|
||||||
counts.append(len([task for task in done
|
status = HTTPStatus.ok
|
||||||
if task.result().status == status]))
|
msg = 'OK'
|
||||||
for task in pending:
|
|
||||||
task.cancel()
|
if verbose and msg:
|
||||||
|
print(cc, msg)
|
||||||
|
|
||||||
|
return Result(status, cc)
|
||||||
|
# END FLAGS3_ASYNCIO
|
||||||
|
|
||||||
|
@asyncio.coroutine
|
||||||
|
def downloader_coro(cc_list, base_url, verbose, concur_req):
|
||||||
|
counter = collections.Counter()
|
||||||
|
semaphore = asyncio.Semaphore(concur_req)
|
||||||
|
to_do = [download_one(cc, base_url, semaphore, verbose)
|
||||||
|
for cc in sorted(cc_list)]
|
||||||
|
|
||||||
|
to_do_iter = asyncio.as_completed(to_do)
|
||||||
|
if not verbose:
|
||||||
|
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
|
||||||
|
for future in to_do_iter:
|
||||||
|
try:
|
||||||
|
res = yield from future
|
||||||
|
except FetchError as exc:
|
||||||
|
country_code = exc.country_code
|
||||||
|
try:
|
||||||
|
error_msg = exc.__cause__.args[0]
|
||||||
|
except IndexError:
|
||||||
|
error_msg = exc.__cause__.__class__.__name__
|
||||||
|
else:
|
||||||
|
error_msg = ''
|
||||||
|
status = res.status
|
||||||
|
|
||||||
|
if error_msg:
|
||||||
|
status = HTTPStatus.error
|
||||||
|
counter[status] += 1
|
||||||
|
if verbose and error_msg:
|
||||||
|
msg = '*** Error for {}: {}'
|
||||||
|
print(msg.format(country_code, error_msg))
|
||||||
|
|
||||||
|
return counter
|
||||||
|
|
||||||
|
|
||||||
|
def download_many(cc_list, base_url, verbose, concur_req):
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
coro = downloader_coro(cc_list, base_url, verbose, concur_req)
|
||||||
|
counts = loop.run_until_complete(coro)
|
||||||
loop.close()
|
loop.close()
|
||||||
|
|
||||||
return Counts(*counts)
|
return counts
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(download_many)
|
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
|
||||||
|
|||||||
@@ -1,77 +1,88 @@
|
|||||||
"""Download flags of top 10 countries by population
|
"""Download flags and names of countries.
|
||||||
|
|
||||||
ThreadPool version
|
ThreadPool version
|
||||||
|
|
||||||
Sample run::
|
Sample run::
|
||||||
|
|
||||||
$ python3 pop10_threadpool1.py
|
|
||||||
BR retrieved.
|
|
||||||
PK retrieved.
|
|
||||||
BD retrieved.
|
|
||||||
JP retrieved.
|
|
||||||
CN retrieved.
|
|
||||||
IN retrieved.
|
|
||||||
RU retrieved.
|
|
||||||
NG retrieved.
|
|
||||||
US retrieved.
|
|
||||||
ID retrieved.
|
|
||||||
10 flags downloaded in 0.63s
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import collections
|
||||||
from concurrent import futures
|
from concurrent import futures
|
||||||
from collections import namedtuple
|
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import tqdm
|
||||||
|
|
||||||
from flags_sequential2 import BASE_URL
|
from flags2_common import main, save_flag, HTTPStatus, Result
|
||||||
from flags_sequential2 import save_flag, get_flag, main, Counts
|
from flags2_sequential import get_flag
|
||||||
|
|
||||||
MAX_WORKERS = 200
|
DEFAULT_CONCUR_REQ = 30
|
||||||
|
MAX_CONCUR_REQ = 1000
|
||||||
Status = Enum('Status', 'ok not_found error')
|
|
||||||
Result = namedtuple('Result', 'status data')
|
|
||||||
|
|
||||||
|
|
||||||
def get_country(cc):
|
def get_country(base_url, cc):
|
||||||
url = '{}/{cc}/metadata.json'.format(BASE_URL, cc=cc.lower())
|
url = '{}/{cc}/metadata.json'.format(base_url, cc=cc.lower())
|
||||||
res = requests.get(url)
|
res = requests.get(url)
|
||||||
if res.status_code != 200:
|
if res.status_code != 200:
|
||||||
res.raise_for_status()
|
res.raise_for_status()
|
||||||
return res.json()['country']
|
return res.json()['country']
|
||||||
|
|
||||||
|
|
||||||
def download_one(cc):
|
def download_one(cc, base_url, verbose=False):
|
||||||
try:
|
try:
|
||||||
image = get_flag(cc)
|
image = get_flag(base_url, cc)
|
||||||
country = get_country(cc)
|
country = get_country(base_url, cc)
|
||||||
except requests.exceptions.HTTPError as exc:
|
except requests.exceptions.HTTPError as exc:
|
||||||
res = exc.response
|
res = exc.response
|
||||||
if res.status_code == 404:
|
if res.status_code == 404:
|
||||||
status = Status.not_found
|
status = HTTPStatus.not_found
|
||||||
else:
|
msg = 'not found'
|
||||||
msg = '{} failed: {res.status_code} - {res.reason}'
|
else: # <4>
|
||||||
print(msg.format(cc, res=exc.response))
|
raise
|
||||||
status = Status.error
|
|
||||||
else:
|
else:
|
||||||
print('{} retrieved.'.format(cc))
|
|
||||||
country = country.replace(' ', '_')
|
country = country.replace(' ', '_')
|
||||||
save_flag(image, '{}-{}.gif'.format(country, cc))
|
save_flag(image, '{}-{}.gif'.format(country, cc))
|
||||||
status = Status.ok
|
status = HTTPStatus.ok
|
||||||
|
msg = 'OK'
|
||||||
|
|
||||||
|
if verbose:
|
||||||
|
print(cc, msg)
|
||||||
|
|
||||||
return Result(status, cc)
|
return Result(status, cc)
|
||||||
|
|
||||||
|
|
||||||
def download_many(cc_list):
|
def download_many(cc_list, base_url, verbose, concur_req):
|
||||||
workers = min(len(cc_list), MAX_WORKERS)
|
counter = collections.Counter()
|
||||||
with futures.ThreadPoolExecutor(workers) as executor:
|
with futures.ThreadPoolExecutor(concur_req) as executor:
|
||||||
res = executor.map(download_one, sorted(cc_list))
|
to_do_map = {}
|
||||||
res = list(res)
|
for cc in sorted(cc_list):
|
||||||
counts = []
|
future = executor.submit(download_one,
|
||||||
for status in Status:
|
cc, base_url, verbose)
|
||||||
counts.append(len([r for r in res if r.status == status]))
|
to_do_map[future] = cc
|
||||||
return Counts(*counts)
|
to_do_iter = futures.as_completed(to_do_map)
|
||||||
|
if not verbose:
|
||||||
|
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
|
||||||
|
for future in to_do_iter:
|
||||||
|
try:
|
||||||
|
res = future.result()
|
||||||
|
except requests.exceptions.HTTPError as exc:
|
||||||
|
error_msg = 'HTTP {res.status_code} - {res.reason}'
|
||||||
|
error_msg = error_msg.format(res=exc.response)
|
||||||
|
except requests.exceptions.ConnectionError as exc:
|
||||||
|
error_msg = 'Connection error'
|
||||||
|
else:
|
||||||
|
error_msg = ''
|
||||||
|
status = res.status
|
||||||
|
|
||||||
|
if error_msg:
|
||||||
|
status = HTTPStatus.error
|
||||||
|
counter[status] += 1
|
||||||
|
if verbose and error_msg:
|
||||||
|
cc = to_do_map[future]
|
||||||
|
print('*** Error for {}: {}'.format(cc, error_msg))
|
||||||
|
|
||||||
|
return counter
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(download_many)
|
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
|
||||||
|
|||||||
48
futures/countries/flags_asyncio.py
Normal file
48
futures/countries/flags_asyncio.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
"""Download flags of top 20 countries by population
|
||||||
|
|
||||||
|
asyncio + aiohttp version
|
||||||
|
|
||||||
|
Sample run::
|
||||||
|
|
||||||
|
$ python3 flags_asyncio.py
|
||||||
|
EG VN IN TR RU ID US DE CN MX JP BD NG ET FR BR PH PK CD IR
|
||||||
|
20 flags downloaded in 1.07s
|
||||||
|
|
||||||
|
"""
|
||||||
|
# BEGIN FLAGS_ASYNCIO
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
import aiohttp # <1>
|
||||||
|
|
||||||
|
from flags import BASE_URL, save_flag, show, main # <2>
|
||||||
|
|
||||||
|
|
||||||
|
@asyncio.coroutine  # <3>
def get_flag(cc):
    """Fetch the GIF flag for country code ``cc`` from ``BASE_URL``.

    Returns the response body as produced by ``read()``; the response
    status is not checked.
    """
    code = cc.lower()
    url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=code)
    response = yield from aiohttp.request('GET', url)  # <4>
    return (yield from response.read())  # <5>
|
||||||
|
|
||||||
|
|
||||||
|
@asyncio.coroutine
def download_one(cc):  # <6>
    """Fetch the flag for ``cc``, show the code, save the image, return ``cc``.

    The image is saved under the lower-cased code with a ``.gif`` suffix.
    """
    image = yield from get_flag(cc)  # <7>
    show(cc)
    filename = cc.lower() + '.gif'
    save_flag(image, filename)
    return cc
|
||||||
|
|
||||||
|
|
||||||
|
def download_many(cc_list):
    """Download the flags for every code in ``cc_list`` concurrently.

    Returns the number of completed downloads; an empty ``cc_list`` yields
    0. The event loop is closed before returning, including on error.
    """
    loop = asyncio.get_event_loop()  # <8>
    try:
        to_do = [download_one(cc) for cc in sorted(cc_list)]  # <9>
        if not to_do:
            # asyncio.wait() rejects an empty collection with ValueError
            return 0
        wait_coro = asyncio.wait(to_do)  # <10>
        res, _ = loop.run_until_complete(wait_coro)  # <11>
    finally:
        loop.close()  # <12>

    return len(res)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main(download_many)
|
||||||
|
# END FLAGS_ASYNCIO
|
||||||
@@ -1,45 +0,0 @@
|
|||||||
"""Download flags of top 20 countries by population
|
|
||||||
|
|
||||||
asyncio+aiottp version
|
|
||||||
|
|
||||||
Sample run::
|
|
||||||
|
|
||||||
$ python3 flags_asyncio0.py
|
|
||||||
EG retrieved.
|
|
||||||
BD retrieved.
|
|
||||||
JP retrieved.
|
|
||||||
...
|
|
||||||
CD retrieved.
|
|
||||||
PH retrieved.
|
|
||||||
ET retrieved.
|
|
||||||
20 flags downloaded in 1.05s
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
|
|
||||||
from flags import BASE_URL, save_flag, main
|
|
||||||
|
|
||||||
|
|
||||||
@asyncio.coroutine
|
|
||||||
def download_one(cc):
|
|
||||||
url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
|
|
||||||
res = yield from aiohttp.request('GET', url)
|
|
||||||
image = yield from res.read()
|
|
||||||
print('{} retrieved.'.format(cc))
|
|
||||||
save_flag(image, cc.lower() + '.gif')
|
|
||||||
return cc
|
|
||||||
|
|
||||||
|
|
||||||
def download_many(cc_list):
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
to_do = [download_one(cc) for cc in cc_list]
|
|
||||||
res, _ = loop.run_until_complete(asyncio.wait(to_do))
|
|
||||||
loop.close()
|
|
||||||
return len(res)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main(download_many)
|
|
||||||
@@ -1,51 +0,0 @@
|
|||||||
"""Download flags of top 20 countries by population
|
|
||||||
|
|
||||||
asyncio+aiottp version
|
|
||||||
|
|
||||||
Sample run::
|
|
||||||
|
|
||||||
$ python3 flags_asyncio.py
|
|
||||||
NG retrieved.
|
|
||||||
FR retrieved.
|
|
||||||
IN retrieved.
|
|
||||||
...
|
|
||||||
EG retrieved.
|
|
||||||
DE retrieved.
|
|
||||||
IR retrieved.
|
|
||||||
20 flags downloaded in 1.08s
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
|
|
||||||
from flags import BASE_URL, save_flag, main
|
|
||||||
|
|
||||||
|
|
||||||
@asyncio.coroutine
|
|
||||||
def get_flag(cc):
|
|
||||||
url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
|
|
||||||
res = yield from aiohttp.request('GET', url)
|
|
||||||
image = yield from res.read()
|
|
||||||
return image
|
|
||||||
|
|
||||||
|
|
||||||
@asyncio.coroutine
def download_one(cc):
    """Download one flag via ``get_flag``, save it and return ``cc``.

    Prints ``'<cc> retrieved.'`` once the image has arrived, then hands
    the image to ``save_flag`` with the file name
    ``'<cc lower-cased>.gif'``.
    """
    flag = yield from get_flag(cc)
    print('{} retrieved.'.format(cc))
    filename = '{}.gif'.format(cc.lower())
    save_flag(flag, filename)
    return cc
|
|
||||||
|
|
||||||
|
|
||||||
def download_many(cc_list):
    """Download the flags for every country code in ``cc_list``.

    One ``download_one`` coroutine is created per country code and all of
    them are driven to completion on the default event loop.

    Returns the number of futures reported as done by ``asyncio.wait``,
    or ``0`` when ``cc_list`` is empty.
    """
    loop = asyncio.get_event_loop()
    try:
        to_do = [download_one(cc) for cc in cc_list]
        if not to_do:
            # asyncio.wait() raises ValueError on an empty collection.
            return 0
        done, _ = loop.run_until_complete(asyncio.wait(to_do))
    finally:
        # Close the loop even if a download fails, so it is never leaked.
        loop.close()
    return len(done)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main(download_many)
|
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
"""Download flags of top 20 countries by population
|
"""Download flags of top 20 countries by population
|
||||||
|
|
||||||
ThreadPool version
|
ThreadPoolExecutor version
|
||||||
|
|
||||||
Sample run::
|
Sample run::
|
||||||
|
|
||||||
@@ -15,28 +15,29 @@ Sample run::
|
|||||||
20 flags downloaded in 0.93s
|
20 flags downloaded in 0.93s
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
# BEGIN FLAGS_THREADPOOL
|
||||||
from concurrent import futures
|
from concurrent import futures
|
||||||
|
|
||||||
from flags import save_flag, get_flag, main
|
from flags import save_flag, get_flag, show, main # <1>
|
||||||
|
|
||||||
MAX_WORKERS = 100
|
MAX_WORKERS = 20 # <2>
|
||||||
|
|
||||||
|
|
||||||
def download_one(cc):
|
def download_one(cc): # <3>
|
||||||
image = get_flag(cc)
|
image = get_flag(cc)
|
||||||
print('{} retrieved.'.format(cc.upper()))
|
show(cc)
|
||||||
save_flag(image, cc.lower() + '.gif')
|
save_flag(image, cc.lower() + '.gif')
|
||||||
return cc
|
return cc
|
||||||
|
|
||||||
|
|
||||||
def download_many(cc_list):
|
def download_many(cc_list):
|
||||||
workers = min(len(cc_list), MAX_WORKERS)
|
workers = min(MAX_WORKERS, len(cc_list)) # <4>
|
||||||
with futures.ThreadPoolExecutor(workers) as executor:
|
with futures.ThreadPoolExecutor(workers) as executor: # <5>
|
||||||
res = executor.map(download_one, sorted(cc_list))
|
res = executor.map(download_one, sorted(cc_list)) # <6>
|
||||||
|
|
||||||
return len(list(res))
|
return len(list(res)) # <7>
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(download_many)
|
main(download_many) # <8>
|
||||||
|
# END FLAGS_THREADPOOL
|
||||||
|
|||||||
55
futures/countries/flags_threadpool_ac.py
Normal file
55
futures/countries/flags_threadpool_ac.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
"""Download flags of top 20 countries by population
|
||||||
|
|
||||||
|
ThreadPoolExecutor version 2, with ``as_completed``.
|
||||||
|
|
||||||
|
Sample run::
|
||||||
|
|
||||||
|
$ python3 flags_threadpool.py
|
||||||
|
BD retrieved.
|
||||||
|
EG retrieved.
|
||||||
|
CN retrieved.
|
||||||
|
...
|
||||||
|
PH retrieved.
|
||||||
|
US retrieved.
|
||||||
|
IR retrieved.
|
||||||
|
20 flags downloaded in 0.93s
|
||||||
|
|
||||||
|
"""
|
||||||
|
from concurrent import futures
|
||||||
|
|
||||||
|
from flags import save_flag, get_flag, show, main
|
||||||
|
|
||||||
|
MAX_WORKERS = 20
|
||||||
|
|
||||||
|
|
||||||
|
def download_one(cc):
    """Download, report and save the flag for country code ``cc``.

    The image comes from ``get_flag``, progress is reported through
    ``show``, and the image is stored by ``save_flag`` under the name
    ``'<cc lower-cased>.gif'``. Returns ``cc`` unchanged.
    """
    flag = get_flag(cc)
    show(cc)
    filename = '{}.gif'.format(cc.lower())
    save_flag(flag, filename)
    return cc
|
||||||
|
|
||||||
|
|
||||||
|
# BEGIN FLAGS_THREADPOOL_AS_COMPLETED
|
||||||
|
def download_many(cc_list):
    """Download a few flags, reporting each future as it is scheduled and done.

    Only the first five entries of ``cc_list`` are used, and the pool is
    limited to three worker threads. Each future is printed when it is
    submitted and again, with its result, as it completes.

    Returns the number of results collected.
    """
    sample = cc_list[:5]  # <1>
    scheduled_fmt = 'Scheduled for {}: {}'
    finished_fmt = '{} result: {!r}'
    with futures.ThreadPoolExecutor(max_workers=3) as executor:  # <2>
        pending = []
        for code in sorted(sample):  # <3>
            job = executor.submit(download_one, code)  # <4>
            pending.append(job)  # <5>
            print(scheduled_fmt.format(code, job))  # <6>

        outcomes = []
        for job in futures.as_completed(pending):  # <7>
            outcome = job.result()  # <8>
            print(finished_fmt.format(job, outcome))  # <9>
            outcomes.append(outcome)

    return len(outcomes)
|
||||||
|
# END FLAGS_THREADPOOL_AS_COMPLETED
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main(download_many)
|
||||||
|
|
||||||
34
futures/demo_executor_map.py
Normal file
34
futures/demo_executor_map.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
"""
|
||||||
|
Experiment with ``ThreadPoolExecutor.map``
|
||||||
|
"""
|
||||||
|
# BEGIN EXECUTOR_MAP
|
||||||
|
from time import sleep, strftime
|
||||||
|
from concurrent import futures
|
||||||
|
|
||||||
|
|
||||||
|
def display(*args):  # <1>
    """Print ``args`` on one line, prefixed with a ``[HH:MM:SS]`` timestamp."""
    timestamp = strftime('[%H:%M:%S]')
    # The timestamp is written first, followed by a space instead of a newline.
    print(timestamp, end=' ')
    print(*args)
|
||||||
|
|
||||||
|
|
||||||
|
def loiter(n):  # <2>
    """Sleep for ``n`` seconds, announcing start and end; return ``n * 10``.

    Both messages are indented by ``n`` tab characters.
    """
    indent = '\t' * n
    display('{}loiter({}): doing nothing for {}s...'.format(indent, n, n))
    sleep(n)
    display('{}loiter({}): done.'.format(indent, n))
    return n * 10  # <3>
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run ``loiter`` for 0..4 on a three-thread pool and show the results.

    The object returned by ``executor.map`` is displayed before it is
    iterated, then each result is displayed as the loop receives it.
    """
    display('Script starting.')
    pool = futures.ThreadPoolExecutor(max_workers=3)  # <4>
    outcomes = pool.map(loiter, range(5))  # <5>
    display('results:', outcomes)  # <6>
    display('Waiting for individual results:')
    line_fmt = 'result {}: {}'
    for position, value in enumerate(outcomes):  # <7>
        display(line_fmt.format(position, value))
|
||||||
|
|
||||||
|
|
||||||
|
main()
|
||||||
|
# END EXECUTOR_MAP
|
||||||
31
futures/demo_executor_submit.py
Normal file
31
futures/demo_executor_submit.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
"""
|
||||||
|
Experiments with futures
|
||||||
|
"""
|
||||||
|
|
||||||
|
from time import sleep, strftime
|
||||||
|
from concurrent import futures
|
||||||
|
|
||||||
|
def display(*args):
    """Print ``args`` on one line, prefixed with a ``[HH:MM:SS]`` timestamp."""
    timestamp = strftime('[%H:%M:%S]')
    # The timestamp is written first, followed by a space instead of a newline.
    print(timestamp, end=' ')
    print(*args)
|
||||||
|
|
||||||
|
|
||||||
|
def loiter(n):
    """Sleep for ``n`` seconds, announcing start and end; return ``n * 10``.

    Both messages are indented by ``n`` tab characters.
    """
    indent = '\t' * n
    display('{}loiter({}): doing nothing for {}s...'.format(indent, n, n))
    sleep(n)
    display('{}loiter({}): done.'.format(indent, n))
    return n * 10
|
||||||
|
|
||||||
|
|
||||||
|
def demo_submit():
    """Submit ``loiter(0)`` .. ``loiter(4)`` to a three-thread pool.

    Displays the ``done()`` state of every future right after submission,
    then displays each result in submission order, blocking on
    ``result()`` for each future in turn.
    """
    pool = futures.ThreadPoolExecutor(3)
    jobs = [pool.submit(loiter, seconds) for seconds in range(5)]
    display('done?', [job.done() for job in jobs])
    display('Waiting for results...')
    for index, job in enumerate(jobs):
        display('result[{}]: {}'.format(index, job.result()))
|
||||||
|
|
||||||
|
|
||||||
|
demo_submit()
|
||||||
41
futures/future_yield.py
Normal file
41
futures/future_yield.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
"""
|
||||||
|
An experiment showing that ``asyncio.Future`` is an iterable (it
|
||||||
|
implements `__iter__`) designed to be used with ``yield from``.
|
||||||
|
|
||||||
|
Priming the future returns itself. After the result of the future
|
||||||
|
is set, next iteration produces the result as the ``value`` attribute
|
||||||
|
of ``StopIteration``.
|
||||||
|
|
||||||
|
Sample run::
|
||||||
|
|
||||||
|
$ python3 future_yield.py
|
||||||
|
a, future: <Future pending> 0x66514c
|
||||||
|
b, prime_res: <Future pending> 0x66514c
|
||||||
|
b, exc.value: 42
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
@asyncio.coroutine
def a(future):
    """Print ``future`` and its id, then delegate to it and return its result."""
    print('a, future:\t', future, hex(id(future)))
    return (yield from future)
|
||||||
|
|
||||||
|
def b():
    """Drive coroutine ``a`` by hand to show what ``yield from future`` produces.

    The coroutine is primed with ``next``, the yielded value is printed,
    the future's result is set to 42, and the coroutine is advanced once
    more so the value carried by ``StopIteration`` can be printed.
    """
    pending = asyncio.Future()
    gen = a(pending)
    first_yield = next(gen)
    print('b, prime_res:\t', first_yield, hex(id(pending)))
    # Advancing the coroutine a second time here, before the future has a
    # result, fails with:
    #   AssertionError: yield from wasn't used with future
    pending.set_result(42)
    try:
        next(gen)
    except StopIteration as stop:
        print('b, exc.value:\t', stop.value)
|
||||||
|
|
||||||
|
b()
|
||||||
19
futures/future_yield2.py
Normal file
19
futures/future_yield2.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
@asyncio.coroutine
def a(future):
    """Print ``future`` and its id, then delegate to it and return its result."""
    print('a, future:', future, hex(id(future)))
    return (yield from future)
|
||||||
|
|
||||||
|
def b():
    """Prime coroutine ``a`` on a fresh future and print what it yields."""
    pending = asyncio.Future()
    gen = a(pending)
    first_yield = next(gen)
    print('b, prime_result:', first_yield, hex(id(pending)))
|
||||||
|
|
||||||
|
# Module-level experiment: wrap coroutine ``a`` in a task on the default
# event loop, then run ``b`` through the loop.
loop = asyncio.get_event_loop()
future = asyncio.Future()
print('future:', future, hex(id(future)))
# ``asyncio.async`` was renamed ``asyncio.ensure_future`` in Python 3.4.4 and
# ``async`` is a reserved word since Python 3.7, so writing the old name as an
# attribute is a SyntaxError there. Looking it up by string keeps this file
# loadable on both old and new interpreters.
_schedule = getattr(asyncio, 'ensure_future', None) or getattr(asyncio, 'async')
tasks = [_schedule(a(future))]

# NOTE(review): b() is a plain function with no return statement, so this
# passes None to run_until_complete() -- confirm what was meant to run here.
res = loop.run_until_complete(b())
|
||||||
|
|
||||||
Reference in New Issue
Block a user