final concurrency examples

This commit is contained in:
Luciano Ramalho
2015-03-13 18:24:31 -03:00
parent 39e87de5cd
commit 2d7a96742b
26 changed files with 1231 additions and 481 deletions

View File

@@ -5,58 +5,59 @@ Sequential version
Sample run::
$ python3 flags.py
BD retrieved.
BR retrieved.
CD retrieved.
...
TR retrieved.
US retrieved.
VN retrieved.
BD BR CD CN DE EG ET FR ID IN IR JP MX NG PH PK RU TR US VN
20 flags downloaded in 10.16s
"""
# BEGIN FLAGS_PY
import os
import time
import sys
import requests
import requests # <1>
POP20_CC = ('CN IN US ID BR PK NG BD RU JP '
'MX PH VN ET EG DE IR TR CD FR').split()
'MX PH VN ET EG DE IR TR CD FR').split() # <2>
BASE_URL = 'http://python.pro.br/fluent/data/flags'
BASE_URL = 'http://flupy.org/data/flags' # <3>
DEST_DIR = 'downloads/'
DEST_DIR = 'downloads/' # <4>
def save_flag(img, filename):
def save_flag(img, filename): # <5>
path = os.path.join(DEST_DIR, filename)
with open(path, 'wb') as fp:
fp.write(img)
def get_flag(cc):
def get_flag(cc): # <6>
url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
res = requests.get(url)
return res.content
resp = requests.get(url)
return resp.content
def download_many(cc_list):
for cc in sorted(cc_list):
def show(text):  # <7>
    """Print ``text`` followed by a space and flush stdout right away.

    No newline is emitted, so successive calls build up a single line.
    """
    print(text, end=' ', flush=True)
def download_many(cc_list): # <8>
for cc in sorted(cc_list): # <9>
image = get_flag(cc)
print('{} retrieved.'.format(cc))
show(cc)
save_flag(image, cc.lower() + '.gif')
return len(cc_list)
def main(download_many):
def main(download_many): # <10>
t0 = time.time()
count = download_many(POP20_CC)
elapsed = time.time() - t0
msg = '{} flags downloaded in {:.2f}s'
msg = '\n{} flags downloaded in {:.2f}s'
print(msg.format(count, elapsed))
if __name__ == '__main__':
main(download_many)
main(download_many) # <11>
# END FLAGS_PY

View File

@@ -1,112 +1,120 @@
"""Download flags of top 10 countries by population
"""Download flags of countries (with error handling).
asyncio version
Sample run::
$
$ python3 flags2_asyncio.py -s ERROR -e -m 200
ERROR site: http://localhost:8003/flags
Searching for 676 flags: from AA to ZZ
200 concurrent connections will be used.
--------------------
146 flags downloaded.
363 not found.
167 errors.
Elapsed time: 2.59s
"""
# BEGIN FLAGS2_ASYNCIO_TOP
import asyncio
from collections import namedtuple
from enum import Enum
import collections
import aiohttp
from aiohttp import web
import tqdm
from flag_utils import main, save_flag, Counts
from flags2_common import main, HTTPStatus, Result, save_flag
# default set low to avoid errors from remote site:
# default set low to avoid errors from remote site, such as
# 503 - Service Temporarily Unavailable
DEFAULT_CONCUR_REQ = 5
MAX_CONCUR_REQ = 1000
TIMEOUT = 120 # seconds
Status = Enum('Status', 'ok not_found error')
Result = namedtuple('Result', 'status data')
class FetchError(Exception):  # <1>
    """Error raised for a single country whose download failed.

    The offending country code is kept in ``country_code``.
    """

    def __init__(self, country_code):
        super().__init__(country_code)
        self.country_code = country_code
@asyncio.coroutine
def get_flag(base_url, cc):
def get_flag(base_url, cc): # <2>
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
res = yield from aiohttp.request('GET', url)
if res.status == 200:
image = yield from res.read()
resp = yield from aiohttp.request('GET', url)
if resp.status == 200:
image = yield from resp.read()
return image
elif res.status == 404:
elif resp.status == 404:
raise web.HTTPNotFound()
else:
raise aiohttp.errors.HttpProcessingError(
code=res.status, message=res.reason, headers=res.headers)
raise aiohttp.HttpProcessingError(
code=resp.status, message=resp.reason,
headers=resp.headers)
@asyncio.coroutine
def download_one(cc, base_url, semaphore, verbose):
def download_one(cc, base_url, semaphore, verbose): # <3>
try:
with (yield from semaphore):
image = yield from get_flag(base_url, cc)
except web.HTTPNotFound:
status = Status.not_found
msg = ''
except aiohttp.errors.HttpProcessingError as exc:
status = Status.error
msg = '{} failed: {exc.code} - {exc.message}'
msg = msg.format(cc, exc=exc)
except aiohttp.errors.ClientError as exc:
try:
context = exc.__context__.__class__.__name__
except AttributeError:
# we chain all exceptions, you should get original exception from __cause__
context = '(unknown context)'
msg = '{} failed: {}'.format(cc, context)
status = Status.error
with (yield from semaphore): # <4>
image = yield from get_flag(base_url, cc) # <5>
except web.HTTPNotFound: # <6>
status = HTTPStatus.not_found
msg = 'not found'
except Exception as exc:
raise FetchError(cc) from exc # <7>
else:
save_flag(image, cc.lower() + '.gif')
status = Status.ok
save_flag(image, cc.lower() + '.gif') # <8>
status = HTTPStatus.ok
msg = 'OK'
if verbose and msg:
print(cc, msg)
return Result(status, cc)
# END FLAGS2_ASYNCIO_TOP
# BEGIN FLAGS2_ASYNCIO_DOWNLOAD_MANY
@asyncio.coroutine
def downloader_coro(cc_list, base_url, verbose, max_req):
semaphore = asyncio.Semaphore(max_req)
to_do = [download_one(cc, base_url, semaphore, verbose) for cc in cc_list]
results = []
to_do_iter = asyncio.as_completed(to_do)
def downloader_coro(cc_list, base_url, verbose, concur_req): # <1>
counter = collections.Counter()
semaphore = asyncio.Semaphore(concur_req) # <2>
to_do = [download_one(cc, base_url, semaphore, verbose)
for cc in sorted(cc_list)] # <3>
to_do_iter = asyncio.as_completed(to_do) # <4>
if not verbose:
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
for future in to_do_iter:
result = yield from future
results.append(result)
return results
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list)) # <5>
for future in to_do_iter: # <6>
try:
res = yield from future # <7>
except FetchError as exc: # <8>
country_code = exc.country_code # <9>
try:
error_msg = exc.__cause__.args[0] # <10>
except IndexError:
error_msg = exc.__cause__.__class__.__name__ # <11>
else:
error_msg = ''
status = res.status
if error_msg: # <12>
status = HTTPStatus.error
counter[status] += 1
if verbose and error_msg:
msg = '*** Error for {}: {}'
print(msg.format(country_code, error_msg))
return counter
def download_many(cc_list, base_url, verbose, max_req):
def download_many(cc_list, base_url, verbose, concur_req):
loop = asyncio.get_event_loop()
#loop.set_debug(True)
try:
coro = downloader_coro(cc_list, base_url, verbose, max_req)
done = loop.run_until_complete(coro)
except Exception as exc:
print('*' * 60)
print(exc)
print(vars(exc))
print('*' * 60)
counts = []
for status in Status:
counts.append(len([res for res in done
if res.status == status]))
loop.close()
coro = downloader_coro(cc_list, base_url, verbose, concur_req)
counts = loop.run_until_complete(coro) # <13>
loop.close() # <14>
return Counts(*counts)
return counts
if __name__ == '__main__':
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
# END FLAGS2_ASYNCIO_DOWNLOAD_MANY

View File

@@ -1,100 +0,0 @@
"""Download flags of top 10 countries by population
asyncio version
Sample run::
$
"""
import asyncio
from collections import namedtuple
from enum import Enum
import aiohttp
from aiohttp import web
from flag_utils import main, save_flag, Counts
# default set low to avoid errors from remote site:
# 503 - Service Temporarily Unavailable
DEFAULT_CONCUR_REQ = 5
MAX_CONCUR_REQ = 1000
TIMEOUT = 120 # seconds
Status = Enum('Status', 'ok not_found error')
Result = namedtuple('Result', 'status data')
@asyncio.coroutine
def get_flag(base_url, cc):
    """Coroutine: request the GIF for ``cc`` and return the response body.

    Raises ``web.HTTPNotFound`` for a 404 response and
    ``aiohttp.errors.HttpProcessingError`` for any other non-200 status.
    """
    code = cc.lower()
    url = '{}/{cc}/{cc}.gif'.format(base_url, cc=code)
    response = yield from aiohttp.request('GET', url)
    if response.status == 200:
        return (yield from response.read())
    if response.status == 404:
        raise web.HTTPNotFound()
    raise aiohttp.errors.HttpProcessingError(
        code=response.status, message=response.reason,
        headers=response.headers)
@asyncio.coroutine
def download_one(cc, base_url, semaphore, verbose):
    """Coroutine: download and save one flag, returning ``Result(status, cc)``.

    The fetch runs while holding ``semaphore``. A 404 maps to
    ``Status.not_found``; HTTP processing and client errors map to
    ``Status.error``; success saves the image and maps to ``Status.ok``.
    When ``verbose`` is true, any non-empty message is printed.
    """
    try:
        with (yield from semaphore):
            image = yield from get_flag(base_url, cc)
    except web.HTTPNotFound:
        status, msg = Status.not_found, ''
    except aiohttp.errors.HttpProcessingError as exc:
        status = Status.error
        msg = '{} failed: {exc.code} - {exc.message}'.format(cc, exc=exc)
    except aiohttp.errors.ClientError as exc:
        try:
            context = exc.__context__.__class__.__name__
        except AttributeError:
            # Fallback label when no context exception can be named.
            context = '(unknown context)'
        status = Status.error
        msg = '{} failed: {}'.format(cc, context)
    else:
        save_flag(image, cc.lower() + '.gif')
        status, msg = Status.ok, 'OK'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
def download_many(cc_list, base_url, verbose, max_req):
    """Download the flags for ``cc_list`` concurrently; return ``Counts``.

    At most ``max_req`` fetches hold the semaphore at once, and the whole
    batch is given ``TIMEOUT`` seconds. Tasks still pending after the wait
    are cancelled. The result holds one count per ``Status`` member, in the
    enum's definition order.
    """
    semaphore = asyncio.Semaphore(max_req)
    to_do = [download_one(cc, base_url, semaphore, verbose) for cc in cc_list]
    loop = asyncio.get_event_loop()
    # Start from empty sets so the tally below still works (all zeros) when
    # the wait itself fails, instead of dying on an unbound name.
    done, pending = set(), set()
    try:
        done, pending = loop.run_until_complete(
            asyncio.wait(to_do, timeout=TIMEOUT))
    except Exception as exc:
        print('*' * 60)
        print(exc)
        print(vars(exc))
        print('*' * 60)
    # Read each task's status once, then count per enum member.
    statuses = [task.result().status for task in done]
    counts = [statuses.count(status) for status in Status]
    for task in pending:
        task.cancel()
    loop.close()

    return Counts(*counts)
if __name__ == '__main__':
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)

View File

@@ -0,0 +1,112 @@
"""Download flags of countries (with error handling).
asyncio version using thread pool to save files
Sample run::
$
"""
import asyncio
import collections
import aiohttp
from aiohttp import web
import tqdm
from flags2_common import main, HTTPStatus, Result, save_flag
# default set low to avoid errors from remote site, such as
# 503 - Service Temporarily Unavailable
DEFAULT_CONCUR_REQ = 5
MAX_CONCUR_REQ = 1000
class FetchError(Exception):
    """Error raised for a single country whose download failed.

    The offending country code is kept in ``country_code``.
    """

    def __init__(self, country_code):
        super().__init__(country_code)
        self.country_code = country_code
@asyncio.coroutine
def get_flag(base_url, cc):
    """Coroutine: request the GIF for ``cc`` and return the response body.

    Raises ``web.HTTPNotFound`` for a 404 response and
    ``aiohttp.HttpProcessingError`` for any other non-200 status.
    """
    code = cc.lower()
    url = '{}/{cc}/{cc}.gif'.format(base_url, cc=code)
    resp = yield from aiohttp.request('GET', url)
    if resp.status == 200:
        return (yield from resp.read())
    if resp.status == 404:
        raise web.HTTPNotFound()
    raise aiohttp.HttpProcessingError(
        code=resp.status, message=resp.reason,
        headers=resp.headers)
# BEGIN FLAGS2_ASYNCIO_EXECUTOR
@asyncio.coroutine
def download_one(cc, base_url, semaphore, verbose):
    """Coroutine: download and save one flag, returning ``Result(status, cc)``.

    The fetch runs while holding ``semaphore``. A 404 yields
    ``HTTPStatus.not_found``; any other exception raised by the fetch is
    re-raised as ``FetchError(cc)`` chained to the original. On success the
    image is written by ``save_flag`` in the loop's default executor, so the
    blocking file I/O does not stall the event loop.
    """
    try:
        with (yield from semaphore):
            image = yield from get_flag(base_url, cc)
    except web.HTTPNotFound:
        status = HTTPStatus.not_found
        msg = 'not found'
    except Exception as exc:
        raise FetchError(cc) from exc
    else:
        loop = asyncio.get_event_loop()  # <1>
        # Wait for the executor future: otherwise an exception raised by
        # save_flag is never retrieved and 'OK' is reported before the
        # file has actually been written. Other coroutines keep running
        # while this one is suspended here.
        yield from loop.run_in_executor(None,  # <2>
                save_flag, image, cc.lower() + '.gif')  # <3>
        status = HTTPStatus.ok
        msg = 'OK'

    if verbose and msg:
        print(cc, msg)

    return Result(status, cc)
# END FLAGS2_ASYNCIO_EXECUTOR
@asyncio.coroutine
def downloader_coro(cc_list, base_url, verbose, concur_req):
    """Coroutine: run all downloads and return a ``Counter`` of statuses.

    One ``download_one`` coroutine is created per country code (in sorted
    order), sharing a semaphore initialised with ``concur_req``. Results are
    consumed as they complete; unless ``verbose`` is set the iteration is
    wrapped in a ``tqdm`` progress bar. Each ``FetchError`` is counted under
    ``HTTPStatus.error`` and, in verbose mode, reported on stdout.
    """
    counter = collections.Counter()
    semaphore = asyncio.Semaphore(concur_req)
    to_do = [download_one(cc, base_url, semaphore, verbose)
             for cc in sorted(cc_list)]

    to_do_iter = asyncio.as_completed(to_do)
    if not verbose:
        to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
    for future in to_do_iter:
        try:
            res = yield from future
        except FetchError as exc:
            # A failed fetch is always an error. Deciding this from the
            # truthiness of the message would leave ``status`` unbound (or
            # stale from the previous iteration) when the underlying
            # exception carries an empty first argument.
            status = HTTPStatus.error
            cause = exc.__cause__
            error_msg = cause.args[0] if cause.args else ''
            if not error_msg:
                # No usable message: fall back to the exception class name.
                error_msg = cause.__class__.__name__
            if verbose:
                msg = '*** Error for {}: {}'
                print(msg.format(exc.country_code, error_msg))
        else:
            status = res.status

        counter[status] += 1

    return counter
def download_many(cc_list, base_url, verbose, concur_req):
    """Run ``downloader_coro`` to completion and return its counter.

    The event loop is closed afterwards, including when the coroutine
    raises, so the loop is not left open on failure.
    """
    loop = asyncio.get_event_loop()
    coro = downloader_coro(cc_list, base_url, verbose, concur_req)
    try:
        counts = loop.run_until_complete(coro)
    finally:
        loop.close()

    return counts
if __name__ == '__main__':
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)

View File

@@ -11,9 +11,8 @@ from enum import Enum
Result = namedtuple('Result', 'status data')
Counts = namedtuple('Counts', 'ok not_found error')
Status = Enum('Status', 'ok not_found error')
HTTPStatus = Enum('Status', 'ok not_found error')
POP20_CC = ('CN IN US ID BR PK NG BD RU JP '
'MX PH VN ET EG DE IR TR CD FR').split()
@@ -22,7 +21,7 @@ DEFAULT_CONCUR_REQ = 1
MAX_CONCUR_REQ = 1
SERVERS = {
'REMOTE': 'http://python.pro.br/fluent/data/flags',
'REMOTE': 'http://flupy.org/data/flags',
'LOCAL': 'http://localhost:8001/flags',
'DELAY': 'http://localhost:8002/flags',
'ERROR': 'http://localhost:8003/flags',
@@ -53,17 +52,17 @@ def initial_report(cc_list, actual_req, server_label):
print(msg.format(actual_req, plural))
def final_report(cc_list, counts, start_time):
def final_report(cc_list, counter, start_time):
elapsed = time.time() - start_time
print('-' * 20)
msg = '{} flag{} downloaded.'
plural = 's' if counts.ok != 1 else ''
print(msg.format(counts.ok, plural))
if counts.not_found:
print(counts.not_found, 'not found.')
if counts.error:
plural = 's' if counts.error != 1 else ''
print('{} error{}.'.format(counts.error, plural))
plural = 's' if counter[HTTPStatus.ok] != 1 else ''
print(msg.format(counter[HTTPStatus.ok], plural))
if counter[HTTPStatus.not_found]:
print(counter[HTTPStatus.not_found], 'not found.')
if counter[HTTPStatus.error]:
plural = 's' if counter[HTTPStatus.error] != 1 else ''
print('{} error{}.'.format(counter[HTTPStatus.error], plural))
print('Elapsed time: {:.2f}s'.format(elapsed))
@@ -144,5 +143,7 @@ def main(download_many, default_concur_req, max_concur_req):
initial_report(cc_list, actual_req, args.server)
base_url = SERVERS[args.server]
t0 = time.time()
counts = download_many(cc_list, base_url, args.verbose, actual_req)
final_report(cc_list, counts, t0)
counter = download_many(cc_list, base_url, args.verbose, actual_req)
assert sum(counter.values()) == len(cc_list), \
'some downloads are unaccounted for'
final_report(cc_list, counter, t0)

View File

@@ -4,71 +4,84 @@ Sequential version
Sample run::
$
$ python3 flags2_sequential.py -s DELAY b
DELAY site: http://localhost:8002/flags
Searching for 26 flags: from BA to BZ
1 concurrent connection will be used.
--------------------
17 flags downloaded.
9 not found.
Elapsed time: 13.36s
"""
import collections
import requests
import tqdm
from flag_utils import main, save_flag, Counts, Status, Result
from flags2_common import main, save_flag, HTTPStatus, Result
DEFAULT_CONCUR_REQ = 1
MAX_CONCUR_REQ = 1
# BEGIN FLAGS2_BASIC_HTTP_FUNCTIONS
def get_flag(base_url, cc):
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
res = requests.get(url)
if res.status_code != 200:
res.raise_for_status()
return res.content
resp = requests.get(url)
if resp.status_code != 200: # <1>
resp.raise_for_status()
return resp.content
def download_one(cc, base_url, verbose=False):
try:
image = get_flag(base_url, cc)
except requests.exceptions.HTTPError as exc:
except requests.exceptions.HTTPError as exc: # <2>
res = exc.response
if res.status_code == 404:
status = Status.not_found
msg = ''
else:
status = Status.error
msg = 'error {res.status_code} - {res.reason}'
msg = msg.format(res=exc.response)
except requests.exceptions.ConnectionError as exc:
status = Status.error
msg = 'failed: {}'.format(cc, exc.args)
status = HTTPStatus.not_found # <3>
msg = 'not found'
else: # <4>
raise
else:
save_flag(image, cc.lower() + '.gif')
status = Status.ok
status = HTTPStatus.ok
msg = 'OK'
if verbose and msg:
if verbose: # <5>
print(cc, msg)
return Result(status, cc)
return Result(status, cc) # <6>
# END FLAGS2_BASIC_HTTP_FUNCTIONS
# BEGIN FLAGS2_DOWNLOAD_MANY_SEQUENTIAL
def download_many(cc_list, base_url, verbose, max_req):
counts = [0, 0, 0]
counter = collections.Counter() # <1>
cc_iter = sorted(cc_list) # <2>
if not verbose:
cc_iter = tqdm.tqdm(sorted(cc_list))
else:
cc_iter = sorted(cc_list)
for cc in cc_iter:
cc_iter = tqdm.tqdm(cc_iter) # <3>
for cc in cc_iter: # <4>
try:
res = download_one(cc, base_url, verbose)
except Exception as exc:
msg = 'Unexpected exception for {}: {!r}'
print(msg.format(cc, exc))
else:
counts[res.status.value-1] += 1
res = download_one(cc, base_url, verbose) # <5>
except requests.exceptions.HTTPError as exc: # <6>
error_msg = 'HTTP error {res.status_code} - {res.reason}'
error_msg = error_msg.format(res=exc.response)
except requests.exceptions.ConnectionError as exc: # <7>
error_msg = 'Connection error'
else: # <8>
error_msg = ''
status = res.status
return Counts(*counts)
if error_msg:
status = HTTPStatus.error # <9>
counter[status] += 1 # <10>
if verbose and error_msg: # <11>
print('*** Error for {}: {}'.format(cc, error_msg))
return counter # <12>
# END FLAGS2_DOWNLOAD_MANY_SEQUENTIAL
if __name__ == '__main__':
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)

View File

@@ -1,42 +1,68 @@
"""Download flags of top 10 countries by population
"""Download flags of countries (with error handling).
ThreadPool version
Sample run::
$
$ python3 flags2_threadpool.py -s ERROR -e
ERROR site: http://localhost:8003/flags
Searching for 676 flags: from AA to ZZ
30 concurrent connections will be used.
--------------------
150 flags downloaded.
361 not found.
165 errors.
Elapsed time: 7.46s
"""
# BEGIN FLAGS2_THREADPOOL
import collections
from concurrent import futures
import tqdm
import requests
import tqdm # <1>
from flag_utils import main, Counts
from flags2_sequential import get_flag, download_one
from flags2_common import main, HTTPStatus # <2>
from flags2_sequential import download_one # <3>
DEFAULT_CONCUR_REQ = 30
MAX_CONCUR_REQ = 1000
DEFAULT_CONCUR_REQ = 30 # <4>
MAX_CONCUR_REQ = 1000 # <5>
def download_many(cc_list, base_url, verbose, concur_req):
with futures.ThreadPoolExecutor(concur_req) as executor:
to_do = [executor.submit(download_one, cc, base_url, verbose)
for cc in sorted(cc_list)]
counts = [0, 0, 0]
to_do_iter = futures.as_completed(to_do)
counter = collections.Counter()
with futures.ThreadPoolExecutor(concur_req) as executor: # <6>
to_do_map = {} # <7>
for cc in sorted(cc_list): # <8>
future = executor.submit(download_one,
cc, base_url, verbose) # <9>
to_do_map[future] = cc # <10>
to_do_iter = futures.as_completed(to_do_map) # <11>
if not verbose:
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
for future in to_do_iter:
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list)) # <12>
for future in to_do_iter: # <13>
try:
res = future.result()
except Exception as exc:
print('*** Unexpected exception:', exc)
res = future.result() # <14>
except requests.exceptions.HTTPError as exc: # <15>
error_msg = 'HTTP {res.status_code} - {res.reason}'
error_msg = error_msg.format(res=exc.response)
except requests.exceptions.ConnectionError as exc:
error_msg = 'Connection error'
else:
counts[res.status.value-1] += 1
error_msg = ''
status = res.status
return Counts(*counts)
if error_msg:
status = HTTPStatus.error
counter[status] += 1
if verbose and error_msg:
cc = to_do_map[future] # <16>
print('*** Error for {}: {}'.format(cc, error_msg))
return counter
if __name__ == '__main__':
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
# END FLAGS2_THREADPOOL

View File

@@ -1,122 +1,132 @@
"""Download flags of top 10 countries by population
"""Download flags of countries (with error handling).
asyncio version
asyncio version using thread pool to save files
Sample run::
$ python3 pop10_asyncio1.py
CN retrieved.
US retrieved.
BR retrieved.
NG retrieved.
PK retrieved.
RU retrieved.
ID retrieved.
IN retrieved.
BD retrieved.
JP retrieved.
10 flags downloaded in 0.45s
$
"""
import asyncio
from collections import namedtuple
from enum import Enum
import collections
import aiohttp
from aiohttp import web
import tqdm
from flags_sequential2 import BASE_URL
from flags_sequential2 import save_flag, main, Counts
from flags2_common import main, HTTPStatus, Result, save_flag
MAX_TASKS = 100 if 'localhost' in BASE_URL else 5
TIMEOUT = 120 # seconds
Status = Enum('Status', 'ok not_found error')
Result = namedtuple('Result', 'status data')
# default set low to avoid errors from remote site, such as
# 503 - Service Temporarily Unavailable
DEFAULT_CONCUR_REQ = 5
MAX_CONCUR_REQ = 1000
class FetchError(Exception):
    """Error raised for a single country whose download failed.

    The offending country code is kept in ``country_code``.
    """

    def __init__(self, country_code):
        super().__init__(country_code)
        self.country_code = country_code
# BEGIN FLAGS3_ASYNCIO
@asyncio.coroutine
def http_get(url):
res = yield from aiohttp.request('GET', url)
if res.status == 200:
ctype = res.headers.get('Content-type', '').lower()
if 'json' in ctype or url.endswith('json'):
data = yield from res.json()
data = yield from res.json() # <1>
else:
data = yield from res.read()
data = yield from res.read() # <2>
return data
elif res.status == 404:
raise web.HTTPNotFound()
else:
raise aiohttp.errors.HttpProcessingError(
code=res.status, message=res.reason, headers=res.headers)
code=res.status, message=res.reason,
headers=res.headers)
@asyncio.coroutine
def get_flag(cc):
url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
return (yield from http_get(url))
@asyncio.coroutine
def get_country(cc):
url = '{}/{cc}/metadata.json'.format(BASE_URL, cc=cc.lower())
metadata = yield from http_get(url)
def get_country(base_url, cc):
url = '{}/{cc}/metadata.json'.format(base_url, cc=cc.lower())
metadata = yield from http_get(url) # <3>
return metadata['country']
@asyncio.coroutine
def download_one(cc, semaphore):
def get_flag(base_url, cc):
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
return (yield from http_get(url)) # <4>
@asyncio.coroutine
def download_one(cc, base_url, semaphore, verbose):
try:
with (yield from semaphore): # <5>
image = yield from get_flag(base_url, cc)
with (yield from semaphore):
image = yield from get_flag(cc)
with (yield from semaphore):
country = yield from get_country(cc)
country = yield from get_country(base_url, cc)
except web.HTTPNotFound:
status = Status.not_found
except aiohttp.errors.HttpProcessingError as exc:
msg = '{} failed: {exc.code} - {exc.message}'
print(msg.format(cc, exc=exc))
status = Status.error
except aiohttp.errors.ClientResponseError as exc:
try:
context = exc.__context__.__class__.__name__
except AttributeError:
context = '(unknown context)'
msg = '{} failed: {}'
print(msg.format(cc, context))
status = Status.error
else:
print('{} retrieved.'.format(cc.upper()))
country = country.replace(' ', '_')
save_flag(image, '{}-{}.gif'.format(country, cc))
status = Status.ok
return Result(status, cc)
def download_many(cc_list):
semaphore = asyncio.Semaphore(MAX_TASKS)
to_do = [download_one(cc, semaphore) for cc in cc_list]
loop = asyncio.get_event_loop()
#loop.set_debug(True)
try:
done, pending = loop.run_until_complete(asyncio.wait(to_do, timeout=TIMEOUT))
status = HTTPStatus.not_found
msg = 'not found'
except Exception as exc:
print('*' * 60)
print(exc)
print(vars(exc))
print('*' * 60)
counts = []
for status in Status:
counts.append(len([task for task in done
if task.result().status == status]))
for task in pending:
task.cancel()
raise FetchError(cc) from exc
else:
country = country.replace(' ', '_')
filename = '{}-{}.gif'.format(country, cc)
loop = asyncio.get_event_loop()
loop.run_in_executor(None, save_flag, image, filename)
status = HTTPStatus.ok
msg = 'OK'
if verbose and msg:
print(cc, msg)
return Result(status, cc)
# END FLAGS3_ASYNCIO
@asyncio.coroutine
def downloader_coro(cc_list, base_url, verbose, concur_req):
    """Coroutine: run all downloads and return a ``Counter`` of statuses.

    One ``download_one`` coroutine is created per country code (in sorted
    order), sharing a semaphore initialised with ``concur_req``. Results are
    consumed as they complete; unless ``verbose`` is set the iteration is
    wrapped in a ``tqdm`` progress bar. Each ``FetchError`` is counted under
    ``HTTPStatus.error`` and, in verbose mode, reported on stdout.
    """
    counter = collections.Counter()
    semaphore = asyncio.Semaphore(concur_req)
    to_do = [download_one(cc, base_url, semaphore, verbose)
             for cc in sorted(cc_list)]

    to_do_iter = asyncio.as_completed(to_do)
    if not verbose:
        to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
    for future in to_do_iter:
        try:
            res = yield from future
        except FetchError as exc:
            # A failed fetch is always an error. Deciding this from the
            # truthiness of the message would leave ``status`` unbound (or
            # stale from the previous iteration) when the underlying
            # exception carries an empty first argument.
            status = HTTPStatus.error
            cause = exc.__cause__
            error_msg = cause.args[0] if cause.args else ''
            if not error_msg:
                # No usable message: fall back to the exception class name.
                error_msg = cause.__class__.__name__
            if verbose:
                msg = '*** Error for {}: {}'
                print(msg.format(exc.country_code, error_msg))
        else:
            status = res.status

        counter[status] += 1

    return counter
def download_many(cc_list, base_url, verbose, concur_req):
loop = asyncio.get_event_loop()
coro = downloader_coro(cc_list, base_url, verbose, concur_req)
counts = loop.run_until_complete(coro)
loop.close()
return Counts(*counts)
return counts
if __name__ == '__main__':
main(download_many)
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)

View File

@@ -1,77 +1,88 @@
"""Download flags of top 10 countries by population
"""Download flags and names of countries.
ThreadPool version
Sample run::
$ python3 pop10_threadpool1.py
BR retrieved.
PK retrieved.
BD retrieved.
JP retrieved.
CN retrieved.
IN retrieved.
RU retrieved.
NG retrieved.
US retrieved.
ID retrieved.
10 flags downloaded in 0.63s
"""
import collections
from concurrent import futures
from collections import namedtuple
from enum import Enum
import requests
import tqdm
from flags_sequential2 import BASE_URL
from flags_sequential2 import save_flag, get_flag, main, Counts
from flags2_common import main, save_flag, HTTPStatus, Result
from flags2_sequential import get_flag
MAX_WORKERS = 200
Status = Enum('Status', 'ok not_found error')
Result = namedtuple('Result', 'status data')
DEFAULT_CONCUR_REQ = 30
MAX_CONCUR_REQ = 1000
def get_country(cc):
url = '{}/{cc}/metadata.json'.format(BASE_URL, cc=cc.lower())
def get_country(base_url, cc):
url = '{}/{cc}/metadata.json'.format(base_url, cc=cc.lower())
res = requests.get(url)
if res.status_code != 200:
res.raise_for_status()
return res.json()['country']
def download_one(cc):
def download_one(cc, base_url, verbose=False):
try:
image = get_flag(cc)
country = get_country(cc)
image = get_flag(base_url, cc)
country = get_country(base_url, cc)
except requests.exceptions.HTTPError as exc:
res = exc.response
if res.status_code == 404:
status = Status.not_found
else:
msg = '{} failed: {res.status_code} - {res.reason}'
print(msg.format(cc, res=exc.response))
status = Status.error
status = HTTPStatus.not_found
msg = 'not found'
else: # <4>
raise
else:
print('{} retrieved.'.format(cc))
country = country.replace(' ', '_')
save_flag(image, '{}-{}.gif'.format(country, cc))
status = Status.ok
status = HTTPStatus.ok
msg = 'OK'
if verbose:
print(cc, msg)
return Result(status, cc)
def download_many(cc_list):
workers = min(len(cc_list), MAX_WORKERS)
with futures.ThreadPoolExecutor(workers) as executor:
res = executor.map(download_one, sorted(cc_list))
res = list(res)
counts = []
for status in Status:
counts.append(len([r for r in res if r.status == status]))
return Counts(*counts)
def download_many(cc_list, base_url, verbose, concur_req):
    """Download flags with a thread pool; return a ``Counter`` of statuses.

    One ``download_one`` call is submitted per country code, in sorted
    order, to an executor created with ``concur_req`` workers. Futures are
    handled as they complete; unless ``verbose`` is set the iteration is
    wrapped in a ``tqdm`` progress bar. HTTP and connection errors are
    counted under ``HTTPStatus.error`` and reported when ``verbose`` is set;
    other exceptions propagate to the caller.
    """
    counter = collections.Counter()
    with futures.ThreadPoolExecutor(concur_req) as executor:
        # Map each future back to its country code for error reporting.
        to_do_map = {
            executor.submit(download_one, code, base_url, verbose): code
            for code in sorted(cc_list)
        }
        to_do_iter = futures.as_completed(to_do_map)
        if not verbose:
            to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
        for future in to_do_iter:
            error_msg = ''
            try:
                res = future.result()
            except requests.exceptions.HTTPError as exc:
                template = 'HTTP {res.status_code} - {res.reason}'
                error_msg = template.format(res=exc.response)
            except requests.exceptions.ConnectionError:
                error_msg = 'Connection error'

            status = HTTPStatus.error if error_msg else res.status
            counter[status] += 1
            if verbose and error_msg:
                failed_cc = to_do_map[future]
                print('*** Error for {}: {}'.format(failed_cc, error_msg))

    return counter
if __name__ == '__main__':
main(download_many)
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)

View File

@@ -0,0 +1,48 @@
"""Download flags of top 20 countries by population
asyncio + aiohttp version
Sample run::
$ python3 flags_asyncio.py
EG VN IN TR RU ID US DE CN MX JP BD NG ET FR BR PH PK CD IR
20 flags downloaded in 1.07s
"""
# BEGIN FLAGS_ASYNCIO
import asyncio
import aiohttp # <1>
from flags import BASE_URL, save_flag, show, main # <2>
@asyncio.coroutine  # <3>
def get_flag(cc):
    """Coroutine: request the GIF for ``cc`` under ``BASE_URL``.

    Returns whatever the response's ``read()`` yields; the HTTP status is
    not checked.
    """
    code = cc.lower()
    url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=code)
    resp = yield from aiohttp.request('GET', url)  # <4>
    return (yield from resp.read())  # <5>
@asyncio.coroutine
def download_one(cc):  # <6>
    """Coroutine: fetch the flag for ``cc``, show the code, save the image.

    The file is named after the lower-cased code plus ``.gif``. Returns
    ``cc`` unchanged.
    """
    flag_image = yield from get_flag(cc)  # <7>
    show(cc)
    filename = cc.lower() + '.gif'
    save_flag(flag_image, filename)
    return cc
def download_many(cc_list):
    """Download every flag in ``cc_list`` on the asyncio event loop.

    Returns the size of the "done" set reported by ``asyncio.wait``. The
    event loop is closed before returning.
    """
    loop = asyncio.get_event_loop()  # <8>
    to_do = []
    for cc in sorted(cc_list):  # <9>
        to_do.append(download_one(cc))
    wait_coro = asyncio.wait(to_do)  # <10>
    done, _pending = loop.run_until_complete(wait_coro)  # <11>
    loop.close()  # <12>

    return len(done)
if __name__ == '__main__':
main(download_many)
# END FLAGS_ASYNCIO

View File

@@ -1,45 +0,0 @@
"""Download flags of top 20 countries by population
asyncio+aiohttp version
Sample run::
$ python3 flags_asyncio0.py
EG retrieved.
BD retrieved.
JP retrieved.
...
CD retrieved.
PH retrieved.
ET retrieved.
20 flags downloaded in 1.05s
"""
import asyncio
import aiohttp
from flags import BASE_URL, save_flag, main
@asyncio.coroutine
def download_one(cc):
    """Coroutine: fetch the GIF for ``cc``, report it, and save the image.

    The HTTP status is not checked. Returns ``cc`` unchanged.
    """
    code = cc.lower()
    url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=code)
    response = yield from aiohttp.request('GET', url)
    image = yield from response.read()
    print('{} retrieved.'.format(cc))
    save_flag(image, code + '.gif')
    return cc
def download_many(cc_list):
    """Download every flag in ``cc_list`` on the asyncio event loop.

    Returns the size of the "done" set reported by ``asyncio.wait``. The
    event loop is closed before returning.
    """
    loop = asyncio.get_event_loop()
    coros = []
    for cc in cc_list:
        coros.append(download_one(cc))
    done, _pending = loop.run_until_complete(asyncio.wait(coros))
    loop.close()

    return len(done)
if __name__ == '__main__':
main(download_many)

View File

@@ -1,51 +0,0 @@
"""Download flags of top 20 countries by population
asyncio+aiohttp version
Sample run::
$ python3 flags_asyncio.py
NG retrieved.
FR retrieved.
IN retrieved.
...
EG retrieved.
DE retrieved.
IR retrieved.
20 flags downloaded in 1.08s
"""
import asyncio
import aiohttp
from flags import BASE_URL, save_flag, main
@asyncio.coroutine
def get_flag(cc):
    """Coroutine: request the GIF for ``cc`` under ``BASE_URL``.

    Returns whatever the response's ``read()`` yields; the HTTP status is
    not checked.
    """
    code = cc.lower()
    url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=code)
    response = yield from aiohttp.request('GET', url)
    return (yield from response.read())
@asyncio.coroutine
def download_one(cc):
    """Coroutine: fetch the flag for ``cc``, report it, and save the image.

    The file is named after the lower-cased code plus ``.gif``. Returns
    ``cc`` unchanged.
    """
    flag_image = yield from get_flag(cc)
    print('{} retrieved.'.format(cc))
    filename = cc.lower() + '.gif'
    save_flag(flag_image, filename)
    return cc
def download_many(cc_list):
    """Download every flag in ``cc_list`` on the asyncio event loop.

    Returns the size of the "done" set reported by ``asyncio.wait``. The
    event loop is closed before returning.
    """
    loop = asyncio.get_event_loop()
    pending_downloads = list(map(download_one, cc_list))
    waiter = asyncio.wait(pending_downloads)
    finished, _unfinished = loop.run_until_complete(waiter)
    loop.close()

    return len(finished)
if __name__ == '__main__':
main(download_many)

View File

@@ -1,6 +1,6 @@
"""Download flags of top 20 countries by population
ThreadPool version
ThreadPoolExecutor version
Sample run::
@@ -15,28 +15,29 @@ Sample run::
20 flags downloaded in 0.93s
"""
# BEGIN FLAGS_THREADPOOL
from concurrent import futures
from flags import save_flag, get_flag, main
from flags import save_flag, get_flag, show, main # <1>
MAX_WORKERS = 100
MAX_WORKERS = 20 # <2>
def download_one(cc):
def download_one(cc): # <3>
image = get_flag(cc)
print('{} retrieved.'.format(cc.upper()))
show(cc)
save_flag(image, cc.lower() + '.gif')
return cc
def download_many(cc_list):
workers = min(len(cc_list), MAX_WORKERS)
with futures.ThreadPoolExecutor(workers) as executor:
res = executor.map(download_one, sorted(cc_list))
workers = min(MAX_WORKERS, len(cc_list)) # <4>
with futures.ThreadPoolExecutor(workers) as executor: # <5>
res = executor.map(download_one, sorted(cc_list)) # <6>
return len(list(res))
return len(list(res)) # <7>
if __name__ == '__main__':
main(download_many)
main(download_many) # <8>
# END FLAGS_THREADPOOL

View File

@@ -0,0 +1,55 @@
"""Download flags of top 20 countries by population
ThreadPoolExecutor version 2, with ``as_completed``.
Sample run::
$ python3 flags_threadpool.py
BD retrieved.
EG retrieved.
CN retrieved.
...
PH retrieved.
US retrieved.
IR retrieved.
20 flags downloaded in 0.93s
"""
from concurrent import futures
from flags import save_flag, get_flag, show, main
MAX_WORKERS = 20
def download_one(cc):
    """Fetch the flag for ``cc``, show the code, and save the image.

    The file is named after the lower-cased code plus ``.gif``. Returns
    ``cc`` unchanged.
    """
    flag_image = get_flag(cc)
    show(cc)
    filename = cc.lower() + '.gif'
    save_flag(flag_image, filename)
    return cc
# BEGIN FLAGS_THREADPOOL_AS_COMPLETED
def download_many(cc_list):
    """Demonstrate ``submit``/``as_completed`` on the first five codes.

    Each future is printed when scheduled and again, with its result, when
    it completes. Returns the number of results collected.
    """
    cc_list = cc_list[:5]  # <1>
    with futures.ThreadPoolExecutor(max_workers=3) as executor:  # <2>
        to_do = []
        for cc in sorted(cc_list):  # <3>
            future = executor.submit(download_one, cc)  # <4>
            to_do.append(future)  # <5>
            print('Scheduled for {}: {}'.format(cc, future))  # <6>

        completed = 0
        for future in futures.as_completed(to_do):  # <7>
            outcome = future.result()  # <8>
            print('{} result: {!r}'.format(future, outcome))  # <9>
            completed += 1

    return completed
# END FLAGS_THREADPOOL_AS_COMPLETED
if __name__ == '__main__':
main(download_many)