final concurrency examples

This commit is contained in:
Luciano Ramalho
2015-03-13 18:24:31 -03:00
parent 39e87de5cd
commit 2d7a96742b
26 changed files with 1231 additions and 481 deletions

View File

@@ -1,77 +1,88 @@
"""Download flags of top 10 countries by population
"""Download flags and names of countries.
ThreadPool version
Sample run::
$ python3 pop10_threadpool1.py
BR retrieved.
PK retrieved.
BD retrieved.
JP retrieved.
CN retrieved.
IN retrieved.
RU retrieved.
NG retrieved.
US retrieved.
ID retrieved.
10 flags downloaded in 0.63s
"""
import collections
from concurrent import futures
from collections import namedtuple
from enum import Enum
import requests
import tqdm
from flags_sequential2 import BASE_URL
from flags_sequential2 import save_flag, get_flag, main, Counts
from flags2_common import main, save_flag, HTTPStatus, Result
from flags2_sequential import get_flag
# NOTE(review): this region reads like a rendered diff whose +/- markers were
# stripped, so constants from two revisions of the script may be mixed here --
# confirm against the real file.
# Upper bound on the thread-pool size; the one-argument download_many() caps
# its worker count at this value.
MAX_WORKERS = 200
# Possible outcomes of a single download.
Status = Enum('Status', 'ok not_found error')
# Per-download result: the outcome status plus a data field (download_one
# stores the country code there).
Result = namedtuple('Result', 'status data')
# Both values are handed to main() at the bottom of the script; presumably the
# default and maximum number of concurrent requests -- verify in main().
DEFAULT_CONCUR_REQ = 30
MAX_CONCUR_REQ = 1000
# Fetch the metadata.json of one country and return its 'country' field.
# NOTE(review): two revisions of this function appear interleaved below (the
# hunk header suggests a diff whose +/- markers and indentation were lost).
# The one-argument form uses the imported BASE_URL; the two-argument form
# takes the base URL as a parameter -- confirm which lines are current.
def get_country(cc):
url = '{}/{cc}/metadata.json'.format(BASE_URL, cc=cc.lower())
def get_country(base_url, cc):
# Metadata lives at <base>/<lower-cased country code>/metadata.json.
url = '{}/{cc}/metadata.json'.format(base_url, cc=cc.lower())
res = requests.get(url)
# Any status other than 200 is handed to raise_for_status(), which raises
# requests' HTTPError for 4xx/5xx responses.
if res.status_code != 200:
res.raise_for_status()
# Decode the JSON body and return only the country name.
return res.json()['country']
# Download the flag image and country name for one country code, save the
# image, and return a Result(status, cc).
# NOTE(review): two revisions of this function appear interleaved below (diff
# markers and indentation lost). One uses Status and prints/records non-404
# HTTP errors; the other uses HTTPStatus, takes base_url/verbose, and
# re-raises non-404 HTTP errors -- confirm which lines are current.
def download_one(cc):
def download_one(cc, base_url, verbose=False):
try:
image = get_flag(cc)
country = get_country(cc)
image = get_flag(base_url, cc)
country = get_country(base_url, cc)
except requests.exceptions.HTTPError as exc:
res = exc.response
# A 404 is an expected outcome and is reported as "not found" rather than
# treated as a failure.
if res.status_code == 404:
status = Status.not_found
else:
msg = '{} failed: {res.status_code} - {res.reason}'
print(msg.format(cc, res=exc.response))
status = Status.error
status = HTTPStatus.not_found
msg = 'not found'
else: # <4>
# Any other HTTP error is propagated to the caller.
raise
else:
print('{} retrieved.'.format(cc))
# Spaces in the country name are replaced with underscores before the name is
# used in the saved file name '<country>-<cc>.gif'.
country = country.replace(' ', '_')
save_flag(image, '{}-{}.gif'.format(country, cc))
status = Status.ok
status = HTTPStatus.ok
msg = 'OK'
# Per-country progress line, only in verbose mode.
if verbose:
print(cc, msg)
return Result(status, cc)
def download_many(cc_list):
    """Download the flags for every country code in *cc_list* using threads.

    The codes are processed in sorted order by a thread pool whose size is
    the number of codes, capped at ``MAX_WORKERS``.

    Returns a ``Counts`` built from the number of results per ``Status``
    member, in the declaration order of ``Status``.
    """
    # ThreadPoolExecutor rejects max_workers <= 0, so an empty cc_list must
    # still ask for at least one worker.
    workers = max(1, min(len(cc_list), MAX_WORKERS))
    with futures.ThreadPoolExecutor(workers) as executor:
        # list() drains the lazy map iterator; any exception raised by a
        # worker surfaces here.
        results = list(executor.map(download_one, sorted(cc_list)))
    # One tally per Status member, without building throwaway lists.
    counts = [sum(1 for r in results if r.status == status)
              for status in Status]
    return Counts(*counts)
def download_many(cc_list, base_url, verbose, concur_req):
    """Download flags concurrently and tally the outcome of each request.

    One ``download_one`` task per country code (in sorted order) is submitted
    to a thread pool of ``concur_req`` workers, and results are handled as
    they complete. When *verbose* is false a tqdm progress bar is shown;
    when it is true, failures are printed one per line instead.

    Returns a ``collections.Counter`` keyed by status. HTTP errors and
    connection errors raised by a task are counted as ``HTTPStatus.error``;
    any other exception propagates to the caller.
    """
    tally = collections.Counter()
    with futures.ThreadPoolExecutor(concur_req) as pool:
        # Map each future back to its country code for error reporting.
        pending = {
            pool.submit(download_one, code, base_url, verbose): code
            for code in sorted(cc_list)
        }
        completed = futures.as_completed(pending)
        if not verbose:
            # The progress bar replaces the per-country output.
            completed = tqdm.tqdm(completed, total=len(cc_list))
        for done in completed:
            try:
                outcome = done.result()
            except requests.exceptions.HTTPError as exc:
                template = 'HTTP {res.status_code} - {res.reason}'
                failure = template.format(res=exc.response)
            except requests.exceptions.ConnectionError:
                failure = 'Connection error'
            else:
                failure = ''

            # A non-empty failure message always means "error"; otherwise
            # the task's own status is used.
            status = HTTPStatus.error if failure else outcome.status
            tally[status] += 1
            if failure and verbose:
                print('*** Error for {}: {}'.format(pending[done], failure))

    return tally
# Script entry point: hand download_many to the shared main() driver.
# NOTE(review): two main() calls appear below, apparently from two revisions
# of the script (diff markers lost); the second also passes the default and
# maximum concurrency constants -- confirm which call is current.
if __name__ == '__main__':
main(download_many)
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)