131 lines
3.5 KiB
Python
131 lines
3.5 KiB
Python
"""Download flags of countries (with error handling).
|
|
|
|
asyncio version using thread pool to save files
|
|
|
|
Sample run::
|
|
|
|
$
|
|
|
|
"""
|
|
|
|
import asyncio
|
|
import collections
|
|
|
|
import aiohttp
|
|
from aiohttp import web
|
|
import tqdm
|
|
|
|
from flags2_common import main, HTTPStatus, Result, save_flag
|
|
|
|
# default set low to avoid errors from remote site, such as
|
|
# 503 - Service Temporarily Unavailable
|
|
DEFAULT_CONCUR_REQ = 5
|
|
MAX_CONCUR_REQ = 1000
|
|
|
|
|
|
class FetchError(Exception):
|
|
def __init__(self, country_code):
|
|
self.country_code = country_code
|
|
|
|
# BEGIN FLAGS3_ASYNCIO
|
|
@asyncio.coroutine
|
|
def http_get(url):
|
|
res = yield from aiohttp.request('GET', url)
|
|
if res.status == 200:
|
|
ctype = res.headers.get('Content-type', '').lower()
|
|
if 'json' in ctype or url.endswith('json'):
|
|
data = yield from res.json() # <1>
|
|
else:
|
|
data = yield from res.read() # <2>
|
|
return data
|
|
|
|
elif res.status == 404:
|
|
raise web.HTTPNotFound()
|
|
else:
|
|
raise aiohttp.errors.HttpProcessingError(
|
|
code=res.status, message=res.reason,
|
|
headers=res.headers)
|
|
|
|
|
|
@asyncio.coroutine
|
|
def get_country(base_url, cc):
|
|
url = '{}/{cc}/metadata.json'.format(base_url, cc=cc.lower())
|
|
metadata = yield from http_get(url) # <3>
|
|
return metadata['country']
|
|
|
|
|
|
@asyncio.coroutine
|
|
def get_flag(base_url, cc):
|
|
url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
|
|
return (yield from http_get(url)) # <4>
|
|
|
|
|
|
@asyncio.coroutine
|
|
def download_one(cc, base_url, semaphore, verbose):
|
|
try:
|
|
with (yield from semaphore): # <5>
|
|
image = yield from get_flag(base_url, cc)
|
|
with (yield from semaphore):
|
|
country = yield from get_country(base_url, cc)
|
|
except web.HTTPNotFound:
|
|
status = HTTPStatus.not_found
|
|
msg = 'not found'
|
|
except Exception as exc:
|
|
raise FetchError(cc) from exc
|
|
else:
|
|
country = country.replace(' ', '_')
|
|
filename = '{}-{}.gif'.format(country, cc)
|
|
loop = asyncio.get_event_loop()
|
|
loop.run_in_executor(None, save_flag, image, filename)
|
|
status = HTTPStatus.ok
|
|
msg = 'OK'
|
|
|
|
if verbose and msg:
|
|
print(cc, msg)
|
|
|
|
return Result(status, cc)
|
|
# END FLAGS3_ASYNCIO
|
|
|
|
@asyncio.coroutine
|
|
def downloader_coro(cc_list, base_url, verbose, concur_req):
|
|
counter = collections.Counter()
|
|
semaphore = asyncio.Semaphore(concur_req)
|
|
to_do = [download_one(cc, base_url, semaphore, verbose)
|
|
for cc in sorted(cc_list)]
|
|
|
|
to_do_iter = asyncio.as_completed(to_do)
|
|
if not verbose:
|
|
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))
|
|
for future in to_do_iter:
|
|
try:
|
|
res = yield from future
|
|
except FetchError as exc:
|
|
country_code = exc.country_code
|
|
try:
|
|
error_msg = exc.__cause__.args[0]
|
|
except IndexError:
|
|
error_msg = exc.__cause__.__class__.__name__
|
|
if verbose and error_msg:
|
|
msg = '*** Error for {}: {}'
|
|
print(msg.format(country_code, error_msg))
|
|
status = HTTPStatus.error
|
|
else:
|
|
status = res.status
|
|
|
|
counter[status] += 1
|
|
|
|
return counter
|
|
|
|
|
|
def download_many(cc_list, base_url, verbose, concur_req):
|
|
loop = asyncio.get_event_loop()
|
|
coro = downloader_coro(cc_list, base_url, verbose, concur_req)
|
|
counts = loop.run_until_complete(coro)
|
|
loop.close()
|
|
|
|
return counts
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
|