sequential, threaded & async HTTP clients using HTTPX

This commit is contained in:
Luciano Ramalho
2021-10-02 17:12:42 -03:00
parent 7985fda09f
commit 4f1392d21c
12 changed files with 271 additions and 224 deletions

View File

@@ -2,17 +2,19 @@
"""Download flags of countries (with error handling).
asyncio async/await version using run_in_executor for save_flag.
asyncio async/await version
"""
# tag::FLAGS2_ASYNCIO_TOP[]
import asyncio
from collections import Counter
from http import HTTPStatus
from pathlib import Path
import aiohttp
import httpx
import tqdm # type: ignore
from flags2_common import main, HTTPStatus, Result, save_flag
from flags2_common import main, DownloadStatus, save_flag
# default set low to avoid errors from remote site, such as
# 503 - Service Temporarily Unavailable
@@ -20,90 +22,87 @@ DEFAULT_CONCUR_REQ = 5
# passed to main() together with DEFAULT_CONCUR_REQ; presumably the ceiling
# on the concurrency level a user may request -- confirm in flags2_common.main
MAX_CONCUR_REQ = 1000
class FetchError(Exception):
    """Error raised when the flag for one country could not be fetched.

    Attributes:
        country_code: the country code whose download failed.
    """

    def __init__(self, country_code: str):
        # Forward the code to Exception so that ``args`` and ``str(exc)``
        # are populated instead of being silently empty.
        super().__init__(country_code)
        self.country_code = country_code
async def get_flag(session: httpx.AsyncClient,  # <2>
                   base_url: str,
                   cc: str) -> bytes:
    """Fetch the GIF image of one flag and return its raw bytes.

    Args:
        session: HTTPX asynchronous client used to issue the request.
        base_url: prefix of the flag URLs.
        cc: country code; used for both the directory and the file name.

    Returns:
        The body of the HTTP response.

    Raises:
        httpx.HTTPStatusError: via ``raise_for_status()`` when the server
            answers with an error status.
    """
    # The whole URL is lower-cased, including whatever base_url holds.
    url = f'{base_url}/{cc}/{cc}.gif'.lower()
    resp = await session.get(url, timeout=3.1, follow_redirects=True)  # <3>
    resp.raise_for_status()
    return resp.content
async def download_one(session: httpx.AsyncClient,
                       cc: str,
                       base_url: str,
                       semaphore: asyncio.Semaphore,
                       verbose: bool) -> DownloadStatus:
    """Download one flag, hand it to ``save_flag`` and report the outcome.

    Args:
        session: HTTPX asynchronous client shared by all downloads.
        cc: country code of the flag to download.
        base_url: prefix of the flag URLs.
        semaphore: held while the flag is being fetched.
        verbose: when true, print one line with the country code and result.

    Returns:
        ``DownloadStatus.OK`` after the image was fetched and saved, or
        ``DownloadStatus.NOT_FOUND`` when the server answered 404.

    Raises:
        httpx.HTTPStatusError: for any error status other than 404.
        Any other exception raised by ``get_flag`` or ``save_flag``
        propagates unchanged.
    """
    try:
        # Only the network request is guarded by the semaphore.
        async with semaphore:
            image = await get_flag(session, base_url, cc)
    except httpx.HTTPStatusError as exc:
        res = exc.response
        if res.status_code == HTTPStatus.NOT_FOUND:
            status = DownloadStatus.NOT_FOUND
            msg = f'not found: {res.url}'
        else:
            raise
    else:
        # tag::FLAGS2_ASYNCIO_EXECUTOR[]
        loop = asyncio.get_running_loop()  # <1>
        # Await the executor future: otherwise OK would be reported before
        # save_flag finished, and any exception it raised would be lost.
        await loop.run_in_executor(None, save_flag,  # <2>
                                   image, f'{cc}.gif')  # <3>
        # end::FLAGS2_ASYNCIO_EXECUTOR[]
        status = DownloadStatus.OK
        msg = 'OK'
    if verbose and msg:
        print(cc, msg)
    return status
async def supervisor(cc_list: list[str],
                     base_url: str,
                     verbose: bool,
                     concur_req: int) -> Counter[DownloadStatus]:  # <1>
    """Download all flags concurrently and tally the outcomes.

    Args:
        cc_list: country codes to download; processed in sorted order.
        base_url: prefix of the flag URLs.
        verbose: when true, print per-country messages; otherwise show a
            tqdm progress bar.
        concur_req: initial value of the semaphore shared by the downloads.

    Returns:
        A counter mapping each ``DownloadStatus`` to the number of
        downloads that ended with it.
    """
    counter: Counter[DownloadStatus] = Counter()
    semaphore = asyncio.Semaphore(concur_req)  # <2>
    async with httpx.AsyncClient() as session:
        to_do = [download_one(session, cc, base_url, semaphore, verbose)
                 for cc in sorted(cc_list)]  # <3>
        to_do_iter = asyncio.as_completed(to_do)  # <4>
        if not verbose:
            to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))  # <5>
        for coro in to_do_iter:  # <6>
            try:
                status = await coro  # <7>
            except httpx.HTTPStatusError as exc:  # <13>
                error_msg = 'HTTP error {resp.status_code} - {resp.reason_phrase}'
                error_msg = error_msg.format(resp=exc.response)
                error = exc
            except httpx.RequestError as exc:  # <15>
                error_msg = f'{exc} {type(exc)}'.strip()
                error = exc
            except KeyboardInterrupt:  # <7>
                break
            else:  # <8>
                # Reset on success so an earlier failure is not counted again.
                error = None

            if error is not None:
                status = DownloadStatus.ERROR  # <9>
                if verbose:  # <11>
                    # The country code is recovered from the file name at the
                    # end of the request URL.
                    cc = Path(str(error.request.url)).stem.upper()
                    print(f'{cc} error: {error_msg}')
            counter[status] += 1  # <10>

    return counter  # <12>
def download_many(cc_list: list[str],
                  base_url: str,
                  verbose: bool,
                  concur_req: int) -> Counter[DownloadStatus]:
    """Synchronous entry point: run ``supervisor`` to completion.

    Args:
        cc_list: country codes to download.
        base_url: prefix of the flag URLs.
        verbose: forwarded to ``supervisor``.
        concur_req: forwarded to ``supervisor``.

    Returns:
        The counter of download outcomes produced by ``supervisor``.
    """
    coro = supervisor(cc_list, base_url, verbose, concur_req)
    counts = asyncio.run(coro)  # <14>
    return counts
# Script entry point: main() receives the download function and the two
# concurrency constants defined near the top of this module.
if __name__ == '__main__':
    main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
# end::FLAGS2_ASYNCIO_START[]