2e reviewed manuscript
This commit is contained in:
@@ -37,7 +37,7 @@ async def download_one(client: httpx.AsyncClient,
|
||||
try:
|
||||
async with semaphore: # <3>
|
||||
image = await get_flag(client, base_url, cc)
|
||||
except httpx.HTTPStatusError as exc: # <5>
|
||||
except httpx.HTTPStatusError as exc: # <4>
|
||||
res = exc.response
|
||||
if res.status_code == HTTPStatus.NOT_FOUND:
|
||||
status = DownloadStatus.NOT_FOUND
|
||||
@@ -45,7 +45,7 @@ async def download_one(client: httpx.AsyncClient,
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
await asyncio.to_thread(save_flag, image, f'{cc}.gif') # <6>
|
||||
await asyncio.to_thread(save_flag, image, f'{cc}.gif') # <5>
|
||||
status = DownloadStatus.OK
|
||||
msg = 'OK'
|
||||
if verbose and msg:
|
||||
|
||||
@@ -22,23 +22,23 @@ DEFAULT_CONCUR_REQ = 5
|
||||
MAX_CONCUR_REQ = 1000
|
||||
|
||||
|
||||
async def get_flag(session: httpx.AsyncClient, # <2>
|
||||
async def get_flag(client: httpx.AsyncClient, # <2>
|
||||
base_url: str,
|
||||
cc: str) -> bytes:
|
||||
url = f'{base_url}/{cc}/{cc}.gif'.lower()
|
||||
resp = await session.get(url, timeout=3.1, follow_redirects=True) # <3>
|
||||
resp = await client.get(url, timeout=3.1, follow_redirects=True) # <3>
|
||||
resp.raise_for_status()
|
||||
return resp.content
|
||||
|
||||
|
||||
async def download_one(session: httpx.AsyncClient,
|
||||
async def download_one(client: httpx.AsyncClient,
|
||||
cc: str,
|
||||
base_url: str,
|
||||
semaphore: asyncio.Semaphore,
|
||||
verbose: bool) -> DownloadStatus:
|
||||
try:
|
||||
async with semaphore:
|
||||
image = await get_flag(session, base_url, cc)
|
||||
image = await get_flag(client, base_url, cc)
|
||||
except httpx.HTTPStatusError as exc:
|
||||
res = exc.response
|
||||
if res.status_code == HTTPStatus.NOT_FOUND:
|
||||
@@ -64,8 +64,8 @@ async def supervisor(cc_list: list[str],
|
||||
concur_req: int) -> Counter[DownloadStatus]: # <1>
|
||||
counter: Counter[DownloadStatus] = Counter()
|
||||
semaphore = asyncio.Semaphore(concur_req) # <2>
|
||||
async with httpx.AsyncClient() as session:
|
||||
to_do = [download_one(session, cc, base_url, semaphore, verbose)
|
||||
async with httpx.AsyncClient() as client:
|
||||
to_do = [download_one(client, cc, base_url, semaphore, verbose)
|
||||
for cc in sorted(cc_list)] # <3>
|
||||
to_do_iter = asyncio.as_completed(to_do) # <4>
|
||||
if not verbose:
|
||||
|
||||
119
20-executors/getflags/flags3_asyncio.py
Executable file
119
20-executors/getflags/flags3_asyncio.py
Executable file
@@ -0,0 +1,119 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""Download flags of countries (with error handling).
|
||||
|
||||
asyncio async/await version
|
||||
|
||||
"""
|
||||
# tag::FLAGS2_ASYNCIO_TOP[]
|
||||
import asyncio
|
||||
from collections import Counter
|
||||
from http import HTTPStatus
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import tqdm # type: ignore
|
||||
|
||||
from flags2_common import main, DownloadStatus, save_flag
|
||||
|
||||
# low concurrency default to avoid errors from remote site,
|
||||
# such as 503 - Service Temporarily Unavailable
|
||||
DEFAULT_CONCUR_REQ = 5
|
||||
MAX_CONCUR_REQ = 1000
|
||||
|
||||
async def get_flag(client: httpx.AsyncClient, # <1>
|
||||
base_url: str,
|
||||
cc: str) -> bytes:
|
||||
url = f'{base_url}/{cc}/{cc}.gif'.lower()
|
||||
resp = await client.get(url, timeout=3.1, follow_redirects=True) # <2>
|
||||
resp.raise_for_status()
|
||||
return resp.content
|
||||
|
||||
# tag::FLAGS3_ASYNCIO_GET_COUNTRY[]
|
||||
async def get_country(client: httpx.AsyncClient,
|
||||
base_url: str,
|
||||
cc: str) -> str: # <1>
|
||||
url = f'{base_url}/{cc}/metadata.json'.lower()
|
||||
resp = await client.get(url, timeout=3.1, follow_redirects=True)
|
||||
resp.raise_for_status()
|
||||
metadata = resp.json() # <2>
|
||||
return metadata['country'] # <3>
|
||||
# end::FLAGS3_ASYNCIO_GET_COUNTRY[]
|
||||
|
||||
# tag::FLAGS3_ASYNCIO_DOWNLOAD_ONE[]
|
||||
async def download_one(client: httpx.AsyncClient,
|
||||
cc: str,
|
||||
base_url: str,
|
||||
semaphore: asyncio.Semaphore,
|
||||
verbose: bool) -> DownloadStatus:
|
||||
try:
|
||||
async with semaphore: # <1>
|
||||
image = await get_flag(client, base_url, cc)
|
||||
async with semaphore: # <2>
|
||||
country = await get_country(client, base_url, cc)
|
||||
except httpx.HTTPStatusError as exc:
|
||||
res = exc.response
|
||||
if res.status_code == HTTPStatus.NOT_FOUND:
|
||||
status = DownloadStatus.NOT_FOUND
|
||||
msg = f'not found: {res.url}'
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
filename = country.replace(' ', '_') # <3>
|
||||
await asyncio.to_thread(save_flag, image, f'{filename}.gif')
|
||||
status = DownloadStatus.OK
|
||||
msg = 'OK'
|
||||
if verbose and msg:
|
||||
print(cc, msg)
|
||||
return status
|
||||
# end::FLAGS3_ASYNCIO_DOWNLOAD_ONE[]
|
||||
|
||||
# tag::FLAGS2_ASYNCIO_START[]
|
||||
async def supervisor(cc_list: list[str],
|
||||
base_url: str,
|
||||
verbose: bool,
|
||||
concur_req: int) -> Counter[DownloadStatus]: # <1>
|
||||
counter: Counter[DownloadStatus] = Counter()
|
||||
semaphore = asyncio.Semaphore(concur_req) # <2>
|
||||
async with httpx.AsyncClient() as client:
|
||||
to_do = [download_one(client, cc, base_url, semaphore, verbose)
|
||||
for cc in sorted(cc_list)] # <3>
|
||||
to_do_iter = asyncio.as_completed(to_do) # <4>
|
||||
if not verbose:
|
||||
to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list)) # <5>
|
||||
error: httpx.HTTPError | None = None # <6>
|
||||
for coro in to_do_iter: # <7>
|
||||
try:
|
||||
status = await coro # <8>
|
||||
except httpx.HTTPStatusError as exc:
|
||||
error_msg = 'HTTP error {resp.status_code} - {resp.reason_phrase}'
|
||||
error_msg = error_msg.format(resp=exc.response)
|
||||
error = exc # <9>
|
||||
except httpx.RequestError as exc:
|
||||
error_msg = f'{exc} {type(exc)}'.strip()
|
||||
error = exc # <10>
|
||||
except KeyboardInterrupt:
|
||||
break
|
||||
|
||||
if error:
|
||||
status = DownloadStatus.ERROR # <11>
|
||||
if verbose:
|
||||
url = str(error.request.url) # <12>
|
||||
cc = Path(url).stem.upper() # <13>
|
||||
print(f'{cc} error: {error_msg}')
|
||||
counter[status] += 1
|
||||
|
||||
return counter
|
||||
|
||||
def download_many(cc_list: list[str],
|
||||
base_url: str,
|
||||
verbose: bool,
|
||||
concur_req: int) -> Counter[DownloadStatus]:
|
||||
coro = supervisor(cc_list, base_url, verbose, concur_req)
|
||||
counts = asyncio.run(coro) # <14>
|
||||
|
||||
return counts
|
||||
|
||||
if __name__ == '__main__':
|
||||
main(download_many, DEFAULT_CONCUR_REQ, MAX_CONCUR_REQ)
|
||||
# end::FLAGS2_ASYNCIO_START[]
|
||||
@@ -17,15 +17,15 @@ from httpx import AsyncClient # <1>
|
||||
|
||||
from flags import BASE_URL, save_flag, main # <2>
|
||||
|
||||
async def download_one(session: AsyncClient, cc: str): # <3>
|
||||
image = await get_flag(session, cc)
|
||||
async def download_one(client: AsyncClient, cc: str): # <3>
|
||||
image = await get_flag(client, cc)
|
||||
save_flag(image, f'{cc}.gif')
|
||||
print(cc, end=' ', flush=True)
|
||||
return cc
|
||||
|
||||
async def get_flag(session: AsyncClient, cc: str) -> bytes: # <4>
|
||||
async def get_flag(client: AsyncClient, cc: str) -> bytes: # <4>
|
||||
url = f'{BASE_URL}/{cc}/{cc}.gif'.lower()
|
||||
resp = await session.get(url, timeout=6.1,
|
||||
resp = await client.get(url, timeout=6.1,
|
||||
follow_redirects=True) # <5>
|
||||
return resp.read() # <6>
|
||||
# end::FLAGS_ASYNCIO_TOP[]
|
||||
@@ -35,8 +35,8 @@ def download_many(cc_list: list[str]) -> int: # <1>
|
||||
return asyncio.run(supervisor(cc_list)) # <2>
|
||||
|
||||
async def supervisor(cc_list: list[str]) -> int:
|
||||
async with AsyncClient() as session: # <3>
|
||||
to_do = [download_one(session, cc)
|
||||
async with AsyncClient() as client: # <3>
|
||||
to_do = [download_one(client, cc)
|
||||
for cc in sorted(cc_list)] # <4>
|
||||
res = await asyncio.gather(*to_do) # <5>
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import httpx # make httpx classes available to .__subclasses__()
|
||||
|
||||
def tree(cls, level=0, last_sibling=True):
|
||||
yield cls, level, last_sibling
|
||||
|
||||
# get RuntimeError and exceptions defined in httpx
|
||||
subclasses = [sub for sub in cls.__subclasses__()
|
||||
if sub is RuntimeError or sub.__module__ == 'httpx']
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
import httpx
|
||||
|
||||
def tree(cls, level=0):
|
||||
yield cls.__name__, level
|
||||
for sub_cls in cls.__subclasses__():
|
||||
yield from tree(sub_cls, level+1)
|
||||
|
||||
|
||||
def display(cls):
|
||||
for cls_name, level in tree(cls):
|
||||
indent = ' ' * 4 * level
|
||||
print(f'{indent}{cls_name}')
|
||||
|
||||
|
||||
def find_roots(module):
|
||||
exceptions = []
|
||||
for name in dir(module):
|
||||
obj = getattr(module, name)
|
||||
if isinstance(obj, type) and issubclass(obj, BaseException):
|
||||
exceptions.append(obj)
|
||||
roots = []
|
||||
for exc in exceptions:
|
||||
root = True
|
||||
for other in exceptions:
|
||||
if exc is not other and issubclass(exc, other):
|
||||
root = False
|
||||
break
|
||||
if root:
|
||||
roots.append(exc)
|
||||
return roots
|
||||
|
||||
|
||||
def main():
|
||||
for exc in find_roots(httpx):
|
||||
display(exc)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Reference in New Issue
Block a user