short.py appends timestamps to short.htaccess

This commit is contained in:
Luciano Ramalho 2025-05-22 13:44:46 -03:00
parent 5b743b5bd7
commit ec03da74ca
2 changed files with 26 additions and 22 deletions

View File

@ -6,8 +6,8 @@ short.py generates unique short URLs.
This script reads lines from stdin or files named as arguments, then: This script reads lines from stdin or files named as arguments, then:
1. retrieves or creates new short URLs, taking into account existing RedirectTemp 1. retrieves or creates new short URLs, taking into account existing RedirectTemp
directives in custom.htacess or short.htacess; directives in custom.htaccess or short.htaccess;
2. appends RedirectTemp directives for newly created short URLs to short.htacess; 2. appends RedirectTemp directives for newly created short URLs to short.htaccess;
3. outputs the list of (short, long) URLs retrieved or created. 3. outputs the list of (short, long) URLs retrieved or created.
""" """
@ -17,21 +17,25 @@ import itertools
from collections.abc import Iterator from collections.abc import Iterator
from time import strftime from time import strftime
HTACCESS_CUSTOM = 'custom.htaccess'
HTACCESS_SHORT = 'short.htaccess'
HTACCESS_FILES = (HTACCESS_CUSTOM, HTACCESS_SHORT)
BASE_DOMAIN = 'fpy.li' BASE_DOMAIN = 'fpy.li'
def load_redirects():
def load_redirects() -> tuple[dict, dict]:
redirects = {} redirects = {}
targets = {} targets = {}
for filename in ('custom.htaccess', 'short.htaccess'): for filename in HTACCESS_FILES:
with open(filename) as fp: with open(filename) as fp:
for line in fp: for line in fp:
if line.startswith('RedirectTemp'): if line.startswith('RedirectTemp'):
_, short, long = line.split() _, short, long = line.split()
short = short[1:] # Remove leading slash short = short[1:] # Remove leading slash
assert short not in redirects, f"{filename}: duplicate redirect from {short}" assert short not in redirects, f'{filename}: duplicate redirect from {short}'
# custom is live since 2022, we cannot change it remove duplicate targets # custom.htaccess is live since 2022, we can't change it to remove duplicate targets
if not filename.startswith('custom'): if filename != HTACCESS_CUSTOM:
assert long not in targets, f"{filename}: Duplicate redirect to {long}" assert long not in targets, f'{filename}: duplicate redirect to {long}'
redirects[short] = long redirects[short] = long
targets[long] = short targets[long] = short
return redirects, targets return redirects, targets
@ -41,9 +45,7 @@ SDIGITS = '23456789abcdefghjkmnpqrstvwxyz'
def gen_short(start_len=1) -> Iterator[str]: def gen_short(start_len=1) -> Iterator[str]:
""" """Generate every possible sequence of SDIGITS, starting with start_len"""
Generate every possible sequence of SDIGITS, starting with start_len
"""
length = start_len length = start_len
while True: while True:
for short in itertools.product(SDIGITS, repeat=length): for short in itertools.product(SDIGITS, repeat=length):
@ -52,22 +54,20 @@ def gen_short(start_len=1) -> Iterator[str]:
def gen_unused_short(redirects: dict) -> Iterator[str]: def gen_unused_short(redirects: dict) -> Iterator[str]:
""" """Generate next available short URL of len >= 2."""
Generate next available short URL of len >= 2.
"""
for short in gen_short(2): for short in gen_short(2):
if short not in redirects: if short not in redirects:
yield short yield short
def shorten(urls: list[str], redirects: dict, targets: dict) -> list[tuple[str,str]]: def shorten(urls: list[str], redirects: dict, targets: dict) -> list[tuple[str, str]]:
"""return (short, long) pairs, appending directives to short.htaccess as needed""" """Return (short, long) pairs, appending directives to HTACCESS_SHORT as needed."""
iter_short = gen_unused_short(redirects) iter_short = gen_unused_short(redirects)
pairs = [] pairs = []
timestamp = strftime('%Y-%m-%d %H:%M:%S') timestamp = strftime('%Y-%m-%d %H:%M:%S')
with open('short.htaccess', 'a') as fp: with open(HTACCESS_SHORT, 'a') as fp:
for long in urls: for long in urls:
assert BASE_DOMAIN not in long, f"{long} is a {BASE_DOMAIN} URL" assert BASE_DOMAIN not in long, f'{long} is a {BASE_DOMAIN} URL'
if long in targets: if long in targets:
short = targets[long] short = targets[long]
else: else:
@ -79,16 +79,16 @@ def shorten(urls: list[str], redirects: dict, targets: dict) -> list[tuple[str,s
timestamp = None timestamp = None
fp.write(f'RedirectTemp /{short} {long}\n') fp.write(f'RedirectTemp /{short} {long}\n')
pairs.append((short, long)) pairs.append((short, long))
return pairs return pairs
def main(): def main() -> None:
"""read URLS from filename arguments or stdin""" """read URLS from filename arguments or stdin"""
urls = [line.strip() for line in fileinput.input(encoding="utf-8")] urls = [line.strip() for line in fileinput.input(encoding='utf-8')]
redirects, targets = load_redirects() redirects, targets = load_redirects()
for short, long in shorten(urls, redirects, targets): for short, long in shorten(urls, redirects, targets):
print(f'{BASE_DOMAIN}/{short}\t{long}') print(f'{BASE_DOMAIN}/{short}\t{long}')
if __name__ == '__main__': if __name__ == '__main__':

4
ruff.toml Normal file
View File

@ -0,0 +1,4 @@
line-length = 100
[format]
# Like Python's repr(), use single quotes for strings.
quote-style = "single"