short.py now reads files and stdin

This commit is contained in:
Luciano Ramalho
2025-05-22 13:24:50 -03:00
parent 648e9f6394
commit 5b743b5bd7
3 changed files with 82 additions and 17 deletions

View File

@@ -1,8 +1,23 @@
#!/usr/bin/env python3
"""
short.py generates unique short URLs.
This script reads lines from stdin or files named as arguments, then:
1. retrieves or creates new short URLs, taking into account existing RedirectTemp
directives in custom.htaccess or short.htaccess;
2. appends RedirectTemp directives for newly created short URLs to short.htaccess;
3. outputs the list of (short, long) URLs retrieved or created.
"""
import fileinput
import itertools
from collections.abc import Iterator
from time import strftime
BASE_DOMAIN = 'fpy.li'
def load_redirects():
redirects = {}
@@ -25,52 +40,55 @@ def load_redirects():
SDIGITS = '23456789abcdefghjkmnpqrstvwxyz'


def gen_short(start_len=1) -> Iterator[str]:
    """
    Generate every possible sequence of SDIGITS, starting with start_len.

    Yields every string of length ``start_len`` in the order produced by
    ``itertools.product(SDIGITS, repeat=length)``, then every string one
    character longer, and so on. The generator never terminates by itself;
    the caller decides when to stop consuming it.
    """
    length = start_len
    while True:
        for short in itertools.product(SDIGITS, repeat=length):
            yield ''.join(short)
        # all combinations of this length are exhausted: move to the next length
        length += 1
def gen_unused_short(redirects: dict) -> Iterator[str]:
    """
    Generate next available short URL of len >= 2.

    Walks the candidates from ``gen_short(2)`` in order and yields only those
    that are not currently keys of ``redirects``. The membership check runs
    when each candidate is reached, so keys added to ``redirects`` between
    two ``next()`` calls are skipped as well.
    """
    for short in gen_short(2):
        if short not in redirects:
            yield short
def shorten(urls: list[str], redirects: dict, targets: dict) -> list[tuple[str, str]]:
    """
    Return (short, long) pairs, appending directives to short.htaccess as needed.

    For each long URL in ``urls``: reuse the short path already recorded in
    ``targets`` or take the next unused one from ``gen_unused_short``. New
    assignments are stored in both ``redirects`` (short -> long) and
    ``targets`` (long -> short), which are mutated in place, and appended to
    ``short.htaccess`` as ``RedirectTemp`` directives.

    Raises AssertionError if a URL contains BASE_DOMAIN.
    """
    iter_short = gen_unused_short(redirects)
    pairs = []
    timestamp = strftime('%Y-%m-%d %H:%M:%S')
    with open('short.htaccess', 'a') as fp:
        for long in urls:
            assert BASE_DOMAIN not in long, f"{long} is a {BASE_DOMAIN} URL"
            if long in targets:
                short = targets[long]
            else:
                short = next(iter_short)
                redirects[short] = long
                targets[long] = short
                # write the timestamp header once, just before the first
                # directive appended in this run
                if timestamp:
                    fp.write(f'\n# appended: {timestamp}\n')
                    timestamp = None
                fp.write(f'RedirectTemp /{short} {long}\n')
            pairs.append((short, long))
    return pairs
def main():
    """
    Read URLs from filename arguments or stdin and print their short URLs.

    Each input line is stripped of surrounding whitespace. One
    ``BASE_DOMAIN/short<TAB>long`` line is printed per URL, in input order.
    """
    stripped = (line.strip() for line in fileinput.input(encoding="utf-8"))
    # skip blank lines: an empty string would otherwise be assigned a short
    # URL and produce a RedirectTemp directive with no target
    urls = [url for url in stripped if url]
    redirects, targets = load_redirects()
    for short, long in shorten(urls, redirects, targets):
        print(f'{BASE_DOMAIN}/{short}\t{long}')
if __name__ == '__main__':