short.py now reads files and stdin

2025-05-22 13:24:50 -03:00
parent 648e9f6394
commit 5b743b5bd7
3 changed files with 82 additions and 17 deletions
--- a/links/short.py
+++ b/links/short.py
@@ -1,8 +1,23 @@
 #!/usr/bin/env python3

+"""
+short.py generates unique short URLs.
+
+This script reads lines from stdin or files named as arguments, then:
+
+1. retrieves or creates new short URLs, taking into account existing RedirectTemp
+   directives in custom.htaccess or short.htaccess;
+2. appends RedirectTemp directives for newly created short URLs to short.htaccess;
+3. outputs the list of (short, long) URLs retrieved or created.
+
+"""
+
+import fileinput
 import itertools
 from collections.abc import Iterator
+from time import strftime

+BASE_DOMAIN = 'fpy.li'

 def load_redirects():
    redirects = {}
@@ -25,52 +40,55 @@ def load_redirects():
 SDIGITS = '23456789abcdefghjkmnpqrstvwxyz'


-def gen_short() -> Iterator[str]:
+def gen_short(start_len=1) -> Iterator[str]:
    """
-    Generate every possible sequence of SDIGITS.
+    Generate every possible sequence of SDIGITS, starting with sequences of length start_len
    """
-    length = 1
+    length = start_len
    while True:
        for short in itertools.product(SDIGITS, repeat=length):
            yield ''.join(short)
        length += 1


-def gen_free_short(redirects: dict) -> Iterator[str]:
+def gen_unused_short(redirects: dict) -> Iterator[str]:
    """
-    Generate next available short URL.
+    Generate next available short URL of len >= 2.
    """
-    for short in gen_short():
+    for short in gen_short(2):
        if short not in redirects:
            yield short


 def shorten(urls: list[str], redirects: dict, targets: dict) -> list[tuple[str,str]]:
-    """return (short, long) pairs, updating short.htaccess as needed""'
-    iter_short = gen_free_short(redirects)
+    """return (short, long) pairs, appending directives to short.htaccess as needed"""
+    iter_short = gen_unused_short(redirects)
    pairs = []
+    timestamp = strftime('%Y-%m-%d %H:%M:%S')
    with open('short.htaccess', 'a') as fp:
        for long in urls:
-            assert 'fpy.li' not in long, f"{long} is a fpy.li URL"
+            assert BASE_DOMAIN not in long, f"{long} is a {BASE_DOMAIN} URL"
            if long in targets:
                short = targets[long]
            else:
                short = next(iter_short)
-                redirects[short] = url
-                targets[url] = short
-                fp.write(f"RedirectTemp /{short} {url}\n")
+                redirects[short] = long
+                targets[long] = short
+                if timestamp:
+                    fp.write(f'\n# appended: {timestamp}\n')
+                    timestamp = None
+                fp.write(f'RedirectTemp /{short} {long}\n')
            pairs.append((short, long))
            
    return pairs


 def main():
-    from random import randrange
-    urls = [f'https://example.com/{randrange(100000)}.html' for n in range(7)]
-
+    """read URLs from filename arguments or stdin"""
+    urls = [line.strip() for line in fileinput.input(encoding="utf-8")]
    redirects, targets = load_redirects()
    for short, long in shorten(urls, redirects, targets):
-    		print(f'fpy.li/{short}\t{long}')
+    		print(f'{BASE_DOMAIN}/{short}\t{long}')


 if __name__ == '__main__':