short.py now reads files and stdin

This commit is contained in:
Luciano Ramalho 2025-05-22 13:24:50 -03:00
parent 648e9f6394
commit 5b743b5bd7
3 changed files with 82 additions and 17 deletions

47
links/sample-urls.txt Normal file
View File

@ -0,0 +1,47 @@
https://www.oreilly.com/library/view/fluent-python-2nd/9781492056348/
https://dask.org/
http://example.com/1572039572038573208
http://www.unicode.org/
https://www.techcrunch.com/2024/startup-funding-trends
https://blog.medium.com/writing-tips-for-beginners
https://github.com/microsoft/typescript
https://stackoverflow.com/questions/javascript-async-await
https://www.reddit.com/r/programming/hot
https://docs.google.com/spreadsheets/create
https://www.youtube.com/watch?v=dQw4w9WgXcQ
https://www.amazon.com/dp/B08N5WRWNW
https://support.apple.com/iphone-setup-guide
https://www.wikipedia.org/wiki/Machine_Learning
https://www.linkedin.com/in/johndoe123
https://www.instagram.com/p/CxYz123AbC/
https://twitter.com/elonmusk/status/1234567890
https://www.facebook.com/events/987654321
https://drive.google.com/file/d/1AbCdEfGhIjKlMnOp/view
https://www.dropbox.com/s/qwerty123/document.pdf
https://zoom.us/j/1234567890?pwd=abcdef
https://calendly.com/janedoe/30min-meeting
https://www.shopify.com/admin/products/new
https://stripe.com/docs/api/charges/create
https://www.paypal.com/invoice/create
https://mailchimp.com/campaigns/dashboard
https://analytics.google.com/analytics/web/
https://console.aws.amazon.com/s3/buckets
https://portal.azure.com/dashboard
https://www.figma.com/file/AbCdEf123456/design-system
https://www.notion.so/workspace/project-notes
https://trello.com/b/AbCdEfGh/marketing-board
https://slack.com/app_redirect?channel=general
https://discord.gg/AbCdEfGh123
https://www.twitch.tv/streamername/videos
https://www.spotify.com/playlist/37i9dQZF1DXcBWIGoYBM5M
https://www.netflix.com/browse/genre/83
https://www.hulu.com/series/breaking-bad-2008
https://www.airbnb.com/rooms/12345678
https://www.booking.com/hotel/us/grand-plaza.html
https://www.expedia.com/flights/search?trip=roundtrip
https://www.uber.com/ride/request
https://www.doordash.com/store/pizza-palace-123
https://www.grubhub.com/restaurant/tacos-el-rey-456
https://www.zillow.com/homes/for_sale/San-Francisco-CA
https://www.craigslist.org/about/sites
https://www.python.org/dev/peps/pep-0484/

View File

@ -1 +1 @@
# file created and managed by short.py
# content of short.htaccess file created and managed by short.py

View File

@ -1,8 +1,23 @@
#!/usr/bin/env python3
"""
short.py generates unique short URLs.
This script reads lines from stdin or files named as arguments, then:
1. retrieves or creates new short URLs, taking into account existing RedirectTemp
directives in custom.htaccess or short.htaccess;
2. appends RedirectTemp directives for newly created short URLs to short.htaccess;
3. outputs the list of (short, long) URLs retrieved or created.
"""
import fileinput
import itertools
from collections.abc import Iterator
from time import strftime
BASE_DOMAIN = 'fpy.li'
def load_redirects():
redirects = {}
@ -25,52 +40,55 @@ def load_redirects():
# Alphabet used to build short URL paths.
SDIGITS = '23456789abcdefghjkmnpqrstvwxyz'


def gen_short(start_len: int = 1) -> Iterator[str]:
    """Generate every possible sequence of SDIGITS, shortest first.

    Sequences of length ``start_len`` come first, in the order produced by
    ``itertools.product``; once a length is exhausted the next length
    follows. The generator never terminates on its own.

    Args:
        start_len: length of the first sequences generated.

    Raises:
        ValueError: on the first ``next()`` call if ``start_len`` is less
            than 1, because a zero-length product would yield an empty
            short string.
    """
    if start_len < 1:
        raise ValueError(f'start_len must be >= 1, got {start_len!r}')
    for length in itertools.count(start_len):
        for digits in itertools.product(SDIGITS, repeat=length):
            yield ''.join(digits)
def gen_unused_short(redirects: dict) -> Iterator[str]:
    """Generate the next available short URL of length >= 2.

    Candidates come from ``gen_short(2)``; any candidate that is already a
    key of ``redirects`` is skipped. The membership test runs each time a
    candidate is about to be yielded, so keys added to ``redirects`` while
    iterating are taken into account.

    Args:
        redirects: mapping whose keys are the short strings already in use.
    """
    for short in gen_short(2):
        if short not in redirects:
            yield short
def shorten(urls: list[str], redirects: dict, targets: dict) -> list[tuple[str, str]]:
    """Return (short, long) pairs, appending directives to short.htaccess as needed.

    For each long URL, reuse the short already recorded in ``targets`` or
    take the next unused one. Every newly assigned short is recorded in both
    ``redirects`` and ``targets`` (the dicts are mutated in place) and a
    ``RedirectTemp`` directive is appended to ``short.htaccess``. A single
    ``# appended: <timestamp>`` comment precedes the first directive written
    by this call; nothing is written when every URL was already known.

    Args:
        urls: long URLs to shorten.
        redirects: mapping of short -> long URL; updated in place.
        targets: mapping of long URL -> short; updated in place.

    Returns:
        One (short, long) pair per input URL, in input order.

    Raises:
        AssertionError: if a URL contains BASE_DOMAIN.
    """
    iter_short = gen_unused_short(redirects)
    pairs = []
    # Set to None once the header comment has been written.
    timestamp = strftime('%Y-%m-%d %H:%M:%S')
    # Explicit encoding so non-ASCII URLs are written the same on every platform.
    with open('short.htaccess', 'a', encoding='utf-8') as fp:
        for long in urls:
            # NOTE(review): assert is stripped under `python -O`; kept so the
            # exception type seen by callers does not change.
            assert BASE_DOMAIN not in long, f"{long} is a {BASE_DOMAIN} URL"
            if long in targets:
                short = targets[long]
            else:
                short = next(iter_short)
                redirects[short] = long
                targets[long] = short
                if timestamp:
                    fp.write(f'\n# appended: {timestamp}\n')
                    timestamp = None
                fp.write(f'RedirectTemp /{short} {long}\n')
            pairs.append((short, long))
    return pairs
def main():
    """Read URLs from filename arguments or stdin and print their short URLs.

    Each input line is stripped of surrounding whitespace; lines that are
    empty after stripping are skipped so they are never shortened. One
    ``<BASE_DOMAIN>/<short>`` and long URL pair, separated by a tab, is
    printed per remaining input line.
    """
    with fileinput.input(encoding='utf-8') as lines:
        urls = [url for line in lines if (url := line.strip())]
    redirects, targets = load_redirects()
    for short, long in shorten(urls, redirects, targets):
        print(f'{BASE_DOMAIN}/{short}\t{long}')
if __name__ == '__main__':