Compare commits
4 Commits
cc4e26c67a
...
cf99650007
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cf99650007 | ||
|
|
ec03da74ca | ||
|
|
5b743b5bd7 | ||
|
|
648e9f6394 |
47
links/sample-urls.txt
Normal file
47
links/sample-urls.txt
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
https://www.oreilly.com/library/view/fluent-python-2nd/9781492056348/
|
||||||
|
https://dask.org/
|
||||||
|
http://example.com/1572039572038573208
|
||||||
|
http://www.unicode.org/
|
||||||
|
https://www.techcrunch.com/2024/startup-funding-trends
|
||||||
|
https://blog.medium.com/writing-tips-for-beginners
|
||||||
|
https://github.com/microsoft/typescript
|
||||||
|
https://stackoverflow.com/questions/javascript-async-await
|
||||||
|
https://www.reddit.com/r/programming/hot
|
||||||
|
https://docs.google.com/spreadsheets/create
|
||||||
|
https://www.youtube.com/watch?v=dQw4w9WgXcQ
|
||||||
|
https://www.amazon.com/dp/B08N5WRWNW
|
||||||
|
https://support.apple.com/iphone-setup-guide
|
||||||
|
https://www.wikipedia.org/wiki/Machine_Learning
|
||||||
|
https://www.linkedin.com/in/johndoe123
|
||||||
|
https://www.instagram.com/p/CxYz123AbC/
|
||||||
|
https://twitter.com/elonmusk/status/1234567890
|
||||||
|
https://www.facebook.com/events/987654321
|
||||||
|
https://drive.google.com/file/d/1AbCdEfGhIjKlMnOp/view
|
||||||
|
https://www.dropbox.com/s/qwerty123/document.pdf
|
||||||
|
https://zoom.us/j/1234567890?pwd=abcdef
|
||||||
|
https://calendly.com/janedoe/30min-meeting
|
||||||
|
https://www.shopify.com/admin/products/new
|
||||||
|
https://stripe.com/docs/api/charges/create
|
||||||
|
https://www.paypal.com/invoice/create
|
||||||
|
https://mailchimp.com/campaigns/dashboard
|
||||||
|
https://analytics.google.com/analytics/web/
|
||||||
|
https://console.aws.amazon.com/s3/buckets
|
||||||
|
https://portal.azure.com/dashboard
|
||||||
|
https://www.figma.com/file/AbCdEf123456/design-system
|
||||||
|
https://www.notion.so/workspace/project-notes
|
||||||
|
https://trello.com/b/AbCdEfGh/marketing-board
|
||||||
|
https://slack.com/app_redirect?channel=general
|
||||||
|
https://discord.gg/AbCdEfGh123
|
||||||
|
https://www.twitch.tv/streamername/videos
|
||||||
|
https://www.spotify.com/playlist/37i9dQZF1DXcBWIGoYBM5M
|
||||||
|
https://www.netflix.com/browse/genre/83
|
||||||
|
https://www.hulu.com/series/breaking-bad-2008
|
||||||
|
https://www.airbnb.com/rooms/12345678
|
||||||
|
https://www.booking.com/hotel/us/grand-plaza.html
|
||||||
|
https://www.expedia.com/flights/search?trip=roundtrip
|
||||||
|
https://www.uber.com/ride/request
|
||||||
|
https://www.doordash.com/store/pizza-palace-123
|
||||||
|
https://www.grubhub.com/restaurant/tacos-el-rey-456
|
||||||
|
https://www.zillow.com/homes/for_sale/San-Francisco-CA
|
||||||
|
https://www.craigslist.org/about/sites
|
||||||
|
https://www.python.org/dev/peps/pep-0484/
|
||||||
@@ -1 +1 @@
|
|||||||
# file created and managed by short.py
|
# content of short.htaccess file created and managed by short.py
|
||||||
|
|||||||
103
links/short.py
103
links/short.py
@@ -1,22 +1,41 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
"""
|
||||||
|
short.py generates unique short URLs.
|
||||||
|
|
||||||
|
This script reads lines from stdin or files named as arguments, then:
|
||||||
|
|
||||||
|
1. retrieves or creates new short URLs, taking into account existing RedirectTemp
|
||||||
|
directives in custom.htaccess or short.htaccess;
|
||||||
|
2. appends RedirectTemp directives for newly created short URLs to short.htaccess;
|
||||||
|
3. outputs the list of (short, long) URLs retrieved or created.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import fileinput
|
||||||
import itertools
|
import itertools
|
||||||
from collections.abc import Iterator
|
from collections.abc import Iterator
|
||||||
|
from time import strftime
|
||||||
|
|
||||||
|
HTACCESS_CUSTOM = 'custom.htaccess'
|
||||||
|
HTACCESS_SHORT = 'short.htaccess'
|
||||||
|
HTACCESS_FILES = (HTACCESS_CUSTOM, HTACCESS_SHORT)
|
||||||
|
BASE_DOMAIN = 'fpy.li'
|
||||||
|
|
||||||
|
|
||||||
def load_redirects():
|
def load_redirects() -> tuple[dict, dict]:
|
||||||
redirects = {}
|
redirects = {}
|
||||||
targets = {}
|
targets = {}
|
||||||
for filename in ('custom.htaccess', 'short.htaccess'):
|
for filename in HTACCESS_FILES:
|
||||||
with open(filename) as fp:
|
with open(filename) as fp:
|
||||||
for line in fp:
|
for line in fp:
|
||||||
if line.startswith('RedirectTemp'):
|
if line.startswith('RedirectTemp'):
|
||||||
_, short, long = line.split()
|
_, short, long = line.split()
|
||||||
short = short[1:] # Remove leading slash
|
short = short[1:] # Remove leading slash
|
||||||
assert short not in redirects, f"{filename}: duplicate redirect from {short}"
|
assert short not in redirects, f'{filename}: duplicate redirect from {short}'
|
||||||
# custom.htaccess has been live since 2022, so we cannot change it to remove duplicate targets
|
# custom.htaccess has been live since 2022, so we can't change it to remove duplicate targets
|
||||||
if not filename.startswith('custom'):
|
if filename != HTACCESS_CUSTOM:
|
||||||
assert long not in targets, f"{filename}: Duplicate redirect to {long}"
|
assert long not in targets, f'{filename}: duplicate redirect to {long}'
|
||||||
redirects[short] = long
|
redirects[short] = long
|
||||||
targets[long] = short
|
targets[long] = short
|
||||||
return redirects, targets
|
return redirects, targets
|
||||||
@@ -25,55 +44,51 @@ def load_redirects():
|
|||||||
SDIGITS = '23456789abcdefghjkmnpqrstvwxyz'
|
SDIGITS = '23456789abcdefghjkmnpqrstvwxyz'
|
||||||
|
|
||||||
|
|
||||||
def gen_short() -> Iterator[str]:
|
def gen_short(start_len=1) -> Iterator[str]:
|
||||||
"""
|
"""Generate every possible sequence of SDIGITS, starting with sequences of length start_len."""
|
||||||
Generate every possible sequence of SDIGITS.
|
length = start_len
|
||||||
"""
|
|
||||||
length = 1
|
|
||||||
while True:
|
while True:
|
||||||
for short in itertools.product(SDIGITS, repeat=length):
|
for short in itertools.product(SDIGITS, repeat=length):
|
||||||
yield ''.join(short)
|
yield ''.join(short)
|
||||||
length += 1
|
length += 1
|
||||||
|
|
||||||
|
|
||||||
def shorten(n: int) -> str:
|
def gen_unused_short(redirects: dict) -> Iterator[str]:
|
||||||
"""
|
"""Generate next available short URL of len >= 2."""
|
||||||
Get Nth short URL made from SDIGITS, where 0 is the first.
|
for short in gen_short(2):
|
||||||
"""
|
|
||||||
iter_short = gen_short()
|
|
||||||
for _ in range(n+1):
|
|
||||||
short = next(iter_short)
|
|
||||||
return short
|
|
||||||
|
|
||||||
|
|
||||||
def gen_free_short(redirects: dict) -> Iterator[str]:
|
|
||||||
"""
|
|
||||||
Generate next available short URL.
|
|
||||||
"""
|
|
||||||
for short in gen_short():
|
|
||||||
if short not in redirects:
|
if short not in redirects:
|
||||||
yield short
|
yield short
|
||||||
|
|
||||||
|
|
||||||
def new_urls(urls: list[str], redirects: dict, targets: dict) -> None:
|
def shorten(urls: list[str]) -> list[tuple[str, str]]:
|
||||||
iter_short = gen_free_short(redirects)
|
"""Return (short, long) pairs, appending directives to HTACCESS_SHORT as needed."""
|
||||||
with open('short.htaccess', 'a') as fp:
|
|
||||||
for url in urls:
|
|
||||||
assert 'fpy.li' not in url, f"{url} is a fpy.li URL"
|
|
||||||
if url in targets:
|
|
||||||
continue
|
|
||||||
short = next(iter_short)
|
|
||||||
redirects[short] = url
|
|
||||||
targets[url] = short
|
|
||||||
fp.write(f"RedirectTemp /{short} {url}\n")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
from random import randrange
|
|
||||||
urls = [f'https://example.com/{randrange(100000)}.html' for n in range(7)]
|
|
||||||
|
|
||||||
redirects, targets = load_redirects()
|
redirects, targets = load_redirects()
|
||||||
new_urls(urls, redirects, targets)
|
iter_short = gen_unused_short(redirects)
|
||||||
|
pairs = []
|
||||||
|
timestamp = strftime('%Y-%m-%d %H:%M:%S')
|
||||||
|
with open(HTACCESS_SHORT, 'a') as fp:
|
||||||
|
for long in urls:
|
||||||
|
assert BASE_DOMAIN not in long, f'{long} is a {BASE_DOMAIN} URL'
|
||||||
|
if long in targets:
|
||||||
|
short = targets[long]
|
||||||
|
else:
|
||||||
|
short = next(iter_short)
|
||||||
|
redirects[short] = long
|
||||||
|
targets[long] = short
|
||||||
|
if timestamp:
|
||||||
|
fp.write(f'\n# appended: {timestamp}\n')
|
||||||
|
timestamp = None
|
||||||
|
fp.write(f'RedirectTemp /{short} {long}\n')
|
||||||
|
pairs.append((short, long))
|
||||||
|
|
||||||
|
return pairs
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
"""Read URLs from filename arguments or stdin."""
|
||||||
|
urls = [line.strip() for line in fileinput.input(encoding='utf-8')]
|
||||||
|
for short, long in shorten(urls):
|
||||||
|
print(f'{BASE_DOMAIN}/{short}\t{long}')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|||||||
Reference in New Issue
Block a user