example-code-2e/04-text-byte/categories.py

46 lines
1.1 KiB
Python
Raw Normal View History

2021-02-15 00:28:07 +01:00
import sys
import collections
from unicodedata import name, category
def category_stats(codes=None):
    """Count characters per Unicode general category.

    Args:
        codes: Optional iterable of integer code points to examine.
            When omitted (or ``None``), every code point from 0 through
            ``sys.maxunicode`` is examined, which is the original behavior.

    Returns:
        A ``(counts, firsts)`` tuple. ``counts`` is a
        ``collections.Counter`` mapping each category string returned by
        ``unicodedata.category`` to how many of the examined characters
        fall in it. ``firsts`` is a dict mapping each category to the first
        character encountered in that category, in iteration order.
    """
    if codes is None:
        codes = range(sys.maxunicode + 1)
    counts = collections.Counter()
    firsts = {}
    for code in codes:
        char = chr(code)
        cat = category(char)
        # setdefault only stores on the first sighting of a category,
        # so later characters never overwrite the recorded "first".
        firsts.setdefault(cat, char)
        counts[cat] += 1
    return counts, firsts
def category_scan(desired, codes=None):
    """Yield each character whose Unicode general category is *desired*.

    Args:
        desired: Category string to match against the value returned by
            ``unicodedata.category`` (compared with ``==``, so the match
            is exact and case-sensitive).
        codes: Optional iterable of integer code points to examine.
            When omitted (or ``None``), every code point from 0 through
            ``sys.maxunicode`` is examined, which is the original behavior.

    Yields:
        Matching characters, one at a time, in iteration order. Because
        this is a generator, a caller may stop early without paying for
        the rest of the scan.
    """
    if codes is None:
        codes = range(sys.maxunicode + 1)
    for code in codes:
        char = chr(code)
        if category(char) == desired:
            yield char
def main(args):
    """Command-line entry point.

    Args:
        args: An ``sys.argv``-style list. ``args[0]`` is ignored. When the
            list has exactly two items, ``args[1]`` is taken as a Unicode
            general category and characters in that category are printed
            on one line, separated by spaces, followed by a line reporting
            how many were shown. For any other length, a table is printed
            with one row per category: count, category, first character,
            ordered from most to least common.
    """
    if len(args) == 2:
        desired = args[1]
        # Lone surrogates cannot be encoded by UTF-8 output streams, so
        # print() would raise UnicodeEncodeError on the first 'Cs'
        # character.  Show the same placeholder the stats table uses.
        surrogates = desired == 'Cs'
        shown = 0
        for char in category_scan(desired):
            if surrogates:
                char = f'(surrogate U+{ord(char):04X})'
            print(char, end=' ')
            shown += 1
            # The check runs after the increment, so the loop stops once
            # the 201st character has been printed.
            if shown > 200:
                break
        print()
        print(shown, 'characters shown')
    else:
        counts, firsts = category_stats()
        for cat, total in counts.most_common():
            first = firsts[cat]
            if cat == 'Cs':
                # Same reason as above: a raw surrogate is not printable.
                first = f'(surrogate U+{ord(first):04X})'
            print(f'{total:6} {cat} {first}')
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main(sys.argv)