46 lines
1.1 KiB
Python
46 lines
1.1 KiB
Python
import sys
|
|
import collections
|
|
from unicodedata import category
|
|
|
|
|
|
def category_stats():
|
|
counts = collections.Counter()
|
|
firsts = {}
|
|
for code in range(sys.maxunicode + 1):
|
|
char = chr(code)
|
|
cat = category(char)
|
|
if cat not in counts:
|
|
firsts[cat] = char
|
|
counts[cat] += 1
|
|
return counts, firsts
|
|
|
|
|
|
def category_scan(desired):
|
|
for code in range(sys.maxunicode + 1):
|
|
char = chr(code)
|
|
if category(char) == desired:
|
|
yield char
|
|
|
|
|
|
def main(args):
|
|
count = 0
|
|
if len(args) == 2:
|
|
for char in category_scan(args[1]):
|
|
print(char, end=' ')
|
|
count += 1
|
|
if count > 200:
|
|
break
|
|
print()
|
|
print(count, 'characters shown')
|
|
else:
|
|
counts, firsts = category_stats()
|
|
for i, (cat, count) in enumerate(counts.most_common(), 1):
|
|
first = firsts[cat]
|
|
if cat == 'Cs':
|
|
first = f'(surrogate U+{ord(first):04X})'
|
|
print(f'{i:2} {count:6} {cat} {first}')
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main(sys.argv)
|