2021-02-15 00:28:07 +01:00
|
|
|
import sys
|
|
|
|
import collections
|
2021-05-21 23:56:12 +02:00
|
|
|
from unicodedata import category
|
2021-02-15 00:28:07 +01:00
|
|
|
|
|
|
|
|
|
|
|
def category_stats():
    """Tally Unicode general categories over the whole code point range.

    Walks every code point from 0 through ``sys.maxunicode`` in ascending
    order and looks up its two-letter general category.

    Returns:
        A ``(counts, firsts)`` tuple where ``counts`` is a
        ``collections.Counter`` mapping each category to the number of code
        points in it, and ``firsts`` is a dict mapping each category to the
        lowest-numbered character that belongs to it.

    Note:
        ``firsts['Cs']`` is a lone surrogate, which most output encodings
        refuse to encode; callers should format it before printing.
    """
    counts = collections.Counter()
    firsts = {}
    for code in range(sys.maxunicode + 1):
        char = chr(code)
        cat = category(char)
        # Code points are visited in ascending order, so the first character
        # stored for a category is also its lowest one.
        firsts.setdefault(cat, char)
        counts[cat] += 1
    return counts, firsts
|
|
|
|
|
|
|
|
|
|
|
|
def category_scan(desired):
    """Yield every character whose Unicode general category equals `desired`.

    Args:
        desired: A general category abbreviation as returned by
            ``unicodedata.category`` (for example ``'Lu'`` or ``'Nd'``).
            The comparison is an exact string match, so an unknown or
            partial abbreviation simply yields nothing.

    Yields:
        Matching one-character strings in ascending code point order,
        covering code points 0 through ``sys.maxunicode``. The scan is lazy,
        so a caller that stops early does not pay for the whole range.
    """
    for code in range(sys.maxunicode + 1):
        char = chr(code)
        if category(char) == desired:
            yield char
|
2021-02-15 00:28:07 +01:00
|
|
|
|
|
|
|
|
|
|
|
def _displayable(char):
    """Return `char`, or a ``(surrogate U+XXXX)`` label if it is a surrogate."""
    # Lone surrogates cannot be encoded by a strict UTF-8 stdout, so printing
    # one raw would raise UnicodeEncodeError.
    if category(char) == 'Cs':
        return f'(surrogate U+{ord(char):04X})'
    return char


def main(args):
    """Run the command-line interface.

    Args:
        args: The full argument vector in ``sys.argv`` form, i.e. ``args[0]``
            is the program name.

    Behavior:
        * With exactly one argument after the program name, that argument is
          used as a general category abbreviation. Matching characters are
          printed on one line separated by spaces; output stops as soon as
          more than 200 have been shown (so at most 201), and a line with
          the number shown follows.
        * With any other number of arguments, one line per category is
          printed, most populous first: rank, size, abbreviation and the
          first character of the category.

        In both modes surrogate code points are printed as
        ``(surrogate U+XXXX)`` instead of the raw character.
    """
    if len(args) == 2:
        count = 0
        for char in category_scan(args[1]):
            print(_displayable(char), end=' ')
            count += 1
            if count > 200:
                break
        print()
        print(count, 'characters shown')
    else:
        counts, firsts = category_stats()
        for i, (cat, total) in enumerate(counts.most_common(), 1):
            first = _displayable(firsts[cat])
            print(f'{i:2} {total:6} {cat} {first}')
|
2021-02-15 00:28:07 +01:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Executed as a script: pass the raw argument vector (program name
    # included) to main().
    main(sys.argv)
|