example-code-2e/04-text-byte/categories.py

import sys
import collections
from unicodedata import category


def category_stats():
    """Tally Unicode general categories over every code point.

    Walks all code points from 0 through ``sys.maxunicode`` and returns a
    2-tuple ``(tally, earliest)``:

    * ``tally`` -- a ``collections.Counter`` mapping each category code
      reported by ``unicodedata.category`` to how many code points have it;
    * ``earliest`` -- a dict mapping each category code to the first
      (lowest code point) character found in that category.
    """
    tally = collections.Counter()
    earliest = {}
    for symbol in map(chr, range(sys.maxunicode + 1)):
        label = category(symbol)
        # Code points are visited in ascending order, so only the first
        # character seen for a category is ever stored.
        earliest.setdefault(label, symbol)
        tally[label] += 1
    return tally, earliest


def category_scan(desired):
    """Lazily yield every character whose general category is *desired*.

    Characters are produced in ascending code point order, covering code
    points 0 through ``sys.maxunicode``.  *desired* is compared for
    equality with the code returned by ``unicodedata.category``, so a
    value that is not an exact category code simply yields nothing.
    """
    every_char = map(chr, range(sys.maxunicode + 1))
    yield from (symbol for symbol in every_char if category(symbol) == desired)


def main(args):
    """Command-line driver; *args* is an argv-style sequence.

    With exactly two items, ``args[1]`` is taken as a category code: the
    matching characters are printed space-separated on one line, stopping
    once the running count exceeds 200, followed by a line reporting how
    many characters were shown.

    With any other number of items, prints one row per category, most
    common first: rank, code point count, category code, and the first
    character of the category.
    """
    if len(args) != 2:
        totals, firsts = category_stats()
        for rank, (cat, total) in enumerate(totals.most_common(), 1):
            sample = firsts[cat]
            if cat == 'Cs':
                # For surrogates, show the code point rather than the
                # character itself.
                sample = f'(surrogate U+{ord(sample):04X})'
            print(f'{rank:2} {total:6} {cat} {sample}')
        return

    shown = 0  # stays 0 when the category matches no character
    for shown, char in enumerate(category_scan(args[1]), 1):
        print(char, end=' ')
        if shown > 200:
            break
    print()
    print(shown, 'characters shown')


# Script entry point: pass the full argument vector (sys.argv, including the
# program name at index 0) to main(), which branches on its length.
if __name__ == '__main__':
    main(sys.argv)
ch01-12: clean up by @eumiro 2021-02-15 00:28:07 +01:00			`import sys`
			`import collections`
updade from Atlas repo 2021-05-21 23:56:12 +02:00			`from unicodedata import category`
ch01-12: clean up by @eumiro 2021-02-15 00:28:07 +01:00

			`def category_stats():`
			`counts = collections.Counter()`
			`firsts = {}`
			`for code in range(sys.maxunicode + 1):`
			`char = chr(code)`
			`cat = category(char)`
			`if cat not in counts:`
			`firsts[cat] = char`
			`counts[cat] += 1`
			`return counts, firsts`


			`def category_scan(desired):`
			`for code in range(sys.maxunicode + 1):`
			`char = chr(code)`
			`if category(char) == desired:`
updade from Atlas repo 2021-05-21 23:56:12 +02:00			`yield char`
ch01-12: clean up by @eumiro 2021-02-15 00:28:07 +01:00

			`def main(args):`
			`count = 0`
			`if len(args) == 2:`
			`for char in category_scan(args[1]):`
			`print(char, end=' ')`
			`count += 1`
			`if count > 200:`
			`break`
updade from Atlas repo 2021-05-21 23:56:12 +02:00			`print()`
ch01-12: clean up by @eumiro 2021-02-15 00:28:07 +01:00			`print(count, 'characters shown')`
			`else:`
			`counts, firsts = category_stats()`
sync with O'Reilly Atlas 2021-07-08 04:45:54 +02:00			`for i, (cat, count) in enumerate(counts.most_common(), 1):`
ch01-12: clean up by @eumiro 2021-02-15 00:28:07 +01:00			`first = firsts[cat]`
			`if cat == 'Cs':`
			`first = f'(surrogate U+{ord(first):04X})'`
sync with O'Reilly Atlas 2021-07-08 04:45:54 +02:00			`print(f'{i:2} {count:6} {cat} {first}')`
ch01-12: clean up by @eumiro 2021-02-15 00:28:07 +01:00

			`if __name__ == '__main__':`
			`main(sys.argv)`