55 lines
1.6 KiB
Python
55 lines
1.6 KiB
Python
import unicodedata
|
|
|
|
encodings = 'ascii latin1 cp1252 cp437 gb2312 utf-8 utf-16le'.split()
|
|
|
|
widths = {encoding:1 for encoding in encodings[:-3]}
|
|
widths.update(zip(encodings[-3:], (2, 4, 4)))
|
|
|
|
chars = sorted([
|
|
'A', # \u0041 : LATIN CAPITAL LETTER A
|
|
'¿', # \u00bf : INVERTED QUESTION MARK
|
|
'Ã', # \u00c3 : LATIN CAPITAL LETTER A WITH TILDE
|
|
'á', # \u00e1 : LATIN SMALL LETTER A WITH ACUTE
|
|
'Ω', # \u03a9 : GREEK CAPITAL LETTER OMEGA
|
|
'µ',
|
|
'Ц',
|
|
'€', # \u20ac : EURO SIGN
|
|
'“',
|
|
'┌',
|
|
'气',
|
|
'氣', # \u6c23 : CJK UNIFIED IDEOGRAPH-6C23
|
|
'𝄞', # \u1d11e : MUSICAL SYMBOL G CLEF
|
|
])
|
|
|
|
callout1_code = 0x278a # ➊ DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE
|
|
|
|
missing_mark = '*'
|
|
|
|
def list_chars():
|
|
for char in chars:
|
|
print('%r, # \\u%04x : %s' % (char, ord(char), unicodedata.name(char)))
|
|
|
|
def show_encodings():
|
|
print(end='\t\t')
|
|
for encoding in encodings:
|
|
print(encoding.ljust(widths[encoding] * 2), end='\t')
|
|
print()
|
|
|
|
for lineno, char in enumerate(chars):
|
|
codepoint = 'U+{:04X}'.format(ord(char))
|
|
print(char, codepoint, sep='\t', end='\t')
|
|
for encoding in encodings:
|
|
try:
|
|
bytes = char.encode(encoding)
|
|
dump = ' '.join('%02X' % byte for byte in bytes)
|
|
except UnicodeEncodeError:
|
|
dump = missing_mark
|
|
dump = dump.ljust(widths[encoding] * 2)
|
|
print(dump, end='\t')
|
|
# print(chr(callout1_code + lineno))
|
|
print(unicodedata.name(char))
|
|
# print()
|
|
|
|
#list_chars()
|
|
show_encodings()
|