diff --git a/04-text-byte/zwj_sample.ipynb b/04-text-byte/zwj_sample.ipynb new file mode 100644 index 0000000..993e5b9 --- /dev/null +++ b/04-text-byte/zwj_sample.ipynb @@ -0,0 +1,102 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "👨‍🦰\tE11.0\tman: red hair\n", + "\t👨\tU+1F468\tMAN + ZWJ\n", + "\t🦰\tU+1F9B0\tEMOJI COMPONENT RED HAIR\n", + "🧑‍🤝‍🧑\tE12.0\tpeople holding hands\n", + "\t🧑\tU+1F9D1\tADULT + ZWJ\n", + "\t🤝\tU+1F91D\tHANDSHAKE + ZWJ\n", + "\t🧑\tU+1F9D1\tADULT\n", + "🏊🏿‍♀️\tE4.0\twoman swimming: dark skin tone\n", + "\t🏊\tU+1F3CA\tSWIMMER\n", + "\t🏿\tU+1F3FF\tEMOJI MODIFIER FITZPATRICK TYPE-6 + ZWJ\n", + "\t♀\tU+2640\tFEMALE SIGN + V16\n", + "👩🏾‍✈️\tE4.0\twoman pilot: medium-dark skin tone\n", + "\t👩\tU+1F469\tWOMAN\n", + "\t🏾\tU+1F3FE\tEMOJI MODIFIER FITZPATRICK TYPE-5 + ZWJ\n", + "\t✈\tU+2708\tAIRPLANE + V16\n", + "👨‍👩‍👧\tE2.0\tfamily: man, woman, girl\n", + "\t👨\tU+1F468\tMAN + ZWJ\n", + "\t👩\tU+1F469\tWOMAN + ZWJ\n", + "\t👧\tU+1F467\tGIRL\n", + "🏳️‍⚧️\tE13.0\ttransgender flag\n", + "\t🏳\tU+1F3F3\tWAVING WHITE FLAG + V16 + ZWJ\n", + "\t⚧\tU+26A7\tMALE WITH STROKE AND MALE AND FEMALE SIGN + V16\n", + "👩‍❤️‍💋‍👩\tE2.0\tkiss: woman, woman\n", + "\t👩\tU+1F469\tWOMAN + ZWJ\n", + "\t❤\tU+2764\tHEAVY BLACK HEART + V16 + ZWJ\n", + "\t💋\tU+1F48B\tKISS MARK + ZWJ\n", + "\t👩\tU+1F469\tWOMAN\n" + ] + } + ], + "source": [ + "from unicodedata import name\n", + "\n", + "zwg_sample = \"\"\"\n", + "1F468 200D 1F9B0 |man: red hair |E11.0\n", + "1F9D1 200D 1F91D 200D 1F9D1 |people holding hands |E12.0\n", + "1F3CA 1F3FF 200D 2640 FE0F |woman swimming: dark skin tone |E4.0\n", + "1F469 1F3FE 200D 2708 FE0F |woman pilot: medium-dark skin tone |E4.0\n", + "1F468 200D 1F469 200D 1F467 |family: man, woman, girl |E2.0\n", + "1F3F3 FE0F 200D 26A7 FE0F |transgender flag |E13.0\n", + "1F469 200D 2764 FE0F 200D 1F48B 200D 1F469 |kiss: woman, woman |E2.0\n", + 
"\"\"\"\n", + "\n", + "markers = {'\\u200D': 'ZWJ', # ZERO WIDTH JOINER\n", + " '\\uFE0F': 'V16', # VARIATION SELECTOR-16\n", + " }\n", + "\n", + "for line in zwg_sample.strip().split('\\n'):\n", + " code, descr, version = (s.strip() for s in line.split('|'))\n", + " chars = [chr(int(c, 16)) for c in code.split()]\n", + " print(''.join(chars), version, descr, sep='\\t', end='')\n", + " while chars:\n", + " char = chars.pop(0)\n", + " if char in markers:\n", + " print(' + ' + markers[char], end='')\n", + " else:\n", + " ucode = f'U+{ord(char):04X}'\n", + " print(f'\\n\\t{char}\\t{ucode}\\t{name(char)}', end='')\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/04-text-byte/zwj_sample.png b/04-text-byte/zwj_sample.png new file mode 100644 index 0000000..1b1a3a5 Binary files /dev/null and b/04-text-byte/zwj_sample.png differ