diff --git a/concurrency/charfinder.py b/concurrency/charfinder.py index ab4b0cc..df19b74 100755 --- a/concurrency/charfinder.py +++ b/concurrency/charfinder.py @@ -37,7 +37,10 @@ For exploring words that occur in the character names, there is the 1 DOLLAR 1 SMALL >>> index = UnicodeNameIndex() - >>> index.word_report(7) + >>> index.word_report(10) + 75821 CJK + 75761 IDEOGRAPH + 74656 UNIFIED 13196 SYLLABLE 11735 HANGUL 7616 LETTER @@ -46,9 +49,9 @@ For exploring words that occur in the character names, there is the 2122 SMALL 1709 CAPITAL -Note: character names starting with the string ``'CJK UNIFIED IDEOGRAPH'`` -are not indexed. Those names are not useful for searching, since the only -unique part of the name is the codepoint in hexadecimal. +Note: characters with names starting with 'CJK UNIFIED IDEOGRAPH' +are indexed with those three words only, excluding the hexadecimal +codepoint at the end of the name. """ @@ -134,19 +137,6 @@ class UnicodeNameIndex: return res def word_report(self, top=None): - """ - Generate report with most frequent words - - >>> index = UnicodeNameIndex() - >>> index.word_report(7) - 13196 SYLLABLE - 11735 HANGUL - 7616 LETTER - 2232 WITH - 2180 SIGN - 2122 SMALL - 1709 CAPITAL - """ for postings, key in self.word_rank(top): print('{:5} {}'.format(postings, key)) diff --git a/concurrency/http_charserver.py b/concurrency/http_charserver.py old mode 100644 new mode 100755 index 7c484fd..9514322 --- a/concurrency/http_charserver.py +++ b/concurrency/http_charserver.py @@ -1,20 +1,25 @@ +#!/usr/bin/env python3 + import asyncio from aiohttp import web from charfinder import UnicodeNameIndex -TEMPLATE = ''' +PAGE_TPL = '''
-+
+{message}
@@ -26,6 +31,10 @@ TEMPLATE = ''' CONTENT_TYPE = 'text/html; charset=UTF-8' +EXAMPLE_WORDS = ('chess cat circled Malayalam digit Roman face Ethiopic' + ' black mark symbol dot operator Braille hexagram').split() +LINK_TPL = '{0}' + index = None # a UnicodeNameIndex instance @@ -41,9 +50,12 @@ def handle(request): else: lines = [] res = '' - msg = 'Type words describing characters, e.g. chess.' + msg = 'Type words describing characters.' - text = TEMPLATE.format(query=query, result=res, message=msg) + links = ', '.join(LINK_TPL.format(word) + for word in sorted(EXAMPLE_WORDS, key=str.upper)) + text = PAGE_TPL.format(query=query, result=res, + message=msg, links=links) return web.Response(content_type=CONTENT_TYPE, text=text)