From 1517c88c4b0adca3ec4ba088d2da05cbd4667fd3 Mon Sep 17 00:00:00 2001 From: Luciano Ramalho Date: Sun, 18 Jan 2015 08:36:11 -0200 Subject: [PATCH] charfinder examples --- concurrency/charfinder.py | 24 +++++++----------------- concurrency/http_charserver.py | 28 ++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 25 deletions(-) mode change 100644 => 100755 concurrency/http_charserver.py diff --git a/concurrency/charfinder.py b/concurrency/charfinder.py index ab4b0cc..df19b74 100755 --- a/concurrency/charfinder.py +++ b/concurrency/charfinder.py @@ -37,7 +37,10 @@ For exploring words that occur in the character names, there is the 1 DOLLAR 1 SMALL >>> index = UnicodeNameIndex() - >>> index.word_report(7) + >>> index.word_report(10) + 75821 CJK + 75761 IDEOGRAPH + 74656 UNIFIED 13196 SYLLABLE 11735 HANGUL 7616 LETTER @@ -46,9 +49,9 @@ For exploring words that occur in the character names, there is the 2122 SMALL 1709 CAPITAL -Note: character names starting with the string ``'CJK UNIFIED IDEOGRAPH'`` -are not indexed. Those names are not useful for searching, since the only -unique part of the name is the codepoint in hexadecimal. +Note: characters with names starting with 'CJK UNIFIED IDEOGRAPH' +are indexed with those three words only, excluding the hexadecimal +codepoint at the end of the name. """ @@ -134,19 +137,6 @@ class UnicodeNameIndex: return res def word_report(self, top=None): - """ - Generate report with most frequent words - - >>> index = UnicodeNameIndex() - >>> index.word_report(7) - 13196 SYLLABLE - 11735 HANGUL - 7616 LETTER - 2232 WITH - 2180 SIGN - 2122 SMALL - 1709 CAPITAL - """ for postings, key in self.word_rank(top): print('{:5} {}'.format(postings, key)) diff --git a/concurrency/http_charserver.py b/concurrency/http_charserver.py old mode 100644 new mode 100755 index 7c484fd..9514322 --- a/concurrency/http_charserver.py +++ b/concurrency/http_charserver.py @@ -1,20 +1,25 @@ +#!/usr/bin/env python3 + import asyncio from aiohttp import web from charfinder import UnicodeNameIndex -TEMPLATE = ''' +PAGE_TPL = ''' - title + Charserver -
- - -
+

+

+ + + Examples: {links} +
+

{message}


@@ -26,6 +31,10 @@ TEMPLATE = '''
 
 CONTENT_TYPE = 'text/html; charset=UTF-8'
 
+EXAMPLE_WORDS = ('chess cat circled Malayalam digit Roman face Ethiopic'
+                 ' black mark symbol dot operator Braille hexagram').split()
+LINK_TPL = '{0}'
+
 index = None  # a UnicodeNameIndex instance
 
 
@@ -41,9 +50,12 @@ def handle(request):
     else:
         lines = []
         res = ''
-        msg = 'Type words describing characters, e.g. chess.'
+        msg = 'Type words describing characters.'
 
-    text = TEMPLATE.format(query=query, result=res, message=msg)
+    links = ', '.join(LINK_TPL.format(word)
+                      for word in sorted(EXAMPLE_WORDS, key=str.upper))
+    text = PAGE_TPL.format(query=query, result=res,
+                           message=msg, links=links)
     return web.Response(content_type=CONTENT_TYPE, text=text)