From 1517c88c4b0adca3ec4ba088d2da05cbd4667fd3 Mon Sep 17 00:00:00 2001
From: Luciano Ramalho
Date: Sun, 18 Jan 2015 08:36:11 -0200
Subject: [PATCH] charfinder examples
---
concurrency/charfinder.py | 24 +++++++-----------------
concurrency/http_charserver.py | 28 ++++++++++++++++++++--------
2 files changed, 27 insertions(+), 25 deletions(-)
mode change 100644 => 100755 concurrency/http_charserver.py
diff --git a/concurrency/charfinder.py b/concurrency/charfinder.py
index ab4b0cc..df19b74 100755
--- a/concurrency/charfinder.py
+++ b/concurrency/charfinder.py
@@ -37,7 +37,10 @@ For exploring words that occur in the character names, there is the
1 DOLLAR
1 SMALL
>>> index = UnicodeNameIndex()
- >>> index.word_report(7)
+ >>> index.word_report(10)
+ 75821 CJK
+ 75761 IDEOGRAPH
+ 74656 UNIFIED
13196 SYLLABLE
11735 HANGUL
7616 LETTER
@@ -46,9 +49,9 @@ For exploring words that occur in the character names, there is the
2122 SMALL
1709 CAPITAL
-Note: character names starting with the string ``'CJK UNIFIED IDEOGRAPH'``
-are not indexed. Those names are not useful for searching, since the only
-unique part of the name is the codepoint in hexadecimal.
+Note: characters whose names start with 'CJK UNIFIED IDEOGRAPH' are
+indexed under those three words only; the hexadecimal codepoint at
+the end of each name is not indexed.
"""
@@ -134,19 +137,6 @@ class UnicodeNameIndex:
return res
def word_report(self, top=None):
- """
- Generate report with most frequent words
-
- >>> index = UnicodeNameIndex()
- >>> index.word_report(7)
- 13196 SYLLABLE
- 11735 HANGUL
- 7616 LETTER
- 2232 WITH
- 2180 SIGN
- 2122 SMALL
- 1709 CAPITAL
- """
for postings, key in self.word_rank(top):
print('{:5} {}'.format(postings, key))
diff --git a/concurrency/http_charserver.py b/concurrency/http_charserver.py
old mode 100644
new mode 100755
index 7c484fd..9514322
--- a/concurrency/http_charserver.py
+++ b/concurrency/http_charserver.py
@@ -1,20 +1,25 @@
+#!/usr/bin/env python3
+
import asyncio
from aiohttp import web
from charfinder import UnicodeNameIndex
-TEMPLATE = '''
+PAGE_TPL = '''
- title
+ Charserver
-
+
+
+
{message}
@@ -26,6 +31,10 @@ TEMPLATE = '''
CONTENT_TYPE = 'text/html; charset=UTF-8'
+EXAMPLE_WORDS = ('chess cat circled Malayalam digit Roman face Ethiopic'
+ ' black mark symbol dot operator Braille hexagram').split()
+LINK_TPL = '{0}'
+
index = None # a UnicodeNameIndex instance
@@ -41,9 +50,12 @@ def handle(request):
else:
lines = []
res = ''
- msg = 'Type words describing characters, e.g. chess.'
+ msg = 'Type words describing characters.'
- text = TEMPLATE.format(query=query, result=res, message=msg)
+ links = ', '.join(LINK_TPL.format(word)
+ for word in sorted(EXAMPLE_WORDS, key=str.upper))
+ text = PAGE_TPL.format(query=query, result=res,
+ message=msg, links=links)
return web.Response(content_type=CONTENT_TYPE, text=text)