charfinder examples

This commit is contained in:
Luciano Ramalho 2015-01-18 08:36:11 -02:00
parent dd1a53ff71
commit 1517c88c4b
2 changed files with 27 additions and 25 deletions

View File

@ -37,7 +37,10 @@ For exploring words that occur in the character names, there is the
1 DOLLAR
1 SMALL
>>> index = UnicodeNameIndex()
>>> index.word_report(7)
>>> index.word_report(10)
75821 CJK
75761 IDEOGRAPH
74656 UNIFIED
13196 SYLLABLE
11735 HANGUL
7616 LETTER
@ -46,9 +49,9 @@ For exploring words that occur in the character names, there is the
2122 SMALL
1709 CAPITAL
Note: character names starting with the string ``'CJK UNIFIED IDEOGRAPH'``
are not indexed. Those names are not useful for searching, since the only
unique part of the name is the codepoint in hexadecimal.
Note: characters with names starting with 'CJK UNIFIED IDEOGRAPH'
are indexed with those three words only, excluding the hexadecimal
codepoint at the end of the name.
"""
@ -134,19 +137,6 @@ class UnicodeNameIndex:
return res
def word_report(self, top=None):
    """
    Print one ``count word`` line per entry produced by ``word_rank``.

    ``top`` is passed through unchanged to ``self.word_rank``; every
    (count, word) pair it yields is printed with the count padded to a
    minimum width of five characters.

    >>> index = UnicodeNameIndex()
    >>> index.word_report(7)
    13196 SYLLABLE
    11735 HANGUL
     7616 LETTER
     2232 WITH
     2180 SIGN
     2122 SMALL
     1709 CAPITAL
    """
    # Bind the row formatter once; each ranked pair is rendered through it.
    render_row = '{:5} {}'.format
    for count, word in self.word_rank(top):
        print(render_row(count, word))

28
concurrency/http_charserver.py Normal file → Executable file
View File

@ -1,20 +1,25 @@
#!/usr/bin/env python3
import asyncio
from aiohttp import web
from charfinder import UnicodeNameIndex
TEMPLATE = '''
PAGE_TPL = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>title</title>
<title>Charserver</title>
</head>
<body>
<form action="/">
<input type="search" name="query" value="{query}">
<input type="submit" value="find">
</form>
<p>
<form action="/">
<input type="search" name="query" value="{query}">
<input type="submit" value="find">
Examples: {links}
</form>
</p>
<p>{message}</p>
<hr>
<pre>
@ -26,6 +31,10 @@ TEMPLATE = '''
# Content type passed to web.Response for the rendered page (UTF-8 HTML).
CONTENT_TYPE = 'text/html; charset=UTF-8'
# Sample search words; each one is rendered as an example link on the page.
EXAMPLE_WORDS = ('chess cat circled Malayalam digit Roman face Ethiopic'
' black mark symbol dot operator Braille hexagram').split()
# Anchor markup for a single example word: {0} fills the query string,
# the tooltip text and the visible link label.
LINK_TPL = '<a href="/?query={0}" title="find &quot;{0}&quot;">{0}</a>'
index = None # a UnicodeNameIndex instance
@ -41,9 +50,12 @@ def handle(request):
else:
lines = []
res = ''
msg = 'Type words describing characters, e.g. chess.'
msg = 'Type words describing characters.'
text = TEMPLATE.format(query=query, result=res, message=msg)
links = ', '.join(LINK_TPL.format(word)
for word in sorted(EXAMPLE_WORDS, key=str.upper))
text = PAGE_TPL.format(query=query, result=res,
message=msg, links=links)
return web.Response(content_type=CONTENT_TYPE, text=text)