charfinder examples

This commit is contained in:
Luciano Ramalho 2015-01-18 08:36:11 -02:00
parent dd1a53ff71
commit 1517c88c4b
2 changed files with 27 additions and 25 deletions

View File

@@ -37,7 +37,10 @@ For exploring words that occur in the character names, there is the
1 DOLLAR 1 DOLLAR
1 SMALL 1 SMALL
>>> index = UnicodeNameIndex() >>> index = UnicodeNameIndex()
>>> index.word_report(7) >>> index.word_report(10)
75821 CJK
75761 IDEOGRAPH
74656 UNIFIED
13196 SYLLABLE 13196 SYLLABLE
11735 HANGUL 11735 HANGUL
7616 LETTER 7616 LETTER
@@ -46,9 +49,9 @@ For exploring words that occur in the character names, there is the
2122 SMALL 2122 SMALL
1709 CAPITAL 1709 CAPITAL
Note: character names starting with the string ``'CJK UNIFIED IDEOGRAPH'`` Note: characters with names starting with 'CJK UNIFIED IDEOGRAPH'
are not indexed. Those names are not useful for searching, since the only are indexed with those three words only, excluding the hexadecimal
unique part of the name is the codepoint in hexadecimal. codepoint at the end of the name.
""" """
@@ -134,19 +137,6 @@ class UnicodeNameIndex:
return res return res
def word_report(self, top=None): def word_report(self, top=None):
"""
Generate report with most frequent words
>>> index = UnicodeNameIndex()
>>> index.word_report(7)
13196 SYLLABLE
11735 HANGUL
7616 LETTER
2232 WITH
2180 SIGN
2122 SMALL
1709 CAPITAL
"""
for postings, key in self.word_rank(top): for postings, key in self.word_rank(top):
print('{:5} {}'.format(postings, key)) print('{:5} {}'.format(postings, key))

28
concurrency/http_charserver.py Normal file → Executable file
View File

@@ -1,20 +1,25 @@
#!/usr/bin/env python3
import asyncio import asyncio
from aiohttp import web from aiohttp import web
from charfinder import UnicodeNameIndex from charfinder import UnicodeNameIndex
TEMPLATE = ''' PAGE_TPL = '''
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="utf-8"> <meta charset="utf-8">
<title>title</title> <title>Charserver</title>
</head> </head>
<body> <body>
<form action="/"> <p>
<input type="search" name="query" value="{query}"> <form action="/">
<input type="submit" value="find"> <input type="search" name="query" value="{query}">
</form> <input type="submit" value="find">
Examples: {links}
</form>
</p>
<p>{message}</p> <p>{message}</p>
<hr> <hr>
<pre> <pre>
@@ -26,6 +31,10 @@ TEMPLATE = '''
CONTENT_TYPE = 'text/html; charset=UTF-8' CONTENT_TYPE = 'text/html; charset=UTF-8'
EXAMPLE_WORDS = ('chess cat circled Malayalam digit Roman face Ethiopic'
' black mark symbol dot operator Braille hexagram').split()
LINK_TPL = '<a href="/?query={0}" title="find &quot;{0}&quot;">{0}</a>'
index = None # a UnicodeNameIndex instance index = None # a UnicodeNameIndex instance
@@ -41,9 +50,12 @@ def handle(request):
else: else:
lines = [] lines = []
res = '' res = ''
msg = 'Type words describing characters, e.g. chess.' msg = 'Type words describing characters.'
text = TEMPLATE.format(query=query, result=res, message=msg) links = ', '.join(LINK_TPL.format(word)
for word in sorted(EXAMPLE_WORDS, key=str.upper))
text = PAGE_TPL.format(query=query, result=res,
message=msg, links=links)
return web.Response(content_type=CONTENT_TYPE, text=text) return web.Response(content_type=CONTENT_TYPE, text=text)