wikipedia pictures download example
This commit is contained in:
145
concurrency/charfinder/charfinder.html
Normal file
145
concurrency/charfinder/charfinder.html
Normal file
@@ -0,0 +1,145 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Charserver</title>
|
||||
<script type="text/javascript">
|
||||
//(function() {
|
||||
// Endpoint that returns matching characters as JSON.
var BASE_URL = 'http://127.0.0.1:8888/chars';

// Page size sent to the server as the `limit` query parameter.
var RESULTS_PER_REQUEST = 10;

// Pause between consecutive page requests, in milliseconds.
var REQUEST_DELAY = 1000;

// One XMLHttpRequest object is shared by every request issued from this page;
// processResponse is invoked on each of its ready-state changes.
var httpRequest = new XMLHttpRequest();
httpRequest.onreadystatechange = processResponse;
|
||||
|
||||
/**
 * Build an event handler that requests one page of results.
 *
 * @param {number} start - index of the first result to request; 0 (or
 *     omitted) means "new search", which also clears the results table.
 * @returns {function(Event=): boolean} handler that sends the request and
 *     returns false so that a form submission is cancelled.
 */
function requestMaker(start) {
    var offset = start || 0;
    var makeRequest = function (event) {
        var query = document.getElementById('queryField').value;
        var limit = RESULTS_PER_REQUEST;
        // The query is user input: encode it so '&', '#', '+', spaces and
        // non-ASCII text cannot corrupt the query string.
        var url = BASE_URL +
            '?query=' + encodeURIComponent(query) +
            '&start=' + offset +
            '&limit=' + limit;
        httpRequest.open('GET', url);
        httpRequest.send();
        document.getElementById('message').textContent = 'Query: ' + query;
        if (offset === 0) {
            // Only a fresh search empties the table; continuation requests
            // must keep the rows that were already displayed.
            var table = document.getElementById('results');
            while (table.lastChild) {
                table.removeChild(table.lastChild);
            }
        }
        return false;  // don't submit form
    };
    return makeRequest;
}
|
||||
|
||||
/**
 * Ready-state callback for the shared httpRequest object.
 *
 * Does nothing until the request is complete (readyState 4). A 200 response
 * body is handed to fillTable; any other status is reported with alert().
 */
function processResponse() {
    if (httpRequest.readyState !== 4) {
        return;
    }
    var query = document.getElementById('queryField').value;
    var succeeded = httpRequest.status === 200;
    if (!succeeded) {
        alert('query: ' + query + '\nstatus: '+httpRequest.status);
        return;
    }
    fillTable(httpRequest.responseText);
}
|
||||
|
||||
/**
 * Split a string into an array of user-visible symbols, keeping each UTF-16
 * surrogate pair (characters after U+FFFF) together as one element.
 *
 * A high surrogate is joined with the next code unit only when that unit is
 * really a low surrogate; an unpaired surrogate is emitted on its own so it
 * cannot swallow the character that follows it.
 *
 * @param {string} string - text to split.
 * @returns {string[]} one entry per symbol, in order.
 */
function getSymbols(string) {
    var length = string.length;
    var output = [];
    for (var index = 0; index < length; index++) {
        var charCode = string.charCodeAt(index);
        var isHighSurrogate = charCode >= 0xD800 && charCode <= 0xDBFF;
        if (isHighSurrogate && index + 1 < length) {
            var nextCode = string.charCodeAt(index + 1);
            if (nextCode >= 0xDC00 && nextCode <= 0xDFFF) {
                output.push(string.charAt(index) + string.charAt(index + 1));
                index++;  // skip the low surrogate just consumed
                continue;
            }
        }
        output.push(string.charAt(index));
    }
    return output;
}
|
||||
|
||||
// from: https://developer.mozilla.org/...
|
||||
// en-US/docs/Web/JavaScript/Reference/Global_Objects/String/charCodeAt
|
||||
/**
 * Return the code point of the idx-th *character* of str, where a surrogate
 * pair counts as a single character.
 *
 * @param {*} str - value to inspect; coerced to a string.
 * @param {number} idx - character position (not UTF-16 unit position).
 * @returns {number} the code point, or NaN when idx is out of range.
 */
function knownCharCodeAt(str, idx) {
    var text = str + '';
    var unitCount = text.length;
    var position = idx;

    // Every surrogate pair that starts before the requested position shifts
    // the UTF-16 unit offset one further to the right.
    var pairPattern = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
    var found;
    while ((found = pairPattern.exec(text)) != null) {
        if (found.index < position) {
            position++;
        } else {
            break;
        }
    }

    if (position >= unitCount || position < 0) {
        return NaN;
    }

    var unit = text.charCodeAt(position);
    var startsPair = 0xD800 <= unit && unit <= 0xDBFF;
    if (!startsPair) {
        return unit;
    }
    // High surrogate: combine it with the unit that follows.
    var trailing = text.charCodeAt(position + 1);
    return ((unit - 0xD800) * 0x400) + (trailing - 0xDC00) + 0x10000;
}
|
||||
|
||||
/**
 * Format a single symbol as a 'U+XXXX' code point label: uppercase
 * hexadecimal, left-padded with zeros to at least four digits.
 *
 * @param {string} uniChar - one symbol (one UTF-16 unit or a surrogate pair).
 * @returns {string} label such as 'U+0041' or 'U+1F600'.
 */
function codePointStr(uniChar) {
    // Symbols longer than one unit are characters after U+FFFF.
    var code = (uniChar.length === 1)
        ? uniChar.charCodeAt(0)
        : knownCharCodeAt(uniChar, 0);
    var hex = code.toString(16);
    while (hex.length < 4) {
        hex = '0' + hex;
    }
    return 'U+' + hex.toUpperCase();
}
|
||||
|
||||
/**
 * Append one table row per character found in a JSON response and, when the
 * response says more results exist, schedule the request for the next page.
 *
 * @param {string} responseData - JSON text with the fields `chars` (string
 *     of matching characters), `start`, `stop` and `total`.
 */
function fillTable(responseData) {
    var results = JSON.parse(responseData);
    var table = document.getElementById('results');
    // `chars` holds only the characters of this page, so it is walked from
    // its own beginning rather than from the absolute `start` offset.
    var characters = getSymbols(results.chars);
    for (var i = 0; i < characters.length; i++) {
        var ch = characters[i];
        if (ch === '\n') continue;  // skip newline characters in the payload
        if (ch === '\x00') break;   // NUL ends the data
        var hexCode = codePointStr(ch);
        var codeCell = document.createElement('td');
        codeCell.appendChild(document.createTextNode(hexCode));
        var charCell = document.createElement('th');
        charCell.appendChild(document.createTextNode(ch));
        var tr = document.createElement('tr');
        tr.appendChild(codeCell);
        tr.appendChild(charCell);
        tr.id = hexCode;
        table.appendChild(tr);
    }
    // Schedule the follow-up once per response, passing the handler itself so
    // that it runs after REQUEST_DELAY instead of immediately.
    if (results.stop < results.total) {
        setTimeout(requestMaker(results.stop), REQUEST_DELAY);
    }
}
|
||||
// Once the page has loaded, wire the same "first page" requester to both
// ways of starting a search: submitting the form and clicking the button.
window.onload = function () {
    var requester = requestMaker(0);
    var bindings = [
        ['queryForm', 'onsubmit'],
        ['queryButton', 'onclick']
    ];
    bindings.forEach(function (binding) {
        document.getElementById(binding[0])[binding[1]] = requester;
    });
};
|
||||
//})();
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
<form id="queryForm">
|
||||
<input id="queryField" type="search" name="query" value="">
|
||||
<input id="queryButton" type="button" value="find">
|
||||
Examples: {links}
|
||||
</form>
|
||||
</p>
|
||||
<p id="message">{message}</p>
|
||||
<hr>
|
||||
<table id="results">
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
218
concurrency/charfinder/charfinder.py
Executable file
218
concurrency/charfinder/charfinder.py
Executable file
@@ -0,0 +1,218 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Unicode character finder utility:
|
||||
find characters based on words in their official names.
|
||||
|
||||
This can be used from the command line, just pass words as arguments.
|
||||
|
||||
Here is the ``main`` function which makes it happen::
|
||||
|
||||
>>> main('rook') # doctest: +NORMALIZE_WHITESPACE
|
||||
U+2656 ♖ WHITE CHESS ROOK
|
||||
U+265C ♜ BLACK CHESS ROOK
|
||||
(2 matches for 'rook')
|
||||
>>> main('rook', 'black') # doctest: +NORMALIZE_WHITESPACE
|
||||
U+265C ♜ BLACK CHESS ROOK
|
||||
(1 match for 'rook black')
|
||||
>>> main('white bishop') # doctest: +NORMALIZE_WHITESPACE
|
||||
U+2657 ♗ WHITE CHESS BISHOP
|
||||
(1 match for 'white bishop')
|
||||
>>> main("jabberwocky's vest")
|
||||
(No match for "jabberwocky's vest")
|
||||
|
||||
|
||||
For exploring words that occur in the character names, there is the
|
||||
``word_report`` function::
|
||||
|
||||
>>> index = UnicodeNameIndex(sample_chars)
|
||||
>>> index.word_report()
|
||||
3 SIGN
|
||||
2 A
|
||||
2 EURO
|
||||
2 LATIN
|
||||
2 LETTER
|
||||
1 CAPITAL
|
||||
1 CURRENCY
|
||||
1 DOLLAR
|
||||
1 SMALL
|
||||
>>> index = UnicodeNameIndex()
|
||||
>>> index.word_report(10)
|
||||
75821 CJK
|
||||
75761 IDEOGRAPH
|
||||
74656 UNIFIED
|
||||
13196 SYLLABLE
|
||||
11735 HANGUL
|
||||
7616 LETTER
|
||||
2232 WITH
|
||||
2180 SIGN
|
||||
2122 SMALL
|
||||
1709 CAPITAL
|
||||
|
||||
Note: characters with names starting with 'CJK UNIFIED IDEOGRAPH'
|
||||
are indexed with those three words only, excluding the hexadecimal
|
||||
codepoint at the end of the name.
|
||||
|
||||
"""
|
||||
|
||||
import sys
|
||||
import re
|
||||
import unicodedata
|
||||
import pickle
|
||||
import warnings
|
||||
import itertools
|
||||
from collections import namedtuple
|
||||
|
||||
# Raw strings keep the backslashes for the regex engine; in a plain string
# literal '\w' and '\+' are invalid escape sequences.
RE_WORD = re.compile(r'\w+')
RE_UNICODE_NAME = re.compile(r'^[A-Z0-9 -]+$')
RE_CODEPOINT = re.compile(r'U\+([0-9A-F]{4,6})')

# File used to cache the pickled index between runs.
INDEX_NAME = 'charfinder_index.pickle'
# Only indexes with more entries than this are written to INDEX_NAME.
MINIMUM_SAVE_LEN = 10000
# Names starting with these prefixes are indexed under the prefix words only.
CJK_UNI_PREFIX = 'CJK UNIFIED IDEOGRAPH'
CJK_CMP_PREFIX = 'CJK COMPATIBILITY IDEOGRAPH'
|
||||
|
||||
# Small character set used by the doctests and the unit tests.
sample_chars = [
    '\N{DOLLAR SIGN}',
    '\N{LATIN CAPITAL LETTER A}',
    '\N{LATIN SMALL LETTER A}',
    '\N{EURO-CURRENCY SIGN}',
    '\N{EURO SIGN}',
]
|
||||
|
||||
|
||||
def tokenize(text):
    """Lazily yield every RE_WORD match found in ``text``, uppercased."""
    yield from (found.group().upper() for found in RE_WORD.finditer(text))
|
||||
|
||||
|
||||
def query_type(text):
    """Classify a query string.

    Returns 'CODEPOINT' when the uppercased text contains 'U+', 'NAME' when
    it matches RE_UNICODE_NAME, and 'CHARACTERS' otherwise.
    """
    candidate = text.upper()
    if 'U+' in candidate:
        return 'CODEPOINT'
    if RE_UNICODE_NAME.match(candidate):
        return 'NAME'
    return 'CHARACTERS'
|
||||
|
||||
# Record describing one character: 'U+XXXX' label, the character, its name.
CharDescription = namedtuple('CharDescription', ['code_str', 'char', 'name'])
|
||||
|
||||
class UnicodeNameIndex:
    """Inverted index mapping each word of a Unicode character name to the
    set of characters whose name contains that word.

    With ``chars=None`` the index is read from INDEX_NAME when possible and
    otherwise built for every named character; with an explicit iterable of
    characters it is always built from those characters.
    """

    def __init__(self, chars=None):
        self.load(chars)

    def load(self, chars=None):
        """Populate ``self.index`` from the cache file or by building it."""
        self.index = None
        if chars is None:
            try:
                with open(INDEX_NAME, 'rb') as fp:
                    # NOTE: unpickling can execute arbitrary code; this is
                    # only safe while INDEX_NAME is a file this program wrote.
                    self.index = pickle.load(fp)
            except (OSError, EOFError, pickle.UnpicklingError):
                # Missing, unreadable, truncated or corrupt cache: fall
                # through and rebuild instead of crashing.
                self.index = None
        if self.index is None:
            self.build_index(chars)
            # Only a freshly built index needs to be written back.
            if len(self.index) > MINIMUM_SAVE_LEN:
                try:
                    self.save()
                except OSError as exc:
                    warnings.warn('Could not save {!r}: {}'
                                  .format(INDEX_NAME, exc))

    def save(self):
        """Pickle the index to INDEX_NAME."""
        with open(INDEX_NAME, 'wb') as fp:
            pickle.dump(self.index, fp)

    def build_index(self, chars=None):
        """Build ``self.index`` from ``chars`` (default: all code points
        from 32 up to and including ``sys.maxunicode``)."""
        if chars is None:
            chars = (chr(i) for i in range(32, sys.maxunicode + 1))
        index = {}
        for char in chars:
            try:
                name = unicodedata.name(char)
            except ValueError:  # character has no name
                continue
            # CJK ideograph names end with the code point in hexadecimal;
            # index them under the prefix words only.
            if name.startswith(CJK_UNI_PREFIX):
                name = CJK_UNI_PREFIX
            elif name.startswith(CJK_CMP_PREFIX):
                name = CJK_CMP_PREFIX

            for word in tokenize(name):
                index.setdefault(word, set()).add(char)

        self.index = index

    def __len__(self):
        """Number of distinct words in the index."""
        return len(self.index)

    def word_rank(self, top=None):
        """Return ``(count, word)`` pairs, most frequent first and
        alphabetical among ties, optionally limited to ``top`` entries."""
        res = [(len(self.index[key]), key) for key in self.index]
        res.sort(key=lambda item: (-item[0], item[1]))
        if top is not None:
            res = res[:top]
        return res

    def word_report(self, top=None):
        """Print the ranking produced by ``word_rank``."""
        for postings, key in self.word_rank(top):
            print('{:5} {}'.format(postings, key))

    def find_chars(self, query, start=0, stop=None):
        """Yield, in sorted order, the characters whose names contain every
        word of ``query``, restricted to the slice ``[start:stop]``."""
        stop = sys.maxsize if stop is None else stop
        result_sets = []
        for word in tokenize(query):
            if word in self.index:
                result_sets.append(self.index[word])
            else:  # shortcut: no such word
                result_sets = []
                break
        if result_sets:
            result = result_sets[0].intersection(*result_sets[1:])
            result = sorted(result)  # must sort for consistency
            for char in itertools.islice(result, start, stop):
                yield char

    def find_codes(self, query, start=0, stop=None):
        """Like ``find_chars`` but yielding integer code points."""
        return (ord(char) for char
                in self.find_chars(query, start, stop))

    def describe(self, char):
        """Return a CharDescription for ``char``."""
        code_str = 'U+{:04X}'.format(ord(char))
        name = unicodedata.name(char)
        return CharDescription(code_str, char, name)

    def find_descriptions(self, query, start=0, stop=None):
        """Like ``find_chars`` but yielding CharDescription records."""
        for char in self.find_chars(query, start, stop):
            yield self.describe(char)

    def describe_str(self, char):
        """Return the description of ``char`` as one tab-separated line."""
        return '{:7}\t{}\t{}'.format(*self.describe(char))

    def find_description_strs(self, query, start=0, stop=None):
        """Like ``find_chars`` but yielding tab-separated text lines."""
        for char in self.find_chars(query, start, stop):
            yield self.describe_str(char)

    @staticmethod  # not an instance method due to concurrency
    def status(query, counter):
        """Return a summary such as "2 matches for 'rook'"."""
        if counter == 0:
            msg = 'No match'
        elif counter == 1:
            msg = '1 match'
        else:
            msg = '{} matches'.format(counter)
        return '{} for {!r}'.format(msg, query)
|
||||
|
||||
|
||||
def main(*args):
    """Print one description line per character matching the words in
    ``args``, followed by a parenthesised match-count summary."""
    finder = UnicodeNameIndex()
    query = ' '.join(args)
    matches = 0
    for line in finder.find_description_strs(query):
        matches += 1
        print(line)
    summary = finder.status(query, matches)
    print('({})'.format(summary))
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: every argument is a word to search for.
    words = sys.argv[1:]
    if not words:
        print('Usage: {} word1 [word2]...'.format(sys.argv[0]))
    else:
        main(*words)
|
||||
BIN
concurrency/charfinder/charfinder_index.pickle
Normal file
BIN
concurrency/charfinder/charfinder_index.pickle
Normal file
Binary file not shown.
88
concurrency/charfinder/http_charfinder.py
Executable file
88
concurrency/charfinder/http_charfinder.py
Executable file
@@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import asyncio
|
||||
from aiohttp import web
|
||||
|
||||
from charfinder import UnicodeNameIndex
|
||||
|
||||
# Page template filled with str.format; fields: query, links, message, result.
# The form is not wrapped in <p>: a <form> start tag implicitly closes an open
# <p>, so that wrapper only produced a stray empty paragraph.
PAGE_TPL = '''
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Charserver</title>
  </head>
  <body>
    <form action="/">
      <input type="search" name="query" value="{query}">
      <input type="submit" value="find">
      Examples: {links}
    </form>
    <p>{message}</p>
    <hr>
    <table>
      {result}
    </table>
  </body>
</html>
'''
|
||||
|
||||
# Words offered as clickable example queries.
EXAMPLE_WORDS = ('bismillah chess cat circled Malayalam digit Roman face Ethiopic'
                 ' black mark symbol dot operator Braille hexagram').split()

# The quotes shown inside the title attribute must be written as &quot;,
# otherwise the first inner double quote terminates the attribute value.
LINK_TPL = '<a href="/?query={0}" title="find &quot;{0}&quot;">{0}</a>'

# Example links, sorted case-insensitively.
LINKS_HTML = ', '.join(LINK_TPL.format(word)
                       for word in sorted(EXAMPLE_WORDS, key=str.upper))

ROW_TPL = '<tr><td>{code_str}</td><th>{char}</th><td>{name}</td></tr>'

CONTENT_TYPE = 'text/html; charset=UTF-8'

index = None  # a UnicodeNameIndex instance
|
||||
|
||||
|
||||
@asyncio.coroutine
def handle(request):
    """Serve the search page, including result rows when a query is given.

    The ``query`` GET parameter is looked up in the module-level ``index``;
    the response is the PAGE_TPL page as HTML.
    """
    # Local import so this block is self-contained; the file's import list
    # is untouched.
    import html

    query = request.GET.get('query', '')
    print('Query: {!r}'.format(query))
    if query:
        descriptions = list(index.find_descriptions(query))
        # _asdict() is the supported way to turn a namedtuple into keyword
        # arguments; vars() raises TypeError on namedtuples in Python >= 3.5.
        # Every field is escaped because matched characters include '<', '&'
        # and friends.
        res = '\n'.join(
            ROW_TPL.format(**{field: html.escape(value)
                              for field, value in descr._asdict().items()})
            for descr in descriptions)
        msg = index.status(query, len(descriptions))
    else:
        descriptions = []
        res = ''
        msg = 'Type words describing characters.'

    # The query comes straight from the URL and is echoed both in the input
    # value and in the status message: escape it to prevent HTML injection.
    text = PAGE_TPL.format(query=html.escape(query), result=res,
                           message=html.escape(msg), links=LINKS_HTML)
    print('Sending {} results'.format(len(descriptions)))
    return web.Response(content_type=CONTENT_TYPE, text=text)
|
||||
|
||||
|
||||
@asyncio.coroutine
def init(loop, address, port):
    """Create the aiohttp application, route GET / to ``handle`` and start
    listening on ``address``:``port``; prints the bound socket name."""
    application = web.Application(loop=loop)
    application.router.add_route('GET', '/', handle)

    protocol_factory = application.make_handler()
    server = yield from loop.create_server(protocol_factory, address, port)
    bound_to = server.sockets[0].getsockname()
    print('Serving on {}. Hit CTRL-C to stop.'.format(bound_to))
|
||||
|
||||
|
||||
def main(address="127.0.0.1", port=8888):
    """Start the HTTP server and run the event loop until interrupted.

    ``port`` may be a string (it comes from the command line) and is
    converted with ``int``.
    """
    port = int(port)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(init(loop, address, port))
    try:
        loop.run_forever()
    except KeyboardInterrupt:
        # The startup message tells the user to hit CTRL-C: stop quietly
        # instead of ending with a traceback.
        print('Stopped.')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The request handler reads the module-level ``index``; create it before
    # the server starts. Optional command-line arguments: address, port.
    cli_args = sys.argv[1:]
    index = UnicodeNameIndex()
    main(*cli_args)
|
||||
130
concurrency/charfinder/http_charfinder2.py
Executable file
130
concurrency/charfinder/http_charfinder2.py
Executable file
@@ -0,0 +1,130 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import asyncio
|
||||
import urllib
|
||||
import json
|
||||
from aiohttp import web
|
||||
|
||||
from charfinder import UnicodeNameIndex
|
||||
|
||||
# Page template filled with str.format; fields: links, message.
# Literal braces of the embedded JavaScript are doubled ({{ and }}) so that
# str.format does not try to interpret them as replacement fields.
# The form is not wrapped in <p>: a <form> start tag implicitly closes an open
# <p>, so that wrapper only produced a stray empty paragraph.
PAGE_TPL = '''
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Charserver</title>
    <script type="text/javascript">
      // Named fillTable to match the button's onclick attribute.
      function fillTable() {{
        var table = document.getElementById("results");
        var sample = "ABCDE";
        // Index loop: for...in over a string yields indices, not characters.
        for (var i = 0; i < sample.length; i++) {{
          var char = sample.charAt(i);
          var code = char.charCodeAt(0);
          var tr = document.createElement('tr');
          tr.appendChild(document.createElement('td'));
          tr.appendChild(document.createElement('th'));
          var code_str = 'U+'+code.toString(16);
          tr.cells[0].appendChild(document.createTextNode(code_str));
          tr.cells[1].appendChild(document.createTextNode(char));
          table.appendChild(tr);
        }}
      }}
    </script>
  </head>
  <body>
    <form action="/">
      <input type="search" name="query" value="">
      <input type="submit" value="find" onclick="fillTable()">
      Examples: {links}
    </form>
    <p>{message}</p>
    <hr>
    <table id="results">
    </table>
  </body>
</html>
'''
|
||||
|
||||
# Words offered as clickable example queries.
EXAMPLE_WORDS = ('bismillah chess cat circled Malayalam digit Roman face Ethiopic'
                 ' black mark symbol dot operator Braille hexagram').split()

# The quotes shown inside the title attribute must be written as &quot;,
# otherwise the first inner double quote terminates the attribute value.
LINK_TPL = '<a href="/?query={0}" title="find &quot;{0}&quot;">{0}</a>'

# Example links, sorted case-insensitively.
LINKS_HTML = ', '.join(LINK_TPL.format(word)
                       for word in sorted(EXAMPLE_WORDS, key=str.upper))

ROW_TPL = '<tr id="{code_str}"><td>{code_str}</td><th>{char}</th><td>{name}</td></tr>'

HTML_TYPE = 'text/html; charset=UTF-8'
TEXT_TYPE = 'text/plain; charset=UTF-8'

# Upper bound on the number of characters returned by one /chars request.
RESULTS_PER_REQUEST = 15

index = None  # a UnicodeNameIndex instance
|
||||
|
||||
|
||||
@asyncio.coroutine
def form(request):
    """Serve the search form page with the default prompt message."""
    client = request.transport.get_extra_info('peername')
    print('Request from: {}, query: {!r}'.format(client, request.path_qs))
    page = PAGE_TPL.format(message='Type words describing characters.',
                           links=LINKS_HTML)
    return web.Response(content_type=HTML_TYPE, text=page)
|
||||
|
||||
|
||||
@asyncio.coroutine
def get_chars(request):
    """Return one page of characters matching ``query`` as a JSON object.

    GET parameters:
      query -- words to look up (empty or missing: empty result);
      start -- index of the first match to return (default 0);
      stop  -- index after the last match to return (default: no bound);
      limit -- maximum page size (default and upper bound:
               RESULTS_PER_REQUEST).

    Response fields: ``chars`` (the matching characters, with a newline
    after every 64th), ``start``, ``stop`` (index just past the last
    character actually returned) and ``total`` (number of matches for the
    whole query, so a client can tell whether more pages exist).

    Raises web.HTTPBadRequest when start, stop or limit is not a
    non-negative integer.
    """
    peername = request.transport.get_extra_info('peername')
    query = request.GET.get('query', '')
    print('Request from: {}, GET data: {!r}'.format(peername, dict(request.GET)))
    if query:
        try:
            start = int(request.GET.get('start', 0))
            stop = int(request.GET.get('stop', sys.maxsize))
            limit = int(request.GET.get('limit', RESULTS_PER_REQUEST))
        except ValueError:
            raise web.HTTPBadRequest()
        # Negative bounds would make the slicing in find_chars raise
        # ValueError; reject them as a client error instead.
        if start < 0 or stop < 0 or limit < 0:
            raise web.HTTPBadRequest()
        page_size = min(limit, RESULTS_PER_REQUEST)
        stop = min(stop, start + page_size)
        # find_chars sorts the complete result anyway, so listing all the
        # matches to learn the total adds no extra search work.
        matches = list(index.find_chars(query))
        total = len(matches)
        chars = matches[start:stop]
        stop = start + len(chars)
    else:
        chars = []
        start = 0
        stop = 0
        total = 0
    num_results = len(chars)
    text = ''.join(char if n % 64 else char+'\n'
                   for n, char in enumerate(chars, 1))
    response_data = {'total': total, 'start': start, 'stop': stop}
    print('Response to query: {query!r}, start: {start}, stop: {stop}'.format(
          query=query, **response_data))
    response_data['chars'] = text
    json_obj = json.dumps(response_data)
    print('Sending {} results'.format(num_results))
    # Allow pages served from another origin to call this endpoint.
    headers = {'Access-Control-Allow-Origin': '*'}
    return web.Response(content_type=TEXT_TYPE, headers=headers, text=json_obj)
|
||||
|
||||
|
||||
@asyncio.coroutine
def init(loop, address, port):
    """Create the aiohttp application, route GET /chars to ``get_chars`` and
    GET / to ``form``, then start listening on ``address``:``port``."""
    application = web.Application(loop=loop)
    routes = (('/chars', get_chars), ('/', form))
    for path, handler in routes:
        application.router.add_route('GET', path, handler)

    protocol_factory = application.make_handler()
    server = yield from loop.create_server(protocol_factory, address, port)
    bound_to = server.sockets[0].getsockname()
    print('Serving on {}. Hit CTRL-C to stop.'.format(bound_to))
|
||||
|
||||
|
||||
def main(address="127.0.0.1", port=8888):
    """Start the HTTP server and keep the event loop running until CTRL-C.

    ``port`` may be given as a string and is converted with ``int``.
    """
    port_number = int(port)
    event_loop = asyncio.get_event_loop()
    startup = init(event_loop, address, port_number)
    event_loop.run_until_complete(startup)
    try:
        event_loop.run_forever()
    except KeyboardInterrupt:
        print('Stopped.')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The request handlers read the module-level ``index``; create it before
    # the server starts. Optional command-line arguments: address, port.
    cli_args = sys.argv[1:]
    index = UnicodeNameIndex()
    main(*cli_args)
|
||||
61
concurrency/charfinder/tcp_charfinder.py
Executable file
61
concurrency/charfinder/tcp_charfinder.py
Executable file
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import asyncio
|
||||
|
||||
from charfinder import UnicodeNameIndex
|
||||
|
||||
# Line terminator appended to every line sent to a client.
CRLF = b'\r\n'
# Prompt written before each query is read.
PROMPT = b'?> '

# Shared UnicodeNameIndex instance; assigned when the module runs as a script.
index = None
|
||||
|
||||
|
||||
@asyncio.coroutine
def handle_queries(reader, writer):
    """Serve one client: prompt, read a query line, reply, repeat.

    The session ends when the client closes the connection (EOF) or sends a
    line starting with a control character (for instance CTRL-C or CTRL-D);
    undecodable input is treated as such a control character.
    """
    while True:
        writer.write(PROMPT)  # can't yield from!
        yield from writer.drain()  # must yield from!
        data = yield from reader.readline()
        if not data:
            # readline() returns b'' at EOF: the client is gone. Without
            # this check the empty query would loop forever, prompting a
            # closed connection.
            break
        try:
            query = data.decode().strip()
        except UnicodeDecodeError:
            query = '\x00'
        client = writer.get_extra_info('peername')
        print('Received from {}: {!r}'.format(client, query))
        if query:
            if ord(query[:1]) < 32:
                break
            lines = list(index.find_description_strs(query))
            if lines:
                writer.writelines(line.encode() + CRLF for line in lines)
            writer.write(index.status(query, len(lines)).encode() + CRLF)

            yield from writer.drain()
            print('Sent {} results'.format(len(lines)))

    print('Close the client socket')
    writer.close()
|
||||
|
||||
|
||||
def main(address='127.0.0.1', port=8888):
    """Run the TCP character-finder server until CTRL-C, then shut down.

    ``port`` may be given as a string and is converted with ``int``.
    """
    port_number = int(port)
    event_loop = asyncio.get_event_loop()
    server = event_loop.run_until_complete(
        asyncio.start_server(handle_queries, address, port_number,
                             loop=event_loop))

    bound_to = server.sockets[0].getsockname()
    print('Serving on {}. Hit CTRL-C to stop.'.format(bound_to))
    try:
        event_loop.run_forever()
    except KeyboardInterrupt:  # CTRL+C pressed
        pass

    # Orderly shutdown: stop listening, wait for the close, release the loop.
    server.close()
    event_loop.run_until_complete(server.wait_closed())
    event_loop.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The connection handler reads the module-level ``index``; create it
    # before the server starts. Optional command-line arguments: address, port.
    cli_args = sys.argv[1:]
    index = UnicodeNameIndex()
    main(*cli_args)
|
||||
114
concurrency/charfinder/test_charfinder.py
Normal file
114
concurrency/charfinder/test_charfinder.py
Normal file
@@ -0,0 +1,114 @@
|
||||
import pytest
|
||||
|
||||
from charfinder import UnicodeNameIndex, tokenize, sample_chars, query_type
|
||||
from unicodedata import name
|
||||
|
||||
|
||||
@pytest.fixture
def sample_index():
    """Index built from ``sample_chars`` only."""
    small_index = UnicodeNameIndex(sample_chars)
    return small_index
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def full_index():
    """Index created with default arguments, shared by the whole module."""
    complete_index = UnicodeNameIndex()
    return complete_index
|
||||
|
||||
|
||||
def test_query_type():
    """A plain word is classified as a NAME query."""
    kind = query_type('blue')
    assert kind == 'NAME'
|
||||
|
||||
|
||||
def test_tokenize():
    """tokenize splits on non-word characters and uppercases each word."""
    cases = [
        ('', []),
        ('a b', ['A', 'B']),
        ('a-b', ['A', 'B']),
        ('abc', ['ABC']),
        ('café', ['CAFÉ']),
    ]
    for text, expected in cases:
        assert list(tokenize(text)) == expected
|
||||
|
||||
|
||||
def test_index():
    """The sample characters produce an index of nine distinct words."""
    word_count = len(UnicodeNameIndex(sample_chars))
    assert word_count == 9
|
||||
|
||||
|
||||
def test_find_word_no_match(sample_index):
    """An unknown word yields no code points."""
    found = list(sample_index.find_codes('qwertyuiop'))
    assert len(found) == 0
|
||||
|
||||
|
||||
def test_find_word_1_match(sample_index):
    """'currency' matches exactly the EURO-CURRENCY SIGN."""
    found = []
    for code in sample_index.find_codes('currency'):
        found.append((code, name(chr(code))))
    assert found == [(8352, 'EURO-CURRENCY SIGN')]
|
||||
|
||||
|
||||
def test_find_word_1_match_character_result(sample_index):
    """find_chars yields the character itself for a single match."""
    names = list(map(name, sample_index.find_chars('currency')))
    assert names == ['EURO-CURRENCY SIGN']
|
||||
|
||||
|
||||
def test_find_word_2_matches(sample_index):
    """'Euro' (any case) matches both euro signs, in code point order."""
    expected = [
        (8352, 'EURO-CURRENCY SIGN'),
        (8364, 'EURO SIGN'),
    ]
    found = []
    for code in sample_index.find_codes('Euro'):
        found.append((code, name(chr(code))))
    assert found == expected
|
||||
|
||||
|
||||
def test_find_2_words_no_matches(sample_index):
    """Two known words that never occur in the same name match nothing."""
    found = list(sample_index.find_codes('Euro letter'))
    assert len(found) == 0
|
||||
|
||||
|
||||
def test_find_2_words_no_matches_because_one_not_found(sample_index):
    """One unknown word is enough to make the whole query match nothing."""
    found = list(sample_index.find_codes('letter qwertyuiop'))
    assert len(found) == 0
|
||||
|
||||
|
||||
def test_find_2_words_1_match(sample_index):
    """Word order in the query does not matter: 'sign dollar' has one match."""
    found = list(sample_index.find_codes('sign dollar'))
    assert len(found) == 1
|
||||
|
||||
|
||||
def test_find_2_words_2_matches(sample_index):
    """'latin letter' matches two of the sample characters."""
    found = list(sample_index.find_codes('latin letter'))
    assert len(found) == 2
|
||||
|
||||
|
||||
def test_find_codes_many_matches_full(full_index):
    """The full index holds more than 7000 characters named '... LETTER ...'."""
    found = list(full_index.find_codes('letter'))
    assert len(found) > 7000
|
||||
|
||||
|
||||
def test_find_1_word_1_match_full(full_index):
    """'registered' matches only the REGISTERED SIGN in the full index."""
    found = []
    for code in full_index.find_codes('registered'):
        found.append((code, name(chr(code))))
    assert found == [(174, 'REGISTERED SIGN')]
|
||||
|
||||
|
||||
def test_find_1_word_2_matches_full(full_index):
    """'rook' matches two characters in the full index."""
    found = list(full_index.find_codes('rook'))
    assert len(found) == 2
|
||||
|
||||
|
||||
def test_find_3_words_no_matches_full(full_index):
    """A three-word query with no common character matches nothing."""
    found = list(full_index.find_codes('no such character'))
    assert len(found) == 0
|
||||
|
||||
|
||||
def test_find_with_start(sample_index):
    """start=1 skips the first of the three 'sign' matches."""
    expected = [(8352, 'EURO-CURRENCY SIGN'), (8364, 'EURO SIGN')]
    found = []
    for code in sample_index.find_codes('sign', 1):
        found.append((code, name(chr(code))))
    assert found == expected
|
||||
|
||||
|
||||
def test_find_with_stop(sample_index):
    """stop=2 keeps only the first two of the three 'sign' matches."""
    expected = [(36, 'DOLLAR SIGN'), (8352, 'EURO-CURRENCY SIGN')]
    found = []
    for code in sample_index.find_codes('sign', 0, 2):
        found.append((code, name(chr(code))))
    assert found == expected
|
||||
|
||||
|
||||
def test_find_with_start_stop(sample_index):
    """start=1, stop=2 selects only the middle 'sign' match."""
    found = []
    for code in sample_index.find_codes('sign', 1, 2):
        found.append((code, name(chr(code))))
    assert found == [(8352, 'EURO-CURRENCY SIGN')]
|
||||
|
||||
Reference in New Issue
Block a user