concurrency examples
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,6 +1,7 @@
|
|||||||
concurrency/flags/img/*.gif
|
concurrency/flags/img/*.gif
|
||||||
concurrency/charfinder_index.pickle
|
concurrency/charfinder/charfinder_index.pickle
|
||||||
metaprog/oscon-schedule/data/schedule?_db
|
metaprog/oscon-schedule/data/schedule?_db
|
||||||
|
concurrency/wikipedia/fixture/docroot/
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|||||||
@@ -7,20 +7,21 @@
|
|||||||
//(function() {
|
//(function() {
|
||||||
var BASE_URL = 'http://127.0.0.1:8888/chars';
|
var BASE_URL = 'http://127.0.0.1:8888/chars';
|
||||||
var RESULTS_PER_REQUEST = 10;
|
var RESULTS_PER_REQUEST = 10;
|
||||||
var REQUEST_DELAY = 1000; // in milliseconds
|
var REQUEST_DELAY = 100; // in milliseconds
|
||||||
var httpRequest = new XMLHttpRequest();
|
var httpRequest = new XMLHttpRequest();
|
||||||
httpRequest.onreadystatechange = processResponse;
|
httpRequest.onreadystatechange = processResponse;
|
||||||
|
|
||||||
function requestMaker(start) {
|
function requestMaker(start) {
|
||||||
var makeRequest = function (event) {
|
var makeRequest = function (event) {
|
||||||
var query = document.getElementById('queryField').value;
|
var query = document.getElementById('queryField').value;
|
||||||
var limit = RESULTS_PER_REQUEST;
|
var stop = start + RESULTS_PER_REQUEST;
|
||||||
httpRequest.open('GET', BASE_URL+'?query='+query+'&limit='+limit);
|
var params = '?query='+query+'&start='+start+'&stop='+stop;
|
||||||
|
httpRequest.open('GET', BASE_URL+params);
|
||||||
httpRequest.send();
|
httpRequest.send();
|
||||||
document.getElementById('message').textContent = 'Query: ' + query;
|
document.getElementById('message').textContent = 'Query: ' + query;
|
||||||
var table = document.getElementById('results');
|
var table = document.getElementById('results');
|
||||||
var tr;
|
var tr;
|
||||||
while (tr = table.lastChild) table.removeChild(tr);
|
if (start == 0) while (tr = table.lastChild) table.removeChild(tr);
|
||||||
return false; // don't submit form
|
return false; // don't submit form
|
||||||
}
|
}
|
||||||
return makeRequest;
|
return makeRequest;
|
||||||
@@ -104,7 +105,7 @@
|
|||||||
var table = document.getElementById('results');
|
var table = document.getElementById('results');
|
||||||
var tr;
|
var tr;
|
||||||
var characters = getSymbols(results.chars);
|
var characters = getSymbols(results.chars);
|
||||||
for (var i=results.start; i < results.stop; i++) {
|
for (var i=0; i < characters.length; i++) {
|
||||||
ch = characters[i];
|
ch = characters[i];
|
||||||
if (ch == '\n') continue;
|
if (ch == '\n') continue;
|
||||||
if (ch == '\x00') break;
|
if (ch == '\x00') break;
|
||||||
@@ -116,9 +117,10 @@
|
|||||||
tr.cells[1].appendChild(document.createTextNode(ch));
|
tr.cells[1].appendChild(document.createTextNode(ch));
|
||||||
tr.id = hexCode;
|
tr.id = hexCode;
|
||||||
table.appendChild(tr);
|
table.appendChild(tr);
|
||||||
if (results.stop < results.total) {
|
}
|
||||||
setTimeout(requestMaker(results.stop)(), REQUEST_DELAY);
|
// setTimeout(getDescriptions, REQUEST_DELAY/2)
|
||||||
}
|
if (results.stop < results.total) {
|
||||||
|
setTimeout(requestMaker(results.stop), REQUEST_DELAY);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
window.onload = function() {
|
window.onload = function() {
|
||||||
|
|||||||
@@ -98,6 +98,8 @@ def query_type(text):
|
|||||||
|
|
||||||
CharDescription = namedtuple('CharDescription', 'code_str char name')
|
CharDescription = namedtuple('CharDescription', 'code_str char name')
|
||||||
|
|
||||||
|
QueryResult = namedtuple('QueryResult', 'len items')
|
||||||
|
|
||||||
class UnicodeNameIndex:
|
class UnicodeNameIndex:
|
||||||
|
|
||||||
def __init__(self, chars=None):
|
def __init__(self, chars=None):
|
||||||
@@ -169,12 +171,14 @@ class UnicodeNameIndex:
|
|||||||
if result_sets:
|
if result_sets:
|
||||||
result = result_sets[0].intersection(*result_sets[1:])
|
result = result_sets[0].intersection(*result_sets[1:])
|
||||||
result = sorted(result) # must sort for consistency
|
result = sorted(result) # must sort for consistency
|
||||||
for char in itertools.islice(result, start, stop):
|
result_iter = itertools.islice(result, start, stop)
|
||||||
yield char
|
return QueryResult(len(result),
|
||||||
|
(char for char in result_iter))
|
||||||
|
return QueryResult(0, ())
|
||||||
|
|
||||||
def find_codes(self, query, start=0, stop=None):
|
def find_codes(self, query, start=0, stop=None):
|
||||||
return (ord(char) for char
|
return (ord(char) for char
|
||||||
in self.find_chars(query, start, stop))
|
in self.find_chars(query, start, stop).items)
|
||||||
|
|
||||||
def describe(self, char):
|
def describe(self, char):
|
||||||
code_str = 'U+{:04X}'.format(ord(char))
|
code_str = 'U+{:04X}'.format(ord(char))
|
||||||
@@ -185,6 +189,10 @@ class UnicodeNameIndex:
|
|||||||
for char in self.find_chars(query, start, stop):
|
for char in self.find_chars(query, start, stop):
|
||||||
yield self.describe(char)
|
yield self.describe(char)
|
||||||
|
|
||||||
|
def get_descriptions(self, chars):
|
||||||
|
for char in chars:
|
||||||
|
yield self.describe(char)
|
||||||
|
|
||||||
def describe_str(self, char):
|
def describe_str(self, char):
|
||||||
return '{:7}\t{}\t{}'.format(*self.describe(char))
|
return '{:7}\t{}\t{}'.format(*self.describe(char))
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
@@ -75,9 +75,8 @@ def form(request):
|
|||||||
@asyncio.coroutine
|
@asyncio.coroutine
|
||||||
def get_chars(request):
|
def get_chars(request):
|
||||||
peername = request.transport.get_extra_info('peername')
|
peername = request.transport.get_extra_info('peername')
|
||||||
query = request.GET.get('query', '')
|
|
||||||
limit = request.GET.get('query', 0)
|
|
||||||
print('Request from: {}, GET data: {!r}'.format(peername, dict(request.GET)))
|
print('Request from: {}, GET data: {!r}'.format(peername, dict(request.GET)))
|
||||||
|
query = request.GET.get('query', '')
|
||||||
if query:
|
if query:
|
||||||
try:
|
try:
|
||||||
start = int(request.GET.get('start', 0))
|
start = int(request.GET.get('start', 0))
|
||||||
@@ -85,12 +84,9 @@ def get_chars(request):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
raise web.HTTPBadRequest()
|
raise web.HTTPBadRequest()
|
||||||
stop = min(stop, start+RESULTS_PER_REQUEST)
|
stop = min(stop, start+RESULTS_PER_REQUEST)
|
||||||
chars = list(index.find_chars(query, start, stop))
|
num_results, chars = index.find_chars(query, start, stop)
|
||||||
else:
|
else:
|
||||||
chars = []
|
raise web.HTTPBadRequest()
|
||||||
start = 0
|
|
||||||
stop = 0
|
|
||||||
num_results = len(chars)
|
|
||||||
text = ''.join(char if n % 64 else char+'\n'
|
text = ''.join(char if n % 64 else char+'\n'
|
||||||
for n, char in enumerate(chars, 1))
|
for n, char in enumerate(chars, 1))
|
||||||
response_data = {'total': num_results, 'start': start, 'stop': stop}
|
response_data = {'total': num_results, 'start': start, 'stop': stop}
|
||||||
@@ -98,7 +94,7 @@ def get_chars(request):
|
|||||||
query=query, **response_data))
|
query=query, **response_data))
|
||||||
response_data['chars'] = text
|
response_data['chars'] = text
|
||||||
json_obj = json.dumps(response_data)
|
json_obj = json.dumps(response_data)
|
||||||
print('Sending {} results'.format(num_results))
|
print('Sending {} characters'.format(len(text)))
|
||||||
headers = {'Access-Control-Allow-Origin': '*'}
|
headers = {'Access-Control-Allow-Origin': '*'}
|
||||||
return web.Response(content_type=TEXT_TYPE, headers=headers, text=json_obj)
|
return web.Response(content_type=TEXT_TYPE, headers=headers, text=json_obj)
|
||||||
|
|
||||||
|
|||||||
@@ -43,7 +43,8 @@ def test_find_word_1_match(sample_index):
|
|||||||
|
|
||||||
|
|
||||||
def test_find_word_1_match_character_result(sample_index):
|
def test_find_word_1_match_character_result(sample_index):
|
||||||
res = [name(char) for char in sample_index.find_chars('currency')]
|
res = [name(char) for char in
|
||||||
|
sample_index.find_chars('currency').items]
|
||||||
assert res == ['EURO-CURRENCY SIGN']
|
assert res == ['EURO-CURRENCY SIGN']
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
64
concurrency/wikipedia/build_fixture.py
Normal file
64
concurrency/wikipedia/build_fixture.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
|
||||||
|
from daypicts import get_picture_url, validate_date, gen_dates
|
||||||
|
from daypicts import NoPictureForDate
|
||||||
|
from daypicts import POTD_PATH
|
||||||
|
|
||||||
|
FIXTURE_DIR = 'fixture/'
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args(argv):
|
||||||
|
parser = argparse.ArgumentParser(description=main.__doc__)
|
||||||
|
date_help = 'YYYY-MM-DD or YYYY-MM or YYYY: year, month and day'
|
||||||
|
parser.add_argument('date', help=date_help)
|
||||||
|
|
||||||
|
args = parser.parse_args(argv)
|
||||||
|
|
||||||
|
try:
|
||||||
|
iso_parts = validate_date(args.date)
|
||||||
|
except ValueError as exc:
|
||||||
|
print('error:', exc.args[0])
|
||||||
|
parser.print_usage()
|
||||||
|
sys.exit(2)
|
||||||
|
|
||||||
|
dates = list(gen_dates(iso_parts))
|
||||||
|
if len(dates) == 1:
|
||||||
|
print('-> Date: ', dates[0])
|
||||||
|
else:
|
||||||
|
fmt = '-> {} days: {}...{}'
|
||||||
|
print(fmt.format(len(dates), dates[0], dates[-1]))
|
||||||
|
|
||||||
|
return dates, args
|
||||||
|
|
||||||
|
|
||||||
|
def save_picture_urls(dates, save_path):
|
||||||
|
for date in dates:
|
||||||
|
try:
|
||||||
|
url = get_picture_url(date)
|
||||||
|
except NoPictureForDate as exc:
|
||||||
|
snippet = repr(exc)
|
||||||
|
else:
|
||||||
|
snippet = url.replace('http://', 'src="//') + '"'
|
||||||
|
print(date, end=' ')
|
||||||
|
print(snippet)
|
||||||
|
with open(os.path.join(save_path, date), 'w') as fp:
|
||||||
|
fp.write(snippet)
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv):
|
||||||
|
"""Build test fixture from Wikipedia "POTD" data"""
|
||||||
|
|
||||||
|
save_path = os.path.join(FIXTURE_DIR,POTD_PATH)
|
||||||
|
try:
|
||||||
|
os.makedirs(save_path)
|
||||||
|
except FileExistsError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
dates, args = parse_args(argv)
|
||||||
|
|
||||||
|
save_picture_urls(dates, save_path)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main(sys.argv[1:])
|
||||||
@@ -25,7 +25,8 @@ import datetime
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
SAVE_DIR = 'pictures/'
|
SAVE_DIR = 'pictures/'
|
||||||
POTD_BASE_URL = 'http://en.wikipedia.org/wiki/Template:POTD/'
|
POTD_PATH = 'Template:POTD/'
|
||||||
|
POTD_BASE_URL = 'http://en.wikipedia.org/wiki/' + POTD_PATH
|
||||||
POTD_IMAGE_RE = re.compile(r'src="(//upload\..*?)"')
|
POTD_IMAGE_RE = re.compile(r'src="(//upload\..*?)"')
|
||||||
PODT_EARLIEST_TEMPLATE = '2007-01-01'
|
PODT_EARLIEST_TEMPLATE = '2007-01-01'
|
||||||
|
|
||||||
@@ -84,7 +85,7 @@ def validate_date(text):
|
|||||||
test_parts = parts[:]
|
test_parts = parts[:]
|
||||||
while len(test_parts) < 3:
|
while len(test_parts) < 3:
|
||||||
test_parts.append(1)
|
test_parts.append(1)
|
||||||
date = datetime.datetime(*(int(part) for part in test_parts))
|
date = datetime.date(*(int(part) for part in test_parts))
|
||||||
iso_date = date.strftime(ISO_DATE_FMT)
|
iso_date = date.strftime(ISO_DATE_FMT)
|
||||||
iso_date = iso_date[:1+len(parts)*3]
|
iso_date = iso_date[:1+len(parts)*3]
|
||||||
if iso_date < PODT_EARLIEST_TEMPLATE:
|
if iso_date < PODT_EARLIEST_TEMPLATE:
|
||||||
@@ -95,7 +96,7 @@ def validate_date(text):
|
|||||||
def gen_month_dates(iso_month):
|
def gen_month_dates(iso_month):
|
||||||
first = datetime.datetime.strptime(iso_month+'-01', ISO_DATE_FMT)
|
first = datetime.datetime.strptime(iso_month+'-01', ISO_DATE_FMT)
|
||||||
one_day = datetime.timedelta(days=1)
|
one_day = datetime.timedelta(days=1)
|
||||||
date = first
|
date = first.date()
|
||||||
while date.month == first.month:
|
while date.month == first.month:
|
||||||
yield date.strftime(ISO_DATE_FMT)
|
yield date.strftime(ISO_DATE_FMT)
|
||||||
date += one_day
|
date += one_day
|
||||||
@@ -115,6 +116,26 @@ def gen_dates(iso_parts):
|
|||||||
yield iso_parts
|
yield iso_parts
|
||||||
|
|
||||||
|
|
||||||
|
def get_picture_urls(dates, verbose=False, save_fixture=False):
|
||||||
|
urls = []
|
||||||
|
count = 0
|
||||||
|
for date in dates:
|
||||||
|
try:
|
||||||
|
url = get_picture_url(date)
|
||||||
|
except NoPictureForDate as exc:
|
||||||
|
if verbose:
|
||||||
|
print('*** {!r} ***'.format(exc))
|
||||||
|
continue
|
||||||
|
count += 1
|
||||||
|
if verbose:
|
||||||
|
print(format(count, '3d'), end=' ')
|
||||||
|
print(url.split('/')[-1])
|
||||||
|
else:
|
||||||
|
print(url)
|
||||||
|
urls.append(url)
|
||||||
|
return urls
|
||||||
|
|
||||||
|
|
||||||
def parse_args(argv):
|
def parse_args(argv):
|
||||||
parser = argparse.ArgumentParser(description=main.__doc__)
|
parser = argparse.ArgumentParser(description=main.__doc__)
|
||||||
date_help = 'YYYY-MM-DD or YYYY-MM or YYYY: year, month and day'
|
date_help = 'YYYY-MM-DD or YYYY-MM or YYYY: year, month and day'
|
||||||
@@ -123,6 +144,8 @@ def parse_args(argv):
|
|||||||
help='maximum number of items to fetch')
|
help='maximum number of items to fetch')
|
||||||
parser.add_argument('-u', '--url_only', action='store_true',
|
parser.add_argument('-u', '--url_only', action='store_true',
|
||||||
help='get picture URLS only')
|
help='get picture URLS only')
|
||||||
|
parser.add_argument('-f', '--fixture_save', action='store_true',
|
||||||
|
help='save data for local test fixture')
|
||||||
parser.add_argument('-v', '--verbose', action='store_true',
|
parser.add_argument('-v', '--verbose', action='store_true',
|
||||||
help='display progress information')
|
help='display progress information')
|
||||||
args = parser.parse_args(argv)
|
args = parser.parse_args(argv)
|
||||||
@@ -145,26 +168,6 @@ def parse_args(argv):
|
|||||||
return dates, args
|
return dates, args
|
||||||
|
|
||||||
|
|
||||||
def get_picture_urls(dates, verbose=False):
|
|
||||||
urls = []
|
|
||||||
count = 0
|
|
||||||
for date in dates:
|
|
||||||
try:
|
|
||||||
url = get_picture_url(date)
|
|
||||||
except NoPictureForDate as exc:
|
|
||||||
if verbose:
|
|
||||||
print('*** {!r} ***'.format(exc))
|
|
||||||
continue
|
|
||||||
count += 1
|
|
||||||
if verbose:
|
|
||||||
print(format(count, '3d'), end=' ')
|
|
||||||
print(url.split('/')[-1])
|
|
||||||
else:
|
|
||||||
print(url)
|
|
||||||
urls.append(url)
|
|
||||||
return urls
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv, get_picture_urls):
|
def main(argv, get_picture_urls):
|
||||||
"""Get Wikipedia "Picture of The Day" for date, month or year"""
|
"""Get Wikipedia "Picture of The Day" for date, month or year"""
|
||||||
|
|
||||||
@@ -172,7 +175,7 @@ def main(argv, get_picture_urls):
|
|||||||
|
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
|
|
||||||
urls = get_picture_urls(dates, args.verbose)
|
urls = get_picture_urls(dates, args.verbose, args.fixture_save)
|
||||||
|
|
||||||
elapsed = time.time() - t0
|
elapsed = time.time() - t0
|
||||||
if args.verbose:
|
if args.verbose:
|
||||||
|
|||||||
@@ -6,19 +6,19 @@ import sys
|
|||||||
import asyncio
|
import asyncio
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
|
||||||
from daypicts import main
|
from daypicts import main, NoPictureForDate
|
||||||
from daypicts import NoPictureForDate
|
from daypicts import POTD_BASE_URL, POTD_IMAGE_RE
|
||||||
from daypicts import POTD_BASE_URL
|
|
||||||
from daypicts import POTD_IMAGE_RE
|
|
||||||
|
|
||||||
GLOBAL_TIMEOUT = 300 # seconds
|
GLOBAL_TIMEOUT = 300 # seconds
|
||||||
|
MAX_CONCURRENT_REQUESTS = 30
|
||||||
|
|
||||||
|
|
||||||
@asyncio.coroutine
|
@asyncio.coroutine
|
||||||
def get_picture_url(iso_date):
|
def get_picture_url(iso_date, semaphore):
|
||||||
page_url = POTD_BASE_URL+iso_date
|
page_url = POTD_BASE_URL+iso_date
|
||||||
response = yield from aiohttp.request('GET', page_url)
|
with (yield from semaphore):
|
||||||
text = yield from response.text()
|
response = yield from aiohttp.request('GET', page_url)
|
||||||
|
text = yield from response.text()
|
||||||
pict_url = POTD_IMAGE_RE.search(text)
|
pict_url = POTD_IMAGE_RE.search(text)
|
||||||
if pict_url is None:
|
if pict_url is None:
|
||||||
raise NoPictureForDate(iso_date)
|
raise NoPictureForDate(iso_date)
|
||||||
@@ -27,7 +27,8 @@ def get_picture_url(iso_date):
|
|||||||
|
|
||||||
@asyncio.coroutine
|
@asyncio.coroutine
|
||||||
def get_picture_urls(dates, verbose=False):
|
def get_picture_urls(dates, verbose=False):
|
||||||
tasks = [get_picture_url(date) for date in dates]
|
semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
|
||||||
|
tasks = [get_picture_url(date, semaphore) for date in dates]
|
||||||
urls = []
|
urls = []
|
||||||
count = 0
|
count = 0
|
||||||
# get results as jobs are done
|
# get results as jobs are done
|
||||||
|
|||||||
@@ -7,13 +7,12 @@ from concurrent import futures
|
|||||||
|
|
||||||
from daypicts import main, get_picture_url, NoPictureForDate
|
from daypicts import main, get_picture_url, NoPictureForDate
|
||||||
|
|
||||||
MAX_NUM_THREADS = 400
|
|
||||||
GLOBAL_TIMEOUT = 300 # seconds
|
GLOBAL_TIMEOUT = 300 # seconds
|
||||||
|
MAX_CONCURRENT_REQUESTS = 30
|
||||||
|
|
||||||
|
|
||||||
def get_picture_urls(dates, verbose=False):
|
def get_picture_urls(dates, verbose=False):
|
||||||
num_threads = min(len(dates), MAX_NUM_THREADS)
|
pool = futures.ThreadPoolExecutor(MAX_CONCURRENT_REQUESTS)
|
||||||
pool = futures.ThreadPoolExecutor(num_threads)
|
|
||||||
|
|
||||||
pending = {}
|
pending = {}
|
||||||
for date in dates:
|
for date in dates:
|
||||||
|
|||||||
BIN
concurrency/wikipedia/fixture/docroot.zip
Normal file
BIN
concurrency/wikipedia/fixture/docroot.zip
Normal file
Binary file not shown.
Reference in New Issue
Block a user