update from Atlas with major reorg

This commit is contained in:
Luciano Ramalho
2015-04-17 21:29:30 -03:00
parent 57902d31b5
commit a786180239
134 changed files with 369 additions and 520 deletions

View File

@@ -0,0 +1,43 @@
import timeit
def exists_and_truthy_hasattr(obj, attr_name):
    """Report whether *obj* has a truthy attribute *attr_name* (hasattr strategy)."""
    if not hasattr(obj, attr_name):
        return False
    return bool(getattr(obj, attr_name))
def exists_and_truthy_getattr(obj, attr_name):
    """Report whether *obj* has a truthy attribute *attr_name* (getattr-default strategy)."""
    value = getattr(obj, attr_name, False)
    return bool(value)
def exists_and_truthy_tryget(obj, attr_name):
    """Report whether *obj* has a truthy attribute *attr_name* (EAFP strategy)."""
    try:
        value = bool(getattr(obj, attr_name))
    except AttributeError:
        value = False
    return value
class Gizmo:
    # Minimal sample object for the attribute-access timings below.
    def __init__(self):
        # The attribute whose presence/truthiness the benchmark probes.
        self.gadget = True

gizmo = Gizmo()  # shared instance imported by the timeit setup strings
test_keys = 'hasattr', 'getattr', 'tryget'  # suffixes of the functions under test
def average(timings):
    """Return the mean of *timings* with the first and last samples discarded.

    The extremes are trimmed to reduce warm-up and outlier noise.  When
    fewer than three samples are given, the plain mean is returned instead
    (the original raised ZeroDivisionError on a trimmed-to-empty sample).

    Raises ValueError when *timings* is empty.
    """
    sample = timings[1:-1] if len(timings) > 2 else timings
    if not sample:
        raise ValueError('average() requires at least one timing')
    return sum(sample) / len(sample)
def do_tests():
    """Time each attribute-access strategy against the shared ``gizmo``."""
    for test_key in test_keys:
        func_name = 'exists_and_truthy_{}'.format(test_key)
        stmt = '{}(gizmo, "gadget")'.format(func_name)
        setup = 'from __main__ import gizmo, {}'.format(func_name)
        elapsed = average(timeit.repeat(stmt, repeat=5, setup=setup))
        print(test_key.rjust(7), format(elapsed, '0.5f'))
if __name__ == '__main__':
    # Time the three strategies with the attribute present, then absent.
    do_tests()
    del gizmo.gadget
    do_tests()

View File

@@ -0,0 +1,44 @@
import timeit
# Statement snippets timed by timeit; each binds ``feature`` using a
# different attribute-access strategy (LBYL, getattr default, EAFP).
test_hasattr = """
if hasattr(gizmo, 'gadget'):
    feature = gizmo.gadget
else:
    feature = None
"""

test_getattr = """
feature = getattr(gizmo, 'gadget', None)
"""

test_tryget = """
try:
    feature = getattr(gizmo, 'gadget')
except AttributeError:
    feature = None
"""

class Gizmo:
    # Minimal sample object probed by the snippets above.
    def __init__(self):
        self.gadget = True

gizmo = Gizmo()  # shared instance imported by the timeit setup string
test_keys = 'hasattr', 'getattr', 'tryget'  # suffixes of the test_* snippets
def test():
    """Time every test_* snippet with ``gizmo.gadget`` present and absent."""
    setup = 'from __main__ import gizmo'  # loop-invariant timeit setup
    for test_key in test_keys:
        snippet = globals()['test_' + test_key]
        t_present = min(timeit.repeat(snippet, setup=setup))
        # Remove the attribute to measure the miss path, then restore it.
        del gizmo.gadget
        t_absent = min(timeit.repeat(snippet, setup=setup))
        gizmo.gadget = True
        print('{:7} {:.3f} {:.3f}'.format(test_key, t_present, t_absent))
if __name__ == '__main__':
    test()  # run the benchmark when executed as a script

View File

@@ -0,0 +1,48 @@
# Test data for spherical coordinates computed by Vector.angles()
#
# π π/2 π/3 π/4
# 3.141592653590 1.570796326795 1.047197551197 0.785398163397
#
# azimuth
# x y θ
# x1 x2 r Φ1
1 1 1.414213562373 0.785398163397
1 0 1.000000000000 0.000000000000
0 1 1.000000000000 1.570796326795
0 0 0.000000000000 0.000000000000
1 -1 1.414213562373 5.497787143782
-1 1 1.414213562373 2.356194490192
0 -1 1.000000000000 4.712388980385
-1 -1 1.414213562373 3.926990816987
#
# x y z θ Φ
# x1 x2 x3 r Φ1 Φ2
1 1 1 1.732050807569 0.955316618125 0.785398163397
2 2 2 3.464101615138 0.955316618125 0.785398163397
0 0 0 0.000000000000 0.000000000000 0.000000000000
1 0 0 1.000000000000 0.000000000000 0.000000000000
0 1 0 1.000000000000 1.570796326795 0.000000000000
0 0 1 1.000000000000 1.570796326795 1.570796326795
1 1 0 1.414213562373 0.785398163397 0.000000000000
1 0 1 1.414213562373 0.785398163397 1.570796326795
0 1 1 1.414213562373 1.570796326795 0.785398163397
1 1 -1 1.732050807569 0.955316618125 5.497787143782
#
# x y z t θ Φ
# x1 x2 x3 x4 r Φ1 Φ2 Φ3
1 1 1 0 1.732050807569 0.955316618125 0.785398163397 0.000000000000
2 2 2 0 3.464101615138 0.955316618125 0.785398163397 0.000000000000
1 1 1 1 2.000000000000 1.047197551197 0.955316618125 0.785398163397
2 2 2 2 4.000000000000 1.047197551197 0.955316618125 0.785398163397
1 0 0 0 1.000000000000 0.000000000000 0.000000000000 0.000000000000
0 1 0 0 1.000000000000 1.570796326795 0.000000000000 0.000000000000
0 0 1 0 1.000000000000 1.570796326795 1.570796326795 0.000000000000
0 0 0 1 1.000000000000 1.570796326795 1.570796326795 1.570796326795
1 1 0 0 1.414213562373 0.785398163397 0.000000000000 0.000000000000
0 1 1 0 1.414213562373 1.570796326795 0.785398163397 0.000000000000
0 0 1 1 1.414213562373 1.570796326795 1.570796326795 0.785398163397
1 0 0 1 1.414213562373 0.785398163397 1.570796326795 1.570796326795
1 0 1 0 1.414213562373 0.785398163397 1.570796326795 0.000000000000
0 1 0 1 1.414213562373 1.570796326795 0.785398163397 1.570796326795
1 1 1 -1 2.000000000000 1.047197551197 0.955316618125 5.497787143782
-1 -1 -1 -1 2.000000000000 2.094395102393 2.186276035465 3.926990816987

View File

@@ -0,0 +1,85 @@
======================================
Pythonic way to sum n-th list element?
======================================
Examples inspired by Guy Middleton's question on Python-list, Fri Apr 18 22:21:08 CEST 2003. Message: https://mail.python.org/pipermail/python-list/2003-April/218568.html
Guy Middleton::
>>> my_list = [[1, 2, 3], [40, 50, 60], [9, 8, 7]]
>>> import functools as ft
>>> ft.reduce(lambda a, b: a+b, [sub[1] for sub in my_list])
60
LR::
>>> ft.reduce(lambda a, b: a + b[1], my_list, 0)
60
Fernando Perez::
>>> import numpy as np
>>> my_array = np.array(my_list)
>>> np.sum(my_array[:, 1])
60
Skip Montanaro::
>>> import operator
>>> ft.reduce(operator.add, [sub[1] for sub in my_list], 0)
60
>>> ft.reduce(operator.add, [sub[1] for sub in []])
Traceback (most recent call last):
...
TypeError: reduce() of empty sequence with no initial value
>>> ft.reduce(operator.add, [sub[1] for sub in []], 0)
0
Evan Simpson::
>>> total = 0
>>> for sub in my_list:
... total += sub[1]
>>> total
60
Alex Martelli (``sum`` was added in Python 2.3, released July 9, 2003)::
>>> sum([sub[1] for sub in my_list])
60
After generator expressions (added in Python 2.4, November 30, 2004)::
>>> sum(sub[1] for sub in my_list)
60
If you want the sum of a list of items, you should write it in a way
that looks like "the sum of a list of items", not in a way that looks
like "loop over these items, maintain another variable t, perform a
sequence of additions". Why do we have high level languages if not to
express our intentions at a higher level and let the language worry
about what low-level operations are needed to implement it?
David Eppstein
Alex Martelli
https://mail.python.org/pipermail/python-list/2003-April/186311.html
"The sum" is so frequently needed that I wouldn't mind at all if
Python singled it out as a built-in. But "reduce(operator.add, ..."
just isn't a great way to express it, in my opinion (and yet as an
old APL'er, and FP-liker, I _should_ like it -- but I don't).
https://mail.python.org/pipermail/python-list/2003-April/225323.html
Four years later, having coded a lot of Python, taught it widely,
written a lot about it, and so on, I've changed my mind: I now
think that reduce is more trouble than it's worth and Python
would be better off without it, if it was being designed from
scratch today -- it would not substantially reduce (:-) Python's
power and WOULD substantially ease the teaching/&c task. That's
not a strong-enough argument to REMOVE a builtin, of course, and
thus that's definitely NOT what I'm arguing for. But I do suggest
avoiding reduce in most cases -- that's all.

View File

@@ -0,0 +1,37 @@
"""
Test spherical coordinates in ``Vector`` class
"""
import sys
from vector_v5 import Vector
FIXTURE = 'spherical-coordinates.txt'
EPSILON = 10**-8
def parse_float_cells(cells):
    """Return the floats parsed from *cells*, silently skipping non-numeric cells."""
    parsed = []
    for cell in cells:
        try:
            parsed.append(float(cell))
        except ValueError:
            pass  # blank padding cell or header text
    return parsed
def load_fixture(verbose=False):
    """Check ``Vector.angles()`` against every data row of the fixture file.

    Each non-comment line holds tab-separated cells.  Raises AssertionError
    on the first magnitude or angle differing by EPSILON or more.
    """
    with open(FIXTURE, encoding='utf8') as text:
        for line in text:
            if line.startswith('#'):  # comment line
                continue
            cells = line.split('\t')
            # NOTE(review): columns 0-4 are taken as cartesian and 5+ as
            # spherical (r, then angles); this relies on the fixture padding
            # short rows with blank cells that parse_float_cells skips —
            # verify against the fixture layout.
            cartesian = parse_float_cells(cells[:5])
            spherical = parse_float_cells(cells[5:])
            v = Vector(cartesian)
            if verbose:
                print(repr(v), '\t->', spherical)
            diff = abs(abs(v) - spherical[0])
            assert diff < EPSILON, 'expected {}, got {}'.format(spherical[0], abs(v))
            assert all(abs(av - af) < EPSILON
                       for av, af in zip(v.angles(), spherical[1:])), (
                'expected {}, got {}'.format(spherical[1:], list(v.angles())))


if __name__ == '__main__':
    load_fixture('-v' in sys.argv)  # '-v' enables per-row output

View File

@@ -0,0 +1,147 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Charserver</title>
<script type="text/javascript">
//(function() {
var BASE_URL = 'http://127.0.0.1:8888/chars';
var RESULTS_PER_REQUEST = 10;
var REQUEST_DELAY = 100; // in milliseconds
var httpRequest = new XMLHttpRequest();
httpRequest.onreadystatechange = processResponse;
// Return an event handler that queries the server for the page of results
// beginning at `start`; a fresh closure is created per page.
function requestMaker(start) {
  var makeRequest = function (event) {
    var query = document.getElementById('queryField').value;
    var stop = start + RESULTS_PER_REQUEST;
    var params = '?query='+query+'&start='+start+'&stop='+stop;
    httpRequest.open('GET', BASE_URL+params);
    httpRequest.send();
    document.getElementById('message').textContent = 'Query: ' + query;
    var table = document.getElementById('results');
    var tr;
    // A fresh query (start == 0) clears previous results from the table.
    if (start == 0) while (tr = table.lastChild) table.removeChild(tr);
    return false; // don't submit form
  }
  return makeRequest;
}
// XMLHttpRequest state-change handler: once the response is complete,
// render it into the table, or alert on a non-200 status.
function processResponse() {
  if (httpRequest.readyState === 4) {  // DONE
    var query = document.getElementById('queryField').value;
    if (httpRequest.status === 200) {
      fillTable(httpRequest.responseText);
    } else {
      alert('query: ' + query + '\nstatus: '+httpRequest.status);
    }
  }
}
function getSymbols(string) {
  // Split `string` into an array of full Unicode characters, keeping
  // surrogate pairs (code points above U+FFFF) together as one entry.
  var output = [];
  var index = 0;
  while (index < string.length) {
    var character = string.charAt(index);
    var charCode = character.charCodeAt(0);
    index += 1;
    if (charCode >= 0xD800 && charCode <= 0xDBFF) {
      // High surrogate: consume the following low surrogate as well.
      character += string.charAt(index);
      index += 1;
    }
    output.push(character);
  }
  return output;
}
// from: https://developer.mozilla.org/...
// en-US/docs/Web/JavaScript/Reference/Global_Objects/String/charCodeAt
// Return the full code point at *character* index `idx` of `str`, combining
// surrogate pairs (unlike charCodeAt, which returns one UTF-16 code unit).
function knownCharCodeAt(str, idx) {
  str += '';  // coerce to string
  var code,
      end = str.length;
  var surrogatePairs = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
  // Advance `idx` past one extra code unit for every surrogate pair that
  // precedes it, converting a character index into a code-unit index.
  while ((surrogatePairs.exec(str)) != null) {
    var li = surrogatePairs.lastIndex;
    if (li - 2 < idx) {
      idx++;
    }
    else {
      break;
    }
  }
  if (idx >= end || idx < 0) {
    return NaN;
  }
  code = str.charCodeAt(idx);
  var hi, low;
  if (0xD800 <= code && code <= 0xDBFF) {
    hi = code;
    // Go one further: this code unit is the high half of a surrogate pair.
    low = str.charCodeAt(idx + 1);
    return ((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
  }
  return code;
}
// Return the 'U+XXXX' label (upper-case hex, zero-padded to >= 4 digits)
// for a single character, including astral-plane characters.
function codePointStr(uniChar) {
  if (uniChar.length == 1) {
    var code = uniChar.charCodeAt(0);
  } else { // surrogate pair: characters after U+FFFF
    var code = knownCharCodeAt(uniChar, 0);
  };
  var codeStr = code.toString(16);
  // Array(n+1).join('0') yields n zeros — left-pad to four digits.
  var padding = Array(Math.max(4 - codeStr.length + 1, 0)).join(0);
  return 'U+' + padding + codeStr.toUpperCase();
}
// Parse the JSON response and append one row per character to the results
// table; schedules the request for the next page when more results remain.
function fillTable(responseData) {
  var results = JSON.parse(responseData);
  console.log(results);
  var table = document.getElementById('results');
  var tr;
  var ch;  // declared locally — was an accidental implicit global
  var characters = getSymbols(results.chars);
  for (var i=0; i < characters.length; i++) {
    ch = characters[i];
    if (ch == '\n') continue;  // server inserts a newline every 64 chars
    if (ch == '\x00') break;   // NUL marks undecodable/terminated data
    var hexCode = codePointStr(ch);
    tr = document.createElement('tr');
    tr.appendChild(document.createElement('td'));
    tr.appendChild(document.createElement('th'));
    tr.cells[0].appendChild(document.createTextNode(hexCode));
    tr.cells[1].appendChild(document.createTextNode(ch));
    tr.id = hexCode;
    table.appendChild(tr);
  }
  // setTimeout(getDescriptions, REQUEST_DELAY/2)
  if (results.stop < results.total) {
    // More results available: fetch the next page after a short delay.
    setTimeout(requestMaker(results.stop), REQUEST_DELAY);
  }
}
// Wire both the form submit and the button click to a first-page
// (start == 0) request handler once the document is ready.
window.onload = function() {
  var requester = requestMaker(0);
  document.getElementById('queryForm').onsubmit = requester;
  document.getElementById('queryButton').onclick = requester;
}
//})();
</script>
</head>
<body>
<p>
<form id="queryForm">
<input id="queryField" type="search" name="query" value="">
<input id="queryButton" type="button" value="find">
Examples: {links}
</form>
</p>
<p id="message">{message}</p>
<hr>
<table id="results">
</table>
</body>
</html>

View File

@@ -0,0 +1,226 @@
#!/usr/bin/env python3
"""
Unicode character finder utility:
find characters based on words in their official names.
This can be used from the command line, just pass words as arguments.
Here is the ``main`` function which makes it happen::
>>> main('rook') # doctest: +NORMALIZE_WHITESPACE
U+2656 ♖ WHITE CHESS ROOK
U+265C ♜ BLACK CHESS ROOK
(2 matches for 'rook')
>>> main('rook', 'black') # doctest: +NORMALIZE_WHITESPACE
U+265C ♜ BLACK CHESS ROOK
(1 match for 'rook black')
>>> main('white bishop') # doctest: +NORMALIZE_WHITESPACE
U+2657 ♗ WHITE CHESS BISHOP
(1 match for 'white bishop')
>>> main("jabberwocky's vest")
(No match for "jabberwocky's vest")
For exploring words that occur in the character names, there is the
``word_report`` function::
>>> index = UnicodeNameIndex(sample_chars)
>>> index.word_report()
3 SIGN
2 A
2 EURO
2 LATIN
2 LETTER
1 CAPITAL
1 CURRENCY
1 DOLLAR
1 SMALL
>>> index = UnicodeNameIndex()
>>> index.word_report(10)
75821 CJK
75761 IDEOGRAPH
74656 UNIFIED
13196 SYLLABLE
11735 HANGUL
7616 LETTER
2232 WITH
2180 SIGN
2122 SMALL
1709 CAPITAL
Note: characters with names starting with 'CJK UNIFIED IDEOGRAPH'
are indexed with those three words only, excluding the hexadecimal
codepoint at the end of the name.
"""
import sys
import re
import unicodedata
import pickle
import warnings
import itertools
from collections import namedtuple
# Raw strings for the regex patterns: '\w' and '\+' in plain string
# literals are invalid escape sequences (DeprecationWarning since 3.6).
RE_WORD = re.compile(r'\w+')
RE_UNICODE_NAME = re.compile(r'^[A-Z0-9 -]+$')
RE_CODEPOINT = re.compile(r'U\+([0-9A-F]{4,6})')

INDEX_NAME = 'charfinder_index.pickle'  # pickled inverted-index cache
MINIMUM_SAVE_LEN = 10000  # only persist indexes at least this large
CJK_UNI_PREFIX = 'CJK UNIFIED IDEOGRAPH'
CJK_CMP_PREFIX = 'CJK COMPATIBILITY IDEOGRAPH'

# Five characters used by the doctests and the unit tests.
sample_chars = [
    '$',       # DOLLAR SIGN
    'A',       # LATIN CAPITAL LETTER A
    'a',       # LATIN SMALL LETTER A
    '\u20a0',  # EURO-CURRENCY SIGN
    '\u20ac',  # EURO SIGN
]
def tokenize(text):
    """Yield each word found in *text*, uppercased."""
    for found in RE_WORD.finditer(text):
        word = found.group()
        yield word.upper()
def query_type(text):
    """Classify *text* as 'CODEPOINT', 'NAME' or 'CHARACTERS'."""
    text_upper = text.upper()
    if 'U+' in text_upper:
        return 'CODEPOINT'
    if RE_UNICODE_NAME.match(text_upper):
        return 'NAME'
    return 'CHARACTERS'
# code_str: 'U+XXXX' label; char: the character; name: official Unicode name
CharDescription = namedtuple('CharDescription', 'code_str char name')
# len: total number of matches; items: iterable of the matching characters
QueryResult = namedtuple('QueryResult', 'len items')
class UnicodeNameIndex:
    """Inverted index mapping uppercased name words to sets of characters."""

    def __init__(self, chars=None):
        # chars: optional iterable of characters to index; None means the
        # full Unicode range (possibly loaded from the pickle cache).
        self.load(chars)

    def load(self, chars=None):
        """Load the index from INDEX_NAME, or build it (and maybe save it)."""
        self.index = None
        if chars is None:
            try:
                with open(INDEX_NAME, 'rb') as fp:
                    self.index = pickle.load(fp)
            except OSError:  # no cache file: fall through and build
                pass
        if self.index is None:
            self.build_index(chars)
        # Only persist large (i.e. full) indexes; saving is best-effort.
        if len(self.index) > MINIMUM_SAVE_LEN:
            try:
                self.save()
            except OSError as exc:
                warnings.warn('Could not save {!r}: {}'
                              .format(INDEX_NAME, exc))

    def save(self):
        """Pickle the index to INDEX_NAME."""
        with open(INDEX_NAME, 'wb') as fp:
            pickle.dump(self.index, fp)

    def build_index(self, chars=None):
        """Build the word -> set-of-characters index from *chars*.

        Defaults to every code point from U+0020 upward.  CJK ideographs
        are indexed under their common prefix words only, excluding the
        per-character hexadecimal suffix in their names.
        """
        if chars is None:
            chars = (chr(i) for i in range(32, sys.maxunicode))
        index = {}
        for char in chars:
            try:
                name = unicodedata.name(char)
            except ValueError:  # unnamed code point
                continue
            if name.startswith(CJK_UNI_PREFIX):
                name = CJK_UNI_PREFIX
            elif name.startswith(CJK_CMP_PREFIX):
                name = CJK_CMP_PREFIX
            for word in tokenize(name):
                index.setdefault(word, set()).add(char)
        self.index = index

    def __len__(self):
        # Number of distinct words in the index.
        return len(self.index)

    def word_rank(self, top=None):
        """Return (occurrences, word) pairs, most frequent first, ties A-Z."""
        res = [(len(self.index[key]), key) for key in self.index]
        res.sort(key=lambda item: (-item[0], item[1]))
        if top is not None:
            res = res[:top]
        return res

    def word_report(self, top=None):
        """Print the word_rank() table, optionally limited to *top* rows."""
        for postings, key in self.word_rank(top):
            print('{:5} {}'.format(postings, key))

    def find_chars(self, query, start=0, stop=None):
        """Return a QueryResult of characters whose names contain every
        word of *query*, with items sliced to [start:stop]."""
        stop = sys.maxsize if stop is None else stop
        result_sets = []
        for word in tokenize(query):
            if word in self.index:
                result_sets.append(self.index[word])
            else:  # shortcut: unknown word means no matches at all
                result_sets = []
                break
        if result_sets:
            result = result_sets[0].intersection(*result_sets[1:])
            result = sorted(result)  # must sort for consistency
            result_iter = itertools.islice(result, start, stop)
            # .len is the TOTAL match count; .items only the requested slice.
            return QueryResult(len(result),
                               (char for char in result_iter))
        return QueryResult(0, ())

    def find_codes(self, query, start=0, stop=None):
        """Like find_chars(), but yield integer code points."""
        return (ord(char) for char
                in self.find_chars(query, start, stop).items)

    def describe(self, char):
        """Return a CharDescription('U+XXXX', char, official name)."""
        code_str = 'U+{:04X}'.format(ord(char))
        name = unicodedata.name(char)
        return CharDescription(code_str, char, name)

    def find_descriptions(self, query, start=0, stop=None):
        # Yield a CharDescription per matching character.
        for char in self.find_chars(query, start, stop).items:
            yield self.describe(char)

    def get_descriptions(self, chars):
        # Yield a CharDescription per given character.
        for char in chars:
            yield self.describe(char)

    def describe_str(self, char):
        """Return a tab-separated 'U+XXXX<TAB>char<TAB>NAME' line."""
        return '{:7}\t{}\t{}'.format(*self.describe(char))

    def find_description_strs(self, query, start=0, stop=None):
        # Yield one describe_str() line per matching character.
        for char in self.find_chars(query, start, stop).items:
            yield self.describe_str(char)

    @staticmethod  # not an instance method due to concurrency
    def status(query, counter):
        """Return a human-readable match-count message for *query*."""
        if counter == 0:
            msg = 'No match'
        elif counter == 1:
            msg = '1 match'
        else:
            msg = '{} matches'.format(counter)
        return '{} for {!r}'.format(msg, query)
def main(*args):
    """Command-line entry point: print every match for the joined query words."""
    index = UnicodeNameIndex()
    query = ' '.join(args)
    n = 0  # stays 0 when there are no matches
    for n, line in enumerate(index.find_description_strs(query), 1):
        print(line)
    print('({})'.format(index.status(query, n)))


if __name__ == '__main__':
    if len(sys.argv) > 1:
        main(*sys.argv[1:])
    else:
        print('Usage: {} word1 [word2]...'.format(sys.argv[0]))

View File

@@ -0,0 +1,88 @@
#!/usr/bin/env python3
import sys
import asyncio
from aiohttp import web
from charfinder import UnicodeNameIndex
PAGE_TPL = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Charserver</title>
</head>
<body>
<p>
<form action="/">
<input type="search" name="query" value="{query}">
<input type="submit" value="find">
Examples: {links}
</form>
</p>
<p>{message}</p>
<hr>
<table>
{result}
</table>
</body>
</html>
'''
EXAMPLE_WORDS = ('bismillah chess cat circled Malayalam digit Roman face Ethiopic'
' black mark symbol dot operator Braille hexagram').split()
LINK_TPL = '<a href="/?query={0}" title="find &quot;{0}&quot;">{0}</a>'
LINKS_HTML = ', '.join(LINK_TPL.format(word)
for word in sorted(EXAMPLE_WORDS, key=str.upper))
ROW_TPL = '<tr><td>{code_str}</td><th>{char}</th><td>{name}</td></tr>'
CONTENT_TYPE = 'text/html; charset=UTF-8'
index = None # a UnicodeNameIndex instance
@asyncio.coroutine
def handle(request):
    """Serve the HTML page; render one table row per match when ?query= given.

    NOTE(review): ``request.GET`` and the generator-based coroutine style
    are pre-1.0 aiohttp APIs — confirm the pinned aiohttp version.
    """
    query = request.GET.get('query', '')
    print('Query: {!r}'.format(query))
    if query:
        descriptions = list(index.find_descriptions(query))
        res = '\n'.join(ROW_TPL.format(**vars(descr))
                        for descr in descriptions)
        msg = index.status(query, len(descriptions))
    else:
        descriptions = []
        res = ''
        msg = 'Type words describing characters.'
    text = PAGE_TPL.format(query=query, result=res,
                           message=msg, links=LINKS_HTML)
    print('Sending {} results'.format(len(descriptions)))
    return web.Response(content_type=CONTENT_TYPE, text=text)
@asyncio.coroutine
def init(loop, address, port):
    """Create the aiohttp application and start serving on address:port."""
    app = web.Application(loop=loop)
    app.router.add_route('GET', '/', handle)
    server = yield from loop.create_server(app.make_handler(),
                                           address, port)
    host = server.sockets[0].getsockname()
    print('Serving on {}. Hit CTRL-C to stop.'.format(host))
def main(address="127.0.0.1", port=8888):
    """Start the event loop and serve forever (no clean-shutdown handling)."""
    port = int(port)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(init(loop, address, port))
    loop.run_forever()


if __name__ == '__main__':
    index = UnicodeNameIndex()  # build/load the index before serving
    main(*sys.argv[1:])

View File

@@ -0,0 +1,126 @@
#!/usr/bin/env python3
import sys
import asyncio
import urllib
import json
from aiohttp import web
from charfinder import UnicodeNameIndex
PAGE_TPL = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Charserver</title>
<script type="text/javascript">
function onclick() {
var table = document.getElementById("results");
for (var char in "ABCDE") {
code = char.charCodeAt(0);
var tr = document.createElement('tr');
tr.appendChild(document.createElement('td'));
tr.appendChild(document.createElement('th'));
var code_str = 'U+'+code.toString(16);
tr.cells[0].appendChild(document.createTextNode(code_str));
tr.cells[1].appendChild(document.createTextNode(char));
}
}
</script>
</head>
<body>
<p>
<form action="/">
<input type="search" name="query" value="">
<input type="submit" value="find" onclick="fillTable()">
Examples: {links}
</form>
</p>
<p>{message}</p>
<hr>
<table id="results">
</table>
</body>
</html>
'''
EXAMPLE_WORDS = ('bismillah chess cat circled Malayalam digit Roman face Ethiopic'
' black mark symbol dot operator Braille hexagram').split()
LINK_TPL = '<a href="/?query={0}" title="find &quot;{0}&quot;">{0}</a>'
LINKS_HTML = ', '.join(LINK_TPL.format(word)
for word in sorted(EXAMPLE_WORDS, key=str.upper))
ROW_TPL = '<tr id="{code_str}"><td>{code_str}</td><th>{char}</th><td>{name}</td></tr>'
HTML_TYPE = 'text/html; charset=UTF-8'
TEXT_TYPE = 'text/plain; charset=UTF-8'
RESULTS_PER_REQUEST = 15
index = None # a UnicodeNameIndex instance
@asyncio.coroutine
def form(request):
    """Serve the static query page; the embedded JS fetches /chars itself.

    NOTE(review): PAGE_TPL contains literal JavaScript braces; for
    ``str.format`` to succeed they must be doubled (``{{``/``}}``) in the
    template — verify the template text.
    """
    peername = request.transport.get_extra_info('peername')
    print('Request from: {}, query: {!r}'.format(peername, request.path_qs))
    msg = 'Type words describing characters.'
    text = PAGE_TPL.format(message=msg, links=LINKS_HTML)
    return web.Response(content_type=HTML_TYPE, text=text)
@asyncio.coroutine
def get_chars(request):
    """JSON endpoint: characters matching ?query=, paged by ?start=/?stop=.

    Responds with {'total', 'start', 'stop', 'chars'}; 'chars' carries a
    newline after every 64th character.  Raises HTTPBadRequest (400) for a
    missing query or non-integer start/stop.
    """
    peername = request.transport.get_extra_info('peername')
    print('Request from: {}, GET data: {!r}'.format(peername, dict(request.GET)))
    query = request.GET.get('query', '')
    if query:
        try:
            start = int(request.GET.get('start', 0))
            stop = int(request.GET.get('stop', sys.maxsize))
        except ValueError:
            raise web.HTTPBadRequest()
        # Cap the page size regardless of what the client asked for.
        stop = min(stop, start+RESULTS_PER_REQUEST)
        num_results, chars = index.find_chars(query, start, stop)
    else:
        raise web.HTTPBadRequest()
    # Break the character stream into 64-character lines.
    text = ''.join(char if n % 64 else char+'\n'
                   for n, char in enumerate(chars, 1))
    response_data = {'total': num_results, 'start': start, 'stop': stop}
    print('Response to query: {query!r}, start: {start}, stop: {stop}'.format(
        query=query, **response_data))
    response_data['chars'] = text
    json_obj = json.dumps(response_data)
    print('Sending {} characters'.format(len(text)))
    # CORS header lets the static client page fetch from another origin.
    headers = {'Access-Control-Allow-Origin': '*'}
    return web.Response(content_type=TEXT_TYPE, headers=headers, text=json_obj)
@asyncio.coroutine
def init(loop, address, port):
    """Create the aiohttp app with the / and /chars routes and start serving."""
    app = web.Application(loop=loop)
    app.router.add_route('GET', '/chars', get_chars)
    app.router.add_route('GET', '/', form)
    server = yield from loop.create_server(app.make_handler(),
                                           address, port)
    host = server.sockets[0].getsockname()
    print('Serving on {}. Hit CTRL-C to stop.'.format(host))
def main(address="127.0.0.1", port=8888):
    """Run the server until CTRL-C."""
    port = int(port)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(init(loop, address, port))
    try:
        loop.run_forever()
    except KeyboardInterrupt:
        print('Stopped.')


if __name__ == '__main__':
    index = UnicodeNameIndex()  # build/load the index before serving
    main(*sys.argv[1:])

View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python3
import sys
import asyncio
from charfinder import UnicodeNameIndex
CRLF = b'\r\n'
PROMPT = b'?> '
index = None # a UnicodeNameIndex instance
@asyncio.coroutine
def handle_queries(reader, writer):
    """Serve one TCP client: prompt, read queries, write matching lines.

    A query whose first character is a control character (including the
    NUL substituted on a decode error) ends the session.
    """
    while True:  # one client query per iteration
        writer.write(PROMPT)  # can't yield from!  write() is not a coroutine
        yield from writer.drain()  # must yield from!  drain() is a coroutine
        data = yield from reader.readline()
        try:
            query = data.decode().strip()
        except UnicodeDecodeError:
            # Substitute a control character so the session ends below.
            query = '\x00'
        client = writer.get_extra_info('peername')
        print('Received from {}: {!r}'.format(client, query))
        if query:
            if ord(query[:1]) < 32:  # control character: end the session
                break
            lines = list(index.find_description_strs(query))
            if lines:
                writer.writelines(line.encode() + CRLF for line in lines)
            writer.write(index.status(query, len(lines)).encode() + CRLF)
            yield from writer.drain()
            print('Sent {} results'.format(len(lines)))
    print('Close the client socket')
    writer.close()
def main(address='127.0.0.1', port=8888):
    """Run the TCP server until CTRL-C, then shut it down cleanly."""
    port = int(port)
    loop = asyncio.get_event_loop()
    coro = asyncio.start_server(handle_queries, address, port, loop=loop)
    server = loop.run_until_complete(coro)
    host = server.sockets[0].getsockname()
    print('Serving on {}. Hit CTRL-C to stop.'.format(host))
    try:
        loop.run_forever()
    except KeyboardInterrupt:  # CTRL+C pressed
        pass
    server.close()
    loop.run_until_complete(server.wait_closed())
    loop.close()


if __name__ == '__main__':
    index = UnicodeNameIndex()  # build/load the index before serving
    main(*sys.argv[1:])

View File

@@ -0,0 +1,115 @@
import pytest
from charfinder import UnicodeNameIndex, tokenize, sample_chars, query_type
from unicodedata import name
@pytest.fixture
def sample_index():
    # Fresh small index per test, built from the five sample_chars.
    return UnicodeNameIndex(sample_chars)


@pytest.fixture(scope="module")
def full_index():
    # Full Unicode index; module-scoped because building it is expensive.
    return UnicodeNameIndex()


def test_query_type():
    assert query_type('blue') == 'NAME'


def test_tokenize():
    assert list(tokenize('')) == []
    assert list(tokenize('a b')) == ['A', 'B']
    assert list(tokenize('a-b')) == ['A', 'B']
    assert list(tokenize('abc')) == ['ABC']
    assert list(tokenize('café')) == ['CAFÉ']


def test_index():
    # 9 distinct words occur in the names of the five sample characters.
    sample_index = UnicodeNameIndex(sample_chars)
    assert len(sample_index) == 9


def test_find_word_no_match(sample_index):
    res = list(sample_index.find_codes('qwertyuiop'))
    assert len(res) == 0


def test_find_word_1_match(sample_index):
    res = [(code, name(chr(code)))
           for code in sample_index.find_codes('currency')]
    assert res == [(8352, 'EURO-CURRENCY SIGN')]


def test_find_word_1_match_character_result(sample_index):
    res = [name(char) for char in
           sample_index.find_chars('currency').items]
    assert res == ['EURO-CURRENCY SIGN']


def test_find_word_2_matches(sample_index):
    res = [(code, name(chr(code)))
           for code in sample_index.find_codes('Euro')]
    assert res == [(8352, 'EURO-CURRENCY SIGN'),
                   (8364, 'EURO SIGN')]


def test_find_2_words_no_matches(sample_index):
    res = list(sample_index.find_codes('Euro letter'))
    assert len(res) == 0


def test_find_2_words_no_matches_because_one_not_found(sample_index):
    res = list(sample_index.find_codes('letter qwertyuiop'))
    assert len(res) == 0


def test_find_2_words_1_match(sample_index):
    res = list(sample_index.find_codes('sign dollar'))
    assert len(res) == 1


def test_find_2_words_2_matches(sample_index):
    res = list(sample_index.find_codes('latin letter'))
    assert len(res) == 2


def test_find_codes_many_matches_full(full_index):
    res = list(full_index.find_codes('letter'))
    assert len(res) > 7000


def test_find_1_word_1_match_full(full_index):
    res = [(code, name(chr(code)))
           for code in full_index.find_codes('registered')]
    assert res == [(174, 'REGISTERED SIGN')]


def test_find_1_word_2_matches_full(full_index):
    res = list(full_index.find_codes('rook'))
    assert len(res) == 2


def test_find_3_words_no_matches_full(full_index):
    res = list(full_index.find_codes('no such character'))
    assert len(res) == 0


def test_find_with_start(sample_index):
    # start=1 skips DOLLAR SIGN, the first of the three 'sign' matches.
    res = [(code, name(chr(code)))
           for code in sample_index.find_codes('sign', 1)]
    assert res == [(8352, 'EURO-CURRENCY SIGN'), (8364, 'EURO SIGN')]


def test_find_with_stop(sample_index):
    res = [(code, name(chr(code)))
           for code in sample_index.find_codes('sign', 0, 2)]
    assert res == [(36, 'DOLLAR SIGN'), (8352, 'EURO-CURRENCY SIGN')]


def test_find_with_start_stop(sample_index):
    res = [(code, name(chr(code)))
           for code in sample_index.find_codes('sign', 1, 2)]
    assert res == [(8352, 'EURO-CURRENCY SIGN')]

View File

@@ -0,0 +1,33 @@
=========================================
Setting up the test environment
=========================================
Some of the concurrency examples in this book require a local HTTP
server. These instructions show how I set up Nginx on GNU/Linux,
Mac OS X 10.9 and Windows 7.
Nginx setup on Mac OS X
========================
Homebrew (copy & paste code at the bottom of http://brew.sh/)::
$ ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
$ brew doctor
$ brew install nginx
Download and unpack::
Docroot is: /usr/local/var/www
/usr/local/etc/nginx/nginx.conf
To have launchd start nginx at login:
ln -sfv /usr/local/opt/nginx/*.plist ~/Library/LaunchAgents
Then to load nginx now:
launchctl load ~/Library/LaunchAgents/homebrew.mxcl.nginx.plist
Or, if you don't want/need launchctl, you can just run:
nginx
Nginx setup on Lubuntu 14.04.1 LTS
==================================
/usr/share/nginx/html

View File

@@ -0,0 +1,30 @@
"""
Build flags fixture
"""
import shutil
import os
import json
SRC = 'img/'       # source directory with <cc>.gif flag images
DEST = 'fixture/'  # destination root; one subdirectory per country code

# Build the fixture tree: DEST/<cc>/<cc>.gif plus metadata.json per country.
with open('country-codes.tab', encoding='utf-8') as cc_fp:
    for line in cc_fp:
        if line.startswith('#'):  # skip comment lines
            continue
        iso_cc, gec_cc, name = line.strip().split('\t')
        print(iso_cc, name)
        cc = iso_cc.lower()
        img_name = cc + '.gif'
        from_file = os.path.join(SRC, img_name)
        to_path = os.path.join(DEST, cc)
        # makedirs with exist_ok makes the script re-runnable; the original
        # os.mkdir raised FileExistsError on a second run.
        os.makedirs(to_path, exist_ok=True)
        to_file = os.path.join(to_path, img_name)
        shutil.copyfile(from_file, to_file)
        tld_cc = 'uk' if cc == 'gb' else cc  # the UK's TLD differs from ISO 'gb'
        metadata = {'country': name, 'iso_cc': iso_cc,
                    'tld_cc': '.'+tld_cc, 'gec_cc': gec_cc}
        with open(os.path.join(to_path, 'metadata.json'), 'wt') as json_fp:
            json.dump(metadata, json_fp, ensure_ascii=True)

View File

@@ -0,0 +1,19 @@
from collections import Counter
from operator import itemgetter
from string import ascii_uppercase
with open('country-codes.tab') as fp:
    ct = Counter()  # counts country codes by their first letter
    for line in fp:
        if line.startswith('#'):  # skip comment lines
            continue
        cc, _, _ = line.split('\t')
        ct[cc[0]] += 1
        print(cc, end=' ')

# Letter frequencies, most common first.
for key, value in sorted(ct.items(), key=itemgetter(1), reverse=True):
    print(key, value)
print('Total:', sum(ct.values()))
# Letters A-Z that never start a country code.
print('Missing:', ', '.join(set(ascii_uppercase) - ct.keys()))

View File

@@ -0,0 +1,36 @@
"""
Check country code TLDs
"""
import shutil
import os
import json
iso_cc_db = {}  # ISO-3166-1 alpha-2 code (lowercased) -> country name
with open('country-codes.tab', encoding='utf-8') as cc_fp:
    for line in cc_fp:
        if line.startswith('#'):  # skip comment lines
            continue
        iso_cc, gec_cc, name = line.strip().split('\t')
        iso_cc_db[iso_cc.lower()] = name

tld_cc_db = {}  # TLD without the leading dot -> sponsoring entity
with open('tlds.tab', encoding='utf-8') as cc_fp:
    for line in cc_fp:
        if line.startswith('#'):
            continue
        tld_cc, category, entity = line.strip().split('\t')
        if category.strip() != 'country-code':
            continue
        if ascii(tld_cc) != repr(tld_cc):  # skip internationalized (non-ASCII) TLDs
            continue
        tld_cc_db[tld_cc[1:].strip()] = entity  # drop the leading '.'

# ISO codes with no matching country-code TLD.
not_tld = iso_cc_db.keys() - tld_cc_db.keys()
print(sorted(not_tld))
for iso_cc, name in sorted(iso_cc_db.items()):
    # .get avoids the KeyError the original raised for every code in
    # not_tld (the very set printed just above).
    entity = tld_cc_db.get(iso_cc, '(no TLD)')
    print('{}\t{}\t{}'.format(iso_cc, name, entity))

View File

@@ -0,0 +1,22 @@
import tkinter
class Test:
    """Display a flag image on a Tk canvas; right-click exits."""

    def __init__(self, master):
        canvas = tkinter.Canvas(master)
        # Attach the image to the canvas so it is not garbage-collected
        # (Tk keeps only a weak association with PhotoImage objects).
        canvas.image = tkinter.PhotoImage(file = 'img/br.gif')
        print(vars(canvas.image))
        canvas.create_image(0,0, image=canvas.image, anchor=tkinter.NW)
        canvas.bind('<Button-2>', self.right_click)
        canvas.grid(row=0, column=0)

    def right_click(self, event):
        # Dump the event attributes, then quit the app.
        print(vars(event))
        raise SystemExit()


root = tkinter.Tk()
test = Test(root)
root.mainloop()  # blocks until the window closes or SystemExit

View File

@@ -0,0 +1,195 @@
# ISO-3166-1 US-GEC name
AF AF Afghanistan
AL AL Albania
DZ AG Algeria
AD AN Andorra
AO AO Angola
AG AC Antigua and Barbuda
AR AR Argentina
AM AM Armenia
AU AS Australia
AT AU Austria
AZ AJ Azerbaijan
BS BF Bahamas
BH BA Bahrain
BD BG Bangladesh
BB BB Barbados
BY BO Belarus
BE BE Belgium
BZ BH Belize
BJ BN Benin
BT BT Bhutan
BO BL Bolivia
BA BK Bosnia and Herzegovina
BW BC Botswana
BR BR Brazil
BN BX Brunei Darussalam
BG BU Bulgaria
BF UV Burkina Faso
BI BY Burundi
KH CB Cambodia
CM CM Cameroon
CA CA Canada
CV CV Cape Verde
CF CT Central African Republic
TD CD Chad
CL CI Chile
CN CH China
CO CO Colombia
KM CN Comoros
CG CF Congo (Brazzaville)
CD CG Congo (Kinshasa)
CR CS Costa Rica
CI IV Côte d'Ivoire
HR HR Croatia
CU CU Cuba
CY CY Cyprus
CZ EZ Czech Republic
DK DA Denmark
DJ DJ Djibouti
DM DO Dominica
EC EC Ecuador
EG EG Egypt
SV ES El Salvador
GQ EK Equatorial Guinea
ER ER Eritrea
EE EN Estonia
ET ET Ethiopia
FJ FJ Fiji
FI FI Finland
FR FR France
GA GB Gabon
GM GA Gambia
GE GG Georgia
DE GM Germany
GH GH Ghana
GR GR Greece
GD GJ Grenada
GT GT Guatemala
GN GV Guinea
GW PU Guinea-Bissau
GY GY Guyana
HT HA Haiti
HN HO Honduras
HU HU Hungary
IS IC Iceland
IN IN India
ID ID Indonesia
IR IR Iran
IQ IZ Iraq
IE EI Ireland
IL IS Israel
IT IT Italy
JM JM Jamaica
JP JA Japan
JO JO Jordan
KZ KZ Kazakhstan
KE KE Kenya
KI KR Kiribati
KP KN Korea, North
KR KS Korea, South
KW KU Kuwait
KG KG Kyrgyzstan
LA LA Laos
LV LG Latvia
LB LE Lebanon
LS LT Lesotho
LR LI Liberia
LY LY Libya
LI LS Liechtenstein
LT LH Lithuania
LU LU Luxembourg
MK MK Macedonia
MG MA Madagascar
MW MI Malawi
MY MY Malaysia
MV MV Maldives
ML ML Mali
MT MT Malta
MH RM Marshall Islands
MR MR Mauritania
MU MP Mauritius
MX MX Mexico
FM FM Micronesia
MD MD Moldova
MC MN Monaco
MN MG Mongolia
ME MJ Montenegro
MA MO Morocco
MZ MZ Mozambique
MM BM Myanmar
NA WA Namibia
NR NR Nauru
NP NP Nepal
NL NL Netherlands
NZ NZ New Zealand
NI NU Nicaragua
NE NG Niger
NG NI Nigeria
NO NO Norway
OM MU Oman
PK PK Pakistan
PW PS Palau
PA PM Panama
PG PP Papua New Guinea
PY PA Paraguay
PE PE Peru
PH RP Philippines
PL PL Poland
PT PO Portugal
QA QA Qatar
RO RO Romania
RU RS Russian Federation
RW RW Rwanda
KN SC Saint Kitts and Nevis
LC ST Saint Lucia
VC VC Grenadines
WS WS Samoa
SM SM San Marino
ST TP Sao Tome and Principe
SA SA Saudi Arabia
SN SG Senegal
RS RI Serbia
SC SE Seychelles
SL SL Sierra Leone
SG SN Singapore
SK LO Slovakia
SI SI Slovenia
SB BP Solomon Islands
SO SO Somalia
ZA SF South Africa
SS OD South Sudan
ES SP Spain
LK CE Sri Lanka
SD SU Sudan
SR NS Suriname
SZ WZ Swaziland
SE SW Sweden
CH SZ Switzerland
SY SY Syria
TW TW Taiwan
TJ TI Tajikistan
TZ TZ Tanzania
TH TH Thailand
TL TT Timor-Leste
TG TO Togo
TO TN Tonga
TT TD Trinidad and Tobago
TN TS Tunisia
TR TU Turkey
TM TX Turkmenistan
TV TV Tuvalu
UG UG Uganda
UA UP Ukraine
AE AE United Arab Emirates
GB UK United Kingdom
US US United States of America
UY UY Uruguay
UZ UZ Uzbekistan
VU NH Vanuatu
VA VT Vatican City
VE VE Venezuela
VN VM Vietnam
YE YM Yemen
ZM ZA Zambia
ZW ZI Zimbabwe

View File

@@ -0,0 +1,51 @@
"""
Mappings of ISO-3166-1 alpha-2 country codes to names, to GEC
(Geopolitical Entities and Codes used by the US government)
and utility functions for flag download examples
"""
DATA_FILE = 'country-codes.tab'
# original source
CIA_URL = ('https://www.cia.gov/library/publications/'
'the-world-factbook/graphics/flags/large/{gec}-lgflag.gif')
# local nginx web server
NGINX_URL = 'http://localhost:8080/ciaflags/{gec}.gif'
# Vaurien
VAURIEN_URL = 'http://localhost:8000/ciaflags/{gec}.gif'
SOURCE_URLS = {
'CIA' : CIA_URL,
'NGINX' : NGINX_URL,
'VAURIEN' : VAURIEN_URL,
}
DEST_PATH_NAME = 'img/{cc}.gif'
cc2name = {} # ISO-3166-1 to name
cc2gec = {} # ISO-3166-1 to GEC
def _load():
    """Populate the module-level cc2name and cc2gec tables from DATA_FILE.

    Each data line must hold three tab-separated fields:
    ISO-3166-1 alpha-2 code, GEC code, country name.
    Lines starting with '#' are comments; blank lines are skipped.
    """
    with open(DATA_FILE, encoding='utf-8') as cc_txt:
        for line in cc_txt:
            line = line.rstrip()
            # skip comments AND blank lines: a blank line would otherwise
            # crash the 3-way unpacking below (''.split('\t') == [''])
            if not line or line.startswith('#'):
                continue
            iso_cc, gec, name = line.split('\t')
            cc2name[iso_cc] = name
            cc2gec[iso_cc] = gec
def flag_url(iso_cc, source='CIA'):
    """Return the flag-image URL for *iso_cc* at the given *source*."""
    gec = cc2gec[iso_cc].lower()
    template = SOURCE_URLS[source.upper()]
    return template.format(gec=gec)
def iso_file_name(iso_cc):
    """Return the local image path keyed by the lowercased ISO code."""
    cc = iso_cc.lower()
    return DEST_PATH_NAME.format(cc=cc)
def gec_file_name(iso_cc):
    """Return the local image path keyed by the lowercased GEC code."""
    gec = cc2gec[iso_cc]
    return DEST_PATH_NAME.format(cc=gec.lower())
# build the lookup tables as soon as the module is imported
_load()

View File

@@ -0,0 +1,63 @@
import requests
import countryflags as cf
import time
# iso_cc -> [start_offset, end_offset] in seconds relative to t0 in main()
times = {}
def fetch(iso_cc, source):
    """Download one flag image; return (bytes_written, local_file_name).

    Raises requests.HTTPError (via raise_for_status) on an error status.
    """
    url = cf.flag_url(iso_cc, source)
    resp = requests.get(url)
    if resp.status_code != 200:
        resp.raise_for_status()
    file_name = cf.iso_file_name(iso_cc)
    with open(file_name, 'wb') as img:
        written = img.write(resp.content)
    return written, file_name
def main(source):
    """Sequentially download one flag per country in cf.cc2name.

    Records per-country [start, end] offsets (seconds since t0) in the
    module-level `times` dict, then prints a summary line and a
    tab-separated timing table.
    """
    country_codes = sorted(cf.cc2name)
    total = len(country_codes)
    ok_count = 0
    t0 = time.time()
    for cc in country_codes:
        print('get:', cc)
        times[cc] = [time.time() - t0]
        try:
            size, saved_as = fetch(cc, source)
        except Exception as exc:
            print('\t***', cc, 'generated an exception:', exc)
        else:
            times[cc].append(time.time() - t0)
            ok_count += 1
            print('\t--> {}: {:5d} bytes'.format(saved_as, size))
    ratio = ok_count / total
    print('{} of {} downloaded ({:.1%})'.format(ok_count, total, ratio))
    for cc in sorted(times):
        start, end = times[cc]
        print('{}\t{:.6g}\t{:.6g}'.format(cc, start, end))
if __name__ == '__main__':
    import argparse
    # CLI: a single positional argument selects which server to
    # download from (one of the keys in cf.SOURCE_URLS)
    source_names = ', '.join(sorted(cf.SOURCE_URLS))
    parser = argparse.ArgumentParser(description='Download flag images.')
    parser.add_argument('source', help='one of: ' + source_names)
    args = parser.parse_args()
    main(args.source)
"""
From cia.gov:
real 3m26.679s
user 0m5.212s
sys 0m0.383s
From localhost nginx:
real 0m1.193s
user 0m0.858s
sys 0m0.179s
From localhost nginx via Vaurien with .5s delay
real 1m40.519s
user 0m1.103s
sys 0m0.243s
"""

View File

@@ -0,0 +1,61 @@
from concurrent import futures
import sys
import requests
import countryflags as cf
import time
from getsequential import fetch
# pool size used when -t/--threads is not given on the command line
DEFAULT_NUM_THREADS = 100
GLOBAL_TIMEOUT = 300 # seconds
# iso_cc -> [submit_offset, done_offset] in seconds relative to t0 in main()
times = {}
def main(source, num_threads):
    """Download all flags concurrently using a thread pool.

    Submits one `fetch` job per country code, then collects results in
    completion order, recording [submit, done] offsets (seconds since
    t0) in the module-level `times` dict.  Prints a summary line and a
    tab-separated timing table.  May raise futures.TimeoutError if the
    whole batch takes longer than GLOBAL_TIMEOUT seconds.
    """
    t0 = time.time()
    pending = {}
    # `with` guarantees the pool's worker threads are shut down and
    # joined even if an exception escapes (the bare constructor leaked
    # the pool on error)
    with futures.ThreadPoolExecutor(num_threads) as pool:
        # submit all jobs
        for iso_cc in sorted(cf.cc2name):
            print('get:', iso_cc)
            times[iso_cc] = [time.time() - t0]
            job = pool.submit(fetch, iso_cc, source)
            pending[job] = iso_cc
        to_download = len(pending)
        downloaded = 0
        # get results as jobs are done
        for job in futures.as_completed(pending, timeout=GLOBAL_TIMEOUT):
            try:
                octets, file_name = job.result()
                times[pending[job]].append(time.time() - t0)
                downloaded += 1
                print('\t--> {}: {:5d} bytes'.format(file_name, octets))
            except Exception as exc:
                print('\t***', pending[job], 'generated an exception:', exc)
    ratio = downloaded / to_download
    print('{} of {} downloaded ({:.1%})'.format(downloaded, to_download, ratio))
    for iso_cc in sorted(times):
        start, end = times[iso_cc]
        print('{}\t{:.6g}\t{:.6g}'.format(iso_cc, start, end))
if __name__ == '__main__':
    import argparse
    # CLI: positional source (one of cf.SOURCE_URLS) plus an optional
    # worker count, defaulting to DEFAULT_NUM_THREADS
    source_names = ', '.join(sorted(cf.SOURCE_URLS))
    parser = argparse.ArgumentParser(description='Download flag images.')
    parser.add_argument('source', help='one of: ' + source_names)
    parser.add_argument('-t', '--threads', type=int, default=DEFAULT_NUM_THREADS,
                        help='number of threads (default: %s)' % DEFAULT_NUM_THREADS)
    args = parser.parse_args()
    main(args.source, args.threads)
"""
From CIA, 1 thread:
real 2m0.832s
user 0m4.685s
sys 0m0.366s
"""

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1 @@
This file exists so that the directory is stored by git.

View File

@@ -0,0 +1,848 @@
# https://www.iana.org/domains/root/db
.abogado generic Top Level Domain Holdings Limited
.ac country-code Network Information Center (AC Domain Registry) c/o Cable and Wireless (Ascension Island)
.academy generic Half Oaks, LLC
.accountants generic Knob Town, LLC
.active generic The Active Network, Inc
.actor generic United TLD Holdco Ltd.
.ad country-code Andorra Telecom
.adult generic ICM Registry AD LLC
.ae country-code Telecommunication Regulatory Authority (TRA)
.aero sponsored Societe Internationale de Telecommunications Aeronautique (SITA INC USA)
.af country-code Ministry of Communications and IT
.ag country-code UHSA School of Medicine
.agency generic Steel Falls, LLC
.ai country-code Government of Anguilla
.airforce generic United TLD Holdco Ltd.
.al country-code Electronic and Postal Communications Authority - AKEP
.allfinanz generic Allfinanz Deutsche Vermögensberatung Aktiengesellschaft
.alsace generic REGION D ALSACE
.am country-code Internet Society
.amsterdam generic Gemeente Amsterdam
.an country-code University of Curacao
.android generic Charleston Road Registry Inc.
.ao country-code Faculdade de Engenharia da Universidade Agostinho Neto
.apartments generic June Maple, LLC
.aq country-code Antarctica Network Information Centre Limited
.aquarelle generic Aquarelle.com
.ar country-code Presidencia de la Nación Secretaría Legal y Técnica
.archi generic STARTING DOT LIMITED
.army generic United TLD Holdco Ltd.
.arpa infrastructure Internet Architecture Board (IAB)
.as country-code AS Domain Registry
.asia sponsored DotAsia Organisation Ltd.
.associates generic Baxter Hill, LLC
.at country-code nic.at GmbH
.attorney generic United TLD Holdco, Ltd
.au country-code .au Domain Administration (auDA)
.auction generic United TLD HoldCo, Ltd.
.audio generic Uniregistry, Corp.
.autos generic DERAutos, LLC
.aw country-code SETAR
.ax country-code Ålands landskapsregering
.axa generic AXA SA
.az country-code IntraNS
.ba country-code Universtiy Telinformatic Centre (UTIC)
.band generic United TLD Holdco, Ltd
.bank generic fTLD Registry Services, LLC
.bar generic Punto 2012 Sociedad Anonima Promotora de Inversion de Capital Variable
.barclaycard generic Barclays Bank PLC
.barclays generic Barclays Bank PLC
.bargains generic Half Hallow, LLC
.bayern generic Bayern Connect GmbH
.bb country-code Government of Barbados Ministry of Economic Affairs and Development Telecommunications Unit
.bd country-code Ministry of Post & Telecommunications Bangladesh Secretariat
.be country-code DNS Belgium vzw/asbl
.beer generic Top Level Domain Holdings Limited
.berlin generic dotBERLIN GmbH & Co. KG
.best generic BestTLD Pty Ltd
.bf country-code ARCE-AutoritÈ de RÈgulation des Communications Electroniques
.bg country-code Register.BG
.bh country-code Telecommunications Regulatory Authority (TRA)
.bi country-code Centre National de l'Informatique
.bid generic dot Bid Limited
.bike generic Grand Hollow, LLC
.bingo generic Sand Cedar, LLC
.bio generic STARTING DOT LIMITED
.biz generic-restricted NeuStar, Inc.
.bj country-code Benin Telecoms S.A.
.bl country-code Not assigned
.black generic Afilias Limited
.blackfriday generic Uniregistry, Corp.
.bloomberg generic Bloomberg IP Holdings LLC
.blue generic Afilias Limited
.bm country-code Registry General Ministry of Labour and Immigration
.bmw generic Bayerische Motoren Werke Aktiengesellschaft
.bn country-code Telekom Brunei Berhad
.bnpparibas generic BNP Paribas
.bo country-code Agencia para el Desarrollo de la Información de la Sociedad en Bolivia
.boo generic Charleston Road Registry Inc.
.boutique generic Over Galley, LLC
.bq country-code Not assigned
.br country-code Comite Gestor da Internet no Brasil
.brussels generic DNS.be vzw
.bs country-code The College of the Bahamas
.bt country-code Ministry of Information and Communications
.budapest generic Top Level Domain Holdings Limited
.build generic Plan Bee LLC
.builders generic Atomic Madison, LLC
.business generic Spring Cross, LLC
.buzz generic DOTSTRATEGY CO.
.bv country-code UNINETT Norid A/S
.bw country-code Botswana Communications Regulatory Authority (BOCRA)
.by country-code Reliable Software Inc.
.bz country-code University of Belize
.bzh generic Association www.bzh
.ca country-code Canadian Internet Registration Authority (CIRA) Autorite Canadienne pour les Enregistrements Internet (ACEI)
.cab generic Half Sunset, LLC
.cal generic Charleston Road Registry Inc.
.camera generic Atomic Maple, LLC
.camp generic Delta Dynamite, LLC
.cancerresearch generic Australian Cancer Research Foundation
.canon generic Canon Inc.
.capetown generic ZA Central Registry NPC trading as ZA Central Registry
.capital generic Delta Mill, LLC
.caravan generic Caravan International, Inc.
.cards generic Foggy Hollow, LLC
.care generic Goose Cross, LLC
.career generic dotCareer LLC
.careers generic Wild Corner, LLC
.cartier generic Richemont DNS Inc.
.casa generic Top Level Domain Holdings Limited
.cash generic Delta Lake, LLC
.cat sponsored Fundacio puntCAT
.catering generic New Falls. LLC
.cbn generic The Christian Broadcasting Network, Inc.
.cc country-code eNIC Cocos (Keeling) Islands Pty. Ltd. d/b/a Island Internet Services
.cd country-code Office Congolais des Postes et Télécommunications - OCPT
.center generic Tin Mill, LLC
.ceo generic CEOTLD Pty Ltd
.cern generic European Organization for Nuclear Research ("CERN")
.cf country-code Societe Centrafricaine de Telecommunications (SOCATEL)
.cg country-code ONPT Congo and Interpoint Switzerland
.ch country-code SWITCH The Swiss Education & Research Network
.channel generic Charleston Road Registry Inc.
.chat generic Sand Fields, LLC
.cheap generic Sand Cover, LLC
.christmas generic Uniregistry, Corp.
.chrome generic Charleston Road Registry Inc.
.church generic Holly Fileds, LLC
.ci country-code INP-HB Institut National Polytechnique Felix Houphouet Boigny
.citic generic CITIC Group Corporation
.city generic Snow Sky, LLC
.ck country-code Telecom Cook Islands Ltd.
.cl country-code NIC Chile (University of Chile)
.claims generic Black Corner, LLC
.cleaning generic Fox Shadow, LLC
.click generic Uniregistry, Corp.
.clinic generic Goose Park, LLC
.clothing generic Steel Lake, LLC
.club generic .CLUB DOMAINS, LLC
.cm country-code Cameroon Telecommunications (CAMTEL)
.cn country-code Computer Network Information Center, Chinese Academy of Sciences
.co country-code .CO Internet S.A.S.
.coach generic Koko Island, LLC
.codes generic Puff Willow, LLC
.coffee generic Trixy Cover, LLC
.college generic XYZ.COM LLC
.cologne generic NetCologne Gesellschaft für Telekommunikation mbH
.com generic VeriSign Global Registry Services
.community generic Fox Orchard, LLC
.company generic Silver Avenue, LLC
.computer generic Pine Mill, LLC
.condos generic Pine House, LLC
.construction generic Fox Dynamite, LLC
.consulting generic United TLD Holdco, LTD.
.contractors generic Magic Woods, LLC
.cooking generic Top Level Domain Holdings Limited
.cool generic Koko Lake, LLC
.coop sponsored DotCooperation LLC
.country generic Top Level Domain Holdings Limited
.cr country-code National Academy of Sciences Academia Nacional de Ciencias
.credit generic Snow Shadow, LLC
.creditcard generic Binky Frostbite, LLC
.cricket generic dot Cricket Limited
.crs generic Federated Co-operatives Limited
.cruises generic Spring Way, LLC
.cu country-code CENIAInternet Industria y San Jose Capitolio Nacional
.cuisinella generic SALM S.A.S.
.cv country-code Agência Nacional das Comunicações (ANAC)
.cw country-code University of Curacao
.cx country-code Christmas Island Internet Administration Limited
.cy country-code University of Cyprus
.cymru generic Nominet UK
.cz country-code CZ.NIC, z.s.p.o
.dabur generic Dabur India Limited
.dad generic Charleston Road Registry Inc.
.dance generic United TLD Holdco Ltd.
.dating generic Pine Fest, LLC
.day generic Charleston Road Registry Inc.
.dclk generic Charleston Road Registry Inc.
.de country-code DENIC eG
.deals generic Sand Sunset, LLC
.degree generic United TLD Holdco, Ltd
.delivery generic Steel Station, LLC
.democrat generic United TLD Holdco Ltd.
.dental generic Tin Birch, LLC
.dentist generic United TLD Holdco, Ltd
.desi generic Desi Networks LLC
.design generic Top Level Design, LLC
.dev generic Charleston Road Registry Inc.
.diamonds generic John Edge, LLC
.diet generic Uniregistry, Corp.
.digital generic Dash Park, LLC
.direct generic Half Trail, LLC
.directory generic Extra Madison, LLC
.discount generic Holly Hill, LLC
.dj country-code Djibouti Telecom S.A
.dk country-code Dansk Internet Forum
.dm country-code DotDM Corporation
.dnp generic Dai Nippon Printing Co., Ltd.
.do country-code Pontificia Universidad Catolica Madre y Maestra Recinto Santo Tomas de Aquino
.docs generic Charleston Road Registry Inc.
.domains generic Sugar Cross, LLC
.doosan generic Doosan Corporation
.durban generic ZA Central Registry NPC trading as ZA Central Registry
.dvag generic Deutsche Vermögensberatung Aktiengesellschaft DVAG
.dz country-code CERIST
.eat generic Charleston Road Registry Inc.
.ec country-code NIC.EC (NICEC) S.A.
.edu sponsored EDUCAUSE
.education generic Brice Way, LLC
.ee country-code Eesti Interneti Sihtasutus (EIS)
.eg country-code Egyptian Universities Network (EUN) Supreme Council of Universities
.eh country-code Not assigned
.email generic Spring Madison, LLC
.emerck generic Merck KGaA
.energy generic Binky Birch, LLC
.engineer generic United TLD Holdco Ltd.
.engineering generic Romeo Canyon
.enterprises generic Snow Oaks, LLC
.equipment generic Corn Station, LLC
.er country-code Eritrea Telecommunication Services Corporation (EriTel)
.es country-code Red.es
.esq generic Charleston Road Registry Inc.
.estate generic Trixy Park, LLC
.et country-code Ethio telecom
.eu country-code EURid vzw/asbl
.eurovision generic European Broadcasting Union (EBU)
.eus generic Puntueus Fundazioa
.events generic Pioneer Maple, LLC
.everbank generic EverBank
.exchange generic Spring Falls, LLC
.expert generic Magic Pass, LLC
.exposed generic Victor Beach, LLC
.fail generic Atomic Pipe, LLC
.fans generic Asiamix Digital Limited
.farm generic Just Maple, LLC
.fashion generic Top Level Domain Holdings Limited
.feedback generic Top Level Spectrum, Inc.
.fi country-code Finnish Communications Regulatory Authority
.finance generic Cotton Cypress, LLC
.financial generic Just Cover, LLC
.firmdale generic Firmdale Holdings Limited
.fish generic Fox Woods, LLC
.fishing generic Top Level Domain Holdings Limited
.fit generic Minds + Machines Group Limited
.fitness generic Brice Orchard, LLC
.fj country-code The University of the South Pacific IT Services
.fk country-code Falkland Islands Government
.flights generic Fox Station, LLC
.florist generic Half Cypress, LLC
.flowers generic Uniregistry, Corp.
.flsmidth generic FLSmidth A/S
.fly generic Charleston Road Registry Inc.
.fm country-code FSM Telecommunications Corporation
.fo country-code FO Council
.foo generic Charleston Road Registry Inc.
.forsale generic United TLD Holdco, LLC
.foundation generic John Dale, LLC
.fr country-code Association Française pour le Nommage Internet en Coopération (A.F.N.I.C.)
.frl generic FRLregistry B.V.
.frogans generic OP3FT
.fund generic John Castle, LLC
.furniture generic Lone Fields, LLC
.futbol generic United TLD Holdco, Ltd.
.ga country-code Agence Nationale des Infrastructures Numériques et des Fréquences (ANINF)
.gal generic Asociación puntoGAL
.gallery generic Sugar House, LLC
.garden generic Top Level Domain Holdings Limited
.gb country-code Reserved Domain - IANA
.gbiz generic Charleston Road Registry Inc.
.gd country-code The National Telecommunications Regulatory Commission (NTRC)
.gdn generic Joint Stock Company "Navigation-information systems"
.ge country-code Caucasus Online
.gent generic COMBELL GROUP NV/SA
.gf country-code Net Plus
.gg country-code Island Networks Ltd.
.ggee generic GMO Internet, Inc.
.gh country-code Network Computer Systems Limited
.gi country-code Sapphire Networks
.gift generic Uniregistry, Corp.
.gifts generic Goose Sky, LLC
.gives generic United TLD Holdco Ltd.
.gl country-code TELE Greenland A/S
.glass generic Black Cover, LLC
.gle generic Charleston Road Registry Inc.
.global generic Dot Global Domain Registry Limited
.globo generic Globo Comunicação e Participações S.A
.gm country-code GM-NIC
.gmail generic Charleston Road Registry Inc.
.gmo generic GMO Internet, Inc.
.gmx generic 1&1 Mail & Media GmbH
.gn country-code Centre National des Sciences Halieutiques de Boussoura
.goldpoint generic YODOBASHI CAMERA CO.,LTD.
.goog generic Charleston Road Registry Inc.
.google generic Charleston Road Registry Inc.
.gop generic Republican State Leadership Committee, Inc.
.gov sponsored General Services Administration Attn: QTDC, 2E08 (.gov Domain Registration)
.gp country-code Networking Technologies Group
.gq country-code GETESA
.gr country-code ICS-FORTH GR
.graphics generic Over Madison, LLC
.gratis generic Pioneer Tigers, LLC
.green generic Afilias Limited
.gripe generic Corn Sunset, LLC
.gs country-code Government of South Georgia and South Sandwich Islands (GSGSSI)
.gt country-code Universidad del Valle de Guatemala
.gu country-code University of Guam Computer Center
.guide generic Snow Moon, LLC
.guitars generic Uniregistry, Corp.
.guru generic Pioneer Cypress, LLC
.gw country-code Autoridade Reguladora Nacional - Tecnologias de Informação e Comunicação da Guiné-Bissau
.gy country-code University of Guyana
.hamburg generic Hamburg Top-Level-Domain GmbH
.hangout generic Charleston Road Registry Inc.
.haus generic United TLD Holdco, LTD.
.healthcare generic Silver Glen, LLC
.help generic Uniregistry, Corp.
.here generic Charleston Road Registry Inc.
.hermes generic Hermes International
.hiphop generic Uniregistry, Corp.
.hiv generic dotHIV gemeinnuetziger e.V.
.hk country-code Hong Kong Internet Registration Corporation Ltd.
.hm country-code HM Domain Registry
.hn country-code Red de Desarrollo Sostenible Honduras
.holdings generic John Madison, LLC
.holiday generic Goose Woods, LLC
.homes generic DERHomes, LLC
.horse generic Top Level Domain Holdings Limited
.host generic DotHost Inc.
.hosting generic Uniregistry, Corp.
.house generic Sugar Park, LLC
.how generic Charleston Road Registry Inc.
.hr country-code CARNet - Croatian Academic and Research Network
.ht country-code Consortium FDS/RDDH
.hu country-code Council of Hungarian Internet Providers (CHIP)
.ibm generic International Business Machines Corporation
.id country-code Perkumpulan Pengelola Nama Domain Internet Indonesia (PANDI)
.ie country-code University College Dublin Computing Services Computer Centre
.ifm generic ifm electronic gmbh
.il country-code Internet Society of Israel
.im country-code Isle of Man Government
.immo generic Auburn Bloom, LLC
.immobilien generic United TLD Holdco Ltd.
.in country-code National Internet Exchange of India
.industries generic Outer House, LLC
.info generic Afilias Limited
.ing generic Charleston Road Registry Inc.
.ink generic Top Level Design, LLC
.institute generic Outer Maple, LLC
.insure generic Pioneer Willow, LLC
.int sponsored Internet Assigned Numbers Authority
.international generic Wild Way, LLC
.investments generic Holly Glen, LLC
.io country-code IO Top Level Domain Registry Cable and Wireless
.iq country-code Communications and Media Commission (CMC)
.ir country-code Institute for Research in Fundamental Sciences
.irish generic Dot-Irish LLC
.is country-code ISNIC - Internet Iceland ltd.
.it country-code IIT - CNR
.iwc generic Richemont DNS Inc.
.jcb generic JCB Co., Ltd.
.je country-code Island Networks (Jersey) Ltd.
.jetzt generic New TLD Company AB
.jm country-code University of West Indies
.jo country-code National Information Technology Center (NITC)
.jobs sponsored Employ Media LLC
.joburg generic ZA Central Registry NPC trading as ZA Central Registry
.jp country-code Japan Registry Services Co., Ltd.
.juegos generic Uniregistry, Corp.
.kaufen generic United TLD Holdco Ltd.
.kddi generic KDDI CORPORATION
.ke country-code Kenya Network Information Center (KeNIC)
.kg country-code AsiaInfo Telecommunication Enterprise
.kh country-code Ministry of Post and Telecommunications
.ki country-code Ministry of Communications, Transport, and Tourism Development
.kim generic Afilias Limited
.kitchen generic Just Goodbye, LLC
.kiwi generic DOT KIWI LIMITED
.km country-code Comores Telecom
.kn country-code Ministry of Finance, Sustainable Development Information & Technology
.koeln generic NetCologne Gesellschaft für Telekommunikation mbH
.kp country-code Star Joint Venture Company
.kr country-code Korea Internet & Security Agency (KISA)
.krd generic KRG Department of Information Technology
.kred generic KredTLD Pty Ltd
.kw country-code Ministry of Communications
.ky country-code The Information and Communications Technology Authority
.kyoto generic Academic Institution: Kyoto Jyoho Gakuen
.kz country-code Association of IT Companies of Kazakhstan
.la country-code Lao National Internet Committee (LANIC), Ministry of Posts and Telecommunications
.lacaixa generic CAIXA D'ESTALVIS I PENSIONS DE BARCELONA
.land generic Pine Moon, LLC
.lat generic ECOM-LAC Federación de Latinoamérica y el Caribe para Internet y el Comercio Electrónico
.latrobe generic La Trobe University
.lawyer generic United TLD Holdco, Ltd
.lb country-code American University of Beirut Computing and Networking Services
.lc country-code University of Puerto Rico
.lds generic IRI Domain Management, LLC
.lease generic Victor Trail, LLC
.legal generic Blue Falls, LLC
.lgbt generic Afilias Limited
.li country-code Universitaet Liechtenstein
.lidl generic Schwarz Domains und Services GmbH & Co. KG
.life generic Trixy Oaks, LLC
.lighting generic John McCook, LLC
.limited generic Big Fest, LLC
.limo generic Hidden Frostbite, LLC
.link generic Uniregistry, Corp.
.lk country-code Council for Information Technology LK Domain Registrar
.loans generic June Woods, LLC
.london generic Dot London Domains Limited
.lotte generic Lotte Holdings Co., Ltd.
.lotto generic Afilias Limited
.lr country-code Data Technology Solutions, Inc.
.ls country-code National University of Lesotho
.lt country-code Kaunas University of Technology
.ltda generic InterNetX Corp.
.lu country-code RESTENA
.luxe generic Top Level Domain Holdings Limited
.luxury generic Luxury Partners LLC
.lv country-code University of Latvia Institute of Mathematics and Computer Science Department of Network Solutions (DNS)
.ly country-code General Post and Telecommunication Company
.ma country-code Agence Nationale de Réglementation des Télécommunications (ANRT)
.madrid generic Comunidad de Madrid
.maison generic Victor Frostbite, LLC
.management generic John Goodbye, LLC
.mango generic PUNTO FA S.L.
.market generic Unitied TLD Holdco, Ltd
.marketing generic Fern Pass, LLC
.marriott generic Marriott Worldwide Corporation
.mc country-code Gouvernement de Monaco Direction des Communications Electroniques
.md country-code MoldData S.E.
.me country-code Government of Montenegro
.media generic Grand Glen, LLC
.meet generic Afilias Limited
.melbourne generic The Crown in right of the State of Victoria, represented by its Department of State Development, Business and Innovation
.meme generic Charleston Road Registry Inc.
.memorial generic Dog Beach, LLC
.menu generic Wedding TLD2, LLC
.mf country-code Not assigned
.mg country-code NIC-MG (Network Information Center Madagascar)
.mh country-code Office of the Cabinet
.miami generic Top Level Domain Holdings Limited
.mil sponsored DoD Network Information Center
.mini generic Bayerische Motoren Werke Aktiengesellschaft
.mk country-code Macedonian Academic Research Network Skopje
.ml country-code Agence des Technologies de lInformation et de la Communication
.mm country-code Ministry of Communications, Posts & Telegraphs
.mn country-code Datacom Co., Ltd.
.mo country-code Bureau of Telecommunications Regulation (DSRT)
.mobi sponsored Afilias Technologies Limited dba dotMobi
.moda generic United TLD Holdco Ltd.
.moe generic Interlink Co., Ltd.
.monash generic Monash University
.money generic Outer McCook, LLC
.mormon generic IRI Domain Management, LLC ("Applicant")
.mortgage generic United TLD Holdco, Ltd
.moscow generic Foundation for Assistance for Internet Technologies and Infrastructure Development (FAITID)
.motorcycles generic DERMotorcycles, LLC
.mov generic Charleston Road Registry Inc.
.mp country-code Saipan Datacom, Inc.
.mq country-code MEDIASERV
.mr country-code Université des Sciences, de Technologie et de Médecine
.ms country-code MNI Networks Ltd.
.mt country-code NIC (Malta)
.mu country-code Internet Direct Ltd
.museum sponsored Museum Domain Management Association
.mv country-code Dhiraagu Pvt. Ltd. (DHIVEHINET)
.mw country-code Malawi Sustainable Development Network Programme (Malawi SDNP)
.mx country-code NIC-Mexico ITESM - Campus Monterrey
.my country-code MYNIC Berhad
.mz country-code Centro de Informatica de Universidade Eduardo Mondlane
.na country-code Namibian Network Information Center
.nagoya generic GMO Registry, Inc.
.name generic-restricted VeriSign Information Services, Inc.
.navy generic United TLD Holdco Ltd.
.nc country-code Office des Postes et Telecommunications
.ne country-code SONITEL
.net generic VeriSign Global Registry Services
.network generic Trixy Manor, LLC
.neustar generic NeuStar, Inc.
.new generic Charleston Road Registry Inc.
.nexus generic Charleston Road Registry Inc.
.nf country-code Norfolk Island Data Services
.ng country-code Nigeria Internet Registration Association
.ngo generic Public Interest Registry
.nhk generic Japan Broadcasting Corporation (NHK)
.ni country-code Universidad Nacional del Ingernieria Centro de Computo
.nico generic DWANGO Co., Ltd.
.ninja generic United TLD Holdco Ltd.
.nl country-code SIDN (Stichting Internet Domeinregistratie Nederland)
.no country-code UNINETT Norid A/S
.np country-code Mercantile Communications Pvt. Ltd.
.nr country-code CENPAC NET
.nra generic NRA Holdings Company, INC.
.nrw generic Minds + Machines GmbH
.ntt generic NIPPON TELEGRAPH AND TELEPHONE CORPORATION
.nu country-code The IUSN Foundation
.nyc generic The City of New York by and through the New York City Department of Information Technology & Telecommunications
.nz country-code InternetNZ
.okinawa generic BusinessRalliart inc.
.om country-code Telecommunications Regulatory Authority (TRA)
.one generic One.com A/S
.ong generic Public Interest Registry
.onl generic I-REGISTRY Ltd., Niederlassung Deutschland
.ooo generic INFIBEAM INCORPORATION LIMITED
.org generic Public Interest Registry (PIR)
.organic generic Afilias Limited
.osaka generic Interlink Co., Ltd.
.otsuka generic Otsuka Holdings Co., Ltd.
.ovh generic OVH SAS
.pa country-code Universidad Tecnologica de Panama
.paris generic City of Paris
.partners generic Magic Glen, LLC
.parts generic Sea Goodbye, LLC
.party generic Blue Sky Registry Limited
.pe country-code Red Cientifica Peruana
.pf country-code Gouvernement de la Polynésie française
.pg country-code PNG DNS Administration Vice Chancellors Office The Papua New Guinea University of Technology
.ph country-code PH Domain Foundation
.pharmacy generic National Association of Boards of Pharmacy
.photo generic Uniregistry, Corp.
.photography generic Sugar Glen, LLC
.photos generic Sea Corner, LLC
.physio generic PhysBiz Pty Ltd
.pics generic Uniregistry, Corp.
.pictures generic Foggy Sky, LLC
.pink generic Afilias Limited
.pizza generic Foggy Moon, LLC
.pk country-code PKNIC
.pl country-code Research and Academic Computer Network
.place generic Snow Galley, LLC
.plumbing generic Spring Tigers, LLC
.pm country-code Association Française pour le Nommage Internet en Coopération (A.F.N.I.C.)
.pn country-code Pitcairn Island Administration
.pohl generic Deutsche Vermögensberatung Aktiengesellschaft DVAG
.poker generic Afilias Domains No. 5 Limited
.porn generic ICM Registry PN LLC
.post sponsored Universal Postal Union
.pr country-code Gauss Research Laboratory Inc.
.praxi generic Praxi S.p.A.
.press generic DotPress Inc.
.pro generic-restricted Registry Services Corporation dba RegistryPro
.prod generic Charleston Road Registry Inc.
.productions generic Magic Birch, LLC
.prof generic Charleston Road Registry Inc.
.properties generic Big Pass, LLC
.property generic Uniregistry, Corp.
.ps country-code Ministry Of Telecommunications & Information Technology, Government Computer Center.
.pt country-code Associação DNS.PT
.pub generic United TLD Holdco Ltd.
.pw country-code Micronesia Investment and Development Corporation
.py country-code NIC-PY
.qa country-code Communications Regulatory Authority
.qpon generic dotCOOL, Inc.
.quebec generic PointQuébec Inc
.re country-code Association Française pour le Nommage Internet en Coopération (A.F.N.I.C.)
.realtor generic Real Estate Domains LLC
.recipes generic Grand Island, LLC
.red generic Afilias Limited
.rehab generic United TLD Holdco Ltd.
.reise generic dotreise GmbH
.reisen generic New Cypress, LLC
.reit generic National Association of Real Estate Investment Trusts, Inc.
.ren generic Beijing Qianxiang Wangjing Technology Development Co., Ltd.
.rentals generic Big Hollow,LLC
.repair generic Lone Sunset, LLC
.report generic Binky Glen, LLC
.republican generic United TLD Holdco Ltd.
.rest generic Punto 2012 Sociedad Anonima Promotora de Inversion de Capital Variable
.restaurant generic Snow Avenue, LLC
.reviews generic United TLD Holdco, Ltd.
.rich generic I-REGISTRY Ltd., Niederlassung Deutschland
.rio generic Empresa Municipal de Informática SA - IPLANRIO
.rip generic United TLD Holdco Ltd.
.ro country-code National Institute for R&D in Informatics
.rocks generic United TLD Holdco, LTD.
.rodeo generic Top Level Domain Holdings Limited
.rs country-code Serbian National Internet Domain Registry (RNIDS)
.rsvp generic Charleston Road Registry Inc.
.ru country-code Coordination Center for TLD RU
.ruhr generic regiodot GmbH & Co. KG
.rw country-code Rwanda Information Communication and Technology Association (RICTA)
.ryukyu generic BusinessRalliart inc.
.sa country-code Communications and Information Technology Commission
.saarland generic dotSaarland GmbH
.sale generic United TLD Holdco, Ltd
.samsung generic SAMSUNG SDS CO., LTD
.sarl generic Delta Orchard, LLC
.saxo generic Saxo Bank A/S
.sb country-code Solomon Telekom Company Limited
.sc country-code VCS Pty Ltd
.sca generic SVENSKA CELLULOSA AKTIEBOLAGET SCA (publ)
.scb generic The Siam Commercial Bank Public Company Limited ("SCB")
.schmidt generic SALM S.A.S.
.schule generic Outer Moon, LLC
.schwarz generic Schwarz Domains und Services GmbH & Co. KG
.science generic dot Science Limited
.scot generic Dot Scot Registry Limited
.sd country-code Sudan Internet Society
.se country-code The Internet Infrastructure Foundation
.services generic Fox Castle, LLC
.sew generic SEW-EURODRIVE GmbH & Co KG
.sexy generic Uniregistry, Corp.
.sg country-code Singapore Network Information Centre (SGNIC) Pte Ltd
.sh country-code Government of St. Helena
.shiksha generic Afilias Limited
.shoes generic Binky Galley, LLC
.shriram generic Shriram Capital Ltd.
.si country-code Academic and Research Network of Slovenia (ARNES)
.singles generic Fern Madison, LLC
.sj country-code UNINETT Norid A/S
.sk country-code SK-NIC, a.s.
.sky generic Sky IP International Ltd, a company incorporated in England and Wales, operating via its registered Swiss branch
.sl country-code Sierratel
.sm country-code Telecom Italia San Marino S.p.A.
.sn country-code Universite Cheikh Anta Diop NIC Senegal
.so country-code Ministry of Post and Telecommunications
.social generic United TLD Holdco Ltd.
.software generic United TLD Holdco, Ltd
.sohu generic Sohu.com Limited
.solar generic Ruby Town, LLC
.solutions generic Silver Cover, LLC
.soy generic Charleston Road Registry Inc.
.space generic DotSpace Inc.
.spiegel generic SPIEGEL-Verlag Rudolf Augstein GmbH & Co. KG
.sr country-code Telesur
.ss country-code Not assigned
.st country-code Tecnisys
.style generic Binky Moon, LLC
.su country-code Russian Institute for Development of Public Networks (ROSNIIROS)
.supplies generic Atomic Fields, LLC
.supply generic Half Falls, LLC
.support generic Grand Orchard, LLC
.surf generic Top Level Domain Holdings Limited
.surgery generic Tin Avenue, LLC
.suzuki generic SUZUKI MOTOR CORPORATION
.sv country-code SVNet
.sx country-code SX Registry SA B.V.
.sy country-code National Agency for Network Services (NANS)
.sydney generic State of New South Wales, Department of Premier and Cabinet
.systems generic Dash Cypress, LLC
.sz country-code University of Swaziland Department of Computer Science
.taipei generic Taipei City Government
.tatar generic Limited Liability Company "Coordination Center of Regional Domain of Tatarstan Republic"
.tattoo generic Uniregistry, Corp.
.tax generic Storm Orchard, LLC
.tc country-code Melrex TC
.td country-code Société des télécommunications du Tchad (SOTEL TCHAD)
.technology generic Auburn Falls, LLC
.tel sponsored Telnic Ltd.
.temasek generic Temasek Holdings (Private) Limited
.tennis generic Cotton Bloom, LLC
.tf country-code Association Française pour le Nommage Internet en Coopération (A.F.N.I.C.)
.tg country-code Cafe Informatique et Telecommunications
.th country-code Thai Network Information Center Foundation
.tienda generic Victor Manor, LLC
.tips generic Corn Willow, LLC
.tires generic Dog Edge, LLC
.tirol generic punkt Tirol GmbH
.tj country-code Information Technology Center
.tk country-code Telecommunication Tokelau Corporation (Teletok)
.tl country-code Ministry of Transport and Communications; National Division of Information and Technology
.tm country-code TM Domain Registry Ltd
.tn country-code Agence Tunisienne d'Internet
.to country-code Government of the Kingdom of Tonga H.R.H. Crown Prince Tupouto'a c/o Consulate of Tonga
.today generic Pearl Woods, LLC
.tokyo generic GMO Registry, Inc.
.tools generic Pioneer North, LLC
.top generic Jiangsu Bangning Science & Technology Co.,Ltd.
.toshiba generic TOSHIBA Corporation
.town generic Koko Moon, LLC
.toys generic Pioneer Orchard, LLC
.tp country-code -
.tr country-code Middle East Technical University Department of Computer Engineering
.trade generic Elite Registry Limited
.training generic Wild Willow, LLC
.travel sponsored Tralliance Registry Management Company, LLC.
.trust generic Artemis Internet Inc
.tt country-code University of the West Indies Faculty of Engineering
.tui generic TUI AG
.tv country-code Ministry of Finance and Tourism
.tw country-code Taiwan Network Information Center (TWNIC)
.tz country-code Tanzania Network Information Centre (tzNIC)
.ua country-code Hostmaster Ltd.
.ug country-code Uganda Online Ltd.
.uk country-code Nominet UK
.um country-code Not assigned
.university generic Little Station, LLC
.uno generic Dot Latin LLC
.uol generic UBN INTERNET LTDA.
.us country-code NeuStar, Inc.
.uy country-code SeCIU - Universidad de la Republica
.uz country-code Computerization and Information Technologies Developing Center UZINFOCOM
.va country-code Holy See Secretariat of State Department of Telecommunications
.vacations generic Atomic Tigers, LLC
.vc country-code Ministry of Telecommunications, Science, Technology and Industry
.ve country-code Comisión Nacional de Telecomunicaciones (CONATEL)
.vegas generic Dot Vegas, Inc.
.ventures generic Binky Lake, LLC
.versicherung generic dotversicherung-registry GmbH
.vet generic United TLD Holdco, Ltd
.vg country-code Telecommunications Regulatory Commission of the Virgin Islands
.vi country-code Virgin Islands Public Telcommunications System c/o COBEX Internet Services
.viajes generic Black Madison, LLC
.video generic United TLD Holdco, Ltd
.villas generic New Sky, LLC
.vision generic Koko Station, LLC
.vlaanderen generic DNS.be vzw
.vn country-code Ministry of Information and Communications of Socialist Republic of Viet Nam
.vodka generic Top Level Domain Holdings Limited
.vote generic Monolith Registry LLC
.voting generic Valuetainment Corp.
.voto generic Monolith Registry LLC
.voyage generic Ruby House, LLC
.vu country-code Telecom Vanuatu Limited
.wales generic Nominet UK
.wang generic Zodiac Registry Limited
.watch generic Sand Shadow, LLC
.webcam generic dot Webcam Limited
.website generic DotWebsite Inc.
.wed generic Atgron, Inc.
.wedding generic Top Level Domain Holdings Limited
.wf country-code Association Française pour le Nommage Internet en Coopération (A.F.N.I.C.)
.whoswho generic Who's Who Registry
.wien generic punkt.wien GmbH
.wiki generic Top Level Design, LLC
.williamhill generic William Hill Organization Limited
.wme generic William Morris Endeavor Entertainment, LLC
.work generic Top Level Domain Holdings Limited
.works generic Little Dynamite, LLC
.world generic Bitter Fields, LLC
.ws country-code Government of Samoa Ministry of Foreign Affairs & Trade
.wtc generic World Trade Centers Association, Inc.
.wtf generic Hidden Way, LLC
.测试 test Internet Assigned Numbers Authority
.परीक्षा test Internet Assigned Numbers Authority
.佛山 generic Guangzhou YU Wei Information Technology Co., Ltd.
.集团 generic Eagle Horizon Limited
.在线 generic TLD REGISTRY LIMITED
.한국 country-code KISA (Korea Internet & Security Agency)
.ভারত country-code National Internet Exchange of India
.八卦 generic Zodiac Scorpio Limited
.موقع generic Suhub Electronic Establishment
.বাংলা country-code Not assigned
.公益 generic China Organizational Name Administration Center
.公司 generic Computer Network Information Center of Chinese Academy of Sciences China Internet Network Information Center
.移动 generic Afilias Limited
.我爱你 generic Tycoon Treasure Limited
.москва generic Foundation for Assistance for Internet Technologies and Infrastructure Development (FAITID)
.испытание test Internet Assigned Numbers Authority
.қаз country-code Association of IT Companies of Kazakhstan
.онлайн generic CORE Association
.сайт generic CORE Association
.срб country-code Serbian National Internet Domain Registry (RNIDS)
.бел country-code Reliable Software Inc.
.테스트 test Internet Assigned Numbers Authority
.淡马锡 generic Temasek Holdings (Private) Limited
.орг generic Public Interest Registry
.삼성 generic SAMSUNG SDS CO., LTD
.சிங்கப்பூர் country-code Singapore Network Information Centre (SGNIC) Pte Ltd
.商标 generic HU YI GLOBAL INFORMATION RESOURCES(HOLDING) COMPANY.HONGKONG LIMITED
.商店 generic Wild Island, LLC
.商城 generic Zodiac Aquarius Limited
.дети generic The Foundation for Network Initiatives “The Smart Internet”
.мкд country-code Macedonian Academic Research Network Skopje
.טעסט test Internet Assigned Numbers Authority
.中文网 generic TLD REGISTRY LIMITED
.中信 generic CITIC Group Corporation
.中国 country-code China Internet Network Information Center
.中國 country-code China Internet Network Information Center
.谷歌 generic Charleston Road Registry Inc.
.భారత్ country-code National Internet Exchange of India
.ලංකා country-code LK Domain Registry
.測試 test Internet Assigned Numbers Authority
.ભારત country-code National Internet Exchange of India
.भारत country-code National Internet Exchange of India
.آزمایشی test Internet Assigned Numbers Authority
.பரிட்சை test Internet Assigned Numbers Authority
.网店 generic Zodiac Libra Limited
.संगठन generic Public Interest Registry
.网络 generic Computer Network Information Center of Chinese Academy of Sciences China Internet Network Information Center
.укр country-code Ukrainian Network Information Centre (UANIC), Inc.
.香港 country-code Hong Kong Internet Registration Corporation Ltd.
.δοκιμή test Internet Assigned Numbers Authority
.إختبار test Internet Assigned Numbers Authority
.台湾 country-code Taiwan Network Information Center (TWNIC)
.台灣 country-code Taiwan Network Information Center (TWNIC)
.手机 generic Beijing RITT-Net Technology Development Co., Ltd
.мон country-code Datacom Co.,Ltd
.الجزائر country-code CERIST
.عمان country-code Telecommunications Regulatory Authority (TRA)
.ایران country-code Institute for Research in Fundamental Sciences (IPM)
.امارات country-code Telecommunications Regulatory Authority (TRA)
.بازار generic CORE Association
.پاکستان country-code Not assigned
.الاردن country-code National Information Technology Center (NITC)
.بھارت country-code National Internet Exchange of India
.المغرب country-code Agence Nationale de Réglementation des Télécommunications (ANRT)
.السعودية country-code Communications and Information Technology Commission
.سودان country-code Not assigned
.عراق country-code Not assigned
.مليسيا country-code MYNIC Berhad
.شبكة generic International Domain Registry Pty. Ltd.
.გე country-code Information Technologies Development Center (ITDC)
.机构 generic Public Interest Registry
.组织机构 generic Public Interest Registry
.ไทย country-code Thai Network Information Center Foundation
.سورية country-code National Agency for Network Services (NANS)
.рус generic Rusnames Limited
.рф country-code Coordination Center for TLD RU
.تونس country-code Agence Tunisienne d'Internet
.みんな generic Charleston Road Registry Inc.
.グーグル generic Charleston Road Registry Inc.
.世界 generic Stable Tone Limited
.ਭਾਰਤ country-code National Internet Exchange of India
.网址 generic KNET Co., Ltd
.游戏 generic Spring Fields, LLC
.vermögensberater generic Deutsche Vermögensberatung Aktiengesellschaft DVAG
.vermögensberatung generic Deutsche Vermögensberatung Aktiengesellschaft DVAG
.企业 generic Dash McCook, LLC
.مصر country-code National Telecommunication Regulatory Authority - NTRA
.قطر country-code Communications Regulatory Authority
.广东 generic Guangzhou YU Wei Information Technology Co., Ltd.
.இலங்கை country-code LK Domain Registry
.இந்தியா country-code National Internet Exchange of India
.հայ country-code Not assigned
.新加坡 country-code Singapore Network Information Centre (SGNIC) Pte Ltd
.فلسطين country-code Ministry of Telecom & Information Technology (MTIT)
.テスト test Internet Assigned Numbers Authority
.政务 generic China Organizational Name Administration Center
.xxx sponsored ICM Registry LLC
.xyz generic XYZ.COM LLC
.yachts generic DERYachts, LLC
.yandex generic YANDEX, LLC
.ye country-code TeleYemen
.yodobashi generic YODOBASHI CAMERA CO.,LTD.
.yoga generic Top Level Domain Holdings Limited
.yokohama generic GMO Registry, Inc.
.youtube generic Charleston Road Registry Inc.
.yt country-code Association Française pour le Nommage Internet en Coopération (A.F.N.I.C.)
.za country-code ZA Domain Name Authority
.zip generic Charleston Road Registry Inc.
.zm country-code Zambia Information and Communications Technology Authority (ZICTA)
.zone generic Outer Falls, LLC
.zuerich generic Kanton Zürich (Canton of Zurich)
.zw country-code Postal and Telecommunications Regulatory Authority of Zimbabwe (POTRAZ)

View File

@@ -0,0 +1,3 @@
#!/bin/bash
# Start a vaurien HTTP proxy on localhost:8000 forwarding to the backend
# on localhost:8080, delaying 100% of requests by 1 second — used to
# simulate a slow network for the flags/daypicts download examples.
vaurien --protocol http --proxy localhost:8000 --backend localhost:8080 \
--behavior 100:delay --behavior-delay-sleep 1

View File

@@ -0,0 +1,3 @@
#!/bin/bash
# Start a vaurien HTTP proxy on localhost:8000 forwarding to the backend
# on localhost:8080; 50% of requests get an error and 50% are delayed by
# 0.5s — simulates a flaky network for the download examples.
vaurien --protocol http --proxy localhost:8000 --backend localhost:8080 \
--behavior 50:error,50:delay --behavior-delay-sleep .5

View File

@@ -0,0 +1,24 @@
import os
from time import sleep, time

from lelo import parallel  # third-party: runs decorated calls in worker processes

# Delay (seconds) simulating the work done by each call.
DELAY = .2

@parallel  # each call to loiter() is dispatched to a separate process
def loiter(serial, delay):
    # Report which worker process handled this call, then pretend to work.
    pid = os.getpid()
    print('%2d pid = %d' % (serial, pid))
    sleep(delay)
    return pid

t0 = time()
results = []
for i in range(15):
    res = loiter(i, DELAY)
    results.append(res)
# set() collapses duplicates, revealing how many distinct processes ran.
print('Processes used: ', list(set(results)))
print('### Elapsed time: %0.2f' % (time() - t0))

View File

@@ -0,0 +1,19 @@
import os
from parallelize import parallelize  # third-party: fork()-based parallel loops
from time import sleep, time

# Sequential baseline: every iteration runs in this single process.
print('one process:')
t0 = time()
for i in range(12):
    print('%2d pid = %d' % (i, os.getpid()))
    sleep(.2)
print('elapsed time: %0.2f' % (time() - t0))
print()

# parallelize() forks workers, so iterations report different PIDs
# and the elapsed time shrinks accordingly.
print('several processes:')
t0 = time()
for i in parallelize(range(12)):
    print('%2d pid = %d' % (i, os.getpid()))
    sleep(.2)
print('elapsed time: %0.2f' % (time() - t0))

View File

@@ -0,0 +1,23 @@
import os
from time import sleep, time

from parallelize import parallelize, per_item  # third-party fork() helper

# Delay (seconds) simulating the work done by each iteration.
DELAY = .2

def loiter(serial, delay):
    # Report which (forked) process ran this iteration, then pretend to work.
    pid = os.getpid()
    print('%2d pid = %d' % (serial, pid))
    sleep(delay)
    return pid

t0 = time()
results = []
# fork=per_item forks one child per item instead of chunking the range.
for i in parallelize(range(15), fork=per_item):
    res = loiter(i, DELAY)
    results.append(res)
# set() collapses duplicates, revealing how many distinct processes ran.
print('Processes used: ', list(set(results)))
print('### Elapsed time: %0.2f' % (time() - t0))

View File

@@ -0,0 +1,42 @@
# spinner_asyncio.py
# credits: Example by Luciano Ramalho inspired by
# Michele Simionato's multiprocessing example
# source:
# http://python-3-patterns-idioms-test.readthedocs.org/en/latest/CoroutinesAndConcurrency.html

# Display an animated text spinner while a "long computation" (a sleep)
# runs on the same asyncio event loop.
# NOTE(review): uses the legacy @asyncio.coroutine / `yield from` style,
# which predates async/await and was removed in Python 3.11.

import sys
import asyncio

DELAY = 0.1          # seconds between spinner frames
DISPLAY = '|/-\\'    # the four characters that make up the animation

@asyncio.coroutine
def spinner_func(before='', after=''):
    """Animate a spinner between *before* and *after* until cancelled."""
    write, flush = sys.stdout.write, sys.stdout.flush
    while True:
        for char in DISPLAY:
            msg = '{} {} {}'.format(before, char, after)
            write(msg)
            flush()
            # '\x08' is backspace: rewind the cursor so the next frame
            # overwrites this one in place.
            write('\x08' * len(msg))
            try:
                yield from asyncio.sleep(DELAY)
            except asyncio.CancelledError:
                # Cancellation is the normal way to stop the spinner.
                return

@asyncio.coroutine
def long_computation(delay):
    # emulate a long computation
    yield from asyncio.sleep(delay)

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    spinner = loop.create_task(spinner_func('Please wait...', 'thinking!'))
    long_task = loop.create_task(long_computation(3))
    # When the computation finishes, its done-callback cancels the spinner.
    long_task.add_done_callback(lambda f: spinner.cancel())
    loop.run_until_complete(spinner)
    loop.close()

View File

@@ -0,0 +1,46 @@
# spinner_asyncio2.py
# credits: Example by Luciano Ramalho inspired by
# Michele Simionato's multiprocessing example
# source:
# http://python-3-patterns-idioms-test.readthedocs.org/en/latest/CoroutinesAndConcurrency.html

# Variant of spinner_asyncio.py: a supervisor coroutine starts the
# spinner, awaits the computation, then cancels the spinner — instead of
# wiring a done-callback.
# NOTE(review): legacy @asyncio.coroutine / `yield from` style, removed
# in Python 3.11.

import sys
import asyncio

DELAY = 0.1          # seconds between spinner frames
DISPLAY = '|/-\\'    # animation frames

@asyncio.coroutine
def spinner_func(before='', after=''):
    """Animate a spinner between *before* and *after* until cancelled."""
    write, flush = sys.stdout.write, sys.stdout.flush
    while True:
        for char in DISPLAY:
            msg = '{} {} {}'.format(before, char, after)
            write(msg)
            flush()
            # '\x08' is backspace: rewind so the next frame overwrites this one.
            write('\x08' * len(msg))
            try:
                yield from asyncio.sleep(DELAY)
            except asyncio.CancelledError:
                # Cancellation is the normal way to stop the spinner.
                return

@asyncio.coroutine
def long_computation(delay):
    # emulate a long computation
    yield from asyncio.sleep(delay)

@asyncio.coroutine
def supervisor(delay):
    # NOTE(review): relies on the module-level `loop` bound in the
    # __main__ block below — works only when run as a script.
    spinner = loop.create_task(spinner_func('Please wait...', 'thinking!'))
    yield from long_computation(delay)
    spinner.cancel()

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(supervisor(3))
    loop.close()

View File

@@ -0,0 +1,38 @@
# spinner_proc.py
# credit: Example by Michele Simionato in comp lang python.
# source:
# http://python-3-patterns-idioms-test.readthedocs.org/en/latest/CoroutinesAndConcurrency.html

# Show an animated spinner in a child process while the parent runs a
# "long computation" (a sleep).

import sys
import time
import multiprocessing

DELAY = 0.1          # seconds between spinner frames
DISPLAY = '|/-\\'    # animation frames

def spinner_func(before='', after=''):
    """Animate a spinner forever; the parent terminates this process."""
    write, flush = sys.stdout.write, sys.stdout.flush
    while True:
        for char in DISPLAY:
            msg = '{} {} {}'.format(before, char, after)
            write(msg)
            flush()
            # '\x08' is backspace: rewind so the next frame overwrites this one.
            write('\x08' * len(msg))
            time.sleep(DELAY)

def long_computation():
    # emulate a long computation
    time.sleep(3)

if __name__ == '__main__':
    spinner = multiprocessing.Process(
        None, spinner_func, args=('Please wait ... ', ' thinking!'))
    spinner.start()
    try:
        long_computation()
        print('\nComputation done')
    finally:
        # The spinner loops forever, so it must be killed explicitly.
        spinner.terminate()

View File

@@ -0,0 +1,35 @@
# spinner_thread.py
# adapted from spinner_proc.py to use threads

# Show an animated spinner in a daemon thread while the main thread runs
# a "long computation" (a sleep).

import sys
import time
import threading

DELAY = 0.1          # seconds between spinner frames
DISPLAY = '|/-\\'    # animation frames

def spinner_func(before='', after=''):
    """Animate a spinner forever; dies with the main thread (daemon)."""
    write, flush = sys.stdout.write, sys.stdout.flush
    while True:
        for char in DISPLAY:
            msg = '{} {} {}'.format(before, char, after)
            write(msg)
            flush()
            # '\x08' is backspace: rewind so the next frame overwrites this one.
            write('\x08' * len(msg))
            time.sleep(DELAY)

def long_computation():
    # emulate a long computation
    time.sleep(3)

if __name__ == '__main__':
    spinner = threading.Thread(
        None, spinner_func, args=('Please wait...', 'thinking!'))
    # Daemon threads are killed when the main thread exits — there is no
    # terminate() for threads, unlike the multiprocessing variant.
    spinner.daemon = True
    spinner.start()
    long_computation()
    print('\nComputation done')

View File

@@ -0,0 +1,17 @@
import asyncio

# Legacy @asyncio.coroutine / `yield from` style (pre-async/await).

@asyncio.coroutine
def show_remaining():
    """Print a 5..1 countdown, one line per second."""
    for remaining in range(5, 0, -1):
        print('Remaining: ', remaining)
        yield from asyncio.sleep(1)

def main():
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(show_remaining())
    finally:
        loop.close()  # always release the loop's resources

if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,30 @@
import asyncio
import sys
import contextlib

# Two concurrent coroutines: a 1-second countdown and a 0.1-second dot
# printer; the countdown cancels the dot printer when it finishes.
# Legacy @asyncio.coroutine / `yield from` style (pre-async/await).

@asyncio.coroutine
def show_remaining(dots_task):
    """Count down from 5 once per second, then cancel *dots_task*."""
    remaining = 5
    while remaining:
        print('Remaining: ', remaining)
        sys.stdout.flush()
        yield from asyncio.sleep(1)
        remaining -= 1
    dots_task.cancel()
    print()

@asyncio.coroutine
def dots():
    """Print a dot every 0.1s until cancelled."""
    while True:
        print('.', sep='', end='')
        sys.stdout.flush()
        yield from asyncio.sleep(.1)

def main():
    # contextlib.closing() guarantees loop.close() on the way out.
    with contextlib.closing(asyncio.get_event_loop()) as loop:
        dots_task = asyncio.Task(dots())
        coros = [show_remaining(dots_task), dots_task]
        # wait() returns when both are done (dots ends by cancellation).
        loop.run_until_complete(asyncio.wait(coros))

if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,23 @@
import asyncio

def show_remaining(loop):
    """Print a 5..1 countdown, rescheduling itself once per second.

    Demonstrates plain-callback scheduling with loop.call_later(); the
    counter lives as a function attribute instead of a global.
    """
    if not hasattr(show_remaining, 'remaining'):
        show_remaining.remaining = 5  # first call: initialize the counter
    print('Remaining: ', show_remaining.remaining)
    show_remaining.remaining -= 1
    if show_remaining.remaining:
        loop.call_later(1, show_remaining, loop)  # reschedule in 1s
    else:
        loop.stop()  # counter exhausted: ends run_forever()

def main():
    loop = asyncio.get_event_loop()
    try:
        loop.call_soon(show_remaining, loop)
        loop.run_forever()
    finally:
        loop.close()

if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,29 @@
import sys
import asyncio

def make_show_remaining(seconds):
    """Build a countdown callback that keeps its counter in a closure."""
    remaining = seconds

    def show_remaining(loop):
        nonlocal remaining  # mutate the enclosing counter, not a local
        print('Remaining: ', remaining)
        remaining -= 1
        if remaining:
            loop.call_later(1, show_remaining, loop)  # reschedule in 1s
        else:
            loop.stop()  # ends run_forever()

    return show_remaining

def main(seconds=5):
    seconds = int(seconds)  # may arrive as a CLI string
    loop = asyncio.get_event_loop()
    try:
        loop.call_soon(make_show_remaining(seconds), loop)
        loop.run_forever()
    finally:
        loop.close()

if __name__ == '__main__':
    main(*sys.argv[1:])

View File

@@ -0,0 +1,8 @@
import time
def main():
for remaining in range(5, 0, -1):
print('Remaining: ', remaining)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,138 @@
====================================
Configuring a local test environment
====================================
tl;dr;
======
This text explains how to configure **nginx** and **vaurien** to build
a local mirror of the data to run the Wikipedia Picture of the Day
examples while avoiding network traffic and introducing controlled
delays and errors for testing, thanks to the **vaurien** proxy.
Rationale and overview
======================
The Wikipedia Picture of the Day examples are designed to demonstrate
the performance of different approaches to finding and downloading
images from the Wikipedia. However, we don't want to hit the Wikipedia
with multiple requests per second while testing, and we want to be
able to simulate high latency and random network errors.
For this setup I chose **nginx** as the HTTP server because it is very
fast and easy to configure, and the **vaurien** proxy because it was
designed by Mozilla to introduce delays and network errors for testing.
The initial fixture data, ``docroot.zip``, contains a directory
``docroot/Template-POTD/`` with 1096 small text files, each consisting
of an HTML fragment (just a ``src="..."`` attribute) or an error message
(for days when no picture was published, like 2013-09-12). These files
correspond to every day of the years 2012, 2013 and 2014. The year 2012
was a leap year, that's why there are 1096 files and not 1095.
Once these files are unpacked to the ``docroot/Template-POTD`` directory
and **nginx** is configured, the ``build_fixture.py`` script can fetch the
actual images from the Wikipedia for local storage in the directory
``docroot/wikimedia/``.
When that is done you can configure **nginx** and **vaurien** to experiment
with the ``daypicts*.py`` examples without hitting the network.
Instructions
============
1. Unpack test data
-------------------
Unpack the initial data in the ``fixture/`` directory and verify that 1096
files were created in ``fixture/docroot/Template-POTD/``::
$ ls # inside the fixture/ directory
README.rst docroot.zip
$ unzip docroot.zip
... many lines omitted...
inflating: docroot/Template-POTD/2014-12-29
inflating: docroot/Template-POTD/2014-12-30
inflating: docroot/Template-POTD/2014-12-31
$ ls docroot/Template-POTD/ | wc -w
1096
2. Install **nginx**
--------------------
Download and install **nginx**. I used version 1.6.2 -- the latest
stable version as I write this.
- Download page: http://nginx.org/en/download.html
- Beginner's guide: http://nginx.org/en/docs/beginners_guide.html
3. Configure **nginx**
----------------------
Edit the ``nginx.conf`` file to set the port and document root.
The file is usually found in ``/usr/local/nginx/conf``, ``/etc/nginx``,
or ``/usr/local/etc/nginx``.
Most of the content in ``nginx.conf`` is within a block labeled ``http``
and enclosed in curly braces. Within that block there can be multiple
blocks labeled ``server``. Add another ``server`` block like this one::
server {
listen 8001;
location / {
root /full-path-to.../fixture/docroot;
}
}
After editing ``nginx.conf`` the server must be started (if it's not
running) or told to reload the configuration file::
$ nginx # to start, if necessary
$ nginx -s reload # to reload the configuration
To test the configuration, open the URL below in a browser. Doing so
will download a small file named ``2014-01-01`` with an HTML fragment::
http://localhost:8001/Template-POTD/2014-01-01
If the test fails, please double check the procedure just described and
refer to the **nginx** documentation.
Platform-specific instructions
==============================
Nginx setup on Mac OS X
-----------------------
Homebrew (copy & paste code at the bottom of http://brew.sh/)::
$ ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
$ brew doctor
$ brew install nginx
Download and unpack::
Docroot is: /usr/local/var/www
/usr/local/etc/nginx/nginx.conf
To have launchd start nginx at login:
ln -sfv /usr/local/opt/nginx/*.plist ~/Library/LaunchAgents
Then to load nginx now:
launchctl load ~/Library/LaunchAgents/homebrew.mxcl.nginx.plist
Or, if you don't want/need launchctl, you can just run:
nginx
Nginx setup on Lubuntu 14.04.1 LTS
----------------------------------
Docroot is: /usr/share/nginx/html

View File

@@ -0,0 +1,97 @@
import sys
import argparse
import os
import urllib
import requests
from daypicts import get_picture_url, get_picture_urls
from daypicts import validate_date, gen_dates, picture_type
from daypicts import NoPictureForDate
from daypicts import REMOTE_PICT_BASE_URL, PICT_EXCEPTIONS
FIXTURE_DOC_DIR = 'fixture/docroot/'
FIXTURE_TEMPLATE_POTD_DIR = FIXTURE_DOC_DIR + 'Template-POTD/'
def parse_args(argv):
    """Parse CLI args; return (list_of_iso_dates, parsed_args).

    Exits with status 2 when the date argument is invalid.
    """
    parser = argparse.ArgumentParser(description=main.__doc__)
    date_help = 'YYYY-MM-DD or YYYY-MM or YYYY: year, month and day'
    parser.add_argument('date', help=date_help)
    parser.add_argument('-u', '--url_only', action='store_true',
                        help='get picture URLS only')
    args = parser.parse_args(argv)
    try:
        iso_parts = validate_date(args.date)
    except ValueError as exc:
        print('error:', exc.args[0])
        parser.print_usage()
        sys.exit(2)
    # Expand a year or month into the list of individual dates.
    dates = list(gen_dates(iso_parts))
    if len(dates) == 1:
        print('-> Date: ', dates[0])
    else:
        fmt = '-> {} days: {}...{}'
        print(fmt.format(len(dates), dates[0], dates[-1]))
    return dates, args
def save_picture_urls(dates, save_path):
    """Save one HTML-snippet file per date under *save_path*.

    For dates with no picture, the repr of the exception is saved
    instead, mimicking the error pages served by the live templates.
    """
    for date in dates:
        try:
            url = get_picture_url(date)
        except NoPictureForDate as exc:
            snippet = repr(exc)
        else:
            # Store a protocol-relative src attribute, like the live pages.
            snippet = url.replace('http://', 'src="//') + '"'
        print(date, end=' ')
        print(snippet)
        with open(os.path.join(save_path, date), 'w') as fp:
            fp.write(snippet)
def save_pictures(dates, save_path, verbose=False):
    """Download each date's picture under *save_path*, mirroring the
    Wikimedia URL path layout.

    Returns the list of URLs successfully saved.
    """
    urls_ok = []
    for date, url in get_picture_urls(dates, verbose):
        response = requests.get(url)
        file_path = os.path.join(save_path,
                                 url.replace(REMOTE_PICT_BASE_URL, ''))
        file_path = urllib.parse.unquote(file_path)
        octets = response.content
        # http://en.wikipedia.org/wiki/Template:POTD/2013-06-15
        if date not in PICT_EXCEPTIONS:
            assert picture_type(octets) is not None, url
        try:
            os.makedirs(os.path.dirname(file_path))
        except FileExistsError:
            pass
        with open(file_path, 'wb') as fp:
            fp.write(octets)
        # BUG FIX: urls_ok was returned but never populated, so this
        # function always returned []; record each saved URL, matching
        # get_pictures() in daypicts.py.
        urls_ok.append(url)
        print(file_path)
    return urls_ok
def main(argv):
    """Build test fixture from Wikipedia "POTD" data"""
    # Make sure the fixture directory tree exists before writing into it.
    try:
        os.makedirs(FIXTURE_TEMPLATE_POTD_DIR)
    except FileExistsError:
        pass
    dates, args = parse_args(argv)
    if args.url_only:
        # Save only the HTML snippets with the picture URLs.
        save_picture_urls(dates, FIXTURE_TEMPLATE_POTD_DIR)
    else:
        # Download the actual image files into the fixture docroot.
        save_pictures(dates, FIXTURE_DOC_DIR)

if __name__ == '__main__':
    main(sys.argv[1:])

View File

@@ -0,0 +1,227 @@
"""
Wikipedia Picture of the Day (POTD) download example
Note:
The earliest Pictures of the Day I've found are in this page:
http://en.wikipedia.org/wiki/Wikipedia:Picture_of_the_day/May_2004
However, I have not found Template:POTD/YYYY-MM-DD pages earlier
than this:
http://en.wikipedia.org/wiki/Template:POTD/2007-01-01
For simplicity, this script only retrieves pictures starting
from 2007-01-01.
"""
import sys
import argparse
import re
import time
import datetime
import os
import imghdr
import warnings
import requests
SAVE_DIR = 'downloaded/'   # where get_pictures() writes image files
HTTP_PORT = 8002           # port of the local nginx/vaurien test mirror

# Toggle between the live Wikipedia and the local test fixture by
# (un)commenting the *_BASE_URL assignments below.
POTD_BASE_URL = 'http://en.wikipedia.org/wiki/Template:POTD/'
#POTD_BASE_URL = 'http://127.0.0.1:{}/Template-POTD/'.format(HTTP_PORT)

REMOTE_PICT_BASE_URL = 'http://upload.wikimedia.org/wikipedia/'
#LOCAL_PICT_BASE_URL = 'http://127.0.0.1:{}/'.format(HTTP_PORT)
LOCAL_PICT_BASE_URL = REMOTE_PICT_BASE_URL
PICT_BASE_URL = REMOTE_PICT_BASE_URL

# Extracts the protocol-relative picture URL from the src="//upload..."
# attribute in a Template:POTD page.
POTD_IMAGE_RE = re.compile(r'src="(//upload\..*?)"')

PODT_EARLIEST_TEMPLATE = '2007-01-01'  # no Template:POTD pages before this

# Regex fragments for YYYY, YYYY-MM and YYYY-MM-DD date strings.
RE_YEAR = r'([12]\d{3})'
RE_MONTH = RE_YEAR + r'-([01]\d)'
RE_DATE = RE_MONTH + r'-([0-3]\d)'

ISO_DATE_FMT = '%Y-%m-%d'

# Dates whose POTD is not a recognizable image (e.g. a .webm movie).
PICT_EXCEPTIONS = {
    '2013-06-15',  # .webm movie [1]
}
#[1] http://en.wikipedia.org/wiki/Template:POTD/2013-06-15
# Raised by get_picture_url() when a Template:POTD page has no image link.
class NoPictureForDate(Exception):
    '''No Picture of the Day found for {iso_date}'''
# Raised by validate_date() for dates before Template:POTD pages existed.
class NoPictureTemplateBefore(ValueError):
    '''Template:POTD did not exist before PODT_EARLIEST_TEMPLATE'''
def get_picture_url(iso_date):
    """Return the POTD image URL for *iso_date* ('YYYY-MM-DD').

    Raises NoPictureForDate when the template page has no image link.
    """
    page_url = POTD_BASE_URL + iso_date
    print(page_url)  # progress trace (this is a teaching example)
    response = requests.get(page_url)
    pict_url = POTD_IMAGE_RE.search(response.text)
    if pict_url is None:
        raise NoPictureForDate(iso_date)
    # The page stores a protocol-relative URL (//upload...); make it absolute.
    return 'http:' + pict_url.group(1)
def validate_date(text):
    """Normalize *text* (YYYY, YYYY-MM or YYYY-MM-DD) to a zero-padded
    ISO date prefix of the same precision.

    Raises ValueError for malformed input, and NoPictureTemplateBefore
    for dates preceding PODT_EARLIEST_TEMPLATE.
    """
    try:
        parts = [int(piece) for piece in text.split('-')]
    except ValueError:
        raise ValueError('date must use YYYY, YYYY-MM or YYYY-MM-DD format')
    # Pad missing month/day with 1 so the value can be validated as a date.
    padded = parts + [1] * (3 - len(parts))
    iso_full = datetime.date(*padded).strftime(ISO_DATE_FMT)
    # Keep only as many components as the caller supplied:
    # 4 chars for YYYY, plus 3 per extra '-MM' / '-DD' component.
    iso_date = iso_full[:1 + len(parts) * 3]
    if iso_date < PODT_EARLIEST_TEMPLATE:
        raise NoPictureTemplateBefore(PODT_EARLIEST_TEMPLATE)
    return iso_date
def gen_month_dates(iso_month):
    """Yield every date of *iso_month* ('YYYY-MM') in ISO format."""
    start = datetime.datetime.strptime(iso_month + '-01', ISO_DATE_FMT).date()
    step = datetime.timedelta(days=1)
    current = start
    # Stop as soon as stepping by one day crosses into the next month.
    while current.month == start.month:
        yield current.strftime(ISO_DATE_FMT)
        current += step
def gen_year_dates(iso_year):
    """Yield every date of *iso_year* ('YYYY') in ISO format."""
    for month in range(1, 13):
        iso_month = '{}-{:02d}'.format(iso_year, month)
        yield from gen_month_dates(iso_month)
def gen_dates(iso_parts):
    """Yield ISO dates for a validated YYYY, YYYY-MM or YYYY-MM-DD string.

    The string length identifies the form: 4 chars is a whole year,
    7 is a month, anything else is taken as a single full date.
    """
    if len(iso_parts) == 4:
        source = gen_year_dates(iso_parts)
    elif len(iso_parts) == 7:
        source = gen_month_dates(iso_parts)
    else:
        source = iter([iso_parts])
    yield from source
def get_picture_urls(dates, verbose=False):
    """Fetch the picture URL for each ISO date in *dates*, sequentially.

    Returns a list of (date, url) pairs; dates with no picture are
    skipped (and reported when *verbose*).
    """
    date_urls = []
    count = 0
    for date in dates:
        try:
            url = get_picture_url(date)
        except NoPictureForDate as exc:
            if verbose:
                print('*** {!r} ***'.format(exc))
            continue
        count += 1
        if verbose:
            # Show a running count and just the file name, not the full URL.
            print(format(count, '3d'), end=' ')
            print(url.split('/')[-1])
        else:
            print(url)
        date_urls.append((date, url))
    return date_urls
def picture_type(octets):
    """Return the image type of *octets* (e.g. 'png', 'jpeg', 'svg').

    Falls back to a heuristic SVG check, since imghdr only knows raster
    formats; returns None when the bytes are not a recognizable image.
    """
    kind = imghdr.what(None, octets)
    if kind is not None:
        return kind
    looks_like_svg = (octets[:1] == b'<'
                      and b'<svg' in octets[:200]
                      and octets.rstrip().endswith(b'</svg>'))
    return 'svg' if looks_like_svg else None
def get_pictures(dates, verbose=False):
    """Download the POTD image for each date into SAVE_DIR/<year>/.

    Returns the list of URLs successfully saved.
    """
    urls_ok = []
    try:
        os.makedirs(SAVE_DIR)
    except FileExistsError:
        pass
    for date, url in get_picture_urls(dates, verbose):
        if PICT_BASE_URL == LOCAL_PICT_BASE_URL:
            # Rewrite the URL to hit the local mirror instead of Wikimedia.
            url = url.replace(REMOTE_PICT_BASE_URL, PICT_BASE_URL)
        response = requests.get(url)
        if response.status_code != 200:
            warnings.warn('HTTP code {}: {}'.format(response.status_code, url))
            continue
        octets = response.content
        # Known non-image dates (see PICT_EXCEPTIONS) skip the sanity check.
        if date not in PICT_EXCEPTIONS:
            assert picture_type(octets) is not None, url
        file_path = url.replace(PICT_BASE_URL, '')
        file_name = os.path.basename(file_path)
        path = os.path.join(SAVE_DIR, date.split('-')[0])  # group by year
        file_path = os.path.join(path, file_name)
        #import pdb; pdb.set_trace()
        try:
            os.makedirs(path)
        except FileExistsError:
            pass
        with open(file_path, 'wb') as fp:
            fp.write(octets)
        urls_ok.append(url)
        print(file_path)
    return urls_ok
def parse_args(argv):
    """Parse CLI args; return (list_of_iso_dates, parsed_args).

    Exits with status 2 when the date argument is invalid.
    """
    parser = argparse.ArgumentParser(description=main.__doc__)
    date_help = 'YYYY-MM-DD or YYYY-MM or YYYY: year, month and day'
    parser.add_argument('date', help=date_help)
    parser.add_argument('-q', '--max_qty', type=int,
                        help='maximum number of items to fetch')
    parser.add_argument('-u', '--url_only', action='store_true',
                        help='get picture URLS only')
    parser.add_argument('-f', '--fixture_save', action='store_true',
                        help='save data for local test fixture')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='display progress information')
    args = parser.parse_args(argv)
    try:
        iso_parts = validate_date(args.date)
    except ValueError as exc:
        print('error:', exc.args[0])
        parser.print_usage()
        sys.exit(2)
    # Expand a year or month into the list of individual dates.
    dates = list(gen_dates(iso_parts))
    if args.verbose:
        if len(dates) == 1:
            print('-> Date: ', dates[0])
        else:
            fmt = '-> {} days: {}...{}'
            print(fmt.format(len(dates), dates[0], dates[-1]))
    return dates, args
def main(argv, get_picture_urls):
    """Get Wikipedia "Picture of The Day" for date, month or year"""
    # *get_picture_urls* is injected so the concurrent variants (thread
    # pool, asyncio) can reuse this driver with their own implementation.
    dates, args = parse_args(argv)
    t0 = time.time()
    if args.url_only:
        urls = get_picture_urls(dates, args.verbose)
    else:
        urls = get_pictures(dates, args.verbose)
    elapsed = time.time() - t0
    if args.verbose:
        print('-> found: {} pictures | elapsed time: {:.2f}s'
              .format(len(urls), elapsed))

if __name__ == '__main__':
    main(sys.argv[1:], get_picture_urls)

View File

@@ -0,0 +1,62 @@
"""
Wikipedia Picture of the Day (POTD) download example
"""
import sys
import asyncio
import aiohttp
from daypicts import main, NoPictureForDate
from daypicts import POTD_BASE_URL, POTD_IMAGE_RE
GLOBAL_TIMEOUT = 300 # seconds
MAX_CONCURRENT_REQUESTS = 30
@asyncio.coroutine
def get_picture_url(iso_date, semaphore):
    """Fetch the POTD image URL for *iso_date* (legacy-style coroutine).

    *semaphore* caps the number of simultaneous HTTP requests at
    MAX_CONCURRENT_REQUESTS. Raises NoPictureForDate when the template
    page has no image link.
    """
    page_url = POTD_BASE_URL+iso_date
    with (yield from semaphore):  # released automatically on block exit
        response = yield from aiohttp.request('GET', page_url)
        text = yield from response.text()
    pict_url = POTD_IMAGE_RE.search(text)
    if pict_url is None:
        raise NoPictureForDate(iso_date)
    # The page stores a protocol-relative URL (//upload...); make it absolute.
    return 'http:' + pict_url.group(1)
@asyncio.coroutine
def get_picture_urls(dates, verbose=False):
    """Resolve POTD picture URLs for *dates* concurrently (coroutine).

    Results are collected in completion order, so the returned list is
    NOT ordered by date.
    """
    semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
    tasks = [get_picture_url(date, semaphore) for date in dates]
    urls = []
    count = 0
    # get results as jobs are done
    for job in asyncio.as_completed(tasks, timeout=GLOBAL_TIMEOUT):
        try:
            url = yield from job
        except NoPictureForDate as exc:
            if verbose:
                print('*** {!r} ***'.format(exc))
            continue
        except aiohttp.ClientResponseError as exc:
            # HTTP failure for one date: report it and keep going.
            print('****** {!r} ******'.format(exc))
            continue
        count += 1
        if verbose:
            # Show a running count and just the file name, not the full URL.
            print(format(count, '3d'), end=' ')
            print(url.split('/')[-1])
        else:
            print(url)
        urls.append(url)
    return urls
def run_loop(dates, verbose=False):
    """Drive get_picture_urls() to completion on the event loop (blocking).

    Matches the signature main() expects for its injected fetcher.
    """
    loop = asyncio.get_event_loop()
    return loop.run_until_complete(get_picture_urls(dates, verbose))

if __name__ == '__main__':
    main(sys.argv[1:], run_loop)

View File

@@ -0,0 +1,44 @@
"""
Wikipedia Picture of the Day (POTD) download example
"""
import sys
from concurrent import futures
from daypicts import main, get_picture_url, NoPictureForDate
GLOBAL_TIMEOUT = 300 # seconds
MAX_CONCURRENT_REQUESTS = 30
def get_picture_urls(dates, verbose=False):
pool = futures.ThreadPoolExecutor(MAX_CONCURRENT_REQUESTS)
pending = {}
for date in dates:
job = pool.submit(get_picture_url, date)
pending[job] = date
urls = []
count = 0
# get results as jobs are done
for job in futures.as_completed(pending, timeout=GLOBAL_TIMEOUT):
try:
url = job.result()
except NoPictureForDate as exc:
if verbose:
print('*** {!r} ***'.format(exc))
continue
count += 1
if verbose:
print(format(count, '3d'), end=' ')
print(url.split('/')[-1])
else:
print(url)
urls.append(url)
return urls
if __name__ == '__main__':
main(sys.argv[1:], get_picture_urls)

View File

@@ -0,0 +1,3 @@
#!/bin/bash
vaurien --protocol http --proxy localhost:8002 --backend localhost:8001 \
--behavior 100:delay --behavior-delay-sleep .1

View File

@@ -0,0 +1,4 @@
#!/bin/bash
# run tests skipping @pytest.mark.network
py.test test_daypicts.py -m 'not network' $1 $2 $3

View File

@@ -0,0 +1,105 @@
====================================
Configuring a local test environment
====================================
tl;dr;
======
This text explains how to configure **nginx** and **vaurien** to build
a local mirror of the data to run the Wikipedia Picture of the Day
examples while avoiding network traffic and introducing controlled
delays and errors for testing, thanks to the **vaurien** proxy.
Rationale and overview
======================
The Wikipedia Picture of the Day examples are designed to demonstrate
the performance of different approaches to finding and downloading
images from the Wikipedia. However, we don't want to hit the Wikipedia
with multiple requests per second while testing, and we want to be
able to simulate high latency and random network errors.
For this setup I chose **nginx** as the HTTP server because it is very
fast and easy to configure, and the **vaurien** proxy because it was
designed by Mozilla to introduce delays and network errors for testing.
The initial fixture data, ``docroot.zip``, contains a directory
``docroot/Template-POTD/`` with 1096 small text files, each consisting
of an HTML fragment (just a ``src="..."`` attribute) or an error message
(for days when no picture was published, like 2013-09-12). These files
correspond to every day of the years 2012, 2013 and 2014. The year 2012
was a leap year, that's why there are 1096 files and not 1095.
Once these files are unpacked to the ``docroot/Template-POTD`` directory
and **nginx** is configured, the ``build_fixture.py`` script can fetch the
actual images from the Wikipedia for local storage in the directory
``docroot/wikimedia/``.
When that is done you can configure **nginx** and **vaurien** to experiment
with the ``daypicts*.py`` examples without hitting the network.
Instructions
============
1. Unpack test data
-------------------
Unpack the initial data in the ``fixture/`` directory and verify that 1096
files were created in ``fixture/docroot/Template-POTD/``::
$ ls # inside the fixture/ directory
README.rst docroot.zip
$ unzip docroot.zip
... many lines omitted...
inflating: docroot/Template-POTD/2014-12-29
inflating: docroot/Template-POTD/2014-12-30
inflating: docroot/Template-POTD/2014-12-31
$ ls docroot/Template-POTD/ | wc -w
1096
2. Install **nginx**
--------------------
Download and install **nginx**. I used version 1.6.2 -- the latest
stable version as I write this.
- Download page: http://nginx.org/en/download.html
- Beginner's guide: http://nginx.org/en/docs/beginners_guide.html
3. Configure **nginx**
----------------------
Edit the ``nginx.conf`` file to set the port and document root.
The file is usually found in ``/usr/local/nginx/conf``, ``/etc/nginx``,
or ``/usr/local/etc/nginx``.
Most of the content in ``nginx.conf`` is within a block labeled ``http``
and enclosed in curly braces. Within that block there can be multiple
blocks labeled ``server``. Add another ``server`` block like this one::
server {
listen 8001;
location / {
root /full-path-to.../fixture/docroot;
}
}
After editing ``nginx.conf`` the server must be started (if it's not
running) or told to reload the configuration file::
$ nginx # to start, if necessary
$ nginx -s reload # to reload the configuration
To test the configuration, open the URL below in a browser. Doing so
will download a small file named ``2014-01-01`` with an HTML fragment::
http://localhost:8001/Template-POTD/2014-01-01
If the test fails, please double check the procedure just described and
refer to the **nginx** documentation.

Binary file not shown.

View File

@@ -0,0 +1,39 @@
=====================================
Wikipedia Picture of the Day examples
=====================================
These examples use various asynchronous programming techniques to download
images and metadata from the English Wikipedia `Picture of the Day`_ archive.
.. _Picture of the Day: http://en.wikipedia.org/wiki/Wikipedia:Picture_of_the_day/Archive
--------
Timings
--------
``sync.py``
===========
::
$ time python sync.py 2014-06 -q 5
5 images downloaded (167.8 Kbytes total)
real 0m6.272s
user 0m0.065s
sys 0m0.039s
$ time python sync.py 2014-06 -q 5
5 images downloaded (167.8 Kbytes total)
real 0m5.447s
user 0m0.068s
sys 0m0.040s
$ time python sync.py 2014-06 -q 5
5 images downloaded (167.8 Kbytes total)
real 0m6.314s
user 0m0.068s
sys 0m0.040s

View File

@@ -0,0 +1,36 @@
"""
Wikipedia Picture of the Day (POTD) download example
Inspired by example at:
https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor-example
"""
from concurrent import futures
import potd
def save_month(year_month, verbose):
year, month = [int(s) for s in year_month.split('-')]
total_size = 0
img_count = 0
dates = potd.list_days_of_month(year, month)
with futures.ProcessPoolExecutor(max_workers=100) as executor:
downloads = dict((executor.submit(potd.save_one, date, verbose), date)
for date in dates)
for future in futures.as_completed(downloads):
date = downloads[future]
if future.exception() is not None:
print('%r generated an exception: %s' % (date,
future.exception()))
else:
img_size = future.result()
total_size += img_size
img_count += 1
print('%r OK: %r' % (date, img_size))
return img_count, total_size
if __name__ == '__main__':
potd.main(save_month=save_month)

View File

@@ -0,0 +1,36 @@
"""
Wikipedia Picture of the Day (POTD) download example
Inspired by example at:
https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor-example
"""
from concurrent import futures
import potd
def save_month(year_month, verbose):
year, month = [int(s) for s in year_month.split('-')]
total_size = 0
img_count = 0
dates = potd.list_days_of_month(year, month)
with futures.ThreadPoolExecutor(max_workers=100) as executor:
downloads = dict((executor.submit(potd.save_one, date, verbose), date)
for date in dates)
for future in futures.as_completed(downloads):
date = downloads[future]
if future.exception() is not None:
print('%r generated an exception: %s' % (date,
future.exception()))
else:
img_size = future.result()
total_size += img_size
img_count += 1
print('%r OK: %r' % (date, img_size))
return img_count, total_size
if __name__ == '__main__':
potd.main(save_month=save_month)

View File

@@ -0,0 +1,100 @@
"""
Wikipedia Picture of the Day (POTD) download example
Baseline synchronous example for comparison: downloads metadata and
images in the simple but slow synchronous way i.e. one after the other.
"""
import calendar
import datetime
import re
import os
import io
import time
import requests
import argparse
SAVE_DIR = 'pictures/'
POTD_BASE_URL = 'http://en.wikipedia.org/wiki/Template:POTD/'
class NoPictureForDate(Exception):
'''No Picture of the Day found for {day}'''
def build_page_url(iso_date):
return POTD_BASE_URL + iso_date
def fetch(url):
response = requests.get(url)
return response
def extract_image_url(html):
re_image = r'src="(//upload\..*?)"'
image_url = re.search(re_image, html)
return 'http:' + image_url.group(1)
def format_date(year, month, day):
return '{year}-{month:02d}-{day:02d}'.format(**locals())
def list_days_of_month(year, month):
lastday = calendar.monthrange(year, month)[1]
days = [format_date(year, month, day) for day in range(1, lastday + 1)]
return days
def build_save_path(iso_date, url):
head, filename = os.path.split(url)
return os.path.join(SAVE_DIR, iso_date+'_'+filename)
def save_one(iso_date, verbose):
page_url = build_page_url(iso_date)
response = fetch(page_url)
if response.status_code != 200:
msg = NoPictureForDate.__doc__.format(day=iso_date)
raise NoPictureForDate(msg)
img_url = extract_image_url(response.text)
response = fetch(img_url)
path = build_save_path(iso_date, img_url)
if verbose:
print('saving: '+path)
with io.open(path, 'wb') as fp:
fp.write(response.content)
return len(response.content)
def save_month(year_month, verbose):
year, month = [int(s) for s in year_month.split('-')]
total_size = 0
img_count = 0
dates = list_days_of_month(year, month)
for date in dates:
try:
total_size += save_one(date, verbose)
img_count += 1
except NoPictureForDate:
continue
return img_count, total_size
def main(save_one=save_one, save_month=save_month):
"""Get "Picture of The Day" from English Wikipedia for a given date or month"""
parser = argparse.ArgumentParser(description=main.__doc__)
parser.add_argument('date', help='year, month and (optional) day in YYYY-MM-DD format')
parser.add_argument('-q', '--max_qty', type=int,
help='maximum number of files to download')
parser.add_argument('-v', '--verbose', action='store_true',
help='display progress information')
args = parser.parse_args()
t0 = time.time()
if len(args.date) == len('YYYY-MM-DD'):
img_count = 1
total_size = save_one(args.date, args.verbose)
else:
img_count, total_size = save_month(args.date, args.verbose)
elapsed = time.time() - t0
print("images: %3d | total size: %6.1f Kbytes | elapsed time: %3ds" %
(img_count, total_size/1024.0, elapsed))
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,96 @@
import unittest
import potd
class TestSequenceFunctions(unittest.TestCase):
def setUp(self):
self.thumb_url = ("""http://upload.wikimedia.org/wikipedia/"""
"""commons/thumb/f/fe/Orthographic_projection_SW.jpg/350px"""
"""-Orthographic_projection_SW.jpg""")
def test_buid_page_url(self):
date = '2014-05-01'
result = potd.build_page_url(date)
self.assertEqual(result, 'http://en.wikipedia.org/wiki/Template:POTD/2014-05-01')
def test_fetch_status_code(self):
date = '2014-05-02'
url = potd.build_page_url(date)
response = potd.fetch(url)
self.assertEqual(response.status_code, 200)
def test_fetch_status_code_not_found(self):
date = '2100-01-01'
url = potd.build_page_url(date)
response = potd.fetch(url)
self.assertEqual(response.status_code, 404)
def test_extract_image_url(self):
image_url = potd.extract_image_url(HTML)
self.assertEqual(image_url, self.thumb_url)
def test_fetch_image_jpeg(self):
response = potd.fetch(self.thumb_url)
self.assertEqual(response.headers['content-type'], 'image/jpeg')
def test_list_days_of_month(self):
year = 2014
month = 5
days = potd.list_days_of_month(year, month)
self.assertEqual(len(days), 31)
self.assertEqual('2014-05-01', days[0])
self.assertEqual('2014-05-31', days[-1])
def test_list_days_of_february(self):
year = 2014
month = 2
days = potd.list_days_of_month(year, month)
self.assertEqual(len(days), 28)
self.assertEqual('2014-02-01', days[0])
self.assertEqual('2014-02-28', days[-1])
def test_format_date(self):
year = 2014
month = 2
day = 1
a_date = '2014-02-01'
date = potd.format_date(year, month, day)
self.assertEqual(a_date, date)
self.assertEqual(potd.format_date(2010, 11, 12), '2010-11-12')
def test_build_save_path(self):
date = '2014-06-04'
path = potd.SAVE_DIR + date + '_350px-Orthographic_projection_SW.jpg'
self.assertEqual(path, potd.build_save_path(date, self.thumb_url))
HTML = (
'''<td><a href="/wiki/File:Orthographic_projection_SW.jpg" class="image"
title="Orthographic projection"><img alt="Orthographic projection"
src="//upload.wikimedia.org/wikipedia/commons/thumb/f/fe/O'''
'''rthographic_projection_SW.jpg/350px-Orthographic_projection_SW.jpg"
width="350" height="350" srcset="//upload.wikimedia.org/wikipedia/comm'''
'''ons/thumb/f/fe/Orthographic_projection_SW.jpg/525px-
Orthographic_projection_SW.jpg 1.5x, //upload.wikimedia.org/wikipedia/
commons/thumb/f/fe/Orthographic_projection_SW.jpg/700px-
Orthographic_projection_SW.jpg 2x" data-file-width="2058" data-file-
height="2058"></a></td>
''')
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,115 @@
"""
Wikipedia Picture of the Day (POTD) download example
Baseline synchronous example for comparison: downloads images and metadata
in the simple but slow synchronous way i.e. one after the other.
"""
from __future__ import print_function
import sys
import os
import io
import re
import argparse
import datetime
import urllib2
import contextlib
import time
POTD_BASE_URL = 'http://en.wikipedia.org/wiki/Template:POTD/'
THUMB_BASE_URL = 'http://upload.wikimedia.org/wikipedia/commons/thumb/'
THUMB_SRC_RE = re.compile(r'src=".*?/thumb/(.*?/\d+px-[^"]+)')
LOCAL_IMG_PATH = 'pictures/'
verbose = True
class ParsingException(ValueError):
"""Raised if unable to parse POTD MediaWiki source"""
def fetch_potd_url(iso_date):
"""Fetch picture name from iso_date ('YYYY-MM-DD' format)"""
potd_url = POTD_BASE_URL + iso_date
with contextlib.closing(urllib2.urlopen(potd_url)) as fp:
html = fp.read()
thumb_src = THUMB_SRC_RE.search(html)
if not thumb_src:
msg = 'cannot find thumbnail source for ' + potd_url
raise ParsingException(msg)
thumb_url = THUMB_BASE_URL+thumb_src.group(1)
return thumb_url
def gen_month_days(year, month):
a_date = datetime.date(year, month, 1)
one_day = datetime.timedelta(1)
while a_date.month == month:
yield a_date
a_date += one_day
def get_img_names(iso_month):
"""Fetch picture names from iso_month ('YYYY-MM' format)"""
year, month = (int(part) for part in iso_month.split('-'))
for day in gen_month_days(year, month):
iso_date = '{:%Y-%m-%d}'.format(day)
if verbose:
print(iso_date)
try:
img_url = fetch_potd_url(iso_date)
except urllib2.HTTPError:
break
yield (iso_date, img_url)
def fetch_image(iso_date, img_url):
if verbose:
print('\t' + img_url)
with contextlib.closing(urllib2.urlopen(img_url)) as fp:
img = fp.read()
img_filename = iso_date + '__' + img_url.split('/')[-1]
if verbose:
print('\t\twriting %0.1f Kbytes' % (len(img)/1024.0))
img_path = os.path.join(LOCAL_IMG_PATH, img_filename)
with io.open(img_path, 'wb') as fp:
fp.write(img)
return len(img)
def get_images(iso_month, max_count=0):
if max_count is 0:
max_count = sys.maxsize
img_count = 0
total_size = 0
for iso_date, img_url in get_img_names(iso_month):
total_size += fetch_image(iso_date, img_url)
img_count += 1
if img_count == max_count:
break
return (img_count, total_size)
def main():
"""Get "Pictures of The Day" from English Wikipedia for a given month"""
global verbose
parser = argparse.ArgumentParser(description=main.__doc__)
parser.add_argument('year_month', help='year and month in YYYY-MM format')
parser.add_argument('-q', '--max_qty', type=int,
help='maximum number of files to download')
parser.add_argument('-v', '--verbose', action='store_true',
help='display progress information')
args = parser.parse_args()
verbose = args.verbose
t0 = time.time()
img_count, total_size = get_images(args.year_month, args.max_qty)
elapsed = time.time() - t0
print("images: %3d | total size: %6.1f Kbytes | elapsed time: %3ds" %
(img_count, total_size/1024.0, elapsed))
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,118 @@
"""
Wikipedia Picture of the Day (POTD) download example
Baseline synchronous example for comparison: downloads images and metadata
in the simple but slow synchronous way i.e. one after the other.
"""
import sys
import os
import io
import re
import argparse
import datetime
import urllib.request
import urllib.error
import contextlib
import time
POTD_BASE_URL = 'http://en.wikipedia.org/wiki/Template:POTD/'
THUMB_BASE_URL = 'http://upload.wikimedia.org/wikipedia/commons/thumb/'
THUMB_SRC_RE = re.compile(r'src=".*?/thumb/(.*?/\d+px-[^"]+)')
LOCAL_IMG_PATH = 'pictures/'
verbose = True
class ParsingException(ValueError):
"""Raised if unable to parse POTD MediaWiki source"""
def gen_month_dates(year, month):
"""Produce all dates in a given year, month"""
a_date = datetime.date(year, month, 1)
one_day = datetime.timedelta(1)
while a_date.month == month:
yield '{:%Y-%m-%d}'.format(a_date)
a_date += one_day
def fetch_potd_url(iso_date):
"""Fetch POTD thumbnail URL for iso_date ('YYYY-MM-DD' format)"""
if verbose:
print(iso_date)
potd_url = POTD_BASE_URL + iso_date
try:
with urllib.request.urlopen(potd_url) as fp:
html = fp.read().decode('utf-8')
thumb_src = THUMB_SRC_RE.search(html)
if not thumb_src:
msg = 'cannot find thumbnail source for ' + potd_url
raise ParsingException(msg)
thumb_url = THUMB_BASE_URL+thumb_src.group(1)
except urllib.error.HTTPError:
return None
return thumb_url
def gen_img_names(iso_month):
"""Produce picture names by fetching POTD metadata"""
year, month = (int(part) for part in iso_month.split('-'))
for iso_date in gen_month_dates(year, month):
img_url = fetch_potd_url(iso_date)
if img_url is None:
break
yield (iso_date, img_url)
def fetch_image(iso_date, img_url):
"""Fetch and save image data for date and url"""
if verbose:
print('\t' + img_url)
with contextlib.closing(urllib.request.urlopen(img_url)) as fp:
img = fp.read()
img_filename = iso_date + '__' + img_url.split('/')[-1]
if verbose:
print('\t\twriting %0.1f Kbytes' % (len(img)/1024.0))
img_path = os.path.join(LOCAL_IMG_PATH, img_filename)
with io.open(img_path, 'wb') as fp:
fp.write(img)
return len(img)
def get_images(iso_month, max_count=0):
"""Download up to max_count images for a given month"""
if max_count is 0:
max_count = sys.maxsize
img_count = 0
total_size = 0
for iso_date, img_url in gen_img_names(iso_month):
total_size += fetch_image(iso_date, img_url)
img_count += 1
if img_count == max_count:
break
return (img_count, total_size)
def main():
"""Get "Pictures of The Day" from English Wikipedia for a given month"""
global verbose
parser = argparse.ArgumentParser(description=main.__doc__)
parser.add_argument('year_month', help='year and month in YYYY-MM format')
parser.add_argument('-q', '--max_qty', type=int,
help='maximum number of files to download')
parser.add_argument('-v', '--verbose', action='store_true',
help='display progress information')
args = parser.parse_args()
verbose = args.verbose
t0 = time.time()
img_count, total_size = get_images(args.year_month, args.max_qty)
elapsed = time.time() - t0
print("images: %3d | total size: %6.1f Kbytes | elapsed time: %3ds" %
(img_count, total_size/1024.0, elapsed))
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,87 @@
"""
Wikipedia Picture of the Day (POTD) download example
"""
import pytest
from daypicts import *
@pytest.mark.network
def test_get_picture_url_existing():
url = get_picture_url('2012-01-01')
expected = ('http://upload.wikimedia.org/wikipedia/commons/'
'thumb/9/9d/MODIS_Map.jpg/550px-MODIS_Map.jpg')
assert url == expected
@pytest.mark.network
def test_get_picture_url_not_existing():
with pytest.raises(NoPictureForDate):
get_picture_url('2013-09-12')
def test_validate_full_date():
parts = validate_date('2015-1-2')
assert parts == '2015-01-02'
def test_validate_date_too_early():
with pytest.raises(NoPictureTemplateBefore):
validate_date('2006-12-31')
def test_validate_month():
parts = validate_date('2015-1')
assert parts == '2015-01'
def test_validate_year():
parts = validate_date('2015')
assert parts == '2015'
def test_gen_month_dates():
dates = list(gen_month_dates('2015-02'))
assert len(dates) == 28
assert dates[0] == '2015-02-01'
assert dates[27] == '2015-02-28'
def test_gen_month_dates_leap():
dates = list(gen_month_dates('2012-02'))
assert len(dates) == 29
assert dates[28] == '2012-02-29'
def test_gen_year_dates():
dates = list(gen_year_dates('2015'))
assert len(dates) == 365
assert dates[0] == '2015-01-01'
assert dates[364] == '2015-12-31'
def test_gen_year_dates_leap():
dates = list(gen_year_dates('2012'))
assert len(dates) == 366
assert dates[365] == '2012-12-31'
GIF_MIN = (b'GIF89a\x01\x00\x01\x00\x00\xff\x00,\x00\x00'
b'\x00\x00\x01\x00\x01\x00\x00\x02\x00;')
SVG_MIN = b'<svg xmlns="http://www.w3.org/2000/svg"></svg>'
SVG_XML_DECL = b'<?xml version="1.0" encoding="UTF-8"?>' + SVG_MIN
NOISE = b'\xb0\x0bU\xbe]L\n\x92\xbe\xc6\xf65"\xcc\xa3\xe3'
def test_picture_type_imghdr():
assert picture_type(GIF_MIN) == 'gif'
def test_picture_type_svg():
assert picture_type(SVG_MIN) == 'svg'
assert picture_type(SVG_XML_DECL) == 'svg'
def test_picture_type_unknown():
assert picture_type(NOISE) is None

View File

@@ -1,15 +0,0 @@
# clockdeco.py
import time
def clock(func):
def clocked(*args):
t0 = time.time()
result = func(*args)
elapsed = time.time() - t0
name = func.__name__
arg_str = ', '.join(repr(arg) for arg in args)
print('[%0.8fs] %s(%s) -> %r' % (elapsed, name, arg_str, result))
return result
return clocked

View File

@@ -0,0 +1,60 @@
"""
>>> f = Foo()
>>> f.bar = 77
>>> f.bar
77
>>> Foo.bar.__doc__
'The "bar" attribute'
>>> import pydoc
>>> pydoc.getdoc(Foo.bazz)
'The "bazz" attribute'
"""
def doc_descriptor_wrapper_factory(descriptor):
wrapper_cls_name = 'DocDescriptorWrapper'
wrapper_cls_attrs = descriptor.__dict__.copy()
wrapper_cls_attrs['__slots__'] = ['_wrapped']
def wrapped_getter(self):
"the wrapped descriptor instance"
return self._wrapped
def wrapper_repr(self):
return '<{} {!r}>'.format(wrapper_cls_name, self.__doc__)
wrapper_cls_attrs['wrapped'] = property(wrapped_getter)
wrapper_cls_attrs['__repr__'] = wrapper_repr
wrapper_cls = type(wrapper_cls_name, (), wrapper_cls_attrs)
wrapper = wrapper_cls()
wrapper._wrapped = descriptor
return wrapper
class DocDescriptor:
"""A documented descriptor"""
def __init__(self, documentation):
self.__doc__ = documentation
cls_name = self.__class__.__name__
self.storage_name = '_{}_{:x}'.format(cls_name, id(self))
def __get__(self, instance, owner):
"""The __get__ method"""
if instance is None:
return doc_descriptor_wrapper_factory(self)
else:
return getattr(instance, self.storage_name)
def __set__(self, instance, value):
setattr(instance, self.storage_name, value)
class Foo:
"""The "Foo" class"""
bar = DocDescriptor('The "bar" attribute')
bazz = DocDescriptor('The "bazz" attribute')

View File

@@ -0,0 +1,14 @@
fetch1(request1, function (response1) {
// phase 1
var request2 = step1(response1);
fetch2(request2, function (response2) {
// phase 2
var request3 = step2(response2);
fetch3(request3, function (response3) {
// phase 3
step3(response3);
});
});
});

View File

@@ -0,0 +1,15 @@
def phase1(response1):
request2 = step1(response1)
fetch2(request2, phase2)
def phase2(response2):
request3 = step2(response2)
fetch3(request3, phase3)
def phase3(response3):
step3(response3)
fetch1(request1, phase1)

Binary file not shown.

View File

@@ -0,0 +1,14 @@
@asyncio.coroutine
def three_phases():
response1 = yield from fetch1(request1)
# phase 1
request2 = step1(response1)
response2 = yield from fetch2(request2)
# phase 2
request3 = step2(response2)
response3 = yield from fetch3(request3)
# phase 3
step3(response3)
loop.create_task(three_phases)

View File

@@ -0,0 +1,194 @@
====================================
Configuring a local test environment
====================================
tl;dr;
======
This text explains how to configure **nginx** and **vaurien** to build a local
mirror of the data to run the flag download examples while avoiding network
traffic and introducing controlled delays and errors for testing, thanks to
the **vaurien** proxy.
Rationale and overview
======================
The flag download examples are designed to compare the performance of
different approaches to finding and downloading files from the Web. However,
we don't want to hit a public server with multiple requests per second while
testing, and we want to be able to simulate high latency and random network
errors.
For this setup I chose **nginx** as the HTTP server because it is very fast
and easy to configure, and the **vaurien** proxy because it was designed by
Mozilla to introduce delays and network errors for testing.
The archive ``flags.zip``, contains a directory ``flags/`` with 194
subdirectories, each containing a ``.gif`` image and a ``metadata.json`` file.
These images are public-domain flags copied from the CIA World Fact Book [1].
[1] https://www.cia.gov/library/publications/the-world-factbook/
Once these files are unpacked to the ``flags/`` directory and **nginx** is
configured, you can experiment with the ``flags*.py`` examples without hitting
the network.
Instructions
============
1. Unpack test data
-------------------
Unpack the initial data in the ``countries/`` directory and verify that 194
directories are created in ``countries/flags/``, each with a ``.gif`` and
a ``metadata.json`` file::
$ unzip flags.zip
... many lines omitted...
creating: flags/zw/
inflating: flags/zw/metadata.json
inflating: flags/zw/zw.gif
$ ls flags | wc -w
194
$ find flags | grep .gif | wc -l
194
$ find flags | grep .json | wc -l
194
$ ls flags/ad
ad.gif metadata.json
2. Install **nginx**
--------------------
Download and install **nginx**. I used version 1.6.2 -- the latest
stable version as I write this.
- Download page: http://nginx.org/en/download.html
- Beginner's guide: http://nginx.org/en/docs/beginners_guide.html
3. Configure **nginx**
----------------------
Edit the ``nginx.conf`` file to set the port and document root.
You can determine which ``nginx.conf`` is in use by running::
$ nginx -V
The output starts with::
nginx version: nginx/1.6.2
built by clang 6.0 (clang-600.0.51) (based on LLVM 3.5svn)
TLS SNI support enabled
configure arguments:...
Among the configure arguments you'll see ``--conf-path=``. That's the
file you will edit.
Most of the content in ``nginx.conf`` is within a block labeled ``http``
and enclosed in curly braces. Within that block there can be multiple
blocks labeled ``server``. Add another ``server`` block like this one::
server {
listen 8001;
location /flags/ {
root /full-path-to.../countries/;
}
}
After editing ``nginx.conf`` the server must be started (if it's not
running) or told to reload the configuration file::
$ nginx # to start, if necessary
$ nginx -s reload # to reload the configuration
To test the configuration, open the URL below in a browser. You should
see the blue, yellow and red flag of Andorra::
http://localhost:8001/flags/ad/ad.gif
If the test fails, please double check the procedure just described and
refer to the **nginx** documentation.
At this point you may run the ``flags_*2.py`` examples against the **nginx**
install by changing the ``BASE_URL`` constant in ``flags_sequential2.py``.
However, **nginx** is so fast that you will not see much difference in run
time between the sequential and the threaded versions, for example. For more
realistic testing with simulated network lag, we need **vaurien**.
4. Install and run **vaurien**
------------------------------
**vaurien** depends on gevent, which is only available for Python 2.5-2.7. To
install vaurien I opened another shell, created another ``virtualenv`` for
Python 2.7, and used that environment to install and run vaurien::
$ virtualenv-2.7 .env27 --no-site-packages --distribute
New python executable in .env27/bin/python
Installing setuptools, pip...done.
$ . .env27/bin/activate
(.env27)$ pip install vaurien
Downloading/unpacking vaurien
Downloading vaurien-1.9.tar.gz (50kB): 50kB downloaded
...many lines and a few minutes later...
Successfully installed vaurien cornice gevent statsd-client vaurienclient
greenlet http-parser pyramid simplejson requests zope.interface
translationstring PasteDeploy WebOb repoze.lru zope.deprecation venusian
Cleaning up...
(.env27)$
Using that same shell with the ``.env27`` activated, run the ``vaurien_delay.sh`` script in the ``countries/`` directory::
(.env27)$ ./vaurien_delay.sh
2015-02-25 20:20:17 [69124] [INFO] Starting the Chaos TCP Server
2015-02-25 20:20:17 [69124] [INFO] Options:
2015-02-25 20:20:17 [69124] [INFO] * proxies from localhost:8002 to localhost:8001
2015-02-25 20:20:17 [69124] [INFO] * timeout: 30
2015-02-25 20:20:17 [69124] [INFO] * stay_connected: 0
2015-02-25 20:20:17 [69124] [INFO] * pool_max_size: 100
2015-02-25 20:20:17 [69124] [INFO] * pool_timeout: 30
2015-02-25 20:20:17 [69124] [INFO] * async_mode: 1
The ``vaurien_delay.sh`` adds a 1s delay to every response.
There is also the ``vaurien_error_delay.sh`` script which produces errors in 25% of the responses and a .5 s delay in 50% of the responses.
Platform-specific instructions
==============================
Nginx setup on Mac OS X
-----------------------
Homebrew (copy & paste code at the bottom of http://brew.sh/)::
$ ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
$ brew doctor
$ brew install nginx
Download and unpack::
Docroot is: /usr/local/var/www
/usr/local/etc/nginx/nginx.conf
To have launchd start nginx at login:
ln -sfv /usr/local/opt/nginx/*.plist ~/Library/LaunchAgents
Then to load nginx now:
launchctl load ~/Library/LaunchAgents/homebrew.mxcl.nginx.plist
Or, if you don't want/need launchctl, you can just run:
nginx
Nginx setup on Lubuntu 14.04.1 LTS
----------------------------------
Docroot is: /usr/share/nginx/html

View File

@@ -0,0 +1,61 @@
"""Download flags of top 20 countries by population
asyncio+aiottp version
Sample run::
$ python3 flags_asyncio.py
NG retrieved.
FR retrieved.
IN retrieved.
...
EG retrieved.
DE retrieved.
IR retrieved.
20 flags downloaded in 1.08s
"""
import asyncio
import aiohttp
from flags import BASE_URL, save_flag, main
@asyncio.coroutine
def get_flag(cc):
url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
res = yield from aiohttp.request('GET', url)
image = yield from res.read()
return image
@asyncio.coroutine
def download_one(cc):
image = yield from get_flag(cc)
print('{} retrieved.'.format(cc))
save_flag(image, cc.lower() + '.gif')
return cc
@asyncio.coroutine
def downloader_coro(cc_list):
to_do = [download_one(cc) for cc in cc_list]
results = []
for future in asyncio.as_completed(to_do):
print(future)
result = yield from future
results.append(result)
return results
def download_many(cc_list):
loop = asyncio.get_event_loop()
results = loop.run_until_complete(downloader_coro(cc_list))
loop.close()
return len(results)
if __name__ == '__main__':
main(download_many)

View File

@@ -0,0 +1,42 @@
"""Download flags of top 20 countries by population
ProcessPoolExecutor version
Sample run::
$ python3 flags_threadpool.py
BD retrieved.
EG retrieved.
CN retrieved.
...
PH retrieved.
US retrieved.
IR retrieved.
20 flags downloaded in 0.93s
"""
# BEGIN FLAGS_PROCESSPOOL
from concurrent import futures
from flags import save_flag, get_flag, show, main
MAX_WORKERS = 20
def download_one(cc):
image = get_flag(cc)
show(cc)
save_flag(image, cc.lower() + '.gif')
return cc
def download_many(cc_list):
with futures.ProcessPoolExecutor() as executor: # <1>
res = executor.map(download_one, sorted(cc_list))
return len(list(res))
if __name__ == '__main__':
main(download_many)
# END FLAGS_PROCESSPOOL

View File

@@ -0,0 +1,103 @@
Prefixes with most flags:
M 18
S 18
B 17
C 15
T 13
G 12
A 11
L 11
K 10
There are no flags with prefix X
Errors with threadpool:
$ python3 flags_threadpool2.py _
ZT failed: 503 - Service Temporarily Unavailable
ZU failed: 503 - Service Temporarily Unavailable
ZV failed: 503 - Service Temporarily Unavailable
ZY failed: 503 - Service Temporarily Unavailable
--------------------
24 flags downloaded.
37 not found.
615 errors.
Elapsed time: 3.86s
$ python3 flags_sequential2.py
Searching for 10 flags: BD, BR, CN, ID, IN, JP, NG, PK, RU, US
BD failed: (ProtocolError('Connection aborted.', gaierror(8, 'nodename nor servname provided, or not known')),)
--------------------
0 flag downloaded.
1 error.
Elapsed time: 0.02s
*** WARNING: 9 downloads never started! ***
194 flags downloaded.
482 not found.
Elapsed time: 683.71s
real 11m23.870s
user 0m3.214s
sys 0m0.603s
$ python3 flags2.py -a
LOCAL site: http://localhost:8001/flags
Searching for 194 flags: from AD to ZW
1 concurrent conection will be used.
--------------------
194 flags downloaded.
Elapsed time: 0.90s
(.env34) 192:countries luciano$ python3 flags2.py -e
LOCAL site: http://localhost:8001/flags
Searching for 676 flags: from AA to ZZ
1 concurrent conection will be used.
--------------------
194 flags downloaded.
482 not found.
Elapsed time: 4.71s
(.env34) 192:countries luciano$ python3 flags2.py -s remote
(.env34) 192:countries luciano$ python3 flags2.py -s remote -a -l 100
REMOTE site: http://python.pro.br/fluent/data/flags
Searching for 100 flags: from AD to LK
1 concurrent conection will be used.
--------------------
100 flags downloaded.
Elapsed time: 72.58s
(.env34) 192:countries luciano$ python3 flags2.py -s remote -e
REMOTE site: http://python.pro.br/fluent/data/flags
Searching for 676 flags: from AA to ZZ
1 concurrent conection will be used.
--------------------
194 flags downloaded.
482 not found.
Elapsed time: 436.09s
(.env34) 192:countries luciano$ python3 flags2_threadpool.py -s remote -e
REMOTE site: http://python.pro.br/fluent/data/flags
Searching for 676 flags: from AA to ZZ
30 concurrent conections will be used.
--------------------
194 flags downloaded.
482 not found.
Elapsed time: 12.32s
(.env34) 192:countries luciano$ python3 flags2_threadpool.py -s remote -e -m 100
REMOTE site: http://python.pro.br/fluent/data/flags
Searching for 676 flags: from AA to ZZ
100 concurrent conections will be used.
--------------------
89 flags downloaded.
184 not found.
403 errors.
Elapsed time: 7.62s
(.env34) 192:countries luciano$
wait_with_progress
http://compiletoi.net/fast-scraping-in-python-with-asyncio.html
http://blog.condi.me/asynchronous-part-1/

View File

@@ -0,0 +1,31 @@
"""
Experiments with futures
"""
from time import sleep, strftime
from concurrent import futures
def display(*args):
    """Print *args* prefixed with the current [HH:MM:SS] timestamp."""
    stamp = strftime('[%H:%M:%S]')
    print(stamp, end=' ')
    print(*args)


def loiter(n):
    """Sleep *n* seconds, logging start/end indented by *n* tabs; return n * 10."""
    indent = '\t' * n
    display('{}loiter({}): doing nothing for {}s...'.format(indent, n, n))
    sleep(n)
    display('{}loiter({}): done.'.format(indent, n))
    return n * 10
def demo_submit():
    """Submit five loiter jobs to a 3-worker thread pool and report results.

    Results are printed in submission order because future.result() blocks.
    """
    # `with` guarantees executor.shutdown() runs even if an exception escapes;
    # the original leaked the pool, relying on interpreter exit for cleanup.
    with futures.ThreadPoolExecutor(3) as executor:
        future_list = [executor.submit(loiter, n) for n in range(5)]
        display('done?', [future.done() for future in future_list])
        display('Waiting for results...')
        for i, result in enumerate(future.result() for future in future_list):
            display('result[{}]: {}'.format(i, result))


demo_submit()

View File

@@ -0,0 +1,41 @@
"""
An experiment showing that ``asyncio.Future`` is an iterable (it
implements `__iter__`) designed to be used with ``yield from``.
Priming the future returns itself. After the result of the future
is set, next iteration produces the result as the ``value`` attribute
of ``StopIteration``.
Sample run::
$ python3 future_yield.py
a, future: <Future pending> 0x66514c
b, prime_res: <Future pending> 0x66514c
b, exc.value: 42
"""
import asyncio
@asyncio.coroutine
def a(future):
    """Generator-based coroutine: wait on *future*, return its result."""
    print('a, future:\t', future, hex(id(future)))
    res = yield from future  # the pending future is what the first next() yields
    return res
def b():
    """Drive coroutine *a* by hand with next(), exposing Future's iterable protocol."""
    future = asyncio.Future()
    coro = a(future)
    prime_res = next(coro)  # priming: yields the (still pending) future itself
    print('b, prime_res:\t', prime_res, hex(id(future)))
    # If next(coro) is called again before the result of
    # the future is set, we get:
    # AssertionError: yield from wasn't used with future
    #result = next(coro) # uncomment to see AssertionError
    future.set_result(42)
    try:
        next(coro)  # resuming after set_result makes the coroutine return;
    except StopIteration as exc:
        # the return value rides on StopIteration.value
        print('b, exc.value:\t', exc.value)
b()

View File

@@ -0,0 +1,19 @@
@asyncio.coroutine
def a(future):
    """Generator-based coroutine: wait on *future* and return its result."""
    print('a, future:', future, hex(id(future)))
    res = yield from future
    return res
def b():
    """Manually prime a coroutine waiting on a fresh Future."""
    future = asyncio.Future()
    coro = a(future)
    prime_result = next(coro)  # the pending future is yielded back on priming
    print('b, prime_result:', prime_result, hex(id(future)))
# NOTE(review): this module never imports asyncio, and run_until_complete is
# passed the return value of b() (None) — looks like an abandoned experiment;
# confirm before reuse.
loop = asyncio.get_event_loop()
future = asyncio.Future()
print('future:', future, hex(id(future)))
tasks = [asyncio.async(a(future))]  # asyncio.async: pre-3.4.4 name of ensure_future
res = loop.run_until_complete(b())  # presumably meant to run `tasks` — TODO confirm

View File

@@ -0,0 +1,45 @@
# Code and text by BitBucket user "enigmacurry" posted to
# https://bitbucket.org/pypy/pypy/issue/708/discrepancy-in-dict-subclass-__getitem__
# Adapted by Luciano Ramalho:
# - changed comments to docstring to run with doctest;
# - added test for Test class raising exception
# - and added () to print.
"""
This is a test case to describe a bug I'm seeing in PyPy 1.5. I have
a Cache object that is a dictionary that supports lookup via regular
attribute access. For instance:
>>> c = Cache()
>>> c["asdf"] = "asdf"
>>> c.asdf == c["asdf"]
True
>>> t = Test()
>>> t["asdf"] = "asdf"
>>> t.asdf == t["asdf"]
Traceback (most recent call last):
...
Exception: Trying to getitem: asdf
When looking up keys via attribute, PyPy 1.5 calls __getitem__
whereas CPython 2.7.1 does not.
"""
class Cache(dict):
    """A dict whose keys are also reachable as attributes (c.key == c['key'])."""

    def __init__(self, **kw):
        super().__init__(kw)
        # Aliasing __dict__ to the dict itself makes attribute lookup and
        # key lookup share the same storage.
        self.__dict__ = self


class Test(Cache):
    """Cache subclass that rejects explicit item access."""

    def __getitem__(self, item):
        # Items are meant to be processed differently than attributes:
        raise Exception("Trying to getitem: %s" % item)
if __name__ == "__main__":
t = Test()
t["asdf"] = "asdf"
#CPython does not call __getitem__ .. PyPy does:
print(t.asdf)
#Doesn't matter if it's a member of __dict__ or not:
print(t.__getattribute__)

View File

@@ -0,0 +1,61 @@
BaseException
├── SystemExit
├── KeyboardInterrupt
├── GeneratorExit
└── Exception
├── StopIteration
├── ArithmeticError
│ ├── FloatingPointError
│ ├── OverflowError
│ └── ZeroDivisionError
├── AssertionError
├── AttributeError
├── BufferError
├── EOFError
├── ImportError
├── LookupError
│ ├── IndexError
│ └── KeyError
├── MemoryError
├── NameError
│ └── UnboundLocalError
├── OSError
│ ├── BlockingIOError
│ ├── ChildProcessError
│ ├── ConnectionError
│ │ ├── BrokenPipeError
│ │ ├── ConnectionAbortedError
│ │ ├── ConnectionRefusedError
│ │ └── ConnectionResetError
│ ├── FileExistsError
│ ├── FileNotFoundError
│ ├── InterruptedError
│ ├── IsADirectoryError
│ ├── NotADirectoryError
│ ├── PermissionError
│ ├── ProcessLookupError
│ └── TimeoutError
├── ReferenceError
├── RuntimeError
│ └── NotImplementedError
├── SyntaxError
│ └── IndentationError
│ └── TabError
├── SystemError
├── TypeError
├── ValueError
│ └── UnicodeError
│ ├── UnicodeDecodeError
│ ├── UnicodeEncodeError
│ └── UnicodeTranslateError
└── Warning
├── DeprecationWarning
├── PendingDeprecationWarning
├── RuntimeWarning
├── SyntaxWarning
├── UserWarning
├── FutureWarning
├── ImportWarning
├── UnicodeWarning
├── BytesWarning
└── ResourceWarning

View File

@@ -0,0 +1,40 @@
====================================
Differences between PyPy and CPython
====================================
Note: this is an excerpt from the `PyPy`_ documentation. On Nov. 19, 2014 I ran this test on PyPy 2.4.0 and PyPy3 2.4.0 and the result was not as described, but was the same as with CPython: 'foo'.
.. _PyPy: http://pypy.readthedocs.org/en/latest/cpython_differences.html#subclasses-of-built-in-types
Subclasses of built-in types
----------------------------
Officially, CPython has no rule at all for when exactly
overridden method of subclasses of built-in types get
implicitly called or not. As an approximation, these methods
are never called by other built-in methods of the same object.
For example, an overridden ``__getitem__()`` in a subclass of
``dict`` will not be called by e.g. the built-in ``get()``
method.
The above is true both in CPython and in PyPy. Differences
can occur about whether a built-in function or method will
call an overridden method of *another* object than ``self``.
In PyPy, they are generally always called, whereas not in
CPython. For example, in PyPy, ``dict1.update(dict2)``
considers that ``dict2`` is just a general mapping object, and
will thus call overridden ``keys()`` and ``__getitem__()``
methods on it. So the following code prints ``42`` on PyPy
but ``foo`` on CPython::
>>> class D(dict):
... def __getitem__(self, key):
... return 42
...
>>>
>>> d1 = {}
>>> d2 = D(a='foo')
>>> d1.update(d2)
>>> print(d1['a'])
42

View File

@@ -0,0 +1,31 @@
====================================
Subclassing built-in caveats
====================================
::
>>> class D1(dict):
... def __getitem__(self, key):
... return 42
...
>>> d1 = D1(a='foo')
>>> d1
{'a': 'foo'}
>>> d1['a']
42
>>> d1.get('a')
'foo'
::
>>> class D2(dict):
... def get(self, key):
... return 42
...
>>> d2 = D2(a='foo')
>>> d2
{'a': 'foo'}
>>> d2['a']
'foo'
>>> d2.get('a')
42

View File

@@ -0,0 +1,25 @@
"""
A class equivalent to the class statement below would be generated by this code:
>>> import collections
>>> Point = collections.plainclass('Point', 'x y')
"""
class Point(object):
    """A simple 2-D point.

    Supports repr, equality comparison with other Points, and iteration,
    so ``x, y = p`` unpacking works.
    """

    __slots__ = ['x', 'y']  # save memory in the likely event there are many instances

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __repr__(self):
        return 'Point({!r}, {!r})'.format(self.x, self.y)

    def __eq__(self, other):
        # Defer to the other operand for non-Point comparisons.
        if not isinstance(other, Point):
            return NotImplemented
        return self.x == other.x and self.y == other.y

    def __iter__(self):  # support unpacking
        # BUG FIX: the original signature was __iter__(self, other); Python
        # calls __iter__ with no extra argument, so unpacking raised TypeError.
        yield self.x
        yield self.y

View File

@@ -0,0 +1,29 @@
"""
Alex Martelli, _Python in a Nutshell, 2e._ (O'Reilly, 2006), p. 101
==========================
Properties and inheritance
==========================
Properties are inherited normally, just like any other attribute.
However, there's a little trap for the unwary: the methods called
upon to access a property are those that are defined in the class
in which the property itself is defined, without intrinsic use of
further overriding that may happen in subclasses. For example:
"""
class B(object):
    """The property *g* captures the function B.f at class-creation time."""
    def f(self):
        return 23
    g = property(f)  # binds this exact B.f, not "whatever self.f is"
class C(B):
    def f(self):  # overriding f does NOT affect the inherited property g
        return 42
c = C()
print(c.g) # prints 23, not 42

View File

@@ -0,0 +1,44 @@
'''
4.13. Special Attributes
The implementation adds a few special read-only attributes to
several object types, where they are relevant.
Some of these are not reported by the dir() built-in function.
https://docs.python.org/3/library/stdtypes.html#special-attributes
'''
# Special attributes to probe, per the stdtypes "Special Attributes" docs.
obj_attrs = {'__dict__', '__class__'}
cls_data_attrs = {'__slots__', '__bases__', '__name__', '__qualname__', '__mro__'}
cls_methods = {'mro', '__subclasses__'}
cls_attrs = cls_data_attrs | cls_methods

an_object = object()


class EmptyClass():
    pass


an_instance = EmptyClass()


class EmptySlots():
    __slots__ = ()


a_slots_instance = EmptySlots()

objs = EmptyClass, EmptySlots, an_object, an_instance, a_slots_instance


def _report(subject):
    """Print which of the special attributes dir() omits for *subject*."""
    listed = set(dir(subject))
    print('-' * 60)
    print(repr(subject), ':', type(subject))
    print('obj_attrs not listed:', sorted(obj_attrs - listed))
    print('all cls_attrs :', sorted(cls_attrs))
    print('cls_attrs not listed:', sorted(cls_attrs - listed))


for subject in objs:
    _report(subject)

View File

@@ -0,0 +1,44 @@
"""
Spreadsheet example adapted from Raymond Hettinger's `recipe`__
__ http://code.activestate.com/recipes/355045-spreadsheet/
Demonstration::
>>> from math import sin, pi
>>> ss = Spreadsheet(sin=sin, pi=pi)
>>> ss['a1'] = '-5'
>>> ss['a2'] = 'a1*6'
>>> ss['a3'] = 'a2*7'
>>> ss['a3']
-210
>>> ss['b1'] = 'sin(pi/4)'
>>> ss['b1'] # doctest:+ELLIPSIS
0.707106781186...
>>> ss.getformula('b1')
'sin(pi/4)'
>>> ss['c1'] = 'abs(a2)'
>>> ss['c1']
30
>>> ss['d1'] = '3*'
>>> ss['d1']
Traceback (most recent call last):
...
SyntaxError: unexpected EOF while parsing
"""
class Spreadsheet:
    """Minimal spreadsheet: cells hold formula strings, evaluated on access.

    Cell references inside formulas resolve through this object itself:
    eval() accepts any mapping as its locals, and Spreadsheet implements
    __getitem__, so evaluating one cell recursively evaluates the cells
    it references.
    """
    def __init__(self, **tools):
        self._cells = {}
        # *tools* become the globals of every formula evaluation (e.g. sin, pi).
        self._tools = tools
    def __setitem__(self, key, formula):
        self._cells[key] = formula
    def getformula(self, key):
        """Return the raw formula string stored for *key*."""
        return self._cells[key]
    def __getitem__(self, key):
        # SECURITY: eval() executes arbitrary code from the formula string;
        # only use with trusted input.
        return eval(self._cells[key], self._tools, self)

View File

@@ -0,0 +1,54 @@
"""
Spreadsheet example adapted from Raymond Hettinger's `recipe`__
__ http://code.activestate.com/recipes/355045-spreadsheet/
Demonstration::
>>> from math import sin, pi
>>> ss = Spreadsheet(sin=sin, pi=pi, abs=abs)
>>> ss['a1'] = '-5'
>>> ss['a2'] = 'a1*6'
>>> ss['a3'] = 'a2*7'
>>> ss['a3']
-210
>>> ss['b1'] = 'sin(pi/4)'
>>> ss['b1'] # doctest:+ELLIPSIS
0.707106781186...
>>> ss.getformula('b1')
'sin(pi/4)'
>>> ss['c1'] = 'abs(a2)'
>>> ss['c1']
30
>>> ss['c2'] = 'len(a2)'
>>> ss['c2']
Traceback (most recent call last):
...
NameError: name 'len' is not defined
>>> ss['d1'] = '3*'
Traceback (most recent call last):
...
SyntaxError: unexpected EOF while parsing ['d1'] = '3*'
"""
class Spreadsheet:
    """Spreadsheet with formula validation and restricted builtins.

    Compared to the basic version: formulas are syntax-checked at assignment
    time, and ``__builtins__`` is emptied in the eval globals so formulas can
    use only the explicitly supplied *tools* (no len, open, etc.).
    """
    def __init__(self, **tools):
        self._cells = {}
        # Empty __builtins__ prevents eval from injecting the real builtins.
        self._tools = {'__builtins__' : {}}
        self._tools.update(tools)
    def __setitem__(self, key, formula):
        try:
            # Compile in 'eval' mode only to validate syntax; result discarded.
            compile(formula, '<__setitem__>', 'eval')
        except SyntaxError as exc:
            # Re-raise with the offending cell and formula in the message.
            msg = '{} [{!r}] = {!r}'.format(exc.msg, key, formula)
            raise SyntaxError(msg)
        self._cells[key] = formula
    def getformula(self, key):
        """Return the raw formula string stored for *key*."""
        return self._cells[key]
    def __getitem__(self, key):
        # Self serves as the locals mapping, so cell names resolve recursively.
        # SECURITY: eval of untrusted formulas is still unsafe even with
        # empty builtins (e.g. attribute-chain escapes).
        return eval(self._cells[key], self._tools, self)

View File

@@ -0,0 +1,26 @@
/***
Compound interest function with ``BigDecimal``
Equivalent in Python:
def compound_interest(principal, rate, periods):
return principal * ((1 + rate) ** periods - 1)
***/
import java.math.BigDecimal;
public class Interest {
    // Compound interest earned: principal * ((1 + rate)^periods - 1),
    // computed with exact decimal arithmetic.
    static BigDecimal compoundInterest(BigDecimal principal, BigDecimal rate, int periods) {
        return principal.multiply(BigDecimal.ONE.add(rate).pow(periods).subtract(BigDecimal.ONE));
    }
    public static void main(String[] args) {
        BigDecimal principal = new BigDecimal(1000);
        // Built from a String so 0.06 is represented exactly (no binary-float rounding).
        BigDecimal rate = new BigDecimal("0.06");
        int periods = 5;
        System.out.println(compoundInterest(principal, rate, periods));
    }
}

View File

@@ -0,0 +1,17 @@
"""
Experiments with infix operator dispatch
>>> kadd = KnowsAdd()
>>> kadd + 1
(<KnowsAdd object>, 1)
>>> kadd * 1
"""
class KnowsAdd:
    """Implements only __add__: `obj + x` works, `obj * x` raises TypeError."""

    def __add__(self, other):
        pair = (self, other)
        return pair

    def __repr__(self):
        return '<{} object>'.format(type(self).__name__)

View File

@@ -0,0 +1,47 @@
import java.math.BigInteger;
class CorrectFactorial {
    // BigInteger has arbitrary precision, so results stay exact past 20!
    // (where a 64-bit long overflows; compare SimpleFactorial).
    public static BigInteger factorial(BigInteger n) {
        // n! = 1 for n <= 1; otherwise n * (n-1)!
        return n.compareTo(BigInteger.ONE) <= 0 ? BigInteger.ONE
            : n.multiply(factorial(n.subtract(BigInteger.ONE)));
    }
    public static void main(String args[]) {
        BigInteger upperBound = new BigInteger("25");
        // Manual BigInteger loop: i = 1 .. 25 inclusive.
        for (BigInteger i = BigInteger.ONE;
                i.compareTo(upperBound) <= 0;
                i = i.add(BigInteger.ONE)) {
            System.out.println(i + "! = " + factorial(i));
        }
    }
}
/* output:
1! = 1
2! = 2
3! = 6
4! = 24
5! = 120
6! = 720
7! = 5040
8! = 40320
9! = 362880
10! = 3628800
11! = 39916800
12! = 479001600
13! = 6227020800
14! = 87178291200
15! = 1307674368000
16! = 20922789888000
17! = 355687428096000
18! = 6402373705728000
19! = 121645100408832000
20! = 2432902008176640000
21! = 51090942171709440000
22! = 1124000727777607680000
23! = 25852016738884976640000
24! = 620448401733239439360000
25! = 15511210043330985984000000
*/

View File

@@ -0,0 +1,43 @@
class SimpleFactorial {
    // Deliberate demonstration: long is 64-bit, so results silently overflow
    // starting at 21! (see the expected-output comment below).
    public static long factorial(long n) {
        return n < 2 ? 1 : n * factorial(n-1);
    }
    public static void main(String args[]) {
        for (long i = 1; i <= 25; i++) {
            System.out.println(i + "! = " + factorial(i));
        }
    }
}
/* output: incorrect results starting with 21!
1! = 1
2! = 2
3! = 6
4! = 24
5! = 120
6! = 720
7! = 5040
8! = 40320
9! = 362880
10! = 3628800
11! = 39916800
12! = 479001600
13! = 6227020800
14! = 87178291200
15! = 1307674368000
16! = 20922789888000
17! = 355687428096000
18! = 6402373705728000
19! = 121645100408832000
20! = 2432902008176640000
21! = -4249290049419214848
22! = -1250660718674968576
23! = 8128291617894825984
24! = -7835185981329244160
25! = 7034535277573963776
*/

View File

@@ -0,0 +1,36 @@
def factorial(n):
    """Return n!; any n below 2 (including negatives) gives 1."""
    product = 1
    for factor in range(2, n + 1):
        product *= factor
    return product


if __name__=='__main__':
    for i in range(1, 26):
        print('%s! = %s' % (i, factorial(i)))
"""
output:
1! = 1
2! = 2
3! = 6
4! = 24
5! = 120
6! = 720
7! = 5040
8! = 40320
9! = 362880
10! = 3628800
11! = 39916800
12! = 479001600
13! = 6227020800
14! = 87178291200
15! = 1307674368000
16! = 20922789888000
17! = 355687428096000
18! = 6402373705728000
19! = 121645100408832000
20! = 2432902008176640000
21! = 51090942171709440000
22! = 1124000727777607680000
23! = 25852016738884976640000
24! = 620448401733239439360000
25! = 15511210043330985984000000
"""

View File

@@ -0,0 +1,25 @@
"""
Compound interest function with ``decimal.Decimal``
"""
def compound_interest(principal, rate, periods):
    """Return the interest earned on *principal* at *rate* over *periods*."""
    growth = (1 + rate) ** periods
    return principal * (growth - 1)


def test(verbose=False):
    """Verify compound_interest against a table of known Decimal results."""
    from decimal import Decimal, getcontext
    getcontext().prec = 8
    cases = (
        (1000, Decimal('0.05'), 1, Decimal('50')),
        (1000, Decimal('0.10'), 5, Decimal('610.51')),
        (1000, Decimal('0.10'), 15, Decimal('3177.2482')),
        (1000, Decimal('0.06'), 5, Decimal('338.2256')),
    )
    for principal, rate, periods, expected in cases:
        actual = compound_interest(principal, rate, periods)
        if verbose:
            print('{!r}, {!r}, {!r} -> {!r}'.format(
                principal, rate, periods, actual))
        assert expected == actual, '{!r} != {!r}'.format(expected, actual)


if __name__ == '__main__':
    test(True)

140
attic/operator/vector.py Normal file
View File

@@ -0,0 +1,140 @@
"""
The `+` operator produces a `Vector` result.
>>> v1 = Vector(2, 4)
>>> v2 = Vector(2, 1)
>>> v1 + v2
Vector(4, 5)
We can also implement the `*` operator to perform scalar multiplication
or elementwise multiplication.
>>> v = Vector(3, 4)
>>> abs(v)
5.0
>>> v * 3
Vector(9, 12)
>>> abs(v * 3)
15.0
>>> v25 = Vector(2, 5)
>>> v71 = Vector(7, 1)
>>> v71 * v25
Vector(14, 5)
A vector can be used in a boolean context, where it will be considered
_falsy_ if it has magnitude zero, otherwise it is _truthy_::
>>> bool(v)
True
>>> bool(Vector(0, 0))
False
Vectors can have n-dimensions::
>>> v3 = Vector(1, 2, 3)
>>> len(v3)
3
>>> v3
Vector(1, 2, 3)
>>> abs(v3) # doctest:+ELLIPSIS
3.74165738...
>>> v3 + Vector(4, 5, 6)
Vector(5, 7, 9)
>>> v3 * 5
Vector(5, 10, 15)
>>> v2 + v3
Traceback (most recent call last):
...
ValueError: Addition applies only to vectors of equal dimensions.
The `repr` of a Vector is produced with the help of the `reprlib.repr`
function, limiting the size of the output string:
>>> Vector(*range(100))
Vector(0, 1, 2, 3, 4, 5, ...)
Dot product is a scalar: the sum of the products of the corresponding
components of two vectors.
>>> v25 = Vector(2, 5)
>>> v71 = Vector(7, 1)
>>> v25.dot(v71)
19
>>> Vector(1, 2, 3).dot(Vector(4, 5, 6))
32
>>> Vector(1, 2, 3).dot(Vector(-2, 0, 5))
13
As described in PEP 465, starting with Python 3.5, `__matmul__` is
the special method for the new ``@`` operator, to be used the dot
product of vectors or matrix multiplication (as opposed to ``*``
which is intended for scalar or elementwise multiplication):
>>> # skip these tests on Python < 3.5
>>> v25 @ v71 # doctest:+SKIP
19
>>> v71 * v25
Vector(14, 5)
>>> Vector(1, 2, 3) @ Vector(-2, 0, 5) # doctest:+SKIP
13
"""
# BEGIN VECTOR_OPS
import math
import numbers
import reprlib
EQ_DIMENSIONS_MSG = '%s applies only to vectors of equal dimensions.'


class Vector:
    """An n-dimensional vector supporting +, *, @, abs(), len() and bool()."""

    def __init__(self, *components):  # <1>
        self._components = tuple(components)  # <2>

    def __repr__(self):
        # reprlib caps the output length for huge vectors
        return 'Vector' + (reprlib.repr(self._components))  # <3>

    def __iter__(self):
        return iter(self._components)  # <4>

    def __abs__(self):
        # Euclidean norm of the components
        squares = (comp * comp for comp in self)
        return math.sqrt(sum(squares))  # <5>

    def __len__(self):
        return len(self._components)  # <6>

    def __add__(self, other):
        if len(self) != len(other):
            raise ValueError(EQ_DIMENSIONS_MSG % 'Addition')
        pairs = zip(self, other)
        return Vector(*(a + b for a, b in pairs))  # <7>

    def __mul__(self, other):
        # Scalar multiplication for numbers; elementwise for anything else.
        if not isinstance(other, numbers.Number):
            return self.elementwise_mul(other)  # <9>
        return Vector(*(comp * other for comp in self))  # <8>

    def elementwise_mul(self, other):
        if len(self) != len(other):
            raise ValueError(EQ_DIMENSIONS_MSG %
                             'Elementwise multiplication')
        return Vector(*(a * b for a, b in zip(self, other)))  # <10>

    def __bool__(self):
        # Falsy only for the zero vector.
        return any(self)  # <11>

    def dot(self, other):
        if len(self) != len(other):
            raise ValueError(EQ_DIMENSIONS_MSG %
                             'Dot product')
        products = (a * b for a, b in zip(self, other))
        return sum(products)  # <12>

    __matmul__ = dot  # support @ operator in Python 3.5
# END VECTOR_OPS