update from Atlas

2014-12-29 03:51:34 -02:00 · 2014-12-29 03:51:34 -02:00 · 08b7bce340
commit 08b7bce340
parent 9db73c75ef
12 changed files with 801 additions and 21 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,5 @@
 concurrency/flags/img/
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
--- a/attributes/exists_truthy.py
+++ b/attributes/exists_truthy.py
@ -0,0 +1,43 @@
 import timeit
 def exists_and_truthy_hasattr(obj, attr_name):
    """Return True when `obj` has attribute `attr_name` and its value is truthy.
    "Look before you leap" variant: probes with hasattr() and then fetches
    the value with a second getattr() call. Returns False when the attribute
    is missing. Kept in this exact shape because it is one of the
    alternatives timed by do_tests().
    """
    if hasattr(obj, attr_name):
        return bool(getattr(obj, attr_name))
    else:
        return False
 def exists_and_truthy_getattr(obj, attr_name):
    """Return True when `obj` has attribute `attr_name` and its value is truthy.
    Single-lookup variant: getattr() with a False default, so a missing
    attribute and a falsy attribute both produce False.
    """
    return bool(getattr(obj, attr_name, False))
 def exists_and_truthy_tryget(obj, attr_name):
    """Return True when `obj` has attribute `attr_name` and its value is truthy.
    "Ask forgiveness" variant: fetches the attribute unconditionally and
    maps AttributeError to False.
    """
    try:
        return bool(getattr(obj, attr_name))
    except AttributeError:
        return False
 class Gizmo:
    """Minimal benchmark subject exposing a single `gadget` attribute."""
    def __init__(self):
        # Set on the instance (not the class) so that the script's later
        # `del gizmo.gadget` really makes the attribute disappear.
        self.gadget = True
 gizmo = Gizmo()
 test_keys = 'hasattr', 'getattr', 'tryget'
 def average(timings):
    """Return the mean of `timings` ignoring its first and last entries.
    The entries are dropped by position (the sequence is not sorted), and a
    sequence with fewer than three items raises ZeroDivisionError.
    """
    inner = timings[1:-1]
    total = sum(inner)
    return total / len(inner)
 def do_tests():
    """Time every `exists_and_truthy_*` variant against `gizmo` and print one line each.
    Each variant is run through timeit.repeat() with five repetitions; the
    figure printed is average() of those timings, formatted with five
    decimals next to the right-justified variant name.
    """
    for variant in test_keys:
        target = 'exists_and_truthy_' + variant
        timings = timeit.repeat(
            stmt=target + '(gizmo, "gadget")',
            setup='from __main__ import gizmo, ' + target,
            repeat=5,
        )
        label = variant.rjust(7)
        print(label, format(average(timings), '0.5f'))
 if __name__ == '__main__':
    do_tests()
    del gizmo.gadget
    do_tests()
--- a/attributes/hasattr.py
+++ b/attributes/hasattr.py
@ -0,0 +1,44 @@
 import timeit
 test_hasattr = """
 if hasattr(gizmo, 'gadget'):
    feature = gizmo.gadget
 else:
    feature = None
 """
 test_getattr = """
 feature = getattr(gizmo, 'gadget', None)
 """
 test_tryget = """
 try:
    feature = getattr(gizmo, 'gadget')
 except AttributeError:
    feature = None
 """
 class Gizmo:
    """Minimal benchmark subject exposing a single `gadget` attribute."""
    def __init__(self):
        # Instance attribute: test() deletes it to time the "attribute
        # absent" case and then assigns it back.
        self.gadget = True
 gizmo = Gizmo()
 test_keys = 'hasattr', 'getattr', 'tryget'
 def test():
    """Time each `test_*` snippet with `gizmo.gadget` present and then absent.
    For every key in `test_keys` the matching module-level snippet is timed
    twice with timeit.repeat(); the minimum of each run is printed as
    "key  present  absent". The attribute is restored after the second run.
    """
    setup = 'from __main__ import gizmo'
    snippets = globals()
    for key in test_keys:
        snippet = snippets['test_' + key]
        best_present = min(timeit.repeat(snippet, setup=setup))
        # Remove the attribute so the same snippet exercises the miss path.
        del gizmo.gadget
        best_absent = min(timeit.repeat(snippet, setup=setup))
        gizmo.gadget = True
        row = '{:7}  {:.3f}  {:.3f}'.format(key, best_present, best_absent)
        print(row)
 if __name__ == '__main__':
    test()
--- a/concurrency/flags/countryflags.py
+++ b/concurrency/flags/countryflags.py
@ -16,7 +16,11 @@ NGINX_URL = 'http://localhost:8080/ciaflags/{gec}.gif'
 # Vaurien
 VAURIEN_URL = 'http://localhost:8000/ciaflags/{gec}.gif'
-BASE_URL = VAURIEN_URL
+SOURCE_URLS = {
    'CIA' : CIA_URL,
    'NGINX' : NGINX_URL,
    'VAURIEN' : VAURIEN_URL,
 }
 DEST_PATH_NAME = 'img/{cc}.gif'
@ -34,8 +38,9 @@ def _load():
            cc2gec[iso_cc] = gec
-def flag_url(iso_cc):
+def flag_url(iso_cc, source='CIA'):
-    return BASE_URL.format(gec=cc2gec[iso_cc].lower())
+    base_url = SOURCE_URLS[source.upper()]
    return base_url.format(gec=cc2gec[iso_cc].lower())
 def iso_file_name(iso_cc):
    """Return the DEST_PATH_NAME path filled in with the lower-cased `iso_cc`."""
    lowered = iso_cc.lower()
    return DEST_PATH_NAME.format(cc=lowered)
--- a/concurrency/flags/getsequential.py
+++ b/concurrency/flags/getsequential.py
@ -5,8 +5,8 @@ import time
 times = {}
-def fetch(iso_cc):
+def fetch(iso_cc, source):
-    resp = requests.get(cf.flag_url(iso_cc))
+    resp = requests.get(cf.flag_url(iso_cc, source))
    if resp.status_code != 200:
        resp.raise_for_status()
    file_name = cf.iso_file_name(iso_cc)
@ -14,7 +14,7 @@ def fetch(iso_cc):
        written = img.write(resp.content)
    return written, file_name
-def main():
+def main(source):
    pending = sorted(cf.cc2name)
    to_download = len(pending)
    downloaded = 0
@ -23,7 +23,7 @@ def main():
        print('get:', iso_cc)
        try:
            times[iso_cc] = [time.time() - t0]
-            octets, file_name = fetch(iso_cc)
+            octets, file_name = fetch(iso_cc, source)
            times[iso_cc].append(time.time() - t0)
            downloaded += 1
            print('\t--> {}: {:5d} bytes'.format(file_name, octets))
@ -36,7 +36,14 @@ def main():
        print('{}\t{:.6g}\t{:.6g}'.format(iso_cc, start, end))
 if __name__ == '__main__':
-    main()
+    import argparse
    source_names = ', '.join(sorted(cf.SOURCE_URLS))
    parser = argparse.ArgumentParser(description='Download flag images.')
    parser.add_argument('source', help='one of: ' + source_names)
    args = parser.parse_args()
    main(args.source)
 """
 From cia.gov:
@ -53,4 +60,4 @@ From localhost nginx via Vaurien with .5s delay
 real    1m40.519s
 user    0m1.103s
 sys     0m0.243s
-"""
+"""
--- a/concurrency/flags/getthreadpool.py
+++ b/concurrency/flags/getthreadpool.py
@ -11,7 +11,7 @@ GLOBAL_TIMEOUT = 300  # seconds
 times = {}
-def main(num_threads):
+def main(source, num_threads):
    pool = futures.ThreadPoolExecutor(num_threads)
    pending = {}
    t0 = time.time()
@ -19,7 +19,7 @@ def main(num_threads):
    for iso_cc in sorted(cf.cc2name):
        print('get:', iso_cc)
        times[iso_cc] = [time.time() - t0]
-        job = pool.submit(fetch, iso_cc)
+        job = pool.submit(fetch, iso_cc, source)
        pending[job] = iso_cc
    to_download = len(pending)
    downloaded = 0
@ -39,18 +39,23 @@ def main(num_threads):
        print('{}\t{:.6g}\t{:.6g}'.format(iso_cc, start, end))
 if __name__ == '__main__':
-    if len(sys.argv) == 2:
+    import argparse
-        num_threads = int(sys.argv[1])
+
-    else:
+    source_names = ', '.join(sorted(cf.SOURCE_URLS))
-        num_threads = DEFAULT_NUM_THREADS
+    parser = argparse.ArgumentParser(description='Download flag images.')
-    main(num_threads)
+    parser.add_argument('source', help='one of: ' + source_names)
    parser.add_argument('-t', '--threads', type=int, default=DEFAULT_NUM_THREADS,
                   help='number of threads (default: %s)' % DEFAULT_NUM_THREADS)
    args = parser.parse_args()
    main(args.source, args.threads)
 """
-From localhost nginx:
+From CIA, 1 thread:
-real    0m1.163s
+real    2m0.832s
-user    0m1.001s
+user    0m4.685s
-sys     0m0.289s
+sys     0m0.366s
-"""
+"""
--- a/concurrency/flags/img.zip
+++ b/concurrency/flags/img.zip
--- a/metaprog/spreadsheet.py
+++ b/metaprog/spreadsheet.py
@ -0,0 +1,50 @@
 """
 Spreadsheet example adapted from Raymond Hettinger's `recipe`__
 __ http://code.activestate.com/recipes/355045-spreadsheet/
 Demonstration::
    >>> from math import sin, pi
    >>> ss = Spreadsheet(sin=sin, pi=pi, abs=abs)
    >>> ss['a1'] = '-5'
    >>> ss['a2'] = 'a1*6'
    >>> ss['a3'] = 'a2*7'
    >>> ss['a3']
    -210
    >>> ss['b1'] = 'sin(pi/4)'
    >>> ss['b1']  # doctest:+ELLIPSIS
    0.707106781186...
    >>> ss.getformula('b1')
    'sin(pi/4)'
    >>> ss['c1'] = 'abs(a2)'
    >>> ss['c1']
    30
    >>> ss['c2'] = 'len(a2)'
    >>> ss['c2']
    Traceback (most recent call last):
      ...
    NameError: name 'len' is not defined
    >>> ss['d1'] = '3*'
    >>> ss['d1']
    Traceback (most recent call last):
      ...
    SyntaxError: unexpected EOF while parsing
 """
 class Spreadsheet:
    """Mapping of cell names to formula strings, evaluated lazily on read.
    Keyword arguments given to the constructor are the only names (besides
    other cells) that formulas can use: built-ins are blanked out.
    NOTE: formulas are run through eval(); only use with trusted input.
    """
    def __init__(self, **tools):
        self._cells = {}
        # Start from an empty `__builtins__` and layer the caller's tools on
        # top of it (a tool with that same name would take precedence).
        self._tools = dict({'__builtins__': {}}, **tools)
    def getformula(self, key):
        """Return the formula text stored in cell `key` (KeyError if unset)."""
        formula = self._cells[key]
        return formula
    def __setitem__(self, key, formula):
        """Store `formula` in cell `key` without evaluating it."""
        self._cells.update({key: formula})
    def __getitem__(self, key):
        """Evaluate and return the value of cell `key`.
        The spreadsheet itself is passed as the local namespace, so a cell
        name used inside a formula is resolved by evaluating that cell.
        """
        formula = self._cells[key]
        return eval(formula, self._tools, self)
--- a/metaprog/spreadsheet2.py
+++ b/metaprog/spreadsheet2.py
@ -0,0 +1,54 @@
 """
 Spreadsheet example adapted from Raymond Hettinger's `recipe`__
 __ http://code.activestate.com/recipes/355045-spreadsheet/
 Demonstration::
    >>> from math import sin, pi
    >>> ss = Spreadsheet(sin=sin, pi=pi, abs=abs)
    >>> ss['a1'] = '-5'
    >>> ss['a2'] = 'a1*6'
    >>> ss['a3'] = 'a2*7'
    >>> ss['a3']
    -210
    >>> ss['b1'] = 'sin(pi/4)'
    >>> ss['b1']  # doctest:+ELLIPSIS
    0.707106781186...
    >>> ss.getformula('b1')
    'sin(pi/4)'
    >>> ss['c1'] = 'abs(a2)'
    >>> ss['c1']
    30
    >>> ss['c2'] = 'len(a2)'
    >>> ss['c2']
    Traceback (most recent call last):
      ...
    NameError: name 'len' is not defined
    >>> ss['d1'] = '3*'
    Traceback (most recent call last):
      ...
    SyntaxError: unexpected EOF while parsing ['d1'] = '3*'
 """
 class Spreadsheet:
    """Mapping of cell names to formula strings, syntax-checked on write.
    Keyword arguments given to the constructor are the only names (besides
    other cells) that formulas can use: built-ins are blanked out.
    NOTE: formulas are run through eval(); only use with trusted input.
    """
    def __init__(self, **tools):
        self._cells = {}
        # Start from an empty `__builtins__` and layer the caller's tools on
        # top of it (a tool with that same name would take precedence).
        self._tools = dict({'__builtins__': {}}, **tools)
    def getformula(self, key):
        """Return the formula text stored in cell `key` (KeyError if unset)."""
        formula = self._cells[key]
        return formula
    def __setitem__(self, key, formula):
        """Store `formula` in cell `key` after checking that it compiles.
        A formula that is not a valid expression raises SyntaxError whose
        message also names the cell and the offending text; nothing is stored.
        """
        try:
            compile(formula, '<__setitem__>', 'eval')
        except SyntaxError as exc:
            detail = '{} [{!r}] = {!r}'.format(exc.msg, key, formula)
            raise SyntaxError(detail)
        else:
            self._cells[key] = formula
    def __getitem__(self, key):
        """Evaluate and return the value of cell `key`.
        The spreadsheet itself is passed as the local namespace, so a cell
        name used inside a formula is resolved by evaluating that cell.
        """
        formula = self._cells[key]
        return eval(formula, self._tools, self)
--- a/support/isis2json/isis2json.py
+++ b/support/isis2json/isis2json.py
@ -0,0 +1,261 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
 # isis2json.py: convert ISIS and ISO-2709 files to JSON
 #
 # Copyright (C) 2010 BIREME/PAHO/WHO
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published
 # by the Free Software Foundation, either version 2.1 of the License, or
 # (at your option) any later version.
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Lesser General Public License for more details.
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 ############################
 # BEGIN ISIS2JSON
 # this script works with Python or Jython (versions >=2.5 and <3)
 import sys
 import argparse
 from uuid import uuid4
 import os
 try:
    import json
 except ImportError:
    if os.name == 'java':  # running Jython
        from com.xhaus.jyson import JysonCodec as json
    else:
        import simplejson as json
 SKIP_INACTIVE = True
 DEFAULT_QTY = 2**31
 ISIS_MFN_KEY = 'mfn'
 ISIS_ACTIVE_KEY = 'active'
 SUBFIELD_DELIMITER = '^'
 INPUT_ENCODING = 'cp1252'
 def iter_iso_records(iso_file_name, isis_json_type):  # <1>
    """Yield one dict per record of the ISO-2709 file `iso_file_name`.
    Each dict maps a field tag (as a string without leading zeroes) to the
    list of occurrences of that field. The shape of an occurrence depends on
    `isis_json_type`: 1 = decoded string, 2 = result of subfield.expand(),
    3 = dict built from that result.
    Raises NotImplementedError for any other `isis_json_type`.
    The input file is closed when the generator finishes, is closed early
    by its consumer, or fails with an exception.
    """
    from iso2709 import IsoFile
    from subfield import expand
    iso = IsoFile(iso_file_name)
    try:
        for record in iso:
            fields = {}
            for field in record.directory:
                field_key = str(int(field.tag))  # remove leading zeroes
                field_occurrences = fields.setdefault(field_key, [])
                content = field.value.decode(INPUT_ENCODING, 'replace')
                if isis_json_type == 1:
                    field_occurrences.append(content)
                elif isis_json_type == 2:
                    field_occurrences.append(expand(content))
                elif isis_json_type == 3:
                    field_occurrences.append(dict(expand(content)))
                else:
                    raise NotImplementedError('ISIS-JSON type %s conversion '
                        'not yet implemented for .iso input' % isis_json_type)
            yield fields
    finally:
        # Runs on normal exhaustion, on errors, and when the consumer stops
        # early and the generator is closed.
        iso.close()
 def iter_mst_records(master_file_name, isis_json_type):  # <2>
    """Yield one dict per record of the ISIS master file `master_file_name`.
    Requires the Bruma library (Jython); when it cannot be imported a message
    is printed and SystemExit is raised.
    Each dict holds the MFN under ISIS_MFN_KEY plus one entry per field id,
    mapping to the list of occurrences. With `isis_json_type` 3 an occurrence
    is a dict of subfields ('*' is stored under '_', every other subfield id
    maps to a list of contents); with type 1 it is a single string in which
    subfields are prefixed by SUBFIELD_DELIMITER and their id.
    Raises NotImplementedError for any other `isis_json_type`.
    The master file is closed when the generator finishes, is closed early
    by its consumer, or fails with an exception.
    """
    try:
        from bruma.master import MasterFactory, Record
    except ImportError:
        print('IMPORT ERROR: Jython 2.5 and Bruma.jar '
              'are required to read .mst files')
        raise SystemExit
    mst = MasterFactory.getInstance(master_file_name).open()
    try:
        for record in mst:
            fields = {}
            if SKIP_INACTIVE:
                if record.getStatus() != Record.Status.ACTIVE:
                    continue
            else:  # save status only if inactive records are not skipped
                fields[ISIS_ACTIVE_KEY] = (record.getStatus() ==
                                           Record.Status.ACTIVE)
            fields[ISIS_MFN_KEY] = record.getMfn()
            for field in record.getFields():
                field_key = str(field.getId())
                field_occurrences = fields.setdefault(field_key, [])
                if isis_json_type == 3:
                    content = {}
                    for subfield in field.getSubfields():
                        subfield_key = subfield.getId()
                        if subfield_key == '*':
                            content['_'] = subfield.getContent()
                        else:
                            subfield_occurrences = content.setdefault(subfield_key, [])
                            subfield_occurrences.append(subfield.getContent())
                    field_occurrences.append(content)
                elif isis_json_type == 1:
                    content = []
                    for subfield in field.getSubfields():
                        subfield_key = subfield.getId()
                        if subfield_key == '*':
                            # the main ('*') content always goes first
                            content.insert(0, subfield.getContent())
                        else:
                            content.append(SUBFIELD_DELIMITER + subfield_key +
                                           subfield.getContent())
                    field_occurrences.append(''.join(content))
                else:
                    raise NotImplementedError('ISIS-JSON type %s conversion '
                        'not yet implemented for .mst input' % isis_json_type)
            yield fields
    finally:
        # Runs on normal exhaustion, on errors, and when the consumer stops
        # early and the generator is closed.
        mst.close()
 def write_json(input_gen, file_name, output, qty, skip, id_tag,  # <3>
                gen_uuid, mongo, mfn, isis_json_type, prefix,
                constant):
    """Write records taken from `input_gen` to `output` as JSON.
    Records with index below `skip` are consumed but not written; at most
    `qty` records are written after that. Unless `mongo` is true the records
    are wrapped in a JSON array ('[' ... ']'), which is emitted even when no
    record is written; with `mongo` each record goes on its own line with no
    wrapper.
    The "_id" of each record comes from the first option that is set:
    `id_tag` (field that must occur exactly once and be unique across the
    records written), `gen_uuid` (random UUID) or `mfn` (value stored under
    ISIS_MFN_KEY).
    `prefix` is prepended to every all-digit tag. `constant` is a
    'key:value' string added to every record; only the first ':' separates
    key from value. `file_name` is not used by this function.
    Raises KeyError when the id tag is missing from a record, TypeError when
    it is repeated or its value is duplicated, and NotImplementedError when
    `id_tag` is used with an unknown `isis_json_type`.
    """
    start = skip
    end = start + qty
    if id_tag:
        id_tag = str(id_tag)
        ids = set()
    else:
        id_tag = ''
    if not mongo:
        # Open the array up front: an empty input (or qty == 0) must still
        # produce a valid "[]" rather than a lone closing bracket.
        output.write('[')
    for i, record in enumerate(input_gen):
        if i >= end:
            break
        # A separator is needed before every written record but the first;
        # max() keeps index 0 separator-free even for a negative skip.
        if not mongo and i > max(start, 0):
            output.write(',')
        if start <= i < end:
            if id_tag:
                occurrences = record.get(id_tag, None)
                if occurrences is None:
                    msg = 'id tag #%s not found in record %s'
                    if ISIS_MFN_KEY in record:
                        msg = msg + (' (mfn=%s)' % record[ISIS_MFN_KEY])
                    raise KeyError(msg % (id_tag, i))
                if len(occurrences) > 1:
                    msg = 'multiple id tags #%s found in record %s'
                    if ISIS_MFN_KEY in record:
                        msg = msg + (' (mfn=%s)' % record[ISIS_MFN_KEY])
                    raise TypeError(msg % (id_tag, i))
                else:  # ok, we have one and only one id field
                    if isis_json_type == 1:
                        rec_id = occurrences[0]
                    elif isis_json_type == 2:
                        rec_id = occurrences[0][0][1]
                    elif isis_json_type == 3:
                        rec_id = occurrences[0]['_']
                    else:
                        # Without this branch the id would silently fall
                        # back to whatever the name happened to be bound to.
                        raise NotImplementedError('ISIS-JSON type %s id '
                            'extraction not implemented' % isis_json_type)
                    if rec_id in ids:
                        msg = 'duplicate id %s in tag #%s, record %s'
                        if ISIS_MFN_KEY in record:
                            msg = msg + (' (mfn=%s)' % record[ISIS_MFN_KEY])
                        raise TypeError(msg % (rec_id, id_tag, i))
                    record['_id'] = rec_id
                    ids.add(rec_id)
            elif gen_uuid:
                record['_id'] = unicode(uuid4())
            elif mfn:
                record['_id'] = record[ISIS_MFN_KEY]
            if prefix:
                # iterate over a fixed sequence of tags
                for tag in tuple(record):
                    if str(tag).isdigit():
                        record[prefix+tag] = record[tag]
                        del record[tag]  # this is why we iterate over a tuple
                        # with the tags, and not directly on the record dict
            if constant:
                # Split on the first ':' only, so values such as URLs work.
                constant_key, constant_value = constant.split(':', 1)
                record[constant_key] = constant_value
            output.write(json.dumps(record).encode('utf-8'))
            output.write('\n')
    if not mongo:
        output.write(']\n')
 def main():  # <4>
    """Command-line entry point: parse the arguments and run the conversion.
    The input reader is chosen from the file name: names ending in '.mst'
    use iter_mst_records, anything else uses iter_iso_records (for which
    the -n/--mfn option is rejected). The records are then passed to
    write_json, optionally wrapped in a '{ "docs" : ... }' document when
    -c/--couch is given, and the output file is closed.
    """
    # create the parser
    parser = argparse.ArgumentParser(
        description='Convert an ISIS .mst or .iso file to a JSON array')
    # add the arguments
    parser.add_argument(
        'file_name', metavar='INPUT.(mst|iso)',
        help='.mst or .iso file to read')
    parser.add_argument(
        '-o', '--out', type=argparse.FileType('w'), default=sys.stdout,
        metavar='OUTPUT.json',
        help='the file where the JSON output should be written'
             ' (default: write to stdout)')
    parser.add_argument(
        '-c', '--couch', action='store_true',
        help='output array within a "docs" item in a JSON document'
             ' for bulk insert to CouchDB via POST to db/_bulk_docs')
    parser.add_argument(
        '-m', '--mongo', action='store_true',
        help='output individual records as separate JSON dictionaries,'
             ' one per line for bulk insert to MongoDB via mongoimport utility')
    parser.add_argument(
        '-t', '--type', type=int, metavar='ISIS_JSON_TYPE', default=1,
        help='ISIS-JSON type, sets field structure: 1=string, 2=alist, 3=dict (default=1)')
    parser.add_argument(
        '-q', '--qty', type=int, default=DEFAULT_QTY,
        help='maximum quantity of records to read (default=ALL)')
    parser.add_argument(
        '-s', '--skip', type=int, default=0,
        help='records to skip from start of .mst (default=0)')
    parser.add_argument(
        '-i', '--id', type=int, metavar='TAG_NUMBER', default=0,
        help='generate an "_id" from the given unique TAG field number'
             ' for each record')
    parser.add_argument(
        '-u', '--uuid', action='store_true',
        help='generate an "_id" with a random UUID for each record')
    parser.add_argument(
        '-p', '--prefix', type=str, metavar='PREFIX', default='',
        help='concatenate prefix to every numeric field tag (ex. 99 becomes "v99")')
    parser.add_argument(
        '-n', '--mfn', action='store_true',
        help='generate an "_id" from the MFN of each record'
             ' (available only for .mst input)')
    parser.add_argument(
        '-k', '--constant', type=str, metavar='TAG:VALUE', default='',
        help='Include a constant tag:value in every record (ex. -k type:AS)')
    '''
    # TODO: implement this to export large quantities of records to CouchDB
    parser.add_argument(
        '-r', '--repeat', type=int, default=1,
        help='repeat operation, saving multiple JSON files'
             ' (default=1, use -r 0 to repeat until end of input)')
    '''
    # parse the command line
    args = parser.parse_args()
    # the reader is selected by file extension; only '.mst' gets the
    # master-file reader, every other name is treated as ISO-2709
    if args.file_name.lower().endswith('.mst'):
        input_gen_func = iter_mst_records  # <5>
    else:
        if args.mfn:
            # NOTE(review): "UNSUPORTED" is misspelled in this user-facing message
            print('UNSUPORTED: -n/--mfn option only available for .mst input.')
            raise SystemExit
        input_gen_func = iter_iso_records  # <6>
    input_gen = input_gen_func(args.file_name, args.type)  # <7>
    # the CouchDB wrapper is written around whatever write_json emits
    if args.couch:
        args.out.write('{ "docs" : ')
    write_json(input_gen, args.file_name, args.out, args.qty,  # <8>
               args.skip, args.id, args.uuid, args.mongo, args.mfn,
               args.type, args.prefix, args.constant)
    if args.couch:
        args.out.write('}\n')
    # NOTE(review): when -o is not given this closes sys.stdout
    args.out.close()
 if __name__ == '__main__':
    main()
 # END ISIS2JSON
--- a/support/isis2json/iso2709.py
+++ b/support/isis2json/iso2709.py
@ -0,0 +1,167 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
 # ISO-2709 file reader
 #
 # Copyright (C) 2010 BIREME/PAHO/WHO
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published
 # by the Free Software Foundation, either version 2.1 of the License, or
 # (at your option) any later version.
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Lesser General Public License for more details.
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 from struct import unpack
 CR =  '\x0D' # \r
 LF =  '\x0A' # \n
 IS1 = '\x1F' # ECMA-48 Unit Separator
 IS2 = '\x1E' # ECMA-48 Record Separator / ISO-2709 field separator
 IS3 = '\x1D' # ECMA-48 Group Separator / ISO-2709 record separator
 LABEL_LEN = 24
 LABEL_FORMAT = '5s c 4s c c 5s 3s c c c c'
 TAG_LEN = 3
 DEFAULT_ENCODING = 'ASCII'
 SUBFIELD_DELIMITER = '^'
 class IsoFile(object):
    """Iterator over the records of an ISO-2709 file.
    The file is opened in binary mode on construction; every step of the
    iteration builds an IsoRecord that reads its own data from this object.
    """
    def __init__(self, filename, encoding=DEFAULT_ENCODING):
        self.file = open(filename, 'rb')
        self.encoding = encoding
    def __iter__(self):
        return self
    def next(self):
        """Read and return the next record (built by IsoRecord)."""
        record = IsoRecord(self)
        return record
    __next__ = next # Python 3 compatibility
    def read(self, size):
        ''' read and drop all CR and LF characters '''
        # TODO: this is inefficient but works, patches accepted!
        # NOTE: our fixtures include files which have no linebreaks,
        # files with CR-LF linebreaks and files with LF linebreaks
        pieces = []
        remaining = size
        while remaining > 0:
            raw = self.file.read(remaining)
            if not raw:
                # end of file: return whatever was collected so far
                break
            cleaned = raw.replace(CR, '').replace(LF, '')
            # only characters that survive the clean-up count towards `size`
            remaining -= len(cleaned)
            pieces.append(cleaned)
        return ''.join(pieces)
    def close(self):
        """Close the underlying file."""
        self.file.close()
 class IsoRecord(object):
    """One ISO-2709 record, read from an IsoFile on construction.
    Building an instance reads, in order, the record label, the directory
    (a list of Field entries kept in `self.directory`) and the field values.
    Every label part is stored as an attribute named after
    `label_part_names`. Iterating over a record yields its directory fields.
    """
    label_part_names = ('rec_len rec_status impl_codes indicator_len identifier_len'
                        ' base_addr user_defined'
                        # directory map:
                        ' fld_len_len start_len impl_len reserved').split()
    rec_len = 0
    def __init__(self, iso_file=None):
        self.iso_file = iso_file
        self.load_label()
        self.load_directory()
        self.load_fields()
    def __len__(self):
        return self.rec_len
    def load_label(self):
        """Read the label and store its parts as attributes.
        Raises StopIteration when nothing is left to read (this is what ends
        the iteration of the owning IsoFile) and ValueError when the label
        is shorter than LABEL_LEN.
        """
        label = self.iso_file.read(LABEL_LEN)
        if len(label) == 0:
            raise StopIteration
        elif len(label) != LABEL_LEN:
            raise ValueError('Invalid record label: "%s"' % label)
        parts = unpack(LABEL_FORMAT, label)
        for name, part in zip(self.label_part_names, parts):
            # lengths and addresses are numeric; the other parts stay as read
            if name.endswith('_len') or name.endswith('_addr'):
                part = int(part)
            setattr(self, name, part)
    def show_label(self):
        """Print every label part with its value."""
        for name in self.label_part_names:
            print('%15s : %r' % (name, getattr(self, name)))
    def load_directory(self):
        """Read directory entries into `self.directory` as Field objects.
        Entries are read while the next character is a digit; the first
        non-digit character is consumed and ends the directory.
        """
        fmt_dir = '3s %ss %ss %ss' % (self.fld_len_len, self.start_len, self.impl_len)
        entry_len = TAG_LEN + self.fld_len_len + self.start_len + self.impl_len
        self.directory = []
        while True:
            char = self.iso_file.read(1)
            if char.isdigit():
                entry = char + self.iso_file.read(entry_len-1)
                entry = Field(* unpack(fmt_dir, entry))
                self.directory.append(entry)
            else:
                break
    def load_fields(self):
        """Read the value of every directory field, then the record separator."""
        for field in self.directory:
            if self.indicator_len > 0:
                field.indicator = self.iso_file.read(self.indicator_len)
            # XXX: lilacs30.iso has an identifier_len == 2,
            # but we need to ignore it to successfully read the field contents
            # TODO: find out when to ignore the identifier_len,
            # or fix the lilacs30.iso fixture
            #
            ##if self.identifier_len > 0: #
            ##    field.identifier = self.iso_file.read(self.identifier_len)
            value = self.iso_file.read(len(field))
            assert len(value) == len(field)
            field.value = value[:-1] # remove trailing field separator
        self.iso_file.read(1) # discard record separator
    def __iter__(self):
        # Iterate over the fields themselves. Returning `self` here would
        # never terminate, because `next` below is a generator function and
        # so hands back a brand-new generator on every call.
        return iter(self.directory)
    def next(self):
        """Return a generator over the directory fields (kept for explicit callers)."""
        for field in self.directory:
            yield(field)
    __next__ = next # Python 3 compatibility
    def dump(self):
        """Print the tag and value of every field."""
        for field in self.directory:
            print('%3s %r' % (field.tag, field.value))
 class Field(object):
    """Directory entry of a record: tag, length, start position and impl part.
    `len` and `start` are converted to int; len() of a Field is its `len`.
    """
    def __init__(self, tag, len, start, impl):
        self.tag = tag
        self.len = int(len)
        self.start = int(start)
        self.impl = impl
    def __len__(self):
        return self.len
    def show(self):
        """Print every attribute of the entry, one per line."""
        for name in ('tag', 'len', 'start', 'impl'):
            value = getattr(self, name)
            print('%15s : %r' % (name, value))
 def test():
    """Run the doctests stored in iso2709_test.txt."""
    from doctest import testfile
    testfile('iso2709_test.txt')
 if __name__=='__main__':
    test()
--- a/support/isis2json/subfield.py
+++ b/support/isis2json/subfield.py
@ -0,0 +1,142 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
 # ISIS-DM: the ISIS Data Model API
 #
 # Copyright (C) 2010 BIREME/PAHO/WHO
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published
 # by the Free Software Foundation, either version 2.1 of the License, or
 # (at your option) any later version.
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Lesser General Public License for more details.
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 from collections import namedtuple
 import re
 MAIN_SUBFIELD_KEY = '_'
 SUBFIELD_MARKER_RE = re.compile(r'\^([a-z0-9])', re.IGNORECASE)
 DEFAULT_ENCODING = u'utf-8'
 def expand(content, subkeys=None):
    ''' Parse a field into an association list of keys and subfields
        >>> expand('zero^1one^2two^3three')
        [('_', 'zero'), ('1', 'one'), ('2', 'two'), ('3', 'three')]
        With subkeys=None any letter or digit after '^' starts a subfield;
        with subkeys='' the content is returned whole under the main key;
        otherwise only the characters listed in subkeys start a subfield.
        Keys are lower-cased and trailing whitespace is stripped from values.
    '''
    if subkeys == '':
        return [(MAIN_SUBFIELD_KEY, content)]
    if subkeys is None:
        regex = SUBFIELD_MARKER_RE
    else:
        regex = re.compile(r'\^(['+subkeys+'])', re.IGNORECASE)
    # pad a doubled caret with a space before scanning for markers
    content = content.replace('^^', '^^ ')
    pieces = []
    current_key = MAIN_SUBFIELD_KEY
    cursor = 0
    for marker in regex.finditer(content):
        # text before this marker belongs to the key seen previously
        pieces.append((current_key, content[cursor:marker.start()].rstrip()))
        current_key = marker.group(1).lower()
        cursor = marker.end()
    pieces.append((current_key, content[cursor:].rstrip()))
    return pieces
 class CompositeString(object):
    ''' Represent an Isis field, with subfields, using
    Python native datastructures
    >>> author = CompositeString('John Tenniel^xillustrator',
    ... subkeys='x')
    >>> unicode(author)
    u'John Tenniel^xillustrator'
    The raw value is decoded with `encoding` and parsed by expand(); indexing
    by subfield key returns the first matching subfield.
    '''
    def __init__(self, isis_raw, subkeys=None, encoding=DEFAULT_ENCODING):
        if isinstance(isis_raw, basestring):
            decoded = isis_raw.decode(encoding)
            self.__isis_raw = decoded
            self.__expanded = expand(decoded, subkeys)
        else:
            raise TypeError('%r value must be unicode or str instance' % isis_raw)
    def __getitem__(self, key):
        """Return the content of the first subfield whose key is `key`."""
        for name, text in self.__expanded:
            if name == key:
                return text
        raise KeyError(key)
    def __iter__(self):
        """Iterate over the subfield keys, in order of appearance."""
        return (name for name, _ in self.__expanded)
    def items(self):
        """Return the list of (key, content) pairs."""
        return self.__expanded
    def __str__(self):
        return str(self.__isis_raw)
    def __unicode__(self):
        return self.__isis_raw
 class CompositeField(object):
    ''' Represent an Isis field, with subfields, using
        Python native datastructures
        >>> author = CompositeField( [('name','Braz, Marcelo'),('role','writer')] )
        >>> print(author['name'])
        Braz, Marcelo
        >>> print(author['role'])
        writer
        >>> author
        CompositeField((('name', 'Braz, Marcelo'), ('role', 'writer')))
        `value` is anything dict() accepts as key-value pairs. `subkeys`
        lists the allowed keys and fixes their order; it defaults to the keys
        of `value` in the order given. Keys listed in `subkeys` but absent
        from `value` are stored with None.
        Raises TypeError when `value` is not a key-value structure or holds
        a key that is not in `subkeys`.
    '''
    def __init__(self, value, subkeys=None):
        if subkeys is None:
            subkeys = [item[0] for item in value]
        try:
            value_as_dict = dict(value)
        except TypeError:
            # Report the offending value: `self` has no state yet at this
            # point, so formatting it would fail inside __repr__.
            raise TypeError('%r value must be a key-value structure' % (value,))
        for key in value_as_dict:
            if key not in subkeys:
                raise TypeError('Unexpected keyword %r' % (key,))
        self.value = tuple([(key, value_as_dict.get(key,None)) for key in subkeys])
    def __getitem__(self, key):
        return dict(self.value)[key]
    def __repr__(self):
        return "CompositeField(%s)" % str(self.items())
    def items(self):
        """Return the tuple of (key, content) pairs."""
        return self.value
    def __unicode__(self):
        return unicode(self.items())
    def __str__(self):
        return str(self.items())
 def test():
    """Run the doctests embedded in this module."""
    from doctest import testmod
    testmod()
 if __name__=='__main__':
    test()