Differential D10335 Diff 70580 lib/lib9p/pytest/sequencer.py

Changeset View

Standalone View

lib/lib9p/pytest/sequencer.py

This file was added.

				#! /usr/bin/env python

				from __future__ import print_function

				#__all__ = ['EncDec', 'EncDecSimple', 'EncDecTyped', 'EncDecA',
				# 'SequenceError', 'Sequencer']

				import abc
				import struct
				import sys

				# Little-endian unsigned integer coders, keyed by field width in bytes.
				# Each width can be looked up either by its string form ('2') or by
				# its integer form (2); both keys refer to the very same Struct object.
				# '_string_' maps to None because strings are handled specially
				# rather than by one fixed-size Struct.
				_ProtoStruct = {'_string_': None}
				for _width, _code in ((1, 'B'), (2, 'H'), (4, 'I'), (8, 'Q')):
				    _ProtoStruct[str(_width)] = struct.Struct('<' + _code)
				    _ProtoStruct[_width] = _ProtoStruct[str(_width)]
				# Do not leave the loop variables behind in the module namespace.
				del _width, _code

				class EncDec(object):
				    """
				    Base class for en/de-coders, which are put into sequencers.

				    All have a name and arbitrary user-supplied auxiliary data
				    (the subclasses in this module default it to None).

				    All provide a pack() and unpack().  The pack() function
				    returns a "bytes" value.  This is internally implemented as a
				    function apack() that returns a list of struct.pack() bytes,
				    and pack() just joins them up as needed.

				    The pack/unpack functions take a dictionary of variable names
				    and values, and a second dictionary for conditionals, but at
				    this level conditionals don't apply: they are just being
				    passed through.  Variable names do apply to array encoders.

				    EncDec also provides b2s() and s2b() static methods, which
				    convert strings to bytes and vice versa, as reversibly as
				    possible (using surrogateescape encoding).  In Python2 b2s()
				    is a no-op since the string type is the bytes type
				    (<type 'unicode'> is the unicode-ized string type).

				    EncDec also provides b2u() and u2b() to do conversion to/from
				    Unicode.

				    These are partly for internal use (all strings get converted
				    to UTF-8 byte sequences when coding a _string_ type) and partly
				    for doctests, where we just want some py2k/py3k compat hacks.
				    """
				    # The docstring must be the first statement of the class body in
				    # order to become EncDec.__doc__, so the metaclass hook follows it.
				    # NOTE: __metaclass__ is the Python 2 spelling; Python 3 ignores
				    # this attribute, so the @abc.abstractmethod markers below are
				    # only enforced at instantiation time under Python 2.
				    __metaclass__ = abc.ABCMeta

				    def __init__(self, name, aux):
				        """
				        Remember the coder's <name> and the caller's opaque <aux> data.
				        """
				        self.name = name
				        self.aux = aux

				    @staticmethod
				    def b2u(byte_sequence):
				        "transform bytes to unicode"
				        return byte_sequence.decode('utf-8', 'surrogateescape')

				    @staticmethod
				    def u2b(unicode_sequence):
				        "transform unicode to bytes"
				        return unicode_sequence.encode('utf-8', 'surrogateescape')

				    # b2s/s2b depend on what the native "str" type is, so they are
				    # selected once, while the class body is being executed.
				    if sys.version_info[0] >= 3:
				        # py3k: str is unicode, so bytes -> str is exactly b2u.
				        b2s = b2u

				        @staticmethod
				        def s2b(string):
				            "transform string to bytes (leaves raw byte sequence unchanged)"
				            if isinstance(string, bytes):
				                return string
				            return string.encode('utf-8', 'surrogateescape')
				    else:
				        @staticmethod
				        def b2s(byte_sequence):
				            "transform bytes to string - no-op in python2.7"
				            return byte_sequence

				        @staticmethod
				        def s2b(string):
				            "transform string or unicode to bytes"
				            if isinstance(string, unicode):
				                return string.encode('utf-8', 'surrogateescape')
				            return string

				    def pack(self, vdict, cdict, val):
				        """
				        Encode value <val> into a single byte-string.

				        This simply joins the list of pieces produced by apack();
				        <vdict> and <cdict> are handed through to it unchanged.
				        """
				        return b''.join(self.apack(vdict, cdict, val))

				    @abc.abstractmethod
				    def apack(self, vdict, cdict, val):
				        "encode value <val> into [bytes1, b2, ..., bN]"

				    @abc.abstractmethod
				    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
				        "unpack bytes from <bstring> at <offset>"


				class EncDecSimple(EncDec):
				    r"""
				    Encode/decode a simple (but named) field.  The field is not an
				    array, which requires using EncDecA, nor a typed object
				    like a qid or stat instance -- those require a Sequence and
				    EncDecTyped.

				    The format is one of '1'/1, '2'/2, '4'/4, '8'/8, or '_string_'.

				    Note: using b2s here is purely a doctest/testmod python2/python3
				    compat hack.  The output of e.pack is <type 'bytes'>; b2s
				    converts it to a string, purely for display purposes.  (It might
				    be better to map py2 output to bytes but they just print as a
				    string anyway.)  In normal use, you should not call b2s here.

				    >>> e = EncDecSimple('eggs', 2)
				    >>> e.b2s(e.pack({}, {}, 0))
				    '\x00\x00'
				    >>> e.b2s(e.pack({}, {}, 256))
				    '\x00\x01'

				    Values that cannot be packed produce a SequenceError:

				    >>> e.pack({}, {}, None)
				    Traceback (most recent call last):
				    ...
				    SequenceError: failed while packing 'eggs'=None
				    >>> e.pack({}, {}, -1)
				    Traceback (most recent call last):
				    ...
				    SequenceError: failed while packing 'eggs'=-1

				    Unpacking both returns a value, and tells how many bytes it
				    used out of the bytestring or byte-array argument.  If there
				    are not enough bytes remaining at the starting offset, it
				    raises a SequenceError, unless noerror=True (then unset
				    values are None).

				    >>> e.unpack({}, {}, b'\x00\x01', 0)
				    (256, 2)
				    >>> e.unpack({}, {}, b'', 0)
				    Traceback (most recent call last):
				    ...
				    SequenceError: out of data while unpacking 'eggs'
				    >>> e.unpack({}, {}, b'', 0, noerror=True)
				    (None, 2)

				    Note that strings can be provided as regular strings, byte
				    strings (same as regular strings in py2k), or Unicode strings
				    (same as regular strings in py3k).  Unicode strings will be
				    converted to UTF-8 before being packed.  Since this leaves
				    7-bit characters alone, these examples work in both py2k and
				    py3k.  (Note: the UTF-8 encoding of u'\u1234' is
				    '\xe1\x88\xb4' or 225, 136, 180.  The b2i trick below is
				    another py2k vs py3k special case just for doctests: py2k
				    tries to display the utf-8 encoded data as a string.)

				    >>> e = EncDecSimple('spam', '_string_')
				    >>> e.b2s(e.pack({}, {}, 'p3=unicode,p2=bytes'))
				    '\x13\x00p3=unicode,p2=bytes'

				    >>> e.b2s(e.pack({}, {}, b'bytes'))
				    '\x05\x00bytes'

				    >>> import sys
				    >>> ispy3k = sys.version_info[0] >= 3

				    >>> b2i = lambda x: x if ispy3k else ord(x)
				    >>> [b2i(x) for x in e.pack({}, {}, u'\u1234')]
				    [3, 0, 225, 136, 180]

				    The byte length of the utf-8 data cannot exceed 65535 since
				    the encoding has the length as a 2-byte field (a la the
				    encoding for 'eggs' here).  A too-long string produces
				    a SequenceError as well.

				    >>> e.pack({}, {}, 16384 * 'spam')
				    Traceback (most recent call last):
				    ...
				    SequenceError: string too long (len=65536) while packing 'spam'

				    Unpacking strings produces byte arrays.  (Of course,
				    in py2k these are also known as <type 'str'>.)

				    >>> unpacked = e.unpack({}, {}, b'\x04\x00data', 0)
				    >>> etype = bytes if ispy3k else str
				    >>> print(isinstance(unpacked[0], etype))
				    True
				    >>> e.b2s(unpacked[0])
				    'data'
				    >>> unpacked[1]
				    6

				    You may use e.b2s() to convert them to unicode strings in py3k,
				    or you may set e.autob2s.  This still only really does
				    anything in py3k, since py2k strings are bytes, so it's
				    really just intended for doctest purposes (see EncDecA):

				    >>> e.autob2s = True
				    >>> e.unpack({}, {}, b'\x07\x00stringy', 0)
				    ('stringy', 9)
				    """
				    # string length: 2 byte unsigned field
				    string_len = _ProtoStruct[2]

				    def __init__(self, name, fmt, aux=None):
				        """
				        Create a coder for field <name> using format <fmt>.

				        <fmt> must be a key of _ProtoStruct (a lookup failure
				        propagates as KeyError).  autob2s starts out False.
				        """
				        super(EncDecSimple, self).__init__(name, aux)
				        self.fmt = fmt
				        # None for '_string_'; a struct.Struct for the integer widths.
				        self.struct = _ProtoStruct[fmt]
				        self.autob2s = False

				    def __repr__(self):
				        if self.aux is None:
				            return '{0}({1!r}, {2!r})'.format(self.__class__.__name__,
				                self.name, self.fmt)
				        return '{0}({1!r}, {2!r}, {3!r})'.format(self.__class__.__name__,
				            self.name, self.fmt, self.aux)

				    __str__ = __repr__

				    def apack(self, vdict, cdict, val):
				        """
				        Encode <val>, returning a list of byte-strings.

				        Integer formats give a one-element list.  Strings give
				        [length, data], where the data is the s2b() form of <val>.
				        Raises SequenceError if the value cannot be packed or if
				        the string data is longer than 65535 bytes.
				        """
				        try:
				            if self.struct is not None:
				                return [self.struct.pack(val)]
				            sval = self.s2b(val)
				            if len(sval) > 65535:
				                raise SequenceError('string too long (len={0:d}) '
				                    'while packing {1!r}'.format(len(sval), self.name))
				            return [EncDecSimple.string_len.pack(len(sval)), sval]
				        # Include AttributeError in case someone tries to, e.g.,
				        # pack name=None and self.s2b() tries to use .encode on it.
				        except (struct.error, AttributeError):
				            raise SequenceError('failed '
				                'while packing {0!r}={1!r}'.format(self.name, val))

				    def _unpack1(self, via, bstring, offset, noerror):
				        """
				        Internal function to unpack a single item with Struct <via>.

				        Returns (value, offset + via.size).  When <bstring> has too
				        few bytes at <offset>, the value is None if <noerror> is
				        set; otherwise SequenceError is raised.
				        """
				        try:
				            tup = via.unpack_from(bstring, offset)
				        except struct.error:
				            # Decide whether this is a short buffer by looking at
				            # the sizes rather than at the wording of the exception
				            # message, which is not a stable interface.
				            if offset >= 0 and len(bstring) - offset < via.size:
				                if noerror:
				                    return None, offset + via.size
				                raise SequenceError('out of data '
				                    'while unpacking {0!r}'.format(self.name))
				            # not clear what to do here if noerror
				            raise SequenceError('failed '
				                'while unpacking {0!r}'.format(self.name))
				        assert len(tup) == 1
				        return tup[0], offset + via.size

				    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
				        "decode a value; return the value and the new offset"
				        if self.struct is not None:
				            return self._unpack1(self.struct, bstring, offset, noerror)
				        # Strings: a 2-byte length, followed by that many data bytes.
				        slen, offset = self._unpack1(EncDecSimple.string_len, bstring,
				            offset, noerror)
				        if slen is None:
				            # Only reachable with noerror: the length itself was missing.
				            return None, offset
				        nexto = offset + slen
				        if len(bstring) < nexto:
				            if noerror:
				                val = None
				            else:
				                raise SequenceError('out of data '
				                    'while unpacking {0!r}'.format(self.name))
				        else:
				            val = bstring[offset:nexto]
				            if self.autob2s:
				                val = self.b2s(val)
				        return val, nexto

				class EncDecTyped(EncDec):
				    r"""
				    En/de-coder for typed objects.  These are built from PFODs, a
				    sneaky class variant of OrderedDict similar to namedtuple.

				    klass() called with no arguments has to produce an instance
				    whose members are all None.

				    A Sequencer is needed as well: it does the actual packing and
				    unpacking of the members of the underlying pfod.

				    >>> qid_s = Sequencer('qid')
				    >>> qid_s.append_encdec(None, EncDecSimple('type', 1))
				    >>> qid_s.append_encdec(None, EncDecSimple('version', 4))
				    >>> qid_s.append_encdec(None, EncDecSimple('path', 8))
				    >>> len(qid_s)
				    3

				    >>> from pfod import pfod
				    >>> qid = pfod('qid', ['type', 'version', 'path'])
				    >>> len(qid._fields)
				    3
				    >>> qid_inst = qid(1, 2, 3)
				    >>> qid_inst
				    qid(type=1, version=2, path=3)

				    >>> e = EncDecTyped(qid, 'aqid', qid_s)
				    >>> e.b2s(e.pack({}, {}, qid_inst))
				    '\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00'
				    >>> e.unpack({}, {},
				    ... b'\x01\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00', 0)
				    (qid(type=1, version=2, path=3), 13)

				    When the sequencer has conditional entries, unpacking leaves
				    the members that were not selected set to None (see also the
				    Sequencer examples):

				    >>> breakfast = pfod('breakfast', 'eggs spam ham')
				    >>> breakfast()
				    breakfast(eggs=None, spam=None, ham=None)
				    >>> bfseq = Sequencer('breakfast')
				    >>> bfseq.append_encdec(None, EncDecSimple('eggs', 1))
				    >>> bfseq.append_encdec('yuck', EncDecSimple('spam', 1))
				    >>> bfseq.append_encdec(None, EncDecSimple('ham', 1))
				    >>> e = EncDecTyped(breakfast, 'bfname', bfseq)
				    >>> e.unpack({}, {'yuck': False}, b'\x02\x01\x04', 0)
				    (breakfast(eggs=2, spam=None, ham=1), 2)

				    Only two of the three bytes were consumed: eggs=2, ham=1.

				    >>> e.unpack({}, {'yuck': True}, b'\x02\x01\x04', 0)
				    (breakfast(eggs=2, spam=1, ham=4), 3)

				    Here the third byte was consumed too, hence ham=4.
				    """
				    def __init__(self, klass, name, sequence, aux=None):
				        """
				        Bind object factory <klass> and its member <sequence>.

				        The sequence must have exactly one entry per field of
				        a freshly made klass() instance.
				        """
				        assert len(sequence) == len(klass()._fields) # temporary
				        super(EncDecTyped, self).__init__(name, aux)
				        self.sequence = sequence
				        self.name = name
				        self.klass = klass

				    def __repr__(self):
				        cls_name = self.__class__.__name__
				        if self.aux is not None:
				            # aux is shown only when one was supplied
				            return '{0}({1!r}, {2!r}, {3!r}, {4!r})'.format(cls_name,
				                self.klass, self.name, self.sequence, self.aux)
				        return '{0}({1!r}, {2!r}, {3!r})'.format(cls_name,
				            self.klass, self.name, self.sequence)

				    __str__ = __repr__

				    def apack(self, vdict, cdict, val):
				        """
				        Pack the members of <val> through our sequencer.

				        <val> itself plays the part of the variable dictionary
				        for the sequencer; <vdict> is not consulted.  Members may
				        be packed conditionally, as selected by <cdict>.
				        """
				        members = val
				        return self.sequence.apack(members, cdict)

				    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
				        """
				        Build a new self.klass object and fill in its members
				        from <bstring>, starting at <offset>.

				        Returns the new object and the offset just past the
				        bytes used.  Members may be unpacked conditionally.
				        """
				        instance = self.klass()
				        end = self.sequence.unpack_from(instance, cdict, bstring, offset,
				            noerror)
				        return instance, end

				class EncDecA(EncDec):
				    r"""
				    En/de-coder for arrays, i.e., repeated objects.

				    It is given the name of the variable that holds the repeat
				    count, plus a sub-coder.  That lets us en/de-code, e.g.,
				    repeat='nwname' copies of name='wname', or nwname of
				    name='wqid', in a Twalk en/de-code.

				    The repeat count itself is neither packed nor unpacked
				    here -- higher level code has to do that.  Its value is
				    simply looked up in vdict.

				    >>> subcode = EncDecSimple('wname', '_string_')
				    >>> e = EncDecA('nwname', 'wname', subcode)
				    >>> e.b2s(e.pack({'nwname': 2}, {}, ['A', 'BC']))
				    '\x01\x00A\x02\x00BC'

				    >>> subcode.autob2s = True # so that A and BC decode to py3k str
				    >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02\x00BC', 0)
				    (['A', 'BC'], 7)

				    With noerror, the None-s begin at the first sub-item that
				    cannot be decoded completely.  A string whose length field
				    cannot be decoded is also taken to be zero bytes long, for
				    the purpose of showing the expected packet length:

				    >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02\x00', 0, noerror=True)
				    (['A', None], 7)
				    >>> e.unpack({'nwname': 2}, {}, b'\x01\x00A\x02', 0, noerror=True)
				    (['A', None], 5)
				    >>> e.unpack({'nwname': 3}, {}, b'\x01\x00A\x02', 0, noerror=True)
				    (['A', None, None], 7)

				    Passing None as the sub-coder is a special case: the
				    repeated item is then packed or unpacked as a plain byte
				    string.  (autob2s is not supported in this mode.)
				    A too-short byte string is simply truncated!

				    >>> e = EncDecA('count', 'data', None)
				    >>> e.b2s(e.pack({'count': 5}, {}, b'12345'))
				    '12345'
				    >>> x = list(e.unpack({'count': 3}, {}, b'123', 0))
				    >>> x[0] = e.b2s(x[0])
				    >>> x
				    ['123', 3]
				    >>> x = list(e.unpack({'count': 3}, {}, b'12', 0, noerror=True))
				    >>> x[0] = e.b2s(x[0])
				    >>> x
				    ['12', 3]
				    """
				    def __init__(self, repeat, name, sub, aux=None):
				        """
				        <repeat> names the count variable, <name> the array,
				        and <sub> is the per-item coder (None for raw bytes).
				        """
				        super(EncDecA, self).__init__(name, aux)
				        self.sub = sub
				        self.name = name
				        self.repeat = repeat

				    def __repr__(self):
				        cls_name = self.__class__.__name__
				        if self.aux is not None:
				            # aux is shown only when one was supplied
				            return '{0}({1!r}, {2!r}, {3!r}, {4!r})'.format(cls_name,
				                self.repeat, self.name, self.sub, self.aux)
				        return '{0}({1!r}, {2!r}, {3!r})'.format(cls_name,
				            self.repeat, self.name, self.sub)

				    __str__ = __repr__

				    def apack(self, vdict, cdict, val):
				        """
				        Pack every item of <val>; the count stored under
				        vdict[self.repeat] has to equal len(val).
				        """
				        count = vdict[self.repeat]
				        assert count == len(val)
				        if self.sub is None:
				            # raw mode: the value is already the bytes to emit
				            assert isinstance(val, bytes)
				            return [val]
				        return [piece
				                for element in val
				                for piece in self.sub.apack(vdict, cdict, element)]

				    def unpack(self, vdict, cdict, bstring, offset, noerror=False):
				        """
				        Unpack vdict[self.repeat] items into a new list (or, in
				        raw mode, slice that many bytes out of <bstring>).

				        Returns the result together with the new offset.
				        """
				        count = vdict[self.repeat]
				        if count is None and noerror:
				            # the count could not be decoded; treat as empty
				            count = 0
				        else:
				            assert count >= 0
				        if self.sub is None:
				            end = offset + count
				            if len(bstring) < end and not noerror:
				                raise SequenceError('out of data '
				                    'while unpacking {0!r}'.format(self.name))
				            return bstring[offset:end], end
				        items = []
				        for _ in range(count):
				            # each item begins where the previous one stopped
				            item, offset = self.sub.unpack(vdict, cdict, bstring, offset,
				                noerror)
				            items.append(item)
				        return items, offset

				class SequenceError(Exception):
				    """
				    Error raised by the coders and sequencers in this module:
				    a value could not be packed (e.g., an item is too big), the
				    input ran out of data, or bytes were left unconsumed.
				    """

				class Sequencer(object):
				    r"""
				    A sequencer packs (marshals) or unpacks (unmarshals) a
				    series of objects, each according to its EncDec instance.

				    The objects and their values are taken from, or stored
				    into, a dictionary: <vdict>, which is the first argument
				    to pack/unpack.

				    Fields can be conditional.  The conditions live in a
				    separate dictionary, the second (<cdict>) argument.

				    An object may itself be a dictionary or PFOD, e.g., a
				    Plan9 qid or stat structure, with a sub-encoding of its
				    own.

				    Like the individual encoders, a sequencer offers apack()
				    (returns a list of parts) as well as plain pack().  Users
				    should mostly stick with plain pack().

				    >>> s = Sequencer('monty')
				    >>> s
				    Sequencer('monty')
				    >>> e = EncDecSimple('eggs', 2)
				    >>> s.append_encdec(None, e)
				    >>> s.append_encdec(None, EncDecSimple('spam', 1))
				    >>> s[0]
				    (None, EncDecSimple('eggs', 2))
				    >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {}))
				    '\x01\x02A'

				    A conditional field shows up in packed output, or is
				    taken from the byte-string while unpacking, only when
				    its condition is true.

				    As with struct, unpack_from lets you start at an
				    arbitrary offset and/or skip the check that the whole
				    byte-string gets consumed.

				    >>> s = Sequencer('python')
				    >>> s.append_encdec(None, e)
				    >>> s.append_encdec('.u', EncDecSimple('spam', 1))
				    >>> s[1]
				    ('.u', EncDecSimple('spam', 1))
				    >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {'.u': True}))
				    '\x01\x02A'
				    >>> e.b2s(s.pack({'eggs': 513, 'spam': 65}, {'.u': False}))
				    '\x01\x02'

				    >>> d = {}
				    >>> s.unpack(d, {'.u': True}, b'\x01\x02A')
				    >>> print(d['eggs'], d['spam'])
				    513 65
				    >>> d = {}
				    >>> s.unpack(d, {'.u': False}, b'\x01\x02A', 0)
				    Traceback (most recent call last):
				    ...
				    SequenceError: 1 byte(s) unconsumed
				    >>> s.unpack_from(d, {'.u': False}, b'\x01\x02A', 0)
				    2
				    >>> print(d)
				    {'eggs': 513}

				    The dictionary-like object passed in may already hold
				    entries; only the sequences that decode get filled in:

				    >>> d = {'eggs': None, 'spam': None}
				    >>> s.unpack_from(d, {'.u': False}, b'\x01\x02A', 0)
				    2
				    >>> print(d['eggs'], d['spam'])
				    513 None

				    An object can be an array.  Its EncDec is then really an
				    EncDecA, the repeat count has to be in the dictionary, and
				    the object needs a len() and must be index-able:

				    >>> s = Sequencer('arr')
				    >>> s.append_encdec(None, EncDecSimple('n', 1))
				    >>> ae = EncDecSimple('array', 2)
				    >>> s.append_encdec(None, EncDecA('n', 'array', ae))
				    >>> ae.b2s(s.pack({'n': 2, 'array': [257, 514]}, {}))
				    '\x02\x01\x01\x02\x02'

				    Unpacking an array yields a list with that many items.
				    The encoder that decodes the item count has to come
				    earlier in the sequencer, so that the dictionary already
				    holds the repeat-count variable when the array's encdec
				    is reached:

				    >>> d = {}
				    >>> s.unpack(d, {}, b'\x01\x04\x00')
				    >>> d['n'], d['array']
				    (1, [4])
				    """
				    def __init__(self, name):
				        self.name = name
				        # ordered (condition, coder) pairs; see append_encdec()
				        self._codes = []
				        self.debug = False # or sys.stderr

				    def __repr__(self):
				        return '{0}({1!r})'.format(self.__class__.__name__, self.name)

				    __str__ = __repr__

				    def __len__(self):
				        return len(self._codes)

				    def __iter__(self):
				        return iter(self._codes)

				    def __getitem__(self, index):
				        return self._codes[index]

				    def dprint(self, args, *kwargs):
				        """
				        Debug print; does nothing unless self.debug is set.

				        When self.debug is a bool the text goes to sys.stdout,
				        otherwise self.debug itself is used as the output file.
				        """
				        if self.debug:
				            dest = sys.stdout if isinstance(self.debug, bool) else self.debug
				            print(args, file=dest, *kwargs)

				    def append_encdec(self, cond, code):
				        "add EncDec en/de-coder, conditional on cond"
				        entry = (cond, code)
				        self._codes.append(entry)

				    def apack(self, vdict, cdict):
				        """
				        Pack every selected field, returning the list of parts.
				        """
				        parts = []
				        for cond, code in self._codes:
				            if cond is None or cdict[cond]:
				                # Unconditional, or the condition holds: pack it.
				                self.dprint('pack %r - no cond or %r is True' % (code, cond))
				                parts.extend(code.apack(vdict, cdict, vdict[code.name]))
				            else:
				                # Conditional on something false: leave it out.
				                self.dprint('skip %r - %r is False' % (code, cond))
				        return parts

				    def pack(self, vdict, cdict):
				        """
				        Pack every selected field into one flat byte-string.
				        """
				        parts = self.apack(vdict, cdict)
				        return b''.join(parts)

				    def unpack_from(self, vdict, cdict, bstring, offset=0, noerror=False):
				        """
				        Unpack fields from <bstring>, beginning at <offset>.

				        The decoded values are stored into the dictionary
				        vdict; some of the entries may in turn be ordered
				        dictionaries created by typedefed codes.  The offset
				        just past the last decoded field is returned.

				        A too-short string raises SequenceError, unless
				        noerror is set, in which case we assume you want to
				        see what you can get out of the data.
				        """
				        for cond, code in self._codes:
				            if cond is None or cdict[cond]:
				                # Unconditional, or the condition holds: unpack it.
				                self.dprint('unpack %r - no cond or %r is True' % (code, cond))
				                value, offset = code.unpack(vdict, cdict, bstring, offset,
				                    noerror)
				                vdict[code.name] = value
				            else:
				                # Conditional on something false: leave it out.
				                self.dprint('skip %r - %r is False' % (code, cond))
				        return offset

				    def unpack(self, vdict, cdict, bstring, noerror=False):
				        """
				        Same as unpack_from starting at offset 0, except that
				        the whole byte string has to be used up, unless
				        noerror=True.  Nothing is returned.
				        """
				        end = self.unpack_from(vdict, cdict, bstring, 0, noerror)
				        if noerror:
				            return
				        if end != len(bstring):
				            raise SequenceError('{0} byte(s) unconsumed'.format(
				                len(bstring) - end))

				if __name__ == '__main__':
				    # Run as a script: execute the doctests embedded in the
				    # docstrings of this module.
				    import doctest
				    doctest.testmod()