From 22fbfbd37cede755facddc8ef966fd2701ba4567 Mon Sep 17 00:00:00 2001 From: Signal Linden Date: Mon, 26 Sep 2022 11:18:49 -0700 Subject: [PATCH 01/13] Update CREDITS.md --- CREDITS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CREDITS.md b/CREDITS.md index 8ff6129..df3f841 100644 --- a/CREDITS.md +++ b/CREDITS.md @@ -1,4 +1,4 @@ # Credits Thanks to [Tao Takashi](https://github.com/mrtopf) for -llsd PyPI package name. +the llsd PyPI package name. From 2f252dfc9799bb86fecb2a53ec247e497a2a04a0 Mon Sep 17 00:00:00 2001 From: Salad Dais Date: Wed, 28 Sep 2022 07:22:44 +0000 Subject: [PATCH 02/13] Speed up LLSD decoding --- llsd/base.py | 104 ++++++++++++++++++++++------------------- llsd/serde_binary.py | 2 + llsd/serde_notation.py | 2 + llsd/serde_xml.py | 1 + 4 files changed, 60 insertions(+), 49 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index 6ac695f..1966234 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -321,19 +321,6 @@ def _to_python(node): return NODE_HANDLERS[node.tag](node) -def _hex_as_nybble(hex): - "Accepts a single hex character and returns a nybble." - if (hex >= b'0') and (hex <= b'9'): - return ord(hex) - ord(b'0') - elif (hex >= b'a') and (hex <=b'f'): - return 10 + ord(hex) - ord(b'a') - elif (hex >= b'A') and (hex <=b'F'): - return 10 + ord(hex) - ord(b'A') - else: - raise LLSDParseError('Invalid hex character: %s' % hex) - - - class LLSDBaseFormatter(object): """ This base class cannot be instantiated on its own: it assumes a subclass @@ -366,13 +353,20 @@ def __init__(self): } +_X_ORD = ord(b'x') +_BACKSLASH_ORD = ord(b'\\') +_UNESC_BUFF_LEN = 1024 + + class LLSDBaseParser(object): """ Utility methods useful for parser subclasses. """ + __slots__ = ['_buffer', '_index'] + def __init__(self): self._buffer = b'' - self._index = 0 + self._index = 0 def _error(self, message, offset=0): try: @@ -399,53 +393,65 @@ def _getc(self, num=1): # map char following escape char to corresponding character _escaped = { - b'a': b'\a', - b'b': b'\b', - b'f': b'\f', - b'n': b'\n', - b'r': b'\r', - b't': b'\t', - b'v': b'\v', + ord(b'a'): ord(b'\a'), + ord(b'b'): ord(b'\b'), + ord(b'f'): ord(b'\f'), + ord(b'n'): ord(b'\n'), + ord(b'r'): ord(b'\r'), + ord(b't'): ord(b'\t'), + ord(b'v'): ord(b'\v'), } def _parse_string_delim(self, delim): "Parse a delimited string." - parts = bytearray() - found_escape = False - found_hex = False - found_digit = False - byte = 0 + insert_idx = 0 + delim_ord = ord(delim) + unesc_buff = bytearray(_UNESC_BUFF_LEN) + # Cache these in locals + buff = self._buffer + read_idx = self._index while True: - cc = self._getc() - if found_escape: - if found_hex: - if found_digit: - found_escape = False - found_hex = False - found_digit = False - byte <<= 4 - byte |= _hex_as_nybble(cc) - parts.append(byte) - byte = 0 - else: - found_digit = True - byte = _hex_as_nybble(cc) - elif cc == b'x': - found_hex = True + try: + cc = buff[read_idx] + except IndexError: + self._index = read_idx + self._error("Trying to read past end of buffer") + return + read_idx += 1 + + if cc == _BACKSLASH_ORD: + # Backslash, figure out if this is an \xNN hex escape or + # something like \t + cc = buff[read_idx] + read_idx += 1 + if cc == _X_ORD: + # Read the two hex nybbles + byte_val = int(chr(buff[read_idx]), 16) + read_idx += 1 + byte_val = (byte_val << 4) | int(chr(buff[read_idx]), 16) + read_idx += 1 + unesc_buff[insert_idx] = byte_val else: - found_escape = False # escape char preceding anything other than the chars in # _escaped just results in that same char without the # escape char - parts.extend(self._escaped.get(cc, cc)) - elif cc == b'\\': - found_escape = True - elif cc == delim: + unesc_buff[insert_idx] = self._escaped.get(cc, cc) + elif cc == delim_ord: break else: - parts.extend(cc) + unesc_buff[insert_idx] = cc + + insert_idx += 1 + + # We inserted a character, check if we need to expand the buffer. + if insert_idx % _UNESC_BUFF_LEN == 0: + # Any string this long may overflow the escape buffer, + # make a new expanded buffer + unesc_buff = bytearray(unesc_buff) + unesc_buff.extend(b"\x00" * _UNESC_BUFF_LEN) try: - return parts.decode('utf-8') + self._index = read_idx + return unesc_buff[:insert_idx].decode('utf-8') except UnicodeDecodeError as exc: self._error(exc) diff --git a/llsd/serde_binary.py b/llsd/serde_binary.py index 41d24f7..5392e32 100644 --- a/llsd/serde_binary.py +++ b/llsd/serde_binary.py @@ -13,6 +13,8 @@ class LLSDBinaryParser(LLSDBaseParser): See http://wiki.secondlife.com/wiki/LLSD#Binary_Serialization """ + __slots__ = ['_dispatch', '_keep_binary'] + def __init__(self): super(LLSDBinaryParser, self).__init__() # One way of dispatching based on the next character we see would be a diff --git a/llsd/serde_notation.py b/llsd/serde_notation.py index 73cae03..067dff3 100644 --- a/llsd/serde_notation.py +++ b/llsd/serde_notation.py @@ -328,6 +328,8 @@ class LLSDNotationFormatter(LLSDBaseFormatter): See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization """ + __slots__ = [] + def LLSD(self, v): return self._generate(v.thing) def UNDEF(self, v): diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index fcec338..c8404a5 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -36,6 +36,7 @@ class LLSDXMLFormatter(LLSDBaseFormatter): module level format_xml is the most convenient interface to this functionality. """ + __slots__ = [] def _elt(self, name, contents=None): "Serialize a single element." From 6d1ce89416a827aaa1140ff8fb966b2ae8b3c303 Mon Sep 17 00:00:00 2001 From: Salad Dais Date: Wed, 28 Sep 2022 08:05:36 +0000 Subject: [PATCH 03/13] Further notation parse speedup by avoiding unnecessary comparisons Checking if we're currently within an escape sequence every single loop iteration is wasteful. --- llsd/base.py | 54 ++++++++++++++++++++++++---------------------- tests/llsd_test.py | 6 ++++++ 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index 1966234..43c62a9 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -407,40 +407,42 @@ def _parse_string_delim(self, delim): insert_idx = 0 delim_ord = ord(delim) unesc_buff = bytearray(_UNESC_BUFF_LEN) - # Cache these in locals + # Cache these in locals, otherwise we have to perform a lookup on + # `self` inside our hot loop. buff = self._buffer read_idx = self._index + cc = 0 while True: try: cc = buff[read_idx] + read_idx += 1 + + if cc == _BACKSLASH_ORD: + # Backslash, figure out if this is an \xNN hex escape or + # something like \t + cc = buff[read_idx] + read_idx += 1 + if cc == _X_ORD: + # It's a hex escape. char is the value of the two + # following hex nybbles + cc = int(chr(buff[read_idx]), 16) << 4 + read_idx += 1 + cc |= int(chr(buff[read_idx]), 16) + read_idx += 1 + else: + # escape char preceding anything other than the chars + # in _escaped just results in that same char without + # the escape char + cc = self._escaped.get(cc, cc) + elif cc == delim_ord: + break except IndexError: + # We can be reasonably sure that any IndexErrors inside here + # were caused by an out-of-bounds `buff[read_idx]`. self._index = read_idx self._error("Trying to read past end of buffer") - return - read_idx += 1 - - if cc == _BACKSLASH_ORD: - # Backslash, figure out if this is an \xNN hex escape or - # something like \t - cc = buff[read_idx] - read_idx += 1 - if cc == _X_ORD: - # Read the two hex nybbles - byte_val = int(chr(buff[read_idx]), 16) - read_idx += 1 - byte_val = (byte_val << 4) | int(chr(buff[read_idx]), 16) - read_idx += 1 - unesc_buff[insert_idx] = byte_val - else: - # escape char preceding anything other than the chars in - # _escaped just results in that same char without the - # escape char - unesc_buff[insert_idx] = self._escaped.get(cc, cc) - elif cc == delim_ord: - break - else: - unesc_buff[insert_idx] = cc + unesc_buff[insert_idx] = cc insert_idx += 1 # We inserted a character, check if we need to expand the buffer. @@ -463,4 +465,4 @@ def starts_with(startstr, something): pos = something.tell() s = something.read(len(startstr)) something.seek(pos, os.SEEK_SET) - return (s == startstr) \ No newline at end of file + return (s == startstr) diff --git a/tests/llsd_test.py b/tests/llsd_test.py index e8d5fe4..a705d2e 100644 --- a/tests/llsd_test.py +++ b/tests/llsd_test.py @@ -507,6 +507,12 @@ def testParseNotationIncorrectMIME(self): except llsd.LLSDParseError: pass + def testParseNotationUnterminatedString(self): + """ + Test with an unterminated delimited string + """ + self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'foo") + class LLSDBinaryUnitTest(unittest.TestCase): """ From 147e7eaded1cfc0d81bab706ba954e0b1b92ebf2 Mon Sep 17 00:00:00 2001 From: Salad Dais Date: Wed, 28 Sep 2022 08:21:08 +0000 Subject: [PATCH 04/13] Used a shared pre-sized string decode buffer This avoids an unnecessary alloc in the common case of strings under 1024 chars. --- llsd/base.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index 43c62a9..be087c0 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -362,11 +362,12 @@ class LLSDBaseParser(object): """ Utility methods useful for parser subclasses. """ - __slots__ = ['_buffer', '_index'] + __slots__ = ['_buffer', '_index', '_unesc_buff'] def __init__(self): self._buffer = b'' self._index = 0 + self._unesc_buff = bytearray(_UNESC_BUFF_LEN) def _error(self, message, offset=0): try: @@ -406,9 +407,11 @@ def _parse_string_delim(self, delim): "Parse a delimited string." insert_idx = 0 delim_ord = ord(delim) - unesc_buff = bytearray(_UNESC_BUFF_LEN) + # Preallocate a working buffer for the unescaped string output + # to avoid allocs in the hot loop. + unesc_buff = self._unesc_buff # Cache these in locals, otherwise we have to perform a lookup on - # `self` inside our hot loop. + # `self` in the hot loop. buff = self._buffer read_idx = self._index cc = 0 @@ -453,6 +456,7 @@ def _parse_string_delim(self, delim): unesc_buff.extend(b"\x00" * _UNESC_BUFF_LEN) try: self._index = read_idx + # Slice off only what we used of the working decode buffer return unesc_buff[:insert_idx].decode('utf-8') except UnicodeDecodeError as exc: self._error(exc) From 3f91c7bd6d84d222bc1b45b694676288270b68be Mon Sep 17 00:00:00 2001 From: Salad Dais Date: Wed, 28 Sep 2022 12:55:49 +0000 Subject: [PATCH 05/13] Clarify naming and comments --- llsd/base.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index be087c0..aeead6d 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -355,19 +355,20 @@ def __init__(self): _X_ORD = ord(b'x') _BACKSLASH_ORD = ord(b'\\') -_UNESC_BUFF_LEN = 1024 +_DECODE_BUFF_ALLOC_SIZE = 1024 class LLSDBaseParser(object): """ Utility methods useful for parser subclasses. """ - __slots__ = ['_buffer', '_index', '_unesc_buff'] + __slots__ = ['_buffer', '_index', '_decode_buff'] def __init__(self): self._buffer = b'' self._index = 0 - self._unesc_buff = bytearray(_UNESC_BUFF_LEN) + # Scratch space for decoding delimited strings + self._decode_buff = bytearray(_DECODE_BUFF_ALLOC_SIZE) def _error(self, message, offset=0): try: @@ -407,9 +408,9 @@ def _parse_string_delim(self, delim): "Parse a delimited string." insert_idx = 0 delim_ord = ord(delim) - # Preallocate a working buffer for the unescaped string output + # Preallocate a working buffer for the decoded string output # to avoid allocs in the hot loop. - unesc_buff = self._unesc_buff + decode_buff = self._decode_buff # Cache these in locals, otherwise we have to perform a lookup on # `self` in the hot loop. buff = self._buffer @@ -445,19 +446,20 @@ def _parse_string_delim(self, delim): self._index = read_idx self._error("Trying to read past end of buffer") - unesc_buff[insert_idx] = cc + decode_buff[insert_idx] = cc insert_idx += 1 # We inserted a character, check if we need to expand the buffer. - if insert_idx % _UNESC_BUFF_LEN == 0: - # Any string this long may overflow the escape buffer, - # make a new expanded buffer - unesc_buff = bytearray(unesc_buff) - unesc_buff.extend(b"\x00" * _UNESC_BUFF_LEN) + if insert_idx % _DECODE_BUFF_ALLOC_SIZE == 0: + # Any additions may now overflow the decoding buffer, make + # a new expanded buffer containing the existing contents. + decode_buff = bytearray(decode_buff) + decode_buff.extend(b"\x00" * _DECODE_BUFF_ALLOC_SIZE) try: + # Sync our local read index with the canonical one self._index = read_idx # Slice off only what we used of the working decode buffer - return unesc_buff[:insert_idx].decode('utf-8') + return decode_buff[:insert_idx].decode('utf-8') except UnicodeDecodeError as exc: self._error(exc) From 2764149aebd1fbc19c1a49ea36bf567b784b1a44 Mon Sep 17 00:00:00 2001 From: Salad Dais Date: Wed, 28 Sep 2022 16:44:46 +0000 Subject: [PATCH 06/13] Make PY2's `self._buffer[n]` behavior match PY3's `bytes` on PY2 are really `str`s, so `val[n]` returns a character rather than an integer. Wrapping bytes values on PY2 helps us preserve semantics without requiring branching on Python version or version-specific lambdas to peek from `self._buffer`. --- llsd/base.py | 11 +++++++++++ llsd/serde_binary.py | 6 +++++- llsd/serde_notation.py | 7 ++++++- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index aeead6d..0c5c0c7 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -141,6 +141,17 @@ def B(fmt): return fmt +class PY3SemanticBytes(BytesType): + """Wrapper to make `buffer[n]` return an integer like in Py3""" + __slots__ = [] + + def __getitem__(self, item): + ret = super(PY3SemanticBytes, self).__getitem__(item) + if is_integer(item): + return ord(ret) + return ret + + def is_integer(o): """ portable test if an object is like an int """ return isinstance(o, IntTypes) diff --git a/llsd/serde_binary.py b/llsd/serde_binary.py index 5392e32..cbf65e4 100644 --- a/llsd/serde_binary.py +++ b/llsd/serde_binary.py @@ -4,7 +4,7 @@ import uuid from llsd.base import (_LLSD, LLSDBaseParser, LLSDSerializationError, _str_to_bytes, binary, is_integer, is_string, - starts_with, uri) + starts_with, uri, PY2, is_bytes, PY3SemanticBytes) class LLSDBinaryParser(LLSDBaseParser): @@ -63,6 +63,10 @@ def parse(self, buffer, ignore_binary = False): :param ignore_binary: parser throws away data in llsd binary nodes. :returns: returns a python object. """ + if PY2 and is_bytes(buffer): + # We need to wrap this in a helper class so that individual element + # access works the same as in PY3 + buffer = PY3SemanticBytes(buffer) self._buffer = buffer self._index = 0 self._keep_binary = not ignore_binary diff --git a/llsd/serde_notation.py b/llsd/serde_notation.py index 067dff3..e2e9340 100644 --- a/llsd/serde_notation.py +++ b/llsd/serde_notation.py @@ -4,7 +4,7 @@ import uuid from llsd.base import (_LLSD, B, LLSDBaseFormatter, LLSDBaseParser, LLSDParseError, LLSDSerializationError, UnicodeType, - _format_datestr, _parse_datestr, _str_to_bytes, binary, uri) + _format_datestr, _parse_datestr, _str_to_bytes, binary, uri, PY2, is_bytes, PY3SemanticBytes) _int_regex = re.compile(br"[-+]?\d+") _real_regex = re.compile(br"[-+]?(?:(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?)|[-+]?inf|[-+]?nan") @@ -86,6 +86,11 @@ def parse(self, buffer, ignore_binary = False): if buffer == b"": return False + if PY2 and is_bytes(buffer): + # We need to wrap this in a helper class so that individual element + # access works the same as in PY3 + buffer = PY3SemanticBytes(buffer) + self._buffer = buffer self._index = 0 return self._parse() From 6bb156a97978c0be4151431d8eaf7f3082b4784e Mon Sep 17 00:00:00 2001 From: Salad Dais Date: Wed, 28 Sep 2022 17:22:24 +0000 Subject: [PATCH 07/13] Give context for hex escape decode failures --- llsd/base.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index 0c5c0c7..b59ffeb 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -440,10 +440,17 @@ def _parse_string_delim(self, delim): if cc == _X_ORD: # It's a hex escape. char is the value of the two # following hex nybbles - cc = int(chr(buff[read_idx]), 16) << 4 - read_idx += 1 - cc |= int(chr(buff[read_idx]), 16) - read_idx += 1 + try: + cc = int(chr(buff[read_idx]), 16) << 4 + read_idx += 1 + cc |= int(chr(buff[read_idx]), 16) + read_idx += 1 + except ValueError as e: + # One of the hex characters was likely invalid. + # Wrap the ValueError so that we can provide a + # byte offset in the error. + self._index = read_idx + self._error(str(e)) else: # escape char preceding anything other than the chars # in _escaped just results in that same char without From 7330f3117c162877c72cc39911c10750e9322d7d Mon Sep 17 00:00:00 2001 From: Salad Dais Date: Wed, 28 Sep 2022 17:50:58 +0000 Subject: [PATCH 08/13] Only realloc / resize the decode buffer if we actually overflow Shaves 100ms off my testcase's runtime. --- llsd/base.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index b59ffeb..bd00db3 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -464,15 +464,17 @@ def _parse_string_delim(self, delim): self._index = read_idx self._error("Trying to read past end of buffer") - decode_buff[insert_idx] = cc - insert_idx += 1 - - # We inserted a character, check if we need to expand the buffer. - if insert_idx % _DECODE_BUFF_ALLOC_SIZE == 0: - # Any additions may now overflow the decoding buffer, make - # a new expanded buffer containing the existing contents. + try: + decode_buff[insert_idx] = cc + except IndexError: + # Oops, that overflowed the decoding buffer, make a + # new expanded buffer containing the existing contents. decode_buff = bytearray(decode_buff) decode_buff.extend(b"\x00" * _DECODE_BUFF_ALLOC_SIZE) + decode_buff[insert_idx] = cc + + insert_idx += 1 + try: # Sync our local read index with the canonical one self._index = read_idx From 5abdc32015d584d6254ecba7e5aec8b1436fcb2b Mon Sep 17 00:00:00 2001 From: Salad Dais Date: Wed, 28 Sep 2022 18:08:33 +0000 Subject: [PATCH 09/13] Add explanatory comment for PY3SemanticBytes --- llsd/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llsd/base.py b/llsd/base.py index bd00db3..fc86b5e 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -147,6 +147,8 @@ class PY3SemanticBytes(BytesType): def __getitem__(self, item): ret = super(PY3SemanticBytes, self).__getitem__(item) + # `buffer[n]` should return an integer, but slice syntax like + # `buffer[n:n+1]` should still return a `Bytes` object as before. if is_integer(item): return ord(ret) return ret From 232902c606e32c17f9c10560d2792ffe60409600 Mon Sep 17 00:00:00 2001 From: Salad Dais Date: Wed, 28 Sep 2022 19:02:21 +0000 Subject: [PATCH 10/13] Add testcases for hex escape parse failures --- tests/llsd_test.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/llsd_test.py b/tests/llsd_test.py index a705d2e..4cb3f2e 100644 --- a/tests/llsd_test.py +++ b/tests/llsd_test.py @@ -513,6 +513,11 @@ def testParseNotationUnterminatedString(self): """ self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'foo") + def testParseNotationTruncatedHex(self): + self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\xf") + + def testParseNotationInvalidHex(self): + self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\xzz'") class LLSDBinaryUnitTest(unittest.TestCase): """ From 1bb6812e117d80387fcdffbb1403cf872e6513bb Mon Sep 17 00:00:00 2001 From: Salad Dais Date: Thu, 29 Sep 2022 15:54:07 +0000 Subject: [PATCH 11/13] Speed up hex escape parsing by 30~% --- llsd/base.py | 21 +++++++++++++-------- tests/llsd_test.py | 6 +++++- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/llsd/base.py b/llsd/base.py index fc86b5e..544f480 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -441,18 +441,23 @@ def _parse_string_delim(self, delim): read_idx += 1 if cc == _X_ORD: # It's a hex escape. char is the value of the two - # following hex nybbles + # following hex nybbles. This slice may result in + # a short read (0 or 1 bytes), but either a + # `ValueError` will be triggered by the first case, + # and the second will cause an `IndexError` on the + # next iteration of the loop. + hex_bytes = buff[read_idx:read_idx + 2] + read_idx += 2 try: - cc = int(chr(buff[read_idx]), 16) << 4 - read_idx += 1 - cc |= int(chr(buff[read_idx]), 16) - read_idx += 1 + # int() can parse a `bytes` containing hex, + # no explicit `bytes.decode("ascii")` required. + cc = int(hex_bytes, 16) except ValueError as e: # One of the hex characters was likely invalid. # Wrap the ValueError so that we can provide a # byte offset in the error. self._index = read_idx - self._error(str(e)) + self._error(e, offset=-2) else: # escape char preceding anything other than the chars # in _escaped just results in that same char without @@ -477,9 +482,9 @@ def _parse_string_delim(self, delim): insert_idx += 1 + # Sync our local read index with the canonical one + self._index = read_idx try: - # Sync our local read index with the canonical one - self._index = read_idx # Slice off only what we used of the working decode buffer return decode_buff[:insert_idx].decode('utf-8') except UnicodeDecodeError as exc: diff --git a/tests/llsd_test.py b/tests/llsd_test.py index 4cb3f2e..b86ab96 100644 --- a/tests/llsd_test.py +++ b/tests/llsd_test.py @@ -513,12 +513,16 @@ def testParseNotationUnterminatedString(self): """ self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'foo") - def testParseNotationTruncatedHex(self): + def testParseNotationHexEscapeNoChars(self): + self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\x") + + def testParseNotationHalfTruncatedHex(self): self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\xf") def testParseNotationInvalidHex(self): self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\xzz'") + class LLSDBinaryUnitTest(unittest.TestCase): """ This class aggregates all the tests for parse_binary and LLSD.as_binary From 7cb6d424d009c03820d9cf140fa2d4c462c6ec8d Mon Sep 17 00:00:00 2001 From: Bennett Goble Date: Mon, 3 Oct 2022 10:49:37 -0700 Subject: [PATCH 12/13] Re-export parse/format classes from main module Allow users to simply `from llsd import LLSDBinaryParser` --- llsd/__init__.py | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/llsd/__init__.py b/llsd/__init__.py index 9727479..85c3471 100644 --- a/llsd/__init__.py +++ b/llsd/__init__.py @@ -9,31 +9,9 @@ """ from llsd.base import (_LLSD, BINARY_MIME_TYPE, NOTATION_MIME_TYPE, XML_MIME_TYPE, LLSDParseError, LLSDSerializationError, LongType, UnicodeType, binary, starts_with, undef, uri) -from llsd.serde_binary import format_binary, parse_binary -from llsd.serde_notation import format_notation, parse_notation -from llsd.serde_xml import format_pretty_xml, format_xml, parse_xml - -__all__ = [ - "BINARY_MIME_TYPE", - "LLSD", - "LLSDParseError", - "LLSDSerializationError", - "LongType", - "NOTATION_MIME_TYPE", - "UnicodeType", - "XML_MIME_TYPE", - "binary", - "format_binary", - "format_notation", - "format_pretty_xml", - "format_xml", - "parse", - "parse_binary", - "parse_notation", - "parse_xml", - "undef", - "uri", -] +from llsd.serde_binary import LLSDBinaryParser, format_binary, parse_binary +from llsd.serde_notation import LLSDNotationFormatter, LLSDNotationParser, format_notation, parse_notation +from llsd.serde_xml import LLSDXMLFormatter, LLSDXMLPrettyFormatter, format_pretty_xml, format_xml, parse_xml def parse(something, mime_type = None): @@ -81,4 +59,4 @@ def __str__(self): as_xml = staticmethod(format_xml) as_pretty_xml = staticmethod(format_pretty_xml) as_binary = staticmethod(format_binary) - as_notation = staticmethod(format_notation) \ No newline at end of file + as_notation = staticmethod(format_notation) From f590a0b1efbedcbbbf0ea5ec3824768945524708 Mon Sep 17 00:00:00 2001 From: Bennett Goble Date: Thu, 27 Oct 2022 15:07:47 -0700 Subject: [PATCH 13/13] Add automated CLA --- .github/workflows/cla.yaml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/workflows/cla.yaml diff --git a/.github/workflows/cla.yaml b/.github/workflows/cla.yaml new file mode 100644 index 0000000..fa180c6 --- /dev/null +++ b/.github/workflows/cla.yaml @@ -0,0 +1,25 @@ +name: Check CLA + +on: + issue_comment: + types: [created] + pull_request_target: + types: [opened, closed, synchronize] + +jobs: + cla: + name: Check CLA + runs-on: ubuntu-latest + steps: + - name: CLA Assistant + if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I hereby sign the CLA') || github.event_name == 'pull_request_target' + uses: secondlife-3p/contributor-assistant@v2 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PERSONAL_ACCESS_TOKEN: ${{ secrets.SHARED_CLA_TOKEN }} + with: + branch: main + path-to-document: https://github.com/secondlife/cla/blob/master/CLA.md + path-to-signatures: signatures.json + remote-organization-name: secondlife + remote-repository-name: cla-signatures