From 22fbfbd37cede755facddc8ef966fd2701ba4567 Mon Sep 17 00:00:00 2001
From: Signal Linden <signal@lindenlab.com>
Date: Mon, 26 Sep 2022 11:18:49 -0700
Subject: [PATCH 01/13] Update CREDITS.md

---
 CREDITS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CREDITS.md b/CREDITS.md
index 8ff6129..df3f841 100644
--- a/CREDITS.md
+++ b/CREDITS.md
@@ -1,4 +1,4 @@
 # Credits
 
 Thanks to [Tao Takashi](https://github.com/mrtopf) for
-llsd PyPI package name.
+the llsd PyPI package name.

From 2f252dfc9799bb86fecb2a53ec247e497a2a04a0 Mon Sep 17 00:00:00 2001
From: Salad Dais <SaladDais@users.noreply.github.com>
Date: Wed, 28 Sep 2022 07:22:44 +0000
Subject: [PATCH 02/13] Speed up LLSD decoding

---
 llsd/base.py           | 104 ++++++++++++++++++++++-------------------
 llsd/serde_binary.py   |   2 +
 llsd/serde_notation.py |   2 +
 llsd/serde_xml.py      |   1 +
 4 files changed, 60 insertions(+), 49 deletions(-)

diff --git a/llsd/base.py b/llsd/base.py
index 6ac695f..1966234 100644
--- a/llsd/base.py
+++ b/llsd/base.py
@@ -321,19 +321,6 @@ def _to_python(node):
     return NODE_HANDLERS[node.tag](node)
 
 
-def _hex_as_nybble(hex):
-    "Accepts a single hex character and returns a nybble."
-    if (hex >= b'0') and (hex <= b'9'):
-        return ord(hex) - ord(b'0')
-    elif (hex >= b'a') and (hex <=b'f'):
-        return 10 + ord(hex) - ord(b'a')
-    elif (hex >= b'A') and (hex <=b'F'):
-        return 10 + ord(hex) - ord(b'A')
-    else:
-        raise LLSDParseError('Invalid hex character: %s' % hex)
-
-
-
 class LLSDBaseFormatter(object):
     """
     This base class cannot be instantiated on its own: it assumes a subclass
@@ -366,13 +353,20 @@ def __init__(self):
         }
 
 
+_X_ORD = ord(b'x')
+_BACKSLASH_ORD = ord(b'\\')
+_UNESC_BUFF_LEN = 1024
+
+
 class LLSDBaseParser(object):
     """
     Utility methods useful for parser subclasses.
     """
+    __slots__ = ['_buffer', '_index']
+
     def __init__(self):
         self._buffer = b''
-        self._index  = 0
+        self._index = 0
 
     def _error(self, message, offset=0):
         try:
@@ -399,53 +393,65 @@ def _getc(self, num=1):
 
     # map char following escape char to corresponding character
     _escaped = {
-        b'a': b'\a',
-        b'b': b'\b',
-        b'f': b'\f',
-        b'n': b'\n',
-        b'r': b'\r',
-        b't': b'\t',
-        b'v': b'\v',
+        ord(b'a'): ord(b'\a'),
+        ord(b'b'): ord(b'\b'),
+        ord(b'f'): ord(b'\f'),
+        ord(b'n'): ord(b'\n'),
+        ord(b'r'): ord(b'\r'),
+        ord(b't'): ord(b'\t'),
+        ord(b'v'): ord(b'\v'),
     }
 
     def _parse_string_delim(self, delim):
         "Parse a delimited string."
-        parts = bytearray()
-        found_escape = False
-        found_hex = False
-        found_digit = False
-        byte = 0
+        insert_idx = 0
+        delim_ord = ord(delim)
+        unesc_buff = bytearray(_UNESC_BUFF_LEN)
+        # Cache these in locals
+        buff = self._buffer
+        read_idx = self._index
         while True:
-            cc = self._getc()
-            if found_escape:
-                if found_hex:
-                    if found_digit:
-                        found_escape = False
-                        found_hex = False
-                        found_digit = False
-                        byte <<= 4
-                        byte |= _hex_as_nybble(cc)
-                        parts.append(byte)
-                        byte = 0
-                    else:
-                        found_digit = True
-                        byte = _hex_as_nybble(cc)
-                elif cc == b'x':
-                    found_hex = True
+            try:
+                cc = buff[read_idx]
+            except IndexError:
+                self._index = read_idx
+                self._error("Trying to read past end of buffer")
+                return
+            read_idx += 1
+
+            if cc == _BACKSLASH_ORD:
+                # Backslash, figure out if this is an \xNN hex escape or
+                # something like \t
+                cc = buff[read_idx]
+                read_idx += 1
+                if cc == _X_ORD:
+                    # Read the two hex nybbles
+                    byte_val = int(chr(buff[read_idx]), 16)
+                    read_idx += 1
+                    byte_val = (byte_val << 4) | int(chr(buff[read_idx]), 16)
+                    read_idx += 1
+                    unesc_buff[insert_idx] = byte_val
                 else:
-                    found_escape = False
                     # escape char preceding anything other than the chars in
                     # _escaped just results in that same char without the
                     # escape char
-                    parts.extend(self._escaped.get(cc, cc))
-            elif cc == b'\\':
-                found_escape = True
-            elif cc == delim:
+                    unesc_buff[insert_idx] = self._escaped.get(cc, cc)
+            elif cc == delim_ord:
                 break
             else:
-                parts.extend(cc)
+                unesc_buff[insert_idx] = cc
+
+            insert_idx += 1
+
+            # We inserted a character, check if we need to expand the buffer.
+            if insert_idx % _UNESC_BUFF_LEN == 0:
+                # Any string this long may overflow the escape buffer,
+                # make a new expanded buffer
+                unesc_buff = bytearray(unesc_buff)
+                unesc_buff.extend(b"\x00" * _UNESC_BUFF_LEN)
         try:
-            return parts.decode('utf-8')
+            self._index = read_idx
+            return unesc_buff[:insert_idx].decode('utf-8')
         except UnicodeDecodeError as exc:
             self._error(exc)
 
diff --git a/llsd/serde_binary.py b/llsd/serde_binary.py
index 41d24f7..5392e32 100644
--- a/llsd/serde_binary.py
+++ b/llsd/serde_binary.py
@@ -13,6 +13,8 @@ class LLSDBinaryParser(LLSDBaseParser):
 
     See http://wiki.secondlife.com/wiki/LLSD#Binary_Serialization
     """
+    __slots__ = ['_dispatch', '_keep_binary']
+
     def __init__(self):
         super(LLSDBinaryParser, self).__init__()
         # One way of dispatching based on the next character we see would be a
diff --git a/llsd/serde_notation.py b/llsd/serde_notation.py
index 73cae03..067dff3 100644
--- a/llsd/serde_notation.py
+++ b/llsd/serde_notation.py
@@ -328,6 +328,8 @@ class LLSDNotationFormatter(LLSDBaseFormatter):
 
     See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization
     """
+    __slots__ = []
+
     def LLSD(self, v):
         return self._generate(v.thing)
     def UNDEF(self, v):
diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py
index fcec338..c8404a5 100644
--- a/llsd/serde_xml.py
+++ b/llsd/serde_xml.py
@@ -36,6 +36,7 @@ class LLSDXMLFormatter(LLSDBaseFormatter):
     module level format_xml is the most convenient interface to this
     functionality.
     """
+    __slots__ = []
 
     def _elt(self, name, contents=None):
         "Serialize a single element."

From 6d1ce89416a827aaa1140ff8fb966b2ae8b3c303 Mon Sep 17 00:00:00 2001
From: Salad Dais <SaladDais@users.noreply.github.com>
Date: Wed, 28 Sep 2022 08:05:36 +0000
Subject: [PATCH 03/13] Further notation parse speedup by avoiding unnecessary
 comparisons

Checking if we're currently within an escape sequence every single
loop iteration is wasteful.
---
 llsd/base.py       | 54 ++++++++++++++++++++++++----------------------
 tests/llsd_test.py |  6 ++++++
 2 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/llsd/base.py b/llsd/base.py
index 1966234..43c62a9 100644
--- a/llsd/base.py
+++ b/llsd/base.py
@@ -407,40 +407,42 @@ def _parse_string_delim(self, delim):
         insert_idx = 0
         delim_ord = ord(delim)
         unesc_buff = bytearray(_UNESC_BUFF_LEN)
-        # Cache these in locals
+        # Cache these in locals, otherwise we have to perform a lookup on
+        # `self` inside our hot loop.
         buff = self._buffer
         read_idx = self._index
+        cc = 0
         while True:
             try:
                 cc = buff[read_idx]
+                read_idx += 1
+
+                if cc == _BACKSLASH_ORD:
+                    # Backslash, figure out if this is an \xNN hex escape or
+                    # something like \t
+                    cc = buff[read_idx]
+                    read_idx += 1
+                    if cc == _X_ORD:
+                        # It's a hex escape. char is the value of the two
+                        # following hex nybbles
+                        cc = int(chr(buff[read_idx]), 16) << 4
+                        read_idx += 1
+                        cc |= int(chr(buff[read_idx]), 16)
+                        read_idx += 1
+                    else:
+                        # escape char preceding anything other than the chars
+                        # in _escaped just results in that same char without
+                        # the escape char
+                        cc = self._escaped.get(cc, cc)
+                elif cc == delim_ord:
+                    break
             except IndexError:
+                # We can be reasonably sure that any IndexErrors inside here
+                # were caused by an out-of-bounds `buff[read_idx]`.
                 self._index = read_idx
                 self._error("Trying to read past end of buffer")
-                return
-            read_idx += 1
-
-            if cc == _BACKSLASH_ORD:
-                # Backslash, figure out if this is an \xNN hex escape or
-                # something like \t
-                cc = buff[read_idx]
-                read_idx += 1
-                if cc == _X_ORD:
-                    # Read the two hex nybbles
-                    byte_val = int(chr(buff[read_idx]), 16)
-                    read_idx += 1
-                    byte_val = (byte_val << 4) | int(chr(buff[read_idx]), 16)
-                    read_idx += 1
-                    unesc_buff[insert_idx] = byte_val
-                else:
-                    # escape char preceding anything other than the chars in
-                    # _escaped just results in that same char without the
-                    # escape char
-                    unesc_buff[insert_idx] = self._escaped.get(cc, cc)
-            elif cc == delim_ord:
-                break
-            else:
-                unesc_buff[insert_idx] = cc
 
+            unesc_buff[insert_idx] = cc
             insert_idx += 1
 
             # We inserted a character, check if we need to expand the buffer.
@@ -463,4 +465,4 @@ def starts_with(startstr, something):
         pos = something.tell()
         s = something.read(len(startstr))
         something.seek(pos, os.SEEK_SET)
-        return (s == startstr)
\ No newline at end of file
+        return (s == startstr)
diff --git a/tests/llsd_test.py b/tests/llsd_test.py
index e8d5fe4..a705d2e 100644
--- a/tests/llsd_test.py
+++ b/tests/llsd_test.py
@@ -507,6 +507,12 @@ def testParseNotationIncorrectMIME(self):
         except llsd.LLSDParseError:
             pass
 
+    def testParseNotationUnterminatedString(self):
+        """
+        Test with an unterminated delimited string
+        """
+        self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'foo")
+
 
 class LLSDBinaryUnitTest(unittest.TestCase):
     """

From 147e7eaded1cfc0d81bab706ba954e0b1b92ebf2 Mon Sep 17 00:00:00 2001
From: Salad Dais <SaladDais@users.noreply.github.com>
Date: Wed, 28 Sep 2022 08:21:08 +0000
Subject: [PATCH 04/13] Use a shared pre-sized string decode buffer

This avoids an unnecessary alloc in the common case of strings
under 1024 chars.
---
 llsd/base.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/llsd/base.py b/llsd/base.py
index 43c62a9..be087c0 100644
--- a/llsd/base.py
+++ b/llsd/base.py
@@ -362,11 +362,12 @@ class LLSDBaseParser(object):
     """
     Utility methods useful for parser subclasses.
     """
-    __slots__ = ['_buffer', '_index']
+    __slots__ = ['_buffer', '_index', '_unesc_buff']
 
     def __init__(self):
         self._buffer = b''
         self._index = 0
+        self._unesc_buff = bytearray(_UNESC_BUFF_LEN)
 
     def _error(self, message, offset=0):
         try:
@@ -406,9 +407,11 @@ def _parse_string_delim(self, delim):
         "Parse a delimited string."
         insert_idx = 0
         delim_ord = ord(delim)
-        unesc_buff = bytearray(_UNESC_BUFF_LEN)
+        # Preallocate a working buffer for the unescaped string output
+        # to avoid allocs in the hot loop.
+        unesc_buff = self._unesc_buff
         # Cache these in locals, otherwise we have to perform a lookup on
-        # `self` inside our hot loop.
+        # `self` in the hot loop.
         buff = self._buffer
         read_idx = self._index
         cc = 0
@@ -453,6 +456,7 @@ def _parse_string_delim(self, delim):
                 unesc_buff.extend(b"\x00" * _UNESC_BUFF_LEN)
         try:
             self._index = read_idx
+            # Slice off only what we used of the working decode buffer
             return unesc_buff[:insert_idx].decode('utf-8')
         except UnicodeDecodeError as exc:
             self._error(exc)

From 3f91c7bd6d84d222bc1b45b694676288270b68be Mon Sep 17 00:00:00 2001
From: Salad Dais <SaladDais@users.noreply.github.com>
Date: Wed, 28 Sep 2022 12:55:49 +0000
Subject: [PATCH 05/13] Clarify naming and comments

---
 llsd/base.py | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/llsd/base.py b/llsd/base.py
index be087c0..aeead6d 100644
--- a/llsd/base.py
+++ b/llsd/base.py
@@ -355,19 +355,20 @@ def __init__(self):
 
 _X_ORD = ord(b'x')
 _BACKSLASH_ORD = ord(b'\\')
-_UNESC_BUFF_LEN = 1024
+_DECODE_BUFF_ALLOC_SIZE = 1024
 
 
 class LLSDBaseParser(object):
     """
     Utility methods useful for parser subclasses.
     """
-    __slots__ = ['_buffer', '_index', '_unesc_buff']
+    __slots__ = ['_buffer', '_index', '_decode_buff']
 
     def __init__(self):
         self._buffer = b''
         self._index = 0
-        self._unesc_buff = bytearray(_UNESC_BUFF_LEN)
+        # Scratch space for decoding delimited strings
+        self._decode_buff = bytearray(_DECODE_BUFF_ALLOC_SIZE)
 
     def _error(self, message, offset=0):
         try:
@@ -407,9 +408,9 @@ def _parse_string_delim(self, delim):
         "Parse a delimited string."
         insert_idx = 0
         delim_ord = ord(delim)
-        # Preallocate a working buffer for the unescaped string output
+        # Preallocate a working buffer for the decoded string output
         # to avoid allocs in the hot loop.
-        unesc_buff = self._unesc_buff
+        decode_buff = self._decode_buff
         # Cache these in locals, otherwise we have to perform a lookup on
         # `self` in the hot loop.
         buff = self._buffer
@@ -445,19 +446,20 @@ def _parse_string_delim(self, delim):
                 self._index = read_idx
                 self._error("Trying to read past end of buffer")
 
-            unesc_buff[insert_idx] = cc
+            decode_buff[insert_idx] = cc
             insert_idx += 1
 
             # We inserted a character, check if we need to expand the buffer.
-            if insert_idx % _UNESC_BUFF_LEN == 0:
-                # Any string this long may overflow the escape buffer,
-                # make a new expanded buffer
-                unesc_buff = bytearray(unesc_buff)
-                unesc_buff.extend(b"\x00" * _UNESC_BUFF_LEN)
+            if insert_idx % _DECODE_BUFF_ALLOC_SIZE == 0:
+                # Any additions may now overflow the decoding buffer, make
+                # a new expanded buffer containing the existing contents.
+                decode_buff = bytearray(decode_buff)
+                decode_buff.extend(b"\x00" * _DECODE_BUFF_ALLOC_SIZE)
         try:
+            # Sync our local read index with the canonical one
             self._index = read_idx
             # Slice off only what we used of the working decode buffer
-            return unesc_buff[:insert_idx].decode('utf-8')
+            return decode_buff[:insert_idx].decode('utf-8')
         except UnicodeDecodeError as exc:
             self._error(exc)
 

From 2764149aebd1fbc19c1a49ea36bf567b784b1a44 Mon Sep 17 00:00:00 2001
From: Salad Dais <SaladDais@users.noreply.github.com>
Date: Wed, 28 Sep 2022 16:44:46 +0000
Subject: [PATCH 06/13] Make PY2's `self._buffer[n]` behavior match PY3's

`bytes` on PY2 are really `str`s, so `val[n]` returns a character
rather than an integer. Wrapping bytes values on PY2 helps us
preserve semantics without requiring branching on Python version
or version-specific lambdas to peek from `self._buffer`.
---
 llsd/base.py           | 11 +++++++++++
 llsd/serde_binary.py   |  6 +++++-
 llsd/serde_notation.py |  7 ++++++-
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/llsd/base.py b/llsd/base.py
index aeead6d..0c5c0c7 100644
--- a/llsd/base.py
+++ b/llsd/base.py
@@ -141,6 +141,17 @@ def B(fmt):
             return fmt
 
 
+class PY3SemanticBytes(BytesType):
+    """Wrapper to make `buffer[n]` return an integer like in Py3"""
+    __slots__ = []
+
+    def __getitem__(self, item):
+        ret = super(PY3SemanticBytes, self).__getitem__(item)
+        if is_integer(item):
+            return ord(ret)
+        return ret
+
+
 def is_integer(o):
     """ portable test if an object is like an int """
     return isinstance(o, IntTypes)
diff --git a/llsd/serde_binary.py b/llsd/serde_binary.py
index 5392e32..cbf65e4 100644
--- a/llsd/serde_binary.py
+++ b/llsd/serde_binary.py
@@ -4,7 +4,7 @@
 import uuid
 
 from llsd.base import (_LLSD, LLSDBaseParser, LLSDSerializationError, _str_to_bytes, binary, is_integer, is_string,
-                       starts_with, uri)
+                       starts_with, uri, PY2, is_bytes, PY3SemanticBytes)
 
 
 class LLSDBinaryParser(LLSDBaseParser):
@@ -63,6 +63,10 @@ def parse(self, buffer, ignore_binary = False):
         :param ignore_binary: parser throws away data in llsd binary nodes.
         :returns: returns a python object.
         """
+        if PY2 and is_bytes(buffer):
+            # We need to wrap this in a helper class so that individual element
+            # access works the same as in PY3
+            buffer = PY3SemanticBytes(buffer)
         self._buffer = buffer
         self._index = 0
         self._keep_binary = not ignore_binary
diff --git a/llsd/serde_notation.py b/llsd/serde_notation.py
index 067dff3..e2e9340 100644
--- a/llsd/serde_notation.py
+++ b/llsd/serde_notation.py
@@ -4,7 +4,7 @@
 import uuid
 
 from llsd.base import (_LLSD, B, LLSDBaseFormatter, LLSDBaseParser, LLSDParseError, LLSDSerializationError, UnicodeType,
-                       _format_datestr, _parse_datestr, _str_to_bytes, binary, uri)
+                       _format_datestr, _parse_datestr, _str_to_bytes, binary, uri, PY2, is_bytes, PY3SemanticBytes)
 
 _int_regex = re.compile(br"[-+]?\d+")
 _real_regex = re.compile(br"[-+]?(?:(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?)|[-+]?inf|[-+]?nan")
@@ -86,6 +86,11 @@ def parse(self, buffer, ignore_binary = False):
         if buffer == b"":
             return False
 
+        if PY2 and is_bytes(buffer):
+            # We need to wrap this in a helper class so that individual element
+            # access works the same as in PY3
+            buffer = PY3SemanticBytes(buffer)
+
         self._buffer = buffer
         self._index = 0
         return self._parse()

From 6bb156a97978c0be4151431d8eaf7f3082b4784e Mon Sep 17 00:00:00 2001
From: Salad Dais <SaladDais@users.noreply.github.com>
Date: Wed, 28 Sep 2022 17:22:24 +0000
Subject: [PATCH 07/13] Give context for hex escape decode failures

---
 llsd/base.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/llsd/base.py b/llsd/base.py
index 0c5c0c7..b59ffeb 100644
--- a/llsd/base.py
+++ b/llsd/base.py
@@ -440,10 +440,17 @@ def _parse_string_delim(self, delim):
                     if cc == _X_ORD:
                         # It's a hex escape. char is the value of the two
                         # following hex nybbles
-                        cc = int(chr(buff[read_idx]), 16) << 4
-                        read_idx += 1
-                        cc |= int(chr(buff[read_idx]), 16)
-                        read_idx += 1
+                        try:
+                            cc = int(chr(buff[read_idx]), 16) << 4
+                            read_idx += 1
+                            cc |= int(chr(buff[read_idx]), 16)
+                            read_idx += 1
+                        except ValueError as e:
+                            # One of the hex characters was likely invalid.
+                            # Wrap the ValueError so that we can provide a
+                            # byte offset in the error.
+                            self._index = read_idx
+                            self._error(str(e))
                     else:
                         # escape char preceding anything other than the chars
                         # in _escaped just results in that same char without

From 7330f3117c162877c72cc39911c10750e9322d7d Mon Sep 17 00:00:00 2001
From: Salad Dais <SaladDais@users.noreply.github.com>
Date: Wed, 28 Sep 2022 17:50:58 +0000
Subject: [PATCH 08/13] Only realloc / resize the decode buffer if we actually
 overflow

Shaves 100ms off my testcase's runtime.
---
 llsd/base.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/llsd/base.py b/llsd/base.py
index b59ffeb..bd00db3 100644
--- a/llsd/base.py
+++ b/llsd/base.py
@@ -464,15 +464,17 @@ def _parse_string_delim(self, delim):
                 self._index = read_idx
                 self._error("Trying to read past end of buffer")
 
-            decode_buff[insert_idx] = cc
-            insert_idx += 1
-
-            # We inserted a character, check if we need to expand the buffer.
-            if insert_idx % _DECODE_BUFF_ALLOC_SIZE == 0:
-                # Any additions may now overflow the decoding buffer, make
-                # a new expanded buffer containing the existing contents.
+            try:
+                decode_buff[insert_idx] = cc
+            except IndexError:
+                # Oops, that overflowed the decoding buffer, make a
+                # new expanded buffer containing the existing contents.
                 decode_buff = bytearray(decode_buff)
                 decode_buff.extend(b"\x00" * _DECODE_BUFF_ALLOC_SIZE)
+                decode_buff[insert_idx] = cc
+
+            insert_idx += 1
+
         try:
             # Sync our local read index with the canonical one
             self._index = read_idx

From 5abdc32015d584d6254ecba7e5aec8b1436fcb2b Mon Sep 17 00:00:00 2001
From: Salad Dais <SaladDais@users.noreply.github.com>
Date: Wed, 28 Sep 2022 18:08:33 +0000
Subject: [PATCH 09/13] Add explanatory comment for PY3SemanticBytes

---
 llsd/base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llsd/base.py b/llsd/base.py
index bd00db3..fc86b5e 100644
--- a/llsd/base.py
+++ b/llsd/base.py
@@ -147,6 +147,8 @@ class PY3SemanticBytes(BytesType):
 
     def __getitem__(self, item):
         ret = super(PY3SemanticBytes, self).__getitem__(item)
+        # `buffer[n]` should return an integer, but slice syntax like
+        # `buffer[n:n+1]` should still return a `Bytes` object as before.
         if is_integer(item):
             return ord(ret)
         return ret

From 232902c606e32c17f9c10560d2792ffe60409600 Mon Sep 17 00:00:00 2001
From: Salad Dais <SaladDais@users.noreply.github.com>
Date: Wed, 28 Sep 2022 19:02:21 +0000
Subject: [PATCH 10/13] Add testcases for hex escape parse failures

---
 tests/llsd_test.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/llsd_test.py b/tests/llsd_test.py
index a705d2e..4cb3f2e 100644
--- a/tests/llsd_test.py
+++ b/tests/llsd_test.py
@@ -513,6 +513,11 @@ def testParseNotationUnterminatedString(self):
         """
         self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'foo")
 
+    def testParseNotationTruncatedHex(self):
+        self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\xf")
+
+    def testParseNotationInvalidHex(self):
+        self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\xzz'")
 
 class LLSDBinaryUnitTest(unittest.TestCase):
     """

From 1bb6812e117d80387fcdffbb1403cf872e6513bb Mon Sep 17 00:00:00 2001
From: Salad Dais <SaladDais@users.noreply.github.com>
Date: Thu, 29 Sep 2022 15:54:07 +0000
Subject: [PATCH 11/13] Speed up hex escape parsing by ~30%

---
 llsd/base.py       | 21 +++++++++++++--------
 tests/llsd_test.py |  6 +++++-
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/llsd/base.py b/llsd/base.py
index fc86b5e..544f480 100644
--- a/llsd/base.py
+++ b/llsd/base.py
@@ -441,18 +441,23 @@ def _parse_string_delim(self, delim):
                     read_idx += 1
                     if cc == _X_ORD:
                         # It's a hex escape. char is the value of the two
-                        # following hex nybbles
+                        # following hex nybbles. This slice may result in
+                        # a short read (0 or 1 bytes). An empty slice
+                        # triggers a `ValueError` below; a 1-byte slice
+                        # that parses leaves `read_idx` past the end, so
+                        # the next loop iteration hits an `IndexError`.
+                        hex_bytes = buff[read_idx:read_idx + 2]
+                        read_idx += 2
                         try:
-                            cc = int(chr(buff[read_idx]), 16) << 4
-                            read_idx += 1
-                            cc |= int(chr(buff[read_idx]), 16)
-                            read_idx += 1
+                            # int() can parse a `bytes` containing hex,
+                            # no explicit `bytes.decode("ascii")` required.
+                            cc = int(hex_bytes, 16)
                         except ValueError as e:
                             # One of the hex characters was likely invalid.
                             # Wrap the ValueError so that we can provide a
                             # byte offset in the error.
                             self._index = read_idx
-                            self._error(str(e))
+                            self._error(e, offset=-2)
                     else:
                         # escape char preceding anything other than the chars
                         # in _escaped just results in that same char without
@@ -477,9 +482,9 @@ def _parse_string_delim(self, delim):
 
             insert_idx += 1
 
+        # Sync our local read index with the canonical one
+        self._index = read_idx
         try:
-            # Sync our local read index with the canonical one
-            self._index = read_idx
             # Slice off only what we used of the working decode buffer
             return decode_buff[:insert_idx].decode('utf-8')
         except UnicodeDecodeError as exc:
diff --git a/tests/llsd_test.py b/tests/llsd_test.py
index 4cb3f2e..b86ab96 100644
--- a/tests/llsd_test.py
+++ b/tests/llsd_test.py
@@ -513,12 +513,16 @@ def testParseNotationUnterminatedString(self):
         """
         self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'foo")
 
-    def testParseNotationTruncatedHex(self):
+    def testParseNotationHexEscapeNoChars(self):
+        self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\x")
+
+    def testParseNotationHalfTruncatedHex(self):
         self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\xf")
 
     def testParseNotationInvalidHex(self):
         self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\xzz'")
 
+
 class LLSDBinaryUnitTest(unittest.TestCase):
     """
     This class aggregates all the tests for parse_binary and LLSD.as_binary

From 7cb6d424d009c03820d9cf140fa2d4c462c6ec8d Mon Sep 17 00:00:00 2001
From: Bennett Goble <signal@lindenlab.com>
Date: Mon, 3 Oct 2022 10:49:37 -0700
Subject: [PATCH 12/13] Re-export parse/format classes from main module

Allow users to simply `from llsd import LLSDBinaryParser`
---
 llsd/__init__.py | 30 ++++--------------------------
 1 file changed, 4 insertions(+), 26 deletions(-)

diff --git a/llsd/__init__.py b/llsd/__init__.py
index 9727479..85c3471 100644
--- a/llsd/__init__.py
+++ b/llsd/__init__.py
@@ -9,31 +9,9 @@
 """
 from llsd.base import (_LLSD, BINARY_MIME_TYPE, NOTATION_MIME_TYPE, XML_MIME_TYPE, LLSDParseError,
                        LLSDSerializationError, LongType, UnicodeType, binary, starts_with, undef, uri)
-from llsd.serde_binary import format_binary, parse_binary
-from llsd.serde_notation import format_notation, parse_notation
-from llsd.serde_xml import format_pretty_xml, format_xml, parse_xml
-
-__all__ = [
-    "BINARY_MIME_TYPE",
-    "LLSD",
-    "LLSDParseError",
-    "LLSDSerializationError",
-    "LongType",
-    "NOTATION_MIME_TYPE",
-    "UnicodeType",
-    "XML_MIME_TYPE",
-    "binary",
-    "format_binary",
-    "format_notation",
-    "format_pretty_xml",
-    "format_xml",
-    "parse",
-    "parse_binary",
-    "parse_notation",
-    "parse_xml",
-    "undef",
-    "uri",
-]
+from llsd.serde_binary import LLSDBinaryParser, format_binary, parse_binary
+from llsd.serde_notation import LLSDNotationFormatter, LLSDNotationParser, format_notation, parse_notation
+from llsd.serde_xml import LLSDXMLFormatter, LLSDXMLPrettyFormatter, format_pretty_xml, format_xml, parse_xml
 
 
 def parse(something, mime_type = None):
@@ -81,4 +59,4 @@ def __str__(self):
     as_xml = staticmethod(format_xml)
     as_pretty_xml = staticmethod(format_pretty_xml)
     as_binary = staticmethod(format_binary)
-    as_notation = staticmethod(format_notation)
\ No newline at end of file
+    as_notation = staticmethod(format_notation)

From f590a0b1efbedcbbbf0ea5ec3824768945524708 Mon Sep 17 00:00:00 2001
From: Bennett Goble <signal@lindenlab.com>
Date: Thu, 27 Oct 2022 15:07:47 -0700
Subject: [PATCH 13/13] Add automated CLA

---
 .github/workflows/cla.yaml | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 .github/workflows/cla.yaml

diff --git a/.github/workflows/cla.yaml b/.github/workflows/cla.yaml
new file mode 100644
index 0000000..fa180c6
--- /dev/null
+++ b/.github/workflows/cla.yaml
@@ -0,0 +1,25 @@
+name: Check CLA
+
+on:
+  issue_comment:
+    types: [created]
+  pull_request_target:
+    types: [opened, closed, synchronize]
+
+jobs:
+  cla:
+    name: Check CLA
+    runs-on: ubuntu-latest
+    steps:
+      - name: CLA Assistant
+        if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I hereby sign the CLA') || github.event_name == 'pull_request_target'
+        uses: secondlife-3p/contributor-assistant@v2
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PERSONAL_ACCESS_TOKEN: ${{ secrets.SHARED_CLA_TOKEN }}
+        with:
+          branch: main
+          path-to-document: https://github.com/secondlife/cla/blob/master/CLA.md
+          path-to-signatures: signatures.json
+          remote-organization-name: secondlife
+          remote-repository-name: cla-signatures