From dc655e3501a16b5fcf3fc91de025f0fd9506e912 Mon Sep 17 00:00:00 2001
From: Sei Lisa <sei-lisa@email.fake>
Date: Sun, 11 Dec 2022 20:39:44 +0100
Subject: [PATCH] Monkey-patch some string functions in narrow string builds

The official distribution of Python 2.7 on Windows is built with "narrow strings" (UTF-16 strings with no proper indexing or length). This makes some tests fail. "Fix" this by monkey-patching a few functions and using a wrapping unicode class, as we can't monkey-patch the actual unicode type.

This is very fragile code, but it's the best we could do given the limitations.
---
 lslopt/lslbasefuncs.py |  6 ++--
 strutil.py             | 67 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/lslopt/lslbasefuncs.py b/lslopt/lslbasefuncs.py
index 683b15d..cdd0cb2 100644
--- a/lslopt/lslbasefuncs.py
+++ b/lslopt/lslbasefuncs.py
@@ -674,6 +674,8 @@ def InternalUTF8toString(s):
 # type check. Same for llGetSubString and llList2List. They are all joined into
 # one single function.
 def InternalGetDeleteSubSequence(val, start, end, isGet):
+    if type(val) == unicode:
+        val = uniwrap(val)
     start = fi(start)
     end = fi(end)
     L = len(val)
@@ -1298,7 +1300,7 @@ def llGetSubString(s, start, end):
     return InternalGetDeleteSubSequence(s, start, end, isGet=True)
 
 def llHash(s):
-    s = fs(s)
+    s = uniwrap(fs(s))
     hash = 0
     for i in s:
         hash = (hash * 65599 + ord(i)) & 0xFFFFFFFF
@@ -1718,7 +1720,7 @@ def llModPow(base, exp, mod):
     return S32(ret)
 
 def llOrd(val, index):
-    val = fs(val)
+    val = uniwrap(fs(val))
     index = fi(index)
     L = len(val)
     if -L <= index < L:
diff --git a/strutil.py b/strutil.py
index 85bfa06..4a6e4fe 100644
--- a/strutil.py
+++ b/strutil.py
@@ -22,12 +22,15 @@ import codecs
 codecs.register(lambda x: codecs.lookup('utf8') if x == 'cp65001' else None)
 
 import sys
+
 if sys.version_info.major >= 3:
     unicode = str
     unichr = chr
     xrange = range
     python3 = True
     python2 = False
+    python2Narrow = False
+    uniwrap = unicode
 
     def str2u(s, enc=None):
         """Convert a native Python3 str to Unicode. This is a NOP."""
@@ -57,6 +60,8 @@ else:
     xrange = xrange
     python2 = True
     python3 = False
+    python2Narrow = False
+    uniwrap = unicode
 
     def str2u(s, enc=None):
         """Convert a native Python2 str to Unicode."""
@@ -80,6 +85,68 @@ else:
         """Convert Bytes or Unicode to native Python 2 str."""
         return s if type(s) == str else u2str(s, enc)
 
+    if len(u'\U00010001') == 2:
+        # Narrow build: an astral character is stored as two UTF-16 code
+        # units. Shadow the builtins below with code-point-aware versions.
+        python2Narrow = True
+        _unichr = unichr  # keep the builtins reachable for the wrappers
+        _ord = ord
+        _len = len
+
+        def unichr(n):  # unichr() that also accepts code points > U+FFFF
+            if 0x10000 <= n < 0x110000:
+                return ('\\U%08X' % n).decode('unicode-escape')
+            return _unichr(n)
+
+        def ord(x):
+            """Like ord(), but decodes a surrogate pair to one code point."""
+            if isinstance(x, unicode) and _len(x) == 2:
+                hi, lo = map(_ord, unicode(x))  # plain unicode: per code unit
+                if 0xD800 <= hi < 0xDC00 and 0xDC00 <= lo < 0xE000:
+                    return 0x10000 + ((hi & 0x3FF) << 10 | (lo & 0x3FF))
+            return _ord(x)  # builtin result, or TypeError for bad input
+
+        def len(x):  # len() counting code points, not UTF-16 code units
+            if not isinstance(x, unicode):
+                return _len(x)
+            return _len(x.encode('utf-32le')) // 4  # 4 bytes per code point
+
+        # Alas, we can't monkey-patch the unicode class's __getitem__ and
+        # __getslice__ methods, so strings get wrapped in a subclass instead.
+        class uniwrap(unicode):
+            """unicode subclass indexed and sliced by code point.
+
+            Works on the UTF-32LE encoding, where each code point is 4 bytes.
+            """
+            def __getslice__(self, start, stop):
+                # Python 2 sends plain s[i:j] here, with an omitted upper bound
+                # replaced by a huge integer. Clamp, then scale to byte offsets.
+                # NOTE(review): negative bounds appear to arrive pre-adjusted
+                # by the builtin (UTF-16) length - confirm callers avoid them.
+                lim = sys.maxint >> 2
+                start = sys.maxint if start >= lim else max(start, 0) << 2
+                stop = sys.maxint if stop >= lim else max(stop, 0) << 2
+                return self.encode('utf-32le')[start:stop].decode('utf-32le')
+
+            def __getitem__(self, item):
+                """Return one code point, or the result of an extended slice.
+
+                Raises IndexError for an out-of-range integer index, which
+                is also what ends a `for` loop over the string.
+                """
+                u = self.encode('utf-32le')
+                if isinstance(item, slice):
+                    # Slice a list of 4-byte units so that start, stop and
+                    # step all count code points; a scaled step applied to
+                    # the raw bytes would pick single stray bytes.
+                    units = [u[i:i + 4] for i in xrange(0, _len(u), 4)]
+                    return ''.join(units[item]).decode('utf-32le')
+                item <<= 2  # byte offset; non-integers raise TypeError here
+                if not -_len(u) <= item < _len(u):
+                    raise IndexError('string index out of range')
+                if item < 0:
+                    item += _len(u)
+                return u[item:item + 4].decode('utf-32le')
 
 def b2u(s, enc=None):
     """Bytes to Unicode"""