From d9938f1a37b8d9759d8a065ebb8a01906711e807 Mon Sep 17 00:00:00 2001
From: Sei Lisa <sei-lisa@email.fake>
Date: Mon, 9 Nov 2020 01:51:24 +0100
Subject: [PATCH] Misc small non-user-visible fixes

Some are Unicode vs. str fixes, others are style adjustments, others are fixes in unused functions, and so on.
---
 lslopt/lslinliner.py |  4 ++--
 lslopt/lslparse.py   |  2 +-
 main.py              | 41 +++++++++++++++++++++--------------------
 run-tests.py         | 10 ++++++----
 strutil.py           |  8 +++-----
 5 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/lslopt/lslinliner.py b/lslopt/lslinliner.py
index e39a03a..afa5cb0 100644
--- a/lslopt/lslinliner.py
+++ b/lslopt/lslinliner.py
@@ -18,9 +18,9 @@
 # Expand inlined functions. This could perhaps be made at parse time, but that
 # would obfuscate the source too much.
 
-from lslcommon import nr
+from lslopt.lslcommon import nr
 
-# Statement-level nodes that have at most 1 child and is of type expression
+# Statement-level nodes that have at most 1 child and are of type expression
 SINGLE_OPT_EXPR_CHILD_NODES = frozenset({'DECL', 'EXPR', 'RETURN',
     '@', 'STSW', 'JUMP', ';', 'LAMBDA'})
 
diff --git a/lslopt/lslparse.py b/lslopt/lslparse.py
index e4bd70d..cb816b3 100644
--- a/lslopt/lslparse.py
+++ b/lslopt/lslparse.py
@@ -3007,7 +3007,7 @@ list lazy_list_set(list L, integer i, list v)
         del self.scopestack
 
         if self.enable_inline:
-            import lslinliner
+            from lslopt import lslinliner
             lslinliner.inliner().inline(self.tree, self.symtab)
 
         treesymtab = self.tree, self.symtab
diff --git a/main.py b/main.py
index 99c3058..d1948ab 100755
--- a/main.py
+++ b/main.py
@@ -96,7 +96,7 @@ def PreparePreproc(script):
     line up to the point where the string was closed. That will place the next
     token in the same line and column it previously was.
     """
-    s = ''
+    s = u''
     nlines = 0
     col = 0
 
@@ -120,29 +120,29 @@ def PreparePreproc(script):
     # least surprise seems to suggest to accept valid LSL strings as LSL
     # instead of reproducing that C quirk. This also matches what FS is doing
     # currently, so it's good for compatibility.
-    tok = re.compile(str2u(
+    tok = re.compile(str2u(  # Python 3.5 does not recognize ur'...' literals
         r'(?:'
             r'/(?:\?\?/\n|\\\n)*\*.*?\*(?:\?\?/\n|\\\n)*/'
             r'|/(?:\?\?/\n|\\\n)*/(?:\?\?/\n|\\\n|[^\n])*\n'
             r'|[^"]'
         r')+'
         r'|"'
-        ), re.S)
+        , 'utf8'), re.S)
     # RE used inside strings.
     tok2 = re.compile(str2u(
         r'(?:'
             r"\?\?[='()!<>-]"  # valid trigraph except ??/ (backslash)
             r"|(?:\?\?/|\\)(?:\?\?[/='()!<>-]|[^\n])"
-                                # backslash trigraph or actual backslash,
-                                # followed by any trigraph or non-newline
+                               # backslash trigraph or actual backslash,
+                               # followed by any trigraph or non-newline
             r'|(?!\?\?/\n|\\\n|"|\n).'
-                                # any character that doesn't start a trigraph/
-                                # backslash escape followed by a newline
-                                # or is a newline or double quote, as we're
-                                # interested in all those individually.
-        r')+'                   # as many of those as possible
-        r'|\?\?/\n|\\\n|\n|"'   # or any of those individually
-        ))
+                               # any character that doesn't start a trigraph/
+                               # backslash escape followed by a newline
+                               # or is a newline or double quote, as we're
+                               # interested in all those individually.
+        r')+'                  # as many of those as possible
+        r'|\?\?/\n|\\\n|\n|"'  # or any of those individually
+        , 'utf8'))
 
     pos = 0
     match = tok.search(script, pos)
@@ -157,24 +157,24 @@ def PreparePreproc(script):
                 matched2 = match2.group(0)
                 pos += len(matched2)
 
-                if matched2 == b'\\\n' or matched2 == b'??/\n':
+                if matched2 == u'\\\n' or matched2 == u'??/\n':
                     nlines += 1
                     col = 0
                     match2 = tok2.search(script, pos)
                     continue
-                if matched2 == b'"':
+                if matched2 == u'"':
                     if nlines:
-                        if script[pos:pos+1] == b'\n':
+                        if script[pos:pos+1] == u'\n':
                             col = -1 # don't add spaces if not necessary
                         # col misses the quote added here, so add 1
-                        s += b'"' + b'\n'*nlines + b' '*(col+1)
+                        s += u'"' + u'\n'*nlines + u' '*(col+1)
                     else:
-                        s += b'"'
+                        s += u'"'
                     break
-                if matched2 == b'\n':
+                if matched2 == u'\n':
                     nlines += 1
                     col = 0
-                    s += b'\\n'
+                    s += u'\\n'
                 else:
                     col += len(matched2)
                     s += matched2
@@ -453,7 +453,7 @@ def main(argv):
                 if chgfix[1:] not in validoptions:
                     Usage(argv[0], 'optimizer-options')
                     werr(u"\nError: Unrecognized"
-                        u" optimizer option: %s\n" % chg.decode('utf8'))
+                         u" optimizer option: %s\n" % chg.decode('utf8'))
                     return 1
                 if chgfix[0] == '-':
                     options.discard(chgfix[1:])
@@ -642,6 +642,7 @@ def main(argv):
         # Append user arguments at the end to allow them to override defaults
         preproc_cmdline += preproc_user_postargs
 
+        # Transform to bytes and check Unicode validity
         if type(script) is unicode:
             script = script.encode('utf8')
         else:
diff --git a/run-tests.py b/run-tests.py
index 9416975..0c6e12f 100755
--- a/run-tests.py
+++ b/run-tests.py
@@ -698,7 +698,7 @@ def generateScriptTests():
             # Create a closure with the test data
             def makeTestFunction(fbase, suite):
                 def TestFunction(self):
-                    stdin = tryRead(fbase + '.lsl') or ''
+                    stdin = tryRead(fbase + '.lsl') or b''
                     expected_stdout = tryRead(fbase + '.out') or b''
                     expected_stderr = tryRead(fbase + '.err') or b''
                     runargs = (parseArgs(tryRead(fbase + '.run', Binary=False))
@@ -734,8 +734,9 @@ def generateScriptTests():
                         werr(expected_stderr)
                         werr(u'\n************ actual stderr:\n')
                         werr(actual_stderr)
-                        if difflib and expected_stderr and actual_stderr:
-                            sys.stderr.write(u'\n************ diff:\n'
+                        if difflib and expected_stderr and actual_stderr \
+                           and not expected_stderr.startswith(b'REGEX\n'):
+                            werr(u'\n************ diff:\n'
                                  + u'\n'.join(difflib.unified_diff(
                                     b2u(expected_stderr).split(u'\n'),
                                     b2u(actual_stderr).split(u'\n'),
@@ -755,7 +756,8 @@ def generateScriptTests():
                         werr(expected_stdout)
                         werr(u'\n************ actual stdout:\n')
                         werr(actual_stdout)
-                        if difflib and expected_stdout and actual_stdout:
+                        if difflib and expected_stdout and actual_stdout \
+                           and not expected_stdout.startswith(b'REGEX\n'):
                             werr(u'\n************ diff:\n'
                                  + u'\n'.join(difflib.unified_diff(
                                     b2u(expected_stdout).split('\n'),
diff --git a/strutil.py b/strutil.py
index 0597ec6..2b71bba 100644
--- a/strutil.py
+++ b/strutil.py
@@ -31,8 +31,7 @@ if sys.version_info.major >= 3:
 
     def str2b(s, enc=None):
         """Convert a native Python3 str to bytes, with the given encoding."""
-        return s.encode(enc if type(enc) == str
-                        else getattr(enc, 'encoding', 'utf8'),
+        return s.encode(getattr(enc, 'encoding', enc) or 'utf8',
                         'backslashreplace')
 
     def u2str(s, enc=None):
@@ -56,8 +55,7 @@ else:
 
     def u2str(s, enc=None):
         """Convert a Unicode string to native Python 2 str."""
-        return s.encode(enc if type(enc) == str
-                        else getattr(enc, 'encoding', 'utf8'),
+        return s.encode(getattr(enc, 'encoding', enc) or 'utf8',
                         'backslashreplace')
 
     def b2str(s, enc=None):
@@ -70,7 +68,7 @@ def b2u(s, enc=None):
 
 def u2b(s, enc=None):
     """Unicode to Bytes"""
-    return u2str(str2b(s, enc), enc)
+    return str2b(u2str(s, enc), enc)
 
 def any2b(s, enc=None):
     """Bytes or Unicode to Bytes"""