From d9938f1a37b8d9759d8a065ebb8a01906711e807 Mon Sep 17 00:00:00 2001 From: Sei Lisa Date: Mon, 9 Nov 2020 01:51:24 +0100 Subject: [PATCH] Misc small non-user-visible fixes Some are Unicode vs Str stuff, others are style adjustments, others in unused functions, and so on. --- lslopt/lslinliner.py | 4 ++-- lslopt/lslparse.py | 2 +- main.py | 41 +++++++++++++++++++++-------------------- run-tests.py | 10 ++++++---- strutil.py | 8 +++----- 5 files changed, 33 insertions(+), 32 deletions(-) diff --git a/lslopt/lslinliner.py b/lslopt/lslinliner.py index e39a03a..afa5cb0 100644 --- a/lslopt/lslinliner.py +++ b/lslopt/lslinliner.py @@ -18,9 +18,9 @@ # Expand inlined functions. This could perhaps be made at parse time, but that # would obfuscate the source too much. -from lslcommon import nr +from lslopt.lslcommon import nr -# Statement-level nodes that have at most 1 child and is of type expression +# Statement-level nodes that have at most 1 child and are of type expression SINGLE_OPT_EXPR_CHILD_NODES = frozenset({'DECL', 'EXPR', 'RETURN', '@', 'STSW', 'JUMP', ';', 'LAMBDA'}) diff --git a/lslopt/lslparse.py b/lslopt/lslparse.py index e4bd70d..cb816b3 100644 --- a/lslopt/lslparse.py +++ b/lslopt/lslparse.py @@ -3007,7 +3007,7 @@ list lazy_list_set(list L, integer i, list v) del self.scopestack if self.enable_inline: - import lslinliner + from lslopt import lslinliner lslinliner.inliner().inline(self.tree, self.symtab) treesymtab = self.tree, self.symtab diff --git a/main.py b/main.py index 99c3058..d1948ab 100755 --- a/main.py +++ b/main.py @@ -96,7 +96,7 @@ def PreparePreproc(script): line up to the point where the string was closed. That will place the next token in the same line and column it previously was. """ - s = '' + s = u'' nlines = 0 col = 0 @@ -120,29 +120,29 @@ def PreparePreproc(script): # least surprise seems to suggest to accept valid LSL strings as LSL # instead of reproducing that C quirk. This also matches what FS is doing # currently, so it's good for compatibility. - tok = re.compile(str2u( + tok = re.compile(str2u( # Python 3.5 does not recognize ur'...' literals r'(?:' r'/(?:\?\?/\n|\\\n)*\*.*?\*(?:\?\?/\n|\\\n)*/' r'|/(?:\?\?/\n|\\\n)*/(?:\?\?/\n|\\\n|[^\n])*\n' r'|[^"]' r')+' r'|"' - ), re.S) + , 'utf8'), re.S) # RE used inside strings. tok2 = re.compile(str2u( r'(?:' r"\?\?[='()!<>-]" # valid trigraph except ??/ (backslash) r"|(?:\?\?/|\\)(?:\?\?[/='()!<>-]|[^\n])" - # backslash trigraph or actual backslash, - # followed by any trigraph or non-newline + # backslash trigraph or actual backslash, + # followed by any trigraph or non-newline r'|(?!\?\?/\n|\\\n|"|\n).' - # any character that doesn't start a trigraph/ - # backslash escape followed by a newline - # or is a newline or double quote, as we're - # interested in all those individually. - r')+' # as many of those as possible - r'|\?\?/\n|\\\n|\n|"' # or any of those individually - )) + # any character that doesn't start a trigraph/ + # backslash escape followed by a newline + # or is a newline or double quote, as we're + # interested in all those individually. + r')' # as many of those as possible + r'|\?\?/\n|\\\n|\n|"' # or any of those individually + , 'utf8')) pos = 0 match = tok.search(script, pos) @@ -157,24 +157,24 @@ def PreparePreproc(script): matched2 = match2.group(0) pos += len(matched2) - if matched2 == b'\\\n' or matched2 == b'??/\n': + if matched2 == u'\\\n' or matched2 == u'??/\n': nlines += 1 col = 0 match2 = tok2.search(script, pos) continue - if matched2 == b'"': + if matched2 == u'"': if nlines: - if script[pos:pos+1] == b'\n': + if script[pos:pos+1] == u'\n': col = -1 # don't add spaces if not necessary # col misses the quote added here, so add 1 - s += b'"' + b'\n'*nlines + b' '*(col+1) + s += u'"' + u'\n'*nlines + u' '*(col+1) else: - s += b'"' + s += u'"' break - if matched2 == b'\n': + if matched2 == u'\n': nlines += 1 col = 0 - s += b'\\n' + s += u'\\n' else: col += len(matched2) s += matched2 @@ -453,7 +453,7 @@ def main(argv): if chgfix[1:] not in validoptions: Usage(argv[0], 'optimizer-options') werr(u"\nError: Unrecognized" - u" optimizer option: %s\n" % chg.decode('utf8')) + u" optimizer option: %s\n" % chg.decode('utf8')) return 1 if chgfix[0] == '-': options.discard(chgfix[1:]) @@ -642,6 +642,7 @@ def main(argv): # Append user arguments at the end to allow them to override defaults preproc_cmdline += preproc_user_postargs + # Transform to bytes and check Unicode validity if type(script) is unicode: script = script.encode('utf8') else: diff --git a/run-tests.py b/run-tests.py index 9416975..0c6e12f 100755 --- a/run-tests.py +++ b/run-tests.py @@ -698,7 +698,7 @@ def generateScriptTests(): # Create a closure with the test data def makeTestFunction(fbase, suite): def TestFunction(self): - stdin = tryRead(fbase + '.lsl') or '' + stdin = tryRead(fbase + '.lsl') or b'' expected_stdout = tryRead(fbase + '.out') or b'' expected_stderr = tryRead(fbase + '.err') or b'' runargs = (parseArgs(tryRead(fbase + '.run', Binary=False)) @@ -734,8 +734,9 @@ def generateScriptTests(): werr(expected_stderr) werr(u'\n************ actual stderr:\n') werr(actual_stderr) - if difflib and expected_stderr and actual_stderr: - sys.stderr.write(u'\n************ diff:\n' + if difflib and expected_stderr and actual_stderr \ + and not expected_stderr.startswith(b'REGEX\n'): + werr(u'\n************ diff:\n' + u'\n'.join(difflib.unified_diff( b2u(expected_stderr).split(u'\n'), b2u(actual_stderr).split(u'\n'), @@ -755,7 +756,8 @@ def generateScriptTests(): werr(expected_stdout) werr(u'\n************ actual stdout:\n') werr(actual_stdout) - if difflib and expected_stdout and actual_stdout: + if difflib and expected_stdout and actual_stdout \ + and not expected_stdout.startswith(b'REGEX\n'): werr(u'\n************ diff:\n' + u'\n'.join(difflib.unified_diff( b2u(expected_stdout).split('\n'), diff --git a/strutil.py b/strutil.py index 0597ec6..2b71bba 100644 --- a/strutil.py +++ b/strutil.py @@ -31,8 +31,7 @@ if sys.version_info.major >= 3: def str2b(s, enc=None): """Convert a native Python3 str to bytes, with the given encoding.""" - return s.encode(enc if type(enc) == str - else getattr(enc, 'encoding', 'utf8'), + return s.encode(getattr(enc, 'encoding', enc) or 'utf8', 'backslashreplace') def u2str(s, enc=None): @@ -56,8 +55,7 @@ else: def u2str(s, enc=None): """Convert a Unicode string to native Python 2 str.""" - return s.encode(enc if type(enc) == str - else getattr(enc, 'encoding', 'utf8'), + return s.encode(getattr(enc, 'encoding', enc) or 'utf8', 'backslashreplace') def b2str(s, enc=None): @@ -70,7 +68,7 @@ def b2u(s, enc=None): def u2b(s, enc=None): """Unicode to Bytes""" - return u2str(str2b(s, enc), enc) + return str2b(u2str(s, enc), enc) def any2b(s, enc=None): """Bytes or Unicode to Bytes"""