commit 05d00e075b38336290b174d52b493d8a1ea97b94
Author: Sei Lisa
Date:   Sat Jul 26 02:43:44 2014 +0200

    Initial commit. Status so far:

    - Parser and output modules are thoroughly tested and working.
    - Most LSL immutable functions are working; some not tested;
      llJsonSetValue not implemented.
    - Parser recognizes the following flags that alter syntax:
        extendedglobalexpr: Allow full expression syntax in globals.
        extendedtypecast: Allow full unary expressions in typecasts
          e.g. (float)~i.
        extendedassignment: Enable the C assignment operators &=, ^=, |=,
          <<=, >>=.
        explicitcast: Add explicit casts wherever they are done implicitly,
          e.g. float f=3; -> float f=(float)3;.

    Of them, only extendedglobalexpr is useless so far, as it requires the
    optimizer to be working.

diff --git a/lslopt/__init__.py b/lslopt/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lslopt/lslbasefuncs.py b/lslopt/lslbasefuncs.py
new file mode 100644
index 0000000..85e1d8e
--- /dev/null
+++ b/lslopt/lslbasefuncs.py
@@ -0,0 +1,1550 @@
+# This module is used by the optimizer for resolving constant values.
+
+# The functions it implements are all functions that always return the same result when given the same input, and that have no side effects.
+
+# For example, llAbs() is here, but llFrand() is not, because it doesn't always return the same result.
+
+# This implies that functions present in this module can be precomputed if their arguments are constants.
+
+import re
+from lslcommon import *
+import lslcommon
+from ctypes import c_float
+import math
+import hashlib
+from base64 import b64encode, b64decode
+
+
+# Regular expressions used along the code. They are needed mainly because
+# Python lacks a C-like strtod/strtol (it comes close, but it is very picky
+# with what it accepts). We need to extract the number part of a string, or
+# Python will complain.
+# Also, Base64 needs the correct count of characters (len mod 4 can't be = 1).
+# The RE helps both in isolating the Base64 section and in trimming out the
+# offending characters; it just doesn't help with padding, with which Python is
+# also picky. We deal with that in the code by padding with '='*(-length&3).
+
+# Despite what http://www.gnu.org/software/libc/manual/html_node/Parsing-of-Floats.html#Parsing-of-Floats
+# says, NaN(chars) does not work in LSL (which is relevant in vectors).
+# Note infinity vs. inf is necessary for parsing vectors & rotations,
+# e.g. (vector)"<1,inf,infix>" is not valid but (vector)"<1,inf,infinity>" is
+# as is (vector)"<1,inf,info>". The 1st gives <0,0,0>, the others <1,inf,inf>.
+# The lookahead (?!i) is essential for parsing them that way without extra code.
+# Note that '|' in REs is order-sensitive.
+float_re = re.compile(ur'^\s*[+-]?(?:0(x)(?:[0-9a-f]+(?:\.[0-9a-f]*)?|\.[0-9a-f]+)(?:p[+-]?[0-9]+)?'
+ ur'|(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:e[+-]?[0-9]+)?|infinity|inf(?!i)|nan)', + re.I) + +int_re = re.compile(ur'^0(x)[0-9a-f]+|^\s*[+-]?[0-9]+', re.I) + +key_re = re.compile(ur'^[0-9a-f]{8}(?:-[0-9a-f]{4}){4}[0-9a-f]{8}$', re.I) + +b64_re = re.compile(ur'^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2,3})?') + +ZERO_VECTOR = Vector((0.0, 0.0, 0.0)) +ZERO_ROTATION = Quaternion((0.0, 0.0, 0.0, 1.0)) + +Infinity = float('inf') +NaN = float('nan') + +class ELSLTypeMismatch(Exception): + def __init__(self): + super(self.__class__, self).__init__("Type mismatch") + +class ELSLMathError(Exception): + def __init__(self): + super(self.__class__, self).__init__("Math Error") + +class ELSLInvalidType(Exception): + def __init__(self): + super(self.__class__, self).__init__("Internal error: Invalid type") + +# LSL types are translated to Python types as follows: +# * LSL string -> Python unicode +# * LSL key -> Key (class derived from unicode, no significant changes except __repr__) +# * LSL integer -> Python int (should never be long) +# * LSL float -> Python float +# * LSL vector -> Vector (class derived from Python tuple) of 3 numbers (float) +# * LSL rotation -> Quaternion (class derived from Python tuple) of 4 numbers (float) +# * LSL list -> Python list + +Types = { + int: 1, # TYPE_INTEGER + float: 2, # TYPE_FLOAT + unicode: 3, # TYPE_STRING + Key: 4, # TYPE_KEY + Vector: 5, # TYPE_VECTOR + Quaternion: 6, # TYPE_ROTATION + #list: 7, # Undefined + } + +# Utility functions + +def F32(f, f32=True): + """Truncate a float to have a precision equivalent to IEEE single""" + + if not f32: # don't truncate + return f + + if isinstance(f, tuple): # vector, quaternion + return f.__class__(F32(i) for i in f) + + # Alternative to the big blurb below. This relies on the machine using IEEE-754, though. + + # Using array: + #from array import array + #return array('f',(f,))[0] + + # Using ctypes: + #from ctypes import c_float + return c_float(f).value + +# # Another alternative. frexp and ldexp solve a lot (but are still troublesome): +# m, x = math.frexp(abs(f)) +# if x > 128: +# return math.copysign(Infinity, f) +# if x < -149: +# return math.copysign(0.0, f) +# if x < -125: +# e = 1<<(x+149) +# else: +# e = 16777216.0 +# # Special corner case with rounding near the maximum float (e.g. 3.4028236e38 gets rounded up, going out of range for a F32) +# if m*e >= 16777215.5 and x == 128: +# return math.copysign(Infinity, f) +# return math.ldexp(math.copysign(math.floor(m*e+0.5)/e, f), x) + + +# # Original old-fashioned strategy (watch out for the 16777215.5 bug above): +# +# if math.isinf(f) or math.isnan(f) or f==0: +# return f +# s = math.copysign(1, f) +# # This number may not be precise enough if Python had infinite precision, but it works for us. 
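+# (A quick sanity check of F32 itself; illustration only, assuming an
+# IEEE-754 host so that c_float rounds exactly like LSL single floats:
+#
+#   F32(0.1)          ->  0.10000000149011612
+#   F32(3.4028236e38) ->  inf   (rounds past the maximum single)
+#
+# The constant below is 2^-150, half of the smallest denormal 2^-149;
+# anything of smaller magnitude flushes to zero.)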
+# if f < 0.0000000000000000000000000000000000000000000007006492321624086132496: +# return math.copysign(0.0, s) +# f = abs(f) +# +# +# # TO DO: Check this boundary (this is 2^128) +# if f >= 340282366920938463463374607431768211456.0: +# return math.copysign(Infinity, s) +# +# # TO DO: Check this boundary (2^-126; hopefully there's some overlap and the precision can be cut) +# if f < 0.000000000000000000000000000000000000011754943508222875079687365372222456778186655567720875215087517062784172594547271728515625: +# # Denormal range +# f *= 713623846352979940529142984724747568191373312.0 +# e = 0.00000000000000000000000000000000000000000000140129846432481707092372958328991613128026194187651577175706828388979108268586060148663818836212158203125 # 2^-149 +# else: +# e = 1.0 +# # This first loop is an optimization to get closer to the destination faster for very small numbers +# while f < 1.0: +# f *= 16777216.0 +# e *= 0.000000059604644775390625 +# # Go bit by bit +# while f < 8388608.0: +# f *= 2.0 +# e *= 0.5 +# +# #This first loop is an optimization to get closer to the destination faster for very big numbers +# while f >= 140737488355328.0: +# f *= 0.000000059604644775390625 +# e *= 16777216.0 +# # Go bit by bit +# while f >= 16777216.0: +# f *= 0.5 +# e *= 2.0 +# +# return math.copysign(math.floor(f+0.5)*e, s) + +def S32(val): + """Return a signed integer truncated to 32 bits (must deal with longs too)""" + if -2147483648 <= val <= 2147483647: + return int(val) + val &= 0xFFFFFFFF + if val > 2147483647: + return int(val - 4294967296) + return int(val) + +def zstr(s): + if not isinstance(s, unicode): + raise ELSLInvalidType + + zi = s.find(u'\0') + if zi < 0: + return s + return s.__class__(s[:zi]) + +def f2s(val, DP=6): + if math.isinf(val): + return u'Infinity' if val > 0 else u'-Infinity' + if math.isnan(val): + return u'NaN' + if lslcommon.LSO or val == 0.: + return u'%.*f' % (DP, val) # deals with -0.0 too + + # Format according to Mono rules (7 decimals after the DP, found experimentally) + s = u'%.*f' % (DP+7, val) + + if s[:DP+3] == u'-0.' + '0'*DP and s[DP+3] < u'5': + return u'0.' + '0'*DP # underflown negatives return 0.0 except for -0.0 dealt with above + + # Separate the sign + sgn = u'-' if s[0] == u'-' else u'' + if sgn: s = s[1:] + + # Look for position of first nonzero from the left + i = 0 + while s[i] in u'0.': + i += 1 + + dot = s.index(u'.') + + # Find rounding point. It's either the 7th digit after the first significant one, + # or the (DP+1)-th decimal after the period, whichever comes first. + digits = 0 + while digits < 7: + if i >= dot+1+DP: + break + if i == dot: + i += 1 + i += 1 + digits += 1 + + if s[i if i != dot else i+1] >= u'5': # no rounding necessary + # Rounding - increment s[:i] storing result into news + new_s = u'' + ci = i-1 # carry index + while ci >= 0 and s[ci] == u'9': + new_s = u'0' + new_s + ci -= 1 + if ci == dot: + ci -= 1 # skip over the dot + new_s = u'.' + new_s # but add it to new_s + if ci < 0: + new_s = u'1' + new_s # 9...9 -> 10...0 + else: + # increment s[ci] e.g. 43999 -> 44000 + new_s = s[:ci] + chr(ord(s[ci])+1) + new_s + else: + new_s = s[:i] + + if i <= dot: + return sgn + new_s + u'0'*(dot-i) + u'.' 
+ u'0'*DP + return sgn + new_s + u'0'*(dot+1+DP-i) + +def vr2s(v, DP=6): + if type(v) == Vector: + return u'<'+f2s(v[0],DP)+u', '+f2s(v[1],DP)+u', '+f2s(v[2],DP)+u'>' + return u'<'+f2s(v[0],DP)+u', '+f2s(v[1],DP)+u', '+f2s(v[2],DP)+u', '+f2s(v[3],DP)+u'>' + +def InternalTypecast(val, out, InList, f32): + """Type cast val to out, following LSL rules. + + To avoid mutual recursion, it deals with everything except lists. That way + it does not need to call InternalList2Strings which needs to call it. + """ + tval = type(val) + # The case tval == list is handled in typecast() below. + if out == list: + return [val] + + if tval == int: # integer + val = S32(val) + if out == int: return val + if out == float: return F32(val, f32) + if out == unicode: return unicode(val) + raise ELSLTypeMismatch + + if tval == float: + val = F32(val, f32) + if out == int: return S32(int(val)) if val >= -2147483648.0 and val < 2147483648.0 else -2147483648 + if out == float: return val + if out == unicode: return f2s(val, 6) + raise ELSLTypeMismatch + + if tval == Vector: + if out == Vector: return val + if out == unicode: return vr2s(val, 6 if InList else 5) + raise ELSLTypeMismatch + if tval == Quaternion: + if out == Quaternion: return val + if out == unicode: return vr2s(val, 6 if InList else 5) + raise ELSLTypeMismatch + if tval == Key: # key + if out == Key: return zstr(val) + if out == unicode: return zstr(unicode(val)) + raise ELSLTypeMismatch + + if tval == unicode: + val = zstr(val) + if out == unicode: return val + if out == Key: return Key(val) + if out == float: + # Clean up the string for Picky Python + match = float_re.match(val) + if match is None: + return 0.0 + if match.group(1): + return F32(float.fromhex(match.group(0)), f32) + return F32(float(match.group(0)), f32) + if out == int: + match = int_re.match(val) + if match is None: + return 0 + val = match.group(0) + if match.group(1): + val = int(val, 0) + else: + val = int(val) + if -4294967295 <= val <= 4294967295: + return S32(val) + return -1 + if out in (Vector, Quaternion): + Z,dim = (ZERO_VECTOR,3) if out == Vector else (ZERO_ROTATION,4) + ret = [] + if val[0:1] != u'<': + return Z + val = val[1:] + for _ in range(dim): + match = float_re.match(val) + if match is None: + return Z + if match.group(1): + ret.append(F32(float.fromhex(match.group(0)), f32)) + else: + ret.append(F32(float(match.group(0)), f32)) + if len(ret) < dim: + i = match.end() + if val[i:i+1] != u',': + return Z + val = val[i+1:] + return out(ret) # convert type + + # To avoid mutual recursion, this was moved: + #if tval == list: # etc. + + raise ELSLInvalidType + +def InternalList2Strings(val): + """Convert a list of misc.items to a list of strings.""" + ret = [] + for elem in val: + ret.append(InternalTypecast(elem, unicode, InList=True, f32=True)) + return ret + +def typecast(val, out, InList=False, f32=True): + """Type cast an item. Calls InternalList2Strings for lists and + defers the rest to InternalTypecast. + """ + if type(val) == list: + if out == list: + return val # NOTE: We're not duplicating it here. 
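+ # list -> string joins the stringified elements with no separator;
+ # e.g. typecast([1, 2., u'a'], unicode) == u'12.000000a' (floats get
+ # six decimals, per f2s above). Illustration only.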
+ if out == unicode: + return u''.join(InternalList2Strings(val)) + raise ELSLTypeMismatch + return InternalTypecast(val, out, InList, f32) + +def minus(val): + if type(val) in (int, float): + if type(val) == int and val == -2147483648: + return val + return -val + if isinstance(val, tuple): + return val.__class__(-f for f in val) + raise ELSLTypeMismatch + +def add(a, b, f32=True): + # defined for: + # scalar+scalar + # vector+vector + # rotation+rotation + # string+string + # list+any + # any+list + ta=type(a) + tb=type(b) + if ta in (int, float) and tb in (int, float): + if ta == int and tb == int: + return S32(a+b) + return F32(a+b, f32) + if ta == list and tb == list or ta == unicode and tb == unicode: + return a + b + if ta == list: + return a + [b] + if tb == list: + return [a] + b + if ta == tb in (Vector, Quaternion): + return F32(ta(a[i]+b[i] for i in range(len(a))), f32) + raise ELSLTypeMismatch + +def sub(a, b, f32=True): + # defined for: + # scalar+scalar + # vector+vector + # rotation+rotation + ta=type(a) + tb=type(b) + if ta in (int, float) and tb in (int, float): + if ta == tb == int: + return S32(a-b) + return F32(a-b, f32) + if ta == tb in (Vector, Quaternion): + return F32(ta(a[i]-b[i] for i in range(len(a))), f32) + raise ELSLTypeMismatch + +def mul(a, b, f32=True): + # defined for: + # scalar*scalar + # scalar*vector + # vector*scalar + # vector*vector + # vector*rotation + # rotation*rotation + ta = type(a) + tb = type(b) + # If either type is string, list, or key, error + if ta in (unicode, list, Key) or tb in (unicode, list, Key): + raise ELSLTypeMismatch + # only int, float, vector, quaternion here + if ta in (int, float): + if tb in (int, float): + if ta == tb == int: + return S32(a*b) + return F32(a*b, f32) + if tb != Vector: + # scalar * quat is not defined + raise ELSLTypeMismatch + # scalar * vector + return Vector(F32((a*b[0], a*b[1], a*b[2]), f32)) + + if ta == Quaternion: + # quat * scalar and quat * vector are not defined + if tb != Quaternion: + raise ELSLTypeMismatch + # quaternion product - product formula reversed + return Quaternion(F32((a[0] * b[3] + a[3] * b[0] + a[2] * b[1] - a[1] * b[2], + a[1] * b[3] - a[2] * b[0] + a[3] * b[1] + a[0] * b[2], + a[2] * b[3] + a[1] * b[0] - a[0] * b[1] + a[3] * b[2], + a[3] * b[3] - a[0] * b[0] - a[1] * b[1] - a[2] * b[2]), f32)) + + if ta != Vector: + raise ELSLInvalidType # Should never happen at this point + + if tb in (int, float): + return Vector(F32((a[0]*b, a[1]*b, a[2]*b), f32)) + + if tb == Vector: + # scalar product + return F32(math.fsum((a[0]*b[0], a[1]*b[1], a[2]*b[2])), f32) + + if tb != Quaternion: + raise ELSLInvalidType # Should never happen at this point + + # vector * quaternion: perform conjugation + #v = mul(Quaternion((-b[0], -b[1], -b[2], b[3])), mul(Quaternion((a[0], a[1], a[2], 0.0)), b, f32=False)) + #return Vector((v[0], v[1], v[2])) + # this is more precise as it goes directly to the gist of it: + return Vector(F32((math.fsum((a[0]*(b[0]*b[0]-b[1]*b[1]-b[2]*b[2]+b[3]*b[3]), + a[1]*2*(b[0]*b[1]-b[2]*b[3]), + a[2]*2*(b[0]*b[2]+b[1]*b[3]))), + math.fsum((a[0]*2*(b[0]*b[1]+b[2]*b[3]), + -a[1]*(b[0]*b[0]-b[1]*b[1]+b[2]*b[2]-b[3]*b[3]), # notice minus sign + a[2]*2*(b[1]*b[2]-b[0]*b[3]))), + math.fsum((a[0]*2*(b[0]*b[2]-b[1]*b[3]), + a[1]*2*(b[1]*b[2]+b[0]*b[3]), + -a[2]*(b[0]*b[0]+b[1]*b[1]-b[2]*b[2]-b[3]*b[3]))) # notice minus sign + ), f32)) + +def div(a, b, f32=True): + # defined for: + # scalar/scalar + # vector/scalar + # vector/rotation + # rotation/rotation + ta = type(a) + tb = 
type(b) + if tb in (int, float): + if b == 0: + raise ELSLMathError + if ta in (int, float): + if math.isnan(a): # NaN/anything gives math error + raise ELSLMathError + if ta == int and tb == int: + # special case + if a == -2147483648 and b == -1: + return a # this could be handled by using S32 but it's probably faster this way + if (a < 0) ^ (b < 0): + # signs differ - Python rounds towards -inf, we need rounding towards 0 + return - a//-b # that's -(a//-b) not (-a)//-b + return a//b + return F32(a/b, f32) + if ta == Vector: + return Vector(F32((a[0]/b, a[1]/b, a[2]/b), f32)) + if tb == Quaternion: # division by a rotation is multiplication by the conjugate of the rotation + # defer the remaining type checks to mul() + return mul(a, (-b[0],-b[1],-b[2],b[3]), f32) + raise ELSLTypeMismatch + +def mod(a, b, f32=True): + # defined only for integers and vectors + if type(a) == type(b) == int: + if a < 0: + return int(-((-a) % abs(b))) + return int(a % abs(b)) + if type(a) == type(b) == Vector: + # cross product + return F32((a[1]*b[2]-a[2]*b[1], a[2]*b[0]-a[0]*b[2], a[0]*b[1]-a[1]*b[0]), f32) + + raise ELSLTypeMismatch + +# TODO: Change shouldbeXXX to asserts +def shouldbeint(x): + if type(x) != int: + raise ELSLInvalidType + +def shouldbefloat(x): + if type(x) != float: + raise ELSLInvalidType + +def shouldbevector(x): + if type(x) == Vector and len(x) == 3 and type(x[0]) == type(x[1]) == type(x[2]) == float: + return + raise ELSLInvalidType + +def shouldberot(x): + if type(x) == Quaternion and len(x) == 4 and type(x[0]) == type(x[1]) == type(x[2]) == type(x[3]) == float: + return + raise ELSLInvalidType + +def shouldbestring(x): + if type(x) != unicode: + raise ELSLInvalidType + +def shouldbekey(x): + if type(x) != Key: + raise ELSLInvalidType + +def shouldbelist(x): + if type(x) != list: + raise ELSLInvalidType + +# +# LSL-compatible computation functions +# + +def llAbs(i): + shouldbeint(i) + return abs(i) + +def llAcos(f): + shouldbefloat(f) + try: + return F32(math.acos(f)) + except ValueError: + return NaN + +def llAngleBetween(r1, r2): + shouldberot(r1) + shouldberot(r2) + return llRot2Angle(div(r1, r2, f32=False)) + +def llAsin(f): + shouldbefloat(f) + try: + return F32(math.asin(f)) + except ValueError: + return NaN + +def llAtan2(y, x): + shouldbefloat(y) + shouldbefloat(x) + return F32(math.atan2(y, x)) + +def llAxes2Rot(fwd, left, up): + shouldbevector(fwd) + shouldbevector(left) + shouldbevector(up) + + # One of the hardest. + + t = math.fsum((fwd[0], left[1], up[2])) + if t >= 0.: # no danger of division by zero or negative roots + r = math.sqrt(1. + t) + s = 0.5/r + + # For the case of ix+jy+kz > 0, it can return an unnormalized quaternion + return Quaternion((s*(left[2]-up[1]), s*(up[0]-fwd[2]), s*(fwd[1]-left[0]), r*0.5)) + + # Find a positive combo. LSL normalizes the result in these cases only, so we do the same. + + if left[1] <= fwd[0] >= up[2]: # is fwd[0] the greatest? + r = math.sqrt(1. + fwd[0] - left[1] - up[2]) + s = 0.5/r + q = (r*0.5, s*(fwd[1]+left[0]), s*(up[0]+fwd[2]), s*(left[2]-up[1])) + + elif fwd[0] <= left[1] >= up[2]: # is left[1] the greatest? + r = math.sqrt(1. - fwd[0] + left[1] - up[2]) + s = 0.5/r + q = (s*(fwd[1]+left[0]), r*0.5, s*(left[2]+up[1]), s*(up[0]-fwd[2])) + + else: + # Only one case remaining: up[2] is the greatest + r = math.sqrt(1. 
- fwd[0] - left[1] + up[2]) + s = 0.5/r + q = (s*(up[0]+fwd[2]), s*(left[2]+up[1]), r*0.5, s*(fwd[1]-left[0])) + + # Normalize + if q == (0.,0.,0.,0.): + return Quaternion((0.,0.,0.,1.)) + mag = math.fsum((q[0]*q[0], q[1]*q[1], q[2]*q[2], q[3]*q[3])) + return Quaternion(F32((q[0]/mag, q[1]/mag, q[2]/mag, q[3]/mag))) + + +def llAxisAngle2Rot(axis, angle): + shouldbevector(axis) + shouldbefloat(angle) + axis = llVecNorm(axis) + if axis == ZERO_VECTOR: + angle = 0. + c = math.cos(angle*0.5) + s = math.sin(angle*0.5) + return Quaternion(F32((axis[0]*s, axis[1]*s, axis[2]*s, c))) + +# NOTE: This one does not always return the same value in LSL, but no one should depend +# on the garbage bytes returned. We implement it deterministically. +def llBase64ToInteger(s): + shouldbestring(s) + if len(s) > 8: + return 0 + s = b64_re.match(s).group() + i = len(s) + s = (b64decode(s + u'='*(-i & 3)) + b'\0\0\0\0')[:4] # actually the last 3 bytes should be garbage + i = ord(s[0]) if s[0] < b'\x80' else ord(s[0])-256 + return (i<<24)+(ord(s[1])<<16)+(ord(s[2])<<8)+ord(s[3]) + +def InternalUTF8toString(s): + # Note Mono and LSO behave differently here. + # LSO *CAN* store invalid UTF-8. + # For example, llEscapeURL(llUnescapeURL("%80%C3")) gives "%80%C3" in LSO. + # (But llEscapeURL(llUnescapeURL("%80%00%C3")) still gives "%80") + # We don't emulate it, we've built this with Unicode strings in mind. + + # decode(..., 'replace') replaces invalid chars with U+FFFD which is not + # what LSL does (LSL replaces with '?'). Since U+FFFD must be preserved if + # present, we need to write our own algorithm. + + # Problem: Aliases are not valid UTF-8 for LSL, and code points above + # U+10FFFF are not supported. Both things complicate the alg a bit. + + ret = u'' + partialchar = b'' + pending = 0 + for c in s: + o = ord(c) + if partialchar: + if 0x80 <= o < 0xC0 and ( + partialchar[1:2] + or b'\xC2' <= partialchar < b'\xF4' and partialchar not in b'\xE0\xF0' + or partialchar == b'\xE0' and o >= 0xA0 + or partialchar == b'\xF0' and o >= 0x90 + or partialchar == b'\xF4' and o < 0x90 + ): + partialchar += c + pending -= 1 + if pending == 0: + ret += partialchar.decode('utf8') + partialchar = b'' + c = c + # NOTE: Without the above line, the following one hits a bug in + # python-coverage. It IS executed but not detected. + continue + ret += u'?' * len(partialchar) + partialchar = b'' + # fall through to process current character + if o >= 0xC2 and o <= 0xF4: + partialchar = c + pending = 1 if o < 0xE0 else 2 if o < 0xF0 else 3 + elif o >= 0x80: + ret += u'?' + else: + ret += c.decode('utf8') + + if partialchar: + ret += u'?' 
* len(partialchar) + + return zstr(ret) + +def llBase64ToString(s): + shouldbestring(s) + s = b64_re.match(s).group(0) + return InternalUTF8toString(b64decode(s + u'='*(-len(s)&3))) + +def llCSV2List(s): + shouldbestring(s) + + bracketlevel = 0 + lastwascomma = False + lastidx = 0 + i = 0 + ret = [] + for c in s: + if bracketlevel: + # ignore ',', focus on nesting level + if c == u'<': + bracketlevel += 1 + elif c == u'>': + bracketlevel -= 1 + elif lastwascomma and c == u' ': # eat space after comma + lastwascomma = False + lastidx = i+1 + else: + if c == u',': + lastwascomma = True + ret.append(s[lastidx:i]) + lastidx = i+1 + elif c == u'<': + bracketlevel += 1 + i += 1 + ret.append(s[lastidx:i]) + return ret + +def llCeil(f): + shouldbefloat(f) + if math.isnan(f) or math.isinf(f) or f >= 2147483648.0 or f < -2147483648.0: + return -2147483648 + return int(math.ceil(f)) + +def llCos(f): + shouldbefloat(f) + if math.isinf(f): + return NaN + if -9223372036854775808.0 <= f < 9223372036854775808.0: + return F32(math.cos(f)) + return f + +# The code of llDeleteSubList and llDeleteSubString is identical except for the type check +def InternalDeleteSubSequence(val, start, end): + shouldbeint(start) + shouldbeint(end) + L = len(val) + if L == 0: + return val[:] + + # Python does much of the same thing here, which helps a lot + if (start+L if start < 0 else start) <= (end+L if end < 0 else end): + if end == -1: end += L + return val[:start] + val[end+1:] + if end == -1: end += L + return val[end+1:start] # Exclusion range + +# The code of llGetSubString and llList2List is identical except for the type check +def InternalGetSubSequence(val, start, end): + shouldbeint(start) + shouldbeint(end) + L = len(val) + if L == 0: + return val[:] + + # Python does much of the same thing as LSL here, which helps a lot + if start < 0: start += L + if end < 0: end += L + if start > end: + if end == -1: end += L + return val[:end+1] + val[start:] # Exclusion range + if end == -1: end += L + return val[start:end+1] + +def llDeleteSubList(lst, start, end): + # This acts as llList2List if there's wraparound + shouldbelist(lst) + return InternalDeleteSubSequence(lst, start, end) + +def llDeleteSubString(s, start, end): + # This acts as llGetSubString if there's wraparound + shouldbestring(s) + return InternalDeleteSubSequence(s, start, end) + +def llDumpList2String(lst, sep): + return sep.join(InternalList2Strings(lst)) + +def llEscapeURL(s): + shouldbestring(s) + s = s.encode('utf8') # get bytes + ret = u'' + for c in s: + if b'A' <= c <= b'Z' or b'a' <= c <= b'z' or b'0' <= c <= b'9': + ret += c.encode('utf8') + else: + ret += u'%%%02X' % ord(c) + return ret + +def llEuler2Rot(v): + shouldbevector(v) + c0 = math.cos(v[0]*0.5) + s0 = math.sin(v[0]*0.5) + c1 = math.cos(v[1]*0.5) + s1 = math.sin(v[1]*0.5) + c2 = math.cos(v[2]*0.5) + s2 = math.sin(v[2]*0.5) + + return Quaternion((s0 * c1 * c2 + c0 * s1 * s2, + c0 * s1 * c2 - s0 * c1 * s2, + c0 * c1 * s2 + s0 * s1 * c2, + c0 * c1 * c2 - s0 * s1 * s2)) + +def llFabs(f): + shouldbefloat(f) + return math.fabs(f) + +def llFloor(f): + shouldbefloat(f) + if math.isnan(f) or math.isinf(f) or f >= 2147483648.0 or f < -2147483648.0: + return -2147483648 + return int(math.floor(f)) + +# not implemented as it does not give the same output for the same input +#def llFrand(lim): + +# not implemented as it does not give the same output for the same input +#def llGenerateKey(): + +def llGetListEntryType(lst, pos): + shouldbelist(lst) + shouldbeint(pos) + try: + return Types(lst[pos]) 
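+ # (Types, declared near the top of this module, is the table mapping
+ # Python types to the LSL TYPE_* constants 1..6; positions out of
+ # range fall through to TYPE_INVALID below.)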
+ except IndexError: + return 0 # TYPE_INVALID + except KeyError: + raise ELSLInvalidType + +def llGetListLength(lst): + shouldbelist(lst) + return len(lst) + +def llGetSubString(s, start, end): + shouldbestring(s) + return InternalGetSubSequence(s, start, end) + +def llInsertString(s, pos, src): + shouldbestring(s) + shouldbeint(pos) + shouldbestring(src) + if pos < 0: pos = 0 # llInsertString does not support negative indices + return s[:pos] + src + s[pos:] + +def llIntegerToBase64(x): + shouldbeint(x) + return b64encode(chr((x>>24)&255) + chr((x>>16)&255) + chr((x>>8)&255) + chr(x&255)).decode('utf8') + +def llList2CSV(lst): + shouldbelist(lst) + tmp = lslcommon.LSO + lslcommon.LSO = True # Use LSO rules for float to string conversion + ret = u', '.join(InternalList2Strings(lst)) + lslcommon.LSO = tmp + return ret + +def llList2Float(lst, pos): + shouldbelist(lst) + shouldbeint(pos) + try: + elem = lst[pos] + if type(elem) == float: + return elem + if type(elem) in (int, unicode): + return InternalTypecast(elem, float, InList=True, f32=True) + except IndexError: + pass + return 0.0 + +def llList2Integer(lst, pos): + shouldbelist(lst) + shouldbeint(pos) + try: + elem = lst[pos] + if type(elem) == int: + return elem + if type(elem) in (float, unicode): + return InternalTypecast(elem, int, InList=True, f32=True) + return 0 + except IndexError: + return 0 + +def llList2Key(lst, pos): + shouldbelist(lst) + shouldbeint(pos) + try: + elem = lst[pos] + if type(elem) == Key: + return elem + if type(elem) == unicode: + return Key(elem) + except IndexError: + pass + if lslcommon.LSO: + return Key(u'00000000-0000-0000-0000-000000000000') # NULL_KEY + return Key(u'') + +def llList2List(lst, start, end): + shouldbelist(lst) + shouldbeint(start) + shouldbeint(end) + return InternalGetSubSequence(lst, start, end) + +def llList2ListStrided(lst, start, end, stride): + shouldbelist(lst) + shouldbeint(start) + shouldbeint(end) + shouldbeint(stride) + stride = abs(stride) if stride != 0 else 1 + L = len(lst) + if start < 0: start += L + if end < 0: end += L + if start > end: + start = 0 + end = L-1 + # start is rounded up to ceil(start/stride)*stride + start = ((start+stride-1)/stride)*stride + # end is rounded down to floor(start/stride)*stride + end = (end/stride)*stride + + return lst[start:end+1:stride] + +def llList2Rot(lst, pos): + shouldbelist(lst) + shouldbeint(pos) + try: + elem = lst[pos] + if type(elem) == Quaternion: + return elem + except IndexError: + pass + return ZERO_ROTATION + +def llList2String(lst, pos): + shouldbelist(lst) + shouldbeint(pos) + try: + return InternalTypecast(lst[pos], unicode, InList=True, f32=True) + except IndexError: + pass + return u'' + +def llList2Vector(lst, pos): + shouldbelist(lst) + shouldbeint(pos) + try: + elem = lst[pos] + if type(elem) == Vector: + return elem + except IndexError: + pass + return ZERO_VECTOR + +def llListFindList(lst, elems): + shouldbelist(lst) + shouldbelist(elems) + # NaN is found in floats, but not in vectors + L1 = len(lst) + L2 = len(elems) + if L2 > L1: + return -1 # can't find a sublist longer than the original list + if L2 == 0: + return 0 # empty list is always found at position 0 + for i in xrange(L1-L2+1): + for j in xrange(L2): + e1 = lst[i+j] + e2 = elems[j] + if type(e1) == type(e2) == float: + if e1 == e2: + continue + if math.isnan(e1) and math.isnan(e2): + continue + break + elif type(e1) == type(e2) in (Vector, Quaternion): + # Unfortunately, Python fails to consider (NaN,) != (NaN,) sometimes + # so we need to implement 
our own test + for e1e,e2e in zip(e1,e2): + if e1e != e2e: # NaNs are considered different to themselves here as normal + break + else: + continue # equal + break # discrepancy found + elif type(e1) != type(e2) or e1 != e2: + break + else: + return i + return -1 + +def llListInsertList(lst, elems, pos): + shouldbelist(lst) + shouldbelist(elems) + shouldbeint(pos) + # Unlike llInsertString, this function does support negative indices. + return lst[:pos] + elems + lst[pos:] + +# not implemented as it does not give the same output for the same input +#def llListRandomize(x): + +def llListReplaceList(lst, elems, start, end): + shouldbelist(lst) + shouldbelist(elems) + shouldbeint(start) + shouldbeint(end) + L = len(lst) + if (start + L if start < 0 else start) > (end + L if end < 0 else end): + # Exclusion range. Appends elems at 'start' i.e. at end :) + if end == -1: end += L + return lst[end+1:start] + elems + if end == -1: end += L + return lst[:start] + elems + lst[end+1:] + +def llListSort(lst, stride, asc): + shouldbelist(lst) + shouldbeint(stride) + shouldbeint(asc) + lst = lst[:] # make a copy + L = len(lst) + if stride < 1: stride = 1 + if L % stride: + return lst + for i in xrange(0, L-stride, stride): + # Optimized by caching the element in the outer loop AND after swapping. + a = lst[i] + ta = type(a) + if ta == Vector: + a = a[0]*a[0] + a[1]*a[1] + a[2]*a[2] + for j in xrange(i+stride, L, stride): + b = lst[j] + tb = type(b) + gt = False + if ta == tb: + if tb == Vector: + gt = not (a <= b[0]*b[0] + b[1]*b[1] + b[2]*b[2]) + # (note NaNs compare as > thus the reversed condition!) + elif tb != Quaternion: + gt = not (a <= b) # float integer, string, key all compare with this + # (note NaNs compare as > thus the reversed condition!) + if gt ^ (asc != 1): + # swap + lst[i:i+stride],lst[j:j+stride] = lst[j:j+stride],lst[i:i+stride] + # Re-cache + a = lst[i] + ta = type(a) + if ta == Vector: + a = a[0]*a[0] + a[1]*a[1] + a[2]*a[2] + return lst + +def llListStatistics(op, lst): + shouldbeint(op) + shouldbelist(lst) + + nums = [] + # Extract numbers in reverse order. LIST_STAT_MEDIAN uses that. + for elem in lst: + if type(elem) in (int, float): + nums.insert(0, float(elem)) + + if nums == []: + return 0.0 + + if op == 8: # LIST_STAT_NUM_COUNT + return float(len(nums)) + + if op in (0, 1, 2) : # LIST_STAT_RANGE, LIST_STAT_MIN, LIST_STAT_MAX + min = None + for elem in nums: + if min is None: + min = max = elem + else: + if elem < min: + min = elem + if elem > max: + max = elem + return F32((max - min, min, max)[op]) + + if op == 4: # LIST_STAT_MEDIAN requires special treatment + # The function behaves very strangely with NaNs. This seems to reproduce it: + + # llListSort seems to do the right thing with NaNs as needed by the median. + nums = llListSort(nums, 1, 1) + L = len(nums) + if L & 1: + return F32(nums[L>>1]) + return F32((nums[(L>>1)-1] + nums[L>>1])*0.5) + + if op in (3, 5, 6, 7): # LIST_STAT_MEAN, STD_DEV, SUM, SUM_SQUARES + sum = 0. + sumsq = 0. + mean = 0. + N = 0. + M2 = 0. + for elem in nums: + N += 1. + sum += elem + sumsq += elem*elem + delta = elem - mean + mean += delta/N + M2 += delta*(elem-mean) + + if op == 5: # LIST_STAT_STD_DEV + return 0. if N == 1. else F32(math.sqrt(M2/(N-1.))) + if op == 6: # LIST_STAT_SUM + return F32(sum) + if op == 7: # LIST_STAT_SUM_SQUARES + return F32(sumsq) + return F32(mean) + + if op == 9: # LIST_STAT_GEOMETRIC_MEAN + N = 0. + GMlog = 0. + for elem in nums: + if elem <= 0.: + return 0. + N += 1. 
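+ # Incremental mean of the logarithms (same running-mean update as in
+ # the mean/M2 loop above): GM = exp(mean(ln x)), which avoids
+ # overflowing the raw product.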
+ delta = math.log(elem) - GMlog + GMlog += delta/N + return F32(math.exp(GMlog)) + + return 0.0 + +def llLog(f): + shouldbefloat(f) + if math.isinf(f) and f < 0 or math.isnan(f) or f <= 0.0: + return 0.0 + return F32(math.log(f)) + +def llLog10(f): + shouldbefloat(f) + if math.isinf(f) and f < 0 or math.isnan(f) or f <= 0.0: + return 0.0 + return F32(math.log10(f)) + +def llMD5String(s, salt): + shouldbestring(s) + shouldbeint(salt) + return hashlib.md5(zstr(s).encode('utf8') + b':' + bytes(salt)).hexdigest().decode('utf8') + +def llModPow(base, exp, mod): + shouldbeint(base) + shouldbeint(exp) + shouldbeint(mod) + # With some luck, this works fully with native ints on 64 bit machines. + if mod in (0, 1): + return 0 + if exp == 0: + return 1 + # Convert all numbers to unsigned + if base < 0: + base += 4294967296 + if exp < 0: + exp += 4294967296 + if mod < 0: + mod += 4294967296 + prod = base + ret = 1 + while True: + if exp & 1: + ret = ((ret * prod) & 0xFFFFFFFF) % mod + exp = exp >> 1 + if exp == 0: + break + prod = ((prod * prod) & 0xFFFFFFFF) % mod + + return S32(ret) + +def llParseString2List(s, exc, inc, KeepNulls=False): + shouldbestring(s) + shouldbelist(exc) + shouldbelist(inc) + if s == u'' and KeepNulls: + return [s] + exc = exc[:8] + inc = inc[:8] + regex = u'' + for i in exc: + if i != u'': + regex += u'|' + re.escape(i) + for i in inc: + if i != u'': + regex += u'|' + re.escape(i) + if regex == u'': + split = [s] + else: + regex = u'(' + regex[1:] + u')' + split = re.split(regex, s) + return [i for i in split if (KeepNulls or i != u'') and i not in exc] + +def llParseStringKeepNulls(s, exc, inc): + return llParseString2List(s, exc, inc, KeepNulls=True) + +def llPow(base, exp): + shouldbefloat(base) + shouldbefloat(exp) + try: + # Python corner cases and LSL corner cases differ + + # Python matches these two, but we don't want to get trapped by our own checks. + if math.isnan(base) or math.isnan(exp): + return NaN + if exp == 0.0: + return 1.0 + + if base == 0.0: # Python gives exception on these, LSL returns stuff + if math.isinf(exp) and exp < 0: + return Infinity # llPow(0.0, -inf) = inf + + if exp < 0.0: + # Negative finite exponent cases + if math.copysign(1, base) < 0 and exp.is_integer() and not (exp/2.).is_integer(): + return -Infinity # llPow(-0.0, -odd_integer) = -inf + return Infinity + + elif abs(base) == 1.0 and math.isinf(exp): + return NaN # Python says 1.0 + + f = F32(math.pow(base, exp)) + return 0.0 if f == 0.0 else f # don't return -0.0 + except ValueError: # should happen only with negative base and noninteger exponent + return NaN + +def llRot2Angle(r): + shouldberot(r) + # Version based on research by Moon Metty, Miranda Umino and Strife Onizuka + return F32(2.*math.atan2(math.sqrt(math.fsum((r[0]*r[0], r[1]*r[1], r[2]*r[2]))), abs(r[3]))); + +def llRot2Axis(r): + shouldberot(r) + return llVecNorm((r[0], r[1], r[2])) + +def llRot2Euler(r): + shouldberot(r) + + # Another one of the hardest. The formula for Z angle in the + # singularity case was inspired by the viewer code. + y = 2*(r[0]*r[2] + r[1]*r[3]) + + # Check gimbal lock conditions + if abs(y) > 0.99999: + return (0., math.asin(y), math.atan2(2.*(r[2]*r[3]+r[0]*r[1]), + 1.-2.*(r[0]*r[0]+r[2]*r[2]))) + + qy2 = r[1]*r[1] + return ( + math.atan2(2.*(r[0]*r[3]-r[1]*r[2]), 1.-2.*(r[0]*r[0]+qy2)), + math.asin(y), + math.atan2(2.*(r[2]*r[3]-r[0]*r[1]), 1.-2.*(r[2]*r[2]+qy2)) + ) + +def llRot2Fwd(r): + shouldberot(r) + v = (1., 0., 0.) 
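+ # An all-zeros quaternion would otherwise end up normalizing a zero
+ # vector; LSL returns the base axis unchanged instead (same in
+ # llRot2Left/llRot2Up below).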
+ if r == (0., 0., 0., 0.): + return v + return llVecNorm(mul(v, r, f32=False)) + +def llRot2Left(r): + shouldberot(r) + v = (0., 1., 0.) + if r == (0., 0., 0., 0.): + return v + return llVecNorm(mul(v, r, f32=False)) + +def llRot2Up(r): + shouldberot(r) + v = (0., 0., 1.) + if r == (0., 0., 0., 0.): + return v + return llVecNorm(mul(v, r, f32=False)) + +def llRotBetween(v1, v2): + shouldbevector(v1) + shouldbevector(v2) + + aabb = math.sqrt(mul(v1, v1, f32=False) * mul(v2, v2, f32=False)) # product of the squared lengths of the arguments + if aabb == 0.: + return ZERO_ROTATION # the arguments are too small, return zero rotation + ab = mul(v1, v2, f32=False) / aabb # normalized dotproduct of the arguments (cosine) + c = Vector(((v1[1] * v2[2] - v1[2] * v2[1]) / aabb, # normalized crossproduct of the arguments + (v1[2] * v2[0] - v1[0] * v2[2]) / aabb, + (v1[0] * v2[1] - v1[1] * v2[0]) / aabb)) + cc = mul(c, c, f32=False) # squared length of the normalized crossproduct (sine) + if cc != 0.: # test if the arguments are (anti)parallel + if ab > -0.7071067811865476: # test if the angle is smaller than 3/4 PI + s = 1. + ab # use the cosine to adjust the s-element + else: + s = cc / (1. + math.sqrt(1. - cc)); # use the sine to adjust the s-element + m = math.sqrt(cc + s * s) # the magnitude of the quaternion + return Quaternion((c[0] / m, c[1] / m, c[2] / m, s / m)) # return the normalized quaternion + if ab > 0.: # test if the angle is smaller than PI/2 + return ZERO_ROTATION # the arguments are parallel + m = math.sqrt(v1[0] * v1[0] + v1[1] * v1[1]) # the length of one argument projected on the XY-plane + if m != 0.: + return Quaternion((v1[1] / m, -v1[0] / m, 0., 0.)) # return rotation with the axis in the XY-plane + return Quaternion((0., 0., 1., 0.)) # rotate around the Z-axis + + # Algorithm by Moon Metty + dot = mul(v1, v2, f32=False) + cross = mod(v1, v2, f32=False) + csq = mul(cross, cross, f32=False) + + ddc2 = dot*dot + csq + + if ddc2 >= 1.5e-45: + if csq >= 1.5e-45: + s = math.sqrt(ddc2) + dot; + m = math.sqrt(csq + s*s); + return F32(Quaternion((cross[0]/m, cross[1]/m, cross[2]/m, s/m))) + + # Deal with degenerate cases here + if dot > 0: + return ZERO_ROTATION + m = math.sqrt(v1[0]*v1[0] + v1[1]*v1[1]) + if m >= 1.5e-45: + return F32(Quaternion((v1[1]/m, -v1[0]/m, 0., 0.))) + return Quaternion((1., 0., 0., 0.)) + return ZERO_ROTATION + +def llRound(f): + shouldbefloat(f) + if math.isnan(f) or math.isinf(f) or f >= 2147483647.5 or f < -2147483648.0: + return -2147483648 + return int(math.floor(f+0.5)) + +def llSHA1String(s): + shouldbestring(s) + return hashlib.sha1(s.encode('utf8')).hexdigest().decode('utf8') + +def llSin(f): + shouldbefloat(f) + if math.isinf(f): + return NaN + if -9223372036854775808.0 <= f < 9223372036854775808.0: + return F32(math.sin(f)) + return f + +def llSqrt(f): + shouldbefloat(f) + if f < 0.0: + return NaN + # LSL and Python both produce -0.0 when the input is -0.0. 
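+ # e.g. llSqrt(-1.0) returns NaN here, where math.sqrt would raise
+ # ValueError.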
+ return math.sqrt(f) + +def llStringLength(s): + shouldbestring(s) + return len(s) + +def llStringToBase64(s): + shouldbestring(s) + return b64encode(s.encode('utf8')).decode('utf8') + +def llStringTrim(s, mode): + shouldbestring(s) + shouldbeint(mode) + head = 0 + length = len(s) + tail = length-1 + if mode & 1: # STRING_TRIM_HEAD + while head < length and s[head] in u'\x09\x0a\x0b\x0c\x0d\x20': + head += 1 + if mode & 2: # STRING_TRIM_TAIL + while tail >= head and s[tail] in u'\x09\x0a\x0b\x0c\x0d\x20': + tail -= 1 + return s[head:tail+1] + +def llSubStringIndex(s, pattern): + shouldbestring(s) + shouldbestring(pattern) + return s.find(pattern) + +def llTan(f): + shouldbefloat(f) + if math.isinf(f): + return NaN + if -9223372036854775808.0 <= f < 9223372036854775808.0: + # We only consider the first turn for anomalous results. + if abs(f) == 1.570796251296997: + return math.copysign(13245400.0, f); + if abs(f) == 1.5707963705062866: + return -math.copysign(22877330.0, f); + return F32(math.tan(f)) + return f + +def llToLower(s): + shouldbestring(s) + if lslcommon.LSO: + return zstr(re.sub(u'[A-Z]', lambda x: x.group().lower(), s)) + return zstr(s.lower()) + +def llToUpper(s): + shouldbestring(s) + if lslcommon.LSO: + return zstr(re.sub(u'[a-z]', lambda x: x.group().upper(), s)) + return zstr(s.upper()) + +def llUnescapeURL(s): + shouldbestring(s) + ret = b'' + L = len(s) + i = 0 + while i < L: + c = s[i] + i += 1 + if c != u'%': + ret += c.encode('utf8') + continue + if i >= L: + break + c = s[i] # First digit + i += 1 + if i >= L: + break + v = 0 + if u'0' <= c <= u'9' or u'A' <= c <= u'F' or u'a' <= c <= u'f': + v = int(c, 16)<<4 + c = s[i] # Second digit + if c == u'%': + ret += chr(v) + continue + i += 1 + if u'0' <= c <= u'9' or u'A' <= c <= u'F' or u'a' <= c <= u'f': + v += int(c, 16) + ret += chr(v) + return InternalUTF8toString(ret) + +def llVecDist(v1, v2): + shouldbevector(v1) + shouldbevector(v2) + return llVecMag((v1[0]-v2[0],v1[1]-v2[1],v1[2]-v2[2])) + +def llVecMag(v): + shouldbevector(v) + return F32(math.sqrt(math.fsum((v[0]*v[0], v[1]*v[1], v[2]*v[2])))) + +def llVecNorm(v): + shouldbevector(v) + if v == ZERO_VECTOR: + return v + f = math.sqrt(math.fsum((v[0]*v[0], v[1]*v[1], v[2]*v[2]))) + return F32((v[0]/f,v[1]/f,v[2]/f)) + +# NOTE: llXorBase64 returns garbage bytes if the input xor string +# starts with zero or one valid Base64 characters. We don't emulate that here; +# our output is deterministic. +def llXorBase64(s, xor): + shouldbestring(s) + shouldbestring(xor) + + # Xor the underlying bytes. + + if xor == u'': + return s + + s = b64_re.match(s).group(0) + L1 = len(s) + xor = b64_re.match(xor).group(0) + L2 = len(xor) + + if L2 == 0: + # This is not accurate. This returns garbage (of undefined length) in LSL. + # The first returned byte seems to be zero always though. + xor = u'ABCD'; + + s = b64decode(s + u'='*(-L1 & 3)) + xor = b64decode(xor + u'='*(-L2 & 3)) + L2 = len(xor) + + i = 0 + ret = b'' + + Bug3763 = 3763 in Bugs + # BUG-3763 consists of the binary string having an extra NULL every time after the second repetition of + # the XOR pattern. For example, if the XOR binary stirng is b'pqr' and the input string is + # b'12345678901234567890', the XOR binary string behaves as if it was b'pqrpqr\0pqr\0pqr\0pqr\0pq'. + # We emulate that by adding the zero and increasing the length the first time. 
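+ # A minimal sketch of the equivalent pattern expansion (illustration
+ # only; _bug3763_pattern is a hypothetical helper, not part of this
+ # module):
+ #
+ # def _bug3763_pattern(xor, n):
+ #     """Return the first n XOR bytes as the loop below consumes them."""
+ #     pat = xor                  # the first repetition is unmodified
+ #     while len(pat) < n:
+ #         pat += xor + b'\x00'   # each later repetition gets a trailing NUL
+ #     return pat[:n]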
+ for c in s: + ret += chr(ord(c) ^ ord(xor[i])) + i += 1 + if i >= L2: + i = 0 + if Bug3763: + Bug3763 = False + xor = xor + b'\x00' + L2 += 1 + return b64encode(ret).decode('utf8') + +def llXorBase64Strings(s, xor): + shouldbestring(s) + shouldbestring(xor) + + if xor == u'': + return s + + B64 = u'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' + + # Special case when the first character is not a Base64 one. (LL's ways are inextricable) + base = B64.find(xor[0]) + if base < 0: + if xor[0] == u'=': + xor = u'+' + xor[1:] + base = 62 + else: + xor = u'/' + xor[1:] + base = 63 + + ret = u'' + i = 0 + L = len(xor) + for c1 in s: + val1 = B64.find(c1) + val2 = B64.find(xor[i]) + i += 1 + if i >= L: + i = 0 + + if val1 < 0: + ret += u'=' + else: + if val2 < 0: + val2 = base + i = 1 + ret += B64[val1 ^ val2] + return ret + +# NOTE: llXorBase64StringsCorrect returns garbage bytes if the input xor string +# starts with zero or one valid Base64 characters. We don't emulate that here; +# our output is deterministic. +def llXorBase64StringsCorrect(s, xor): + shouldbestring(s) + shouldbestring(xor) + + # Xor the underlying bytes but repeating the xor parameter pattern at the first zero (SCR-35). + + if xor == u'': + return s + + + s = b64_re.match(s).group(0) + L1 = len(s) + xor = b64_re.match(xor).group(0) + L2 = len(xor) + + if L2 == 0: + # This is not accurate. This returns garbage (of length 4?) in LSL. + # The first returned byte seems to be zero always though. + xor = u'ABCD' + + s = b64decode(s + u'='*(-L1 & 3)) + xor = b64decode(xor + u'='*(-L2 & 3)) + b'\x00' + + i = 0 + ret = b'' + + for c in s: + ret += chr(ord(c) ^ ord(xor[i])) + i += 1 + if xor[i] == b'\x00': + i = 0 + return b64encode(ret).decode('utf8') diff --git a/lslopt/lslcommon.py b/lslopt/lslcommon.py new file mode 100644 index 0000000..6c0e80b --- /dev/null +++ b/lslopt/lslcommon.py @@ -0,0 +1,19 @@ +# These types just wrap the Python types to make type() work on them. +# There are no ops defined on them or anything. 
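+# For example (doctest-style illustration, assuming this module is on the
+# path):
+#
+#   >>> from lslcommon import Key, Vector
+#   >>> type(Key(u'x')) == unicode   # a Key is not reported as unicode
+#   False
+#   >>> Vector((1., 2., 3.))
+#   Vector((1.0, 2.0, 3.0))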
+ +class Key(unicode): + def __repr__(self): + return self.__class__.__name__ + '(' + super(self.__class__, self).__repr__() + ')' + +class Vector(tuple): + def __repr__(self): + return self.__class__.__name__ + '(' + super(self.__class__, self).__repr__() + ')' + +class Quaternion(tuple): + def __repr__(self): + return self.__class__.__name__ + '(' + super(self.__class__, self).__repr__() + ')' + +# Recognized: 3763, 6466, 6495 +Bugs = set([6495]) + +LSO = False diff --git a/lslopt/lslfuncs.py b/lslopt/lslfuncs.py new file mode 100644 index 0000000..8bf8f5e --- /dev/null +++ b/lslopt/lslfuncs.py @@ -0,0 +1,3 @@ +# Put all LSL functions together in one single module +from lslbasefuncs import * +from lsljson import * diff --git a/lslopt/lsljson.py b/lslopt/lsljson.py new file mode 100644 index 0000000..e9eccc1 --- /dev/null +++ b/lslopt/lsljson.py @@ -0,0 +1,642 @@ +import re +import math +from lslcommon import * +from lslbasefuncs import llStringTrim, shouldbestring, shouldbelist, InternalTypecast + +JSON_INVALID = u'\uFDD0' +JSON_OBJECT = u'\uFDD1' +JSON_ARRAY = u'\uFDD2' +JSON_NUMBER = u'\uFDD3' +JSON_STRING = u'\uFDD4' +JSON_NULL = u'\uFDD5' +JSON_TRUE = u'\uFDD6' +JSON_FALSE = u'\uFDD7' +JSON_DELETE = u'\uFDD8' + +JSON_APPEND = -1 + +jsonesc_re = re.compile(u'[\x08\x09\x0A\x0C\x0D"/\\\\]') +jsonesc_dict = {u'\x08':ur'\b', u'\x09':ur'\t', u'\x0A':ur'\n', u'\x0C':ur'\f', + u'\x0D':ur'\r', u'"':ur'\"', u'/':ur'\/', u'\\':ur'\\'} +jsonunesc_dict = {u'b':u'\x08', u't':u'\x09', u'n':u'\x0A', u'f':u'\x0C', u'r':u'\x0D'} + +# LSL JSON numbers differ from standard JSON numbers in many respects: +# Numbers starting with 0 are allowed, e.g. 01.3e4, 00042 +# .5 is allowed. +# 1e+0 is NOT allowed (the + after the e, to be precise). BUG-6466. +# . is allowed, as is -.e-0 etc. +# 1E is allowed. +# E.2 is allowed. +# E is allowed. +# 1E-1.2 is allowed. +# In general, the rule seems to be: at most one 'E' (optionally followed by a +# '-') and one '.', with optional digits interspersed and an optional initial +# minus sign. +# +# Our RE below checks for the two possible orders of '.' and 'E'. One branch +# must have a mandatory 'E'; in the other everything is optional but it must +# have at least 1 character (done by the lookahead assertion). +# +# The capturing groups serve to check whether the first variant was taken, and +# whether there is something after the digits in the second variant. If both +# are empty, then the match is just one or more digits preceded by an optional +# minus sign (i.e. an integer). That's used by llJson2List to return integer +# elements when appropriate. + +# Real JSON number parser: +#jsonnum_re = re.compile(ur'-?(?:[1-9][0-9]*|0)(?:\.[0-9]+)?(?:[Ee][+-]?[0-9]+)?') + +# BUG-6466 active: +jsonnumbug_re = re.compile(ur'-?(?:[0-9]*([Ee])-?[0-9]*\.?[0-9]*|(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*))') +# BUG-6466 fixed: +# The new RE is just a modified version of the crap, allowing + exponents and +# disallowing zeros, sometimes even when legal (e.g. 0e0) +jsonnum_re = re.compile(ur'-?(?:(?=[1-9]|\.(?:[^e]|$)|0(?:[^0-9e]|$))[0-9]*([Ee])[+-]?[0-9]*\.?[0-9]*|(?=[1-9]|\.(?:[^e]|$)|0(?:[^0-9e]|$))[0-9]*(\.?[0-9]*(?:[Ee][+-]?)?[0-9]*))') + + +jsonstring_re = re.compile(ur'"(?:[^"\\]|\\.)*"') + +# This might need some explanation. The ] and - are included in the first +# set, the ] in the first after the ^ and the - in the last positions of +# the set as required by RE syntax. The [ is part of it and isn't special, +# though it confuses things. 
The set comprises any character not in +# -{}[],:"0123456789 +# The second set comprises zero or more characters not in ,:]} +#word_re = re.compile(ur'[^][{}0-9",:-][^]},:]*') +# Screw that, we're using just a fallback. +jsoncatchall_re = re.compile(u'(.*?)[\x09\x0A\x0B\x0C\x0D ]*(?:[]},]|$)') + +digits_re = re.compile(u'[0-9]{1,9}') + + +class EInternalJsonInvalid(Exception): + """Used to force return of JSON_INVALID from child functions""" + pass + +def InternalJsonQuote(s): + return u'"' + jsonesc_re.sub(lambda x: jsonesc_dict[x.group()], s) + u'"' + +def InternalJsonUnquote(s): + """Relaxed unquote with LSL rules. Assumes string starts and ends in ", + may contain " and may end in \" too (i.e. malformed). E.g. "a"b\" is a + valid string for this function and the result is a"b\ + """ + assert s != u'' + assert s[0] == s[-1] == u'"' and s[1:2] + + ret = u'' + esc = False + for c in s[1:-1]: + if esc: + try: + ret += jsonunesc_dict[c] + except KeyError: + ret += c + esc = False + else: + if c == u'\\': + esc = True + else: + ret += c + if esc: + return ret + u'\\' + return ret + +def InternalJsonUnquoteX(s): + """Rigorous unquote; checks for quotes at the beginning and end only.""" + esc = last = False + first = True + + ret = u'' + for c in s: + if last: + break + if esc: + try: + ret += jsonunesc_dict[c] + except: + ret += c + esc = False + first = False + elif first: + if c != u'"': break + first = False + elif c == u'"': + last = True + first = False + elif c == u'\\': + esc = True + else: + ret += c + else: + if not first and last: + return ret + return s # malformed string, return the original + +def InternalJsonF2S(f): + if math.isnan(f): + return u'nan' + if math.isinf(f): + return u'inf' if f > 0 else u'-inf' + return u'%.6f' % f + +def InternalJsonScanMatching(json, idx): + """Shortcut: scan for a matching pair of {} or [] with proper nesting + and string handling, with no validity check other than well-formedness, + meaning all {} and [] must match. + """ + # TODO: InternalJsonScanMatching: Decide whether to use two nesting level variables rather than a stack. + # That would mean that a nested malformed string like [{]} would be valid. SL may accept that. + # (Or maybe even just ONE nesting level variable for the current element, + # disregarding the nesting of the other, e.g. if we're on an object, + # the [] are not tracked thus {[} would be valid. That sounds like LSL.) + # Experiments are advisable. + stk = [json[idx]] + str = False + esc = False + for i in xrange(idx+1, len(json)): + c = json[i] + if str: + if esc: + esc = False + elif c == u'\\': + esc = True + elif c == u'"': + str = False + elif c == u'"': + str = True + elif c in u'{[': + stk.append(c) + elif c in u']}': + if stk[-1] != (u'{' if c == u'}' else u'['): + return None # bad nesting + stk = stk[:-1] + if stk == []: + return i+1 + return None + +def InternalElement2Json(elem, ParseNumbers = True): + telem = type(elem) + if telem == unicode: + elem = llStringTrim(elem, 3) # STRING_TRIM + if elem == u'': + return u'""' + # Yes, these are checked after trimming. Don't facepalm too hard. 
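+ # e.g. u' true ' serializes as the literal true, while u'"true"'
+ # (already quoted) passes through as a JSON string.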
+ if elem == JSON_NULL: + return u'null' + if elem == JSON_TRUE: + return u'true' + if elem == JSON_FALSE: + return u'false' + if elem[0] == elem[-1] == u'"' and elem[1:2] or elem in ('null','false','true') \ + or elem[0] == u'[' and elem[-1] == u']' \ + or elem[0] == u'{' and elem[-1] == u'}': + return elem + + if ParseNumbers: + match = (jsonnumbug_re if 6466 in Bugs else jsonnum_re).match(elem) + if match and match.end() == len(elem): + return elem + + if elem == JSON_INVALID: + return u'' + + return InternalJsonQuote(elem) + + if telem == Key: + return u'"' + unicode(elem) + u'"' + if telem in (Vector, Quaternion): + return u'"<' + u', '.join([InternalJsonF2S(x) for x in elem]) + u'>"' + if telem == float: + return InternalJsonF2S(elem) + # Integer + return unicode(elem) + +def InternalJsonGetToken(json, idx): + + #start = idx + num_re = jsonnumbug_re if 6466 in Bugs else jsonnum_re + + L = len(json) + while idx < L: + c = json[idx] + if c not in u'\x09\x0A\x0B\x0C\x0D ': + break + idx += 1 + + if idx >= L: + return (idx, idx, None) + + c = json[idx] + if c in u',:{}[]': + return (idx, idx+1, c) + + match = jsonstring_re.match(json, idx) + if match: + return (idx, match.end(), JSON_STRING) + + match = num_re.match(json, idx) + if match: + return (idx, match.end(), JSON_NUMBER) + + match = jsoncatchall_re.match(json, idx) # matches always, even if empty string + s = match.group(1) + if s in (u'null', u'true', u'false'): + return (idx, match.end(1), + JSON_NULL if s == u'null' else JSON_TRUE if s == u'true' else JSON_FALSE) + return (idx, match.end(1), JSON_INVALID) + +def InternalJsonGetTokenFull(json, idx): + ret = InternalJsonGetToken(json, idx) + if ret[2] in (u'{', u'['): + match = InternalJsonScanMatching(json, ret[0]) + if match is not None: + return (ret[0], match, JSON_OBJECT if ret[2] == u'{' else JSON_ARRAY) + return ret + +def InternalJsonPathMatches(key, pathelem): + if type(key) == type(pathelem) == int or type(key) == unicode and isinstance(pathelem, unicode): + return key == pathelem + if type(key) == unicode and type(pathelem) == int: + raise EInternalJsonInvalid + # one combo remains - key is numeric and pathelem is unicode or Key + match = digits_re.match(pathelem) + if not match: + raise EInternalJsonInvalid + return key == int(match.group()) + +def InternalJsonFindValue(json, tgtpath, ReturnsToken, SetRules = False): + + # Building a function that meets the strange requisites of LL's json is not easy. + # These requisites include syntax-checking of all items at the current level, + # but not of items at a deeper nesting level. + + # Making it one-pass iterative O(len) instead of recursive O(depth*len) is even + # more of a challenge, especially with these constraints. + + token = InternalJsonGetToken(json, 0) + + if tgtpath == []: + # No nesting receives special treatment. 
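+ # e.g. llJsonGetValue(u' 5 ', []) == u'5', but trailing junk as in
+ # llJsonGetValue(u'5 x', []) yields JSON_INVALID.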
+ if token[2] in (JSON_NUMBER, JSON_STRING, JSON_NULL, JSON_TRUE, JSON_FALSE, JSON_INVALID): + if InternalJsonGetToken(json, token[1])[2] is None: + if ReturnsToken: + return token + if token[2] == JSON_NUMBER: + return json[token[0]:token[1]] + if token[2] == JSON_STRING: + return InternalJsonUnquote(json[token[0]:token[1]]) + if token[2] == JSON_INVALID: + # Accept malformed strings if they start and end in quotes + s = json[token[0]:token[1]] + if s[1:2] and s[0] == s[-1] == u'"': + return InternalJsonUnquote(s) + return token[2] + return JSON_INVALID + if token[2] not in (u'{', u'['): + return JSON_INVALID + + json = llStringTrim(json, 2) # STRING_TRIM_RIGHT + if json[-1] == u'}' and token[2] == u'{': + if ReturnsToken: + return (token[0], len(json), JSON_OBJECT) + return json[token[0]:] + if json[-1] == u']' and token[2] == u'[': + if ReturnsToken: + return (token[0], len(json), JSON_ARRAY) + return json[token[0]:] + return JSON_INVALID + + # This would be the code if there was proper scanning. + #match = InternalJsonScanMatching(json, token[0]) + #if match is None or InternalJsonGetToken(json, match)[2] is not None: + # return JSON_INVALID + #if ReturnsType: # this has been changed tho' - review if ever used + # return JSON_OBJECT if token[2] == u'{' else JSON_ARRAY + #return json[token[0]:match] + + if token[2] not in (u'{', u'['): + return JSON_INVALID + + # Follow the path + L = len(tgtpath) + # For the current position, matchlvl keeps track of how many levels are + # matched. When matchlvl == L, we are at the item of interest. + # For example: if we're at the ! in [1.0, "y", true, [1, ![6], {"a":5}]] + # and the path is [3, 2, "a"], matchlvl will be 1 (meaning the first level + # of the path, i.e. position 3, is matched, but we're not in sub-position + # 2 yet). + matchlvl = 0 + ret = None # the target token, if found, or None if not + + # Keeps track of what we have opened so far. + stk = [token[2]] + + # This tracks the current key within an array or object. Here we assume + # it's an array; if it's an object, the item key will replace it anyway. + curkey = 0 + + just_open = True + just_closed = False + + # Load next token + token = InternalJsonGetToken(json, token[1]) + + try: + while True: + # Process value if it can be present + kind = token[2] + if not (just_closed or + just_open and kind in (u'}', u']')): + # Item processing. + # Not entering here immediately after a } or ] (just_closed) + # or after a { or [ followed by } or ] (just_open...) + just_open = False + if kind in u':,]}' or kind == JSON_INVALID: + return JSON_INVALID + if stk[-1] == u'{': + # Read the current key + if kind != JSON_STRING: + return JSON_INVALID + colon = InternalJsonGetToken(json, token[1]) + if colon[2] != u':': + return JSON_INVALID + curkey = InternalJsonUnquote(json[token[0]:token[1]]) + token = InternalJsonGetToken(json, colon[1]) + kind = token[2] + del colon + if matchlvl < L and InternalJsonPathMatches(curkey, tgtpath[matchlvl]): + # Descend to this level + matchlvl += 1 + ret = None # because e.g. 
llJsonGetValue("{\"a\":[1],\"a\":2}",["a",0])==JSON_INVALID + if matchlvl == L: + if kind in u'{[': + match = InternalJsonScanMatching(json, token[0]) + if match is None: + return JSON_INVALID + token = (token[0], match, JSON_OBJECT if token[2] == u'{' else JSON_ARRAY) + ret = token + matchlvl -= 1 + elif kind in u'{[': + stk.append(token[2]) + curkey = 0 + just_open = True + token = InternalJsonGetToken(json, token[1]) + continue + else: + # We're skipping the element + if kind in u'[{': + match = InternalJsonScanMatching(json, token[0]) + if match is None: + return JSON_INVALID + token = (None, match) # HACK: shortcut to: (token[0], match, JSON_OBJECT if kind == u'{' else JSON_ARRAY) + just_closed = True + + token = InternalJsonGetToken(json, token[1]) # prepare next token + kind = token[2] + + just_closed = False + # Process coma if it can be present + if not just_open: + if kind == u',': + token = InternalJsonGetToken(json, token[1]) # load next token + if stk[-1] == u'[': + curkey += 1 + continue + + if kind == u'}' and stk[-1] == u'{' or kind == u']' and stk[-1] == u'[': + stk = stk[:-1] + matchlvl -= 1 + if stk == []: + if InternalJsonGetToken(json, token[1])[2] is None: + break # Yay! end of job! + return JSON_INVALID # No yay - something at end of string + just_closed = True + token = InternalJsonGetToken(json, token[1]) + continue + + return JSON_INVALID + + + except EInternalJsonInvalid: + return JSON_INVALID + + if ret is None: + return JSON_INVALID + if ReturnsToken: + return ret + if ret[2] == JSON_STRING: + return InternalJsonUnquote(json[ret[0]:ret[1]]) + if ret[2] in (JSON_NUMBER, JSON_OBJECT, JSON_ARRAY): + return json[ret[0]:ret[1]] + return ret[2] # JSON_TRUE, JSON_FALSE, JSON_NULL + +def InternalJson2Elem(json): + if json == u'': # checking this now lets us check for json[0] and json[-1] later + return u'' + + if json == u'null': + return JSON_NULL + + if json == u'false': + return JSON_FALSE + + if json == u'true': + return JSON_TRUE + + match = (jsonnumbug_re if 6466 in Bugs else jsonnum_re).match(json) + if match and match.end() == len(json): + # HACK: Use our RE to know if the number is an integer + if not match.group(1) and not match.group(2): + # we have just digits with optional minus sign, i.e. an integer + if len(json) > 11: # surely overflown + if json[0] == u'-': + return -2147483648 + return 2147483647 + # a bit harder to test; we could check in ASCII to avoid conversion + # to long in 32 bit systems, but it's probably not worth the effort + elem = int(json) + if elem > 2147483647: + return 2147483647 + if elem < -2147483648: + return -2147483648 + return elem + return InternalTypecast(json, float, InList=False, f32=True) + + # Malformed strings are valid, e.g. "a\" (final \" is converted into a \) + if json[0] == json[-1] == u'"' and json[1:2]: # the latter check ensures len(json) > 1 + return InternalJsonUnquote(json) + + return json + +def llJson2List(json): + shouldbestring(json) + json = llStringTrim(json, 3) # STRING_TRIM + + if json == u'': + return [] + + if json[0] == u'[' and json[-1] == u']': + # Array can of worms. Not all LSL quirks are implemented. 
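+ # e.g. llJson2List(u'[1,"a",2.5]') == [1, u'a', 2.5]: integers stay
+ # integers, quoted strings are unquoted, other numbers become floats.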
+ ret = []
+ token = InternalJsonGetTokenFull(json, 1)
+ if token[2] == u']' and token[1] == len(json):
+ return ret
+ if token[2] == u':':
+ return [JSON_INVALID]
+ if token[2] == u',':
+ ret.append(u'')
+ else:
+ ret.append(InternalJson2Elem(json[token[0]:token[1]]))
+ token = InternalJsonGetTokenFull(json, token[1])
+ while True:
+ if token[2] == u']' and token[1] == len(json):
+ break
+ elif token[2] != u',':
+ return [JSON_INVALID]
+ token = InternalJsonGetTokenFull(json, token[1])
+ if token[2] == u',' or token[2] == u']' and token[1] == len(json):
+ ret.append(u'')
+ else:
+ if token[2] == u':':
+ return [JSON_INVALID]
+ ret.append(InternalJson2Elem(json[token[0]:token[1]]))
+ token = InternalJsonGetTokenFull(json, token[1])
+ return ret
+
+ if json[0] == u'{' and json[-1] == u'}':
+ # Object can of worms. Worse than array. Not all LSL quirks are implemented.
+
+ # Parse this grammar:
+ # object: '{' complete_list incomplete_element '}' $
+ # complete_list: | complete_list complete_element ','
+ # complete_element: nonempty_string ':' value
+ # incomplete_element: | value | string ':' value
+ # string: '"' '"' | nonempty_string
+ #
+ # That allows:
+ # {"a":1,"b":2,} # incomplete_element is empty
+ # {"a":1,"b":2} # "b" is an incomplete_element
+ # {2} # complete_list empty
+ # {} # both empty
+ # etc.
+
+ ret = []
+ token = InternalJsonGetTokenFull(json, 1)
+ if token[2] == u'}' and token[1] == len(json):
+ return ret
+ if token[2] in (u':', u','):
+ return [JSON_INVALID]
+
+ while True:
+ k = u''
+ if token[2] == u'}' and token[1] == len(json):
+ ret.append(k)
+ ret.append(k)
+ return ret
+ if token[2] == JSON_STRING:
+ colon = InternalJsonGetTokenFull(json, token[1])
+ if colon[2] == u':':
+ k = InternalJsonUnquote(json[token[0]:token[1]])
+ token = InternalJsonGetTokenFull(json, colon[1])
+ if token[2] in (u',', u':'):
+ return [JSON_INVALID]
+ ret.append(k)
+ ret.append(InternalJson2Elem(json[token[0]:token[1]]))
+ token = InternalJsonGetTokenFull(json, token[1])
+ if token[2] == u'}' and token[1] == len(json):
+ return ret
+ if token[2] != u',' or k == u'':
+ return [JSON_INVALID]
+ token = InternalJsonGetTokenFull(json, token[1])
+
+ return [InternalJson2Elem(json)]
+
+def llJsonGetValue(json, lst):
+ shouldbestring(json)
+ shouldbelist(lst)
+ return InternalJsonFindValue(json, lst, ReturnsToken=False)
+
+'''def InternalJsonRecursiveSetValue(json, lst, val):
+ # We give up and make it recursive
+
+ if lst == []:
+ if val == JSON_DELETE:
+ return val
+ return InternalElement2Json(val, ParseNumbers=True)
+
+ ret = None
+ lst0 = lst[0]
+ tlst0 = type(lst0)
+ if tlst0 == Key:
+ tlst0 = unicode
+
+ if val != JSON_DELETE:
+
+ json = llStringTrim(json, 3) # STRING_TRIM
+ if tlst0 == int and json[0:1] == u'[' and json[-1:] == u']':
+ ret = []
+ close = u']'
+ if tlst0 == unicode and json[0:1] == u'{' and json[-1:] == u'}':
+ ret = {}
+ close = u'}'
+
+ if ret is not None:
+ if close: pass
+
+
+def llJsonSetValue(json, lst, val):
+ shouldbestring(json)
+ shouldbelist(lst)
+ shouldbestring(val)
+ if lst == []:
+ # [] replaces the entire string no matter if it was invalid
+ if val == JSON_DELETE:
+ return val # this is a special case for SetValue with []
+ return InternalElement2Json(val, ParseNumbers=True)
+ # Needs to cope with JSON_APPEND, JSON_DELETE, lastindex+1.
+ # Needs to do deep assignment.
+
+ # Recursive works best here
+ return InternalJsonRecursiveSetValue(json, lst, val)
+
+ return u"----unimplemented----" # TODO: Implement llJsonSetValue.
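For reference, the deep assignment that the disabled sketch above gropes toward is easy to draft over an ordinary decoded JSON tree. This is only an illustration of the idea (_set_path is a hypothetical helper, not the planned implementation; a real llJsonSetValue must also honor JSON_APPEND, JSON_DELETE and assignment to last index + 1):

    import json

    def _set_path(tree, path, val):
        # Descend one path component per call, rebuilding on the way out.
        if not path:
            return val
        tree[path[0]] = _set_path(tree[path[0]], path[1:], val)
        return tree

    # json.dumps(_set_path(json.loads('{"a": [1, 2]}'), ['a', 0], 9))
    # gives '{"a": [9, 2]}'
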
+'''
+
+def llJsonValueType(json, lst):
+ shouldbestring(json)
+ shouldbelist(lst)
+ ret = InternalJsonFindValue(json, lst, ReturnsToken=True)
+ if ret == JSON_INVALID:
+ return ret
+ return ret[2]
+
+def llList2Json(kind, lst):
+ shouldbestring(kind)
+ shouldbelist(lst)
+
+ if kind == JSON_OBJECT:
+ ret = u'{'
+ if len(lst) & 1:
+ return JSON_INVALID
+ for i in xrange(0, len(lst), 2):
+ if ret != u'{':
+ ret += u','
+ ret += InternalJsonQuote(lst[i]) + u':' + InternalElement2Json(lst[i+1], ParseNumbers=False)
+
+ ret += u'}'
+
+ elif kind == JSON_ARRAY:
+ ret = u'['
+ if lst:
+ ret += InternalElement2Json(lst[0], ParseNumbers=False)
+ for elem in lst[1:]:
+ ret += u',' + InternalElement2Json(elem, ParseNumbers=False)
+ ret += u']'
+
+ else:
+ ret = JSON_INVALID
+
+ return ret
diff --git a/lslopt/lsloutput.py b/lslopt/lsloutput.py
new file mode 100644
index 0000000..bfa2cea
--- /dev/null
+++ b/lslopt/lsloutput.py
@@ -0,0 +1,280 @@
+# Convert a symbol table (with parse tree) back to a script.
+import lslfuncs
+from lslcommon import Key, Vector, Quaternion
+
+class outscript(object):
+
+ # FIXME: is this correct:
+ binary_operands = frozenset(('||','&&','^','|','&','==','!=','<','<=','>',
+ '>=','<<','>>','+','-','*','/','%', '=', '+=', '-=', '*=', '/=','%=',
+ ))
+ extended_assignments = frozenset(('&=', '|=', '^=', '<<=', '>>='))
+ unary_operands = frozenset(('NEG', '!', '~'))
+
+ def Value2LSL(self, value):
+ if type(value) in (Key, unicode):
+ if type(value) == Key:
+ # Constants of type key cannot be represented
+ raise lslfuncs.ELSLTypeMismatch
+ return '"' + value.encode('utf8').replace('\\','\\\\').replace('"','\\"').replace('\n','\\n') + '"'
+ if type(value) == int:
+ return str(value)
+ if type(value) == float:
+ s = str(value)
+ # Try to remove as many decimals as possible while keeping the F32 value intact
+ exp = s.find('e')
+ if ~exp:
+ s, exp = s[:exp], s[exp:]
+ if '.' not in s:
+ # I couldn't produce one but it's assumed that if it happens,
+ # this code deals with it correctly
+ return s + exp # pragma: no cover
+ else:
+ if '.' not in s:
+ # This should never happen (Python should always return a point or exponent)
+ return s + '.' # pragma: no cover
+ exp = ''
+ while s[-1] != '.'
and lslfuncs.F32(float(s[:-1]+exp)) == value: + s = s[:-1] + return s + exp + if type(value) == Vector: + return '<' + self.Value2LSL(value[0]) + ', ' + self.Value2LSL(value[1]) \ + + ', ' + self.Value2LSL(value[2]) + '>' + if type(value) == Quaternion: + return '<' + self.Value2LSL(value[0]) + ', ' + self.Value2LSL(value[1]) \ + + ', ' + self.Value2LSL(value[2]) + ', ' + self.Value2LSL(value[3]) + '>' + if type(value) == list: + if value == []: + return '[]' + if len(value) < 5: + return '[ ' + self.Value2LSL(value[0]) + ' ]' + ret = '\n' + first = True + self.indentlevel += 1 + for entry in value: + if not first: + ret += self.dent() + ', ' + else: + ret += self.dent() + '[ ' + ret += self.Value2LSL(entry) + '\n' + first = False + self.indentlevel -= 1 + return ret + self.dent() + self.indent + ']' + raise lslfuncs.ELSLTypeMismatch + + def dent(self): + return self.indent * self.indentlevel + + def OutIndented(self, code): + if code[0] != '{}': + self.indentlevel += 1 + ret = self.OutCode(code) + if code[0] != '{}': + self.indentlevel -= 1 + return ret + + def OutExprList(self, L): + ret = '' + if L: + for item in L: + if ret != '': + ret += ', ' + ret += self.OutExpr(item) + return ret + + def OutExpr(self, expr): + # Save some recursion by unwrapping the expression + while expr[0] == 'EXPR': + expr = expr[2] + node = expr[0] + + if node == '()': + return '(' + self.OutExpr(expr[2]) + ')' + if node in self.binary_operands: + return self.OutExpr(expr[2]) + ' ' + node + ' ' + self.OutExpr(expr[3]) + + if node == 'IDENT': + return expr[2] + if node == 'CONSTANT': + return self.Value2LSL(expr[2]) + if node == 'CAST': + ret = '(' + expr[1] + ')' + expr = expr[2] + if expr[0] == 'EXPR': + expr = expr[2] + if expr[0] in ('CONSTANT', 'IDENT', 'V++', 'V--', 'VECTOR', + 'ROTATION', 'LIST', 'FIELD', 'PRINT', 'FUNCTION', '()'): + ret += self.OutExpr(expr) + else: + ret += '(' + self.OutExpr(expr) + ')' + return ret + if node == 'LIST': + if len(expr) == 2: + return '[]' + return '[' + self.OutExprList(expr[2:]) + ']' + if node == 'VECTOR': + return '<' + self.OutExpr(expr[2]) + ', ' + self.OutExpr(expr[3]) \ + + ', ' + self.OutExpr(expr[4]) + '>' + if node == 'ROTATION': + return '<' + self.OutExpr(expr[2]) + ', ' + self.OutExpr(expr[3]) \ + + ', ' + self.OutExpr(expr[4]) + ', ' + self.OutExpr(expr[5]) + '>' + if node == 'FUNCTION': + return expr[2] + '(' + self.OutExprList(expr[3]) + ')' + if node == 'PRINT': + return 'print(' + self.OutExpr(expr[2]) + ')' + + if node in self.unary_operands: + if node == 'NEG': + node = '- ' + return node + self.OutExpr(expr[2]) + + if node == 'FIELD': + return self.OutExpr(expr[2]) + '.' 
+ expr[3] + + if node in ('V--', 'V++'): + return self.OutExpr(expr[2]) + node[1:] + if node in ('--V', '++V'): + return node[:-1] + self.OutExpr(expr[2]) + + if node in self.extended_assignments: + op = self.OutExpr(expr[2]) + return op + ' = ' + op + ' ' + node[:-1] + ' (' + self.OutExpr(expr[3]) + ')' + + raise Exception('Internal error: expression type "' + node + '" not handled') # pragma: no cover + + def OutCode(self, code): + #return self.dent() + '{\n' + self.dent() + '}\n' + node = code[0] + if node == '{}': + ret = self.dent() + '{\n' + self.indentlevel += 1 + for stmt in code[2:]: + ret += self.OutCode(stmt) + self.indentlevel -= 1 + return ret + self.dent() + '}\n' + if node == 'IF': + ret = self.dent() + 'if (' + self.OutExpr(code[2]) + ')\n' + ret += self.OutIndented(code[3]) + if len(code) > 4: + ret += self.dent() + 'else\n' + if code[4][0] == 'IF': + ret += self.OutCode(code[4]) + else: + ret += self.OutIndented(code[4]) + return ret + if node == 'EXPR': + return self.dent() + self.OutExpr(code) + ';\n' + if node == 'WHILE': + ret = self.dent() + 'while (' + self.OutExpr(code[2]) + ')\n' + ret += self.OutIndented(code[3]) + return ret + if node == 'DO': + ret = self.dent() + 'do\n' + ret += self.OutIndented(code[2]) + return ret + self.dent() + 'while (' + self.OutExpr(code[3]) + ');\n' + if node == 'FOR': + ret = self.dent() + 'for (' + if code[2]: + ret += self.OutExpr(code[2][0]) + if len(code[2]) > 1: + for expr in code[2][1:]: + ret += ', ' + self.OutExpr(expr) + ret += '; ' + self.OutExpr(code[3]) + '; ' + if code[4]: + ret += self.OutExpr(code[4][0]) + if len(code[4]) > 1: + for expr in code[4][1:]: + ret += ', ' + self.OutExpr(expr) + ret += ')\n' + ret += self.OutIndented(code[5]) + return ret + if node == '@': + return self.dent() + '@' + code[2] + ';\n' + if node == 'JUMP': + assert code[2][0:2] == ['IDENT', 'Label'] + return self.dent() + 'jump ' + code[2][2] + ';\n' + if node == 'STATE': + name = 'default' + if code[2] != 'DEFAULT': + assert code[2][0:2] == ['IDENT', 'State'] + name = code[2][2] + return self.dent() + 'state ' + name + ';\n' + if node == 'RETURN': + if code[2] is None: + return self.dent() + 'return;\n' + return self.dent() + 'return ' + self.OutExpr(code[2]) + ';\n' + if node == 'DECL': + sym = self.symtab[code[3]][code[2]] + ret = self.dent() + sym[1] + ' ' + code[2] + if sym[2] is not None: + ret += ' = ' + self.OutExpr(sym[2]) + return ret + ';\n' + if node == ';': + return self.dent() + ';\n' + + raise Exception('Internal error: statement type not found: ' + repr(node)) # pragma: no cover + + def OutFunc(self, typ, name, paramlist, paramsymtab, code): + ret = self.dent() + if typ is not None: + ret += typ + ' ' + ret += name + '(' + first = True + if paramlist: + for name in paramlist: + if not first: + ret += ', ' + ret += paramsymtab[name][1] + ' ' + name + first = False + return ret + ')\n' + self.OutCode(code) + + def output(self, symtab): + # Build a sorted list of dict entries + order = [] + self.symtab = symtab + + for i in symtab: + item = [] + for j in sorted(i.items(), key=lambda k: -1 if k[0]==-1 else k[1][0]): + if j[0] != -1: + item.append(j[0]) + order.append(item) + + ret = '' + self.indent = ' ' + self.indentlevel = 0 + for name in order[0]: + sym = symtab[0][name] + + ret += self.dent() + if sym[1] == 'State': + if name == 'default': + ret += 'default\n{\n' + else: + ret += 'state ' + name + '\n{\n' + + self.indentlevel += 1 + eventorder = [] + for event in sorted(sym[2].items(), key=lambda k: k[1][0]): + 
eventorder.append(event[0]) + for name in eventorder: + eventdef = sym[2][name] + ret += self.OutFunc(eventdef[1], name, eventdef[3], symtab[eventdef[4]], eventdef[2]) + self.indentlevel -= 1 + ret += self.dent() + '}\n' + + elif len(sym) > 3: + ret += self.OutFunc(sym[1], name, sym[3], symtab[sym[4]], sym[2]) + + else: + ret += sym[1] + ' ' + name + if sym[2] is not None: + ret += ' = ' + if type(sym[2]) == tuple: + ret += self.OutExpr(sym[2]) + else: + ret += self.Value2LSL(sym[2]) + + ret += ';\n' + + return ret diff --git a/lslopt/lslparse.py b/lslopt/lslparse.py new file mode 100644 index 0000000..91cbba8 --- /dev/null +++ b/lslopt/lslparse.py @@ -0,0 +1,1873 @@ +from lslcommon import Key, Vector, Quaternion +import lslfuncs +import sys, re + +# Note this module was basically written from bottom to top, which may help +# reading it. + +def warning(txt): + assert type(txt) == unicode + sys.stderr.write(txt + u'\n') + +def isdigit(c): + return '0' <= c <= '9' + +def isalpha_(c): + return c == '_' or 'A' <= c <= 'Z' or 'a' <= c <= 'z' + +def isalphanum_(c): + return isalpha_(c) or isdigit(c) + +def ishex(c): + return '0' <= c <= '9' or 'A' <= c <= 'F' or 'a' <= c <= 'f' + +def fieldpos(inp, sep, n): + "Return the starting position of field n in a string inp that has zero or more fields separated by sep" + i = -1 + for n in xrange(n): + i = inp.find(sep, i + 1) + if i < 0: + return i + return i + 1 + +class EParse(Exception): + + def __init__(self, parser, msg): + lno = parser.script.count('\n', 0, parser.errorpos) + cno = parser.errorpos - fieldpos(parser.script, '\n', lno) + # Note the column number reported is in bytes. + + msg = u"(Line %d char %d): ERROR: %s" % (lno + 1, cno + 1, msg) + super(EParse, self).__init__(msg) + +class EParseUEOF(EParse): + def __init__(self, parser): + parser.errorpos = len(parser.script) + super(self.__class__, self).__init__(parser, u"Unexpected EOF") + +class EParseSyntax(EParse): + def __init__(self, parser): + super(self.__class__, self).__init__(parser, u"Syntax error") + +class EParseAlreadyDefined(EParse): + def __init__(self, parser): + super(self.__class__, self).__init__(parser, u"Name previously declared within scope") + +class EParseUndefined(EParse): + def __init__(self, parser): + super(self.__class__, self).__init__(parser, u"Name not defined within scope") + +class EParseUnexpected(EParse): + def __init__(self, parser): + super(self.__class__, self).__init__(parser, u"Unexpected internal error") + +class EParseTypeMismatch(EParse): + def __init__(self, parser): + super(self.__class__, self).__init__(parser, u"Type mismatch") + +class EParseReturnShouldBeEmpty(EParse): + def __init__(self, parser): + super(self.__class__, self).__init__(parser, u"Return statement type doesn't match function return type") + +class EParseReturnIsEmpty(EParse): + def __init__(self, parser): + super(self.__class__, self).__init__(parser, u"Function returns a value but return statement doesn't") + +# This error message may sound funny, for good reasons. 
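(The wording below mimics the error the stock LSL compiler emits, which calls member access like '.x' a "method" even though it is nothing of the sort.) A hypothetical driver showing how it surfaces, assuming parser() takes no constructor arguments and that parse() is the entry point defined near the end of this file:

    from lslparse import parser, EParseInvalidField

    try:
        parser().parse('default{timer(){integer i; i.x;}}')
    except EParseInvalidField:
        pass  # "(Line 1 char ...): ERROR: Use of vector or quaternion method on incorrect type"
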
+class EParseInvalidField(EParse): + def __init__(self, parser): + super(self.__class__, self).__init__(parser, u"Use of vector or quaternion method on incorrect type") + +class EParseFunctionMismatch(EParse): + def __init__(self, parser): + super(self.__class__, self).__init__(parser, u"Function type mismatches type or number of arguments") + +class EParseDeclarationScope(EParse): + def __init__(self, parser): + super(self.__class__, self).__init__(parser, u"Declaration requires a new scope -- use { and }") + +class EInternal(Exception): + """This exception is a construct to allow a different function to cause an + immediate return of EOF from parser.GetToken(). Reused elsewhere for + detecting parsing errors. + """ + pass + +# This table is to save memory in the parse tree in interpreters that don't +# intern strings. +S = ('integer','float','string','key','vector','rotation','quaternion','list', + 'IDENT','x','y','z','s','CAST','<','<=','>=','>','CONSTANT','VECTOR', + 'ROTATION','LIST','PRINT','FUNCTION','FIELD','EXPR','V++','V--','=', + '+=','-=','*=','/=','%=','&=','|=','^=','<<=','>>=','NEG','!','~','++V', + '--V','()','*','/','%','+','-','<<','>>','==','!=','&','^','|','&&','||', + '@','JUMP','STATE','RETURN','IF','WHILE','DO','FOR','DECL','{}', + 'Label','State','TRUE','FALSE','default','DEFAULT' + ) +S = {i:i for i in S} + +class parser(object): + assignment_ops = frozenset(('=', '+=', '-=', '*=', '/=', '%=')) + extassignment_ops = frozenset(('|=', '&=', '^=', '<<=', '>>=')) + + double_ops = frozenset(('++', '--', '+=', '-=', '*=', '/=', '%=', '==', + '!=', '>=', '<=', '&&', '||', '<<', '>>')) + extdouble_ops = frozenset(('|=', '&=', '^=')) + + # These are hardcoded because additions or modifications imply + # important changes to the code anyway. + keywords = frozenset((S['default'], 'state', 'event', 'jump', 'return', 'if', + 'else', 'for', 'do', 'while', 'print', S['TRUE'], S['FALSE'])) + types = frozenset((S['integer'],S['float'],S['string'],S['key'],S['vector'], + S['quaternion'],S['rotation'],S['list'])) + # Default values per type when declaring variables + DefaultValues = {S['integer']: 0, S['float']: 0.0, S['string']: u'', + S['key']: Key(u''), S['vector']: lslfuncs.ZERO_VECTOR, + S['rotation']: lslfuncs.ZERO_ROTATION, S['list']: [] + } + + PythonType2LSL = {int: S['integer'], float: S['float'], + unicode: S['string'], Key: S['key'], Vector: S['vector'], + Quaternion: S['rotation'], list: S['list']} + + PythonType2LSLToken = {int:'INTEGER_VALUE', float:'FLOAT_VALUE', + unicode:'STRING_VALUE', Key:'KEY_VALUE', Vector:'VECTOR_VALUE', + Quaternion:'ROTATION_VALUE', list:'LIST_VALUE'} + + + def PushScope(self): + """Create a new symbol table / scope level""" + self.symtab.append({-1: self.scopeindex}) + self.scopeindex = len(self.symtab)-1 + + def PopScope(self): + """Return to the previous scope level""" + self.scopeindex = self.symtab[self.scopeindex][-1] + if self.scopeindex is None: + raise EParseUnexpected(self) + + def FindSymbolPartial(self, symbol, MustBeLabel = False): + """Find a symbol in all visible scopes in order. + + Labels have special scope rules: other identifiers with the same + name that are not labels are invisible to JUMP statements. Example: + + default{timer(){ @x; {integer x; jump x;} }} + + finds the label at the outer block. However: + + default{timer(){ @x; integer x; }} + + gives an identifier already defined error. 
On the other hand, labels + hide other types (but that's dealt with in the caller to this function): + + default{timer(){ integer a; { @a; a++; } }} + + gives an Name Not Defined error. + """ + scope = self.scopeindex + while scope is not None: + symtab = self.symtab[scope] + if symbol in symtab and (not MustBeLabel or symtab[symbol][1] == 'Label'): + return symtab[symbol] + scope = symtab[-1] # it's a dict, not a list; -1 is a key + return None + + # No labels or states allowed here (but functions are) + def FindSymbolFull(self, symbol): + """Returns either a string with the LSL type, or a tuple if it's a + function. + """ + scope = self.scopeindex + while scope: + symtab = self.symtab[scope] + if symbol in symtab: + # This can't happen, as functions can't be local + #if len(symtab[symbol]) > 3: + # return (symtab[symbol][1], symtab[symbol][3]) + return symtab[symbol][1] + scope = symtab[-1] + if symbol not in self.globals: + return None + return self.globals[symbol] + + def ValidateField(self, typ, field): + if typ == 'vector' and field in ('x', 'y', 'z') \ + or typ == 'rotation' and field in ('x', 'y', 'z', 's'): + return + raise EParseInvalidField(self) + + def order(self): + self.dictorder += 1 + return self.dictorder + + def autocastcheck(self, value, typ): + """Check if automatic dynamic cast is possible, and insert it if + requested explicitly. + """ + if value[1] == typ: + return value + if value[1] in ('string', 'key') and typ in ('string', 'key') \ + or value[1] == 'integer' and typ == 'float': + if self.explicitcast: + return [S['CAST'], S[typ], value] + return value + raise EParseTypeMismatch(self) + + def ueof(self): + "Check for unexpected EOF" + if self.pos >= self.length: + raise EParseUEOF(self) + + def ceof(self): + "Check for normal EOF" + if self.pos >= self.length: + raise EInternal() # force GetToken to return EOF + + def GetToken(self): + "Lexer" + + # Keep track of the current position. If an error occurs, it will happen at the start of this token. + self.errorpos = self.pos + + try: + while self.pos < self.length: + c = self.script[self.pos] + self.pos += 1 + + # Process comments + if c == '/': + if self.script[self.pos:self.pos+1] == '/': + self.pos += 1 + self.ceof() + while self.script[self.pos] != '\n': + self.pos += 1 + self.ceof() # A single-line comment at EOF is not unexpected EOF. + + self.pos += 1 + self.ceof() + continue + + elif self.script[self.pos:self.pos+1] == '*': + self.pos += 2 + while self.script[self.pos-1:self.pos+1] != '*/': + self.pos += 1 + self.ueof() # An unterminated multiline comment *is* unexpected EOF. 
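The two-character look-behind window above can be hard to see at a glance. The same scan, detached from the tokenizer (an editor's illustration, not part of the module):

    def skip_block_comment(s, i):
        # i is the index of the '*' of the opening '/*', which is where
        # self.pos points when GetToken enters that branch.
        i += 2
        while s[i-1:i+1] != '*/':
            if i >= len(s):
                raise EOFError('unterminated /* comment')
            i += 1
        return i + 1  # index of the first character after '*/'

    assert skip_block_comment('/**/x', 1) == 4
    assert skip_block_comment('/* a */x', 1) == 7
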
+ + self.pos += 1 + self.ceof() + continue + + # Process strings + if c == '"' or c == 'L' and self.script[self.pos:self.pos+1] == '"': + strliteral = '' + if c == 'L': + self.pos += 1 + strliteral = '"' + + while self.script[self.pos:self.pos+1] != '"': + self.ueof() + if self.script[self.pos] == '\\': + self.pos += 1 + self.ueof() + if self.script[self.pos] == 'n': + strliteral += '\n' + elif self.script[self.pos] == 't': + strliteral += ' ' + else: + strliteral += self.script[self.pos] + else: + strliteral += self.script[self.pos] + self.pos += 1 + + self.pos += 1 + return ('STRING_VALUE', strliteral.decode('utf8')) + + if isalpha_(c): + # Identifier or reserved + + ident = c + while isalphanum_(self.script[self.pos:self.pos+1]): + ident += self.script[self.pos] + self.pos += 1 + + # Got an identifier - check if it's a reserved word + if ident in self.keywords: + return (ident.upper(),) + if ident in self.types: + if ident == 'quaternion': + ident = 'rotation' # Normalize types + return ('TYPE',ident) + if ident in self.events: + return ('EVENT_NAME',ident) + if ident in self.constants: + value = self.constants[ident] + return (self.PythonType2LSLToken[type(value)], value) + + return ('IDENT', ident) + + # Process numbers: float, hex integer, dec integer + if c == '.' or isdigit(c): + + number = '' + if c != '.': + # We have a digit, which means we have for sure either + # an integer or a float. + + # Eat as many decimal digits as possible + number = c + while isdigit(self.script[self.pos:self.pos+1]): + number += self.script[self.pos] + self.pos += 1 + + if number == '0' and self.script[self.pos:self.pos+1] in ('x','X') \ + and ishex(self.script[self.pos+1:self.pos+2]): + # We don't need the 0x prefix. + + self.pos += 1 + # Eat leading zeros to know the real length. + while self.script[self.pos:self.pos+1] == '0': + self.pos += 1 + number = '' + + while ishex(self.script[self.pos:self.pos+1]): + if len(number) < 9: # don't let it grow more than necessary + number += self.script[self.pos] + self.pos += 1 + if number == '': + # We know there was at least a valid digit so it + # must've been all zeros. + number = '0' + if len(number) > 8: + number = -1 + else: + number = lslfuncs.S32(int(number, 16)) + return ('INTEGER_VALUE', number) + + # Add the dot if present + if self.script[self.pos:self.pos+1] == '.': + number += '.' + self.pos += 1 + else: + number = c + + while isdigit(self.script[self.pos:self.pos+1]): + number += self.script[self.pos] + self.pos += 1 + + # At this point, number contains as many digits as there are before the dot, + # the dot if present, and as many digits as there are after the dot. + if number != '.': # A dot alone can't be a number so we rule it out here. + exp = '' + if self.script[self.pos:self.pos+1] in ('e','E'): + epos = self.pos # Temporary position tracker, made permanent only if the match succeeds + exp = self.script[epos] + epos += 1 + if self.script[epos:epos+1] in ('+','-'): + exp += self.script[epos] + epos += 1 + if isdigit(self.script[epos:epos+1]): + # Now we *do* have an exponent. + exp += self.script[epos] + epos += 1 + while isdigit(self.script[epos:epos+1]): + exp += self.script[epos] + epos += 1 + self.pos = epos # "Commit" the new position + else: + exp = '' # No cigar. Rollback and backtrack. Invalidate exp. + + if exp != '' or '.' in number: # Float + if '.' 
in number: + # Eat the 'F' if present + if self.script[self.pos:self.pos+1] in ('f','F'): + # Python doesn't like the 'F' so don't return it + #exp += self.script[self.pos] + self.pos += 1 + return ('FLOAT_VALUE', lslfuncs.F32(float(number + exp))) + + if len(number) > 10 or len(number) == 10 and number > '4294967295': + number = -1 + else: + number = lslfuncs.S32(int(number)) + + return ('INTEGER_VALUE', number) + + if self.script[self.pos-1:self.pos+1] in self.double_ops \ + or self.extendedassignment and self.script[self.pos-1:self.pos+1] in self.extdouble_ops: + self.pos += 1 + if self.extendedassignment and self.script[self.pos-2:self.pos+1] in ('<<=', '>>='): + self.pos += 1 + return (self.script[self.pos-3:self.pos],) + return (self.script[self.pos-2:self.pos],) + + if c in '.;{},=()-+*/%@:<>[]&|^~!' and c != '': + return (c,) + + # We eat spacers AND any other character so the following is not needed, + # although the lex file includes it (the lex file does not count() invalid characters + # for the purpose of error reporting). + #if c in ' \n\r\x0B': + # continue + + except EInternal: + pass # clear the exception and fall through + + return ('EOF',) + + def NextToken(self): + """Calls GetToken and sets the internal token.""" + self.tok = self.GetToken() + + # Recursive-descendent parser. The result is a symbol table. + + def expect(self, toktype): + """Raise exception if the current token is not the given one.""" + if self.tok[0] != toktype: + if self.tok[0] == 'EOF': + raise EParseUEOF(self) + raise EParseSyntax(self) + + def Parse_vector_rotation_tail(self): + """(See Parse_unary_postfix_expression for context) + + To our advantage, the precedence of the closing '>' in a vector or + rotation literal is that of an inequality. Our strategy will thus be + to perform the job of an inequality, calling the lower level 'shift' + rule and building the inequalities if they are not '>'. When we find a + '>', we check whether the next token makes sense as beginning an + inequality; if not, we finally close the vector or rotation. + + But first, a quaternion _may_ have a full expression at the third + component, so we tentatively parse this position as an expression, and + backtrack if it causes an error. This is the only point where this + parser backtracks. + """ + ret = [] + pos = self.pos + errorpos = self.errorpos + tok = self.tok + try: + ret.append(self.Parse_expression()) + + # Checking here for '>' might parse a different grammar, because + # it might allow e.g. <1,2,3==3>; as a vector, which is not valid. + # Not too sure about that, but we're cautious and disable this + # just in case. + #if self.tok[0] == '>': + # return ret + + self.expect(',') + self.NextToken() + except EParseSyntax: + # Backtrack + self.pos = pos + self.errorpos = errorpos + self.tok = tok + + # OK, here we are. + inequality = self.Parse_shift() # shift is the descendant of inequality + while self.tok[0] in ('<', '<=', '>=', '>'): + op = self.tok[0] + self.NextToken() + if op == '>': + # Check if the current token can be a part of a comparison. + # If not, it's a vector/quaternion terminator. + if self.tok[0] not in ( + # List adapted from this section of the bison report: +#state 570 +# +# 176 expression: expression '>' . expression +# 214 quaternion_initializer: '<' expression ',' expression ',' expression ',' expression '>' . 
+ + 'IDENT', 'INTEGER_VALUE', 'FLOAT_VALUE', 'STRING_VALUE', + 'KEY_VALUE', 'VECTOR_VALUE', 'ROTATION_VALUE', 'LIST_VALUE', + 'TRUE', 'FALSE', '++', '--', 'PRINT', '!', '~', '(', '[' + ): + ret.append(inequality) + return ret + # This is basically a copy/paste of the Parse_inequality handler + type1 = inequality[1] + if type1 not in ('integer', 'float'): + raise EParseTypeMismatch(self) + value = self.Parse_shift() + type2 = value[1] + if type2 not in ('integer', 'float'): + raise EParseTypeMismatch(self) + if type1 != type2: + if type2 == 'float': + inequality = self.autocastcheck(inequality, type2) + else: + value = self.autocastcheck(value, type1) + inequality = [S[op], S['integer'], inequality, value] + + # Reaching this means an operator or lower precedence happened, + # e.g. <1,1,1,2==2> (that's syntax error in ==) + raise EParseSyntax(self) + + + def Parse_unary_postfix_expression(self, AllowAssignment = True): + """Grammar parsed here: + + unary_postfix_expression: INTEGER_VALUE | FLOAT_VALUE + | STRING_VALUE | KEY_VALUE | VECTOR_VALUE | ROTATION_VALUE + | LIST_VALUE | TRUE | FALSE | vector_literal | rotation_literal | list_literal + | PRINT '(' expression ')' | IDENT '(' expression_list ')' + | lvalue '++' | lvalue '--' | assignment %if allowed + | lvalue + vector_literal: '<' expression ',' expression ',' expression '>' + rotation_literal: '<' expression ',' expression ',' expression + ',' expression '>' + list_literal: '[' optional_expression_list ']' + assignment: lvalue '=' expression | lvalue '+=' expression + | lvalue '-=' expression | lvalue '*=' expression + | lvalue '/=' expression | lvalue '%=' expression + %EXTENDED RULES: + | lvalue '|=' expression | lvalue '&=' expression + | lvalue '<<=' expression | lvalue '>>=' expression + lvalue: IDENT | IDENT '.' IDENT + """ + tok0 = self.tok[0] + val = self.tok[1] if len(self.tok) > 1 else None + self.NextToken() + CONSTANT = S['CONSTANT'] + if tok0 == '-' and self.tok[0] in ('INTEGER_VALUE', 'FLOAT_VALUE'): + tok0 = self.tok[0] + val = self.tok[1] + self.NextToken() + return [CONSTANT, S['integer' if type(val) == int else 'float'], -val] + if tok0 == 'INTEGER_VALUE': + return [CONSTANT, S['integer'], val] + if tok0 == 'FLOAT_VALUE': + return [CONSTANT, S['float'], val] + if tok0 == 'STRING_VALUE': + return [CONSTANT, S['string'], val] + # Key constants are not currently supported - use string + #if tok0 == 'KEY_VALUE': + # return [CONSTANT, S['key'], val] + if tok0 == 'VECTOR_VALUE': + return [CONSTANT, S['vector'], val] + if tok0 == 'ROTATION_VALUE': + return [CONSTANT, S['rotation'], val] + if tok0 == 'LIST_VALUE': + return [CONSTANT, S['list'], val] + if tok0 in ('TRUE', 'FALSE'): + return [CONSTANT, S['integer'], 1 if tok0 == 'TRUE' else 0] + if tok0 == '<': + val = [self.Parse_expression()] + self.expect(',') + self.NextToken() + val.append(self.Parse_expression()) + self.expect(',') + self.NextToken() + + # It would be cute if it were this simple: + #val.append(self.Parse_expression()) + #if self.tok[0] == '>': + # self.NextToken() + # return [S['VECTOR'], S['vector']] + val + #self.expect(',') + #self.NextToken() + #val.append(self.Parse_inequality()) + #self.expect('>') + #self.NextToken() + #return [S['ROTATION'], S['rotation']] + val + + # Alas, it isn't. The closing angle bracket of a vector '>' + # conflicts with the inequality operator '>' in unexpected ways. + # Example: <2,2,2> * 2 will trigger the problem: + # the expression parser tries to parse the inequality 2 > *2, + # choking at the *. 
To make things worse, LSL admits things such as + # <2,2,2 > 2> (but not things like <2,2,2 == 2> because the == has + # lower precedence than the '>' and thus it forces termination of + # the vector constant). And to make things even worse, it also + # admits things such as <2,2,2 == 2, 2> because the comma is not in + # the precedence scale, so it's quite complex to handle. + + # We defer it to a separate function. + val += self.Parse_vector_rotation_tail() + + if len(val) == 3: + return [S['VECTOR'], S['vector']] + val + return [S['ROTATION'], S['rotation']] + val + + if tok0 == '[': + val = self.Parse_optional_expression_list() + self.expect(']') + self.NextToken() + return [S['LIST'], S['list']] + val + if tok0 == 'PRINT': + self.expect('(') + self.NextToken() + val = self.Parse_expression() + if val[1] not in self.types: + raise EParseTypeMismatch(self) if val[1] is None else EParseUndefined(self) + self.expect(')') + self.NextToken() + return [S['PRINT'], None, val] + + if tok0 != 'IDENT': + if tok0 == 'EOF': + raise EParseUEOF(self) + raise EParseSyntax(self) + typ = self.FindSymbolFull(val) + if typ is None: + raise EParseUndefined(self) + # Note this may fail to do interning of the string from the symbol table. + # Doing so with a dictionary key may affect performance. + name = val + + # Course of action decided here. + tok0 = self.tok[0] + if tok0 == '(': + # Function call + self.NextToken() + if type(typ) != tuple: + raise EParseUndefined(self) + args = self.Parse_optional_expression_list(typ[1]) + self.expect(')') + self.NextToken() + return [S['FUNCTION'], None if typ[0] is None else S[typ[0]], name, args, self.scopeindex] + if typ not in self.types: + raise EParseTypeMismatch(self) + typ = S[typ] + lvalue = [S['IDENT'], typ, name, self.scopeindex] + if tok0 == '.': + self.NextToken() + self.expect('IDENT') + self.ValidateField(typ, self.tok[1]) + lvalue = [S['FIELD'], S['float'], lvalue, S[self.tok[1]]] + self.NextToken() + tok0 = self.tok[0] + + if tok0 in ('++', '--'): + self.NextToken() + if lvalue[1] not in ('integer', 'float'): + raise EParseTypeMismatch(self) + return [S['V'+tok0], lvalue[1], lvalue] + if AllowAssignment and (tok0 in self.assignment_ops + or self.extendedassignment and tok0 in self.extassignment_ops): + self.NextToken() + expr = self.Parse_expression() + rtyp = expr[1] + if rtyp not in self.types: + raise EParseTypeMismatch(self) + if typ in ('integer', 'float'): + # LSL admits integer *= float (go figger). + # It acts like: lhs = (integer)((float)lhs * rhs) + # That would trigger an error without this check. + if tok0 != '*=' or typ == 'float': + expr = self.autocastcheck(expr, typ) + rtyp = typ + # Lots of drama for checking types. This is pretty much like + # addition, subtraction, multiply, divide, etc. all in one go. 
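The "integer *= float" quirk admitted above is worth pinning down with numbers. In plain Python, the semantics described by that comment amount to (an editor's sketch, not code from this module):

    def int_times_float(lhs, rhs):
        # lhs = (integer)((float)lhs * rhs): multiply in float, then
        # truncate toward zero like an LSL (integer) cast.
        return int(float(lhs) * rhs)

    assert int_times_float(3, 2.5) == 7    # 7.5 truncates to 7
    assert int_times_float(-3, 2.5) == -7  # truncation is toward zero
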
+ if tok0 == '=': + if typ == 'list' != rtyp: + if self.explicitcast: + expr = [S['CAST'], typ, expr] + else: + expr = self.autocastcheck(expr, typ) + + return [S['='], typ, lvalue, expr] + + if tok0 == '+=': + if typ == 'float': + expr = self.autocastcheck(expr, typ) + if rtyp != typ != 'list' or typ == rtyp == 'key': + # key + key is the only disallowed combo of equals + raise EParseTypeMismatch(self) + if self.explicitcast: + if typ == 'list' != rtyp: + expr = [S['CAST'], S[typ], expr] + return [S[tok0], typ, lvalue, expr] + + if tok0 == '-=': + if typ == rtyp in ('integer', 'float', 'vector', 'rotation'): + return [S[tok0], typ, lvalue, expr] + raise EParseTypeMismatch(self) + + if tok0 in ('*=', '/='): + # There is a special case dealt with in advance. + if tok0 == '*=' and typ == 'integer' and rtyp == 'float': + return [S[tok0], typ, lvalue, expr] + + if (typ == rtyp or typ == 'vector') and rtyp in ('integer', 'float', 'rotation'): + if typ == 'vector' and rtyp == 'integer': + expr = self.autocastcheck(expr, 'float') + return [S[tok0], typ, lvalue, expr] + raise EParseTypeMismatch(self) + + if tok0 == '%=': + if typ == rtyp in ('integer', 'vector'): + return [S[tok0], typ, lvalue, expr] + + # Rest take integer operands only + + if typ == rtyp == 'integer': + return [S[tok0], typ, lvalue, expr] + + return lvalue + + def Parse_unary_expression(self, AllowAssignment = True): + """Grammar parsed here: + + unary_expression: '-' factor | '!' unary_expression | '~' unary_expression + # we expand lvalue here to facilitate parsing + | '++' IDENT | '++' IDENT '.' IDENT + | '--' IDENT | '--' IDENT '.' IDENT + | '(' TYPE ')' typecast_expression | '(' expression ')' + | unary_postfix_expression + %NORMAL RULES ONLY: + typecast_expression: '(' expression ')' | unary_postfix_expression %except assignment + %EXTENDED RULES ONLY: + typecast_expression: unary_expression %except assignment + """ + tok0 = self.tok[0] + if tok0 == '-': + # Unary minus + self.NextToken() + value = self.Parse_factor() + if value[1] not in ('integer', 'float', 'vector', 'rotation'): + raise EParseTypeMismatch(self) + return [S['NEG'], value[1], value] + if tok0 in ('!', '~'): + # Unary logic and bitwise NOT - applies to integers only + self.NextToken() + value = self.Parse_unary_expression() + if value[1] != 'integer': + raise EParseTypeMismatch(self) + return [S[tok0], S['integer'], value] + if tok0 in ('++', '--'): + # Pre-increment / pre-decrement + self.NextToken() + self.expect('IDENT') + name = self.tok[1] + typ = self.FindSymbolFull(name) + if typ not in self.types: + # Pretend it doesn't exist + raise EParseUndefined(self) + typ = S[typ] + + ret = [S['IDENT'], typ, name, self.scopeindex] + self.NextToken() + if self.tok[0] == '.': + self.NextToken() + self.expect('IDENT') + self.ValidateField(typ, self.tok[1]) + ret = [S['FIELD'], S['float'], ret, S[self.tok[1]]] + self.NextToken() + + typ = ret[1] + if typ not in ('integer', 'float'): + raise EParseTypeMismatch(self) + + return [S[tok0+'V'], typ, ret] + + if tok0 == '(': + # Parenthesized expression or typecast + + self.NextToken() + if self.tok[0] != 'TYPE': + # Parenthesized expression + expr = self.Parse_expression() + self.expect(')') + self.NextToken() + return [S['()'], expr[1], expr] + + # Typecast + typ = S[self.tok[1]] + self.NextToken() + self.expect(')') + self.NextToken() + + if self.extendedtypecast: + # Allow any unary expression (except assignment). The type cast + # acts as a prefix operator. 
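What the extended rule buys, shown as a hypothetical driver (again assuming parser() needs no constructor arguments):

    from lslparse import parser

    src = 'default{timer(){integer i; float f = (float)-i;}}'
    parser().parse(src, options=set(['extendedtypecast']))  # accepted
    # Without the flag, the same cast needs parentheses: (float)(-i).
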
+ expr = self.Parse_unary_expression(AllowAssignment = False) + else: + if self.tok[0] == '(': + self.NextToken() + expr = self.Parse_expression() + self.expect(')') + self.NextToken() + expr = [S['()'], expr[1], expr] + else: + expr = self.Parse_unary_postfix_expression(AllowAssignment = False) + basetype = expr[1] + if typ == 'list' and basetype in self.types \ + or basetype in ('integer', 'float') and typ in ('integer', 'float', 'string') \ + or basetype == 'string' and typ in self.types \ + or basetype == 'key' and typ in ('string', 'key') \ + or basetype == 'vector' and typ in ('string', 'vector') \ + or basetype == 'rotation' and typ in ('string', 'rotation') \ + or basetype == 'list' and typ == 'string': + return [S['CAST'], typ, expr] + raise EParseTypeMismatch(self) + + # Must be a postfix expression. + return self.Parse_unary_postfix_expression(AllowAssignment) + + def Parse_factor(self): + """Grammar parsed here: + + factor: unary_expression | factor '*' unary_expression + | factor '/' unary_expresssion | factor '%' unary_expression + """ + factor = self.Parse_unary_expression() + while self.tok[0] in ('*', '/', '%'): + op = self.tok[0] + type1 = factor[1] + # Acceptable types for LHS + if op in ('*', '/') and type1 not in ('integer', 'float', + 'vector', 'rotation') \ + or op == '%' and type1 not in ('integer', 'vector'): + raise EParseTypeMismatch(self) + self.NextToken() + value = self.Parse_unary_expression() + type2 = value[1] + # Mod is easier to check for + if op == '%' and type1 != type2: + raise EParseTypeMismatch(self) + if op == '%' or type1 == type2 == 'integer': + # Deal with the special cases first (it's easy) + factor = [S[op], S[type1], factor, value] + else: + # Any integer must be promoted to float now + if type1 == 'integer': + type1 = 'float' + factor = self.autocastcheck(factor, type1) + if type2 == 'integer': + type2 = 'float' + value = self.autocastcheck(value, type2) + if type1 == 'float' and type2 in ('float', 'vector') \ + or type1 == 'vector' and type2 in ('float', 'vector', 'rotation') \ + or type1 == type2 == 'rotation': + if op == '/' and type2 == 'vector': + # Division by vector isn't valid + raise EParseTypeMismatch(self) + # The rest are valid + if type1 == 'float' and type2 == 'vector': + resulttype = type2 + elif type1 == type2 == 'vector': + resulttype = 'float' + else: + resulttype = type1 + factor = [S[op], S[resulttype], factor, value] + else: + raise EParseTypeMismatch(self) + + return factor + + def Parse_term(self): + """Grammar parsed here: + + term: factor | term '+' factor | term '-' factor + """ + term = self.Parse_factor() + while self.tok[0] in ('+', '-'): + op = self.tok[0] + type1 = term[1] + if op == '+' and type1 not in self.types \ + or op == '-' and type1 not in ('integer', 'float', + 'vector', 'rotation'): + raise EParseTypeMismatch(self) + self.NextToken() + value = self.Parse_factor() + type2 = value[1] + # This is necessary, but the reason is subtle. + # The types must match in principle (except integer/float), so it + # doesn't seem necessary to check type2. But there's the case + # where the first element is a list, where the types don't need to + # match but the second type must make sense. 
+ if op == '+' and type2 not in self.types: + #or op == '-' and type2 not in ('integer', 'float', + # 'vector', 'rotation'): + raise EParseTypeMismatch(self) + if op == '+' and (type1 == type2 or type1 == 'list' or type2 == 'list'): + if type1 == type2 == 'key': + # key + key is the only disallowed combo of equals + raise EParseTypeMismatch(self) + if self.explicitcast: + if type1 == 'list' != type2: + value = [S['CAST'], S[type1], value] + #type2 = type1 # unused + elif type2 == 'list' != type1: + term = [S['CAST'], S[type2], term] + type1 = type2 + term = [S[op], S[type1], term, value] + # Note that although list + nonlist is semantically the same as + # list + (list)nonlist and same goes for nonlist + list, they + # don't compile to the same thing, but the optimizer should deal + # with typecast removal anyway. + elif type1 == 'key' or type2 == 'key': + # Only list + key or key + list is allowed, otherwise keys can't + # be added or subtracted with anything. + raise EParseTypeMismatch(self) + else: + if type1 == 'float': + # Promote value to float + term = [S[op], S[type1], term, self.autocastcheck(value, type1)] + else: + # Convert LHS to type2 if possible (note keys are not allowed) + term = [S[op], S[type2], self.autocastcheck(term, type2), value] + + return term + + def Parse_shift(self): + """Grammar parsed here: + + shift: term | shift '<<' term | shift '>>' term + """ + shift = self.Parse_term() + while self.tok[0] in ('<<', '>>'): + if shift[1] != 'integer': + raise EParseTypeMismatch(self) + op = self.tok[0] + self.NextToken() + shift = [S[op], S['integer'], shift , self.Parse_term()] + if shift[3][1] != 'integer': + raise EParseTypeMismatch(self) + + return shift + + def Parse_inequality(self): + """Grammar parsed here: + + inequality: shift | inequality '<' shift | inequality '<=' shift + | inequality '>' shift | inequality '>=' shift + """ + inequality = self.Parse_shift() + while self.tok[0] in ('<', '<=', '>', '>='): + op = self.tok[0] + type1 = inequality[1] + if type1 not in ('integer', 'float'): + raise EParseTypeMismatch(self) + self.NextToken() + value = self.Parse_shift() + type2 = value[1] + if type2 not in ('integer', 'float'): + raise EParseTypeMismatch(self) + if type1 != type2: + if type2 == 'float': + inequality = self.autocastcheck(inequality, type2) + else: + value = self.autocastcheck(value, type1) + inequality = [S[op], S['integer'], inequality, value] + + return inequality + + def Parse_comparison(self): + """Grammar parsed here: + + comparison: inequality | comparison '==' inequality + | comparison '!=' inequality + """ + comparison = self.Parse_inequality() + while self.tok[0] in ('==', '!='): + op = self.tok[0] + type1 = comparison[1] + if type1 not in self.types: + raise EParseTypeMismatch(self) + self.NextToken() + value = self.Parse_inequality() + type2 = value[1] + if type1 == 'float': + value = self.autocastcheck(value, type1) + else: + # For string & key, RHS (type2) mandates the conversion + # (that's room for optimization: always compare strings) + comparison = self.autocastcheck(comparison, type2) + comparison = [S[op], S['integer'], comparison, value] + + return comparison + + def Parse_bitbool_factor(self): + """Grammar parsed here: + + bitbool_factor: comparison | bitbool_factor '&' comparison + """ + bitbool_factor = self.Parse_comparison() + while self.tok[0] == '&': + if bitbool_factor[1] != 'integer': + raise EParseTypeMismatch(self) + op = self.tok[0] + self.NextToken() + bitbool_factor = [S[op], S['integer'], bitbool_factor, 
self.Parse_comparison()] + if bitbool_factor[3][1] != 'integer': + raise EParseTypeMismatch(self) + + return bitbool_factor + + def Parse_bitxor_term(self): + """Grammar parsed here: + + bitxor_term: bitbool_factor | bitxor_term '^' bitbool_factor + """ + bitxor_term = self.Parse_bitbool_factor() + while self.tok[0] == '^': + if bitxor_term[1] != 'integer': + raise EParseTypeMismatch(self) + op = self.tok[0] + self.NextToken() + bitxor_term = [S[op], S['integer'], bitxor_term, self.Parse_bitbool_factor()] + if bitxor_term[3][1] != 'integer': + raise EParseTypeMismatch(self) + + return bitxor_term + + def Parse_bitbool_term(self): + """Grammar parsed here: + + bitbool_term: bitxor_term | bitbool_term '|' bitxor_term + """ + bitbool_term = self.Parse_bitxor_term() + while self.tok[0] == '|': + if bitbool_term[1] != 'integer': + raise EParseTypeMismatch(self) + op = self.tok[0] + self.NextToken() + bitbool_term = [S[op], S['integer'], bitbool_term, self.Parse_bitxor_term()] + if bitbool_term[3][1] != 'integer': + raise EParseTypeMismatch(self) + + return bitbool_term + + def Parse_expression(self): + """Grammar parsed here: + + expression: bitbool_term | expression '||' bitbool_term + | expression '&&' bitbool_term + + Most operators with same priority, in general, are executed in + right-to-left order but calculated with precedence left-to-right. + That is, the tree is generated LTR but traversed RTL (in post-order). + + E.g. a-b+c is calculated (in RPN notation) as: c, b, a, swap, -, + + i.e. c is evaluated first and a last, but the operation is still (a-b)+c + which is normal LTR. + + At this point we're just constructing the tree, so we follow normal + precedence rules. + """ + expression = self.Parse_bitbool_term() + while self.tok[0] in ('&&', '||'): + if expression[1] != 'integer': + raise EParseTypeMismatch(self) + op = self.tok[0] + self.NextToken() + expression = [S[op], S['integer'], expression, self.Parse_bitbool_term()] + if expression[3][1] != 'integer': + raise EParseTypeMismatch(self) + + return [S['EXPR'], expression[1], expression] + + def Parse_optional_expression_list(self, expected_types = None): + """Grammar parsed here: + + optional_expression_list: LAMBDA | expression_list + expression_list: expression | expression_list ',' expression + """ + # This is a maze of which we get out with a dirty hack. + # optional_expression_list is used by FOR statements (closed by ';' or ')'), + # list constants (closed by ']') and function arguments (closed by ')'). + # If it's not the right token, we'll err anyway, in Parse_expression or + # upon return. 
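Detached from the parser state, the per-argument check performed below boils down to this (check_args is a hypothetical stand-in that mirrors autocastcheck's implicit-cast rules):

    def check_args(arg_types, expected_types):
        if len(arg_types) != len(expected_types):
            raise ValueError('wrong number of arguments')
        for got, want in zip(arg_types, expected_types):
            if not (got == want
                    or got == 'integer' and want == 'float'
                    or got in ('string', 'key') and want in ('string', 'key')):
                raise ValueError('argument type mismatch')

    check_args(['integer', 'string'], ['float', 'key'])  # passes: both implicit casts
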
+ ret = [] + idx = 0 + if self.tok[0] not in (']', ')', ';'): + while True: + val = self.Parse_expression() + if expected_types is not None: + if idx >= len(expected_types): + raise EParseFunctionMismatch(self) + try: + val = self.autocastcheck(val, expected_types[idx]); + except EParseTypeMismatch: + raise EParseFunctionMismatch(self) + else: + if val[1] not in self.types: + raise EParseTypeMismatch(self) + idx += 1 + ret.append(val) + if self.tok[0] != ',': + break + self.NextToken() + if expected_types is not None and idx != len(expected_types): + raise EParseFunctionMismatch(self) + return ret + + def Parse_statement(self, ReturnType, AllowDecl = False): + """Grammar parsed here: + + statement: ';' | single_statement | code_block + single_statement: if_statement | while_statement | do_statement + | for_statement | jump_statement | state_statement | label_statement + | return_statement | declaration_statement | expression ';' + if_statement: IF '(' expression ')' statement ELSE statement + | IF '(' expression ')' statement + while_statement: WHILE '(' expression ')' statement + do_statement: DO statement WHILE '(' expression ')' ';' + for_statement: FOR '(' optional_expression_list ';' expression ';' + optional_expression_list ')' statement + jump_statement: JUMP IDENT ';' + state_statement: STATE DEFAULT ';' | STATE IDENT ';' + label_statement: '@' IDENT ';' + return_statement: RETURN ';' | RETURN expression ';' + declaration_statement: TYPE lvalue ';' | TYPE lvalue '=' expression ';' + + There's a restriction: a *single* statement can not be a declaration. + """ + tok0 = self.tok[0] + if tok0 == '{': + return self.Parse_code_block(ReturnType) + if tok0 == ';': + self.NextToken() + return [';', None] + if tok0 == '@': + self.NextToken() + self.expect('IDENT') + name = self.tok[1] + if name in self.symtab[self.scopeindex]: + raise EParseAlreadyDefined(self) + self.symtab[self.scopeindex][name] = (self.order(), S['Label']) + self.NextToken() + self.expect(';') + self.NextToken() + return [S['@'], None, name] + if tok0 == 'JUMP': + self.NextToken() + self.expect('IDENT') + name = self.tok[1] + tmp = self.FindSymbolPartial(name, MustBeLabel=True) + if not tmp or tmp[1] != 'Label': + # It might still be a forward reference, so we add it to the + # list of things to look up when done + self.jump_lookups.append((name, self.scopeindex, self.errorpos)) + self.NextToken() + self.expect(';') + self.NextToken() + return [S['JUMP'], None, ['IDENT', S['Label'], name, self.scopeindex]] + if tok0 == 'STATE': + self.NextToken() + if self.tok[0] not in ('DEFAULT', 'IDENT'): + raise EParseSyntax(self) + # States are only searched in the global scope + name = self.tok[1] if self.tok[0] == 'IDENT' else 'default' + if name not in self.symtab[0] and (name not in self.globals or self.globals[name] != 'State'): + raise EParseUndefined(self) + self.NextToken() + self.expect(';') + self.NextToken() + return [S['STATE'], None, + [S['IDENT'], S['State'], name, self.scopeindex] if name != 'default' else S['DEFAULT']] + if tok0 == 'RETURN': + self.NextToken() + if self.tok[0] == ';': + value = None + else: + value = self.Parse_expression() + self.expect(';') + self.NextToken() + if ReturnType is None and value is not None: + raise EParseReturnShouldBeEmpty(self) + if ReturnType is not None and value is None: + raise EParseReturnIsEmpty(self) + if value is None: + return [S['RETURN'], None, None] + return [S['RETURN'], None, self.autocastcheck(value, ReturnType)] + if tok0 == 'IF': + self.NextToken() + 
self.expect('(') + self.NextToken() + condition = self.Parse_expression() + self.expect(')') + self.NextToken() + then_branch = self.Parse_statement(ReturnType) + else_branch = None + if self.tok[0] == 'ELSE': + self.NextToken() + else_branch = self.Parse_statement(ReturnType) + return [S['IF'], None, condition, then_branch] + ([else_branch] if else_branch is not None else []) + if tok0 == 'WHILE': + self.NextToken() + self.expect('(') + self.NextToken() + condition = self.Parse_expression() + self.expect(')') + self.NextToken() + return [S['WHILE'], None, condition, self.Parse_statement(ReturnType)] + if tok0 == 'DO': + self.NextToken() + stmt = self.Parse_statement(ReturnType) + self.expect('WHILE') + self.NextToken() + self.expect('(') + self.NextToken() + condition = self.Parse_expression() + self.expect(')') + self.NextToken() + self.expect(';') + self.NextToken() + return [S['DO'], None, stmt, condition] + if tok0 == 'FOR': + self.NextToken() + self.expect('(') + self.NextToken() + initializer = self.Parse_optional_expression_list() + self.expect(';') + self.NextToken() + condition = self.Parse_expression() + self.expect(';') + self.NextToken() + iterator = self.Parse_optional_expression_list() + self.expect(')') + self.NextToken() + stmt = self.Parse_statement(ReturnType) + return [S['FOR'], None, initializer, condition, iterator, stmt] + if tok0 == 'TYPE': + if not AllowDecl: + raise EParseDeclarationScope(self) + typ = S[self.tok[1]] + self.NextToken() + self.expect('IDENT') + name = self.tok[1] + if name in self.symtab[self.scopeindex]: + raise EParseAlreadyDefined(self) + self.NextToken() + value = None + if self.tok[0] == '=': + self.NextToken() + value = self.Parse_expression() + self.expect(';') + self.NextToken() + self.symtab[self.scopeindex][name] = (self.order(), typ, value) + return [S['DECL'], None, name, self.scopeindex] + + # If none of the above, it must be an expression. + value = self.Parse_expression() + self.expect(';') + self.NextToken() + return value + + def Parse_code_block(self, ReturnType): + """Grammar parsed here: + + code_block: '{' statements '}' + statements: LAMBDA | statements statement + + It receives the return type to expect for return statements. 
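For example (an illustration of the node layout produced by the Parse_*
rules above), '{ integer i; i++; }' comes back roughly as:

    ['{}', None,
        ['DECL', None, 'i', scope],
        ['EXPR', 'integer', ['V++', 'integer', ['IDENT', 'integer', 'i', scope]]]]

where scope is the index of the block's symbol table.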
+ """ + self.expect('{') + self.NextToken() + + self.PushScope() + + ret = [S['{}'], None] + while True: + if self.tok[0] == '}': + break + ret.append(self.Parse_statement(ReturnType, AllowDecl = True)) + + self.PopScope() + + self.expect('}') + self.NextToken() + + return ret + + def Parse_simple_expr(self, List=False): + """Grammar parsed here: + + simple_expr: simple_expr_except_list | list_simple_expr + simple_expr_except_list: STRING_VALUE | KEY_VALUE | VECTOR_VALUE + | ROTATION_VALUE | TRUE | FALSE | number_value + | '<' simple_expr ',' simple_expr ',' simple_expr '>' + | '<' simple_expr ',' simple_expr ',' simple_expr ',' simple_expr '>' + number_value: FLOAT_VALUE | INTEGER_VALUE | '-' FLOAT_VALUE | '-' INTEGER_VALUE + list_simple_expr: '[' ']' | '[' list_simple_expr_items ']' + list_simple_expr_items: simple_expr_except_list + | list_simple_expr_items ',' simple_expr_except_list + """ + tok = self.tok + self.NextToken() + if tok[0] == 'TRUE': # TRUE and FALSE don't admit sign in globals + return 1 + if tok[0] == 'FALSE': + return 0 + if tok[0] in ('STRING_VALUE', 'KEY_VALUE', 'VECTOR_VALUE', 'ROTATION_VALUE', 'LIST_VALUE'): + return tok[1] + if tok[0] == 'IDENT': + tmp = self.FindSymbolPartial(tok[1]) + if tmp is None or len(tmp) > 3 or tmp[1] not in self.types: + raise EParseUndefined(self) + #return tmp[2] + return (S['IDENT'], S[tmp[1]], tok[1], self.scopeindex) + if tok[0] == '<': + value = [self.Parse_simple_expr()] + self.autocastcheck((0, self.PythonType2LSL[type(value[0])]), 'float') + self.expect(',') + self.NextToken() + value.append(self.Parse_simple_expr()) + self.autocastcheck((0, self.PythonType2LSL[type(value[1])]), 'float') + self.expect(',') + self.NextToken() + value.append(self.Parse_simple_expr()) + self.autocastcheck((0, self.PythonType2LSL[type(value[2])]), 'float') + if self.tok[0] == '>': + self.NextToken() + return Vector(value) + self.expect(',') + self.NextToken() + value.append(self.Parse_simple_expr()) + self.autocastcheck((0, self.PythonType2LSL[type(value[3])]), 'float') + self.expect('>') + self.NextToken() + return Quaternion(value) + + if tok[0] == '[' and not List: + value = [] + if self.tok[0] == ']': + self.NextToken() + return value + while True: + value.append(self.Parse_simple_expr(List=True)) + if self.tok[0] == ']': + self.NextToken() + return value + self.expect(',') + self.NextToken() + neg = False + if tok[0] == '-': + neg = True + tok = self.tok + self.NextToken() + if tok[0] not in ('INTEGER_VALUE', 'FLOAT_VALUE'): + raise EParseSyntax(self) + if neg: + if tok[0] == 'INTEGER_VALUE': + if tok[1] == -2147483648: + return -2147483648 + return -tok[1] + return tok[1] + + def Parse_optional_param_list(self): + """Grammar parsed here: + + optional_param_list: LAMBDA | param_list + param_list: TYPE IDENT | param_list ',' TYPE IDENT + """ + ret = [] + + if self.tok[0] == 'TYPE': + while True: + typ = S[self.tok[1]] + self.NextToken() + self.expect('IDENT') + + name = self.tok[1] + ret.append(name) + if name in self.symtab[self.scopeindex]: + raise EParseAlreadyDefined(self) + + self.symtab[self.scopeindex][name] = (self.order(), typ, None) # Value is not predefined + self.NextToken() + if self.tok[0] != ',': + break + self.NextToken() + self.expect('TYPE') + + return tuple(ret) + + def Parse_events(self): + """Grammar parsed here: + + events: event | events event + event: EVENT_NAME '(' optional_parameter_list ')' code_block + """ + self.expect('EVENT_NAME') # mandatory + + ret = {} + + while self.tok[0] == 'EVENT_NAME': + name = self.tok[1] 
+ self.NextToken() + self.expect('(') + self.NextToken() + # Function parameters go to a dedicated symbol table. + self.PushScope() + params = self.Parse_optional_param_list() + # NOTE: Parse_events: This is a bit crude, as the error is given at the end of the param list. + # To do it correctly, we can pass the parameter list to Parse_optional_param_list(). + if tuple(self.symtab[self.scopeindex][x][1] for x in params) != self.events[name]: + raise EParseSyntax(self) + self.expect(')') + self.NextToken() + value = tuple(self.Parse_code_block(None)) + ret[name] = (self.order(), None, value, params, self.scopeindex) + self.PopScope() + + return ret + + def Parse_globals(self): + """Grammar parsed here: + + globals: LAMBDA | globals var_def | globals func_def + var_def: TYPE IDENT ';' | TYPE IDENT '=' simple_expr ';' + func_def: optional_type IDENT '(' optional_param_list ')' code_block + optional_type: LAMBDA | TYPE + """ + while self.tok[0] in ('TYPE','IDENT'): + typ = None + if self.tok[0] == 'TYPE': + typ = S[self.tok[1]] + self.NextToken() + self.expect('IDENT') + + name = self.tok[1] + if name in self.symtab[self.scopeindex]: + raise EParseAlreadyDefined(self) + self.NextToken() + + if self.tok[0] == '=' or self.tok[0] == ';': + # This is a variable definition + if typ is None: # Typeless variables are not allowed + raise EParseSyntax(self) + + if self.tok[0] == '=': + self.NextToken() + if self.extendedglobalexpr: + value = tuple(self.Parse_expression()) # Use advanced expression evaluation. + else: + value = self.Parse_simple_expr() # Use LSL's dull global expression. + self.expect(';') + self.NextToken() + else: # must be semicolon + self.NextToken() + value = None + + if value is not None: + if type(value) != tuple and not self.extendedglobalexpr: + self.autocastcheck((0, self.PythonType2LSL[type(value)]), typ) + else: + self.autocastcheck(value, typ) + self.symtab[self.scopeindex][name] = (self.order(), typ, value) + + elif self.tok[0] == '(': + # This is a function definition + self.NextToken() + self.PushScope() # Parameter names don't conflict with globals. 
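    # For instance (an editor's annotation, not part of the commit):
    # after parsing 'integer twice(integer x){return x+x;}' the global
    # scope gains an entry shaped like
    #   'twice': (order, 'integer', ('{}', None, ...), ('x',), paramscope)
    # i.e. (order, return type, parse tree, parameter names, parameter
    # scope index), matching the 5-tuple stored a few lines below.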
+ params = self.Parse_optional_param_list()
+ self.expect(')')
+ self.NextToken()
+ value = tuple(self.Parse_code_block(typ))
+ paramscope = self.scopeindex
+ self.PopScope()
+ self.symtab[self.scopeindex][name] = (self.order(), typ, value, params, paramscope)
+ else:
+ raise EParseSyntax(self)
+
+ def Parse_states(self):
+ """Grammar parsed here:
+
+ states: LAMBDA | states state
+ state: state_header '{' events '}'
+ state_header: DEFAULT | STATE IDENT
+
+ (but we enforce DEFAULT to be the first token found, meaning there will
+ be at least one state and the first must be DEFAULT as in the original
+ grammar)
+ """
+ self.expect('DEFAULT')
+
+ while True:
+ if self.tok[0] != 'DEFAULT' and self.tok[0] != 'STATE':
+ return
+
+ if self.tok[0] == 'DEFAULT':
+ name = S['default']
+ else:
+ self.NextToken()
+ if self.tok[0] != 'IDENT':
+ raise EParseSyntax(self)
+ name = self.tok[1]
+
+ if name in self.symtab[self.scopeindex]:
+ raise EParseAlreadyDefined(self)
+
+ self.symtab[self.scopeindex][name] = (self.order(), S['State']) # to expand later
+ self.NextToken()
+
+ self.expect('{')
+ self.NextToken()
+
+ events = self.Parse_events()
+
+ self.expect('}')
+ self.symtab[self.scopeindex][name] += (events,)
+ self.NextToken()
+
+ def Parse_script(self):
+ """Parses the whole LSL script.
+
+ Grammar parsed here:
+
+ script: globals states EOF
+ """
+ self.Parse_globals()
+ self.Parse_states()
+ self.expect('EOF')
+
+ # Check the pending jump targets
+ for tgt in self.jump_lookups:
+ self.scopeindex = tgt[1]
+ if self.FindSymbolPartial(tgt[0], MustBeLabel = True) is None:
+ self.errorpos = tgt[2]
+ raise EParseUndefined(self)
+
+ def BuildTempGlobalsTable(self):
+ """Build an approximate globals table.
+
+ If the script syntax is correct, the globals table will be accurate.
+ If it is not, it may contain too many or too few symbols (normally the
+ latter). This is not the regular globals table that ends up in the
+ symbol table; it's only needed to resolve which names are declared as
+ globals at all, and their types. It's temporary.
+
+ The grammar is approximately:
+ script: globals states
+ globals: [global [global [...]]]
+ global: [TYPE] IDENT '(' TYPE anytoken [',' TYPE anytoken [...]]
+ anytoken_except_comma balanced_braces_or_anything_else
+ | TYPE IDENT [anytoken_except_semicolon [...]] ';'
+ states: state [state [...]]
+ state: (DEFAULT | STATE IDENT) balanced_braces_or_anything_else
+ """
+ ret = self.functions.copy() # The library functions go here too.
+
+ # If there's a syntax error, that's not our business. We just return
+ # what we have so far. Doing a proper parse will determine the exact
+ # location and cause.
+
+ # Here we don't even care if a name is duplicated - that will be caught
+ # when adding to the real symbol table.
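+ # For illustration, a hypothetical input script like:
+ #     integer count;
+ #     vector scale(vector v, float k) { return v * k; }
+ #     default { state_entry() { } }
+ # would add, on top of the library functions, approximately:
+ #     {'count': 'integer', 'scale': ('vector', ('vector', 'float')),
+ #      'default': 'State'}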
+
+ # Scan globals
+ try:
+ while self.tok[0] not in ('DEFAULT', 'EOF'):
+ typ = None
+ if self.tok[0] == 'TYPE':
+ typ = S[self.tok[1]]
+ self.NextToken()
+ if self.tok[0] != 'IDENT':
+ return ret
+ name = self.tok[1]
+ self.NextToken()
+ if self.tok[0] == '(':
+ # Function definition
+ self.NextToken()
+ params = []
+ if self.tok[0] != ')':
+ while True:
+ if self.tok[0] != 'TYPE':
+ return ret
+ params.append(S[self.tok[1]])
+ self.NextToken()
+ self.NextToken() # not interested in parameter names
+ if self.tok[0] != ',':
+ break
+ self.NextToken()
+ self.NextToken()
+ if self.tok[0] != '{':
+ return ret
+ self.NextToken() # Enter the first brace
+
+ bracelevel = 1
+ while bracelevel and self.tok[0] != 'EOF':
+ if self.tok[0] == '{':
+ bracelevel += 1
+ elif self.tok[0] == '}':
+ bracelevel -= 1
+ self.NextToken()
+ ret[name] = (typ, tuple(params))
+
+ elif typ is None:
+ return ret # A variable needs a type
+ else:
+ ret[name] = typ
+
+ while self.tok[0] != ';': # Skip the rest of the declaration without analyzing it
+ self.NextToken()
+ self.NextToken()
+ except EParseUEOF:
+ return ret
+
+ # Scan states
+ while True:
+ if self.tok[0] not in ('DEFAULT', 'STATE'):
+ return ret # includes EOF, i.e. this is the normal return
+
+ if self.tok[0] == 'STATE':
+ self.NextToken()
+ if self.tok[0] != 'IDENT':
+ return ret
+ name = self.tok[1]
+ else:
+ name = S['default']
+
+ ret[name] = S['State']
+ self.NextToken()
+
+ if self.tok[0] != '{':
+ return ret
+ self.NextToken() # Enter the first brace
+
+ bracelevel = 1
+ while bracelevel and self.tok[0] != 'EOF':
+ if self.tok[0] == '{':
+ bracelevel += 1
+ elif self.tok[0] == '}':
+ bracelevel -= 1
+ self.NextToken()
+
+ def parse(self, script, options = set()):
+ """Parse the given script source with the given options.
+
+ This function also builds the temporary globals table.
+ """
+ self.script = script
+ self.length = len(script)
+
+ # Extended expressions in globals (needs support from the optimizer to work)
+ self.extendedglobalexpr = 'extendedglobalexpr' in options
+
+ # Extended typecast syntax (typecast as a regular unary operator)
+ self.extendedtypecast = 'extendedtypecast' in options
+
+ # Extended assignment operators: &= ^= |= <<= >>=
+ self.extendedassignment = 'extendedassignment' in options
+
+ # Add explicit type casts when implicit (the output module takes care of
+ # the correctness of the output)
+ self.explicitcast = 'explicitcast' in options
+
+ # (TODO:) Allow string + key
+ #self.allowkeyconcat = 'allowkeyconcat' in options
+
+ # (TODO:) Allow C style string composition: "blah" "blah"
+ #self.allowcstrings = 'allowcstrings' in options
+
+ # Symbol table:
+ # This is a list of all local and global symbol tables.
+ # The first element (0) is the global scope. Each symbol table is a
+ # dictionary. Element -1 of the dictionary is the parent index. The
+ # other entries are tuples of three to five values. The first is the
+ # order; the second is the type; the third is the value; and for
+ # functions and events, the fourth is the parameter list and the fifth
+ # is the parameter scope index. Functions contain a parse tree as
+ # their value.
+ self.symtab = [{-1: None}]
+ self.scopeindex = 0
+
+ self.dictorder = 0
+
+ # Globals and labels can be referenced before they are defined. That
+ # includes states.
+ #
+ # Our first approach was going to be to build a list of undefined
+ # references and check them after parsing. But that has a big problem:
+ # expressions need to know the types of their operands, both to give
+ # appropriate errors when a type doesn't suit an operator and to mark
+ # and check the types appropriately. And we don't know the types of
+ # the globals that haven't been found yet. Sticking to that approach
+ # would mean re-scanning the tree for every expression with a pending
+ # reference, fixing up every node upstream with the correct type,
+ # possibly finding a type mismatch in a place for which we have no
+ # location info.
+ #
+ # For that reason, we changed the strategy. We still don't want to do
+ # two full or almost full passes of the parser, nitpicking on every
+ # detail. But given LSL's structure, it's relatively easy to do a fast
+ # incomplete parsing pass that gathers globals with their types and
+ # function arguments. And that's what we do.
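+ #
+ # For example, in this hypothetical script:
+ #     integer f() { return g; }
+ #     integer g = 1;
+ # the reference to g inside f can be typed as integer during the full
+ # parse only because the quick pre-pass has already recorded g's type.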
+
+ self.pos = 0
+ self.tok = self.GetToken()
+
+ self.globals = self.BuildTempGlobalsTable()
+
+ # We need a table of undefined jump references anyway, to check later,
+ # as jumps are local, not global, and allow forward definitions.
+ self.jump_lookups = []
+
+ # Restart from the beginning for the full parse
+
+ self.pos = 0
+ self.tok = self.GetToken()
+
+ # Start the parsing proper
+ self.Parse_script()
+
+ del self.globals # No longer needed. The data that is not in self.functions is in self.symtab[0].
+ del self.jump_lookups # Already used.
+
+ #while self.tok[0] != 'EOF':
+ # print self.tok
+ # self.NextToken()
+
+ #for n in xrange(len(self.symtab)):
+ # print n, '{',
+ # i = self.symtab[n]
+ # for j in sorted(i.items(), key=lambda k: -1 if k[0]==-1 else k[1][0]):
+ # print repr(j[0]) + ':' + repr(j[1]) + ',',
+ # print '}'
+
+ return self.symtab
+
+ def parsefile(self, filename, options = set()):
+ """Convenience function to parse a file"""
+ f = open(filename, 'rb')
+ try:
+ script = f.read()
+ finally:
+ f.close()
+
+ return self.parse(script, options)
+
+ def __init__(self):
+ """Reads the library."""
+
+ self.events = {}
+ self.constants = {}
+ self.functions = {}
+
+ # Library read code
+
+ parse_lin_re = re.compile(
+ r'^\s*(event|void|integer|float|string|key|vector|quaternion|rotation|list)\s+'
+ r'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*('
+ r'(?:integer|float|string|key|vector|quaternion|rotation|list)\s+[a-zA-Z_][a-zA-Z0-9_]*'
+ r'(?:\s*,\s*(?:integer|float|string|key|vector|quaternion|rotation|list)\s+[a-zA-Z_][a-zA-Z0-9_]*)*'
+ r')?\s*\)\s*$'
+ r'|'
+ r'^\s*const\s+(integer|float|string|key|vector|quaternion|rotation|list)'
+ r'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$'
+ r'|'
+ r'^\s*(?:#.*|//.*)?$')
+ parse_arg_re = re.compile(r'^\s*([a-z]+)\s+[a-zA-Z_][a-zA-Z0-9_]*\s*$')
+ parse_num_re = re.compile(r'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$')
+ parse_str_re = re.compile(ur'^"((?:[^"\\]|\\.)*)"$')
+
+ f = open('builtins.txt', 'rb')
+ try:
+ while True:
+ line = f.readline()
+ if not line: break
+ match = parse_lin_re.match(line)
+ if not match:
+ warning(u'Syntax error in builtins.txt: ' + line.decode('utf8'))
+ continue
+ if match.group(1):
+ # event or function
+ typ = match.group(1)
+ if typ == 'quaternion':
+ typ = 'rotation'
+ if typ == 'void':
+ typ = None
+ args = []
+ arglist = match.group(3)
+ if arglist:
+ arglist = arglist.split(',')
+ for arg in arglist:
+ args.append(parse_arg_re.match(arg).group(1))
+ name = match.group(2)
+ if typ == 'event':
+ if name in self.events:
+ warning(u'Event already defined in builtins.txt, overwriting: ' + name.decode('utf8'))
+ self.events[name] = tuple(args)
+ else:
+ # Library functions go to the functions table. If
+ # they are implemented in lslfuncs.*, they get a
+ # reference to the implementation; otherwise None.
+ if name in self.functions:
+ warning(u'Function already defined in builtins.txt, overwriting: ' + name.decode('utf8'))
+ self.functions[name] = (typ, tuple(args), getattr(lslfuncs, name, None))
+ elif match.group(4):
+ # constant
+ name = match.group(5)
+ if name in self.constants:
+ warning(u'Global already defined in builtins.txt, overwriting: ' + name.decode('utf8'))
+ try:
+ typ = match.group(4)
+ if typ == 'quaternion':
+ typ = 'rotation'
+ val = match.group(6)
+ if typ == 'integer':
+ val = int(val, 0)
+ elif typ == 'float':
+ val = lslfuncs.F32(float(val))
+ elif typ == 'string':
+ val = val.decode('utf8')
+ if not parse_str_re.match(val):
+ raise EInternal
+ esc = False
+ tmp = val[1:-1]
+ val = u''
+ for c in tmp:
+ if esc:
+ if c == u'n':
+ c = u'\n'
+ elif c == u't':
+ c = u'    ' # LSL expands \t to four spaces
+ val += c
+ esc = False
+ elif c == u'\\':
+ esc = True
+ else:
+ val += c
+ #if typ == 'key':
+ # val = Key(val)
+ elif typ == 'key':
+ warning(u'Key constants not supported: ' + line.decode('utf8'))
+ val = None
+ elif typ in ('vector', 'rotation'):
+ if val[0:1] != '<' or val[-1:] != '>':
+ raise ValueError
+ val = val[1:-1].split(',')
+ if len(val) != (3 if typ == 'vector' else 4):
+ raise ValueError
+ num = parse_num_re.match(val[0])
+ if not num:
+ raise ValueError
+ val[0] = lslfuncs.F32(float(num.group(1)))
+ num = parse_num_re.match(val[1])
+ if not num:
+ raise ValueError
+ val[1] = lslfuncs.F32(float(num.group(1)))
+ num = parse_num_re.match(val[2])
+ if not num:
+ raise ValueError
+ val[2] = lslfuncs.F32(float(num.group(1)))
+ if typ != 'vector':
+ num = parse_num_re.match(val[3])
+ if not num:
+ raise ValueError
+ val[3] = lslfuncs.F32(float(num.group(1)))
+ val = Quaternion(val)
+ else:
+ val = Vector(val)
+ else:
+ assert typ == 'list'
+ warning(u'List constants not supported: ' + line.decode('utf8'))
+ val = None
+ if val is not None:
+ self.constants[name] = val
+
+ except EInternal:
+ warning(u'Invalid string in builtins.txt: ' + line.decode('utf8'))
+ except ValueError:
+ warning(u'Invalid numeric value or vector syntax in builtins.txt: ' + line.decode('utf8'))
+ finally:
+ f.close()
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..b2b1768
--- /dev/null
+++ b/main.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+from lslopt.lslparse import parser, EParse
+from lslopt.lsloutput import outscript
+import sys
+
+def main():
+ if len(sys.argv) > 1:
+ p = parser()
+ try:
+ symtab = p.parsefile(sys.argv[1])
+ except EParse as e:
+ print e.message
+ return 1
+ del p
+ outs = outscript()
+ script = outs.output(symtab)
+ del outs
+ del symtab
+ print script.decode('utf8'),
+ return 0
+
+sys.exit(main())
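A quick usage sketch (the script name myscript.lsl is hypothetical; parser()
expects builtins.txt in the current directory, since __init__ reads it):

    python main.py myscript.lsl

or, from Python, using the same entry points main.py uses:

    from lslopt.lslparse import parser
    from lslopt.lsloutput import outscript

    p = parser()
    symtab = p.parsefile('myscript.lsl')  # options defaults to set()
    print outscript().output(symtab).decode('utf8')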