Improve Python library, add LLGLTF asset parser, add tests
This commit is contained in:
@@ -47,6 +47,11 @@ export class PythonList extends PythonObject
|
||||
throw new Error('Expected ] end bracket in list')
|
||||
}
|
||||
|
||||
/**
 * Positional accessor for the list's contents.
 *
 * @param index Position to read from `this.data`.
 * @returns `this.data[index]`; the declared type includes `undefined`, which is what
 *          the accompanying spec expects for an index past the last element.
 */
public get(index: number): PythonType | undefined
|
||||
{
|
||||
return this.data[index];
|
||||
}
|
||||
|
||||
get length(): number
|
||||
{
|
||||
return this.data.length;
|
||||
|
||||
148
lib/classes/python/PythonParser.spec.ts
Normal file
148
lib/classes/python/PythonParser.spec.ts
Normal file
@@ -0,0 +1,148 @@
|
||||
import { PythonParser } from './PythonParser';
|
||||
|
||||
import * as assert from 'assert';
|
||||
import { PythonDict } from './PythonDict';
|
||||
import { PythonList } from './PythonList';
|
||||
import { PythonTuple } from './PythonTuple';
|
||||
|
||||
// Spec for PythonParser.parse: one document mixing nested dicts, lists, tuples, a boolean,
// bytes, numbers and single/double/triple/raw quoted strings is parsed and checked field by field.
describe('PythonParser', () =>
|
||||
{
|
||||
describe('parse', () =>
|
||||
{
|
||||
it('can parse a complex python dictionary notation', () =>
|
||||
{
|
||||
const notationDoc = `{
|
||||
"nested_dict": {
|
||||
"key1": "value1",
|
||||
"key2": {
|
||||
"inner_key": "inner_value"
|
||||
}
|
||||
},
|
||||
"list": [1, 2, 3, [4, 5]],
|
||||
"boolean": True,
|
||||
"tuple": (1, 2, ("nested_tuple", 3)),
|
||||
"bytes": b'hello',
|
||||
"float": 3.14,
|
||||
'integer': 42,
|
||||
"hex_number": 0x1A,
|
||||
"octal_number": 0o52,
|
||||
"string_single": 'single-quoted\\' string',
|
||||
"string_double": "double-quoted \\" string",
|
||||
"string_triple_single": '''triple-quoted\'
|
||||
single-quoted string''',
|
||||
"string_triple_double": """triple-quoted\"
|
||||
double-quoted string""",
|
||||
"raw_string_single": r'raw single-quoted\\ string',
|
||||
"raw_string_double": r"raw double-quoted\\ string",
|
||||
"raw_string_triple_single": r'''raw triple\\''-quoted
|
||||
single-quoted string''',
|
||||
"raw_string_triple_double": r"""raw triple\\""-quoted
|
||||
double-quoted string"""
|
||||
}`;
|
||||
const parsed = PythonParser.parse(notationDoc);
|
||||
// The top-level value must be a PythonDict; failing here aborts the test, and the
|
||||
// instanceof check narrows `parsed` for the `get` calls that follow.
if (!(parsed instanceof PythonDict))
|
||||
{
|
||||
assert(false);
|
||||
return;
|
||||
}
|
||||
|
||||
// Nested dictionaries: two levels deep, each level checked with instanceof before use.
const nested = parsed.get('nested_dict');
|
||||
assert.ok(nested);
|
||||
if (!(nested instanceof PythonDict))
|
||||
{
|
||||
assert(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert.equal(nested.get('key1'), 'value1');
|
||||
const key2 = nested.get('key2');
|
||||
if (!(key2 instanceof PythonDict))
|
||||
{
|
||||
assert(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert.equal(key2.get('inner_key'), 'inner_value');
|
||||
}
|
||||
}
|
||||
|
||||
// List with a nested list as its fourth element.
const list = parsed.get('list');
|
||||
if (!(list instanceof PythonList))
|
||||
{
|
||||
assert(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert.equal(list.length, 4);
|
||||
assert.equal(list.get(0), 1);
|
||||
assert.equal(list.get(1), 2);
|
||||
assert.equal(list.get(2), 3);
|
||||
const nestedList = list.get(3);
|
||||
if (!(nestedList instanceof PythonList))
|
||||
{
|
||||
assert(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert.equal(nestedList.get(0), 4);
|
||||
assert.equal(nestedList.get(1), 5);
|
||||
}
|
||||
// Reading one past the last element is expected to yield undefined.
assert.equal(list.get(4), undefined);
|
||||
}
|
||||
|
||||
assert.equal(parsed.get('boolean'), true);
|
||||
// Tuple with a nested tuple as its third element.
const tuple = parsed.get('tuple');
|
||||
if (!(tuple instanceof PythonTuple))
|
||||
{
|
||||
assert(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert.equal(tuple.get(0), 1);
|
||||
assert.equal(tuple.get(1), 2);
|
||||
const nestedTuple = tuple.get(2);
|
||||
if (!(nestedTuple instanceof PythonTuple))
|
||||
{
|
||||
assert(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert.equal(nestedTuple.get(0), 'nested_tuple');
|
||||
assert.equal(nestedTuple.get(1), 3);
|
||||
}
|
||||
// Reading one past the last element is expected to yield undefined.
assert.equal(tuple.get(3), undefined);
|
||||
}
|
||||
// The b'hello' literal must come back as a Buffer holding the same bytes.
const buf = parsed.get('bytes');
|
||||
if (buf instanceof Buffer)
|
||||
{
|
||||
assert.equal(Buffer.from('hello', 'binary').compare(buf), 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false);
|
||||
}
|
||||
// Numeric literals: 0x1A is 26 and 0o52 is 42.
assert.equal(parsed.get('float'), 3.14);
|
||||
assert.equal(parsed.get('integer'), 42);
|
||||
assert.equal(parsed.get('hex_number'), 26);
|
||||
assert.equal(parsed.get('octal_number'), 42);
|
||||
// Ordinary quoted strings: the backslash-quote pairs in the document are expected
|
||||
// back unchanged, i.e. the backslash is kept in the parsed value.
assert.equal(parsed.get('string_single'), 'single-quoted\\\' string');
|
||||
assert.equal(parsed.get('string_double'), 'double-quoted \\" string');
|
||||
// Triple-quoted strings: the embedded quote and the line break are preserved.
assert.equal(parsed.get('string_triple_single'), 'triple-quoted\'\nsingle-quoted string');
|
||||
assert.equal(parsed.get('string_triple_double'), 'triple-quoted\"\ndouble-quoted string');
|
||||
|
||||
/*
Raw-string entries of the document as the parser receives them
(the template literal's own escapes are already resolved):
|
||||
"raw_string_single": r'raw single-quoted\ string',
|
||||
"raw_string_double": r"raw double-quoted\ string",
|
||||
"raw_string_triple_single": r'''raw triple\''-quoted
|
||||
single-quoted string''',
|
||||
"raw_string_triple_double": r"""raw triple\""-quoted
|
||||
double-quoted string"""
|
||||
*/
|
||||
assert.equal(parsed.get('raw_string_single'), 'raw single-quoted\\ string');
|
||||
assert.equal(parsed.get('raw_string_double'), 'raw double-quoted\\ string');
|
||||
assert.equal(parsed.get('raw_string_triple_single'), 'raw triple\\\'\'-quoted\nsingle-quoted string');
|
||||
assert.equal(parsed.get('raw_string_triple_double'), 'raw triple\\""-quoted\ndouble-quoted string');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -23,27 +23,30 @@ export class PythonParser
|
||||
{ regex: /^[,]/, type: PythonTokenType.COMMA },
|
||||
{ regex: /^None\b/, type: PythonTokenType.NONE },
|
||||
{ regex: /^(True|False)\b/, type: PythonTokenType.BOOLEAN },
|
||||
{ regex: /^((?:-?[0-9]+\.[0-9]*)|(?:-?[0-9]*\.[0-9]+))/, type: PythonTokenType.FLOAT }, // Float with digits on at least one side of the dot, e.g., 1., 1.5, .5 (character class was mistyped as [0.9], which also accepted literal dots)
|
||||
{ regex: /^\d+\b/, type: PythonTokenType.INTEGER },
|
||||
{ regex: /^0x([0-9a-fA-F]+\b)/, type: PythonTokenType.HEX },
|
||||
{ regex: /^0o([0-7]+)/, type: PythonTokenType.OCTAL },
|
||||
{ regex: /^b'(\\[0-7]{3}|\\x[0-9A-Fa-f]{2}|\\['"abfnrtv\\]|[^'\\])*'/, type: PythonTokenType.BYTES },
|
||||
{ regex: /^b"(\\[0-7]{3}|\\x[0-9A-Fa-f]{2}|\\["'abfnrtv\\]|[^"\\])*"/, type: PythonTokenType.BYTES },
|
||||
{ regex: /^-?\d+\.\d+\b/, type: PythonTokenType.FLOAT },
|
||||
{ regex: /^-?\d+\.?\d*[jJ]\b/, type: PythonTokenType.COMPLEX },
|
||||
{ regex: /^\(/, type: PythonTokenType.TUPLE_START },
|
||||
{ regex: /^\)/, type: PythonTokenType.TUPLE_END },
|
||||
{ regex: /^\[/, type: PythonTokenType.LIST_START },
|
||||
{ regex: /^\]/, type: PythonTokenType.LIST_END },
|
||||
{ regex: /^'((?:\\.|[^'\\])*)'/, type: PythonTokenType.STRING }, // Single-quoted strings
|
||||
{ regex: /^"((?:\\.|[^"\\])*)"/, type: PythonTokenType.STRING }, // Double-quoted strings
|
||||
{ regex: /^'''((?:\\.|[^'\\]|'{1,2}(?![']))*)'''/, type: PythonTokenType.STRING }, // Triple-quoted single strings
|
||||
{ regex: /^"""((?:\\.|[^"\\]|"{1,2}(?!["]))*)"""/, type: PythonTokenType.STRING }, // Triple-quoted double strings
|
||||
{ regex: /^r'((?:\\.|[^'\\])*)'/, type: PythonTokenType.STRING }, // Raw single-quoted strings
|
||||
{ regex: /^r"((?:\\.|[^"\\])*)"/, type: PythonTokenType.STRING }, // Raw double-quoted strings
|
||||
{ regex: /^\\u[\dA-Fa-f]{4}/, type: PythonTokenType.STRING }, // Unicode escape sequences
|
||||
{ regex: /^\\U[\dA-Fa-f]{8}/, type: PythonTokenType.STRING }, // Unicode escape sequences
|
||||
{ regex: /^"""((?:[^"]*|\n|\\"|")*?)"""/, type: PythonTokenType.STRING }, // triple double quoted string
|
||||
{ regex: /^'''((?:[^']*|\n|\\'|')*?)'''/, type: PythonTokenType.STRING }, // triple single quoted string
|
||||
{ regex: /^'([^'\\]*(?:\\.[^'\\\n]*)*)'/, type: PythonTokenType.STRING }, // single quoted string
|
||||
{ regex: /^"([^"\\]*(?:\\.[^"\\\n]*)*)"/, type: PythonTokenType.STRING }, // double quoted string
|
||||
|
||||
{ regex: /^b"""((?:[^"]*|\n|\\"|")*?)"""/, type: PythonTokenType.BINARY_STRING }, // triple double quoted string
|
||||
{ regex: /^b'''((?:[^']*|\n|\\'|')*?)'''/, type: PythonTokenType.BINARY_STRING }, // triple single quoted string
|
||||
{ regex: /^b'([^'\\]*(?:\\.[^'\\\n]*)*)'/, type: PythonTokenType.BINARY_STRING }, // single quoted string
|
||||
{ regex: /^b"([^"\\]*(?:\\.[^"\\\n]*)*)"/, type: PythonTokenType.BINARY_STRING }, // double quoted string
|
||||
|
||||
{ regex: /^r"""((?:[^"]*|\n|")*?)"""/, type: PythonTokenType.RAW_STRING }, // triple double quoted string
|
||||
{ regex: /^r'''((?:[^']*|\n|')*?)'''/, type: PythonTokenType.RAW_STRING }, // triple single quoted string
|
||||
{ regex: /^r'([^'\n]*?)'/, type: PythonTokenType.RAW_STRING }, // single quoted string
|
||||
{ regex: /^r"([^"\n]*?)"/, type: PythonTokenType.RAW_STRING }, // double quoted string
|
||||
|
||||
{ regex: /^-?\d+\.?\d*[eE][-+]?\d+/, type: PythonTokenType.FLOAT }, // Scientific notation
|
||||
{ regex: /^-?\.\d+\b/, type: PythonTokenType.FLOAT }, // Leading dot float, e.g., .123
|
||||
{ regex: /^\d+(_\d+)*\b/, type: PythonTokenType.INTEGER }, // Integer with underscores, e.g., 1_000_000
|
||||
{ regex: /^[^\s:{},"'\[\]\(\)]+/, type: PythonTokenType.UNKNOWN } // Catch all for other non-structured sequences
|
||||
];
|
||||
@@ -89,50 +92,6 @@ export class PythonParser
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Decodes the text of a Python bytes literal into a Buffer, resolving escape sequences.
 *
 * Recognised escapes are `\xHH` (two hex digits), `\o`, `\oo`, `\ooo` (one to three octal
 * digits) and the single-character escapes `\a \b \f \n \r \t \v \\ \' \"`. Every other
 * character is emitted as its UTF-16 code unit via `charCodeAt`.
 *
 * A backslash that does not start one of the recognised escapes (including a trailing
 * backslash) is kept as a literal backslash byte. Previously the global regex had no
 * alternative for it, so `exec` skipped over it and the byte was silently lost.
 *
 * NOTE(review): the caller passes a token's `value`; whether that text still carries the
 * `b'...'` delimiters is not visible from this function, so confirm against the tokenizer.
 *
 * @param byteString Text to decode.
 * @returns Buffer built from the decoded byte values.
 */
private static interpretEscapes(byteString: string): Buffer
{
    // Byte values of the single-character escapes; built once per call, not per match.
    const specialChars: { [key: string]: number } = {
        'a': 7,    // Alert (bell)
        'b': 8,    // Backspace
        'f': 12,   // Formfeed
        'n': 10,   // New line
        'r': 13,   // Carriage return
        't': 9,    // Horizontal tab
        'v': 11,   // Vertical tab
        '\\': 92,  // Backslash
        '\'': 39,  // Single quote
        '"': 34,   // Double quote
    };

    const byteArray: number[] = [];
    // Groups: 1 = hex digits, 2 = octal digits, 3 = single-character escape,
    // 4 = run of non-backslash characters, 5 = backslash not covered by groups 1-3.
    // Group 5 guarantees that every input character is consumed by some alternative.
    const regex = /\\x([0-9A-Fa-f]{2})|\\([0-7]{1,3})|\\(['"abfnrtv\\])|([^\\]+)|(\\)/g;
    let match: RegExpExecArray | null;

    while ((match = regex.exec(byteString)) !== null)
    {
        if (match[1] !== undefined) // Hexadecimal sequence
        {
            byteArray.push(parseInt(match[1], 16));
        }
        else if (match[2] !== undefined) // Octal sequence
        {
            byteArray.push(parseInt(match[2], 8));
        }
        else if (match[3] !== undefined) // Special escape character
        {
            byteArray.push(specialChars[match[3]]);
        }
        else if (match[4] !== undefined) // Regular characters
        {
            for (let i = 0; i < match[4].length; ++i)
            {
                byteArray.push(match[4].charCodeAt(i));
            }
        }
        else // Unrecognised escape or trailing backslash: keep the backslash itself
        {
            byteArray.push(92);
        }
    }

    return Buffer.from(byteArray);
}
|
||||
|
||||
public static parseValueToken(container: PythonTokenContainer): PythonType
|
||||
{
|
||||
const t = container.tokens[container.index++];
|
||||
@@ -146,6 +105,14 @@ export class PythonParser
|
||||
{
|
||||
return t.value;
|
||||
}
|
||||
case PythonTokenType.BINARY_STRING:
|
||||
{
|
||||
return Buffer.from(t.value, 'binary');
|
||||
}
|
||||
case PythonTokenType.RAW_STRING:
|
||||
{
|
||||
return t.value;
|
||||
}
|
||||
case PythonTokenType.BOOLEAN:
|
||||
{
|
||||
return t.value === 'True';
|
||||
@@ -158,10 +125,6 @@ export class PythonParser
|
||||
{
|
||||
return PythonTuple.parse(container);
|
||||
}
|
||||
case PythonTokenType.BYTES:
|
||||
{
|
||||
return this.interpretEscapes(t.value);
|
||||
}
|
||||
case PythonTokenType.NONE:
|
||||
{
|
||||
return null;
|
||||
@@ -182,10 +145,6 @@ export class PythonParser
|
||||
{
|
||||
return parseFloat(t.value);
|
||||
}
|
||||
case PythonTokenType.COMPLEX:
|
||||
{
|
||||
throw new Error('Complex numbers are currently unhandled');
|
||||
}
|
||||
default:
|
||||
throw new Error('Unexpected token: ' + PythonTokenType[t.type]);
|
||||
}
|
||||
|
||||
@@ -8,13 +8,13 @@ export enum PythonTokenType
|
||||
BOOLEAN,
|
||||
INTEGER,
|
||||
FLOAT,
|
||||
COMPLEX,
|
||||
STRING,
|
||||
BINARY_STRING,
|
||||
RAW_STRING,
|
||||
LIST_START,
|
||||
LIST_END,
|
||||
TUPLE_START,
|
||||
TUPLE_END,
|
||||
BYTES,
|
||||
HEX,
|
||||
OCTAL,
|
||||
UNKNOWN // Catch all for other sequences
|
||||
|
||||
@@ -10,7 +10,7 @@ export class PythonTuple extends PythonObject
|
||||
|
||||
public static parse(container: PythonTokenContainer): PythonTuple
|
||||
{
|
||||
let expectingComma = true;
|
||||
let expectingComma = false;
|
||||
const tuple = new PythonTuple();
|
||||
do
|
||||
{
|
||||
@@ -48,6 +48,11 @@ export class PythonTuple extends PythonObject
|
||||
throw new Error('Expected ) end bracket in tuple')
|
||||
}
|
||||
|
||||
/**
 * Positional accessor for the tuple's contents.
 *
 * @param index Position to read from `this.data`.
 * @returns `this.data[index]`; the declared type includes `undefined`, which is what
 *          the accompanying spec expects for an index past the last element.
 */
public get(index: number): PythonType | undefined
|
||||
{
|
||||
return this.data[index];
|
||||
}
|
||||
|
||||
get length(): number
|
||||
{
|
||||
return this.data.length;
|
||||
|
||||
Reference in New Issue
Block a user