Improve Python library, add LLGLTF asset parser, add tests

This commit is contained in:
Casper Warden
2023-11-10 00:14:59 +00:00
parent 6c8273ea5e
commit 72d4eff2d8
13 changed files with 591 additions and 348 deletions

View File

@@ -47,6 +47,11 @@ export class PythonList extends PythonObject
throw new Error('Expected ] end bracket in list')
}
/**
 * Looks up the list entry stored at a given position.
 *
 * @param index - Position of the wanted entry in the underlying `data` store.
 * @returns The entry held at `index`, or `undefined` when nothing is stored there.
 */
public get(index: number): PythonType | undefined
{
    const entry = this.data[index];
    return entry;
}
get length(): number
{
return this.data.length;

View File

@@ -0,0 +1,148 @@
import { PythonParser } from './PythonParser';
import * as assert from 'assert';
import { PythonDict } from './PythonDict';
import { PythonList } from './PythonList';
import { PythonTuple } from './PythonTuple';
// Test suite for PythonParser.parse: a single fixture covers nested dicts, lists,
// tuples, bytes, numeric literals and every string-quoting form asserted below.
describe('PythonParser', () =>
{
describe('parse', () =>
{
it('can parse a complex python dictionary notation', () =>
{
// The fixture is a JS template literal, so every doubled backslash below reaches
// the parser as a single backslash, and an escaped quote reaches it as a bare quote.
const notationDoc = `{
"nested_dict": {
"key1": "value1",
"key2": {
"inner_key": "inner_value"
}
},
"list": [1, 2, 3, [4, 5]],
"boolean": True,
"tuple": (1, 2, ("nested_tuple", 3)),
"bytes": b'hello',
"float": 3.14,
'integer': 42,
"hex_number": 0x1A,
"octal_number": 0o52,
"string_single": 'single-quoted\\' string',
"string_double": "double-quoted \\" string",
"string_triple_single": '''triple-quoted\'
single-quoted string''',
"string_triple_double": """triple-quoted\"
double-quoted string""",
"raw_string_single": r'raw single-quoted\\ string',
"raw_string_double": r"raw double-quoted\\ string",
"raw_string_triple_single": r'''raw triple\\''-quoted
single-quoted string''',
"raw_string_triple_double": r"""raw triple\\""-quoted
double-quoted string"""
}`;
const parsed = PythonParser.parse(notationDoc);
// Every assertion below needs dictionary access, so stop here if the root is not a dict.
if (!(parsed instanceof PythonDict))
{
assert(false);
return;
}
// Nested dictionaries, two levels deep.
const nested = parsed.get('nested_dict');
assert.ok(nested);
if (!(nested instanceof PythonDict))
{
assert(false);
}
else
{
assert.equal(nested.get('key1'), 'value1');
const key2 = nested.get('key2');
if (!(key2 instanceof PythonDict))
{
assert(false);
}
else
{
assert.equal(key2.get('inner_key'), 'inner_value');
}
}
// List with a nested list as its last element.
const list = parsed.get('list');
if (!(list instanceof PythonList))
{
assert(false);
}
else
{
assert.equal(list.length, 4);
assert.equal(list.get(0), 1);
assert.equal(list.get(1), 2);
assert.equal(list.get(2), 3);
const nestedList = list.get(3);
if (!(nestedList instanceof PythonList))
{
assert(false);
}
else
{
assert.equal(nestedList.get(0), 4);
assert.equal(nestedList.get(1), 5);
}
// One past the last element: get() is expected to yield undefined.
assert.equal(list.get(4), undefined);
}
assert.equal(parsed.get('boolean'), true);
// Tuple with a nested tuple as its last element.
const tuple = parsed.get('tuple');
if (!(tuple instanceof PythonTuple))
{
assert(false);
}
else
{
assert.equal(tuple.get(0), 1);
assert.equal(tuple.get(1), 2);
const nestedTuple = tuple.get(2);
if (!(nestedTuple instanceof PythonTuple))
{
assert(false);
}
else
{
assert.equal(nestedTuple.get(0), 'nested_tuple');
assert.equal(nestedTuple.get(1), 3);
}
// One past the last element: get() is expected to yield undefined.
assert.equal(tuple.get(3), undefined);
}
// Bytes literals are expected to come back as a Buffer with the same content.
const buf = parsed.get('bytes');
if (buf instanceof Buffer)
{
assert.equal(Buffer.from('hello', 'binary').compare(buf), 0);
}
else
{
assert(false);
}
assert.equal(parsed.get('float'), 3.14);
assert.equal(parsed.get('integer'), 42);
// 0x1A and 0o52 are expected as their decimal values.
assert.equal(parsed.get('hex_number'), 26);
assert.equal(parsed.get('octal_number'), 42);
// Single/double-quoted strings: the expected values keep the backslash in front of the quote.
assert.equal(parsed.get('string_single'), 'single-quoted\\\' string');
assert.equal(parsed.get('string_double'), 'double-quoted \\" string');
assert.equal(parsed.get('string_triple_single'), 'triple-quoted\'\nsingle-quoted string');
assert.equal(parsed.get('string_triple_double'), 'triple-quoted\"\ndouble-quoted string');
/*
For reference, the raw-string fixture entries as the parser receives them
(i.e. after the template literal has collapsed each doubled backslash):
"raw_string_single": r'raw single-quoted\ string',
"raw_string_double": r"raw double-quoted\ string",
"raw_string_triple_single": r'''raw triple\''-quoted
single-quoted string''',
"raw_string_triple_double": r"""raw triple\""-quoted
double-quoted string"""
*/
assert.equal(parsed.get('raw_string_single'), 'raw single-quoted\\ string');
assert.equal(parsed.get('raw_string_double'), 'raw double-quoted\\ string');
assert.equal(parsed.get('raw_string_triple_single'), 'raw triple\\\'\'-quoted\nsingle-quoted string');
assert.equal(parsed.get('raw_string_triple_double'), 'raw triple\\""-quoted\ndouble-quoted string');
});
});
});

View File

@@ -23,27 +23,30 @@ export class PythonParser
{ regex: /^[,]/, type: PythonTokenType.COMMA },
{ regex: /^None\b/, type: PythonTokenType.NONE },
{ regex: /^(True|False)\b/, type: PythonTokenType.BOOLEAN },
{ regex: /^((?:-?[0-9]+\.[0-9]*)|(?:-?[0-9]*\.[0-9]+))/, type: PythonTokenType.FLOAT }, // Float with digits on at least one side of the dot: 3.14, 3. or .5
{ regex: /^\d+\b/, type: PythonTokenType.INTEGER },
{ regex: /^0x([0-9a-fA-F]+\b)/, type: PythonTokenType.HEX },
{ regex: /^0o([0-7]+)/, type: PythonTokenType.OCTAL },
{ regex: /^b'(\\[0-7]{3}|\\x[0-9A-Fa-f]{2}|\\['"abfnrtv\\]|[^'\\])*'/, type: PythonTokenType.BYTES },
{ regex: /^b"(\\[0-7]{3}|\\x[0-9A-Fa-f]{2}|\\["'abfnrtv\\]|[^"\\])*"/, type: PythonTokenType.BYTES },
{ regex: /^-?\d+\.\d+\b/, type: PythonTokenType.FLOAT },
{ regex: /^-?\d+\.?\d*[jJ]\b/, type: PythonTokenType.COMPLEX },
{ regex: /^\(/, type: PythonTokenType.TUPLE_START },
{ regex: /^\)/, type: PythonTokenType.TUPLE_END },
{ regex: /^\[/, type: PythonTokenType.LIST_START },
{ regex: /^\]/, type: PythonTokenType.LIST_END },
{ regex: /^'((?:\\.|[^'\\])*)'/, type: PythonTokenType.STRING }, // Single-quoted strings
{ regex: /^"((?:\\.|[^"\\])*)"/, type: PythonTokenType.STRING }, // Double-quoted strings
{ regex: /^'''((?:\\.|[^'\\]|'{1,2}(?![']))*)'''/, type: PythonTokenType.STRING }, // Triple-quoted single strings
{ regex: /^"""((?:\\.|[^"\\]|"{1,2}(?!["]))*)"""/, type: PythonTokenType.STRING }, // Triple-quoted double strings
{ regex: /^r'((?:\\.|[^'\\])*)'/, type: PythonTokenType.STRING }, // Raw single-quoted strings
{ regex: /^r"((?:\\.|[^"\\])*)"/, type: PythonTokenType.STRING }, // Raw double-quoted strings
{ regex: /^\\u[\dA-Fa-f]{4}/, type: PythonTokenType.STRING }, // Unicode escape sequences
{ regex: /^\\U[\dA-Fa-f]{8}/, type: PythonTokenType.STRING }, // Unicode escape sequences
{ regex: /^"""((?:[^"]*|\n|\\"|")*?)"""/, type: PythonTokenType.STRING }, // triple double quoted string
{ regex: /^'''((?:[^']*|\n|\\'|')*?)'''/, type: PythonTokenType.STRING }, // triple single quoted string
{ regex: /^'([^'\\]*(?:\\.[^'\\\n]*)*)'/, type: PythonTokenType.STRING }, // single quoted string
{ regex: /^"([^"\\]*(?:\\.[^"\\\n]*)*)"/, type: PythonTokenType.STRING }, // double quoted string
{ regex: /^b"""((?:[^"]*|\n|\\"|")*?)"""/, type: PythonTokenType.BINARY_STRING }, // triple double quoted string
{ regex: /^b'''((?:[^']*|\n|\\'|')*?)'''/, type: PythonTokenType.BINARY_STRING }, // triple single quoted string
{ regex: /^b'([^'\\]*(?:\\.[^'\\\n]*)*)'/, type: PythonTokenType.BINARY_STRING }, // single quoted string
{ regex: /^b"([^"\\]*(?:\\.[^"\\\n]*)*)"/, type: PythonTokenType.BINARY_STRING }, // double quoted string
{ regex: /^r"""((?:[^"]*|\n|")*?)"""/, type: PythonTokenType.RAW_STRING }, // triple double quoted string
{ regex: /^r'''((?:[^']*|\n|')*?)'''/, type: PythonTokenType.RAW_STRING }, // triple single quoted string
{ regex: /^r'([^'\n]*?)'/, type: PythonTokenType.RAW_STRING }, // single quoted string
{ regex: /^r"([^"\n]*?)"/, type: PythonTokenType.RAW_STRING }, // double quoted string
{ regex: /^-?\d+\.?\d*[eE][-+]?\d+/, type: PythonTokenType.FLOAT }, // Scientific notation
{ regex: /^-?\.\d+\b/, type: PythonTokenType.FLOAT }, // Leading dot float, e.g., .123
{ regex: /^\d+(_\d+)*\b/, type: PythonTokenType.INTEGER }, // Integer with underscores, e.g., 1_000_000
{ regex: /^[^\s:{},"'\[\]\(\)]+/, type: PythonTokenType.UNKNOWN } // Catch all for other non-structured sequences
];
@@ -89,50 +92,6 @@ export class PythonParser
}
}
/**
 * Decodes the text of a Python bytes literal into the bytes it denotes.
 *
 * Recognised escapes are two-digit hex (backslash, `x`, two hex digits), octal
 * (backslash followed by one to three octal digits) and the single-character
 * escapes a, b, f, n, r, t, v, backslash, single quote and double quote.
 * Every other character contributes its UTF-16 code unit.
 *
 * A backslash that does not start a recognised escape is kept as a literal
 * backslash byte, matching Python's handling of unrecognised escape sequences;
 * previously such a backslash was silently discarded.
 *
 * Values above 255 (octal escapes beyond 377, or characters outside Latin-1) are
 * passed to `Buffer.from` unchanged, which truncates array entries to fit a byte.
 *
 * @param byteString - Literal text to decode, possibly containing escape sequences.
 * @returns A Buffer with one byte per decoded escape or plain character.
 */
private static interpretEscapes(byteString: string): Buffer
{
    // Escape letter -> byte value. Built once per call instead of once per match.
    const escapeBytes: { [key: string]: number } = {
        'a': 7, // Alert (bell)
        'b': 8, // Backspace
        'f': 12, // Formfeed
        'n': 10, // New line
        'r': 13, // Carriage return
        't': 9, // Horizontal tab
        'v': 11, // Vertical tab
        '\\': 92, // Backslash
        '\'': 39, // Single quote
        '"': 34, // Double quote
    };
    const backslashByte = 92;

    const byteArray: number[] = [];
    // The last alternative (a bare backslash, no capture group) only matches when none of
    // the escape forms apply, so unrecognised escapes are consumed instead of skipped.
    const regex = /\\x([0-9A-Fa-f]{2})|\\([0-7]{1,3})|\\(['"abfnrtv\\])|([^\\]+)|\\/g;
    let match: RegExpExecArray | null;
    while ((match = regex.exec(byteString)) !== null)
    {
        const [, hex, octal, special, literal] = match;
        if (hex !== undefined) // Hexadecimal sequence
        {
            byteArray.push(parseInt(hex, 16));
        }
        else if (octal !== undefined) // Octal sequence
        {
            byteArray.push(parseInt(octal, 8));
        }
        else if (special !== undefined) // Special escape character
        {
            byteArray.push(escapeBytes[special]);
        }
        else if (literal !== undefined) // Run of regular characters
        {
            for (let i = 0; i < literal.length; ++i)
            {
                byteArray.push(literal.charCodeAt(i));
            }
        }
        else // Backslash that starts no recognised escape: keep it verbatim
        {
            byteArray.push(backslashByte);
        }
    }
    return Buffer.from(byteArray);
}
public static parseValueToken(container: PythonTokenContainer): PythonType
{
const t = container.tokens[container.index++];
@@ -146,6 +105,14 @@ export class PythonParser
{
return t.value;
}
case PythonTokenType.BINARY_STRING:
{
return Buffer.from(t.value, 'binary');
}
case PythonTokenType.RAW_STRING:
{
return t.value;
}
case PythonTokenType.BOOLEAN:
{
return t.value === 'True';
@@ -158,10 +125,6 @@ export class PythonParser
{
return PythonTuple.parse(container);
}
case PythonTokenType.BYTES:
{
return this.interpretEscapes(t.value);
}
case PythonTokenType.NONE:
{
return null;
@@ -182,10 +145,6 @@ export class PythonParser
{
return parseFloat(t.value);
}
case PythonTokenType.COMPLEX:
{
throw new Error('Complex numbers are currently unhandled');
}
default:
throw new Error('Unexpected token: ' + PythonTokenType[t.type]);
}

View File

@@ -8,13 +8,13 @@ export enum PythonTokenType
BOOLEAN,
INTEGER,
FLOAT,
COMPLEX,
STRING,
BINARY_STRING,
RAW_STRING,
LIST_START,
LIST_END,
TUPLE_START,
TUPLE_END,
BYTES,
HEX,
OCTAL,
UNKNOWN // Catch all for other sequences

View File

@@ -10,7 +10,7 @@ export class PythonTuple extends PythonObject
public static parse(container: PythonTokenContainer): PythonTuple
{
let expectingComma = true;
let expectingComma = false;
const tuple = new PythonTuple();
do
{
@@ -48,6 +48,11 @@ export class PythonTuple extends PythonObject
throw new Error('Expected ) end bracket in tuple')
}
/**
 * Looks up the tuple member stored at a given position.
 *
 * @param index - Position of the wanted member in the underlying `data` store.
 * @returns The member held at `index`, or `undefined` when nothing is stored there.
 */
public get(index: number): PythonType | undefined
{
    const member = this.data[index];
    return member;
}
get length(): number
{
return this.data.length;