Support for GenericStreamingMessage

2023-11-09 18:08:56 +00:00
parent 53506ff2cb
commit b71b8d1ec9
12 changed files with 594 additions and 2 deletions
--- a/lib/classes/python/PythonDict.ts
+++ b/lib/classes/python/PythonDict.ts
@@ -0,0 +1,109 @@
+import { PythonTokenContainer } from './PythonTokenContainer';
+import { PythonObject } from './PythonObject';
+import { PythonTokenType } from './PythonTokenType';
+import { PythonType } from './PythonType';
+import { PythonParser } from './PythonParser';
+import { PythonTuple } from './PythonTuple';
+
+// Value types that PythonDict declares as usable dictionary keys
+export type PythonKey = string | boolean | PythonType[] | number | PythonTuple;
+
+/**
+ * A Python dictionary literal parsed into a Map.
+ *
+ * Lookups follow Map semantics: primitive keys are compared by value, while
+ * object keys (for example PythonTuple instances) are compared by reference.
+ */
+export class PythonDict extends PythonObject
+{
+    public data: Map<PythonKey, PythonType> = new Map<PythonKey, PythonType>();
+
+    /**
+     * Parses `key: value` pairs starting at `container.index` and returns once
+     * the closing brace token has been consumed.
+     *
+     * @param container Token stream; its index is advanced as tokens are consumed.
+     * @returns The populated dictionary.
+     * @throws Error if a closing brace or comma is met while a key is still
+     *         expected, if a colon appears where a value is expected, if a value
+     *         has no pending key, or if the tokens run out before a closing brace.
+     */
+    public static parse(container: PythonTokenContainer): PythonDict
+    {
+        const dict = new PythonDict();
+
+        // True while the next value token is a key, false once its colon has been seen
+        let isKey = true;
+        let key: PythonKey | null = null;
+
+        while (container.index < container.tokens.length)
+        {
+            const token = container.tokens[container.index];
+
+            switch (token.type)
+            {
+                case PythonTokenType.BRACE_END:
+                {
+                    if (isKey)
+                    {
+                        // The last token is a key, which is invalid
+                        throw new Error('Unexpected end of dictionary: Expected a key-value pair.');
+                    }
+
+                    container.index++;
+                    return dict;
+                }
+                case PythonTokenType.COLON:
+                {
+                    if (!isKey)
+                    {
+                        throw new Error('Expected a key before the colon in a dictionary.');
+                    }
+                    isKey = false;
+                    container.index++;
+                    break;
+                }
+                case PythonTokenType.COMMA:
+                {
+                    if (isKey)
+                    {
+                        throw new Error('No value provided with dictionary key');
+                    }
+                    isKey = true;
+                    container.index++;
+                    break;
+                }
+                default:
+                {
+                    if (isKey)
+                    {
+                        // parseValueToken advances container.index past the whole key
+                        key = PythonParser.parseValueToken(container) as PythonKey;
+
+                        // Only primitives and objects are accepted. Note that the
+                        // 'object' case is broad: it admits PythonTuple but also
+                        // every other object value the parser can return.
+                        const keyKind = typeof key;
+                        if (
+                            keyKind !== 'string' &&
+                            keyKind !== 'number' &&
+                            keyKind !== 'boolean' &&
+                            keyKind !== 'object'
+                        )
+                        {
+                            throw new Error('Invalid key type in a dictionary.');
+                        }
+                    }
+                    else
+                    {
+                        // Parse the value and pair it with the pending key
+                        if (key === null)
+                        {
+                            throw new Error('Key cannot be null in a dictionary.');
+                        }
+                        const value = PythonParser.parseValueToken(container);
+                        dict.data.set(key, value);
+                        key = null;
+                    }
+                    break;
+                }
+            }
+        }
+
+        throw new Error('Expected close brace } in dictionary');
+    }
+
+    /**
+     * Number of entries in the dictionary.
+     */
+    get length(): number
+    {
+        // A Map has no own enumerable properties, so Object.keys() would always
+        // report 0; the entry count lives in `size`.
+        return this.data.size;
+    }
+
+    /**
+     * Looks up a value by key using Map semantics.
+     *
+     * @param key Key to look up.
+     * @returns The stored value, or undefined when the key is absent.
+     */
+    public get(key: PythonKey): PythonType | undefined
+    {
+        return this.data.get(key);
+    }
+
+    /**
+     * Converts the entries to a plain object; keys become property names.
+     */
+    public toJSON(): unknown
+    {
+        return Object.fromEntries(this.data);
+    }
+}
--- a/lib/classes/python/PythonList.ts
+++ b/lib/classes/python/PythonList.ts
@@ -0,0 +1,64 @@
+import { PythonTokenContainer } from './PythonTokenContainer';
+import { PythonTokenType } from './PythonTokenType';
+import { PythonObject } from './PythonObject';
+import { PythonType } from './PythonType';
+import { PythonParser } from './PythonParser';
+
+/**
+ * A Python list literal parsed into an array of values.
+ */
+export class PythonList extends PythonObject
+{
+    public data: PythonType[] = [];
+
+    /**
+     * Parses comma-separated values starting at `container.index` and returns
+     * once the closing bracket token has been consumed. A trailing comma before
+     * the closing bracket is accepted.
+     *
+     * @param container Token stream; its index is advanced as tokens are consumed.
+     * @returns The populated list.
+     * @throws Error on a misplaced comma, on two values without a separating
+     *         comma, or if the tokens run out before a closing bracket.
+     */
+    public static parse(container: PythonTokenContainer): PythonList
+    {
+        let expectingComma = false;
+        const list = new PythonList();
+
+        // Bounds are checked before each read so that input ending right after
+        // the opening bracket reports the missing bracket instead of
+        // dereferencing an undefined token.
+        while (container.index < container.tokens.length)
+        {
+            const token = container.tokens[container.index];
+            switch (token.type)
+            {
+                case PythonTokenType.LIST_END:
+                {
+                    container.index++;
+                    return list;
+                }
+                case PythonTokenType.COMMA:
+                {
+                    if (!expectingComma)
+                    {
+                        throw new Error('Unexpected comma in list');
+                    }
+                    expectingComma = false;
+                    container.index++;
+                    break;
+                }
+                default:
+                {
+                    if (expectingComma)
+                    {
+                        throw new Error('Unexpected token');
+                    }
+                    // parseValueToken advances container.index past the whole value
+                    list.data.push(PythonParser.parseValueToken(container));
+                    expectingComma = true;
+                    break;
+                }
+            }
+        }
+        throw new Error('Expected ] end bracket in list');
+    }
+
+    /**
+     * Number of elements in the list.
+     */
+    get length(): number
+    {
+        return this.data.length;
+    }
+
+    /**
+     * Renders the elements joined by ', ' inside square brackets.
+     */
+    public toString(): string
+    {
+        return '[' + this.data.join(', ') + ']';
+    }
+
+    /**
+     * Exposes the underlying element array for JSON serialisation.
+     */
+    public toJSON(): unknown
+    {
+        return this.data;
+    }
+}
--- a/lib/classes/python/PythonObject.ts
+++ b/lib/classes/python/PythonObject.ts
@@ -0,0 +1,9 @@
+/**
+ * Common base for parsed values that know how to describe themselves as JSON.
+ */
+export abstract class PythonObject
+{
+    /**
+     * JSON-serialisable view of this value; each subclass supplies its own.
+     */
+    public abstract toJSON(): unknown;
+
+    /**
+     * Text form of this value: the JSON encoding of whatever toJSON() returns.
+     */
+    public toString(): string
+    {
+        const serialisable = this.toJSON();
+        return JSON.stringify(serialisable);
+    }
+}
--- a/lib/classes/python/PythonParser.ts
+++ b/lib/classes/python/PythonParser.ts
@@ -0,0 +1,209 @@
+import { PythonTokenType } from './PythonTokenType';
+import { PythonToken } from './PythonToken';
+import { PythonSet } from './PythonSet';
+import { PythonTokenContainer } from './PythonTokenContainer';
+import { PythonType } from './PythonType';
+import { PythonList } from './PythonList';
+import { PythonTuple } from './PythonTuple';
+
+interface TokenSpec // Pairs a matching pattern with the token type it produces
+{
+    regex: RegExp; // Pattern tried against the not-yet-consumed remainder of the input
+    type: PythonTokenType; // Token type assigned to text matched by regex
+}
+
+/**
+ * Tokenizer and parser for Python literal syntax (numbers, strings, bytes,
+ * None/True/False, lists, tuples, sets and dictionaries).
+ */
+export class PythonParser
+{
+    // Specs are tried in order and the first match wins, so more specific
+    // patterns must come before patterns that match a prefix of the same text:
+    // floats before integers (otherwise `1.5` lexes as `1` followed by `.5`) and
+    // triple-quoted strings before single-quoted ones (otherwise `'''a'''` lexes
+    // as three separate strings).
+    //
+    // When a pattern has a capture group, the last group becomes the token
+    // value, so groups must wrap exactly the text the parser needs.
+    private static tokenSpecs: TokenSpec[] =
+        [
+            { regex: /^\s+/, type: PythonTokenType.UNKNOWN }, // WHITESPACE is treated as UNKNOWN
+            { regex: /^{/, type: PythonTokenType.BRACE_START },
+            { regex: /^}/, type: PythonTokenType.BRACE_END },
+            { regex: /^[:]/, type: PythonTokenType.COLON },
+            { regex: /^[,]/, type: PythonTokenType.COMMA },
+            { regex: /^None\b/, type: PythonTokenType.NONE },
+            { regex: /^(True|False)\b/, type: PythonTokenType.BOOLEAN },
+            { regex: /^-?\d+\.?\d*[eE][-+]?\d+/, type: PythonTokenType.FLOAT }, // Scientific notation
+            { regex: /^-?\d+\.?\d*[jJ]\b/, type: PythonTokenType.COMPLEX },
+            { regex: /^-?\d+\.\d+\b/, type: PythonTokenType.FLOAT },
+            { regex: /^-?\.\d+\b/, type: PythonTokenType.FLOAT }, // Leading dot float, e.g., .123
+            { regex: /^0x([0-9a-fA-F]+\b)/, type: PythonTokenType.HEX },
+            { regex: /^0o([0-7]+)/, type: PythonTokenType.OCTAL },
+            // Optional sign and optional underscore groups, e.g. -5 or 1_000_000.
+            // The underscore group is non-capturing so the whole literal is the value.
+            { regex: /^-?\d+(?:_\d+)*\b/, type: PythonTokenType.INTEGER },
+            // The capture group wraps the entire body; a capturing group that is
+            // itself repeated would retain only its final iteration.
+            { regex: /^b'((?:\\[0-7]{3}|\\x[0-9A-Fa-f]{2}|\\['"abfnrtv\\]|[^'\\])*)'/, type: PythonTokenType.BYTES },
+            { regex: /^b"((?:\\[0-7]{3}|\\x[0-9A-Fa-f]{2}|\\["'abfnrtv\\]|[^"\\])*)"/, type: PythonTokenType.BYTES },
+            { regex: /^\(/, type: PythonTokenType.TUPLE_START },
+            { regex: /^\)/, type: PythonTokenType.TUPLE_END },
+            { regex: /^\[/, type: PythonTokenType.LIST_START },
+            { regex: /^\]/, type: PythonTokenType.LIST_END },
+            { regex: /^'''((?:\\.|[^'\\]|'{1,2}(?![']))*)'''/, type: PythonTokenType.STRING }, // Triple-quoted single strings
+            { regex: /^"""((?:\\.|[^"\\]|"{1,2}(?!["]))*)"""/, type: PythonTokenType.STRING }, // Triple-quoted double strings
+            { regex: /^'((?:\\.|[^'\\])*)'/, type: PythonTokenType.STRING }, // Single-quoted strings
+            { regex: /^"((?:\\.|[^"\\])*)"/, type: PythonTokenType.STRING }, // Double-quoted strings
+            { regex: /^r'((?:\\.|[^'\\])*)'/, type: PythonTokenType.STRING }, // Raw single-quoted strings
+            { regex: /^r"((?:\\.|[^"\\])*)"/, type: PythonTokenType.STRING }, // Raw double-quoted strings
+            { regex: /^\\u[\dA-Fa-f]{4}/, type: PythonTokenType.STRING }, // Unicode escape sequences
+            { regex: /^\\U[\dA-Fa-f]{8}/, type: PythonTokenType.STRING }, // Unicode escape sequences
+            { regex: /^[^\s:{},"'\[\]\(\)]+/, type: PythonTokenType.UNKNOWN } // Catch all for other non-structured sequences
+        ];
+
+    // Byte values of the single-character escapes recognised inside bytes literals
+    private static readonly escapeBytes: { [key: string]: number } = {
+        'a': 7,    // Alert (bell)
+        'b': 8,    // Backspace
+        'f': 12,   // Formfeed
+        'n': 10,   // New line
+        'r': 13,   // Carriage return
+        't': 9,    // Horizontal tab
+        'v': 11,   // Vertical tab
+        '\\': 92,  // Backslash
+        '\'': 39,  // Single quote
+        '"': 34,   // Double quote
+    };
+
+    /**
+     * Splits the input into tokens by repeatedly matching the token specs
+     * against the unconsumed remainder of the input.
+     *
+     * Tokens of type UNKNOWN are consumed but not yielded.
+     * NOTE(review): that covers whitespace but also the catch-all spec, so
+     * unrecognised text such as bare identifiers is skipped silently rather
+     * than reported - confirm this is intended.
+     *
+     * @param input Source text to tokenize.
+     * @throws Error if no spec matches at the current position.
+     */
+    private static* tokenize(input: string): Generator<PythonToken, void, undefined>
+    {
+        let index = 0;
+        while (index < input.length)
+        {
+            const currentInput = input.slice(index);
+
+            let matched = false;
+            for (const { regex, type } of PythonParser.tokenSpecs)
+            {
+                const tokenMatch = currentInput.match(regex);
+                if (tokenMatch === null)
+                {
+                    continue;
+                }
+
+                matched = true;
+
+                // Prefer the last capture group; with no groups this index is 0,
+                // i.e. the whole match. Fall back to the whole match if the
+                // group did not participate.
+                const captured = tokenMatch[tokenMatch.length - 1];
+                const value = captured !== undefined ? captured : tokenMatch[0];
+
+                index += tokenMatch[0].length; // Move past this token
+
+                if (type !== PythonTokenType.UNKNOWN)
+                {
+                    yield { type, value };
+                }
+                break;
+            }
+
+            if (!matched)
+            {
+                throw new Error(`Unexpected token at index ${index}: "${currentInput[0]}"`);
+            }
+        }
+    }
+
+    /**
+     * Converts the body of a bytes literal into the bytes it denotes, decoding
+     * \xHH, octal and single-character escapes; every other character
+     * contributes its UTF-16 code unit.
+     *
+     * @param byteString Text between the quotes of a bytes literal.
+     * @returns Buffer holding the decoded bytes.
+     */
+    private static interpretEscapes(byteString: string): Buffer
+    {
+        const byteArray: number[] = [];
+        const regex = /\\x([0-9A-Fa-f]{2})|\\([0-7]{1,3})|\\(['"abfnrtv\\])|([^\\]+)/g;
+        let match: RegExpExecArray | null;
+
+        while ((match = regex.exec(byteString)) !== null)
+        {
+            if (match[1]) // Hexadecimal sequence
+            {
+                byteArray.push(parseInt(match[1], 16));
+            }
+            else if (match[2]) // Octal sequence
+            {
+                byteArray.push(parseInt(match[2], 8));
+            }
+            else if (match[3]) // Special escape character
+            {
+                byteArray.push(PythonParser.escapeBytes[match[3]]);
+            }
+            else if (match[4]) // Regular characters
+            {
+                for (let i = 0; i < match[4].length; ++i)
+                {
+                    byteArray.push(match[4].charCodeAt(i));
+                }
+            }
+        }
+
+        return Buffer.from(byteArray);
+    }
+
+    /**
+     * Parses one value starting at `container.index`, advancing the index past
+     * every token that belongs to it. Container literals are delegated to the
+     * matching class's parse method.
+     *
+     * String tokens are returned as matched; escape sequences inside them are
+     * not decoded.
+     *
+     * @param container Token stream; its index is advanced as tokens are consumed.
+     * @returns The parsed value (null for None).
+     * @throws Error if the tokens are exhausted, for complex numbers, and for
+     *         token types that cannot start a value.
+     */
+    public static parseValueToken(container: PythonTokenContainer): PythonType
+    {
+        if (container.index >= container.tokens.length)
+        {
+            // Without this guard the switch below would dereference undefined
+            throw new Error('Unexpected end of input: expected a value');
+        }
+
+        const t = container.tokens[container.index++];
+        switch (t.type)
+        {
+            case PythonTokenType.BRACE_START:
+            {
+                return PythonSet.parse(container);
+            }
+            case PythonTokenType.STRING:
+            {
+                return t.value;
+            }
+            case PythonTokenType.BOOLEAN:
+            {
+                return t.value === 'True';
+            }
+            case PythonTokenType.LIST_START:
+            {
+                return PythonList.parse(container);
+            }
+            case PythonTokenType.TUPLE_START:
+            {
+                return PythonTuple.parse(container);
+            }
+            case PythonTokenType.BYTES:
+            {
+                return PythonParser.interpretEscapes(t.value);
+            }
+            case PythonTokenType.NONE:
+            {
+                return null;
+            }
+            case PythonTokenType.HEX:
+            {
+                return parseInt(t.value, 16);
+            }
+            case PythonTokenType.OCTAL:
+            {
+                return parseInt(t.value, 8);
+            }
+            case PythonTokenType.INTEGER:
+            {
+                // Digit-group underscores are legal in the literal but not in parseInt
+                return parseInt(t.value.replace(/_/g, ''), 10);
+            }
+            case PythonTokenType.FLOAT:
+            {
+                return parseFloat(t.value);
+            }
+            case PythonTokenType.COMPLEX:
+            {
+                throw new Error('Complex numbers are currently unhandled');
+            }
+            default:
+                throw new Error('Unexpected token: ' + PythonTokenType[t.type]);
+        }
+    }
+
+    /**
+     * Tokenizes and parses a complete Python literal.
+     *
+     * @param input Source text containing exactly one top-level value.
+     * @returns The parsed value.
+     * @throws Error if the text cannot be tokenized or parsed, or if tokens
+     *         remain after the first value.
+     */
+    public static parse(input: string): PythonType
+    {
+        const cont = new PythonTokenContainer();
+        for (const token of PythonParser.tokenize(input))
+        {
+            cont.tokens.push(token);
+        }
+
+        const parsedToken = PythonParser.parseValueToken(cont);
+        if (cont.index < cont.tokens.length)
+        {
+            throw new Error('Only one token expected at root level');
+        }
+        return parsedToken;
+    }
+}
--- a/lib/classes/python/PythonSet.ts
+++ b/lib/classes/python/PythonSet.ts
@@ -0,0 +1,70 @@
+import { PythonTokenContainer } from './PythonTokenContainer';
+import { PythonTokenType } from './PythonTokenType';
+import { PythonDict } from './PythonDict';
+import { PythonObject } from './PythonObject';
+import { PythonType } from './PythonType';
+import { PythonParser } from './PythonParser';
+
+/**
+ * A Python set literal parsed into a Set of values.
+ *
+ * Because sets and dictionaries share brace syntax, parse() is also the entry
+ * point for dictionaries and hands over to PythonDict when it detects one.
+ */
+export class PythonSet extends PythonObject
+{
+    public data = new Set<PythonType>();
+
+    /**
+     * Parses the contents of a brace-delimited literal starting at
+     * `container.index` and returns once the closing brace has been consumed.
+     *
+     * Empty braces produce an empty PythonDict. If a colon is encountered the
+     * literal is a dictionary: the index is rewound to the start and parsing is
+     * delegated to PythonDict.parse.
+     *
+     * @param container Token stream; its index is advanced as tokens are consumed.
+     * @returns A PythonSet, or a PythonDict for `{}` and `key: value` literals.
+     * @throws Error on a misplaced comma, on two values without a separating
+     *         comma, or if the tokens run out before a closing brace.
+     */
+    public static parse(container: PythonTokenContainer): PythonSet | PythonDict
+    {
+        let expectingComma = false;
+        const startIndex = container.index;
+        const set = new PythonSet();
+
+        // Bounds are checked before each read so that input ending right after
+        // the opening brace reports the missing brace instead of dereferencing
+        // an undefined token.
+        while (container.index < container.tokens.length)
+        {
+            const token = container.tokens[container.index];
+            switch (token.type)
+            {
+                case PythonTokenType.BRACE_END:
+                {
+                    const isEmpty = container.index === startIndex;
+
+                    // The closing brace must be consumed in both cases; otherwise
+                    // the caller would see a stray } after an empty dictionary.
+                    container.index++;
+
+                    // Empty braces denote an empty dictionary, not an empty set
+                    return isEmpty ? new PythonDict() : set;
+                }
+                case PythonTokenType.COMMA:
+                {
+                    if (!expectingComma)
+                    {
+                        throw new Error('Unexpected comma in set');
+                    }
+                    expectingComma = false;
+                    container.index++;
+                    break;
+                }
+                case PythonTokenType.COLON:
+                {
+                    // This is a dictionary, not a set, start again..
+                    container.index = startIndex;
+                    return PythonDict.parse(container);
+                }
+                default:
+                {
+                    if (expectingComma)
+                    {
+                        throw new Error('Unexpected token');
+                    }
+                    // parseValueToken advances container.index past the whole value
+                    set.data.add(PythonParser.parseValueToken(container));
+                    expectingComma = true;
+                    break;
+                }
+            }
+        }
+        throw new Error('Expected } end brace in set');
+    }
+
+    /**
+     * Converts the members to an array for JSON serialisation.
+     */
+    public toJSON(): unknown
+    {
+        return Array.from(this.data);
+    }
+}
--- a/lib/classes/python/PythonToken.ts
+++ b/lib/classes/python/PythonToken.ts
@@ -0,0 +1,7 @@
+import { PythonTokenType } from './PythonTokenType';
+
+export interface PythonToken // One lexical token: a type tag plus its text
+{
+    type: PythonTokenType; // Category of the token
+    value: string; // Text carried by the token
+}
--- a/lib/classes/python/PythonTokenContainer.ts
+++ b/lib/classes/python/PythonTokenContainer.ts
@@ -0,0 +1,7 @@
+import { PythonToken } from './PythonToken';
+
+export class PythonTokenContainer // Token list plus a read cursor
+{
+    tokens: PythonToken[] = []; // Tokens held by this container; starts empty
+    index = 0; // Cursor into tokens; starts at the first token
+}
--- a/lib/classes/python/PythonTokenType.ts
+++ b/lib/classes/python/PythonTokenType.ts
@@ -0,0 +1,21 @@
+export enum PythonTokenType // Categories a token can belong to
+{
+    NONE, // The None literal
+    BRACE_START, // {
+    BRACE_END, // }
+    COLON, // :
+    COMMA, // ,
+    BOOLEAN, // True or False
+    INTEGER, // Decimal integer
+    FLOAT, // Floating-point number
+    COMPLEX, // Number with a j/J suffix
+    STRING, // Quoted string
+    LIST_START, // [
+    LIST_END, // ]
+    TUPLE_START, // (
+    TUPLE_END, // )
+    BYTES, // b'...' or b"..." literal
+    HEX, // 0x-prefixed integer
+    OCTAL, // 0o-prefixed integer
+    UNKNOWN // Whitespace and any other unrecognised sequence
+}
--- a/lib/classes/python/PythonTuple.ts
+++ b/lib/classes/python/PythonTuple.ts
@@ -0,0 +1,65 @@
+import { PythonTokenContainer } from './PythonTokenContainer';
+import { PythonTokenType } from './PythonTokenType';
+import { PythonObject } from './PythonObject';
+import { PythonType } from './PythonType';
+import { PythonParser } from './PythonParser';
+
+/**
+ * A Python tuple literal parsed into an array of values.
+ */
+export class PythonTuple extends PythonObject
+{
+    public data: PythonType[] = [];
+
+    /**
+     * Parses comma-separated values starting at `container.index` and returns
+     * once the closing parenthesis token has been consumed. A trailing comma
+     * before the closing parenthesis is accepted, so `(1,)` parses.
+     *
+     * @param container Token stream; its index is advanced as tokens are consumed.
+     * @returns The populated tuple.
+     * @throws Error on a misplaced comma, on two values without a separating
+     *         comma, or if the tokens run out before a closing parenthesis.
+     */
+    public static parse(container: PythonTokenContainer): PythonTuple
+    {
+        // A tuple starts with a value, not a comma. Starting this flag as true
+        // would reject the first element of every non-empty tuple.
+        let expectingComma = false;
+        const tuple = new PythonTuple();
+
+        // Bounds are checked before each read so that input ending right after
+        // the opening parenthesis reports the missing parenthesis instead of
+        // dereferencing an undefined token.
+        while (container.index < container.tokens.length)
+        {
+            const token = container.tokens[container.index];
+            switch (token.type)
+            {
+                case PythonTokenType.TUPLE_END:
+                {
+                    container.index++;
+                    return tuple;
+                }
+                case PythonTokenType.COMMA:
+                {
+                    if (!expectingComma)
+                    {
+                        throw new Error('Unexpected comma in tuple');
+                    }
+                    expectingComma = false;
+                    container.index++;
+                    break;
+                }
+                default:
+                {
+                    if (expectingComma)
+                    {
+                        throw new Error('Unexpected token');
+                    }
+                    // parseValueToken advances container.index past the whole value
+                    tuple.data.push(PythonParser.parseValueToken(container));
+                    expectingComma = true;
+                    break;
+                }
+            }
+        }
+        throw new Error('Expected ) end bracket in tuple');
+    }
+
+    /**
+     * Number of elements in the tuple.
+     */
+    get length(): number
+    {
+        return this.data.length;
+    }
+
+    /**
+     * Renders the elements joined by ', ' inside parentheses.
+     */
+    public toString(): string
+    {
+        return '(' + this.data.join(', ') + ')';
+    }
+
+    /**
+     * Exposes the underlying element array for JSON serialisation.
+     */
+    public toJSON(): unknown
+    {
+        return this.data;
+    }
+}
--- a/lib/classes/python/PythonType.ts
+++ b/lib/classes/python/PythonType.ts
@@ -0,0 +1,3 @@
+import { PythonObject } from './PythonObject';
+
+export type PythonType = number | boolean | string | Buffer | PythonObject | null; // Union of every value type a parsed Python literal can take