feat(base-formula): add Pratt parser

This commit is contained in:
Philipinho
2026-04-23 23:44:09 +01:00
parent dc825b0f62
commit d8c96089b1
4 changed files with 281 additions and 0 deletions
@@ -0,0 +1,114 @@
import { parseRaw } from "@docmost/base-formula/server";
import type { RawFormulaAST } from "@docmost/base-formula/server";
// Unit tests for parseRaw. Every expectation pins the complete AST so that a
// precedence or associativity regression cannot hide behind a partial check.
describe("parseRaw", () => {
  /**
   * Asserts that `src` parses to exactly `expected`. Typing `expected` as
   * RawFormulaAST lets the compiler check each fixture against the AST type,
   * which removes the need for `as any` casts.
   */
  const expectAst = (src: string, expected: RawFormulaAST): void => {
    expect(parseRaw(src)).toEqual(expected);
  };

  it("parses a number literal", () => {
    expectAst("42", { t: "num", v: 42 });
  });

  it("parses a string literal", () => {
    expectAst('"hi"', { t: "str", v: "hi" });
  });

  it("parses true/false/null", () => {
    expectAst("true", { t: "bool", v: true });
    expectAst("false", { t: "bool", v: false });
    expectAst("null", { t: "null" });
  });

  it("parses prop(\"Name\")", () => {
    expectAst('prop("Price")', { t: "propName", name: "Price" });
  });

  it("parses unary minus", () => {
    expectAst("-5", {
      t: "op", op: "neg", args: [{ t: "num", v: 5 }],
    });
  });

  it("parses binary arithmetic with precedence", () => {
    expectAst("1 + 2 * 3", {
      t: "op", op: "+", args: [
        { t: "num", v: 1 },
        { t: "op", op: "*", args: [{ t: "num", v: 2 }, { t: "num", v: 3 }] },
      ],
    });
  });

  it("parses same-precedence operators left-associatively", () => {
    // (10 - 2) - 3, not 10 - (2 - 3)
    expectAst("10 - 2 - 3", {
      t: "op", op: "-", args: [
        { t: "op", op: "-", args: [{ t: "num", v: 10 }, { t: "num", v: 2 }] },
        { t: "num", v: 3 },
      ],
    });
  });

  it("respects parentheses", () => {
    expectAst("(1 + 2) * 3", {
      t: "op", op: "*", args: [
        { t: "op", op: "+", args: [{ t: "num", v: 1 }, { t: "num", v: 2 }] },
        { t: "num", v: 3 },
      ],
    });
  });

  it("parses comparisons below arithmetic precedence", () => {
    expectAst("1 + 2 == 3", {
      t: "op", op: "==", args: [
        { t: "op", op: "+", args: [{ t: "num", v: 1 }, { t: "num", v: 2 }] },
        { t: "num", v: 3 },
      ],
    });
  });

  it("parses and/or/not with correct precedence", () => {
    // `and` binds tighter than `or`.
    expectAst('prop("A") or prop("B") and prop("C")', {
      t: "or", args: [
        { t: "propName", name: "A" },
        {
          t: "and", args: [
            { t: "propName", name: "B" },
            { t: "propName", name: "C" },
          ],
        },
      ],
    });
    // `not` binds tighter than `and`.
    expectAst('not prop("A") and prop("B")', {
      t: "and", args: [
        { t: "op", op: "not", args: [{ t: "propName", name: "A" }] },
        { t: "propName", name: "B" },
      ],
    });
  });

  it("parses not as highest unary", () => {
    expectAst('not prop("A")', {
      t: "op", op: "not", args: [{ t: "propName", name: "A" }],
    });
  });

  it("binds unary minus tighter than binary operators", () => {
    // (-2) * 3, not -(2 * 3)
    expectAst("-2 * 3", {
      t: "op", op: "*", args: [
        { t: "op", op: "neg", args: [{ t: "num", v: 2 }] },
        { t: "num", v: 3 },
      ],
    });
  });

  it("parses function calls", () => {
    expectAst("round(1.5)", {
      t: "call", fn: "round", args: [{ t: "num", v: 1.5 }],
    });
    expectAst("concat(\"a\", \"b\", \"c\")", {
      t: "call", fn: "concat", args: [
        { t: "str", v: "a" }, { t: "str", v: "b" }, { t: "str", v: "c" },
      ],
    });
  });

  it("parses if/and/or as dedicated nodes", () => {
    expectAst("if(true, 1, 2)", {
      t: "if",
      cond: { t: "bool", v: true },
      then: { t: "num", v: 1 },
      else: { t: "num", v: 2 },
    });
    expectAst("and(true, false)", {
      t: "and", args: [{ t: "bool", v: true }, { t: "bool", v: false }],
    });
    expectAst("or(true, false)", {
      t: "or", args: [{ t: "bool", v: true }, { t: "bool", v: false }],
    });
  });

  it("accepts nested calls", () => {
    expectAst('round(concat("a", 1))', {
      t: "call", fn: "round", args: [
        { t: "call", fn: "concat", args: [{ t: "str", v: "a" }, { t: "num", v: 1 }] },
      ],
    });
  });

  it("throws on empty input", () => {
    expect(() => parseRaw("")).toThrow();
  });

  it("throws on trailing garbage", () => {
    expect(() => parseRaw("1 2")).toThrow();
  });

  it("throws on a dangling operator", () => {
    expect(() => parseRaw("1 +")).toThrow();
  });

  it("throws on unbalanced parens", () => {
    expect(() => parseRaw("(1 + 2")).toThrow();
  });

  it("throws when prop() is called without a string", () => {
    expect(() => parseRaw("prop(5)")).toThrow();
  });

  it("throws when prop() has wrong arity", () => {
    expect(() => parseRaw('prop("A", "B")')).toThrow();
    expect(() => parseRaw("prop()")).toThrow();
  });

  it("throws when if() has wrong arity", () => {
    expect(() => parseRaw("if(true, 1)")).toThrow();
    expect(() => parseRaw("if(true, 1, 2, 3)")).toThrow();
  });
});
@@ -4,3 +4,4 @@ export * from "./ast";
export * from "./types";
export * from "./error";
export * from "./tokenizer";
export * from "./parser";
@@ -3,3 +3,4 @@ export * from "./ast";
export * from "./types";
export * from "./error";
export * from "./tokenizer";
export * from "./parser";
+165
View File
@@ -0,0 +1,165 @@
// packages/base-formula/src/parser.ts
import { tokenize, Token, TokenKind } from "./tokenizer";
import { FormulaParseError } from "./error";
import type { OpCode } from "./ast";
import type { RawFormulaAST } from "./ast";
/*
* Pratt parser. Top-level entry parses a full expression and then asserts EOF.
 * Binary operators are dispatched through the binding-power table `BP` below.
* `prop(...)`, `if(...)`, `and(...)`, `or(...)` are intercepted when an
* identifier is followed by `(` so they become their dedicated AST nodes.
*/
/**
 * Parses formula source text into a raw AST.
 *
 * The source is tokenized, one complete expression is parsed, and the token
 * that follows must be EOF, so trailing input is rejected.
 *
 * @param src - Formula source text.
 * @returns The root node of the parsed expression.
 * @throws FormulaParseError when the parser meets a token it cannot accept.
 *   (`tokenize` may throw as well; its behavior is not visible here.)
 */
export function parseRaw(src: string): RawFormulaAST {
  const parser = new Parser(tokenize(src));
  const root = parser.parseExpr(0);
  parser.expect(TokenKind.EOF, "Expected end of input");
  return root;
}
/**
 * Binding power of each infix operator token; a larger number binds tighter.
 * Token kinds with no entry are not infix operators, so the expression loop
 * stops when it reaches one.
 */
const BP: Partial<Record<TokenKind, number>> = {
  // Logical connectives bind loosest; `or` below `and`.
  [TokenKind.OR]: 10,
  [TokenKind.AND]: 20,
  // Equality.
  [TokenKind.EQ]: 30,
  [TokenKind.NEQ]: 30,
  // Relational comparison.
  [TokenKind.LT]: 40,
  [TokenKind.GT]: 40,
  [TokenKind.LTE]: 40,
  [TokenKind.GTE]: 40,
  // Additive.
  [TokenKind.PLUS]: 50,
  [TokenKind.MINUS]: 50,
  // Multiplicative.
  [TokenKind.STAR]: 60,
  [TokenKind.SLASH]: 60,
  [TokenKind.PERCENT]: 60,
};
/**
 * Maps an infix operator token to the op code stored on the resulting `op`
 * node. AND and OR are deliberately absent: they produce dedicated `and` /
 * `or` nodes rather than `op` nodes.
 */
const TOK_TO_OP: Partial<Record<TokenKind, OpCode>> = {
  // Arithmetic.
  [TokenKind.PLUS]: "+",
  [TokenKind.MINUS]: "-",
  [TokenKind.STAR]: "*",
  [TokenKind.SLASH]: "/",
  [TokenKind.PERCENT]: "%",
  // Equality.
  [TokenKind.EQ]: "==",
  [TokenKind.NEQ]: "!=",
  // Relational comparison.
  [TokenKind.LT]: "<",
  [TokenKind.GT]: ">",
  [TokenKind.LTE]: "<=",
  [TokenKind.GTE]: ">=",
};
/**
 * Pratt parser over a pre-tokenized formula.
 *
 * `i` is the cursor into `tokens`. The methods assume the token array ends
 * with an EOF token (parseRaw asserts one after the top-level expression).
 * NOTE(review): confirm the tokenizer always appends EOF.
 */
class Parser {
  private i = 0;

  constructor(private readonly tokens: Token[]) {}

  /** Returns the current token without consuming it. */
  peek(): Token {
    return this.tokens[this.i];
  }

  /** Returns the current token and moves the cursor past it. */
  next(): Token {
    return this.tokens[this.i++];
  }

  /**
   * Consumes and returns the current token if it has the given kind.
   *
   * @param kind - The token kind required at the cursor.
   * @param msg - Error message used when the kind does not match.
   * @throws FormulaParseError (code UNEXPECTED_TOKEN) spanning the offending
   *   token when the kind does not match; the cursor is not advanced.
   */
  expect(kind: TokenKind, msg: string): Token {
    const t = this.peek();
    if (t.kind !== kind) {
      throw this.error("UNEXPECTED_TOKEN", msg, t);
    }
    return this.next();
  }

  /**
   * Parses an expression whose infix operators all have binding power of at
   * least `minBp`.
   *
   * The right operand is parsed with `bp + 1`, so an operator of the same
   * binding power ends the right operand and is picked up by this loop
   * instead; that makes every infix operator left-associative.
   *
   * @param minBp - Minimum binding power an infix operator needs to be
   *   consumed at this level; 0 accepts every operator.
   */
  parseExpr(minBp: number): RawFormulaAST {
    let lhs = this.parseUnary();
    while (true) {
      const tok = this.peek();
      const bp = BP[tok.kind];
      // Not an infix operator, or it binds too loosely for this level.
      if (bp == null || bp < minBp) break;
      this.next();
      const rhs = this.parseExpr(bp + 1);
      lhs = this.infixNode(tok, lhs, rhs);
    }
    return lhs;
  }

  /**
   * Parses zero or more prefix operators (`-`, `not`) followed by a primary.
   *
   * The operand is parsed through parseUnary/parsePrimary, never parseExpr,
   * so prefix operators bind tighter than every infix operator:
   * `-2 * 3` is `(-2) * 3` and `not a == b` is `(not a) == b`.
   */
  parseUnary(): RawFormulaAST {
    const tok = this.peek();
    if (tok.kind === TokenKind.MINUS) {
      this.next();
      const arg = this.parseUnary();
      return { t: "op", op: "neg", args: [arg] };
    }
    if (tok.kind === TokenKind.NOT) {
      this.next();
      const arg = this.parseUnary();
      return { t: "op", op: "not", args: [arg] };
    }
    return this.parsePrimary();
  }

  /**
   * Parses a literal, a parenthesized expression, or a call-style form
   * (`name(...)`, including `prop`, `if`, `and`, `or`).
   *
   * @throws FormulaParseError for any token that cannot start an operand.
   */
  parsePrimary(): RawFormulaAST {
    const tok = this.next();
    switch (tok.kind) {
      case TokenKind.NUMBER:
        return { t: "num", v: Number(tok.text) };
      case TokenKind.STRING:
        return { t: "str", v: tok.text };
      case TokenKind.TRUE:
        return { t: "bool", v: true };
      case TokenKind.FALSE:
        return { t: "bool", v: false };
      case TokenKind.NULL:
        return { t: "null" };
      case TokenKind.LPAREN: {
        const inner = this.parseExpr(0);
        this.expect(TokenKind.RPAREN, "Expected ')'");
        return inner;
      }
      // AND/OR tokens are accepted here so that `and(...)` / `or(...)` can be
      // written in call form as well as infix form.
      case TokenKind.AND:
      case TokenKind.OR:
      case TokenKind.IDENT:
        return this.parseCall(tok);
      default:
        throw this.error(
          "UNEXPECTED_TOKEN",
          `Unexpected token '${tok.text || tok.kind}'`,
          tok,
        );
    }
  }

  /** Builds the AST node for infix operator `tok` applied to `lhs` and `rhs`. */
  private infixNode(
    tok: Token,
    lhs: RawFormulaAST,
    rhs: RawFormulaAST,
  ): RawFormulaAST {
    if (tok.kind === TokenKind.AND) return { t: "and", args: [lhs, rhs] };
    if (tok.kind === TokenKind.OR) return { t: "or", args: [lhs, rhs] };
    const op = TOK_TO_OP[tok.kind];
    if (op == null) {
      // Only reachable if BP gains an entry that TOK_TO_OP lacks; fail loudly
      // instead of emitting a node with an undefined op.
      throw this.error(
        "UNEXPECTED_TOKEN",
        `Unexpected token '${tok.text || tok.kind}'`,
        tok,
      );
    }
    return { t: "op", op, args: [lhs, rhs] };
  }

  /**
   * Parses `callee(arg, ...)` where `callee` has already been consumed, and
   * maps the reserved names `prop`, `if`, `and`, `or` to their dedicated
   * nodes; any other name becomes a generic `call` node.
   *
   * @throws FormulaParseError when `callee` is not followed by `(`, when the
   *   argument list is malformed, when `prop` is not given exactly one string
   *   literal, or when `if` is not given exactly three arguments.
   */
  private parseCall(callee: Token): RawFormulaAST {
    if (this.peek().kind !== TokenKind.LPAREN) {
      // The prop("...") hint only makes sense for a real identifier; a stray
      // `and`/`or` keyword is reported as a plain unexpected token.
      const message =
        callee.kind === TokenKind.IDENT
          ? `Unexpected identifier '${callee.text}' (did you mean prop("${callee.text}")?)`
          : `Unexpected token '${callee.text || callee.kind}'`;
      throw this.error("UNEXPECTED_TOKEN", message, callee);
    }
    this.next(); // consume '('

    const args: RawFormulaAST[] = [];
    if (this.peek().kind !== TokenKind.RPAREN) {
      args.push(this.parseExpr(0));
      while (this.peek().kind === TokenKind.COMMA) {
        this.next();
        args.push(this.parseExpr(0));
      }
    }
    this.expect(TokenKind.RPAREN, "Expected ')'");

    switch (callee.text) {
      case "prop": {
        if (args.length !== 1 || args[0].t !== "str") {
          throw this.error(
            "UNEXPECTED_TOKEN",
            'prop() expects exactly one string literal argument',
            callee,
          );
        }
        return { t: "propName", name: args[0].v };
      }
      case "if": {
        if (args.length !== 3) {
          throw this.error(
            "ARITY_MISMATCH",
            "if() expects exactly 3 arguments",
            callee,
          );
        }
        return { t: "if", cond: args[0], then: args[1], else: args[2] };
      }
      case "and":
        return { t: "and", args };
      case "or":
        return { t: "or", args };
      default:
        return { t: "call", fn: callee.text, args };
    }
  }

  /** Creates a single-diagnostic FormulaParseError spanning the token `at`. */
  private error(
    code: "UNEXPECTED_TOKEN" | "ARITY_MISMATCH",
    message: string,
    at: Token,
  ): FormulaParseError {
    return new FormulaParseError([
      { code, message, span: { start: at.start, end: at.end } },
    ]);
  }
}