mirror of
https://github.com/docmost/docmost.git
synced 2026-06-10 10:13:01 +08:00
feat(base-formula): add Pratt parser
This commit is contained in:
@@ -0,0 +1,114 @@
|
||||
import { parseRaw } from "@docmost/base-formula/server";
|
||||
import type { RawFormulaAST } from "@docmost/base-formula/server";
|
||||
|
||||
describe("parseRaw", () => {
|
||||
it("parses a number literal", () => {
|
||||
expect(parseRaw("42")).toEqual({ t: "num", v: 42 });
|
||||
});
|
||||
|
||||
it("parses a string literal", () => {
|
||||
expect(parseRaw('"hi"')).toEqual({ t: "str", v: "hi" });
|
||||
});
|
||||
|
||||
it("parses true/false/null", () => {
|
||||
expect(parseRaw("true")).toEqual({ t: "bool", v: true });
|
||||
expect(parseRaw("false")).toEqual({ t: "bool", v: false });
|
||||
expect(parseRaw("null")).toEqual({ t: "null" });
|
||||
});
|
||||
|
||||
it("parses prop(\"Name\")", () => {
|
||||
expect(parseRaw('prop("Price")')).toEqual({ t: "propName", name: "Price" });
|
||||
});
|
||||
|
||||
it("parses unary minus", () => {
|
||||
expect(parseRaw("-5")).toEqual({
|
||||
t: "op", op: "neg", args: [{ t: "num", v: 5 }],
|
||||
});
|
||||
});
|
||||
|
||||
it("parses binary arithmetic with precedence", () => {
|
||||
expect(parseRaw("1 + 2 * 3")).toEqual({
|
||||
t: "op", op: "+", args: [
|
||||
{ t: "num", v: 1 },
|
||||
{ t: "op", op: "*", args: [{ t: "num", v: 2 }, { t: "num", v: 3 }] },
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it("respects parentheses", () => {
|
||||
expect(parseRaw("(1 + 2) * 3")).toEqual({
|
||||
t: "op", op: "*", args: [
|
||||
{ t: "op", op: "+", args: [{ t: "num", v: 1 }, { t: "num", v: 2 }] },
|
||||
{ t: "num", v: 3 },
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it("parses comparisons below arithmetic precedence", () => {
|
||||
const ast = parseRaw("1 + 2 == 3") as any;
|
||||
expect(ast.t).toBe("op");
|
||||
expect(ast.op).toBe("==");
|
||||
});
|
||||
|
||||
it("parses and/or/not with correct precedence", () => {
|
||||
const ast = parseRaw('prop("A") or prop("B") and prop("C")') as any;
|
||||
expect(ast.t).toBe("or");
|
||||
expect(ast.args[1].t).toBe("and");
|
||||
});
|
||||
|
||||
it("parses not as highest unary", () => {
|
||||
const ast = parseRaw('not prop("A")') as any;
|
||||
expect(ast.t).toBe("op");
|
||||
expect(ast.op).toBe("not");
|
||||
});
|
||||
|
||||
it("parses function calls", () => {
|
||||
expect(parseRaw("round(1.5)")).toEqual({
|
||||
t: "call", fn: "round", args: [{ t: "num", v: 1.5 }],
|
||||
});
|
||||
expect(parseRaw("concat(\"a\", \"b\", \"c\")")).toEqual({
|
||||
t: "call", fn: "concat", args: [
|
||||
{ t: "str", v: "a" }, { t: "str", v: "b" }, { t: "str", v: "c" },
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it("parses if/and/or as dedicated nodes", () => {
|
||||
const iff = parseRaw('if(true, 1, 2)') as any;
|
||||
expect(iff.t).toBe("if");
|
||||
expect(iff.cond).toEqual({ t: "bool", v: true });
|
||||
expect(iff.then).toEqual({ t: "num", v: 1 });
|
||||
expect(iff.else).toEqual({ t: "num", v: 2 });
|
||||
|
||||
const an = parseRaw('and(true, false)') as any;
|
||||
expect(an.t).toBe("and");
|
||||
expect(an.args.length).toBe(2);
|
||||
});
|
||||
|
||||
it("accepts nested calls", () => {
|
||||
const ast = parseRaw('round(concat("a", 1))') as any;
|
||||
expect(ast.t).toBe("call");
|
||||
expect(ast.args[0].t).toBe("call");
|
||||
});
|
||||
|
||||
it("throws on empty input", () => {
|
||||
expect(() => parseRaw("")).toThrow();
|
||||
});
|
||||
|
||||
it("throws on trailing garbage", () => {
|
||||
expect(() => parseRaw("1 2")).toThrow();
|
||||
});
|
||||
|
||||
it("throws on unbalanced parens", () => {
|
||||
expect(() => parseRaw("(1 + 2")).toThrow();
|
||||
});
|
||||
|
||||
it("throws when prop() is called without a string", () => {
|
||||
expect(() => parseRaw("prop(5)")).toThrow();
|
||||
});
|
||||
|
||||
it("throws when prop() has wrong arity", () => {
|
||||
expect(() => parseRaw('prop("A", "B")')).toThrow();
|
||||
expect(() => parseRaw("prop()")).toThrow();
|
||||
});
|
||||
});
|
||||
@@ -4,3 +4,4 @@ export * from "./ast";
|
||||
export * from "./types";
|
||||
export * from "./error";
|
||||
export * from "./tokenizer";
|
||||
export * from "./parser";
|
||||
|
||||
@@ -3,3 +3,4 @@ export * from "./ast";
|
||||
export * from "./types";
|
||||
export * from "./error";
|
||||
export * from "./tokenizer";
|
||||
export * from "./parser";
|
||||
|
||||
@@ -0,0 +1,165 @@
|
||||
// packages/base-formula/src/parser.ts
|
||||
import { tokenize, Token, TokenKind } from "./tokenizer";
|
||||
import { FormulaParseError } from "./error";
|
||||
import type { OpCode } from "./ast";
|
||||
import type { RawFormulaAST } from "./ast";
|
||||
|
||||
/*
|
||||
* Pratt parser. Top-level entry parses a full expression and then asserts EOF.
|
||||
* Binary operators are dispatched through a precedence table in `bp` below.
|
||||
* `prop(...)`, `if(...)`, `and(...)`, `or(...)` are intercepted when an
|
||||
* identifier is followed by `(` so they become their dedicated AST nodes.
|
||||
*/
|
||||
export function parseRaw(src: string): RawFormulaAST {
|
||||
const tokens = tokenize(src);
|
||||
const p = new Parser(tokens);
|
||||
const expr = p.parseExpr(0);
|
||||
p.expect(TokenKind.EOF, "Expected end of input");
|
||||
return expr;
|
||||
}
|
||||
|
||||
const BP: Partial<Record<TokenKind, number>> = {
|
||||
[TokenKind.OR]: 10,
|
||||
[TokenKind.AND]: 20,
|
||||
[TokenKind.EQ]: 30, [TokenKind.NEQ]: 30,
|
||||
[TokenKind.LT]: 40, [TokenKind.GT]: 40,
|
||||
[TokenKind.LTE]: 40, [TokenKind.GTE]: 40,
|
||||
[TokenKind.PLUS]: 50, [TokenKind.MINUS]: 50,
|
||||
[TokenKind.STAR]: 60, [TokenKind.SLASH]: 60, [TokenKind.PERCENT]: 60,
|
||||
};
|
||||
|
||||
const TOK_TO_OP: Partial<Record<TokenKind, OpCode>> = {
|
||||
[TokenKind.PLUS]: "+", [TokenKind.MINUS]: "-",
|
||||
[TokenKind.STAR]: "*", [TokenKind.SLASH]: "/", [TokenKind.PERCENT]: "%",
|
||||
[TokenKind.EQ]: "==", [TokenKind.NEQ]: "!=",
|
||||
[TokenKind.LT]: "<", [TokenKind.GT]: ">",
|
||||
[TokenKind.LTE]: "<=", [TokenKind.GTE]: ">=",
|
||||
};
|
||||
|
||||
class Parser {
|
||||
private i = 0;
|
||||
constructor(private tokens: Token[]) {}
|
||||
|
||||
peek(): Token { return this.tokens[this.i]; }
|
||||
next(): Token { return this.tokens[this.i++]; }
|
||||
expect(kind: TokenKind, msg: string): Token {
|
||||
const t = this.peek();
|
||||
if (t.kind !== kind) {
|
||||
throw new FormulaParseError([{
|
||||
code: "UNEXPECTED_TOKEN", message: msg, span: { start: t.start, end: t.end },
|
||||
}]);
|
||||
}
|
||||
return this.next();
|
||||
}
|
||||
|
||||
parseExpr(minBp: number): RawFormulaAST {
|
||||
let lhs = this.parseUnary();
|
||||
|
||||
while (true) {
|
||||
const tok = this.peek();
|
||||
if (tok.kind === TokenKind.AND) {
|
||||
if (BP[TokenKind.AND]! < minBp) break;
|
||||
this.next();
|
||||
const rhs = this.parseExpr(BP[TokenKind.AND]! + 1);
|
||||
lhs = { t: "and", args: [lhs, rhs] };
|
||||
continue;
|
||||
}
|
||||
if (tok.kind === TokenKind.OR) {
|
||||
if (BP[TokenKind.OR]! < minBp) break;
|
||||
this.next();
|
||||
const rhs = this.parseExpr(BP[TokenKind.OR]! + 1);
|
||||
lhs = { t: "or", args: [lhs, rhs] };
|
||||
continue;
|
||||
}
|
||||
const bp = BP[tok.kind];
|
||||
if (bp == null || bp < minBp) break;
|
||||
this.next();
|
||||
const rhs = this.parseExpr(bp + 1);
|
||||
const op = TOK_TO_OP[tok.kind]!;
|
||||
lhs = { t: "op", op, args: [lhs, rhs] };
|
||||
}
|
||||
return lhs;
|
||||
}
|
||||
|
||||
parseUnary(): RawFormulaAST {
|
||||
const tok = this.peek();
|
||||
if (tok.kind === TokenKind.MINUS) {
|
||||
this.next();
|
||||
const arg = this.parseUnary();
|
||||
return { t: "op", op: "neg", args: [arg] };
|
||||
}
|
||||
if (tok.kind === TokenKind.NOT) {
|
||||
this.next();
|
||||
const arg = this.parseUnary();
|
||||
return { t: "op", op: "not", args: [arg] };
|
||||
}
|
||||
return this.parsePrimary();
|
||||
}
|
||||
|
||||
parsePrimary(): RawFormulaAST {
|
||||
const tok = this.next();
|
||||
switch (tok.kind) {
|
||||
case TokenKind.NUMBER: return { t: "num", v: Number(tok.text) };
|
||||
case TokenKind.STRING: return { t: "str", v: tok.text };
|
||||
case TokenKind.TRUE: return { t: "bool", v: true };
|
||||
case TokenKind.FALSE: return { t: "bool", v: false };
|
||||
case TokenKind.NULL: return { t: "null" };
|
||||
case TokenKind.LPAREN: {
|
||||
const e = this.parseExpr(0);
|
||||
this.expect(TokenKind.RPAREN, "Expected ')'");
|
||||
return e;
|
||||
}
|
||||
case TokenKind.AND:
|
||||
case TokenKind.OR:
|
||||
case TokenKind.IDENT: {
|
||||
if (this.peek().kind !== TokenKind.LPAREN) {
|
||||
throw new FormulaParseError([{
|
||||
code: "UNEXPECTED_TOKEN",
|
||||
message: `Unexpected identifier '${tok.text}' (did you mean prop("${tok.text}")?)`,
|
||||
span: { start: tok.start, end: tok.end },
|
||||
}]);
|
||||
}
|
||||
this.next(); // LPAREN
|
||||
const args: RawFormulaAST[] = [];
|
||||
if (this.peek().kind !== TokenKind.RPAREN) {
|
||||
args.push(this.parseExpr(0));
|
||||
while (this.peek().kind === TokenKind.COMMA) {
|
||||
this.next();
|
||||
args.push(this.parseExpr(0));
|
||||
}
|
||||
}
|
||||
this.expect(TokenKind.RPAREN, "Expected ')'");
|
||||
|
||||
if (tok.text === "prop") {
|
||||
if (args.length !== 1 || args[0].t !== "str") {
|
||||
throw new FormulaParseError([{
|
||||
code: "UNEXPECTED_TOKEN",
|
||||
message: 'prop() expects exactly one string literal argument',
|
||||
span: { start: tok.start, end: tok.end },
|
||||
}]);
|
||||
}
|
||||
return { t: "propName", name: args[0].v };
|
||||
}
|
||||
if (tok.text === "if") {
|
||||
if (args.length !== 3) {
|
||||
throw new FormulaParseError([{
|
||||
code: "ARITY_MISMATCH",
|
||||
message: "if() expects exactly 3 arguments",
|
||||
span: { start: tok.start, end: tok.end },
|
||||
}]);
|
||||
}
|
||||
return { t: "if", cond: args[0], then: args[1], else: args[2] };
|
||||
}
|
||||
if (tok.text === "and") return { t: "and", args };
|
||||
if (tok.text === "or") return { t: "or", args };
|
||||
return { t: "call", fn: tok.text, args };
|
||||
}
|
||||
default:
|
||||
throw new FormulaParseError([{
|
||||
code: "UNEXPECTED_TOKEN",
|
||||
message: `Unexpected token '${tok.text || tok.kind}'`,
|
||||
span: { start: tok.start, end: tok.end },
|
||||
}]);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user