feat(base-formula): add tokenizer

This commit is contained in:
Philipinho
2026-04-23 23:40:14 +01:00
parent 7202e65a07
commit dc825b0f62
6 changed files with 225 additions and 2 deletions
+3 -1
View File
@@ -174,7 +174,9 @@
"moduleNameMapper": {
"^@docmost/db/(.*)$": "<rootDir>/database/$1",
"^@docmost/transactional/(.*)$": "<rootDir>/integrations/transactional/$1",
"^@docmost/ee/(.*)$": "<rootDir>/ee/$1"
"^@docmost/ee/(.*)$": "<rootDir>/ee/$1",
"^@docmost/base-formula/server$": "<rootDir>/../../../packages/base-formula/src/index.server.ts",
"^@docmost/base-formula/client$": "<rootDir>/../../../packages/base-formula/src/index.client.ts"
}
}
}
@@ -0,0 +1,71 @@
import { tokenize, TokenKind } from "@docmost/base-formula/server";
/**
 * Unit tests for `tokenize`: token kinds, decoded token text, source spans,
 * and the errors raised for malformed input.
 */
describe("tokenize", () => {
  // Project a token stream onto a single field to keep expectations short.
  const kinds = (src: string) => tokenize(src).map((t) => t.kind);
  const texts = (src: string) => tokenize(src).map((t) => t.text);

  it("emits EOF for empty input", () => {
    expect(kinds("")).toEqual([TokenKind.EOF]);
  });

  it("tokenizes numbers", () => {
    expect(kinds("42")).toEqual([TokenKind.NUMBER, TokenKind.EOF]);
    // A leading sign is a separate MINUS token, not part of the number.
    expect(kinds("-4.5")).toEqual([
      TokenKind.MINUS, TokenKind.NUMBER, TokenKind.EOF,
    ]);
    // The trailing "" is the text of the EOF token.
    expect(texts("123.45")).toEqual(["123.45", ""]);
  });

  it("tokenizes string literals with either quote style", () => {
    expect(kinds('"hi"')).toEqual([TokenKind.STRING, TokenKind.EOF]);
    expect(kinds("'hi'")).toEqual([TokenKind.STRING, TokenKind.EOF]);
    // Token text is the string body without the surrounding quotes.
    expect(texts('"a b c"')[0]).toBe("a b c");
  });

  it("handles escape sequences in strings", () => {
    expect(texts('"a\\"b"')[0]).toBe('a"b');
    expect(texts('"a\\\\b"')[0]).toBe("a\\b");
    expect(texts('"a\\nb"')[0]).toBe("a\nb");
  });

  it("tokenizes keywords and identifiers", () => {
    expect(kinds("true false null and or not")).toEqual([
      TokenKind.TRUE, TokenKind.FALSE, TokenKind.NULL,
      TokenKind.AND, TokenKind.OR, TokenKind.NOT, TokenKind.EOF,
    ]);
    expect(kinds("prop foo _bar")).toEqual([
      TokenKind.IDENT, TokenKind.IDENT, TokenKind.IDENT, TokenKind.EOF,
    ]);
  });

  it("tokenizes operators", () => {
    expect(kinds("+ - * / %")).toEqual([
      TokenKind.PLUS, TokenKind.MINUS, TokenKind.STAR,
      TokenKind.SLASH, TokenKind.PERCENT, TokenKind.EOF,
    ]);
    expect(kinds("== != < > <= >=")).toEqual([
      TokenKind.EQ, TokenKind.NEQ, TokenKind.LT, TokenKind.GT,
      TokenKind.LTE, TokenKind.GTE, TokenKind.EOF,
    ]);
  });

  it("tokenizes punctuation", () => {
    expect(kinds("(),")).toEqual([
      TokenKind.LPAREN, TokenKind.RPAREN, TokenKind.COMMA, TokenKind.EOF,
    ]);
  });

  it("records source spans", () => {
    // One leading space puts `42` at offsets 1 and 2; `end` is exclusive,
    // so the expected span is [1, 3).
    const ts = tokenize(" 42 ");
    expect(ts[0].start).toBe(1);
    expect(ts[0].end).toBe(3);
  });

  it("throws on unterminated string with useful span", () => {
    expect(() => tokenize('"hi')).toThrow(/UNEXPECTED_EOF|unterminated/i);
  });

  it("throws on unknown character", () => {
    expect(() => tokenize("2 @ 3")).toThrow(/UNEXPECTED_TOKEN|unexpected/i);
  });
});
+7 -1
View File
@@ -22,7 +22,13 @@
"paths": {
"@docmost/db/*": ["./src/database/*"],
"@docmost/transactional/*": ["./src/integrations/transactional/*"],
"@docmost/ee/*": ["./src/ee/*"]
"@docmost/ee/*": ["./src/ee/*"],
"@docmost/base-formula/server": [
"../../packages/base-formula/src/index.server.ts"
],
"@docmost/base-formula/client": [
"../../packages/base-formula/src/index.client.ts"
]
}
}
}
@@ -3,3 +3,4 @@
export * from "./ast";
export * from "./types";
export * from "./error";
export * from "./tokenizer";
@@ -2,3 +2,4 @@
export * from "./ast";
export * from "./types";
export * from "./error";
export * from "./tokenizer";
+142
View File
@@ -0,0 +1,142 @@
import { FormulaParseError } from "./error";
/**
 * Categories of lexical token emitted by `tokenize`.
 *
 * Every member is a string enum whose value equals its own name.
 */
export enum TokenKind {
// Literals and names.
NUMBER = "NUMBER",
STRING = "STRING",
IDENT = "IDENT",
// Reserved words: `true`, `false`, `null`, `and`, `or`, `not`.
TRUE = "TRUE",
FALSE = "FALSE",
NULL = "NULL",
AND = "AND",
OR = "OR",
NOT = "NOT",
// Arithmetic operators: `+`, `-`, `*`, `/`, `%`.
PLUS = "PLUS",
MINUS = "MINUS",
STAR = "STAR",
SLASH = "SLASH",
PERCENT = "PERCENT",
// Comparison operators: `==`, `!=`, `<`, `>`, `<=`, `>=`.
EQ = "EQ",
NEQ = "NEQ",
LT = "LT",
GT = "GT",
LTE = "LTE",
GTE = "GTE",
// Punctuation: `(`, `)`, `,`.
LPAREN = "LPAREN",
RPAREN = "RPAREN",
COMMA = "COMMA",
// Sentinel appended once at the end of every token list.
EOF = "EOF",
}
/** A single lexical token together with its position in the source string. */
export type Token = {
// Category of the token.
kind: TokenKind;
// Source slice for most tokens; for STRING tokens the decoded body (quotes
// removed, escapes resolved); the empty string for EOF.
text: string;
// Index into the source string of the token's first character (inclusive).
start: number;
// Index into the source string just past the token's last character (exclusive).
end: number;
};
/**
 * Reserved words mapped to their dedicated token kinds.
 *
 * The table is created with a null prototype so that indexing it with an
 * arbitrary identifier (for example `constructor` or `toString`) yields
 * `undefined` rather than a member inherited from `Object.prototype`.
 * Keys are lowercase; lookups are by exact text.
 */
const KEYWORDS: Record<string, TokenKind> = Object.assign(
  Object.create(null) as Record<string, TokenKind>,
  {
    true: TokenKind.TRUE,
    false: TokenKind.FALSE,
    null: TokenKind.NULL,
    and: TokenKind.AND,
    or: TokenKind.OR,
    not: TokenKind.NOT,
  },
);
/**
 * Splits a formula source string into a flat list of tokens.
 *
 * Scanning rules:
 * - Space, tab, line feed and carriage return are skipped.
 * - A number is a run of ASCII digits, optionally followed by `.` and a
 *   further (possibly empty) run of digits. A sign is never part of the
 *   number; a leading `-` is emitted as a separate MINUS token.
 * - A string is delimited by matching `"` or `'`. Inside it, `\n` becomes a
 *   line feed, `\t` a tab, and a backslash before any other character yields
 *   that character. The token's `text` is the decoded body, while its span
 *   covers the quotes as well.
 * - An identifier matches `[A-Za-z_][A-Za-z0-9_]*`. If its text is an own key
 *   of the keyword table it receives the keyword's kind, otherwise IDENT.
 * - `==`, `!=`, `<=`, `>=` are matched before single-character tokens.
 *
 * @param src Formula source text.
 * @returns The tokens in source order, always terminated by one EOF token
 *   whose `start` and `end` both equal `src.length`.
 * @throws FormulaParseError with code `UNEXPECTED_EOF` when a string literal
 *   or an escape inside it is cut off by the end of input, and with code
 *   `UNEXPECTED_TOKEN` for a character that starts no known token.
 */
export function tokenize(src: string): Token[] {
  const tokens: Token[] = [];

  // Lookup tables are built once per call instead of once per scanned character.
  const twoCharOperators = new Map<string, TokenKind>([
    ["==", TokenKind.EQ],
    ["!=", TokenKind.NEQ],
    ["<=", TokenKind.LTE],
    [">=", TokenKind.GTE],
  ]);
  const oneCharTokens = new Map<string, TokenKind>([
    ["+", TokenKind.PLUS],
    ["-", TokenKind.MINUS],
    ["*", TokenKind.STAR],
    ["/", TokenKind.SLASH],
    ["%", TokenKind.PERCENT],
    ["<", TokenKind.LT],
    [">", TokenKind.GT],
    ["(", TokenKind.LPAREN],
    [")", TokenKind.RPAREN],
    [",", TokenKind.COMMA],
  ]);

  const isDigit = (c: string): boolean => c >= "0" && c <= "9";
  const isIdentStart = (c: string): boolean =>
    (c >= "a" && c <= "z") || (c >= "A" && c <= "Z") || c === "_";

  let i = 0;
  while (i < src.length) {
    const ch = src[i];
    const start = i;

    if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
      i++;
      continue;
    }

    if (isDigit(ch)) {
      while (i < src.length && isDigit(src[i])) i++;
      if (src[i] === ".") {
        i++;
        while (i < src.length && isDigit(src[i])) i++;
      }
      tokens.push({ kind: TokenKind.NUMBER, text: src.slice(start, i), start, end: i });
      continue;
    }

    if (ch === '"' || ch === "'") {
      i++; // skip the opening quote
      let body = "";
      while (i < src.length && src[i] !== ch) {
        if (src[i] !== "\\") {
          body += src[i];
          i++;
          continue;
        }
        // A backslash must be followed by the character it escapes.
        if (i + 1 >= src.length) {
          throw new FormulaParseError([{
            code: "UNEXPECTED_EOF",
            message: "Unterminated escape in string",
            span: { start, end: i + 1 },
          }]);
        }
        const esc = src[i + 1];
        body += esc === "n" ? "\n" : esc === "t" ? "\t" : esc;
        i += 2;
      }
      // The loop ended without meeting the closing quote.
      if (i >= src.length) {
        throw new FormulaParseError([{
          code: "UNEXPECTED_EOF",
          message: "Unterminated string literal",
          span: { start, end: src.length },
        }]);
      }
      i++; // skip the closing quote
      tokens.push({ kind: TokenKind.STRING, text: body, start, end: i });
      continue;
    }

    if (isIdentStart(ch)) {
      while (i < src.length && (isIdentStart(src[i]) || isDigit(src[i]))) i++;
      const text = src.slice(start, i);
      // Own-property check: a bare `KEYWORDS[text]` would also find members
      // inherited from Object.prototype (e.g. "constructor", "toString") and
      // hand back a non-TokenKind value as the token kind.
      const kind = Object.prototype.hasOwnProperty.call(KEYWORDS, text)
        ? KEYWORDS[text]
        : TokenKind.IDENT;
      tokens.push({ kind, text, start, end: i });
      continue;
    }

    // Two-character operators take precedence so that "<=" is not read as "<".
    const two = src.slice(i, i + 2);
    const twoKind = twoCharOperators.get(two);
    if (twoKind !== undefined) {
      i += 2;
      tokens.push({ kind: twoKind, text: two, start, end: i });
      continue;
    }

    const oneKind = oneCharTokens.get(ch);
    if (oneKind !== undefined) {
      i++;
      tokens.push({ kind: oneKind, text: ch, start, end: i });
      continue;
    }

    throw new FormulaParseError([{
      code: "UNEXPECTED_TOKEN",
      message: `Unexpected character '${ch}'`,
      span: { start: i, end: i + 1 },
    }]);
  }

  tokens.push({ kind: TokenKind.EOF, text: "", start: i, end: i });
  return tokens;
}