mirror of
https://github.com/docmost/docmost.git
synced 2026-06-10 10:13:01 +08:00
feat(base-formula): add tokenizer
This commit is contained in:
@@ -174,7 +174,9 @@
|
||||
"moduleNameMapper": {
|
||||
"^@docmost/db/(.*)$": "<rootDir>/database/$1",
|
||||
"^@docmost/transactional/(.*)$": "<rootDir>/integrations/transactional/$1",
|
||||
"^@docmost/ee/(.*)$": "<rootDir>/ee/$1"
|
||||
"^@docmost/ee/(.*)$": "<rootDir>/ee/$1",
|
||||
"^@docmost/base-formula/server$": "<rootDir>/../../../packages/base-formula/src/index.server.ts",
|
||||
"^@docmost/base-formula/client$": "<rootDir>/../../../packages/base-formula/src/index.client.ts"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
import { tokenize, TokenKind } from "@docmost/base-formula/server";
|
||||
|
||||
// Unit tests for the formula tokenizer: token kinds, token text, source
// spans, and the two failure modes (unterminated string, unknown character).
describe("tokenize", () => {
  /** Kinds of all tokens produced for `src`, including the trailing EOF. */
  function kindsOf(src: string) {
    return tokenize(src).map((token) => token.kind);
  }

  /** Texts of all tokens produced for `src`, including the trailing EOF. */
  function textsOf(src: string) {
    return tokenize(src).map((token) => token.text);
  }

  it("emits EOF for empty input", () => {
    expect(kindsOf("")).toEqual([TokenKind.EOF]);
  });

  it("tokenizes numbers", () => {
    expect(kindsOf("42")).toEqual([TokenKind.NUMBER, TokenKind.EOF]);

    // A leading sign is a separate MINUS token, not part of the number.
    expect(kindsOf("-4.5")).toEqual([
      TokenKind.MINUS,
      TokenKind.NUMBER,
      TokenKind.EOF,
    ]);

    // The EOF token carries empty text.
    expect(textsOf("123.45")).toEqual(["123.45", ""]);
  });

  it("tokenizes string literals with either quote style", () => {
    expect(kindsOf('"hi"')).toEqual([TokenKind.STRING, TokenKind.EOF]);
    expect(kindsOf("'hi'")).toEqual([TokenKind.STRING, TokenKind.EOF]);

    // Token text is the string body, without the surrounding quotes.
    const [literal] = textsOf('"a b c"');
    expect(literal).toBe("a b c");
  });

  it("handles escape sequences in strings", () => {
    const [escapedQuote] = textsOf('"a\\"b"');
    expect(escapedQuote).toBe('a"b');

    const [escapedBackslash] = textsOf('"a\\\\b"');
    expect(escapedBackslash).toBe("a\\b");

    const [escapedNewline] = textsOf('"a\\nb"');
    expect(escapedNewline).toBe("a\nb");
  });

  it("tokenizes keywords and identifiers", () => {
    expect(kindsOf("true false null and or not")).toEqual([
      TokenKind.TRUE,
      TokenKind.FALSE,
      TokenKind.NULL,
      TokenKind.AND,
      TokenKind.OR,
      TokenKind.NOT,
      TokenKind.EOF,
    ]);

    expect(kindsOf("prop foo _bar")).toEqual([
      TokenKind.IDENT,
      TokenKind.IDENT,
      TokenKind.IDENT,
      TokenKind.EOF,
    ]);
  });

  it("tokenizes operators", () => {
    expect(kindsOf("+ - * / %")).toEqual([
      TokenKind.PLUS,
      TokenKind.MINUS,
      TokenKind.STAR,
      TokenKind.SLASH,
      TokenKind.PERCENT,
      TokenKind.EOF,
    ]);

    expect(kindsOf("== != < > <= >=")).toEqual([
      TokenKind.EQ,
      TokenKind.NEQ,
      TokenKind.LT,
      TokenKind.GT,
      TokenKind.LTE,
      TokenKind.GTE,
      TokenKind.EOF,
    ]);
  });

  it("tokenizes punctuation", () => {
    expect(kindsOf("(),")).toEqual([
      TokenKind.LPAREN,
      TokenKind.RPAREN,
      TokenKind.COMMA,
      TokenKind.EOF,
    ]);
  });

  it("records source spans", () => {
    // Two leading spaces, so "42" occupies offsets [2, 4).
    const [first] = tokenize("  42  ");
    expect(first.start).toBe(2);
    expect(first.end).toBe(4);
  });

  it("throws on unterminated string with useful span", () => {
    const run = () => tokenize('"hi');
    expect(run).toThrow(/UNEXPECTED_EOF|unterminated/i);
  });

  it("throws on unknown character", () => {
    const run = () => tokenize("2 @ 3");
    expect(run).toThrow(/UNEXPECTED_TOKEN|unexpected/i);
  });
});
|
||||
@@ -22,7 +22,13 @@
|
||||
"paths": {
|
||||
"@docmost/db/*": ["./src/database/*"],
|
||||
"@docmost/transactional/*": ["./src/integrations/transactional/*"],
|
||||
"@docmost/ee/*": ["./src/ee/*"]
|
||||
"@docmost/ee/*": ["./src/ee/*"],
|
||||
"@docmost/base-formula/server": [
|
||||
"../../packages/base-formula/src/index.server.ts"
|
||||
],
|
||||
"@docmost/base-formula/client": [
|
||||
"../../packages/base-formula/src/index.client.ts"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,3 +3,4 @@
|
||||
export * from "./ast";
|
||||
export * from "./types";
|
||||
export * from "./error";
|
||||
export * from "./tokenizer";
|
||||
|
||||
@@ -2,3 +2,4 @@
|
||||
export * from "./ast";
|
||||
export * from "./types";
|
||||
export * from "./error";
|
||||
export * from "./tokenizer";
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
import { FormulaParseError } from "./error";
|
||||
|
||||
/**
 * Lexical categories produced by {@link tokenize}.
 *
 * Members are string-valued; each value equals the member name.
 */
export enum TokenKind {
  // Literals and names
  NUMBER = "NUMBER",
  STRING = "STRING",
  IDENT = "IDENT",

  // Keywords
  TRUE = "TRUE",
  FALSE = "FALSE",
  NULL = "NULL",
  AND = "AND",
  OR = "OR",
  NOT = "NOT",

  // Arithmetic operators: + - * / %
  PLUS = "PLUS",
  MINUS = "MINUS",
  STAR = "STAR",
  SLASH = "SLASH",
  PERCENT = "PERCENT",

  // Comparison operators: == != < > <= >=
  EQ = "EQ",
  NEQ = "NEQ",
  LT = "LT",
  GT = "GT",
  LTE = "LTE",
  GTE = "GTE",

  // Punctuation: ( ) ,
  LPAREN = "LPAREN",
  RPAREN = "RPAREN",
  COMMA = "COMMA",

  // End-of-input marker, always the last token.
  EOF = "EOF",
}

/**
 * A single token together with its position in the source string.
 *
 * - `text` is the matched source slice, except for `STRING` tokens, where it
 *   is the decoded body (quotes removed, escapes resolved), and `EOF`, where
 *   it is the empty string.
 * - `start` / `end` are UTF-16 offsets into the source; `end` is exclusive.
 *   For `STRING` tokens the span includes both quotes.
 */
export type Token = {
  kind: TokenKind;
  text: string;
  start: number;
  end: number;
};

// Reserved words. A Map (rather than a plain object) is used so that
// identifiers named like Object.prototype members ("constructor", "toString",
// "__proto__", ...) are not mistaken for keywords via the prototype chain.
const KEYWORDS: ReadonlyMap<string, TokenKind> = new Map<string, TokenKind>([
  ["true", TokenKind.TRUE],
  ["false", TokenKind.FALSE],
  ["null", TokenKind.NULL],
  ["and", TokenKind.AND],
  ["or", TokenKind.OR],
  ["not", TokenKind.NOT],
]);

// Two-character operators. These are tried before the single-character table
// so that "<=" is not split into "<" followed by an unexpected "=".
const TWO_CHAR_OPERATORS: ReadonlyMap<string, TokenKind> = new Map<
  string,
  TokenKind
>([
  ["==", TokenKind.EQ],
  ["!=", TokenKind.NEQ],
  ["<=", TokenKind.LTE],
  [">=", TokenKind.GTE],
]);

// Single-character operators and punctuation. Built once at module load
// instead of on every loop iteration.
const SINGLE_CHAR_TOKENS: ReadonlyMap<string, TokenKind> = new Map<
  string,
  TokenKind
>([
  ["+", TokenKind.PLUS],
  ["-", TokenKind.MINUS],
  ["*", TokenKind.STAR],
  ["/", TokenKind.SLASH],
  ["%", TokenKind.PERCENT],
  ["<", TokenKind.LT],
  [">", TokenKind.GT],
  ["(", TokenKind.LPAREN],
  [")", TokenKind.RPAREN],
  [",", TokenKind.COMMA],
]);

/** True for the ASCII digits 0-9; false for anything else, including "". */
function isDigit(ch: string): boolean {
  return ch >= "0" && ch <= "9";
}

/** True for characters that may start an identifier: ASCII letters and "_". */
function isIdentStart(ch: string): boolean {
  return (ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z") || ch === "_";
}

/** True for characters that may continue an identifier. */
function isIdentPart(ch: string): boolean {
  return isIdentStart(ch) || isDigit(ch);
}

/**
 * Reads a quoted string literal whose opening quote is at `start`.
 *
 * Returns the decoded body and the offset just past the closing quote.
 * `\n` and `\t` decode to newline and tab; any other escaped character
 * (including the quote and the backslash) decodes to itself.
 */
function readString(
  src: string,
  start: number,
): { body: string; end: number } {
  const quote = src.charAt(start);
  let pos = start + 1;
  let body = "";

  while (pos < src.length && src.charAt(pos) !== quote) {
    const ch = src.charAt(pos);
    if (ch !== "\\") {
      body += ch;
      pos++;
      continue;
    }

    // A backslash as the very last character has nothing to escape.
    if (pos + 1 >= src.length) {
      throw new FormulaParseError([{
        code: "UNEXPECTED_EOF",
        message: "Unterminated escape in string",
        span: { start, end: pos + 1 },
      }]);
    }

    const esc = src.charAt(pos + 1);
    body += esc === "n" ? "\n" : esc === "t" ? "\t" : esc;
    pos += 2;
  }

  // The loop ended without finding the closing quote.
  if (pos >= src.length) {
    throw new FormulaParseError([{
      code: "UNEXPECTED_EOF",
      message: "Unterminated string literal",
      span: { start, end: src.length },
    }]);
  }

  return { body, end: pos + 1 };
}

/**
 * Splits a formula source string into tokens.
 *
 * Whitespace (space, tab, CR, LF) separates tokens and is otherwise ignored.
 * The returned array always ends with a single `EOF` token whose span is the
 * empty range at the end of the scanned input.
 *
 * Numbers are a run of digits with an optional fractional part; a trailing
 * dot with no following digits (`"1."`) is accepted as part of the number.
 * A sign is never part of a number: `"-4.5"` yields `MINUS` then `NUMBER`.
 *
 * @param src - Formula source text.
 * @returns The tokens in source order, terminated by `EOF`.
 * @throws FormulaParseError with code `UNEXPECTED_EOF` for an unterminated
 *   string literal or a dangling backslash inside one, and with code
 *   `UNEXPECTED_TOKEN` for a character that starts no known token.
 */
export function tokenize(src: string): Token[] {
  const tokens: Token[] = [];
  let i = 0;

  while (i < src.length) {
    const ch = src.charAt(i);

    if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
      i++;
      continue;
    }

    const start = i;

    if (isDigit(ch)) {
      while (isDigit(src.charAt(i))) i++;
      if (src.charAt(i) === ".") {
        i++;
        while (isDigit(src.charAt(i))) i++;
      }
      tokens.push({
        kind: TokenKind.NUMBER,
        text: src.slice(start, i),
        start,
        end: i,
      });
      continue;
    }

    if (ch === '"' || ch === "'") {
      const { body, end } = readString(src, start);
      i = end;
      tokens.push({ kind: TokenKind.STRING, text: body, start, end });
      continue;
    }

    if (isIdentStart(ch)) {
      while (i < src.length && isIdentPart(src.charAt(i))) i++;
      const text = src.slice(start, i);
      tokens.push({
        kind: KEYWORDS.get(text) ?? TokenKind.IDENT,
        text,
        start,
        end: i,
      });
      continue;
    }

    const pair = src.slice(i, i + 2);
    const pairKind = TWO_CHAR_OPERATORS.get(pair);
    if (pairKind !== undefined) {
      i += 2;
      tokens.push({ kind: pairKind, text: pair, start, end: i });
      continue;
    }

    const singleKind = SINGLE_CHAR_TOKENS.get(ch);
    if (singleKind !== undefined) {
      i++;
      tokens.push({ kind: singleKind, text: ch, start, end: i });
      continue;
    }

    // Report the whole code point so that a character outside the BMP (two
    // UTF-16 code units) is not shown as half of a surrogate pair.
    const codePoint = src.codePointAt(i);
    const offending =
      codePoint === undefined ? ch : String.fromCodePoint(codePoint);
    throw new FormulaParseError([{
      code: "UNEXPECTED_TOKEN",
      message: `Unexpected character '${offending}'`,
      span: { start: i, end: i + offending.length },
    }]);
  }

  tokens.push({ kind: TokenKind.EOF, text: "", start: i, end: i });
  return tokens;
}
|
||||
Reference in New Issue
Block a user