feat(base): schema-qualified loader sql for single-instance duckdb

This commit is contained in:
Philipinho
2026-04-23 16:15:42 +01:00
parent 838d8892f0
commit f12a0675ea
2 changed files with 65 additions and 85 deletions
@@ -4,6 +4,7 @@ import { BasePropertyType } from '../base.schemas';
const BASE_ID = '019c69a3-dd47-7014-8b87-ec8f1675aaaa'; const BASE_ID = '019c69a3-dd47-7014-8b87-ec8f1675aaaa';
const WORKSPACE_ID = '019c69a3-dd47-7014-8b87-ec8f1675bbbb'; const WORKSPACE_ID = '019c69a3-dd47-7014-8b87-ec8f1675bbbb';
const SCHEMA = 'b_019c69a3dd4770148b87ec8f1675aaaa';
const sys: ColumnSpec[] = [ const sys: ColumnSpec[] = [
{ column: 'id', ddlType: 'VARCHAR', indexable: false }, { column: 'id', ddlType: 'VARCHAR', indexable: false },
@@ -24,15 +25,10 @@ const makeProp = (
): ColumnSpec['property'] => ({ id, type, typeOptions: null } as any); ): ColumnSpec['property'] => ({ id, type, typeOptions: null } as any);
describe('buildLoaderSql', () => { describe('buildLoaderSql', () => {
it('projects system columns verbatim from pg.base_rows', () => { it('creates schema-qualified rows table and wraps the SELECT in postgres_query', () => {
const sql = buildLoaderSql(sys, BASE_ID, WORKSPACE_ID); const sql = buildLoaderSql(sys, BASE_ID, WORKSPACE_ID, SCHEMA);
expect(sql).toContain('CREATE TABLE rows AS'); expect(sql).toContain(`CREATE TABLE ${SCHEMA}.rows AS`);
expect(sql).toContain("SELECT * FROM postgres_query('pg', $pgsql$"); expect(sql).toContain("SELECT * FROM postgres_query('pg', $pgsql$");
expect(sql).toContain('id::text AS id');
expect(sql).toContain('base_id::text AS base_id');
expect(sql).toContain('position');
expect(sql).toContain('created_at');
expect(sql).toContain("''::VARCHAR AS search_text");
expect(sql).toContain('FROM base_rows'); expect(sql).toContain('FROM base_rows');
expect(sql).toContain(`WHERE base_id = '${BASE_ID}'::uuid`); expect(sql).toContain(`WHERE base_id = '${BASE_ID}'::uuid`);
expect(sql).toContain(`AND workspace_id = '${WORKSPACE_ID}'::uuid`); expect(sql).toContain(`AND workspace_id = '${WORKSPACE_ID}'::uuid`);
@@ -40,15 +36,21 @@ describe('buildLoaderSql', () => {
expect(sql).toContain('$pgsql$)'); expect(sql).toContain('$pgsql$)');
}); });
it('maps TEXT -> base_cell_text', () => { it('projects system columns verbatim inside the inner SELECT', () => {
const sql = buildLoaderSql(sys, BASE_ID, WORKSPACE_ID, SCHEMA);
expect(sql).toContain('id::text AS id');
expect(sql).toContain('base_id::text AS base_id');
expect(sql).toContain('position');
expect(sql).toContain("''::VARCHAR AS search_text");
});
it('maps TEXT -> base_cell_text with schema-qualified alias', () => {
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577aa', BasePropertyType.TEXT); const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577aa', BasePropertyType.TEXT);
const sql = buildLoaderSql( const sql = buildLoaderSql(
[ [...sys, { column: prop!.id, ddlType: 'VARCHAR', indexable: true, property: prop }],
...sys,
{ column: prop!.id, ddlType: 'VARCHAR', indexable: true, property: prop },
],
BASE_ID, BASE_ID,
WORKSPACE_ID, WORKSPACE_ID,
SCHEMA,
); );
expect(sql).toContain( expect(sql).toContain(
`base_cell_text(cells, '019c69a3-dd47-7014-8b87-ec8f167577aa'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577aa"`, `base_cell_text(cells, '019c69a3-dd47-7014-8b87-ec8f167577aa'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577aa"`,
@@ -58,12 +60,10 @@ describe('buildLoaderSql', () => {
it('maps NUMBER -> base_cell_numeric', () => { it('maps NUMBER -> base_cell_numeric', () => {
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577bb', BasePropertyType.NUMBER); const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577bb', BasePropertyType.NUMBER);
const sql = buildLoaderSql( const sql = buildLoaderSql(
[ [...sys, { column: prop!.id, ddlType: 'DOUBLE', indexable: true, property: prop }],
...sys,
{ column: prop!.id, ddlType: 'DOUBLE', indexable: true, property: prop },
],
BASE_ID, BASE_ID,
WORKSPACE_ID, WORKSPACE_ID,
SCHEMA,
); );
expect(sql).toContain( expect(sql).toContain(
`base_cell_numeric(cells, '019c69a3-dd47-7014-8b87-ec8f167577bb'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577bb"`, `base_cell_numeric(cells, '019c69a3-dd47-7014-8b87-ec8f167577bb'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577bb"`,
@@ -73,12 +73,10 @@ describe('buildLoaderSql', () => {
it('maps DATE -> base_cell_timestamptz', () => { it('maps DATE -> base_cell_timestamptz', () => {
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577cc', BasePropertyType.DATE); const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577cc', BasePropertyType.DATE);
const sql = buildLoaderSql( const sql = buildLoaderSql(
[ [...sys, { column: prop!.id, ddlType: 'TIMESTAMPTZ', indexable: true, property: prop }],
...sys,
{ column: prop!.id, ddlType: 'TIMESTAMPTZ', indexable: true, property: prop },
],
BASE_ID, BASE_ID,
WORKSPACE_ID, WORKSPACE_ID,
SCHEMA,
); );
expect(sql).toContain( expect(sql).toContain(
`base_cell_timestamptz(cells, '019c69a3-dd47-7014-8b87-ec8f167577cc'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577cc"`, `base_cell_timestamptz(cells, '019c69a3-dd47-7014-8b87-ec8f167577cc'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577cc"`,
@@ -88,12 +86,10 @@ describe('buildLoaderSql', () => {
it('maps CHECKBOX -> base_cell_bool', () => { it('maps CHECKBOX -> base_cell_bool', () => {
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577dd', BasePropertyType.CHECKBOX); const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577dd', BasePropertyType.CHECKBOX);
const sql = buildLoaderSql( const sql = buildLoaderSql(
[ [...sys, { column: prop!.id, ddlType: 'BOOLEAN', indexable: true, property: prop }],
...sys,
{ column: prop!.id, ddlType: 'BOOLEAN', indexable: true, property: prop },
],
BASE_ID, BASE_ID,
WORKSPACE_ID, WORKSPACE_ID,
SCHEMA,
); );
expect(sql).toContain( expect(sql).toContain(
`base_cell_bool(cells, '019c69a3-dd47-7014-8b87-ec8f167577dd'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577dd"`, `base_cell_bool(cells, '019c69a3-dd47-7014-8b87-ec8f167577dd'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577dd"`,
@@ -103,25 +99,23 @@ describe('buildLoaderSql', () => {
it('maps MULTI_SELECT (JSON) -> raw jsonb cast to text', () => { it('maps MULTI_SELECT (JSON) -> raw jsonb cast to text', () => {
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577ee', BasePropertyType.MULTI_SELECT); const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577ee', BasePropertyType.MULTI_SELECT);
const sql = buildLoaderSql( const sql = buildLoaderSql(
[ [...sys, { column: prop!.id, ddlType: 'JSON', indexable: false, property: prop }],
...sys,
{ column: prop!.id, ddlType: 'JSON', indexable: false, property: prop },
],
BASE_ID, BASE_ID,
WORKSPACE_ID, WORKSPACE_ID,
SCHEMA,
); );
expect(sql).toContain( expect(sql).toContain(
`(cells -> '019c69a3-dd47-7014-8b87-ec8f167577ee')::text AS "019c69a3-dd47-7014-8b87-ec8f167577ee"`, `(cells -> '019c69a3-dd47-7014-8b87-ec8f167577ee')::text AS "019c69a3-dd47-7014-8b87-ec8f167577ee"`,
); );
}); });
it('rejects invalid column names (defense-in-depth against quoting bugs)', () => { it('rejects invalid column names', () => {
const bad: ColumnSpec = { const bad: ColumnSpec = {
column: 'pwned"; DROP TABLE rows; --', column: 'pwned"; DROP TABLE rows; --',
ddlType: 'VARCHAR', ddlType: 'VARCHAR',
indexable: false, indexable: false,
}; };
expect(() => buildLoaderSql([bad], BASE_ID, WORKSPACE_ID)).toThrow( expect(() => buildLoaderSql([bad], BASE_ID, WORKSPACE_ID, SCHEMA)).toThrow(
/invalid column name/i, /invalid column name/i,
); );
}); });
@@ -130,30 +124,31 @@ describe('buildLoaderSql', () => {
const badProp = { id: 'not-a-uuid', type: BasePropertyType.TEXT, typeOptions: null } as any; const badProp = { id: 'not-a-uuid', type: BasePropertyType.TEXT, typeOptions: null } as any;
expect(() => expect(() =>
buildLoaderSql( buildLoaderSql(
[ [{ column: 'some-uuid-col', ddlType: 'VARCHAR', indexable: true, property: badProp }],
{ column: 'some-uuid-col', ddlType: 'VARCHAR', indexable: true, property: badProp },
],
BASE_ID, BASE_ID,
WORKSPACE_ID, WORKSPACE_ID,
SCHEMA,
), ),
).toThrow(/invalid property uuid/i); ).toThrow(/invalid property uuid/i);
}); });
it('rejects invalid base id', () => { it('rejects invalid base id', () => {
expect(() => buildLoaderSql(sys, 'not-a-uuid', WORKSPACE_ID)).toThrow( expect(() => buildLoaderSql(sys, 'not-a-uuid', WORKSPACE_ID, SCHEMA)).toThrow(/invalid base id/i);
/invalid base id/i,
);
}); });
it('rejects invalid workspace id', () => { it('rejects invalid workspace id', () => {
expect(() => buildLoaderSql(sys, BASE_ID, 'not-a-uuid')).toThrow( expect(() => buildLoaderSql(sys, BASE_ID, 'not-a-uuid', SCHEMA)).toThrow(/invalid workspace id/i);
/invalid workspace id/i,
);
}); });
it('produces deterministic column order across invocations', () => { it('rejects invalid schema name', () => {
const a = buildLoaderSql(sys, BASE_ID, WORKSPACE_ID); expect(() => buildLoaderSql(sys, BASE_ID, WORKSPACE_ID, 'bad name')).toThrow(/invalid schema/i);
const b = buildLoaderSql(sys, BASE_ID, WORKSPACE_ID); expect(() => buildLoaderSql(sys, BASE_ID, WORKSPACE_ID, '1starts_with_digit')).toThrow(/invalid schema/i);
expect(a).toEqual(b); expect(() => buildLoaderSql(sys, BASE_ID, WORKSPACE_ID, '')).toThrow(/invalid schema/i);
});
it('is deterministic', () => {
expect(buildLoaderSql(sys, BASE_ID, WORKSPACE_ID, SCHEMA)).toEqual(
buildLoaderSql(sys, BASE_ID, WORKSPACE_ID, SCHEMA),
);
}); });
}); });
@@ -1,44 +1,27 @@
import { ColumnSpec } from './query-cache.types'; import { ColumnSpec } from './query-cache.types';
/* /*
* Pure SQL builder for the cold-load query executed by DuckDB's postgres * Pure SQL builder for the cold-load query executed against the process-wide
* extension against the attached Postgres database. * DuckDB instance. The resulting SQL creates `<schema>.rows` inside the
* attached in-memory database for the base, populated from Postgres via the
* `postgres_query` function:
* *
* The outer statement is a DuckDB `CREATE TABLE ... AS SELECT * FROM * CREATE TABLE <schema>.rows AS
* postgres_query('pg', $pgsql$ ... $pgsql$)`. `postgres_query` ships the * SELECT * FROM postgres_query('pg', $pgsql$ ... $pgsql$);
* raw inner SQL to Postgres and returns typed rows; this is the only way
* to invoke custom Postgres UDFs (`base_cell_text`, etc.) because DuckDB's
* postgres extension does not push unknown scalar functions down — it
* would otherwise try to evaluate them locally and fail.
* *
* Design notes: * The inner SQL uses the Postgres helper functions (`base_cell_text`,
* `base_cell_numeric`, `base_cell_timestamptz`, `base_cell_bool`) so JSONB
* extraction happens server-side.
* *
* - Inside `postgres_query`, the table is native `base_rows` (no `pg.` * Callers must pass a validated `schema` name (use `baseSchemaName()`).
* schema prefix — that prefix is DuckDB's ATTACH alias, not visible * Schema, baseId, and workspaceId are interpolated after validation: schema
* to Postgres). * is regex-checked and baseId/workspaceId are UUID-validated.
*
* - Every SYSTEM_COLUMN maps directly onto a column in `base_rows`.
* UUID columns cast to text so they land in DuckDB's VARCHAR column.
*
* - User columns delegate to the Postgres helper functions defined in
* migration 20260417T120000 (`base_cell_text`, `base_cell_numeric`,
* `base_cell_timestamptz`, `base_cell_bool`).
*
* - JSON columns (multi-select, file, multi-person) are passed as raw JSON
* text (`(cells -> 'uuid')::text`). DuckDB's JSON column accepts that.
*
* - `baseId` and `workspaceId` are interpolated directly as single-quoted
* UUID literals inside the inner SQL. They are UUID-validated before
* interpolation; UUID-shape is the only thing that makes inlining safe.
*
* - Identifiers are validated before interpolation. `ColumnSpec.column` is
* always a UUID or snake_case system name; the regex catches any
* programming mistake that would otherwise break SQL quoting.
*/ */
export function buildLoaderSql( export function buildLoaderSql(
specs: ColumnSpec[], specs: ColumnSpec[],
baseId: string, baseId: string,
workspaceId: string, workspaceId: string,
schema: string,
): string { ): string {
if (!UUID.test(baseId)) { if (!UUID.test(baseId)) {
throw new Error(`Invalid base id "${baseId}"`); throw new Error(`Invalid base id "${baseId}"`);
@@ -46,9 +29,11 @@ export function buildLoaderSql(
if (!UUID.test(workspaceId)) { if (!UUID.test(workspaceId)) {
throw new Error(`Invalid workspace id "${workspaceId}"`); throw new Error(`Invalid workspace id "${workspaceId}"`);
} }
validateSchema(schema);
const projections = specs.map((spec) => projectionFor(spec)); const projections = specs.map((spec) => projectionFor(spec));
return [ return [
'CREATE TABLE rows AS', `CREATE TABLE ${schema}.rows AS`,
"SELECT * FROM postgres_query('pg', $pgsql$", "SELECT * FROM postgres_query('pg', $pgsql$",
' SELECT', ' SELECT',
' ' + projections.join(',\n '), ' ' + projections.join(',\n '),
@@ -64,7 +49,6 @@ function projectionFor(spec: ColumnSpec): string {
validateColumnName(spec.column); validateColumnName(spec.column);
const qid = `"${spec.column}"`; const qid = `"${spec.column}"`;
// System columns — fixed mapping onto base_rows.
switch (spec.column) { switch (spec.column) {
case 'id': return 'id::text AS id'; case 'id': return 'id::text AS id';
case 'base_id': return 'base_id::text AS base_id'; case 'base_id': return 'base_id::text AS base_id';
@@ -78,7 +62,6 @@ function projectionFor(spec: ColumnSpec): string {
case 'search_text': return "''::VARCHAR AS search_text"; case 'search_text': return "''::VARCHAR AS search_text";
} }
// User columns.
const prop = spec.property; const prop = spec.property;
if (!prop) { if (!prop) {
throw new Error( throw new Error(
@@ -87,11 +70,12 @@ function projectionFor(spec: ColumnSpec): string {
} }
const id = prop.id; const id = prop.id;
validateUuid(id); if (!UUID.test(id)) {
throw new Error(`Invalid property UUID "${id}"`);
}
switch (spec.ddlType) { switch (spec.ddlType) {
case 'VARCHAR': case 'VARCHAR':
// TEXT, URL, EMAIL, SELECT, STATUS, single-PERSON all map to VARCHAR.
return `base_cell_text(cells, '${id}'::uuid) AS ${qid}`; return `base_cell_text(cells, '${id}'::uuid) AS ${qid}`;
case 'DOUBLE': case 'DOUBLE':
return `base_cell_numeric(cells, '${id}'::uuid) AS ${qid}`; return `base_cell_numeric(cells, '${id}'::uuid) AS ${qid}`;
@@ -100,7 +84,6 @@ function projectionFor(spec: ColumnSpec): string {
case 'BOOLEAN': case 'BOOLEAN':
return `base_cell_bool(cells, '${id}'::uuid) AS ${qid}`; return `base_cell_bool(cells, '${id}'::uuid) AS ${qid}`;
case 'JSON': case 'JSON':
// MULTI_SELECT / FILE / multi-PERSON.
return `(cells -> '${id}')::text AS ${qid}`; return `(cells -> '${id}')::text AS ${qid}`;
default: { default: {
const _never: never = spec.ddlType; const _never: never = spec.ddlType;
@@ -109,6 +92,9 @@ function projectionFor(spec: ColumnSpec): string {
} }
} }
const UUID =
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
const VALID_COL = /^[a-zA-Z0-9_\-]+$/; const VALID_COL = /^[a-zA-Z0-9_\-]+$/;
function validateColumnName(name: string): void { function validateColumnName(name: string): void {
if (!VALID_COL.test(name)) { if (!VALID_COL.test(name)) {
@@ -116,10 +102,9 @@ function validateColumnName(name: string): void {
} }
} }
const UUID = const VALID_SCHEMA = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/; function validateSchema(name: string): void {
function validateUuid(s: string): void { if (!VALID_SCHEMA.test(name)) {
if (!UUID.test(s)) { throw new Error(`Invalid schema name "${name}"`);
throw new Error(`Invalid property UUID "${s}"`);
} }
} }