From 378d17350c57c40d2985bd1187148abac382ec5b Mon Sep 17 00:00:00 2001 From: Philipinho <16838612+Philipinho@users.noreply.github.com> Date: Thu, 23 Apr 2026 12:39:30 +0100 Subject: [PATCH] fix(base): use postgres_query to invoke pg-side udfs from duckdb loader --- .../core/base/query-cache/loader-sql.spec.ts | 105 ++++++++++++------ .../src/core/base/query-cache/loader-sql.ts | 47 ++++++-- 2 files changed, 110 insertions(+), 42 deletions(-) diff --git a/apps/server/src/core/base/query-cache/loader-sql.spec.ts b/apps/server/src/core/base/query-cache/loader-sql.spec.ts index a8f4349d..a7bfa1ca 100644 --- a/apps/server/src/core/base/query-cache/loader-sql.spec.ts +++ b/apps/server/src/core/base/query-cache/loader-sql.spec.ts @@ -2,6 +2,9 @@ import { buildLoaderSql } from './loader-sql'; import { ColumnSpec } from './query-cache.types'; import { BasePropertyType } from '../base.schemas'; +const BASE_ID = '019c69a3-dd47-7014-8b87-ec8f1675aaaa'; +const WORKSPACE_ID = '019c69a3-dd47-7014-8b87-ec8f1675bbbb'; + const sys: ColumnSpec[] = [ { column: 'id', ddlType: 'VARCHAR', indexable: false }, { column: 'base_id', ddlType: 'VARCHAR', indexable: false }, @@ -22,25 +25,31 @@ const makeProp = ( describe('buildLoaderSql', () => { it('projects system columns verbatim from pg.base_rows', () => { - const sql = buildLoaderSql(sys); + const sql = buildLoaderSql(sys, BASE_ID, WORKSPACE_ID); expect(sql).toContain('CREATE TABLE rows AS'); + expect(sql).toContain("SELECT * FROM postgres_query('pg', $pgsql$"); expect(sql).toContain('id::text AS id'); expect(sql).toContain('base_id::text AS base_id'); expect(sql).toContain('position'); expect(sql).toContain('created_at'); expect(sql).toContain("''::VARCHAR AS search_text"); - expect(sql).toContain('FROM pg.base_rows'); - expect(sql).toContain( - 'WHERE base_id = $1::uuid AND workspace_id = $2::uuid AND deleted_at IS NULL', - ); + expect(sql).toContain('FROM base_rows'); + expect(sql).toContain(`WHERE base_id = '${BASE_ID}'::uuid`); + expect(sql).toContain(`AND workspace_id = '${WORKSPACE_ID}'::uuid`); + expect(sql).toContain('AND deleted_at IS NULL'); + expect(sql).toContain('$pgsql$)'); }); it('maps TEXT -> base_cell_text', () => { const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577aa', BasePropertyType.TEXT); - const sql = buildLoaderSql([ - ...sys, - { column: prop!.id, ddlType: 'VARCHAR', indexable: true, property: prop }, - ]); + const sql = buildLoaderSql( + [ + ...sys, + { column: prop!.id, ddlType: 'VARCHAR', indexable: true, property: prop }, + ], + BASE_ID, + WORKSPACE_ID, + ); expect(sql).toContain( `base_cell_text(cells, '019c69a3-dd47-7014-8b87-ec8f167577aa'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577aa"`, ); @@ -48,10 +57,14 @@ describe('buildLoaderSql', () => { it('maps NUMBER -> base_cell_numeric', () => { const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577bb', BasePropertyType.NUMBER); - const sql = buildLoaderSql([ - ...sys, - { column: prop!.id, ddlType: 'DOUBLE', indexable: true, property: prop }, - ]); + const sql = buildLoaderSql( + [ + ...sys, + { column: prop!.id, ddlType: 'DOUBLE', indexable: true, property: prop }, + ], + BASE_ID, + WORKSPACE_ID, + ); expect(sql).toContain( `base_cell_numeric(cells, '019c69a3-dd47-7014-8b87-ec8f167577bb'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577bb"`, ); @@ -59,10 +72,14 @@ describe('buildLoaderSql', () => { it('maps DATE -> base_cell_timestamptz', () => { const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577cc', BasePropertyType.DATE); - const sql = buildLoaderSql([ - ...sys, - { column: prop!.id, ddlType: 'TIMESTAMPTZ', indexable: true, property: prop }, - ]); + const sql = buildLoaderSql( + [ + ...sys, + { column: prop!.id, ddlType: 'TIMESTAMPTZ', indexable: true, property: prop }, + ], + BASE_ID, + WORKSPACE_ID, + ); expect(sql).toContain( `base_cell_timestamptz(cells, '019c69a3-dd47-7014-8b87-ec8f167577cc'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577cc"`, ); @@ -70,10 +87,14 @@ describe('buildLoaderSql', () => { it('maps CHECKBOX -> base_cell_bool', () => { const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577dd', BasePropertyType.CHECKBOX); - const sql = buildLoaderSql([ - ...sys, - { column: prop!.id, ddlType: 'BOOLEAN', indexable: true, property: prop }, - ]); + const sql = buildLoaderSql( + [ + ...sys, + { column: prop!.id, ddlType: 'BOOLEAN', indexable: true, property: prop }, + ], + BASE_ID, + WORKSPACE_ID, + ); expect(sql).toContain( `base_cell_bool(cells, '019c69a3-dd47-7014-8b87-ec8f167577dd'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577dd"`, ); @@ -81,10 +102,14 @@ describe('buildLoaderSql', () => { it('maps MULTI_SELECT (JSON) -> raw jsonb cast to text', () => { const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577ee', BasePropertyType.MULTI_SELECT); - const sql = buildLoaderSql([ - ...sys, - { column: prop!.id, ddlType: 'JSON', indexable: false, property: prop }, - ]); + const sql = buildLoaderSql( + [ + ...sys, + { column: prop!.id, ddlType: 'JSON', indexable: false, property: prop }, + ], + BASE_ID, + WORKSPACE_ID, + ); expect(sql).toContain( `(cells -> '019c69a3-dd47-7014-8b87-ec8f167577ee')::text AS "019c69a3-dd47-7014-8b87-ec8f167577ee"`, ); @@ -96,21 +121,39 @@ describe('buildLoaderSql', () => { ddlType: 'VARCHAR', indexable: false, }; - expect(() => buildLoaderSql([bad])).toThrow(/invalid column name/i); + expect(() => buildLoaderSql([bad], BASE_ID, WORKSPACE_ID)).toThrow( + /invalid column name/i, + ); }); it('rejects non-UUID property ids', () => { const badProp = { id: 'not-a-uuid', type: BasePropertyType.TEXT, typeOptions: null } as any; expect(() => - buildLoaderSql([ - { column: 'some-uuid-col', ddlType: 'VARCHAR', indexable: true, property: badProp }, - ]), + buildLoaderSql( + [ + { column: 'some-uuid-col', ddlType: 'VARCHAR', indexable: true, property: badProp }, + ], + BASE_ID, + WORKSPACE_ID, + ), ).toThrow(/invalid property uuid/i); }); + it('rejects invalid base id', () => { + expect(() => buildLoaderSql(sys, 'not-a-uuid', WORKSPACE_ID)).toThrow( + /invalid base id/i, + ); + }); + + it('rejects invalid workspace id', () => { + expect(() => buildLoaderSql(sys, BASE_ID, 'not-a-uuid')).toThrow( + /invalid workspace id/i, + ); + }); + it('produces deterministic column order across invocations', () => { - const a = buildLoaderSql(sys); - const b = buildLoaderSql(sys); + const a = buildLoaderSql(sys, BASE_ID, WORKSPACE_ID); + const b = buildLoaderSql(sys, BASE_ID, WORKSPACE_ID); expect(a).toEqual(b); }); }); diff --git a/apps/server/src/core/base/query-cache/loader-sql.ts b/apps/server/src/core/base/query-cache/loader-sql.ts index f8c99030..94c70d5b 100644 --- a/apps/server/src/core/base/query-cache/loader-sql.ts +++ b/apps/server/src/core/base/query-cache/loader-sql.ts @@ -2,36 +2,61 @@ import { ColumnSpec } from './query-cache.types'; /* * Pure SQL builder for the cold-load query executed by DuckDB's postgres - * extension against `pg.base_rows`. Parameterized by - * $1 = baseId (uuid), $2 = workspaceId (uuid) - * Callers bind via prepared statements. + * extension against the attached Postgres database. + * + * The outer statement is a DuckDB `CREATE TABLE ... AS SELECT * FROM + * postgres_query('pg', $pgsql$ ... $pgsql$)`. `postgres_query` ships the + * raw inner SQL to Postgres and returns typed rows; this is the only way + * to invoke custom Postgres UDFs (`base_cell_text`, etc.) because DuckDB's + * postgres extension does not push unknown scalar functions down — it + * would otherwise try to evaluate them locally and fail. * * Design notes: * + * - Inside `postgres_query`, the table is native `base_rows` (no `pg.` + * schema prefix — that prefix is DuckDB's ATTACH alias, not visible + * to Postgres). + * * - Every SYSTEM_COLUMN maps directly onto a column in `base_rows`. * UUID columns cast to text so they land in DuckDB's VARCHAR column. * * - User columns delegate to the Postgres helper functions defined in * migration 20260417T120000 (`base_cell_text`, `base_cell_numeric`, - * `base_cell_timestamptz`, `base_cell_bool`). These run on the - * Postgres side — DuckDB ships the full SELECT through the extension; - * JSONB extraction never touches DuckDB. + * `base_cell_timestamptz`, `base_cell_bool`). * * - JSON columns (multi-select, file, multi-person) are passed as raw JSON * text (`(cells -> 'uuid')::text`). DuckDB's JSON column accepts that. * + * - `baseId` and `workspaceId` are interpolated directly as single-quoted + * UUID literals inside the inner SQL. They are UUID-validated before + * interpolation; UUID-shape is the only thing that makes inlining safe. + * * - Identifiers are validated before interpolation. `ColumnSpec.column` is * always a UUID or snake_case system name; the regex catches any * programming mistake that would otherwise break SQL quoting. */ -export function buildLoaderSql(specs: ColumnSpec[]): string { +export function buildLoaderSql( + specs: ColumnSpec[], + baseId: string, + workspaceId: string, +): string { + if (!UUID.test(baseId)) { + throw new Error(`Invalid base id "${baseId}"`); + } + if (!UUID.test(workspaceId)) { + throw new Error(`Invalid workspace id "${workspaceId}"`); + } const projections = specs.map((spec) => projectionFor(spec)); return [ 'CREATE TABLE rows AS', - 'SELECT', - ' ' + projections.join(',\n '), - 'FROM pg.base_rows', - 'WHERE base_id = $1::uuid AND workspace_id = $2::uuid AND deleted_at IS NULL', + "SELECT * FROM postgres_query('pg', $pgsql$", + ' SELECT', + ' ' + projections.join(',\n '), + ' FROM base_rows', + ` WHERE base_id = '${baseId}'::uuid`, + ` AND workspace_id = '${workspaceId}'::uuid`, + ' AND deleted_at IS NULL', + '$pgsql$)', ].join('\n'); }