mirror of
https://github.com/docmost/docmost.git
synced 2026-05-07 06:23:06 +08:00
fix(base): use postgres_query to invoke pg-side udfs from duckdb loader
This commit is contained in:
@@ -2,6 +2,9 @@ import { buildLoaderSql } from './loader-sql';
|
|||||||
import { ColumnSpec } from './query-cache.types';
|
import { ColumnSpec } from './query-cache.types';
|
||||||
import { BasePropertyType } from '../base.schemas';
|
import { BasePropertyType } from '../base.schemas';
|
||||||
|
|
||||||
|
const BASE_ID = '019c69a3-dd47-7014-8b87-ec8f1675aaaa';
|
||||||
|
const WORKSPACE_ID = '019c69a3-dd47-7014-8b87-ec8f1675bbbb';
|
||||||
|
|
||||||
const sys: ColumnSpec[] = [
|
const sys: ColumnSpec[] = [
|
||||||
{ column: 'id', ddlType: 'VARCHAR', indexable: false },
|
{ column: 'id', ddlType: 'VARCHAR', indexable: false },
|
||||||
{ column: 'base_id', ddlType: 'VARCHAR', indexable: false },
|
{ column: 'base_id', ddlType: 'VARCHAR', indexable: false },
|
||||||
@@ -22,25 +25,31 @@ const makeProp = (
|
|||||||
|
|
||||||
describe('buildLoaderSql', () => {
|
describe('buildLoaderSql', () => {
|
||||||
it('projects system columns verbatim from pg.base_rows', () => {
|
it('projects system columns verbatim from pg.base_rows', () => {
|
||||||
const sql = buildLoaderSql(sys);
|
const sql = buildLoaderSql(sys, BASE_ID, WORKSPACE_ID);
|
||||||
expect(sql).toContain('CREATE TABLE rows AS');
|
expect(sql).toContain('CREATE TABLE rows AS');
|
||||||
|
expect(sql).toContain("SELECT * FROM postgres_query('pg', $pgsql$");
|
||||||
expect(sql).toContain('id::text AS id');
|
expect(sql).toContain('id::text AS id');
|
||||||
expect(sql).toContain('base_id::text AS base_id');
|
expect(sql).toContain('base_id::text AS base_id');
|
||||||
expect(sql).toContain('position');
|
expect(sql).toContain('position');
|
||||||
expect(sql).toContain('created_at');
|
expect(sql).toContain('created_at');
|
||||||
expect(sql).toContain("''::VARCHAR AS search_text");
|
expect(sql).toContain("''::VARCHAR AS search_text");
|
||||||
expect(sql).toContain('FROM pg.base_rows');
|
expect(sql).toContain('FROM base_rows');
|
||||||
expect(sql).toContain(
|
expect(sql).toContain(`WHERE base_id = '${BASE_ID}'::uuid`);
|
||||||
'WHERE base_id = $1::uuid AND workspace_id = $2::uuid AND deleted_at IS NULL',
|
expect(sql).toContain(`AND workspace_id = '${WORKSPACE_ID}'::uuid`);
|
||||||
);
|
expect(sql).toContain('AND deleted_at IS NULL');
|
||||||
|
expect(sql).toContain('$pgsql$)');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('maps TEXT -> base_cell_text', () => {
|
it('maps TEXT -> base_cell_text', () => {
|
||||||
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577aa', BasePropertyType.TEXT);
|
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577aa', BasePropertyType.TEXT);
|
||||||
const sql = buildLoaderSql([
|
const sql = buildLoaderSql(
|
||||||
...sys,
|
[
|
||||||
{ column: prop!.id, ddlType: 'VARCHAR', indexable: true, property: prop },
|
...sys,
|
||||||
]);
|
{ column: prop!.id, ddlType: 'VARCHAR', indexable: true, property: prop },
|
||||||
|
],
|
||||||
|
BASE_ID,
|
||||||
|
WORKSPACE_ID,
|
||||||
|
);
|
||||||
expect(sql).toContain(
|
expect(sql).toContain(
|
||||||
`base_cell_text(cells, '019c69a3-dd47-7014-8b87-ec8f167577aa'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577aa"`,
|
`base_cell_text(cells, '019c69a3-dd47-7014-8b87-ec8f167577aa'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577aa"`,
|
||||||
);
|
);
|
||||||
@@ -48,10 +57,14 @@ describe('buildLoaderSql', () => {
|
|||||||
|
|
||||||
it('maps NUMBER -> base_cell_numeric', () => {
|
it('maps NUMBER -> base_cell_numeric', () => {
|
||||||
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577bb', BasePropertyType.NUMBER);
|
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577bb', BasePropertyType.NUMBER);
|
||||||
const sql = buildLoaderSql([
|
const sql = buildLoaderSql(
|
||||||
...sys,
|
[
|
||||||
{ column: prop!.id, ddlType: 'DOUBLE', indexable: true, property: prop },
|
...sys,
|
||||||
]);
|
{ column: prop!.id, ddlType: 'DOUBLE', indexable: true, property: prop },
|
||||||
|
],
|
||||||
|
BASE_ID,
|
||||||
|
WORKSPACE_ID,
|
||||||
|
);
|
||||||
expect(sql).toContain(
|
expect(sql).toContain(
|
||||||
`base_cell_numeric(cells, '019c69a3-dd47-7014-8b87-ec8f167577bb'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577bb"`,
|
`base_cell_numeric(cells, '019c69a3-dd47-7014-8b87-ec8f167577bb'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577bb"`,
|
||||||
);
|
);
|
||||||
@@ -59,10 +72,14 @@ describe('buildLoaderSql', () => {
|
|||||||
|
|
||||||
it('maps DATE -> base_cell_timestamptz', () => {
|
it('maps DATE -> base_cell_timestamptz', () => {
|
||||||
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577cc', BasePropertyType.DATE);
|
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577cc', BasePropertyType.DATE);
|
||||||
const sql = buildLoaderSql([
|
const sql = buildLoaderSql(
|
||||||
...sys,
|
[
|
||||||
{ column: prop!.id, ddlType: 'TIMESTAMPTZ', indexable: true, property: prop },
|
...sys,
|
||||||
]);
|
{ column: prop!.id, ddlType: 'TIMESTAMPTZ', indexable: true, property: prop },
|
||||||
|
],
|
||||||
|
BASE_ID,
|
||||||
|
WORKSPACE_ID,
|
||||||
|
);
|
||||||
expect(sql).toContain(
|
expect(sql).toContain(
|
||||||
`base_cell_timestamptz(cells, '019c69a3-dd47-7014-8b87-ec8f167577cc'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577cc"`,
|
`base_cell_timestamptz(cells, '019c69a3-dd47-7014-8b87-ec8f167577cc'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577cc"`,
|
||||||
);
|
);
|
||||||
@@ -70,10 +87,14 @@ describe('buildLoaderSql', () => {
|
|||||||
|
|
||||||
it('maps CHECKBOX -> base_cell_bool', () => {
|
it('maps CHECKBOX -> base_cell_bool', () => {
|
||||||
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577dd', BasePropertyType.CHECKBOX);
|
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577dd', BasePropertyType.CHECKBOX);
|
||||||
const sql = buildLoaderSql([
|
const sql = buildLoaderSql(
|
||||||
...sys,
|
[
|
||||||
{ column: prop!.id, ddlType: 'BOOLEAN', indexable: true, property: prop },
|
...sys,
|
||||||
]);
|
{ column: prop!.id, ddlType: 'BOOLEAN', indexable: true, property: prop },
|
||||||
|
],
|
||||||
|
BASE_ID,
|
||||||
|
WORKSPACE_ID,
|
||||||
|
);
|
||||||
expect(sql).toContain(
|
expect(sql).toContain(
|
||||||
`base_cell_bool(cells, '019c69a3-dd47-7014-8b87-ec8f167577dd'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577dd"`,
|
`base_cell_bool(cells, '019c69a3-dd47-7014-8b87-ec8f167577dd'::uuid) AS "019c69a3-dd47-7014-8b87-ec8f167577dd"`,
|
||||||
);
|
);
|
||||||
@@ -81,10 +102,14 @@ describe('buildLoaderSql', () => {
|
|||||||
|
|
||||||
it('maps MULTI_SELECT (JSON) -> raw jsonb cast to text', () => {
|
it('maps MULTI_SELECT (JSON) -> raw jsonb cast to text', () => {
|
||||||
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577ee', BasePropertyType.MULTI_SELECT);
|
const prop = makeProp('019c69a3-dd47-7014-8b87-ec8f167577ee', BasePropertyType.MULTI_SELECT);
|
||||||
const sql = buildLoaderSql([
|
const sql = buildLoaderSql(
|
||||||
...sys,
|
[
|
||||||
{ column: prop!.id, ddlType: 'JSON', indexable: false, property: prop },
|
...sys,
|
||||||
]);
|
{ column: prop!.id, ddlType: 'JSON', indexable: false, property: prop },
|
||||||
|
],
|
||||||
|
BASE_ID,
|
||||||
|
WORKSPACE_ID,
|
||||||
|
);
|
||||||
expect(sql).toContain(
|
expect(sql).toContain(
|
||||||
`(cells -> '019c69a3-dd47-7014-8b87-ec8f167577ee')::text AS "019c69a3-dd47-7014-8b87-ec8f167577ee"`,
|
`(cells -> '019c69a3-dd47-7014-8b87-ec8f167577ee')::text AS "019c69a3-dd47-7014-8b87-ec8f167577ee"`,
|
||||||
);
|
);
|
||||||
@@ -96,21 +121,39 @@ describe('buildLoaderSql', () => {
|
|||||||
ddlType: 'VARCHAR',
|
ddlType: 'VARCHAR',
|
||||||
indexable: false,
|
indexable: false,
|
||||||
};
|
};
|
||||||
expect(() => buildLoaderSql([bad])).toThrow(/invalid column name/i);
|
expect(() => buildLoaderSql([bad], BASE_ID, WORKSPACE_ID)).toThrow(
|
||||||
|
/invalid column name/i,
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('rejects non-UUID property ids', () => {
|
it('rejects non-UUID property ids', () => {
|
||||||
const badProp = { id: 'not-a-uuid', type: BasePropertyType.TEXT, typeOptions: null } as any;
|
const badProp = { id: 'not-a-uuid', type: BasePropertyType.TEXT, typeOptions: null } as any;
|
||||||
expect(() =>
|
expect(() =>
|
||||||
buildLoaderSql([
|
buildLoaderSql(
|
||||||
{ column: 'some-uuid-col', ddlType: 'VARCHAR', indexable: true, property: badProp },
|
[
|
||||||
]),
|
{ column: 'some-uuid-col', ddlType: 'VARCHAR', indexable: true, property: badProp },
|
||||||
|
],
|
||||||
|
BASE_ID,
|
||||||
|
WORKSPACE_ID,
|
||||||
|
),
|
||||||
).toThrow(/invalid property uuid/i);
|
).toThrow(/invalid property uuid/i);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('rejects invalid base id', () => {
|
||||||
|
expect(() => buildLoaderSql(sys, 'not-a-uuid', WORKSPACE_ID)).toThrow(
|
||||||
|
/invalid base id/i,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects invalid workspace id', () => {
|
||||||
|
expect(() => buildLoaderSql(sys, BASE_ID, 'not-a-uuid')).toThrow(
|
||||||
|
/invalid workspace id/i,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it('produces deterministic column order across invocations', () => {
|
it('produces deterministic column order across invocations', () => {
|
||||||
const a = buildLoaderSql(sys);
|
const a = buildLoaderSql(sys, BASE_ID, WORKSPACE_ID);
|
||||||
const b = buildLoaderSql(sys);
|
const b = buildLoaderSql(sys, BASE_ID, WORKSPACE_ID);
|
||||||
expect(a).toEqual(b);
|
expect(a).toEqual(b);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -2,36 +2,61 @@ import { ColumnSpec } from './query-cache.types';
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Pure SQL builder for the cold-load query executed by DuckDB's postgres
|
* Pure SQL builder for the cold-load query executed by DuckDB's postgres
|
||||||
* extension against `pg.base_rows`. Parameterized by
|
* extension against the attached Postgres database.
|
||||||
* $1 = baseId (uuid), $2 = workspaceId (uuid)
|
*
|
||||||
* Callers bind via prepared statements.
|
* The outer statement is a DuckDB `CREATE TABLE ... AS SELECT * FROM
|
||||||
|
* postgres_query('pg', $pgsql$ ... $pgsql$)`. `postgres_query` ships the
|
||||||
|
* raw inner SQL to Postgres and returns typed rows; this is the only way
|
||||||
|
* to invoke custom Postgres UDFs (`base_cell_text`, etc.) because DuckDB's
|
||||||
|
* postgres extension does not push unknown scalar functions down — it
|
||||||
|
* would otherwise try to evaluate them locally and fail.
|
||||||
*
|
*
|
||||||
* Design notes:
|
* Design notes:
|
||||||
*
|
*
|
||||||
|
* - Inside `postgres_query`, the table is native `base_rows` (no `pg.`
|
||||||
|
* schema prefix — that prefix is DuckDB's ATTACH alias, not visible
|
||||||
|
* to Postgres).
|
||||||
|
*
|
||||||
* - Every SYSTEM_COLUMN maps directly onto a column in `base_rows`.
|
* - Every SYSTEM_COLUMN maps directly onto a column in `base_rows`.
|
||||||
* UUID columns cast to text so they land in DuckDB's VARCHAR column.
|
* UUID columns cast to text so they land in DuckDB's VARCHAR column.
|
||||||
*
|
*
|
||||||
* - User columns delegate to the Postgres helper functions defined in
|
* - User columns delegate to the Postgres helper functions defined in
|
||||||
* migration 20260417T120000 (`base_cell_text`, `base_cell_numeric`,
|
* migration 20260417T120000 (`base_cell_text`, `base_cell_numeric`,
|
||||||
* `base_cell_timestamptz`, `base_cell_bool`). These run on the
|
* `base_cell_timestamptz`, `base_cell_bool`).
|
||||||
* Postgres side — DuckDB ships the full SELECT through the extension;
|
|
||||||
* JSONB extraction never touches DuckDB.
|
|
||||||
*
|
*
|
||||||
* - JSON columns (multi-select, file, multi-person) are passed as raw JSON
|
* - JSON columns (multi-select, file, multi-person) are passed as raw JSON
|
||||||
* text (`(cells -> 'uuid')::text`). DuckDB's JSON column accepts that.
|
* text (`(cells -> 'uuid')::text`). DuckDB's JSON column accepts that.
|
||||||
*
|
*
|
||||||
|
* - `baseId` and `workspaceId` are interpolated directly as single-quoted
|
||||||
|
* UUID literals inside the inner SQL. They are UUID-validated before
|
||||||
|
* interpolation; UUID-shape is the only thing that makes inlining safe.
|
||||||
|
*
|
||||||
* - Identifiers are validated before interpolation. `ColumnSpec.column` is
|
* - Identifiers are validated before interpolation. `ColumnSpec.column` is
|
||||||
* always a UUID or snake_case system name; the regex catches any
|
* always a UUID or snake_case system name; the regex catches any
|
||||||
* programming mistake that would otherwise break SQL quoting.
|
* programming mistake that would otherwise break SQL quoting.
|
||||||
*/
|
*/
|
||||||
export function buildLoaderSql(specs: ColumnSpec[]): string {
|
export function buildLoaderSql(
|
||||||
|
specs: ColumnSpec[],
|
||||||
|
baseId: string,
|
||||||
|
workspaceId: string,
|
||||||
|
): string {
|
||||||
|
if (!UUID.test(baseId)) {
|
||||||
|
throw new Error(`Invalid base id "${baseId}"`);
|
||||||
|
}
|
||||||
|
if (!UUID.test(workspaceId)) {
|
||||||
|
throw new Error(`Invalid workspace id "${workspaceId}"`);
|
||||||
|
}
|
||||||
const projections = specs.map((spec) => projectionFor(spec));
|
const projections = specs.map((spec) => projectionFor(spec));
|
||||||
return [
|
return [
|
||||||
'CREATE TABLE rows AS',
|
'CREATE TABLE rows AS',
|
||||||
'SELECT',
|
"SELECT * FROM postgres_query('pg', $pgsql$",
|
||||||
' ' + projections.join(',\n '),
|
' SELECT',
|
||||||
'FROM pg.base_rows',
|
' ' + projections.join(',\n '),
|
||||||
'WHERE base_id = $1::uuid AND workspace_id = $2::uuid AND deleted_at IS NULL',
|
' FROM base_rows',
|
||||||
|
` WHERE base_id = '${baseId}'::uuid`,
|
||||||
|
` AND workspace_id = '${workspaceId}'::uuid`,
|
||||||
|
' AND deleted_at IS NULL',
|
||||||
|
'$pgsql$)',
|
||||||
].join('\n');
|
].join('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user