From 5ebab5cd9e35e9d9fd921b63fd0b882e32bf0ffd Mon Sep 17 00:00:00 2001
From: Philipinho <16838612+Philipinho@users.noreply.github.com>
Date: Thu, 23 Apr 2026 13:52:20 +0100
Subject: [PATCH] fix(base): make cell-extractor pg functions genuinely parallel-safe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The plpgsql + EXCEPTION versions of base_cell_numeric,
base_cell_timestamptz, and base_cell_bool were labeled PARALLEL SAFE but
EXCEPTION blocks require subtransactions, which Postgres cannot start in
a parallel worker. Any parallel scan that invoked them crashed with
'cannot start subtransactions during a parallel operation' — notably
DuckDB's postgres extension on large base COPY reads.

Rewrite each as a pure SQL function using jsonb_typeof + regex
validation for the 'coerce-or-null' semantics. No plpgsql, no
subtransactions, genuinely parallel-safe. The regexes use explicit ASCII
classes and bounded fields so that nothing they accept can make the
following cast raise. Signatures unchanged so existing call sites
(loader, expression indexes, engine predicates) are untouched.
---
 ...24902-fix-parallel-safe-cell-extractors.ts | 141 ++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 apps/server/src/database/migrations/20260423T124902-fix-parallel-safe-cell-extractors.ts

diff --git a/apps/server/src/database/migrations/20260423T124902-fix-parallel-safe-cell-extractors.ts b/apps/server/src/database/migrations/20260423T124902-fix-parallel-safe-cell-extractors.ts
new file mode 100644
index 00000000..508903f7
--- /dev/null
+++ b/apps/server/src/database/migrations/20260423T124902-fix-parallel-safe-cell-extractors.ts
@@ -0,0 +1,141 @@
+import { type Kysely, sql } from 'kysely';
+
+/**
+ * Replaces base_cell_numeric, base_cell_timestamptz and base_cell_bool with
+ * pure-SQL bodies (no plpgsql, no EXCEPTION block); signatures are unchanged.
+ *
+ * @param db - Kysely instance the CREATE OR REPLACE statements run against.
+ */
+export async function up(db: Kysely<any>): Promise<void> {
+  // These functions previously used plpgsql + EXCEPTION blocks to catch bad
+  // casts. EXCEPTION blocks require subtransactions, which Postgres cannot
+  // use in parallel workers. The functions were marked PARALLEL SAFE but
+  // aren't actually parallel-safe. DuckDB's postgres extension triggers
+  // parallel COPY scans and fails on any row that invokes these.
+  //
+  // Rewrite each as a pure SQL function using jsonb_typeof + regex
+  // validation to achieve the same "coerce-or-null" semantics without
+  // plpgsql. SQL functions with no volatile side effects are genuinely
+  // parallel-safe.
+  //
+  // With no EXCEPTION block left to absorb a failed cast, the regex is the
+  // only guard: anything it lets through must be castable, or the calling
+  // query errors instead of getting NULL. The patterns therefore use explicit
+  // ASCII classes ([0-9], a literal whitespace list) rather than \d / \s,
+  // which Postgres documents as locale-dependent, and they bound every field.
+
+  // Digit runs are capped at 255 (the largest bound a Postgres regex allows)
+  // and the exponent at 3 digits, keeping the value well inside numeric range.
+  await sql`
+    CREATE OR REPLACE FUNCTION base_cell_numeric(cells jsonb, prop uuid)
+    RETURNS numeric
+    LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE
+    AS $$
+      SELECT CASE jsonb_typeof(cells -> prop::text)
+        WHEN 'number' THEN (cells->>prop::text)::numeric
+        WHEN 'string' THEN
+          CASE WHEN (cells->>prop::text) ~ '^[ \\t\\n\\r\\f\\v]*-?[0-9]{1,255}(\\.[0-9]{1,255})?([eE][+-]?[0-9]{1,3})?[ \\t\\n\\r\\f\\v]*$'
+            THEN (cells->>prop::text)::numeric
+            ELSE NULL END
+        ELSE NULL
+      END
+    $$
+  `.execute(db);
+
+  // The date part only admits real calendar days (month lengths, Feb 29 only
+  // in leap years, no year 0000); time fields, fraction length and UTC offset
+  // are range-limited. 24:00 and :60 seconds are left out, so they yield NULL.
+  await sql`
+    CREATE OR REPLACE FUNCTION base_cell_timestamptz(cells jsonb, prop uuid)
+    RETURNS timestamptz
+    LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE
+    AS $$
+      SELECT CASE
+        WHEN jsonb_typeof(cells -> prop::text) = 'string'
+         AND left(cells->>prop::text, 4) <> '0000'
+         AND (cells->>prop::text) ~ '^([0-9]{4}-((0[13578]|1[02])-(0[1-9]|[12][0-9]|3[01])|(0[469]|11)-(0[1-9]|[12][0-9]|30)|02-(0[1-9]|1[0-9]|2[0-8]))|([0-9]{2}(0[48]|[2468][048]|[13579][26])|([02468][048]|[13579][26])00)-02-29)([ T]([01][0-9]|2[0-3]):[0-5][0-9](:[0-5][0-9](\\.[0-9]{1,12})?)?([+-](0[0-9]|1[0-5])(:?[0-5][0-9])?|Z)?)?$'
+        THEN (cells->>prop::text)::timestamptz
+        ELSE NULL
+      END
+    $$
+  `.execute(db);
+
+  // Strings are matched against a fixed list instead of being cast, so this
+  // function has no cast that can fail on user-supplied text.
+  await sql`
+    CREATE OR REPLACE FUNCTION base_cell_bool(cells jsonb, prop uuid)
+    RETURNS boolean
+    LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE
+    AS $$
+      SELECT CASE jsonb_typeof(cells -> prop::text)
+        WHEN 'boolean' THEN (cells->>prop::text)::boolean
+        WHEN 'string' THEN
+          CASE lower(cells->>prop::text)
+            WHEN 'true' THEN true
+            WHEN 't' THEN true
+            WHEN 'yes' THEN true
+            WHEN 'y' THEN true
+            WHEN '1' THEN true
+            WHEN 'false' THEN false
+            WHEN 'f' THEN false
+            WHEN 'no' THEN false
+            WHEN 'n' THEN false
+            WHEN '0' THEN false
+            ELSE NULL
+          END
+        ELSE NULL
+      END
+    $$
+  `.execute(db);
+}
+
+/**
+ * Restores the previous plpgsql + EXCEPTION definitions of the three functions.
+ *
+ * @param db - Kysely instance the CREATE OR REPLACE statements run against.
+ */
+export async function down(db: Kysely<any>): Promise<void> {
+  // Restore the previous plpgsql + EXCEPTION versions. Same PARALLEL SAFE
+  // labels — they were broken before, they'll still be broken after rollback,
+  // but rollback means you're going back to the prior bug not inventing a
+  // new one.
+
+  await sql`
+    CREATE OR REPLACE FUNCTION base_cell_numeric(cells jsonb, prop uuid)
+    RETURNS numeric
+    LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE
+    AS $$
+    BEGIN
+      RETURN (cells->>prop::text)::numeric;
+    EXCEPTION WHEN others THEN
+      RETURN NULL;
+    END;
+    $$
+  `.execute(db);
+
+  await sql`
+    CREATE OR REPLACE FUNCTION base_cell_timestamptz(cells jsonb, prop uuid)
+    RETURNS timestamptz
+    LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE
+    AS $$
+    BEGIN
+      RETURN (cells->>prop::text)::timestamptz;
+    EXCEPTION WHEN others THEN
+      RETURN NULL;
+    END;
+    $$
+  `.execute(db);
+
+  await sql`
+    CREATE OR REPLACE FUNCTION base_cell_bool(cells jsonb, prop uuid)
+    RETURNS boolean
+    LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE
+    AS $$
+    BEGIN
+      RETURN (cells->>prop::text)::boolean;
+    EXCEPTION WHEN others THEN
+      RETURN NULL;
+    END;
+    $$
+  `.execute(db);
+}