refactor(base): replace streaming loader with pg-extension CREATE TABLE AS SELECT
@@ -2,19 +2,11 @@ import { Injectable, Logger } from '@nestjs/common';
 import { DuckDBInstance } from '@duckdb/node-api';
 import { BaseRepo } from '@docmost/db/repos/base/base.repo';
 import { BasePropertyRepo } from '@docmost/db/repos/base/base-property.repo';
-import { BaseRowRepo } from '@docmost/db/repos/base/base-row.repo';
-import { BaseRow } from '@docmost/db/types/entity.types';
-import { BasePropertyType } from '../base.schemas';
 import { buildColumnSpecs } from './column-types';
-import { ColumnSpec, LoadedCollection } from './query-cache.types';
-
-// System property type → DuckDB system column name (snake_case). Mirrors
-// the mapping in duckdb-query-builder.ts.
-const SYSTEM_PROPERTY_COLUMN: Record<string, keyof BaseRow> = {
-  [BasePropertyType.CREATED_AT]: 'createdAt',
-  [BasePropertyType.LAST_EDITED_AT]: 'updatedAt',
-  [BasePropertyType.LAST_EDITED_BY]: 'lastUpdatedById',
-};
+import { buildLoaderSql } from './loader-sql';
+import { LoadedCollection } from './query-cache.types';
+import { PostgresExtensionService } from './postgres-extension.service';
+import { QueryCacheConfigProvider } from './query-cache.config';
 
 @Injectable()
 export class CollectionLoader {
@@ -23,10 +15,18 @@ export class CollectionLoader {
   constructor(
     private readonly baseRepo: BaseRepo,
     private readonly basePropertyRepo: BasePropertyRepo,
-    private readonly baseRowRepo: BaseRowRepo,
+    private readonly pgExtension: PostgresExtensionService,
+    private readonly config: QueryCacheConfigProvider,
   ) {}
 
   async load(baseId: string, workspaceId: string): Promise<LoadedCollection> {
+    if (!this.pgExtension.isReady()) {
+      throw new Error(
+        `Cannot load collection ${baseId}: postgres extension not ready. ` +
+          'Check PostgresExtensionService bootstrap logs.',
+      );
+    }
+
     const base = await this.baseRepo.findById(baseId);
     if (!base) {
       throw new Error(`Base ${baseId} not found`);
@@ -36,75 +36,30 @@ export class CollectionLoader {
     const properties = await this.basePropertyRepo.findByBaseId(baseId);
     const specs = buildColumnSpecs(properties);
 
-    const instance = await DuckDBInstance.create(':memory:');
+    const { memoryLimit, threads } = this.config.config;
+    const instance = await DuckDBInstance.create(':memory:', {
+      memory_limit: memoryLimit,
+      threads: String(threads),
+    });
     const connection = await instance.connect();
-    let appender: Awaited<ReturnType<typeof connection.createAppender>> | null =
-      null;
-
     try {
-      const ddl = `CREATE TABLE rows (${specs
-        .map((s) => `${quoteIdent(s.column)} ${s.ddlType}`)
-        .join(', ')}, PRIMARY KEY (${quoteIdent('id')}))`;
-      await connection.run(ddl);
-
-      appender = await connection.createAppender('rows');
-
-      let rowCount = 0;
-      for await (const chunk of this.baseRowRepo.streamByBaseId(baseId, {
-        workspaceId,
-        chunkSize: 5000,
-      })) {
-        for (const row of chunk) {
-          for (const spec of specs) {
-            const raw = readFromRow(row, spec);
-            if (raw == null) {
-              appender.appendNull();
-              continue;
-            }
-            switch (spec.ddlType) {
-              case 'VARCHAR':
-                appender.appendVarchar(String(raw));
-                break;
-              case 'DOUBLE': {
-                const n = Number(raw);
-                if (Number.isNaN(n)) {
-                  this.logger.debug(
-                    `Malformed number for ${spec.column} on row ${row.id}`,
-                  );
-                  appender.appendNull();
-                  break;
-                }
-                appender.appendDouble(n);
-                break;
-              }
-              case 'BOOLEAN':
-                appender.appendBoolean(Boolean(raw));
-                break;
-              case 'TIMESTAMPTZ': {
-                const d = raw instanceof Date ? raw : new Date(String(raw));
-                if (Number.isNaN(d.getTime())) {
-                  this.logger.debug(
-                    `Malformed timestamp for ${spec.column} on row ${row.id}`,
-                  );
-                  appender.appendNull();
-                  break;
-                }
-                appender.appendVarchar(d.toISOString());
-                break;
-              }
-              case 'JSON':
-                appender.appendVarchar(JSON.stringify(raw));
-                break;
-            }
-          }
-          appender.endRow();
-          rowCount++;
-        }
-      }
-      appender.flushSync();
-      appender.closeSync();
-      appender = null;
+      await this.pgExtension.configureOnConnection(connection);
+
+      // Bulk load via CREATE TABLE AS SELECT. JSONB extraction happens
+      // server-side via the base_cell_* helpers; DuckDB streams typed
+      // columns over COPY BINARY into its vectorized insert path.
+      const sql = buildLoaderSql(specs);
+      const prepared = await connection.prepare(sql);
+      prepared.bindVarchar(1, baseId);
+      prepared.bindVarchar(2, workspaceId);
+      await prepared.run();
+
+      // Release the PG connection held by the ATTACH — we're done with
+      // Postgres; all subsequent queries run purely against the local table.
+      await this.pgExtension.detach(connection);
 
+      // Build ART indexes on indexable columns.
       for (const spec of specs) {
         if (!spec.indexable) continue;
         const safe = spec.column.replace(/[^a-zA-Z0-9_]/g, '_');
@@ -113,17 +68,17 @@ export class CollectionLoader {
         );
       }
 
-      this.logger.debug(
-        `Loaded ${rowCount} rows for base ${baseId} (schemaVersion=${schemaVersion})`,
-      );
-
       const countResult = await connection.runAndReadAll(
         'SELECT count(*) AS c FROM rows',
       );
-      const cachedRowCount = Number(
+      const rowCount = Number(
         (countResult.getRowObjects()[0] as { c: bigint | number }).c,
       );
 
+      this.logger.debug(
+        `Loaded ${rowCount} rows for base ${baseId} (schemaVersion=${schemaVersion})`,
+      );
+
       return {
         baseId,
         schemaVersion,
@@ -131,67 +86,29 @@ export class CollectionLoader {
         instance,
         connection,
         lastAccessedAt: Date.now(),
-        rowCount: cachedRowCount,
+        rowCount,
       };
     } catch (err) {
-      if (appender) {
-        try {
-          appender.closeSync();
-        } catch {
-          // swallow — best-effort cleanup
-        }
-      }
+      try {
+        await this.pgExtension.detach(connection);
+      } catch {
+        /* swallow */
+      }
       try {
         connection.closeSync();
       } catch {
-        // swallow — best-effort cleanup
+        /* swallow */
       }
       try {
         instance.closeSync();
      } catch {
-        // swallow — best-effort cleanup
+        /* swallow */
       }
       throw err;
     }
   }
 }
-
-function readFromRow(row: BaseRow, spec: ColumnSpec): unknown {
-  // System columns
-  switch (spec.column) {
-    case 'id':
-      return row.id;
-    case 'base_id':
-      return row.baseId;
-    case 'workspace_id':
-      return row.workspaceId;
-    case 'creator_id':
-      return row.creatorId;
-    case 'position':
-      return row.position;
-    case 'created_at':
-      return row.createdAt;
-    case 'updated_at':
-      return row.updatedAt;
-    case 'last_updated_by_id':
-      return row.lastUpdatedById;
-    case 'deleted_at':
-      return null; // loader only inserts live rows
-    case 'search_text':
-      return ''; // search stays on Postgres in v1
-  }
-
-  // User-defined columns: look up by property id
-  const prop = spec.property;
-  if (!prop) return null;
-
-  const sysColumn = SYSTEM_PROPERTY_COLUMN[prop.type];
-  if (sysColumn) return (row as any)[sysColumn];
-
-  const cells = (row.cells as Record<string, unknown> | null) ?? {};
-  return cells[prop.id] ?? null;
-}
 
 function quoteIdent(name: string): string {
   return `"${name.replace(/"/g, '""')}"`;
 }
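
The buildLoaderSql helper is imported from './loader-sql' but that module is not shown in this excerpt, so the exact statement it builds is unknown. Below is a minimal sketch of the shape the new load path implies, assuming the Postgres database has been ATTACHed under an alias pg and that each column spec carries a server-side extraction expression; the LoaderColumnSpec type, the sourceExpr field, and the pg.base_rows reference are illustrative assumptions, not the real API.

import { ColumnSpec } from './query-cache.types';

// Sketch only: assume a spec variant carrying a server-side extraction
// expression (e.g. base_cell_text(cells, '<propertyId>')), so the JSONB
// work happens in Postgres as the diff comment describes.
type LoaderColumnSpec = ColumnSpec & { sourceExpr: string };

export function buildLoaderSql(specs: LoaderColumnSpec[]): string {
  const selectList = specs
    .map((spec) => `${spec.sourceExpr} AS ${quoteIdent(spec.column)}`)
    .join(', ');

  // One CREATE TABLE AS SELECT replaces the old per-row appender loop;
  // $1/$2 line up with the two bindVarchar calls in load().
  return (
    `CREATE TABLE rows AS SELECT ${selectList} ` +
    `FROM pg.base_rows ` +
    `WHERE base_id = $1 AND workspace_id = $2 AND deleted_at IS NULL`
  );
}

function quoteIdent(name: string): string {
  return `"${name.replace(/"/g, '""')}"`;
}

The deleted_at IS NULL filter mirrors the old readFromRow comment that the loader only inserts live rows. One nuance of this approach: CREATE TABLE AS SELECT cannot declare the PRIMARY KEY (id) that the old explicit DDL carried, so any uniqueness guarantee on id now rests on the post-load index pass, or is simply trusted from Postgres.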
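
PostgresExtensionService is likewise outside this excerpt. ATTACH ... (TYPE postgres) and DETACH are standard DuckDB SQL once the postgres extension is available; everything else in this sketch (the pg alias, sourcing the connection string from DATABASE_URL, the readiness flag, the class shape) is an illustrative assumption, not the service's actual implementation.

import type { DuckDBConnection } from '@duckdb/node-api';

// Sketch of the per-connection setup/teardown the loader relies on. Not the
// real service: only INSTALL/LOAD/ATTACH/DETACH are standard DuckDB SQL.
export class PostgresExtensionServiceSketch {
  private ready = false;

  // Presumably set once at bootstrap (the diff's error message points at
  // "bootstrap logs"), e.g. after verifying the extension can be installed.
  markReady(): void {
    this.ready = true;
  }

  isReady(): boolean {
    return this.ready;
  }

  async configureOnConnection(connection: DuckDBConnection): Promise<void> {
    // Each load() creates a fresh in-memory instance, so the extension is
    // loaded per connection before ATTACHing Postgres.
    await connection.run('INSTALL postgres');
    await connection.run('LOAD postgres');
    // READ_ONLY: the loader only ever SELECTs from Postgres. Naive quoting
    // of the connection string is fine for a sketch, not for production.
    await connection.run(
      `ATTACH '${process.env.DATABASE_URL ?? ''}' AS pg (TYPE postgres, READ_ONLY)`,
    );
  }

  async detach(connection: DuckDBConnection): Promise<void> {
    // Frees the Postgres connection held by the ATTACH; later queries
    // against the local rows table no longer touch Postgres.
    await connection.run('DETACH pg');
  }
}

Detaching as soon as the CREATE TABLE AS SELECT finishes matters because an attached DuckDB connection holds a Postgres connection for its lifetime; after DETACH, queries against rows are purely local, which is what the "Release the PG connection" comment in the diff is about.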