mirror of
https://github.com/docmost/docmost.git
synced 2026-05-07 06:23:06 +08:00
feat(base): single-instance duckdb runtime with writer + reader pool
This commit is contained in:
@@ -0,0 +1,117 @@
|
||||
import * as os from 'node:os';

import { DuckDbRuntime } from './duckdb-runtime';
import { QueryCacheConfigProvider } from './query-cache.config';
|
||||
|
||||
const makeConfig = (
|
||||
overrides: Partial<QueryCacheConfigProvider['config']> = {},
|
||||
): QueryCacheConfigProvider =>
|
||||
({
|
||||
config: {
|
||||
enabled: true,
|
||||
minRows: 25_000,
|
||||
maxCollections: 50,
|
||||
warmTopN: 50,
|
||||
memoryLimit: '256MB',
|
||||
threads: 2,
|
||||
tempDirectory: `${require('node:os').tmpdir()}/docmost-duckdb-runtime-test`,
|
||||
trace: false,
|
||||
readerPoolSize: 2,
|
||||
...overrides,
|
||||
},
|
||||
}) as unknown as QueryCacheConfigProvider;
|
||||
|
||||
const makeEnv = (): { getDatabaseURL: () => string } => ({
|
||||
getDatabaseURL: () => process.env.DATABASE_URL ?? '',
|
||||
});
|
||||
|
||||
// NOTE(review): these are integration tests — they spin up a real DuckDB
// instance, and the attach/reader tests expect DATABASE_URL to point at a
// reachable Postgres for the runtime's PG attach. Confirm CI provides it.
describe('DuckDbRuntime', () => {
  // Disabled cache ⇒ bootstrap is a pure no-op: isReady() stays false and
  // destroy must still be safe to call.
  it('no-ops when the cache is disabled', async () => {
    const rt = new DuckDbRuntime(makeConfig({ enabled: false }), makeEnv() as any);
    await rt.onApplicationBootstrap();
    expect(rt.isReady()).toBe(false);
    await rt.onModuleDestroy();
  });

  // Happy-path bootstrap: instance + writer + postgres extension + PG attach
  // all succeed, and the reader pool reports the configured size (2).
  it('bootstraps instance, extension, PG attach, and reader pool', async () => {
    const rt = new DuckDbRuntime(makeConfig(), makeEnv() as any);
    await rt.onApplicationBootstrap();
    expect(rt.isReady()).toBe(true);
    expect(rt.readerPoolSize()).toBe(2);
    await rt.onModuleDestroy();
  });

  // attachBase should create an addressable in-memory DB we can write to;
  // after detachBase the same table must no longer resolve.
  it('attachBase creates a per-base schema and detachBase removes it', async () => {
    const rt = new DuckDbRuntime(makeConfig(), makeEnv() as any);
    await rt.onApplicationBootstrap();
    try {
      // Shape matches the b_<hex> convention requireSchemaShape() validates.
      const schema = 'b_testaaaaaaaaaaaaaaaaaaaaaaaaaa';
      await rt.attachBase(schema);
      await rt.getWriter().run(`CREATE TABLE ${schema}.t (x INTEGER)`);
      await rt.getWriter().run(`INSERT INTO ${schema}.t VALUES (1), (2), (3)`);
      const res = await rt
        .getWriter()
        .runAndReadAll(`SELECT count(*) AS c FROM ${schema}.t`);
      // DuckDB count(*) may surface as bigint; normalise before comparing.
      const row = res.getRowObjects()[0] as { c: bigint | number };
      expect(Number(row.c)).toBe(3);

      await rt.detachBase(schema);
      await expect(
        rt.getWriter().run(`SELECT count(*) FROM ${schema}.t`),
      ).rejects.toThrow();
    } finally {
      await rt.onModuleDestroy();
    }
  });

  // With pool=2, two concurrent withReader() calls must both start before
  // either finishes — i.e. they run on distinct pooled connections.
  it('withReader parallelises across pool', async () => {
    const rt = new DuckDbRuntime(makeConfig({ readerPoolSize: 2 }), makeEnv() as any);
    await rt.onApplicationBootstrap();
    try {
      const started: string[] = [];
      const ended: string[] = [];
      const p1 = rt.withReader(async (conn) => {
        started.push('a');
        // Hold the connection long enough that overlap is observable.
        await new Promise((r) => setTimeout(r, 50));
        await conn.runAndReadAll('SELECT 1');
        ended.push('a');
      });
      const p2 = rt.withReader(async (conn) => {
        started.push('b');
        await new Promise((r) => setTimeout(r, 50));
        await conn.runAndReadAll('SELECT 1');
        ended.push('b');
      });
      await Promise.all([p1, p2]);
      expect(new Set(started)).toEqual(new Set(['a', 'b']));
      expect(started.length).toBe(2);
      expect(ended.length).toBe(2);
    } finally {
      await rt.onModuleDestroy();
    }
  });

  // Third concurrent request against a pool of 2 must wait for a free
  // connection: even though task 3 is the fastest (5ms vs 40ms), it cannot
  // finish first because it only starts after task 1 or 2 releases a reader.
  it('withReader on a 3rd concurrent request with pool=2 queues correctly', async () => {
    const rt = new DuckDbRuntime(makeConfig({ readerPoolSize: 2 }), makeEnv() as any);
    await rt.onApplicationBootstrap();
    try {
      const order: number[] = [];
      const makeOne = (n: number, delayMs: number) =>
        rt.withReader(async () => {
          await new Promise((r) => setTimeout(r, delayMs));
          order.push(n);
        });
      const p1 = makeOne(1, 40);
      const p2 = makeOne(2, 40);
      const p3 = makeOne(3, 5);
      await Promise.all([p1, p2, p3]);
      expect(order.length).toBe(3);
      // index > 0 ⇒ task 3 did not complete first, proving it was queued.
      expect(order.indexOf(3)).toBeGreaterThan(0);
    } finally {
      await rt.onModuleDestroy();
    }
  });

  // Before bootstrap, getWriter() must fail loudly rather than hand back null.
  it('getWriter throws if not ready', () => {
    const rt = new DuckDbRuntime(makeConfig(), makeEnv() as any);
    expect(() => rt.getWriter()).toThrow(/not ready/i);
  });
});
|
||||
@@ -0,0 +1,211 @@
|
||||
import {
|
||||
Injectable,
|
||||
Logger,
|
||||
OnApplicationBootstrap,
|
||||
OnModuleDestroy,
|
||||
} from '@nestjs/common';
|
||||
import { DuckDBInstance, DuckDBConnection } from '@duckdb/node-api';
|
||||
import * as fs from 'node:fs';
|
||||
import { QueryCacheConfigProvider } from './query-cache.config';
|
||||
import { EnvironmentService } from '../../../integrations/environment/environment.service';
|
||||
import { ConnectionPool } from './connection-pool';
|
||||
|
||||
/*
|
||||
* DuckDbRuntime
|
||||
* -------------
|
||||
* Owns the process-wide DuckDB instance and everything attached to it:
|
||||
*
|
||||
* - One `DuckDBInstance` at `:memory:` with `memory_limit`, `threads`,
|
||||
* `temp_directory` configured from env.
|
||||
* - One writer `DuckDBConnection` for ATTACH/DETACH/CREATE TABLE/INSERT.
|
||||
* - A pool of N reader connections for SELECTs; `withReader(fn)` lends
|
||||
* one out, runs the callback, returns it — fair FIFO under contention.
|
||||
* - The `postgres` extension is installed + loaded once, not per-base.
|
||||
* - A single long-lived ATTACH against Postgres (READ_ONLY). All loaders
|
||||
* reference `postgres_query('pg', $pgsql$ ... $pgsql$)` without doing
|
||||
* their own attach/detach.
|
||||
*
|
||||
* When the query cache is disabled (`config.enabled === false`), the
|
||||
* runtime is a no-op: nothing is created, `isReady()` returns false, and
|
||||
* every consumer's own gate prevents it from touching the runtime.
|
||||
*/
|
||||
@Injectable()
|
||||
export class DuckDbRuntime implements OnApplicationBootstrap, OnModuleDestroy {
|
||||
private readonly logger = new Logger(DuckDbRuntime.name);
|
||||
private instance: DuckDBInstance | null = null;
|
||||
private writer: DuckDBConnection | null = null;
|
||||
private readonly readerPool = new ConnectionPool<DuckDBConnection>();
|
||||
private readonly attachedSchemas = new Set<string>();
|
||||
private ready = false;
|
||||
private bootstrapFailure: string | null = null;
|
||||
|
||||
constructor(
|
||||
private readonly configProvider: QueryCacheConfigProvider,
|
||||
private readonly env: EnvironmentService,
|
||||
) {}
|
||||
|
||||
async onApplicationBootstrap(): Promise<void> {
|
||||
const config = this.configProvider.config;
|
||||
if (!config.enabled) {
|
||||
this.logger.log('query cache disabled; skipping duckdb runtime bootstrap');
|
||||
return;
|
||||
}
|
||||
|
||||
const dbUrl = this.env.getDatabaseURL();
|
||||
if (!dbUrl) {
|
||||
this.bootstrapFailure = 'DATABASE_URL is empty';
|
||||
this.logger.error('DuckDbRuntime cannot bootstrap: DATABASE_URL is empty');
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
fs.mkdirSync(config.tempDirectory, { recursive: true });
|
||||
} catch {
|
||||
/* swallow */
|
||||
}
|
||||
|
||||
try {
|
||||
this.instance = await DuckDBInstance.create(':memory:', {
|
||||
memory_limit: config.memoryLimit,
|
||||
threads: String(config.threads),
|
||||
temp_directory: config.tempDirectory,
|
||||
});
|
||||
|
||||
this.writer = await this.instance.connect();
|
||||
await this.writer.run('SET preserve_insertion_order = false');
|
||||
await this.writer.run('INSTALL postgres');
|
||||
await this.writer.run('LOAD postgres');
|
||||
await this.writer.run(
|
||||
`ATTACH ${escapeSqlString(dbUrl)} AS pg (TYPE POSTGRES, READ_ONLY)`,
|
||||
);
|
||||
|
||||
const readers: DuckDBConnection[] = [];
|
||||
for (let i = 0; i < config.readerPoolSize; i++) {
|
||||
const reader = await this.instance.connect();
|
||||
await reader.run('SET preserve_insertion_order = false');
|
||||
readers.push(reader);
|
||||
}
|
||||
this.readerPool.init(readers);
|
||||
|
||||
this.ready = true;
|
||||
this.logger.log(
|
||||
`DuckDbRuntime ready (readers=${config.readerPoolSize}, memory_limit=${config.memoryLimit})`,
|
||||
);
|
||||
} catch (err) {
|
||||
const error = err as Error;
|
||||
this.bootstrapFailure = error.message;
|
||||
this.logger.error(`DuckDbRuntime bootstrap failed: ${error.message}`);
|
||||
if (error.stack) this.logger.error(error.stack);
|
||||
this.ready = false;
|
||||
try {
|
||||
this.readerPool.close().forEach((c) => c.closeSync());
|
||||
} catch { /* swallow */ }
|
||||
try {
|
||||
this.writer?.closeSync();
|
||||
} catch { /* swallow */ }
|
||||
try {
|
||||
this.instance?.closeSync();
|
||||
} catch { /* swallow */ }
|
||||
this.writer = null;
|
||||
this.instance = null;
|
||||
}
|
||||
}
|
||||
|
||||
async onModuleDestroy(): Promise<void> {
|
||||
for (const c of this.readerPool.close()) {
|
||||
try {
|
||||
c.closeSync();
|
||||
} catch { /* swallow */ }
|
||||
}
|
||||
if (this.writer) {
|
||||
try {
|
||||
this.writer.closeSync();
|
||||
} catch { /* swallow */ }
|
||||
this.writer = null;
|
||||
}
|
||||
if (this.instance) {
|
||||
try {
|
||||
this.instance.closeSync();
|
||||
} catch { /* swallow */ }
|
||||
this.instance = null;
|
||||
}
|
||||
this.attachedSchemas.clear();
|
||||
this.ready = false;
|
||||
}
|
||||
|
||||
isReady(): boolean {
|
||||
return this.ready;
|
||||
}
|
||||
|
||||
readerPoolSize(): number {
|
||||
return this.readerPool.size();
|
||||
}
|
||||
|
||||
lastBootstrapFailure(): string | null {
|
||||
return this.bootstrapFailure;
|
||||
}
|
||||
|
||||
/*
|
||||
* Attach a new in-memory database for a base. Idempotent: if the schema
|
||||
* is already attached, this is a no-op. Schema name must come from
|
||||
* `baseSchemaName()` — validated by the caller; we check shape here
|
||||
* as defense-in-depth.
|
||||
*/
|
||||
async attachBase(schema: string): Promise<void> {
|
||||
this.requireReady();
|
||||
this.requireSchemaShape(schema);
|
||||
if (this.attachedSchemas.has(schema)) return;
|
||||
|
||||
await this.writer!.run(`ATTACH ':memory:' AS ${schema}`);
|
||||
this.attachedSchemas.add(schema);
|
||||
}
|
||||
|
||||
/*
|
||||
* Detach an in-memory database. Idempotent: detaching a non-attached
|
||||
* schema is a swallow. Frees all memory held by the attached DB back
|
||||
* to the shared buffer pool.
|
||||
*/
|
||||
async detachBase(schema: string): Promise<void> {
|
||||
if (!this.ready || !this.writer) return;
|
||||
this.requireSchemaShape(schema);
|
||||
if (!this.attachedSchemas.has(schema)) return;
|
||||
|
||||
try {
|
||||
await this.writer.run(`DETACH DATABASE ${schema}`);
|
||||
} catch (err) {
|
||||
const msg = (err as Error).message ?? '';
|
||||
if (!/not attached|does not exist|unknown database/i.test(msg)) {
|
||||
throw err;
|
||||
}
|
||||
} finally {
|
||||
this.attachedSchemas.delete(schema);
|
||||
}
|
||||
}
|
||||
|
||||
getWriter(): DuckDBConnection {
|
||||
this.requireReady();
|
||||
return this.writer!;
|
||||
}
|
||||
|
||||
async withReader<T>(fn: (conn: DuckDBConnection) => Promise<T>): Promise<T> {
|
||||
this.requireReady();
|
||||
return this.readerPool.withResource(fn);
|
||||
}
|
||||
|
||||
private requireReady(): void {
|
||||
if (!this.ready || !this.writer) {
|
||||
const detail = this.bootstrapFailure ? `: ${this.bootstrapFailure}` : '';
|
||||
throw new Error(`DuckDbRuntime not ready${detail}`);
|
||||
}
|
||||
}
|
||||
|
||||
private requireSchemaShape(schema: string): void {
|
||||
if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(schema)) {
|
||||
throw new Error(`Invalid schema name "${schema}"`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function escapeSqlString(s: string): string {
|
||||
return `'${s.replace(/'/g, "''")}'`;
|
||||
}
|
||||
Reference in New Issue
Block a user