mirror of
https://github.com/docmost/docmost.git
synced 2026-05-18 23:44:24 +08:00
fix(base): enable duckdb disk spill + raise memory default to avoid oom on large bases
This commit is contained in:
@@ -65,6 +65,9 @@ class FakeEnvService {
|
|||||||
getBaseQueryCacheThreads() {
|
getBaseQueryCacheThreads() {
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
getBaseQueryCacheTempDirectory() {
|
||||||
|
return require('node:os').tmpdir() + '/docmost-duckdb-test';
|
||||||
|
}
|
||||||
getRedisUrl() {
|
getRedisUrl() {
|
||||||
return REDIS_URL;
|
return REDIS_URL;
|
||||||
}
|
}
|
||||||
@@ -453,6 +456,9 @@ describeIntegration('BaseQueryCacheService LRU eviction', () => {
|
|||||||
getBaseQueryCacheThreads() {
|
getBaseQueryCacheThreads() {
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
getBaseQueryCacheTempDirectory() {
|
||||||
|
return require('node:os').tmpdir() + '/docmost-duckdb-test';
|
||||||
|
}
|
||||||
getRedisUrl() {
|
getRedisUrl() {
|
||||||
return REDIS_URL;
|
return REDIS_URL;
|
||||||
}
|
}
|
||||||
@@ -1101,6 +1107,9 @@ describeIntegration('BaseQueryCacheService warm-up on boot', () => {
|
|||||||
getBaseQueryCacheThreads() {
|
getBaseQueryCacheThreads() {
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
getBaseQueryCacheTempDirectory() {
|
||||||
|
return require('node:os').tmpdir() + '/docmost-duckdb-test';
|
||||||
|
}
|
||||||
getRedisUrl() {
|
getRedisUrl() {
|
||||||
return REDIS_URL;
|
return REDIS_URL;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,16 +36,34 @@ export class CollectionLoader {
|
|||||||
const properties = await this.basePropertyRepo.findByBaseId(baseId);
|
const properties = await this.basePropertyRepo.findByBaseId(baseId);
|
||||||
const specs = buildColumnSpecs(properties);
|
const specs = buildColumnSpecs(properties);
|
||||||
|
|
||||||
const { memoryLimit, threads } = this.config.config;
|
const { memoryLimit, threads, tempDirectory } = this.config.config;
|
||||||
|
|
||||||
|
// Ensure the temp directory exists so DuckDB can spill to it.
|
||||||
|
// Swallow errors — if creation fails, DuckDB will fail its own sanity
|
||||||
|
// check and we'll log that instead of crashing here.
|
||||||
|
try {
|
||||||
|
const fs = require('node:fs');
|
||||||
|
fs.mkdirSync(tempDirectory, { recursive: true });
|
||||||
|
} catch {
|
||||||
|
/* swallow */
|
||||||
|
}
|
||||||
|
|
||||||
const instance = await DuckDBInstance.create(':memory:', {
|
const instance = await DuckDBInstance.create(':memory:', {
|
||||||
memory_limit: memoryLimit,
|
memory_limit: memoryLimit,
|
||||||
threads: String(threads),
|
threads: String(threads),
|
||||||
|
temp_directory: tempDirectory,
|
||||||
});
|
});
|
||||||
const connection = await instance.connect();
|
const connection = await instance.connect();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await this.pgExtension.configureOnConnection(connection);
|
await this.pgExtension.configureOnConnection(connection);
|
||||||
|
|
||||||
|
// Disable insertion-order preservation during bulk load — DuckDB's docs
|
||||||
|
// explicitly recommend this to reduce memory pressure on large inserts. Our
|
||||||
|
// loader doesn't depend on the insertion order (we sort via indexes
|
||||||
|
// or keyset cursors later), so this is free memory savings.
|
||||||
|
await connection.run('SET preserve_insertion_order = false');
|
||||||
|
|
||||||
// Bulk load via CREATE TABLE AS SELECT. JSONB extraction happens
|
// Bulk load via CREATE TABLE AS SELECT. JSONB extraction happens
|
||||||
// server-side via the base_cell_* helpers; DuckDB streams typed
|
// server-side via the base_cell_* helpers; DuckDB streams typed
|
||||||
// columns over COPY BINARY into its vectorized insert path.
|
// columns over COPY BINARY into its vectorized insert path.
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ export type QueryCacheConfig = {
|
|||||||
memoryLimit: string;
|
memoryLimit: string;
|
||||||
threads: number;
|
threads: number;
|
||||||
trace: boolean;
|
trace: boolean;
|
||||||
|
tempDirectory: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
@@ -23,6 +24,7 @@ export class QueryCacheConfigProvider {
|
|||||||
memoryLimit: env.getBaseQueryCacheMemoryLimit(),
|
memoryLimit: env.getBaseQueryCacheMemoryLimit(),
|
||||||
threads: env.getBaseQueryCacheThreads(),
|
threads: env.getBaseQueryCacheThreads(),
|
||||||
trace: env.getBaseQueryCacheTrace(),
|
trace: env.getBaseQueryCacheTrace(),
|
||||||
|
tempDirectory: env.getBaseQueryCacheTempDirectory(),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -354,11 +354,26 @@ export class EnvironmentService {
|
|||||||
|
|
||||||
getBaseQueryCacheMemoryLimit(): string {
|
getBaseQueryCacheMemoryLimit(): string {
|
||||||
// Per-DuckDB-instance memory ceiling. DuckDB accepts human-readable sizes:
|
// Per-DuckDB-instance memory ceiling. DuckDB accepts human-readable sizes:
|
||||||
// '32MB', '128MB', '1GB'. Default keeps a single instance from
|
// '256MB', '1GB', etc. Default 512MB is sized for bases up to ~300K rows
|
||||||
// monopolising the heap if a runaway query needs to spill.
|
// with moderate schemas without spilling. DuckDB automatically spills
|
||||||
|
// to `temp_directory` when this is exceeded, so over-allocating is
|
||||||
|
// cheap — the risk is under-sizing.
|
||||||
return this.configService.get<string>(
|
return this.configService.get<string>(
|
||||||
'BASE_QUERY_CACHE_MEMORY_LIMIT',
|
'BASE_QUERY_CACHE_MEMORY_LIMIT',
|
||||||
'64MB',
|
'512MB',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
getBaseQueryCacheTempDirectory(): string {
|
||||||
|
// Directory DuckDB uses to spill pages when an instance exceeds its
|
||||||
|
// memory_limit. Defaults to a fixed `docmost-duckdb-cache` subdirectory of the
|
||||||
|
// system temp dir — namespaced away from other programs, but not unique per process. Setting this explicitly is what
|
||||||
|
// enables spill-to-disk on `:memory:` instances — without it, DuckDB
|
||||||
|
// OOMs at memory_limit instead of paging.
|
||||||
|
const defaultPath = `${require('node:os').tmpdir()}/docmost-duckdb-cache`;
|
||||||
|
return this.configService.get<string>(
|
||||||
|
'BASE_QUERY_CACHE_TEMP_DIR',
|
||||||
|
defaultPath,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user