feat: enforce strict transclusion schema

This commit is contained in:
Philipinho
2026-05-08 00:26:31 +01:00
parent c5ebc29d6b
commit 09f2b84988
12 changed files with 91 additions and 308 deletions
@@ -2,7 +2,6 @@ import { EditorProvider } from "@tiptap/react";
import { useMemo } from "react";
import { mainExtensions } from "@/features/editor/extensions/extensions";
import { UniqueID } from "@docmost/editor-ext";
import { TransclusionLookupProvider } from "./transclusion-lookup-context";
type Props = {
content: unknown;
@@ -31,21 +30,19 @@ export default function TransclusionContent({ content }: Props) {
const stop = (e: React.SyntheticEvent) => e.stopPropagation();
return (
<TransclusionLookupProvider>
<div
onMouseDown={stop}
onClick={stop}
onDragStart={stop}
onDragOver={stop}
onDrop={stop}
>
<EditorProvider
editable={false}
immediatelyRender={true}
extensions={extensions}
content={content as any}
/>
</div>
</TransclusionLookupProvider>
<div
onMouseDown={stop}
onClick={stop}
onDragStart={stop}
onDragOver={stop}
onDrop={stop}
>
<EditorProvider
editable={false}
immediatelyRender={true}
extensions={extensions}
content={content as any}
/>
</div>
);
}
@@ -255,14 +255,17 @@ export class PersistenceExtension implements Extension {
try {
await this.transclusionService.syncPageTransclusions(pageId, tiptapJson);
} catch (err) {
this.logger.error(`Failed to sync transclusions for page ${pageId}`, err);
this.logger.error(
{ err, pageId },
'Failed to sync transclusions for page',
);
}
try {
await this.transclusionService.syncPageReferences(pageId, tiptapJson);
} catch (err) {
this.logger.error(
`Failed to sync transclusion references for page ${pageId}`,
err,
{ err, pageId },
'Failed to sync transclusion references for page',
);
}
}
@@ -149,7 +149,7 @@ describe('collectReferencesFromPmJson', () => {
],
};
expect(collectReferencesFromPmJson(doc)).toEqual([
{ containingTransclusionId: null, sourcePageId: 'p1', transclusionId: 'e1' },
{ sourcePageId: 'p1', transclusionId: 'e1' },
]);
});
@@ -190,12 +190,12 @@ describe('collectReferencesFromPmJson', () => {
],
};
expect(collectReferencesFromPmJson(doc)).toEqual([
{ containingTransclusionId: null, sourcePageId: 'p1', transclusionId: 'e1' },
{ containingTransclusionId: null, sourcePageId: 'p2', transclusionId: 'e2' },
{ sourcePageId: 'p1', transclusionId: 'e1' },
{ sourcePageId: 'p2', transclusionId: 'e2' },
]);
});
it('also finds references nested inside a transclusion (source) node', () => {
it('does not recurse into a transclusion source (schema forbids references inside)', () => {
const doc = {
type: 'doc',
content: [
@@ -211,12 +211,10 @@ describe('collectReferencesFromPmJson', () => {
},
],
};
expect(collectReferencesFromPmJson(doc)).toEqual([
{ containingTransclusionId: 'src1', sourcePageId: 'p1', transclusionId: 'e1' },
]);
expect(collectReferencesFromPmJson(doc)).toEqual([]);
});
it('dedupes identical (containingTransclusionId, sourcePageId, transclusionId) triples', () => {
it('dedupes identical (sourcePageId, transclusionId) pairs', () => {
const doc = {
type: 'doc',
content: [
@@ -235,8 +233,8 @@ describe('collectReferencesFromPmJson', () => {
],
};
expect(collectReferencesFromPmJson(doc)).toEqual([
{ containingTransclusionId: null, sourcePageId: 'p1', transclusionId: 'e1' },
{ containingTransclusionId: null, sourcePageId: 'p2', transclusionId: 'e2' },
{ sourcePageId: 'p1', transclusionId: 'e1' },
{ sourcePageId: 'p2', transclusionId: 'e2' },
]);
});
});
@@ -177,8 +177,6 @@ describe('TransclusionService.syncPageReferences', () => {
findByReferencePageId: jest.fn(),
insertMany: jest.fn(),
deleteByReferenceAndKeys: jest.fn(),
findCyclicEdgesForSource: jest.fn().mockResolvedValue([]),
deleteByIds: jest.fn(),
};
const module = await Test.createTestingModule({
providers: [
@@ -220,13 +218,11 @@ describe('TransclusionService.syncPageReferences', () => {
[
{
referencePageId,
containingTransclusionId: null,
sourcePageId: 'p1',
transclusionId: 'e1',
},
{
referencePageId,
containingTransclusionId: null,
sourcePageId: 'p2',
transclusionId: 'e2',
},
@@ -234,11 +230,9 @@ describe('TransclusionService.syncPageReferences', () => {
undefined,
);
expect(refRepo.deleteByReferenceAndKeys).not.toHaveBeenCalled();
// Loose references never seed cycle detection.
expect(refRepo.findCyclicEdgesForSource).not.toHaveBeenCalled();
});
it('records the containing transclusion when references nest in a source', async () => {
it('ignores references nested inside a source (schema-forbidden)', async () => {
refRepo.findByReferencePageId.mockResolvedValue([]);
const pm = {
type: 'doc',
@@ -258,60 +252,8 @@ describe('TransclusionService.syncPageReferences', () => {
const result = await service.syncPageReferences(referencePageId, pm);
expect(result).toEqual({ inserted: 1, deleted: 0 });
expect(refRepo.insertMany).toHaveBeenCalledWith(
[
{
referencePageId,
containingTransclusionId: 's1',
sourcePageId: 'p2',
transclusionId: 'e2',
},
],
undefined,
);
expect(refRepo.findCyclicEdgesForSource).toHaveBeenCalledWith(
'p2',
'e2',
undefined,
);
});
it('deletes edges that close a cycle and excludes them from the inserted count', async () => {
refRepo.findByReferencePageId.mockResolvedValue([]);
refRepo.findCyclicEdgesForSource.mockResolvedValue([
{
id: 'closing-edge-id',
referencePageId,
containingTransclusionId: 's1',
sourcePageId: 'p2',
transclusionId: 'e2',
createdAt: new Date(),
} as any,
]);
const pm = {
type: 'doc',
content: [
{
type: 'transclusionSource',
attrs: { id: 's1' },
content: [
{
type: 'transclusionReference',
attrs: { sourcePageId: 'p2', transclusionId: 'e2' },
},
],
},
],
};
const result = await service.syncPageReferences(referencePageId, pm);
expect(result).toEqual({ inserted: 0, deleted: 0 });
expect(refRepo.deleteByIds).toHaveBeenCalledWith(
['closing-edge-id'],
undefined,
);
expect(refRepo.insertMany).not.toHaveBeenCalled();
});
it('deletes references that no longer appear', async () => {
@@ -319,7 +261,6 @@ describe('TransclusionService.syncPageReferences', () => {
{
id: 'r1',
referencePageId,
containingTransclusionId: null,
sourcePageId: 'p1',
transclusionId: 'e1',
createdAt: new Date(),
@@ -334,7 +275,6 @@ describe('TransclusionService.syncPageReferences', () => {
referencePageId,
[
{
containingTransclusionId: null,
sourcePageId: 'p1',
transclusionId: 'e1',
},
@@ -349,7 +289,6 @@ describe('TransclusionService.syncPageReferences', () => {
{
id: 'r',
referencePageId,
containingTransclusionId: null,
sourcePageId: 'p1',
transclusionId: 'e1',
createdAt: new Date(),
@@ -107,11 +107,9 @@ export class TransclusionService {
): Promise<{ inserted: number; deleted: number }> {
const desired = collectReferencesFromPmJson(pmJson);
const keyOf = (s: {
containingTransclusionId: string | null;
sourcePageId: string;
transclusionId: string;
}) =>
`${s.containingTransclusionId ?? ''}::${s.sourcePageId}::${s.transclusionId}`;
}) => `${s.sourcePageId}::${s.transclusionId}`;
const desiredKeys = new Set(desired.map(keyOf));
const existing = await this.pageTransclusionReferencesRepo.findByReferencePageId(
@@ -124,7 +122,6 @@ export class TransclusionService {
.filter((d) => !existingKeys.has(keyOf(d)))
.map((d) => ({
referencePageId,
containingTransclusionId: d.containingTransclusionId,
sourcePageId: d.sourcePageId,
transclusionId: d.transclusionId,
}));
@@ -132,7 +129,6 @@ export class TransclusionService {
const toDelete = existing
.filter((e) => !desiredKeys.has(keyOf(e)))
.map((e) => ({
containingTransclusionId: e.containingTransclusionId,
sourcePageId: e.sourcePageId,
transclusionId: e.transclusionId,
}));
@@ -148,66 +144,12 @@ export class TransclusionService {
);
}
const removedCount = await this.removeCyclicEdgesIntroducedBy(
toInsert,
trx,
);
return {
inserted: toInsert.length - removedCount,
inserted: toInsert.length,
deleted: toDelete.length,
};
}
/**
* Run cycle detection rooted at each newly-introduced edge's target and
* delete any closing edge that belongs to a cycle. Lookups for those rows
* then return `not_found`, which the editor renders as the cycle-aware
* placeholder. Returns the count of rows removed.
*/
private async removeCyclicEdgesIntroducedBy(
candidates: ReadonlyArray<{
referencePageId: string;
containingTransclusionId: string | null;
sourcePageId: string;
transclusionId: string;
}>,
trx?: KyselyTransaction,
): Promise<number> {
const seedKeys = new Set<string>();
const seeds: Array<{ sourcePageId: string; transclusionId: string }> = [];
for (const c of candidates) {
if (c.containingTransclusionId === null) continue;
const key = `${c.sourcePageId}::${c.transclusionId}`;
if (seedKeys.has(key)) continue;
seedKeys.add(key);
seeds.push({
sourcePageId: c.sourcePageId,
transclusionId: c.transclusionId,
});
}
if (seeds.length === 0) return 0;
const offendingIds = new Set<string>();
for (const seed of seeds) {
const cyclicEdges =
await this.pageTransclusionReferencesRepo.findCyclicEdgesForSource(
seed.sourcePageId,
seed.transclusionId,
trx,
);
for (const edge of cyclicEdges) offendingIds.add(edge.id);
}
if (offendingIds.size === 0) return 0;
await this.pageTransclusionReferencesRepo.deleteByIds(
Array.from(offendingIds),
trx,
);
return offendingIds.size;
}
/**
* Extract transclusions from each page's PM JSON and bulk-insert into
* `page_transclusions` in a single statement. Intended for brand-new pages
@@ -235,12 +177,8 @@ export class TransclusionService {
/**
* Walk each page's PM JSON for `transclusionReference` nodes and bulk-insert
* one row per `(containing, source, target)` triple. For brand-new pages
* one row per `(referencePage, source, target)`. For brand-new pages
* (duplication, import) where there is nothing to diff against.
*
* Cycle detection runs once per distinct seed source after the bulk insert;
* any closing edges are removed so lookups return `not_found` and the
* editor renders the cycle-aware placeholder.
*/
async insertReferencesForPages(
pages: Array<{ id: string; content: unknown }>,
@@ -248,7 +186,6 @@ export class TransclusionService {
): Promise<{ inserted: number }> {
const rows: Array<{
referencePageId: string;
containingTransclusionId: string | null;
sourcePageId: string;
transclusionId: string;
}> = [];
@@ -257,7 +194,6 @@ export class TransclusionService {
for (const r of refs) {
rows.push({
referencePageId: page.id,
containingTransclusionId: r.containingTransclusionId,
sourcePageId: r.sourcePageId,
transclusionId: r.transclusionId,
});
@@ -265,9 +201,7 @@ export class TransclusionService {
}
if (rows.length === 0) return { inserted: 0 };
await this.pageTransclusionReferencesRepo.insertMany(rows, trx);
const removedCount = await this.removeCyclicEdgesIntroducedBy(rows, trx);
return { inserted: rows.length - removedCount };
return { inserted: rows.length };
}
async lookup(
@@ -4,12 +4,6 @@ const TRANSCLUSION_TYPE = 'transclusionSource';
const REFERENCE_TYPE = 'transclusionReference';
export type TransclusionReferenceSnapshot = {
/**
* Id of the `transclusion` (source) node whose content holds this reference,
* or `null` if the reference is loose on the page (not nested inside a source).
* Used by the cycle-detection CTE to walk source-to-source edges.
*/
containingTransclusionId: string | null;
sourcePageId: string;
transclusionId: string;
};
@@ -53,9 +47,9 @@ export function collectTransclusionsFromPmJson(
/**
* Walks a ProseMirror JSON document and returns one snapshot per unique
* `(containingTransclusionId, sourcePageId, transclusionId)` triple found on
* `transclusionReference` nodes. Recurses into every container, including
* `transclusion` (a source node may contain a reference to another source).
* `(sourcePageId, transclusionId)` pair found on `transclusionReference`
* nodes. The schema forbids references inside a `transclusionSource` so this
* walk stops at source boundaries — references can only appear at page level.
* Order preserved by first-seen.
*/
export function collectReferencesFromPmJson(
@@ -66,7 +60,7 @@ export function collectReferencesFromPmJson(
const seen = new Set<string>();
const out: TransclusionReferenceSnapshot[] = [];
const visit = (node: any, containingTransclusionId: string | null): void => {
const visit = (node: any): void => {
if (!node || typeof node !== 'object') return;
if (node.type === REFERENCE_TYPE) {
@@ -78,29 +72,24 @@ export function collectReferencesFromPmJson(
typeof transclusionId === 'string' &&
transclusionId.length > 0
) {
const key = `${containingTransclusionId ?? ''}::${sourcePageId}::${transclusionId}`;
const key = `${sourcePageId}::${transclusionId}`;
if (!seen.has(key)) {
seen.add(key);
out.push({
containingTransclusionId,
sourcePageId,
transclusionId,
});
out.push({ sourcePageId, transclusionId });
}
}
return; // atom node - no children
}
const nextContainer =
node.type === TRANSCLUSION_TYPE && typeof node.attrs?.id === 'string'
? node.attrs.id
: containingTransclusionId;
// References cannot live inside a source (schema-enforced); skip recursing
// so a malformed inbound doc can't sneak in a nested reference here.
if (node.type === TRANSCLUSION_TYPE) return;
if (Array.isArray(node.content)) {
for (const child of node.content) visit(child, nextContainer);
for (const child of node.content) visit(child);
}
};
visit(doc, null);
visit(doc);
return out;
}
@@ -31,7 +31,6 @@ export async function up(db: Kysely<any>): Promise<void> {
.addColumn('reference_page_id', 'uuid', (col) =>
col.notNull().references('pages.id').onDelete('cascade'),
)
.addColumn('containing_transclusion_id', 'varchar')
.addColumn('source_page_id', 'uuid', (col) =>
col.notNull().references('pages.id').onDelete('cascade'),
)
@@ -41,7 +40,6 @@ export async function up(db: Kysely<any>): Promise<void> {
)
.addUniqueConstraint('page_transclusion_references_unique', [
'reference_page_id',
'containing_transclusion_id',
'source_page_id',
'transclusion_id',
])
@@ -52,12 +50,6 @@ export async function up(db: Kysely<any>): Promise<void> {
.on('page_transclusion_references')
.columns(['source_page_id', 'transclusion_id'])
.execute();
await db.schema
.createIndex('idx_page_transclusion_references_container')
.on('page_transclusion_references')
.columns(['reference_page_id', 'containing_transclusion_id'])
.execute();
}
export async function down(db: Kysely<any>): Promise<void> {
@@ -1,6 +1,5 @@
import { Injectable } from '@nestjs/common';
import { InjectKysely } from 'nestjs-kysely';
import { sql } from 'kysely';
import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
import { dbOrTx } from '@docmost/db/utils';
import {
@@ -9,7 +8,6 @@ import {
} from '@docmost/db/types/entity.types';
export type TransclusionReferenceKey = {
containingTransclusionId: string | null;
sourcePageId: string;
transclusionId: string;
};
@@ -54,12 +52,7 @@ export class PageTransclusionReferencesRepo {
.values(rows)
.onConflict((oc) =>
oc
.columns([
'referencePageId',
'containingTransclusionId',
'sourcePageId',
'transclusionId',
])
.columns(['referencePageId', 'sourcePageId', 'transclusionId'])
.doNothing(),
)
.execute();
@@ -78,13 +71,6 @@ export class PageTransclusionReferencesRepo {
eb.or(
keys.map((k) =>
eb.and([
k.containingTransclusionId === null
? eb('containingTransclusionId', 'is', null)
: eb(
'containingTransclusionId',
'=',
k.containingTransclusionId,
),
eb('sourcePageId', '=', k.sourcePageId),
eb('transclusionId', '=', k.transclusionId),
]),
@@ -107,75 +93,4 @@ export class PageTransclusionReferencesRepo {
.where('transclusionId', '=', transclusionId)
.execute();
}
async deleteByIds(ids: string[], trx?: KyselyTransaction): Promise<void> {
if (ids.length === 0) return;
await dbOrTx(this.db, trx)
.deleteFrom('pageTransclusionReferences')
.where('id', 'in', ids)
.execute();
}
/**
* Finds reference rows that participate in a cycle reachable from a given
* source `(pageId, transclusionId)`. The walk follows source-to-source edges
* (rows where `containing_transclusion_id IS NOT NULL`); loose page-level
* references are not graph edges and are ignored.
*
* Returned rows are the *closing edges* — those whose insertion completed a
* cycle. They are the safe set to remove to break the cycle while preserving
* unrelated structure.
*/
async findCyclicEdgesForSource(
sourcePageId: string,
transclusionId: string,
trx?: KyselyTransaction,
): Promise<PageTransclusionReference[]> {
const rows = await sql<PageTransclusionReference>`
WITH RECURSIVE walk(
start_page,
start_id,
page_id,
transclusion_id,
edge_id,
is_cycle,
path
) AS (
SELECT
${sourcePageId}::uuid,
${transclusionId}::varchar,
${sourcePageId}::uuid,
${transclusionId}::varchar,
NULL::uuid,
false,
ARRAY[(${sourcePageId}::uuid, ${transclusionId}::varchar)]
UNION ALL
SELECT
w.start_page,
w.start_id,
r.source_page_id,
r.transclusion_id,
r.id,
(r.source_page_id, r.transclusion_id) = ANY(w.path),
w.path || ARRAY[(r.source_page_id, r.transclusion_id)]
FROM page_transclusion_references r
JOIN walk w
ON r.reference_page_id = w.page_id
AND r.containing_transclusion_id = w.transclusion_id
WHERE r.containing_transclusion_id IS NOT NULL
AND NOT w.is_cycle
)
SELECT
r.id,
r.created_at AS "createdAt",
r.reference_page_id AS "referencePageId",
r.containing_transclusion_id AS "containingTransclusionId",
r.source_page_id AS "sourcePageId",
r.transclusion_id AS "transclusionId"
FROM walk w
JOIN page_transclusion_references r ON r.id = w.edge_id
WHERE w.is_cycle
`.execute(dbOrTx(this.db, trx));
return rows.rows;
}
}
-1
View File
@@ -232,7 +232,6 @@ export interface PageTransclusionReferences {
createdAt: Generated<Timestamp>;
transclusionId: string;
referencePageId: string;
containingTransclusionId: string | null;
id: Generated<string>;
sourcePageId: string;
}
@@ -0,0 +1,41 @@
/**
 * Allow-list of block node types that may appear directly inside a
 * `transclusionSource`. The list is joined into the content expression
 * exported at the bottom of this file, so element order is significant
 * and must be kept stable.
 *
 * Deliberately left out:
 *   - `transclusionSource`: a sync block cannot wrap another sync block
 *     (sources are leaves).
 *   - `transclusionReference`: a sync block cannot transclude another sync
 *     block. This keeps the transclusion graph acyclic, so the renderer can
 *     skip cycle-aware traversal entirely.
 *
 * Child-only node types (`listItem`, `tableRow`, `column`, etc.) are not
 * listed either, because their parent containers already constrain them.
 */
export const TRANSCLUSION_SOURCE_ALLOWED_NODE_TYPES = [
  'paragraph', 'heading', 'blockquote', 'codeBlock', 'horizontalRule',
  'bulletList', 'orderedList', 'taskList',
  'image', 'video', 'audio', 'attachment',
  'callout', 'details', 'embed', 'mathBlock', 'table',
  'drawio', 'excalidraw', 'pdf', 'subpages', 'columns', 'youtube',
] as const;

/** Union of the string literals listed in the allow-list above. */
export type TransclusionSourceAllowedNodeType =
  (typeof TRANSCLUSION_SOURCE_ALLOWED_NODE_TYPES)[number];

// The allowed node names separated by ` | `, i.e. the alternatives of the
// content expression without the surrounding group.
const allowedAlternatives = TRANSCLUSION_SOURCE_ALLOWED_NODE_TYPES.join(' | ');

/**
 * Content expression built from the allow-list: every allowed node name as
 * an alternative, wrapped in a group that must match one or more times.
 */
export const TRANSCLUSION_SOURCE_CONTENT_EXPRESSION =
  '(' + allowedAlternatives + ')+';
@@ -1,2 +1,3 @@
export * from "./constants";
export * from "./transclusion-source";
export * from "./transclusion-reference";
@@ -1,6 +1,6 @@
import { mergeAttributes, Node } from "@tiptap/core";
import { ReactNodeViewRenderer } from "@tiptap/react";
import { Plugin, PluginKey } from "@tiptap/pm/state";
import { TRANSCLUSION_SOURCE_CONTENT_EXPRESSION } from "./constants";
export interface TransclusionSourceOptions {
HTMLAttributes: Record<string, any>;
@@ -34,7 +34,8 @@ export const TransclusionSource = Node.create<TransclusionSourceOptions>({
},
group: "block",
content: "block+",
// Schema-enforced allow-list. Excludes `transclusionSource` (no nesting) and
// `transclusionReference` (a source cannot transclude another source).
content: TRANSCLUSION_SOURCE_CONTENT_EXPRESSION,
defining: true,
isolating: true,
@@ -130,30 +131,4 @@ export const TransclusionSource = Node.create<TransclusionSourceOptions>({
this.editor.isInitialized = true;
return ReactNodeViewRenderer(this.options.view);
},
addProseMirrorPlugins() {
const typeName = this.name;
return [
new Plugin({
key: new PluginKey(`${typeName}-noNesting`),
filterTransaction: (tr) => {
if (!tr.docChanged) return true;
let nested = false;
tr.doc.descendants((node, pos) => {
if (nested) return false;
if (node.type.name !== typeName) return true;
const $pos = tr.doc.resolve(pos);
for (let depth = $pos.depth; depth > 0; depth -= 1) {
if ($pos.node(depth).type.name === typeName) {
nested = true;
return false;
}
}
return false;
});
return !nested;
},
}),
];
},
});