From 3e66aff1e25ba64bcd6045d0c239476d37453678 Mon Sep 17 00:00:00 2001 From: Philipinho <16838612+Philipinho@users.noreply.github.com> Date: Sat, 16 May 2026 01:21:14 +0100 Subject: [PATCH] feat(import): extract Confluence indent helper with auto-detect unit and tests --- .../import/utils/confluence-indent.spec.ts | 149 ++++++++++++++++++ .../import/utils/confluence-indent.ts | 76 +++++++++ .../import/utils/import-formatter.ts | 76 +-------- 3 files changed, 227 insertions(+), 74 deletions(-) create mode 100644 apps/server/src/integrations/import/utils/confluence-indent.spec.ts create mode 100644 apps/server/src/integrations/import/utils/confluence-indent.ts diff --git a/apps/server/src/integrations/import/utils/confluence-indent.spec.ts b/apps/server/src/integrations/import/utils/confluence-indent.spec.ts new file mode 100644 index 000000000..c420dbba3 --- /dev/null +++ b/apps/server/src/integrations/import/utils/confluence-indent.spec.ts @@ -0,0 +1,149 @@ +import { load } from 'cheerio'; +import { applyConfluenceMarginLeftIndent } from './confluence-indent'; + +function run(html: string): string { + const $ = load(html); + applyConfluenceMarginLeftIndent($, $.root()); + // cheerio's html() includes
the html/head/body document wrapper; return the body's inner HTML so + // tests can assert on the meaningful portion. + return $('body').html() ?? $.html(); +} + +describe('applyConfluenceMarginLeftIndent', () => { + describe('Confluence Cloud (30 px per level, max 6)', () => { + it('maps 30/60/90/120/150/180 px to data-indent 1..6', () => { + const html = + 'L1
' + + 'L2
' + + 'L3
' + + 'L4
' + + 'L5
' + + 'L6
'; + const out = run(html); + expect(out).toContain('L1
'); + expect(out).toContain('L2
'); + expect(out).toContain('L3
'); + expect(out).toContain('L4
'); + expect(out).toContain('L5
'); + expect(out).toContain('L6
'); + expect(out).not.toContain('margin-left'); + }); + }); + + describe('Confluence Data Center (40 px per level, no upper bound)', () => { + it('maps 40/80/120/160/200/240 px to data-indent 1..6', () => { + const html = + 'one
' + + 'two
' + + 'three
' + + 'four
' + + 'five
' + + 'six
'; + const out = run(html); + expect(out).toContain('one
'); + expect(out).toContain('two
'); + expect(out).toContain('three
'); + expect(out).toContain('four
'); + expect(out).toContain('five
'); + expect(out).toContain('six
'); + expect(out).not.toContain('margin-left'); + }); + + it('clamps DC levels above 8 down to 8', () => { + const html = + 'L8
' + + 'L9
' + + 'L15
'; + const out = run(html); + expect(out).toContain('L8
'); + expect(out).toContain('L9
'); + expect(out).toContain('L15
'); + }); + }); + + describe('headings', () => { + it('handles indent on h1-h6 the same way as paragraphs', () => { + const html = + 'x
'; + const out = run(html); + expect(out).toMatch(/x<\/p>/); + expect(out).not.toContain('margin-left'); + }); + + it('removes the style attribute entirely when only margin-left was set', () => { + // Two values so GCD detection sees a real unit (60 px) instead of + // collapsing to the lone value. The point of this test is the style + // attribute being stripped, not the level number. + const html = + '
x
' + + 'y
'; + const out = run(html); + expect(out).toContain('x
'); + expect(out).toContain('y
'); + expect(out).not.toContain('style='); + }); + }); + + describe('scope and edge cases', () => { + it('leaves elements without margin-left untouched', () => { + const html = 'plain
plain
zero
' + + 'neg
' + + 'auto
'; + const out = run(html); + expect(out).not.toContain('data-indent'); + }); + + it('honors an explicit pxPerLevel override', () => { + // Mixed Cloud-and-DC nominal values forced to 40 px/level interpretation. + const $ = load( + 'a
' + + 'b
', + ); + applyConfluenceMarginLeftIndent($, $.root(), { pxPerLevel: 40 }); + const out = $('body').html() ?? ''; + expect(out).toContain('a
'); + expect(out).toContain('b
'); + }); + + it('returns a no-op when no indented elements are present', () => { + const html = 'hi
'; + const out = run(html); + expect(out).toBe('hi
'); + }); + + it('handles a single ambiguous value by clamping to level 1', () => { + // GCD of a single value is the value itself, so 120 / 120 = 1. + const html = 'only
'; + const out = run(html); + expect(out).toContain('only
'); + }); + }); +}); diff --git a/apps/server/src/integrations/import/utils/confluence-indent.ts b/apps/server/src/integrations/import/utils/confluence-indent.ts new file mode 100644 index 000000000..3bdc3aa2d --- /dev/null +++ b/apps/server/src/integrations/import/utils/confluence-indent.ts @@ -0,0 +1,76 @@ +import { Cheerio, CheerioAPI } from 'cheerio'; + +// Maximum indent level supported by the Indent editor extension (see +// packages/editor-ext/src/lib/indent.ts). Values above this clamp down. +const MAX_INDENT_LEVEL = 8; +const MARGIN_LEFT_RE = /margin-left\s*:\s*(-?\d*\.?\d+)\s*px/i; +const MARGIN_LEFT_STRIP_RE = /margin-left\s*:\s*-?\d*\.?\d+\s*px\s*;?/i; + +/** + * Confluence encodes paragraph indent as inline `style="margin-left: Npx"`. + * The per-level pixel value differs by edition: Cloud uses 30 (max 6 levels), + * Data Center uses 40 (no upper limit). The HTML-export ZIP path has no + * edition information available, so we auto-detect the per-level unit from + * the GCD of all margin-left values in the document. The API converter can + * pass `pxPerLevel` explicitly when the edition is known. + * + * Levels are written to `data-indent` for the TipTap Indent extension to + * pick up; the margin-left style is stripped from the element so the + * normalized indent doesn't double up with the editor's own indent padding. + */ +export function applyConfluenceMarginLeftIndent( + $: CheerioAPI, + $root: Cheerio