diff --git a/apps/server/src/integrations/import/utils/confluence-indent.spec.ts b/apps/server/src/integrations/import/utils/confluence-indent.spec.ts
new file mode 100644
index 000000000..c420dbba3
--- /dev/null
+++ b/apps/server/src/integrations/import/utils/confluence-indent.spec.ts
@@ -0,0 +1,149 @@
+import { load } from 'cheerio';
+import { applyConfluenceMarginLeftIndent } from './confluence-indent';
+
+/**
+ * Test helper: loads `html` with cheerio, runs the indent normalizer over the
+ * whole document, and returns the resulting markup for assertions.
+ */
+function run(html: string): string {
+  const $ = load(html);
+  applyConfluenceMarginLeftIndent($, $.root());
+  // cheerio's html() includes the <html>, <head> and <body> wrappers; return
+  // the body's inner HTML so tests can assert on the meaningful portion.
+  return $('body').html() ?? $.html();
+}
+
+describe('applyConfluenceMarginLeftIndent', () => {
+ describe('Confluence Cloud (30 px per level, max 6)', () => {
+ it('maps 30/60/90/120/150/180 px to data-indent 1..6', () => {
+ const html =
+ 'L1
' +
+ 'L2
' +
+ 'L3
' +
+ 'L4
' +
+ 'L5
' +
+ 'L6
';
+ const out = run(html);
+ expect(out).toContain('L1
');
+ expect(out).toContain('L2
');
+ expect(out).toContain('L3
');
+ expect(out).toContain('L4
');
+ expect(out).toContain('L5
');
+ expect(out).toContain('L6
');
+ expect(out).not.toContain('margin-left');
+ });
+ });
+
+ describe('Confluence Data Center (40 px per level, no upper bound)', () => {
+ it('maps 40/80/120/160/200/240 px to data-indent 1..6', () => {
+ const html =
+ 'one
' +
+ 'two
' +
+ 'three
' +
+ 'four
' +
+ 'five
' +
+ 'six
';
+ const out = run(html);
+ expect(out).toContain('one
');
+ expect(out).toContain('two
');
+ expect(out).toContain('three
');
+ expect(out).toContain('four
');
+ expect(out).toContain('five
');
+ expect(out).toContain('six
');
+ expect(out).not.toContain('margin-left');
+ });
+
+ it('clamps DC levels above 8 down to 8', () => {
+ const html =
+ 'L8
' +
+ 'L9
' +
+ 'L15
';
+ const out = run(html);
+ expect(out).toContain('L8
');
+ expect(out).toContain('L9
');
+ expect(out).toContain('L15
');
+ });
+ });
+
+ describe('headings', () => {
+ it('handles indent on h1-h6 the same way as paragraphs', () => {
+ const html =
+ 'a
' +
+ 'b
';
+ const out = run(html);
+ expect(out).toContain('a
');
+ expect(out).toContain('b
');
+ });
+ });
+
+ describe('style attribute handling', () => {
+ it('strips margin-left but preserves other inline styles', () => {
+ const html =
+ 'x
';
+ const out = run(html);
+ expect(out).toMatch(/x<\/p>/);
+ expect(out).not.toContain('margin-left');
+ });
+
+ it('removes the style attribute entirely when only margin-left was set', () => {
+ // Two values so GCD detection sees a real unit (60 px) instead of
+ // collapsing to the lone value. The point of this test is the style
+ // attribute being stripped, not the level number.
+ const html =
+ '
x
' +
+ 'y
';
+ const out = run(html);
+ expect(out).toContain('x
');
+ expect(out).toContain('y
');
+ expect(out).not.toContain('style=');
+ });
+ });
+
+ describe('scope and edge cases', () => {
+ it('leaves elements without margin-left untouched', () => {
+ const html = 'plain
heading
';
+ const out = run(html);
+ expect(out).toBe('plain
heading
');
+ });
+
+ it('does not touch divs, spans, or list items', () => {
+ const html =
+ 'div
' +
+ 'li' +
+ 'span';
+ const out = run(html);
+ expect(out).not.toContain('data-indent');
+ expect(out).toContain('margin-left: 30px');
+ });
+
+ it('ignores zero, negative, and unparseable margin-left values', () => {
+ const html =
+ 'zero
' +
+ 'neg
' +
+ 'auto
';
+ const out = run(html);
+ expect(out).not.toContain('data-indent');
+ });
+
+ it('honors an explicit pxPerLevel override', () => {
+ // Mixed Cloud-and-DC nominal values forced to 40 px/level interpretation.
+ const $ = load(
+ 'a
' +
+ 'b
',
+ );
+ applyConfluenceMarginLeftIndent($, $.root(), { pxPerLevel: 40 });
+ const out = $('body').html() ?? '';
+ expect(out).toContain('a
');
+ expect(out).toContain('b
');
+ });
+
+ it('returns a no-op when no indented elements are present', () => {
+ const html = 'hi
';
+ const out = run(html);
+ expect(out).toBe('hi
');
+ });
+
+ it('handles a single ambiguous value by clamping to level 1', () => {
+ // GCD of a single value is the value itself, so 120 / 120 = 1.
+ const html = 'only
';
+ const out = run(html);
+ expect(out).toContain('only
');
+ });
+ });
+});
diff --git a/apps/server/src/integrations/import/utils/confluence-indent.ts b/apps/server/src/integrations/import/utils/confluence-indent.ts
new file mode 100644
index 000000000..3bdc3aa2d
--- /dev/null
+++ b/apps/server/src/integrations/import/utils/confluence-indent.ts
@@ -0,0 +1,76 @@
+import { Cheerio, CheerioAPI } from 'cheerio';
+
+// Maximum indent level supported by the Indent editor extension (see
+// packages/editor-ext/src/lib/indent.ts); higher computed levels are clamped.
+const MAX_INDENT_LEVEL = 8;
+const MARGIN_LEFT_RE = /margin-left\s*:\s*(-?\d*\.?\d+)\s*px/i; // group 1 = px amount
+const MARGIN_LEFT_STRIP_RE = /margin-left\s*:\s*-?\d*\.?\d+\s*px\s*;?/i; // + optional `;`
+
+/**
+ * Converts Confluence's pixel-based indentation into `data-indent` levels on
+ * every `p` and `h1`-`h6` element found under `$root`.
+ *
+ * Confluence encodes paragraph indent as inline `style="margin-left: Npx"`.
+ * One level is 30 px on Cloud (max 6 levels) and 40 px on Data Center (no
+ * upper limit). The HTML-export ZIP path has no edition information, so
+ * unless `options.pxPerLevel` is given the per-level unit is auto-detected
+ * as the GCD of all positive margin-left values in the document.
+ *
+ * Each level is written to `data-indent` (clamped to 1..MAX_INDENT_LEVEL)
+ * for the TipTap Indent extension, and the margin-left declaration is
+ * stripped so the indent doesn't double up with the editor's own padding.
+ *
+ * @param $ - Cheerio API used to wrap the matched nodes.
+ * @param $root - Subtree to scan; matching elements are mutated in place.
+ * @param options - `pxPerLevel` overrides unit auto-detection.
+ */
+export function applyConfluenceMarginLeftIndent(
+  $: CheerioAPI,
+  $root: Cheerio<any>,
+  options?: { pxPerLevel?: number },
+): void {
+  // Parse each candidate once and remember what the rewrite step needs.
+  const indented: { $el: Cheerio<any>; style: string; px: number }[] = [];
+  $root.find('p, h1, h2, h3, h4, h5, h6').each((_, el) => {
+    const $el = $(el);
+    const style = $el.attr('style');
+    if (!style) return;
+    const px = parseFloat(MARGIN_LEFT_RE.exec(style)?.[1] ?? '');
+    // No match parses as NaN; zero and negative margins are ignored too.
+    if (Number.isFinite(px) && px > 0) indented.push({ $el, style, px });
+  });
+  if (indented.length === 0) return;
+
+  const unit =
+    options?.pxPerLevel ?? detectIndentUnit(indented.map((entry) => entry.px));
+  if (!unit || unit <= 0) return;
+
+  for (const { $el, style, px } of indented) {
+    const level = Math.min(
+      MAX_INDENT_LEVEL,
+      Math.max(1, Math.round(px / unit)),
+    );
+    $el.attr('data-indent', String(level));
+    // Remove only the margin-left declaration; keep other inline styles.
+    const remaining = style.replace(MARGIN_LEFT_STRIP_RE, '').trim();
+    if (remaining) {
+      $el.attr('style', remaining);
+    } else {
+      $el.removeAttr('style');
+    }
+  }
+}
+
+function detectIndentUnit(values: number[]): number {
+  // GCD of the rounded pixel values ("30.0" -> 30); skips values that round
+  // to <= 0 and yields 0 for an empty list. gcd(0, n) === n seeds the fold.
+  const sizes = values.map(Math.round).filter((n) => n > 0);
+  return sizes.reduce((acc, n) => gcd(acc, n), 0);
+}
+
+function gcd(a: number, b: number): number {
+  // Euclid's algorithm: (x, y) -> (y, x mod y) until the remainder is 0.
+  let [x, y] = [a, b];
+  while (y !== 0) [x, y] = [y, x % y];
+  return x;
+}
diff --git a/apps/server/src/integrations/import/utils/import-formatter.ts b/apps/server/src/integrations/import/utils/import-formatter.ts
index 87cfd4cd0..7b8f6892e 100644
--- a/apps/server/src/integrations/import/utils/import-formatter.ts
+++ b/apps/server/src/integrations/import/utils/import-formatter.ts
@@ -97,80 +97,8 @@ export function xwikiFormatter($: CheerioAPI, $root: Cheerio) {
}
}
-// Maximum indent level supported by the Indent editor extension (see
-// packages/editor-ext/src/lib/indent.ts). Values above this clamp down.
-const MAX_INDENT_LEVEL = 8;
-const MARGIN_LEFT_RE = /margin-left\s*:\s*(-?\d*\.?\d+)\s*px/i;
-const MARGIN_LEFT_STRIP_RE = /margin-left\s*:\s*-?\d*\.?\d+\s*px\s*;?/i;
-
-/**
- * Confluence encodes paragraph indent as inline `style="margin-left: Npx"`.
- * The per-level pixel value differs by edition: Cloud uses 30 (max 6 levels),
- * Data Center uses 40 (no upper limit). The HTML-export ZIP path has no
- * edition information available, so we auto-detect the per-level unit from
- * the GCD of all margin-left values in the document. The API converter can
- * pass `pxPerLevel` explicitly when the edition is known.
- *
- * Levels are written to `data-indent` for the TipTap Indent extension to
- * pick up; the margin-left style is stripped from the element so the
- * normalized indent doesn't double up with the editor's own indent padding.
- */
-export function applyConfluenceMarginLeftIndent(
- $: CheerioAPI,
- $root: Cheerio,
- options?: { pxPerLevel?: number },
-): void {
- const $els = $root.find('p, h1, h2, h3, h4, h5, h6');
-
- const values: number[] = [];
- $els.each((_, el) => {
- const style = $(el).attr('style');
- if (!style) return;
- const match = MARGIN_LEFT_RE.exec(style);
- if (!match) return;
- const px = parseFloat(match[1]);
- if (Number.isFinite(px) && px > 0) values.push(px);
- });
- if (values.length === 0) return;
-
- const unit = options?.pxPerLevel ?? detectIndentUnit(values);
- if (!unit || unit <= 0) return;
-
- $els.each((_, el) => {
- const $el = $(el);
- const style = $el.attr('style');
- if (!style) return;
- const match = MARGIN_LEFT_RE.exec(style);
- if (!match) return;
- const px = parseFloat(match[1]);
- if (!Number.isFinite(px) || px <= 0) return;
- const level = Math.min(
- MAX_INDENT_LEVEL,
- Math.max(1, Math.round(px / unit)),
- );
- $el.attr('data-indent', String(level));
- const remaining = style.replace(MARGIN_LEFT_STRIP_RE, '').trim();
- if (remaining) {
- $el.attr('style', remaining);
- } else {
- $el.removeAttr('style');
- }
- });
-}
-
-function detectIndentUnit(values: number[]): number {
- // Confluence emits floats like "30.0"; round to ints for a clean GCD.
- const ints = values.map((v) => Math.round(v)).filter((v) => v > 0);
- if (ints.length === 0) return 0;
- return ints.reduce((a, b) => gcd(a, b));
-}
-
-function gcd(a: number, b: number): number {
- while (b !== 0) {
- [a, b] = [b, a % b];
- }
- return a;
-}
+import { applyConfluenceMarginLeftIndent } from './confluence-indent';
+export { applyConfluenceMarginLeftIndent };
export function defaultHtmlFormatter($: CheerioAPI, $root: Cheerio) {
normalizeTableColumnWidths($, $root);