mirror of
https://github.com/docmost/docmost.git
synced 2026-05-20 00:14:10 +08:00
feat(import): extract Confluence indent helper with auto-detect unit and tests
This commit is contained in:
@@ -0,0 +1,149 @@
|
||||
import { load } from 'cheerio';
|
||||
import { applyConfluenceMarginLeftIndent } from './confluence-indent';
|
||||
|
||||
/**
 * Test helper: parses `html` with cheerio, applies the Confluence indent
 * conversion to the whole document, and returns the serialized markup.
 */
function run(html: string): string {
  const doc = load(html);
  applyConfluenceMarginLeftIndent(doc, doc.root());

  // Assertions target the inner HTML of <body> rather than the full
  // <html><body> wrapper cheerio adds; if that comes back null, fall back to
  // serializing the whole document.
  const bodyHtml = doc('body').html();
  return bodyHtml ?? doc.html();
}
|
||||
|
||||
describe('applyConfluenceMarginLeftIndent', () => {
  // Asserts that each fragment occurs somewhere in the serialized output.
  const expectAll = (out: string, fragments: string[]): void => {
    for (const fragment of fragments) {
      expect(out).toContain(fragment);
    }
  };

  describe('Confluence Cloud (30 px per level, max 6)', () => {
    it('maps 30/60/90/120/150/180 px to data-indent 1..6', () => {
      const out = run(
        [
          '<p style="margin-left: 30.0px;">L1</p>',
          '<p style="margin-left: 60.0px;">L2</p>',
          '<p style="margin-left: 90.0px;">L3</p>',
          '<p style="margin-left: 120.0px;">L4</p>',
          '<p style="margin-left: 150.0px;">L5</p>',
          '<p style="margin-left: 180.0px;">L6</p>',
        ].join(''),
      );

      expectAll(out, [
        '<p data-indent="1">L1</p>',
        '<p data-indent="2">L2</p>',
        '<p data-indent="3">L3</p>',
        '<p data-indent="4">L4</p>',
        '<p data-indent="5">L5</p>',
        '<p data-indent="6">L6</p>',
      ]);
      expect(out).not.toContain('margin-left');
    });
  });

  describe('Confluence Data Center (40 px per level, no upper bound)', () => {
    it('maps 40/80/120/160/200/240 px to data-indent 1..6', () => {
      const out = run(
        [
          '<p style="margin-left: 40.0px;">one</p>',
          '<p style="margin-left: 80.0px;">two</p>',
          '<p style="margin-left: 120.0px;">three</p>',
          '<p style="margin-left: 160.0px;">four</p>',
          '<p style="margin-left: 200.0px;">five</p>',
          '<p style="margin-left: 240.0px;">six</p>',
        ].join(''),
      );

      expectAll(out, [
        '<p data-indent="1">one</p>',
        '<p data-indent="2">two</p>',
        '<p data-indent="3">three</p>',
        '<p data-indent="4">four</p>',
        '<p data-indent="5">five</p>',
        '<p data-indent="6">six</p>',
      ]);
      expect(out).not.toContain('margin-left');
    });

    it('clamps DC levels above 8 down to 8', () => {
      const out = run(
        [
          '<p style="margin-left: 320.0px;">L8</p>',
          '<p style="margin-left: 360.0px;">L9</p>',
          '<p style="margin-left: 600.0px;">L15</p>',
        ].join(''),
      );

      expectAll(out, [
        '<p data-indent="8">L8</p>',
        '<p data-indent="8">L9</p>',
        '<p data-indent="8">L15</p>',
      ]);
    });
  });

  describe('headings', () => {
    it('handles indent on h1-h6 the same way as paragraphs', () => {
      const out = run(
        [
          '<h1 style="margin-left: 30px;">a</h1>',
          '<h6 style="margin-left: 90px;">b</h6>',
        ].join(''),
      );

      expectAll(out, [
        '<h1 data-indent="1">a</h1>',
        '<h6 data-indent="3">b</h6>',
      ]);
    });
  });

  describe('style attribute handling', () => {
    it('strips margin-left but preserves other inline styles', () => {
      const out = run(
        '<p style="color: red; margin-left: 30px; font-weight: bold;">x</p>',
      );

      // The pattern tolerates the whitespace left behind where the
      // declaration was cut out of the middle of the style string.
      expect(out).toMatch(/<p style="color: red;\s+font-weight: bold;?" data-indent="1">x<\/p>/);
      expect(out).not.toContain('margin-left');
    });

    it('removes the style attribute entirely when only margin-left was set', () => {
      // Two distinct values are supplied so unit detection yields 60 px rather
      // than the single value itself. What matters here is that the style
      // attribute disappears, not which level is assigned.
      const out = run(
        [
          '<p style="margin-left: 60px;">x</p>',
          '<p style="margin-left: 120px;">y</p>',
        ].join(''),
      );

      expectAll(out, ['<p data-indent="1">x</p>', '<p data-indent="2">y</p>']);
      expect(out).not.toContain('style=');
    });
  });

  describe('scope and edge cases', () => {
    it('leaves elements without margin-left untouched', () => {
      const out = run('<p>plain</p><h2>heading</h2>');

      expect(out).toBe('<p>plain</p><h2>heading</h2>');
    });

    it('does not touch divs, spans, or list items', () => {
      const out = run(
        [
          '<div style="margin-left: 30px;">div</div>',
          '<li style="margin-left: 30px;">li</li>',
          '<span style="margin-left: 30px;">span</span>',
        ].join(''),
      );

      expect(out).not.toContain('data-indent');
      expect(out).toContain('margin-left: 30px');
    });

    it('ignores zero, negative, and unparseable margin-left values', () => {
      const out = run(
        [
          '<p style="margin-left: 0px;">zero</p>',
          '<p style="margin-left: -30px;">neg</p>',
          '<p style="margin-left: auto;">auto</p>',
        ].join(''),
      );

      expect(out).not.toContain('data-indent');
    });

    it('honors an explicit pxPerLevel override', () => {
      // The unit is supplied by the caller here (40 px per level) instead of
      // being detected from the document.
      const doc = load(
        [
          '<p style="margin-left: 40px;">a</p>',
          '<p style="margin-left: 80px;">b</p>',
        ].join(''),
      );
      applyConfluenceMarginLeftIndent(doc, doc.root(), { pxPerLevel: 40 });
      const out = doc('body').html() ?? '';

      expectAll(out, ['<p data-indent="1">a</p>', '<p data-indent="2">b</p>']);
    });

    it('returns a no-op when no indented elements are present', () => {
      const out = run('<p>hi</p>');

      expect(out).toBe('<p>hi</p>');
    });

    it('handles a single ambiguous value by clamping to level 1', () => {
      // With one value, the detected unit is that value: 120 / 120 = 1.
      const out = run('<p style="margin-left: 120px;">only</p>');

      expect(out).toContain('<p data-indent="1">only</p>');
    });
  });
});
|
||||
@@ -0,0 +1,76 @@
|
||||
import { Cheerio, CheerioAPI } from 'cheerio';
|
||||
|
||||
// Maximum indent level supported by the Indent editor extension (see
// packages/editor-ext/src/lib/indent.ts). Values above this clamp down.
const MAX_INDENT_LEVEL = 8;
// Captures the numeric amount (optional leading minus, optional fraction) of
// a `margin-left: <n>px` declaration inside a style string; case-insensitive.
const MARGIN_LEFT_RE = /margin-left\s*:\s*(-?\d*\.?\d+)\s*px/i;
// Same declaration plus an optional trailing semicolon, used to cut it out of
// the style string. It has no `g` flag, so a `replace` with it removes only
// the first occurrence.
const MARGIN_LEFT_STRIP_RE = /margin-left\s*:\s*-?\d*\.?\d+\s*px\s*;?/i;
|
||||
|
||||
/**
 * Rewrites Confluence's inline `style="margin-left: Npx"` indentation on
 * paragraphs and headings (`p`, `h1`..`h6`) below `$root` into a
 * `data-indent` level attribute.
 *
 * The pixel size of one level depends on the Confluence edition (30 for
 * Cloud, capped at 6 levels; 40 for Data Center, uncapped). Since an HTML
 * export ZIP carries no edition marker, the unit defaults to the GCD of every
 * positive margin-left found; callers that know the edition can pass
 * `options.pxPerLevel` instead.
 *
 * Each level is `round(px / unit)`, clamped to `1..MAX_INDENT_LEVEL`. The
 * margin-left declaration is removed from the element (and the whole `style`
 * attribute when nothing else is left) so the imported margin does not stack
 * on top of the editor's own indent padding.
 *
 * Elements whose margin-left is missing, not in px, zero, or negative are
 * left untouched. Nothing changes at all when no element qualifies or when
 * the resolved unit is not a positive number.
 *
 * @param $ cheerio instance used to wrap matched nodes.
 * @param $root subtree whose descendants are scanned.
 * @param options optional settings; `pxPerLevel` overrides unit detection.
 */
export function applyConfluenceMarginLeftIndent(
  $: CheerioAPI,
  $root: Cheerio<any>,
  options?: { pxPerLevel?: number },
): void {
  // Extracts a usable (finite, strictly positive) margin-left pixel amount
  // from a style string, or undefined when there is none.
  const positiveMarginPx = (style: string | undefined): number | undefined => {
    if (!style) return undefined;
    const found = style.match(MARGIN_LEFT_RE);
    if (found === null) return undefined;
    const px = parseFloat(found[1]);
    return Number.isFinite(px) && px > 0 ? px : undefined;
  };

  // Gather every qualifying element once, together with its original style
  // text and parsed margin, so the rewrite step does not need to re-parse.
  const targets: Array<{ $el: Cheerio<any>; style: string; px: number }> = [];
  $root.find('p, h1, h2, h3, h4, h5, h6').each((_, node) => {
    const $el = $(node);
    const style = $el.attr('style');
    const px = positiveMarginPx(style);
    if (style !== undefined && px !== undefined) {
      targets.push({ $el, style, px });
    }
  });
  if (targets.length === 0) return;

  const unit =
    options?.pxPerLevel ?? detectIndentUnit(targets.map((target) => target.px));
  if (!unit || unit <= 0) return;

  for (const { $el, style, px } of targets) {
    const rawLevel = Math.round(px / unit);
    const level = Math.min(MAX_INDENT_LEVEL, Math.max(1, rawLevel));
    $el.attr('data-indent', String(level));

    // Keep any unrelated declarations; drop the attribute if none remain.
    const remaining = style.replace(MARGIN_LEFT_STRIP_RE, '').trim();
    if (remaining) {
      $el.attr('style', remaining);
    } else {
      $el.removeAttr('style');
    }
  }
}
|
||||
|
||||
/**
 * Derives the pixel size of one indent level as the GCD of all margin values.
 *
 * Values are rounded to whole pixels first (Confluence writes floats such as
 * "30.0"); anything that does not round to a positive number is ignored.
 *
 * @returns the common divisor, or 0 when no usable value is present.
 */
function detectIndentUnit(values: number[]): number {
  let unit = 0;
  for (const value of values) {
    const rounded = Math.round(value);
    if (!(rounded > 0)) continue;
    // The first usable value seeds the result; later ones are folded in.
    unit = unit === 0 ? rounded : gcd(unit, rounded);
  }
  return unit;
}
|
||||
|
||||
/**
 * Greatest common divisor of `a` and `b` via the Euclidean algorithm.
 * Returns `a` unchanged when `b` is 0.
 */
function gcd(a: number, b: number): number {
  let dividend = a;
  let divisor = b;
  while (divisor !== 0) {
    const remainder = dividend % divisor;
    dividend = divisor;
    divisor = remainder;
  }
  return dividend;
}
|
||||
@@ -97,80 +97,8 @@ export function xwikiFormatter($: CheerioAPI, $root: Cheerio<any>) {
|
||||
}
|
||||
}
|
||||
|
||||
// Maximum indent level supported by the Indent editor extension (see
// packages/editor-ext/src/lib/indent.ts). Values above this clamp down.
const MAX_INDENT_LEVEL = 8;
// Captures the numeric amount (optional leading minus, optional fraction) of
// a `margin-left: <n>px` declaration inside a style string; case-insensitive.
const MARGIN_LEFT_RE = /margin-left\s*:\s*(-?\d*\.?\d+)\s*px/i;
// Same declaration plus an optional trailing semicolon, used to cut it out of
// the style string. It has no `g` flag, so a `replace` with it removes only
// the first occurrence.
const MARGIN_LEFT_STRIP_RE = /margin-left\s*:\s*-?\d*\.?\d+\s*px\s*;?/i;
|
||||
|
||||
/**
 * Rewrites Confluence's inline `style="margin-left: Npx"` indentation on
 * paragraphs and headings (`p`, `h1`..`h6`) below `$root` into a
 * `data-indent` level attribute.
 *
 * The pixel size of one level depends on the Confluence edition (30 for
 * Cloud, capped at 6 levels; 40 for Data Center, uncapped). Since an HTML
 * export ZIP carries no edition marker, the unit defaults to the GCD of every
 * positive margin-left found; callers that know the edition can pass
 * `options.pxPerLevel` instead.
 *
 * Each level is `round(px / unit)`, clamped to `1..MAX_INDENT_LEVEL`. The
 * margin-left declaration is removed from the element (and the whole `style`
 * attribute when nothing else is left) so the imported margin does not stack
 * on top of the editor's own indent padding.
 *
 * Elements whose margin-left is missing, not in px, zero, or negative are
 * left untouched. Nothing changes at all when no element qualifies or when
 * the resolved unit is not a positive number.
 *
 * @param $ cheerio instance used to wrap matched nodes.
 * @param $root subtree whose descendants are scanned.
 * @param options optional settings; `pxPerLevel` overrides unit detection.
 */
export function applyConfluenceMarginLeftIndent(
  $: CheerioAPI,
  $root: Cheerio<any>,
  options?: { pxPerLevel?: number },
): void {
  // Extracts a usable (finite, strictly positive) margin-left pixel amount
  // from a style string, or undefined when there is none.
  const positiveMarginPx = (style: string | undefined): number | undefined => {
    if (!style) return undefined;
    const found = style.match(MARGIN_LEFT_RE);
    if (found === null) return undefined;
    const px = parseFloat(found[1]);
    return Number.isFinite(px) && px > 0 ? px : undefined;
  };

  // Gather every qualifying element once, together with its original style
  // text and parsed margin, so the rewrite step does not need to re-parse.
  const targets: Array<{ $el: Cheerio<any>; style: string; px: number }> = [];
  $root.find('p, h1, h2, h3, h4, h5, h6').each((_, node) => {
    const $el = $(node);
    const style = $el.attr('style');
    const px = positiveMarginPx(style);
    if (style !== undefined && px !== undefined) {
      targets.push({ $el, style, px });
    }
  });
  if (targets.length === 0) return;

  const unit =
    options?.pxPerLevel ?? detectIndentUnit(targets.map((target) => target.px));
  if (!unit || unit <= 0) return;

  for (const { $el, style, px } of targets) {
    const rawLevel = Math.round(px / unit);
    const level = Math.min(MAX_INDENT_LEVEL, Math.max(1, rawLevel));
    $el.attr('data-indent', String(level));

    // Keep any unrelated declarations; drop the attribute if none remain.
    const remaining = style.replace(MARGIN_LEFT_STRIP_RE, '').trim();
    if (remaining) {
      $el.attr('style', remaining);
    } else {
      $el.removeAttr('style');
    }
  }
}
|
||||
|
||||
/**
 * Derives the pixel size of one indent level as the GCD of all margin values.
 *
 * Values are rounded to whole pixels first (Confluence writes floats such as
 * "30.0"); anything that does not round to a positive number is ignored.
 *
 * @returns the common divisor, or 0 when no usable value is present.
 */
function detectIndentUnit(values: number[]): number {
  let unit = 0;
  for (const value of values) {
    const rounded = Math.round(value);
    if (!(rounded > 0)) continue;
    // The first usable value seeds the result; later ones are folded in.
    unit = unit === 0 ? rounded : gcd(unit, rounded);
  }
  return unit;
}
|
||||
|
||||
/**
 * Greatest common divisor of `a` and `b` via the Euclidean algorithm.
 * Returns `a` unchanged when `b` is 0.
 */
function gcd(a: number, b: number): number {
  let dividend = a;
  let divisor = b;
  while (divisor !== 0) {
    const remainder = dividend % divisor;
    dividend = divisor;
    divisor = remainder;
  }
  return dividend;
}
|
||||
import { applyConfluenceMarginLeftIndent } from './confluence-indent';
|
||||
export { applyConfluenceMarginLeftIndent };
|
||||
|
||||
export function defaultHtmlFormatter($: CheerioAPI, $root: Cheerio<any>) {
|
||||
normalizeTableColumnWidths($, $root);
|
||||
|
||||
Reference in New Issue
Block a user