From 09c69d7a0f21085fbd0c89d6d298537e6378d85d Mon Sep 17 00:00:00 2001 From: Philip Okugbe <16838612+Philipinho@users.noreply.github.com> Date: Fri, 1 May 2026 00:49:31 +0100 Subject: [PATCH] feat: properly preserve table width (#2143) --- .../editor/extensions/markdown-clipboard.ts | 93 ++++++++++++++- .../import/services/import.service.ts | 6 +- .../import/utils/import-formatter.ts | 24 +++- .../integrations/import/utils/table-utils.ts | 107 ++++++++++++++++++ 4 files changed, 225 insertions(+), 5 deletions(-) create mode 100644 apps/server/src/integrations/import/utils/table-utils.ts diff --git a/apps/client/src/features/editor/extensions/markdown-clipboard.ts b/apps/client/src/features/editor/extensions/markdown-clipboard.ts index 230798c5..bebb567a 100644 --- a/apps/client/src/features/editor/extensions/markdown-clipboard.ts +++ b/apps/client/src/features/editor/extensions/markdown-clipboard.ts @@ -80,10 +80,12 @@ export const MarkdownClipboard = Extension.create({ const { from, to } = view.state.selection; const parsed = markdownToHtml(text.replace(/\n+$/, "")); + const body = elementFromString(parsed); + normalizeTableColumnWidths(body); const contentNodes = DOMParser.fromSchema( this.editor.schema, - ).parseSlice(elementFromString(parsed), { + ).parseSlice(body, { preserveWhitespace: true, }); @@ -137,3 +139,92 @@ function elementFromString(value) { return new window.DOMParser().parseFromString(wrappedValue, "text/html").body; } + +const DEFAULT_PASTE_COL_WIDTH_PX = 150; + +function parsePixelWidth(el: Element): number | null { + const attr = el.getAttribute("width"); + if (attr) { + const n = parseInt(attr, 10); + if (Number.isFinite(n) && n > 0) return n; + } + const style = el.getAttribute("style") || ""; + const m = style.match(/(?:^|;)\s*width\s*:\s*([\d.]+)\s*px/i); + if (m) { + const n = parseInt(m[1], 10); + if (Number.isFinite(n) && n > 0) return n; + } + return null; +} + +function getFirstRow(table: Element): Element | null { + const tbodyRow = 
table.querySelector(":scope > tbody > tr"); + if (tbodyRow) return tbodyRow; + const theadRow = table.querySelector(":scope > thead > tr"); + if (theadRow) return theadRow; + return table.querySelector(":scope > tr"); +} + +function deriveColumnWidths(table: Element): (number | null)[] | null { + const cols = table.querySelectorAll(":scope > colgroup > col"); + if (cols.length > 0) { + const widths: (number | null)[] = []; + cols.forEach((col) => widths.push(parsePixelWidth(col))); + if (widths.some((w) => w !== null)) return widths; + } + + const firstRow = getFirstRow(table); + if (!firstRow) return null; + + const widths: (number | null)[] = []; + Array.from(firstRow.children) + .filter((c) => c.tagName === "TD" || c.tagName === "TH") + .forEach((cell) => { + const colspan = parseInt(cell.getAttribute("colspan") || "1", 10) || 1; + const w = parsePixelWidth(cell); + for (let i = 0; i < colspan; i++) { + widths.push(w !== null ? Math.round(w / colspan) : null); + } + }); + if (widths.length === 0 || widths.every((w) => w === null)) return null; + return widths; +} + +// Mirror of server normalizeTableColumnWidths (see import/utils/table-utils.ts): +// markdown source has no widths, so without this every pasted table renders +// at table-layout:fixed/100% and squashes columns to fit the editor instead of +// letting .tableWrapper's overflow-x: auto scroll. 
+export function normalizeTableColumnWidths(root: Element): void { + root.querySelectorAll("table").forEach((table) => { + const firstRow = getFirstRow(table); + if (!firstRow) return; + + let colWidths = deriveColumnWidths(table); + if (!colWidths) { + let count = 0; + Array.from(firstRow.children) + .filter((c) => c.tagName === "TD" || c.tagName === "TH") + .forEach((cell) => { + count += parseInt(cell.getAttribute("colspan") || "1", 10) || 1; + }); + if (count === 0) return; + colWidths = new Array(count).fill(DEFAULT_PASTE_COL_WIDTH_PX); + } + + let col = 0; + Array.from(firstRow.children) + .filter((c) => c.tagName === "TD" || c.tagName === "TH") + .forEach((cell) => { + if (cell.getAttribute("colwidth")) { + col += parseInt(cell.getAttribute("colspan") || "1", 10) || 1; + return; + } + const colspan = parseInt(cell.getAttribute("colspan") || "1", 10) || 1; + const slice = colWidths!.slice(col, col + colspan); + col += colspan; + if (slice.length === 0 || slice.every((w) => w === null)) return; + const values = slice.map((w) => (w == null ? 
100 : w)); + cell.setAttribute("colwidth", values.join(",")); + }); + }); +} diff --git a/apps/server/src/integrations/import/services/import.service.ts b/apps/server/src/integrations/import/services/import.service.ts index 66c57585..0eb3ae40 100644 --- a/apps/server/src/integrations/import/services/import.service.ts +++ b/apps/server/src/integrations/import/services/import.service.ts @@ -29,6 +29,8 @@ import { InjectQueue } from '@nestjs/bullmq'; import { Queue } from 'bullmq'; import { QueueJob, QueueName } from '../../queue/constants'; import { ModuleRef } from '@nestjs/core'; +import { load } from 'cheerio'; +import { normalizeImportHtml } from '../utils/import-formatter'; @Injectable() export class ImportService { @@ -136,7 +138,9 @@ export class ImportService { async processHTML(htmlInput: string): Promise { try { - return htmlToJson(htmlInput); + const $ = load(htmlInput); + normalizeImportHtml($, $.root()); + return htmlToJson($.html() || ''); } catch (err) { throw err; } diff --git a/apps/server/src/integrations/import/utils/import-formatter.ts b/apps/server/src/integrations/import/utils/import-formatter.ts index 3b0738a5..0333b75e 100644 --- a/apps/server/src/integrations/import/utils/import-formatter.ts +++ b/apps/server/src/integrations/import/utils/import-formatter.ts @@ -5,6 +5,7 @@ import { v7 } from 'uuid'; import { InsertableBacklink } from '@docmost/db/types/entity.types'; import { Cheerio, CheerioAPI, load } from 'cheerio'; import slugify from '@sindresorhus/slugify'; +import { normalizeTableColumnWidths } from './table-utils'; // Check if text contains Unicode characters (for emojis/icons) function isUnicodeCharacter(text: string): boolean { @@ -51,9 +52,7 @@ export async function formatImportHtml(opts: { } } - notionFormatter($, $root); - xwikiFormatter($, $root); - defaultHtmlFormatter($, $root); + normalizeImportHtml($, $root); const backlinks = await rewriteInternalLinksToMentionHtml( $, @@ -73,6 +72,23 @@ export async function 
formatImportHtml(opts: { }; } +/** + * Contextless HTML cleanup shared by every import path. + * - notionFormatter: no-op on non-Notion HTML (class-selector-based). + * - xwikiFormatter: no-op on non-XWiki HTML (looks for #xwikicontent). + * - defaultHtmlFormatter: table column widths + provider auto-embeds. + * + * Does NOT run rewriteInternalLinksToMentionHtml — that requires zip context. + */ +export function normalizeImportHtml( + $: CheerioAPI, + $root: Cheerio, +): void { + notionFormatter($, $root); + xwikiFormatter($, $root); + defaultHtmlFormatter($, $root); +} + export function xwikiFormatter($: CheerioAPI, $root: Cheerio) { const $content = $root.find('#xwikicontent'); if ($content.length) { @@ -82,6 +98,8 @@ export function xwikiFormatter($: CheerioAPI, $root: Cheerio) { } export function defaultHtmlFormatter($: CheerioAPI, $root: Cheerio) { + normalizeTableColumnWidths($, $root); + $root.find('a[href]').each((_, el) => { const $el = $(el); const url = $el.attr('href')!; diff --git a/apps/server/src/integrations/import/utils/table-utils.ts b/apps/server/src/integrations/import/utils/table-utils.ts new file mode 100644 index 00000000..8b5240a0 --- /dev/null +++ b/apps/server/src/integrations/import/utils/table-utils.ts @@ -0,0 +1,107 @@ +import { CheerioAPI, Cheerio } from 'cheerio'; + +const DEFAULT_IMPORT_COL_WIDTH_PX = 150; + +/** + * Extracts a pixel-integer width from either the `width` attribute or + * `style="width: Npx"` on a <col>/<td>/<th>. Returns null when absent, + * non-numeric, or a non-px unit (em, %).
+ */ +function parsePixelWidth(el: Cheerio): number | null { + const attr = el.attr('width'); + if (attr) { + const n = parseInt(attr, 10); + if (Number.isFinite(n) && n > 0) return n; + } + const style = el.attr('style') || ''; + const m = style.match(/(?:^|;)\s*width\s*:\s*([\d.]+)\s*px/i); + if (m) { + const n = parseInt(m[1], 10); + if (Number.isFinite(n) && n > 0) return n; + } + return null; +} + +/** + * Derives per-column widths for a table, in visual column order. + * Priority: <colgroup><col> → first-row cells' own width style. + * Returns an array of length = number of columns, with null entries + * for columns whose width couldn't be determined. + */ +function deriveColumnWidths( + $: CheerioAPI, + table: Cheerio, +): (number | null)[] | null { + const cols = table.find('> colgroup > col'); + if (cols.length > 0) { + const widths: (number | null)[] = []; + cols.each(function () { + widths.push(parsePixelWidth($(this))); + }); + if (widths.some((w) => w !== null)) return widths; + } + + // Fallback: first row's cells. + const firstRow = table.find('> tbody > tr, > thead > tr, > tr').first(); + if (!firstRow.length) return null; + + const widths: (number | null)[] = []; + firstRow.children('td, th').each(function () { + const cell = $(this); + const colspan = parseInt(cell.attr('colspan') || '1', 10) || 1; + const w = parsePixelWidth(cell); + for (let i = 0; i < colspan; i++) { + widths.push(w !== null ? Math.round(w / colspan) : null); + } + }); + if (widths.every((w) => w === null)) return null; + return widths; +} + +/** + * Apply colwidth attributes to the first row of each table based on + * derived column widths. Accounts for colspan. Idempotent — re-running + * on already-normalized markup is a no-op. + * + * This lives upstream of tiptap's generateJSON: tiptap reads + * `colwidth="N[,N...]"` on <td>/<th> to build the runtime <colgroup>.
+ */ +export function normalizeTableColumnWidths( + $: CheerioAPI, + $root: Cheerio, +): void { + $root.find('table').each(function () { + const table = $(this); + const firstRow = table.find('> tbody > tr, > thead > tr, > tr').first(); + if (!firstRow.length) return; + + let colWidths = deriveColumnWidths($, table); + if (!colWidths) { + // No widths anywhere (e.g. markdown-sourced tables). Apply a default + // per-column width so the table's intrinsic width can exceed the + // editor container, letting .tableWrapper's overflow-x: auto scroll + // instead of cramming columns into the available width. + let count = 0; + firstRow.children('td, th').each(function () { + count += parseInt($(this).attr('colspan') || '1', 10) || 1; + }); + if (count === 0) return; + colWidths = new Array(count).fill(DEFAULT_IMPORT_COL_WIDTH_PX); + } + + let col = 0; + firstRow.children('td, th').each(function () { + const cell = $(this); + if (cell.attr('colwidth')) { + col += parseInt(cell.attr('colspan') || '1', 10) || 1; + return; + } + const colspan = parseInt(cell.attr('colspan') || '1', 10) || 1; + const slice = colWidths.slice(col, col + colspan); + col += colspan; + if (slice.length === 0 || slice.every((w) => w === null)) return; + const values = slice.map((w) => (w == null ? 100 : w)); + cell.attr('colwidth', values.join(',')); + }); + }); +}