mirror of
https://github.com/docmost/docmost.git
synced 2026-05-07 06:23:06 +08:00
feat: properly preserve table width
This commit is contained in:
@@ -80,10 +80,12 @@ export const MarkdownClipboard = Extension.create({
|
|||||||
const { from, to } = view.state.selection;
|
const { from, to } = view.state.selection;
|
||||||
|
|
||||||
const parsed = markdownToHtml(text.replace(/\n+$/, ""));
|
const parsed = markdownToHtml(text.replace(/\n+$/, ""));
|
||||||
|
const body = elementFromString(parsed);
|
||||||
|
normalizeTableColumnWidths(body);
|
||||||
|
|
||||||
const contentNodes = DOMParser.fromSchema(
|
const contentNodes = DOMParser.fromSchema(
|
||||||
this.editor.schema,
|
this.editor.schema,
|
||||||
).parseSlice(elementFromString(parsed), {
|
).parseSlice(body, {
|
||||||
preserveWhitespace: true,
|
preserveWhitespace: true,
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -137,3 +139,92 @@ function elementFromString(value) {
|
|||||||
|
|
||||||
return new window.DOMParser().parseFromString(wrappedValue, "text/html").body;
|
return new window.DOMParser().parseFromString(wrappedValue, "text/html").body;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Per-column width (px) applied to a pasted table when no width can be derived from its markup.
const DEFAULT_PASTE_COL_WIDTH_PX = 150;
|
||||||
|
|
||||||
|
/**
 * Reads a positive pixel width from an element's `width` attribute or, failing
 * that, from a `width: Npx` declaration in its inline `style`.
 *
 * The `width` attribute is only trusted when it is a bare number or carries an
 * explicit `px` suffix. Values such as `50%` or `2em` are not pixel widths, so
 * they are ignored here instead of being read as 50px / 2px by `parseInt`.
 * An unusable attribute still falls through to the inline style.
 *
 * @param el - Element to inspect (callers pass `<col>`, `<td>` or `<th>`).
 * @returns The width truncated to whole pixels, or `null` when no positive
 *   pixel width is present.
 */
function parsePixelWidth(el: Element): number | null {
  const attr = el.getAttribute("width");
  if (attr) {
    // Anchored on both ends so a trailing unit other than px rejects the value.
    const attrMatch = attr.match(/^\s*(\d+(?:\.\d+)?)\s*(?:px)?\s*$/i);
    if (attrMatch) {
      const n = parseInt(attrMatch[1], 10);
      if (Number.isFinite(n) && n > 0) return n;
    }
  }

  const style = el.getAttribute("style") || "";
  // `(?:^|;)` keeps `min-width` / `max-width` from matching.
  const m = style.match(/(?:^|;)\s*width\s*:\s*([\d.]+)\s*px/i);
  if (m) {
    const n = parseInt(m[1], 10);
    if (Number.isFinite(n) && n > 0) return n;
  }

  return null;
}
|
||||||
|
|
||||||
|
/**
 * Returns the first row of `table` in document order, or `null` when the table
 * has no rows of its own.
 *
 * Only direct `<tr>` children and rows directly inside the table's own
 * `<thead>` / `<tbody>` are considered, so rows of nested tables are never
 * picked up.
 *
 * Document order matters: the server-side normalizer selects
 * `> tbody > tr, > thead > tr, > tr` and takes the first match, which is the
 * header row when a `<thead>` precedes the `<tbody>` (assuming cheerio returns
 * matches in document order). Preferring `<tbody>` here made the client
 * disagree with that and ignored widths that are declared only on header cells.
 *
 * @param table - The `<table>` element to inspect.
 */
function getFirstRow(table: Element): Element | null {
  for (const section of Array.from(table.children)) {
    if (section.tagName === "TR") return section;
    if (section.tagName === "THEAD" || section.tagName === "TBODY") {
      const row = Array.from(section.children).find((c) => c.tagName === "TR");
      // An empty section is skipped so a later section can still supply the row.
      if (row) return row;
    }
  }
  return null;
}
|
||||||
|
|
||||||
|
/**
 * Derives per-column pixel widths for a table, in visual column order.
 *
 * Priority: the table's own `<colgroup><col>` elements, then the cells of the
 * first row. A `<col span="N">` contributes N entries with the same width; a
 * cell with `colspan="N"` contributes N entries, each an equal share of the
 * cell's width.
 *
 * @param table - The `<table>` element to inspect.
 * @returns One entry per column (`null` where the width is unknown), or `null`
 *   when no column width could be determined at all.
 */
function deriveColumnWidths(table: Element): (number | null)[] | null {
  // HTML limits `span` / `colspan` to 1..1000. Clamping also stops a hostile
  // value such as colspan="999999999" from driving the push loops below, and
  // maps negative or non-numeric values to 1.
  const readSpan = (el: Element, name: string): number => {
    const n = parseInt(el.getAttribute(name) || "1", 10);
    return Number.isFinite(n) && n >= 1 ? Math.min(n, 1000) : 1;
  };

  const cols = table.querySelectorAll(":scope > colgroup > col");
  if (cols.length > 0) {
    const widths: (number | null)[] = [];
    cols.forEach((col) => {
      const w = parsePixelWidth(col);
      const span = readSpan(col, "span");
      for (let i = 0; i < span; i++) widths.push(w);
    });
    // A colgroup without any usable width falls through to the first row.
    if (widths.some((w) => w !== null)) return widths;
  }

  const firstRow = getFirstRow(table);
  if (!firstRow) return null;

  const widths: (number | null)[] = [];
  for (const cell of Array.from(firstRow.children)) {
    if (cell.tagName !== "TD" && cell.tagName !== "TH") continue;
    const colspan = readSpan(cell, "colspan");
    const w = parsePixelWidth(cell);
    const share = w !== null ? Math.round(w / colspan) : null;
    for (let i = 0; i < colspan; i++) widths.push(share);
  }

  if (widths.length === 0 || widths.every((w) => w === null)) return null;
  return widths;
}
|
||||||
|
|
||||||
|
/**
 * Mirror of server normalizeTableColumnWidths (see import/utils/table-utils.ts).
 *
 * Markdown source has no widths, so without this every pasted table renders
 * at table-layout:fixed/100% and squashes columns to fit the editor instead of
 * letting .tableWrapper's overflow-x: auto scroll.
 *
 * For every table under `root`, writes a `colwidth="N[,N...]"` attribute on the
 * first-row cells that do not already have one. Widths come from
 * `deriveColumnWidths`; when nothing can be derived every column gets
 * `DEFAULT_PASTE_COL_WIDTH_PX`. Mutates the DOM in place.
 *
 * @param root - Element whose descendant tables are normalized.
 */
export function normalizeTableColumnWidths(root: Element): void {
  // HTML limits `colspan` to 1..1000; clamping keeps a hostile value from
  // inflating the default-width array and maps invalid values to 1.
  const readColspan = (cell: Element): number => {
    const n = parseInt(cell.getAttribute("colspan") || "1", 10);
    return Number.isFinite(n) && n >= 1 ? Math.min(n, 1000) : 1;
  };

  root.querySelectorAll("table").forEach((table) => {
    const firstRow = getFirstRow(table);
    if (!firstRow) return;

    const cells = Array.from(firstRow.children).filter(
      (c) => c.tagName === "TD" || c.tagName === "TH",
    );

    let colWidths = deriveColumnWidths(table);
    if (!colWidths) {
      const count = cells.reduce((sum, cell) => sum + readColspan(cell), 0);
      if (count === 0) return;
      colWidths = new Array<number | null>(count).fill(
        DEFAULT_PASTE_COL_WIDTH_PX,
      );
    }

    let col = 0;
    for (const cell of cells) {
      const colspan = readColspan(cell);
      const slice = colWidths.slice(col, col + colspan);
      // Always advance, even for skipped cells, so later cells stay aligned.
      col += colspan;

      // Leave an existing colwidth untouched.
      if (cell.getAttribute("colwidth")) continue;
      if (slice.length === 0 || slice.every((w) => w === null)) continue;

      // Within a partially known span, unknown columns fall back to 100px.
      const values = slice.map((w) => (w == null ? 100 : w));
      cell.setAttribute("colwidth", values.join(","));
    }
  });
}
|
||||||
|
|||||||
@@ -29,6 +29,8 @@ import { InjectQueue } from '@nestjs/bullmq';
|
|||||||
import { Queue } from 'bullmq';
|
import { Queue } from 'bullmq';
|
||||||
import { QueueJob, QueueName } from '../../queue/constants';
|
import { QueueJob, QueueName } from '../../queue/constants';
|
||||||
import { ModuleRef } from '@nestjs/core';
|
import { ModuleRef } from '@nestjs/core';
|
||||||
|
import { load } from 'cheerio';
|
||||||
|
import { normalizeImportHtml } from '../utils/import-formatter';
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class ImportService {
|
export class ImportService {
|
||||||
@@ -136,7 +138,9 @@ export class ImportService {
|
|||||||
|
|
||||||
async processHTML(htmlInput: string): Promise<any> {
|
async processHTML(htmlInput: string): Promise<any> {
|
||||||
try {
|
try {
|
||||||
return htmlToJson(htmlInput);
|
const $ = load(htmlInput);
|
||||||
|
normalizeImportHtml($, $.root());
|
||||||
|
return htmlToJson($.html() || '');
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import { v7 } from 'uuid';
|
|||||||
import { InsertableBacklink } from '@docmost/db/types/entity.types';
|
import { InsertableBacklink } from '@docmost/db/types/entity.types';
|
||||||
import { Cheerio, CheerioAPI, load } from 'cheerio';
|
import { Cheerio, CheerioAPI, load } from 'cheerio';
|
||||||
import slugify from '@sindresorhus/slugify';
|
import slugify from '@sindresorhus/slugify';
|
||||||
|
import { normalizeTableColumnWidths } from './table-utils';
|
||||||
|
|
||||||
// Check if text contains Unicode characters (for emojis/icons)
|
// Check if text contains Unicode characters (for emojis/icons)
|
||||||
function isUnicodeCharacter(text: string): boolean {
|
function isUnicodeCharacter(text: string): boolean {
|
||||||
@@ -51,9 +52,7 @@ export async function formatImportHtml(opts: {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
notionFormatter($, $root);
|
normalizeImportHtml($, $root);
|
||||||
xwikiFormatter($, $root);
|
|
||||||
defaultHtmlFormatter($, $root);
|
|
||||||
|
|
||||||
const backlinks = await rewriteInternalLinksToMentionHtml(
|
const backlinks = await rewriteInternalLinksToMentionHtml(
|
||||||
$,
|
$,
|
||||||
@@ -73,6 +72,23 @@ export async function formatImportHtml(opts: {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Context-free HTML cleanup applied by every import path.
 *
 * Runs, in this order:
 * - notionFormatter: no-op on non-Notion HTML (class-selector-based).
 * - xwikiFormatter: no-op on non-XWiki HTML (looks for #xwikicontent).
 * - defaultHtmlFormatter: table column widths + provider auto-embeds.
 *
 * rewriteInternalLinksToMentionHtml is deliberately NOT part of this — it
 * requires zip context.
 *
 * @param $ - Cheerio API bound to the document being imported.
 * @param $root - Root selection the formatters operate on (mutated in place).
 */
export function normalizeImportHtml(
  $: CheerioAPI,
  $root: Cheerio<any>,
): void {
  const passes = [notionFormatter, xwikiFormatter, defaultHtmlFormatter];
  for (const applyPass of passes) {
    applyPass($, $root);
  }
}
|
||||||
|
|
||||||
export function xwikiFormatter($: CheerioAPI, $root: Cheerio<any>) {
|
export function xwikiFormatter($: CheerioAPI, $root: Cheerio<any>) {
|
||||||
const $content = $root.find('#xwikicontent');
|
const $content = $root.find('#xwikicontent');
|
||||||
if ($content.length) {
|
if ($content.length) {
|
||||||
@@ -82,6 +98,8 @@ export function xwikiFormatter($: CheerioAPI, $root: Cheerio<any>) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function defaultHtmlFormatter($: CheerioAPI, $root: Cheerio<any>) {
|
export function defaultHtmlFormatter($: CheerioAPI, $root: Cheerio<any>) {
|
||||||
|
normalizeTableColumnWidths($, $root);
|
||||||
|
|
||||||
$root.find('a[href]').each((_, el) => {
|
$root.find('a[href]').each((_, el) => {
|
||||||
const $el = $(el);
|
const $el = $(el);
|
||||||
const url = $el.attr('href')!;
|
const url = $el.attr('href')!;
|
||||||
|
|||||||
@@ -0,0 +1,107 @@
|
|||||||
|
import { CheerioAPI, Cheerio } from 'cheerio';
|
||||||
|
|
||||||
|
// Per-column width (px) applied to an imported table when no width can be derived from its markup.
const DEFAULT_IMPORT_COL_WIDTH_PX = 150;
|
||||||
|
|
||||||
|
/**
 * Extracts a pixel-integer width from either the `width` attribute or
 * `style="width: Npx"` on a <col>/<td>/<th>. Returns null when absent,
 * non-numeric, or a non-px unit (em, %).
 *
 * The `width` attribute is accepted only as a bare number or with an explicit
 * `px` suffix, so `width="50%"` is no longer read as 50px by `parseInt`. An
 * unusable attribute still falls through to the inline style.
 *
 * @param el - Cheerio selection wrapping the element to inspect.
 * @returns The width truncated to whole pixels, or `null`.
 */
function parsePixelWidth(el: Cheerio<any>): number | null {
  const attr = el.attr('width');
  if (attr) {
    // Anchored on both ends so a trailing unit other than px rejects the value.
    const attrMatch = attr.match(/^\s*(\d+(?:\.\d+)?)\s*(?:px)?\s*$/i);
    if (attrMatch) {
      const n = parseInt(attrMatch[1], 10);
      if (Number.isFinite(n) && n > 0) return n;
    }
  }

  const style = el.attr('style') || '';
  // `(?:^|;)` keeps `min-width` / `max-width` from matching.
  const m = style.match(/(?:^|;)\s*width\s*:\s*([\d.]+)\s*px/i);
  if (m) {
    const n = parseInt(m[1], 10);
    if (Number.isFinite(n) && n > 0) return n;
  }

  return null;
}
|
||||||
|
|
||||||
|
/**
 * Derives per-column widths for a table, in visual column order.
 * Priority: <colgroup><col> → first-row cells' own width.
 *
 * A `<col span="N">` contributes N entries with the same width; a cell with
 * `colspan="N"` contributes N entries, each an equal share of the cell's
 * width.
 *
 * @param $ - Cheerio API used to wrap raw elements.
 * @param table - Selection wrapping a single `<table>`.
 * @returns One entry per column (`null` where the width couldn't be
 *   determined), or `null` when no column width is known at all.
 */
function deriveColumnWidths(
  $: CheerioAPI,
  table: Cheerio<any>,
): (number | null)[] | null {
  // HTML limits `span` / `colspan` to 1..1000. Clamping also stops a hostile
  // value such as colspan="999999999" in imported HTML from driving the push
  // loops below, and maps negative or non-numeric values to 1.
  const readSpan = (el: Cheerio<any>, name: string): number => {
    const n = parseInt(el.attr(name) || '1', 10);
    return Number.isFinite(n) && n >= 1 ? Math.min(n, 1000) : 1;
  };

  const cols = table.find('> colgroup > col');
  if (cols.length > 0) {
    const widths: (number | null)[] = [];
    cols.each((_, el) => {
      const col = $(el);
      const w = parsePixelWidth(col);
      const span = readSpan(col, 'span');
      for (let i = 0; i < span; i++) widths.push(w);
    });
    // A colgroup without any usable width falls through to the first row.
    if (widths.some((w) => w !== null)) return widths;
  }

  // Fallback: first row's cells.
  const firstRow = table.find('> tbody > tr, > thead > tr, > tr').first();
  if (!firstRow.length) return null;

  const widths: (number | null)[] = [];
  firstRow.children('td, th').each((_, el) => {
    const cell = $(el);
    const colspan = readSpan(cell, 'colspan');
    const w = parsePixelWidth(cell);
    const share = w !== null ? Math.round(w / colspan) : null;
    for (let i = 0; i < colspan; i++) widths.push(share);
  });

  // `every` is vacuously true for an empty row, so that case returns null too.
  if (widths.every((w) => w === null)) return null;
  return widths;
}
|
||||||
|
|
||||||
|
/**
 * Apply colwidth attributes to the first row of each table based on
 * derived column widths. Accounts for colspan. Cells that already carry a
 * `colwidth` are left untouched, so re-running on already-normalized markup
 * is a no-op.
 *
 * This lives upstream of tiptap's generateJSON: tiptap reads
 * `colwidth="N[,N...]"` on <td>/<th> to build the runtime <colgroup>.
 *
 * @param $ - Cheerio API used to wrap raw elements.
 * @param $root - Selection whose descendant tables are normalized in place.
 */
export function normalizeTableColumnWidths(
  $: CheerioAPI,
  $root: Cheerio<any>,
): void {
  // HTML limits `colspan` to 1..1000; clamping keeps a hostile value in
  // imported HTML from inflating the default-width array and maps invalid
  // values to 1.
  const readColspan = (cell: Cheerio<any>): number => {
    const n = parseInt(cell.attr('colspan') || '1', 10);
    return Number.isFinite(n) && n >= 1 ? Math.min(n, 1000) : 1;
  };

  $root.find('table').each((_, tableEl) => {
    const table = $(tableEl);
    const firstRow = table.find('> tbody > tr, > thead > tr, > tr').first();
    if (!firstRow.length) return;

    const cells = firstRow.children('td, th');

    let colWidths = deriveColumnWidths($, table);
    if (!colWidths) {
      // No widths anywhere (e.g. markdown-sourced tables). Apply a default
      // per-column width so the table's intrinsic width can exceed the
      // editor container, letting .tableWrapper's overflow-x: auto scroll
      // instead of cramming columns into the available width.
      let count = 0;
      cells.each((_, cellEl) => {
        count += readColspan($(cellEl));
      });
      if (count === 0) return;
      colWidths = new Array<number | null>(count).fill(
        DEFAULT_IMPORT_COL_WIDTH_PX,
      );
    }
    // Non-null binding for use inside the callback below.
    const widths: (number | null)[] = colWidths;

    let col = 0;
    cells.each((_, cellEl) => {
      const cell = $(cellEl);
      const colspan = readColspan(cell);
      const slice = widths.slice(col, col + colspan);
      // Always advance, even for skipped cells, so later cells stay aligned.
      col += colspan;

      if (cell.attr('colwidth')) return;
      if (slice.length === 0 || slice.every((w) => w === null)) return;

      // Within a partially known span, unknown columns fall back to 100px.
      const values = slice.map((w) => (w == null ? 100 : w));
      cell.attr('colwidth', values.join(','));
    });
  });
}
|
||||||
Reference in New Issue
Block a user