Share HTML normalization between zip and single-file imports

This commit is contained in:
Philipinho
2026-04-22 01:36:14 +01:00
parent 474ff6c629
commit 68309247b5
2 changed files with 23 additions and 4 deletions
@@ -30,6 +30,8 @@ import { InjectQueue } from '@nestjs/bullmq';
import { Queue } from 'bullmq';
import { QueueJob, QueueName } from '../../queue/constants';
import { ModuleRef } from '@nestjs/core';
import { load } from 'cheerio';
import { normalizeImportHtml } from '../utils/import-formatter';
@Injectable()
export class ImportService {
@@ -137,7 +139,9 @@ export class ImportService {
async processHTML(htmlInput: string): Promise<any> {
try {
return htmlToJson(htmlInput);
const $ = load(htmlInput);
normalizeImportHtml($, $.root());
return htmlToJson($.html() || '');
} catch (err) {
throw err;
}
@@ -52,9 +52,7 @@ export async function formatImportHtml(opts: {
}
}
notionFormatter($, $root);
xwikiFormatter($, $root);
defaultHtmlFormatter($, $root);
normalizeImportHtml($, $root);
const backlinks = await rewriteInternalLinksToMentionHtml(
$,
@@ -74,6 +72,23 @@ export async function formatImportHtml(opts: {
};
}
/**
 * Applies the HTML cleanup passes that do not depend on any import context,
 * so that every import path can share the same normalization step.
 *
 * The passes run in a fixed order, each receiving the same `$` and `$root`:
 * 1. `notionFormatter` - per the original author's note, class-selector-based
 *    and therefore a no-op on non-Notion HTML (not verifiable from this view).
 * 2. `xwikiFormatter` - looks up `#xwikicontent` under `$root`.
 * 3. `defaultHtmlFormatter` - per the original author's note, handles table
 *    column widths and provider auto-embeds (not verifiable from this view).
 *
 * `rewriteInternalLinksToMentionHtml` is intentionally NOT part of this
 * pipeline; the original note states that it requires zip context.
 *
 * Nothing is returned, so any changes are presumably made in place on the
 * document behind `$root` (confirm against the individual formatters).
 *
 * @param $ - Cheerio API instance bound to the document being imported.
 * @param $root - Selection handed to each formatter as its starting point.
 */
export function normalizeImportHtml(
  $: CheerioAPI,
  $root: Cheerio<any>,
): void {
  // Order matters: source-specific passes first, generic cleanup last.
  const passes = [notionFormatter, xwikiFormatter, defaultHtmlFormatter];

  for (const applyPass of passes) {
    applyPass($, $root);
  }
}
export function xwikiFormatter($: CheerioAPI, $root: Cheerio<any>) {
const $content = $root.find('#xwikicontent');
if ($content.length) {