Compare commits

...

5 Commits

Author SHA1 Message Date
Philipinho f413720e15 - sync
- reinstantiate S3 client to fix file upload errors during import
- delete import zip file after use
2025-09-14 03:00:23 +01:00
Philipinho 8e16ad952a v0.23.1 2025-09-13 03:15:53 +01:00
Philip Okugbe 7ada3cb1f9 fix: page import task (#1551)
* fix import

* - fix notion importer
- support notion page icon import
- fix horizontal rule css
- rename service file

* sync

* 3 mins delay
2025-09-13 03:14:59 +01:00
Philipinho 47c54174b3 sync 2025-09-11 00:50:15 +01:00
Philipinho dc0650289d sync 2025-09-04 15:07:01 -07:00
15 changed files with 332 additions and 220 deletions
+1 -1
View File
@@ -1,7 +1,7 @@
{ {
"name": "client", "name": "client",
"private": true, "private": true,
"version": "0.23.0", "version": "0.23.1",
"scripts": { "scripts": {
"dev": "vite", "dev": "vite",
"build": "tsc && vite build", "build": "tsc && vite build",
@@ -94,8 +94,7 @@
hr { hr {
border: none; border: none;
border-top: 2px solid #ced4da; border-top: 1px solid #ced4da;
margin: 2rem 0;
&:hover { &:hover {
cursor: pointer; cursor: pointer;
+1 -1
View File
@@ -1,6 +1,6 @@
{ {
"name": "server", "name": "server",
"version": "0.23.0", "version": "0.23.1",
"description": "", "description": "",
"author": "", "author": "",
"private": true, "private": true,
@@ -67,10 +67,16 @@ export class AttachmentProcessor extends WorkerHost implements OnModuleDestroy {
@OnWorkerEvent('failed') @OnWorkerEvent('failed')
onError(job: Job) { onError(job: Job) {
if (job.name === QueueJob.ATTACHMENT_INDEX_CONTENT) {
this.logger.debug(
`Error processing ${job.name} job for attachment ${job.data?.attachmentId}. Reason: ${job.failedReason}`,
);
} else {
this.logger.error( this.logger.error(
`Error processing ${job.name} job. Reason: ${job.failedReason}`, `Error processing ${job.name} job. Reason: ${job.failedReason}`,
); );
} }
}
@OnWorkerEvent('completed') @OnWorkerEvent('completed')
onCompleted(job: Job) { onCompleted(job: Job) {
@@ -2,7 +2,7 @@ import { Module } from '@nestjs/common';
import { ImportService } from './services/import.service'; import { ImportService } from './services/import.service';
import { ImportController } from './import.controller'; import { ImportController } from './import.controller';
import { StorageModule } from '../storage/storage.module'; import { StorageModule } from '../storage/storage.module';
import { FileTaskService } from './services/file-task.service'; import { FileImportTaskService } from './services/file-import-task.service';
import { FileTaskProcessor } from './processors/file-task.processor'; import { FileTaskProcessor } from './processors/file-task.processor';
import { ImportAttachmentService } from './services/import-attachment.service'; import { ImportAttachmentService } from './services/import-attachment.service';
import { FileTaskController } from './file-task.controller'; import { FileTaskController } from './file-task.controller';
@@ -11,7 +11,7 @@ import { PageModule } from '../../core/page/page.module';
@Module({ @Module({
providers: [ providers: [
ImportService, ImportService,
FileTaskService, FileImportTaskService,
FileTaskProcessor, FileTaskProcessor,
ImportAttachmentService, ImportAttachmentService,
], ],
@@ -2,7 +2,7 @@ import { Logger, OnModuleDestroy } from '@nestjs/common';
import { OnWorkerEvent, Processor, WorkerHost } from '@nestjs/bullmq'; import { OnWorkerEvent, Processor, WorkerHost } from '@nestjs/bullmq';
import { Job } from 'bullmq'; import { Job } from 'bullmq';
import { QueueJob, QueueName } from 'src/integrations/queue/constants'; import { QueueJob, QueueName } from 'src/integrations/queue/constants';
import { FileTaskService } from '../services/file-task.service'; import { FileImportTaskService } from '../services/file-import-task.service';
import { FileTaskStatus } from '../utils/file.utils'; import { FileTaskStatus } from '../utils/file.utils';
import { StorageService } from '../../storage/storage.service'; import { StorageService } from '../../storage/storage.service';
@@ -11,7 +11,7 @@ export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
private readonly logger = new Logger(FileTaskProcessor.name); private readonly logger = new Logger(FileTaskProcessor.name);
constructor( constructor(
private readonly fileTaskService: FileTaskService, private readonly fileTaskService: FileImportTaskService,
private readonly storageService: StorageService, private readonly storageService: StorageService,
) { ) {
super(); super();
@@ -41,15 +41,32 @@ export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
@OnWorkerEvent('failed') @OnWorkerEvent('failed')
async onFailed(job: Job) { async onFailed(job: Job) {
this.logger.error( this.logger.error(
`Error processing ${job.name} job. Reason: ${job.failedReason}`, `Error processing ${job.name} job. Import Task ID: ${job.data.fileTaskId}. Reason: ${job.failedReason}`,
); );
await this.handleFailedJob(job);
}
/**
 * Handles the worker `stalled` event by marking the related import task as failed.
 *
 * BullMQ emits `stalled` with the job ID (a string) rather than a `Job`
 * instance, so the job is loaded by ID before its data is read. A `Job`
 * argument is still accepted so existing direct callers keep working.
 *
 * NOTE(review): BullMQ may move a stalled job back to the wait list and retry
 * it; confirm that marking the import task as failed at this point is intended.
 *
 * @param job The stalled job's ID as emitted by the worker, or a `Job` instance.
 */
@OnWorkerEvent('stalled')
async onStalled(job: Job | string) {
  const jobId = typeof job === 'string' ? job : job.id;

  let stalledJob: Job | undefined;
  if (typeof job === 'string') {
    try {
      // The event payload carries only the ID; fetch the job to reach its data.
      stalledJob = await Job.fromId(this.worker, job);
    } catch (err) {
      this.logger.error(`Failed to load stalled job. Job ID: ${jobId}`, err);
      return;
    }
  } else {
    stalledJob = job;
  }

  if (!stalledJob) {
    // The job may already have been removed; there is no task ID to update.
    this.logger.error(`Stalled job not found. Job ID: ${jobId}`);
    return;
  }

  this.logger.error(
    `Job ${stalledJob.name} stalled. Import Task ID: ${stalledJob.data?.fileTaskId}. Job ID: ${stalledJob.id}`,
  );

  // Set failedReason for stalled jobs since it's not automatically set
  stalledJob.failedReason = 'Job stalled and was marked as failed';
  await this.handleFailedJob(stalledJob);
}
private async handleFailedJob(job: Job) {
try { try {
const fileTaskId = job.data.fileTaskId; const fileTaskId = job.data.fileTaskId;
const reason = job.failedReason || 'Unknown error';
await this.fileTaskService.updateTaskStatus( await this.fileTaskService.updateTaskStatus(
fileTaskId, fileTaskId,
FileTaskStatus.Failed, FileTaskStatus.Failed,
job.failedReason, reason,
); );
const fileTask = await this.fileTaskService.getFileTask(fileTaskId); const fileTask = await this.fileTaskService.getFileTask(fileTaskId);
@@ -62,10 +79,22 @@ export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
} }
@OnWorkerEvent('completed') @OnWorkerEvent('completed')
onCompleted(job: Job) { async onCompleted(job: Job) {
this.logger.log( this.logger.log(
`Completed ${job.name} job for File task ID ${job.data.fileTaskId}`, `Completed ${job.name} job for File task ID ${job.data.fileTaskId}`,
); );
try {
const fileTask = await this.fileTaskService.getFileTask(
job.data.fileTaskId,
);
if (fileTask) {
await this.storageService.delete(fileTask.filePath);
this.logger.debug(`Deleted imported zip file: ${fileTask.filePath}`);
}
} catch (err) {
this.logger.error(`Failed to delete imported zip file:`, err);
}
} }
async onModuleDestroy(): Promise<void> { async onModuleDestroy(): Promise<void> {
@@ -33,8 +33,8 @@ import { PageService } from '../../../core/page/services/page.service';
import { ImportPageNode } from '../dto/file-task-dto'; import { ImportPageNode } from '../dto/file-task-dto';
@Injectable() @Injectable()
export class FileTaskService { export class FileImportTaskService {
private readonly logger = new Logger(FileTaskService.name); private readonly logger = new Logger(FileImportTaskService.name);
constructor( constructor(
private readonly storageService: StorageService, private readonly storageService: StorageService,
@@ -266,7 +266,7 @@ export class FileTaskService {
attachmentCandidates, attachmentCandidates,
}); });
const { html, backlinks } = await formatImportHtml({ const { html, backlinks, pageIcon } = await formatImportHtml({
html: htmlContent, html: htmlContent,
currentFilePath: page.filePath, currentFilePath: page.filePath,
filePathToPageMetaMap: filePathToPageMetaMap, filePathToPageMetaMap: filePathToPageMetaMap,
@@ -286,6 +286,7 @@ export class FileTaskService {
id: page.id, id: page.id,
slugId: page.slugId, slugId: page.slugId,
title: title || page.name, title: title || page.name,
icon: pageIcon || null,
content: prosemirrorJson, content: prosemirrorJson,
textContent: jsonToText(prosemirrorJson), textContent: jsonToText(prosemirrorJson),
ydoc: await this.importService.createYdoc(prosemirrorJson), ydoc: await this.importService.createYdoc(prosemirrorJson),
@@ -35,7 +35,7 @@ interface DrawioPair {
@Injectable() @Injectable()
export class ImportAttachmentService { export class ImportAttachmentService {
private readonly logger = new Logger(ImportAttachmentService.name); private readonly logger = new Logger(ImportAttachmentService.name);
private readonly CONCURRENT_UPLOADS = 3; private readonly CONCURRENT_UPLOADS = 1;
private readonly MAX_RETRIES = 2; private readonly MAX_RETRIES = 2;
private readonly RETRY_DELAY = 2000; private readonly RETRY_DELAY = 2000;
@@ -53,6 +53,7 @@ export class ImportAttachmentService {
fileTask: FileTask; fileTask: FileTask;
attachmentCandidates: Map<string, string>; attachmentCandidates: Map<string, string>;
pageAttachments?: AttachmentInfo[]; pageAttachments?: AttachmentInfo[];
isConfluenceImport?: boolean;
}): Promise<string> { }): Promise<string> {
const { const {
html, html,
@@ -62,6 +63,7 @@ export class ImportAttachmentService {
fileTask, fileTask,
attachmentCandidates, attachmentCandidates,
pageAttachments = [], pageAttachments = [],
isConfluenceImport,
} = opts; } = opts;
const attachmentTasks: (() => Promise<void>)[] = []; const attachmentTasks: (() => Promise<void>)[] = [];
@@ -90,7 +92,10 @@ export class ImportAttachmentService {
>(); >();
// Analyze attachments to identify Draw.io pairs // Analyze attachments to identify Draw.io pairs
const { drawioPairs, skipFiles } = this.analyzeAttachments(pageAttachments); const { drawioPairs, skipFiles } = this.analyzeAttachments(
pageAttachments,
isConfluenceImport,
);
// Map to store processed Draw.io SVGs // Map to store processed Draw.io SVGs
const drawioSvgMap = new Map< const drawioSvgMap = new Map<
@@ -134,7 +139,9 @@ export class ImportAttachmentService {
const stream = Readable.from(svgBuffer); const stream = Readable.from(svgBuffer);
// Upload to storage // Upload to storage
await this.storageService.uploadStream(storageFilePath, stream); await this.storageService.uploadStream(storageFilePath, stream, {
recreateClient: true,
});
// Insert into database // Insert into database
await this.db await this.db
@@ -235,49 +242,17 @@ export class ImportAttachmentService {
const pageDir = path.dirname(pageRelativePath); const pageDir = path.dirname(pageRelativePath);
const $ = load(html); const $ = load(html);
// Cache for resolved paths to avoid repeated lookups // image
const resolvedPathCache = new Map<string, string | null>(); for (const imgEl of $('img').toArray()) {
const $img = $(imgEl);
const src = cleanUrlString($img.attr('src') ?? '')!;
if (!src || src.startsWith('http')) continue;
const getCachedResolvedPath = (rawPath: string): string | null => { const relPath = resolveRelativeAttachmentPath(
if (resolvedPathCache.has(rawPath)) { src,
return resolvedPathCache.get(rawPath)!;
}
const resolved = resolveRelativeAttachmentPath(
rawPath,
pageDir, pageDir,
attachmentCandidates, attachmentCandidates,
); );
resolvedPathCache.set(rawPath, resolved);
return resolved;
};
// Cache for file stats to avoid repeated file system calls
const statCache = new Map<string, any>();
const getCachedStat = async (absPath: string) => {
if (statCache.has(absPath)) {
return statCache.get(absPath);
}
const stat = await fs.stat(absPath);
statCache.set(absPath, stat);
return stat;
};
// Single DOM traversal for all attachment elements
const selector =
'img, video, div[data-type="attachment"], a, div[data-type="excalidraw"], div[data-type="drawio"]';
const elements = $(selector).toArray();
for (const element of elements) {
const $el = $(element);
const tagName = element.tagName.toLowerCase();
// Process based on element type
if (tagName === 'img') {
const src = cleanUrlString($el.attr('src') ?? '');
if (!src || src.startsWith('http')) continue;
const relPath = getCachedResolvedPath(src);
if (!relPath) continue; if (!relPath) continue;
// Check if this image is part of a Draw.io pair // Check if this image is part of a Draw.io pair
@@ -291,52 +266,66 @@ export class ImportAttachmentService {
.attr('data-align', 'center') .attr('data-align', 'center')
.attr('data-attachment-id', drawioSvg.attachmentId); .attr('data-attachment-id', drawioSvg.attachmentId);
$el.replaceWith($drawio); $img.replaceWith($drawio);
unwrapFromParagraph($, $drawio); unwrapFromParagraph($, $drawio);
continue; continue;
} }
const { attachmentId, apiFilePath, abs } = processFile(relPath); const { attachmentId, apiFilePath } = processFile(relPath);
const stat = await getCachedStat(abs);
$el const width = $img.attr('width') ?? '100%';
const align = $img.attr('data-align') ?? 'center';
$img
.attr('src', apiFilePath) .attr('src', apiFilePath)
.attr('data-attachment-id', attachmentId) .attr('data-attachment-id', attachmentId)
.attr('data-size', stat.size.toString()) .attr('width', width)
.attr('width', $el.attr('width') ?? '100%') .attr('data-align', align);
.attr('data-align', $el.attr('data-align') ?? 'center');
unwrapFromParagraph($, $el); unwrapFromParagraph($, $img);
} else if (tagName === 'video') { }
const src = cleanUrlString($el.attr('src') ?? '');
// video
for (const vidEl of $('video').toArray()) {
const $vid = $(vidEl);
const src = cleanUrlString($vid.attr('src') ?? '')!;
if (!src || src.startsWith('http')) continue; if (!src || src.startsWith('http')) continue;
const relPath = getCachedResolvedPath(src); const relPath = resolveRelativeAttachmentPath(
src,
pageDir,
attachmentCandidates,
);
if (!relPath) continue; if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath); const { attachmentId, apiFilePath } = processFile(relPath);
const stat = await getCachedStat(abs);
$el const width = $vid.attr('width') ?? '100%';
const align = $vid.attr('data-align') ?? 'center';
$vid
.attr('src', apiFilePath) .attr('src', apiFilePath)
.attr('data-attachment-id', attachmentId) .attr('data-attachment-id', attachmentId)
.attr('data-size', stat.size.toString()) .attr('width', width)
.attr('width', $el.attr('width') ?? '100%') .attr('data-align', align);
.attr('data-align', $el.attr('data-align') ?? 'center');
unwrapFromParagraph($, $el); unwrapFromParagraph($, $vid);
} else if (tagName === 'div') { }
const dataType = $el.attr('data-type');
if (dataType === 'attachment') { // <div data-type="attachment">
const rawUrl = cleanUrlString($el.attr('data-attachment-url') ?? ''); for (const el of $('div[data-type="attachment"]').toArray()) {
const $oldDiv = $(el);
const rawUrl = cleanUrlString($oldDiv.attr('data-attachment-url') ?? '')!;
if (!rawUrl || rawUrl.startsWith('http')) continue; if (!rawUrl || rawUrl.startsWith('http')) continue;
const relPath = getCachedResolvedPath(rawUrl); const relPath = resolveRelativeAttachmentPath(
rawUrl,
pageDir,
attachmentCandidates,
);
if (!relPath) continue; if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath); const { attachmentId, apiFilePath, abs } = processFile(relPath);
const stat = await getCachedStat(abs);
const fileName = path.basename(abs); const fileName = path.basename(abs);
const mime = getMimeType(abs); const mime = getMimeType(abs);
@@ -345,39 +334,23 @@ export class ImportAttachmentService {
.attr('data-attachment-url', apiFilePath) .attr('data-attachment-url', apiFilePath)
.attr('data-attachment-name', fileName) .attr('data-attachment-name', fileName)
.attr('data-attachment-mime', mime) .attr('data-attachment-mime', mime)
.attr('data-attachment-size', stat.size.toString())
.attr('data-attachment-id', attachmentId); .attr('data-attachment-id', attachmentId);
$el.replaceWith($newDiv); $oldDiv.replaceWith($newDiv);
unwrapFromParagraph($, $newDiv);
} else if (dataType === 'excalidraw' || dataType === 'drawio') {
const rawSrc = cleanUrlString($el.attr('data-src') ?? '');
if (!rawSrc || rawSrc.startsWith('http')) continue;
const relPath = getCachedResolvedPath(rawSrc);
if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const stat = await getCachedStat(abs);
const fileName = path.basename(abs);
const $newDiv = $('<div>')
.attr('data-type', dataType)
.attr('data-src', apiFilePath)
.attr('data-title', fileName)
.attr('data-width', $el.attr('data-width') || '100%')
.attr('data-size', stat.size.toString())
.attr('data-align', $el.attr('data-align') || 'center')
.attr('data-attachment-id', attachmentId);
$el.replaceWith($newDiv);
unwrapFromParagraph($, $newDiv); unwrapFromParagraph($, $newDiv);
} }
} else if (tagName === 'a') {
const href = cleanUrlString($el.attr('href') ?? ''); // rewrite other attachments via <a>
for (const aEl of $('a').toArray()) {
const $a = $(aEl);
const href = cleanUrlString($a.attr('href') ?? '')!;
if (!href || href.startsWith('http')) continue; if (!href || href.startsWith('http')) continue;
const relPath = getCachedResolvedPath(href); const relPath = resolveRelativeAttachmentPath(
href,
pageDir,
attachmentCandidates,
);
if (!relPath) continue; if (!relPath) continue;
// Check if this is a Draw.io file // Check if this is a Draw.io file
@@ -391,32 +364,30 @@ export class ImportAttachmentService {
.attr('data-align', 'center') .attr('data-align', 'center')
.attr('data-attachment-id', drawioSvg.attachmentId); .attr('data-attachment-id', drawioSvg.attachmentId);
$el.replaceWith($drawio); $a.replaceWith($drawio);
unwrapFromParagraph($, $drawio); unwrapFromParagraph($, $drawio);
continue; continue;
} }
// Skip files that should be ignored // Skip files that should be ignored
if (skipFiles.has(relPath)) { if (skipFiles.has(relPath)) {
$el.remove(); $a.remove();
continue; continue;
} }
const { attachmentId, apiFilePath, abs } = processFile(relPath); const { attachmentId, apiFilePath, abs } = processFile(relPath);
const stat = await getCachedStat(abs);
const ext = path.extname(relPath).toLowerCase(); const ext = path.extname(relPath).toLowerCase();
if (ext === '.mp4') { if (ext === '.mp4') {
const $video = $('<video>') const $video = $('<video>')
.attr('src', apiFilePath) .attr('src', apiFilePath)
.attr('data-attachment-id', attachmentId) .attr('data-attachment-id', attachmentId)
.attr('data-size', stat.size.toString())
.attr('width', '100%') .attr('width', '100%')
.attr('data-align', 'center'); .attr('data-align', 'center');
$el.replaceWith($video); $a.replaceWith($video);
unwrapFromParagraph($, $video); unwrapFromParagraph($, $video);
} else { } else {
const confAliasName = $el.attr('data-linked-resource-default-alias'); const confAliasName = $a.attr('data-linked-resource-default-alias');
let attachmentName = path.basename(abs); let attachmentName = path.basename(abs);
if (confAliasName) attachmentName = confAliasName; if (confAliasName) attachmentName = confAliasName;
@@ -425,13 +396,44 @@ export class ImportAttachmentService {
.attr('data-attachment-url', apiFilePath) .attr('data-attachment-url', apiFilePath)
.attr('data-attachment-name', attachmentName) .attr('data-attachment-name', attachmentName)
.attr('data-attachment-mime', getMimeType(abs)) .attr('data-attachment-mime', getMimeType(abs))
.attr('data-attachment-size', stat.size.toString())
.attr('data-attachment-id', attachmentId); .attr('data-attachment-id', attachmentId);
$el.replaceWith($div); $a.replaceWith($div);
unwrapFromParagraph($, $div); unwrapFromParagraph($, $div);
} }
} }
// excalidraw and drawio
for (const type of ['excalidraw', 'drawio'] as const) {
for (const el of $(`div[data-type="${type}"]`).toArray()) {
const $oldDiv = $(el);
const rawSrc = cleanUrlString($oldDiv.attr('data-src') ?? '')!;
if (!rawSrc || rawSrc.startsWith('http')) continue;
const relPath = resolveRelativeAttachmentPath(
rawSrc,
pageDir,
attachmentCandidates,
);
if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const fileName = path.basename(abs);
const width = $oldDiv.attr('data-width') || '100%';
const align = $oldDiv.attr('data-align') || 'center';
const $newDiv = $('<div>')
.attr('data-type', type)
.attr('data-src', apiFilePath)
.attr('data-title', fileName)
.attr('data-width', width)
.attr('data-align', align)
.attr('data-attachment-id', attachmentId);
$oldDiv.replaceWith($newDiv);
unwrapFromParagraph($, $newDiv);
}
} }
// Collect all attachment IDs in the HTML in a single DOM traversal - O(n) // Collect all attachment IDs in the HTML in a single DOM traversal - O(n)
@@ -492,9 +494,6 @@ export class ImportAttachmentService {
// This attachment was in the list but not referenced in HTML - add it // This attachment was in the list but not referenced in HTML - add it
const { attachmentId, apiFilePath, abs } = processFile(href); const { attachmentId, apiFilePath, abs } = processFile(href);
try {
const stat = await fs.stat(abs);
const mime = mimeType || getMimeType(abs); const mime = mimeType || getMimeType(abs);
// Add as attachment node at the end // Add as attachment node at the end
@@ -503,13 +502,9 @@ export class ImportAttachmentService {
.attr('data-attachment-url', apiFilePath) .attr('data-attachment-url', apiFilePath)
.attr('data-attachment-name', fileName) .attr('data-attachment-name', fileName)
.attr('data-attachment-mime', mime) .attr('data-attachment-mime', mime)
.attr('data-attachment-size', stat.size.toString())
.attr('data-attachment-id', attachmentId); .attr('data-attachment-id', attachmentId);
$.root().append($attachmentDiv); $.root().append($attachmentDiv);
} catch (error) {
this.logger.error(`Failed to process attachment ${fileName}:`, error);
}
} }
// wait for all uploads & DB inserts // wait for all uploads & DB inserts
@@ -534,16 +529,49 @@ export class ImportAttachmentService {
} }
} }
// Post-process DOM elements to add file sizes after uploads complete
// This avoids blocking file operations during initial DOM processing
const elementsNeedingSize = $('[data-attachment-id]:not([data-size])');
for (const element of elementsNeedingSize.toArray()) {
const $el = $(element);
const attachmentId = $el.attr('data-attachment-id');
if (!attachmentId) continue;
// Find the corresponding processed file info
const processedEntry = Array.from(processed.values()).find(
(entry) => entry.attachmentId === attachmentId,
);
if (processedEntry) {
try {
const stat = await fs.stat(processedEntry.abs);
$el.attr('data-size', stat.size.toString());
} catch (error) {
this.logger.debug(
`Could not get size for ${processedEntry.abs}:`,
error,
);
}
}
}
return $.root().html() || ''; return $.root().html() || '';
} }
private analyzeAttachments(attachments: AttachmentInfo[]): { private analyzeAttachments(
attachments: AttachmentInfo[],
isConfluenceImport?: boolean,
): {
drawioPairs: Map<string, DrawioPair>; drawioPairs: Map<string, DrawioPair>;
skipFiles: Set<string>; skipFiles: Set<string>;
} { } {
const drawioPairs = new Map<string, DrawioPair>(); const drawioPairs = new Map<string, DrawioPair>();
const skipFiles = new Set<string>(); const skipFiles = new Set<string>();
if (!isConfluenceImport) {
return { drawioPairs, skipFiles };
}
// Group attachments by type // Group attachments by type
const drawioFiles: AttachmentInfo[] = []; const drawioFiles: AttachmentInfo[] = [];
const pngByBaseName = new Map<string, AttachmentInfo[]>(); const pngByBaseName = new Map<string, AttachmentInfo[]>();
@@ -776,7 +804,10 @@ export class ImportAttachmentService {
for (let attempt = 1; attempt <= this.MAX_RETRIES; attempt++) { for (let attempt = 1; attempt <= this.MAX_RETRIES; attempt++) {
try { try {
const fileStream = createReadStream(abs); const fileStream = createReadStream(abs);
await this.storageService.uploadStream(storageFilePath, fileStream); await this.storageService.uploadStream(storageFilePath, fileStream, {
recreateClient: true,
});
const stat = await fs.stat(abs); const stat = await fs.stat(abs);
await this.db await this.db
@@ -804,10 +835,10 @@ export class ImportAttachmentService {
QueueJob.ATTACHMENT_INDEX_CONTENT, QueueJob.ATTACHMENT_INDEX_CONTENT,
{ attachmentId }, { attachmentId },
{ {
attempts: 2, attempts: 1,
backoff: { backoff: {
type: 'exponential', type: 'exponential',
delay: 30 * 1000, delay: 3 * 60 * 1000,
}, },
deduplication: { deduplication: {
id: attachmentId, id: attachmentId,
@@ -4,6 +4,11 @@ import { v7 } from 'uuid';
import { InsertableBacklink } from '@docmost/db/types/entity.types'; import { InsertableBacklink } from '@docmost/db/types/entity.types';
import { Cheerio, CheerioAPI, load } from 'cheerio'; import { Cheerio, CheerioAPI, load } from 'cheerio';
/**
 * Reports whether `text` starts with a non-ASCII code point (greater than 127).
 * Only the first code point is inspected; an empty string yields `false`.
 * Used to decide whether an icon string looks like an emoji/Unicode symbol.
 */
function isUnicodeCharacter(text: string): boolean {
  const firstCodePoint = text.codePointAt(0);
  if (firstCodePoint === undefined) {
    return false;
  }
  return firstCodePoint > 127;
}
export async function formatImportHtml(opts: { export async function formatImportHtml(opts: {
html: string; html: string;
currentFilePath: string; currentFilePath: string;
@@ -16,7 +21,11 @@ export async function formatImportHtml(opts: {
workspaceId: string; workspaceId: string;
pageDir?: string; pageDir?: string;
attachmentCandidates?: string[]; attachmentCandidates?: string[];
}): Promise<{ html: string; backlinks: InsertableBacklink[] }> { }): Promise<{
html: string;
backlinks: InsertableBacklink[];
pageIcon?: string;
}> {
const { const {
html, html,
currentFilePath, currentFilePath,
@@ -28,6 +37,17 @@ export async function formatImportHtml(opts: {
const $: CheerioAPI = load(html); const $: CheerioAPI = load(html);
const $root: Cheerio<any> = $.root(); const $root: Cheerio<any> = $.root();
let pageIcon: string | null = null;
// extract notion page icon
const headerIconSpan = $root.find('header .page-header-icon .icon');
if (headerIconSpan.length > 0) {
const iconText = headerIconSpan.text().trim();
if (iconText && isUnicodeCharacter(iconText)) {
pageIcon = iconText;
}
}
notionFormatter($, $root); notionFormatter($, $root);
defaultHtmlFormatter($, $root); defaultHtmlFormatter($, $root);
@@ -44,6 +64,7 @@ export async function formatImportHtml(opts: {
return { return {
html: $root.html() || '', html: $root.html() || '',
backlinks, backlinks,
pageIcon: pageIcon || undefined,
}; };
} }
@@ -69,6 +90,10 @@ export function defaultHtmlFormatter($: CheerioAPI, $root: Cheerio<any>) {
} }
export function notionFormatter($: CheerioAPI, $root: Cheerio<any>) { export function notionFormatter($: CheerioAPI, $root: Cheerio<any>) {
// remove page header icon and cover image
$root.find('.page-header-icon').remove();
$root.find('.page-cover-image').remove();
// remove empty description paragraphs // remove empty description paragraphs
$root.find('p.page-description').each((_, el) => { $root.find('p.page-description').each((_, el) => {
if (!$(el).text().trim()) $(el).remove(); if (!$(el).text().trim()) $(el).remove();
@@ -189,6 +214,9 @@ export function notionFormatter($: CheerioAPI, $root: Cheerio<any>) {
$fig.replaceWith($newAnchor); $fig.replaceWith($newAnchor);
}); });
// remove user icons
$root.find('span.user img.user-icon').remove();
// remove toc // remove toc
$root.find('nav.table_of_contents').remove(); $root.find('nav.table_of_contents').remove();
} }
@@ -28,7 +28,7 @@ export class LocalDriver implements StorageDriver {
} }
} }
async uploadStream(filePath: string, file: Readable): Promise<void> { async uploadStream(filePath: string, file: Readable, options?: { recreateClient?: boolean }): Promise<void> {
try { try {
const fullPath = this._fullPath(filePath); const fullPath = this._fullPath(filePath);
await fs.mkdir(dirname(fullPath), { recursive: true }); await fs.mkdir(dirname(fullPath), { recursive: true });
@@ -41,12 +41,26 @@ export class S3Driver implements StorageDriver {
} }
} }
async uploadStream(filePath: string, file: Readable): Promise<void> { async uploadStream(
filePath: string,
file: Readable,
options?: { recreateClient?: boolean },
): Promise<void> {
let clientToUse = this.s3Client;
let shouldDestroyClient = false;
// optionally recreate client to avoid socket hang errors
// (during multi-attachments imports)
if (options?.recreateClient) {
clientToUse = new S3Client(this.config as any);
shouldDestroyClient = true;
}
try { try {
const contentType = getMimeType(filePath); const contentType = getMimeType(filePath);
const upload = new Upload({ const upload = new Upload({
client: this.s3Client, client: clientToUse,
params: { params: {
Bucket: this.config.bucket, Bucket: this.config.bucket,
Key: filePath, Key: filePath,
@@ -58,6 +72,10 @@ export class S3Driver implements StorageDriver {
await upload.done(); await upload.done();
} catch (err) { } catch (err) {
throw new Error(`Failed to upload file: ${(err as Error).message}`); throw new Error(`Failed to upload file: ${(err as Error).message}`);
} finally {
if (shouldDestroyClient && clientToUse) {
clientToUse.destroy();
}
} }
} }
@@ -3,7 +3,7 @@ import { Readable } from 'stream';
export interface StorageDriver { export interface StorageDriver {
upload(filePath: string, file: Buffer): Promise<void>; upload(filePath: string, file: Buffer): Promise<void>;
uploadStream(filePath: string, file: Readable): Promise<void>; uploadStream(filePath: string, file: Readable, options?: { recreateClient?: boolean }): Promise<void>;
copy(fromFilePath: string, toFilePath: string): Promise<void>; copy(fromFilePath: string, toFilePath: string): Promise<void>;
@@ -15,8 +15,8 @@ export class StorageService {
this.logger.debug(`File uploaded successfully. Path: ${filePath}`); this.logger.debug(`File uploaded successfully. Path: ${filePath}`);
} }
async uploadStream(filePath: string, fileContent: Readable) { async uploadStream(filePath: string, fileContent: Readable, options?: { recreateClient?: boolean }) {
await this.storageDriver.uploadStream(filePath, fileContent); await this.storageDriver.uploadStream(filePath, fileContent, options);
this.logger.debug(`File uploaded successfully. Path: ${filePath}`); this.logger.debug(`File uploaded successfully. Path: ${filePath}`);
} }
+1 -1
View File
@@ -1,7 +1,7 @@
{ {
"name": "docmost", "name": "docmost",
"homepage": "https://docmost.com", "homepage": "https://docmost.com",
"version": "0.23.0", "version": "0.23.1",
"private": true, "private": true,
"scripts": { "scripts": {
"build": "nx run-many -t build", "build": "nx run-many -t build",