Merge branch 'main' into tiptap3-migration

This commit is contained in:
Philipinho
2025-09-26 18:53:18 +01:00
60 changed files with 1665 additions and 543 deletions
+3 -1
View File
@@ -72,7 +72,9 @@ export function extractDateFromUuid7(uuid7: string) {
}
/**
 * Builds a storage-friendly file name from a user-supplied one.
 *
 * The name is first passed through `sanitize` (imported elsewhere in this
 * module), then every space and every `#` is replaced with `_`, and the
 * result is truncated to at most 255 characters.
 *
 * @param fileName - the original file name
 * @returns the sanitized name, never longer than 255 characters
 */
export function sanitizeFileName(fileName: string): string {
  // Single declaration: the older space-only variant was a duplicate `const`
  // of the same name and is superseded by this chain.
  const sanitizedFilename = sanitize(fileName)
    .replace(/ /g, '_')
    .replace(/#/g, '_');
  return sanitizedFilename.slice(0, 255);
}
@@ -1,12 +1,12 @@
/**
 * String-valued identifiers for the kinds of attachment handled by the
 * attachment module.
 *
 * NOTE(review): both the `*Logo` and the `*Icon` members are listed here; the
 * remove-icon endpoint and its DTO only reference the `*Icon` members —
 * confirm whether the `*Logo` members are still required.
 */
export enum AttachmentType {
// User profile picture.
Avatar = 'avatar',
WorkspaceLogo = 'workspace-logo',
SpaceLogo = 'space-logo',
// Icon shown for a workspace.
WorkspaceIcon = 'workspace-icon',
// Icon shown for a space.
SpaceIcon = 'space-icon',
// Any other uploaded file.
File = 'file',
}
// Lower-case file extensions (with leading dot) accepted for image uploads.
export const validImageExtensions = ['.jpg', '.png', '.jpeg'];
// Size limit for avatar uploads, as a human-readable string.
// Declared once: the earlier '5MB' declaration of the same name was a duplicate.
// NOTE(review): presumably parsed by the upload handler — confirm the expected format.
export const MAX_AVATAR_SIZE = '10MB';
export const inlineFileExtensions = [
'.jpg',
@@ -1,5 +1,6 @@
import {
BadRequestException,
Body,
Controller,
ForbiddenException,
Get,
@@ -51,6 +52,7 @@ import { EnvironmentService } from '../../integrations/environment/environment.s
import { TokenService } from '../auth/services/token.service';
import { JwtAttachmentPayload, JwtType } from '../auth/dto/jwt-payload';
import * as path from 'path';
import { RemoveIconDto } from './dto/attachment.dto';
@Controller()
export class AttachmentController {
@@ -302,7 +304,7 @@ export class AttachmentController {
throw new BadRequestException('Invalid image attachment type');
}
if (attachmentType === AttachmentType.WorkspaceLogo) {
if (attachmentType === AttachmentType.WorkspaceIcon) {
const ability = this.workspaceAbility.createForUser(user, workspace);
if (
ability.cannot(
@@ -314,7 +316,7 @@ export class AttachmentController {
}
}
if (attachmentType === AttachmentType.SpaceLogo) {
if (attachmentType === AttachmentType.SpaceIcon) {
if (!spaceId) {
throw new BadRequestException('spaceId is required');
}
@@ -372,8 +374,59 @@ export class AttachmentController {
});
return res.send(fileStream);
} catch (err) {
this.logger.error(err);
// this.logger.error(err);
throw new NotFoundException('File not found');
}
}
/**
 * POST `attachments/remove-icon` — clears an avatar, space icon or workspace
 * icon, depending on `dto.type`. Guarded by `JwtAuthGuard`; answers with
 * HTTP 200 and no body.
 *
 * - Avatar: always acts on the authenticated user.
 * - Space icon: needs `dto.spaceId` and Manage permission on the space settings.
 * - Workspace icon: needs Manage permission on the workspace settings.
 *
 * @throws BadRequestException when a space icon is requested without `spaceId`
 * @throws ForbiddenException when the user lacks the Manage permission
 */
@UseGuards(JwtAuthGuard)
@HttpCode(HttpStatus.OK)
@Post('attachments/remove-icon')
async removeIcon(
  @Body() dto: RemoveIconDto,
  @AuthUser() user: User,
  @AuthWorkspace() workspace: Workspace,
) {
  switch (dto.type) {
    case AttachmentType.Avatar: {
      // The caller can only ever remove their own avatar.
      await this.attachmentService.removeUserAvatar(user);
      return;
    }

    case AttachmentType.SpaceIcon: {
      const targetSpaceId = dto.spaceId;
      if (!targetSpaceId) {
        throw new BadRequestException(
          'spaceId is required to change space icons',
        );
      }

      const abilityForSpace = await this.spaceAbility.createForUser(
        user,
        targetSpaceId,
      );
      const denied = abilityForSpace.cannot(
        SpaceCaslAction.Manage,
        SpaceCaslSubject.Settings,
      );
      if (denied) {
        throw new ForbiddenException();
      }

      await this.attachmentService.removeSpaceIcon(targetSpaceId, workspace.id);
      return;
    }

    case AttachmentType.WorkspaceIcon: {
      const abilityForWorkspace = this.workspaceAbility.createForUser(
        user,
        workspace,
      );
      const denied = abilityForWorkspace.cannot(
        WorkspaceCaslAction.Manage,
        WorkspaceCaslSubject.Settings,
      );
      if (denied) {
        throw new ForbiddenException();
      }

      await this.attachmentService.removeWorkspaceIcon(workspace);
      return;
    }

    default:
      // Any other type is a no-op.
      return;
  }
}
}
@@ -1,8 +1,8 @@
import { MultipartFile } from '@fastify/multipart';
import { randomBytes } from 'crypto';
import { sanitize } from 'sanitize-filename-ts';
import * as path from 'path';
import { AttachmentType } from './attachment.constants';
import { sanitizeFileName } from '../../common/helpers';
import * as sharp from 'sharp';
export interface PreparedFile {
buffer: Buffer;
@@ -22,10 +22,8 @@ export async function prepareFile(
}
try {
const rand = randomBytes(8).toString('hex');
const buffer = await file.toBuffer();
const sanitizedFilename = sanitize(file.filename).replace(/ /g, '_');
const sanitizedFilename = sanitizeFileName(file.filename);
const fileName = sanitizedFilename.slice(0, 255);
const fileSize = buffer.length;
const fileExtension = path.extname(file.filename).toLowerCase();
@@ -58,9 +56,9 @@ export function getAttachmentFolderPath(
switch (type) {
case AttachmentType.Avatar:
return `${workspaceId}/avatars`;
case AttachmentType.WorkspaceLogo:
return `${workspaceId}/workspace-logo`;
case AttachmentType.SpaceLogo:
case AttachmentType.WorkspaceIcon:
return `${workspaceId}/workspace-logos`;
case AttachmentType.SpaceIcon:
return `${workspaceId}/space-logos`;
case AttachmentType.File:
return `${workspaceId}/files`;
@@ -70,3 +68,51 @@ export function getAttachmentFolderPath(
}
export const validAttachmentTypes = Object.values(AttachmentType);
/**
 * Shrinks and re-encodes an uploaded icon/avatar image.
 *
 * - The image is scaled down to fit inside 300x300 (aspect ratio kept); images
 *   already within that box are never enlarged.
 * - PNG input stays PNG. For avatars the alpha channel is flattened onto a
 *   white background first; other attachment types keep their transparency.
 * - Any other input format is re-encoded as a progressive JPEG.
 *
 * Errors raised by sharp (e.g. unreadable input) propagate to the caller.
 *
 * @param buffer - raw bytes of the uploaded image
 * @param attachmentType - controls avatar-only flattening of PNG transparency
 * @returns the processed image bytes
 */
export async function compressAndResizeIcon(
  buffer: Buffer,
  attachmentType?: AttachmentType,
): Promise<Buffer> {
  const targetWidth = 300;
  const targetHeight = 300;

  // rotate() without an angle bakes the EXIF orientation into the pixels.
  // The re-encode below drops metadata, so without this step photos taken on
  // phones would come out sideways.
  let sharpInstance = sharp(buffer).rotate();

  // metadata() describes the input header; width/height may be absent.
  const metadata = await sharpInstance.metadata();
  const sourceWidth = metadata.width ?? 0;
  const sourceHeight = metadata.height ?? 0;

  // Only resize if image is larger than target dimensions
  if (sourceWidth > targetWidth || sourceHeight > targetHeight) {
    sharpInstance = sharpInstance.resize(targetWidth, targetHeight, {
      fit: 'inside',
      withoutEnlargement: true,
    });
  }

  // Handle based on original format
  if (metadata.format === 'png') {
    // Only flatten avatars to remove transparency
    if (attachmentType === AttachmentType.Avatar) {
      sharpInstance = sharpInstance.flatten({
        background: { r: 255, g: 255, b: 255 },
      });
    }

    return sharpInstance
      .png({
        quality: 85,
        compressionLevel: 6,
      })
      .toBuffer();
  }

  return sharpInstance
    .jpeg({
      quality: 85,
      progressive: true,
      mozjpeg: true,
    })
    .toBuffer();
}
@@ -0,0 +1,17 @@
import { IsEnum, IsIn, IsNotEmpty, IsOptional, IsUUID } from 'class-validator';
import { AttachmentType } from '../attachment.constants';
/**
 * Request body for the remove-icon endpoint.
 */
export class RemoveIconDto {
  /**
   * Which icon to remove. Restricted to avatar, space icon and workspace
   * icon; other `AttachmentType` values are rejected by `@IsIn`.
   */
  @IsEnum(AttachmentType)
  @IsIn([
    AttachmentType.Avatar,
    AttachmentType.SpaceIcon,
    AttachmentType.WorkspaceIcon,
  ])
  @IsNotEmpty()
  type: AttachmentType;

  /**
   * UUID of the space whose icon is removed. Validation lets it be omitted,
   * so the property is typed as optional to match what can arrive at runtime.
   */
  @IsOptional()
  @IsUUID()
  spaceId?: string;
}
@@ -1,3 +0,0 @@
import { IsOptional, IsString, IsUUID } from 'class-validator';
// Placeholder DTO: declares no fields and therefore performs no validation.
export class AvatarUploadDto {}
@@ -1,7 +0,0 @@
import { IsNotEmpty, IsString } from 'class-validator';
/** DTO carrying the identifier of the attachment being requested. */
export class GetFileDto {
// Must be a non-empty string; no further format check is applied here.
@IsString()
@IsNotEmpty()
attachmentId: string;
}
@@ -1,20 +0,0 @@
import {
IsDefined,
IsNotEmpty,
IsOptional,
IsString,
IsUUID,
} from 'class-validator';
/** DTO describing a file upload request. */
export class UploadFileDto {
// Validated only as a non-empty string, not against a fixed set of values.
@IsString()
@IsNotEmpty()
attachmentType: string;
// May be omitted; when present it must be a UUID.
@IsOptional()
@IsUUID()
pageId: string;
// The uploaded file; only its presence is checked (typed as `any`).
@IsDefined()
file: any;
}
@@ -7,6 +7,7 @@ import {
import { StorageService } from '../../../integrations/storage/storage.service';
import { MultipartFile } from '@fastify/multipart';
import {
compressAndResizeIcon,
getAttachmentFolderPath,
PreparedFile,
prepareFile,
@@ -16,7 +17,7 @@ import { v4 as uuid4, v7 as uuid7 } from 'uuid';
import { AttachmentRepo } from '@docmost/db/repos/attachment/attachment.repo';
import { AttachmentType, validImageExtensions } from '../attachment.constants';
import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
import { Attachment } from '@docmost/db/types/entity.types';
import { Attachment, User, Workspace } from '@docmost/db/types/entity.types';
import { InjectKysely } from 'nestjs-kysely';
import { executeTx } from '@docmost/db/utils';
import { UserRepo } from '@docmost/db/repos/user/user.repo';
@@ -132,8 +133,8 @@ export class AttachmentService {
filePromise: Promise<MultipartFile>,
type:
| AttachmentType.Avatar
| AttachmentType.WorkspaceLogo
| AttachmentType.SpaceLogo,
| AttachmentType.WorkspaceIcon
| AttachmentType.SpaceIcon,
userId: string,
workspaceId: string,
spaceId?: string,
@@ -141,6 +142,9 @@ export class AttachmentService {
const preparedFile: PreparedFile = await prepareFile(filePromise);
validateFileType(preparedFile.fileExtension, validImageExtensions);
const processedBuffer = await compressAndResizeIcon(preparedFile.buffer, type);
preparedFile.buffer = processedBuffer;
preparedFile.fileSize = processedBuffer.length;
preparedFile.fileName = uuid4() + preparedFile.fileExtension;
const filePath = `${getAttachmentFolderPath(type, workspaceId)}/${preparedFile.fileName}`;
@@ -174,7 +178,7 @@ export class AttachmentService {
workspaceId,
trx,
);
} else if (type === AttachmentType.WorkspaceLogo) {
} else if (type === AttachmentType.WorkspaceIcon) {
const workspace = await this.workspaceRepo.findById(workspaceId, {
trx,
});
@@ -186,7 +190,7 @@ export class AttachmentService {
workspaceId,
trx,
);
} else if (type === AttachmentType.SpaceLogo && spaceId) {
} else if (type === AttachmentType.SpaceIcon && spaceId) {
const space = await this.spaceRepo.findById(spaceId, workspaceId, {
trx,
});
@@ -205,7 +209,6 @@ export class AttachmentService {
});
} catch (err) {
// delete uploaded file on db update failure
this.logger.error('Image upload error:', err);
await this.deleteRedundantFile(filePath);
throw new BadRequestException('Failed to upload image');
}
@@ -389,4 +392,40 @@ export class AttachmentService {
}
}
/**
 * Clears the avatar of the given user.
 *
 * When the stored avatar value does not start with "http" (case-insensitive)
 * it is treated as a file name inside the workspace's avatar folder and that
 * file is handed to `deleteRedundantFile` first. The user's `avatarUrl` is
 * then set to null in every case.
 */
async removeUserAvatar(user: User) {
  const currentAvatar = user.avatarUrl;
  const isStoredFile =
    currentAvatar && !currentAvatar.toLowerCase().startsWith('http');

  if (isStoredFile) {
    const avatarFolder = getAttachmentFolderPath(
      AttachmentType.Avatar,
      user.workspaceId,
    );
    await this.deleteRedundantFile(`${avatarFolder}/${currentAvatar}`);
  }

  const cleared = { avatarUrl: null };
  await this.userRepo.updateUser(cleared, user.id, user.workspaceId);
}
/**
 * Clears the icon of a space.
 *
 * The space is looked up by id within the workspace. If its `logo` value does
 * not start with "http" (case-insensitive) it is treated as a file name in
 * the space-icon folder and passed to `deleteRedundantFile`. The space's
 * `logo` is then set to null.
 *
 * @throws NotFoundException when the space does not exist in the workspace
 */
async removeSpaceIcon(spaceId: string, workspaceId: string) {
  const targetSpace = await this.spaceRepo.findById(spaceId, workspaceId);
  if (!targetSpace) {
    throw new NotFoundException('Space not found');
  }

  const currentLogo = targetSpace.logo;
  const isStoredFile =
    currentLogo && !currentLogo.toLowerCase().startsWith('http');

  if (isStoredFile) {
    const iconFolder = getAttachmentFolderPath(
      AttachmentType.SpaceIcon,
      workspaceId,
    );
    await this.deleteRedundantFile(`${iconFolder}/${currentLogo}`);
  }

  await this.spaceRepo.updateSpace({ logo: null }, spaceId, workspaceId);
}
/**
 * Clears the icon of a workspace.
 *
 * A `logo` value that does not start with "http" (case-insensitive) is
 * treated as a file name in the workspace-icon folder and passed to
 * `deleteRedundantFile`; afterwards the workspace's `logo` is set to null.
 */
async removeWorkspaceIcon(workspace: Workspace) {
  const currentLogo = workspace.logo;
  const isStoredFile =
    currentLogo && !currentLogo.toLowerCase().startsWith('http');

  if (isStoredFile) {
    const iconFolder = getAttachmentFolderPath(
      AttachmentType.WorkspaceIcon,
      workspace.id,
    );
    await this.deleteRedundantFile(`${iconFolder}/${currentLogo}`);
  }

  await this.workspaceRepo.updateWorkspace({ logo: null }, workspace.id);
}
}
@@ -2,7 +2,7 @@ import { Module } from '@nestjs/common';
import { ImportService } from './services/import.service';
import { ImportController } from './import.controller';
import { StorageModule } from '../storage/storage.module';
import { FileTaskService } from './services/file-task.service';
import { FileImportTaskService } from './services/file-import-task.service';
import { FileTaskProcessor } from './processors/file-task.processor';
import { ImportAttachmentService } from './services/import-attachment.service';
import { FileTaskController } from './file-task.controller';
@@ -11,7 +11,7 @@ import { PageModule } from '../../core/page/page.module';
@Module({
providers: [
ImportService,
FileTaskService,
FileImportTaskService,
FileTaskProcessor,
ImportAttachmentService,
],
@@ -2,7 +2,7 @@ import { Logger, OnModuleDestroy } from '@nestjs/common';
import { OnWorkerEvent, Processor, WorkerHost } from '@nestjs/bullmq';
import { Job } from 'bullmq';
import { QueueJob, QueueName } from 'src/integrations/queue/constants';
import { FileTaskService } from '../services/file-task.service';
import { FileImportTaskService } from '../services/file-import-task.service';
import { FileTaskStatus } from '../utils/file.utils';
import { StorageService } from '../../storage/storage.service';
@@ -11,7 +11,7 @@ export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
private readonly logger = new Logger(FileTaskProcessor.name);
constructor(
private readonly fileTaskService: FileTaskService,
private readonly fileTaskService: FileImportTaskService,
private readonly storageService: StorageService,
) {
super();
@@ -41,15 +41,40 @@ export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
/**
 * Worker 'failed' hook: logs the failure, including the import task ID taken
 * from the job payload, then delegates follow-up work to `handleFailedJob`.
 */
@OnWorkerEvent('failed')
async onFailed(job: Job) {
  // Exactly one message argument: two adjacent template literals would parse
  // as a tagged template and throw at runtime.
  this.logger.error(
    `Error processing ${job.name} job. Import Task ID: ${job.data.fileTaskId}. Reason: ${job.failedReason}`,
  );
  await this.handleFailedJob(job);
}
/**
 * Worker 'completed' hook: logs the completion and then tries to delete the
 * file referenced by the finished file task from storage. Cleanup is
 * best-effort — a failure is logged and not rethrown.
 */
@OnWorkerEvent('completed')
async onCompleted(job: Job) {
  const { fileTaskId } = job.data;
  this.logger.log(`Completed ${job.name} job for File task ID ${fileTaskId}`);

  try {
    const finishedTask = await this.fileTaskService.getFileTask(fileTaskId);
    if (!finishedTask) {
      // Nothing to clean up when the task record cannot be found.
      return;
    }

    const zipPath = finishedTask.filePath;
    await this.storageService.delete(zipPath);
    this.logger.debug(`Deleted imported zip file: ${zipPath}`);
  } catch (cleanupError) {
    this.logger.error(`Failed to delete imported zip file:`, cleanupError);
  }
}
private async handleFailedJob(job: Job) {
try {
const fileTaskId = job.data.fileTaskId;
const reason = job.failedReason || 'Unknown error';
await this.fileTaskService.updateTaskStatus(
fileTaskId,
FileTaskStatus.Failed,
job.failedReason,
reason,
);
const fileTask = await this.fileTaskService.getFileTask(fileTaskId);
@@ -61,13 +86,6 @@ export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
}
}
// Worker 'completed' hook (synchronous variant): only logs the job name and
// the file task ID carried in the job payload; performs no cleanup.
@OnWorkerEvent('completed')
onCompleted(job: Job) {
this.logger.log(
`Completed ${job.name} job for File task ID ${job.data.fileTaskId}`,
);
}
async onModuleDestroy(): Promise<void> {
if (this.worker) {
await this.worker.close();
@@ -24,6 +24,7 @@ import { formatImportHtml } from '../utils/import-formatter';
import {
buildAttachmentCandidates,
collectMarkdownAndHtmlFiles,
stripNotionID,
} from '../utils/import.utils';
import { executeTx } from '@docmost/db/utils';
import { BacklinkRepo } from '@docmost/db/repos/backlink/backlink.repo';
@@ -33,8 +34,8 @@ import { PageService } from '../../../core/page/services/page.service';
import { ImportPageNode } from '../dto/file-task-dto';
@Injectable()
export class FileTaskService {
private readonly logger = new Logger(FileTaskService.name);
export class FileImportTaskService {
private readonly logger = new Logger(FileImportTaskService.name);
constructor(
private readonly storageService: StorageService,
@@ -159,17 +160,12 @@ export class FileTaskService {
.split(path.sep)
.join('/'); // normalize to forward-slashes
const ext = path.extname(relPath).toLowerCase();
let content = await fs.readFile(absPath, 'utf-8');
if (ext.toLowerCase() === '.md') {
content = await markdownToHtml(content);
}
pagesMap.set(relPath, {
id: v7(),
slugId: generateSlugId(),
name: path.basename(relPath, ext),
content,
name: stripNotionID(path.basename(relPath, ext)),
content: '',
parentPageId: null,
fileExtension: ext,
filePath: relPath,
@@ -254,70 +250,160 @@ export class FileTaskService {
});
});
const pageResults = await Promise.all(
Array.from(pagesMap.values()).map(async (page) => {
const htmlContent =
await this.importAttachmentService.processAttachments({
html: page.content,
pageRelativePath: page.filePath,
extractDir,
pageId: page.id,
fileTask,
attachmentCandidates,
});
// Group pages by level (topological sort for parent-child relationships)
const pagesByLevel = new Map<number, Array<[string, ImportPageNode]>>();
const pageLevel = new Map<string, number>();
const { html, backlinks } = await formatImportHtml({
html: htmlContent,
currentFilePath: page.filePath,
filePathToPageMetaMap: filePathToPageMetaMap,
creatorId: fileTask.creatorId,
sourcePageId: page.id,
workspaceId: fileTask.workspaceId,
});
// Calculate levels using BFS
// Assigns every page in `pagesMap` a depth (0 for pages without a parent,
// parent depth + 1 for children) via breadth-first traversal, recording it in
// `pageLevel`, and then buckets the pages into `pagesByLevel`.
// Pages whose parent is never reached (e.g. the parent id is not in the map)
// get no `pageLevel` entry and fall into level 0.
const calculateLevels = () => {
  const queue: Array<{ filePath: string; level: number }> = [];

  // Index child file paths by parent page id once, so the traversal does not
  // rescan the whole map for every dequeued page.
  const childPathsByParentId = new Map<string, string[]>();

  for (const [filePath, page] of pagesMap.entries()) {
    if (!page.parentPageId) {
      // Root pages (no parent) seed the traversal.
      pageLevel.set(filePath, 0);
      queue.push({ filePath, level: 0 });
      continue;
    }

    const siblings = childPathsByParentId.get(page.parentPageId);
    if (siblings) {
      siblings.push(filePath);
    } else {
      childPathsByParentId.set(page.parentPageId, [filePath]);
    }
  }

  // BFS with a moving cursor instead of Array.shift(), which is O(n) per call.
  for (let head = 0; head < queue.length; head++) {
    const { filePath, level } = queue[head];
    const currentPage = pagesMap.get(filePath);
    if (!currentPage) continue;

    for (const childFilePath of childPathsByParentId.get(currentPage.id) ??
      []) {
      if (pageLevel.has(childFilePath)) continue;
      pageLevel.set(childFilePath, level + 1);
      queue.push({ filePath: childFilePath, level: level + 1 });
    }
  }

  // Group pages by level, keeping the map's iteration order within a level.
  for (const [filePath, page] of pagesMap.entries()) {
    const level = pageLevel.get(filePath) ?? 0;
    const bucket = pagesByLevel.get(level);
    if (bucket) {
      bucket.push([filePath, page]);
    } else {
      pagesByLevel.set(level, [[filePath, page]]);
    }
  }
};
calculateLevels();
if (pagesMap.size < 1) return;
// Process pages level by level sequentially to respect foreign key constraints
const allBacklinks: any[] = [];
const validPageIds = new Set<string>();
let totalPagesProcessed = 0;
// Sort levels to process in order
const sortedLevels = Array.from(pagesByLevel.keys()).sort((a, b) => a - b);
try {
await executeTx(this.db, async (trx) => {
// Process pages level by level sequentially within the transaction
for (const level of sortedLevels) {
const levelPages = pagesByLevel.get(level)!;
for (const [filePath, page] of levelPages) {
const absPath = path.join(extractDir, filePath);
let content = await fs.readFile(absPath, 'utf-8');
if (page.fileExtension.toLowerCase() === '.md') {
content = await markdownToHtml(content);
}
const htmlContent =
await this.importAttachmentService.processAttachments({
html: content,
pageRelativePath: page.filePath,
extractDir,
pageId: page.id,
fileTask,
attachmentCandidates,
});
const { html, backlinks, pageIcon } = await formatImportHtml({
html: htmlContent,
currentFilePath: page.filePath,
filePathToPageMetaMap: filePathToPageMetaMap,
creatorId: fileTask.creatorId,
sourcePageId: page.id,
workspaceId: fileTask.workspaceId,
});
const pmState = getProsemirrorContent(
await this.importService.processHTML(html),
);
const { title, prosemirrorJson } =
this.importService.extractTitleAndRemoveHeading(pmState);
const insertablePage: InsertablePage = {
id: page.id,
slugId: page.slugId,
title: title || page.name,
icon: pageIcon || null,
content: prosemirrorJson,
textContent: jsonToText(prosemirrorJson),
ydoc: await this.importService.createYdoc(prosemirrorJson),
position: page.position!,
spaceId: fileTask.spaceId,
workspaceId: fileTask.workspaceId,
creatorId: fileTask.creatorId,
lastUpdatedById: fileTask.creatorId,
parentPageId: page.parentPageId,
};
await trx.insertInto('pages').values(insertablePage).execute();
// Track valid page IDs and collect backlinks
validPageIds.add(insertablePage.id);
allBacklinks.push(...backlinks);
totalPagesProcessed++;
// Log progress periodically
if (totalPagesProcessed % 50 === 0) {
this.logger.debug(`Processed ${totalPagesProcessed} pages...`);
}
}
}
const filteredBacklinks = allBacklinks.filter(
({ sourcePageId, targetPageId }) =>
validPageIds.has(sourcePageId) && validPageIds.has(targetPageId),
);
const { title, prosemirrorJson } =
this.importService.extractTitleAndRemoveHeading(pmState);
// Insert backlinks in batches
if (filteredBacklinks.length > 0) {
const BACKLINK_BATCH_SIZE = 100;
for (
let i = 0;
i < filteredBacklinks.length;
i += BACKLINK_BATCH_SIZE
) {
const backlinkChunk = filteredBacklinks.slice(
i,
Math.min(i + BACKLINK_BATCH_SIZE, filteredBacklinks.length),
);
await this.backlinkRepo.insertBacklink(backlinkChunk, trx);
}
}
const insertablePage: InsertablePage = {
id: page.id,
slugId: page.slugId,
title: title || page.name,
content: prosemirrorJson,
textContent: jsonToText(prosemirrorJson),
ydoc: await this.importService.createYdoc(prosemirrorJson),
position: page.position!,
spaceId: fileTask.spaceId,
workspaceId: fileTask.workspaceId,
creatorId: fileTask.creatorId,
lastUpdatedById: fileTask.creatorId,
parentPageId: page.parentPageId,
};
return { insertablePage, backlinks };
}),
);
const insertablePages = pageResults.map((r) => r.insertablePage);
const insertableBacklinks = pageResults.flatMap((r) => r.backlinks);
if (insertablePages.length < 1) return;
const validPageIds = new Set(insertablePages.map((row) => row.id));
const filteredBacklinks = insertableBacklinks.filter(
({ sourcePageId, targetPageId }) =>
validPageIds.has(sourcePageId) && validPageIds.has(targetPageId),
);
await executeTx(this.db, async (trx) => {
await trx.insertInto('pages').values(insertablePages).execute();
if (filteredBacklinks.length > 0) {
await this.backlinkRepo.insertBacklink(filteredBacklinks, trx);
}
});
this.logger.log(
`Successfully imported ${totalPagesProcessed} pages with ${filteredBacklinks.length} backlinks`,
);
});
} catch (error) {
this.logger.error('Failed to import files:', error);
throw new Error(`File import failed: ${error?.['message']}`);
}
}
async getFileTask(fileTaskId: string) {
@@ -53,6 +53,7 @@ export class ImportAttachmentService {
fileTask: FileTask;
attachmentCandidates: Map<string, string>;
pageAttachments?: AttachmentInfo[];
isConfluenceImport?: boolean;
}): Promise<string> {
const {
html,
@@ -62,6 +63,7 @@ export class ImportAttachmentService {
fileTask,
attachmentCandidates,
pageAttachments = [],
isConfluenceImport,
} = opts;
const attachmentTasks: (() => Promise<void>)[] = [];
@@ -90,7 +92,10 @@ export class ImportAttachmentService {
>();
// Analyze attachments to identify Draw.io pairs
const { drawioPairs, skipFiles } = this.analyzeAttachments(pageAttachments);
const { drawioPairs, skipFiles } = this.analyzeAttachments(
pageAttachments,
isConfluenceImport,
);
// Map to store processed Draw.io SVGs
const drawioSvgMap = new Map<
@@ -134,7 +139,9 @@ export class ImportAttachmentService {
const stream = Readable.from(svgBuffer);
// Upload to storage
await this.storageService.uploadStream(storageFilePath, stream);
await this.storageService.uploadStream(storageFilePath, stream, {
recreateClient: true,
});
// Insert into database
await this.db
@@ -235,202 +242,197 @@ export class ImportAttachmentService {
const pageDir = path.dirname(pageRelativePath);
const $ = load(html);
// Cache for resolved paths to avoid repeated lookups
const resolvedPathCache = new Map<string, string | null>();
// image
for (const imgEl of $('img').toArray()) {
const $img = $(imgEl);
const src = cleanUrlString($img.attr('src') ?? '')!;
if (!src || src.startsWith('http')) continue;
const getCachedResolvedPath = (rawPath: string): string | null => {
if (resolvedPathCache.has(rawPath)) {
return resolvedPathCache.get(rawPath)!;
}
const resolved = resolveRelativeAttachmentPath(
rawPath,
const relPath = resolveRelativeAttachmentPath(
src,
pageDir,
attachmentCandidates,
);
resolvedPathCache.set(rawPath, resolved);
return resolved;
};
if (!relPath) continue;
// Cache for file stats to avoid repeated file system calls
const statCache = new Map<string, any>();
// Check if this image is part of a Draw.io pair
const drawioSvg = drawioSvgMap.get(relPath);
if (drawioSvg) {
const $drawio = $('<div>')
.attr('data-type', 'drawio')
.attr('data-src', drawioSvg.apiFilePath)
.attr('data-title', 'diagram')
.attr('data-width', '100%')
.attr('data-align', 'center')
.attr('data-attachment-id', drawioSvg.attachmentId);
const getCachedStat = async (absPath: string) => {
if (statCache.has(absPath)) {
return statCache.get(absPath);
$img.replaceWith($drawio);
unwrapFromParagraph($, $drawio);
continue;
}
const stat = await fs.stat(absPath);
statCache.set(absPath, stat);
return stat;
};
// Single DOM traversal for all attachment elements
const selector =
'img, video, div[data-type="attachment"], a, div[data-type="excalidraw"], div[data-type="drawio"]';
const elements = $(selector).toArray();
const { attachmentId, apiFilePath } = processFile(relPath);
for (const element of elements) {
const $el = $(element);
const tagName = element.tagName.toLowerCase();
const width = $img.attr('width') ?? '100%';
const align = $img.attr('data-align') ?? 'center';
// Process based on element type
if (tagName === 'img') {
const src = cleanUrlString($el.attr('src') ?? '');
if (!src || src.startsWith('http')) continue;
$img
.attr('src', apiFilePath)
.attr('data-attachment-id', attachmentId)
.attr('width', width)
.attr('data-align', align);
const relPath = getCachedResolvedPath(src);
if (!relPath) continue;
unwrapFromParagraph($, $img);
}
// Check if this image is part of a Draw.io pair
const drawioSvg = drawioSvgMap.get(relPath);
if (drawioSvg) {
const $drawio = $('<div>')
.attr('data-type', 'drawio')
.attr('data-src', drawioSvg.apiFilePath)
.attr('data-title', 'diagram')
.attr('data-width', '100%')
.attr('data-align', 'center')
.attr('data-attachment-id', drawioSvg.attachmentId);
// video
for (const vidEl of $('video').toArray()) {
const $vid = $(vidEl);
const src = cleanUrlString($vid.attr('src') ?? '')!;
if (!src || src.startsWith('http')) continue;
$el.replaceWith($drawio);
unwrapFromParagraph($, $drawio);
continue;
}
const relPath = resolveRelativeAttachmentPath(
src,
pageDir,
attachmentCandidates,
);
if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const stat = await getCachedStat(abs);
const { attachmentId, apiFilePath } = processFile(relPath);
$el
const width = $vid.attr('width') ?? '100%';
const align = $vid.attr('data-align') ?? 'center';
$vid
.attr('src', apiFilePath)
.attr('data-attachment-id', attachmentId)
.attr('width', width)
.attr('data-align', align);
unwrapFromParagraph($, $vid);
}
// <div data-type="attachment">
for (const el of $('div[data-type="attachment"]').toArray()) {
const $oldDiv = $(el);
const rawUrl = cleanUrlString($oldDiv.attr('data-attachment-url') ?? '')!;
if (!rawUrl || rawUrl.startsWith('http')) continue;
const relPath = resolveRelativeAttachmentPath(
rawUrl,
pageDir,
attachmentCandidates,
);
if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const fileName = path.basename(abs);
const mime = getMimeType(abs);
const $newDiv = $('<div>')
.attr('data-type', 'attachment')
.attr('data-attachment-url', apiFilePath)
.attr('data-attachment-name', fileName)
.attr('data-attachment-mime', mime)
.attr('data-attachment-id', attachmentId);
$oldDiv.replaceWith($newDiv);
unwrapFromParagraph($, $newDiv);
}
// rewrite other attachments via <a>
for (const aEl of $('a').toArray()) {
const $a = $(aEl);
const href = cleanUrlString($a.attr('href') ?? '')!;
if (!href || href.startsWith('http')) continue;
const relPath = resolveRelativeAttachmentPath(
href,
pageDir,
attachmentCandidates,
);
if (!relPath) continue;
// Check if this is a Draw.io file
const drawioSvg = drawioSvgMap.get(relPath);
if (drawioSvg) {
const $drawio = $('<div>')
.attr('data-type', 'drawio')
.attr('data-src', drawioSvg.apiFilePath)
.attr('data-title', 'diagram')
.attr('data-width', '100%')
.attr('data-align', 'center')
.attr('data-attachment-id', drawioSvg.attachmentId);
$a.replaceWith($drawio);
unwrapFromParagraph($, $drawio);
continue;
}
// Skip files that should be ignored
if (skipFiles.has(relPath)) {
$a.remove();
continue;
}
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const ext = path.extname(relPath).toLowerCase();
if (ext === '.mp4') {
const $video = $('<video>')
.attr('src', apiFilePath)
.attr('data-attachment-id', attachmentId)
.attr('data-size', stat.size.toString())
.attr('width', $el.attr('width') ?? '100%')
.attr('data-align', $el.attr('data-align') ?? 'center');
.attr('width', '100%')
.attr('data-align', 'center');
$a.replaceWith($video);
unwrapFromParagraph($, $video);
} else {
const confAliasName = $a.attr('data-linked-resource-default-alias');
let attachmentName = path.basename(abs);
if (confAliasName) attachmentName = confAliasName;
unwrapFromParagraph($, $el);
} else if (tagName === 'video') {
const src = cleanUrlString($el.attr('src') ?? '');
if (!src || src.startsWith('http')) continue;
const $div = $('<div>')
.attr('data-type', 'attachment')
.attr('data-attachment-url', apiFilePath)
.attr('data-attachment-name', attachmentName)
.attr('data-attachment-mime', getMimeType(abs))
.attr('data-attachment-id', attachmentId);
const relPath = getCachedResolvedPath(src);
$a.replaceWith($div);
unwrapFromParagraph($, $div);
}
}
// excalidraw and drawio
for (const type of ['excalidraw', 'drawio'] as const) {
for (const el of $(`div[data-type="${type}"]`).toArray()) {
const $oldDiv = $(el);
const rawSrc = cleanUrlString($oldDiv.attr('data-src') ?? '')!;
if (!rawSrc || rawSrc.startsWith('http')) continue;
const relPath = resolveRelativeAttachmentPath(
rawSrc,
pageDir,
attachmentCandidates,
);
if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const stat = await getCachedStat(abs);
const fileName = path.basename(abs);
$el
.attr('src', apiFilePath)
.attr('data-attachment-id', attachmentId)
.attr('data-size', stat.size.toString())
.attr('width', $el.attr('width') ?? '100%')
.attr('data-align', $el.attr('data-align') ?? 'center');
const width = $oldDiv.attr('data-width') || '100%';
const align = $oldDiv.attr('data-align') || 'center';
unwrapFromParagraph($, $el);
} else if (tagName === 'div') {
const dataType = $el.attr('data-type');
const $newDiv = $('<div>')
.attr('data-type', type)
.attr('data-src', apiFilePath)
.attr('data-title', fileName)
.attr('data-width', width)
.attr('data-align', align)
.attr('data-attachment-id', attachmentId);
if (dataType === 'attachment') {
const rawUrl = cleanUrlString($el.attr('data-attachment-url') ?? '');
if (!rawUrl || rawUrl.startsWith('http')) continue;
const relPath = getCachedResolvedPath(rawUrl);
if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const stat = await getCachedStat(abs);
const fileName = path.basename(abs);
const mime = getMimeType(abs);
const $newDiv = $('<div>')
.attr('data-type', 'attachment')
.attr('data-attachment-url', apiFilePath)
.attr('data-attachment-name', fileName)
.attr('data-attachment-mime', mime)
.attr('data-attachment-size', stat.size.toString())
.attr('data-attachment-id', attachmentId);
$el.replaceWith($newDiv);
unwrapFromParagraph($, $newDiv);
} else if (dataType === 'excalidraw' || dataType === 'drawio') {
const rawSrc = cleanUrlString($el.attr('data-src') ?? '');
if (!rawSrc || rawSrc.startsWith('http')) continue;
const relPath = getCachedResolvedPath(rawSrc);
if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const stat = await getCachedStat(abs);
const fileName = path.basename(abs);
const $newDiv = $('<div>')
.attr('data-type', dataType)
.attr('data-src', apiFilePath)
.attr('data-title', fileName)
.attr('data-width', $el.attr('data-width') || '100%')
.attr('data-size', stat.size.toString())
.attr('data-align', $el.attr('data-align') || 'center')
.attr('data-attachment-id', attachmentId);
$el.replaceWith($newDiv);
unwrapFromParagraph($, $newDiv);
}
} else if (tagName === 'a') {
const href = cleanUrlString($el.attr('href') ?? '');
if (!href || href.startsWith('http')) continue;
const relPath = getCachedResolvedPath(href);
if (!relPath) continue;
// Check if this is a Draw.io file
const drawioSvg = drawioSvgMap.get(relPath);
if (drawioSvg) {
const $drawio = $('<div>')
.attr('data-type', 'drawio')
.attr('data-src', drawioSvg.apiFilePath)
.attr('data-title', 'diagram')
.attr('data-width', '100%')
.attr('data-align', 'center')
.attr('data-attachment-id', drawioSvg.attachmentId);
$el.replaceWith($drawio);
unwrapFromParagraph($, $drawio);
continue;
}
// Skip files that should be ignored
if (skipFiles.has(relPath)) {
$el.remove();
continue;
}
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const stat = await getCachedStat(abs);
const ext = path.extname(relPath).toLowerCase();
if (ext === '.mp4') {
const $video = $('<video>')
.attr('src', apiFilePath)
.attr('data-attachment-id', attachmentId)
.attr('data-size', stat.size.toString())
.attr('width', '100%')
.attr('data-align', 'center');
$el.replaceWith($video);
unwrapFromParagraph($, $video);
} else {
const confAliasName = $el.attr('data-linked-resource-default-alias');
let attachmentName = path.basename(abs);
if (confAliasName) attachmentName = confAliasName;
const $div = $('<div>')
.attr('data-type', 'attachment')
.attr('data-attachment-url', apiFilePath)
.attr('data-attachment-name', attachmentName)
.attr('data-attachment-mime', getMimeType(abs))
.attr('data-attachment-size', stat.size.toString())
.attr('data-attachment-id', attachmentId);
$el.replaceWith($div);
unwrapFromParagraph($, $div);
}
$oldDiv.replaceWith($newDiv);
unwrapFromParagraph($, $newDiv);
}
}
@@ -492,24 +494,17 @@ export class ImportAttachmentService {
// This attachment was in the list but not referenced in HTML - add it
const { attachmentId, apiFilePath, abs } = processFile(href);
const mime = mimeType || getMimeType(abs);
try {
const stat = await fs.stat(abs);
const mime = mimeType || getMimeType(abs);
// Add as attachment node at the end
const $attachmentDiv = $('<div>')
.attr('data-type', 'attachment')
.attr('data-attachment-url', apiFilePath)
.attr('data-attachment-name', fileName)
.attr('data-attachment-mime', mime)
.attr('data-attachment-id', attachmentId);
// Add as attachment node at the end
const $attachmentDiv = $('<div>')
.attr('data-type', 'attachment')
.attr('data-attachment-url', apiFilePath)
.attr('data-attachment-name', fileName)
.attr('data-attachment-mime', mime)
.attr('data-attachment-size', stat.size.toString())
.attr('data-attachment-id', attachmentId);
$.root().append($attachmentDiv);
} catch (error) {
this.logger.error(`Failed to process attachment ${fileName}:`, error);
}
$.root().append($attachmentDiv);
}
// wait for all uploads & DB inserts
@@ -534,16 +529,49 @@ export class ImportAttachmentService {
}
}
// Post-process DOM elements to add file sizes after uploads complete
// This avoids blocking file operations during initial DOM processing
const elementsNeedingSize = $('[data-attachment-id]:not([data-size])');
for (const element of elementsNeedingSize.toArray()) {
const $el = $(element);
const attachmentId = $el.attr('data-attachment-id');
if (!attachmentId) continue;
// Find the corresponding processed file info
const processedEntry = Array.from(processed.values()).find(
(entry) => entry.attachmentId === attachmentId,
);
if (processedEntry) {
try {
const stat = await fs.stat(processedEntry.abs);
$el.attr('data-size', stat.size.toString());
} catch (error) {
this.logger.debug(
`Could not get size for ${processedEntry.abs}:`,
error,
);
}
}
}
return $.root().html() || '';
}
private analyzeAttachments(attachments: AttachmentInfo[]): {
private analyzeAttachments(
attachments: AttachmentInfo[],
isConfluenceImport?: boolean,
): {
drawioPairs: Map<string, DrawioPair>;
skipFiles: Set<string>;
} {
const drawioPairs = new Map<string, DrawioPair>();
const skipFiles = new Set<string>();
if (!isConfluenceImport) {
return { drawioPairs, skipFiles };
}
// Group attachments by type
const drawioFiles: AttachmentInfo[] = [];
const pngByBaseName = new Map<string, AttachmentInfo[]>();
@@ -776,7 +804,10 @@ export class ImportAttachmentService {
for (let attempt = 1; attempt <= this.MAX_RETRIES; attempt++) {
try {
const fileStream = createReadStream(abs);
await this.storageService.uploadStream(storageFilePath, fileStream);
await this.storageService.uploadStream(storageFilePath, fileStream, {
recreateClient: true,
});
const stat = await fs.stat(abs);
await this.db
@@ -807,7 +838,7 @@ export class ImportAttachmentService {
attempts: 1,
backoff: {
type: 'exponential',
delay: 30 * 1000,
delay: 3 * 60 * 1000,
},
deduplication: {
id: attachmentId,
@@ -4,6 +4,11 @@ import { v7 } from 'uuid';
import { InsertableBacklink } from '@docmost/db/types/entity.types';
import { Cheerio, CheerioAPI, load } from 'cheerio';
/**
 * Reports whether `text` begins with a non-ASCII character.
 *
 * Only the first code point is inspected. It serves as a cheap heuristic for
 * recognising emoji/icon text; any code point above 127 qualifies, so
 * accented letters and other non-ASCII symbols count as well.
 *
 * @param text - Candidate icon text.
 * @returns `true` when the first code point is greater than 127; `false` for
 *   an empty string or text that starts with an ASCII character.
 */
function isUnicodeCharacter(text: string): boolean {
  // codePointAt(0) yields undefined for an empty string, so a single guard
  // covers the empty-input case without resorting to a non-null assertion.
  const firstCodePoint = text.codePointAt(0);
  return firstCodePoint !== undefined && firstCodePoint > 127;
}
export async function formatImportHtml(opts: {
html: string;
currentFilePath: string;
@@ -16,7 +21,11 @@ export async function formatImportHtml(opts: {
workspaceId: string;
pageDir?: string;
attachmentCandidates?: string[];
}): Promise<{ html: string; backlinks: InsertableBacklink[] }> {
}): Promise<{
html: string;
backlinks: InsertableBacklink[];
pageIcon?: string;
}> {
const {
html,
currentFilePath,
@@ -28,6 +37,17 @@ export async function formatImportHtml(opts: {
const $: CheerioAPI = load(html);
const $root: Cheerio<any> = $.root();
let pageIcon: string | null = null;
// extract notion page icon
const headerIconSpan = $root.find('header .page-header-icon .icon');
if (headerIconSpan.length > 0) {
const iconText = headerIconSpan.text().trim();
if (iconText && isUnicodeCharacter(iconText)) {
pageIcon = iconText;
}
}
notionFormatter($, $root);
defaultHtmlFormatter($, $root);
@@ -44,6 +64,7 @@ export async function formatImportHtml(opts: {
return {
html: $root.html() || '',
backlinks,
pageIcon: pageIcon || undefined,
};
}
@@ -69,6 +90,10 @@ export function defaultHtmlFormatter($: CheerioAPI, $root: Cheerio<any>) {
}
export function notionFormatter($: CheerioAPI, $root: Cheerio<any>) {
// remove page header icon and cover image
$root.find('.page-header-icon').remove();
$root.find('.page-cover-image').remove();
// remove empty description paragraphs
$root.find('p.page-description').each((_, el) => {
if (!$(el).text().trim()) $(el).remove();
@@ -189,22 +214,48 @@ export function notionFormatter($: CheerioAPI, $root: Cheerio<any>) {
$fig.replaceWith($newAnchor);
});
// remove user icons
$root.find('span.user img.user-icon').remove();
// remove toc
$root.find('nav.table_of_contents').remove();
}
export function unwrapFromParagraph($: CheerioAPI, $node: Cheerio<any>) {
// find the nearest <p> or <a> ancestor
let $wrapper = $node.closest('p, a');
// Keep track of processed wrappers to avoid infinite loops
const processedWrappers = new Set<any>();
let $wrapper = $node.closest('p, a');
while ($wrapper.length) {
// if the wrapper has only our node inside, replace it entirely
if ($wrapper.contents().length === 1) {
const wrapperElement = $wrapper.get(0);
// If we've already processed this wrapper, break to avoid infinite loop
if (processedWrappers.has(wrapperElement)) {
break;
}
processedWrappers.add(wrapperElement);
// Check if the wrapper contains only whitespace and our target node
const hasOnlyTargetNode =
$wrapper.contents().filter((_, el) => {
const $el = $(el);
// Skip whitespace-only text nodes. NodeType 3 = text node
if (el.nodeType === 3 && !$el.text().trim()) {
return false;
}
// Return true if this is not our target node
return !$el.is($node) && !$node.is($el);
}).length === 0;
if (hasOnlyTargetNode) {
// Replace the wrapper entirely with our node
$wrapper.replaceWith($node);
} else {
// otherwise just move the node to before the wrapper
// Move the node to before the wrapper, preserving other content
$wrapper.before($node);
}
// look again for any new wrapper around $node
$wrapper = $node.closest('p, a');
}
@@ -64,3 +64,9 @@ export async function collectMarkdownAndHtmlFiles(
await walk(dir);
return results;
}
/**
 * Removes a trailing Notion identifier from a file name.
 *
 * The identifier is a run of exactly 32 alphanumeric characters at the very
 * end of the name, optionally preceded by a single space or dash. The
 * remaining text is trimmed of surrounding whitespace.
 *
 * @param fileName - Name to clean, e.g. `"My Page 0123…cdef"`.
 * @returns The name without the trailing identifier, or the trimmed input
 *   when no identifier is present.
 */
export function stripNotionID(fileName: string): string {
  const trailingId = /[ -]?[a-z0-9]{32}$/i.exec(fileName);
  // The pattern is anchored at the end of the input, so dropping the match is
  // the same as keeping everything before its start index.
  const baseName =
    trailingId === null ? fileName : fileName.slice(0, trailingId.index);
  return baseName.trim();
}
@@ -28,7 +28,7 @@ export class LocalDriver implements StorageDriver {
}
}
async uploadStream(filePath: string, file: Readable): Promise<void> {
async uploadStream(filePath: string, file: Readable, options?: { recreateClient?: boolean }): Promise<void> {
try {
const fullPath = this._fullPath(filePath);
await fs.mkdir(dirname(fullPath), { recursive: true });
@@ -41,12 +41,26 @@ export class S3Driver implements StorageDriver {
}
}
async uploadStream(filePath: string, file: Readable): Promise<void> {
async uploadStream(
filePath: string,
file: Readable,
options?: { recreateClient?: boolean },
): Promise<void> {
let clientToUse = this.s3Client;
let shouldDestroyClient = false;
// optionally recreate client to avoid socket hang errors
// (during multi-attachments imports)
if (options?.recreateClient) {
clientToUse = new S3Client(this.config as any);
shouldDestroyClient = true;
}
try {
const contentType = getMimeType(filePath);
const upload = new Upload({
client: this.s3Client,
client: clientToUse,
params: {
Bucket: this.config.bucket,
Key: filePath,
@@ -58,6 +72,10 @@ export class S3Driver implements StorageDriver {
await upload.done();
} catch (err) {
throw new Error(`Failed to upload file: ${(err as Error).message}`);
} finally {
if (shouldDestroyClient && clientToUse) {
clientToUse.destroy();
}
}
}
@@ -3,7 +3,7 @@ import { Readable } from 'stream';
export interface StorageDriver {
upload(filePath: string, file: Buffer): Promise<void>;
uploadStream(filePath: string, file: Readable): Promise<void>;
uploadStream(filePath: string, file: Readable, options?: { recreateClient?: boolean }): Promise<void>;
copy(fromFilePath: string, toFilePath: string): Promise<void>;
@@ -15,8 +15,8 @@ export class StorageService {
this.logger.debug(`File uploaded successfully. Path: ${filePath}`);
}
async uploadStream(filePath: string, fileContent: Readable) {
await this.storageDriver.uploadStream(filePath, fileContent);
/**
 * Streams `fileContent` to `filePath` through the configured storage driver
 * and logs a debug message once the driver call resolves.
 *
 * @param filePath - Destination path passed to the driver.
 * @param fileContent - Readable stream holding the file data.
 * @param options - Forwarded unchanged to the driver's `uploadStream`.
 *   `recreateClient` is a driver-level hint; how it is honoured depends on
 *   the active driver.
 */
async uploadStream(filePath: string, fileContent: Readable, options?: { recreateClient?: boolean }) {
// Errors from the driver propagate to the caller; the success log below is
// only reached when the upload promise resolves.
await this.storageDriver.uploadStream(filePath, fileContent, options);
this.logger.debug(`File uploaded successfully. Path: ${filePath}`);
}