mirror of
https://github.com/docmost/docmost.git
synced 2026-05-16 22:41:30 +08:00
Merge branch 'main' into tiptap3-migration
This commit is contained in:
@@ -72,7 +72,9 @@ export function extractDateFromUuid7(uuid7: string) {
|
||||
}
|
||||
|
||||
/**
 * Produces a storage-safe file name.
 *
 * The name is first passed through `sanitize`, then every space and every
 * `#` is replaced with `_`, and the result is cut to at most 255 UTF-16
 * code units.
 *
 * @param fileName - Raw file name to clean up.
 * @returns The cleaned name, never longer than 255 code units.
 */
export function sanitizeFileName(fileName: string): string {
  // One declaration only: declaring `sanitizedFilename` twice in the same
  // scope is a redeclaration error. Spaces and '#' both map to '_', so a
  // single character class covers what the two chained replaces did.
  const sanitizedFilename = sanitize(fileName).replace(/[ #]/g, '_');
  return sanitizedFilename.slice(0, 255);
}
|
||||
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
export enum AttachmentType {
|
||||
Avatar = 'avatar',
|
||||
WorkspaceLogo = 'workspace-logo',
|
||||
SpaceLogo = 'space-logo',
|
||||
WorkspaceIcon = 'workspace-icon',
|
||||
SpaceIcon = 'space-icon',
|
||||
File = 'file',
|
||||
}
|
||||
|
||||
export const validImageExtensions = ['.jpg', '.png', '.jpeg'];
|
||||
/**
 * Upper bound for avatar uploads, expressed as a size string.
 * NOTE(review): where this string is parsed and enforced is not visible
 * from here - confirm the consumer accepts the 'MB' suffix.
 */
export const MAX_AVATAR_SIZE = '10MB';
|
||||
|
||||
export const inlineFileExtensions = [
|
||||
'.jpg',
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import {
|
||||
BadRequestException,
|
||||
Body,
|
||||
Controller,
|
||||
ForbiddenException,
|
||||
Get,
|
||||
@@ -51,6 +52,7 @@ import { EnvironmentService } from '../../integrations/environment/environment.s
|
||||
import { TokenService } from '../auth/services/token.service';
|
||||
import { JwtAttachmentPayload, JwtType } from '../auth/dto/jwt-payload';
|
||||
import * as path from 'path';
|
||||
import { RemoveIconDto } from './dto/attachment.dto';
|
||||
|
||||
@Controller()
|
||||
export class AttachmentController {
|
||||
@@ -302,7 +304,7 @@ export class AttachmentController {
|
||||
throw new BadRequestException('Invalid image attachment type');
|
||||
}
|
||||
|
||||
if (attachmentType === AttachmentType.WorkspaceLogo) {
|
||||
if (attachmentType === AttachmentType.WorkspaceIcon) {
|
||||
const ability = this.workspaceAbility.createForUser(user, workspace);
|
||||
if (
|
||||
ability.cannot(
|
||||
@@ -314,7 +316,7 @@ export class AttachmentController {
|
||||
}
|
||||
}
|
||||
|
||||
if (attachmentType === AttachmentType.SpaceLogo) {
|
||||
if (attachmentType === AttachmentType.SpaceIcon) {
|
||||
if (!spaceId) {
|
||||
throw new BadRequestException('spaceId is required');
|
||||
}
|
||||
@@ -372,8 +374,59 @@ export class AttachmentController {
|
||||
});
|
||||
return res.send(fileStream);
|
||||
} catch (err) {
|
||||
this.logger.error(err);
|
||||
// this.logger.error(err);
|
||||
throw new NotFoundException('File not found');
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * POST attachments/remove-icon
 *
 * Clears one of three images depending on `dto.type`:
 *  - Avatar: the authenticated user's own avatar.
 *  - SpaceIcon: the icon of the space named by `dto.spaceId`.
 *  - WorkspaceIcon: the icon of the current workspace.
 *
 * Space and workspace icons are only removed when the caller's ability
 * allows Manage on Settings; otherwise a ForbiddenException is thrown.
 * A SpaceIcon request without `spaceId` is rejected with a
 * BadRequestException. Any other type falls through and does nothing.
 */
@UseGuards(JwtAuthGuard)
@HttpCode(HttpStatus.OK)
@Post('attachments/remove-icon')
async removeIcon(
  @Body() dto: RemoveIconDto,
  @AuthUser() user: User,
  @AuthWorkspace() workspace: Workspace,
) {
  const requestedType = dto.type;
  const targetSpaceId = dto.spaceId;

  switch (requestedType) {
    case AttachmentType.Avatar: {
      // Users may always drop their own avatar; no ability check here.
      await this.attachmentService.removeUserAvatar(user);
      return;
    }

    case AttachmentType.SpaceIcon: {
      if (!targetSpaceId) {
        throw new BadRequestException(
          'spaceId is required to change space icons',
        );
      }

      const spacePermissions = await this.spaceAbility.createForUser(
        user,
        targetSpaceId,
      );
      const spaceDenied = spacePermissions.cannot(
        SpaceCaslAction.Manage,
        SpaceCaslSubject.Settings,
      );
      if (spaceDenied) {
        throw new ForbiddenException();
      }

      await this.attachmentService.removeSpaceIcon(
        targetSpaceId,
        workspace.id,
      );
      return;
    }

    case AttachmentType.WorkspaceIcon: {
      const workspacePermissions = this.workspaceAbility.createForUser(
        user,
        workspace,
      );
      const workspaceDenied = workspacePermissions.cannot(
        WorkspaceCaslAction.Manage,
        WorkspaceCaslSubject.Settings,
      );
      if (workspaceDenied) {
        throw new ForbiddenException();
      }

      await this.attachmentService.removeWorkspaceIcon(workspace);
      return;
    }

    default:
      // Any other attachment type is ignored.
      return;
  }
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { MultipartFile } from '@fastify/multipart';
|
||||
import { randomBytes } from 'crypto';
|
||||
import { sanitize } from 'sanitize-filename-ts';
|
||||
import * as path from 'path';
|
||||
import { AttachmentType } from './attachment.constants';
|
||||
import { sanitizeFileName } from '../../common/helpers';
|
||||
import * as sharp from 'sharp';
|
||||
|
||||
export interface PreparedFile {
|
||||
buffer: Buffer;
|
||||
@@ -22,10 +22,8 @@ export async function prepareFile(
|
||||
}
|
||||
|
||||
try {
|
||||
const rand = randomBytes(8).toString('hex');
|
||||
|
||||
const buffer = await file.toBuffer();
|
||||
const sanitizedFilename = sanitize(file.filename).replace(/ /g, '_');
|
||||
const sanitizedFilename = sanitizeFileName(file.filename);
|
||||
const fileName = sanitizedFilename.slice(0, 255);
|
||||
const fileSize = buffer.length;
|
||||
const fileExtension = path.extname(file.filename).toLowerCase();
|
||||
@@ -58,9 +56,9 @@ export function getAttachmentFolderPath(
|
||||
switch (type) {
|
||||
case AttachmentType.Avatar:
|
||||
return `${workspaceId}/avatars`;
|
||||
case AttachmentType.WorkspaceLogo:
|
||||
return `${workspaceId}/workspace-logo`;
|
||||
case AttachmentType.SpaceLogo:
|
||||
case AttachmentType.WorkspaceIcon:
|
||||
return `${workspaceId}/workspace-logos`;
|
||||
case AttachmentType.SpaceIcon:
|
||||
return `${workspaceId}/space-logos`;
|
||||
case AttachmentType.File:
|
||||
return `${workspaceId}/files`;
|
||||
@@ -70,3 +68,51 @@ export function getAttachmentFolderPath(
|
||||
}
|
||||
|
||||
export const validAttachmentTypes = Object.values(AttachmentType);
|
||||
|
||||
/**
 * Shrinks and re-encodes an icon/avatar image.
 *
 * - Images wider or taller than 300px are resized to fit inside a
 *   300x300 box (`fit: 'inside'`, never enlarged); smaller images keep
 *   their dimensions.
 * - PNG input is re-encoded as PNG. Only avatars are flattened onto a
 *   white background first, which removes transparency; other attachment
 *   types keep their alpha channel.
 * - Every other input format is re-encoded as progressive JPEG.
 *
 * @param buffer - Raw image bytes.
 * @param attachmentType - Optional attachment type; only
 *   `AttachmentType.Avatar` changes the behaviour (PNG flattening).
 * @returns The re-encoded image bytes.
 * @throws Whatever sharp rejects with (e.g. undecodable input); errors are
 *   not wrapped.
 */
export async function compressAndResizeIcon(
  buffer: Buffer,
  attachmentType?: AttachmentType,
): Promise<Buffer> {
  const targetWidth = 300;
  const targetHeight = 300;

  let sharpInstance = sharp(buffer);
  const metadata = await sharpInstance.metadata();

  // width/height may be absent from the metadata; treat a missing
  // dimension as 0 so it never triggers a resize (an undefined comparison
  // evaluated to false before, so the outcome is the same).
  const exceedsTarget =
    (metadata.width ?? 0) > targetWidth ||
    (metadata.height ?? 0) > targetHeight;

  // Only resize if image is larger than target dimensions
  if (exceedsTarget) {
    sharpInstance = sharpInstance.resize(targetWidth, targetHeight, {
      fit: 'inside',
      withoutEnlargement: true,
    });
  }

  // Handle based on original format
  if (metadata.format === 'png') {
    // Only flatten avatars to remove transparency
    if (attachmentType === AttachmentType.Avatar) {
      sharpInstance = sharpInstance.flatten({
        background: { r: 255, g: 255, b: 255 },
      });
    }

    // NOTE(review): per the sharp docs, `quality` on PNG output turns on
    // palette quantisation - confirm that is intended for logos.
    return sharpInstance
      .png({
        quality: 85,
        compressionLevel: 6,
      })
      .toBuffer();
  }

  return sharpInstance
    .jpeg({
      quality: 85,
      progressive: true,
      mozjpeg: true,
    })
    .toBuffer();
}
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
import { IsEnum, IsIn, IsNotEmpty, IsOptional, IsUUID } from 'class-validator';
|
||||
import { AttachmentType } from '../attachment.constants';
|
||||
|
||||
/**
 * Request body describing which icon should be removed.
 */
export class RemoveIconDto {
  /**
   * Kind of icon to remove. Although validated as an `AttachmentType`,
   * only Avatar, SpaceIcon and WorkspaceIcon pass the `IsIn` check.
   */
  @IsEnum(AttachmentType)
  @IsIn([
    AttachmentType.Avatar,
    AttachmentType.SpaceIcon,
    AttachmentType.WorkspaceIcon,
  ])
  @IsNotEmpty()
  type: AttachmentType;

  /**
   * Target space id. Validation lets it be omitted (`IsOptional`), so the
   * property is typed as optional to match; when present it must be a UUID.
   */
  @IsOptional()
  @IsUUID()
  spaceId?: string;
}
|
||||
@@ -1,3 +0,0 @@
|
||||
import { IsOptional, IsString, IsUUID } from 'class-validator';
|
||||
|
||||
export class AvatarUploadDto {}
|
||||
@@ -1,7 +0,0 @@
|
||||
import { IsNotEmpty, IsString } from 'class-validator';
|
||||
|
||||
export class GetFileDto {
|
||||
@IsString()
|
||||
@IsNotEmpty()
|
||||
attachmentId: string;
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
import {
|
||||
IsDefined,
|
||||
IsNotEmpty,
|
||||
IsOptional,
|
||||
IsString,
|
||||
IsUUID,
|
||||
} from 'class-validator';
|
||||
|
||||
export class UploadFileDto {
|
||||
@IsString()
|
||||
@IsNotEmpty()
|
||||
attachmentType: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsUUID()
|
||||
pageId: string;
|
||||
|
||||
@IsDefined()
|
||||
file: any;
|
||||
}
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
import { StorageService } from '../../../integrations/storage/storage.service';
|
||||
import { MultipartFile } from '@fastify/multipart';
|
||||
import {
|
||||
compressAndResizeIcon,
|
||||
getAttachmentFolderPath,
|
||||
PreparedFile,
|
||||
prepareFile,
|
||||
@@ -16,7 +17,7 @@ import { v4 as uuid4, v7 as uuid7 } from 'uuid';
|
||||
import { AttachmentRepo } from '@docmost/db/repos/attachment/attachment.repo';
|
||||
import { AttachmentType, validImageExtensions } from '../attachment.constants';
|
||||
import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
|
||||
import { Attachment } from '@docmost/db/types/entity.types';
|
||||
import { Attachment, User, Workspace } from '@docmost/db/types/entity.types';
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { executeTx } from '@docmost/db/utils';
|
||||
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
||||
@@ -132,8 +133,8 @@ export class AttachmentService {
|
||||
filePromise: Promise<MultipartFile>,
|
||||
type:
|
||||
| AttachmentType.Avatar
|
||||
| AttachmentType.WorkspaceLogo
|
||||
| AttachmentType.SpaceLogo,
|
||||
| AttachmentType.WorkspaceIcon
|
||||
| AttachmentType.SpaceIcon,
|
||||
userId: string,
|
||||
workspaceId: string,
|
||||
spaceId?: string,
|
||||
@@ -141,6 +142,9 @@ export class AttachmentService {
|
||||
const preparedFile: PreparedFile = await prepareFile(filePromise);
|
||||
validateFileType(preparedFile.fileExtension, validImageExtensions);
|
||||
|
||||
const processedBuffer = await compressAndResizeIcon(preparedFile.buffer, type);
|
||||
preparedFile.buffer = processedBuffer;
|
||||
preparedFile.fileSize = processedBuffer.length;
|
||||
preparedFile.fileName = uuid4() + preparedFile.fileExtension;
|
||||
|
||||
const filePath = `${getAttachmentFolderPath(type, workspaceId)}/${preparedFile.fileName}`;
|
||||
@@ -174,7 +178,7 @@ export class AttachmentService {
|
||||
workspaceId,
|
||||
trx,
|
||||
);
|
||||
} else if (type === AttachmentType.WorkspaceLogo) {
|
||||
} else if (type === AttachmentType.WorkspaceIcon) {
|
||||
const workspace = await this.workspaceRepo.findById(workspaceId, {
|
||||
trx,
|
||||
});
|
||||
@@ -186,7 +190,7 @@ export class AttachmentService {
|
||||
workspaceId,
|
||||
trx,
|
||||
);
|
||||
} else if (type === AttachmentType.SpaceLogo && spaceId) {
|
||||
} else if (type === AttachmentType.SpaceIcon && spaceId) {
|
||||
const space = await this.spaceRepo.findById(spaceId, workspaceId, {
|
||||
trx,
|
||||
});
|
||||
@@ -205,7 +209,6 @@ export class AttachmentService {
|
||||
});
|
||||
} catch (err) {
|
||||
// delete uploaded file on db update failure
|
||||
this.logger.error('Image upload error:', err);
|
||||
await this.deleteRedundantFile(filePath);
|
||||
throw new BadRequestException('Failed to upload image');
|
||||
}
|
||||
@@ -389,4 +392,40 @@ export class AttachmentService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Deletes the user's stored avatar file (if any) and clears `avatarUrl`.
 *
 * No file is deleted when `avatarUrl` is empty or starts with "http"
 * (case-insensitive) - presumably an externally hosted image; the column
 * is set to null in every case.
 */
async removeUserAvatar(user: User) {
  const currentAvatar = user.avatarUrl;
  const nothingToDelete =
    !currentAvatar || currentAvatar.toLowerCase().startsWith('http');

  if (!nothingToDelete) {
    const avatarFolder = getAttachmentFolderPath(
      AttachmentType.Avatar,
      user.workspaceId,
    );
    await this.deleteRedundantFile(`${avatarFolder}/${currentAvatar}`);
  }

  const cleared = { avatarUrl: null };
  await this.userRepo.updateUser(cleared, user.id, user.workspaceId);
}
|
||||
|
||||
/**
 * Deletes a space's stored logo file (if any) and clears its `logo`.
 *
 * @throws NotFoundException when the space cannot be found in the
 *   given workspace.
 *
 * No file is deleted when `logo` is empty or starts with "http"
 * (case-insensitive); the column is set to null regardless.
 */
async removeSpaceIcon(spaceId: string, workspaceId: string) {
  const targetSpace = await this.spaceRepo.findById(spaceId, workspaceId);
  if (!targetSpace) {
    throw new NotFoundException('Space not found');
  }

  const currentLogo = targetSpace.logo;
  const nothingToDelete =
    !currentLogo || currentLogo.toLowerCase().startsWith('http');

  if (!nothingToDelete) {
    const logoFolder = getAttachmentFolderPath(
      AttachmentType.SpaceIcon,
      workspaceId,
    );
    await this.deleteRedundantFile(`${logoFolder}/${currentLogo}`);
  }

  await this.spaceRepo.updateSpace({ logo: null }, spaceId, workspaceId);
}
|
||||
|
||||
/**
 * Deletes the workspace's stored logo file (if any) and clears `logo`.
 *
 * No file is deleted when `logo` is empty or starts with "http"
 * (case-insensitive); the column is set to null regardless.
 */
async removeWorkspaceIcon(workspace: Workspace) {
  const currentLogo = workspace.logo;
  const nothingToDelete =
    !currentLogo || currentLogo.toLowerCase().startsWith('http');

  if (!nothingToDelete) {
    const logoFolder = getAttachmentFolderPath(
      AttachmentType.WorkspaceIcon,
      workspace.id,
    );
    await this.deleteRedundantFile(`${logoFolder}/${currentLogo}`);
  }

  await this.workspaceRepo.updateWorkspace({ logo: null }, workspace.id);
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Module } from '@nestjs/common';
|
||||
import { ImportService } from './services/import.service';
|
||||
import { ImportController } from './import.controller';
|
||||
import { StorageModule } from '../storage/storage.module';
|
||||
import { FileTaskService } from './services/file-task.service';
|
||||
import { FileImportTaskService } from './services/file-import-task.service';
|
||||
import { FileTaskProcessor } from './processors/file-task.processor';
|
||||
import { ImportAttachmentService } from './services/import-attachment.service';
|
||||
import { FileTaskController } from './file-task.controller';
|
||||
@@ -11,7 +11,7 @@ import { PageModule } from '../../core/page/page.module';
|
||||
@Module({
|
||||
providers: [
|
||||
ImportService,
|
||||
FileTaskService,
|
||||
FileImportTaskService,
|
||||
FileTaskProcessor,
|
||||
ImportAttachmentService,
|
||||
],
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Logger, OnModuleDestroy } from '@nestjs/common';
|
||||
import { OnWorkerEvent, Processor, WorkerHost } from '@nestjs/bullmq';
|
||||
import { Job } from 'bullmq';
|
||||
import { QueueJob, QueueName } from 'src/integrations/queue/constants';
|
||||
import { FileTaskService } from '../services/file-task.service';
|
||||
import { FileImportTaskService } from '../services/file-import-task.service';
|
||||
import { FileTaskStatus } from '../utils/file.utils';
|
||||
import { StorageService } from '../../storage/storage.service';
|
||||
|
||||
@@ -11,7 +11,7 @@ export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
|
||||
private readonly logger = new Logger(FileTaskProcessor.name);
|
||||
|
||||
constructor(
|
||||
private readonly fileTaskService: FileTaskService,
|
||||
private readonly fileTaskService: FileImportTaskService,
|
||||
private readonly storageService: StorageService,
|
||||
) {
|
||||
super();
|
||||
@@ -41,15 +41,40 @@ export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
|
||||
@OnWorkerEvent('failed')
|
||||
async onFailed(job: Job) {
|
||||
this.logger.error(
|
||||
`Error processing ${job.name} job. Reason: ${job.failedReason}`,
|
||||
`Error processing ${job.name} job. Import Task ID: ${job.data.fileTaskId}. Reason: ${job.failedReason}`,
|
||||
);
|
||||
|
||||
await this.handleFailedJob(job);
|
||||
}
|
||||
|
||||
/**
 * Worker 'completed' hook: logs the completion, then deletes the file
 * referenced by the finished file task from storage.
 *
 * Cleanup is best-effort - any error raised while looking up the task or
 * deleting its file is logged and not rethrown.
 */
@OnWorkerEvent('completed')
async onCompleted(job: Job) {
  this.logger.log(
    `Completed ${job.name} job for File task ID ${job.data.fileTaskId}`,
  );

  try {
    const finishedTask = await this.fileTaskService.getFileTask(
      job.data.fileTaskId,
    );
    // Nothing to clean up when the task record is missing.
    if (!finishedTask) {
      return;
    }

    await this.storageService.delete(finishedTask.filePath);
    this.logger.debug(`Deleted imported zip file: ${finishedTask.filePath}`);
  } catch (err) {
    this.logger.error(`Failed to delete imported zip file:`, err);
  }
}
|
||||
|
||||
private async handleFailedJob(job: Job) {
|
||||
try {
|
||||
const fileTaskId = job.data.fileTaskId;
|
||||
const reason = job.failedReason || 'Unknown error';
|
||||
|
||||
await this.fileTaskService.updateTaskStatus(
|
||||
fileTaskId,
|
||||
FileTaskStatus.Failed,
|
||||
job.failedReason,
|
||||
reason,
|
||||
);
|
||||
|
||||
const fileTask = await this.fileTaskService.getFileTask(fileTaskId);
|
||||
@@ -61,13 +86,6 @@ export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
|
||||
}
|
||||
}
|
||||
|
||||
@OnWorkerEvent('completed')
|
||||
onCompleted(job: Job) {
|
||||
this.logger.log(
|
||||
`Completed ${job.name} job for File task ID ${job.data.fileTaskId}`,
|
||||
);
|
||||
}
|
||||
|
||||
async onModuleDestroy(): Promise<void> {
|
||||
if (this.worker) {
|
||||
await this.worker.close();
|
||||
|
||||
+154
-68
@@ -24,6 +24,7 @@ import { formatImportHtml } from '../utils/import-formatter';
|
||||
import {
|
||||
buildAttachmentCandidates,
|
||||
collectMarkdownAndHtmlFiles,
|
||||
stripNotionID,
|
||||
} from '../utils/import.utils';
|
||||
import { executeTx } from '@docmost/db/utils';
|
||||
import { BacklinkRepo } from '@docmost/db/repos/backlink/backlink.repo';
|
||||
@@ -33,8 +34,8 @@ import { PageService } from '../../../core/page/services/page.service';
|
||||
import { ImportPageNode } from '../dto/file-task-dto';
|
||||
|
||||
@Injectable()
|
||||
export class FileTaskService {
|
||||
private readonly logger = new Logger(FileTaskService.name);
|
||||
export class FileImportTaskService {
|
||||
private readonly logger = new Logger(FileImportTaskService.name);
|
||||
|
||||
constructor(
|
||||
private readonly storageService: StorageService,
|
||||
@@ -159,17 +160,12 @@ export class FileTaskService {
|
||||
.split(path.sep)
|
||||
.join('/'); // normalize to forward-slashes
|
||||
const ext = path.extname(relPath).toLowerCase();
|
||||
let content = await fs.readFile(absPath, 'utf-8');
|
||||
|
||||
if (ext.toLowerCase() === '.md') {
|
||||
content = await markdownToHtml(content);
|
||||
}
|
||||
|
||||
pagesMap.set(relPath, {
|
||||
id: v7(),
|
||||
slugId: generateSlugId(),
|
||||
name: path.basename(relPath, ext),
|
||||
content,
|
||||
name: stripNotionID(path.basename(relPath, ext)),
|
||||
content: '',
|
||||
parentPageId: null,
|
||||
fileExtension: ext,
|
||||
filePath: relPath,
|
||||
@@ -254,70 +250,160 @@ export class FileTaskService {
|
||||
});
|
||||
});
|
||||
|
||||
const pageResults = await Promise.all(
|
||||
Array.from(pagesMap.values()).map(async (page) => {
|
||||
const htmlContent =
|
||||
await this.importAttachmentService.processAttachments({
|
||||
html: page.content,
|
||||
pageRelativePath: page.filePath,
|
||||
extractDir,
|
||||
pageId: page.id,
|
||||
fileTask,
|
||||
attachmentCandidates,
|
||||
});
|
||||
// Group pages by level (topological sort for parent-child relationships)
|
||||
const pagesByLevel = new Map<number, Array<[string, ImportPageNode]>>();
|
||||
const pageLevel = new Map<string, number>();
|
||||
|
||||
const { html, backlinks } = await formatImportHtml({
|
||||
html: htmlContent,
|
||||
currentFilePath: page.filePath,
|
||||
filePathToPageMetaMap: filePathToPageMetaMap,
|
||||
creatorId: fileTask.creatorId,
|
||||
sourcePageId: page.id,
|
||||
workspaceId: fileTask.workspaceId,
|
||||
});
|
||||
// Calculate levels using BFS
|
||||
const calculateLevels = () => {
|
||||
const queue: Array<{ filePath: string; level: number }> = [];
|
||||
|
||||
const pmState = getProsemirrorContent(
|
||||
await this.importService.processHTML(html),
|
||||
// Start with root pages (no parent)
|
||||
for (const [filePath, page] of pagesMap.entries()) {
|
||||
if (!page.parentPageId) {
|
||||
queue.push({ filePath, level: 0 });
|
||||
pageLevel.set(filePath, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// BFS to assign levels
|
||||
while (queue.length > 0) {
|
||||
const { filePath, level } = queue.shift()!;
|
||||
const currentPage = pagesMap.get(filePath)!;
|
||||
|
||||
// Find children of current page
|
||||
for (const [childFilePath, childPage] of pagesMap.entries()) {
|
||||
if (
|
||||
childPage.parentPageId === currentPage.id &&
|
||||
!pageLevel.has(childFilePath)
|
||||
) {
|
||||
pageLevel.set(childFilePath, level + 1);
|
||||
queue.push({ filePath: childFilePath, level: level + 1 });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Group pages by level
|
||||
for (const [filePath, page] of pagesMap.entries()) {
|
||||
const level = pageLevel.get(filePath) || 0;
|
||||
if (!pagesByLevel.has(level)) {
|
||||
pagesByLevel.set(level, []);
|
||||
}
|
||||
pagesByLevel.get(level)!.push([filePath, page]);
|
||||
}
|
||||
};
|
||||
|
||||
calculateLevels();
|
||||
|
||||
if (pagesMap.size < 1) return;
|
||||
|
||||
// Process pages level by level sequentially to respect foreign key constraints
|
||||
const allBacklinks: any[] = [];
|
||||
const validPageIds = new Set<string>();
|
||||
let totalPagesProcessed = 0;
|
||||
|
||||
// Sort levels to process in order
|
||||
const sortedLevels = Array.from(pagesByLevel.keys()).sort((a, b) => a - b);
|
||||
|
||||
try {
|
||||
await executeTx(this.db, async (trx) => {
|
||||
// Process pages level by level sequentially within the transaction
|
||||
for (const level of sortedLevels) {
|
||||
const levelPages = pagesByLevel.get(level)!;
|
||||
|
||||
for (const [filePath, page] of levelPages) {
|
||||
const absPath = path.join(extractDir, filePath);
|
||||
let content = await fs.readFile(absPath, 'utf-8');
|
||||
|
||||
if (page.fileExtension.toLowerCase() === '.md') {
|
||||
content = await markdownToHtml(content);
|
||||
}
|
||||
|
||||
const htmlContent =
|
||||
await this.importAttachmentService.processAttachments({
|
||||
html: content,
|
||||
pageRelativePath: page.filePath,
|
||||
extractDir,
|
||||
pageId: page.id,
|
||||
fileTask,
|
||||
attachmentCandidates,
|
||||
});
|
||||
|
||||
const { html, backlinks, pageIcon } = await formatImportHtml({
|
||||
html: htmlContent,
|
||||
currentFilePath: page.filePath,
|
||||
filePathToPageMetaMap: filePathToPageMetaMap,
|
||||
creatorId: fileTask.creatorId,
|
||||
sourcePageId: page.id,
|
||||
workspaceId: fileTask.workspaceId,
|
||||
});
|
||||
|
||||
const pmState = getProsemirrorContent(
|
||||
await this.importService.processHTML(html),
|
||||
);
|
||||
|
||||
const { title, prosemirrorJson } =
|
||||
this.importService.extractTitleAndRemoveHeading(pmState);
|
||||
|
||||
const insertablePage: InsertablePage = {
|
||||
id: page.id,
|
||||
slugId: page.slugId,
|
||||
title: title || page.name,
|
||||
icon: pageIcon || null,
|
||||
content: prosemirrorJson,
|
||||
textContent: jsonToText(prosemirrorJson),
|
||||
ydoc: await this.importService.createYdoc(prosemirrorJson),
|
||||
position: page.position!,
|
||||
spaceId: fileTask.spaceId,
|
||||
workspaceId: fileTask.workspaceId,
|
||||
creatorId: fileTask.creatorId,
|
||||
lastUpdatedById: fileTask.creatorId,
|
||||
parentPageId: page.parentPageId,
|
||||
};
|
||||
|
||||
await trx.insertInto('pages').values(insertablePage).execute();
|
||||
|
||||
// Track valid page IDs and collect backlinks
|
||||
validPageIds.add(insertablePage.id);
|
||||
allBacklinks.push(...backlinks);
|
||||
totalPagesProcessed++;
|
||||
|
||||
// Log progress periodically
|
||||
if (totalPagesProcessed % 50 === 0) {
|
||||
this.logger.debug(`Processed ${totalPagesProcessed} pages...`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const filteredBacklinks = allBacklinks.filter(
|
||||
({ sourcePageId, targetPageId }) =>
|
||||
validPageIds.has(sourcePageId) && validPageIds.has(targetPageId),
|
||||
);
|
||||
|
||||
const { title, prosemirrorJson } =
|
||||
this.importService.extractTitleAndRemoveHeading(pmState);
|
||||
// Insert backlinks in batches
|
||||
if (filteredBacklinks.length > 0) {
|
||||
const BACKLINK_BATCH_SIZE = 100;
|
||||
for (
|
||||
let i = 0;
|
||||
i < filteredBacklinks.length;
|
||||
i += BACKLINK_BATCH_SIZE
|
||||
) {
|
||||
const backlinkChunk = filteredBacklinks.slice(
|
||||
i,
|
||||
Math.min(i + BACKLINK_BATCH_SIZE, filteredBacklinks.length),
|
||||
);
|
||||
await this.backlinkRepo.insertBacklink(backlinkChunk, trx);
|
||||
}
|
||||
}
|
||||
|
||||
const insertablePage: InsertablePage = {
|
||||
id: page.id,
|
||||
slugId: page.slugId,
|
||||
title: title || page.name,
|
||||
content: prosemirrorJson,
|
||||
textContent: jsonToText(prosemirrorJson),
|
||||
ydoc: await this.importService.createYdoc(prosemirrorJson),
|
||||
position: page.position!,
|
||||
spaceId: fileTask.spaceId,
|
||||
workspaceId: fileTask.workspaceId,
|
||||
creatorId: fileTask.creatorId,
|
||||
lastUpdatedById: fileTask.creatorId,
|
||||
parentPageId: page.parentPageId,
|
||||
};
|
||||
|
||||
return { insertablePage, backlinks };
|
||||
}),
|
||||
);
|
||||
|
||||
const insertablePages = pageResults.map((r) => r.insertablePage);
|
||||
const insertableBacklinks = pageResults.flatMap((r) => r.backlinks);
|
||||
|
||||
if (insertablePages.length < 1) return;
|
||||
const validPageIds = new Set(insertablePages.map((row) => row.id));
|
||||
const filteredBacklinks = insertableBacklinks.filter(
|
||||
({ sourcePageId, targetPageId }) =>
|
||||
validPageIds.has(sourcePageId) && validPageIds.has(targetPageId),
|
||||
);
|
||||
|
||||
await executeTx(this.db, async (trx) => {
|
||||
await trx.insertInto('pages').values(insertablePages).execute();
|
||||
|
||||
if (filteredBacklinks.length > 0) {
|
||||
await this.backlinkRepo.insertBacklink(filteredBacklinks, trx);
|
||||
}
|
||||
});
|
||||
this.logger.log(
|
||||
`Successfully imported ${totalPagesProcessed} pages with ${filteredBacklinks.length} backlinks`,
|
||||
);
|
||||
});
|
||||
} catch (error) {
|
||||
this.logger.error('Failed to import files:', error);
|
||||
throw new Error(`File import failed: ${error?.['message']}`);
|
||||
}
|
||||
}
|
||||
|
||||
async getFileTask(fileTaskId: string) {
|
||||
@@ -53,6 +53,7 @@ export class ImportAttachmentService {
|
||||
fileTask: FileTask;
|
||||
attachmentCandidates: Map<string, string>;
|
||||
pageAttachments?: AttachmentInfo[];
|
||||
isConfluenceImport?: boolean;
|
||||
}): Promise<string> {
|
||||
const {
|
||||
html,
|
||||
@@ -62,6 +63,7 @@ export class ImportAttachmentService {
|
||||
fileTask,
|
||||
attachmentCandidates,
|
||||
pageAttachments = [],
|
||||
isConfluenceImport,
|
||||
} = opts;
|
||||
|
||||
const attachmentTasks: (() => Promise<void>)[] = [];
|
||||
@@ -90,7 +92,10 @@ export class ImportAttachmentService {
|
||||
>();
|
||||
|
||||
// Analyze attachments to identify Draw.io pairs
|
||||
const { drawioPairs, skipFiles } = this.analyzeAttachments(pageAttachments);
|
||||
const { drawioPairs, skipFiles } = this.analyzeAttachments(
|
||||
pageAttachments,
|
||||
isConfluenceImport,
|
||||
);
|
||||
|
||||
// Map to store processed Draw.io SVGs
|
||||
const drawioSvgMap = new Map<
|
||||
@@ -134,7 +139,9 @@ export class ImportAttachmentService {
|
||||
const stream = Readable.from(svgBuffer);
|
||||
|
||||
// Upload to storage
|
||||
await this.storageService.uploadStream(storageFilePath, stream);
|
||||
await this.storageService.uploadStream(storageFilePath, stream, {
|
||||
recreateClient: true,
|
||||
});
|
||||
|
||||
// Insert into database
|
||||
await this.db
|
||||
@@ -235,202 +242,197 @@ export class ImportAttachmentService {
|
||||
const pageDir = path.dirname(pageRelativePath);
|
||||
const $ = load(html);
|
||||
|
||||
// Cache for resolved paths to avoid repeated lookups
|
||||
const resolvedPathCache = new Map<string, string | null>();
|
||||
// image
|
||||
for (const imgEl of $('img').toArray()) {
|
||||
const $img = $(imgEl);
|
||||
const src = cleanUrlString($img.attr('src') ?? '')!;
|
||||
if (!src || src.startsWith('http')) continue;
|
||||
|
||||
const getCachedResolvedPath = (rawPath: string): string | null => {
|
||||
if (resolvedPathCache.has(rawPath)) {
|
||||
return resolvedPathCache.get(rawPath)!;
|
||||
}
|
||||
const resolved = resolveRelativeAttachmentPath(
|
||||
rawPath,
|
||||
const relPath = resolveRelativeAttachmentPath(
|
||||
src,
|
||||
pageDir,
|
||||
attachmentCandidates,
|
||||
);
|
||||
resolvedPathCache.set(rawPath, resolved);
|
||||
return resolved;
|
||||
};
|
||||
if (!relPath) continue;
|
||||
|
||||
// Cache for file stats to avoid repeated file system calls
|
||||
const statCache = new Map<string, any>();
|
||||
// Check if this image is part of a Draw.io pair
|
||||
const drawioSvg = drawioSvgMap.get(relPath);
|
||||
if (drawioSvg) {
|
||||
const $drawio = $('<div>')
|
||||
.attr('data-type', 'drawio')
|
||||
.attr('data-src', drawioSvg.apiFilePath)
|
||||
.attr('data-title', 'diagram')
|
||||
.attr('data-width', '100%')
|
||||
.attr('data-align', 'center')
|
||||
.attr('data-attachment-id', drawioSvg.attachmentId);
|
||||
|
||||
const getCachedStat = async (absPath: string) => {
|
||||
if (statCache.has(absPath)) {
|
||||
return statCache.get(absPath);
|
||||
$img.replaceWith($drawio);
|
||||
unwrapFromParagraph($, $drawio);
|
||||
continue;
|
||||
}
|
||||
const stat = await fs.stat(absPath);
|
||||
statCache.set(absPath, stat);
|
||||
return stat;
|
||||
};
|
||||
|
||||
// Single DOM traversal for all attachment elements
|
||||
const selector =
|
||||
'img, video, div[data-type="attachment"], a, div[data-type="excalidraw"], div[data-type="drawio"]';
|
||||
const elements = $(selector).toArray();
|
||||
const { attachmentId, apiFilePath } = processFile(relPath);
|
||||
|
||||
for (const element of elements) {
|
||||
const $el = $(element);
|
||||
const tagName = element.tagName.toLowerCase();
|
||||
const width = $img.attr('width') ?? '100%';
|
||||
const align = $img.attr('data-align') ?? 'center';
|
||||
|
||||
// Process based on element type
|
||||
if (tagName === 'img') {
|
||||
const src = cleanUrlString($el.attr('src') ?? '');
|
||||
if (!src || src.startsWith('http')) continue;
|
||||
$img
|
||||
.attr('src', apiFilePath)
|
||||
.attr('data-attachment-id', attachmentId)
|
||||
.attr('width', width)
|
||||
.attr('data-align', align);
|
||||
|
||||
const relPath = getCachedResolvedPath(src);
|
||||
if (!relPath) continue;
|
||||
unwrapFromParagraph($, $img);
|
||||
}
|
||||
|
||||
// Check if this image is part of a Draw.io pair
|
||||
const drawioSvg = drawioSvgMap.get(relPath);
|
||||
if (drawioSvg) {
|
||||
const $drawio = $('<div>')
|
||||
.attr('data-type', 'drawio')
|
||||
.attr('data-src', drawioSvg.apiFilePath)
|
||||
.attr('data-title', 'diagram')
|
||||
.attr('data-width', '100%')
|
||||
.attr('data-align', 'center')
|
||||
.attr('data-attachment-id', drawioSvg.attachmentId);
|
||||
// video
|
||||
for (const vidEl of $('video').toArray()) {
|
||||
const $vid = $(vidEl);
|
||||
const src = cleanUrlString($vid.attr('src') ?? '')!;
|
||||
if (!src || src.startsWith('http')) continue;
|
||||
|
||||
$el.replaceWith($drawio);
|
||||
unwrapFromParagraph($, $drawio);
|
||||
continue;
|
||||
}
|
||||
const relPath = resolveRelativeAttachmentPath(
|
||||
src,
|
||||
pageDir,
|
||||
attachmentCandidates,
|
||||
);
|
||||
if (!relPath) continue;
|
||||
|
||||
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
||||
const stat = await getCachedStat(abs);
|
||||
const { attachmentId, apiFilePath } = processFile(relPath);
|
||||
|
||||
$el
|
||||
const width = $vid.attr('width') ?? '100%';
|
||||
const align = $vid.attr('data-align') ?? 'center';
|
||||
|
||||
$vid
|
||||
.attr('src', apiFilePath)
|
||||
.attr('data-attachment-id', attachmentId)
|
||||
.attr('width', width)
|
||||
.attr('data-align', align);
|
||||
|
||||
unwrapFromParagraph($, $vid);
|
||||
}
|
||||
|
||||
// <div data-type="attachment">
|
||||
for (const el of $('div[data-type="attachment"]').toArray()) {
|
||||
const $oldDiv = $(el);
|
||||
const rawUrl = cleanUrlString($oldDiv.attr('data-attachment-url') ?? '')!;
|
||||
if (!rawUrl || rawUrl.startsWith('http')) continue;
|
||||
|
||||
const relPath = resolveRelativeAttachmentPath(
|
||||
rawUrl,
|
||||
pageDir,
|
||||
attachmentCandidates,
|
||||
);
|
||||
if (!relPath) continue;
|
||||
|
||||
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
||||
const fileName = path.basename(abs);
|
||||
const mime = getMimeType(abs);
|
||||
|
||||
const $newDiv = $('<div>')
|
||||
.attr('data-type', 'attachment')
|
||||
.attr('data-attachment-url', apiFilePath)
|
||||
.attr('data-attachment-name', fileName)
|
||||
.attr('data-attachment-mime', mime)
|
||||
.attr('data-attachment-id', attachmentId);
|
||||
|
||||
$oldDiv.replaceWith($newDiv);
|
||||
unwrapFromParagraph($, $newDiv);
|
||||
}
|
||||
|
||||
// rewrite other attachments via <a>
|
||||
for (const aEl of $('a').toArray()) {
|
||||
const $a = $(aEl);
|
||||
const href = cleanUrlString($a.attr('href') ?? '')!;
|
||||
if (!href || href.startsWith('http')) continue;
|
||||
|
||||
const relPath = resolveRelativeAttachmentPath(
|
||||
href,
|
||||
pageDir,
|
||||
attachmentCandidates,
|
||||
);
|
||||
if (!relPath) continue;
|
||||
|
||||
// Check if this is a Draw.io file
|
||||
const drawioSvg = drawioSvgMap.get(relPath);
|
||||
if (drawioSvg) {
|
||||
const $drawio = $('<div>')
|
||||
.attr('data-type', 'drawio')
|
||||
.attr('data-src', drawioSvg.apiFilePath)
|
||||
.attr('data-title', 'diagram')
|
||||
.attr('data-width', '100%')
|
||||
.attr('data-align', 'center')
|
||||
.attr('data-attachment-id', drawioSvg.attachmentId);
|
||||
|
||||
$a.replaceWith($drawio);
|
||||
unwrapFromParagraph($, $drawio);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip files that should be ignored
|
||||
if (skipFiles.has(relPath)) {
|
||||
$a.remove();
|
||||
continue;
|
||||
}
|
||||
|
||||
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
||||
const ext = path.extname(relPath).toLowerCase();
|
||||
|
||||
if (ext === '.mp4') {
|
||||
const $video = $('<video>')
|
||||
.attr('src', apiFilePath)
|
||||
.attr('data-attachment-id', attachmentId)
|
||||
.attr('data-size', stat.size.toString())
|
||||
.attr('width', $el.attr('width') ?? '100%')
|
||||
.attr('data-align', $el.attr('data-align') ?? 'center');
|
||||
.attr('width', '100%')
|
||||
.attr('data-align', 'center');
|
||||
$a.replaceWith($video);
|
||||
unwrapFromParagraph($, $video);
|
||||
} else {
|
||||
const confAliasName = $a.attr('data-linked-resource-default-alias');
|
||||
let attachmentName = path.basename(abs);
|
||||
if (confAliasName) attachmentName = confAliasName;
|
||||
|
||||
unwrapFromParagraph($, $el);
|
||||
} else if (tagName === 'video') {
|
||||
const src = cleanUrlString($el.attr('src') ?? '');
|
||||
if (!src || src.startsWith('http')) continue;
|
||||
const $div = $('<div>')
|
||||
.attr('data-type', 'attachment')
|
||||
.attr('data-attachment-url', apiFilePath)
|
||||
.attr('data-attachment-name', attachmentName)
|
||||
.attr('data-attachment-mime', getMimeType(abs))
|
||||
.attr('data-attachment-id', attachmentId);
|
||||
|
||||
const relPath = getCachedResolvedPath(src);
|
||||
$a.replaceWith($div);
|
||||
unwrapFromParagraph($, $div);
|
||||
}
|
||||
}
|
||||
|
||||
// excalidraw and drawio
|
||||
for (const type of ['excalidraw', 'drawio'] as const) {
|
||||
for (const el of $(`div[data-type="${type}"]`).toArray()) {
|
||||
const $oldDiv = $(el);
|
||||
const rawSrc = cleanUrlString($oldDiv.attr('data-src') ?? '')!;
|
||||
if (!rawSrc || rawSrc.startsWith('http')) continue;
|
||||
|
||||
const relPath = resolveRelativeAttachmentPath(
|
||||
rawSrc,
|
||||
pageDir,
|
||||
attachmentCandidates,
|
||||
);
|
||||
if (!relPath) continue;
|
||||
|
||||
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
||||
const stat = await getCachedStat(abs);
|
||||
const fileName = path.basename(abs);
|
||||
|
||||
$el
|
||||
.attr('src', apiFilePath)
|
||||
.attr('data-attachment-id', attachmentId)
|
||||
.attr('data-size', stat.size.toString())
|
||||
.attr('width', $el.attr('width') ?? '100%')
|
||||
.attr('data-align', $el.attr('data-align') ?? 'center');
|
||||
const width = $oldDiv.attr('data-width') || '100%';
|
||||
const align = $oldDiv.attr('data-align') || 'center';
|
||||
|
||||
unwrapFromParagraph($, $el);
|
||||
} else if (tagName === 'div') {
|
||||
const dataType = $el.attr('data-type');
|
||||
const $newDiv = $('<div>')
|
||||
.attr('data-type', type)
|
||||
.attr('data-src', apiFilePath)
|
||||
.attr('data-title', fileName)
|
||||
.attr('data-width', width)
|
||||
.attr('data-align', align)
|
||||
.attr('data-attachment-id', attachmentId);
|
||||
|
||||
if (dataType === 'attachment') {
|
||||
const rawUrl = cleanUrlString($el.attr('data-attachment-url') ?? '');
|
||||
if (!rawUrl || rawUrl.startsWith('http')) continue;
|
||||
|
||||
const relPath = getCachedResolvedPath(rawUrl);
|
||||
if (!relPath) continue;
|
||||
|
||||
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
||||
const stat = await getCachedStat(abs);
|
||||
const fileName = path.basename(abs);
|
||||
const mime = getMimeType(abs);
|
||||
|
||||
const $newDiv = $('<div>')
|
||||
.attr('data-type', 'attachment')
|
||||
.attr('data-attachment-url', apiFilePath)
|
||||
.attr('data-attachment-name', fileName)
|
||||
.attr('data-attachment-mime', mime)
|
||||
.attr('data-attachment-size', stat.size.toString())
|
||||
.attr('data-attachment-id', attachmentId);
|
||||
|
||||
$el.replaceWith($newDiv);
|
||||
unwrapFromParagraph($, $newDiv);
|
||||
} else if (dataType === 'excalidraw' || dataType === 'drawio') {
|
||||
const rawSrc = cleanUrlString($el.attr('data-src') ?? '');
|
||||
if (!rawSrc || rawSrc.startsWith('http')) continue;
|
||||
|
||||
const relPath = getCachedResolvedPath(rawSrc);
|
||||
if (!relPath) continue;
|
||||
|
||||
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
||||
const stat = await getCachedStat(abs);
|
||||
const fileName = path.basename(abs);
|
||||
|
||||
const $newDiv = $('<div>')
|
||||
.attr('data-type', dataType)
|
||||
.attr('data-src', apiFilePath)
|
||||
.attr('data-title', fileName)
|
||||
.attr('data-width', $el.attr('data-width') || '100%')
|
||||
.attr('data-size', stat.size.toString())
|
||||
.attr('data-align', $el.attr('data-align') || 'center')
|
||||
.attr('data-attachment-id', attachmentId);
|
||||
|
||||
$el.replaceWith($newDiv);
|
||||
unwrapFromParagraph($, $newDiv);
|
||||
}
|
||||
} else if (tagName === 'a') {
|
||||
const href = cleanUrlString($el.attr('href') ?? '');
|
||||
if (!href || href.startsWith('http')) continue;
|
||||
|
||||
const relPath = getCachedResolvedPath(href);
|
||||
if (!relPath) continue;
|
||||
|
||||
// Check if this is a Draw.io file
|
||||
const drawioSvg = drawioSvgMap.get(relPath);
|
||||
if (drawioSvg) {
|
||||
const $drawio = $('<div>')
|
||||
.attr('data-type', 'drawio')
|
||||
.attr('data-src', drawioSvg.apiFilePath)
|
||||
.attr('data-title', 'diagram')
|
||||
.attr('data-width', '100%')
|
||||
.attr('data-align', 'center')
|
||||
.attr('data-attachment-id', drawioSvg.attachmentId);
|
||||
|
||||
$el.replaceWith($drawio);
|
||||
unwrapFromParagraph($, $drawio);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip files that should be ignored
|
||||
if (skipFiles.has(relPath)) {
|
||||
$el.remove();
|
||||
continue;
|
||||
}
|
||||
|
||||
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
||||
const stat = await getCachedStat(abs);
|
||||
const ext = path.extname(relPath).toLowerCase();
|
||||
|
||||
if (ext === '.mp4') {
|
||||
const $video = $('<video>')
|
||||
.attr('src', apiFilePath)
|
||||
.attr('data-attachment-id', attachmentId)
|
||||
.attr('data-size', stat.size.toString())
|
||||
.attr('width', '100%')
|
||||
.attr('data-align', 'center');
|
||||
$el.replaceWith($video);
|
||||
unwrapFromParagraph($, $video);
|
||||
} else {
|
||||
const confAliasName = $el.attr('data-linked-resource-default-alias');
|
||||
let attachmentName = path.basename(abs);
|
||||
if (confAliasName) attachmentName = confAliasName;
|
||||
|
||||
const $div = $('<div>')
|
||||
.attr('data-type', 'attachment')
|
||||
.attr('data-attachment-url', apiFilePath)
|
||||
.attr('data-attachment-name', attachmentName)
|
||||
.attr('data-attachment-mime', getMimeType(abs))
|
||||
.attr('data-attachment-size', stat.size.toString())
|
||||
.attr('data-attachment-id', attachmentId);
|
||||
|
||||
$el.replaceWith($div);
|
||||
unwrapFromParagraph($, $div);
|
||||
}
|
||||
$oldDiv.replaceWith($newDiv);
|
||||
unwrapFromParagraph($, $newDiv);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -492,24 +494,17 @@ export class ImportAttachmentService {
|
||||
|
||||
// This attachment was in the list but not referenced in HTML - add it
|
||||
const { attachmentId, apiFilePath, abs } = processFile(href);
|
||||
const mime = mimeType || getMimeType(abs);
|
||||
|
||||
try {
|
||||
const stat = await fs.stat(abs);
|
||||
const mime = mimeType || getMimeType(abs);
|
||||
// Add as attachment node at the end
|
||||
const $attachmentDiv = $('<div>')
|
||||
.attr('data-type', 'attachment')
|
||||
.attr('data-attachment-url', apiFilePath)
|
||||
.attr('data-attachment-name', fileName)
|
||||
.attr('data-attachment-mime', mime)
|
||||
.attr('data-attachment-id', attachmentId);
|
||||
|
||||
// Add as attachment node at the end
|
||||
const $attachmentDiv = $('<div>')
|
||||
.attr('data-type', 'attachment')
|
||||
.attr('data-attachment-url', apiFilePath)
|
||||
.attr('data-attachment-name', fileName)
|
||||
.attr('data-attachment-mime', mime)
|
||||
.attr('data-attachment-size', stat.size.toString())
|
||||
.attr('data-attachment-id', attachmentId);
|
||||
|
||||
$.root().append($attachmentDiv);
|
||||
} catch (error) {
|
||||
this.logger.error(`Failed to process attachment ${fileName}:`, error);
|
||||
}
|
||||
$.root().append($attachmentDiv);
|
||||
}
|
||||
|
||||
// wait for all uploads & DB inserts
|
||||
@@ -534,16 +529,49 @@ export class ImportAttachmentService {
|
||||
}
|
||||
}
|
||||
|
||||
// Post-process DOM elements to add file sizes after uploads complete
|
||||
// This avoids blocking file operations during initial DOM processing
|
||||
const elementsNeedingSize = $('[data-attachment-id]:not([data-size])');
|
||||
for (const element of elementsNeedingSize.toArray()) {
|
||||
const $el = $(element);
|
||||
const attachmentId = $el.attr('data-attachment-id');
|
||||
if (!attachmentId) continue;
|
||||
|
||||
// Find the corresponding processed file info
|
||||
const processedEntry = Array.from(processed.values()).find(
|
||||
(entry) => entry.attachmentId === attachmentId,
|
||||
);
|
||||
|
||||
if (processedEntry) {
|
||||
try {
|
||||
const stat = await fs.stat(processedEntry.abs);
|
||||
$el.attr('data-size', stat.size.toString());
|
||||
} catch (error) {
|
||||
this.logger.debug(
|
||||
`Could not get size for ${processedEntry.abs}:`,
|
||||
error,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $.root().html() || '';
|
||||
}
|
||||
|
||||
private analyzeAttachments(attachments: AttachmentInfo[]): {
|
||||
private analyzeAttachments(
|
||||
attachments: AttachmentInfo[],
|
||||
isConfluenceImport?: boolean,
|
||||
): {
|
||||
drawioPairs: Map<string, DrawioPair>;
|
||||
skipFiles: Set<string>;
|
||||
} {
|
||||
const drawioPairs = new Map<string, DrawioPair>();
|
||||
const skipFiles = new Set<string>();
|
||||
|
||||
if (!isConfluenceImport) {
|
||||
return { drawioPairs, skipFiles };
|
||||
}
|
||||
|
||||
// Group attachments by type
|
||||
const drawioFiles: AttachmentInfo[] = [];
|
||||
const pngByBaseName = new Map<string, AttachmentInfo[]>();
|
||||
@@ -776,7 +804,10 @@ export class ImportAttachmentService {
|
||||
for (let attempt = 1; attempt <= this.MAX_RETRIES; attempt++) {
|
||||
try {
|
||||
const fileStream = createReadStream(abs);
|
||||
await this.storageService.uploadStream(storageFilePath, fileStream);
|
||||
await this.storageService.uploadStream(storageFilePath, fileStream, {
|
||||
recreateClient: true,
|
||||
});
|
||||
|
||||
const stat = await fs.stat(abs);
|
||||
|
||||
await this.db
|
||||
@@ -807,7 +838,7 @@ export class ImportAttachmentService {
|
||||
attempts: 1,
|
||||
backoff: {
|
||||
type: 'exponential',
|
||||
delay: 30 * 1000,
|
||||
delay: 3 * 60 * 1000,
|
||||
},
|
||||
deduplication: {
|
||||
id: attachmentId,
|
||||
|
||||
@@ -4,6 +4,11 @@ import { v7 } from 'uuid';
|
||||
import { InsertableBacklink } from '@docmost/db/types/entity.types';
|
||||
import { Cheerio, CheerioAPI, load } from 'cheerio';
|
||||
|
||||
/**
 * Reports whether `text` starts with a non-ASCII code point.
 *
 * Only the first code point is inspected; it is used as a cheap test for
 * emoji/icon glyphs. Empty input yields `false`.
 *
 * @param text - Candidate icon text.
 * @returns `true` when the first code point is greater than 127.
 */
function isUnicodeCharacter(text: string): boolean {
  if (text.length === 0) {
    return false;
  }

  // For a non-empty string codePointAt(0) is always defined; the explicit
  // check only replaces a non-null assertion.
  const leadingCodePoint = text.codePointAt(0);
  return leadingCodePoint !== undefined && leadingCodePoint > 127;
}
|
||||
|
||||
export async function formatImportHtml(opts: {
|
||||
html: string;
|
||||
currentFilePath: string;
|
||||
@@ -16,7 +21,11 @@ export async function formatImportHtml(opts: {
|
||||
workspaceId: string;
|
||||
pageDir?: string;
|
||||
attachmentCandidates?: string[];
|
||||
}): Promise<{ html: string; backlinks: InsertableBacklink[] }> {
|
||||
}): Promise<{
|
||||
html: string;
|
||||
backlinks: InsertableBacklink[];
|
||||
pageIcon?: string;
|
||||
}> {
|
||||
const {
|
||||
html,
|
||||
currentFilePath,
|
||||
@@ -28,6 +37,17 @@ export async function formatImportHtml(opts: {
|
||||
const $: CheerioAPI = load(html);
|
||||
const $root: Cheerio<any> = $.root();
|
||||
|
||||
let pageIcon: string | null = null;
|
||||
// extract notion page icon
|
||||
const headerIconSpan = $root.find('header .page-header-icon .icon');
|
||||
|
||||
if (headerIconSpan.length > 0) {
|
||||
const iconText = headerIconSpan.text().trim();
|
||||
if (iconText && isUnicodeCharacter(iconText)) {
|
||||
pageIcon = iconText;
|
||||
}
|
||||
}
|
||||
|
||||
notionFormatter($, $root);
|
||||
defaultHtmlFormatter($, $root);
|
||||
|
||||
@@ -44,6 +64,7 @@ export async function formatImportHtml(opts: {
|
||||
return {
|
||||
html: $root.html() || '',
|
||||
backlinks,
|
||||
pageIcon: pageIcon || undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -69,6 +90,10 @@ export function defaultHtmlFormatter($: CheerioAPI, $root: Cheerio<any>) {
|
||||
}
|
||||
|
||||
export function notionFormatter($: CheerioAPI, $root: Cheerio<any>) {
|
||||
// remove page header icon and cover image
|
||||
$root.find('.page-header-icon').remove();
|
||||
$root.find('.page-cover-image').remove();
|
||||
|
||||
// remove empty description paragraphs
|
||||
$root.find('p.page-description').each((_, el) => {
|
||||
if (!$(el).text().trim()) $(el).remove();
|
||||
@@ -189,22 +214,48 @@ export function notionFormatter($: CheerioAPI, $root: Cheerio<any>) {
|
||||
$fig.replaceWith($newAnchor);
|
||||
});
|
||||
|
||||
// remove user icons
|
||||
$root.find('span.user img.user-icon').remove();
|
||||
|
||||
// remove toc
|
||||
$root.find('nav.table_of_contents').remove();
|
||||
}
|
||||
|
||||
export function unwrapFromParagraph($: CheerioAPI, $node: Cheerio<any>) {
|
||||
// find the nearest <p> or <a> ancestor
|
||||
let $wrapper = $node.closest('p, a');
|
||||
// Keep track of processed wrappers to avoid infinite loops
|
||||
const processedWrappers = new Set<any>();
|
||||
|
||||
let $wrapper = $node.closest('p, a');
|
||||
while ($wrapper.length) {
|
||||
// if the wrapper has only our node inside, replace it entirely
|
||||
if ($wrapper.contents().length === 1) {
|
||||
const wrapperElement = $wrapper.get(0);
|
||||
|
||||
// If we've already processed this wrapper, break to avoid infinite loop
|
||||
if (processedWrappers.has(wrapperElement)) {
|
||||
break;
|
||||
}
|
||||
|
||||
processedWrappers.add(wrapperElement);
|
||||
|
||||
// Check if the wrapper contains only whitespace and our target node
|
||||
const hasOnlyTargetNode =
|
||||
$wrapper.contents().filter((_, el) => {
|
||||
const $el = $(el);
|
||||
// Skip whitespace-only text nodes. NodeType 3 = text node
|
||||
if (el.nodeType === 3 && !$el.text().trim()) {
|
||||
return false;
|
||||
}
|
||||
// Return true if this is not our target node
|
||||
return !$el.is($node) && !$node.is($el);
|
||||
}).length === 0;
|
||||
|
||||
if (hasOnlyTargetNode) {
|
||||
// Replace the wrapper entirely with our node
|
||||
$wrapper.replaceWith($node);
|
||||
} else {
|
||||
// otherwise just move the node to before the wrapper
|
||||
// Move the node to before the wrapper, preserving other content
|
||||
$wrapper.before($node);
|
||||
}
|
||||
|
||||
// look again for any new wrapper around $node
|
||||
$wrapper = $node.closest('p, a');
|
||||
}
|
||||
|
||||
@@ -64,3 +64,9 @@ export async function collectMarkdownAndHtmlFiles(
|
||||
await walk(dir);
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
 * Removes a trailing Notion-style identifier from a file name.
 *
 * The identifier is taken to be the final 32 alphanumeric characters of
 * the string (matched case-insensitively), optionally preceded by one
 * space or dash. The match is anchored at the end of the input, so a name
 * that still carries a file extension is left unchanged. Surrounding
 * whitespace is trimmed from the result.
 *
 * @param fileName - Name to clean, without its extension.
 * @returns The name with the trailing identifier removed and trimmed.
 */
export function stripNotionID(fileName: string): string {
  const withoutId = fileName.replace(/[ -]?[a-z0-9]{32}$/i, '');
  return withoutId.trim();
}
|
||||
|
||||
@@ -28,7 +28,7 @@ export class LocalDriver implements StorageDriver {
|
||||
}
|
||||
}
|
||||
|
||||
async uploadStream(filePath: string, file: Readable): Promise<void> {
|
||||
async uploadStream(filePath: string, file: Readable, options?: { recreateClient?: boolean }): Promise<void> {
|
||||
try {
|
||||
const fullPath = this._fullPath(filePath);
|
||||
await fs.mkdir(dirname(fullPath), { recursive: true });
|
||||
|
||||
@@ -41,12 +41,26 @@ export class S3Driver implements StorageDriver {
|
||||
}
|
||||
}
|
||||
|
||||
async uploadStream(filePath: string, file: Readable): Promise<void> {
|
||||
async uploadStream(
|
||||
filePath: string,
|
||||
file: Readable,
|
||||
options?: { recreateClient?: boolean },
|
||||
): Promise<void> {
|
||||
let clientToUse = this.s3Client;
|
||||
let shouldDestroyClient = false;
|
||||
|
||||
// optionally recreate client to avoid socket hang errors
|
||||
// (during multi-attachments imports)
|
||||
if (options?.recreateClient) {
|
||||
clientToUse = new S3Client(this.config as any);
|
||||
shouldDestroyClient = true;
|
||||
}
|
||||
|
||||
try {
|
||||
const contentType = getMimeType(filePath);
|
||||
|
||||
const upload = new Upload({
|
||||
client: this.s3Client,
|
||||
client: clientToUse,
|
||||
params: {
|
||||
Bucket: this.config.bucket,
|
||||
Key: filePath,
|
||||
@@ -58,6 +72,10 @@ export class S3Driver implements StorageDriver {
|
||||
await upload.done();
|
||||
} catch (err) {
|
||||
throw new Error(`Failed to upload file: ${(err as Error).message}`);
|
||||
} finally {
|
||||
if (shouldDestroyClient && clientToUse) {
|
||||
clientToUse.destroy();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ import { Readable } from 'stream';
|
||||
export interface StorageDriver {
|
||||
upload(filePath: string, file: Buffer): Promise<void>;
|
||||
|
||||
uploadStream(filePath: string, file: Readable): Promise<void>;
|
||||
uploadStream(filePath: string, file: Readable, options?: { recreateClient?: boolean }): Promise<void>;
|
||||
|
||||
copy(fromFilePath: string, toFilePath: string): Promise<void>;
|
||||
|
||||
|
||||
@@ -15,8 +15,8 @@ export class StorageService {
|
||||
this.logger.debug(`File uploaded successfully. Path: ${filePath}`);
|
||||
}
|
||||
|
||||
async uploadStream(filePath: string, fileContent: Readable) {
|
||||
await this.storageDriver.uploadStream(filePath, fileContent);
|
||||
async uploadStream(filePath: string, fileContent: Readable, options?: { recreateClient?: boolean }) {
|
||||
await this.storageDriver.uploadStream(filePath, fileContent, options);
|
||||
this.logger.debug(`File uploaded successfully. Path: ${filePath}`);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user