import {
  BadRequestException,
  Injectable,
  Logger,
  NotFoundException,
} from '@nestjs/common';
import { jsonToHtml, jsonToNode } from '../../collaboration/collaboration.util';
import { ExportFormat } from './dto/export-dto';
import { Page } from '@docmost/db/types/entity.types';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types';
import * as JSZip from 'jszip';
import { StorageService } from '../storage/storage.service';
import {
  buildTree,
  computeLocalPath,
  getExportExtension,
  getPageTitle,
  PageExportTree,
  replaceInternalLinks,
  updateAttachmentUrlsToLocalPaths,
} from './utils';
import {
  ExportMetadata,
  ExportPageMetadata,
} from '../../common/helpers/types/export-metadata.types';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { PagePermissionRepo } from '@docmost/db/repos/page/page-permission.repo';
import { Node } from '@tiptap/pm/model';
import { EditorState } from '@tiptap/pm/state';
// eslint-disable-next-line @typescript-eslint/no-require-imports
import slugify = require('@sindresorhus/slugify');
// eslint-disable-next-line @typescript-eslint/no-require-imports
const packageJson = require('../../../package.json');
import { EnvironmentService } from '../environment/environment.service';
import { DomainService } from '../environment/domain.service';
import {
  getAttachmentIds,
  getProsemirrorContent,
} from '../../common/helpers/prosemirror/utils';
import { htmlToMarkdown } from '@docmost/editor-ext';

/**
 * Escapes the characters that are significant in HTML text and attribute
 * context so that arbitrary text can be embedded in generated markup.
 */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Exports pages (a single page, a page subtree or a whole space) as HTML or
 * Markdown, optionally bundled into a zip archive together with attachments
 * and a `docmost-metadata.json` manifest.
 */
@Injectable()
export class ExportService {
  private readonly logger = new Logger(ExportService.name);

  constructor(
    private readonly pageRepo: PageRepo,
    private readonly pagePermissionRepo: PagePermissionRepo,
    @InjectKysely() private readonly db: KyselyDB,
    private readonly storageService: StorageService,
    private readonly environmentService: EnvironmentService,
    private readonly domainService: DomainService,
  ) {}

  /**
   * Renders one page to the requested export format.
   *
   * @param format - One of the `ExportFormat` values (HTML or Markdown).
   * @param page - The page to render; `page.content` is not modified.
   * @param singlePage - When true, page mentions are converted to links here.
   *   When false/omitted the content is used as-is, because the zip flow
   *   converts mentions before calling this method.
   * @returns The rendered document, or `undefined` for an unrecognised format.
   */
  async exportPage(format: string, page: Page, singlePage?: boolean) {
    const titleNode = {
      type: 'heading',
      attrs: { level: 1 },
      content: [{ type: 'text', text: getPageTitle(page.title) }],
    };

    let prosemirrorJson: any;

    if (singlePage) {
      const baseUrl = await this.getWorkspaceBaseUrl(page.workspaceId);
      prosemirrorJson = await this.turnPageMentionsToLinks(
        getProsemirrorContent(page.content),
        page.workspaceId,
        baseUrl,
      );
    } else {
      // mentions are already turned into links during the zip process
      prosemirrorJson = getProsemirrorContent(page.content);
    }

    if (page.title) {
      // Build a new document instead of unshifting in place: the JSON may be
      // the caller's own `page.content` object, and mutating it would add the
      // title again on every subsequent export of the same page object.
      const existingContent = Array.isArray(prosemirrorJson?.content)
        ? prosemirrorJson.content
        : [];
      prosemirrorJson = {
        ...prosemirrorJson,
        content: [titleNode, ...existingContent],
      };
    }

    const pageHtml = jsonToHtml(prosemirrorJson);

    if (format === ExportFormat.HTML) {
      // Emit a complete HTML document; the title is escaped because it is
      // user-provided text placed directly into markup.
      return `<!DOCTYPE html>
<html>
  <head>
    <title>${escapeHtml(getPageTitle(page.title))}</title>
  </head>
  <body>${pageHtml}</body>
</html>`;
    }

    if (format === ExportFormat.Markdown) {
      // Drop <colgroup> elements before the Markdown conversion.
      const newPageHtml = pageHtml.replace(
        /<colgroup[^>]*>[\s\S]*?<\/colgroup>/gim,
        '',
      );
      return htmlToMarkdown(newPageHtml);
    }

    return;
  }

  /**
   * Exports a page, optionally with its descendants, as a zip stream.
   *
   * @param pageId - Id of the root page of the export.
   * @param format - Export format passed through to {@link exportPage}.
   * @param includeAttachments - Whether attachment files are added to the zip.
   * @param includeChildren - Whether descendants of the page are exported too.
   * @param userId - User whose page permissions are applied.
   * @param ignorePermissions - Skips permission filtering when true.
   *   Filtering is also skipped when `userId` is not provided.
   * @returns A Node stream of the generated zip archive.
   * @throws BadRequestException when there is nothing to export, nothing
   *   accessible to export, or the root page itself is not accessible.
   */
  async exportPages(
    pageId: string,
    format: string,
    includeAttachments: boolean,
    includeChildren: boolean,
    userId?: string,
    ignorePermissions = false,
  ) {
    let pages: Page[] = [];

    if (includeChildren) {
      //@ts-ignore
      pages = await this.pageRepo.getPageAndDescendants(pageId, {
        includeContent: true,
      });
    } else {
      // Only fetch the single page when includeChildren is false
      const page = await this.pageRepo.findById(pageId, {
        includeContent: true,
      });
      if (page) {
        pages = [page];
      }
    }

    if (!pages || pages.length === 0) {
      throw new BadRequestException('No pages to export');
    }

    if (!ignorePermissions && userId) {
      pages = await this.filterPagesForExport(
        pages,
        pageId,
        userId,
        pages[0].spaceId,
      );
      if (pages.length === 0) {
        throw new BadRequestException('No accessible pages to export');
      }
    }

    const parentPageIndex = pages.findIndex((obj) => obj.id === pageId);

    // After filtering by permissions, if the root page itself is not
    // accessible to the user, findIndex returns -1
    if (parentPageIndex === -1) {
      throw new BadRequestException('Root page is not accessible');
    }

    // set to null to make export of pages with parentId work
    pages[parentPageIndex].parentPageId = null;

    const tree = buildTree(pages as Page[]);
    const baseUrl = await this.getWorkspaceBaseUrl(pages[0].workspaceId);

    const zip = new JSZip();
    await this.zipPages(
      tree,
      format,
      zip,
      includeAttachments,
      baseUrl,
      userId,
      ignorePermissions,
    );

    const zipFile = zip.generateNodeStream({
      type: 'nodebuffer',
      streamFiles: true,
      compression: 'DEFLATE',
    });

    return zipFile;
  }

  /**
   * Exports every non-deleted page of a space as a zip stream.
   *
   * @param spaceId - Id of the space to export.
   * @param format - Export format passed through to {@link exportPage}.
   * @param includeAttachments - Whether attachment files are added to the zip.
   * @param userId - User whose page permissions are applied.
   * @param ignorePermissions - Skips permission filtering when true.
   *   Filtering is also skipped when `userId` is not provided.
   * @returns The zip stream, the suggested file name and the space name.
   * @throws NotFoundException when the space does not exist.
   * @throws BadRequestException when the space has no (accessible) pages.
   */
  async exportSpace(
    spaceId: string,
    format: string,
    includeAttachments: boolean,
    userId?: string,
    ignorePermissions = false,
  ) {
    const space = await this.db
      .selectFrom('spaces')
      .select(['id', 'name'])
      .where('id', '=', spaceId)
      .executeTakeFirst();

    if (!space) {
      throw new NotFoundException('Space not found');
    }

    let pages = await this.db
      .selectFrom('pages')
      .select([
        'pages.id',
        'pages.slugId',
        'pages.title',
        'pages.icon',
        'pages.position',
        'pages.content',
        'pages.parentPageId',
        'pages.spaceId',
        'pages.workspaceId',
        'pages.createdAt',
        'pages.updatedAt',
      ])
      .where('spaceId', '=', spaceId)
      .where('deletedAt', 'is', null)
      .execute();

    if (!ignorePermissions && userId) {
      pages = await this.filterPagesForExport(
        pages as Page[],
        null,
        userId,
        spaceId,
      );
      if (pages.length === 0) {
        throw new BadRequestException('No accessible pages to export');
      }
    }

    // Without this guard an empty space would crash on `pages[0]` below.
    if (pages.length === 0) {
      throw new BadRequestException('No pages to export');
    }

    const tree = buildTree(pages as Page[]);
    const baseUrl = await this.getWorkspaceBaseUrl(pages[0].workspaceId);

    const zip = new JSZip();
    await this.zipPages(
      tree,
      format,
      zip,
      includeAttachments,
      baseUrl,
      userId,
      ignorePermissions,
    );

    const zipFile = zip.generateNodeStream({
      type: 'nodebuffer',
      streamFiles: true,
      compression: 'DEFLATE',
    });

    const fileName = `${space.name}-space-export.zip`;

    return {
      fileStream: zipFile,
      fileName,
      spaceName: space.name,
    };
  }

  /**
   * Writes every page of `tree` into `zip`, nesting child pages in a folder
   * named after their parent, and finally adds `docmost-metadata.json`
   * describing the exported pages.
   *
   * @param tree - Pages grouped by parent page id (root pages under `null`).
   * @param format - Export format passed through to {@link exportPage}.
   * @param zip - Archive that receives the files.
   * @param includeAttachments - Whether attachment files are added and
   *   attachment URLs rewritten to local paths.
   * @param baseUrl - Workspace base URL used when turning mentions into links.
   * @param userId - User whose permissions restrict which mentions resolve.
   * @param ignorePermissions - Skips the mention permission filter when true.
   */
  async zipPages(
    tree: PageExportTree,
    format: string,
    zip: JSZip,
    includeAttachments: boolean,
    baseUrl: string,
    userId?: string,
    ignorePermissions = false,
  ): Promise<void> {
    const slugIdToPath: Record<string, string> = {};
    const pageIdToFilePath: Record<string, string> = {};
    const pagesMetadata: Record<string, ExportPageMetadata> = {};

    computeLocalPath(tree, format, null, '', slugIdToPath);

    const stack: { folder: JSZip; parentPageId: string | null }[] = [
      { folder: zip, parentPageId: null },
    ];

    while (stack.length > 0) {
      const current = stack.pop();
      if (!current) break;
      const { folder, parentPageId } = current;
      const children = tree[parentPageId] || [];

      for (const page of children) {
        const childPages = tree[page.id] || [];

        const prosemirrorJson = await this.turnPageMentionsToLinks(
          getProsemirrorContent(page.content),
          page.workspaceId,
          baseUrl,
          userId,
          ignorePermissions,
        );

        const currentPagePath = slugIdToPath[page.slugId];

        let updatedJsonContent = replaceInternalLinks(
          prosemirrorJson,
          slugIdToPath,
          currentPagePath,
        );

        if (includeAttachments) {
          await this.zipAttachments(updatedJsonContent, page.spaceId, folder);
          updatedJsonContent =
            updateAttachmentUrlsToLocalPaths(updatedJsonContent);
        }

        const pageTitle = getPageTitle(page.title);

        const pageExportContent = await this.exportPage(format, {
          ...page,
          content: updatedJsonContent,
        });

        folder.file(
          `${pageTitle}${getExportExtension(format)}`,
          pageExportContent,
        );

        pageIdToFilePath[page.id] = currentPagePath;

        // A parent is always written before its children are pushed on the
        // stack, so its file path is already recorded at this point.
        const parentPath = parentPageId
          ? pageIdToFilePath[parentPageId]
          : null;

        pagesMetadata[currentPagePath] = {
          pageId: page.id,
          slugId: page.slugId,
          icon: page.icon ?? null,
          position: page.position,
          parentPath,
          createdAt: page.createdAt?.toISOString() ?? new Date().toISOString(),
          updatedAt: page.updatedAt?.toISOString() ?? new Date().toISOString(),
        };

        if (childPages.length > 0) {
          const pageFolder = folder.folder(pageTitle);
          stack.push({ folder: pageFolder, parentPageId: page.id });
        }
      }
    }

    const metadata: ExportMetadata = {
      exportedAt: new Date().toISOString(),
      source: 'docmost',
      version: packageJson.version,
      pages: pagesMetadata,
    };

    zip.file('docmost-metadata.json', JSON.stringify(metadata, null, 2));
  }

  /**
   * Adds the attachments referenced by a document to `zip` under
   * `/files/<attachmentId>/<fileName>`.
   *
   * Only attachments belonging to `spaceId` are looked up. A failure while
   * reading or adding an individual attachment is logged at debug level and
   * does not abort the export.
   *
   * @param prosemirrorJson - Document whose attachment ids are collected.
   * @param spaceId - Space the attachments must belong to.
   * @param zip - Archive (or folder) that receives the files.
   */
  async zipAttachments(prosemirrorJson: any, spaceId: string, zip: JSZip) {
    const attachmentIds = getAttachmentIds(prosemirrorJson);

    if (attachmentIds.length > 0) {
      const attachments = await this.db
        .selectFrom('attachments')
        .selectAll()
        .where('id', 'in', attachmentIds)
        .where('spaceId', '=', spaceId)
        .execute();

      await Promise.all(
        attachments.map(async (attachment) => {
          try {
            const fileBuffer = await this.storageService.read(
              attachment.filePath,
            );
            const filePath = `/files/${attachment.id}/${attachment.fileName}`;
            zip.file(filePath, fileBuffer);
          } catch (err) {
            this.logger.debug(`Attachment export error ${attachment.id}`, err);
          }
        }),
      );
    }
  }

  /**
   * Replaces every page-mention node in a document with a text node carrying
   * a link mark that points at the mentioned page.
   *
   * Mentions whose page cannot be loaded (not found in the workspace, or
   * filtered out by permissions) fall back to the label and slug id stored on
   * the mention node itself.
   *
   * @param prosemirrorJson - Document JSON to transform.
   * @param workspaceId - Workspace the mentioned pages must belong to.
   * @param baseUrl - Base URL prepended to the generated page links.
   * @param userId - User whose permissions restrict which pages are loaded.
   * @param ignorePermissions - Skips the permission filter when true.
   *   The filter is also skipped when `userId` is not provided.
   * @returns The input JSON unchanged when it contains no page mentions with
   *   an entity id, otherwise the JSON of the transformed document.
   */
  async turnPageMentionsToLinks(
    prosemirrorJson: any,
    workspaceId: string,
    baseUrl: string,
    userId?: string,
    ignorePermissions = false,
  ) {
    const doc = jsonToNode(prosemirrorJson);

    let pageMentionIds: string[] = [];

    doc.descendants((node: Node) => {
      if (node.type.name === 'mention' && node.attrs.entityType === 'page') {
        if (node.attrs.entityId) {
          pageMentionIds.push(node.attrs.entityId);
        }
      }
    });

    if (pageMentionIds.length < 1) {
      return prosemirrorJson;
    }

    // Filter to only accessible pages if permissions are enforced
    if (!ignorePermissions && userId) {
      pageMentionIds = await this.pagePermissionRepo.filterAccessiblePageIds({
        pageIds: pageMentionIds,
        userId,
      });
    }

    const pages =
      pageMentionIds.length > 0
        ? await this.db
            .selectFrom('pages')
            .select([
              'id',
              'slugId',
              'title',
              'creatorId',
              'spaceId',
              'workspaceId',
            ])
            .select((eb) => this.pageRepo.withSpace(eb))
            .where('id', 'in', pageMentionIds)
            .where('workspaceId', '=', workspaceId)
            .execute()
        : [];

    const pageMap = new Map(pages.map((page) => [page.id, page]));

    let editorState = EditorState.create({
      doc: doc,
    });

    const transaction = editorState.tr;

    // Positions reported by `descendants` refer to the original document;
    // `offset` tracks how far earlier replacements have shifted them.
    let offset = 0;

    /**
     * Helper function to replace a mention node with a link node.
     */
    const replaceMentionWithLink = (
      node: Node,
      pos: number,
      title: string,
      slugId: string,
      spaceSlug: string,
    ) => {
      const linkTitle = title || 'untitled';
      const truncatedTitle = linkTitle.substring(0, 70);
      const pageSlug = `${slugify(truncatedTitle)}-${slugId}`;

      const link = `${baseUrl}/s/${spaceSlug}/p/${pageSlug}`;

      // Create a link mark and a text node with that mark
      const linkMark = editorState.schema.marks.link.create({ href: link });
      const linkTextNode = editorState.schema.text(linkTitle, [linkMark]);

      // Calculate positions (adjusted by the current offset)
      const from = pos + offset;
      const to = pos + offset + node.nodeSize;

      // Replace the node in the transaction and update the offset
      transaction.replaceWith(from, to, linkTextNode);
      offset += linkTextNode.nodeSize - node.nodeSize;
    };

    // find and convert page mentions to links
    editorState.doc.descendants((node: Node, pos: number) => {
      // Check if the node is a page mention
      if (node.type.name === 'mention' && node.attrs.entityType === 'page') {
        const { entityId: pageId, slugId, label } = node.attrs;
        const page = pageMap.get(pageId);

        if (page) {
          replaceMentionWithLink(
            node,
            pos,
            page.title,
            page.slugId,
            page.space.slug,
          );
        } else {
          // if page is not found, default to the node label and slugId
          replaceMentionWithLink(node, pos, label, slugId, 'undefined');
        }
      }
    });

    if (transaction.docChanged) {
      editorState = editorState.apply(transaction);
    }

    const updatedDoc = editorState.doc;

    return updatedDoc.toJSON();
  }

  /**
   * Resolves the base URL of a workspace from its stored hostname.
   *
   * @param workspaceId - Id of the workspace to look up.
   * @returns The URL produced by `DomainService.getUrl` for the workspace's
   *   hostname (or for `undefined` when the workspace is not found).
   */
  private async getWorkspaceBaseUrl(workspaceId: string): Promise<string> {
    const workspace = await this.db
      .selectFrom('workspaces')
      .select('hostname')
      .where('id', '=', workspaceId)
      .executeTakeFirst();

    return this.domainService.getUrl(workspace?.hostname);
  }

  /**
   * Restricts `pages` to those the user may access AND that are connected to
   * an export root through a chain of accessible parents. A page below an
   * inaccessible page is therefore excluded even if it is accessible itself.
   *
   * @param pages - Candidate pages for the export.
   * @param rootPageId - Root page of a page export, or `null` for a space
   *   export, in which case every page without a parent is a root.
   * @param userId - User whose permissions are applied.
   * @param spaceId - Space the pages belong to.
   * @returns The included pages, in their original order.
   */
  private async filterPagesForExport(
    pages: Page[],
    rootPageId: string | null,
    userId: string,
    spaceId: string,
  ): Promise<Page[]> {
    if (pages.length === 0) return [];

    const pageIds = pages.map((p) => p.id);
    const accessibleIds = await this.pagePermissionRepo.filterAccessiblePageIds(
      {
        pageIds,
        userId,
        spaceId,
      },
    );
    const accessibleSet = new Set(accessibleIds);

    const includedIds = new Set<string>();
    const childrenByParent = new Map<string, Page[]>();
    const pending: Page[] = [];

    // Single pass: seed the traversal with accessible roots and index the
    // remaining accessible pages by their parent id.
    for (const page of pages) {
      if (!accessibleSet.has(page.id)) continue;

      // Root page or top-level page in space export
      const isRoot =
        page.id === rootPageId ||
        (rootPageId === null && page.parentPageId === null);

      if (isRoot) {
        if (!includedIds.has(page.id)) {
          includedIds.add(page.id);
          pending.push(page);
        }
        continue;
      }

      if (page.parentPageId) {
        const siblings = childrenByParent.get(page.parentPageId);
        if (siblings) {
          siblings.push(page);
        } else {
          childrenByParent.set(page.parentPageId, [page]);
        }
      }
    }

    // Walk down from the roots; a page is included only once its parent is.
    let parent: Page | undefined;
    while ((parent = pending.pop()) !== undefined) {
      for (const child of childrenByParent.get(parent.id) ?? []) {
        if (includedIds.has(child.id)) continue;
        includedIds.add(child.id);
        pending.push(child);
      }
    }

    return pages.filter((p) => includedIds.has(p.id));
  }
}