import {
BadRequestException,
Injectable,
Logger,
NotFoundException,
} from '@nestjs/common';
import { jsonToHtml, jsonToNode } from '../../collaboration/collaboration.util';
import { ExportFormat } from './dto/export-dto';
import { Page } from '@docmost/db/types/entity.types';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types';
import * as JSZip from 'jszip';
import { StorageService } from '../storage/storage.service';
import {
buildTree,
computeLocalPath,
getExportExtension,
getPageTitle,
PageExportTree,
replaceInternalLinks,
updateAttachmentUrlsToLocalPaths,
} from './utils';
import {
ExportMetadata,
ExportPageMetadata,
} from '../../common/helpers/types/export-metadata.types';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { PagePermissionRepo } from '@docmost/db/repos/page/page-permission.repo';
import { Node } from '@tiptap/pm/model';
import { EditorState } from '@tiptap/pm/state';
import slugify from '@sindresorhus/slugify';
// eslint-disable-next-line @typescript-eslint/no-require-imports
const packageJson = require('../../../package.json');
import { EnvironmentService } from '../environment/environment.service';
import { DomainService } from '../environment/domain.service';
import {
getAttachmentIds,
getProsemirrorContent,
} from '../../common/helpers/prosemirror/utils';
import { htmlToMarkdown } from '@docmost/editor-ext';
/**
 * Attachment record that may be written into an export archive.
 */
type AllowedAttachment = {
  /** Attachment identifier; forms part of the entry path inside the archive. */
  id: string;
  /** File name used for the entry inside the archive. */
  fileName: string;
  /** Location passed to the storage service to read the file's contents. */
  filePath: string;
};
@Injectable()
export class ExportService {
// Logger labelled with this service's class name.
private readonly logger = new Logger(ExportService.name);
/**
 * Collaborators are supplied through constructor injection.
 *
 * - pageRepo: loads a single page, or a page together with its descendants,
 *   for export.
 * - pagePermissionRepo: not referenced in the visible part of this class;
 *   presumably used by the permission-filtering helpers — TODO confirm.
 * - db: Kysely handle used to query the `spaces` and `pages` tables directly.
 * - storageService: reads attachment files so they can be added to archives.
 * - environmentService / domainService: not referenced in the visible part of
 *   this class; presumably used when resolving the workspace base URL —
 *   TODO confirm.
 */
constructor(
private readonly pageRepo: PageRepo,
private readonly pagePermissionRepo: PagePermissionRepo,
@InjectKysely() private readonly db: KyselyDB,
private readonly storageService: StorageService,
private readonly environmentService: EnvironmentService,
private readonly domainService: DomainService,
) {}
/**
 * Renders one page to the requested export format.
 *
 * When the page has a title, a level-1 heading carrying that title is
 * prepended to the document before rendering.
 *
 * @param format One of the `ExportFormat` values; any other value yields
 *   `undefined`.
 * @param page Page whose `content` is rendered.
 * @param singlePage When truthy, page mentions are converted to links here;
 *   otherwise the caller is expected to have done so already (the zip flow
 *   does this before calling).
 * @returns The rendered HTML or Markdown string, or `undefined` for an
 *   unsupported format.
 */
async exportPage(format: string, page: Page, singlePage?: boolean) {
  const titleNode = {
    type: 'heading',
    attrs: { level: 1 },
    content: [{ type: 'text', text: getPageTitle(page.title) }],
  };

  let prosemirrorJson: any;
  if (singlePage) {
    const baseUrl = await this.getWorkspaceBaseUrl(page.workspaceId);
    prosemirrorJson = await this.turnPageMentionsToLinks(
      getProsemirrorContent(page.content),
      page.workspaceId,
      baseUrl,
    );
  } else {
    // Mentions were already turned into links during the zip process.
    prosemirrorJson = getProsemirrorContent(page.content);
  }

  if (page.title) {
    prosemirrorJson.content.unshift(titleNode);
  }

  const pageHtml = jsonToHtml(prosemirrorJson);

  if (format === ExportFormat.HTML) {
    // NOTE(review): the title is emitted as bare text ahead of the body, and
    // the body already starts with an H1 title when page.title is set —
    // confirm whether surrounding document markup was intended here.
    return `\n${getPageTitle(page.title)}\n${pageHtml}\n`;
  }

  if (format === ExportFormat.Markdown) {
    // Remove whole <colgroup>…</colgroup> elements before converting to
    // Markdown. The pattern must be anchored on the opening tag; without it
    // the match would start at the first '>' in the document and swallow
    // everything up to the next closing </colgroup>.
    const newPageHtml = pageHtml.replace(
      /<colgroup[^>]*>[\s\S]*?<\/colgroup>/gim,
      '',
    );
    return htmlToMarkdown(newPageHtml);
  }

  return;
}
/**
 * Exports a page, optionally together with its descendants and attachments.
 *
 * Permission filtering (via `filterPagesForExport`) is applied only when a
 * `userId` is supplied and `ignorePermissions` is false.
 *
 * @param pageId Id of the root page of the export.
 * @param format Export format forwarded to the page renderer.
 * @param includeAttachments Whether attachment files are added to the archive.
 * @param includeChildren Whether descendants of the root page are exported.
 * @param userId User on whose behalf the export runs, if any.
 * @param ignorePermissions When true, permission filtering is skipped.
 * @returns `{ type: 'file', content, page }` when exactly one page is exported
 *   without attachments; otherwise `{ type: 'zip', stream, page }`. In both
 *   cases `page` is the root page identified by `pageId`.
 * @throws BadRequestException when there is nothing to export, nothing
 *   accessible to export, or the root page itself is not accessible.
 */
async exportPages(
  pageId: string,
  format: string,
  includeAttachments: boolean,
  includeChildren: boolean,
  userId?: string,
  ignorePermissions = false,
) {
  let pages: Page[];

  if (includeChildren) {
    //@ts-ignore
    pages = await this.pageRepo.getPageAndDescendants(pageId, {
      includeContent: true,
    });
  } else {
    // Only fetch the single page when includeChildren is false
    const page = await this.pageRepo.findById(pageId, {
      includeContent: true,
    });
    if (page) {
      pages = [page];
    }
  }

  if (!pages || pages.length === 0) {
    throw new BadRequestException('No pages to export');
  }

  if (!ignorePermissions && userId) {
    pages = await this.filterPagesForExport(
      pages,
      pageId,
      userId,
      pages[0].spaceId,
    );
    if (pages.length === 0) {
      throw new BadRequestException('No accessible pages to export');
    }
  }

  // After filtering by permissions the root page may be gone; in that case
  // there is nothing meaningful to export.
  const rootPage = pages.find((candidate) => candidate.id === pageId);
  if (!rootPage) {
    throw new BadRequestException('Root page is not accessible');
  }

  // set to null to make export of pages with parentId work
  rootPage.parentPageId = null;

  const isSinglePage = pages.length === 1 && !includeAttachments;
  if (isSinglePage) {
    const pageContent = await this.exportPage(format, rootPage, true);
    return { type: 'file' as const, content: pageContent, page: rootPage };
  }

  const tree = buildTree(pages as Page[]);
  // Use the located root page rather than assuming it sits at index 0.
  const baseUrl = await this.getWorkspaceBaseUrl(rootPage.workspaceId);

  const zip = new JSZip();
  await this.zipPages(
    tree,
    format,
    zip,
    includeAttachments,
    baseUrl,
    userId,
    ignorePermissions,
  );

  const zipFile = zip.generateNodeStream({
    type: 'nodebuffer',
    streamFiles: true,
    compression: 'DEFLATE',
  });

  return { type: 'zip' as const, stream: zipFile, page: rootPage };
}
/**
 * Exports every non-deleted page of a space as a zip archive.
 *
 * Permission filtering (via `filterPagesForExport`) is applied only when a
 * `userId` is supplied and `ignorePermissions` is false.
 *
 * @param spaceId Id of the space to export.
 * @param format Export format forwarded to the page renderer.
 * @param includeAttachments Whether attachment files are added to the archive.
 * @param userId User on whose behalf the export runs, if any.
 * @param ignorePermissions When true, permission filtering is skipped.
 * @returns The zip stream, the archive file name and the space name.
 * @throws NotFoundException when the space does not exist.
 * @throws BadRequestException when the space has no pages, or none that the
 *   user can access.
 */
async exportSpace(
  spaceId: string,
  format: string,
  includeAttachments: boolean,
  userId?: string,
  ignorePermissions = false,
) {
  const space = await this.db
    .selectFrom('spaces')
    .select(['id', 'name'])
    .where('id', '=', spaceId)
    .executeTakeFirst();

  if (!space) {
    throw new NotFoundException('Space not found');
  }

  let pages = await this.db
    .selectFrom('pages')
    .select([
      'pages.id',
      'pages.slugId',
      'pages.title',
      'pages.icon',
      'pages.position',
      'pages.content',
      'pages.parentPageId',
      'pages.spaceId',
      'pages.workspaceId',
      'pages.createdAt',
      'pages.updatedAt',
    ])
    .where('spaceId', '=', spaceId)
    .where('deletedAt', 'is', null)
    .execute();

  if (!ignorePermissions && userId) {
    pages = await this.filterPagesForExport(
      pages as Page[],
      null,
      userId,
      spaceId,
    );
    if (pages.length === 0) {
      throw new BadRequestException('No accessible pages to export');
    }
  }

  // The workspace id is read from the first page below, so an empty space
  // must be rejected explicitly instead of failing on `pages[0]`.
  if (pages.length === 0) {
    throw new BadRequestException('No pages to export');
  }

  const tree = buildTree(pages as Page[]);
  const baseUrl = await this.getWorkspaceBaseUrl(pages[0].workspaceId);

  const zip = new JSZip();
  await this.zipPages(
    tree,
    format,
    zip,
    includeAttachments,
    baseUrl,
    userId,
    ignorePermissions,
  );

  const zipFile = zip.generateNodeStream({
    type: 'nodebuffer',
    streamFiles: true,
    compression: 'DEFLATE',
  });

  const fileName = `${space.name}-space-export.zip`;
  return {
    fileStream: zipFile,
    fileName,
    spaceName: space.name,
  };
}
/**
 * Writes every page of the tree into the archive, one file per page, with
 * child pages placed in a folder named after their parent's title, and then
 * adds a `docmost-metadata.json` file at the archive root.
 *
 * @param tree Pages grouped by parent page id; top-level pages are looked up
 *   under the `null` parent.
 * @param format Export format forwarded to the page renderer.
 * @param zip Archive (root folder) that receives the files.
 * @param includeAttachments Whether attachment files are added and attachment
 *   URLs rewritten to local paths.
 * @param baseUrl Workspace base URL used when rewriting mentions and links.
 * @param userId User on whose behalf the export runs, if any.
 * @param ignorePermissions Forwarded to mention and attachment resolution.
 */
async zipPages(
  tree: PageExportTree,
  format: string,
  zip: JSZip,
  includeAttachments: boolean,
  baseUrl: string,
  userId?: string,
  ignorePermissions = false,
): Promise<void> {
  const slugIdToPath: Record<string, string> = {};
  const pageIdToFilePath: Record<string, string> = {};
  const pagesMetadata: Record<string, ExportPageMetadata> = {};

  computeLocalPath(tree, format, null, '', slugIdToPath);

  // Batch resolve attachments once for the whole export so we only run the
  // owning-page view check a single time, regardless of page count.
  const allowedAttachments = includeAttachments
    ? await this.resolveAccessibleAttachments(tree, userId, ignorePermissions)
    : new Map<string, AllowedAttachment>();

  // Explicit stack instead of recursion: each entry pairs an archive folder
  // with the page id whose children belong in that folder.
  const stack: { folder: JSZip; parentPageId: string | null }[] = [
    { folder: zip, parentPageId: null },
  ];

  while (stack.length > 0) {
    const { folder, parentPageId } = stack.pop();
    const children = tree[parentPageId] || [];

    // A parent is always written before its folder is pushed on the stack,
    // so its path is already recorded when its children are visited.
    const parentPath = parentPageId ? pageIdToFilePath[parentPageId] : null;

    for (const page of children) {
      const childPages = tree[page.id] || [];

      const prosemirrorJson = await this.turnPageMentionsToLinks(
        getProsemirrorContent(page.content),
        page.workspaceId,
        baseUrl,
        userId,
        ignorePermissions,
      );

      const currentPagePath = slugIdToPath[page.slugId];

      let updatedJsonContent = replaceInternalLinks(
        prosemirrorJson,
        slugIdToPath,
        currentPagePath,
        baseUrl,
      );

      if (includeAttachments) {
        await this.zipAttachments(updatedJsonContent, folder, allowedAttachments);
        updatedJsonContent =
          updateAttachmentUrlsToLocalPaths(updatedJsonContent);
      }

      const pageTitle = getPageTitle(page.title);
      const pageExportContent = await this.exportPage(format, {
        ...page,
        content: updatedJsonContent,
      });

      folder.file(
        `${pageTitle}${getExportExtension(format)}`,
        pageExportContent,
      );

      pageIdToFilePath[page.id] = currentPagePath;

      pagesMetadata[currentPagePath] = {
        pageId: page.id,
        slugId: page.slugId,
        icon: page.icon ?? null,
        position: page.position,
        parentPath,
        // Fall back to the export time when a timestamp is missing.
        createdAt: page.createdAt?.toISOString() ?? new Date().toISOString(),
        updatedAt: page.updatedAt?.toISOString() ?? new Date().toISOString(),
      };

      if (childPages.length > 0) {
        const pageFolder = folder.folder(pageTitle);
        stack.push({ folder: pageFolder, parentPageId: page.id });
      }
    }
  }

  const metadata: ExportMetadata = {
    exportedAt: new Date().toISOString(),
    source: 'docmost',
    version: packageJson.version,
    pages: pagesMetadata,
  };

  zip.file('docmost-metadata.json', JSON.stringify(metadata, null, 2));
}
/**
 * Adds the attachments referenced by a document to the given archive folder.
 *
 * Only ids present in `allowed` are exported; any other id is skipped. A
 * failed read is logged and does not abort the export.
 *
 * @param prosemirrorJson Document whose attachment ids are collected.
 * @param zip Archive folder that receives the files under
 *   `/files/<attachment id>/<file name>`.
 * @param allowed Attachments permitted for export, keyed by attachment id.
 */
async zipAttachments(
  prosemirrorJson: any,
  zip: JSZip,
  allowed: Map<string, AllowedAttachment>,
): Promise<void> {
  // De-duplicate so an attachment referenced several times in the same
  // document is read from storage only once.
  const attachmentIds = [...new Set(getAttachmentIds(prosemirrorJson))];

  await Promise.all(
    attachmentIds.map(async (id) => {
      const attachment = allowed.get(id);
      if (!attachment) return;

      try {
        const fileBuffer = await this.storageService.read(
          attachment.filePath,
        );
        const filePath = `/files/${attachment.id}/${attachment.fileName}`;
        zip.file(filePath, fileBuffer);
      } catch (err) {
        // Best effort: one unreadable attachment must not fail the export.
        this.logger.debug(`Attachment export error ${attachment.id}`, err);
      }
    }),
  );
}
private async resolveAccessibleAttachments(
tree: PageExportTree,
userId: string | undefined,
ignorePermissions: boolean,
): Promise