From 6d024fc3de140e0d6748abec543dd6670f4b1882 Mon Sep 17 00:00:00 2001 From: Philip Okugbe <16838612+Philipinho@users.noreply.github.com> Date: Mon, 9 Jun 2025 04:29:27 +0100 Subject: [PATCH] feat: bulk page imports (#1219) * refactor imports - WIP * Add readstream * WIP * fix attachmentId render * fix attachmentId render * turndown video tag * feat: add stream upload support and improve file handling - Add stream upload functionality to storage drivers\n- Improve ZIP file extraction with better encoding handling\n- Fix attachment ID rendering issues\n- Add AWS S3 upload stream support\n- Update dependencies for better compatibility * WIP * notion formatter * move embed parser to editor-ext package * import embeds * utility files * cleanup * Switch from happy-dom to cheerio * Refine code * WIP * bug fixes and UI * sync * WIP * sync * keep import modal mounted * Show modal during upload * WIP * WIP --- .../src/components/icons/confluence-icon.tsx | 20 + .../editor/components/embed/embed-view.tsx | 7 +- .../file-task/services/file-task-service.ts | 14 + .../file-task/types/file-task.types.ts | 17 + .../page/components/page-import-modal.tsx | 224 +++++++++++- .../features/page/services/page-service.ts | 22 +- .../page/tree/components/space-tree.tsx | 54 +-- .../src/features/page/tree/utils/utils.ts | 25 +- .../src/features/websocket/types/types.ts | 21 +- .../websocket/use-query-subscription.ts | 38 +- apps/client/src/lib/config.ts | 5 + apps/client/vite.config.ts | 2 + apps/server/package.json | 7 +- .../extensions/persistence.extension.ts | 2 +- apps/server/src/common/helpers/utils.ts | 6 + .../migrations/20250521T154949-file_tasks.ts | 39 ++ apps/server/src/database/types/db.d.ts | 19 + .../server/src/database/types/entity.types.ts | 6 + apps/server/src/ee | 2 +- .../environment/environment.service.ts | 4 + .../src/integrations/export/turndown-utils.ts | 23 +- .../integrations/import/dto/file-task-dto.ts | 18 + .../import/file-task.controller.ts | 79 ++++ .../integrations/import/import.controller.ts | 77 +++- .../src/integrations/import/import.module.ts | 19 +- .../import/processors/file-task.processor.ts | 76 ++++ .../import/services/file-task.service.ts | 346 ++++++++++++++++++ .../services/import-attachment.service.ts | 303 +++++++++++++++ .../import/{ => services}/import.service.ts | 104 +++++- .../integrations/import/utils/file.utils.ts | 187 ++++++++++ .../import/utils/import-formatter.ts | 254 +++++++++++++ .../integrations/import/utils/import.utils.ts | 66 ++++ .../queue/constants/queue.constants.ts | 4 + .../src/integrations/queue/queue.module.ts | 8 + .../storage/drivers/local.driver.ts | 23 +- .../integrations/storage/drivers/s3.driver.ts | 36 ++ .../interfaces/storage-driver.interface.ts | 6 + .../integrations/storage/storage.service.ts | 10 + apps/server/src/main.ts | 2 +- packages/editor-ext/src/index.ts | 3 +- .../editor-ext/src/lib/embed-provider.ts | 97 ++--- packages/editor-ext/src/lib/image/image.ts | 2 +- packages/editor-ext/src/lib/trailing-node.ts | 9 +- packages/editor-ext/src/lib/video/video.ts | 10 +- pnpm-lock.yaml | 215 +++++++++-- 45 files changed, 2362 insertions(+), 149 deletions(-) create mode 100644 apps/client/src/components/icons/confluence-icon.tsx create mode 100644 apps/client/src/features/file-task/services/file-task-service.ts create mode 100644 apps/client/src/features/file-task/types/file-task.types.ts create mode 100644 apps/server/src/database/migrations/20250521T154949-file_tasks.ts create mode 100644 apps/server/src/integrations/import/dto/file-task-dto.ts create mode 100644 apps/server/src/integrations/import/file-task.controller.ts create mode 100644 apps/server/src/integrations/import/processors/file-task.processor.ts create mode 100644 apps/server/src/integrations/import/services/file-task.service.ts create mode 100644 apps/server/src/integrations/import/services/import-attachment.service.ts rename apps/server/src/integrations/import/{ => services}/import.service.ts (61%) create mode 100644 apps/server/src/integrations/import/utils/file.utils.ts create mode 100644 apps/server/src/integrations/import/utils/import-formatter.ts create mode 100644 apps/server/src/integrations/import/utils/import.utils.ts rename apps/client/src/features/editor/components/embed/providers.ts => packages/editor-ext/src/lib/embed-provider.ts (58%) diff --git a/apps/client/src/components/icons/confluence-icon.tsx b/apps/client/src/components/icons/confluence-icon.tsx new file mode 100644 index 00000000..499f18da --- /dev/null +++ b/apps/client/src/components/icons/confluence-icon.tsx @@ -0,0 +1,20 @@ +import { rem } from "@mantine/core"; + +interface Props { + size?: number | string; +} + +export function ConfluenceIcon({ size }: Props) { + return ( + + + + ); +} diff --git a/apps/client/src/features/editor/components/embed/embed-view.tsx b/apps/client/src/features/editor/components/embed/embed-view.tsx index 02ae6edf..77743a07 100644 --- a/apps/client/src/features/editor/components/embed/embed-view.tsx +++ b/apps/client/src/features/editor/components/embed/embed-view.tsx @@ -15,13 +15,10 @@ import { import { IconEdit } from "@tabler/icons-react"; import { z } from "zod"; import { useForm, zodResolver } from "@mantine/form"; -import { - getEmbedProviderById, - getEmbedUrlAndProvider, -} from "@/features/editor/components/embed/providers.ts"; import { notifications } from "@mantine/notifications"; import { useTranslation } from "react-i18next"; import i18n from "i18next"; +import { getEmbedProviderById, getEmbedUrlAndProvider } from '@docmost/editor-ext'; const schema = z.object({ url: z @@ -101,7 +98,7 @@ export default function EmbedView(props: NodeViewProps) { {t("Embed {{provider}}", { - provider: getEmbedProviderById(provider).name, + provider: getEmbedProviderById(provider)?.name, })} diff --git a/apps/client/src/features/file-task/services/file-task-service.ts b/apps/client/src/features/file-task/services/file-task-service.ts new file mode 100644 index 00000000..ffccbaae --- /dev/null +++ b/apps/client/src/features/file-task/services/file-task-service.ts @@ -0,0 +1,14 @@ +import api from "@/lib/api-client"; +import { IFileTask } from "@/features/file-task/types/file-task.types.ts"; + +export async function getFileTaskById(fileTaskId: string): Promise { + const req = await api.post("/file-tasks/info", { + fileTaskId: fileTaskId, + }); + return req.data; +} + +export async function getFileTasks(): Promise { + const req = await api.post("/file-tasks"); + return req.data; +} diff --git a/apps/client/src/features/file-task/types/file-task.types.ts b/apps/client/src/features/file-task/types/file-task.types.ts new file mode 100644 index 00000000..917e1757 --- /dev/null +++ b/apps/client/src/features/file-task/types/file-task.types.ts @@ -0,0 +1,17 @@ +export interface IFileTask { + id: string; + type: "import" | "export"; + source: string; + status: string; + fileName: string; + filePath: string; + fileSize: number; + fileExt: string; + errorMessage: string | null; + creatorId: string; + spaceId: string; + workspaceId: string; + createdAt: string; + updatedAt: string; + deletedAt: string | null; +} \ No newline at end of file diff --git a/apps/client/src/features/page/components/page-import-modal.tsx b/apps/client/src/features/page/components/page-import-modal.tsx index f07fd8a9..90c08bb6 100644 --- a/apps/client/src/features/page/components/page-import-modal.tsx +++ b/apps/client/src/features/page/components/page-import-modal.tsx @@ -1,18 +1,38 @@ -import { Modal, Button, SimpleGrid, FileButton } from "@mantine/core"; import { + Modal, + Button, + SimpleGrid, + FileButton, + Group, + Text, + Tooltip, +} from "@mantine/core"; +import { + IconBrandNotion, IconCheck, IconFileCode, + IconFileTypeZip, IconMarkdown, IconX, } from "@tabler/icons-react"; -import { importPage } from "@/features/page/services/page-service.ts"; +import { + importPage, + importZip, +} from "@/features/page/services/page-service.ts"; import { notifications } from "@mantine/notifications"; import { treeDataAtom } from "@/features/page/tree/atoms/tree-data-atom.ts"; import { useAtom } from "jotai"; import { buildTree } from "@/features/page/tree/utils"; import { IPage } from "@/features/page/types/page.types.ts"; -import React from "react"; +import React, { useEffect, useState } from "react"; import { useTranslation } from "react-i18next"; +import { ConfluenceIcon } from "@/components/icons/confluence-icon.tsx"; +import { getFileImportSizeLimit, isCloud } from "@/lib/config.ts"; +import { formatBytes } from "@/lib"; +import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts"; +import { getFileTaskById } from "@/features/file-task/services/file-task-service.ts"; +import { queryClient } from "@/main.tsx"; +import { useQueryEmit } from "@/features/websocket/use-query-emit.ts"; interface PageImportModalProps { spaceId: string; @@ -36,6 +56,7 @@ export default function PageImportModal({ yOffset="10vh" xOffset={0} mah={400} + keepMounted={true} > @@ -59,6 +80,133 @@ interface ImportFormatSelection { function ImportFormatSelection({ spaceId, onClose }: ImportFormatSelection) { const { t } = useTranslation(); const [treeData, setTreeData] = useAtom(treeDataAtom); + const [workspace] = useAtom(workspaceAtom); + const [fileTaskId, setFileTaskId] = useState(null); + const emit = useQueryEmit(); + + const canUseConfluence = isCloud() || workspace?.hasLicenseKey; + + const handleZipUpload = async (selectedFile: File, source: string) => { + if (!selectedFile) { + return; + } + + try { + onClose(); + + notifications.show({ + id: "import", + title: t("Uploading import file"), + message: t("Please don't close this tab."), + loading: true, + withCloseButton: false, + autoClose: false, + }); + + const importTask = await importZip(selectedFile, spaceId, source); + notifications.update({ + id: "import", + title: t("Importing pages"), + message: t( + "Page import is in progress. You can check back later if this takes longer.", + ), + loading: true, + withCloseButton: true, + autoClose: false, + }); + + setFileTaskId(importTask.id); + } catch (err) { + console.log("Failed to upload import file", err); + notifications.update({ + id: "import", + color: "red", + title: t("Failed to upload import file"), + message: err?.response.data.message, + icon: , + loading: false, + withCloseButton: true, + autoClose: false, + }); + } + }; + + useEffect(() => { + if (!fileTaskId) return; + + const intervalId = setInterval(async () => { + try { + const fileTask = await getFileTaskById(fileTaskId); + const status = fileTask.status; + + if (status === "success") { + notifications.update({ + id: "import", + color: "teal", + title: t("Import complete"), + message: t("Your pages were successfully imported."), + icon: , + loading: false, + withCloseButton: true, + autoClose: false, + }); + clearInterval(intervalId); + setFileTaskId(null); + + await queryClient.refetchQueries({ + queryKey: ["root-sidebar-pages", fileTask.spaceId], + }); + + setTimeout(() => { + emit({ + operation: "refetchRootTreeNodeEvent", + spaceId: spaceId, + }); + }, 50); + } + + if (status === "failed") { + notifications.update({ + id: "import", + color: "red", + title: t("Page import failed"), + message: t( + "Something went wrong while importing pages: {{reason}}.", + { + reason: fileTask.errorMessage, + }, + ), + icon: , + loading: false, + withCloseButton: true, + autoClose: false, + }); + clearInterval(intervalId); + setFileTaskId(null); + console.error(fileTask.errorMessage); + } + } catch (err) { + notifications.update({ + id: "import", + color: "red", + title: t("Import failed"), + message: t( + "Something went wrong while importing pages: {{reason}}.", + { + reason: err.response?.data.message, + }, + ), + icon: , + loading: false, + withCloseButton: true, + autoClose: false, + }); + clearInterval(intervalId); + setFileTaskId(null); + console.error("Failed to fetch import status", err); + } + }, 3000); + }, [fileTaskId]); const handleFileUpload = async (selectedFiles: File[]) => { if (!selectedFiles) { @@ -120,6 +268,7 @@ function ImportFormatSelection({ spaceId, onClose }: ImportFormatSelection) { } }; + // @ts-ignore return ( <> @@ -148,7 +297,76 @@ function ImportFormatSelection({ spaceId, onClose }: ImportFormatSelection) { )} + + handleZipUpload(file, "notion")} + accept="application/zip" + > + {(props) => ( + + )} + + handleZipUpload(file, "confluence")} + accept="application/zip" + > + {(props) => ( + + + + )} + + + +
+ + Import zip file + + + {t( + `Upload zip file containing Markdown and HTML files. Max: {{sizeLimit}}`, + { + sizeLimit: formatBytes(getFileImportSizeLimit()), + }, + )} + + handleZipUpload(file, "generic")} + accept="application/zip" + > + {(props) => ( + + + + )} + +
+
); } diff --git a/apps/client/src/features/page/services/page-service.ts b/apps/client/src/features/page/services/page-service.ts index e7e15608..a8e3d256 100644 --- a/apps/client/src/features/page/services/page-service.ts +++ b/apps/client/src/features/page/services/page-service.ts @@ -7,10 +7,11 @@ import { IPage, IPageInput, SidebarPagesParams, -} from "@/features/page/types/page.types"; +} from '@/features/page/types/page.types'; import { IAttachment, IPagination } from "@/lib/types.ts"; import { saveAs } from "file-saver"; import { InfiniteData } from "@tanstack/react-query"; +import { IFileTask } from '@/features/file-task/types/file-task.types.ts'; export async function createPage(data: Partial): Promise { const req = await api.post("/pages/create", data); @@ -119,6 +120,25 @@ export async function importPage(file: File, spaceId: string) { return req.data; } +export async function importZip( + file: File, + spaceId: string, + source?: string, +): Promise { + const formData = new FormData(); + formData.append("spaceId", spaceId); + formData.append("source", source); + formData.append("file", file); + + const req = await api.post("/pages/import-zip", formData, { + headers: { + "Content-Type": "multipart/form-data", + }, + }); + + return req.data; +} + export async function uploadFile( file: File, pageId: string, diff --git a/apps/client/src/features/page/tree/components/space-tree.tsx b/apps/client/src/features/page/tree/components/space-tree.tsx index da5b1832..db818518 100644 --- a/apps/client/src/features/page/tree/components/space-tree.tsx +++ b/apps/client/src/features/page/tree/components/space-tree.tsx @@ -24,7 +24,10 @@ import { IconPointFilled, IconTrash, } from "@tabler/icons-react"; -import { appendNodeChildrenAtom, treeDataAtom } from "@/features/page/tree/atoms/tree-data-atom.ts"; +import { + appendNodeChildrenAtom, + treeDataAtom, +} from "@/features/page/tree/atoms/tree-data-atom.ts"; import clsx from "clsx"; import EmojiPicker from "@/components/ui/emoji-picker.tsx"; import { useTreeMutation } from "@/features/page/tree/hooks/use-tree-mutation.ts"; @@ -32,6 +35,7 @@ import { appendNodeChildren, buildTree, buildTreeWithChildren, + mergeRootTrees, updateTreeNodeIcon, } from "@/features/page/tree/utils/utils.ts"; import { SpaceTreeNode } from "@/features/page/tree/types.ts"; @@ -104,17 +108,17 @@ export default function SpaceTree({ spaceId, readOnly }: SpaceTreeProps) { const allItems = pagesData.pages.flatMap((page) => page.items); const treeData = buildTree(allItems); - if (data.length < 1 || data?.[0].spaceId !== spaceId) { - //Thoughts - // don't reset if there is data in state - // we only expect to call this once on initial load - // even if we decide to refetch, it should only update - // and append root pages instead of resetting the entire tree - // which looses async loaded children too - setData(treeData); - setIsDataLoaded(true); - setOpenTreeNodes({}); - } + setData((prev) => { + // fresh space; full reset + if (prev.length === 0 || prev[0]?.spaceId !== spaceId) { + setIsDataLoaded(true); + setOpenTreeNodes({}); + return treeData; + } + + // same space; append only missing roots + return mergeRootTrees(prev, treeData); + }); } }, [pagesData, hasNextPage]); @@ -297,17 +301,19 @@ function Node({ node, style, dragHandle, tree }: NodeRendererProps) { const handleEmojiSelect = (emoji: { native: string }) => { handleUpdateNodeIcon(node.id, emoji.native); - updatePageMutation.mutateAsync({ pageId: node.id, icon: emoji.native }).then((data) => { - setTimeout(() => { - emit({ - operation: "updateOne", - spaceId: node.data.spaceId, - entity: ["pages"], - id: node.id, - payload: { icon: emoji.native, parentPageId: data.parentPageId}, - }); - }, 50); - }); + updatePageMutation + .mutateAsync({ pageId: node.id, icon: emoji.native }) + .then((data) => { + setTimeout(() => { + emit({ + operation: "updateOne", + spaceId: node.data.spaceId, + entity: ["pages"], + id: node.id, + payload: { icon: emoji.native, parentPageId: data.parentPageId }, + }); + }, 50); + }); }; const handleRemoveEmoji = () => { @@ -570,7 +576,7 @@ interface PageArrowProps { function PageArrow({ node, onExpandTree }: PageArrowProps) { useEffect(() => { - if(node.isOpen){ + if (node.isOpen) { onExpandTree(); } }, []); diff --git a/apps/client/src/features/page/tree/utils/utils.ts b/apps/client/src/features/page/tree/utils/utils.ts index 910799c8..8ec1b884 100644 --- a/apps/client/src/features/page/tree/utils/utils.ts +++ b/apps/client/src/features/page/tree/utils/utils.ts @@ -121,7 +121,6 @@ export const deleteTreeNode = ( .filter((node) => node !== null); }; - export function buildTreeWithChildren(items: SpaceTreeNode[]): SpaceTreeNode[] { const nodeMap = {}; let result: SpaceTreeNode[] = []; @@ -167,10 +166,12 @@ export function appendNodeChildren( // Preserve deeper children if they exist and remove node if deleted return treeItems.map((node) => { if (node.id === nodeId) { - const newIds = new Set(children.map(c => c.id)); + const newIds = new Set(children.map((c) => c.id)); const existingMap = new Map( - (node.children ?? []).filter(c => newIds.has(c.id)).map(c => [c.id, c]) + (node.children ?? []) + .filter((c) => newIds.has(c.id)) + .map((c) => [c.id, c]), ); const merged = children.map((newChild) => { @@ -196,3 +197,21 @@ export function appendNodeChildren( return node; }); } + +/** + * Merge root nodes; keep existing ones intact, append new ones, + */ +export function mergeRootTrees( + prevRoots: SpaceTreeNode[], + incomingRoots: SpaceTreeNode[], +): SpaceTreeNode[] { + const seen = new Set(prevRoots.map((r) => r.id)); + + // add new roots that were not present before + const merged = [...prevRoots]; + incomingRoots.forEach((node) => { + if (!seen.has(node.id)) merged.push(node); + }); + + return sortPositionKeys(merged); +} diff --git a/apps/client/src/features/websocket/types/types.ts b/apps/client/src/features/websocket/types/types.ts index bda76b4e..25b57df9 100644 --- a/apps/client/src/features/websocket/types/types.ts +++ b/apps/client/src/features/websocket/types/types.ts @@ -47,15 +47,28 @@ export type MoveTreeNodeEvent = { parentId: string; index: number; position: string; - } + }; }; export type DeleteTreeNodeEvent = { operation: "deleteTreeNode"; spaceId: string; payload: { - node: SpaceTreeNode - } + node: SpaceTreeNode; + }; }; -export type WebSocketEvent = InvalidateEvent | InvalidateCommentsEvent | UpdateEvent | DeleteEvent | AddTreeNodeEvent | MoveTreeNodeEvent | DeleteTreeNodeEvent; +export type RefetchRootTreeNodeEvent = { + operation: "refetchRootTreeNodeEvent"; + spaceId: string; +}; + +export type WebSocketEvent = + | InvalidateEvent + | InvalidateCommentsEvent + | UpdateEvent + | DeleteEvent + | AddTreeNodeEvent + | MoveTreeNodeEvent + | DeleteTreeNodeEvent + | RefetchRootTreeNodeEvent; diff --git a/apps/client/src/features/websocket/use-query-subscription.ts b/apps/client/src/features/websocket/use-query-subscription.ts index 8e3ad569..29a815be 100644 --- a/apps/client/src/features/websocket/use-query-subscription.ts +++ b/apps/client/src/features/websocket/use-query-subscription.ts @@ -5,8 +5,14 @@ import { InfiniteData, useQueryClient } from "@tanstack/react-query"; import { WebSocketEvent } from "@/features/websocket/types"; import { IPage } from "../page/types/page.types"; import { IPagination } from "@/lib/types"; -import { invalidateOnCreatePage, invalidateOnDeletePage, invalidateOnMovePage, invalidateOnUpdatePage } from "../page/queries/page-query"; +import { + invalidateOnCreatePage, + invalidateOnDeletePage, + invalidateOnMovePage, + invalidateOnUpdatePage, +} from "../page/queries/page-query"; import { RQ_KEY } from "../comment/queries/comment-query"; +import { queryClient } from "@/main.tsx"; export const useQuerySubscription = () => { const queryClient = useQueryClient(); @@ -37,8 +43,7 @@ export const useQuerySubscription = () => { invalidateOnMovePage(); break; case "deleteTreeNode": - const pageId = data.payload.node.id; - invalidateOnDeletePage(pageId); + invalidateOnDeletePage(data.payload.node.id); break; case "updateOne": entity = data.entity[0]; @@ -50,17 +55,23 @@ export const useQuerySubscription = () => { } // only update if data was already in cache - if(queryClient.getQueryData([...data.entity, queryKeyId])){ + if (queryClient.getQueryData([...data.entity, queryKeyId])) { queryClient.setQueryData([...data.entity, queryKeyId], { ...queryClient.getQueryData([...data.entity, queryKeyId]), ...data.payload, }); } - + if (entity === "pages") { - invalidateOnUpdatePage(data.spaceId, data.payload.parentPageId, data.id, data.payload.title, data.payload.icon); + invalidateOnUpdatePage( + data.spaceId, + data.payload.parentPageId, + data.id, + data.payload.title, + data.payload.icon, + ); } - + /* queryClient.setQueriesData( { queryKey: [data.entity, data.id] }, @@ -72,8 +83,19 @@ export const useQuerySubscription = () => { : update(oldData as Record); }, ); - */ + */ break; + case "refetchRootTreeNodeEvent": { + const spaceId = data.spaceId; + queryClient.refetchQueries({ + queryKey: ["root-sidebar-pages", spaceId], + }); + + queryClient.invalidateQueries({ + queryKey: ["recent-changes", spaceId], + }); + break; + } } }); }, [queryClient, socket]); diff --git a/apps/client/src/lib/config.ts b/apps/client/src/lib/config.ts index 2f621b91..717bf9ff 100644 --- a/apps/client/src/lib/config.ts +++ b/apps/client/src/lib/config.ts @@ -70,6 +70,11 @@ export function getFileUploadSizeLimit() { return bytes(limit); } +export function getFileImportSizeLimit() { + const limit = getConfigValue("FILE_IMPORT_SIZE_LIMIT", "200mb"); + return bytes(limit); +} + export function getDrawioUrl() { return getConfigValue("DRAWIO_URL", "https://embed.diagrams.net"); } diff --git a/apps/client/vite.config.ts b/apps/client/vite.config.ts index a6efc4bc..cc8a01fd 100644 --- a/apps/client/vite.config.ts +++ b/apps/client/vite.config.ts @@ -8,6 +8,7 @@ export default defineConfig(({ mode }) => { const { APP_URL, FILE_UPLOAD_SIZE_LIMIT, + FILE_IMPORT_SIZE_LIMIT, DRAWIO_URL, CLOUD, SUBDOMAIN_HOST, @@ -20,6 +21,7 @@ export default defineConfig(({ mode }) => { "process.env": { APP_URL, FILE_UPLOAD_SIZE_LIMIT, + FILE_IMPORT_SIZE_LIMIT, DRAWIO_URL, CLOUD, SUBDOMAIN_HOST, diff --git a/apps/server/package.json b/apps/server/package.json index 5082d129..d105f20c 100644 --- a/apps/server/package.json +++ b/apps/server/package.json @@ -31,6 +31,7 @@ }, "dependencies": { "@aws-sdk/client-s3": "3.701.0", + "@aws-sdk/lib-storage": "3.701.0", "@aws-sdk/s3-request-presigner": "3.701.0", "@casl/ability": "^6.7.3", "@fastify/cookie": "^11.0.2", @@ -56,6 +57,7 @@ "bcrypt": "^5.1.1", "bullmq": "^5.41.3", "cache-manager": "^6.4.0", + "cheerio": "^1.0.0", "class-transformer": "^0.5.1", "class-validator": "^0.14.1", "cookie": "^1.0.2", @@ -80,7 +82,9 @@ "sanitize-filename-ts": "^1.0.2", "socket.io": "^4.8.1", "stripe": "^17.5.0", - "ws": "^8.18.0" + "tmp-promise": "^3.0.3", + "ws": "^8.18.0", + "yauzl": "^3.2.0" }, "devDependencies": { "@eslint/js": "^9.20.0", @@ -99,6 +103,7 @@ "@types/pg": "^8.11.11", "@types/supertest": "^6.0.2", "@types/ws": "^8.5.14", + "@types/yauzl": "^2.10.3", "eslint": "^9.20.1", "eslint-config-prettier": "^10.0.1", "globals": "^15.15.0", diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts index 3c206e1a..88284fd2 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.ts @@ -130,7 +130,7 @@ export class PersistenceExtension implements Extension { ); this.contributors.delete(documentName); } catch (err) { - this.logger.debug('Contributors error:' + err?.['message']); + //this.logger.debug('Contributors error:' + err?.['message']); } await this.pageRepo.updatePage( diff --git a/apps/server/src/common/helpers/utils.ts b/apps/server/src/common/helpers/utils.ts index e2a4d5eb..d1748850 100644 --- a/apps/server/src/common/helpers/utils.ts +++ b/apps/server/src/common/helpers/utils.ts @@ -1,5 +1,6 @@ import * as path from 'path'; import * as bcrypt from 'bcrypt'; +import { sanitize } from 'sanitize-filename-ts'; export const envPath = path.resolve(process.cwd(), '..', '..', '.env'); @@ -62,3 +63,8 @@ export function extractDateFromUuid7(uuid7: string) { return new Date(timestamp); } + +export function sanitizeFileName(fileName: string): string { + const sanitizedFilename = sanitize(fileName).replace(/ /g, '_'); + return sanitizedFilename.slice(0, 255); +} diff --git a/apps/server/src/database/migrations/20250521T154949-file_tasks.ts b/apps/server/src/database/migrations/20250521T154949-file_tasks.ts new file mode 100644 index 00000000..523ae86b --- /dev/null +++ b/apps/server/src/database/migrations/20250521T154949-file_tasks.ts @@ -0,0 +1,39 @@ +import { Kysely, sql } from 'kysely'; + +export async function up(db: Kysely): Promise { + await db.schema + .createTable('file_tasks') + .addColumn('id', 'uuid', (col) => + col.primaryKey().defaultTo(sql`gen_uuid_v7()`), + ) + // type (import, export) + .addColumn('type', 'varchar', (col) => col) + // source (generic, notion, confluence) + .addColumn('source', 'varchar', (col) => col) + // status (pending|processing|success|failed), + .addColumn('status', 'varchar', (col) => col) + .addColumn('file_name', 'varchar', (col) => col.notNull()) + .addColumn('file_path', 'varchar', (col) => col.notNull()) + .addColumn('file_size', 'int8', (col) => col) + .addColumn('file_ext', 'varchar', (col) => col) + .addColumn('error_message', 'varchar', (col) => col) + .addColumn('creator_id', 'uuid', (col) => col.references('users.id')) + .addColumn('space_id', 'uuid', (col) => + col.references('spaces.id').onDelete('cascade'), + ) + .addColumn('workspace_id', 'uuid', (col) => + col.references('workspaces.id').onDelete('cascade').notNull(), + ) + .addColumn('created_at', 'timestamptz', (col) => + col.notNull().defaultTo(sql`now()`), + ) + .addColumn('updated_at', 'timestamptz', (col) => + col.notNull().defaultTo(sql`now()`), + ) + .addColumn('deleted_at', 'timestamptz', (col) => col) + .execute(); +} + +export async function down(db: Kysely): Promise { + await db.schema.dropTable('file_tasks').execute(); +} diff --git a/apps/server/src/database/types/db.d.ts b/apps/server/src/database/types/db.d.ts index 8c4cbd57..4545ebc4 100644 --- a/apps/server/src/database/types/db.d.ts +++ b/apps/server/src/database/types/db.d.ts @@ -122,6 +122,24 @@ export interface Comments { workspaceId: string; } +export interface FileTasks { + createdAt: Generated; + creatorId: string | null; + deletedAt: Timestamp | null; + errorMessage: string | null; + fileExt: string | null; + fileName: string; + filePath: string; + fileSize: Int8 | null; + id: Generated; + source: string | null; + spaceId: string | null; + status: string | null; + type: string | null; + updatedAt: Generated; + workspaceId: string; +} + export interface Groups { createdAt: Generated; creatorId: string | null; @@ -298,6 +316,7 @@ export interface DB { backlinks: Backlinks; billing: Billing; comments: Comments; + fileTasks: FileTasks; groups: Groups; groupUsers: GroupUsers; pageHistory: PageHistory; diff --git a/apps/server/src/database/types/entity.types.ts b/apps/server/src/database/types/entity.types.ts index 6cb55a11..db2c2823 100644 --- a/apps/server/src/database/types/entity.types.ts +++ b/apps/server/src/database/types/entity.types.ts @@ -17,6 +17,7 @@ import { AuthProviders, AuthAccounts, Shares, + FileTasks, } from './db'; // Workspace @@ -107,3 +108,8 @@ export type UpdatableAuthAccount = Updateable>; export type Share = Selectable; export type InsertableShare = Insertable; export type UpdatableShare = Updateable>; + +// File Task +export type FileTask = Selectable; +export type InsertableFileTask = Insertable; +export type UpdatableFileTask = Updateable>; diff --git a/apps/server/src/ee b/apps/server/src/ee index b312008b..70eb45ea 160000 --- a/apps/server/src/ee +++ b/apps/server/src/ee @@ -1 +1 @@ -Subproject commit b312008b4b7ed3d5862436b279d91aeddb6048d7 +Subproject commit 70eb45eaec84f61cb94a83a153915ce443ccc437 diff --git a/apps/server/src/integrations/environment/environment.service.ts b/apps/server/src/integrations/environment/environment.service.ts index ac26b4fb..d6336993 100644 --- a/apps/server/src/integrations/environment/environment.service.ts +++ b/apps/server/src/integrations/environment/environment.service.ts @@ -67,6 +67,10 @@ export class EnvironmentService { return this.configService.get('FILE_UPLOAD_SIZE_LIMIT', '50mb'); } + getFileImportSizeLimit(): string { + return this.configService.get('FILE_IMPORT_SIZE_LIMIT', '200mb'); + } + getAwsS3AccessKeyId(): string { return this.configService.get('AWS_S3_ACCESS_KEY_ID'); } diff --git a/apps/server/src/integrations/export/turndown-utils.ts b/apps/server/src/integrations/export/turndown-utils.ts index 44e606f3..54fdef12 100644 --- a/apps/server/src/integrations/export/turndown-utils.ts +++ b/apps/server/src/integrations/export/turndown-utils.ts @@ -1,5 +1,6 @@ import * as TurndownService from '@joplin/turndown'; import * as TurndownPluginGfm from '@joplin/turndown-plugin-gfm'; +import * as path from 'path'; export function turndown(html: string): string { const turndownService = new TurndownService({ @@ -23,6 +24,7 @@ export function turndown(html: string): string { mathInline, mathBlock, iframeEmbed, + video, ]); return turndownService.turndown(html).replaceAll('
', ' '); } @@ -87,8 +89,12 @@ function preserveDetail(turndownService: TurndownService) { } const detailsContent = Array.from(node.childNodes) - .filter(child => child.nodeName !== 'SUMMARY') - .map(child => (child.nodeType === 1 ? turndownService.turndown((child as HTMLElement).outerHTML) : child.textContent)) + .filter((child) => child.nodeName !== 'SUMMARY') + .map((child) => + child.nodeType === 1 + ? turndownService.turndown((child as HTMLElement).outerHTML) + : child.textContent, + ) .join(''); return `\n
\n${detailSummary}\n\n${detailsContent}\n\n
\n`; @@ -135,3 +141,16 @@ function iframeEmbed(turndownService: TurndownService) { }, }); } + +function video(turndownService: TurndownService) { + turndownService.addRule('video', { + filter: function (node: HTMLInputElement) { + return node.tagName === 'VIDEO'; + }, + replacement: function (content: any, node: HTMLInputElement) { + const src = node.getAttribute('src') || ''; + const name = path.basename(src); + return '[' + name + '](' + src + ')'; + }, + }); +} diff --git a/apps/server/src/integrations/import/dto/file-task-dto.ts b/apps/server/src/integrations/import/dto/file-task-dto.ts new file mode 100644 index 00000000..9cdea395 --- /dev/null +++ b/apps/server/src/integrations/import/dto/file-task-dto.ts @@ -0,0 +1,18 @@ +import { IsNotEmpty, IsUUID } from 'class-validator'; + +export class FileTaskIdDto { + @IsNotEmpty() + @IsUUID() + fileTaskId: string; +} + +export type ImportPageNode = { + id: string; + slugId: string; + name: string; + content: string; + position?: string | null; + parentPageId: string | null; + fileExtension: string; + filePath: string; +}; \ No newline at end of file diff --git a/apps/server/src/integrations/import/file-task.controller.ts b/apps/server/src/integrations/import/file-task.controller.ts new file mode 100644 index 00000000..305779b4 --- /dev/null +++ b/apps/server/src/integrations/import/file-task.controller.ts @@ -0,0 +1,79 @@ +import { + Body, + Controller, + ForbiddenException, + HttpCode, + HttpStatus, + NotFoundException, + Post, + UseGuards, +} from '@nestjs/common'; +import SpaceAbilityFactory from '../../core/casl/abilities/space-ability.factory'; +import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard'; +import { User } from '@docmost/db/types/entity.types'; +import { + SpaceCaslAction, + SpaceCaslSubject, +} from '../../core/casl/interfaces/space-ability.type'; +import { InjectKysely } from 'nestjs-kysely'; +import { KyselyDB } from '@docmost/db/types/kysely.types'; +import { AuthUser } from '../../common/decorators/auth-user.decorator'; +import { FileTaskIdDto } from './dto/file-task-dto'; +import { SpaceMemberRepo } from '@docmost/db/repos/space/space-member.repo'; + +@Controller('file-tasks') +export class FileTaskController { + constructor( + private readonly spaceMemberRepo: SpaceMemberRepo, + private readonly spaceAbility: SpaceAbilityFactory, + @InjectKysely() private readonly db: KyselyDB, + ) {} + + @UseGuards(JwtAuthGuard) + @HttpCode(HttpStatus.OK) + @Post() + async getFileTasks(@AuthUser() user: User) { + const userSpaceIds = await this.spaceMemberRepo.getUserSpaceIds(user.id); + + if (!userSpaceIds || userSpaceIds.length === 0) { + return []; + } + + const fileTasks = await this.db + .selectFrom('fileTasks') + .selectAll() + .where('spaceId', 'in', userSpaceIds) + .execute(); + + if (!fileTasks) { + throw new NotFoundException('File task not found'); + } + + return fileTasks; + } + + @UseGuards(JwtAuthGuard) + @HttpCode(HttpStatus.OK) + @Post('info') + async getFileTask(@Body() dto: FileTaskIdDto, @AuthUser() user: User) { + const fileTask = await this.db + .selectFrom('fileTasks') + .selectAll() + .where('id', '=', dto.fileTaskId) + .executeTakeFirst(); + + if (!fileTask || !fileTask.spaceId) { + throw new NotFoundException('File task not found'); + } + + const ability = await this.spaceAbility.createForUser( + user, + fileTask.spaceId, + ); + if (ability.cannot(SpaceCaslAction.Read, SpaceCaslSubject.Page)) { + throw new ForbiddenException(); + } + + return fileTask; + } +} diff --git a/apps/server/src/integrations/import/import.controller.ts b/apps/server/src/integrations/import/import.controller.ts index 975301af..1adb82eb 100644 --- a/apps/server/src/integrations/import/import.controller.ts +++ b/apps/server/src/integrations/import/import.controller.ts @@ -21,8 +21,9 @@ import { import { FileInterceptor } from '../../common/interceptors/file.interceptor'; import * as bytes from 'bytes'; import * as path from 'path'; -import { ImportService } from './import.service'; +import { ImportService } from './services/import.service'; import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator'; +import { EnvironmentService } from '../environment/environment.service'; @Controller() export class ImportController { @@ -31,6 +32,7 @@ export class ImportController { constructor( private readonly importService: ImportService, private readonly spaceAbility: SpaceAbilityFactory, + private readonly environmentService: EnvironmentService, ) {} @UseInterceptors(FileInterceptor) @@ -44,18 +46,18 @@ export class ImportController { ) { const validFileExtensions = ['.md', '.html']; - const maxFileSize = bytes('100mb'); + const maxFileSize = bytes('10mb'); let file = null; try { file = await req.file({ - limits: { fileSize: maxFileSize, fields: 3, files: 1 }, + limits: { fileSize: maxFileSize, fields: 4, files: 1 }, }); } catch (err: any) { this.logger.error(err.message); if (err?.statusCode === 413) { throw new BadRequestException( - `File too large. Exceeds the 100mb import limit`, + `File too large. Exceeds the 10mb import limit`, ); } } @@ -73,7 +75,7 @@ export class ImportController { const spaceId = file.fields?.spaceId?.value; if (!spaceId) { - throw new BadRequestException('spaceId or format not found'); + throw new BadRequestException('spaceId is required'); } const ability = await this.spaceAbility.createForUser(user, spaceId); @@ -83,4 +85,69 @@ export class ImportController { return this.importService.importPage(file, user.id, spaceId, workspace.id); } + + @UseInterceptors(FileInterceptor) + @UseGuards(JwtAuthGuard) + @HttpCode(HttpStatus.OK) + @Post('pages/import-zip') + async importZip( + @Req() req: any, + @AuthUser() user: User, + @AuthWorkspace() workspace: Workspace, + ) { + const validFileExtensions = ['.zip']; + + const maxFileSize = bytes(this.environmentService.getFileImportSizeLimit()); + + let file = null; + try { + file = await req.file({ + limits: { fileSize: maxFileSize, fields: 3, files: 1 }, + }); + } catch (err: any) { + this.logger.error(err.message); + if (err?.statusCode === 413) { + throw new BadRequestException( + `File too large. Exceeds the ${this.environmentService.getFileImportSizeLimit()} import limit`, + ); + } + } + + if (!file) { + throw new BadRequestException('Failed to upload file'); + } + + if ( + !validFileExtensions.includes(path.extname(file.filename).toLowerCase()) + ) { + throw new BadRequestException('Invalid import file extension.'); + } + + const spaceId = file.fields?.spaceId?.value; + const source = file.fields?.source?.value; + + const validZipSources = ['generic', 'notion', 'confluence']; + if (!validZipSources.includes(source)) { + throw new BadRequestException( + 'Invalid import source. Import source must either be generic, notion or confluence.', + ); + } + + if (!spaceId) { + throw new BadRequestException('spaceId is required'); + } + + const ability = await this.spaceAbility.createForUser(user, spaceId); + if (ability.cannot(SpaceCaslAction.Edit, SpaceCaslSubject.Page)) { + throw new ForbiddenException(); + } + + return this.importService.importZip( + file, + source, + user.id, + spaceId, + workspace.id, + ); + } } diff --git a/apps/server/src/integrations/import/import.module.ts b/apps/server/src/integrations/import/import.module.ts index 60498808..40a49023 100644 --- a/apps/server/src/integrations/import/import.module.ts +++ b/apps/server/src/integrations/import/import.module.ts @@ -1,9 +1,22 @@ import { Module } from '@nestjs/common'; -import { ImportService } from './import.service'; +import { ImportService } from './services/import.service'; import { ImportController } from './import.controller'; +import { StorageModule } from '../storage/storage.module'; +import { FileTaskService } from './services/file-task.service'; +import { FileTaskProcessor } from './processors/file-task.processor'; +import { ImportAttachmentService } from './services/import-attachment.service'; +import { FileTaskController } from './file-task.controller'; +import { PageModule } from '../../core/page/page.module'; @Module({ - providers: [ImportService], - controllers: [ImportController], + providers: [ + ImportService, + FileTaskService, + FileTaskProcessor, + ImportAttachmentService, + ], + exports: [ImportService, ImportAttachmentService], + controllers: [ImportController, FileTaskController], + imports: [StorageModule, PageModule], }) export class ImportModule {} diff --git a/apps/server/src/integrations/import/processors/file-task.processor.ts b/apps/server/src/integrations/import/processors/file-task.processor.ts new file mode 100644 index 00000000..9431ccec --- /dev/null +++ b/apps/server/src/integrations/import/processors/file-task.processor.ts @@ -0,0 +1,76 @@ +import { Logger, OnModuleDestroy } from '@nestjs/common'; +import { OnWorkerEvent, Processor, WorkerHost } from '@nestjs/bullmq'; +import { Job } from 'bullmq'; +import { QueueJob, QueueName } from 'src/integrations/queue/constants'; +import { FileTaskService } from '../services/file-task.service'; +import { FileTaskStatus } from '../utils/file.utils'; +import { StorageService } from '../../storage/storage.service'; + +@Processor(QueueName.FILE_TASK_QUEUE) +export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy { + private readonly logger = new Logger(FileTaskProcessor.name); + + constructor( + private readonly fileTaskService: FileTaskService, + private readonly storageService: StorageService, + ) { + super(); + } + + async process(job: Job): Promise { + try { + switch (job.name) { + case QueueJob.IMPORT_TASK: + await this.fileTaskService.processZIpImport(job.data.fileTaskId); + break; + case QueueJob.EXPORT_TASK: + // TODO: export task + break; + } + } catch (err) { + this.logger.error('File task failed', err); + throw err; + } + } + + @OnWorkerEvent('active') + onActive(job: Job) { + this.logger.debug(`Processing ${job.name} job`); + } + + @OnWorkerEvent('failed') + async onFailed(job: Job) { + this.logger.error( + `Error processing ${job.name} job. Reason: ${job.failedReason}`, + ); + + try { + const fileTaskId = job.data.fileTaskId; + await this.fileTaskService.updateTaskStatus( + fileTaskId, + FileTaskStatus.Failed, + job.failedReason, + ); + + const fileTask = await this.fileTaskService.getFileTask(fileTaskId); + if (fileTask) { + await this.storageService.delete(fileTask.filePath); + } + } catch (err) { + this.logger.error(err); + } + } + + @OnWorkerEvent('completed') + onCompleted(job: Job) { + this.logger.log( + `Completed ${job.name} job for File task ID ${job.data.fileTaskId}`, + ); + } + + async onModuleDestroy(): Promise { + if (this.worker) { + await this.worker.close(); + } + } +} diff --git a/apps/server/src/integrations/import/services/file-task.service.ts b/apps/server/src/integrations/import/services/file-task.service.ts new file mode 100644 index 00000000..f054017d --- /dev/null +++ b/apps/server/src/integrations/import/services/file-task.service.ts @@ -0,0 +1,346 @@ +import { Injectable, Logger } from '@nestjs/common'; +import * as path from 'path'; +import { jsonToText } from '../../../collaboration/collaboration.util'; +import { InjectKysely } from 'nestjs-kysely'; +import { KyselyDB } from '@docmost/db/types/kysely.types'; +import { + extractZip, + FileImportSource, + FileTaskStatus, +} from '../utils/file.utils'; +import { StorageService } from '../../storage/storage.service'; +import * as tmp from 'tmp-promise'; +import { pipeline } from 'node:stream/promises'; +import { createWriteStream } from 'node:fs'; +import { ImportService } from './import.service'; +import { promises as fs } from 'fs'; +import { generateSlugId } from '../../../common/helpers'; +import { v7 } from 'uuid'; +import { generateJitteredKeyBetween } from 'fractional-indexing-jittered'; +import { FileTask, InsertablePage } from '@docmost/db/types/entity.types'; +import { markdownToHtml } from '@docmost/editor-ext'; +import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils'; +import { formatImportHtml } from '../utils/import-formatter'; +import { + buildAttachmentCandidates, + collectMarkdownAndHtmlFiles, +} from '../utils/import.utils'; +import { executeTx } from '@docmost/db/utils'; +import { BacklinkRepo } from '@docmost/db/repos/backlink/backlink.repo'; +import { ImportAttachmentService } from './import-attachment.service'; +import { ModuleRef } from '@nestjs/core'; +import { PageService } from '../../../core/page/services/page.service'; +import { ImportPageNode } from '../dto/file-task-dto'; + +@Injectable() +export class FileTaskService { + private readonly logger = new Logger(FileTaskService.name); + + constructor( + private readonly storageService: StorageService, + private readonly importService: ImportService, + private readonly pageService: PageService, + private readonly backlinkRepo: BacklinkRepo, + @InjectKysely() private readonly db: KyselyDB, + private readonly importAttachmentService: ImportAttachmentService, + private moduleRef: ModuleRef, + ) {} + + async processZIpImport(fileTaskId: string): Promise { + const fileTask = await this.db + .selectFrom('fileTasks') + .selectAll() + .where('id', '=', fileTaskId) + .executeTakeFirst(); + + if (!fileTask) { + this.logger.log(`Import file task with ID ${fileTaskId} not found`); + return; + } + + if (fileTask.status === FileTaskStatus.Failed) { + return; + } + + if (fileTask.status === FileTaskStatus.Success) { + this.logger.log('Imported task already processed.'); + return; + } + + const { path: tmpZipPath, cleanup: cleanupTmpFile } = await tmp.file({ + prefix: 'docmost-import', + postfix: '.zip', + discardDescriptor: true, + }); + + const { path: tmpExtractDir, cleanup: cleanupTmpDir } = await tmp.dir({ + prefix: 'docmost-extract-', + unsafeCleanup: true, + }); + + try { + const fileStream = await this.storageService.readStream( + fileTask.filePath, + ); + await pipeline(fileStream, createWriteStream(tmpZipPath)); + await extractZip(tmpZipPath, tmpExtractDir); + } catch (err) { + await cleanupTmpFile(); + await cleanupTmpDir(); + + throw err; + } + + try { + if ( + fileTask.source === FileImportSource.Generic || + fileTask.source === FileImportSource.Notion + ) { + await this.processGenericImport({ + extractDir: tmpExtractDir, + fileTask, + }); + } + + if (fileTask.source === FileImportSource.Confluence) { + let ConfluenceModule: any; + try { + // eslint-disable-next-line @typescript-eslint/no-require-imports + ConfluenceModule = require('./../../../ee/confluence-import/confluence-import.service'); + } catch (err) { + this.logger.error( + 'Confluence import requested but EE module not bundled in this build', + ); + return; + } + const confluenceImportService = this.moduleRef.get( + ConfluenceModule.ConfluenceImportService, + { strict: false }, + ); + + await confluenceImportService.processConfluenceImport({ + extractDir: tmpExtractDir, + fileTask, + }); + } + try { + await this.updateTaskStatus(fileTaskId, FileTaskStatus.Success, null); + await cleanupTmpFile(); + await cleanupTmpDir(); + // delete stored file on success + await this.storageService.delete(fileTask.filePath); + } catch (err) { + this.logger.error( + `Failed to delete import file from storage. Task ID: ${fileTaskId}`, + err, + ); + } + } catch (err) { + await cleanupTmpFile(); + await cleanupTmpDir(); + + throw err; + } + } + + async processGenericImport(opts: { + extractDir: string; + fileTask: FileTask; + }): Promise { + const { extractDir, fileTask } = opts; + const allFiles = await collectMarkdownAndHtmlFiles(extractDir); + const attachmentCandidates = await buildAttachmentCandidates(extractDir); + + const pagesMap = new Map(); + + for (const absPath of allFiles) { + const relPath = path + .relative(extractDir, absPath) + .split(path.sep) + .join('/'); // normalize to forward-slashes + const ext = path.extname(relPath).toLowerCase(); + let content = await fs.readFile(absPath, 'utf-8'); + + if (ext.toLowerCase() === '.md') { + content = await markdownToHtml(content); + } + + pagesMap.set(relPath, { + id: v7(), + slugId: generateSlugId(), + name: path.basename(relPath, ext), + content, + parentPageId: null, + fileExtension: ext, + filePath: relPath, + }); + } + + // parent/child linking + pagesMap.forEach((page, filePath) => { + const segments = filePath.split('/'); + segments.pop(); + let parentPage = null; + while (segments.length) { + const tryMd = segments.join('/') + '.md'; + const tryHtml = segments.join('/') + '.html'; + if (pagesMap.has(tryMd)) { + parentPage = pagesMap.get(tryMd)!; + break; + } + if (pagesMap.has(tryHtml)) { + parentPage = pagesMap.get(tryHtml)!; + break; + } + segments.pop(); + } + if (parentPage) page.parentPageId = parentPage.id; + }); + + // generate position keys + const siblingsMap = new Map(); + + pagesMap.forEach((page) => { + const group = siblingsMap.get(page.parentPageId) ?? []; + group.push(page); + siblingsMap.set(page.parentPageId, group); + }); + + // get root pages + const rootSibs = siblingsMap.get(null); + + if (rootSibs?.length) { + rootSibs.sort((a, b) => a.name.localeCompare(b.name)); + + // get first position key from the server + const nextPosition = await this.pageService.nextPagePosition( + fileTask.spaceId, + ); + + let prevPos: string | null = null; + rootSibs.forEach((page, idx) => { + if (idx === 0) { + page.position = nextPosition; + } else { + page.position = generateJitteredKeyBetween(prevPos, null); + } + prevPos = page.position; + }); + } + + // non-root buckets (children & deeper levels) + siblingsMap.forEach((sibs, parentId) => { + if (parentId === null) return; // root already done + + sibs.sort((a, b) => a.name.localeCompare(b.name)); + + let prevPos: string | null = null; + for (const page of sibs) { + page.position = generateJitteredKeyBetween(prevPos, null); + prevPos = page.position; + } + }); + + // internal page links + const filePathToPageMetaMap = new Map< + string, + { id: string; title: string; slugId: string } + >(); + pagesMap.forEach((page) => { + filePathToPageMetaMap.set(page.filePath, { + id: page.id, + title: page.name, + slugId: page.slugId, + }); + }); + + const pageResults = await Promise.all( + Array.from(pagesMap.values()).map(async (page) => { + const htmlContent = + await this.importAttachmentService.processAttachments({ + html: page.content, + pageRelativePath: page.filePath, + extractDir, + pageId: page.id, + fileTask, + attachmentCandidates, + }); + + const { html, backlinks } = await formatImportHtml({ + html: htmlContent, + currentFilePath: page.filePath, + filePathToPageMetaMap: filePathToPageMetaMap, + creatorId: fileTask.creatorId, + sourcePageId: page.id, + workspaceId: fileTask.workspaceId, + }); + + const pmState = getProsemirrorContent( + await this.importService.processHTML(html), + ); + + const { title, prosemirrorJson } = + this.importService.extractTitleAndRemoveHeading(pmState); + + const insertablePage: InsertablePage = { + id: page.id, + slugId: page.slugId, + title: title || page.name, + content: prosemirrorJson, + textContent: jsonToText(prosemirrorJson), + ydoc: await this.importService.createYdoc(prosemirrorJson), + position: page.position!, + spaceId: fileTask.spaceId, + workspaceId: fileTask.workspaceId, + creatorId: fileTask.creatorId, + lastUpdatedById: fileTask.creatorId, + parentPageId: page.parentPageId, + }; + + return { insertablePage, backlinks }; + }), + ); + + const insertablePages = pageResults.map((r) => r.insertablePage); + const insertableBacklinks = pageResults.flatMap((r) => r.backlinks); + + if (insertablePages.length < 1) return; + const validPageIds = new Set(insertablePages.map((row) => row.id)); + const filteredBacklinks = insertableBacklinks.filter( + ({ sourcePageId, targetPageId }) => + validPageIds.has(sourcePageId) && validPageIds.has(targetPageId), + ); + + await executeTx(this.db, async (trx) => { + await trx.insertInto('pages').values(insertablePages).execute(); + + if (filteredBacklinks.length > 0) { + await this.backlinkRepo.insertBacklink(filteredBacklinks, trx); + } + }); + } + + async getFileTask(fileTaskId: string) { + return this.db + .selectFrom('fileTasks') + .selectAll() + .where('id', '=', fileTaskId) + .executeTakeFirst(); + } + + async updateTaskStatus( + fileTaskId: string, + status: FileTaskStatus, + errorMessage?: string, + ) { + try { + await this.db + .updateTable('fileTasks') + .set({ status: status, errorMessage, updatedAt: new Date() }) + .where('id', '=', fileTaskId) + .execute(); + } catch (err) { + this.logger.error(err); + } + } +} diff --git a/apps/server/src/integrations/import/services/import-attachment.service.ts b/apps/server/src/integrations/import/services/import-attachment.service.ts new file mode 100644 index 00000000..cd9039e2 --- /dev/null +++ b/apps/server/src/integrations/import/services/import-attachment.service.ts @@ -0,0 +1,303 @@ +import { Injectable, Logger } from '@nestjs/common'; +import * as path from 'path'; +import { InjectKysely } from 'nestjs-kysely'; +import { KyselyDB } from '@docmost/db/types/kysely.types'; +import { cleanUrlString } from '../utils/file.utils'; +import { StorageService } from '../../storage/storage.service'; +import { createReadStream } from 'node:fs'; +import { promises as fs } from 'fs'; +import { getMimeType, sanitizeFileName } from '../../../common/helpers'; +import { v7 } from 'uuid'; +import { FileTask } from '@docmost/db/types/entity.types'; +import { getAttachmentFolderPath } from '../../../core/attachment/attachment.utils'; +import { AttachmentType } from '../../../core/attachment/attachment.constants'; +import { unwrapFromParagraph } from '../utils/import-formatter'; +import { resolveRelativeAttachmentPath } from '../utils/import.utils'; +import { load } from 'cheerio'; + +@Injectable() +export class ImportAttachmentService { + private readonly logger = new Logger(ImportAttachmentService.name); + + constructor( + private readonly storageService: StorageService, + @InjectKysely() private readonly db: KyselyDB, + ) {} + + async processAttachments(opts: { + html: string; + pageRelativePath: string; + extractDir: string; + pageId: string; + fileTask: FileTask; + attachmentCandidates: Map; + }): Promise { + const { + html, + pageRelativePath, + extractDir, + pageId, + fileTask, + attachmentCandidates, + } = opts; + + const attachmentTasks: Promise[] = []; + + /** + * Cache keyed by the *relative* path that appears in the HTML. + * Ensures we upload (and DB-insert) each attachment at most once, + * even if it’s referenced multiple times on the page. + */ + const processed = new Map< + string, + { + attachmentId: string; + storageFilePath: string; + apiFilePath: string; + fileNameWithExt: string; + abs: string; + } + >(); + + const uploadOnce = (relPath: string) => { + const abs = attachmentCandidates.get(relPath)!; + const attachmentId = v7(); + const ext = path.extname(abs); + + const fileNameWithExt = + sanitizeFileName(path.basename(abs, ext)) + ext.toLowerCase(); + + const storageFilePath = `${getAttachmentFolderPath( + AttachmentType.File, + fileTask.workspaceId, + )}/${attachmentId}/${fileNameWithExt}`; + + const apiFilePath = `/api/files/${attachmentId}/${fileNameWithExt}`; + + attachmentTasks.push( + (async () => { + const fileStream = createReadStream(abs); + await this.storageService.uploadStream(storageFilePath, fileStream); + const stat = await fs.stat(abs); + + await this.db + .insertInto('attachments') + .values({ + id: attachmentId, + filePath: storageFilePath, + fileName: fileNameWithExt, + fileSize: stat.size, + mimeType: getMimeType(fileNameWithExt), + type: 'file', + fileExt: ext, + creatorId: fileTask.creatorId, + workspaceId: fileTask.workspaceId, + pageId, + spaceId: fileTask.spaceId, + }) + .execute(); + })(), + ); + + return { + attachmentId, + storageFilePath, + apiFilePath, + fileNameWithExt, + abs, + }; + }; + + /** + * – Returns cached data if we’ve already processed this path. + * – Otherwise calls `uploadOnce`, stores the result, and returns it. + */ + const processFile = (relPath: string) => { + const cached = processed.get(relPath); + if (cached) return cached; + + const fresh = uploadOnce(relPath); + processed.set(relPath, fresh); + return fresh; + }; + + const pageDir = path.dirname(pageRelativePath); + const $ = load(html); + + // image + for (const imgEl of $('img').toArray()) { + const $img = $(imgEl); + const src = cleanUrlString($img.attr('src') ?? '')!; + if (!src || src.startsWith('http')) continue; + + const relPath = resolveRelativeAttachmentPath( + src, + pageDir, + attachmentCandidates, + ); + if (!relPath) continue; + + const { attachmentId, apiFilePath, abs } = processFile(relPath); + const stat = await fs.stat(abs); + + const width = $img.attr('width') ?? '100%'; + const align = $img.attr('data-align') ?? 'center'; + + $img + .attr('src', apiFilePath) + .attr('data-attachment-id', attachmentId) + .attr('data-size', stat.size.toString()) + .attr('width', width) + .attr('data-align', align); + + unwrapFromParagraph($, $img); + } + + // video + for (const vidEl of $('video').toArray()) { + const $vid = $(vidEl); + const src = cleanUrlString($vid.attr('src') ?? '')!; + if (!src || src.startsWith('http')) continue; + + const relPath = resolveRelativeAttachmentPath( + src, + pageDir, + attachmentCandidates, + ); + if (!relPath) continue; + + const { attachmentId, apiFilePath, abs } = processFile(relPath); + const stat = await fs.stat(abs); + + const width = $vid.attr('width') ?? '100%'; + const align = $vid.attr('data-align') ?? 'center'; + + $vid + .attr('src', apiFilePath) + .attr('data-attachment-id', attachmentId) + .attr('data-size', stat.size.toString()) + .attr('width', width) + .attr('data-align', align); + + unwrapFromParagraph($, $vid); + } + + //
+ for (const el of $('div[data-type="attachment"]').toArray()) { + const $oldDiv = $(el); + const rawUrl = cleanUrlString($oldDiv.attr('data-attachment-url') ?? '')!; + if (!rawUrl || rawUrl.startsWith('http')) continue; + + const relPath = resolveRelativeAttachmentPath( + rawUrl, + pageDir, + attachmentCandidates, + ); + if (!relPath) continue; + + const { attachmentId, apiFilePath, abs } = processFile(relPath); + const stat = await fs.stat(abs); + const fileName = path.basename(abs); + const mime = getMimeType(abs); + + const $newDiv = $('
') + .attr('data-type', 'attachment') + .attr('data-attachment-url', apiFilePath) + .attr('data-attachment-name', fileName) + .attr('data-attachment-mime', mime) + .attr('data-attachment-size', stat.size.toString()) + .attr('data-attachment-id', attachmentId); + + $oldDiv.replaceWith($newDiv); + unwrapFromParagraph($, $newDiv); + } + + // rewrite other attachments via + for (const aEl of $('a').toArray()) { + const $a = $(aEl); + const href = cleanUrlString($a.attr('href') ?? '')!; + if (!href || href.startsWith('http')) continue; + + const relPath = resolveRelativeAttachmentPath( + href, + pageDir, + attachmentCandidates, + ); + if (!relPath) continue; + + const { attachmentId, apiFilePath, abs } = processFile(relPath); + const stat = await fs.stat(abs); + const ext = path.extname(relPath).toLowerCase(); + + if (ext === '.mp4') { + const $video = $('
') + .attr('data-type', 'attachment') + .attr('data-attachment-url', apiFilePath) + .attr('data-attachment-name', attachmentName) + .attr('data-attachment-mime', getMimeType(abs)) + .attr('data-attachment-size', stat.size.toString()) + .attr('data-attachment-id', attachmentId); + + $a.replaceWith($div); + unwrapFromParagraph($, $div); + } + } + + // excalidraw and drawio + for (const type of ['excalidraw', 'drawio'] as const) { + for (const el of $(`div[data-type="${type}"]`).toArray()) { + const $oldDiv = $(el); + const rawSrc = cleanUrlString($oldDiv.attr('data-src') ?? '')!; + if (!rawSrc || rawSrc.startsWith('http')) continue; + + const relPath = resolveRelativeAttachmentPath( + rawSrc, + pageDir, + attachmentCandidates, + ); + if (!relPath) continue; + + const { attachmentId, apiFilePath, abs } = processFile(relPath); + const stat = await fs.stat(abs); + const fileName = path.basename(abs); + + const width = $oldDiv.attr('data-width') || '100%'; + const align = $oldDiv.attr('data-align') || 'center'; + + const $newDiv = $('
') + .attr('data-type', type) + .attr('data-src', apiFilePath) + .attr('data-title', fileName) + .attr('data-width', width) + .attr('data-size', stat.size.toString()) + .attr('data-align', align) + .attr('data-attachment-id', attachmentId); + + $oldDiv.replaceWith($newDiv); + unwrapFromParagraph($, $newDiv); + } + } + + // wait for all uploads & DB inserts + try { + await Promise.all(attachmentTasks); + } catch (err) { + this.logger.log('Import attachment upload error', err); + } + + return $.root().html() || ''; + } +} diff --git a/apps/server/src/integrations/import/import.service.ts b/apps/server/src/integrations/import/services/import.service.ts similarity index 61% rename from apps/server/src/integrations/import/import.service.ts rename to apps/server/src/integrations/import/services/import.service.ts index f77df0dc..a3da4918 100644 --- a/apps/server/src/integrations/import/import.service.ts +++ b/apps/server/src/integrations/import/services/import.service.ts @@ -4,16 +4,27 @@ import { MultipartFile } from '@fastify/multipart'; import { sanitize } from 'sanitize-filename-ts'; import * as path from 'path'; import { - htmlToJson, jsonToText, + htmlToJson, + jsonToText, tiptapExtensions, -} from '../../collaboration/collaboration.util'; +} from '../../../collaboration/collaboration.util'; import { InjectKysely } from 'nestjs-kysely'; import { KyselyDB } from '@docmost/db/types/kysely.types'; -import { generateSlugId } from '../../common/helpers'; +import { generateSlugId, sanitizeFileName } from '../../../common/helpers'; import { generateJitteredKeyBetween } from 'fractional-indexing-jittered'; import { TiptapTransformer } from '@hocuspocus/transformer'; import * as Y from 'yjs'; -import { markdownToHtml } from "@docmost/editor-ext"; +import { markdownToHtml } from '@docmost/editor-ext'; +import { + FileTaskStatus, + FileTaskType, + getFileTaskFolderPath, +} from '../utils/file.utils'; +import { v7 as uuid7 } from 'uuid'; +import { StorageService } from '../../storage/storage.service'; +import { InjectQueue } from '@nestjs/bullmq'; +import { Queue } from 'bullmq'; +import { QueueJob, QueueName } from '../../queue/constants'; @Injectable() export class ImportService { @@ -21,7 +32,10 @@ export class ImportService { constructor( private readonly pageRepo: PageRepo, + private readonly storageService: StorageService, @InjectKysely() private readonly db: KyselyDB, + @InjectQueue(QueueName.FILE_TASK_QUEUE) + private readonly fileTaskQueue: Queue, ) {} async importPage( @@ -113,7 +127,7 @@ export class ImportService { async createYdoc(prosemirrorJson: any): Promise { if (prosemirrorJson) { - this.logger.debug(`Converting prosemirror json state to ydoc`); + // this.logger.debug(`Converting prosemirror json state to ydoc`); const ydoc = TiptapTransformer.toYdoc( prosemirrorJson, @@ -129,20 +143,34 @@ export class ImportService { } extractTitleAndRemoveHeading(prosemirrorState: any) { - let title = null; + let title: string | null = null; + + const content = prosemirrorState.content ?? []; if ( - prosemirrorState?.content?.length > 0 && - prosemirrorState.content[0].type === 'heading' && - prosemirrorState.content[0].attrs?.level === 1 + content.length > 0 && + content[0].type === 'heading' && + content[0].attrs?.level === 1 ) { - title = prosemirrorState.content[0].content[0].text; - - // remove h1 header node from state - prosemirrorState.content.shift(); + title = content[0].content?.[0]?.text ?? null; + content.shift(); } - return { title, prosemirrorJson: prosemirrorState }; + // ensure at least one paragraph + if (content.length === 0) { + content.push({ + type: 'paragraph', + content: [], + }); + } + + return { + title, + prosemirrorJson: { + ...prosemirrorState, + content, + }, + }; } async getNewPagePosition(spaceId: string): Promise { @@ -161,4 +189,52 @@ export class ImportService { return generateJitteredKeyBetween(null, null); } } + + async importZip( + filePromise: Promise, + source: string, + userId: string, + spaceId: string, + workspaceId: string, + ) { + const file = await filePromise; + const fileBuffer = await file.toBuffer(); + const fileExtension = path.extname(file.filename).toLowerCase(); + const fileName = sanitizeFileName( + path.basename(file.filename, fileExtension), + ); + const fileSize = fileBuffer.length; + + const fileNameWithExt = fileName + fileExtension; + + const fileTaskId = uuid7(); + const filePath = `${getFileTaskFolderPath(FileTaskType.Import, workspaceId)}/${fileTaskId}/${fileNameWithExt}`; + + // upload file + await this.storageService.upload(filePath, fileBuffer); + + const fileTask = await this.db + .insertInto('fileTasks') + .values({ + id: fileTaskId, + type: FileTaskType.Import, + source: source, + status: FileTaskStatus.Processing, + fileName: fileNameWithExt, + filePath: filePath, + fileSize: fileSize, + fileExt: 'zip', + creatorId: userId, + spaceId: spaceId, + workspaceId: workspaceId, + }) + .returningAll() + .executeTakeFirst(); + + await this.fileTaskQueue.add(QueueJob.IMPORT_TASK, { + fileTaskId: fileTaskId, + }); + + return fileTask; + } } diff --git a/apps/server/src/integrations/import/utils/file.utils.ts b/apps/server/src/integrations/import/utils/file.utils.ts new file mode 100644 index 00000000..b3d39cda --- /dev/null +++ b/apps/server/src/integrations/import/utils/file.utils.ts @@ -0,0 +1,187 @@ +import * as yauzl from 'yauzl'; +import * as path from 'path'; +import * as fs from 'node:fs'; + +export enum FileTaskType { + Import = 'import', + Export = 'export', +} + +export enum FileImportSource { + Generic = 'generic', + Notion = 'notion', + Confluence = 'confluence', +} + +export enum FileTaskStatus { + Processing = 'processing', + Success = 'success', + Failed = 'failed', +} + +export function getFileTaskFolderPath( + type: FileTaskType, + workspaceId: string, +): string { + switch (type) { + case FileTaskType.Import: + return `${workspaceId}/imports`; + case FileTaskType.Export: + return `${workspaceId}/exports`; + } +} + +/** + * Extracts a ZIP archive. + */ +export async function extractZip( + source: string, + target: string, +): Promise { + return extractZipInternal(source, target, true); +} + +/** + * Internal helper to extract a ZIP, with optional single-nested-ZIP handling. + * @param source Path to the ZIP file + * @param target Directory to extract into + * @param allowNested Whether to check and unwrap one level of nested ZIP + */ +function extractZipInternal( + source: string, + target: string, + allowNested: boolean, +): Promise { + return new Promise((resolve, reject) => { + yauzl.open( + source, + { lazyEntries: true, decodeStrings: false, autoClose: true }, + (err, zipfile) => { + if (err) return reject(err); + + // Handle one level of nested ZIP if allowed + if (allowNested && zipfile.entryCount === 1) { + zipfile.readEntry(); + zipfile.once('entry', (entry) => { + const name = entry.fileName.toString('utf8').replace(/^\/+/, ''); + const isZip = + !/\/$/.test(entry.fileName) && + name.toLowerCase().endsWith('.zip'); + if (isZip) { + // temporary name to avoid overwriting file + const nestedPath = source.endsWith('.zip') + ? source.slice(0, -4) + '.inner.zip' + : source + '.inner.zip'; + + zipfile.openReadStream(entry, (openErr, rs) => { + if (openErr) return reject(openErr); + const ws = fs.createWriteStream(nestedPath); + rs.on('error', reject); + ws.on('error', reject); + ws.on('finish', () => { + zipfile.close(); + extractZipInternal(nestedPath, target, false) + .then(() => { + fs.unlinkSync(nestedPath); + resolve(); + }) + .catch(reject); + }); + rs.pipe(ws); + }); + } else { + zipfile.close(); + extractZipInternal(source, target, false).then(resolve, reject); + } + }); + zipfile.once('error', reject); + return; + } + + // Normal extraction + zipfile.readEntry(); + zipfile.on('entry', (entry) => { + const name = entry.fileName.toString('utf8'); + const safe = name.replace(/^\/+/, ''); + if (safe.startsWith('__MACOSX/')) { + zipfile.readEntry(); + return; + } + + const fullPath = path.join(target, safe); + + // Handle directories + if (/\/$/.test(name)) { + try { + fs.mkdirSync(fullPath, { recursive: true }); + } catch (mkdirErr: any) { + if (mkdirErr.code === 'ENAMETOOLONG') { + console.warn(`Skipping directory (path too long): ${fullPath}`); + zipfile.readEntry(); + return; + } + return reject(mkdirErr); + } + zipfile.readEntry(); + return; + } + + // Handle files + try { + fs.mkdirSync(path.dirname(fullPath), { recursive: true }); + } catch (mkdirErr: any) { + if (mkdirErr.code === 'ENAMETOOLONG') { + console.warn( + `Skipping file directory creation (path too long): ${fullPath}`, + ); + zipfile.readEntry(); + return; + } + return reject(mkdirErr); + } + + zipfile.openReadStream(entry, (openErr, rs) => { + if (openErr) return reject(openErr); + + let ws: fs.WriteStream; + try { + ws = fs.createWriteStream(fullPath); + } catch (openWsErr: any) { + if (openWsErr.code === 'ENAMETOOLONG') { + console.warn( + `Skipping file write (path too long): ${fullPath}`, + ); + zipfile.readEntry(); + return; + } + return reject(openWsErr); + } + + rs.on('error', (err) => reject(err)); + ws.on('error', (err) => { + if ((err as any).code === 'ENAMETOOLONG') { + console.warn( + `Skipping file write on stream (path too long): ${fullPath}`, + ); + zipfile.readEntry(); + } else { + reject(err); + } + }); + ws.on('finish', () => zipfile.readEntry()); + rs.pipe(ws); + }); + }); + + zipfile.on('end', () => resolve()); + zipfile.on('error', (err) => reject(err)); + }, + ); + }); +} + +export function cleanUrlString(url: string): string { + if (!url) return null; + const [mainUrl] = url.split('?', 1); + return mainUrl; +} diff --git a/apps/server/src/integrations/import/utils/import-formatter.ts b/apps/server/src/integrations/import/utils/import-formatter.ts new file mode 100644 index 00000000..92291d39 --- /dev/null +++ b/apps/server/src/integrations/import/utils/import-formatter.ts @@ -0,0 +1,254 @@ +import { getEmbedUrlAndProvider } from '@docmost/editor-ext'; +import * as path from 'path'; +import { v7 } from 'uuid'; +import { InsertableBacklink } from '@docmost/db/types/entity.types'; +import { Cheerio, CheerioAPI, load } from 'cheerio'; + +export async function formatImportHtml(opts: { + html: string; + currentFilePath: string; + filePathToPageMetaMap: Map< + string, + { id: string; title: string; slugId: string } + >; + creatorId: string; + sourcePageId: string; + workspaceId: string; + pageDir?: string; + attachmentCandidates?: string[]; +}): Promise<{ html: string; backlinks: InsertableBacklink[] }> { + const { + html, + currentFilePath, + filePathToPageMetaMap, + creatorId, + sourcePageId, + workspaceId, + } = opts; + const $: CheerioAPI = load(html); + const $root: Cheerio = $.root(); + + notionFormatter($, $root); + defaultHtmlFormatter($, $root); + + const backlinks = await rewriteInternalLinksToMentionHtml( + $, + $root, + currentFilePath, + filePathToPageMetaMap, + creatorId, + sourcePageId, + workspaceId, + ); + + return { + html: $root.html() || '', + backlinks, + }; +} + +export function defaultHtmlFormatter($: CheerioAPI, $root: Cheerio) { + $root.find('a[href]').each((_, el) => { + const $el = $(el); + const url = $el.attr('href')!; + const { provider } = getEmbedUrlAndProvider(url); + if (provider === 'iframe') return; + + const embed = `
`; + $el.replaceWith(embed); + }); + + $root.find('iframe[src]').each((_, el) => { + const $el = $(el); + const url = $el.attr('src')!; + const { provider } = getEmbedUrlAndProvider(url); + + const embed = `
`; + $el.replaceWith(embed); + }); +} + +export function notionFormatter($: CheerioAPI, $root: Cheerio) { + // remove empty description paragraphs + $root.find('p.page-description').each((_, el) => { + if (!$(el).text().trim()) $(el).remove(); + }); + + // block math → mathBlock + $root.find('figure.equation').each((_: any, fig: any) => { + const $fig = $(fig); + const tex = $fig + .find('annotation[encoding="application/x-tex"]') + .text() + .trim(); + const $math = $('
') + .attr('data-type', 'mathBlock') + .attr('data-katex', 'true') + .text(tex); + $fig.replaceWith($math); + }); + + // inline math → mathInline + $root.find('span.notion-text-equation-token').each((_, tok) => { + const $tok = $(tok); + const $prev = $tok.prev('style'); + if ($prev.length) $prev.remove(); + const tex = $tok + .find('annotation[encoding="application/x-tex"]') + .text() + .trim(); + const $inline = $('') + .attr('data-type', 'mathInline') + .attr('data-katex', 'true') + .text(tex); + $tok.replaceWith($inline); + }); + + // callouts + $root + .find('figure.callout') + .get() + .reverse() + .forEach((fig) => { + const $fig = $(fig); + const $content = $fig.find('div').eq(1); + if (!$content.length) return; + const $wrapper = $('
') + .attr('data-type', 'callout') + .attr('data-callout-type', 'info'); + // @ts-ignore + $content.children().each((_, child) => $wrapper.append(child)); + $fig.replaceWith($wrapper); + }); + + // to-do lists + $root.find('ul.to-do-list').each((_, list) => { + const $old = $(list); + const $new = $('