mirror of
https://github.com/docmost/docmost.git
synced 2026-05-07 06:23:06 +08:00
feat: pdf import
This commit is contained in:
@@ -8,6 +8,7 @@ export const Feature = {
|
|||||||
AI: 'ai',
|
AI: 'ai',
|
||||||
CONFLUENCE_IMPORT: 'import:confluence',
|
CONFLUENCE_IMPORT: 'import:confluence',
|
||||||
DOCX_IMPORT: 'import:docx',
|
DOCX_IMPORT: 'import:docx',
|
||||||
|
PDF_IMPORT: 'import:pdf',
|
||||||
ATTACHMENT_INDEXING: 'attachment:indexing',
|
ATTACHMENT_INDEXING: 'attachment:indexing',
|
||||||
SECURITY_SETTINGS: 'security:settings',
|
SECURITY_SETTINGS: 'security:settings',
|
||||||
MCP: 'mcp',
|
MCP: 'mcp',
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import {
|
|||||||
IconCheck,
|
IconCheck,
|
||||||
IconFileCode,
|
IconFileCode,
|
||||||
IconFileTypeDocx,
|
IconFileTypeDocx,
|
||||||
|
IconFileTypePdf,
|
||||||
IconFileTypeZip,
|
IconFileTypeZip,
|
||||||
IconMarkdown,
|
IconMarkdown,
|
||||||
IconX,
|
IconX,
|
||||||
@@ -90,12 +91,14 @@ function ImportFormatSelection({ spaceId, onClose }: ImportFormatSelection) {
|
|||||||
const markdownFileRef = useRef<() => void>(null);
|
const markdownFileRef = useRef<() => void>(null);
|
||||||
const htmlFileRef = useRef<() => void>(null);
|
const htmlFileRef = useRef<() => void>(null);
|
||||||
const docxFileRef = useRef<() => void>(null);
|
const docxFileRef = useRef<() => void>(null);
|
||||||
|
const pdfFileRef = useRef<() => void>(null);
|
||||||
const notionFileRef = useRef<() => void>(null);
|
const notionFileRef = useRef<() => void>(null);
|
||||||
const confluenceFileRef = useRef<() => void>(null);
|
const confluenceFileRef = useRef<() => void>(null);
|
||||||
const zipFileRef = useRef<() => void>(null);
|
const zipFileRef = useRef<() => void>(null);
|
||||||
|
|
||||||
const canUseConfluence = useHasFeature(Feature.CONFLUENCE_IMPORT);
|
const canUseConfluence = useHasFeature(Feature.CONFLUENCE_IMPORT);
|
||||||
const canUseDocx = useHasFeature(Feature.DOCX_IMPORT);
|
const canUseDocx = useHasFeature(Feature.DOCX_IMPORT);
|
||||||
|
const canUsePdf = useHasFeature(Feature.PDF_IMPORT);
|
||||||
const upgradeLabel = useUpgradeLabel();
|
const upgradeLabel = useUpgradeLabel();
|
||||||
|
|
||||||
const handleZipUpload = async (selectedFile: File, source: string) => {
|
const handleZipUpload = async (selectedFile: File, source: string) => {
|
||||||
@@ -298,6 +301,7 @@ function ImportFormatSelection({ spaceId, onClose }: ImportFormatSelection) {
|
|||||||
if (markdownFileRef.current) markdownFileRef.current();
|
if (markdownFileRef.current) markdownFileRef.current();
|
||||||
if (htmlFileRef.current) htmlFileRef.current();
|
if (htmlFileRef.current) htmlFileRef.current();
|
||||||
if (docxFileRef.current) docxFileRef.current();
|
if (docxFileRef.current) docxFileRef.current();
|
||||||
|
if (pdfFileRef.current) pdfFileRef.current();
|
||||||
|
|
||||||
const pageCountText =
|
const pageCountText =
|
||||||
pageCount === 1 ? `1 ${t("page")}` : `${pageCount} ${t("pages")}`;
|
pageCount === 1 ? `1 ${t("page")}` : `${pageCount} ${t("pages")}`;
|
||||||
@@ -378,6 +382,30 @@ function ImportFormatSelection({ spaceId, onClose }: ImportFormatSelection) {
|
|||||||
)}
|
)}
|
||||||
</FileButton>
|
</FileButton>
|
||||||
|
|
||||||
|
<FileButton
|
||||||
|
onChange={handleFileUpload}
|
||||||
|
accept=".pdf"
|
||||||
|
multiple
|
||||||
|
resetRef={pdfFileRef}
|
||||||
|
>
|
||||||
|
{(props) => (
|
||||||
|
<Tooltip
|
||||||
|
label={upgradeLabel}
|
||||||
|
disabled={canUsePdf}
|
||||||
|
>
|
||||||
|
<Button
|
||||||
|
disabled={!canUsePdf}
|
||||||
|
justify="start"
|
||||||
|
variant="default"
|
||||||
|
leftSection={<IconFileTypePdf size={18} />}
|
||||||
|
{...props}
|
||||||
|
>
|
||||||
|
PDF
|
||||||
|
</Button>
|
||||||
|
</Tooltip>
|
||||||
|
)}
|
||||||
|
</FileButton>
|
||||||
|
|
||||||
<FileButton
|
<FileButton
|
||||||
onChange={(file) => handleZipUpload(file, "notion")}
|
onChange={(file) => handleZipUpload(file, "notion")}
|
||||||
accept="application/zip"
|
accept="application/zip"
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ export const Feature = {
|
|||||||
AI: 'ai',
|
AI: 'ai',
|
||||||
CONFLUENCE_IMPORT: 'import:confluence',
|
CONFLUENCE_IMPORT: 'import:confluence',
|
||||||
DOCX_IMPORT: 'import:docx',
|
DOCX_IMPORT: 'import:docx',
|
||||||
|
PDF_IMPORT: 'import:pdf',
|
||||||
ATTACHMENT_INDEXING: 'attachment:indexing',
|
ATTACHMENT_INDEXING: 'attachment:indexing',
|
||||||
SECURITY_SETTINGS: 'security:settings',
|
SECURITY_SETTINGS: 'security:settings',
|
||||||
MCP: 'mcp',
|
MCP: 'mcp',
|
||||||
|
|||||||
+1
-1
Submodule apps/server/src/ee updated: a5eb8d1e9a...4bac9b0a3f
@@ -51,7 +51,7 @@ export class ImportController {
|
|||||||
@AuthUser() user: User,
|
@AuthUser() user: User,
|
||||||
@AuthWorkspace() workspace: Workspace,
|
@AuthWorkspace() workspace: Workspace,
|
||||||
) {
|
) {
|
||||||
const validFileExtensions = ['.md', '.html', '.docx'];
|
const validFileExtensions = ['.md', '.html', '.docx', '.pdf'];
|
||||||
|
|
||||||
const maxFileSize = bytes('20mb');
|
const maxFileSize = bytes('20mb');
|
||||||
|
|
||||||
@@ -102,6 +102,7 @@ export class ImportController {
|
|||||||
'.md': 'markdown',
|
'.md': 'markdown',
|
||||||
'.html': 'html',
|
'.html': 'html',
|
||||||
'.docx': 'docx',
|
'.docx': 'docx',
|
||||||
|
'.pdf': 'pdf',
|
||||||
};
|
};
|
||||||
|
|
||||||
if (createdPage) {
|
if (createdPage) {
|
||||||
|
|||||||
@@ -62,7 +62,10 @@ export class ImportService {
|
|||||||
let createdPage = null;
|
let createdPage = null;
|
||||||
|
|
||||||
// For DOCX, we need the page ID upfront so images can reference it
|
// For DOCX and PDF, we need the page ID upfront so imported content (e.g. images) can reference it
|
||||||
const pageId = fileExtension === '.docx' ? uuid7() : undefined;
|
const pageId =
|
||||||
|
fileExtension === '.docx' || fileExtension === '.pdf'
|
||||||
|
? uuid7()
|
||||||
|
: undefined;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (fileExtension.endsWith('.md')) {
|
if (fileExtension.endsWith('.md')) {
|
||||||
@@ -77,6 +80,14 @@ export class ImportService {
|
|||||||
pageId,
|
pageId,
|
||||||
userId,
|
userId,
|
||||||
);
|
);
|
||||||
|
} else if (fileExtension.endsWith('.pdf')) {
|
||||||
|
prosemirrorState = await this.processPdf(
|
||||||
|
fileBuffer,
|
||||||
|
workspaceId,
|
||||||
|
spaceId,
|
||||||
|
pageId,
|
||||||
|
userId,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const message = 'Error processing file content';
|
const message = 'Error processing file content';
|
||||||
@@ -153,7 +164,7 @@ export class ImportService {
|
|||||||
let DocxImportModule: any;
|
let DocxImportModule: any;
|
||||||
try {
|
try {
|
||||||
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||||
DocxImportModule = require('./../../../ee/docx-import/docx-import.service');
|
DocxImportModule = require('./../../../ee/document-import/docx-import.service');
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
this.logger.error(
|
this.logger.error(
|
||||||
'DOCX import requested but EE module not bundled in this build',
|
'DOCX import requested but EE module not bundled in this build',
|
||||||
@@ -179,6 +190,42 @@ export class ImportService {
|
|||||||
return this.processHTML(html);
|
return this.processHTML(html);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Converts an uploaded PDF into editor state using the enterprise (EE) PDF
 * import service.
 *
 * The EE service file is loaded lazily with `require`; when that load fails the
 * failure is logged and the request is rejected with a BadRequestException.
 * Otherwise the PDF is converted to HTML by the EE service and that HTML is
 * handed to `processHTML`.
 *
 * @param fileBuffer - Raw contents of the uploaded PDF.
 * @param workspaceId - Forwarded unchanged to the EE converter.
 * @param spaceId - Forwarded unchanged to the EE converter.
 * @param pageId - Forwarded unchanged to the EE converter.
 * @param userId - Forwarded unchanged to the EE converter.
 * @returns Whatever `processHTML` returns for the converted HTML.
 * @throws BadRequestException when the EE PDF import module cannot be required.
 */
async processPdf(
  fileBuffer: Buffer,
  workspaceId: string,
  spaceId: string,
  pageId: string,
  userId: string,
): Promise<any> {
  let eeModule: any;

  try {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    eeModule = require('./../../../ee/document-import/pdf-import.service');
  } catch (loadError) {
    // Any failure to load the EE file is reported to the client as a licensing problem.
    this.logger.error(
      'PDF import requested but EE module not bundled in this build',
    );
    throw new BadRequestException(
      'This feature requires a valid enterprise license.',
    );
  }

  // NOTE(review): `strict: false` presumably lets the lookup resolve providers
  // registered outside the current module — confirm against the NestJS ModuleRef docs.
  const converter = this.moduleRef.get(eeModule.PdfImportService, {
    strict: false,
  });

  const convertedHtml = await converter.convertPdfToHtml(
    fileBuffer,
    workspaceId,
    spaceId,
    pageId,
    userId,
  );

  // The HTML import path turns the converted markup into the final result.
  return this.processHTML(convertedHtml);
}
|
||||||
|
|
||||||
async createYdoc(prosemirrorJson: any): Promise<Buffer | null> {
|
async createYdoc(prosemirrorJson: any): Promise<Buffer | null> {
|
||||||
if (prosemirrorJson) {
|
if (prosemirrorJson) {
|
||||||
// this.logger.debug(`Converting prosemirror json state to ydoc`);
|
// this.logger.debug(`Converting prosemirror json state to ydoc`);
|
||||||
|
|||||||
Reference in New Issue
Block a user