mirror of
https://github.com/docmost/docmost.git
synced 2026-05-08 15:23:07 +08:00
feat: pdf import
This commit is contained in:
@@ -8,6 +8,7 @@ export const Feature = {
|
||||
AI: 'ai',
|
||||
CONFLUENCE_IMPORT: 'import:confluence',
|
||||
DOCX_IMPORT: 'import:docx',
|
||||
PDF_IMPORT: 'import:pdf',
|
||||
ATTACHMENT_INDEXING: 'attachment:indexing',
|
||||
SECURITY_SETTINGS: 'security:settings',
|
||||
MCP: 'mcp',
|
||||
|
||||
+1
-1
Submodule apps/server/src/ee updated: a5eb8d1e9a...4bac9b0a3f
@@ -51,7 +51,7 @@ export class ImportController {
|
||||
@AuthUser() user: User,
|
||||
@AuthWorkspace() workspace: Workspace,
|
||||
) {
|
||||
const validFileExtensions = ['.md', '.html', '.docx'];
|
||||
const validFileExtensions = ['.md', '.html', '.docx', '.pdf'];
|
||||
|
||||
const maxFileSize = bytes('20mb');
|
||||
|
||||
@@ -102,6 +102,7 @@ export class ImportController {
|
||||
'.md': 'markdown',
|
||||
'.html': 'html',
|
||||
'.docx': 'docx',
|
||||
'.pdf': 'pdf',
|
||||
};
|
||||
|
||||
if (createdPage) {
|
||||
|
||||
@@ -62,7 +62,10 @@ export class ImportService {
|
||||
let createdPage = null;
|
||||
|
||||
// For DOCX, we need the page ID upfront so images can reference it
|
||||
const pageId = fileExtension === '.docx' ? uuid7() : undefined;
|
||||
const pageId =
|
||||
fileExtension === '.docx' || fileExtension === '.pdf'
|
||||
? uuid7()
|
||||
: undefined;
|
||||
|
||||
try {
|
||||
if (fileExtension.endsWith('.md')) {
|
||||
@@ -77,6 +80,14 @@ export class ImportService {
|
||||
pageId,
|
||||
userId,
|
||||
);
|
||||
} else if (fileExtension.endsWith('.pdf')) {
|
||||
prosemirrorState = await this.processPdf(
|
||||
fileBuffer,
|
||||
workspaceId,
|
||||
spaceId,
|
||||
pageId,
|
||||
userId,
|
||||
);
|
||||
}
|
||||
} catch (err) {
|
||||
const message = 'Error processing file content';
|
||||
@@ -153,7 +164,7 @@ export class ImportService {
|
||||
let DocxImportModule: any;
|
||||
try {
|
||||
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||
DocxImportModule = require('./../../../ee/docx-import/docx-import.service');
|
||||
DocxImportModule = require('./../../../ee/document-import/docx-import.service');
|
||||
} catch (err) {
|
||||
this.logger.error(
|
||||
'DOCX import requested but EE module not bundled in this build',
|
||||
@@ -179,6 +190,42 @@ export class ImportService {
|
||||
return this.processHTML(html);
|
||||
}
|
||||
|
||||
async processPdf(
|
||||
fileBuffer: Buffer,
|
||||
workspaceId: string,
|
||||
spaceId: string,
|
||||
pageId: string,
|
||||
userId: string,
|
||||
): Promise<any> {
|
||||
let PdfImportModule: any;
|
||||
try {
|
||||
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||
PdfImportModule = require('./../../../ee/document-import/pdf-import.service');
|
||||
} catch (err) {
|
||||
this.logger.error(
|
||||
'PDF import requested but EE module not bundled in this build',
|
||||
);
|
||||
throw new BadRequestException(
|
||||
'This feature requires a valid enterprise license.',
|
||||
);
|
||||
}
|
||||
|
||||
const pdfImportService = this.moduleRef.get(
|
||||
PdfImportModule.PdfImportService,
|
||||
{ strict: false },
|
||||
);
|
||||
|
||||
const html = await pdfImportService.convertPdfToHtml(
|
||||
fileBuffer,
|
||||
workspaceId,
|
||||
spaceId,
|
||||
pageId,
|
||||
userId,
|
||||
);
|
||||
|
||||
return this.processHTML(html);
|
||||
}
|
||||
|
||||
async createYdoc(prosemirrorJson: any): Promise<Buffer | null> {
|
||||
if (prosemirrorJson) {
|
||||
// this.logger.debug(`Converting prosemirror json state to ydoc`);
|
||||
|
||||
Reference in New Issue
Block a user