Compare commits

...

8 Commits

Author SHA1 Message Date
Philipinho ce57f66136 update package 2026-05-01 14:56:12 +01:00
Philipinho 1a6eb5a7f4 sync 2026-05-01 14:55:17 +01:00
Philipinho 9cda8ca6f4 use npm package 2026-04-30 17:53:14 +01:00
Philipinho 663573062d Merge branch 'main' into fire-pdf 2026-04-30 03:05:06 +01:00
Philipinho 00eab7ac61 increase single file upload size limit 2026-04-30 03:04:02 +01:00
Philipinho 28b46dc0cb feat: pdf import 2026-04-16 14:17:27 +01:00
Philipinho ba9e4de036 use modified firecrawl-pdf-inspector 2026-04-16 12:49:52 +01:00
Philipinho e0788dc654 feat: replace pdfjs-dist with firecrawl-pdf-inspector 2026-04-16 01:24:27 +01:00
8 changed files with 94 additions and 143 deletions
+1
View File
@@ -8,6 +8,7 @@ export const Feature = {
AI: 'ai',
CONFLUENCE_IMPORT: 'import:confluence',
DOCX_IMPORT: 'import:docx',
PDF_IMPORT: 'import:pdf',
ATTACHMENT_INDEXING: 'attachment:indexing',
SECURITY_SETTINGS: 'security:settings',
MCP: 'mcp',
@@ -12,6 +12,7 @@ import {
IconCheck,
IconFileCode,
IconFileTypeDocx,
IconFileTypePdf,
IconFileTypeZip,
IconMarkdown,
IconX,
@@ -90,12 +91,14 @@ function ImportFormatSelection({ spaceId, onClose }: ImportFormatSelection) {
const markdownFileRef = useRef<() => void>(null);
const htmlFileRef = useRef<() => void>(null);
const docxFileRef = useRef<() => void>(null);
const pdfFileRef = useRef<() => void>(null);
const notionFileRef = useRef<() => void>(null);
const confluenceFileRef = useRef<() => void>(null);
const zipFileRef = useRef<() => void>(null);
const canUseConfluence = useHasFeature(Feature.CONFLUENCE_IMPORT);
const canUseDocx = useHasFeature(Feature.DOCX_IMPORT);
const canUsePdf = useHasFeature(Feature.PDF_IMPORT);
const upgradeLabel = useUpgradeLabel();
const handleZipUpload = async (selectedFile: File, source: string) => {
@@ -244,7 +247,7 @@ function ImportFormatSelection({ spaceId, onClose }: ImportFormatSelection) {
}, 3000);
}, [fileTaskId]);
const maxSingleFileSize = bytes("20mb");
const maxSingleFileSize = bytes("30mb");
const handleFileUpload = async (selectedFiles: File[]) => {
if (!selectedFiles) {
@@ -298,6 +301,7 @@ function ImportFormatSelection({ spaceId, onClose }: ImportFormatSelection) {
if (markdownFileRef.current) markdownFileRef.current();
if (htmlFileRef.current) htmlFileRef.current();
if (docxFileRef.current) docxFileRef.current();
if (pdfFileRef.current) pdfFileRef.current();
const pageCountText =
pageCount === 1 ? `1 ${t("page")}` : `${pageCount} ${t("pages")}`;
@@ -378,6 +382,30 @@ function ImportFormatSelection({ spaceId, onClose }: ImportFormatSelection) {
)}
</FileButton>
<FileButton
onChange={handleFileUpload}
accept=".pdf"
multiple
resetRef={pdfFileRef}
>
{(props) => (
<Tooltip
label={upgradeLabel}
disabled={canUsePdf}
>
<Button
disabled={!canUsePdf}
justify="start"
variant="default"
leftSection={<IconFileTypePdf size={18} />}
{...props}
>
PDF
</Button>
</Tooltip>
)}
</FileButton>
<FileButton
onChange={(file) => handleZipUpload(file, "notion")}
accept="application/zip"
+1 -1
View File
@@ -37,6 +37,7 @@
"@aws-sdk/lib-storage": "3.1037.0",
"@aws-sdk/s3-request-presigner": "3.1037.0",
"@clickhouse/client": "^1.18.2",
"@docmost/pdf-inspector": "1.9.4",
"@fastify/cookie": "^11.0.2",
"@fastify/multipart": "^10.0.0",
"@fastify/static": "^9.1.3",
@@ -100,7 +101,6 @@
"p-limit": "^7.3.0",
"passport-google-oauth20": "^2.0.0",
"passport-jwt": "^4.0.1",
"pdfjs-dist": "^5.5.207",
"pg-tsquery": "^8.4.2",
"pgvector": "^0.2.1",
"pino-http": "^11.0.0",
+1
View File
@@ -8,6 +8,7 @@ export const Feature = {
AI: 'ai',
CONFLUENCE_IMPORT: 'import:confluence',
DOCX_IMPORT: 'import:docx',
PDF_IMPORT: 'import:pdf',
ATTACHMENT_INDEXING: 'attachment:indexing',
SECURITY_SETTINGS: 'security:settings',
MCP: 'mcp',
@@ -51,9 +51,9 @@ export class ImportController {
@AuthUser() user: User,
@AuthWorkspace() workspace: Workspace,
) {
const validFileExtensions = ['.md', '.html', '.docx'];
const validFileExtensions = ['.md', '.html', '.docx', '.pdf'];
const maxFileSize = bytes('20mb');
const maxFileSize = bytes('30mb');
let file = null;
try {
@@ -102,6 +102,7 @@ export class ImportController {
'.md': 'markdown',
'.html': 'html',
'.docx': 'docx',
'.pdf': 'pdf',
};
if (createdPage) {
@@ -61,7 +61,10 @@ export class ImportService {
let createdPage = null;
// For DOCX, we need the page ID upfront so images can reference it
const pageId = fileExtension === '.docx' ? uuid7() : undefined;
const pageId =
fileExtension === '.docx' || fileExtension === '.pdf'
? uuid7()
: undefined;
try {
if (fileExtension.endsWith('.md')) {
@@ -76,6 +79,14 @@ export class ImportService {
pageId,
userId,
);
} else if (fileExtension.endsWith('.pdf')) {
prosemirrorState = await this.processPdf(
fileBuffer,
workspaceId,
spaceId,
pageId,
userId,
);
}
} catch (err) {
const message = 'Error processing file content';
@@ -152,7 +163,7 @@ export class ImportService {
let DocxImportModule: any;
try {
// eslint-disable-next-line @typescript-eslint/no-require-imports
DocxImportModule = require('./../../../ee/docx-import/docx-import.service');
DocxImportModule = require('./../../../ee/document-import/docx-import.service');
} catch (err) {
this.logger.error(
'DOCX import requested but EE module not bundled in this build',
@@ -178,6 +189,42 @@ export class ImportService {
return this.processHTML(html);
}
async processPdf(
fileBuffer: Buffer,
workspaceId: string,
spaceId: string,
pageId: string,
userId: string,
): Promise<any> {
let PdfImportModule: any;
try {
// eslint-disable-next-line @typescript-eslint/no-require-imports
PdfImportModule = require('./../../../ee/document-import/pdf-import.service');
} catch (err) {
this.logger.error(
'PDF import requested but EE module not bundled in this build',
);
throw new BadRequestException(
'This feature requires a valid enterprise license.',
);
}
const pdfImportService = this.moduleRef.get(
PdfImportModule.PdfImportService,
{ strict: false },
);
const html = await pdfImportService.convertPdfToHtml(
fileBuffer,
workspaceId,
spaceId,
pageId,
userId,
);
return this.processHTML(html);
}
async createYdoc(prosemirrorJson: any): Promise<Buffer | null> {
if (prosemirrorJson) {
// this.logger.debug(`Converting prosemirror json state to ydoc`);
+9 -136
View File
@@ -479,6 +479,9 @@ importers:
'@clickhouse/client':
specifier: ^1.18.2
version: 1.18.2
'@docmost/pdf-inspector':
specifier: 1.9.4
version: 1.9.4
'@fastify/cookie':
specifier: ^11.0.2
version: 11.0.2
@@ -668,9 +671,6 @@ importers:
passport-jwt:
specifier: ^4.0.1
version: 4.0.1
pdfjs-dist:
specifier: ^5.5.207
version: 5.5.207
pg-tsquery:
specifier: ^8.4.2
version: 8.4.2
@@ -1820,6 +1820,9 @@ packages:
resolution: {integrity: sha512-UJnjoFsmxfKUdNYdWgOB0mWUypuLvAfQPH1+pyvRJs6euowbFkFC6P13w1l8mJyi3vxYMxc9kld5jZEGRQs6bw==}
engines: {node: '>=18'}
'@docmost/pdf-inspector@1.9.4':
resolution: {integrity: sha512-G5DNyDtLNxybTXWakqi7PuOEuSb/A2ZjDlv2WCkOkiHszPeILdrC+G0a4e4UP10yxvzuLfb23pJ5jy8fUSYZPw==}
'@emnapi/core@1.8.1':
resolution: {integrity: sha512-AvT9QFpxK0Zd8J0jopedNm+w/2fIzvtPKPjqyw9jwvBaReTTqPBk9Hixaz7KbjimP+QNz605/XnjFcDAL2pqBg==}
@@ -2756,76 +2759,6 @@ packages:
cpu: [x64]
os: [win32]
'@napi-rs/canvas-android-arm64@0.1.97':
resolution: {integrity: sha512-V1c/WVw+NzH8vk7ZK/O8/nyBSCQimU8sfMsB/9qeSvdkGKNU7+mxy/bIF0gTgeBFmHpj30S4E9WHMSrxXGQuVQ==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [android]
'@napi-rs/canvas-darwin-arm64@0.1.97':
resolution: {integrity: sha512-ok+SCEF4YejcxuJ9Rm+WWunHHpf2HmiPxfz6z1a/NFQECGXtsY7A4B8XocK1LmT1D7P174MzwPF9Wy3AUAwEPw==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [darwin]
'@napi-rs/canvas-darwin-x64@0.1.97':
resolution: {integrity: sha512-PUP6e6/UGlclUvAQNnuXCcnkpdUou6VYZfQOQxExLp86epOylmiwLkqXIvpFmjoTEDmPmXrI+coL/9EFU1gKPA==}
engines: {node: '>= 10'}
cpu: [x64]
os: [darwin]
'@napi-rs/canvas-linux-arm-gnueabihf@0.1.97':
resolution: {integrity: sha512-XyXH2L/cic8eTNtbrXCcvqHtMX/nEOxN18+7rMrAM2XtLYC/EB5s0wnO1FsLMWmK+04ZSLN9FBGipo7kpIkcOw==}
engines: {node: '>= 10'}
cpu: [arm]
os: [linux]
'@napi-rs/canvas-linux-arm64-gnu@0.1.97':
resolution: {integrity: sha512-Kuq/M3djq0K8ktgz6nPlK7Ne5d4uWeDxPpyKWOjWDK2RIOhHVtLtyLiJw2fuldw7Vn4mhw05EZXCEr4Q76rs9w==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [linux]
'@napi-rs/canvas-linux-arm64-musl@0.1.97':
resolution: {integrity: sha512-kKmSkQVnWeqg7qdsiXvYxKhAFuHz3tkBjW/zyQv5YKUPhotpaVhpBGv5LqCngzyuRV85SXoe+OFj+Tv0a0QXkQ==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [linux]
'@napi-rs/canvas-linux-riscv64-gnu@0.1.97':
resolution: {integrity: sha512-Jc7I3A51jnEOIAXeLsN/M/+Z28LUeakcsXs07FLq9prXc0eYOtVwsDEv913Gr+06IRo34gJJVgT0TXvmz+N2VA==}
engines: {node: '>= 10'}
cpu: [riscv64]
os: [linux]
'@napi-rs/canvas-linux-x64-gnu@0.1.97':
resolution: {integrity: sha512-iDUBe7AilfuBSRbSa8/IGX38Mf+iCSBqoVKLSQ5XaY2JLOaqz1TVyPFEyIck7wT6mRQhQt5sN6ogfjIDfi74tg==}
engines: {node: '>= 10'}
cpu: [x64]
os: [linux]
'@napi-rs/canvas-linux-x64-musl@0.1.97':
resolution: {integrity: sha512-AKLFd/v0Z5fvgqBDqhvqtAdx+fHMJ5t9JcUNKq4FIZ5WH+iegGm8HPdj00NFlCSnm83Fp3Ln8I2f7uq1aIiWaA==}
engines: {node: '>= 10'}
cpu: [x64]
os: [linux]
'@napi-rs/canvas-win32-arm64-msvc@0.1.97':
resolution: {integrity: sha512-u883Yr6A6fO7Vpsy9YE4FVCIxzzo5sO+7pIUjjoDLjS3vQaNMkVzx5bdIpEL+ob+gU88WDK4VcxYMZ6nmnoX9A==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [win32]
'@napi-rs/canvas-win32-x64-msvc@0.1.97':
resolution: {integrity: sha512-sWtD2EE3fV0IzN+iiQUqr/Q1SwqWhs2O1FKItFlxtdDkikpEj5g7DKQpY3x55H/MAOnL8iomnlk3mcEeGiUMoQ==}
engines: {node: '>= 10'}
cpu: [x64]
os: [win32]
'@napi-rs/canvas@0.1.97':
resolution: {integrity: sha512-8cFniXvrIEnVwuNSRCW9wirRZbHvrD3JVujdS2P5n5xiJZNZMOZcfOvJ1pb66c7jXMKHHglJEDVJGbm8XWFcXQ==}
engines: {node: '>= 10'}
'@napi-rs/wasm-runtime@0.2.12':
resolution: {integrity: sha512-ZVWUcfwY4E/yPitQJl481FjFo3K22D6qF0DuFH6Y/nbnE11GY5uguDxZMGXPQ8WQ0128MXQD7TnfHyK4oWoIJQ==}
@@ -8545,9 +8478,6 @@ packages:
node-int64@0.4.0:
resolution: {integrity: sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==}
node-readable-to-web-readable-stream@0.4.2:
resolution: {integrity: sha512-/cMZNI34v//jUTrI+UIo4ieHAB5EZRY/+7OmXZgBxaWBMcW2tGdceIw06RFxWxrKZ5Jp3sI2i5TsRo+CBhtVLQ==}
node-releases@2.0.27:
resolution: {integrity: sha512-nmh3lCkYZ3grZvqcCH+fjmQ7X+H0OeZgP40OierEaAptX4XofMh5kwNbWh7lBduUzCcV/8kZ+NDLCwm2iorIlA==}
@@ -8839,10 +8769,6 @@ packages:
pause@0.0.1:
resolution: {integrity: sha512-KG8UEiEVkR3wGEb4m5yZkVCzigAD+cVEJck2CzYZO37ZGJfctvVptVO192MwrtPhzONn6go8ylnOdMhKqi4nfg==}
pdfjs-dist@5.5.207:
resolution: {integrity: sha512-WMqqw06w1vUt9ZfT0gOFhMf3wHsWhaCrxGrckGs5Cci6ybDW87IvPaOd2pnBwT6BJuP/CzXDZxjFgmSULLdsdw==}
engines: {node: '>=20.19.0 || >=22.13.0 || >=24'}
peberminta@0.9.0:
resolution: {integrity: sha512-XIxfHpEuSJbITd1H3EeQwpcZbTLHc+VVr8ANI9t5sit565tsI4/xK3KWTUFE2e6QiangUkh3B0jihzmGnNrRsQ==}
@@ -10318,6 +10244,7 @@ packages:
uuid@10.0.0:
resolution: {integrity: sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==}
deprecated: uuid@10 and below is no longer supported. For ESM codebases, update to uuid@latest. For CommonJS codebases, use uuid@11 (but be aware this version will likely be deprecated in 2028).
hasBin: true
uuid@11.1.0:
@@ -12183,6 +12110,8 @@ snapshots:
'@csstools/css-tokenizer@3.0.3': {}
'@docmost/pdf-inspector@1.9.4': {}
'@emnapi/core@1.8.1':
dependencies:
'@emnapi/wasi-threads': 1.1.0
@@ -13183,54 +13112,6 @@ snapshots:
'@msgpackr-extract/msgpackr-extract-win32-x64@3.0.2':
optional: true
'@napi-rs/canvas-android-arm64@0.1.97':
optional: true
'@napi-rs/canvas-darwin-arm64@0.1.97':
optional: true
'@napi-rs/canvas-darwin-x64@0.1.97':
optional: true
'@napi-rs/canvas-linux-arm-gnueabihf@0.1.97':
optional: true
'@napi-rs/canvas-linux-arm64-gnu@0.1.97':
optional: true
'@napi-rs/canvas-linux-arm64-musl@0.1.97':
optional: true
'@napi-rs/canvas-linux-riscv64-gnu@0.1.97':
optional: true
'@napi-rs/canvas-linux-x64-gnu@0.1.97':
optional: true
'@napi-rs/canvas-linux-x64-musl@0.1.97':
optional: true
'@napi-rs/canvas-win32-arm64-msvc@0.1.97':
optional: true
'@napi-rs/canvas-win32-x64-msvc@0.1.97':
optional: true
'@napi-rs/canvas@0.1.97':
optionalDependencies:
'@napi-rs/canvas-android-arm64': 0.1.97
'@napi-rs/canvas-darwin-arm64': 0.1.97
'@napi-rs/canvas-darwin-x64': 0.1.97
'@napi-rs/canvas-linux-arm-gnueabihf': 0.1.97
'@napi-rs/canvas-linux-arm64-gnu': 0.1.97
'@napi-rs/canvas-linux-arm64-musl': 0.1.97
'@napi-rs/canvas-linux-riscv64-gnu': 0.1.97
'@napi-rs/canvas-linux-x64-gnu': 0.1.97
'@napi-rs/canvas-linux-x64-musl': 0.1.97
'@napi-rs/canvas-win32-arm64-msvc': 0.1.97
'@napi-rs/canvas-win32-x64-msvc': 0.1.97
optional: true
'@napi-rs/wasm-runtime@0.2.12':
dependencies:
'@emnapi/core': 1.8.1
@@ -19617,9 +19498,6 @@ snapshots:
node-int64@0.4.0: {}
node-readable-to-web-readable-stream@0.4.2:
optional: true
node-releases@2.0.27: {}
nodemailer@8.0.5: {}
@@ -19971,11 +19849,6 @@ snapshots:
pause@0.0.1: {}
pdfjs-dist@5.5.207:
optionalDependencies:
'@napi-rs/canvas': 0.1.97
node-readable-to-web-readable-stream: 0.4.2
peberminta@0.9.0: {}
pend@1.2.0: {}