feat(ee): PDF import (#2142)

* feat: replace pdfjs-dist with firecrawl-pdf-inspector

* use modified firecrawl-pdf-inspector

* feat: pdf import

* increase single file upload size limit

* use npm package

* sync

* update package
This commit is contained in:
Philip Okugbe
2026-05-01 14:56:39 +01:00
committed by GitHub
parent 641ce142df
commit c247d4c1e3
7 changed files with 93 additions and 142 deletions
+9 -136
View File
@@ -482,6 +482,9 @@ importers:
'@clickhouse/client':
specifier: ^1.18.2
version: 1.18.2
'@docmost/pdf-inspector':
specifier: 1.9.4
version: 1.9.4
'@fastify/cookie':
specifier: ^11.0.2
version: 11.0.2
@@ -671,9 +674,6 @@ importers:
passport-jwt:
specifier: ^4.0.1
version: 4.0.1
pdfjs-dist:
specifier: ^5.5.207
version: 5.5.207
pg-tsquery:
specifier: ^8.4.2
version: 8.4.2
@@ -1826,6 +1826,9 @@ packages:
resolution: {integrity: sha512-UJnjoFsmxfKUdNYdWgOB0mWUypuLvAfQPH1+pyvRJs6euowbFkFC6P13w1l8mJyi3vxYMxc9kld5jZEGRQs6bw==}
engines: {node: '>=18'}
'@docmost/pdf-inspector@1.9.4':
resolution: {integrity: sha512-G5DNyDtLNxybTXWakqi7PuOEuSb/A2ZjDlv2WCkOkiHszPeILdrC+G0a4e4UP10yxvzuLfb23pJ5jy8fUSYZPw==}
'@emnapi/core@1.8.1':
resolution: {integrity: sha512-AvT9QFpxK0Zd8J0jopedNm+w/2fIzvtPKPjqyw9jwvBaReTTqPBk9Hixaz7KbjimP+QNz605/XnjFcDAL2pqBg==}
@@ -2762,76 +2765,6 @@ packages:
cpu: [x64]
os: [win32]
'@napi-rs/canvas-android-arm64@0.1.97':
resolution: {integrity: sha512-V1c/WVw+NzH8vk7ZK/O8/nyBSCQimU8sfMsB/9qeSvdkGKNU7+mxy/bIF0gTgeBFmHpj30S4E9WHMSrxXGQuVQ==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [android]
'@napi-rs/canvas-darwin-arm64@0.1.97':
resolution: {integrity: sha512-ok+SCEF4YejcxuJ9Rm+WWunHHpf2HmiPxfz6z1a/NFQECGXtsY7A4B8XocK1LmT1D7P174MzwPF9Wy3AUAwEPw==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [darwin]
'@napi-rs/canvas-darwin-x64@0.1.97':
resolution: {integrity: sha512-PUP6e6/UGlclUvAQNnuXCcnkpdUou6VYZfQOQxExLp86epOylmiwLkqXIvpFmjoTEDmPmXrI+coL/9EFU1gKPA==}
engines: {node: '>= 10'}
cpu: [x64]
os: [darwin]
'@napi-rs/canvas-linux-arm-gnueabihf@0.1.97':
resolution: {integrity: sha512-XyXH2L/cic8eTNtbrXCcvqHtMX/nEOxN18+7rMrAM2XtLYC/EB5s0wnO1FsLMWmK+04ZSLN9FBGipo7kpIkcOw==}
engines: {node: '>= 10'}
cpu: [arm]
os: [linux]
'@napi-rs/canvas-linux-arm64-gnu@0.1.97':
resolution: {integrity: sha512-Kuq/M3djq0K8ktgz6nPlK7Ne5d4uWeDxPpyKWOjWDK2RIOhHVtLtyLiJw2fuldw7Vn4mhw05EZXCEr4Q76rs9w==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [linux]
'@napi-rs/canvas-linux-arm64-musl@0.1.97':
resolution: {integrity: sha512-kKmSkQVnWeqg7qdsiXvYxKhAFuHz3tkBjW/zyQv5YKUPhotpaVhpBGv5LqCngzyuRV85SXoe+OFj+Tv0a0QXkQ==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [linux]
'@napi-rs/canvas-linux-riscv64-gnu@0.1.97':
resolution: {integrity: sha512-Jc7I3A51jnEOIAXeLsN/M/+Z28LUeakcsXs07FLq9prXc0eYOtVwsDEv913Gr+06IRo34gJJVgT0TXvmz+N2VA==}
engines: {node: '>= 10'}
cpu: [riscv64]
os: [linux]
'@napi-rs/canvas-linux-x64-gnu@0.1.97':
resolution: {integrity: sha512-iDUBe7AilfuBSRbSa8/IGX38Mf+iCSBqoVKLSQ5XaY2JLOaqz1TVyPFEyIck7wT6mRQhQt5sN6ogfjIDfi74tg==}
engines: {node: '>= 10'}
cpu: [x64]
os: [linux]
'@napi-rs/canvas-linux-x64-musl@0.1.97':
resolution: {integrity: sha512-AKLFd/v0Z5fvgqBDqhvqtAdx+fHMJ5t9JcUNKq4FIZ5WH+iegGm8HPdj00NFlCSnm83Fp3Ln8I2f7uq1aIiWaA==}
engines: {node: '>= 10'}
cpu: [x64]
os: [linux]
'@napi-rs/canvas-win32-arm64-msvc@0.1.97':
resolution: {integrity: sha512-u883Yr6A6fO7Vpsy9YE4FVCIxzzo5sO+7pIUjjoDLjS3vQaNMkVzx5bdIpEL+ob+gU88WDK4VcxYMZ6nmnoX9A==}
engines: {node: '>= 10'}
cpu: [arm64]
os: [win32]
'@napi-rs/canvas-win32-x64-msvc@0.1.97':
resolution: {integrity: sha512-sWtD2EE3fV0IzN+iiQUqr/Q1SwqWhs2O1FKItFlxtdDkikpEj5g7DKQpY3x55H/MAOnL8iomnlk3mcEeGiUMoQ==}
engines: {node: '>= 10'}
cpu: [x64]
os: [win32]
'@napi-rs/canvas@0.1.97':
resolution: {integrity: sha512-8cFniXvrIEnVwuNSRCW9wirRZbHvrD3JVujdS2P5n5xiJZNZMOZcfOvJ1pb66c7jXMKHHglJEDVJGbm8XWFcXQ==}
engines: {node: '>= 10'}
'@napi-rs/wasm-runtime@0.2.12':
resolution: {integrity: sha512-ZVWUcfwY4E/yPitQJl481FjFo3K22D6qF0DuFH6Y/nbnE11GY5uguDxZMGXPQ8WQ0128MXQD7TnfHyK4oWoIJQ==}
@@ -8551,9 +8484,6 @@ packages:
node-int64@0.4.0:
resolution: {integrity: sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==}
node-readable-to-web-readable-stream@0.4.2:
resolution: {integrity: sha512-/cMZNI34v//jUTrI+UIo4ieHAB5EZRY/+7OmXZgBxaWBMcW2tGdceIw06RFxWxrKZ5Jp3sI2i5TsRo+CBhtVLQ==}
node-releases@2.0.27:
resolution: {integrity: sha512-nmh3lCkYZ3grZvqcCH+fjmQ7X+H0OeZgP40OierEaAptX4XofMh5kwNbWh7lBduUzCcV/8kZ+NDLCwm2iorIlA==}
@@ -8845,10 +8775,6 @@ packages:
pause@0.0.1:
resolution: {integrity: sha512-KG8UEiEVkR3wGEb4m5yZkVCzigAD+cVEJck2CzYZO37ZGJfctvVptVO192MwrtPhzONn6go8ylnOdMhKqi4nfg==}
pdfjs-dist@5.5.207:
resolution: {integrity: sha512-WMqqw06w1vUt9ZfT0gOFhMf3wHsWhaCrxGrckGs5Cci6ybDW87IvPaOd2pnBwT6BJuP/CzXDZxjFgmSULLdsdw==}
engines: {node: '>=20.19.0 || >=22.13.0 || >=24'}
peberminta@0.9.0:
resolution: {integrity: sha512-XIxfHpEuSJbITd1H3EeQwpcZbTLHc+VVr8ANI9t5sit565tsI4/xK3KWTUFE2e6QiangUkh3B0jihzmGnNrRsQ==}
@@ -10328,6 +10254,7 @@ packages:
uuid@10.0.0:
resolution: {integrity: sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==}
deprecated: uuid@10 and below is no longer supported. For ESM codebases, update to uuid@latest. For CommonJS codebases, use uuid@11 (but be aware this version will likely be deprecated in 2028).
hasBin: true
uuid@11.1.0:
@@ -12193,6 +12120,8 @@ snapshots:
'@csstools/css-tokenizer@3.0.3': {}
'@docmost/pdf-inspector@1.9.4': {}
'@emnapi/core@1.8.1':
dependencies:
'@emnapi/wasi-threads': 1.1.0
@@ -13193,54 +13122,6 @@ snapshots:
'@msgpackr-extract/msgpackr-extract-win32-x64@3.0.2':
optional: true
'@napi-rs/canvas-android-arm64@0.1.97':
optional: true
'@napi-rs/canvas-darwin-arm64@0.1.97':
optional: true
'@napi-rs/canvas-darwin-x64@0.1.97':
optional: true
'@napi-rs/canvas-linux-arm-gnueabihf@0.1.97':
optional: true
'@napi-rs/canvas-linux-arm64-gnu@0.1.97':
optional: true
'@napi-rs/canvas-linux-arm64-musl@0.1.97':
optional: true
'@napi-rs/canvas-linux-riscv64-gnu@0.1.97':
optional: true
'@napi-rs/canvas-linux-x64-gnu@0.1.97':
optional: true
'@napi-rs/canvas-linux-x64-musl@0.1.97':
optional: true
'@napi-rs/canvas-win32-arm64-msvc@0.1.97':
optional: true
'@napi-rs/canvas-win32-x64-msvc@0.1.97':
optional: true
'@napi-rs/canvas@0.1.97':
optionalDependencies:
'@napi-rs/canvas-android-arm64': 0.1.97
'@napi-rs/canvas-darwin-arm64': 0.1.97
'@napi-rs/canvas-darwin-x64': 0.1.97
'@napi-rs/canvas-linux-arm-gnueabihf': 0.1.97
'@napi-rs/canvas-linux-arm64-gnu': 0.1.97
'@napi-rs/canvas-linux-arm64-musl': 0.1.97
'@napi-rs/canvas-linux-riscv64-gnu': 0.1.97
'@napi-rs/canvas-linux-x64-gnu': 0.1.97
'@napi-rs/canvas-linux-x64-musl': 0.1.97
'@napi-rs/canvas-win32-arm64-msvc': 0.1.97
'@napi-rs/canvas-win32-x64-msvc': 0.1.97
optional: true
'@napi-rs/wasm-runtime@0.2.12':
dependencies:
'@emnapi/core': 1.8.1
@@ -19627,9 +19508,6 @@ snapshots:
node-int64@0.4.0: {}
node-readable-to-web-readable-stream@0.4.2:
optional: true
node-releases@2.0.27: {}
nodemailer@8.0.5: {}
@@ -19981,11 +19859,6 @@ snapshots:
pause@0.0.1: {}
pdfjs-dist@5.5.207:
optionalDependencies:
'@napi-rs/canvas': 0.1.97
node-readable-to-web-readable-stream: 0.4.2
peberminta@0.9.0: {}
pend@1.2.0: {}