From 44d5206e078a1fac07e91883f1358d94781d6f4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20P=C3=B6ttker?=
Date: Tue, 5 May 2026 08:22:18 +0200
Subject: [PATCH] feat: implement checksum-based duplicate detection for split email attachments

---
Review notes (not part of the commit message):
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy; two-space indentation is assumed. Blob ids in the index lines are
  those of the original patch.
* Restored the Promise<boolean> return types on both checkSplitChecksum
  signatures.
* email-import.service.ts: an empty or inverted page range now returns false
  instead of hashing an empty document; the trailing hunk context was
  shortened because the generatePrintPdf signature was garbled in the copy.
* MailImportWizard.tsx: the split index is looked up inside the state updater
  instead of being captured from the render-time array.

 .../src/email/email-import.controller.ts      | 11 +++
 .../src/email/email-import.service.ts         | 33 ++++++++
 paperless-frontend/src/api/email-import.ts    |  5 ++
 .../src/components/MailImportWizard.tsx       | 85 +++++++++++++-------
 4 files changed, 105 insertions(+), 29 deletions(-)

diff --git a/paperless-backend/src/email/email-import.controller.ts b/paperless-backend/src/email/email-import.controller.ts
index 23ad3f2..4ca5e6f 100644
--- a/paperless-backend/src/email/email-import.controller.ts
+++ b/paperless-backend/src/email/email-import.controller.ts
@@ -67,6 +67,17 @@ export class EmailImportController {
     return { success: true };
   }
 
+  // --- Split Checksum Check ---
+  @Post('attachments/:attachmentId/check-split-checksum')
+  @RequirePermissions(Permission.VIEW_MAIL)
+  async checkSplitChecksum(
+    @Param('attachmentId') attachmentId: number,
+    @Body() body: { pages: { start: number; end: number } },
+  ) {
+    const isDuplicate = await this.importService.checkSplitChecksum(attachmentId, body.pages);
+    return { isDuplicate };
+  }
+
   // --- Print Preview ---
   @Post('attachments/:attachmentId/print-preview')
   @RequirePermissions(Permission.VIEW_MAIL)
diff --git a/paperless-backend/src/email/email-import.service.ts b/paperless-backend/src/email/email-import.service.ts
index c7f7450..ec0e0bc 100644
--- a/paperless-backend/src/email/email-import.service.ts
+++ b/paperless-backend/src/email/email-import.service.ts
@@ -16,6 +16,7 @@ import { PdfService } from '../preprocessing/pdf.service';
 import * as path from 'path';
 import * as os from 'os';
 import * as fs from 'fs/promises';
+import * as crypto from 'crypto';
 
 @Injectable()
 export class EmailImportService {
@@ -154,4 +155,36 @@ export class EmailImportService {
     }
   }
 
+  // --- Checksum Check for Split Documents ---
+  /**
+   * Reports whether the selected page range of an attachment is already known,
+   * by hashing the extracted pages and asking paperlessService.checksumExists.
+   *
+   * @param attachmentId id used to look up the stored attachment content
+   * @param pages 1-based inclusive page range; an end of 999 means "up to the last page"
+   * @returns false when no content is stored or the range selects no pages,
+   *   otherwise the result of the checksum lookup
+   */
+  async checkSplitChecksum(attachmentId: number, pages: { start: number; end: number }): Promise<boolean> {
+    const content = await this.contentRepo.findOne({ where: { AttachmentEntityId: attachmentId } });
+    if (!content) return false;
+
+    const pdfDoc = await PDFDocument.load(content.Content1, { ignoreEncryption: true });
+    const total = pdfDoc.getPageCount();
+    const startIdx = Math.max(1, pages.start) - 1;
+    const endIdx = Math.min(pages.end === 999 ? total : pages.end, total) - 1;
+    // An inverted, out-of-range or NaN selection would otherwise hash an empty document.
+    if (!(endIdx >= startIdx)) return false;
+
+    const sliced = await PDFDocument.create();
+    const indices = Array.from({ length: endIdx - startIdx + 1 }, (_, i) => startIdx + i);
+    const copied = await sliced.copyPages(pdfDoc, indices);
+    copied.forEach(p => sliced.addPage(p));
+
+    // NOTE(review): this hashes freshly serialized bytes. Confirm that saving the same pages
+    // twice is byte-identical (pdf-lib may stamp metadata dates), or the lookup can never match.
+    const checksum = crypto.createHash('md5').update(Buffer.from(await sliced.save())).digest('hex');
+    return this.paperlessService.checksumExists(checksum);
+  }
+
   // --- Print Preview ---
diff --git a/paperless-frontend/src/api/email-import.ts b/paperless-frontend/src/api/email-import.ts
index e6cc51f..a516820 100644
--- a/paperless-frontend/src/api/email-import.ts
+++ b/paperless-frontend/src/api/email-import.ts
@@ -57,6 +57,11 @@ export const emailImportApi = {
     return res.data;
   },
 
+  checkSplitChecksum: async (attachmentId: number, pages: { start: number; end: number }): Promise<boolean> => {
+    const res = await api.post<{ isDuplicate: boolean }>(`/api/email-import/attachments/${attachmentId}/check-split-checksum`, { pages });
+    return res.data.isDuplicate;
+  },
+
   executeImport: async (emailDate: string, attachments: AttachmentImportData[]): Promise<{ success: boolean; results: any[] }> => {
     const res = await api.post('/api/email-import/execute', { emailDate, attachments });
     return res.data;
diff --git a/paperless-frontend/src/components/MailImportWizard.tsx b/paperless-frontend/src/components/MailImportWizard.tsx
index 075987d..6ed2f98 100644
--- a/paperless-frontend/src/components/MailImportWizard.tsx
+++ b/paperless-frontend/src/components/MailImportWizard.tsx
@@ -131,40 +131,67 @@ export default function MailImportWizard({ visible, onClose, email, attachments
     setImportData(prev => prev.map(item => item.virtualId === virtualId ? { ...item, [key]: value } : item));
   };
 
-  const handleSplit = (virtualId: string, splitPage: number) => {
-    setImportData(prev => {
-      const idx = prev.findIndex(i => i.virtualId === virtualId);
-      if (idx === -1) return prev;
-
-      const itemToSplit = prev[idx];
-      const start = itemToSplit.pages?.start || 1;
-      const end = itemToSplit.pages?.end || 999; // 999 means to the end
-
-      const part1 = { ...itemToSplit, virtualId: `${itemToSplit.attachmentId}_${start}_${splitPage}`, pages: { start, end: splitPage }, fileName: `${itemToSplit.fileName} (Teil 1)` };
-      const part2 = { ...itemToSplit, virtualId: `${itemToSplit.attachmentId}_${splitPage+1}_${end}`, pages: { start: splitPage + 1, end }, fileName: `${itemToSplit.fileName} (Teil 2)` };
-
-      // Propagate date and barcode
-      const parentDate = eingangsdaten[virtualId] || dayjs(email.Date);
-      const parentBarcode = barcodes[virtualId];
-
-      setEingangsdaten(prev => ({
-        ...prev,
-        [part1.virtualId]: parentDate,
-        [part2.virtualId]: parentDate,
-      }));
-
-      if (parentBarcode) {
-        setBarcodes(prev => ({
-          ...prev,
-          [part1.virtualId]: { ...parentBarcode },
-          [part2.virtualId]: { ...parentBarcode },
-        }));
-      }
+  /**
+   * Splits an import entry after `splitPage` into two page ranges, copies the
+   * parent's date and barcode to both parts, and then asks the backend whether
+   * either part is a duplicate; duplicate parts are flagged and set to IGNORE.
+   */
+  const handleSplit = async (virtualId: string, splitPage: number) => {
+    const itemToSplit = importData.find(i => i.virtualId === virtualId);
+    if (!itemToSplit) return;
+    const start = itemToSplit.pages?.start || 1;
+    const end = itemToSplit.pages?.end || 999; // 999 means to the end
+
+    const part1Pages = { start, end: splitPage };
+    const part2Pages = { start: splitPage + 1, end };
+
+    const part1 = { ...itemToSplit, virtualId: `${itemToSplit.attachmentId}_${start}_${splitPage}`, pages: part1Pages, fileName: `${itemToSplit.fileName} (Teil 1)` };
+    const part2 = { ...itemToSplit, virtualId: `${itemToSplit.attachmentId}_${splitPage+1}_${end}`, pages: part2Pages, fileName: `${itemToSplit.fileName} (Teil 2)` };
+
+    const parentDate = eingangsdaten[virtualId] || dayjs(email.Date);
+    const parentBarcode = barcodes[virtualId];
+
+    setEingangsdaten(prev => ({
+      ...prev,
+      [part1.virtualId]: parentDate,
+      [part2.virtualId]: parentDate,
+    }));
+
+    if (parentBarcode) {
+      setBarcodes(prev => ({
+        ...prev,
+        [part1.virtualId]: { ...parentBarcode },
+        [part2.virtualId]: { ...parentBarcode },
+      }));
+    }
+
+    setImportData(prev => {
+      // Look the entry up in the state being updated: an index taken from the
+      // render-time array can point at the wrong item if other updates are queued.
+      const idx = prev.findIndex(i => i.virtualId === virtualId);
+      if (idx === -1) return prev;
 
       const newArray = [...prev];
       newArray.splice(idx, 1, part1, part2);
       return newArray;
     });
+
+    // Check the checksums of the split parts
+    try {
+      const [dup1, dup2] = await Promise.all([
+        emailImportApi.checkSplitChecksum(itemToSplit.attachmentId, part1Pages),
+        emailImportApi.checkSplitChecksum(itemToSplit.attachmentId, part2Pages),
+      ]);
+      if (dup1 || dup2) {
+        setImportData(prev => prev.map(item => {
+          if (item.virtualId === part1.virtualId && dup1) return { ...item, isDuplicate: true, type: 'IGNORE' as const };
+          if (item.virtualId === part2.virtualId && dup2) return { ...item, isDuplicate: true, type: 'IGNORE' as const };
+          return item;
+        }));
+      }
+    } catch (e) {
+      console.error('Fehler bei Checksummen-Prüfung nach Split', e);
+    }
   };
 
   const loadBelegnummern = async () => {