feat: implement checksum-based duplicate detection for split email attachments
Build and Push Multi-Platform Images / build-and-push (push) Successful in 33s

This commit is contained in:
2026-05-05 08:22:18 +02:00
parent b47ad17568
commit 44d5206e07
4 changed files with 84 additions and 29 deletions
@@ -16,6 +16,7 @@ import { PdfService } from '../preprocessing/pdf.service';
import * as path from 'path';
import * as os from 'os';
import * as fs from 'fs/promises';
import * as crypto from 'crypto';
@Injectable()
export class EmailImportService {
@@ -154,6 +155,25 @@ export class EmailImportService {
}
}
// --- Checksum Check for Split Documents ---
/**
 * Checks whether the given page range of a stored attachment already exists
 * in Paperless, by comparing checksums.
 *
 * The attachment's stored PDF is loaded, the requested pages are copied into
 * a fresh document, and the MD5 hex digest of the saved bytes is passed to
 * `paperlessService.checksumExists`.
 *
 * @param attachmentId - Value matched against `AttachmentEntityId` when looking up the stored content.
 * @param pages - 1-based, inclusive page range. An `end` of 999 is treated as
 *   "through the last page"; both bounds are clamped to the document.
 * @returns `false` when no content is stored for the attachment or when the
 *   clamped range selects no pages; otherwise the result of the checksum lookup.
 */
async checkSplitChecksum(attachmentId: number, pages: { start: number; end: number }): Promise<boolean> {
  const content = await this.contentRepo.findOne({ where: { AttachmentEntityId: attachmentId } });
  if (!content) return false;

  const pdfDoc = await PDFDocument.load(content.Content1, { ignoreEncryption: true });
  const total = pdfDoc.getPageCount();

  // Convert the 1-based inclusive range into clamped 0-based indices.
  const startIdx = Math.max(1, pages.start) - 1;
  const endIdx = Math.min(pages.end === 999 ? total : pages.end, total) - 1;

  // An empty or inverted range (start past the last page, end < start, or NaN
  // bounds) would copy zero pages and end up hashing a document that has
  // nothing to do with the attachment. Report "no duplicate" instead.
  const pageCount = endIdx - startIdx + 1;
  if (!(pageCount > 0)) return false;

  const sliced = await PDFDocument.create();
  const indices = Array.from({ length: pageCount }, (_, i) => startIdx + i);
  const copied = await sliced.copyPages(pdfDoc, indices);
  copied.forEach(p => sliced.addPage(p));

  // NOTE(review): the digest is only meaningful if the upload path serializes
  // the split document to exactly the same bytes (including any metadata the
  // PDF library writes on create/save) — TODO confirm against the upload code.
  const checksum = crypto.createHash('md5').update(Buffer.from(await sliced.save())).digest('hex');
  return this.paperlessService.checksumExists(checksum);
}
// --- Print Preview ---
async generatePrintPdf(attachmentId: number, barcodeData: any): Promise<Buffer> {
const content = await this.contentRepo.findOne({ where: { AttachmentEntityId: attachmentId } });