import { Injectable, Logger } from '@nestjs/common';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
import { ConfigService } from '@nestjs/config';
import * as fs from 'fs/promises';
import * as path from 'path';
import { v4 as uuidv4 } from 'uuid';
import { Task } from '../database/entities/task.entity';
import { PdfService } from './pdf.service';
import { QrCodeService } from './qr-code.service';
import { OcrService } from './ocr.service';

/**
 * Orchestrates the intake of a scanned document: page rendering, QR-code
 * detection, OCR, creation of the inbox task and archiving of the original.
 */
@Injectable()
export class DocumentPipelineService {
  private readonly logger = new Logger(DocumentPipelineService.name);

  /** Directory the original files are moved to after processing. */
  private readonly archiveDir: string;

  constructor(
    @InjectRepository(Task)
    private readonly taskRepo: Repository<Task>,
    private readonly pdfService: PdfService,
    private readonly qrCodeService: QrCodeService,
    private readonly ocrService: OcrService,
    private readonly configService: ConfigService,
  ) {
    this.archiveDir = this.configService.get<string>(
      'SCANNER_ARCHIVE_DIR',
      '/data/scanner/_processed_archive',
    );
  }

  /**
   * Processes a new document:
   * 1. PDF → images
   * 2. QR-code detection on page 1
   * 3. OCR on page 1
   * 4. Create the task in the database (inbox entry)
   * 5. Move the original into the archive (GoBD)
   *
   * The temporary page images are handed to `pdfService.cleanup` in every
   * case, including when a step throws.
   *
   * @param filePath Path of the PDF to process.
   * @returns The task entity that was created and saved.
   * @throws Error when the PDF yields no page images or the receipt number
   *   sequence cannot be continued; errors from the collaborating services,
   *   the repository and the file system propagate unchanged.
   */
  async processDocument(filePath: string): Promise<Task> {
    const taskId = uuidv4();
    const fileName = path.basename(filePath);
    this.logger.log(`Pipeline startet: ${fileName} (${taskId})`);

    let images: string[] = [];
    try {
      // 1. PDF → image(s). NOTE(review): 200 is presumably the render DPI — confirm in PdfService.
      images = await this.pdfService.pdfToImages(filePath, 200);
      this.logger.log(`${images.length} Seite(n) konvertiert`);

      // Without this guard an empty result would reach fs.readFile(undefined)
      // and fail with an unrelated-looking TypeError.
      const firstPagePath = images[0];
      if (firstPagePath === undefined) {
        throw new Error(`Keine Seiten aus PDF extrahiert: ${fileName}`);
      }

      // 2. Scan the first page for a QR code; only the first hit is parsed.
      const firstPageBuffer = await fs.readFile(firstPagePath);
      const qrResults = await this.qrCodeService.extractFromImage(firstPageBuffer);

      // NOTE(review): value type is an assumption; the real shape is whatever
      // QrCodeService.parseBarcode returns — confirm and tighten.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      let barcodeData: Record<string, any> | null = null;
      if (qrResults.length > 0) {
        barcodeData = this.qrCodeService.parseBarcode(qrResults[0].data);
        if (barcodeData) {
          this.logger.log(`QR-Code erkannt und validiert: ${JSON.stringify(barcodeData)}`);
        }
      }

      // 3. OCR on the first page.
      // NOTE(review): the returned markdown is currently discarded — nothing
      // below stores it on the task. The call is kept so that an OCR failure
      // still aborts the pipeline as before; persist the result or drop the call.
      await this.ocrService.extractTextAsMarkdown(firstPageBuffer);

      // 4. Create the task. One timestamp feeds both the receipt-number year
      // and Eingangsdatum so they cannot disagree across a year boundary.
      const now = new Date();
      const belegnummer = await this.nextBelegnummer(now.getFullYear());

      const task = this.taskRepo.create({
        TaskId: taskId,
        InterneBelegnummer: belegnummer,
        Eingangsdatum: now,
        Fertig: 0,
        BarcodeJson: barcodeData ? JSON.stringify(barcodeData) : null,
        DocumentType: barcodeData?.DocumentType ?? null,
        BetriebID: barcodeData?.BetriebID ?? null,
        Lieferant: barcodeData?.Lieferant ?? null,
        externeBelegnummer: barcodeData?.Nummer ?? null,
      });
      await this.taskRepo.save(task);
      this.logger.log(`Task erstellt: ${belegnummer}`);

      // 5. Archive the original. NOTE(review): if this step fails the task is
      // already saved while the file stays in place — confirm how callers
      // avoid processing the same file twice.
      await this.archiveFile(filePath);

      return task;
    } finally {
      await this.pdfService.cleanup(images);
    }
  }

  /**
   * Computes the next internal receipt number `<year>-<6-digit counter>`.
   *
   * The highest existing number for the year is found by string ordering,
   * which matches numeric ordering while the counter fits the six zero-padded
   * digits.
   *
   * NOTE(review): the read and the later insert are not atomic; two
   * concurrent pipeline runs could compute the same number — confirm a unique
   * constraint or a single-worker guarantee exists.
   *
   * @param year Four-digit year used as the number prefix.
   * @throws Error when the latest stored number has no numeric counter part.
   */
  private async nextBelegnummer(year: number): Promise<string> {
    const lastTask = await this.taskRepo
      .createQueryBuilder('t')
      .where('t.InterneBelegnummer LIKE :prefix', { prefix: `${year}-%` })
      .orderBy('t.InterneBelegnummer', 'DESC')
      .getOne();

    let nextNum = 1;
    if (lastTask) {
      const counterPart = lastTask.InterneBelegnummer.split('-')[1] ?? '';
      const lastNum = Number.parseInt(counterPart, 10);
      // Fail loudly instead of persisting "<year>-000NaN".
      if (Number.isNaN(lastNum)) {
        throw new Error(
          `Ungültige InterneBelegnummer in der Datenbank: ${lastTask.InterneBelegnummer}`,
        );
      }
      nextNum = lastNum + 1;
    }

    return `${year}-${String(nextNum).padStart(6, '0')}`;
  }

  /**
   * Moves the original file into the archive directory (GoBD).
   *
   * The target name is `<YYYY-MM-DD>_<original name>`; when that name is
   * already taken a numeric suffix is appended so an earlier archived file is
   * never overwritten. If the archive is on a different file system
   * (`EXDEV`), the file is copied and the source removed afterwards.
   *
   * @param filePath Path of the file to archive.
   */
  private async archiveFile(filePath: string): Promise<void> {
    await fs.mkdir(this.archiveDir, { recursive: true });

    const datePrefix = new Date().toISOString().slice(0, 10);
    const archivePath = await this.resolveFreeArchivePath(
      `${datePrefix}_${path.basename(filePath)}`,
    );

    try {
      await fs.rename(filePath, archivePath);
    } catch (err: unknown) {
      // rename cannot cross file-system boundaries; anything else is a real error.
      if ((err as NodeJS.ErrnoException | null)?.code !== 'EXDEV') {
        throw err;
      }
      await fs.copyFile(filePath, archivePath);
      await fs.unlink(filePath);
    }

    this.logger.log(`Archiviert: ${archivePath}`);
  }

  /**
   * Returns a path inside the archive directory that does not exist yet,
   * preferring `baseName` and falling back to `<name>_<n><ext>`.
   */
  private async resolveFreeArchivePath(baseName: string): Promise<string> {
    const { name, ext } = path.parse(baseName);
    for (let attempt = 0; ; attempt++) {
      const candidateName = attempt === 0 ? baseName : `${name}_${attempt}${ext}`;
      const candidate = path.join(this.archiveDir, candidateName);
      if (!(await this.pathExists(candidate))) {
        return candidate;
      }
    }
  }

  /** Reports whether `target` exists; errors other than ENOENT are rethrown. */
  private async pathExists(target: string): Promise<boolean> {
    try {
      await fs.access(target);
      return true;
    } catch (err: unknown) {
      if ((err as NodeJS.ErrnoException | null)?.code === 'ENOENT') {
        return false;
      }
      throw err;
    }
  }
}