119 lines
3.9 KiB
TypeScript
119 lines
3.9 KiB
TypeScript
import { Injectable, Logger } from '@nestjs/common';
|
|
import { InjectRepository } from '@nestjs/typeorm';
|
|
import { Repository } from 'typeorm';
|
|
import { ConfigService } from '@nestjs/config';
|
|
import * as fs from 'fs/promises';
|
|
import * as path from 'path';
|
|
import { v4 as uuidv4 } from 'uuid';
|
|
|
|
import { Task } from '../database/entities/task.entity';
|
|
import { PdfService } from './pdf.service';
|
|
import { QrCodeService } from './qr-code.service';
|
|
import { OcrService } from './ocr.service';
|
|
|
|
/**
 * Orchestrates the intake of a scanned PDF: page rendering, QR-code
 * detection, OCR, creation of the inbox task, and archiving of the original.
 */
@Injectable()
export class DocumentPipelineService {
  private readonly logger = new Logger(DocumentPipelineService.name);
  private readonly archiveDir: string;

  constructor(
    @InjectRepository(Task) private readonly taskRepo: Repository<Task>,
    private readonly pdfService: PdfService,
    private readonly qrCodeService: QrCodeService,
    private readonly ocrService: OcrService,
    private readonly configService: ConfigService,
  ) {
    // Archive location is configurable; the second argument is the fallback
    // used when SCANNER_ARCHIVE_DIR is not set.
    this.archiveDir = this.configService.get<string>(
      'SCANNER_ARCHIVE_DIR',
      '/data/scanner/_processed_archive',
    );
  }

  /**
   * Processes a newly arrived document:
   *   1. PDF -> images
   *   2. QR-code detection on page 1
   *   3. OCR on page 1
   *   4. Create the task in the database (inbox entry)
   *   5. Move the original into the archive (GoBD)
   *
   * The rendered page images are always handed to `pdfService.cleanup`,
   * whether the pipeline succeeds or fails.
   *
   * @param filePath Path of the PDF to process.
   * @returns The persisted task entity.
   * @throws Error if the PDF yields no pages or the latest stored
   *   `InterneBelegnummer` cannot be parsed; errors from the collaborating
   *   services, the repository and the file system propagate unchanged.
   */
  async processDocument(filePath: string): Promise<Task> {
    const taskId = uuidv4();
    const fileName = path.basename(filePath);
    this.logger.log(`Pipeline startet: ${fileName} (${taskId})`);

    let images: string[] = [];

    try {
      // 1. PDF -> image(s)
      images = await this.pdfService.pdfToImages(filePath, 200);
      this.logger.log(`${images.length} Seite(n) konvertiert`);

      // Guard against an empty render result; otherwise readFile(undefined)
      // fails with an opaque TypeError.
      const firstPagePath = images[0];
      if (firstPagePath === undefined) {
        throw new Error(`PDF enthält keine Seiten: ${fileName}`);
      }

      // 2. Scan the first page for a QR code
      const firstPageBuffer = await fs.readFile(firstPagePath);
      const qrResults = await this.qrCodeService.extractFromImage(firstPageBuffer);

      let barcodeData: Record<string, any> | null = null;
      if (qrResults.length > 0) {
        // Only the first detected code is evaluated.
        barcodeData = this.qrCodeService.parseBarcode(qrResults[0].data);
        if (barcodeData) {
          this.logger.log(`QR-Code erkannt und validiert: ${JSON.stringify(barcodeData)}`);
        }
      }

      // 3. OCR on the first page
      // NOTE(review): the OCR result is not stored anywhere in this method —
      // confirm whether it should be persisted on the task.
      await this.ocrService.extractTextAsMarkdown(firstPageBuffer);

      // 4. Create the task in the database
      // A single timestamp keeps the number's year and Eingangsdatum in sync.
      const now = new Date();
      const year = now.getFullYear();

      // Numbers are zero-padded, so a descending string sort yields the
      // highest number issued for this year.
      // NOTE(review): read-then-insert is not atomic; concurrent calls could
      // compute the same number unless the column is unique or calls are
      // serialized elsewhere — confirm.
      const lastTask = await this.taskRepo
        .createQueryBuilder('t')
        .where('t.InterneBelegnummer LIKE :prefix', { prefix: `${year}-%` })
        .orderBy('t.InterneBelegnummer', 'DESC')
        .getOne();

      const belegnummer = this.buildNextBelegnummer(
        year,
        lastTask ? lastTask.InterneBelegnummer : null,
      );

      const task = this.taskRepo.create({
        TaskId: taskId,
        InterneBelegnummer: belegnummer,
        Eingangsdatum: now,
        Fertig: 0,
        BarcodeJson: barcodeData ? JSON.stringify(barcodeData) : null,
        DocumentType: barcodeData?.DocumentType ?? null,
        BetriebID: barcodeData?.BetriebID ?? null,
        Lieferant: barcodeData?.Lieferant ?? null,
        externeBelegnummer: barcodeData?.Nummer ?? null,
      });

      await this.taskRepo.save(task);
      this.logger.log(`Task erstellt: ${belegnummer}`);

      // 5. GoBD archiving
      // NOTE(review): the task is already saved at this point; if archiving
      // fails the source file stays in place while the task exists.
      await this.archiveFile(filePath);

      return task;
    } finally {
      await this.pdfService.cleanup(images);
    }
  }

  /**
   * Builds the next internal document number in the form `YYYY-NNNNNN`.
   *
   * @param year Year used as the number's prefix.
   * @param lastBelegnummer Highest number already issued for that year, or
   *   `null` if none exists yet (numbering then starts at 1).
   * @returns The next number, zero-padded to six digits.
   * @throws Error if the sequence part of `lastBelegnummer` is not numeric.
   */
  private buildNextBelegnummer(year: number, lastBelegnummer: string | null): string {
    let nextNum = 1;

    if (lastBelegnummer !== null) {
      const lastNum = Number.parseInt(lastBelegnummer.split('-')[1] ?? '', 10);
      // Fail loudly instead of persisting a number like "2024-000NaN".
      if (Number.isNaN(lastNum)) {
        throw new Error(`Ungültige InterneBelegnummer in der Datenbank: ${lastBelegnummer}`);
      }
      nextNum = lastNum + 1;
    }

    return `${year}-${String(nextNum).padStart(6, '0')}`;
  }

  /**
   * Moves the original file into the archive directory (GoBD).
   *
   * The archived name is `<YYYY-MM-DD>_<original name>`, where the date is
   * the UTC date from `toISOString()`. An existing archive file is never
   * overwritten: on a name clash a numeric suffix is inserted before the
   * extension.
   *
   * @param filePath Path of the file to archive.
   * @throws Any file-system error other than the handled `EXDEV` case.
   */
  private async archiveFile(filePath: string): Promise<void> {
    await fs.mkdir(this.archiveDir, { recursive: true });

    const datePrefix = new Date().toISOString().slice(0, 10);
    const fileName = path.basename(filePath);
    const archivePath = await this.findFreeArchivePath(`${datePrefix}_${fileName}`);

    try {
      await fs.rename(filePath, archivePath);
    } catch (err: unknown) {
      const code = err instanceof Error ? (err as NodeJS.ErrnoException).code : undefined;
      if (code !== 'EXDEV') {
        throw err;
      }
      // rename() cannot cross file-system boundaries; copy, then remove the source.
      await fs.copyFile(filePath, archivePath);
      await fs.unlink(filePath);
    }

    this.logger.log(`Archiviert: ${archivePath}`);
  }

  /**
   * Returns a path inside the archive directory that does not exist yet,
   * starting with `baseName` and then trying `<name>_1<ext>`, `<name>_2<ext>`, ...
   *
   * @param baseName Preferred file name within the archive directory.
   * @returns The first candidate path for which `fs.access` fails.
   */
  private async findFreeArchivePath(baseName: string): Promise<string> {
    const { name, ext } = path.parse(baseName);

    for (let attempt = 0; ; attempt += 1) {
      const candidateName = attempt === 0 ? baseName : `${name}_${attempt}${ext}`;
      const candidate = path.join(this.archiveDir, candidateName);
      try {
        await fs.access(candidate);
      } catch {
        // access() rejected, so the candidate is treated as free; any other
        // problem with the path surfaces on the subsequent move.
        return candidate;
      }
    }
  }
}
|