Files
paperlessmanager/paperless-backend/src/barcode/barcode-scanner.service.ts
T
bjoernpoettker dad0136365
Build and Push Multi-Platform Images / build-and-push (push) Successful in 41s
chore: apply ESLint auto-fix across entire backend
Reformats code style (line breaks, indentation, type annotations)
without changing logic. Also includes minor feature additions bundled
in the same lint run (stats service, user-settings groups, agrarmonitor
polling improvements).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 09:02:02 +02:00

320 lines
9.6 KiB
TypeScript

import { Injectable, Logger, OnApplicationBootstrap } from '@nestjs/common';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
import * as fs from 'fs/promises';
import sharp = require('sharp');
import { PdfService } from '../preprocessing/pdf.service';
import { QrCodeService } from '../preprocessing/qr-code.service';
import {
BarcodeTemplate,
type BarcodeActionType,
} from '../database/entities/barcode-template.entity';
import {
InboxDocument,
type StoredQrCode,
} from '../database/entities/inbox-document.entity';
import { PageCacheService } from './page-cache.service';
import {
applyTemplate,
buildVariables,
} from '../inbox-postprocessor/variable-resolver';
/**
 * A stored QR code paired with the first barcode template whose regex
 * matches its value. All template-derived fields fall back to
 * `null` / `false` / `[]` when no template matches.
 */
export interface MatchedBarcode {
/** 1-based page number on which the QR code was found. */
page: number;
/** Decoded QR code payload. */
value: string;
/** `Id` of the first matching template, or `null` when none matches. */
templateId: number | null;
/** `Name` of the first matching template, or `null` when none matches. */
templateName: string | null;
/**
 * The matching template's `DateinameTemplate` after variable substitution,
 * or `null` when there is no match or the template defines none.
 */
dateinameTemplate: string | null;
/** The matching template's `SplitBefore` flag; `false` without a match. */
splitBefore: boolean;
/** The matching template's `Actions`; empty without a match. */
actions: BarcodeActionType[];
}
/**
 * Scans inbox PDFs for QR codes, matches them against the configured
 * barcode templates and persists the results on the `InboxDocument` row.
 */
@Injectable()
export class BarcodeScannerService implements OnApplicationBootstrap {
  /** Resolution passed to the PDF renderer for every scan (the original comments call it a high DPI). */
  private static readonly RENDER_DPI = 400;

  private readonly logger = new Logger(BarcodeScannerService.name);

  /** Templates ordered by `Id`; `null` means "reload on next access". */
  private templatesCache: BarcodeTemplate[] | null = null;

  /**
   * Compiled template patterns keyed by their source string. `null` marks a
   * pattern that failed to compile, so it is reported only once.
   */
  private readonly regexCache = new Map<string, RegExp | null>();

  constructor(
    private readonly pdfService: PdfService,
    private readonly qrCodeService: QrCodeService,
    private readonly pageCache: PageCacheService,
    @InjectRepository(BarcodeTemplate)
    private readonly templateRepo: Repository<BarcodeTemplate>,
    @InjectRepository(InboxDocument)
    private readonly documentRepo: Repository<InboxDocument>,
  ) {}

  /** Runs the legacy `SPLIT_BEFORE` migration once at application start. */
  async onApplicationBootstrap(): Promise<void> {
    await this.migrateLegacySplitBefore();
  }

  /** Drops the cached templates (and their compiled patterns) so the next lookup reloads them. */
  invalidateTemplates(): void {
    this.templatesCache = null;
    this.regexCache.clear();
  }

  /**
   * Converts templates that still carry the legacy `'SPLIT_BEFORE'` action
   * into the `SplitBefore` flag. Best-effort: a failing query or a failing
   * save is logged and must not prevent the application from starting.
   */
  private async migrateLegacySplitBefore(): Promise<void> {
    let rows: BarcodeTemplate[];
    try {
      rows = await this.templateRepo.find();
    } catch (err: unknown) {
      this.logger.warn(
        `Template-Migration: Query fehlgeschlagen: ${this.describeError(err)}`,
      );
      return;
    }

    let migrated = 0;
    for (const tpl of rows) {
      // Compared as plain strings because the legacy value is matched by name only.
      const actions = (tpl.Actions ?? []) as string[];
      if (!actions.includes('SPLIT_BEFORE')) continue;

      tpl.SplitBefore = true;
      tpl.Actions = actions.filter(
        (a) => a !== 'SPLIT_BEFORE',
      ) as BarcodeActionType[];
      try {
        await this.templateRepo.save(tpl);
        migrated += 1;
      } catch (err: unknown) {
        // Same tolerance as the query above: skip this row, keep booting.
        this.logger.warn(
          `Template-Migration: Speichern fehlgeschlagen (Vorlage ${tpl.Id}): ${this.describeError(err)}`,
        );
      }
    }

    if (migrated > 0) {
      // Anything cached before the migration would still hold the old shape.
      this.invalidateTemplates();
      this.logger.log(
        `Template-Migration: ${migrated} Vorlage(n) auf SplitBefore-Flag umgestellt`,
      );
    }
  }

  /**
   * Renders all pages, extracts QR codes, and persists page cache + DB row.
   * Called after the move from the watcher and during backfill.
   *
   * A failure to save the row is logged, not thrown; the matches are
   * returned either way.
   *
   * @param doc Document to scan; `QrCodes`, `PageCount` and `IsScanned` are mutated.
   * @returns The stored QR codes mapped onto their matching templates.
   */
  async scanAndMatch(doc: InboxDocument): Promise<MatchedBarcode[]> {
    const pdfPath = this.pageCache.documentPdfPath(doc.Id);
    const { qrCodes, pageCount } = await this.performScan(doc.Id, pdfPath);
    doc.QrCodes = qrCodes;
    doc.PageCount = pageCount;
    doc.IsScanned = true;
    try {
      await this.documentRepo.save(doc);
    } catch (err: unknown) {
      this.logger.warn(
        `Scan-Ergebnis konnte nicht gespeichert werden (${doc.Id}): ${this.describeError(err)}`,
      );
    }
    return this.matchTemplates(qrCodes);
  }

  /**
   * Scans only when the row has no page count yet (= never scanned
   * successfully; a failed scan stores a page count of 0 and is retried).
   *
   * @returns `true` when a scan was performed, `false` when it was skipped.
   */
  async ensureScanned(doc: InboxDocument): Promise<boolean> {
    if (doc.PageCount > 0) return false;
    await this.scanAndMatch(doc);
    return true;
  }

  /** Read-only: maps the persisted QR codes onto `MatchedBarcode`s. */
  async getMatched(doc: InboxDocument): Promise<MatchedBarcode[]> {
    return this.matchTemplates(doc.QrCodes ?? []);
  }

  /**
   * Pairs every stored QR code with the first template that matches it.
   * Codes without a matching template are returned with empty template fields.
   */
  private async matchTemplates(
    qrCodes: StoredQrCode[],
  ): Promise<MatchedBarcode[]> {
    if (qrCodes.length === 0) return [];
    const templates = await this.getTemplates();
    return qrCodes.map((qr) => {
      const tpl = this.firstMatch(qr.value, templates);
      return {
        page: qr.page,
        value: qr.value,
        templateId: tpl?.Id ?? null,
        templateName: tpl?.Name ?? null,
        dateinameTemplate: tpl?.DateinameTemplate
          ? applyTemplate(
              tpl.DateinameTemplate,
              buildVariables({
                // NOTE(review): an empty placeholder stands in for the document,
                // so document-derived variables cannot resolve here — confirm
                // this is intended for the preview.
                doc: {} as InboxDocument,
                template: tpl,
                matchingQrValue: qr.value,
              }),
            )
          : null,
        splitBefore: tpl?.SplitBefore ?? false,
        actions: tpl?.Actions ?? [],
      };
    });
  }

  /**
   * Returns the first template (in the given order) whose regex matches
   * `value`, or `null`. Templates with an invalid regex are skipped.
   */
  private firstMatch(
    value: string,
    templates: BarcodeTemplate[],
  ): BarcodeTemplate | null {
    for (const tpl of templates) {
      if (this.compileRegex(tpl)?.test(value)) return tpl;
    }
    return null;
  }

  /**
   * Compiles a template's pattern once and reuses it afterwards. Reuse is safe
   * because the patterns are built without flags, so `test()` keeps no
   * `lastIndex` state between calls.
   */
  private compileRegex(tpl: BarcodeTemplate): RegExp | null {
    const pattern = tpl.Regex;
    const cached = this.regexCache.get(pattern);
    if (cached !== undefined) return cached;

    let compiled: RegExp | null;
    try {
      compiled = new RegExp(pattern);
    } catch (err: unknown) {
      compiled = null;
      this.logger.warn(
        `Ungültige Regex in Vorlage ${tpl.Id} wird ignoriert: ${this.describeError(err)}`,
      );
    }
    this.regexCache.set(pattern, compiled);
    return compiled;
  }

  /** Loads the templates ordered by `Id` on first use and caches them. */
  private async getTemplates(): Promise<BarcodeTemplate[]> {
    if (!this.templatesCache) {
      this.templatesCache = await this.templateRepo.find({
        order: { Id: 'ASC' },
      });
    }
    return this.templatesCache;
  }

  /**
   * Renders every page of the PDF, collects the QR codes that match a
   * template, and regenerates the page cache from the rendered images.
   *
   * A failure on a single page is logged and that page is skipped. Any other
   * failure is logged and yields an empty result with `pageCount: 0`. The
   * rendered images are always handed to `pdfService.cleanup`.
   */
  private async performScan(
    documentId: string,
    pdfPath: string,
  ): Promise<{ qrCodes: StoredQrCode[]; pageCount: number }> {
    // Declared outside the try so the finally block can clean up partial output.
    let images: string[] = [];
    try {
      images = await this.pdfService.pdfToImages(
        pdfPath,
        BarcodeScannerService.RENDER_DPI,
      );
      const qrCodes: StoredQrCode[] = [];
      const templates = await this.getTemplates();

      for (const [index, imagePath] of images.entries()) {
        const pageNumber = index + 1;
        try {
          const buffer = await fs.readFile(imagePath);
          const qrs = await this.qrCodeService.extractFromImage(buffer);
          // Only keep QR codes that match a template; several matching
          // codes on the same page are all stored.
          for (const qr of qrs) {
            if (this.firstMatch(qr.data, templates)) {
              qrCodes.push({ page: pageNumber, value: qr.data });
            }
          }
        } catch (err: unknown) {
          this.logger.warn(
            `QR-Scan fehlgeschlagen (${pdfPath}, Seite ${pageNumber}): ${this.describeError(err)}`,
          );
        }
      }

      await this.pageCache.clear(documentId);
      await this.pageCache.generate(documentId, images);
      return { qrCodes, pageCount: images.length };
    } catch (err: unknown) {
      this.logger.warn(
        `Kein QR-Scan möglich für ${pdfPath}: ${this.describeError(err)}`,
      );
      return { qrCodes: [], pageCount: 0 };
    } finally {
      await this.pdfService.cleanup(images);
    }
  }

  /**
   * Renders a single page, crops the given region (normalised coordinates
   * 0..1) and scans it for QR codes. Codes not yet stored for that page are
   * appended to `doc.QrCodes` and persisted — regardless of whether they
   * match a template.
   *
   * @param doc Document whose `QrCodes` may be extended and saved.
   * @param pdfPath Path of the PDF to render.
   * @param page Page number handed to the renderer and stored with new codes.
   * @param x Left edge of the region as a fraction of the page width.
   * @param y Top edge of the region as a fraction of the page height.
   * @param w Region width as a fraction of the page width.
   * @param h Region height as a fraction of the page height.
   * @returns Every decoded value in the region, including already-known ones.
   * @throws Propagates rendering, decoding and save errors; the rendered
   *   image is cleaned up in all cases.
   */
  async scanRegion(
    doc: InboxDocument,
    pdfPath: string,
    page: number,
    x: number,
    y: number,
    w: number,
    h: number,
  ): Promise<{ found: string[] }> {
    let imagePath: string | null = null;
    try {
      imagePath = await this.pdfService.pdfPageToImage(
        pdfPath,
        page,
        BarcodeScannerService.RENDER_DPI,
      );
      const image = sharp(imagePath);
      const { width: imgW, height: imgH } = await image.metadata();
      if (!imgW || !imgH) return { found: [] };

      // Clamp the region to the image so the crop never leaves its bounds.
      const left = Math.round(Math.max(0, x * imgW));
      const top = Math.round(Math.max(0, y * imgH));
      const width = Math.round(Math.min(imgW - left, w * imgW));
      const height = Math.round(Math.min(imgH - top, h * imgH));
      if (width <= 0 || height <= 0) return { found: [] };

      const cropped = await image
        .extract({ left, top, width, height })
        .png()
        .toBuffer();
      const qrResults = await this.qrCodeService.extractFromImage(cropped);
      if (qrResults.length === 0) return { found: [] };

      const knownKeys = new Set(
        (doc.QrCodes ?? []).map((qr) => `${qr.page}:${qr.value}`),
      );
      const found: string[] = [];
      const additions: StoredQrCode[] = [];
      for (const qr of qrResults) {
        found.push(qr.data);
        const key = `${page}:${qr.data}`;
        if (knownKeys.has(key)) continue;
        // Remember the key immediately so the same code decoded twice in
        // this crop is stored only once.
        knownKeys.add(key);
        additions.push({ page, value: qr.data });
      }

      if (additions.length > 0) {
        doc.QrCodes = [...(doc.QrCodes ?? []), ...additions];
        await this.documentRepo.save(doc);
      }
      return { found };
    } finally {
      if (imagePath) await this.pdfService.cleanup([imagePath]);
    }
  }

  /**
   * Rescans all inbox documents — called after barcode templates change.
   * Runs sequentially so PDF rendering is not overloaded; callers invoke it
   * fire-and-forget.
   *
   * NOTE(review): `performScan` reports its own failures as an empty result,
   * so such a document is stored with no codes and counted as scanned.
   *
   * @returns Number of documents rescanned and number that failed (missing
   *   PDF or an error while saving).
   */
  async rescanAll(): Promise<{ scanned: number; failed: number }> {
    this.invalidateTemplates();
    let docs: InboxDocument[];
    try {
      docs = await this.documentRepo.find();
    } catch (err: unknown) {
      this.logger.warn(
        `Rescan: DB-Query fehlgeschlagen: ${this.describeError(err)}`,
      );
      return { scanned: 0, failed: 0 };
    }
    if (docs.length === 0) return { scanned: 0, failed: 0 };

    this.logger.log(
      `Rescan: starte Neuerfassung für ${docs.length} Inbox-Dokument(e)`,
    );
    let scanned = 0;
    let failed = 0;
    for (const doc of docs) {
      try {
        const pdfPath = this.pageCache.documentPdfPath(doc.Id);
        try {
          await fs.access(pdfPath);
        } catch {
          this.logger.warn(`Rescan: PDF fehlt für ${doc.Id} (${pdfPath})`);
          failed++;
          continue;
        }
        const { qrCodes, pageCount } = await this.performScan(doc.Id, pdfPath);
        doc.QrCodes = qrCodes;
        doc.PageCount = pageCount;
        await this.documentRepo.save(doc);
        scanned++;
      } catch (err: unknown) {
        this.logger.warn(
          `Rescan fehlgeschlagen für ${doc.Id}: ${this.describeError(err)}`,
        );
        failed++;
      }
    }
    this.logger.log(
      `Rescan abgeschlossen: ${scanned} ok, ${failed} fehlgeschlagen`,
    );
    return { scanned, failed };
  }

  /**
   * Extracts a loggable message from an arbitrary thrown value without
   * assuming it is an `Error` (a thrown `null` must not break the handler).
   */
  private describeError(err: unknown): string {
    if (typeof err === 'object' && err !== null && 'message' in err) {
      return String((err as { message: unknown }).message);
    }
    return String(err);
  }
}