Files
paperlessmanager/paperless-backend/src/scanner/scanner-watcher.service.ts
T

250 lines
7.3 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { Injectable, Logger, OnModuleInit, OnModuleDestroy } from '@nestjs/common';
import { Cron } from '@nestjs/schedule';
import { ConfigService } from '@nestjs/config';
import { InjectRepository } from '@nestjs/typeorm';
import { IsNull, Repository } from 'typeorm';
import { randomUUID } from 'crypto';
import * as chokidar from 'chokidar';
import * as path from 'path';
import * as fs from 'fs/promises';
import { BarcodeScannerService } from '../barcode/barcode-scanner.service';
import { PageCacheService } from '../barcode/page-cache.service';
import {
InboxDocument,
type InboxSource,
} from '../database/entities/inbox-document.entity';
// Quiet period in milliseconds before a file counts as completely written.
// Used both as chokidar's `awaitWriteFinish.stabilityThreshold` and as the
// minimum mtime age checked by `isStable()`.
const STABILITY_MS = 5000;
/**
 * Ingests scanned PDFs from the scanner drop directory into the inbox.
 *
 * Only files at `<SCANNER_WATCH_DIR>/<subdir>/<name>.pdf` are accepted. The
 * subdirectory `all` yields a document with source `all` and no owner; any
 * other subdirectory name is stored as the owning username with source `user`.
 *
 * Files are discovered through three paths that all end in `handleNewFile`:
 * a chokidar watcher, one directory sweep at startup, and a cron-driven sweep.
 * An accepted file is moved into the page cache under a fresh UUID, persisted
 * as an `InboxDocument`, and then handed to the barcode scanner.
 */
@Injectable()
export class ScannerWatcherService implements OnModuleInit, OnModuleDestroy {
  private readonly logger = new Logger(ScannerWatcherService.name);
  private watcher: chokidar.FSWatcher | null = null;
  private readonly sourceRoot: string;
  /** File paths currently inside `handleNewFile`; guards against concurrent double ingestion. */
  private readonly processing = new Set<string>();
  /** Re-entrancy flag so overlapping cron ticks do not run two sweeps at once. */
  private isPeriodicScanning = false;

  constructor(
    private readonly configService: ConfigService,
    private readonly barcodeScanner: BarcodeScannerService,
    private readonly pageCache: PageCacheService,
    @InjectRepository(InboxDocument)
    private readonly documentRepo: Repository<InboxDocument>,
  ) {
    this.sourceRoot = this.configService.get<string>('SCANNER_WATCH_DIR', '/mnt/scans');
  }

  /** Starts the watcher and kicks off the startup sweep plus backfill in the background. */
  onModuleInit(): void {
    this.startWatching();
    // Run the two steps sequentially; otherwise the initial scan (like the
    // watcher) and the backfill could grab the same freshly created row and
    // scan it twice. Fire-and-forget so module startup is not blocked.
    void this.bootstrap().catch((err: unknown) => {
      this.logger.error(`Bootstrap fehlgeschlagen: ${this.describeError(err)}`);
    });
  }

  /** Startup work: sweep the drop directory first, then rescan incomplete rows. */
  private async bootstrap(): Promise<void> {
    await this.initialScan();
    await this.backfillMissingScans();
  }

  /**
   * Closes the watcher on shutdown. Returns a promise so the framework can
   * wait until the watcher has actually released its resources.
   */
  async onModuleDestroy(): Promise<void> {
    await this.stopWatching();
  }

  /** Creates the chokidar watcher on the source root and wires its event handlers. */
  private startWatching(): void {
    this.logger.log(`Starte Überwachung: ${this.sourceRoot}`);
    this.watcher = chokidar.watch(this.sourceRoot, {
      // Skip any path that contains a dot-prefixed segment (hidden files/dirs).
      ignored: /(^|[\/\\])\../,
      persistent: true,
      // Existing files are handled by the explicit startup sweep instead.
      ignoreInitial: true,
      awaitWriteFinish: {
        stabilityThreshold: STABILITY_MS,
        pollInterval: 500,
      },
      // Root -> subdirectory -> file; nothing deeper is of interest.
      depth: 1,
    });
    this.watcher
      .on('add', (filePath: string) => {
        // handleNewFile logs its own failures; the promise is deliberately not awaited.
        void this.handleNewFile(filePath);
      })
      .on('error', (error: Error) => this.logger.error(`Watcher Fehler: ${error.message}`));
    this.logger.log('Scanner-Watcher aktiv');
  }

  /**
   * Closes the watcher if one is active and clears the reference so a second
   * call is a no-op. Close failures are logged, not thrown, to keep shutdown
   * from aborting.
   */
  private async stopWatching(): Promise<void> {
    const active = this.watcher;
    if (!active) return;
    this.watcher = null;
    try {
      // close() is asynchronous in current chokidar versions; awaiting a
      // non-promise return value (older versions) is harmless.
      await active.close();
      this.logger.log('Scanner-Watcher gestoppt');
    } catch (err: unknown) {
      this.logger.warn(`Scanner-Watcher konnte nicht sauber gestoppt werden: ${this.describeError(err)}`);
    }
  }

  /**
   * Sweeps every direct subdirectory of the source root and ingests each PDF
   * whose mtime is old enough to be considered fully written.
   *
   * @param silent When true, suppresses the "nothing found" / "not readable" /
   *               "not yet stable" messages (used by the frequent cron sweep).
   */
  private async initialScan(silent = false): Promise<void> {
    let subdirs: string[];
    try {
      const entries = await fs.readdir(this.sourceRoot, { withFileTypes: true });
      subdirs = entries.filter((e) => e.isDirectory()).map((e) => e.name);
    } catch (err: unknown) {
      if (!silent) {
        this.logger.warn(
          `Scanner-Check: Quellverzeichnis nicht lesbar (${this.sourceRoot}): ${this.describeError(err)}`,
        );
      }
      return;
    }

    let seen = 0;
    for (const subdir of subdirs) {
      const dir = path.join(this.sourceRoot, subdir);
      let files: string[];
      try {
        files = await fs.readdir(dir);
      } catch (err: unknown) {
        if (!silent) {
          this.logger.warn(`Scanner-Check: ${dir} nicht lesbar: ${this.describeError(err)}`);
        }
        continue;
      }
      for (const name of files) {
        if (path.extname(name).toLowerCase() !== '.pdf') continue;
        const full = path.join(dir, name);
        if (!(await this.isStable(full))) {
          if (!silent) {
            this.logger.debug(`Scanner-Check: ${full} noch nicht stabil Watcher übernimmt`);
          }
          continue;
        }
        seen += 1;
        // Sequential on purpose: one file is moved and scanned at a time.
        await this.handleNewFile(full);
      }
    }

    if (seen > 0) {
      this.logger.log(`Scanner-Check: ${seen} Datei(en) verarbeitet`);
    } else if (!silent) {
      this.logger.log('Scanner-Check: keine neuen Dateien gefunden');
    }
  }

  /**
   * Cron-driven safety net that repeats the directory sweep in silent mode.
   * A tick is skipped while the previous sweep is still running.
   */
  @Cron('*/15 * * * * *')
  async periodicScan(): Promise<void> {
    if (this.isPeriodicScanning) return;
    this.isPeriodicScanning = true;
    try {
      await this.initialScan(true);
    } catch (err: unknown) {
      this.logger.error(`Periodic Scan Fehler: ${this.describeError(err)}`);
    } finally {
      this.isPeriodicScanning = false;
    }
  }

  /**
   * Reports whether the file's last modification is at least `STABILITY_MS`
   * in the past. A file that cannot be stat'ed counts as not stable.
   */
  private async isStable(filePath: string): Promise<boolean> {
    try {
      const stat = await fs.stat(filePath);
      return Date.now() - stat.mtimeMs >= STABILITY_MS;
    } catch {
      return false;
    }
  }

  /**
   * Ingests one PDF: moves it into the page cache, persists the inbox row and
   * triggers the barcode scan. Never rejects; every failure is logged.
   *
   * If the move or the database write fails, the file is moved back to where
   * it came from (best effort) so it is not orphaned in the cache without a
   * row and can be picked up again by a later sweep.
   *
   * @param filePath Path of the candidate file below the source root.
   */
  private async handleNewFile(filePath: string): Promise<void> {
    if (path.extname(filePath).toLowerCase() !== '.pdf') return;

    const relative = path.relative(this.sourceRoot, filePath);
    const parts = relative.split(path.sep);
    // Exactly "<subdir>/<file>" is accepted; anything else is at the wrong depth.
    if (parts.length !== 2) {
      this.logger.debug(`Überspringe (falsche Tiefe): ${filePath}`);
      return;
    }
    const subdir = parts[0];
    const fileName = parts[1];

    if (this.processing.has(filePath)) return;
    this.processing.add(filePath);
    try {
      const id = randomUUID();
      const source: InboxSource = subdir === 'all' ? 'all' : 'user';
      const owner = source === 'all' ? null : subdir;
      const targetDir = this.pageCache.documentDir(id);
      const targetPdf = this.pageCache.documentPdfPath(id);
      await fs.mkdir(targetDir, { recursive: true });

      let moved = false;
      let doc: InboxDocument;
      try {
        await this.move(filePath, targetPdf);
        moved = true;
        doc = this.documentRepo.create({
          Id: id,
          OriginalName: fileName,
          Source: source,
          OwnerUsername: owner,
          PageCount: 0,
          QrCodes: [],
        });
        await this.documentRepo.save(doc);
      } catch (err: unknown) {
        await this.rollbackIngest(filePath, targetDir, targetPdf, moved);
        throw err;
      }
      this.logger.log(`Übernommen: ${relative} -> ${id}/document.pdf`);

      // A failed scan is not fatal: the row exists with PageCount 0, which is
      // what backfillMissingScans() queries for on the next start.
      try {
        await this.barcodeScanner.scanAndMatch(doc);
      } catch (err: unknown) {
        this.logger.warn(`Barcode-Scan nach Move fehlgeschlagen (${id}): ${this.describeError(err)}`);
      }
    } catch (err: unknown) {
      this.logger.error(`Übernahme fehlgeschlagen für ${filePath}: ${this.describeError(err)}`);
    } finally {
      this.processing.delete(filePath);
    }
  }

  /**
   * Best-effort undo of a partially completed ingestion. Never throws.
   *
   * @param filePath  Original location of the scanned file.
   * @param targetDir Cache directory that was created for the document.
   * @param targetPdf Cache path the file was (or was to be) moved to.
   * @param moved     Whether the file had already been moved to `targetPdf`.
   */
  private async rollbackIngest(
    filePath: string,
    targetDir: string,
    targetPdf: string,
    moved: boolean,
  ): Promise<void> {
    if (moved) {
      try {
        await this.move(targetPdf, filePath);
      } catch (err: unknown) {
        this.logger.warn(
          `Rollback fehlgeschlagen für ${filePath} (Datei verbleibt unter ${targetPdf}): ${this.describeError(err)}`,
        );
        return;
      }
    }
    // rmdir only removes an empty directory, so nothing else can be deleted by accident.
    await fs.rmdir(targetDir).catch(() => undefined);
  }

  /**
   * Re-runs the scan for rows that have `PageCount` 0 or a NULL `QrCodes`
   * column. Failures are logged per document and do not stop the loop.
   */
  private async backfillMissingScans(): Promise<void> {
    let pending: InboxDocument[];
    try {
      pending = await this.documentRepo.find({
        // Array form = OR of the two conditions.
        where: [{ PageCount: 0 }, { QrCodes: IsNull() }],
      });
    } catch (err: unknown) {
      this.logger.warn(`Backfill: DB-Query fehlgeschlagen: ${this.describeError(err)}`);
      return;
    }

    let scanned = 0;
    for (const doc of pending) {
      try {
        // NOTE(review): presumably resolves truthy only when a scan was actually
        // performed; confirm against BarcodeScannerService.ensureScanned.
        const didScan = await this.barcodeScanner.ensureScanned(doc);
        if (didScan) scanned += 1;
      } catch (err: unknown) {
        this.logger.warn(`Backfill fehlgeschlagen (${doc.Id}): ${this.describeError(err)}`);
      }
    }

    if (scanned > 0) {
      this.logger.log(`Backfill: ${scanned} Datei(en) nachträglich gescannt`);
    } else {
      this.logger.log('Backfill: alle Dateien bereits gescannt');
    }
  }

  /**
   * Moves a file, falling back to copy + unlink when source and destination
   * are on different devices (`EXDEV`).
   *
   * @throws Any rename error other than `EXDEV`, or the copy/unlink error of
   *         the fallback path.
   */
  private async move(src: string, dest: string): Promise<void> {
    try {
      await fs.rename(src, dest);
      return;
    } catch (err: unknown) {
      if (!this.hasErrorCode(err, 'EXDEV')) throw err;
    }
    // Cross-device: copy + unlink. If either step fails, remove the (possibly
    // partial) copy so a broken mount does not produce duplicates on every
    // restart.
    try {
      await fs.copyFile(src, dest);
      await fs.unlink(src);
    } catch (err: unknown) {
      await fs.unlink(dest).catch(() => undefined);
      throw err;
    }
  }

  /** Extracts a loggable message from an arbitrary thrown value. */
  private describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /** True when the thrown value is an object carrying the given `code` (Node errno style). */
  private hasErrorCode(err: unknown, code: string): boolean {
    return typeof err === 'object' && err !== null && (err as { code?: unknown }).code === code;
  }
}