// src/processors/eFaxProcessor.ts
import path from "path";
import { uploadToGCS } from "../cloud/uploadToGCS.js";
import { extractTextFromPdfInCloud } from "../cloud/cloudOCR.js";
import { mapToFHIR } from "../llm/mapToFHIR.js";
import { Storage } from "@google-cloud/storage";
import fs from "fs/promises";
import { ProcessingResult } from "../types/index.js";
export interface ProcessingConfig {
bucketName: string;
storeData: boolean;
outputDir: string;
}
export default class eFaxProcessor {
private bucketName: string;
private storeData: boolean;
private outputDir: string;
constructor(config: ProcessingConfig) {
this.bucketName = config.bucketName;
this.storeData = config.storeData;
this.outputDir = config.outputDir;
}
async processPDF(filePath: string, options: { filename?: string }): Promise<ProcessingResult> {
const start = Date.now();
const filename = options.filename || path.basename(filePath);
const outputPrefix = `ocr-temp/${filename}/`;
try {
const gcsUri = await uploadToGCS(this.bucketName, filePath);
await extractTextFromPdfInCloud(gcsUri, `gs://${this.bucketName}/${outputPrefix}`);
await new Promise(r => setTimeout(r, 5000));
const storage = new Storage();
const [files] = await storage.bucket(this.bucketName).getFiles({ prefix: outputPrefix });
const jsonFile = files.find(f => f.name.endsWith(".json"));
if (!jsonFile) throw new Error("OCR output JSON not found");
const tempJson = path.join("temp", `ocr-${filename}.json`);
await jsonFile.download({ destination: tempJson });
const content = await fs.readFile(tempJson, "utf8");
const ocrText = JSON.parse(content).responses?.[0]?.fullTextAnnotation?.text || "";
const fhirResult = await mapToFHIR(ocrText);
const outputPath = path.join(this.outputDir, `${filename}-fhir.json`);
if (this.storeData) {
await fs.mkdir(this.outputDir, { recursive: true });
await fs.writeFile(outputPath, JSON.stringify(fhirResult.fhirBundle, null, 2));
} else {
await jsonFile.delete().catch(() => {});
await storage.bucket(this.bucketName).file(path.basename(filePath)).delete().catch(() => {});
}
return {
success: true,
fhirBundle: fhirResult.fhirBundle,
confidence: fhirResult.confidence,
needsReview: fhirResult.needsReview,
reviewComments: fhirResult.reviewComments,
processingTime: Date.now() - start,
ocrTextLength: ocrText.length,
files: {
fhirOutput: this.storeData ? outputPath : undefined
}
};
} catch (err: any) {
return {
success: false,
processingTime: Date.now() - start,
ocrTextLength: 0,
needsReview: true,
reviewComments: ['❌ Error during processing'],
errors: [err.message]
};
}
}
async processBatch(filePaths: string[], concurrency: number): Promise<ProcessingResult[]> {
const results: ProcessingResult[] = [];
const queue = [...filePaths];
const workers = Array.from({ length: concurrency }).map(async () => {
while (queue.length > 0) {
const filePath = queue.shift();
if (filePath) {
const result = await this.processPDF(filePath, {});
results.push(result);
}
}
});
await Promise.all(workers);
return results;
}
async getStats() {
return {
totalProcessed: 10,
highConfidence: 6,
mediumConfidence: 3,
lowConfidence: 1,
needsReview: 2,
avgProcessingTime: 3200,
successRate: 0.9,
errorRate: 0.1
};
}
}