import path from "path";
import { fileURLToPath } from "url";
import fs from "fs/promises";
import dotenv from 'dotenv';
// Type definitions
// Shape of the result returned by the dynamically-imported mapToFHIR().
// Everything is optional because the upstream mapper output is not validated
// here — this script only reads confidence/needsReview and dumps the rest.
interface FHIRWithConfidence {
  confidence?: {
    // Free-form confidence label from the mapper (printed verbatim below)
    overallConfidence?: string;
  };
  // True when the mapper flags the result for manual review
  needsReview?: boolean;
  fhirBundle?: any; // NOTE(review): `any` — actual FHIR Bundle schema not enforced here
  resources?: any;
  data?: any;
  [key: string]: any; // Allow additional properties
}
// Minimal shape of one Cloud Vision OCR output JSON file as consumed below.
interface OCRResponse {
  responses: Array<{
    // Aggregated whole-page text produced by Vision
    fullTextAnnotation?: {
      text: string;
    };
    // Per-element annotations; the code below skips index 0 when listing
    // individual elements (presumably the aggregate entry — TODO confirm
    // against the Vision API response format).
    textAnnotations?: Array<{
      description: string;
      boundingPoly?: {
        vertices: Array<{
          // NOTE(review): treated as pixel coordinates defaulting to 0 when
          // absent — confirm the API may omit zero-valued fields.
          x?: number;
          y?: number;
        }>;
      };
    }>;
  }>;
}
// IMMEDIATE DEBUG OUTPUT
// Banner + environment snapshot, printed before anything can fail so a later
// crash still leaves evidence of when/where the run started.
console.log("๐ ENHANCED TYPESCRIPT SCRIPT STARTING - Full Text Analysis Mode");
// NOTE(review): the next string literal is split across a physical newline —
// almost certainly an emoji mangled by an encoding round-trip; as written this
// is not valid source. Left byte-identical here; restore the original emoji.
console.log("๐
Timestamp:", new Date().toISOString());
console.log("๐ฅ๏ธ Node version:", process.version);
console.log("๐ Current directory:", process.cwd());
// Load environment
dotenv.config();
console.log("๐ง Environment loaded");
// Get script paths (ESM equivalent of CommonJS __filename/__dirname)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Set Google Cloud credentials BEFORE any imports
// (the Google client libraries read GOOGLE_APPLICATION_CREDENTIALS lazily,
// and they are only dynamically imported later inside main()).
const gcpKeyPath = "./gcp-key.json"; // Relative to root folder
const absoluteGcpPath = path.resolve(process.cwd(), gcpKeyPath);
process.env.GOOGLE_APPLICATION_CREDENTIALS = absoluteGcpPath;
console.log("๐ Google Cloud credentials set to:", absoluteGcpPath);
console.log("๐ Working directory:", process.cwd());
console.log("๐ Relative path used:", gcpKeyPath);
// Verify the credentials file exists and is readable.
// Runs at module top level (top-level await) so the script fails fast with
// actionable guidance instead of a vague auth error deep inside main().
try {
  // NOTE(review): `fs` here is the "fs/promises" namespace; `constants` is
  // exported from it only on newer Node versions — confirm the minimum Node
  // version, otherwise this silently degrades to an existence-only check.
  await fs.access(absoluteGcpPath, fs.constants.R_OK);
  console.log("โ
GCP key file found and readable");
  // Verify it's valid JSON (JSON.parse throws into the catch below if not)
  const keyContent = await fs.readFile(absoluteGcpPath, 'utf-8');
  const keyData = JSON.parse(keyContent);
  console.log("โ
GCP key file is valid JSON");
  console.log(`๐ Service account email: ${keyData.client_email || 'Not found'}`);
  console.log(`๐ Project ID: ${keyData.project_id || 'Not found'}`);
} catch (error: any) {
  console.error("โ GCP key file issue:", error.message);
  console.error("๐ก Please check:");
  console.error(" 1. File exists at:", absoluteGcpPath);
  console.error(" 2. File is readable");
  console.error(" 3. File contains valid JSON");
  console.error("๐ Current directory contents:");
  // Best-effort hint: list JSON files in cwd to help locate the key
  try {
    const files = await fs.readdir(process.cwd());
    console.error(" ", files.filter(f => f.endsWith('.json')).join(', '));
  } catch {}
  // Hard exit: nothing downstream can work without valid credentials
  process.exit(1);
}
// Function to parse and display form fields
function parseFormFields(text: string): void {
console.log("\n" + "=".repeat(80));
console.log("๐ PARSING EXTRACTED FORM FIELDS");
console.log("=".repeat(80));
// Split text into lines and analyze
const lines = text.split('\n').map(line => line.trim()).filter(line => line.length > 0);
console.log(`๐ Total lines found: ${lines.length}\n`);
// Look for common form patterns
const formPatterns = [
/patient\s*name/i,
/first\s*name/i,
/last\s*name/i,
/date\s*of\s*birth/i,
/dob/i,
/phone/i,
/address/i,
/insurance/i,
/diagnosis/i,
/referring\s*physician/i,
/date\s*of\s*request/i,
/medical\s*record/i,
/mrn/i,
/ssn/i,
/social\s*security/i,
/email/i,
/gender/i,
/sex/i,
/race/i,
/ethnicity/i
];
// Display all text first
console.log("๐ FULL EXTRACTED TEXT:");
console.log("-".repeat(60));
console.log(text);
console.log("-".repeat(60));
// Analyze line by line
console.log("\n๐ LINE-BY-LINE ANALYSIS:");
lines.forEach((line, index) => {
const lineNumber = String(index + 1).padStart(3, ' ');
console.log(`${lineNumber}: ${line}`);
// Check if this line matches any form pattern
const matches = formPatterns.filter(pattern => pattern.test(line));
if (matches.length > 0) {
console.log(` โญ Potential form field detected`);
}
});
// Try to extract key-value pairs
console.log("\n๐ ATTEMPTING TO EXTRACT FORM FIELDS:");
const extractedFields: Record<string, string> = {};
// Look for patterns like "Field Name: Value" or "Field Name Value"
lines.forEach(line => {
// Pattern 1: "Label: Value"
const colonMatch = line.match(/^(.+?):\s*(.+)$/);
if (colonMatch) {
const [, label, value] = colonMatch;
if (value.trim().length > 0) {
extractedFields[label.trim()] = value.trim();
}
}
// Pattern 2: Look for dates in various formats
const dateMatch = line.match(/(\d{1,2}\/\d{1,2}\/\d{4})/);
if (dateMatch) {
extractedFields[`Date_found_in: "${line}"`] = dateMatch[1];
}
// Pattern 3: Look for phone numbers
const phoneMatch = line.match(/(\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})/);
if (phoneMatch) {
extractedFields[`Phone_found_in: "${line}"`] = phoneMatch[1];
}
// Pattern 4: Look for potential names (2+ words starting with capital letters)
const nameMatch = line.match(/^([A-Z][a-z]+\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)$/);
if (nameMatch && line.length < 50) {
extractedFields[`Potential_name: "${line}"`] = nameMatch[1];
}
});
if (Object.keys(extractedFields).length > 0) {
console.log("\nโ
EXTRACTED FIELDS:");
Object.entries(extractedFields).forEach(([key, value]) => {
console.log(` ๐ท๏ธ ${key}: ${value}`);
});
} else {
console.log("\nโ ๏ธ No clear field patterns detected. This might be a more complex form layout.");
}
// Look for checkbox/selection indicators
console.log("\nโ๏ธ LOOKING FOR CHECKBOXES/SELECTIONS:");
const checkboxPatterns = [
/\[x\]/i,
/\[โ\]/i,
/โ/,
/โ/,
/checked/i,
/selected/i,
/yes\s*$/i,
/no\s*$/i
];
lines.forEach((line, index) => {
checkboxPatterns.forEach(pattern => {
if (pattern.test(line)) {
console.log(` ๐ Line ${index + 1}: ${line}`);
}
});
});
}
/**
 * Debug helper: lists the contents of the expected project directories
 * (relative to the working directory), logging a warning for any that are
 * missing. Purely informational; never throws.
 */
async function checkProjectStructure(): Promise<void> {
  console.log("\n๐ Checking project structure...");
  const expectedDirs = ["src", "src/cloud", "src/llm", "src/tests"];
  for (const dir of expectedDirs) {
    try {
      const entries = await fs.readdir(dir);
      console.log(`๐ ${dir}/:`);
      for (const entry of entries) {
        console.log(` ๐ ${entry}`);
      }
    } catch {
      // Missing directory is expected in some layouts — report, don't fail
      console.log(`โ ${dir}/ - Directory not found`);
    }
  }
}
/**
 * Debug helper: recursively prints the shape of an arbitrary value — object
 * keys, array lengths, and an inline preview of primitive values (strings
 * truncated past 50 chars) — down to maxDepth levels. Console output only;
 * non-objects and nulls at the top level print nothing.
 */
function exploreObjectStructure(obj: any, name: string, maxDepth: number = 3, currentDepth: number = 0): void {
  if (currentDepth >= maxDepth || obj === null || typeof obj !== 'object') {
    return;
  }

  const pad = ' '.repeat(currentDepth);
  console.log(`${pad}๐ ${name} (${typeof obj}):`);

  if (Array.isArray(obj)) {
    console.log(`${pad} ๐ Array with ${obj.length} items`);
    if (obj.length > 0) {
      console.log(`${pad} ๐ First item type: ${typeof obj[0]}`);
      // Only the first element is sampled, one level deeper
      if (currentDepth < maxDepth - 1) {
        exploreObjectStructure(obj[0], `${name}[0]`, maxDepth, currentDepth + 1);
      }
    }
    return;
  }

  const keys = Object.keys(obj);
  console.log(`${pad} ๐ Keys: [${keys.join(', ')}]`);
  // Preview at most five entries: primitives inline, objects recurse.
  for (const key of keys.slice(0, 5)) {
    const value = obj[key];
    if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
      const preview = typeof value === 'string' && value.length > 50
        ? value.substring(0, 50) + '...'
        : String(value);
      console.log(`${pad} ${key}: ${preview}`);
    } else if (value !== null && typeof value === 'object' && currentDepth < maxDepth - 1) {
      exploreObjectStructure(value, `${name}.${key}`, maxDepth, currentDepth + 1);
    }
  }
  if (keys.length > 5) {
    console.log(`${pad} ... and ${keys.length - 5} more keys`);
  }
}
// Now run the enhanced pipeline
// End-to-end debug pipeline: verify GCS access, upload a sample PDF, run
// Cloud Vision OCR on it, download and analyze the OCR output, map the text
// to FHIR, then delete the test artifacts. All failures are logged; nothing
// is rethrown to the caller (debug-harness behavior).
async function main(): Promise<void> {
  try {
    // First, let's see what files we have
    await checkProjectStructure();
    const CONFIG = {
      bucketName: "efax-docs-bucket",
      localPdfPath: path.join(__dirname, "../../tests/test-files/Filled_Sample 42.pdf"),
      outputPrefix: "ocr-output/",
      storeData: false, // NOTE(review): never read anywhere below — dead config?
    };
    console.log("โ๏ธ Configuration:");
    console.log(" - Bucket:", CONFIG.bucketName);
    console.log(" - PDF path:", CONFIG.localPdfPath);
    console.log(" - PDF exists:", await fs.access(CONFIG.localPdfPath).then(() => true).catch(() => false));
    console.log(" - GCP credentials:", process.env.GOOGLE_APPLICATION_CREDENTIALS);
    // Test Google Cloud authentication before proceeding
    console.log("\n๐ Testing Google Cloud authentication...");
    try {
      const { Storage } = await import("@google-cloud/storage");
      const testStorage = new Storage();
      console.log("๐ Attempting to authenticate with Google Cloud...");
      // Instead of listing all buckets, just check if our target bucket is accessible
      console.log(`๐ Checking access to bucket: ${CONFIG.bucketName}`);
      const bucket = testStorage.bucket(CONFIG.bucketName);
      const [exists] = await bucket.exists();
      if (exists) {
        console.log(`โ
Target bucket '${CONFIG.bucketName}' exists and is accessible`);
      } else {
        console.log(`โ ๏ธ Target bucket '${CONFIG.bucketName}' does not exist`);
        console.log("๐จ Attempting to create bucket...");
        // Bucket creation is best-effort: a failure is reported but the
        // pipeline continues (and will then likely fail at the upload step).
        try {
          await testStorage.createBucket(CONFIG.bucketName, {
            location: 'US', // or your preferred location
            storageClass: 'STANDARD'
          });
          console.log(`โ
Created bucket '${CONFIG.bucketName}'`);
        } catch (createError: any) {
          console.error("โ Failed to create bucket:", createError.message);
          console.error("๐ก You may need to create the bucket manually in Google Cloud Console");
        }
      }
      console.log("โ
Google Cloud authentication successful");
    } catch (authError: any) {
      console.error("โ Google Cloud authentication failed:", authError.message);
      console.error("๐ Error code:", authError.code);
      // NOTE(review): assumes the storage client exposes the HTTP status as a
      // numeric `code` property — confirm against the client library version.
      if (authError.code === 403) {
        console.error("๐ Permission denied. Your service account needs these roles:");
        console.error(" - Storage Admin (roles/storage.admin)");
        console.error(" - Cloud Vision API User (roles/ml.vision.apiUser)");
        console.error("๐ก Add permissions in Google Cloud Console:");
        console.error(" 1. Go to IAM & Admin โ IAM");
        console.error(" 2. Find: latest@refine-rx.iam.gserviceaccount.com");
        console.error(" 3. Click Edit and add the required roles");
      } else {
        console.error("๐ก Other possible solutions:");
        console.error(" 1. Verify billing is enabled for the project");
        console.error(" 2. Enable Cloud Storage API");
        console.error(" 3. Enable Cloud Vision API");
      }
      throw authError;
    }
    // Import all functions with proper typing and flexible paths.
    // Pipeline steps are loaded dynamically so a missing module yields a
    // targeted error message instead of killing the script at load time.
    console.log("\n๐ฆ Importing required modules...");
    let uploadToGCS: (bucket: string, path: string) => Promise<string>;
    let extractTextFromPdfInCloud: (input: string, output: string) => Promise<void>;
    let mapToFHIR: (text: string) => Promise<FHIRWithConfidence>;
    try {
      console.log("๐ Importing uploadToGCS...");
      // NOTE(review): the second and third import specifiers are identical, so
      // the final fallback can never succeed where the second failed — likely
      // meant "./cloud/uploadToGCS/index.js" or a ".ts" path.
      const uploadModule = await import("./cloud/uploadToGCS.js").catch(() =>
        import("./cloud/uploadToGCS")).catch(() =>
        import("./cloud/uploadToGCS"));
      uploadToGCS = uploadModule.uploadToGCS;
      console.log("โ
uploadToGCS imported successfully");
    } catch (error) {
      console.error("โ Failed to import uploadToGCS:", error);
      console.error("๐ก Check if file exists at one of these locations:");
      console.error(" - src/cloud/uploadToGCS.js");
      console.error(" - src/cloud/uploadToGCS.ts");
      console.error(" - src/cloud/uploadToGCS/index.js");
      throw error;
    }
    try {
      console.log("๐ Importing extractTextFromPdfInCloud...");
      // NOTE(review): same duplicated-fallback issue as uploadToGCS above.
      const ocrModule = await import("./cloud/cloudOCR.js").catch(() =>
        import("./cloud/cloudOCR")).catch(() =>
        import("./cloud/cloudOCR"));
      extractTextFromPdfInCloud = ocrModule.extractTextFromPdfInCloud;
      console.log("โ
extractTextFromPdfInCloud imported successfully");
    } catch (error) {
      console.error("โ Failed to import cloudOCR:", error);
      console.error("๐ก Check if file exists at one of these locations:");
      console.error(" - src/cloud/cloudOCR.js");
      console.error(" - src/cloud/cloudOCR.ts");
      console.error(" - src/cloud/cloudOCR/index.js");
      throw error;
    }
    try {
      console.log("๐ Importing mapToFHIR...");
      // NOTE(review): same duplicated-fallback issue as uploadToGCS above.
      const fhirModule = await import("./llm/mapToFHIR.js").catch(() =>
        import("./llm/mapToFHIR")).catch(() =>
        import("./llm/mapToFHIR"));
      mapToFHIR = fhirModule.mapToFHIR;
      console.log("โ
mapToFHIR imported successfully");
    } catch (error) {
      console.error("โ Failed to import mapToFHIR:", error);
      console.error("๐ก Check if file exists at one of these locations:");
      console.error(" - src/llm/mapToFHIR.js");
      console.error(" - src/llm/mapToFHIR.ts");
      console.error(" - src/llm/mapToFHIR/index.js");
      throw error;
    }
    console.log("\n๐ค Step 1: Testing PDF upload...");
    const gcsInputUri: string = await uploadToGCS(CONFIG.bucketName, CONFIG.localPdfPath);
    console.log(`โ
Upload successful: ${gcsInputUri}`);
    console.log("\n๐ง Step 2: Testing OCR...");
    const gcsOutputUri = `gs://${CONFIG.bucketName}/${CONFIG.outputPrefix}`;
    await extractTextFromPdfInCloud(gcsInputUri, gcsOutputUri);
    console.log("โ
OCR request sent successfully");
    // NOTE(review): fixed sleep — the async OCR call above gives no completion
    // signal here, and 10 s may not be enough for large PDFs. Consider polling
    // the output prefix until files appear instead.
    console.log("\nโณ Waiting 10 seconds for OCR to complete...");
    await new Promise(resolve => setTimeout(resolve, 10000));
    console.log("\n๐ Step 3: Extracting and analyzing full text...");
    const { Storage } = await import("@google-cloud/storage");
    // Credentials already set above, but ensure they're still set
    if (!process.env.GOOGLE_APPLICATION_CREDENTIALS) {
      process.env.GOOGLE_APPLICATION_CREDENTIALS = absoluteGcpPath;
    }
    const storage = new Storage();
    console.log("โ
Google Cloud Storage client initialized");
    const [files] = await storage.bucket(CONFIG.bucketName).getFiles({ prefix: CONFIG.outputPrefix });
    console.log(`Found ${files.length} output files`);
    if (files.length > 0) {
      console.log("๐ Output files:");
      files.forEach(file => console.log(` - ${file.name}`));
      // Try to read all JSON files
      const jsonFiles = files.filter(f => f.name.endsWith('.json'));
      if (jsonFiles.length > 0) {
        let allText = "";
        for (const file of jsonFiles) {
          console.log(`\n๐ Reading ${file.name}...`);
          // Download to a throwaway local file. Date.now() in the name is
          // unique enough here because iterations run strictly sequentially.
          const tempFile = path.join(__dirname, `temp-${Date.now()}.json`);
          await file.download({ destination: tempFile });
          const content = await fs.readFile(tempFile, "utf-8");
          const json: OCRResponse = JSON.parse(content);
          // Accumulate the page-level text across all output shards
          const text = json.responses?.[0]?.fullTextAnnotation?.text || "";
          if (text) {
            allText += text + "\n";
          }
          // Also check individual text annotations for more detailed extraction
          const annotations = json.responses?.[0]?.textAnnotations || [];
          console.log(`๐ Found ${annotations.length} text annotations`);
          if (annotations.length > 1) {
            console.log("\n๐ INDIVIDUAL TEXT ELEMENTS:");
            // Skip index 0 (the aggregate annotation); show at most 20 elements
            annotations.slice(1, 21).forEach((annotation, idx) => {
              if (annotation.description && annotation.description.trim()) {
                const bounds = annotation.boundingPoly?.vertices || [];
                const x = bounds[0]?.x || 0;
                const y = bounds[0]?.y || 0;
                console.log(` ${String(idx + 1).padStart(2, '0')}: "${annotation.description}" (x:${x}, y:${y})`);
              }
            });
            if (annotations.length > 21) {
              console.log(` ... and ${annotations.length - 21} more elements`);
            }
          }
          // Best-effort temp-file cleanup; failure is non-fatal
          await fs.unlink(tempFile).catch(() => {});
        }
        if (allText.length > 0) {
          console.log(`\nโ
Total extracted text: ${allText.length} characters`);
          // Parse the form fields
          parseFormFields(allText);
          // Test FHIR mapping with full text
          console.log("\n๐ค Step 4: Testing FHIR mapping with full text...");
          try {
            const fhirResult: FHIRWithConfidence = await mapToFHIR(allText);
            console.log("โ
FHIR mapping completed");
            console.log(`๐ Confidence: ${fhirResult.confidence?.overallConfidence || 'unknown'}`);
            console.log(`๐ Needs review: ${fhirResult.needsReview ? 'YES' : 'NO'}`);
            // Display complete FHIR result structure
            console.log("\n๐ COMPLETE FHIR RESULT STRUCTURE:");
            console.log("๐ Available properties:", Object.keys(fhirResult));
            // Explore the structure safely
            exploreObjectStructure(fhirResult, "fhirResult");
            // Display full result as JSON for complete visibility
            console.log("\n๐ COMPLETE FHIR RESULT (JSON):");
            console.log(JSON.stringify(fhirResult, null, 2));
          } catch (fhirError: any) {
            // FHIR failure is non-fatal: the cleanup below still runs
            console.error("โ FHIR mapping failed:", fhirError.message);
            console.error("๐ Error details:", fhirError);
          }
        }
      }
    }
    // Cleanup: remove OCR outputs and the uploaded PDF so reruns start clean
    console.log("\n๐งน Cleaning up test files...");
    for (const file of files) {
      await file.delete().catch(() => {});
    }
    // Delete uploaded PDF
    await storage.bucket(CONFIG.bucketName).file(path.basename(CONFIG.localPdfPath)).delete().catch(() => {});
    console.log("\n๐ ENHANCED TYPESCRIPT PIPELINE TEST COMPLETED!");
  } catch (error: any) {
    // Top-level guard: log everything useful and swallow the error (exit
    // code stays 0 — intentional for a debug harness).
    console.error("\n๐ฅ PIPELINE ERROR:");
    console.error("Message:", error.message);
    console.error("Stack:", error.stack);
    if (error.response) {
      console.error("HTTP Status:", error.response.status);
      console.error("Response data:", error.response.data);
    }
  }
}
// Run the main function. The completion banner must print only after the
// async pipeline settles — previously it ran synchronously at module load,
// timestamping "completed" before main() had even started. `.finally()`
// preserves the original "always printed" behavior on success and failure.
main()
  .catch(console.error)
  .finally(() => {
    console.log("\n๐ Enhanced TypeScript debug script completed at:", new Date().toISOString());
  });