import crypto from 'crypto';
import { RFP, ParsedRFP } from './types';
/** Material patterns in priority order: specific alloys/grades before generic families. */
const MATERIAL_PATTERNS: readonly RegExp[] = [
  /6061[-\s]?t6/i,
  /6061/i,
  /304[-\s]?ss/i,
  /304/i,
  /316[-\s]?ss/i,
  /stainless\s*steel/i,
  /aluminum/i,
  /steel/i,
  /titanium/i,
  /brass/i,
  /copper/i,
];

/** Finish patterns in priority order. */
const FINISH_PATTERNS: readonly RegExp[] = [
  /anodize[d]?/i,
  /powder coat/i,
  /paint/i,
  /polish/i,
  /passivate/i,
  /plat(e|ing)/i,
  /chrome/i,
  /zinc/i,
];

/**
 * Process keywords with their display label and a precompiled matcher.
 * The leading `\b` anchors each keyword to the start of a word, so "return"
 * is not reported as "Turn" while "turning" / "milled" / "welded" still match.
 */
const PROCESS_MATCHERS = [
  'cnc', 'mill', 'turn', 'lathe', 'drill', 'tap',
  'anodize', 'powder coat', 'paint',
  'laser', 'waterjet', 'weld', 'bend',
  'passivate', 'heat treat', 'plating', 'polish', 'deburr',
].map((keyword) => ({
  keyword,
  // Capitalize the first letter of every word: "powder coat" -> "Powder Coat".
  label: keyword
    .split(' ')
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(' '),
  pattern: new RegExp(`\\b${keyword}`, 'i'),
}));

/**
 * Signed number with an optional unit.
 * - A "-" directly after a letter or digit is ignored, so dates ("2024-01-15")
 *   and hyphenated part numbers ("BRK-1001") are not mistaken for tolerances.
 * - "inch" is listed before "in" so the longer unit is captured whole.
 * - Whitespace is consumed only when a unit follows it, so the match never
 *   ends in trailing blanks.
 */
const TOLERANCE_PATTERN =
  /(?:[+±]|(?<![A-Za-z0-9])-)\s*(?:\d+(?:\.\d+)?|\.\d+)(?:\s*(?:inch|in|mm|thou)\b)?/i;

/** Year-first date such as 2024-01-15 or 2024/1/5. */
const DUE_DATE_PATTERN = /\d{4}[-\/]\d{1,2}[-\/]\d{1,2}/;

/**
 * Part-number label followed by the identifier (capture group 1).
 * "part number" / "part no." / "part#" are consumed as a whole label so the
 * word "number" is never captured as the identifier, and "part" / "pn" must
 * not be followed by a letter so words like "parts" or "pneumatic" are skipped.
 */
const PART_NUMBER_PATTERN =
  /\b(?:part\s*(?:number\b|no\b\.?|#)|part(?![a-z])|p\/n|pn(?![a-z]))[\s:]*([A-Z0-9][A-Z0-9-]*)/i;

/** Loose e-mail address matcher. */
const EMAIL_PATTERN = /[\w.-]+@[\w.-]+\.\w+/;

/** Return the text matched by the first pattern (in list order) that matches, if any. */
function firstPatternMatch(text: string, patterns: readonly RegExp[]): string | undefined {
  for (const pattern of patterns) {
    const match = pattern.exec(text);
    if (match) return match[0];
  }
  return undefined;
}

/**
 * Parse an RFP and extract structured information from its raw text using
 * pattern matching and keyword detection.
 *
 * Values already present (truthy) on `rfp` always win; extraction from
 * `rfp.rawText` only fills fields that are missing. Detected processes are
 * appended to a copy of `rfp.processes` — the caller's array is not modified.
 *
 * @param rfp The RFP to parse; `rawText` is the text that is searched.
 * @returns The input fields plus the extracted values, an `id` (the given one,
 *   or the first 12 hex characters of a SHA-256 over the raw text and the
 *   current timestamp), `parsedAt` (ISO timestamp) and a `confidence` rating:
 *   'high' when at least 70% of the seven scored fields are present, 'medium'
 *   at 40% or more, otherwise 'low'. `material` falls back to 'UNKNOWN'.
 */
export function parseRFP(rfp: RFP): ParsedRFP {
  const source = rfp.rawText;

  // Material
  let material = rfp.material;
  if (!material) {
    const found = firstPatternMatch(source, MATERIAL_PATTERNS);
    if (found) material = found;
  }

  // Processes: copy first so the caller's array is never mutated.
  const processes = [...(rfp.processes || [])];
  for (const { keyword, label, pattern } of PROCESS_MATCHERS) {
    const alreadyListed = processes.some((p) => p.toLowerCase() === keyword);
    if (!alreadyListed && pattern.test(source)) {
      processes.push(label);
    }
  }

  // Tolerances
  let tolerances = rfp.tolerances;
  if (!tolerances) {
    const match = TOLERANCE_PATTERN.exec(source);
    if (match) tolerances = match[0];
  }

  // Finish
  let finish = rfp.finish;
  if (!finish) {
    const found = firstPatternMatch(source, FINISH_PATTERNS);
    if (found) finish = found;
  }

  // Due date
  let dueDate = rfp.dueDate;
  if (!dueDate) {
    const match = DUE_DATE_PATTERN.exec(source);
    if (match) dueDate = match[0];
  }

  // Part number
  let partNumber = rfp.partNumber;
  if (!partNumber) {
    const match = PART_NUMBER_PATTERN.exec(source);
    if (match) partNumber = match[1];
  }

  // Contact e-mail (extracted, but not part of the confidence score)
  let contactEmail = rfp.contactEmail;
  if (!contactEmail) {
    const match = EMAIL_PATTERN.exec(source);
    if (match) contactEmail = match[0];
  }

  // Confidence: share of the seven scored fields that ended up populated.
  const totalFields = 7; // material, processes, tolerances, finish, dueDate, partNumber, qty
  let extractedFields = 0;
  if (material) extractedFields++;
  if (processes.length > 0) extractedFields++;
  if (tolerances) extractedFields++;
  if (finish) extractedFields++;
  if (dueDate) extractedFields++;
  if (partNumber) extractedFields++;
  if (rfp.qty && rfp.qty > 0) extractedFields++;

  const completeness = extractedFields / totalFields;
  let confidence: 'low' | 'medium' | 'high' = 'low';
  if (completeness >= 0.7) confidence = 'high';
  else if (completeness >= 0.4) confidence = 'medium';

  // Keep a caller-supplied id; otherwise derive a short one. The timestamp is
  // mixed in, so parsing the same text twice yields different ids.
  const id =
    rfp.id ||
    crypto
      .createHash('sha256')
      .update(rfp.rawText + Date.now())
      .digest('hex')
      .slice(0, 12);

  return {
    ...rfp,
    id,
    material: material || 'UNKNOWN',
    processes,
    tolerances,
    finish,
    dueDate,
    partNumber,
    contactEmail,
    parsedAt: new Date().toISOString(),
    confidence,
  };
}
/**
 * Produce the comparison-friendly view of a parsed RFP used for similarity
 * matching: text fields are lower-cased and trimmed (missing ones become ''),
 * each process name is lower-cased and trimmed, and the quantity is replaced
 * by the bucket returned from `getQuantityRange`.
 */
export function normalizeRFP(parsed: ParsedRFP) {
  // Shared clean-up for the optional free-text fields.
  const clean = (value: string | null | undefined) => (value || '').toLowerCase().trim();

  const material = clean(parsed.material);
  const processList = parsed.processes || [];
  const processes = processList.map((name) => name.toLowerCase().trim());
  const qtyRange = getQuantityRange(parsed.qty);
  const tolerances = clean(parsed.tolerances);
  const finish = clean(parsed.finish);

  return { material, processes, qtyRange, tolerances, finish };
}
/**
 * Map a quantity onto the fixed [low, high] bucket used for similarity
 * matching. The first bucket whose upper bound is >= qty wins, so values of
 * 10 or less (including zero and negatives) land in [1, 10]; anything that
 * fits no bucket (above 1000, or NaN) gets [1001, 10000].
 */
function getQuantityRange(qty: number): [number, number] {
  // Built per call so every caller receives its own tuple.
  const buckets: Array<[number, number]> = [
    [1, 10],
    [11, 50],
    [51, 100],
    [101, 500],
    [501, 1000],
  ];
  const bucket = buckets.find(([, upper]) => qty <= upper);
  return bucket ? bucket : [1001, 10000];
}