"use strict";
// Module-interop helper (NOTE(review): looks like the helper the TypeScript
// compiler emits for default imports under CommonJS — confirm before hand-editing).
// Reuses an existing `this.__importDefault` when one is present; otherwise defines
// a function that returns `mod` unchanged when it is flagged `__esModule`, and
// wraps anything else as `{ default: mod }` so callers can always read `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.extractTextFromPdf = extractTextFromPdf;
exports.chunkText = chunkText;
const fs_1 = __importDefault(require("fs"));
const pdf_parse_1 = __importDefault(require("pdf-parse"));
const crypto_1 = __importDefault(require("crypto"));
/**
 * Extracts the text content of a PDF.
 *
 * @param {string|Buffer} path - Either a file-system path to the PDF (string),
 *   or the PDF bytes already in memory (e.g. a Buffer from an upload). Any
 *   non-string value is handed to the parser as-is.
 * @returns {Promise<string>} The text reported by the parser, or "" when the
 *   parser reports no text.
 * @throws Rejects when the file cannot be read or when the parser rejects.
 */
async function extractTextFromPdf(path) {
    // Read asynchronously: a synchronous read inside an async function would
    // block the event loop for the whole duration of the disk access.
    const dataBuffer = typeof path === "string"
        ? await fs_1.default.promises.readFile(path)
        : path;
    const data = await (0, pdf_parse_1.default)(dataBuffer);
    return data.text || "";
}
/**
 * Splits text into fixed-size windows that overlap their predecessor.
 *
 * Each window starts `chunkSize - overlap` characters after the previous one
 * and is at most `chunkSize` characters long (the final windows may be
 * shorter). Every chunk gets a deterministic id: the SHA-1 hex digest of the
 * chunk text concatenated with its start offset.
 *
 * @param {string} text - The text to split.
 * @param {number} [chunkSize=1000] - Maximum length of each chunk; must be > 0.
 * @param {number} [overlap=200] - Characters shared with the previous chunk;
 *   must be smaller than `chunkSize`.
 * @returns {{id: string, text: string}[]} Chunks in order of appearance;
 *   empty when `text` is empty.
 * @throws {RangeError} When `chunkSize` is not positive or the window would
 *   not advance (`overlap >= chunkSize`, or either value is NaN).
 */
function chunkText(text, chunkSize = 1000, overlap = 200) {
    const step = chunkSize - overlap;
    // A zero, negative or NaN step would either never advance the window
    // (looping forever while allocating chunks) or yield meaningless output,
    // so reject it up front. The negated comparisons also catch NaN.
    if (!(chunkSize > 0) || !(step > 0)) {
        throw new RangeError(`chunkText: chunkSize (${chunkSize}) must be positive and greater than overlap (${overlap})`);
    }
    const chunks = [];
    for (let start = 0; start < text.length; start += step) {
        const part = text.slice(start, start + chunkSize);
        // Mixing the offset into the hash keeps ids distinct for identical
        // text that occurs at different positions.
        const id = crypto_1.default
            .createHash("sha1")
            .update(part + start.toString())
            .digest("hex");
        chunks.push({ id, text: part });
    }
    return chunks;
}