import { describe, it, expect } from "vitest";
import { searchInPdf, getWordCount, getWordFrequency } from "../src/pdf.js";
// Suite for searchInPdf(text, query). The assertions below rely on each
// result exposing a `line` number and a `context` value.
describe("searchInPdf", () => {
  // Four-line fixture: "Hello" occurs on the first and third lines only.
  const sampleText = "Hello world\nThis is a test document\nHello again\nFinal line";

  it("finds matching lines", () => {
    const matches = searchInPdf(sampleText, "Hello");

    expect(matches).toHaveLength(2);

    // The expected line numbers are 1-based (first line of the fixture is 1).
    const firstMatch = matches[0];
    const secondMatch = matches[1];
    expect(firstMatch.line).toBe(1);
    expect(secondMatch.line).toBe(3);
  });

  it("is case insensitive", () => {
    // Lower-case query against the capitalised "Hello" in the fixture.
    const matches = searchInPdf(sampleText, "hello");

    expect(matches).toHaveLength(2);
  });

  it("returns empty for no match", () => {
    const matches = searchInPdf(sampleText, "nonexistent");

    expect(matches).toHaveLength(0);
  });

  it("includes context lines", () => {
    // The only match is on line 2; its context is expected to include the
    // text of the preceding line.
    const matches = searchInPdf(sampleText, "test document");

    expect(matches).toHaveLength(1);

    const onlyMatch = matches[0];
    expect(onlyMatch.context).toContain("Hello world");
  });
});
// Suite for getWordCount(text): expects a numeric count of the words in text.
describe("getWordCount", () => {
  it("counts words correctly", () => {
    const twoWordCount = getWordCount("hello world");
    expect(twoWordCount).toBe(2);

    const fourWordCount = getWordCount("one two three four");
    expect(fourWordCount).toBe(4);
  });

  it("handles empty string", () => {
    // An empty input is expected to contain zero words.
    const emptyCount = getWordCount("");
    expect(emptyCount).toBe(0);
  });

  it("handles extra whitespace", () => {
    // Leading and trailing spaces must not be counted as words.
    const paddedCount = getWordCount(" hello world ");
    expect(paddedCount).toBe(2);
  });
});
// Suite for getWordFrequency(text, topN). The assertions below rely on the
// result being an array of entries exposing `word` and `count`.
describe("getWordFrequency", () => {
  it("returns top words sorted by frequency", () => {
    // "the" appears three times and "cat" twice; every other word once.
    const text = "the cat sat on the mat the cat";
    const freq = getWordFrequency(text, 5);

    expect(freq[0].word).toBe("the");
    expect(freq[0].count).toBe(3);
    expect(freq[1].word).toBe("cat");
    expect(freq[1].count).toBe(2);
  });

  it("respects topN limit", () => {
    // Seven distinct words, but only three entries were requested.
    const text = "one two three four five six seven";
    const freq = getWordFrequency(text, 3);

    expect(freq).toHaveLength(3);
  });

  it("filters short words", () => {
    const text = "I am a big cat in the hat";
    const freq = getWordFrequency(text, 10);
    const words = freq.map((f) => f.word);

    // Positive control: without it, an empty result would satisfy every
    // negative assertion below and the test would pass vacuously.
    expect(words).toContain("cat");

    // Compare case-insensitively so a leaked "I" is detected whether or not
    // the implementation lower-cases its tokens before counting.
    const normalizedWords = words.map((word) => word.toLowerCase());
    expect(normalizedWords).not.toContain("i");
    expect(normalizedWords).not.toContain("am");
    expect(normalizedWords).not.toContain("a");
  });
});