import Database from "better-sqlite3";
import matter from "gray-matter";
import { globSync } from "glob";
import fs from "fs";
import path from "path";
import chokidar, { FSWatcher } from "chokidar";
import { WIKI_REPO_PATH, DB_PATH } from "./config.js";
/**
 * Front-matter-derived metadata for a single wiki page, mirroring the
 * scalar columns of the `pages` table (tags are stored JSON-encoded there).
 */
export interface PageMetadata {
  /** Repo-relative path without the `.md` extension, e.g. "en/characters/kaelen". Primary key. */
  doc_id: string;
  /** Front-matter `title`, falling back to the file's base name. */
  title: string;
  /** URL-style path: "/" + doc_id. */
  path: string;
  /** Front-matter `type`, or a type inferred from the top-level directory. */
  type: string;
  /** Normalized tag list (front-matter array or comma-separated string). */
  tags: string[];
  /** File mtime as an ISO-8601 string. */
  updated_at: string;
  /** Front-matter `version`, or "unknown". */
  version: string;
}
/** A full page: metadata plus the markdown body and the outgoing links extracted from it. */
export interface Page extends PageMetadata {
  /** Markdown body with the front matter stripped. */
  body_md: string;
  /** Targets of [[wiki-links]] and internal ("/"-prefixed) markdown links found in the body. */
  links: string[];
}
/**
 * Indexes a directory of markdown wiki pages into a SQLite database with an
 * FTS5 full-text index, and keeps the index fresh via a file watcher.
 *
 * Schema: a `pages` table holds the parsed pages; `pages_fts` is an
 * external-content FTS5 table kept in sync by AFTER INSERT/UPDATE/DELETE
 * triggers on `pages`.
 */
export class Indexer {
  private readonly db: Database.Database;
  private watcher: FSWatcher | null = null;

  /**
   * Upsert shared by indexFile() and reindex(). NOTE: `INSERT OR REPLACE`
   * only keeps the FTS triggers correct because recursive triggers are
   * enabled in the constructor — see the PRAGMA there.
   */
  private static readonly UPSERT_SQL = `
    INSERT OR REPLACE INTO pages (doc_id, title, path, type, tags, updated_at, version, body_md, links)
    VALUES (@doc_id, @title, @path, @type, @tags, @updated_at, @version, @body_md, @links)
  `;

  constructor() {
    // Ensure the directory holding the SQLite file exists before opening it.
    const dataDir = path.dirname(DB_PATH);
    if (!fs.existsSync(dataDir)) {
      fs.mkdirSync(dataDir, { recursive: true });
    }
    this.db = new Database(DB_PATH);
    // BUG FIX: SQLite fires DELETE triggers during REPLACE conflict
    // resolution only when recursive triggers are enabled. Without this
    // pragma, `INSERT OR REPLACE` on `pages` skips the pages_ad trigger and
    // every re-indexed page leaves a stale row in the external-content FTS
    // index, corrupting search results over time.
    this.db.pragma("recursive_triggers = ON");
    this.initSchema();
  }

  /** Creates the pages table, the FTS5 index, and the sync triggers (idempotent). */
  private initSchema(): void {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS pages (
        doc_id TEXT PRIMARY KEY,
        title TEXT,
        path TEXT UNIQUE,
        type TEXT,
        tags TEXT,
        updated_at TEXT,
        version TEXT,
        body_md TEXT,
        links TEXT
      );
      CREATE VIRTUAL TABLE IF NOT EXISTS pages_fts USING fts5(
        title,
        body_md,
        content='pages',
        content_rowid='rowid'
      );
      CREATE TRIGGER IF NOT EXISTS pages_ai AFTER INSERT ON pages BEGIN
        INSERT INTO pages_fts(rowid, title, body_md)
        VALUES (NEW.rowid, NEW.title, NEW.body_md);
      END;
      CREATE TRIGGER IF NOT EXISTS pages_ad AFTER DELETE ON pages BEGIN
        INSERT INTO pages_fts(pages_fts, rowid, title, body_md)
        VALUES ('delete', OLD.rowid, OLD.title, OLD.body_md);
      END;
      CREATE TRIGGER IF NOT EXISTS pages_au AFTER UPDATE ON pages BEGIN
        INSERT INTO pages_fts(pages_fts, rowid, title, body_md)
        VALUES ('delete', OLD.rowid, OLD.title, OLD.body_md);
        INSERT INTO pages_fts(rowid, title, body_md)
        VALUES (NEW.rowid, NEW.title, NEW.body_md);
      END;
    `);
  }

  /**
   * Starts watching the wiki repo for markdown changes and incrementally
   * updates the index. Idempotent: a second call is a no-op.
   */
  public watch(): void {
    if (this.watcher) return;
    console.error(`Starting file watcher on ${WIKI_REPO_PATH}`);
    // Forward slashes: chokidar/glob patterns must not use Windows separators.
    const pattern = path.join(WIKI_REPO_PATH, "**/*.md").replace(/\\/g, "/");
    this.watcher = chokidar.watch(pattern, {
      ignored: /(^|[\/\\])\../, // ignore dotfiles
      persistent: true,
      ignoreInitial: true, // we already indexed on startup
    });
    this.watcher
      .on("add", (filePath: string) => {
        console.error(`File added: ${filePath}`);
        this.indexFile(filePath);
      })
      .on("change", (filePath: string) => {
        console.error(`File changed: ${filePath}`);
        this.indexFile(filePath);
      })
      .on("unlink", (filePath: string) => {
        console.error(`File removed: ${filePath}`);
        this.removeFile(filePath);
      });
  }

  /**
   * Derives the doc_id for an absolute file path:
   * <repo>/en/characters/kaelen.md -> "en/characters/kaelen".
   */
  private toDocId(filePath: string): string {
    return path
      .relative(WIKI_REPO_PATH, filePath)
      .replace(/\.md$/, "")
      .replace(/\\/g, "/");
  }

  /** Converts a parsed page into the named-parameter row shape UPSERT_SQL expects. */
  private toRow(page: Page): Record<string, unknown> {
    return {
      ...page,
      tags: JSON.stringify(page.tags),
      links: JSON.stringify(page.links),
    };
  }

  /** Deletes a page by file path; the pages_ad trigger keeps the FTS index in sync. */
  private removeFile(filePath: string): void {
    this.db.prepare("DELETE FROM pages WHERE doc_id = ?").run(this.toDocId(filePath));
  }

  /** Parses one file and upserts it; parse failures are logged and skipped. */
  private indexFile(filePath: string): void {
    const page = this.parseMarkdownFile(filePath);
    if (page) {
      this.db.prepare(Indexer.UPSERT_SQL).run(this.toRow(page));
    }
  }

  /** Normalizes front-matter tags: accepts an array or a comma-separated string. */
  private static normalizeTags(raw: unknown): string[] {
    if (Array.isArray(raw)) {
      // Front matter is untyped; trust array entries as-is (original behavior).
      return raw as string[];
    }
    if (typeof raw === "string") {
      return raw
        .split(",")
        .map((t) => t.trim())
        .filter((t) => t.length > 0);
    }
    return [];
  }

  /** Extracts [[wiki-link]] targets and internal ("/"-prefixed) markdown link URLs. */
  private static extractLinks(body: string): string[] {
    const links: string[] = [];
    // 1. Wiki links, including piped form [[path | Label]] — keep the path part.
    const wikiLinkRegex = /\[\[([^\]]+)\]\]/g;
    let match: RegExpExecArray | null;
    while ((match = wikiLinkRegex.exec(body)) !== null) {
      links.push(match[1].split("|")[0].trim());
    }
    // 2. Markdown links [Label](/path) — only track internal wiki links.
    const mdLinkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
    while ((match = mdLinkRegex.exec(body)) !== null) {
      if (match[2].startsWith("/")) {
        links.push(match[2]);
      }
    }
    return links;
  }

  /**
   * Infers a page type from its doc_id: the first path segment, skipping a
   * leading two-letter locale directory (e.g. "en/"). Falls back to "page"
   * when the path has no directory component beyond the locale.
   */
  private static inferType(docId: string): string {
    const parts = docId.split("/");
    const typeIndex = parts[0].length === 2 ? 1 : 0;
    return parts.length > typeIndex + 1 ? parts[typeIndex] : "page";
  }

  /**
   * Reads and parses one markdown file into a Page.
   * @returns the parsed page, or null on any read/parse error (logged).
   */
  private parseMarkdownFile(filePath: string): Page | null {
    try {
      const raw = fs.readFileSync(filePath, "utf-8");
      const { data, content: body } = matter(raw);
      const docId = this.toDocId(filePath);
      const stats = fs.statSync(filePath);
      return {
        doc_id: docId,
        title: data.title || path.basename(filePath, ".md"),
        path: "/" + docId,
        type: data.type || Indexer.inferType(docId),
        tags: Indexer.normalizeTags(data.tags),
        updated_at: stats.mtime.toISOString(),
        version: data.version || "unknown",
        body_md: body,
        links: Indexer.extractLinks(body),
      };
    } catch (error) {
      console.error(`Error parsing ${filePath}:`, error);
      return null;
    }
  }

  /**
   * Rebuilds the entire index from the wiki repo.
   * Runs inside a single transaction so a crash mid-rebuild cannot leave a
   * half-empty index, and so the N inserts are not N implicit transactions.
   * @returns number of pages successfully indexed.
   */
  public reindex(): number {
    const pattern = path.join(WIKI_REPO_PATH, "**/*.md").replace(/\\/g, "/");
    const files = globSync(pattern);
    const insert = this.db.prepare(Indexer.UPSERT_SQL);
    const rebuild = this.db.transaction((paths: string[]): number => {
      this.db.exec("DELETE FROM pages");
      let indexed = 0;
      for (const file of paths) {
        const page = this.parseMarkdownFile(file);
        if (page) {
          insert.run(this.toRow(page));
          indexed++;
        }
      }
      return indexed;
    });
    const count = rebuild(files);
    console.error(`Indexed ${count} pages`);
    return count;
  }

  /**
   * Full-text search over titles and bodies.
   * @param query FTS5 MATCH expression (FTS5 syntax errors propagate to the caller).
   * @param filters optional exact type, tag containment, and path-prefix filters.
   * @param k maximum number of results.
   * @returns hits ordered best-first, with a highlighted snippet and a citation stub.
   */
  public search(
    query: string,
    filters?: { type?: string; tags?: string[]; path_prefix?: string },
    k: number = 10
  ): Array<{ score: number; title: string; path: string; snippet: string; citation: object }> {
    let sql = `
      SELECT
        pages.doc_id,
        pages.title,
        pages.path,
        pages.type,
        pages.tags,
        pages.version,
        snippet(pages_fts, 1, '<mark>', '</mark>', '...', 32) as snippet,
        bm25(pages_fts) as score
      FROM pages_fts
      JOIN pages ON pages.rowid = pages_fts.rowid
      WHERE pages_fts MATCH ?
    `;
    const params: unknown[] = [query];
    if (filters?.type) {
      sql += " AND pages.type = ?";
      params.push(filters.type);
    }
    if (filters?.path_prefix) {
      sql += " AND pages.path LIKE ?";
      params.push(filters.path_prefix + "%");
    }
    if (filters?.tags && filters.tags.length > 0) {
      // tags column is a JSON array string, so '"tag"' containment works.
      for (const tag of filters.tags) {
        sql += " AND pages.tags LIKE ?";
        params.push(`%"${tag}"%`);
      }
    }
    // bm25() is negative, lower = better, so ascending order is best-first.
    sql += " ORDER BY score LIMIT ?";
    params.push(k);
    const rows = this.db.prepare(sql).all(...params) as any[];
    return rows.map((row) => ({
      score: Math.abs(row.score),
      title: row.title,
      path: row.path,
      snippet: row.snippet,
      citation: {
        path: row.path,
        version: row.version,
      },
    }));
  }

  /**
   * Fetches one page by URL path (a missing leading "/" is tolerated).
   * @returns the page with tags/links decoded, or null if not found.
   */
  public getPage(pagePath: string): Page | null {
    const normalizedPath = pagePath.startsWith("/") ? pagePath : "/" + pagePath;
    const row = this.db
      .prepare("SELECT * FROM pages WHERE path = ?")
      .get(normalizedPath) as any;
    if (!row) return null;
    return {
      ...row,
      tags: JSON.parse(row.tags),
      links: JSON.parse(row.links),
    };
  }

  /**
   * Lists page metadata (no bodies), optionally filtered by exact type
   * and/or tag containment, ordered by path.
   */
  public listPages(filters?: { type?: string; tag?: string }): PageMetadata[] {
    let sql = "SELECT doc_id, title, path, type, tags, updated_at, version FROM pages WHERE 1=1";
    const params: unknown[] = [];
    if (filters?.type) {
      sql += " AND type = ?";
      params.push(filters.type);
    }
    if (filters?.tag) {
      sql += " AND tags LIKE ?";
      params.push(`%"${filters.tag}"%`);
    }
    sql += " ORDER BY path";
    const rows = this.db.prepare(sql).all(...params) as any[];
    return rows.map((row) => ({
      ...row,
      tags: JSON.parse(row.tags),
    }));
  }

  /**
   * Extracts a 1-indexed, inclusive line range from a page body for citation.
   * Out-of-range bounds are clamped to the body.
   * @returns the quoted text plus a citation object, or null if the page is missing.
   */
  public quote(
    pagePath: string,
    lineStart: number,
    lineEnd: number
  ): { text: string; citation: object } | null {
    const page = this.getPage(pagePath);
    if (!page) return null;
    const lines = page.body_md.split("\n");
    const start = Math.max(0, lineStart - 1);
    const end = Math.min(lines.length, lineEnd);
    const text = lines.slice(start, end).join("\n");
    return {
      text,
      citation: {
        path: page.path,
        version: page.version,
        line_start: lineStart,
        line_end: lineEnd,
      },
    };
  }

  /** Stops the watcher (close() is async; the pending promise is intentionally dropped) and closes the DB. */
  public close(): void {
    if (this.watcher) {
      void this.watcher.close();
      this.watcher = null;
    }
    this.db.close();
  }
}