// PolyMarket MCP Server
// by berlinbra
// Verified
// - src
import { HNSWLib } from "@langchain/community/vectorstores/hnswlib";
import { Document } from "@langchain/core/documents";
import { ProcessedDocument, SearchResult, StoreStats } from "./types.js";
import { TransformersEmbeddings } from "./embeddings.js";
import * as fs from 'fs/promises';
import * as path from 'path';
/**
 * Vector index over processed document chunks, backed by an HNSWLib store.
 *
 * Besides the index itself, the class keeps:
 * - a per-source map of the documents it has indexed, which is what makes
 *   "remove/update everything that came from this source" possible, and
 * - aggregate statistics (document counts, watched directories, number of
 *   files currently being processed).
 */
export class VectorStore {
  /** Underlying index; `null` while there are no indexed documents. */
  private store: HNSWLib | null = null;
  private readonly embeddings: TransformersEmbeddings;
  private stats: StoreStats = {
    totalDocuments: 0,
    documentsByType: {},
    watchedDirectories: [],
    filesBeingProcessed: 0
  };
  /** Indexed documents grouped by their `metadata.source`. */
  private readonly documentsBySource = new Map<string, Document[]>();
  /** Paths of files whose processing has started but not yet finished. */
  private readonly processingFiles = new Set<string>();

  constructor() {
    this.embeddings = new TransformersEmbeddings();
  }

  /**
   * Marks a file as being processed. Adding the same path twice counts once,
   * because the paths are held in a set.
   */
  incrementProcessingCount(filePath: string): void {
    this.processingFiles.add(filePath);
    this.stats.filesBeingProcessed = this.processingFiles.size;
  }

  /** Marks a file as no longer being processed. Unknown paths are ignored. */
  decrementProcessingCount(filePath: string): void {
    this.processingFiles.delete(filePath);
    this.stats.filesBeingProcessed = this.processingFiles.size;
  }

  /**
   * Embeds and indexes the given chunks, creating the underlying store on
   * first use, then records them in the per-source map and the statistics.
   *
   * @param docs Chunks to index. An empty array is a no-op.
   */
  async addDocuments(docs: ProcessedDocument[]): Promise<void> {
    // Building a store from zero documents would leave it without an index,
    // so `isInitialized()` would report true while searching still fails.
    if (docs.length === 0) return;

    const documents = docs.map(
      (doc) =>
        new Document({
          pageContent: doc.content,
          metadata: doc.metadata,
        })
    );

    if (this.store === null) {
      this.store = await HNSWLib.fromDocuments(documents, this.embeddings);
    } else {
      await this.store.addDocuments(documents);
    }

    this.trackDocuments(documents);

    this.stats.totalDocuments += documents.length;
    for (const doc of docs) {
      const fileType = doc.metadata.fileType;
      this.stats.documentsByType[fileType] =
        (this.stats.documentsByType[fileType] ?? 0) + 1;
    }
  }

  /**
   * Removes every indexed document that came from `source`.
   *
   * HNSWLib does not support selective deletion, so the index is rebuilt from
   * the documents of all other sources. When nothing remains, the store is
   * reset to the uninitialized state instead of keeping an empty index.
   *
   * Does nothing when the source is not tracked or no store exists.
   */
  async removeDocumentsBySource(source: string): Promise<void> {
    const removed = this.documentsBySource.get(source);
    if (!removed || !this.store) return;

    const remaining: Document[] = [];
    for (const [key, group] of this.documentsBySource) {
      if (key !== source) remaining.push(...group);
    }

    // Rebuild first; bookkeeping is only touched once the rebuild succeeded,
    // so a failed rebuild leaves the previous state intact.
    this.store =
      remaining.length > 0
        ? await HNSWLib.fromDocuments(remaining, this.embeddings)
        : null;

    this.documentsBySource.delete(source);

    this.stats.totalDocuments = Math.max(
      0,
      this.stats.totalDocuments - removed.length
    );
    // Decrement per document rather than assuming the whole group shares the
    // file type of its first element.
    for (const doc of removed) {
      const fileType = doc.metadata.fileType;
      const left = (this.stats.documentsByType[fileType] ?? 0) - 1;
      if (left > 0) {
        this.stats.documentsByType[fileType] = left;
      } else {
        delete this.stats.documentsByType[fileType];
      }
    }
  }

  /**
   * Replaces the indexed documents of every source that appears in `docs`
   * with the supplied ones. Sources are processed one after another: old
   * documents are removed, then the new ones are added.
   */
  async updateDocuments(docs: ProcessedDocument[]): Promise<void> {
    if (docs.length === 0) return;

    const docsBySource = new Map<string, ProcessedDocument[]>();
    for (const doc of docs) {
      const source = doc.metadata.source;
      const group = docsBySource.get(source);
      if (group) {
        group.push(doc);
      } else {
        docsBySource.set(source, [doc]);
      }
    }

    for (const [source, sourceDocs] of docsBySource) {
      await this.removeDocumentsBySource(source);
      await this.addDocuments(sourceDocs);
    }
  }

  /**
   * Returns up to `limit` stored chunks for `query`, each with the score
   * reported by the underlying store.
   *
   * @throws Error if the store holds no documents yet.
   */
  async similaritySearch(query: string, limit: number = 5): Promise<SearchResult[]> {
    if (!this.store) {
      throw new Error("Vector store not initialized");
    }
    const results = await this.store.similaritySearchWithScore(query, limit);
    return results.map(([doc, score]) => ({
      content: doc.pageContent,
      metadata: doc.metadata as {
        source: string;
        fileType: string;
        lastModified: number;
        chunkIndex: number;
        totalChunks: number;
      },
      score,
    }));
  }

  /**
   * Persists the index and a `stats.json` file into `directory`, creating the
   * directory if needed.
   *
   * @throws Error if the store holds no documents yet.
   */
  async save(directory: string): Promise<void> {
    if (!this.store) {
      throw new Error("Vector store not initialized");
    }
    await fs.mkdir(directory, { recursive: true });
    await this.store.save(directory);
    await fs.writeFile(
      path.join(directory, "stats.json"),
      JSON.stringify(this.stats, null, 2)
    );
  }

  /**
   * Restores the index and statistics previously written by {@link save}.
   *
   * Nothing on this instance changes unless both the index and `stats.json`
   * were read successfully. The per-source map is rebuilt from the loaded
   * documents so that later removals and updates also cover them, and
   * `filesBeingProcessed` is taken from the live set instead of the persisted
   * (stale) value.
   *
   * @throws Error wrapping the underlying failure message.
   */
  async load(directory: string): Promise<void> {
    try {
      const store = await HNSWLib.load(directory, this.embeddings);
      const statsContent = await fs.readFile(
        path.join(directory, "stats.json"),
        "utf-8"
      );
      const parsed: Partial<StoreStats> = JSON.parse(statsContent);
      const stats: StoreStats = {
        ...parsed,
        totalDocuments: parsed.totalDocuments ?? 0,
        documentsByType: { ...parsed.documentsByType },
        watchedDirectories: [...(parsed.watchedDirectories ?? [])],
        filesBeingProcessed: this.processingFiles.size,
      };
      // NOTE(review): relies on the in-memory docstore exposing its documents
      // as the `_docs` map — confirm when upgrading @langchain/community.
      const loadedDocuments = Array.from(store.docstore._docs.values());

      // Commit only after every read above succeeded.
      this.store = store;
      this.stats = stats;
      this.documentsBySource.clear();
      this.trackDocuments(loadedDocuments);
    } catch (error) {
      throw new Error(`Failed to load vector store: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }

  /** Whether an underlying store currently exists. */
  isInitialized(): boolean {
    return this.store !== null;
  }

  /**
   * Returns a snapshot of the statistics. The nested containers are copied,
   * so mutating the result does not affect this instance.
   */
  getStats(): StoreStats {
    return {
      ...this.stats,
      documentsByType: { ...this.stats.documentsByType },
      watchedDirectories: [...this.stats.watchedDirectories],
    };
  }

  /** Replaces the list of watched directories with a copy of `directories`. */
  setWatchedDirectories(directories: string[]): void {
    this.stats.watchedDirectories = [...directories];
  }

  /** Appends each document to the per-source group named by its `metadata.source`. */
  private trackDocuments(documents: Document[]): void {
    for (const doc of documents) {
      const source = doc.metadata.source;
      const group = this.documentsBySource.get(source);
      if (group) {
        group.push(doc);
      } else {
        this.documentsBySource.set(source, [doc]);
      }
    }
  }
}