Code Context MCP Server
by fkesheh
import { z } from "zod";
import { simpleGit } from "simple-git";
import path from "path";
import fs from "fs";
import dbInterface from "../utils/db.js";
import {
cloneRepository,
getRepositoryFiles,
getDefaultBranch,
} from "../utils/gitUtils.js";
import { extensionToSplitter, splitDocument } from "../utils/codeSplitter.js";
import { generateOllamaEmbeddings } from "../utils/ollamaEmbeddings.js";
import { createFilePatternCondition } from "../utils/filePatternMatcher.js";
import config from "../config.js";
// Define input schemas for tools
export const QueryRepoSchema = z.object({
repoUrl: z.string().describe("GitHub repository URL"),
branch: z
.string()
.optional()
.describe("Branch name to query (defaults to repository's default branch)"),
query: z.string().describe("Search query"),
keywords: z
.array(z.string())
.optional()
.describe(
"Optional array of keywords to filter results (results must contain at least one keyword)"
),
filePatterns: z
.array(z.string())
.optional()
.describe(
"Optional array of glob patterns to filter files (e.g. '**/*.ts', 'src/*.js')"
),
excludePatterns: z
.array(z.string())
.optional()
.describe(
"Optional array of glob patterns to exclude files (e.g. '**/node_modules/**', '**/dist/**')"
),
limit: z.number().optional().describe("Maximum number of results to return"),
_meta: z
.object({
progressToken: z.union([z.string(), z.number()]).optional(),
})
.optional(),
});
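// Example input accepted by QueryRepoSchema (values are illustrative, not
// taken from a real repository):
// {
//   repoUrl: "https://github.com/owner/repo.git",
//   query: "how are embeddings generated?",
//   keywords: ["embedding", "ollama"],
//   filePatterns: ["src/**/*.ts"],
//   excludePatterns: ["**/node_modules/**"],
//   limit: 10,
// }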
// Define chunk interface
interface Chunk {
content: string;
chunkNumber: number;
tokenCount: number;
}
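// Callback for streaming progress updates to the MCP client while files are
// being indexed (driven by the request's optional progressToken).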
export interface ProgressNotifier {
sendProgress: (progress: number, total: number) => Promise<void>;
}
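// queryRepo pipeline: clone (or reuse) the repository, register the repo and
// branch in SQLite, diff the current file list against what is stored, chunk
// and embed new or changed files with Ollama, then run a vector similarity
// search for the query. Informational logs go to console.error so that
// stdout stays free for MCP protocol traffic.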
export async function queryRepo(
input: z.infer<typeof QueryRepoSchema>,
progressNotifier?: ProgressNotifier
) {
try {
console.error(
`[queryRepo] Starting with parameters: ${JSON.stringify(input)}`
);
// Check if input is defined
if (!input) {
console.error(`[queryRepo] Error: Input parameters are undefined`);
return {
error: {
message: "Input parameters are required for queryRepo tool",
},
};
}
const startTime = Date.now();
const {
repoUrl,
branch,
query,
limit,
keywords,
filePatterns,
excludePatterns,
} = input;
const progressToken = input._meta?.progressToken;
// Validate required parameters
if (!repoUrl || !query) {
console.error(`[queryRepo] Error: Missing required parameters`);
return {
error: {
message: "Required parameters (repoUrl, query) are missing",
},
};
}
const reposDir = config.REPO_CACHE_DIR;
// If branch is not specified, get the default branch from the repository
let actualBranch = branch || "";
if (!branch) {
console.error(`[queryRepo] Branch not specified, getting default branch`);
// Extract owner and repo from URL
const urlMatch = repoUrl.match(
/github\.com[:\/]([^\/]+)\/([^\/\.]+)(\.git)?$/
);
if (!urlMatch) {
console.error(`[queryRepo] Error: Could not parse repository URL`);
return {
error: {
message: "Invalid repository URL format",
},
};
}
const [, owner, repo] = urlMatch;
try {
// Get the default branch
console.error(
`[queryRepo] Getting default branch for ${owner}/${repo}`
);
actualBranch = await getDefaultBranch(owner, repo);
console.error(`[queryRepo] Using default branch: ${actualBranch}`);
} catch (error) {
console.error(`[queryRepo] Error getting default branch:`, error);
// Fallback to 'main' if we can't determine the default branch
actualBranch = "main";
console.error(`[queryRepo] Falling back to branch: ${actualBranch}`);
}
}
console.error(
`[queryRepo] Cloning repository: ${repoUrl}, branch: ${actualBranch}`
);
const repoLocalPath = await cloneRepository(repoUrl, reposDir);
console.error(
`[queryRepo] Repository cloned to: ${repoLocalPath} (${
Date.now() - startTime
}ms)`
);
// Extract repo name from URL
const repoName = path.basename(repoUrl, ".git");
// Check if repo exists in database
console.error(
`[queryRepo] Checking if repo exists in database: ${repoName}`
);
const repoExists = dbInterface.get(
"SELECT id FROM repository WHERE name = ?",
repoName
);
let repoId;
if (repoExists) {
repoId = repoExists.id;
console.error(
`[queryRepo] Repository found in database with ID: ${repoId}`
);
} else {
// Register repository
console.error(`[queryRepo] Registering new repository: ${repoName}`);
const result = dbInterface.run(
"INSERT INTO repository (name, path) VALUES (?, ?)",
[repoName, repoLocalPath]
);
repoId = result.lastInsertRowid;
console.error(`[queryRepo] Repository registered with ID: ${repoId}`);
}
// Get the latest commit SHA
console.error(`[queryRepo] Checking out branch: ${actualBranch}`);
const git = simpleGit(repoLocalPath);
// Ensure actualBranch is not undefined for checkout and revparse
if (!actualBranch) {
actualBranch = "main"; // Fallback to main if somehow still undefined
}
await git.checkout(actualBranch);
const latestCommit = await git.revparse(actualBranch);
console.error(`[queryRepo] Latest commit SHA: ${latestCommit}`);
// Check if branch exists and has the same commit SHA
console.error(`[queryRepo] Checking if branch exists in database`);
const branchExists = dbInterface.get(
"SELECT id, last_commit_sha FROM branch WHERE name = ? AND repository_id = ?",
[actualBranch, repoId]
);
let branchId;
let needsUpdate = false;
if (branchExists) {
branchId = branchExists.id;
console.error(
`[queryRepo] Branch found in database with ID: ${branchId}`
);
// Only process files if the commit has changed
if (branchExists.last_commit_sha !== latestCommit) {
needsUpdate = true;
console.error(
`[queryRepo] Commit SHA changed, updating branch: ${branchId}`
);
// Update branch commit SHA
dbInterface.run(
"UPDATE branch SET last_commit_sha = ?, status = 'pending' WHERE id = ?",
[latestCommit, branchId]
);
} else {
console.error(`[queryRepo] Commit SHA unchanged, skipping update`);
}
} else {
// Register the branch
console.error(`[queryRepo] Registering new branch: ${actualBranch}`);
const result = dbInterface.run(
"INSERT INTO branch (name, repository_id, last_commit_sha, status) VALUES (?, ?, ?, 'pending')",
[actualBranch, repoId, latestCommit]
);
branchId = result.lastInsertRowid;
needsUpdate = true;
console.error(`[queryRepo] Branch registered with ID: ${branchId}`);
}
// Process the repository files if needed
if (needsUpdate) {
console.error(
`[queryRepo] Processing repository files (${Date.now() - startTime}ms)`
);
// Get all files in the repository
const { files } = await getRepositoryFiles(repoLocalPath, actualBranch);
console.error(`[queryRepo] Found ${files.length} files in repository`);
// Define transaction function
console.error(`[queryRepo] Starting file database transaction`);
const processFiles = (db: any) => {
// Get existing files to compare
const existingFiles = db
.prepare(
`SELECT f.id, f.path, f.sha FROM file f
JOIN branch_file_association bfa ON f.id = bfa.file_id
WHERE bfa.branch_id = ?`
)
.all(branchId);
console.error(
`[queryRepo] Found ${existingFiles.length} existing files in database`
);
const existingFileMap = new Map();
for (const file of existingFiles) {
existingFileMap.set(file.path, file);
}
// Track files that need processing
const filesToProcess: any[] = [];
// File counters for logging
let newFiles = 0;
let updatedFiles = 0;
let unchangedFiles = 0;
let removedFiles = 0;
// Process each file
for (const file of files) {
const existingFile = existingFileMap.get(file.path);
existingFileMap.delete(file.path); // Remove from map to track what's left later
if (!existingFile) {
// New file
newFiles++;
const result = db
.prepare(
"INSERT INTO file (repository_id, path, sha, name, status) VALUES (?, ?, ?, ?, 'pending')"
)
.run(repoId, file.path, file.sha, file.name);
const fileId = result.lastInsertRowid;
// Associate with branch
db.prepare(
"INSERT INTO branch_file_association (branch_id, file_id) VALUES (?, ?)"
).run(branchId, fileId);
filesToProcess.push({
id: fileId,
path: file.path,
name: file.name,
});
} else if (existingFile.sha !== file.sha) {
// Updated file - SHA changed
updatedFiles++;
db.prepare(
"UPDATE file SET sha = ?, status = 'pending' WHERE id = ?"
).run(file.sha, existingFile.id);
filesToProcess.push({
id: existingFile.id,
path: file.path,
name: file.name,
});
} else {
// Unchanged file
unchangedFiles++;
}
}
// Remove files that no longer exist in the branch
for (const file of existingFileMap.values()) {
removedFiles++;
db.prepare(
"DELETE FROM branch_file_association WHERE branch_id = ? AND file_id = ?"
).run(branchId, file.id);
// If no other branches reference this file, delete it and its chunks
const fileStillInUse = db
.prepare(
"SELECT 1 FROM branch_file_association WHERE file_id = ? LIMIT 1"
)
.get(file.id);
if (!fileStillInUse) {
// Delete chunks first
db.prepare("DELETE FROM file_chunk WHERE file_id = ?").run(file.id);
// Then delete the file
db.prepare("DELETE FROM file WHERE id = ?").run(file.id);
}
}
console.error(
`[queryRepo] Files summary: ${newFiles} new, ${updatedFiles} updated, ${unchangedFiles} unchanged, ${removedFiles} removed`
);
return filesToProcess;
};
// Execute the transaction
console.error(`[queryRepo] Executing file processing transaction`);
const filesToProcess = dbInterface.transaction((db) => processFiles(db));
console.error(
`[queryRepo] Transaction completed, processing ${
filesToProcess.length
} files (${Date.now() - startTime}ms)`
);
// Cap the number of files processed in a single run to avoid timeouts.
// The default cap is effectively unlimited; lower it if indexing large
// repositories times out.
const MAX_FILES_TO_PROCESS = 1000000;
const limitedFiles = filesToProcess.slice(0, MAX_FILES_TO_PROCESS);
if (limitedFiles.length < filesToProcess.length) {
console.error(
`[queryRepo] WARNING: Processing only ${limitedFiles.length} of ${filesToProcess.length} files to avoid timeout`
);
}
// Process content and generate embeddings for new/updated files outside of transaction
let processedFiles = 0;
let totalChunks = 0;
for (const file of limitedFiles) {
try {
console.error(
`[queryRepo] Processing file ${processedFiles + 1}/${
limitedFiles.length
}: ${file.path}`
);
const filePath = path.join(repoLocalPath, file.path);
if (fs.existsSync(filePath)) {
const content = fs.readFileSync(filePath, "utf-8");
const extension = filePath.split(".").pop()?.toLowerCase();
const splitterType = extension
? extensionToSplitter(extension)
: "ignore";
if (splitterType === "ignore") {
continue;
}
console.error(
`[queryRepo] Splitting file: ${file.path} with splitter type: ${splitterType}`
);
// Split the document into chunks
const docChunks = await splitDocument(content, splitterType);
console.error(
`[queryRepo] Split file into ${docChunks.length} chunks`
);
// Convert Document objects to our Chunk interface
const chunks: Chunk[] = docChunks.map((doc, index) => ({
content: doc.pageContent,
chunkNumber: index + 1,
tokenCount: Math.ceil(doc.pageContent.length / 4), // Approximate token count
}));
// Delete existing chunks for this file
console.error(
`[queryRepo] Deleting existing chunks for file: ${file.id}`
);
dbInterface.run(
"DELETE FROM file_chunk WHERE file_id = ?",
file.id
);
// Embed chunks in batches of 20 so each Ollama request stays small
const chunkBatches: Chunk[][] = [];
for (let i = 0; i < chunks.length; i += 20) {
chunkBatches.push(chunks.slice(i, i + 20));
}
console.error(
`[queryRepo] Created ${chunkBatches.length} batches of chunks`
);
for (
let batchIndex = 0;
batchIndex < chunkBatches.length;
batchIndex++
) {
console.error(
`[queryRepo] Processing batch ${batchIndex + 1}/${
chunkBatches.length
}`
);
const batch = chunkBatches[batchIndex];
const chunkContents = batch.map((c: Chunk) => c.content);
// Generate embeddings for chunks
console.error(
`[queryRepo] Generating embeddings for ${batch.length} chunks`
);
const embeddingStartTime = Date.now();
const embeddings = await generateOllamaEmbeddings(chunkContents);
console.error(
`[queryRepo] Generated embeddings in ${
Date.now() - embeddingStartTime
}ms`
);
// Store chunks with embeddings in transaction for better performance
console.error(`[queryRepo] Storing chunks with embeddings`);
dbInterface.transaction((db) => {
const insertChunkStmt = db.prepare(`INSERT INTO file_chunk (
file_id, content, chunk_number, embedding, model_version, token_count
) VALUES (?, ?, ?, ?, ?, ?)`);
for (let i = 0; i < batch.length; i++) {
const chunk = batch[i];
const embedding = JSON.stringify(embeddings[i]);
insertChunkStmt.run(
file.id,
chunk.content,
chunk.chunkNumber,
embedding,
config.EMBEDDING_MODEL.model,
chunk.tokenCount
);
}
});
totalChunks += batch.length;
// Send progress notification
if (progressNotifier) {
const progress = (processedFiles + 1) / limitedFiles.length;
await progressNotifier.sendProgress(progress, 1);
}
}
// Update file status
console.error(
`[queryRepo] Updating file status to done: ${file.id}`
);
dbInterface.run(
"UPDATE file SET status = 'done' WHERE id = ?",
file.id
);
processedFiles++;
} else {
console.error(`[queryRepo] File does not exist: ${filePath}`);
}
} catch (error) {
console.error(
`[queryRepo] Error processing file ${file.path}:`,
error
);
dbInterface.run(
"UPDATE file SET status = 'fetched' WHERE id = ?",
file.id
);
}
}
console.error(
`[queryRepo] Processed ${processedFiles} files with ${totalChunks} total chunks (${
Date.now() - startTime
}ms)`
);
// Update branch status based on whether we processed all files or just a subset
if (limitedFiles.length < filesToProcess.length) {
console.error(
`[queryRepo] Setting branch status to 'files_processed' due to processing limit`
);
dbInterface.run(
"UPDATE branch SET status = 'files_processed' WHERE id = ?",
branchId
);
} else {
console.error(
`[queryRepo] Setting branch status to 'embeddings_generated'`
);
dbInterface.run(
"UPDATE branch SET status = 'embeddings_generated' WHERE id = ?",
branchId
);
}
}
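// Note: the query must be embedded with the same Ollama model as the chunk
// embeddings for the dot-product similarity scores below to be comparable.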
// Generate embedding for the query
console.error(`[queryRepo] Generating embedding for query: "${query}"`);
const queryEmbedStart = Date.now();
const [queryEmbedding] = await generateOllamaEmbeddings([query]);
const queryEmbeddingStr = JSON.stringify(queryEmbedding);
console.error(
`[queryRepo] Generated query embedding in ${
Date.now() - queryEmbedStart
}ms`
);
// Search for similar chunks using SQLite's JSON functions for vector similarity
console.error(
`[queryRepo] Searching for similar chunks with limit: ${limit}`
);
const searchStart = Date.now();
// Use a default limit of 10 if undefined
const effectiveLimit = limit === undefined ? 10 : limit;
// Create SQL condition for file pattern filtering
const filePatternCondition = createFilePatternCondition(
filePatterns,
excludePatterns
);
const results = dbInterface.all(
`
SELECT fc.content, f.path, fc.chunk_number,
-- similarity = dot product of the stored chunk embedding and the query
-- embedding (both JSON arrays of floats); higher means more similar
(SELECT SUM(vec.value * json_extract(?, '$[' || vec.key || ']'))
FROM json_each(fc.embedding) AS vec
) as similarity
FROM file_chunk fc
JOIN file f ON fc.file_id = f.id
JOIN branch_file_association bfa ON f.id = bfa.file_id
WHERE bfa.branch_id = ?
AND fc.embedding IS NOT NULL
${filePatternCondition}
ORDER BY similarity DESC
LIMIT ?
`,
[queryEmbeddingStr, branchId, effectiveLimit]
);
console.error(
`[queryRepo] Search completed in ${Date.now() - searchStart}ms, found ${
results.length
} results`
);
// Filter results by keywords if provided
let filteredResults = results;
if (keywords && keywords.length > 0) {
console.error(
`[queryRepo] Filtering results by keywords: ${keywords.join(", ")}`
);
const keywordFilterStart = Date.now();
// Convert keywords to lowercase for case-insensitive matching
const lowercaseKeywords = keywords.map((kw) => kw.toLowerCase());
filteredResults = results.filter((result: { content: string }) => {
const content = result.content.toLowerCase();
// Check if the content contains at least one of the keywords
return lowercaseKeywords.some((keyword) => content.includes(keyword));
});
console.error(
`[queryRepo] Keyword filtering completed in ${
Date.now() - keywordFilterStart
}ms, filtered from ${results.length} to ${
filteredResults.length
} results`
);
}
const totalTime = Date.now() - startTime;
console.error(`[queryRepo] Tool completed in ${totalTime}ms`);
return {
output: {
success: true,
repoUrl,
branch: actualBranch,
processingTimeMs: totalTime,
results: filteredResults.map((result: any) => ({
filePath: result.path,
chunkNumber: result.chunk_number,
content: result.content,
similarity: result.similarity,
})),
},
};
} catch (error) {
console.error(`[queryRepo] Error executing tool:`, error);
return {
error: {
message: `Error executing queryRepo tool: ${
error instanceof Error ? error.message : String(error)
}`,
},
};
}
}
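// Example invocation (a minimal sketch; the repository URL, query, and the
// inline ProgressNotifier below are illustrative assumptions, not part of
// this module):
//
// const result = await queryRepo(
//   {
//     repoUrl: "https://github.com/owner/repo.git",
//     query: "where are embeddings generated?",
//     filePatterns: ["**/*.ts"],
//     limit: 5,
//   },
//   {
//     sendProgress: async (progress, total) => {
//       console.error(`progress: ${progress}/${total}`);
//     },
//   }
// );
// // result.output.results: [{ filePath, chunkNumber, content, similarity }, ...]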