Code MCP
by ezyang
- codemcp
- tools
#!/usr/bin/env python3
import asyncio
import logging
import os
import tomli
from ..common import MAX_LINE_LENGTH, MAX_LINES_TO_READ, normalize_file_path
from ..git import get_repository_root, is_git_repository
__all__ = [
"init_project",
]
def _generate_command_docs(command_docs: dict) -> str:
"""Generate documentation for commands from the command_docs dictionary.
Args:
command_docs: A dictionary of command names to their documentation
Returns:
A formatted string with command documentation
"""
if not command_docs:
return ""
docs = []
for cmd_name, doc in command_docs.items():
docs.append(f"\n- {cmd_name}: {doc}")
return "\n\nCommand documentation:" + "".join(docs)
async def _generate_chat_id(directory: str) -> str:
"""Generate a unique chat ID based on a counter stored in the git repository.
Args:
directory: The directory path within the git repository
Returns:
A string containing the unique chat ID, e.g., "23-initproject-unique-id"
"""
# Generate a human-readable short identifier
human_readable_part = "untitled"
try:
# Check if we're in a git repository
if not await is_git_repository(directory):
logging.warning(f"Not in a git repository: {directory}")
# Return a fallback ID if not in a git repository
return f"0-{human_readable_part}"
# Get the repository root
repo_root = await get_repository_root(directory)
# Create .git/codemcp directory if it doesn't exist
codemcp_dir = os.path.join(repo_root, ".git", "codemcp")
os.makedirs(codemcp_dir, exist_ok=True)
# Path to the counter file
counter_file = os.path.join(codemcp_dir, "counter")
# Read the current counter value or initialize to 0
counter_value = 0
asyncio.get_event_loop()
if os.path.exists(counter_file):
try:
from .async_file_utils import async_open_text
read_counter = await async_open_text(counter_file)
counter_value = int(read_counter.strip())
except (ValueError, IOError) as e:
logging.warning(f"Error reading counter file: {e}")
# Increment the counter
counter_value += 1
# Write the new counter value
try:
from .async_file_utils import async_write_text
await async_write_text(counter_file, str(counter_value))
except IOError as e:
logging.warning(f"Error writing to counter file: {e}")
# Return the chat ID in the format "number-human-readable-part"
return f"{counter_value}-{human_readable_part}"
except Exception as e:
logging.warning(f"Exception generating chat ID: {e!s}", exc_info=True)
# Return a fallback ID in case of errors
return f"0-{human_readable_part}"
async def init_project(directory: str) -> str:
"""Initialize a project by reading the codemcp.toml TOML file and returning
a combined system prompt.
Args:
directory: The directory path containing the codemcp.toml file
Returns:
A string containing the system prompt plus any project_prompt from the config
"""
try:
# Normalize the directory path
full_dir_path = normalize_file_path(directory)
# Validate the directory path
if not os.path.exists(full_dir_path):
return f"Error: Directory does not exist: {directory}"
if not os.path.isdir(full_dir_path):
return f"Error: Path is not a directory: {directory}"
# Generate a unique chat ID
chat_id = await _generate_chat_id(full_dir_path)
# Build path to codemcp.toml file
rules_file_path = os.path.join(full_dir_path, "codemcp.toml")
project_prompt = ""
command_help = ""
command_docs = {}
rules_config = {}
# Check if codemcp.toml file exists
if os.path.exists(rules_file_path):
try:
from .async_file_utils import async_open_binary
rules_data = await async_open_binary(rules_file_path)
# tomli.loads expects a string, but we have bytes, so use tomli.load with an io.BytesIO object
import io
rules_config = tomli.load(io.BytesIO(rules_data))
# Extract project_prompt if it exists
if "project_prompt" in rules_config:
project_prompt = rules_config["project_prompt"]
# Extract commands and their documentation
command_list = rules_config.get("commands", {})
command_help = ", ".join(command_list.keys())
# Process command documentation
for cmd_name, cmd_config in command_list.items():
if isinstance(cmd_config, dict) and "doc" in cmd_config:
command_docs[cmd_name] = cmd_config["doc"]
except Exception as e:
logging.warning(
f"Exception suppressed when reading codemcp.toml: {e!s}",
exc_info=True,
)
return f"Error reading codemcp.toml file: {e!s}"
# Default system prompt, cribbed from claude code
# TODO: Figure out if we want Sonnet to make determinations about what
# goes in the global prompt. The current ARCHITECTURE.md rule is
# mostly to make sure we don't lose important information that was
# conveyed in chats.
# TODO: This prompt is pretty long, maybe we want it shorter
# NB: If you edit this, also edit codemcp/main.py
system_prompt = f"""\
You are an AI assistant that helps users with software engineering tasks. Use the instructions below and the tools available to you to assist the user.
# Tone and style
IMPORTANT: You should minimize output tokens as much as possible while maintaining helpfulness, quality, and accuracy. Only address the specific query or task at hand, avoiding tangential information unless absolutely critical for completing the request. If you can answer in 1-3 sentences or a short paragraph, please do.
IMPORTANT: You should NOT answer with unnecessary preamble or postamble (such as explaining your code or summarizing your action), unless the user asks you to.
# Proactiveness
You are allowed to be proactive, but only when the user asks you to do something. You should strive to strike a balance between:
1. Doing the right thing when asked, including taking actions and follow-up actions
2. Not surprising the user with actions you take without asking
For example, if the user asks you how to approach something, you should do your best to answer their question first, and not immediately jump into taking actions.
3. Do not add additional code explanation summary unless requested by the user. After working on a file, just stop, rather than providing an explanation of what you did.
# Following conventions
When making changes to files, first understand the file's code conventions. Mimic code style, use existing libraries and utilities, and follow existing patterns.
- NEVER assume that a given library is available, even if it is well known. Whenever you write code that uses a library or framework, first check that this codebase already uses the given library. For example, you might look at neighboring files, or check the package.json (or cargo.toml, and so on depending on the language).
- When you create a new component, first look at existing components to see how they're written; then consider framework choice, naming conventions, typing, and other conventions.
- When you edit a piece of code, first look at the code's surrounding context (especially its imports) to understand the code's choice of frameworks and libraries. Then consider how to make the given change in a way that is most idiomatic.
- Always follow security best practices. Never introduce code that exposes or logs secrets and keys. Never commit secrets or keys to the repository.
# Code style
- Do not add comments to the code you write, unless the user asks you to, or the code is complex and requires additional context.
# Tool usage policy
- If you intend to call multiple tools and there are no dependencies between the calls, make all of the independent calls in the same function_calls block.
# codemcp tool
The codemcp tool supports a number of subtools which you should use to perform coding tasks.
## ReadFile path offset? limit?
Reads a file from the local filesystem. The path parameter must be an absolute path, not a relative path. By default, it reads up to {MAX_LINES_TO_READ} lines starting from the beginning of the file. You can optionally specify a line offset and limit (especially handy for long files), but it's recommended to read the whole file by not providing these parameters. Any lines longer than {MAX_LINE_LENGTH} characters will be truncated. For image files, the tool will display the image for you.
## WriteFile path content description
Write a file to the local filesystem. Overwrites the existing file if there is one.
Provide a short description of the change.
Before using this tool:
1. Use the ReadFile tool to understand the file's contents and context
2. Directory Verification (only applicable when creating new files):
- Use the LS tool to verify the parent directory exists and is the correct location
## EditFile path old_string new_string description
This is a tool for editing files. For larger edits, use the Write tool to overwrite files.
Provide a short description of the change.
Before using this tool:
1. Use the View tool to understand the file's contents and context
2. Verify the directory path is correct (only applicable when creating new files):
- Use the LS tool to verify the parent directory exists and is the correct location
To make a file edit, provide the following:
1. path: The absolute path to the file to modify (must be absolute, not relative)
2. old_string: The text to replace (must be unique within the file, and must match the file contents exactly, including all whitespace and indentation)
3. new_string: The edited text to replace the old_string
The tool will replace ONE occurrence of old_string with new_string in the specified file.
CRITICAL REQUIREMENTS FOR USING THIS TOOL:
1. UNIQUENESS: The old_string MUST uniquely identify the specific instance you want to change. This means:
- Include AT LEAST 3-5 lines of context BEFORE the change point
- Include AT LEAST 3-5 lines of context AFTER the change point
- Include all whitespace, indentation, and surrounding code exactly as it appears in the file
2. SINGLE INSTANCE: This tool can only change ONE instance at a time. If you need to change multiple instances:
- Make separate calls to this tool for each instance
- Each call must uniquely identify its specific instance using extensive context
3. VERIFICATION: Before using this tool:
- Check how many instances of the target text exist in the file
- If multiple instances exist, gather enough context to uniquely identify each one
- Plan separate tool calls for each instance
WARNING: If you do not follow these requirements:
- The tool will fail if old_string matches multiple locations
- The tool will fail if old_string doesn't match exactly (including whitespace)
- You may change the wrong instance if you don't include enough context
When making edits:
- Ensure the edit results in idiomatic, correct code
- Do not leave the code in a broken state
- Always use absolute file paths (starting with /)
If you want to create a new file, use:
- A new file path, including dir name if needed
- An empty old_string
- The new file's contents as new_string
Remember: when making multiple file edits in a row to the same file, you should prefer to send all edits in a single message with multiple calls to this tool, rather than multiple messages with a single call each.
## LS path
Lists files and directories in a given path. The path parameter must be an absolute path, not a relative path. You should generally prefer the Glob and Grep tools, if you know which directories to search.
## Grep pattern path include?
Searches for files containing a specified pattern (regular expression) using git grep.
Files with a match are returned, up to a maximum of 100 files.
Note that this tool only works inside git repositories.
Example:
Grep "function.*hello" /path/to/repo # Find files containing functions with "hello" in their name
Grep "console\\.log" /path/to/repo --include="*.js" # Find JS files with console.log statements
## RunCommand path command arguments?
Runs a command. This does NOT support arbitrary code execution, ONLY call
with this set of valid commands: {command_help}
{_generate_command_docs(command_docs)}
## Summary
Args:
subtool: The subtool to execute (ReadFile, WriteFile, EditFile, LS, InitProject, RunCommand)
path: The path to the file or directory to operate on
content: Content for WriteFile subtool
old_string: String to replace for EditFile subtool
new_string: Replacement string for EditFile subtool
offset: Line offset for ReadFile subtool
limit: Line limit for ReadFile subtool
description: Short description of the change (for WriteFile/EditFile)
arguments: A list of string arguments for RunCommand subtool
# Chat ID
This chat has been assigned a unique ID: {chat_id}
When you use any tool, you should always include this unique ID as the first argument.
"""
# Combine system prompt, global prompt
combined_prompt = system_prompt
if project_prompt:
combined_prompt += "\n\n" + project_prompt
return combined_prompt
except Exception as e:
logging.warning(
f"Exception suppressed during project initialization: {e!s}", exc_info=True
)
return f"Error initializing project: {e!s}"