Code-Index-MCP

plugin_semantic.py•16.6 KiB

"""JavaScript plugin with semantic search support.""" from __future__ import annotations import logging from pathlib import Path from typing import Dict, Iterable, List, Optional from tree_sitter import Node from ...plugin_base import ( IndexShard, Reference, SearchOpts, SearchResult, SymbolDef, ) from ...plugin_base_enhanced import PluginWithSemanticSearch from ...storage.sqlite_store import SQLiteStore from ...utils.fuzzy_indexer import FuzzyIndexer logger = logging.getLogger(__name__) class JsPluginSemantic(PluginWithSemanticSearch): """JavaScript/TypeScript plugin with semantic search capabilities.""" lang = "js" def __init__( self, sqlite_store: Optional[SQLiteStore] = None, enable_semantic: bool = True ) -> None: # Initialize enhanced base class super().__init__(sqlite_store=sqlite_store, enable_semantic=enable_semantic) # Initialize JS-specific components self._indexer = FuzzyIndexer(sqlite_store=sqlite_store) self._repository_id = None # Initialize parser without tree-sitter for now self.parser = None self.language = None # Create or get repository if SQLite is enabled if self._sqlite_store: try: self._repository_id = self._sqlite_store.create_repository( str(Path.cwd()), Path.cwd().name, {"language": "javascript"} ) except Exception as e: logger.warning(f"Failed to create repository: {e}") self._repository_id = None self._preindex() def _preindex(self) -> None: """Pre-index JavaScript/TypeScript files in the current directory.""" extensions = [".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"] for ext in extensions: for path in Path(".").rglob(f"*{ext}"): try: text = path.read_text() self._indexer.add_file(str(path), text) except Exception: continue def supports(self, path: str | Path) -> bool: """Return True if file extension matches JavaScript/TypeScript.""" extensions = {".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"} return Path(path).suffix in extensions def indexFile(self, path: str | Path, content: str) -> IndexShard: """Index a JavaScript/TypeScript file 
with optional semantic embeddings.""" if isinstance(path, str): path = Path(path) # Add to fuzzy indexer self._indexer.add_file(str(path), content) # Store file in SQLite if available file_id = None if self._sqlite_store and self._repository_id: import hashlib file_hash = hashlib.sha256(content.encode("utf-8")).hexdigest() file_id = self._sqlite_store.store_file( self._repository_id, str(path), str(path.relative_to(Path.cwd()) if path.is_absolute() else path), language="javascript", size=len(content), hash=file_hash, ) # Extract symbols without tree-sitter for now symbols = self._extract_symbols_simple(content, file_id) # Create semantic embeddings if enabled if self._enable_semantic and symbols: self.index_with_embeddings(path, content, symbols) return IndexShard(file=str(path), symbols=symbols, language="javascript") def _extract_symbols_simple(self, content: str, file_id: Optional[int] = None) -> List[Dict]: """Simple symbol extraction without tree-sitter.""" symbols = [] lines = content.split("\n") for i, line in enumerate(lines): # Function declarations if "function" in line and "(" in line: import re match = re.search(r"function\s+(\w+)\s*\(", line) if match: name = match.group(1) symbols.append( { "symbol": name, "kind": "function", "signature": line.strip(), "line": i + 1, "end_line": i + 1, "span": [i + 1, i + 1], } ) # Class declarations elif "class" in line: match = re.search(r"class\s+(\w+)", line) if match: name = match.group(1) symbols.append( { "symbol": name, "kind": "class", "signature": line.strip(), "line": i + 1, "end_line": i + 1, "span": [i + 1, i + 1], } ) # Arrow functions and const declarations elif "const" in line and "=>" in line: match = re.search(r"const\s+(\w+)\s*=.*=>", line) if match: name = match.group(1) symbols.append( { "symbol": name, "kind": "function", "signature": line.strip(), "line": i + 1, "end_line": i + 1, "span": [i + 1, i + 1], } ) return symbols def _extract_symbols( self, node: Node, content: str, symbols: 
List[Dict], imports: List[Dict], exports: List[Dict], file_id: Optional[int] = None, scope_path: List[str] = None, path: Optional[Path] = None, ) -> None: """Recursively extract symbols from the AST.""" if scope_path is None: scope_path = [] # Extract imports and exports at the top level if not scope_path: imports.extend(self._extract_imports(node, content)) exports.extend(self._extract_exports(node, content)) # Function declarations if node.type in ["function_declaration", "function"]: name_node = node.child_by_field_name("name") if name_node: name = content[name_node.start_byte : name_node.end_byte] params = self._extract_parameters(node, content) is_async = self._is_async_function(node, content) is_generator = self._is_generator_function(node, content) doc = self._extract_jsdoc(node, content) kind = "function" if is_generator: kind = "generator" signature = f"{'async ' if is_async else ''}function{'*' if is_generator else ''} {name}({params})" symbol_info = { "symbol": name, "kind": kind, "signature": signature, "line": node.start_point[0] + 1, "end_line": node.end_point[0] + 1, "span": [node.start_point[0] + 1, node.end_point[0] + 1], "scope": ".".join(scope_path) if scope_path else None, "doc": doc, } symbols.append(symbol_info) # Store in SQLite if available if self._sqlite_store and file_id: symbol_id = self._sqlite_store.store_symbol( file_id, name, kind, symbol_info["line"], symbol_info["end_line"], signature=signature, ) if path: self._indexer.add_symbol( name, str(path), symbol_info["line"], {"symbol_id": symbol_id, "file_id": file_id}, ) # Class declarations elif node.type == "class_declaration": name_node = node.child_by_field_name("name") if name_node: name = content[name_node.start_byte : name_node.end_byte] doc = self._extract_jsdoc(node, content) symbol_info = { "symbol": name, "kind": "class", "signature": f"class {name}", "line": node.start_point[0] + 1, "end_line": node.end_point[0] + 1, "span": [node.start_point[0] + 1, node.end_point[0] + 1], 
"scope": ".".join(scope_path) if scope_path else None, "doc": doc, } symbols.append(symbol_info) # Process class body body_node = node.child_by_field_name("body") if body_node: self._extract_symbols( body_node, content, symbols, imports, exports, file_id, scope_path + [name], path, ) # Arrow functions and variables elif node.type == "variable_declaration": declarators = [child for child in node.children if child.type == "variable_declarator"] for declarator in declarators: name_node = declarator.child_by_field_name("name") value_node = declarator.child_by_field_name("value") if name_node and value_node and value_node.type == "arrow_function": name = content[name_node.start_byte : name_node.end_byte] params = self._extract_parameters(value_node, content) is_async = self._is_async_function(value_node, content) doc = self._extract_jsdoc(node, content) signature = f"{'async ' if is_async else ''}const {name} = ({params}) => {{}}" symbol_info = { "symbol": name, "kind": "function", "signature": signature, "line": node.start_point[0] + 1, "end_line": node.end_point[0] + 1, "span": [node.start_point[0] + 1, node.end_point[0] + 1], "scope": ".".join(scope_path) if scope_path else None, "doc": doc, } symbols.append(symbol_info) # Recurse through children for child in node.children: self._extract_symbols( child, content, symbols, imports, exports, file_id, scope_path, path ) def _extract_imports(self, node: Node, content: str) -> List[Dict]: """Extract import statements.""" imports = [] if node.type == "import_statement": source_node = node.child_by_field_name("source") if source_node: module = content[source_node.start_byte : source_node.end_byte].strip("\"'") imports.append({"module": module}) for child in node.children: imports.extend(self._extract_imports(child, content)) return imports def _extract_exports(self, node: Node, content: str) -> List[Dict]: """Extract export statements.""" exports = [] if node.type in ["export_statement", "export_default_declaration"]: # 
Simplified export extraction exports.append({"name": "export", "kind": "export", "line": node.start_point[0] + 1}) for child in node.children: exports.extend(self._extract_exports(child, content)) return exports def _extract_parameters(self, node: Node, content: str) -> str: """Extract function parameters.""" params_node = node.child_by_field_name("parameters") if params_node: return content[params_node.start_byte : params_node.end_byte].strip("()") return "" def _is_async_function(self, node: Node, content: str) -> bool: """Check if function is async.""" # Check for async keyword before function start = node.start_byte prefix = content[max(0, start - 10) : start].strip() return prefix.endswith("async") def _is_generator_function(self, node: Node, content: str) -> bool: """Check if function is a generator.""" # Look for * after function keyword for child in node.children: if child.type == "*": return True return False def _extract_jsdoc(self, node: Node, content: str) -> Optional[str]: """Extract JSDoc comment preceding the node.""" # Look for comment nodes before this node start_line = node.start_point[0] lines = content.split("\n") # Check previous lines for JSDoc for i in range(start_line - 1, max(0, start_line - 5), -1): line = lines[i].strip() if line.startswith("/**"): # Found JSDoc start, extract until */ doc_lines = [] for j in range(i, start_line): doc_lines.append(lines[j]) if lines[j].strip().endswith("*/"): break # Clean up JSDoc doc = "\n".join(doc_lines) doc = doc.replace("/**", "").replace("*/", "") doc = "\n".join(line.strip().lstrip("*").strip() for line in doc.split("\n")) return doc.strip() return None def getDefinition(self, symbol: str) -> SymbolDef | None: """Get symbol definition.""" # Search through indexed files extensions = [".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"] for ext in extensions: for path in Path(".").rglob(f"*{ext}"): try: content = path.read_text() # Simple search for the symbol if symbol in content: lines = 
content.split("\n") for i, line in enumerate(lines): if symbol in line and ( "function" in line or "class" in line or "const" in line ): return SymbolDef( symbol=symbol, kind=( "function" if "function" in line else "class" if "class" in line else "variable" ), language="javascript", signature=line.strip(), doc=None, defined_in=str(path), line=i + 1, span=(i + 1, i + 3), ) except Exception: continue return None def findReferences(self, symbol: str) -> list[Reference]: """Find all references to a symbol.""" refs: list[Reference] = [] seen: set[tuple[str, int]] = set() extensions = [".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"] for ext in extensions: for path in Path(".").rglob(f"*{ext}"): try: content = path.read_text() lines = content.split("\n") for i, line in enumerate(lines): if symbol in line: key = (str(path), i + 1) if key not in seen: refs.append(Reference(file=str(path), line=i + 1)) seen.add(key) except Exception: continue return refs def _traditional_search( self, query: str, opts: SearchOpts | None = None ) -> Iterable[SearchResult]: """Traditional fuzzy search implementation.""" limit = 20 if opts and "limit" in opts: limit = opts["limit"] return self._indexer.search(query, limit=limit) def get_indexed_count(self) -> int: """Return the number of indexed files.""" if hasattr(self._indexer, "_file_contents"): return len(self._indexer._file_contents) return 0

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ViperJuice/Code-Index-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

plugin_semantic.py•16.6 KiB