"""AST-based code chunking for semantic extraction."""
import hashlib
from pathlib import Path
from typing import Any, Iterator
from tree_sitter import Node
from local_deepwiki.config import ChunkingConfig, get_config
from local_deepwiki.core.parser import (
CodeParser,
find_nodes_by_type,
get_docstring,
get_node_name,
get_node_text,
)
from local_deepwiki.logging import get_logger
from local_deepwiki.models import ChunkType, CodeChunk, Language
from local_deepwiki.plugins.registry import get_plugin_registry
logger = get_logger(__name__)
# Node types that represent extractable code units per language
FUNCTION_NODE_TYPES: dict[Language, set[str]] = {
Language.PYTHON: {"function_definition", "async_function_definition"},
Language.JAVASCRIPT: {
"function_declaration",
"arrow_function",
"function_expression",
"method_definition",
},
Language.TYPESCRIPT: {
"function_declaration",
"arrow_function",
"function_expression",
"method_definition",
},
Language.GO: {"function_declaration", "method_declaration"},
Language.RUST: {"function_item"},
Language.JAVA: {"method_declaration", "constructor_declaration"},
Language.C: {"function_definition"},
Language.CPP: {"function_definition"},
Language.SWIFT: {"function_declaration", "init_declaration"},
Language.RUBY: {"method", "singleton_method"},
Language.PHP: {"function_definition", "method_declaration"},
Language.KOTLIN: {"function_declaration"},
Language.CSHARP: {"method_declaration", "constructor_declaration"},
}
CLASS_NODE_TYPES: dict[Language, set[str]] = {
Language.PYTHON: {"class_definition"},
Language.JAVASCRIPT: {"class_declaration"},
Language.TYPESCRIPT: {"class_declaration", "interface_declaration", "type_alias_declaration"},
Language.GO: {"type_declaration"},
Language.RUST: {"struct_item", "impl_item", "trait_item", "enum_item"},
Language.JAVA: {"class_declaration", "interface_declaration", "enum_declaration"},
Language.C: {"struct_specifier"},
Language.CPP: {"class_specifier", "struct_specifier"},
Language.SWIFT: {
"class_declaration",
"struct_declaration",
"protocol_declaration",
"enum_declaration",
"extension_declaration",
},
Language.RUBY: {"class", "module"},
Language.PHP: {"class_declaration", "interface_declaration", "trait_declaration"},
Language.KOTLIN: {"class_declaration", "object_declaration"},
Language.CSHARP: {
"class_declaration",
"struct_declaration",
"interface_declaration",
"enum_declaration",
},
}
IMPORT_NODE_TYPES: dict[Language, set[str]] = {
Language.PYTHON: {"import_statement", "import_from_statement"},
Language.JAVASCRIPT: {"import_statement", "import_declaration"},
Language.TYPESCRIPT: {"import_statement", "import_declaration"},
Language.GO: {"import_declaration"},
Language.RUST: {"use_declaration"},
Language.JAVA: {"import_declaration"},
Language.C: {"preproc_include"},
Language.CPP: {"preproc_include"},
Language.SWIFT: {"import_declaration"},
Language.RUBY: {"call"}, # require/require_relative are method calls in Ruby AST
Language.PHP: {"namespace_use_declaration"}, # use statements in PHP
Language.KOTLIN: {"import_header"},
Language.CSHARP: {"using_directive"},
}
def get_parent_classes(class_node: Node, source: bytes, language: Language) -> list[str]:
"""Extract parent class names from a class definition.
Args:
class_node: The class AST node.
source: Source bytes.
language: Programming language.
Returns:
List of parent class names.
"""
parents = []
if language == Language.PYTHON:
# Python: class Child(Parent, Mixin): → argument_list > identifier
for child in class_node.children:
if child.type == "argument_list":
for arg in child.children:
if arg.type == "identifier":
parents.append(get_node_text(arg, source))
elif language in (Language.TYPESCRIPT, Language.JAVASCRIPT):
# TS/JS: class Child extends Parent implements Interface
for child in class_node.children:
if child.type == "class_heritage":
for clause in child.children:
if clause.type in ("extends_clause", "implements_clause"):
for item in clause.children:
if item.type in ("identifier", "type_identifier"):
parents.append(get_node_text(item, source))
elif language == Language.JAVA:
# Java: class Child extends Parent implements Interface
for child in class_node.children:
if child.type == "superclass":
for item in child.children:
if item.type == "type_identifier":
parents.append(get_node_text(item, source))
elif child.type == "super_interfaces":
for item in find_nodes_by_type(child, {"type_identifier"}):
parents.append(get_node_text(item, source))
elif language == Language.SWIFT:
# Swift: class Child: Parent, Protocol
for child in class_node.children:
if child.type == "type_inheritance_clause":
for item in child.children:
if item.type in ("user_type", "type_identifier"):
# Get the identifier from user_type
text = get_node_text(item, source)
if text and text not in (":", ","):
parents.append(text)
elif language == Language.CPP:
# C++: class Child : public Parent
for child in class_node.children:
if child.type == "base_class_clause":
for item in find_nodes_by_type(child, {"type_identifier"}):
parents.append(get_node_text(item, source))
elif language == Language.RUBY:
# Ruby: class Child < Parent
for child in class_node.children:
if child.type == "superclass":
for sc in child.children:
if sc.type == "constant" or sc.type == "scope_resolution":
parents.append(get_node_text(sc, source))
elif language == Language.PHP:
# PHP: class Child extends Parent implements Interface1, Interface2
for child in class_node.children:
if child.type == "base_clause":
# extends clause
for item in find_nodes_by_type(child, {"name", "qualified_name"}):
parents.append(get_node_text(item, source))
elif child.type == "class_interface_clause":
# implements clause
for item in find_nodes_by_type(child, {"name", "qualified_name"}):
parents.append(get_node_text(item, source))
elif language == Language.KOTLIN:
# Kotlin: class Child : Parent(), Interface1, Interface2
for child in class_node.children:
if child.type == "delegation_specifiers":
for spec in child.children:
if spec.type == "delegation_specifier":
for item in find_nodes_by_type(spec, {"user_type", "simple_identifier"}):
text = get_node_text(item, source)
if text and text not in (":", ","):
parents.append(text)
break # Only get the type name, not nested parts
elif language == Language.CSHARP:
# C#: class Child : Parent, IInterface1, IInterface2
for child in class_node.children:
if child.type == "base_list":
for item in find_nodes_by_type(
child, {"identifier", "generic_name", "qualified_name"}
):
text = get_node_text(item, source)
if text:
parents.append(text)
return parents
def extract_python_parameter_types(func_node: Node, source: bytes) -> dict[str, str | None]:
"""Extract parameter types from a Python function.
Args:
func_node: The function_definition AST node.
source: Source code bytes.
Returns:
Dictionary mapping parameter names to their type hints.
"""
param_types: dict[str, str | None] = {}
params_node = func_node.child_by_field_name("parameters")
if not params_node:
return param_types
for child in params_node.children:
if child.type == "identifier":
# Simple parameter without type hint
name = get_node_text(child, source)
if name not in ("self", "cls"):
param_types[name] = None
elif child.type == "typed_parameter":
# Parameter with type hint: name: type
# Or typed *args: *args: type, typed **kwargs: **kwargs: type
name_node = None
type_node = None
splat_pattern = None
for c in child.children:
if c.type == "identifier":
name_node = c
elif c.type == "type":
type_node = c
elif c.type == "list_splat_pattern":
splat_pattern = c
for sc in c.children:
if sc.type == "identifier":
name_node = sc
break
elif c.type == "dictionary_splat_pattern":
splat_pattern = c
for sc in c.children:
if sc.type == "identifier":
name_node = sc
break
if name_node:
name = get_node_text(name_node, source)
if name not in ("self", "cls"):
type_hint = get_node_text(type_node, source) if type_node else None
# Add prefix for splat patterns
if splat_pattern:
prefix = "*" if splat_pattern.type == "list_splat_pattern" else "**"
name = f"{prefix}{name}"
param_types[name] = type_hint
elif child.type == "default_parameter":
# Parameter with default: name = value
name_node = child.child_by_field_name("name")
if name_node:
name = get_node_text(name_node, source)
if name not in ("self", "cls"):
param_types[name] = None
elif child.type == "typed_default_parameter":
# Parameter with type and default: name: type = value
name_node = child.child_by_field_name("name")
type_node = child.child_by_field_name("type")
if name_node:
name = get_node_text(name_node, source)
if name not in ("self", "cls"):
type_hint = get_node_text(type_node, source) if type_node else None
param_types[name] = type_hint
elif child.type in ("list_splat_pattern", "dictionary_splat_pattern"):
# *args or **kwargs
for c in child.children:
if c.type == "identifier":
name = get_node_text(c, source)
prefix = "*" if child.type == "list_splat_pattern" else "**"
param_types[f"{prefix}{name}"] = None
break
elif c.type == "typed_parameter":
# *args: type or **kwargs: type
inner_name = None
inner_type = None
for tc in c.children:
if tc.type == "identifier":
inner_name = tc
elif tc.type == "type":
inner_type = tc
if inner_name:
name = get_node_text(inner_name, source)
prefix = "*" if child.type == "list_splat_pattern" else "**"
type_hint = get_node_text(inner_type, source) if inner_type else None
param_types[f"{prefix}{name}"] = type_hint
break
return param_types
def extract_python_parameter_defaults(func_node: Node, source: bytes) -> dict[str, str]:
"""Extract parameter default values from a Python function.
Args:
func_node: The function_definition AST node.
source: Source code bytes.
Returns:
Dictionary mapping parameter names to their default values.
"""
defaults: dict[str, str] = {}
params_node = func_node.child_by_field_name("parameters")
if not params_node:
return defaults
for child in params_node.children:
if child.type == "default_parameter":
name_node = child.child_by_field_name("name")
value_node = child.child_by_field_name("value")
if name_node and value_node:
name = get_node_text(name_node, source)
if name not in ("self", "cls"):
defaults[name] = get_node_text(value_node, source)
elif child.type == "typed_default_parameter":
name_node = child.child_by_field_name("name")
value_node = child.child_by_field_name("value")
if name_node and value_node:
name = get_node_text(name_node, source)
if name not in ("self", "cls"):
defaults[name] = get_node_text(value_node, source)
return defaults
def extract_python_return_type(func_node: Node, source: bytes) -> str | None:
"""Extract return type annotation from a Python function.
Args:
func_node: The function_definition AST node.
source: Source code bytes.
Returns:
Return type string or None.
"""
return_type_node = func_node.child_by_field_name("return_type")
if return_type_node:
return get_node_text(return_type_node, source)
return None
def extract_python_decorators(func_node: Node, source: bytes) -> list[str]:
"""Extract decorators from a Python function.
Args:
func_node: The function_definition AST node.
source: Source code bytes.
Returns:
List of decorator strings.
"""
decorators: list[str] = []
if func_node.parent:
prev_sibling = func_node.prev_sibling
while prev_sibling:
if prev_sibling.type == "decorator":
dec_text = get_node_text(prev_sibling, source)
decorators.insert(0, dec_text)
elif prev_sibling.type not in ("comment", "decorator"):
break
prev_sibling = prev_sibling.prev_sibling
return decorators
def is_async_function(func_node: Node) -> bool:
"""Check if a function is async.
Args:
func_node: The function AST node.
Returns:
True if the function is async.
"""
return func_node.type == "async_function_definition" or any(
c.type == "async" for c in func_node.children
)
def extract_python_raised_exceptions(func_node: Node, source: bytes) -> list[str]:
"""Extract exception types raised by a Python function.
Finds all `raise` statements within the function and extracts the exception
type being raised.
Args:
func_node: The function_definition AST node.
source: Source code bytes.
Returns:
List of unique exception type names raised by the function.
"""
exceptions: set[str] = set()
def find_raise_statements(node: Node) -> None:
"""Recursively find raise statements in the AST."""
if node.type == "raise_statement":
# Extract the exception type
for child in node.children:
if child.type == "identifier":
# Direct raise like: raise ValueError
exc_name = get_node_text(child, source)
if exc_name and exc_name != "raise":
exceptions.add(exc_name)
break
elif child.type == "call":
# Raise with call like: raise ValueError("msg")
for call_child in child.children:
if call_child.type == "identifier":
exc_name = get_node_text(call_child, source)
if exc_name:
exceptions.add(exc_name)
break
elif call_child.type == "attribute":
# Handle module.Exception like: raise errors.CustomError
exc_name = get_node_text(call_child, source)
if exc_name:
exceptions.add(exc_name)
break
break
# Recurse into child nodes (but not into nested functions)
for child in node.children:
if child.type not in ("function_definition", "async_function_definition"):
find_raise_statements(child)
# Start searching from the function body
for child in func_node.children:
if child.type == "block":
find_raise_statements(child)
return sorted(exceptions)
def extract_function_type_metadata(
func_node: Node, source: bytes, language: Language
) -> dict[str, Any]:
"""Extract type annotation metadata from a function node.
Args:
func_node: The function AST node.
source: Source code bytes.
language: Programming language.
Returns:
Metadata dictionary with type information.
"""
metadata: dict[str, Any] = {}
if language == Language.PYTHON:
# Extract parameter types
param_types = extract_python_parameter_types(func_node, source)
# Only include parameters that have type hints
typed_params = {k: v for k, v in param_types.items() if v is not None}
if typed_params:
metadata["parameter_types"] = typed_params
# Extract parameter defaults
param_defaults = extract_python_parameter_defaults(func_node, source)
if param_defaults:
metadata["parameter_defaults"] = param_defaults
# Extract return type
return_type = extract_python_return_type(func_node, source)
if return_type:
metadata["return_type"] = return_type
# Extract decorators
decorators = extract_python_decorators(func_node, source)
if decorators:
metadata["decorators"] = decorators
# Check if async
if is_async_function(func_node):
metadata["is_async"] = True
# Extract raised exceptions
raised_exceptions = extract_python_raised_exceptions(func_node, source)
if raised_exceptions:
metadata["raises"] = raised_exceptions
# TODO: Add support for other languages (TypeScript, Java, etc.)
return metadata
class CodeChunker:
"""Extract semantic code chunks from source files using AST analysis."""
def __init__(self, config: ChunkingConfig | None = None):
"""Initialize the chunker.
Args:
config: Optional chunking configuration.
"""
base_config = config or get_config().chunking
# Store a defensive copy to prevent external mutation
self.config = base_config.model_copy(deep=True)
self.parser = CodeParser()
def chunk_file(self, file_path: Path, repo_root: Path) -> Iterator[CodeChunk]:
"""Extract code chunks from a source file.
Checks for registered language parser plugins first. If a plugin
handles the file extension, uses the plugin's parse_file method.
Otherwise falls back to the built-in tree-sitter parser.
Args:
file_path: Path to the source file.
repo_root: Root directory of the repository.
Yields:
CodeChunk objects for each semantic unit found.
"""
# Check for plugin parser first
registry = get_plugin_registry()
plugin_parser = registry.get_parser_for_extension(file_path.suffix)
if plugin_parser is not None:
# Use plugin parser - it returns CodeChunk objects directly
logger.debug(f"Using plugin parser '{plugin_parser.language_name}' for {file_path.name}")
try:
source = file_path.read_bytes()
chunks = plugin_parser.parse_file(file_path, source)
yield from chunks
return
except Exception as e:
logger.warning(f"Plugin parser failed for {file_path}: {e}, falling back to built-in")
# Fall back to built-in tree-sitter parser
result = self.parser.parse_file(file_path)
if result is None:
logger.debug(f"Skipping unsupported file: {file_path}")
return
root, language, source = result
rel_path = str(file_path.relative_to(repo_root))
logger.debug(f"Chunking {rel_path} ({language.value})")
# Extract module-level chunk (file overview)
yield self._create_module_chunk(root, source, language, rel_path)
# Extract imports
import_types = IMPORT_NODE_TYPES.get(language, set())
import_nodes = find_nodes_by_type(root, import_types)
if import_nodes:
yield self._create_imports_chunk(import_nodes, source, language, rel_path)
# Extract classes and their methods
class_types = CLASS_NODE_TYPES.get(language, set())
for class_node in find_nodes_by_type(root, class_types):
yield from self._extract_class_chunks(class_node, source, language, rel_path)
# Extract top-level functions (not inside classes)
function_types = FUNCTION_NODE_TYPES.get(language, set())
for func_node in find_nodes_by_type(root, function_types):
# Skip if inside a class (already processed)
if not self._is_inside_class(func_node, class_types):
yield self._create_function_chunk(func_node, source, language, rel_path)
def _create_module_chunk(
self,
root: Node,
source: bytes,
language: Language,
file_path: str,
) -> CodeChunk:
"""Create a chunk for the module/file overview.
Args:
root: AST root node.
source: Source bytes.
language: Programming language.
file_path: Relative file path.
Returns:
A CodeChunk for the module.
"""
# Get module docstring if present
docstring = None
if language == Language.PYTHON:
# Python module docstring is first expression
if root.children and root.children[0].type == "expression_statement":
expr = root.children[0]
if expr.children and expr.children[0].type == "string":
docstring = get_node_text(expr.children[0], source)
if docstring.startswith('"""') or docstring.startswith("'''"):
docstring = docstring[3:-3].strip()
# Create a summary of the file structure
content = self._create_file_summary(root, source, language)
chunk_id = self._generate_id(file_path, "module", 0)
return CodeChunk(
id=chunk_id,
file_path=file_path,
language=language,
chunk_type=ChunkType.MODULE,
name=Path(file_path).stem,
content=content,
start_line=1,
end_line=source.count(b"\n") + 1,
docstring=docstring,
metadata={"is_overview": True},
)
def _create_file_summary(self, root: Node, source: bytes, language: Language) -> str:
"""Create a summary of file structure for the module chunk.
Args:
root: AST root node.
source: Source bytes.
language: Programming language.
Returns:
A summary string of file contents.
"""
parts = []
# List imports
import_types = IMPORT_NODE_TYPES.get(language, set())
imports = find_nodes_by_type(root, import_types)
if imports:
import_text = "\n".join(get_node_text(n, source) for n in imports[:10])
if len(imports) > 10:
import_text += f"\n# ... and {len(imports) - 10} more imports"
parts.append(f"# Imports:\n{import_text}")
# List classes
class_types = CLASS_NODE_TYPES.get(language, set())
classes = find_nodes_by_type(root, class_types)
if classes:
class_names = [get_node_name(c, source, language) or "anonymous" for c in classes]
parts.append(f"# Classes: {', '.join(class_names)}")
# List functions
function_types = FUNCTION_NODE_TYPES.get(language, set())
functions = [
f
for f in find_nodes_by_type(root, function_types)
if not self._is_inside_class(f, class_types)
]
if functions:
func_names = [get_node_name(f, source, language) or "anonymous" for f in functions]
parts.append(f"# Functions: {', '.join(func_names)}")
return "\n\n".join(parts) if parts else "# Empty file"
def _create_imports_chunk(
self,
import_nodes: list[Node],
source: bytes,
language: Language,
file_path: str,
) -> CodeChunk:
"""Create a chunk for import statements.
Args:
import_nodes: List of import nodes.
source: Source bytes.
language: Programming language.
file_path: Relative file path.
Returns:
A CodeChunk for imports.
"""
content = "\n".join(get_node_text(n, source) for n in import_nodes)
start_line = min(n.start_point[0] + 1 for n in import_nodes)
end_line = max(n.end_point[0] + 1 for n in import_nodes)
chunk_id = self._generate_id(file_path, "imports", start_line)
return CodeChunk(
id=chunk_id,
file_path=file_path,
language=language,
chunk_type=ChunkType.IMPORT,
name="imports",
content=content,
start_line=start_line,
end_line=end_line,
metadata={"import_count": len(import_nodes)},
)
def _extract_class_chunks(
self,
class_node: Node,
source: bytes,
language: Language,
file_path: str,
) -> Iterator[CodeChunk]:
"""Extract chunks from a class definition.
Args:
class_node: The class AST node.
source: Source bytes.
language: Programming language.
file_path: Relative file path.
Yields:
CodeChunks for the class and its methods.
"""
class_name = get_node_name(class_node, source, language) or "anonymous"
docstring = get_docstring(class_node, source, language)
content = get_node_text(class_node, source)
# Extract parent classes for inheritance
parent_classes = get_parent_classes(class_node, source, language)
# Check if class is too large and needs to be split
lines = content.count("\n") + 1
if lines > self.config.class_split_threshold:
# For large classes, create a summary chunk and method chunks
yield self._create_class_summary_chunk(
class_node, source, language, file_path, class_name, docstring, parent_classes
)
# Extract methods separately
function_types = FUNCTION_NODE_TYPES.get(language, set())
for method_node in find_nodes_by_type(class_node, function_types):
yield self._create_method_chunk(
method_node, source, language, file_path, class_name
)
else:
# Small class - include everything in one chunk
chunk_id = self._generate_id(
file_path, f"class_{class_name}", class_node.start_point[0]
)
metadata: dict[str, int | list[str]] = {"line_count": lines}
if parent_classes:
metadata["parent_classes"] = parent_classes
yield CodeChunk(
id=chunk_id,
file_path=file_path,
language=language,
chunk_type=ChunkType.CLASS,
name=class_name,
content=content,
start_line=class_node.start_point[0] + 1,
end_line=class_node.end_point[0] + 1,
docstring=docstring,
metadata=metadata,
)
def _create_class_summary_chunk(
self,
class_node: Node,
source: bytes,
language: Language,
file_path: str,
class_name: str,
docstring: str | None,
parent_classes: list[str] | None = None,
) -> CodeChunk:
"""Create a summary chunk for a large class.
Args:
class_node: The class AST node.
source: Source bytes.
language: Programming language.
file_path: Relative file path.
class_name: Name of the class.
docstring: Class docstring if any.
parent_classes: List of parent class names.
Returns:
A summary CodeChunk for the class.
"""
# Get class signature and method list
function_types = FUNCTION_NODE_TYPES.get(language, set())
methods = find_nodes_by_type(class_node, function_types)
method_names = [get_node_name(m, source, language) or "anonymous" for m in methods]
# Build summary content
signature_end = class_node.start_byte
for child in class_node.children:
if child.type in ("block", "class_body", "declaration_list"):
signature_end = child.start_byte
break
signature = (
source[class_node.start_byte : signature_end].decode("utf-8", errors="replace").strip()
)
content = f"{signature}\n # Methods: {', '.join(method_names)}"
chunk_id = self._generate_id(file_path, f"class_{class_name}", class_node.start_point[0])
metadata: dict[str, bool | int | list[str]] = {
"is_summary": True,
"method_count": len(methods),
}
if parent_classes:
metadata["parent_classes"] = parent_classes
return CodeChunk(
id=chunk_id,
file_path=file_path,
language=language,
chunk_type=ChunkType.CLASS,
name=class_name,
content=content,
start_line=class_node.start_point[0] + 1,
end_line=class_node.end_point[0] + 1,
docstring=docstring,
metadata=metadata,
)
def _create_method_chunk(
self,
method_node: Node,
source: bytes,
language: Language,
file_path: str,
class_name: str,
) -> CodeChunk:
"""Create a chunk for a class method.
Args:
method_node: The method AST node.
source: Source bytes.
language: Programming language.
file_path: Relative file path.
class_name: Name of the parent class.
Returns:
A CodeChunk for the method.
"""
method_name = get_node_name(method_node, source, language) or "anonymous"
content = get_node_text(method_node, source)
docstring = get_docstring(method_node, source, language)
# Extract type annotation metadata
metadata = extract_function_type_metadata(method_node, source, language)
chunk_id = self._generate_id(
file_path, f"{class_name}.{method_name}", method_node.start_point[0]
)
return CodeChunk(
id=chunk_id,
file_path=file_path,
language=language,
chunk_type=ChunkType.METHOD,
name=method_name,
content=content,
start_line=method_node.start_point[0] + 1,
end_line=method_node.end_point[0] + 1,
docstring=docstring,
parent_name=class_name,
metadata=metadata,
)
def _create_function_chunk(
self,
func_node: Node,
source: bytes,
language: Language,
file_path: str,
) -> CodeChunk:
"""Create a chunk for a top-level function.
Args:
func_node: The function AST node.
source: Source bytes.
language: Programming language.
file_path: Relative file path.
Returns:
A CodeChunk for the function.
"""
func_name = get_node_name(func_node, source, language) or "anonymous"
content = get_node_text(func_node, source)
docstring = get_docstring(func_node, source, language)
# Extract type annotation metadata
metadata = extract_function_type_metadata(func_node, source, language)
chunk_id = self._generate_id(file_path, f"func_{func_name}", func_node.start_point[0])
return CodeChunk(
id=chunk_id,
file_path=file_path,
language=language,
chunk_type=ChunkType.FUNCTION,
name=func_name,
content=content,
start_line=func_node.start_point[0] + 1,
end_line=func_node.end_point[0] + 1,
docstring=docstring,
metadata=metadata,
)
def _is_inside_class(self, node: Node, class_types: set[str]) -> bool:
"""Check if a node is inside a class definition.
Args:
node: The node to check.
class_types: Set of class node type names.
Returns:
True if the node is inside a class.
"""
parent = node.parent
while parent:
if parent.type in class_types:
return True
parent = parent.parent
return False
def _generate_id(self, file_path: str, name: str, line: int) -> str:
"""Generate a unique chunk ID.
Args:
file_path: File path.
name: Chunk name.
line: Line number.
Returns:
A unique ID string.
"""
key = f"{file_path}:{name}:{line}"
return hashlib.sha256(key.encode()).hexdigest()[:16]