"""Code chunk and file models."""
from __future__ import annotations
import json
from typing import Any
from pydantic import BaseModel, Field
from local_deepwiki.models.foundation import ChunkType, Language
class CodeChunk(BaseModel):
    """A chunk of code extracted from the repository."""

    # --- Identity and classification -------------------------------------
    id: str = Field(description="Unique identifier for this chunk")
    file_path: str = Field(description="Path to the source file")
    language: Language = Field(description="Programming language")
    chunk_type: ChunkType = Field(description="Type of code chunk")
    name: str | None = Field(
        default=None,
        description="Name of function/class/etc",
    )

    # --- Source text and its line span ------------------------------------
    content: str = Field(description="The actual code content")
    start_line: int = Field(description="Starting line number")
    end_line: int = Field(description="Ending line number")

    # --- Optional context ---------------------------------------------------
    docstring: str | None = Field(
        default=None,
        description="Associated docstring",
    )
    parent_name: str | None = Field(
        default=None,
        description="Parent class/module name",
    )
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Additional metadata",
    )

    def to_vector_record(self, vector: list[float] | None = None) -> dict[str, Any]:
        """Flatten this chunk into a plain dict for the vector store.

        Enum-typed fields are stored as their ``.value``, unset (or empty)
        optional text fields become ``""``, and ``metadata`` is serialized to
        a JSON string.

        Args:
            vector: Embedding to attach under the ``"vector"`` key. When
                ``None`` the key is left out of the record entirely.

        Returns:
            Dict with all fields formatted for LanceDB storage.
        """
        row: dict[str, Any] = dict(
            id=self.id,
            file_path=self.file_path,
            language=self.language.value,
            chunk_type=self.chunk_type.value,
            # Falsy optional strings (None or "") are stored as "".
            name=self.name or "",
            content=self.content,
            start_line=self.start_line,
            end_line=self.end_line,
            docstring=self.docstring or "",
            parent_name=self.parent_name or "",
            metadata=json.dumps(self.metadata),
        )
        if vector is None:
            return row
        row["vector"] = vector
        return row

    def __repr__(self) -> str:
        """Return a short ``<CodeChunk type [name] at path:start-end>`` string."""
        # The name, when present, is appended after the chunk type with a
        # single separating space; otherwise nothing is inserted.
        if self.name:
            name_part = f" {self.name}"
        else:
            name_part = ""
        location = f"{self.file_path}:{self.start_line}-{self.end_line}"
        return f"<CodeChunk {self.chunk_type.value}{name_part} at {location}>"
class FileInfo(BaseModel):
    """Information about a source file."""

    # Where the file lives and what it was detected as.
    path: str = Field(description="Relative path from repo root")
    language: Language | None = Field(
        default=None,
        description="Detected language",
    )

    # Size, timestamp and hash recorded for the file.
    size_bytes: int = Field(description="File size in bytes")
    last_modified: float = Field(description="Last modification timestamp")
    hash: str = Field(description="Content hash for change detection")

    # Defaults to 0 when not supplied.
    chunk_count: int = Field(
        default=0,
        description="Number of chunks extracted",
    )

    def __repr__(self) -> str:
        """Return a short ``<FileInfo path (language, N chunks)>`` string."""
        # A missing (falsy) language is rendered as the literal "unknown".
        if self.language:
            language_label = self.language.value
        else:
            language_label = "unknown"
        return f"<FileInfo {self.path} ({language_label}, {self.chunk_count} chunks)>"
class SearchResult(BaseModel):
    """A search result from semantic search."""

    chunk: CodeChunk = Field(description="The matched code chunk")
    score: float = Field(description="Similarity score")
    highlights: list[str] = Field(
        default_factory=list,
        description="Relevant snippets",
    )
    suggestions: list[str] | None = Field(
        default=None,
        description="'Did you mean?' suggestions when results are poor",
    )

    def __repr__(self) -> str:
        """Return a short ``<SearchResult label score=S[ suggestions=N]>`` string.

        The label is the chunk's name, falling back to its chunk type when the
        name is unset or empty. The suggestion count is shown only when
        ``suggestions`` is a non-empty list.
        """
        label = self.chunk.name
        if not label:
            label = self.chunk.chunk_type.value

        if self.suggestions:
            suggestion_str = f" suggestions={len(self.suggestions)}"
        else:
            suggestion_str = ""

        return f"<SearchResult {label} score={self.score:.3f}{suggestion_str}>"