"""Data models for local-deepwiki."""
from enum import Enum
from pathlib import Path
from typing import Any
from pydantic import BaseModel, Field
class Language(str, Enum):
"""Supported programming languages."""
PYTHON = "python"
JAVASCRIPT = "javascript"
TYPESCRIPT = "typescript"
GO = "go"
RUST = "rust"
JAVA = "java"
C = "c"
CPP = "cpp"
SWIFT = "swift"
class ChunkType(str, Enum):
"""Types of code chunks."""
FUNCTION = "function"
CLASS = "class"
METHOD = "method"
MODULE = "module"
IMPORT = "import"
COMMENT = "comment"
OTHER = "other"
class CodeChunk(BaseModel):
"""A chunk of code extracted from the repository."""
id: str = Field(description="Unique identifier for this chunk")
file_path: str = Field(description="Path to the source file")
language: Language = Field(description="Programming language")
chunk_type: ChunkType = Field(description="Type of code chunk")
name: str | None = Field(default=None, description="Name of function/class/etc")
content: str = Field(description="The actual code content")
start_line: int = Field(description="Starting line number")
end_line: int = Field(description="Ending line number")
docstring: str | None = Field(default=None, description="Associated docstring")
parent_name: str | None = Field(default=None, description="Parent class/module name")
metadata: dict[str, Any] = Field(default_factory=dict, description="Additional metadata")
class FileInfo(BaseModel):
"""Information about a source file."""
path: str = Field(description="Relative path from repo root")
language: Language | None = Field(default=None, description="Detected language")
size_bytes: int = Field(description="File size in bytes")
last_modified: float = Field(description="Last modification timestamp")
hash: str = Field(description="Content hash for change detection")
chunk_count: int = Field(default=0, description="Number of chunks extracted")
class IndexStatus(BaseModel):
"""Status of repository indexing."""
repo_path: str = Field(description="Path to the repository")
indexed_at: float = Field(description="Timestamp of last indexing")
total_files: int = Field(description="Total files processed")
total_chunks: int = Field(description="Total chunks extracted")
languages: dict[str, int] = Field(default_factory=dict, description="Files per language")
files: list[FileInfo] = Field(default_factory=list, description="Indexed file info")
class WikiPage(BaseModel):
"""A generated wiki page."""
path: str = Field(description="Relative path in wiki directory")
title: str = Field(description="Page title")
content: str = Field(description="Markdown content")
generated_at: float = Field(description="Generation timestamp")
class WikiStructure(BaseModel):
"""Structure of the generated wiki."""
root: str = Field(description="Wiki root directory")
pages: list[WikiPage] = Field(default_factory=list, description="All wiki pages")
def to_toc(self) -> dict[str, Any]:
"""Generate table of contents."""
toc: dict[str, Any] = {"sections": []}
for page in sorted(self.pages, key=lambda p: p.path):
parts = Path(page.path).parts
current = toc
for part in parts[:-1]:
section = next((s for s in current.get("sections", []) if s["name"] == part), None)
if not section:
section = {"name": part, "sections": [], "pages": []}
current.setdefault("sections", []).append(section)
current = section
current.setdefault("pages", []).append({
"path": page.path,
"title": page.title
})
return toc
class SearchResult(BaseModel):
"""A search result from semantic search."""
chunk: CodeChunk = Field(description="The matched code chunk")
score: float = Field(description="Similarity score")
highlights: list[str] = Field(default_factory=list, description="Relevant snippets")