"""Documentation coverage analysis and reporting."""
from __future__ import annotations
from dataclasses import dataclass, field
from operator import attrgetter
from pathlib import Path
from local_deepwiki.core.vectorstore import VectorStore
from local_deepwiki.generators.wiki_utils import file_path_to_wiki_path
from local_deepwiki.models import ChunkType, IndexStatus
# Minimum characters for a docstring to be considered meaningful
MIN_DOCSTRING_LENGTH = 10
# Coverage level thresholds (percentages)
COVERAGE_EXCELLENT_THRESHOLD = 90
COVERAGE_GOOD_THRESHOLD = 70
COVERAGE_FAIR_THRESHOLD = 50
# Maximum items shown in the "needs attention" section
MAX_LOW_COVERAGE_FILES = 10
MAX_UNDOCUMENTED_ITEMS = 20
@dataclass(slots=True)
class CoverageStats:
"""Documentation coverage statistics."""
total_classes: int = 0
documented_classes: int = 0
total_functions: int = 0
documented_functions: int = 0
total_methods: int = 0
documented_methods: int = 0
@property
def total_entities(self) -> int:
"""Total number of documentable entities."""
return self.total_classes + self.total_functions + self.total_methods
@property
def documented_entities(self) -> int:
"""Total number of documented entities."""
return (
self.documented_classes
+ self.documented_functions
+ self.documented_methods
)
@property
def coverage_percent(self) -> float:
"""Overall documentation coverage percentage."""
if self.total_entities == 0:
return 100.0
return (self.documented_entities / self.total_entities) * 100
def __iadd__(self, other: CoverageStats) -> CoverageStats:
"""Accumulate stats from another CoverageStats instance."""
self.total_classes += other.total_classes
self.documented_classes += other.documented_classes
self.total_functions += other.total_functions
self.documented_functions += other.documented_functions
self.total_methods += other.total_methods
self.documented_methods += other.documented_methods
return self
@dataclass(slots=True)
class FileCoverage:
    """Per-file documentation coverage results."""

    file_path: str
    # Aggregated entity counts for this file.
    stats: CoverageStats = field(default_factory=CoverageStats)
    # Human-readable labels of entities lacking docstrings, e.g. "class Foo".
    undocumented: list[str] = field(default_factory=list)
def _has_meaningful_docstring(docstring: str | None) -> bool:
    """Check if a docstring is meaningful (not empty or trivial).

    Args:
        docstring: The docstring to check.

    Returns:
        True if the docstring is meaningful.
    """
    if not docstring:
        return False
    cleaned = docstring.strip()
    # Reject placeholder-only docstrings first. Every placeholder is shorter
    # than MIN_DOCSTRING_LENGTH, so the previous ordering (length check first)
    # made this branch unreachable dead code; checking it up front keeps the
    # intent live even if MIN_DOCSTRING_LENGTH is ever lowered.
    placeholders = {"todo", "fixme", "xxx", "pass", "..."}
    if cleaned.lower() in placeholders:
        return False
    # Anything shorter than the minimum is too short to be meaningful.
    return len(cleaned) >= MIN_DOCSTRING_LENGTH
async def analyze_file_coverage(
file_path: str,
vector_store: VectorStore,
) -> FileCoverage:
"""Analyze documentation coverage for a single file.
Args:
file_path: Path to the source file.
vector_store: Vector store with code chunks.
Returns:
FileCoverage object with statistics.
"""
coverage = FileCoverage(file_path=file_path)
chunks = await vector_store.get_chunks_by_file(file_path)
for chunk in chunks:
name = chunk.name or "Unknown"
has_doc = _has_meaningful_docstring(chunk.docstring)
if chunk.chunk_type == ChunkType.CLASS:
coverage.stats.total_classes += 1
if has_doc:
coverage.stats.documented_classes += 1
else:
coverage.undocumented.append(f"class {name}")
elif chunk.chunk_type == ChunkType.FUNCTION:
coverage.stats.total_functions += 1
if has_doc:
coverage.stats.documented_functions += 1
else:
coverage.undocumented.append(f"function {name}")
elif chunk.chunk_type == ChunkType.METHOD:
coverage.stats.total_methods += 1
if has_doc:
coverage.stats.documented_methods += 1
else:
parent = chunk.parent_name or "Unknown"
coverage.undocumented.append(f"method {parent}.{name}")
return coverage
async def analyze_project_coverage(
    index_status: IndexStatus,
    vector_store: VectorStore,
) -> tuple[CoverageStats, list[FileCoverage]]:
    """Analyze documentation coverage for the entire project.

    Args:
        index_status: Index status with file information.
        vector_store: Vector store with code chunks.

    Returns:
        Tuple of (overall stats, list of per-file coverage).
    """
    overall = CoverageStats()
    per_file: list[FileCoverage] = []
    for file_info in index_status.files:
        fc = await analyze_file_coverage(file_info.path, vector_store)
        per_file.append(fc)
        overall += fc.stats
    # Worst-covered files first (ascending coverage).
    per_file.sort(key=lambda fc: fc.stats.coverage_percent)
    return overall, per_file
def _get_coverage_emoji(percent: float) -> str:
    """Get an emoji indicator for coverage level.

    Args:
        percent: Coverage percentage.

    Returns:
        Emoji string.
    """
    # Highest threshold wins; the final entry is the below-fair fallback.
    for threshold, emoji in (
        (COVERAGE_EXCELLENT_THRESHOLD, "π’"),
        (COVERAGE_GOOD_THRESHOLD, "π‘"),
        (COVERAGE_FAIR_THRESHOLD, "π "),
    ):
        if percent >= threshold:
            return emoji
    return "π΄"
# Local alias for the shared helper so call sites below read as "get wiki link".
_get_wiki_link = file_path_to_wiki_path
async def generate_coverage_page(
    index_status: IndexStatus,
    vector_store: VectorStore,
) -> str | None:
    """Generate the documentation coverage report page.

    Args:
        index_status: Index status with file information.
        vector_store: Vector store with code chunks.

    Returns:
        Markdown content for the coverage page, or None if no entities found.
    """
    overall, file_coverages = await analyze_project_coverage(index_status, vector_store)
    if overall.total_entities == 0:
        return None
    lines = [
        "# Documentation Coverage",
        "",
        "This report shows the documentation coverage for the codebase.",
        "",
    ]
    # Overall summary
    emoji = _get_coverage_emoji(overall.coverage_percent)
    lines.append("## Summary")
    lines.append("")
    lines.append(f"{emoji} **Overall Coverage: {overall.coverage_percent:.1f}%**")
    lines.append("")
    lines.append(
        f"- **{overall.documented_entities}** / **{overall.total_entities}** entities documented"
    )
    lines.append("")
    # Breakdown by entity type. Data-driven rows replace three copies of the
    # same formatting code; rows with zero entities are omitted, as before.
    lines.append("### By Type")
    lines.append("")
    lines.append("| Type | Documented | Total | Coverage |")
    lines.append("|------|------------|-------|----------|")
    type_rows = (
        ("Classes", overall.documented_classes, overall.total_classes),
        ("Functions", overall.documented_functions, overall.total_functions),
        ("Methods", overall.documented_methods, overall.total_methods),
    )
    for label, documented, total in type_rows:
        if total > 0:
            pct = (documented / total) * 100
            lines.append(f"| {label} | {documented} | {total} | {pct:.1f}% |")
    lines.append("")
    # Coverage by file (already sorted worst-first by analyze_project_coverage)
    lines.append("## Coverage by File")
    lines.append("")
    lines.append("| File | Documented | Total | Coverage |")
    lines.append("|------|------------|-------|----------|")
    for fc in file_coverages:
        if fc.stats.total_entities == 0:
            continue  # Skip files with nothing documentable
        emoji = _get_coverage_emoji(fc.stats.coverage_percent)
        file_name = Path(fc.file_path).name
        wiki_link = _get_wiki_link(fc.file_path)
        lines.append(
            f"| {emoji} [{file_name}]({wiki_link}) | "
            f"{fc.stats.documented_entities} | {fc.stats.total_entities} | "
            f"{fc.stats.coverage_percent:.1f}% |"
        )
    lines.append("")
    # Files needing attention (lowest coverage)
    low_coverage_files = [
        fc
        for fc in file_coverages
        if fc.stats.coverage_percent < COVERAGE_FAIR_THRESHOLD
        and fc.stats.total_entities > 0
    ]
    if low_coverage_files:
        lines.append("## Files Needing Attention")
        lines.append("")
        # Derive the threshold from the constant so this sentence cannot
        # silently desync from the filter above (it previously hardcoded 50).
        lines.append(
            f"Files with less than {COVERAGE_FAIR_THRESHOLD}% documentation coverage:"
        )
        lines.append("")
        for fc in low_coverage_files[:MAX_LOW_COVERAGE_FILES]:  # Top worst
            file_name = Path(fc.file_path).name
            wiki_link = _get_wiki_link(fc.file_path)
            lines.append(f"### [{file_name}]({wiki_link})")
            lines.append("")
            lines.append(f"Coverage: {fc.stats.coverage_percent:.1f}%")
            lines.append("")
            if fc.undocumented:
                lines.append("Undocumented:")
                for item in fc.undocumented[:MAX_UNDOCUMENTED_ITEMS]:
                    lines.append(f"- `{item}`")
                if len(fc.undocumented) > MAX_UNDOCUMENTED_ITEMS:
                    lines.append(
                        f"- ... and {len(fc.undocumented) - MAX_UNDOCUMENTED_ITEMS} more"
                    )
                lines.append("")
    # Legend
    lines.append("---")
    lines.append("")
    lines.append("**Legend:** π’ β₯90% | π‘ β₯70% | π β₯50% | π΄ <50%")
    lines.append("")
    return "\n".join(lines)