def update_document_content(self, filepath: str, content: str) -> Dict[str, Any]:
    """Update an existing document on disk and re-index it.

    Overwrites the file at *filepath* with *content*, removes any chunks
    previously indexed for that file, re-parses it, and records fresh
    metadata. The query cache is invalidated and the BM25 index rebuilt
    so subsequent queries see the new content.

    Args:
        filepath: Path to the document file (must already exist).
        content: New full text to write to the file (UTF-8).

    Returns:
        On success: dict with ``old_chunks_removed``, ``new_chunks_added``,
        ``dedup_skipped`` and ``filepath``.
        On failure: dict with an ``error`` key (file missing, or the
        rewritten file could not be parsed). NOTE: on a parse failure the
        file has already been overwritten on disk and the old index entry
        removed — the document is left unindexed until a later re-index.
    """
    path = Path(filepath)  # keep a separate name; don't shadow the str param
    if not path.exists():
        return {"error": f"File not found: {path}"}

    # Resolve to absolute for consistent comparison with stored metadata,
    # which may hold relative or differently-spelled paths.
    path_resolved = str(path.resolve())
    doc_id = next(
        (
            did
            for did, info in self._indexed_docs.items()
            if str(Path(info.get("source", "")).resolve()) == path_resolved
        ),
        None,
    )

    old_chunks_removed = 0
    # Use an explicit None check: a falsy-but-valid doc id (0, "") must
    # still have its old chunks removed, or stale chunks would linger.
    if doc_id is not None:
        old_chunks_removed = self._remove_document_chunks(doc_id)
        del self._indexed_docs[doc_id]

    path.write_text(content, encoding="utf-8")
    doc = self.parser.parse_file(path)
    if not doc:
        # Persist the removal of the old entry even though re-indexing failed.
        self._save_metadata()
        return {
            "error": "Failed to parse updated content",
            "old_chunks_removed": old_chunks_removed,
        }

    new_chunks_added, dedup_skipped = self._index_document(doc)

    # Best-effort file stats; fall back to "now" / 0 if the file vanished
    # or stat is denied between the write above and here.
    try:
        file_stat = path.stat()
        file_mtime = datetime.fromtimestamp(file_stat.st_mtime).isoformat()
        file_size = file_stat.st_size
    except OSError:
        file_mtime = datetime.now().isoformat()
        file_size = 0

    self._indexed_docs[doc.id] = {
        "source": str(path),
        "category": doc.category,
        "format": doc.format,
        "chunks": new_chunks_added,
        "keywords": doc.keywords,
        "indexed_at": datetime.now().isoformat(),
        "file_mtime": file_mtime,
        "file_size": file_size,
    }
    self._save_metadata()
    self.query_cache.invalidate()
    self.bm25_index.build_index()
    return {
        "old_chunks_removed": old_chunks_removed,
        "new_chunks_added": new_chunks_added,
        "dedup_skipped": dedup_skipped,
        "filepath": str(path),
    }