# path_files.py - 2 kB
import json
from pathlib import Path
MIB_TO_BYTES = 1024 * 1024
def list_documents_by_folder(root_path: str | Path) -> list[dict]:
"""
Recursively list documents in each folder under the given root path.
Args:
root_path (str or Path): The root directory to start searching from.
Returns:
list: A list of dictionaries, each containing:
- 'relative_path': The relative path of the folder (from root).
- 'documents': A list of file names (with extension) in that folder.
"""
root = Path(root_path)
result = []
# Include files in the root folder
root_documents = [file.name for file in root.iterdir() if file.is_file()]
if root_documents:
result.append({
"relative_path": ".",
"documents": root_documents
})
# Recursively process subfolders
for folder in root.rglob('*'):
if folder.is_dir():
documents = [
{
"name": file.name,
"size": round(file.stat().st_size / (MIB_TO_BYTES), 2)
}
for file in folder.iterdir() if file.is_file()
]
if documents:
relative_folder = str(folder.relative_to(root).as_posix())
result.append({
"relative_path": relative_folder,
"documents": documents
})
return result
# if __name__ == "__main__":
# # Ask the user for the root directory path
# path = input("Enter the root directory path: ")
# # Get the structured list of documents
# data = list_documents_by_folder(path)
# # Save the result as formatted JSON in the specified path
# output_path = Path(r"D:\DSProject\MCP Servers\LocalDocMCP\test_json\documents.json")
# output_path.parent.mkdir(parents=True, exist_ok=True)
# with open(output_path, "w", encoding="utf-8") as f:
# json.dump(data, f, indent=2, ensure_ascii=False)