import streamlit as st
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from rag_pipeline import VectorStore
import pandas as pd
# Streamlit page: lists every document indexed in the vector store (one row
# per distinct `source_path`) and lets the user delete a document by source.

# Session-state key used to carry a one-shot confirmation across st.rerun().
_FLASH_KEY = "document_library_flash"


def _preview(value, limit=50):
    """Return *value* as display text, truncated to *limit* characters.

    ``None`` becomes an empty string and non-string values are converted
    with ``str`` so a missing or oddly typed metadata field cannot raise.
    The ``'...'`` suffix is appended only when text was actually cut.
    """
    text = '' if value is None else str(value)
    if len(text) <= limit:
        return text
    return text[:limit] + '...'


def _collect_documents(metadatas):
    """Build a ``{source_path: summary_row}`` mapping from chunk metadata.

    Each entry of *metadatas* is one metadata mapping; several entries may
    share a ``source_path``. The first entry seen for a source provides the
    row, and later entries for the same source are ignored. A ``None`` list
    or ``None`` entries are tolerated and treated as empty.
    """
    documents = {}
    for metadata in metadatas or []:
        metadata = metadata or {}
        source = metadata.get('source_path', 'Unknown')
        if source in documents:
            continue
        documents[source] = {
            'Title': metadata.get('title', 'Unknown'),
            'Author': metadata.get('author', 'Unknown'),
            'Pages': metadata.get('page_count', 'N/A'),
            'Type': metadata.get('doc_type', 'N/A'),
            'Keywords': _preview(metadata.get('keywords', '')),
        }
    return documents


st.title("📚 Document Library")

# Show the confirmation queued by a delete on the previous run. It is popped
# so it appears exactly once.
_flash_message = st.session_state.pop(_FLASH_KEY, None)
if _flash_message:
    st.success(_flash_message)

vector_store = VectorStore(use_local=True)

try:
    # NOTE(review): this reaches into the wrapper's private `_collection`
    # attribute to find the collection name; prefer a public accessor on
    # VectorStore if one exists.
    collection = vector_store.client.get_collection(
        vector_store.vectorstore._collection.name
    )
    all_data = collection.get()

    # Collapse per-chunk metadata into one row per source document.
    docs = _collect_documents(all_data['metadatas'])

    if docs:
        df = pd.DataFrame.from_dict(docs, orient='index')
        df.index.name = 'Source'

        st.metric("Total Documents", len(docs))
        st.dataframe(df, use_container_width=True)

        # Delete option
        st.subheader("Delete Document")
        doc_to_delete = st.selectbox("Select document to delete", list(docs))

        if st.button("Delete"):
            try:
                vector_store.delete_by_source(doc_to_delete)
            except Exception as e:
                # Report delete failures as such, not as a loading error.
                st.error(f"Error deleting {doc_to_delete}: {e}")
            else:
                # A message rendered right before st.rerun() is replaced by
                # the rerun, so queue it for display on the next run instead.
                st.session_state[_FLASH_KEY] = f"Deleted {doc_to_delete}"
                st.rerun()
    else:
        st.info("No documents indexed yet!")
except Exception as e:
    # Page-level boundary: surface any failure in the UI instead of a traceback.
    st.error(f"Error loading library: {e}")