# pc.py (2 kB)
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from pinecone import Pinecone
from dotenv import load_dotenv
load_dotenv(override = True)
from server.helper import embed, build_rerank_documents
class Pinecone_DB:
    """Small wrapper around a Pinecone client for querying one index.

    Attributes are set in ``__init__``:
      * ``index_name`` - name of the index that ``query`` searches.
      * ``namespace``  - namespace passed to every query ('__default__').
      * ``pc``         - the ``Pinecone`` client built from the
        ``PINECONE_API_KEY`` environment variable.
    """

    def __init__(self, index_name: str = 'sec-embeddings'):
        """Create the Pinecone client and remember which index to query.

        Args:
            index_name: Name of the index used by ``query``.
        """
        self.index_name = index_name
        self.namespace = '__default__'
        # os.environ.get returns None when the variable is unset; the client
        # is handed that value as-is.
        self.pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

    def list_indexes(self):
        """Print the client's index listing and return it.

        Returns:
            Whatever ``self.pc.list_indexes()`` returned, so callers can use
            the listing programmatically as well as see it on stdout.
        """
        indexes = self.pc.list_indexes()
        print(indexes)
        return indexes

    def query(self, query: str, top_k: int = 30, top_n: int = 30, rerank: bool = False):
        """Run a vector search for ``query`` and optionally rerank the hits.

        Args:
            query: Query text; it is passed to ``embed`` for the vector search
                and, when reranking, sent verbatim to the reranker.
            top_k: Number of matches requested from the index.
            top_n: Number of results requested from the reranker (only used
                when ``rerank`` is true).
            rerank: When true, the matches are reranked with the
                'bge-reranker-v2-m3' model on the 'chunk_text' field.

        Returns:
            A list of ``{'document': ..., 'score': ...}`` dicts; empty when the
            index returned no matches.

        NOTE(review): the two branches put different things under 'document':
        the rerank branch stores the reranker's ``document`` object, the plain
        branch stores the ``original_text`` metadata value. Confirm callers
        expect both shapes.
        """
        query_embed = embed(query)
        results = self.pc.Index(self.index_name).query(
            namespace=self.namespace,
            vector=query_embed,
            top_k=top_k,
            include_values=False,
            include_metadata=True,
        )
        matches = results['matches']

        # Nothing matched: the answer is empty either way, so skip the rerank
        # request instead of sending it an empty document list.
        if not matches:
            return []

        if not rerank:
            return [
                {
                    'document': match['metadata']['original_text'],
                    'score': match['score'],
                }
                for match in matches
            ]

        ranked_results = self.pc.inference.rerank(
            model='bge-reranker-v2-m3',
            query=query,
            documents=build_rerank_documents(matches),
            top_n=top_n,
            rank_fields=['chunk_text'],
            return_documents=True,
            # Presumably trims over-long inputs at the end rather than
            # rejecting them - confirm against the rerank API docs.
            parameters={"truncate": "END"},
        )
        return [
            {'document': ranked.document, 'score': ranked.score}
            for ranked in ranked_results.data
        ]
if __name__ == '__main__':
    # Manual smoke run: build the wrapper for the 'sec-embeddings' index and
    # issue one sample query. The returned list is not used here.
    Pinecone_DB(index_name='sec-embeddings').query(
        'What is the latest 10-K for Apple Inc.?'
    )