import boto3
import logging
import sys
import os
import json
# Route log records to stderr, prefixing each message with the emitting
# file name and line number.
_stderr_handler = logging.StreamHandler(sys.stderr)
logging.basicConfig(
    level=logging.INFO,  # Default to INFO level
    format="%(filename)s:%(lineno)d | %(message)s",
    handlers=[_stderr_handler],
)

# Named logger used by the functions in this module.
logger = logging.getLogger("retrieve")
def load_config():
    """Read ``config.json`` from the directory that contains this script.

    Returns:
        The parsed JSON content of the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(here, "config.json"), "r", encoding="utf-8") as handle:
        return json.load(handle)
# Settings are read once, at import time, from config.json.
config = load_config()
bedrock_region = config["region"]
projectName = config["projectName"]
knowledge_base_id = config["knowledge_base_id"]

# Number of results requested from the knowledge base per query.
number_of_results = 5

# Shared Bedrock Agent Runtime client bound to the configured region.
bedrock_agent_runtime_client = boto3.client(
    "bedrock-agent-runtime",
    region_name=bedrock_region,
)
def _to_json_doc(result):
    """Convert one knowledge-base retrieval result into a reference document.

    Args:
        result: A single entry of the ``retrievalResults`` list returned by
            the Bedrock Agent Runtime ``retrieve`` call.

    Returns:
        A dict of the form
        ``{"contents": text, "reference": {"url": ..., "title": ..., "from": "RAG"}}``.
        ``contents`` is ``None`` when the result carries no text; ``url`` and
        ``title`` are ``None`` when the result has neither an S3 nor a web
        location.
    """
    # `content`/`location` and their inner fields are optional in the API
    # response, so missing keys and None values are handled alike.
    content = result.get("content") or {}
    text = content.get("text")

    url = name = None
    location = result.get("location") or {}
    if "s3Location" in location:
        uri = (location["s3Location"] or {}).get("uri")
        # TODO: build the URL from path + doc_prefix + URL-encoded file name
        # instead of returning the raw S3 URI.
        url = "" if uri is None else uri
        # The title is the last path segment of the S3 URI (the object name).
        name = url.split("/")[-1]
    elif "webLocation" in location:
        web_url = (location["webLocation"] or {}).get("url")
        url = "" if web_url is None else web_url
        name = "WEB"

    return {
        "contents": text,
        "reference": {
            "url": url,
            "title": name,
            "from": "RAG",
        },
    }


def retrieve(query):
    """Search the configured knowledge base and return the hits as JSON.

    Sends ``query`` as a text retrieval query to the Bedrock Agent Runtime
    client, requesting ``number_of_results`` vector-search results from the
    knowledge base identified by ``knowledge_base_id``.

    Args:
        query: The text to search for.

    Returns:
        A JSON string encoding a list of objects, one per retrieval result,
        each shaped as ``{"contents": ..., "reference": {"url": ...,
        "title": ..., "from": "RAG"}}``. Non-ASCII characters are emitted
        as-is rather than escaped. An empty result set yields ``"[]"``.

    Raises:
        Any exception raised by the client call propagates unchanged.
    """
    response = bedrock_agent_runtime_client.retrieve(
        retrievalQuery={"text": query},
        knowledgeBaseId=knowledge_base_id,
        retrievalConfiguration={
            "vectorSearchConfiguration": {"numberOfResults": number_of_results},
        },
    )

    json_docs = [
        _to_json_doc(result)
        for result in response.get("retrievalResults", [])
    ]

    logger.info("json_docs: %s", json_docs)
    return json.dumps(json_docs, ensure_ascii=False)