# LLM_summary_shortener.py (5.96 kB)
import os
import glob
import yaml
import json
import logging
import requests
import re
import sys
# Configure the root logger: INFO and above, each record rendered as
# "<LEVEL> - <message>".
logging.basicConfig(
    format='%(levelname)s - %(message)s',
    level=logging.INFO,
)
def is_valid_string(s):
    """
    Check whether a string consists solely of permitted summary characters.
    Parameters:
    s (str): The candidate string.
    Returns:
    bool: True if s is non-empty and every character is an ASCII letter,
    a digit, a space, an underscore or a hyphen; False otherwise.
    """
    # fullmatch is used instead of match with '^...$' because '$' also matches
    # just before a trailing newline, which would let "text\n" through.
    return re.fullmatch(r'[a-zA-Z0-9 _-]+', s) is not None
def update_summary(current_summary: str, depth: int = 0) -> str:
    """
    Sends a request to the Magic API using the current_summary and returns the new_summary from the response.
    The reply is accepted only if, after stripping surrounding whitespace, it is
    at most 54 characters long and passes is_valid_string. Otherwise the reply
    is sent back to the API to be shortened again, up to 10 retries.
    Parameters:
    current_summary (str): The summary text to be sent as the user input.
    depth (int): Number of retries already performed. Callers normally leave
    this at its default of 0; it is incremented on each recursive retry.
    Returns:
    new_summary (str): The new summary generated by the API.
    Raises:
    ValueError: If the API key is not found in the environment.
    requests.HTTPError: For any HTTP errors that occur during the API call.
    requests.Timeout: If the API does not answer within the request timeout.
    SystemExit: With exit status 1 if no acceptable summary was produced
    after 10 retries.
    """
    max_summary_length = 54
    max_retries = 10
    # Without a timeout requests waits indefinitely on a stalled connection.
    request_timeout_seconds = 60

    logging.info(
        f"Initiating API call with the provided input: {current_summary}")
    # Retrieve API key from environment variable
    api_key = os.environ.get("x_magicapi_key")
    if not api_key:
        raise ValueError(
            "API key not found. Please set the 'x_magicapi_key' environment variable.")
    # API endpoint and headers
    url = "https://api.magicapi.dev/api/v2/bridgeml/llm/llm/chat/completions"
    headers = {
        "accept": "application/json",
        "x-magicapi-key": api_key,
        "Content-Type": "application/json"
    }
    system_prompt = """
You are a technical API documentation summarizer.
Given an API endpoint summary, create a shorter version that:
1. Maintains the core functionality description
2. Is under 54 characters (including spaces)
3. Uses only alphanumeric characters, hyphens and underscores
4. Preserves technical meaning and clarity
Bad example: 'iCanSee' for 'A fast text-to-image model that makes high-quality images in 4 steps'
Good example: 'Create high-quality images from text in 4 steps'
"""
    # Prepare payload with current_summary as the user message
    payload = {
        "messages": [
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content": current_summary
            }
        ],
        "model": "llama-3.2-1b-preview",
        "temperature": 1,
        "max_tokens": 1024,
        "top_p": 1,
        "stream": False,
        "stop": "</Output>"
    }
    # Make the POST request and wait for the API response
    response = requests.post(
        url, headers=headers, json=payload, timeout=request_timeout_seconds)
    response.raise_for_status()  # Raises an exception for HTTP error codes
    # Parse the API response and extract the new summary. Model replies often
    # carry leading/trailing whitespace or a final newline, so strip it before
    # measuring and validating.
    data = response.json()
    new_summary = data["choices"][0]["message"]["content"].strip()
    if len(new_summary) <= max_summary_length and is_valid_string(new_summary):
        return new_summary

    logging.info(f'New summary is {new_summary}')
    # '>=' rather than '==' so a caller-supplied depth above the limit cannot
    # recurse without bound.
    if depth >= max_retries:
        logging.error('Please use manual summary shortener, the llm is unable to provide an appropriate summary, even after 10 retries')
        # Non-zero status so shells and CI see the run as failed.
        sys.exit(1)
    # Feed the rejected reply back in to be shortened further; an empty reply
    # gives the model nothing to work with, so reuse the current input instead.
    return update_summary(new_summary or current_summary, depth + 1)
# Lower-case HTTP method names; only keys matching one of these are treated
# as operations when summaries are updated.
HTTP_METHODS = {
    "delete",
    "get",
    "head",
    "options",
    "patch",
    "post",
    "put",
    "trace",
}
def update_method_summaries(paths_dict, file_name):
    """
    Shorten over-long operation summaries in place.
    Walks every path entry in paths_dict and, for each key that names an HTTP
    method (case-insensitive) and maps to a dict whose "summary" is a string
    of 55 or more characters, replaces that summary with the result of
    update_summary. The original and the new summary are both logged. If
    update_summary raises, the error is logged and that operation is left
    unchanged. file_name is accepted but not used by this function.
    """
    for path, path_item in paths_dict.items():
        if not isinstance(path_item, dict):
            continue
        for method, op in path_item.items():
            # Skip keys that are not HTTP methods (case-insensitive).
            if method.lower() not in HTTP_METHODS:
                continue
            if not isinstance(op, dict):
                continue
            original_summary = op.get("summary")
            if not isinstance(original_summary, str):
                continue
            # Summaries shorter than 55 characters are already acceptable.
            if len(original_summary) < 55:
                continue
            logging.info(
                f"path '{path}', method '{method}': updating summary from '{original_summary}'")
            try:
                new_summary = update_summary(
                    current_summary=original_summary)
            except Exception as e:
                logging.error(f"Error updating summary for '{path}', method '{method}': {e}")
                continue
            logging.info(
                f"path '{path}', method '{method}': updating summary to '{new_summary}'")
            op["summary"] = new_summary
# Gather all OpenAPI spec files with .yaml, .yml, or .json extensions from the
# ../src/lib directory, resolved relative to this script's location.
file_patterns = ["*.yaml", "*.yml", "*.json"]
files = []
script_dir = os.path.dirname(os.path.abspath(__file__))
directory = os.path.join(script_dir, "../src/lib")
for pattern in file_patterns:
    search_pattern = os.path.join(directory, pattern)
    files.extend(glob.glob(search_pattern))

# Load each file, shorten its over-long operation summaries, and write it back.
for file in files:
    logging.info(f"Opening file: {file}")
    try:
        with open(file, 'r', encoding="utf-8") as f:
            data = json.load(f) if file.endswith(".json") else yaml.safe_load(f)
        logging.info(f"Finished reading file: {file}")
    except Exception as e:
        logging.error(f"Error reading {file}: {e}")
        continue
    # Only process files that have a 'paths' section. The glob also matches
    # files that are not OpenAPI specs; an empty YAML file parses to None and
    # other files may parse to a list or scalar, so confirm the top level is a
    # mapping before looking for 'paths'.
    if not isinstance(data, dict) or not isinstance(data.get("paths"), dict):
        continue
    update_method_summaries(data["paths"], file)
    try:
        # Serialise fully before opening the file for writing: opening with
        # 'w' truncates it, so a failure during dumping would otherwise leave
        # the spec empty or half-written.
        if file.endswith(".json"):
            serialized = json.dumps(data, indent=2)
        else:
            serialized = yaml.dump(data, sort_keys=False)
        with open(file, 'w', encoding="utf-8") as f:
            f.write(serialized)
        logging.info(f"Finished writing (closed) file: {file}")
    except Exception as e:
        logging.error(f"Error writing {file}: {e}")