"""
Exa AI service for advanced web search and content retrieval
"""
import os
from typing import Any, Dict, List, Optional, Union
from exa_py import Exa
from pydantic import BaseModel
class ExaSearchResult(BaseModel):
    """One entry of an Exa result list, as returned by ``ExaService``.

    Only ``title``, ``url`` and ``id`` are mandatory; every other field
    defaults to ``None`` when it is not supplied.
    """

    # --- always present ---------------------------------------------------
    title: str
    url: str
    id: str

    # --- optional metadata --------------------------------------------------
    score: Optional[float] = None
    published_date: Optional[str] = None
    author: Optional[str] = None

    # --- optional page content (filled by the content-retrieving calls) -----
    text: Optional[str] = None
    highlights: Optional[List[str]] = None
    highlight_scores: Optional[List[float]] = None
class ExaSearchOptions(BaseModel):
    """Tunable parameters for an Exa search request.

    The first three fields are always sent with a search. The remaining
    fields are optional filters that default to ``None``; ``ExaService``
    forwards a filter only when it holds a truthy value.
    """

    # --- core parameters (always sent) --------------------------------------
    type: str = "neural"  # "neural" or "keyword"
    use_autoprompt: bool = True
    num_results: int = 10

    # --- domain filters -------------------------------------------------------
    include_domains: Optional[List[str]] = None
    exclude_domains: Optional[List[str]] = None

    # --- date filters (plain strings, passed through unchanged) --------------
    start_published_date: Optional[str] = None
    end_published_date: Optional[str] = None
    start_crawled_date: Optional[str] = None
    end_crawled_date: Optional[str] = None

    # --- text / category filters ----------------------------------------------
    include_text: Optional[List[str]] = None
    exclude_text: Optional[List[str]] = None
    category: Optional[str] = None
class ExaContentOptions(BaseModel):
    """Settings that control which page content is requested with results."""

    # Either a plain on/off flag or a dict of text options; the value is
    # passed through to the client unchanged as the ``text`` argument.
    text: Union[bool, Dict[str, Any]] = True

    # Option dicts passed through as ``highlights`` / ``summary``; they are
    # only sent when set to a non-empty value.
    highlights: Optional[Dict[str, Any]] = None
    summary: Optional[Dict[str, Any]] = None
class ExaService:
    """Service for interacting with the Exa AI search API.

    Every public query method follows the same best-effort contract: any
    exception raised while building the request, calling the client, or
    converting the response is printed and an empty list is returned.

    NOTE: the query methods are declared ``async`` but invoke the client
    without ``await``, so the client call runs inline in the calling
    coroutine.
    """

    # Optional ``ExaSearchOptions`` attributes that are forwarded to the
    # client under the same keyword name when they hold a truthy value.
    _OPTIONAL_SEARCH_FILTERS = (
        "include_domains",
        "exclude_domains",
        "start_published_date",
        "end_published_date",
        "start_crawled_date",
        "end_crawled_date",
        "include_text",
        "exclude_text",
        "category",
    )

    # Optional result attributes copied onto ``ExaSearchResult``.
    _METADATA_FIELDS = ("score", "published_date", "author")
    _CONTENT_FIELDS = ("text", "highlights", "highlight_scores")

    def __init__(self, api_key: Optional[str] = None):
        """Create the underlying Exa client.

        Args:
            api_key: Explicit API key. When omitted (or empty), the
                ``EXA_API_KEY`` environment variable is used instead.

        Raises:
            ValueError: If no API key could be resolved.
        """
        self.api_key = api_key or os.getenv("EXA_API_KEY")
        if not self.api_key:
            raise ValueError("EXA_API_KEY environment variable is required")
        self.client = Exa(api_key=self.api_key)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _build_search_params(
        self, query: str, options: ExaSearchOptions
    ) -> Dict[str, Any]:
        """Translate a query plus search options into client keyword arguments."""
        params: Dict[str, Any] = {
            "query": query,
            "type": options.type,
            "use_autoprompt": options.use_autoprompt,
            "num_results": options.num_results,
        }
        # Unset (None) and empty filters are left out of the request entirely.
        for name in self._OPTIONAL_SEARCH_FILTERS:
            value = getattr(options, name)
            if value:
                params[name] = value
        return params

    def _build_content_params(
        self, content_options: ExaContentOptions
    ) -> Dict[str, Any]:
        """Translate content options into client keyword arguments."""
        # ``text`` is always sent, whether it is a bool flag or an options dict.
        params: Dict[str, Any] = {"text": content_options.text}
        if content_options.highlights:
            params["highlights"] = content_options.highlights
        if content_options.summary:
            params["summary"] = content_options.summary
        return params

    def _to_results(self, response: Any, optional_fields) -> List[ExaSearchResult]:
        """Convert a client response into a list of ``ExaSearchResult``.

        Args:
            response: Client response exposing a ``results`` iterable.
            optional_fields: Names of optional attributes to copy from each
                result; attributes missing on a result become ``None``.
        """
        return [
            ExaSearchResult(
                # ``title`` is a required string on our model; fall back to ""
                # so one untitled result cannot fail validation and cause the
                # whole result list to be discarded by the caller's except.
                title=getattr(item, "title", None) or "",
                url=item.url,
                id=item.id,
                **{name: getattr(item, name, None) for name in optional_fields},
            )
            for item in response.results
        ]

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def search(
        self,
        query: str,
        options: Optional[ExaSearchOptions] = None
    ) -> List[ExaSearchResult]:
        """Perform a search using Exa AI.

        Args:
            query: The search query.
            options: Search options; defaults to ``ExaSearchOptions()``.

        Returns:
            Results with metadata only (no page content), or ``[]`` on error.
        """
        try:
            if options is None:
                options = ExaSearchOptions()
            response = self.client.search(
                **self._build_search_params(query, options)
            )
            return self._to_results(response, self._METADATA_FIELDS)
        except Exception as e:
            print(f"Exa search error: {e}")
            return []

    async def search_and_contents(
        self,
        query: str,
        search_options: Optional[ExaSearchOptions] = None,
        content_options: Optional[ExaContentOptions] = None
    ) -> List[ExaSearchResult]:
        """Search and retrieve content in one call.

        All filters honoured by ``search`` (including crawl dates, text
        filters and category) are forwarded here as well.

        Args:
            query: The search query.
            search_options: Search options; defaults to ``ExaSearchOptions()``.
            content_options: Content options; defaults to
                ``ExaContentOptions()``.

        Returns:
            Results with metadata and content fields, or ``[]`` on error.
        """
        try:
            if search_options is None:
                search_options = ExaSearchOptions()
            if content_options is None:
                content_options = ExaContentOptions()
            params = self._build_search_params(query, search_options)
            params.update(self._build_content_params(content_options))
            response = self.client.search_and_contents(**params)
            return self._to_results(
                response, self._METADATA_FIELDS + self._CONTENT_FIELDS
            )
        except Exception as e:
            print(f"Exa search_and_contents error: {e}")
            return []

    async def find_similar(
        self,
        url: str,
        num_results: int = 10,
        exclude_source_domain: bool = False
    ) -> List[ExaSearchResult]:
        """Find similar pages to a given URL.

        Args:
            url: The reference URL.
            num_results: Number of results to request.
            exclude_source_domain: Forwarded to the client unchanged.

        Returns:
            Results with metadata only, or ``[]`` on error.
        """
        try:
            response = self.client.find_similar(
                url=url,
                num_results=num_results,
                exclude_source_domain=exclude_source_domain,
            )
            return self._to_results(response, self._METADATA_FIELDS)
        except Exception as e:
            print(f"Exa find_similar error: {e}")
            return []

    async def find_similar_and_contents(
        self,
        url: str,
        num_results: int = 10,
        exclude_source_domain: bool = False,
        content_options: Optional[ExaContentOptions] = None
    ) -> List[ExaSearchResult]:
        """Find similar pages and retrieve their content.

        Args:
            url: The reference URL.
            num_results: Number of results to request.
            exclude_source_domain: Forwarded to the client unchanged.
            content_options: Content options; defaults to
                ``ExaContentOptions()``.

        Returns:
            Results with metadata and content fields, or ``[]`` on error.
        """
        try:
            if content_options is None:
                content_options = ExaContentOptions()
            params: Dict[str, Any] = {
                "url": url,
                "num_results": num_results,
                "exclude_source_domain": exclude_source_domain,
            }
            params.update(self._build_content_params(content_options))
            response = self.client.find_similar_and_contents(**params)
            return self._to_results(
                response, self._METADATA_FIELDS + self._CONTENT_FIELDS
            )
        except Exception as e:
            print(f"Exa find_similar_and_contents error: {e}")
            return []

    async def get_contents(
        self,
        urls: List[str],
        content_options: Optional[ExaContentOptions] = None
    ) -> List[ExaSearchResult]:
        """Get contents for specific URLs.

        Args:
            urls: URLs (or result ids) whose content should be fetched.
            content_options: Content options; defaults to
                ``ExaContentOptions()``.

        Returns:
            Results carrying content fields only (score, published date and
            author are left as ``None``), or ``[]`` on error.
        """
        try:
            if content_options is None:
                content_options = ExaContentOptions()
            # The list is passed positionally rather than as ``ids=...``: the
            # first parameter of the client's ``get_contents`` is believed to
            # have been renamed between exa_py releases (``ids`` -> ``urls``),
            # and a positional argument works with either name.
            response = self.client.get_contents(
                urls, **self._build_content_params(content_options)
            )
            return self._to_results(response, self._CONTENT_FIELDS)
        except Exception as e:
            print(f"Exa get_contents error: {e}")
            return []
def get_exa_service(api_key: Optional[str] = None) -> Optional[ExaService]:
    """
    Build an Exa service, tolerating a missing API key.

    Args:
        api_key: Optional API key, handed to ``ExaService`` unchanged.

    Returns:
        A ready ``ExaService``, or ``None`` when constructing it raises
        ``ValueError`` (e.g. no API key is available).
    """
    try:
        service = ExaService(api_key)
    except ValueError:
        # No usable API key: signal "service unavailable" instead of raising.
        return None
    return service