import os
import requests
from typing import Dict, Optional, Union
from pathlib import Path
from src.agents.tools.reader import ReaderTools
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) at import time so that the
# os.getenv("DOCTRANSLATE_API_KEY") fallback in DocTranslateTools.__init__ can see them.
load_dotenv()
class DocTranslateTools:
    """
    A client for the DocTranslate API document and text translation services.

    The class can estimate translation credits locally, submit documents or
    text for translation, convert a document to a presentation, fetch the
    result of a task, and list the user's history.

    Every method that takes ``file_path`` accepts a local path (``str`` or
    ``os.PathLike``) or an ``http://`` / ``https://`` URL string; URLs are
    downloaded first via ``ReaderTools.download_link_to_file``.
    """

    BASE_URL = "https://doctranslate-api.doctranslate.io"

    # File extension -> value sent both as the upload content type and as the
    # ``file_type`` form field.
    _DOCUMENT_MIME_TYPES = {
        ".docx": "application/docx",
        ".pdf": "application/pdf",
        ".xlsx": "application/xlsx",
        ".pptx": "application/pptx",
    }

    # Extensions accepted by ``doc2pptx``.
    _PPTX_SOURCE_TYPES = (".docx", ".pdf")

    # Template identifiers accepted by ``doc2pptx``.
    _PPTX_TEMPLATES = (
        "system/v2/template_1",
        "system/v2/template_2",
        "system/v2/template_3",
    )

    # Accepted values for the ``status`` filter of ``get_user_history``.
    _HISTORY_STATUSES = ("completed", "processing", "failed")

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the DocTranslateTools with an API key.

        Args:
            api_key (str, optional): The API key for authenticating with the
                DocTranslate API. When omitted, the ``DOCTRANSLATE_API_KEY``
                environment variable is used.
        """
        if api_key is None:
            api_key = os.getenv("DOCTRANSLATE_API_KEY")
        self.api_key = api_key
        # Kept as a public attribute for backward compatibility; the requests
        # made by this class authenticate with the ``X-Api-Key`` header instead.
        self.headers = {"Authorization": f"Bearer {self.api_key}"}

    def _auth_headers(self) -> Dict[str, Optional[str]]:
        """Return the authentication headers sent with every API request."""
        return {"X-Api-Key": self.api_key}

    @staticmethod
    def _resolve_local_path(file_path: Union[str, os.PathLike]):
        """
        Return an existing local path for ``file_path``, downloading URLs first.

        Args:
            file_path (Union[str, os.PathLike]): Local path or http(s) URL.

        Returns:
            The local path: the input as a ``str``, or whatever
            ``ReaderTools.download_link_to_file`` returned for a URL.

        Raises:
            FileNotFoundError: If the resulting path does not exist.
        """
        # os.fsdecode accepts str, bytes and os.PathLike, so pathlib.Path inputs
        # no longer fail on a missing ``startswith`` attribute.
        local_path = os.fsdecode(file_path)
        # Require a full scheme so a local file such as "http_notes.pdf" is not
        # mistaken for a URL; URL schemes are case-insensitive.
        if local_path.lower().startswith(("http://", "https://")):
            local_path = ReaderTools.download_link_to_file(local_path)
        if not os.path.exists(local_path):
            raise FileNotFoundError(f"File not found: {local_path}")
        return local_path

    @staticmethod
    def calculate_credits_of_document(
        file_path: Union[str, os.PathLike]
    ) -> Dict[str, Union[int, str]]:
        """
        Calculate the estimated credits required to translate a document.

        The estimate is computed locally from the file size and extension; no
        API call is made (a URL input is downloaded first).

        Args:
            file_path (Union[str, os.PathLike]): Path or http(s) URL of the document.

        Returns:
            Dict[str, Union[int, str]]: Dictionary containing:
                - credits: Estimated credits required (at least 1)
                - file_size: File size in bytes
                - file_type: Lower-cased file extension, including the dot
                - word_count_estimate: Estimated word count

        Raises:
            FileNotFoundError: If the specified file does not exist.
        """
        local_path = DocTranslateTools._resolve_local_path(file_path)

        file_size = os.path.getsize(local_path)
        file_ext = Path(local_path).suffix.lower()

        # Credit calculation parameters (example values; adjust to the API's pricing).
        credit_per_kb = 0.1
        type_multipliers = {
            ".pdf": 1.2,  # PDFs might require more processing
            ".docx": 1.1,  # DOCX files might have more complex formatting
        }

        # Base credits from the size in KB, never less than one credit.
        estimated_credits = max(1, int((file_size / 1024) * credit_per_kb))
        multiplier = type_multipliers.get(file_ext)
        if multiplier is not None:
            estimated_credits = int(estimated_credits * multiplier)

        # Very rough estimate: one word is about 5 bytes on average.
        word_count_estimate = file_size // 5

        return {
            "credits": estimated_credits,
            "file_size": file_size,
            "file_type": file_ext,
            "word_count_estimate": word_count_estimate,
        }

    def doc2pptx(
        self,
        file_path: Union[str, os.PathLike],
        dest_lang: str,
        template_path: str = "system/v2/template_1",
    ) -> Dict:
        """
        Convert a document to PPTX format.

        The file is uploaded to the ``/v3/summarization`` endpoint.

        Args:
            file_path (Union[str, os.PathLike]): Path or http(s) URL of the
                document to convert. Must have a ``.docx`` or ``.pdf`` extension.
            dest_lang (str): Target language code for the text in the presentation.
            template_path (str, optional): Template identifier. Must be one of:
                'system/v2/template_1', 'system/v2/template_2',
                'system/v2/template_3'. Defaults to 'system/v2/template_1'.

        Returns:
            Dict: Parsed JSON response from the DocTranslate API.

        Raises:
            FileNotFoundError: If the specified file does not exist.
            ValueError: If the file extension or ``template_path`` is not supported.
            requests.exceptions.RequestException: If the API request fails.
        """
        local_path = self._resolve_local_path(file_path)

        file_type = Path(local_path).suffix.lower()
        if file_type not in self._PPTX_SOURCE_TYPES:
            raise ValueError("Invalid file_type. Must be one of: pdf or docx")
        mime_type = self._DOCUMENT_MIME_TYPES[file_type]

        if template_path not in self._PPTX_TEMPLATES:
            raise ValueError(
                f"Invalid template_path. Must be one of: {', '.join(self._PPTX_TEMPLATES)}"
            )

        data = {
            "file_type": mime_type,
            "dest_lang": dest_lang,
            "template_path": template_path,
        }
        # The request is sent inside the ``with`` block so the file handle stays
        # open while the multipart body is being read.
        with open(local_path, "rb") as f:
            files = {"file": (os.path.basename(local_path), f, mime_type)}
            response = requests.post(
                f"{self.BASE_URL}/v3/summarization",
                headers=self._auth_headers(),
                files=files,
                data=data,
            )
        response.raise_for_status()
        return response.json()

    def run_translate_document(
        self,
        file_path: Union[str, os.PathLike],
        dest_lang: str,
        tone: str = "Default",
        domain: str = "Default",
        original_lang: Optional[str] = None,
        process_mode: Optional[str] = "replace",
        translate_type: Optional[str] = "Professional",
        is_translate_images: Optional[bool] = False,
        bilingual_text_style__font: Optional[str] = None,
        bilingual_text_style__color: Optional[str] = None,
        dictionary: Optional[str] = None,
        output_format: Optional[str] = "auto",
        custom_prompt: str = "",
        **kwargs,
    ) -> Dict:
        """
        Submit a document for translation.

        The file is uploaded to the ``/v3/translate/document`` endpoint. Files
        with an extension other than .docx, .pdf, .xlsx or .pptx are sent as
        ``application/octet-stream``.

        Args:
            file_path (Union[str, os.PathLike]): Path or http(s) URL of the document.
            dest_lang (str): Sent as the ``dest_lang`` form field.
            tone, domain, original_lang, process_mode, translate_type,
            is_translate_images, bilingual_text_style__font,
            bilingual_text_style__color, dictionary, output_format,
            custom_prompt: Each is sent as the form field of the same name;
                accepted values are defined by the DocTranslate API.
            **kwargs: Extra form fields. They are merged last, so they override
                the named parameters on a key clash.

        Returns:
            Dict: Parsed JSON response from the DocTranslate API.

        Raises:
            FileNotFoundError: If the specified file does not exist.
            requests.exceptions.RequestException: If the API request fails.
        """
        local_path = self._resolve_local_path(file_path)

        mime_type = self._DOCUMENT_MIME_TYPES.get(
            Path(local_path).suffix.lower(), "application/octet-stream"
        )
        data = {
            "file_type": mime_type,
            "dest_lang": dest_lang,
            "tone": tone,
            "domain": domain,
            "custom_prompt": custom_prompt,
            "output_format": output_format,
            "original_lang": original_lang,
            "process_mode": process_mode,
            "translate_type": translate_type,
            "is_translate_images": is_translate_images,
            "bilingual_text_style__font": bilingual_text_style__font,
            "bilingual_text_style__color": bilingual_text_style__color,
            "dictionary": dictionary,
            **kwargs,
        }
        # Keep the file open for the duration of the upload.
        with open(local_path, "rb") as f:
            files = {"file": (os.path.basename(local_path), f, mime_type)}
            response = requests.post(
                f"{self.BASE_URL}/v3/translate/document",
                headers=self._auth_headers(),
                files=files,
                data=data,
            )
        response.raise_for_status()
        return response.json()

    def get_translate_result_by_task_id(self, task_id: str) -> Dict:
        """
        Retrieve the translation result for a specific task.

        Args:
            task_id (str): The ID of the translation task.

        Returns:
            Dict: Translation result or status information.

        Raises:
            requests.exceptions.RequestException: If the API request fails.
        """
        response = requests.get(
            f"{self.BASE_URL}/v3/result/{task_id}", headers=self._auth_headers()
        )
        response.raise_for_status()
        return response.json()

    def get_user_history(
        self,
        page: int = 1,
        page_size: int = 10,
        status: Optional[str] = None,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
    ) -> Dict:
        """
        Retrieve the user's translation history.

        Args:
            page (int, optional): Page number to return (1-based). Defaults to 1.
            page_size (int, optional): Number of items per page (1-100). Defaults to 10.
            status (str, optional): Filter by status. Possible values:
                'completed', 'processing', 'failed'.
            start_date (str, optional): Filter by start date (YYYY-MM-DD format).
            end_date (str, optional): Filter by end date (YYYY-MM-DD format).

        Returns:
            Dict: Response containing the user's translation history.

        Raises:
            ValueError: If any of the parameters are invalid.
            requests.exceptions.RequestException: If the API request fails.
        """
        if page < 1:
            raise ValueError("Page must be greater than 0")
        if page_size < 1 or page_size > 100:
            raise ValueError("Page size must be between 1 and 100")

        params = {"page": page, "page_size": page_size}
        if status is not None:
            if status not in self._HISTORY_STATUSES:
                raise ValueError(
                    "Status must be one of: 'completed', 'processing', 'failed'"
                )
            params["status"] = status
        # Date filters are only sent when non-empty.
        if start_date:
            params["start_date"] = start_date
        if end_date:
            params["end_date"] = end_date

        # NOTE(review): the filters travel in the query string of a POST request;
        # confirm the verb against the DocTranslate API reference.
        response = requests.post(
            f"{self.BASE_URL}/user/history",
            headers=self._auth_headers(),
            params=params,
        )
        response.raise_for_status()
        return response.json()

    def translate_text(
        self,
        text: str,
        dest_lang: str,
        original_lang: Optional[str] = None,
        process_mode: Optional[str] = "replace",
        style_mode: Optional[int] = None,
        translate_type: Optional[str] = "Professional",
        tone: Optional[str] = "Default",
        domain: Optional[str] = "Default",
        bilingual_text_style__font: Optional[str] = None,
        bilingual_text_style__font_size: Optional[str] = None,
        bilingual_text_style__color: Optional[str] = None,
        dictionary: Optional[str] = None,
        custom_prompt: Optional[str] = "",
        **kwargs,
    ) -> Dict:
        """
        Submit a piece of text for translation.

        The form is posted to the ``/v3/translate/text`` endpoint.

        Args:
            text (str): Sent as the ``text`` form field.
            dest_lang (str): Sent as the ``dest_lang`` form field.
            process_mode, translate_type, tone, domain, custom_prompt: Always
                sent as the form field of the same name.
            original_lang, bilingual_text_style__font,
            bilingual_text_style__font_size, bilingual_text_style__color,
            dictionary: Sent only when truthy.
            style_mode (int, optional): Sent only when not ``None``.
            **kwargs: Extra form fields; an optional field listed above that is
                sent overrides a kwarg with the same key.

        Returns:
            Dict: Parsed JSON response from the DocTranslate API.

        Raises:
            requests.exceptions.RequestException: If the API request fails.
        """
        data = {
            "text": text,
            "dest_lang": dest_lang,
            "process_mode": process_mode,
            "translate_type": translate_type,
            "tone": tone,
            "domain": domain,
            "custom_prompt": custom_prompt,
            **kwargs,
        }

        # Add optional fields only if provided.
        if original_lang:
            data["original_lang"] = original_lang
        if style_mode is not None:  # 0 is a value that must still be sent
            data["style_mode"] = style_mode
        if bilingual_text_style__font:
            data["bilingual_text_style__font"] = bilingual_text_style__font
        if bilingual_text_style__font_size:
            data["bilingual_text_style__font_size"] = bilingual_text_style__font_size
        if bilingual_text_style__color:
            data["bilingual_text_style__color"] = bilingual_text_style__color
        if dictionary:
            data["dictionary"] = dictionary

        response = requests.post(
            f"{self.BASE_URL}/v3/translate/text",
            headers=self._auth_headers(),
            data=data,
        )
        response.raise_for_status()
        return response.json()