grok_agent
Process prompts with AI to generate responses, analyze images and videos, search web content, execute code, and handle file inputs through configurable parameters.
Input Schema
Table (JSON Schema)
| Name | Required | Description | Default |
|---|---|---|---|
| prompt | Yes | | |
| session | No | | |
| model | No | | grok-4-1-fast-reasoning |
| file_ids | No | | |
| image_urls | No | | |
| image_paths | No | | |
| use_web_search | No | | |
| use_x_search | No | | |
| use_code_execution | No | | |
| allowed_domains | No | | |
| excluded_domains | No | | |
| allowed_x_handles | No | | |
| excluded_x_handles | No | | |
| from_date | No | | |
| to_date | No | | |
| enable_image_understanding | No | | |
| enable_video_understanding | No | | |
| include_inline_citations | No | | |
| system_prompt | No | | |
| max_turns | No | | |
Implementation Reference
- src/server.py:376-477 (handler) — The implementation of the `grok_agent` tool, which orchestrates chat functionality with optional web search, X search, and code execution capabilities using the xAI SDK.
async def grok_agent(
    prompt: str,
    session: Optional[str] = None,
    model: str = "grok-4-1-fast-reasoning",
    file_ids: Optional[List[str]] = None,
    image_urls: Optional[List[str]] = None,
    image_paths: Optional[List[str]] = None,
    use_web_search: bool = False,
    use_x_search: bool = False,
    use_code_execution: bool = False,
    allowed_domains: Optional[List[str]] = None,
    excluded_domains: Optional[List[str]] = None,
    allowed_x_handles: Optional[List[str]] = None,
    excluded_x_handles: Optional[List[str]] = None,
    from_date: Optional[str] = None,
    to_date: Optional[str] = None,
    enable_image_understanding: bool = False,
    enable_video_understanding: bool = False,
    include_inline_citations: bool = False,
    system_prompt: Optional[str] = None,
    max_turns: Optional[int] = None,
):
    """Run one Grok chat turn with optional web/X search and code execution.

    Builds an xAI SDK chat from the persisted session history (if any),
    attaches uploaded files and images to the user message, samples a
    response, and optionally persists the new turn back to the session.

    Args:
        prompt: The user prompt for this turn.
        session: Session identifier; when given, prior history is loaded
            with ``load_history`` and the new turn saved via ``save_history``.
        model: xAI model name.
        file_ids: Previously uploaded file IDs to attach to the message.
        image_urls: Remote image URLs to attach.
        image_paths: Local image paths; each is base64-encoded into a
            ``data:image/...`` URL.
        use_web_search: Enable the server-side web-search tool.
        use_x_search: Enable the server-side X-search tool.
        use_code_execution: Enable the server-side code-execution tool.
        allowed_domains: Web-search domain allow-list.
        excluded_domains: Web-search domain deny-list.
        allowed_x_handles: X-search handle allow-list.
        excluded_x_handles: X-search handle deny-list.
        from_date: X-search window start, ``DD-MM-YYYY``.
        to_date: X-search window end, ``DD-MM-YYYY``.
        enable_image_understanding: Media flag forwarded to the search tools.
        enable_video_understanding: Media flag forwarded to the X-search tool.
        include_inline_citations: Ask the API for inline citations.
        system_prompt: Optional system message prepended to the chat.
        max_turns: Upper bound on internal tool-use turns.

    Returns:
        The assistant's reply text, followed by a ``**Sources:**`` list
        when the response carries citations.

    Raises:
        ValueError: If ``from_date``/``to_date`` are not valid
            ``DD-MM-YYYY`` strings.
    """
    history = load_history(session) if session else []

    client = Client(api_key=XAI_API_KEY)
    try:
        tools = []
        if use_web_search:
            web_params = build_params(
                allowed_domains=allowed_domains,
                excluded_domains=excluded_domains,
                enable_image_understanding=enable_image_understanding,
            )
            tools.append(xai_web_search(**web_params))
        if use_x_search:
            x_params = build_params(
                allowed_x_handles=allowed_x_handles,
                excluded_x_handles=excluded_x_handles,
                from_date=datetime.strptime(from_date, "%d-%m-%Y") if from_date else None,
                to_date=datetime.strptime(to_date, "%d-%m-%Y") if to_date else None,
                enable_image_understanding=enable_image_understanding,
                enable_video_understanding=enable_video_understanding,
            )
            tools.append(xai_x_search(**x_params))
        if use_code_execution:
            tools.append(code_execution())

        include_options = ["code_execution_call_output"]
        if include_inline_citations:
            include_options.append("inline_citations")

        chat_params = {"model": model, "include": include_options}
        if tools:
            chat_params["tools"] = tools
        # `is not None` so an explicit max_turns=0 is still forwarded
        # (a bare truthiness check would silently drop it).
        if max_turns is not None:
            chat_params["max_turns"] = max_turns

        chat = client.chat.create(**chat_params)

        if system_prompt:
            chat.append(system(system_prompt))

        # Replay persisted history so the model sees prior turns.
        for message in history:
            if message["role"] == "user":
                chat.append(user(message["content"]))
            elif message["role"] == "assistant":
                chat.append(assistant(message["content"]))

        # Assemble the multimodal user message: files, images, then the prompt.
        content_items = []
        if file_ids:
            content_items.extend(file(fid) for fid in file_ids)
        if image_urls:
            content_items.extend(image(image_url=url) for url in image_urls)
        if image_paths:
            for path in image_paths:
                ext = Path(path).suffix.lower().replace('.', '')
                base64_img = encode_image_to_base64(path)
                content_items.append(image(image_url=f"data:image/{ext};base64,{base64_img}"))
        content_items.append(prompt)
        chat.append(user(*content_items))

        response = chat.sample()
    finally:
        # Always release the client — the original version leaked the
        # connection whenever tool setup or sampling raised.
        client.close()

    if session:
        # One timestamp for both records so the turn is logged atomically.
        now = datetime.now().strftime("%d.%m.%Y %H:%M:%S")
        history.append({"role": "user", "content": prompt, "time": now})
        history.append({"role": "assistant", "content": response.content, "time": now})
        save_history(session, history)

    result = [response.content]
    if response.citations:
        result.append("\n\n**Sources:**")
        for url in response.citations:
            result.append(f"- {url}")
    return "\n".join(result)