Get Questions By IDs
get_questions: Retrieve detailed question information and authoritative mark scheme points for specified question IDs, with compact LLM-friendly responses and optional image/OCR data.
Instructions
Fetch full question details and mark schemes for selected IDs.
Accepts either question_ids (comma-separated) or question_ids_list.
Default detail is compact for LLM-friendly responses.
Returns for each question:
Full question text and context
Key mark scheme points (authoritative answers)
Paper identification
question_url: link to full question page
Image URLs (question_image_url, ms_image_url) for image-based questions
Use include_images=True to get image URLs for ALL questions (not just image-based). Use include_ocr=True to get OCR text extracted from question images.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
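| question_ids | No | Comma-separated question IDs, e.g. `552,799` | `""` |
| question_ids_list | No | List of integer question IDs, e.g. `[552, 799]` | `None` |
| detail | No | Response detail level: `compact` or `full` | `compact` |
| max_key_points | No | Maximum key points returned per question (clamped to 1-20) | `8` |
| include_images | No | Include image URLs for all questions, not just image-based ones | `False` |
| include_ocr | No | Include OCR text extracted from question images | `False` |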
Implementation Reference
- mcp_server.py:834-982 (handler) The get_questions tool handler function. Accepts question_ids (comma-separated string), question_ids_list (list of ints), detail ('compact' or 'full'), max_key_points, include_images, and include_ocr. Fetches question details from the /questions/batch API endpoint, extracts key points from mark schemes, and returns a ToolResult with a text summary and structured content.
def get_questions(
    question_ids: str = "",
    question_ids_list: Optional[list[int]] = None,
    detail: str = "compact",
    max_key_points: int = 8,
    include_images: bool = False,
    include_ocr: bool = False,
) -> ToolResult:
    """Fetch full question details and mark schemes for selected IDs.

    Accepts either `question_ids` (comma-separated) or `question_ids_list`.
    Default detail is `compact` for LLM-friendly responses.

    Returns for each question:
    - Full question text and context
    - Key mark scheme points (authoritative answers)
    - Paper identification
    - question_url: link to full question page
    - Image URLs (question_image_url, ms_image_url) for image-based questions

    Use include_images=True to get image URLs for ALL questions (not just image-based).
    Use include_ocr=True to get OCR text extracted from question images.
    """
    # NOTE(review): the docstring above appears to be what is published as the
    # tool's instructions, so its wording is left untouched; implementation
    # notes live in comments instead.
    #
    # Raises ToolError for: an unknown `detail` value (INVALID_DETAIL),
    # non-integer IDs (INVALID_IDS), no IDs (NO_IDS), more than MAX_BATCH_IDS
    # IDs (TOO_MANY_IDS), or any failure of the /questions/batch request.
    if detail not in {"compact", "full"}:
        raise ToolError("INVALID_DETAIL: detail must be 'compact' or 'full'.")

    # Clamp the caller-supplied limit into the 1..20 range.
    capped_points = max(1, min(max_key_points, 20))

    try:
        unique_ids = _parse_question_ids_input(question_ids, question_ids_list)
    except (TypeError, ValueError) as exc:
        # Chain the cause so the offending value stays visible in tracebacks.
        raise ToolError(
            "INVALID_IDS: Use integers only, e.g. question_ids='552,799' or question_ids_list=[552,799]."
        ) from exc

    if not unique_ids:
        raise ToolError("NO_IDS: No question IDs were provided.")
    if len(unique_ids) > MAX_BATCH_IDS:
        raise ToolError(
            f"TOO_MANY_IDS: Maximum {MAX_BATCH_IDS} IDs per request (received {len(unique_ids)})."
        )

    try:
        rows = _api_get("/questions/batch", {"ids": ",".join(str(x) for x in unique_ids)})
    except Exception as exc:
        # Tool boundary: log the full traceback, surface only a short message.
        logger.error("get_questions failed: %s", exc, exc_info=True)
        error_payload = _error_from_exception(exc, "/questions/batch")
        message = error_payload.get("error", {}).get("message", "Question fetch failed.")
        raise ToolError(message) from exc

    # Anything other than a list is treated as "nothing found".
    if not isinstance(rows, list):
        rows = []

    # Re-order the rows to follow the order in which the IDs were requested.
    rows_by_id = {row.get("id"): row for row in rows if isinstance(row, dict)}
    ordered_rows = [rows_by_id[i] for i in unique_ids if i in rows_by_id]
    missing_ids = [i for i in unique_ids if i not in rows_by_id]

    questions: list[dict[str, Any]] = []
    for row in ordered_rows:
        paper = row.get("paper") or {}
        bullets = row.get("answer_bullet_points")
        if not isinstance(bullets, list):
            bullets = []

        key_points = _extract_key_points(
            answer_text=row.get("answer_text"),
            bullet_points=bullets,
            max_points=capped_points,
        )
        is_image_based = bool(row.get("is_image_based"))

        base_payload = {
            "id": row.get("id"),
            "question_number": row.get("question_number"),
            "question_text": row.get("question_text"),
            "question_context": row.get("question_context"),
            "marks": row.get("marks"),
            "topic": row.get("topic"),
            "chapter_name": row.get("chapter_name"),
            "subtopic": row.get("subtopic"),
            "paper": {
                "paper_number": paper.get("paper_number"),
                "year": paper.get("year"),
                "session": _short_session(paper.get("session_name")),
                "variant": paper.get("variant"),
            },
            "is_image_based": is_image_based,
            "question_url": f"{QUESTION_URL_BASE}/{row.get('id')}" if row.get("id") else None,
            "key_points": key_points,
        }

        # Image URLs are always included for image-based questions, and for
        # every question when the caller passes include_images=True.
        if is_image_based or include_images:
            base_payload["question_image_url"] = _to_image_url(row.get("image_path"))
            base_payload["ms_image_url"] = _to_image_url(row.get("ms_image_path"))

        if detail == "compact":
            # Compact mode replaces the long text fields with length-limited
            # versions and adds a short preview of the answer.
            compact_payload = dict(base_payload)
            compact_payload["question_text"] = _to_ascii_text(row.get("question_text"), max_len=550)
            compact_payload["question_context"] = _to_ascii_text(
                row.get("question_context"),
                max_len=280,
            )
            compact_payload["answer_preview"] = _to_ascii_text(
                row.get("answer_text"),
                max_len=320,
            )
            if include_ocr:
                compact_payload["ocr_text"] = _to_ascii_text(row.get("ocr_text"), max_len=500)
            questions.append(compact_payload)
            continue

        # Full mode: untruncated answer text plus the raw bullet points. The
        # "ocr_text" key is always present here (None unless include_ocr).
        questions.append(
            {
                **base_payload,
                "answer_text": row.get("answer_text") or "",
                "answer_bullet_points": bullets,
                "ocr_text": row.get("ocr_text") if include_ocr else None,
            }
        )

    payload = {
        "ok": True,
        "requested_ids": unique_ids,
        "meta": {
            "requested": len(unique_ids),
            "found": len(questions),
            "missing": len(missing_ids),
            "detail": detail,
        },
        "missing_ids": missing_ids,
        "questions": questions,
    }
    summary_text = _build_questions_summary(
        questions=questions,
        missing_ids=missing_ids,
        detail=detail,
    )
    return ToolResult(content=summary_text, structured_content=payload)
# - mcp_server.py:829-833 (registration) The @mcp.tool decorator registering 'get_questions' as an MCP tool with title 'Get Questions By IDs', tags 'search' and 'core', and annotations for readOnlyHint and idempotentHint.
# Registration decorator for the get_questions handler: sets the tool title,
# its tags, and the read-only / idempotent annotation hints.
@mcp.tool( title="Get Questions By IDs", tags={"search", "core"}, annotations={"readOnlyHint": True, "idempotentHint": True}, )
# - mcp_server.py:326-343 (helper) Helper function _parse_question_ids_input that parses question IDs from either a comma-separated string or a list of ints, deduplicating them. Used by get_questions to normalize input.
def _parse_question_ids_input(question_ids: str, question_ids_list: Optional[list[int]]) -> list[int]: parsed_ids: list[int] = [] if isinstance(question_ids, str) and question_ids.strip(): for part in question_ids.split(","): value = part.strip() if value: parsed_ids.append(int(value)) if question_ids_list: for value in question_ids_list: parsed_ids.append(int(value)) unique_ids: list[int] = [] for qid in parsed_ids: if qid not in unique_ids: unique_ids.append(qid) return unique_ids - mcp_server.py:393-444 (helper)Helper function _extract_key_points that parses mark scheme answer text and bullet points into concise key points. Used by get_questions to produce the key_points field.
def _extract_key_points(answer_text: Any, bullet_points: Any, max_points: int = 8) -> list[str]: points: list[str] = [] seen_keys: set[str] = set() if isinstance(bullet_points, list): for bullet in bullet_points: text = _to_ascii_text(bullet, max_len=220) text = re.sub(r"\s+", " ", text).strip("-:;,. ") key = text.lower() if text and key not in seen_keys: points.append(text) seen_keys.add(key) if len(points) >= max_points: return points text_blob = str(answer_text or "") for raw_line in text_blob.splitlines(): line = _to_ascii_text(raw_line, max_len=220) line = re.sub(r"\s+", " ", line).strip() if not line: continue lowered = line.lower() if lowered.startswith("one mark per"): continue if lowered.startswith("one mark"): continue if lowered.startswith("two marks"): continue if lowered.startswith("three marks"): continue if lowered.startswith("max "): continue if re.match(r"^mp\d+\b", lowered): continue if lowered.startswith("mp") and len(line) <= 6: continue line = line.strip("-:;,. ") if len(line) < 4: continue if line.startswith("/"): continue key = line.lower() if key not in seen_keys: points.append(line) seen_keys.add(key) if len(points) >= max_points: break return points - mcp_server.py:474-509 (helper)Helper function _build_questions_summary that builds the human-readable text summary of fetched questions, used for the content field of the ToolResult returned by get_questions.
def _build_questions_summary( questions: list[dict[str, Any]], missing_ids: list[int], detail: str, ) -> str: lines = [f"Fetched {len(questions)} questions (detail={detail})."] if missing_ids: lines.append(f"Missing IDs: {', '.join(str(i) for i in missing_ids)}") preview = questions[:10] for q in preview: paper = q.get("paper") or {} session_short = _short_session(paper.get("session_name")) label_parts: list[str] = [] if paper.get("paper_number") is not None: label_parts.append(f"P{paper.get('paper_number')}") if session_short and paper.get("year") is not None: label_parts.append(f"{session_short}{paper.get('year')}") elif paper.get("year") is not None: label_parts.append(str(paper.get("year"))) if paper.get("variant") is not None: label_parts.append(f"v{paper.get('variant')}") paper_label = " ".join(label_parts) if label_parts else "Unknown paper" lines.append( f"ID:{q.get('id')} | {paper_label} | Q{q.get('question_number')} | {q.get('marks', '?')}m" ) lines.append(f"Q: {_to_ascii_text(q.get('question_text'), max_len=180)}") key_points = q.get("key_points") or [] if key_points: lines.append("Key points: " + "; ".join(str(p) for p in key_points[:5])) if len(questions) > len(preview): lines.append(f"... {len(questions) - len(preview)} more questions available in structured output.") return "\n".join(lines)