Get Questions By IDs

get_questions

Read-only · Idempotent

Retrieve detailed question information and authoritative mark scheme points for specified question IDs, with compact LLM-friendly responses and optional image/OCR data.

Instructions

Fetch full question details and mark schemes for selected IDs.

Accepts either question_ids (comma-separated) or question_ids_list. Default detail is compact for LLM-friendly responses.

Returns for each question:

Full question text and context
Key mark scheme points (authoritative answers)
Paper identification
question_url: link to full question page
Image URLs (question_image_url, ms_image_url) for image-based questions

Use include_images=True to get image URLs for ALL questions (not just image-based). Use include_ocr=True to get OCR text extracted from question images.

Input Schema

Table | JSON Schema

Name	Required	Default
`question_ids`	No
`question_ids_list`	No
`detail`	No	compact
`max_key_points`	No
`include_images`	No
`include_ocr`	No

Implementation Reference

mcp_server.py:834-982 (handler)

The get_questions tool handler function. Accepts question_ids (comma-separated string), question_ids_list (list of ints), detail ('compact' or 'full'), max_key_points, include_images, and include_ocr. Fetches question details from /questions/batch API endpoint, extracts key points from mark schemes, and returns a ToolResult with text summary and structured content.

def get_questions(
    question_ids: str = "",
    question_ids_list: Optional[list[int]] = None,
    detail: str = "compact",
    max_key_points: int = 8,
    include_images: bool = False,
    include_ocr: bool = False,
) -> ToolResult:
    """Fetch full question details and mark schemes for selected IDs.

    Accepts either `question_ids` (comma-separated) or `question_ids_list`.
    Default detail is `compact` for LLM-friendly responses.

    Returns for each question:
    - Full question text and context
    - Key mark scheme points (authoritative answers)
    - Paper identification
    - question_url: link to full question page
    - Image URLs (question_image_url, ms_image_url) for image-based questions

    Use include_images=True to get image URLs for ALL questions (not just image-based).
    Use include_ocr=True to get OCR text extracted from question images.
    """
    # NOTE(review): the docstring above is deliberately left untouched -- it
    # appears to double as the tool's published instructions; confirm before
    # rewording it.
    #
    # Parameters
    #   question_ids / question_ids_list: both are handed to
    #       _parse_question_ids_input, which yields the de-duplicated ID list.
    #   detail: "compact" or "full"; any other value raises INVALID_DETAIL.
    #   max_key_points: clamped to the range 1..20 before use.
    #   include_images: also attach image URLs to non-image-based questions.
    #   include_ocr: include the row's `ocr_text` in each question payload.
    # Returns
    #   ToolResult whose `content` is the text summary and whose
    #   `structured_content` is the payload dict (ok / requested_ids / meta /
    #   missing_ids / questions). Questions follow the requested ID order; IDs
    #   absent from the API response are listed in `missing_ids`.
    # Raises
    #   ToolError for INVALID_DETAIL, INVALID_IDS, NO_IDS, TOO_MANY_IDS, or
    #   when the /questions/batch request fails.
    if detail not in {"compact", "full"}:
        raise ToolError("INVALID_DETAIL: detail must be 'compact' or 'full'.")

    capped_points = max(1, min(max_key_points, 20))

    try:
        unique_ids = _parse_question_ids_input(question_ids, question_ids_list)
    except (TypeError, ValueError) as exc:
        # Chain the parsing failure so the root cause stays in the traceback.
        raise ToolError(
            "INVALID_IDS: Use integers only, e.g. question_ids='552,799' or question_ids_list=[552,799]."
        ) from exc

    if not unique_ids:
        raise ToolError("NO_IDS: No question IDs were provided.")
    if len(unique_ids) > MAX_BATCH_IDS:
        raise ToolError(
            f"TOO_MANY_IDS: Maximum {MAX_BATCH_IDS} IDs per request (received {len(unique_ids)})."
        )

    try:
        rows = _api_get("/questions/batch", {"ids": ",".join(str(x) for x in unique_ids)})
    except Exception as exc:
        logger.error("get_questions failed: %s", exc, exc_info=True)
        error_payload = _error_from_exception(exc, "/questions/batch")
        # Tolerate a null "error" entry or an empty message instead of failing
        # (or raising a blank ToolError) while already handling a failure.
        error_info = error_payload.get("error") or {}
        message = error_info.get("message") or "Question fetch failed."
        raise ToolError(message) from exc

    # Anything other than a list response is treated as "no rows found".
    if not isinstance(rows, list):
        rows = []

    rows_by_id = {row.get("id"): row for row in rows if isinstance(row, dict)}
    ordered_rows = [rows_by_id[i] for i in unique_ids if i in rows_by_id]
    missing_ids = [i for i in unique_ids if i not in rows_by_id]

    questions: list[dict[str, Any]] = []
    for row in ordered_rows:
        question_id = row.get("id")
        is_image_based = bool(row.get("is_image_based"))
        # Image-based rows always carry image URLs; other rows only on request.
        extra_images = include_images and not is_image_based

        paper = row.get("paper") or {}
        bullets = row.get("answer_bullet_points")
        if not isinstance(bullets, list):
            bullets = []

        key_points = _extract_key_points(
            answer_text=row.get("answer_text"),
            bullet_points=bullets,
            max_points=capped_points,
        )

        base_payload = {
            "id": question_id,
            "question_number": row.get("question_number"),
            "question_text": row.get("question_text"),
            "question_context": row.get("question_context"),
            "marks": row.get("marks"),
            "topic": row.get("topic"),
            "chapter_name": row.get("chapter_name"),
            "subtopic": row.get("subtopic"),
            "paper": {
                "paper_number": paper.get("paper_number"),
                "year": paper.get("year"),
                "session": _short_session(paper.get("session_name")),
                "variant": paper.get("variant"),
            },
            "is_image_based": is_image_based,
            "question_url": f"{QUESTION_URL_BASE}/{question_id}" if question_id else None,
            "key_points": key_points,
        }

        # Always include image URLs for image-based questions
        if is_image_based:
            base_payload["question_image_url"] = _to_image_url(row.get("image_path"))
            base_payload["ms_image_url"] = _to_image_url(row.get("ms_image_path"))

        if detail == "compact":
            # Compact mode passes the long text fields through _to_ascii_text
            # with per-field max_len limits and adds an answer preview.
            entry = dict(base_payload)
            entry["question_text"] = _to_ascii_text(row.get("question_text"), max_len=550)
            entry["question_context"] = _to_ascii_text(
                row.get("question_context"),
                max_len=280,
            )
            entry["answer_preview"] = _to_ascii_text(
                row.get("answer_text"),
                max_len=320,
            )

            if extra_images:
                entry["question_image_url"] = _to_image_url(row.get("image_path"))
                entry["ms_image_url"] = _to_image_url(row.get("ms_image_path"))

            if include_ocr:
                entry["ocr_text"] = _to_ascii_text(row.get("ocr_text"), max_len=500)
        else:
            # Full mode returns the raw answer text and bullet list; `ocr_text`
            # is always present as a key but is None unless include_ocr is set.
            entry = {
                **base_payload,
                "answer_text": row.get("answer_text") or "",
                "answer_bullet_points": bullets,
                "ocr_text": row.get("ocr_text") if include_ocr else None,
            }
            if extra_images:
                entry["question_image_url"] = _to_image_url(row.get("image_path"))
                entry["ms_image_url"] = _to_image_url(row.get("ms_image_path"))

        questions.append(entry)

    payload = {
        "ok": True,
        "requested_ids": unique_ids,
        "meta": {
            "requested": len(unique_ids),
            "found": len(questions),
            "missing": len(missing_ids),
            "detail": detail,
        },
        "missing_ids": missing_ids,
        "questions": questions,
    }

    summary_text = _build_questions_summary(
        questions=questions,
        missing_ids=missing_ids,
        detail=detail,
    )

    return ToolResult(content=summary_text, structured_content=payload)

mcp_server.py:829-833 (registration)
The @mcp.tool decorator registering 'get_questions' as an MCP tool with title 'Get Questions By IDs', tags 'search' and 'core', and annotations for readOnlyHint and idempotentHint.
```
@mcp.tool(
    title="Get Questions By IDs",
    tags={"search", "core"},
    annotations={"readOnlyHint": True, "idempotentHint": True},
)
```

mcp_server.py:326-343 (helper)

Helper function _parse_question_ids_input that parses question IDs from a comma-separated string and/or a list of ints, merging both sources (string IDs first) and removing duplicates while preserving first-seen order. Used by get_questions to normalize input.

def _parse_question_ids_input(question_ids: str, question_ids_list: Optional[list[int]]) -> list[int]:
    parsed_ids: list[int] = []

    if isinstance(question_ids, str) and question_ids.strip():
        for part in question_ids.split(","):
            value = part.strip()
            if value:
                parsed_ids.append(int(value))

    if question_ids_list:
        for value in question_ids_list:
            parsed_ids.append(int(value))

    unique_ids: list[int] = []
    for qid in parsed_ids:
        if qid not in unique_ids:
            unique_ids.append(qid)
    return unique_ids

mcp_server.py:393-444 (helper)

Helper function _extract_key_points that parses mark scheme answer text and bullet points into concise key points. Used by get_questions to produce the key_points field.

def _extract_key_points(answer_text: Any, bullet_points: Any, max_points: int = 8) -> list[str]:
    points: list[str] = []
    seen_keys: set[str] = set()

    if isinstance(bullet_points, list):
        for bullet in bullet_points:
            text = _to_ascii_text(bullet, max_len=220)
            text = re.sub(r"\s+", " ", text).strip("-:;,. ")
            key = text.lower()
            if text and key not in seen_keys:
                points.append(text)
                seen_keys.add(key)
            if len(points) >= max_points:
                return points

    text_blob = str(answer_text or "")
    for raw_line in text_blob.splitlines():
        line = _to_ascii_text(raw_line, max_len=220)
        line = re.sub(r"\s+", " ", line).strip()
        if not line:
            continue

        lowered = line.lower()
        if lowered.startswith("one mark per"):
            continue
        if lowered.startswith("one mark"):
            continue
        if lowered.startswith("two marks"):
            continue
        if lowered.startswith("three marks"):
            continue
        if lowered.startswith("max "):
            continue
        if re.match(r"^mp\d+\b", lowered):
            continue
        if lowered.startswith("mp") and len(line) <= 6:
            continue

        line = line.strip("-:;,. ")
        if len(line) < 4:
            continue
        if line.startswith("/"):
            continue

        key = line.lower()
        if key not in seen_keys:
            points.append(line)
            seen_keys.add(key)
        if len(points) >= max_points:
            break

    return points

mcp_server.py:474-509 (helper)

Helper function _build_questions_summary that builds the human-readable text summary of fetched questions, used for the content field of the ToolResult returned by get_questions.

def _build_questions_summary(
    questions: list[dict[str, Any]],
    missing_ids: list[int],
    detail: str,
) -> str:
    lines = [f"Fetched {len(questions)} questions (detail={detail})."]
    if missing_ids:
        lines.append(f"Missing IDs: {', '.join(str(i) for i in missing_ids)}")

    preview = questions[:10]
    for q in preview:
        paper = q.get("paper") or {}
        session_short = _short_session(paper.get("session_name"))
        label_parts: list[str] = []
        if paper.get("paper_number") is not None:
            label_parts.append(f"P{paper.get('paper_number')}")
        if session_short and paper.get("year") is not None:
            label_parts.append(f"{session_short}{paper.get('year')}")
        elif paper.get("year") is not None:
            label_parts.append(str(paper.get("year")))
        if paper.get("variant") is not None:
            label_parts.append(f"v{paper.get('variant')}")
        paper_label = " ".join(label_parts) if label_parts else "Unknown paper"
        lines.append(
            f"ID:{q.get('id')} | {paper_label} | Q{q.get('question_number')} | {q.get('marks', '?')}m"
        )
        lines.append(f"Q: {_to_ascii_text(q.get('question_text'), max_len=180)}")

        key_points = q.get("key_points") or []
        if key_points:
            lines.append("Key points: " + "; ".join(str(p) for p in key_points[:5]))

    if len(questions) > len(preview):
        lines.append(f"... {len(questions) - len(preview)} more questions available in structured output.")

    return "\n".join(lines)

SearchCAIE MCP Server

Get Questions By IDs

Instructions

Input Schema

Implementation Reference

Tool Definition Quality

Other Tools

Latest Blog Posts

MCP directory API