03-mcp
This repository contains the homework for the MCP (Model Context Protocol) assignment.
Questions, answers, and the code used for this homework are collected below.
Question 1
Answers / actions performed:
uv installed and verified.
Project initialized with uv init.
fastmcp added with uv add fastmcp.
The first sha256 hash in uv.lock is on line 20, in the annotated-types entry:
sdist = { url = "https://files.pythonhosted.org/packages/ee/67/.../annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" }
Question 2 — FastMCP Transport
I updated main.py using the FastMCP starter and ran the server. The welcome screen shows the transport:
Answer: STDIO
Question 3 — Scrape Web Tool (Jina reader)
I implemented a tool that uses the Jina reader (https://r.jina.ai/...) via requests, and added test.py to test it against https://github.com/alexeygrigorev/minsearch.
Test result (character count): 31361 → closest provided option: 29184.
Question 4 — Integrate the Tool
I added count_data.py that uses the MCP Jina-reader tool to fetch https://datatalks.club/ and count occurrences of the whole word "data" (case-insensitive).
Script output: 10 → closest option: 61.
Question 5 — Implement Search (minsearch)
I downloaded the FastMCP repo zip, extracted .md and .mdx files, indexed them with minsearch, and searched for "demo".
First file returned for query "demo": examples/testing_demo/README.md.
Question 6 — Search Tool (ungraded)
I added a search_docs MCP tool to main.py that builds the minsearch index from the zip and returns the top filenames for a query.
Files added / modified (full contents)
main.py
from fastmcp import FastMCP
import requests
import os
import zipfile
from minsearch import Index
# Single FastMCP server instance for this module; the functions decorated with
# @mcp.tool below are registered on it, and mcp.run() at the bottom starts it.
mcp = FastMCP("Demo 🚀")
def fetch_markdown_impl(url: str) -> str:
    """Download a page through the Jina reader and return the response text.

    The request is sent to ``https://r.jina.ai/`` followed by the page URL.
    When ``url`` does not already start with ``http://`` or ``https://``,
    ``https://`` is prepended first, so a bare ``host/path`` is accepted too.

    The request uses a 15 second timeout, and ``raise_for_status()`` is called
    on the response before its text is returned.
    """
    has_scheme = url.startswith(("http://", "https://"))
    page_url = url if has_scheme else "https://" + url
    response = requests.get("https://r.jina.ai/" + page_url, timeout=15)
    response.raise_for_status()
    return response.text
@mcp.tool
def fetch_markdown(url: str) -> str:
    """Return markdown content of a web page via Jina reader."""
    # Thin MCP-facing wrapper: the logic lives in the undecorated
    # fetch_markdown_impl so that plain scripts can import and call it directly.
    markdown = fetch_markdown_impl(url)
    return markdown
@mcp.tool
def add(a: int, b: int) -> int:
    """Add two numbers"""
    # Minimal example tool registered on the server alongside the real ones.
    total = a + b
    return total
# --- minsearch integration for documentation search ---
# Archive of the FastMCP repository (main branch) that ensure_zip() downloads.
ZIP_URL = "https://github.com/jlowin/fastmcp/archive/refs/heads/main.zip"
# Local filename for the downloaded archive; a relative path, so it resolves
# against the current working directory.
ZIP_NAME = "fastmcp-main.zip"
# Simple module-level cache for the built index; stays None until get_index()
# builds the index on first use.
_INDEX_CACHE = None
def ensure_zip():
    """Make sure the FastMCP archive exists locally as ``ZIP_NAME``.

    Returns immediately when ``ZIP_NAME`` already exists. Otherwise the
    archive is streamed from ``ZIP_URL`` in 64 KiB chunks.

    The download is written to a temporary ``ZIP_NAME + ".part"`` file and
    only renamed to ``ZIP_NAME`` once it has completed. Writing directly to
    ``ZIP_NAME`` would leave a truncated archive behind if the transfer
    failed midway, and the existence check above would then accept that
    broken file on every later call.

    Raises whatever ``requests`` raises on connection problems, timeouts
    (60 s) or an error status (via ``raise_for_status``).
    """
    if os.path.exists(ZIP_NAME):
        return

    # The suffix is not ".zip", so code that scans the directory for zip
    # archives never picks up an in-progress download.
    partial_name = ZIP_NAME + ".part"
    try:
        # Using the response as a context manager releases the streamed
        # connection even when the download fails.
        with requests.get(ZIP_URL, stream=True, timeout=60) as resp:
            resp.raise_for_status()
            with open(partial_name, "wb") as f:
                for chunk in resp.iter_content(1024 * 64):
                    if chunk:
                        f.write(chunk)
        # Atomic rename: ZIP_NAME only ever appears as a complete file.
        os.replace(partial_name, ZIP_NAME)
    finally:
        # After a successful rename the partial file is gone; on failure this
        # removes the incomplete download.
        if os.path.exists(partial_name):
            os.remove(partial_name)
def iter_md_files_from_zip(zip_path):
    """Yield ``(relative_path, text)`` for every Markdown entry in a zip file.

    An entry qualifies when its name ends in ``.md`` or ``.mdx``, compared
    case-insensitively. Its bytes are decoded as UTF-8, with undecodable
    bytes replaced rather than raising. The yielded path is the entry name
    with its first path segment removed; names without a ``/`` are yielded
    unchanged.
    """
    markdown_suffixes = (".md", ".mdx")
    with zipfile.ZipFile(zip_path, "r") as archive:
        for member in archive.namelist():
            if not member.lower().endswith(markdown_suffixes):
                continue
            body = archive.read(member).decode("utf-8", errors="replace")
            # Everything after the first "/" is the path inside the top-level
            # directory; an empty separator means there was no "/" at all.
            _, slash, remainder = member.partition("/")
            yield (remainder if slash else member), body
def build_index_from_zip():
    """Build a minsearch ``Index`` from the Markdown inside local zip archives.

    Calls ``ensure_zip()`` first, then reads every file in the current
    working directory whose name ends in ``.zip`` (case-insensitive), not
    only ``ZIP_NAME``. Each Markdown entry becomes one document with a
    ``content`` text field and a ``filename`` keyword field.

    Returns the fitted index.
    """
    ensure_zip()
    archives = [entry for entry in os.listdir('.') if entry.lower().endswith('.zip')]
    documents = [
        {'content': text, 'filename': filename}
        for archive in archives
        for filename, text in iter_md_files_from_zip(archive)
    ]
    index = Index(text_fields=["content"], keyword_fields=["filename"])
    index.fit(documents)
    return index
def get_index():
    """Return the shared search index, building it on the first call.

    The built index is stored in the module-level ``_INDEX_CACHE`` so later
    calls reuse it instead of re-reading the archives.
    """
    global _INDEX_CACHE
    if _INDEX_CACHE is not None:
        return _INDEX_CACHE
    _INDEX_CACHE = build_index_from_zip()
    return _INDEX_CACHE
def search_docs_impl(query: str, top_k: int = 5):
    """Search the cached documentation index for ``query``.

    ``top_k`` is passed to the index as ``num_results``. The raw search
    results are returned unchanged.
    """
    return get_index().search(query, num_results=top_k)
@mcp.tool
def search_docs(query: str) -> list:
    """Search the documentation index and return top filenames for `query`."""
    # Only the filename of each of the five best hits is exposed to MCP clients.
    filenames = []
    for hit in search_docs_impl(query, top_k=5):
        filenames.append(hit.get('filename'))
    return filenames
# Start the MCP server only when this file is run as a script, so importing
# the module (as the helper scripts do) does not launch the server.
if __name__ == "__main__":
    mcp.run()
test.py
from main import fetch_markdown_impl

if __name__ == "__main__":
    # Print the character count of the minsearch repository page as returned
    # by the Jina reader helper.
    page_text = fetch_markdown_impl("https://github.com/alexeygrigorev/minsearch")
    print(len(page_text))
test_search.py
from main import search_docs_impl

if __name__ == '__main__':
    hits = search_docs_impl('demo', top_k=5)
    # Show only the best match's filename, or a placeholder when nothing matched.
    print(hits[0].get('filename') if hits else 'No results')
count_data.py
from main import fetch_markdown_impl
import re

if __name__ == "__main__":
    page_text = fetch_markdown_impl("https://datatalks.club/")
    # Whole-word, case-insensitive matches of "data"; one match is one occurrence.
    whole_word = re.compile(r"\bdata\b", flags=re.IGNORECASE)
    print(sum(1 for _ in whole_word.finditer(page_text)))
search.py
import os
import requests
import zipfile
import io
from minsearch import Index
# Archive of the FastMCP repository (main branch) that ensure_zip() downloads.
ZIP_URL = "https://github.com/jlowin/fastmcp/archive/refs/heads/main.zip"
# Local filename for the downloaded archive; a relative path, so it resolves
# against the current working directory.
ZIP_NAME = "fastmcp-main.zip"
def ensure_zip():
    """Make sure the FastMCP archive exists locally as ``ZIP_NAME``.

    Prints a notice and returns when ``ZIP_NAME`` already exists. Otherwise
    the archive is streamed from ``ZIP_URL`` in 64 KiB chunks.

    The download is written to a temporary ``ZIP_NAME + ".part"`` file and
    only renamed to ``ZIP_NAME`` once it has completed. Writing directly to
    ``ZIP_NAME`` would leave a truncated archive behind if the transfer
    failed midway, and the existence check above would then accept that
    broken file on every later run.

    Raises whatever ``requests`` raises on connection problems, timeouts
    (60 s) or an error status (via ``raise_for_status``).
    """
    if os.path.exists(ZIP_NAME):
        print(f"Zip already exists: {ZIP_NAME}")
        return

    print(f"Downloading {ZIP_URL} -> {ZIP_NAME}")
    # The suffix is not ".zip", so the directory scan in main() never picks
    # up an in-progress download.
    partial_name = ZIP_NAME + ".part"
    try:
        # Using the response as a context manager releases the streamed
        # connection even when the download fails.
        with requests.get(ZIP_URL, stream=True, timeout=60) as resp:
            resp.raise_for_status()
            with open(partial_name, "wb") as f:
                for chunk in resp.iter_content(1024 * 64):
                    if chunk:
                        f.write(chunk)
        # Atomic rename: ZIP_NAME only ever appears as a complete file.
        os.replace(partial_name, ZIP_NAME)
    finally:
        # After a successful rename the partial file is gone; on failure this
        # removes the incomplete download.
        if os.path.exists(partial_name):
            os.remove(partial_name)
def iter_md_files_from_zip(zip_path):
    """Generate ``(path, text)`` pairs for the Markdown entries of a zip file.

    Entries are selected by a case-insensitive ``.md`` / ``.mdx`` suffix
    check. Contents are decoded as UTF-8 with invalid bytes replaced. The
    first path segment is stripped from each name; a name containing no
    ``/`` is passed through as-is.
    """
    with zipfile.ZipFile(zip_path, "r") as archive:
        markdown_members = [
            member
            for member in archive.namelist()
            if member.lower().endswith((".md", ".mdx"))
        ]
        for member in markdown_members:
            raw = archive.read(member)
            # Splitting once on "/" and taking the last piece drops the first
            # path segment, and leaves slash-free names untouched.
            relative_path = member.split("/", 1)[-1]
            yield relative_path, raw.decode("utf-8", errors="replace")
def build_index(docs):
    """Create a minsearch ``Index`` and fit it on ``docs``.

    ``docs`` is a list of dicts with ``'content'`` and ``'filename'`` keys;
    ``content`` is indexed as a text field and ``filename`` as a keyword
    field. Returns the fitted index.
    """
    text_fields = ["content"]
    keyword_fields = ["filename"]
    index = Index(text_fields=text_fields, keyword_fields=keyword_fields)
    index.fit(docs)
    return index
def main():
    """Index the Markdown in local zip archives and print the top hit for "demo".

    Ensures the FastMCP archive is present, reads every ``*.zip`` file in the
    current working directory, reports how many Markdown documents were
    collected, then prints the filename of the first search result, or
    ``No results`` when the search returns nothing.
    """
    ensure_zip()
    # Every zip archive in the working directory contributes documents.
    archives = [entry for entry in os.listdir('.') if entry.lower().endswith('.zip')]
    docs = [
        {'content': text, 'filename': filename}
        for archive in archives
        for filename, text in iter_md_files_from_zip(archive)
    ]
    print(f"Indexed {len(docs)} markdown files")

    results = build_index(docs).search("demo", num_results=5)
    if results:
        # Only the best-ranked document's filename is reported.
        print(results[0].get('filename'))
    else:
        print("No results")
# Run the download / index / search demo only when executed as a script.
if __name__ == '__main__':
    main()
Git & Repository
If you want me to create a separate GitHub repository for this homework and push the 03-mcp folder there, tell me the desired repo name and whether it should be public or private.
If you want any formatting changes or additional files included in this README, tell me which items to add.
03-mcp
MCP-Model Context Protocol