import os
import requests
import zipfile
import io
from minsearch import Index
# GitHub archive URL for the fastmcp repository's main branch.
ZIP_URL = "https://github.com/jlowin/fastmcp/archive/refs/heads/main.zip"
# Local filename the archive is saved to (reused on subsequent runs).
ZIP_NAME = "fastmcp-main.zip"
def ensure_zip():
    """Download the repository archive to ZIP_NAME unless it already exists.

    Streams the download in 64 KiB chunks so the archive is never held
    fully in memory.

    Raises:
        requests.HTTPError: if the server returns a bad status code.
    """
    if os.path.exists(ZIP_NAME):
        print(f"Zip already exists: {ZIP_NAME}")
        return
    print(f"Downloading {ZIP_URL} -> {ZIP_NAME}")
    # With stream=True the connection stays open until the body is fully
    # consumed or the response is closed — use a context manager so the
    # connection is released even if an exception interrupts the download.
    with requests.get(ZIP_URL, stream=True, timeout=60) as resp:
        resp.raise_for_status()
        with open(ZIP_NAME, "wb") as f:
            for chunk in resp.iter_content(1024 * 64):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
def iter_md_files_from_zip(zip_path, extensions=(".md", ".mdx")):
    """Yield (relative_path, text) pairs for matching files inside a zip.

    Args:
        zip_path: Path (or file-like object) accepted by zipfile.ZipFile.
        extensions: Case-insensitive filename suffixes to include.
            Defaults to markdown files, matching the original behavior.

    Yields:
        Tuples of (path with the leading archive segment stripped, decoded
        text). GitHub archives wrap everything in a "<repo>-<branch>/"
        directory; that first segment is removed. Bytes are decoded as
        UTF-8 with undecodable bytes replaced, so decoding never raises.
    """
    suffixes = tuple(s.lower() for s in extensions)
    with zipfile.ZipFile(zip_path, "r") as z:
        for name in z.namelist():
            # endswith accepts a tuple: one call covers all suffixes.
            if not name.lower().endswith(suffixes):
                continue
            text = z.read(name).decode("utf-8", errors="replace")
            # Strip the first path segment; names with no "/" pass through.
            _, sep, rest = name.partition("/")
            yield (rest if sep else name), text
def build_index(docs):
    """Fit and return a minsearch Index over the given documents.

    Args:
        docs: List of dicts, each with a full-text "content" field and a
            "filename" keyword field.

    Returns:
        The fitted minsearch Index instance.
    """
    searcher = Index(
        text_fields=["content"],
        keyword_fields=["filename"],
    )
    searcher.fit(docs)
    return searcher
def main():
    """Download the repo archive, index its markdown files, run a demo search.

    Collects markdown from every .zip in the current working directory (not
    only the one just downloaded), builds the search index, and prints the
    filename of the top hit for the query "demo".
    """
    ensure_zip()
    docs = []
    # Iterate all zip archives in the current directory so previously
    # downloaded archives are indexed too.
    for fname in os.listdir('.'):
        if fname.lower().endswith('.zip'):
            for filename, text in iter_md_files_from_zip(fname):
                docs.append({'content': text, 'filename': filename})
    print(f"Indexed {len(docs)} markdown files")
    if not docs:
        # Guard: fitting an index on an empty corpus is pointless and may
        # raise inside minsearch — bail out before building the index.
        print("No results")
        return
    idx = build_index(docs)
    results = idx.search("demo", num_results=5)
    if not results:
        print("No results")
        return
    # Print the filename of the best-ranked document.
    first = results[0]
    print(first.get('filename'))

if __name__ == '__main__':
    main()