InfiniteMCP

insert_chroma.py•4.47 KiB

#!/usr/bin/env python3
"""Embed documents named on stdin and store them in a Chroma collection.

Input is one file path per line on stdin.  For each path the script looks
for two sibling files in the same directory, ``_meta-info.json`` and
``_one-liner.json``.  A document is indexed only when both exist and the
one-liner mentions a recognised launcher (``npx``, ``go run``, ``uv tool``
or ``uvx``).

Documents are embedded in batches.  The batch size starts at 1 and changes
in three ways:

* ``SIGUSR1`` / ``SIGUSR2`` raise / lower it by one at any time;
* it grows by one after more than 25 batches have been embedded in a row;
* it shrinks by one when encoding raises ``RuntimeError``, and the same
  documents are then retried at the smaller size.
"""
from tqdm import tqdm
from pathlib import Path
from typing import NamedTuple
import chromadb
import os
import sys
import torch
import magic
import html2text
import chardet
import signal
import re
import spacy
from sentence_transformers import SentenceTransformer

# Embedding model.  Taken from the project-local `common` module the first
# time a batch is ready, so nothing is loaded if stdin yields no documents.
model = None

# Current number of documents per encode call; never drops below 1.
BATCH_SIZE = 1

# Number of batches that must be embedded before the batch size is raised.
_GROW_AFTER = 25

# Substrings that mark a one-liner as runnable (checked after 'npx').
_LAUNCHER_MARKERS = ('go", "run', 'uv", "tool', 'uvx')


class _Entry(NamedTuple):
    """One document that passed filtering and is waiting to be embedded."""

    path: str
    text: str
    meta: str
    oneline: str


def detect_encoding(file_path):
    """Return chardet's best guess for the encoding of *file_path*.

    The whole file is read into memory.  The result is whatever
    ``chardet.detect`` reports under ``'encoding'``.
    """
    with open(file_path, 'rb') as f:
        raw_data = f.read()
    return chardet.detect(raw_data)['encoding']


def move_batch(amount):
    """Add *amount* to ``BATCH_SIZE`` (clamped to at least 1) and report it."""
    global BATCH_SIZE
    BATCH_SIZE = max(1, BATCH_SIZE + amount)
    print(f"\n*** New batch size {BATCH_SIZE}", flush=True)


def _increase_batch(signum, frame):
    """Signal handler: raise the batch size by one."""
    move_batch(1)


def _decrease_batch(signum, frame):
    """Signal handler: lower the batch size by one."""
    move_batch(-1)


def reader(config):
    """Return the text of *config* flattened onto a single line.

    Lines containing a backtick are dropped; the remaining lines are joined
    with spaces and every run of whitespace is collapsed to one space.
    """
    encoding = detect_encoding(config)
    with open(config, 'r', encoding=encoding, errors='replace') as f:
        lines = [line for line in f if "`" not in line]
    return re.sub(r'\s+', ' ', " ".join(lines))


def getter(fp, path):
    """Return the path of the file named *path* next to *fp*.

    Returns ``None`` (after printing a note) when that sibling file does not
    exist.
    """
    config = Path(os.path.dirname(fp)) / path
    if not os.path.isfile(config):
        print(f"{fp} no {path}")
        return None
    # errors='replace' so an oddly encoded sidecar cannot make this check
    # raise; only the literal content "none" matters here.
    with open(config, 'r', errors='replace') as f:
        if f.read().strip() == "none":
            # NOTE(review): the file is reported but its path is still
            # returned, exactly as before.  Confirm whether "none" files
            # were meant to be skipped.
            print(f"{config} bad json")
    return config


def _load_entry(fp):
    """Build the ``_Entry`` for *fp*, or return ``None`` if it is filtered out.

    Every rejection prints a short tagged line (``??``, ``!!``, ``!?``,
    ``!@``) naming the reason.  I/O errors propagate to the caller.
    """
    meta_path = getter(fp, "_meta-info.json")
    if not meta_path:
        print(f"?? {fp} meta_info")
        return None

    oneline_path = getter(fp, "_one-liner.json")
    if not oneline_path:
        print(f"!! {fp} one-liner")
        return None

    try_one = reader(oneline_path)
    # 'npx' is accepted outright.  Anything else is rejected when it still
    # contains both "your" and "program", and otherwise must mention one of
    # the other launchers.
    if 'npx' not in try_one:
        if 'your' in try_one and 'program' in try_one:
            print(f"!? {try_one} {fp}")
            return None
        if not any(marker in try_one for marker in _LAUNCHER_MARKERS):
            print(f"!@ {try_one} {fp}")
            return None

    meta = reader(meta_path)
    with open(fp, 'r', encoding=detect_encoding(fp), errors='replace') as f:
        text = f.read()

    # The record is only created once every piece has been read, so a
    # failure above can never leave a half-populated entry behind.
    return _Entry(path=str(fp), text=text, meta=meta, oneline=try_one)


def _fill_pending(pending, target):
    """Read paths from stdin until *pending* holds *target* entries.

    Returns ``False`` once stdin is exhausted and ``True`` otherwise.  Lines
    that do not name an existing file are reported and skipped rather than
    ending the run.
    """
    while len(pending) < target:
        line = sys.stdin.readline()
        if not line:
            # readline() returns '' only at end of input.
            return False
        fp = line.strip()
        if not os.path.isfile(fp):
            if fp:
                print(f"{fp} not a file")
            continue
        try:
            entry = _load_entry(fp)
        except Exception as e:
            print(f"{fp} => {e}")
            continue
        if entry is not None:
            pending.append(entry)
    return True


def main():
    """Read paths from stdin, embed them in batches and add them to Chroma."""
    global model

    signal.signal(signal.SIGUSR1, _increase_batch)
    signal.signal(signal.SIGUSR2, _decrease_batch)

    collection = None
    pending = []        # entries read but not yet embedded
    counter = 0         # batches embedded since the last size change
    more_input = True

    while True:
        # Snapshot: the signal handlers may change BATCH_SIZE at any moment.
        batch_size = BATCH_SIZE
        if more_input:
            more_input = _fill_pending(pending, batch_size)
        if not pending:
            break
        batch = pending[:batch_size]

        if model is None:
            import common
            model = common.model
        if collection is None:
            client = chromadb.PersistentClient(path="./chroma_db")
            collection = client.get_or_create_collection(name="documents")

        texts = [entry.text for entry in batch]
        try:
            with torch.no_grad():
                embeddings = model.encode(
                    texts,
                    show_progress_bar=False,
                    batch_size=len(texts),
                    convert_to_numpy=True,
                )
        except RuntimeError as e:
            # Shrink and retry the same documents; they stay in `pending`.
            print(e)
            counter = 0
            if len(batch) == 1:
                # Cannot shrink any further: give up on this one document so
                # the loop always makes progress.
                print(f"{batch[0].path} => dropped after encode failure")
                del pending[:1]
            move_batch(-1)
            continue
        except Exception as ex:
            print(ex)
            del pending[:len(batch)]
            continue

        # The batch is embedded; it is consumed whether or not storing works.
        del pending[:len(batch)]

        try:
            torch.cuda.empty_cache()
            # The id is built from the two directory components directly
            # above the file.
            stubs = ["/".join(entry.path.split("/")[-3:-1]) for entry in batch]
            metadatas = [
                {'file_path': stub, 'meta': entry.meta, 'oneline': entry.oneline}
                for stub, entry in zip(stubs, batch)
            ]
            collection.add(
                embeddings=embeddings.tolist(),
                documents=texts,
                ids=[stub.replace('/', '_') for stub in stubs],
                metadatas=metadatas,
            )
            torch.cuda.empty_cache()
        except Exception as ex:
            # Best effort: a failed insert is reported and the run continues.
            print(ex)

        counter += 1
        if counter > _GROW_AFTER:
            move_batch(1)
            counter = 0


if __name__ == "__main__":
    main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/kristopolous/infinite-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

insert_chroma.py•4.47 KiB