"""
Huggingface module tests
"""

import os
import unittest

import numpy as np

from txtai.vectors import VectorsFactory


class TestHFVectors(unittest.TestCase):
"""
HFVectors tests
"""
@classmethod
def setUpClass(cls):
"""
Create HFVectors instance.
"""
cls.model = VectorsFactory.create({"path": "sentence-transformers/nli-mpnet-base-v2"}, None)

    def testIndex(self):
        """
        Test transformers indexing
        """

        # Generate enough volume to test batching
        documents = [(x, "This is a test", None) for x in range(1000)]

        ids, dimension, batches, stream = self.model.index(documents)
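
        # 1000 documents should produce 2 serialized batches of 768-dimension vectors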
        self.assertEqual(len(ids), 1000)
        self.assertEqual(dimension, 768)
        self.assertEqual(batches, 2)
        self.assertTrue(os.path.exists(stream))

        # Test shape of serialized embeddings
        with open(stream, "rb") as queue:
            self.assertEqual(np.load(queue).shape, (500, 768))

    def testText(self):
        """
        Test transformers text conversion
        """
        self.model.tokenize = True
        self.assertEqual(self.model.prepare("Y 123 This is a test!"), "test")
        self.assertEqual(self.model.prepare(["This", "is", "a", "test"]), "This is a test")
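
        # Tokenization disabled - string input passes through unchanged, list input is still joined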
        self.model.tokenize = False
        self.assertEqual(self.model.prepare("Y 123 This is a test!"), "Y 123 This is a test!")
        self.assertEqual(self.model.prepare(["This", "is", "a", "test"]), "This is a test")

    def testTransform(self):
        """
        Test transformers transform
        """

        # Sample documents: one where tokenizer changes text and one with no changes to text
        documents = [(0, "This is a test and has no tokenization", None), (1, "test tokenization", None)]

        # Run with tokenization enabled
        self.model.tokenize = True
        embeddings1 = [self.model.transform(d) for d in documents]

        # Run with tokenization disabled
        self.model.tokenize = False
        embeddings2 = [self.model.transform(d) for d in documents]
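
        # Tokenization alters the first document's text, so its embeddings differ; the second document is unchanged, so its embeddings match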
        self.assertFalse(np.array_equal(embeddings1[0], embeddings2[0]))
        self.assertTrue(np.array_equal(embeddings1[1], embeddings2[1]))

    def testTransformArray(self):
        """
        Test transformers skips transforming NumPy arrays
        """

        # Generate data and run through vector model
        data1 = np.random.rand(5, 5).astype(np.float32)
        data2 = self.model.transform((0, data1, None))

        # Test transform method returns original data
        self.assertTrue(np.array_equal(data1, data2))

    def testTransformLong(self):
        """
        Test transformers transform on long text
        """

        # Sample documents: long text that exceeds the model's maximum sequence length and short text
        documents = [(0, "This is long text " * 512, None), (1, "This is short text", None)]

        # Run transform and ensure it completes without errors
        embeddings = [self.model.transform(d) for d in documents]
        self.assertIsNotNone(embeddings)