Skip to main content
Glama

GenAIScript

Official
by microsoft
MIT License
43
2,820
  • Linux
  • Apple
retrieval.ts (5.47 kB)
import { normalizeFloat, normalizeInt } from "../../core/src/cleaners"
import { expandFiles } from "../../core/src/fs"
import { fuzzSearch } from "../../core/src/fuzzsearch"
import { vectorIndex, vectorSearch } from "../../core/src/vectorsearch"
import { YAMLStringify } from "../../core/src/yaml"

/**
 * Generates a vector index for retrieval tasks by processing specified files.
 *
 * The globs are expanded with `expandFiles`, each resulting filename is
 * wrapped in a `WorkspaceFile`, and the list is handed to `vectorIndex`.
 *
 * @param indexName - Name of the index to be created.
 * @param filesGlobs - Glob patterns defining the target files for indexing.
 * @param options - Additional configuration options.
 * @param options.excludedFiles - List of files to exclude from indexing.
 * @param options.embeddingsModel - Model used to generate embeddings.
 * @param options.ignoreGitIgnore - Whether to bypass .gitignore rules.
 * @param options.database - Backend type for storing the generated index. Can be "local" or "azure_ai_search".
 */
export async function retrievalIndex(
    indexName: string,
    filesGlobs: string[],
    options: {
        excludedFiles: string[]
        embeddingsModel: string
        ignoreGitIgnore: boolean
        database: "local" | "azure_ai_search"
    }
) {
    // `options || {}` keeps the destructuring safe when options is omitted at runtime
    const { excludedFiles, embeddingsModel, ignoreGitIgnore, database } =
        options || {}
    const files = (
        await expandFiles(filesGlobs, {
            excludedFiles,
            // the flag is expressed as "ignore", expandFiles expects "apply"
            applyGitIgnore: !ignoreGitIgnore,
        })
    ).map((filename) => <WorkspaceFile>{ filename })
    await vectorIndex(indexName, files, {
        embeddingsModel,
        type: database,
    })
}

/**
 * This file contains functions to perform retrieval searches on files.
 * It supports both vector-based searches and fuzzy searches.
 */

/**
 * Performs a vector-based retrieval search on specified files and prints
 * the `filename` / `score` of each result to the console as YAML.
 *
 * @param q - The query string to search for.
 * @param filesGlobs - Glob patterns specifying which files to search.
 * @param options - Additional options for the search.
 * @param options.excludedFiles - Files to exclude from the search.
 * @param options.topK - The number of top results to return (string, passed through `normalizeInt`).
 * @param options.minScore - The minimum score threshold for results (string, passed through `normalizeFloat`).
 * @param options.name - Index name for storing vectors.
 * @param options.embeddingsModel - Model to use for generating embeddings.
 * @param options.ignoreGitIgnore - Whether to ignore .gitignore rules.
 */
export async function retrievalSearch(
    q: string,
    filesGlobs: string[],
    options: {
        excludedFiles: string[]
        topK: string
        minScore: string
        name: string
        embeddingsModel: string
        ignoreGitIgnore: boolean
    }
) {
    // `options || {}` keeps the destructuring safe when options is omitted at runtime
    const {
        excludedFiles,
        name: indexName,
        topK,
        minScore,
        embeddingsModel,
        ignoreGitIgnore,
    } = options || {}

    // Expand file globs (minus exclusions) and wrap each path in a WorkspaceFile
    const files = (
        await expandFiles(filesGlobs, {
            excludedFiles,
            applyGitIgnore: !ignoreGitIgnore,
        })
    ).map((filename) => <WorkspaceFile>{ filename })

    // Run the vector search with the given query and options
    const res = await vectorSearch(indexName, q, files, {
        topK: normalizeInt(topK),
        minScore: normalizeFloat(minScore),
        embeddingsModel,
    })

    // Output only filename and score, in YAML format for readability
    console.log(
        YAMLStringify(res.map(({ filename, score }) => ({ filename, score })))
    )
}

/**
 * Performs a fuzzy search on specified files and prints the `filename` /
 * `score` of each result to the console as YAML.
 *
 * @param q - The query string to search for.
 * @param filesGlobs - Glob patterns specifying which files to search. Defaults to `["**"]` if missing or empty.
 * @param options - Additional options for the search.
 * @param options.excludedFiles - Files to exclude from the search. Defaults to `["**\/node_modules/**"]` if missing or empty.
 * @param options.topK - The number of top results to return (string, passed through `normalizeInt`).
 * @param options.minScore - The minimum score threshold for matches (string, passed through `normalizeFloat`).
 * @param options.ignoreGitIgnore - Whether to ignore .gitignore rules.
 */
export async function retrievalFuzz(
    q: string,
    filesGlobs: string[],
    options: {
        excludedFiles: string[]
        topK: string
        minScore: string
        ignoreGitIgnore: boolean
    }
) {
    // `options || {}` keeps the destructuring safe when options is omitted at runtime
    const { topK, minScore, ignoreGitIgnore } = options || {}

    // Default to searching all files if no globs are provided
    const globs = filesGlobs?.length ? filesGlobs : ["**"]

    // Default to excluding node_modules if no exclusions are provided
    const excludedFiles = options?.excludedFiles?.length
        ? options.excludedFiles
        : ["**/node_modules/**"]

    // Pass the *resolved* exclusions (not the raw options) so the default
    // above actually takes effect, and translate ignoreGitIgnore the same
    // way retrievalIndex and retrievalSearch do.
    const files = await expandFiles(globs, {
        excludedFiles,
        applyGitIgnore: !ignoreGitIgnore,
    })

    // Log the number of files being searched for transparency
    console.log(`searching '${q}' in ${files.length} files`)

    // Each file is handed to fuzzSearch as a `{ filename }` entry
    const res = await fuzzSearch(
        q,
        files.map((filename) => ({ filename })),
        { topK: normalizeInt(topK), minScore: normalizeFloat(minScore) }
    )

    // Output only filename and score, in YAML format for readability
    console.log(
        YAMLStringify(res.map(({ filename, score }) => ({ filename, score })))
    )
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/microsoft/genaiscript'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.