Skip to main content
Glama

docs-mcp-server

patternMatcher.test.ts (19 kB)
import { describe, expect, it } from "vitest";
import { DEFAULT_EXCLUSION_PATTERNS } from "./defaultPatterns";
import {
  extractPathAndQuery,
  isRegexPattern,
  matchesAnyPattern,
  patternToRegExp,
  shouldIncludeUrl,
} from "./patternMatcher";

/**
 * Tests for the pattern matcher helpers.
 *
 * Conventions used throughout:
 * - Repeated expectations are written as small tables and checked in a loop.
 * - Each `expect` receives the input under test as its message, so a failing
 *   row is identifiable from the test output.
 * - Calls keep the same number of arguments as the scenario they describe
 *   (explicit `undefined` means "no patterns supplied by the user").
 */
describe("patternMatcher", () => {
  it("isRegexPattern detects regex", () => {
    // Only the pattern wrapped in slashes on both sides is expected to be a regex.
    const cases = [
      { pattern: "/foo.*/", expected: true },
      { pattern: "foo.*/", expected: false },
      { pattern: "foo.*", expected: false },
    ];

    for (const { pattern, expected } of cases) {
      expect(isRegexPattern(pattern), pattern).toBe(expected);
    }
  });

  it("patternToRegExp auto-detects regex and glob", () => {
    // Slash-delimited pattern is used as a regular expression.
    expect(patternToRegExp("/foo.*/").test("foo123")).toBe(true);

    // Glob pattern: a single * is expected not to match across "/".
    const globCases = [
      { input: "fooxbar", expected: true },
      { input: "fooyyybar", expected: true },
      { input: "foo/bar", expected: false },
    ];

    for (const { input, expected } of globCases) {
      expect(patternToRegExp("foo*bar").test(input), input).toBe(expected);
    }
  });

  it("matchesAnyPattern works for globs and regex", () => {
    const cases = [
      { patterns: ["foo/*/bar"], expected: true },
      { patterns: ["/foo/.*/bar/"], expected: true },
      { patterns: ["baz/*"], expected: false },
    ];

    for (const { patterns, expected } of cases) {
      expect(matchesAnyPattern("foo/abc/bar", patterns), patterns.join(", ")).toBe(
        expected,
      );
    }
  });

  it("extractPathAndQuery extracts path and query", () => {
    // Both an absolute URL and an already-relative path yield the same result.
    for (const input of ["https://example.com/foo/bar?x=1", "/foo/bar?x=1"]) {
      expect(extractPathAndQuery(input), input).toBe("/foo/bar?x=1");
    }
  });

  describe("shouldIncludeUrl with explicit patterns", () => {
    it("should apply exclude over include when patterns are explicitly provided", () => {
      const url = "https://x.com/foo";

      // Exclude wins
      expect(shouldIncludeUrl(url, ["foo*"], ["/foo/"])).toBe(false);
      // Include only
      expect(shouldIncludeUrl(url, ["foo*"], [])).toBe(true);
      // Exclude only
      expect(shouldIncludeUrl(url, undefined, ["foo*"])).toBe(false);
    });

    it("should respect user's choice to have no exclusions", () => {
      // When user explicitly provides empty array, no defaults should be applied
      const urls = ["https://example.com/CHANGELOG.md", "https://example.com/LICENSE"];

      for (const url of urls) {
        expect(shouldIncludeUrl(url, undefined, []), url).toBe(true);
      }
    });
  });

  describe("shouldIncludeUrl with default patterns", () => {
    it("should apply default exclusions when no user exclude patterns provided", () => {
      // Default patterns should exclude common documentation files
      const excludedUrls = [
        "https://example.com/CHANGELOG.md",
        "https://example.com/changelog.md",
        "https://example.com/LICENSE",
        "https://example.com/LICENSE.md",
        "https://example.com/CODE_OF_CONDUCT.md",
      ];

      for (const url of excludedUrls) {
        expect(shouldIncludeUrl(url, undefined, undefined), url).toBe(false);
      }
    });

    it("should apply default folder exclusions", () => {
      const excludedUrls = [
        // Archive folders
        "https://example.com/archive/old-docs.md",
        "https://example.com/archived/legacy.md",
        "https://example.com/old/stuff.md",
        "https://example.com/docs/old/readme.md",
        // Deprecated/legacy folders
        "https://example.com/deprecated/api.md",
        "https://example.com/legacy/guide.md",
        "https://example.com/previous/version.md",
        // i18n folders
        "https://example.com/i18n/zh-cn/guide.md",
        "https://example.com/i18n/es/tutorial.md",
        "https://example.com/i18n/fr/docs.md",
        // Locale folders
        "https://example.com/zh-cn/documentation.md",
        "https://example.com/zh-tw/guide.md",
      ];

      for (const url of excludedUrls) {
        expect(shouldIncludeUrl(url, undefined, undefined), url).toBe(false);
      }
    });

    it("should include normal documentation files when using defaults", () => {
      const includedUrls = [
        "https://example.com/docs/guide.md",
        "https://example.com/api/reference.md",
        "https://example.com/tutorials/basic.md",
        "https://example.com/README.md",
      ];

      for (const url of includedUrls) {
        expect(shouldIncludeUrl(url, undefined, undefined), url).toBe(true);
      }
    });

    it("should work with file:// URLs and basename matching", () => {
      const cases = [
        // Should exclude based on basename for file:// URLs
        { url: "file:///docs/subdir/CHANGELOG.md", expected: false },
        { url: "file:///project/LICENSE", expected: false },
        // Should include normal files
        { url: "file:///docs/README.md", expected: true },
        { url: "file:///guide/tutorial.md", expected: true },
      ];

      for (const { url, expected } of cases) {
        expect(shouldIncludeUrl(url, undefined, undefined), url).toBe(expected);
      }
    });

    it("should apply defaults correctly with include patterns", () => {
      // Include docs/* but still exclude defaults
      const cases = [
        { url: "https://example.com/docs/guide.md", expected: true },
        { url: "https://example.com/docs/CHANGELOG.md", expected: false },
        { url: "https://example.com/other/guide.md", expected: false },
      ];

      for (const { url, expected } of cases) {
        expect(shouldIncludeUrl(url, ["docs/*"], undefined), url).toBe(expected);
      }
    });
  });

  describe("default patterns behavior verification", () => {
    it("should have expected default exclusion patterns", () => {
      expect(DEFAULT_EXCLUSION_PATTERNS.length).toBeGreaterThan(0);

      const requiredPatterns = [
        "**/CHANGELOG.md",
        "**/LICENSE",
        "**/archive/**",
        "**/i18n/zh*/**",
      ];

      for (const pattern of requiredPatterns) {
        expect(DEFAULT_EXCLUSION_PATTERNS, pattern).toContain(pattern);
      }
    });
  });

  describe("double asterisk (**) pattern matching", () => {
    it("should match files at any depth with **/filename pattern", () => {
      const cases = [
        // Root level
        { path: "/README.md", patterns: ["**/README.md"], expected: true },
        { path: "/foo", patterns: ["**/foo"], expected: true },
        // Nested levels
        { path: "/docs/README.md", patterns: ["**/README.md"], expected: true },
        { path: "/docs/foo", patterns: ["**/foo"], expected: true },
        // Deep nested
        { path: "/project/docs/sub/README.md", patterns: ["**/README.md"], expected: true },
        { path: "/project/docs/sub/foo", patterns: ["**/foo"], expected: true },
        // Should not match different filenames
        { path: "/CHANGELOG.md", patterns: ["**/README.md"], expected: false },
        { path: "/docs/bar", patterns: ["**/foo"], expected: false },
      ];

      for (const { path, patterns, expected } of cases) {
        expect(
          matchesAnyPattern(path, patterns),
          `${path} vs ${patterns.join(", ")}`,
        ).toBe(expected);
      }
    });

    it("should work with shouldIncludeUrl for HTTP URLs", () => {
      const cases = [
        // Root level matches
        { url: "https://example.com/foo", include: ["**/foo"], expected: true },
        { url: "https://example.com/README.md", include: ["**/README.md"], expected: true },
        // Nested level matches
        { url: "https://example.com/docs/foo", include: ["**/foo"], expected: true },
        {
          url: "https://example.com/docs/README.md",
          include: ["**/README.md"],
          expected: true,
        },
        // Deep nested matches
        { url: "https://example.com/docs/sub/foo", include: ["**/foo"], expected: true },
        {
          url: "https://example.com/project/docs/sub/README.md",
          include: ["**/README.md"],
          expected: true,
        },
        // No matches
        { url: "https://example.com/bar", include: ["**/foo"], expected: false },
        {
          url: "https://example.com/docs/CHANGELOG.md",
          include: ["**/README.md"],
          expected: false,
        },
      ];

      for (const { url, include, expected } of cases) {
        expect(shouldIncludeUrl(url, include), `${url} vs ${include.join(", ")}`).toBe(
          expected,
        );
      }
    });

    it("should work with file:// URLs and basename matching", () => {
      // file:// URLs get both path and basename matching
      const cases = [
        { url: "file:///path/to/README.md", include: ["**/README.md"] },
        { url: "file:///path/to/README.md", include: ["README.md"] }, // basename
        { url: "file:///project/docs/foo", include: ["**/foo"] },
        { url: "file:///project/docs/foo", include: ["foo"] }, // basename
      ];

      for (const { url, include } of cases) {
        expect(shouldIncludeUrl(url, include), `${url} vs ${include.join(", ")}`).toBe(
          true,
        );
      }
    });

    it("should support complex glob patterns with **", () => {
      const cases = [
        // Directory wildcards
        { path: "/docs/api/v1/spec.json", patterns: ["**/api/*/spec.json"], expected: true },
        {
          path: "/project/docs/api/v2/spec.json",
          patterns: ["**/api/*/spec.json"],
          expected: true,
        },
        // missing version
        { path: "/docs/api/spec.json", patterns: ["**/api/*/spec.json"], expected: false },
        // Extension wildcards
        { path: "/docs/readme.md", patterns: ["**/readme.*"], expected: true },
        { path: "/project/docs/readme.txt", patterns: ["**/readme.*"], expected: true },
        { path: "/docs/changelog.md", patterns: ["**/readme.*"], expected: false },
      ];

      for (const { path, patterns, expected } of cases) {
        expect(
          matchesAnyPattern(path, patterns),
          `${path} vs ${patterns.join(", ")}`,
        ).toBe(expected);
      }
    });

    it("should support directory-based patterns (foo/** and **/foo/**)", () => {
      const cases = [
        // foo/** - matches foo directory at root level and anything under it
        { path: "/foo/bar", patterns: ["foo/**"], expected: true },
        { path: "/foo/bar/baz", patterns: ["foo/**"], expected: true },
        { path: "/foo", patterns: ["foo/**"], expected: false }, // foo itself, not under foo
        { path: "/other/foo/bar", patterns: ["foo/**"], expected: false }, // foo not at root
        // **/foo/** - matches foo directory anywhere and anything under it
        { path: "/foo/bar", patterns: ["**/foo/**"], expected: true },
        { path: "/docs/foo/bar", patterns: ["**/foo/**"], expected: true },
        { path: "/project/docs/foo/baz", patterns: ["**/foo/**"], expected: true },
        { path: "/foo", patterns: ["**/foo/**"], expected: false }, // foo itself, not under foo
        { path: "/docs/foo", patterns: ["**/foo/**"], expected: false }, // foo itself, not under foo
        { path: "/foobar/test", patterns: ["**/foo/**"], expected: false }, // foobar != foo
      ];

      for (const { path, patterns, expected } of cases) {
        expect(
          matchesAnyPattern(path, patterns),
          `${path} vs ${patterns.join(", ")}`,
        ).toBe(expected);
      }
    });

    it("should find shortest patterns for matching subdirectory foo", () => {
      // Different ways to match "foo" as a subdirectory component
      const testPath = "/project/docs/foo/readme.md";

      const cases = [
        // Exact directory match anywhere: **/foo/**
        { path: testPath, patterns: ["**/foo/**"], expected: true },
        // Directory component match: */foo/* (single level before and after)
        { path: "/docs/foo/readme.md", patterns: ["*/foo/*"], expected: true },
        { path: testPath, patterns: ["*/foo/*"], expected: false }, // too many levels before
        // Multiple level variants
        { path: testPath, patterns: ["*/*/foo/*"], expected: true }, // exactly 2 levels before, 1 after
        { path: testPath, patterns: ["**/foo/*"], expected: true }, // any levels before, 1 after
        // Shortest universal pattern for "foo" directory anywhere: **/foo/**
        { path: "/foo/", patterns: ["**/foo/**"], expected: true }, // root level
        { path: "/foo/bar", patterns: ["**/foo/**"], expected: true }, // root level
        { path: "/a/foo/bar", patterns: ["**/foo/**"], expected: true }, // nested
        { path: "/a/b/foo/c/d", patterns: ["**/foo/**"], expected: true }, // deep nested
      ];

      for (const { path, patterns, expected } of cases) {
        expect(
          matchesAnyPattern(path, patterns),
          `${path} vs ${patterns.join(", ")}`,
        ).toBe(expected);
      }
    });

    it("should demonstrate shortest patterns for common use cases", () => {
      const cases = [
        // Shortest pattern to match any subdirectory named "foo": **/foo/**
        { url: "https://example.com/foo/index.html", include: ["**/foo/**"], expected: true },
        { url: "https://example.com/src/foo/utils.js", include: ["**/foo/**"], expected: true },
        {
          url: "https://example.com/project/lib/foo/main.ts",
          include: ["**/foo/**"],
          expected: true,
        },
        // Alternative patterns for different use cases
        { url: "https://example.com/foo", include: ["**/foo"], expected: true }, // exact directory name
        { url: "https://example.com/foo/file", include: ["**/foo/**"], expected: true }, // foo directory contents
        { url: "https://example.com/foobar", include: ["**/foo*"], expected: true }, // starts with foo
        // Most specific: exact directory contents only **/foo/**
        { url: "https://example.com/foo", include: ["**/foo/**"], expected: false }, // directory itself
        { url: "https://example.com/foobar", include: ["**/foo/**"], expected: false }, // not exact match
      ];

      for (const { url, include, expected } of cases) {
        expect(shouldIncludeUrl(url, include), `${url} vs ${include.join(", ")}`).toBe(
          expected,
        );
      }
    });

    it("should test URL patterns with directory matching", () => {
      const dirPatterns = ["**/docs/**", "**/api/**", "**/foo/**"];

      const cases = [
        // Should match directory anywhere in URL path
        { url: "https://example.com/docs/guide.html", expected: true },
        { url: "https://example.com/project/docs/api.html", expected: true },
        { url: "https://example.com/v1/api/endpoints.json", expected: true },
        { url: "https://example.com/lib/foo/utils.js", expected: true },
        // Should not match directory name as part of filename
        { url: "https://example.com/myapi.html", expected: false },
        { url: "https://example.com/foodocs.html", expected: false },
        // Should not match the directory itself (only contents under it)
        { url: "https://example.com/docs", expected: false },
        { url: "https://example.com/project/api", expected: false },
      ];

      for (const { url, expected } of cases) {
        expect(shouldIncludeUrl(url, dirPatterns), url).toBe(expected);
      }
    });
  });

  describe("pattern edge cases", () => {
    it("should handle patterns without leading/trailing slashes", () => {
      // Patterns without leading slash should still work
      expect(matchesAnyPattern("/docs/file.md", ["docs/file.md"])).toBe(true);
      expect(matchesAnyPattern("/docs/file.md", ["docs/*"])).toBe(true);

      // The same holds when starting from a full URL: the pattern has no
      // leading slash, yet it matches the URL's "/docs/file.md" path.
      expect(shouldIncludeUrl("https://example.com/docs/file.md", ["docs/file.md"])).toBe(
        true,
      );
    });

    it("should handle query parameters in URLs", () => {
      // Query parameters are included in the path for pattern matching
      const cases = [
        { url: "https://example.com/docs/api?v=1", include: ["docs/*"], expected: true },
        // * matches query
        { url: "https://example.com/docs/api?v=1", include: ["docs/api*"], expected: true },
        {
          url: "https://example.com/docs/api?v=1&format=json",
          include: ["docs/api*"],
          expected: true,
        },
        // **/api won't match because the path ends with "?v=1", not "api"
        { url: "https://example.com/docs/api?v=1", include: ["**/api"], expected: false },
        // But this will match: no query params
        { url: "https://example.com/docs/api", include: ["**/api"], expected: true },
        // wildcard after api
        { url: "https://example.com/docs/api?v=1", include: ["**/api*"], expected: true },
      ];

      for (const { url, include, expected } of cases) {
        expect(shouldIncludeUrl(url, include), `${url} vs ${include.join(", ")}`).toBe(
          expected,
        );
      }
    });

    it("should handle multiple patterns (OR logic)", () => {
      const patterns = ["docs/*", "api/*", "**/README.md"];

      const cases = [
        { url: "https://example.com/docs/guide", expected: true },
        { url: "https://example.com/api/v1", expected: true },
        { url: "https://example.com/project/README.md", expected: true },
        { url: "https://example.com/other/file", expected: false },
      ];

      for (const { url, expected } of cases) {
        expect(shouldIncludeUrl(url, patterns), url).toBe(expected);
      }
    });

    it("should handle common documentation file patterns", () => {
      const docPatterns = [
        "**/README.md",
        "**/CHANGELOG.md",
        "**/package.json",
        "**/index.html",
      ];

      const cases = [
        // Root level
        { url: "https://example.com/README.md", expected: true },
        { url: "https://example.com/package.json", expected: true },
        // Nested
        { url: "https://example.com/docs/README.md", expected: true },
        { url: "https://example.com/src/components/README.md", expected: true },
        { url: "https://example.com/api/index.html", expected: true },
        // Should not match
        { url: "https://example.com/src/code.js", expected: false },
      ];

      for (const { url, expected } of cases) {
        expect(shouldIncludeUrl(url, docPatterns), url).toBe(expected);
      }
    });
  });

  describe("regex pattern behavior", () => {
    it("should handle regex patterns with ** equivalent", () => {
      // Regex equivalent of **/foo
      const nestedUrls = [
        "https://example.com/foo",
        "https://example.com/docs/foo",
        "https://example.com/docs/sub/foo",
      ];

      for (const url of nestedUrls) {
        expect(shouldIncludeUrl(url, ["/.*\\/foo$/"]), url).toBe(true);
      }

      // A looser regex that does not require a "/" before foo also matches
      // the root-level URL.
      expect(shouldIncludeUrl("https://example.com/foo", ["/.*foo$/"])).toBe(true);
    });

    it("should handle mixed glob and regex patterns", () => {
      const mixedPatterns = ["**/README.md", "/api\\/v\\d+/", "docs/*"];

      const cases = [
        { url: "https://example.com/README.md", expected: true }, // glob
        { url: "https://example.com/api/v1", expected: true }, // regex
        { url: "https://example.com/docs/guide", expected: true }, // glob
        { url: "https://example.com/other/file", expected: false },
      ];

      for (const { url, expected } of cases) {
        expect(shouldIncludeUrl(url, mixedPatterns), url).toBe(expected);
      }
    });
  });
});

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.