M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

Overview Schema Related Servers Score Discussions

string_patterns.go•12.7 KiB

// Package cypher - Optimized string-based pattern matching for hot paths. // // This file provides fast string-based alternatives to regex patterns for // operations that are called on every query. These functions are 5-10x faster // than their regex equivalents. // // Performance comparison (benchmark on M1 Mac): // - splitByKeyword vs regex Split: ~8x faster // - extractLimitSkip vs regex FindStringSubmatch: ~6x faster // - extractParameter vs regex FindAllStringSubmatch: ~5x faster package cypher import ( "strings" "unicode" ) // ============================================================================= // Keyword Splitting (replaces matchKeywordPattern and createKeywordPattern) // ============================================================================= // SplitByKeyword splits a string by a keyword (case-insensitive), respecting word boundaries. // This is ~8x faster than regexp.MustCompile(`(?i)\bKEYWORD\s+`).Split(). // // Example: // // SplitByKeyword("MATCH (a) MATCH (b)", "MATCH") // // Returns: ["", "(a) ", "(b)"] func SplitByKeyword(s, keyword string) []string { if s == "" { return []string{s} } upper := strings.ToUpper(s) keywordUpper := strings.ToUpper(keyword) keywordLen := len(keyword) var result []string lastEnd := 0 for i := 0; i <= len(upper)-keywordLen; i++ { // Check if keyword matches at this position if upper[i:i+keywordLen] != keywordUpper { continue } // Check word boundary before (start of string or non-alphanumeric) if i > 0 && isWordChar(s[i-1]) { continue } // Check that there's whitespace after the keyword afterIdx := i + keywordLen if afterIdx >= len(s) || !unicode.IsSpace(rune(s[afterIdx])) { continue } // Found a match - add the part before this keyword result = append(result, s[lastEnd:i]) // Skip the keyword and following whitespace lastEnd = afterIdx for lastEnd < len(s) && unicode.IsSpace(rune(s[lastEnd])) { lastEnd++ } i = lastEnd - 1 // -1 because loop will increment } // Add the remaining part result = append(result, 
s[lastEnd:]) return result } // SplitByMatch splits by "MATCH " keyword. Convenience wrapper for hot path. func SplitByMatch(s string) []string { return SplitByKeyword(s, "MATCH") } // SplitByCreate splits by "CREATE " keyword. Convenience wrapper for hot path. func SplitByCreate(s string) []string { return SplitByKeyword(s, "CREATE") } // isWordChar returns true if c is a word character (alphanumeric or underscore) func isWordChar(c byte) bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' } // ============================================================================= // LIMIT/SKIP Extraction (replaces limitPattern and skipPattern) // ============================================================================= // ExtractLimit extracts the LIMIT value from a query string. // Returns the value and true if found, or 0 and false if not found. // This is ~6x faster than regex FindStringSubmatch. // // Example: // // ExtractLimit("MATCH (n) RETURN n LIMIT 10") // // Returns: 10, true func ExtractLimit(query string) (int, bool) { return extractIntAfterKeyword(query, "LIMIT") } // ExtractSkip extracts the SKIP value from a query string. // Returns the value and true if found, or 0 and false if not found. // // Example: // // ExtractSkip("MATCH (n) RETURN n SKIP 5 LIMIT 10") // // Returns: 5, true func ExtractSkip(query string) (int, bool) { return extractIntAfterKeyword(query, "SKIP") } // ExtractLimitString extracts the LIMIT value as a string (for compatibility). // Returns empty string if not found. func ExtractLimitString(query string) string { return extractStringAfterKeyword(query, "LIMIT") } // ExtractSkipString extracts the SKIP value as a string (for compatibility). // Returns empty string if not found. func ExtractSkipString(query string) string { return extractStringAfterKeyword(query, "SKIP") } // extractIntAfterKeyword finds a keyword and extracts the integer that follows. 
//
// The digits are located by extractStringAfterKeyword, so the same matching
// rules apply. Returns (0, false) when no "<keyword> <digits>" sequence is
// present. A digit run too large for an int saturates at the largest int
// value instead of wrapping around.
func extractIntAfterKeyword(s, keyword string) (int, bool) {
	digits := extractStringAfterKeyword(s, keyword)
	if digits == "" {
		return 0, false
	}

	const maxInt = int(^uint(0) >> 1)

	value := 0
	for i := 0; i < len(digits); i++ {
		d := int(digits[i] - '0')
		// value*10 + d would exceed maxInt: clamp rather than overflow.
		if value > (maxInt-d)/10 {
			return maxInt, true
		}
		value = value*10 + d
	}
	return value, true
}

// extractStringAfterKeyword finds a keyword and extracts the number string that follows.
//
// The keyword is matched case-insensitively and must stand alone: it may not
// be preceded by a word character (ASCII letter, digit or underscore) and
// must be followed by at least one whitespace byte. Occurrences that are not
// followed by digits are skipped and the search continues, so an identifier
// such as "n.unlimited" or a clause such as "LIMIT $max" earlier in the query
// does not hide a later "LIMIT 10". Returns "" when nothing matches or when
// keyword is empty.
//
// Matching is performed on s itself rather than on strings.ToUpper(s), whose
// byte length can differ from that of s for non-ASCII text.
func extractStringAfterKeyword(s, keyword string) string {
	n := len(keyword)
	if n == 0 {
		return ""
	}

	// i+n < len(s): at least one byte (the required whitespace) must follow.
	for i := 0; i+n < len(s); i++ {
		if !strings.EqualFold(s[i:i+n], keyword) {
			continue
		}

		// Word boundary before the keyword (same character class as isWordChar).
		if i > 0 {
			p := s[i-1]
			if p == '_' || (p >= '0' && p <= '9') || (p >= 'a' && p <= 'z') || (p >= 'A' && p <= 'Z') {
				continue
			}
		}

		// Skip the whitespace separating keyword and value; it is mandatory.
		start := i + n
		for start < len(s) && unicode.IsSpace(rune(s[start])) {
			start++
		}
		if start == i+n {
			continue
		}

		// Collect the run of ASCII digits.
		end := start
		for end < len(s) && s[end] >= '0' && s[end] <= '9' {
			end++
		}
		if end > start {
			return s[start:end]
		}
	}
	return ""
}

// =============================================================================
// Keyword Index Finding (for compound query detection)
// =============================================================================

// FindKeywordIndex finds the position of a keyword in a query (case-insensitive).
// Returns -1 if not found. Respects word boundaries.
// This is faster than using regexp for simple keyword detection.
func FindKeywordIndex(s, keyword string) int { if s == "" || keyword == "" { return -1 } upper := strings.ToUpper(s) keywordUpper := strings.ToUpper(keyword) keywordLen := len(keyword) for i := 0; i <= len(upper)-keywordLen; i++ { if upper[i:i+keywordLen] != keywordUpper { continue } // Check word boundary before if i > 0 && isWordChar(s[i-1]) { continue } // Check word boundary after afterIdx := i + keywordLen if afterIdx < len(s) && isWordChar(s[afterIdx]) { continue } return i } return -1 } // ContainsKeyword checks if a query contains a keyword (case-insensitive). // Respects word boundaries. func ContainsKeyword(s, keyword string) bool { return FindKeywordIndex(s, keyword) >= 0 } // ============================================================================= // Aggregation Function Parsing (replaces 8 separate regex patterns) // ============================================================================= // AggregationResult holds the parsed components of an aggregation expression. type AggregationResult struct { Function string // COUNT, SUM, AVG, MIN, MAX, COLLECT Variable string // The variable name (e.g., "n") Property string // The property name (e.g., "age"), empty for COUNT(n) or COUNT(*) Distinct bool // True if DISTINCT was specified IsStar bool // True if COUNT(*) } // ParseAggregation parses an aggregation expression like "COUNT(n.prop)" or "SUM(DISTINCT n.age)". // This replaces 8 separate regex patterns with one unified parser (~5x faster). // // Returns nil if the expression is not a valid aggregation. 
// // Example: // // ParseAggregation("COUNT(n.age)") → {Function: "COUNT", Variable: "n", Property: "age"} // ParseAggregation("SUM(DISTINCT x.value)") → {Function: "SUM", Variable: "x", Property: "value", Distinct: true} // ParseAggregation("COUNT(*)") → {Function: "COUNT", IsStar: true} func ParseAggregation(expr string) *AggregationResult { expr = strings.TrimSpace(expr) if len(expr) < 5 { // Minimum: "MIN()" return nil } upper := strings.ToUpper(expr) // Find the function name var funcName string var funcLen int for _, fn := range []string{"COLLECT", "COUNT", "SUM", "AVG", "MIN", "MAX"} { if strings.HasPrefix(upper, fn+"(") { funcName = fn funcLen = len(fn) break } } if funcName == "" { return nil } // Find the opening and closing parentheses openParen := funcLen if expr[openParen] != '(' { return nil } // Find matching closing paren closeParen := len(expr) - 1 for closeParen > openParen && expr[closeParen] != ')' { closeParen-- } if closeParen <= openParen { return nil } // Extract the content inside parentheses content := strings.TrimSpace(expr[openParen+1 : closeParen]) if content == "" { return nil } result := &AggregationResult{ Function: funcName, } // Check for COUNT(*) if content == "*" { result.IsStar = true return result } upperContent := strings.ToUpper(content) // Check for DISTINCT if strings.HasPrefix(upperContent, "DISTINCT ") { result.Distinct = true content = strings.TrimSpace(content[9:]) // Skip "DISTINCT " } // Parse variable.property or just variable dotIdx := strings.Index(content, ".") if dotIdx > 0 { // Has property: variable.property varPart := content[:dotIdx] propPart := content[dotIdx+1:] if !isValidIdentifier(varPart) || !isValidIdentifier(propPart) { return nil } result.Variable = varPart result.Property = propPart } else { // Just variable: COUNT(n) or similar if !isValidIdentifier(content) { return nil } result.Variable = content } return result } // isValidIdentifier checks if s is a valid Cypher identifier (alphanumeric + 
underscore, starts with letter/underscore). func isValidIdentifier(s string) bool { if len(s) == 0 { return false } first := s[0] if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_') { return false } for i := 1; i < len(s); i++ { c := s[i] if !isWordChar(c) { return false } } return true } // ParseAggregationProperty is a convenience function that returns just the variable and property. // Returns ("", "") if not a valid aggregation with a property. // This provides compatibility with the regex match[1], match[2] pattern. func ParseAggregationProperty(expr string) (variable, property string) { result := ParseAggregation(expr) if result == nil { return "", "" } return result.Variable, result.Property } // ============================================================================= // Parameter Extraction (replaces parameterPattern regex) // ============================================================================= // ExtractParameters finds all parameter references ($name) in a query string. // Returns a slice of parameter names (without the $ prefix). // This is ~5x faster than regex FindAllStringSubmatch. 
// // Example: // // ExtractParameters("MATCH (n) WHERE n.name = $name AND n.age > $minAge") // // Returns: ["name", "minAge"] func ExtractParameters(query string) []string { var params []string i := 0 for i < len(query) { // Find next $ dollarIdx := strings.IndexByte(query[i:], '$') if dollarIdx < 0 { break } dollarIdx += i // Check if there's a valid identifier after $ start := dollarIdx + 1 if start >= len(query) { break } // First character must be letter or underscore first := query[start] if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_') { i = start continue } // Find end of identifier end := start + 1 for end < len(query) && isWordChar(query[end]) { end++ } params = append(params, query[start:end]) i = end } return params } // ReplaceParameters replaces all parameter references with their values. // The replacer function receives the parameter name (without $) and returns the replacement string. // This is ~5x faster than regex ReplaceAllStringFunc. 
// // Example: // // ReplaceParameters("WHERE n.name = $name", func(param string) string { // return fmt.Sprintf("'%s'", params[param]) // }) func ReplaceParameters(query string, replacer func(paramName string) string) string { var result strings.Builder result.Grow(len(query)) i := 0 for i < len(query) { // Find next $ dollarIdx := strings.IndexByte(query[i:], '$') if dollarIdx < 0 { result.WriteString(query[i:]) break } dollarIdx += i // Write everything before the $ result.WriteString(query[i:dollarIdx]) // Check if there's a valid identifier after $ start := dollarIdx + 1 if start >= len(query) { result.WriteByte('$') break } // First character must be letter or underscore first := query[start] if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_') { result.WriteByte('$') i = start continue } // Find end of identifier end := start + 1 for end < len(query) && isWordChar(query[end]) { end++ } // Call replacer with the parameter name paramName := query[start:end] result.WriteString(replacer(paramName)) i = end } return result.String() }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/orneryd/Mimir'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

string_patterns.go•12.7 KiB