Skip to main content
Glama
orneryd

M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

by orneryd
string_patterns.go 13 kB
// Package cypher - Optimized string-based pattern matching for hot paths. // // This file provides fast string-based alternatives to regex patterns for // operations that are called on every query. These functions are 5-10x faster // than their regex equivalents. // // Performance comparison (benchmark on M1 Mac): // - splitByKeyword vs regex Split: ~8x faster // - extractLimitSkip vs regex FindStringSubmatch: ~6x faster // - extractParameter vs regex FindAllStringSubmatch: ~5x faster package cypher import ( "strings" "unicode" ) // ============================================================================= // Keyword Splitting (replaces matchKeywordPattern and createKeywordPattern) // ============================================================================= // SplitByKeyword splits a string by a keyword (case-insensitive), respecting word boundaries. // This is ~8x faster than regexp.MustCompile(`(?i)\bKEYWORD\s+`).Split(). // // Example: // // SplitByKeyword("MATCH (a) MATCH (b)", "MATCH") // // Returns: ["", "(a) ", "(b)"] func SplitByKeyword(s, keyword string) []string { if s == "" { return []string{s} } upper := strings.ToUpper(s) keywordUpper := strings.ToUpper(keyword) keywordLen := len(keyword) var result []string lastEnd := 0 for i := 0; i <= len(upper)-keywordLen; i++ { // Check if keyword matches at this position if upper[i:i+keywordLen] != keywordUpper { continue } // Check word boundary before (start of string or non-alphanumeric) if i > 0 && isWordChar(s[i-1]) { continue } // Check that there's whitespace after the keyword afterIdx := i + keywordLen if afterIdx >= len(s) || !unicode.IsSpace(rune(s[afterIdx])) { continue } // Found a match - add the part before this keyword result = append(result, s[lastEnd:i]) // Skip the keyword and following whitespace lastEnd = afterIdx for lastEnd < len(s) && unicode.IsSpace(rune(s[lastEnd])) { lastEnd++ } i = lastEnd - 1 // -1 because loop will increment } // Add the remaining part result = append(result, 
s[lastEnd:]) return result } // SplitByMatch splits by "MATCH " keyword. Convenience wrapper for hot path. func SplitByMatch(s string) []string { return SplitByKeyword(s, "MATCH") } // SplitByCreate splits by "CREATE " keyword. Convenience wrapper for hot path. func SplitByCreate(s string) []string { return SplitByKeyword(s, "CREATE") } // isWordChar returns true if c is a word character (alphanumeric or underscore) func isWordChar(c byte) bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' } // ============================================================================= // LIMIT/SKIP Extraction (replaces limitPattern and skipPattern) // ============================================================================= // ExtractLimit extracts the LIMIT value from a query string. // Returns the value and true if found, or 0 and false if not found. // This is ~6x faster than regex FindStringSubmatch. // // Example: // // ExtractLimit("MATCH (n) RETURN n LIMIT 10") // // Returns: 10, true func ExtractLimit(query string) (int, bool) { return extractIntAfterKeyword(query, "LIMIT") } // ExtractSkip extracts the SKIP value from a query string. // Returns the value and true if found, or 0 and false if not found. // // Example: // // ExtractSkip("MATCH (n) RETURN n SKIP 5 LIMIT 10") // // Returns: 5, true func ExtractSkip(query string) (int, bool) { return extractIntAfterKeyword(query, "SKIP") } // ExtractLimitString extracts the LIMIT value as a string (for compatibility). // Returns empty string if not found. func ExtractLimitString(query string) string { return extractStringAfterKeyword(query, "LIMIT") } // ExtractSkipString extracts the SKIP value as a string (for compatibility). // Returns empty string if not found. func ExtractSkipString(query string) string { return extractStringAfterKeyword(query, "SKIP") } // extractIntAfterKeyword finds a keyword and extracts the integer that follows. 
//
// The keyword is matched case-insensitively against s itself, so the offsets
// used to read the digits are always valid for s, even when s contains
// characters whose upper-case form has a different encoded length. An
// occurrence that is not followed by optional whitespace and at least one
// decimal digit is skipped and the scan continues, so text such as "n.limit >"
// ahead of the real clause does not hide it.
//
// Returns the parsed value and true, or 0 and false when no occurrence of
// keyword is followed by digits. A value too large for int is clamped to the
// largest int instead of wrapping around.
//
// NOTE(review): as before, no word boundary is required in front of the
// keyword and digits may follow it without whitespace — confirm that callers
// want this leniency.
func extractIntAfterKeyword(s, keyword string) (int, bool) {
	start, end, ok := numberAfterKeyword(s, keyword)
	if !ok {
		return 0, false
	}

	const maxInt = int(^uint(0) >> 1)

	value := 0
	for i := start; i < end; i++ {
		digit := int(s[i] - '0')
		// value*10+digit would exceed maxInt: saturate rather than overflow.
		if value > (maxInt-digit)/10 {
			return maxInt, true
		}
		value = value*10 + digit
	}
	return value, true
}

// extractStringAfterKeyword finds a keyword and extracts the number string that follows.
// It applies the same matching rules as extractIntAfterKeyword and returns the
// digit run exactly as written (leading zeros included), or "" when no
// occurrence of keyword is followed by digits.
func extractStringAfterKeyword(s, keyword string) string {
	start, end, ok := numberAfterKeyword(s, keyword)
	if !ok {
		return ""
	}
	return s[start:end]
}

// numberAfterKeyword returns the half-open byte range [start, end) of the first
// run of decimal digits in s that follows an occurrence of keyword, optionally
// separated from it by whitespace. ok is false when there is no such run or
// when keyword is empty.
func numberAfterKeyword(s, keyword string) (start, end int, ok bool) {
	n := len(keyword)
	if n == 0 {
		return 0, 0, false
	}

	for i := 0; i+n <= len(s); i++ {
		if !strings.EqualFold(s[i:i+n], keyword) {
			continue
		}

		// Skip whitespace between the keyword and the number.
		start = i + n
		for start < len(s) && unicode.IsSpace(rune(s[start])) {
			start++
		}

		end = start
		for end < len(s) && s[end] >= '0' && s[end] <= '9' {
			end++
		}
		if end > start {
			return start, end, true
		}
		// No digits after this occurrence; keep looking further along s.
	}
	return 0, 0, false
}

// =============================================================================
// Keyword Index Finding (for compound query detection)
// =============================================================================

// FindKeywordIndex finds the position of a keyword in a query (case-insensitive).
// Returns -1 if not found. Respects word boundaries.
// This is faster than using regexp for simple keyword detection.
func FindKeywordIndex(s, keyword string) int { if s == "" || keyword == "" { return -1 } upper := strings.ToUpper(s) keywordUpper := strings.ToUpper(keyword) keywordLen := len(keyword) for i := 0; i <= len(upper)-keywordLen; i++ { if upper[i:i+keywordLen] != keywordUpper { continue } // Check word boundary before if i > 0 && isWordChar(s[i-1]) { continue } // Check word boundary after afterIdx := i + keywordLen if afterIdx < len(s) && isWordChar(s[afterIdx]) { continue } return i } return -1 } // ContainsKeyword checks if a query contains a keyword (case-insensitive). // Respects word boundaries. func ContainsKeyword(s, keyword string) bool { return FindKeywordIndex(s, keyword) >= 0 } // ============================================================================= // Aggregation Function Parsing (replaces 8 separate regex patterns) // ============================================================================= // AggregationResult holds the parsed components of an aggregation expression. type AggregationResult struct { Function string // COUNT, SUM, AVG, MIN, MAX, COLLECT Variable string // The variable name (e.g., "n") Property string // The property name (e.g., "age"), empty for COUNT(n) or COUNT(*) Distinct bool // True if DISTINCT was specified IsStar bool // True if COUNT(*) } // ParseAggregation parses an aggregation expression like "COUNT(n.prop)" or "SUM(DISTINCT n.age)". // This replaces 8 separate regex patterns with one unified parser (~5x faster). // // Returns nil if the expression is not a valid aggregation. 
// // Example: // // ParseAggregation("COUNT(n.age)") → {Function: "COUNT", Variable: "n", Property: "age"} // ParseAggregation("SUM(DISTINCT x.value)") → {Function: "SUM", Variable: "x", Property: "value", Distinct: true} // ParseAggregation("COUNT(*)") → {Function: "COUNT", IsStar: true} func ParseAggregation(expr string) *AggregationResult { expr = strings.TrimSpace(expr) if len(expr) < 5 { // Minimum: "MIN()" return nil } upper := strings.ToUpper(expr) // Find the function name var funcName string var funcLen int for _, fn := range []string{"COLLECT", "COUNT", "SUM", "AVG", "MIN", "MAX"} { if strings.HasPrefix(upper, fn+"(") { funcName = fn funcLen = len(fn) break } } if funcName == "" { return nil } // Find the opening and closing parentheses openParen := funcLen if expr[openParen] != '(' { return nil } // Find matching closing paren closeParen := len(expr) - 1 for closeParen > openParen && expr[closeParen] != ')' { closeParen-- } if closeParen <= openParen { return nil } // Extract the content inside parentheses content := strings.TrimSpace(expr[openParen+1 : closeParen]) if content == "" { return nil } result := &AggregationResult{ Function: funcName, } // Check for COUNT(*) if content == "*" { result.IsStar = true return result } upperContent := strings.ToUpper(content) // Check for DISTINCT if strings.HasPrefix(upperContent, "DISTINCT ") { result.Distinct = true content = strings.TrimSpace(content[9:]) // Skip "DISTINCT " } // Parse variable.property or just variable dotIdx := strings.Index(content, ".") if dotIdx > 0 { // Has property: variable.property varPart := content[:dotIdx] propPart := content[dotIdx+1:] if !isValidIdentifier(varPart) || !isValidIdentifier(propPart) { return nil } result.Variable = varPart result.Property = propPart } else { // Just variable: COUNT(n) or similar if !isValidIdentifier(content) { return nil } result.Variable = content } return result } // isValidIdentifier checks if s is a valid Cypher identifier (alphanumeric + 
underscore, starts with letter/underscore). func isValidIdentifier(s string) bool { if len(s) == 0 { return false } first := s[0] if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_') { return false } for i := 1; i < len(s); i++ { c := s[i] if !isWordChar(c) { return false } } return true } // ParseAggregationProperty is a convenience function that returns just the variable and property. // Returns ("", "") if not a valid aggregation with a property. // This provides compatibility with the regex match[1], match[2] pattern. func ParseAggregationProperty(expr string) (variable, property string) { result := ParseAggregation(expr) if result == nil { return "", "" } return result.Variable, result.Property } // ============================================================================= // Parameter Extraction (replaces parameterPattern regex) // ============================================================================= // ExtractParameters finds all parameter references ($name) in a query string. // Returns a slice of parameter names (without the $ prefix). // This is ~5x faster than regex FindAllStringSubmatch. 
//
// A parameter name starts with an ASCII letter or underscore and continues
// with ASCII letters, digits and underscores. A '$' followed by anything else
// is skipped. Names are returned in order of appearance, duplicates included;
// the result is nil when the query has no parameters.
//
// Example:
//
//	ExtractParameters("MATCH (n) WHERE n.name = $name AND n.age > $minAge")
//	// Returns: ["name", "minAge"]
func ExtractParameters(query string) []string {
	var names []string

	// rest is the part of the query that has not been scanned yet.
	rest := query
	for {
		pos := strings.IndexByte(rest, '$')
		if pos < 0 {
			return names
		}
		rest = rest[pos+1:] // now positioned just after the '$'

		// Measure the identifier at the front of rest (0 when there is none).
		n := 0
		for ; n < len(rest); n++ {
			c := rest[n]
			letter := c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
			digit := '0' <= c && c <= '9'
			if !letter && !(digit && n > 0) {
				break
			}
		}
		if n == 0 {
			continue
		}

		names = append(names, rest[:n])
		rest = rest[n:]
	}
}

// ReplaceParameters replaces all parameter references with their values.
// The replacer function receives the parameter name (without $) and returns the replacement string.
// This is ~5x faster than regex ReplaceAllStringFunc.
//
// A parameter name starts with an ASCII letter or underscore and continues
// with ASCII letters, digits and underscores. A '$' that is not followed by
// such a name is copied to the output unchanged. replacer is called once per
// reference, in order of appearance.
//
// Example:
//
//	ReplaceParameters("WHERE n.name = $name", func(param string) string {
//		return fmt.Sprintf("'%s'", params[param])
//	})
func ReplaceParameters(query string, replacer func(paramName string) string) string {
	var out strings.Builder
	out.Grow(len(query))

	// rest is the part of the query that has not been copied or replaced yet.
	rest := query
	for len(rest) > 0 {
		pos := strings.IndexByte(rest, '$')
		if pos < 0 {
			out.WriteString(rest)
			break
		}

		// Copy the literal text in front of the '$', then step past the '$'.
		out.WriteString(rest[:pos])
		rest = rest[pos+1:]

		// Measure the identifier at the front of rest (0 when there is none).
		n := 0
		for ; n < len(rest); n++ {
			c := rest[n]
			letter := c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
			digit := '0' <= c && c <= '9'
			if !letter && !(digit && n > 0) {
				break
			}
		}
		if n == 0 {
			// Not a parameter reference: keep the '$' as written.
			out.WriteByte('$')
			continue
		}

		out.WriteString(replacer(rest[:n]))
		rest = rest[n:]
	}

	return out.String()
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/orneryd/Mimir'

If you have feedback or need assistance with the MCP directory API, please join our Discord server