// Package cypher - Optimized string-based pattern matching for hot paths.
//
// This file provides fast string-based alternatives to regex patterns for
// operations that are called on every query. These functions are 5-10x faster
// than their regex equivalents.
//
// Performance comparison (benchmark on M1 Mac):
// - splitByKeyword vs regex Split: ~8x faster
// - extractLimitSkip vs regex FindStringSubmatch: ~6x faster
// - extractParameter vs regex FindAllStringSubmatch: ~5x faster
package cypher
import (
"strings"
"unicode"
)
// =============================================================================
// Keyword Splitting (replaces matchKeywordPattern and createKeywordPattern)
// =============================================================================
// SplitByKeyword splits a string by a keyword (case-insensitive), respecting word boundaries.
// This is ~8x faster than regexp.MustCompile(`(?i)\bKEYWORD\s+`).Split().
//
// Example:
//
// SplitByKeyword("MATCH (a) MATCH (b)", "MATCH")
// // Returns: ["", "(a) ", "(b)"]
func SplitByKeyword(s, keyword string) []string {
if s == "" {
return []string{s}
}
upper := strings.ToUpper(s)
keywordUpper := strings.ToUpper(keyword)
keywordLen := len(keyword)
var result []string
lastEnd := 0
for i := 0; i <= len(upper)-keywordLen; i++ {
// Check if keyword matches at this position
if upper[i:i+keywordLen] != keywordUpper {
continue
}
// Check word boundary before (start of string or non-alphanumeric)
if i > 0 && isWordChar(s[i-1]) {
continue
}
// Check that there's whitespace after the keyword
afterIdx := i + keywordLen
if afterIdx >= len(s) || !unicode.IsSpace(rune(s[afterIdx])) {
continue
}
// Found a match - add the part before this keyword
result = append(result, s[lastEnd:i])
// Skip the keyword and following whitespace
lastEnd = afterIdx
for lastEnd < len(s) && unicode.IsSpace(rune(s[lastEnd])) {
lastEnd++
}
i = lastEnd - 1 // -1 because loop will increment
}
// Add the remaining part
result = append(result, s[lastEnd:])
return result
}
// SplitByMatch splits by "MATCH " keyword. Convenience wrapper for hot path.
func SplitByMatch(s string) []string {
return SplitByKeyword(s, "MATCH")
}
// SplitByCreate splits by "CREATE " keyword. Convenience wrapper for hot path.
func SplitByCreate(s string) []string {
return SplitByKeyword(s, "CREATE")
}
// isWordChar returns true if c is a word character (alphanumeric or underscore)
func isWordChar(c byte) bool {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'
}
// =============================================================================
// LIMIT/SKIP Extraction (replaces limitPattern and skipPattern)
// =============================================================================
// ExtractLimit extracts the LIMIT value from a query string.
// Returns the value and true if found, or 0 and false if not found.
// This is ~6x faster than regex FindStringSubmatch.
//
// Example:
//
// ExtractLimit("MATCH (n) RETURN n LIMIT 10")
// // Returns: 10, true
func ExtractLimit(query string) (int, bool) {
return extractIntAfterKeyword(query, "LIMIT")
}
// ExtractSkip extracts the SKIP value from a query string.
// Returns the value and true if found, or 0 and false if not found.
//
// Example:
//
// ExtractSkip("MATCH (n) RETURN n SKIP 5 LIMIT 10")
// // Returns: 5, true
func ExtractSkip(query string) (int, bool) {
return extractIntAfterKeyword(query, "SKIP")
}
// ExtractLimitString extracts the LIMIT value as a string (for compatibility).
// Returns empty string if not found.
func ExtractLimitString(query string) string {
return extractStringAfterKeyword(query, "LIMIT")
}
// ExtractSkipString extracts the SKIP value as a string (for compatibility).
// Returns empty string if not found.
func ExtractSkipString(query string) string {
return extractStringAfterKeyword(query, "SKIP")
}
// extractIntAfterKeyword finds a keyword and extracts the integer that follows.
func extractIntAfterKeyword(s, keyword string) (int, bool) {
upper := strings.ToUpper(s)
keywordUpper := strings.ToUpper(keyword)
idx := strings.Index(upper, keywordUpper)
if idx < 0 {
return 0, false
}
// Move past the keyword
start := idx + len(keyword)
// Skip whitespace
for start < len(s) && unicode.IsSpace(rune(s[start])) {
start++
}
if start >= len(s) {
return 0, false
}
// Parse the integer
end := start
for end < len(s) && s[end] >= '0' && s[end] <= '9' {
end++
}
if end == start {
return 0, false
}
// Convert to int (simple, no error handling needed - we know it's digits)
result := 0
for i := start; i < end; i++ {
result = result*10 + int(s[i]-'0')
}
return result, true
}
// extractStringAfterKeyword finds a keyword and extracts the number string that follows.
func extractStringAfterKeyword(s, keyword string) string {
upper := strings.ToUpper(s)
keywordUpper := strings.ToUpper(keyword)
idx := strings.Index(upper, keywordUpper)
if idx < 0 {
return ""
}
// Move past the keyword
start := idx + len(keyword)
// Skip whitespace
for start < len(s) && unicode.IsSpace(rune(s[start])) {
start++
}
if start >= len(s) {
return ""
}
// Find end of number
end := start
for end < len(s) && s[end] >= '0' && s[end] <= '9' {
end++
}
if end == start {
return ""
}
return s[start:end]
}
// =============================================================================
// Keyword Index Finding (for compound query detection)
// =============================================================================
// FindKeywordIndex finds the position of a keyword in a query (case-insensitive).
// Returns -1 if not found. Respects word boundaries.
// This is faster than using regexp for simple keyword detection.
func FindKeywordIndex(s, keyword string) int {
if s == "" || keyword == "" {
return -1
}
upper := strings.ToUpper(s)
keywordUpper := strings.ToUpper(keyword)
keywordLen := len(keyword)
for i := 0; i <= len(upper)-keywordLen; i++ {
if upper[i:i+keywordLen] != keywordUpper {
continue
}
// Check word boundary before
if i > 0 && isWordChar(s[i-1]) {
continue
}
// Check word boundary after
afterIdx := i + keywordLen
if afterIdx < len(s) && isWordChar(s[afterIdx]) {
continue
}
return i
}
return -1
}
// ContainsKeyword checks if a query contains a keyword (case-insensitive).
// Respects word boundaries.
func ContainsKeyword(s, keyword string) bool {
return FindKeywordIndex(s, keyword) >= 0
}
// =============================================================================
// Aggregation Function Parsing (replaces 8 separate regex patterns)
// =============================================================================
// AggregationResult holds the parsed components of an aggregation expression.
type AggregationResult struct {
Function string // COUNT, SUM, AVG, MIN, MAX, COLLECT
Variable string // The variable name (e.g., "n")
Property string // The property name (e.g., "age"), empty for COUNT(n) or COUNT(*)
Distinct bool // True if DISTINCT was specified
IsStar bool // True if COUNT(*)
}
// ParseAggregation parses an aggregation expression like "COUNT(n.prop)" or "SUM(DISTINCT n.age)".
// This replaces 8 separate regex patterns with one unified parser (~5x faster).
//
// Returns nil if the expression is not a valid aggregation.
//
// Example:
//
// ParseAggregation("COUNT(n.age)") → {Function: "COUNT", Variable: "n", Property: "age"}
// ParseAggregation("SUM(DISTINCT x.value)") → {Function: "SUM", Variable: "x", Property: "value", Distinct: true}
// ParseAggregation("COUNT(*)") → {Function: "COUNT", IsStar: true}
func ParseAggregation(expr string) *AggregationResult {
expr = strings.TrimSpace(expr)
if len(expr) < 5 { // Minimum: "MIN()"
return nil
}
upper := strings.ToUpper(expr)
// Find the function name
var funcName string
var funcLen int
for _, fn := range []string{"COLLECT", "COUNT", "SUM", "AVG", "MIN", "MAX"} {
if strings.HasPrefix(upper, fn+"(") {
funcName = fn
funcLen = len(fn)
break
}
}
if funcName == "" {
return nil
}
// Find the opening and closing parentheses
openParen := funcLen
if expr[openParen] != '(' {
return nil
}
// Find matching closing paren
closeParen := len(expr) - 1
for closeParen > openParen && expr[closeParen] != ')' {
closeParen--
}
if closeParen <= openParen {
return nil
}
// Extract the content inside parentheses
content := strings.TrimSpace(expr[openParen+1 : closeParen])
if content == "" {
return nil
}
result := &AggregationResult{
Function: funcName,
}
// Check for COUNT(*)
if content == "*" {
result.IsStar = true
return result
}
upperContent := strings.ToUpper(content)
// Check for DISTINCT
if strings.HasPrefix(upperContent, "DISTINCT ") {
result.Distinct = true
content = strings.TrimSpace(content[9:]) // Skip "DISTINCT "
}
// Parse variable.property or just variable
dotIdx := strings.Index(content, ".")
if dotIdx > 0 {
// Has property: variable.property
varPart := content[:dotIdx]
propPart := content[dotIdx+1:]
if !isValidIdentifier(varPart) || !isValidIdentifier(propPart) {
return nil
}
result.Variable = varPart
result.Property = propPart
} else {
// Just variable: COUNT(n) or similar
if !isValidIdentifier(content) {
return nil
}
result.Variable = content
}
return result
}
// isValidIdentifier checks if s is a valid Cypher identifier (alphanumeric + underscore, starts with letter/underscore).
func isValidIdentifier(s string) bool {
if len(s) == 0 {
return false
}
first := s[0]
if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_') {
return false
}
for i := 1; i < len(s); i++ {
c := s[i]
if !isWordChar(c) {
return false
}
}
return true
}
// ParseAggregationProperty is a convenience function that returns just the variable and property.
// Returns ("", "") if not a valid aggregation with a property.
// This provides compatibility with the regex match[1], match[2] pattern.
func ParseAggregationProperty(expr string) (variable, property string) {
result := ParseAggregation(expr)
if result == nil {
return "", ""
}
return result.Variable, result.Property
}
// =============================================================================
// Parameter Extraction (replaces parameterPattern regex)
// =============================================================================
// ExtractParameters finds all parameter references ($name) in a query string.
// Returns a slice of parameter names (without the $ prefix).
// This is ~5x faster than regex FindAllStringSubmatch.
//
// Example:
//
// ExtractParameters("MATCH (n) WHERE n.name = $name AND n.age > $minAge")
// // Returns: ["name", "minAge"]
func ExtractParameters(query string) []string {
var params []string
i := 0
for i < len(query) {
// Find next $
dollarIdx := strings.IndexByte(query[i:], '$')
if dollarIdx < 0 {
break
}
dollarIdx += i
// Check if there's a valid identifier after $
start := dollarIdx + 1
if start >= len(query) {
break
}
// First character must be letter or underscore
first := query[start]
if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_') {
i = start
continue
}
// Find end of identifier
end := start + 1
for end < len(query) && isWordChar(query[end]) {
end++
}
params = append(params, query[start:end])
i = end
}
return params
}
// ReplaceParameters replaces all parameter references with their values.
// The replacer function receives the parameter name (without $) and returns the replacement string.
// This is ~5x faster than regex ReplaceAllStringFunc.
//
// Example:
//
// ReplaceParameters("WHERE n.name = $name", func(param string) string {
// return fmt.Sprintf("'%s'", params[param])
// })
func ReplaceParameters(query string, replacer func(paramName string) string) string {
var result strings.Builder
result.Grow(len(query))
i := 0
for i < len(query) {
// Find next $
dollarIdx := strings.IndexByte(query[i:], '$')
if dollarIdx < 0 {
result.WriteString(query[i:])
break
}
dollarIdx += i
// Write everything before the $
result.WriteString(query[i:dollarIdx])
// Check if there's a valid identifier after $
start := dollarIdx + 1
if start >= len(query) {
result.WriteByte('$')
break
}
// First character must be letter or underscore
first := query[start]
if !((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_') {
result.WriteByte('$')
i = start
continue
}
// Find end of identifier
end := start + 1
for end < len(query) && isWordChar(query[end]) {
end++
}
// Call replacer with the parameter name
paramName := query[start:end]
result.WriteString(replacer(paramName))
i = end
}
return result.String()
}