Domain Search MCP

qwen-inference.ts • 23.1 KiB

/**
 * Qwen Inference API client.
 *
 * Optional AI-powered domain suggestions using fine-tuned Qwen 2.5-7B model.
 * Falls back gracefully if endpoint is not configured or unavailable.
 *
 * This MCP does NOT require Qwen to function - it's an optional enhancement
 * for self-hosted users who deploy the inference server on their VPS.
 */

import { z } from 'zod';
import { config } from '../config.js';
import { logger } from '../utils/logger.js';
import { TtlCache } from '../utils/cache.js';
import { CircuitBreaker, CircuitOpenError } from '../utils/circuit-breaker.js';

// ═══════════════════════════════════════════════════════════════════════════
// STYLE CONFIGURATIONS
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Prompt ingredients for one naming style: a one-line description plus the
 * technique, example and constraint bullet lists that get rendered into the
 * prompt sent to the model.
 */
interface StylePromptConfig {
  description: string;
  techniques: string[];
  examples: string[];
  constraints: string[];
}

/**
 * Naming-style prompt table, keyed by style name.
 * Every entry steers the model toward a different family of naming techniques.
 */
const STYLE_PROMPTS: Record<string, StylePromptConfig> = {
  // Invented, brand-like words.
  brandable: {
    description: 'Create memorable invented words that sound like real brands',
    techniques: [
      'Portmanteau blending (Instagram = Instant + Telegram)',
      'Modern suffixes: -ly, -ify, -io, -ai, -eo, -va, -ra',
      'Phonetic spellings (Lyft, Fiverr, Tumblr)',
      'Consonant clusters that are pronounceable (Spotify, Stripe)',
      'Neologisms - completely new words that sound natural',
      'Letter substitution (K for C, X for Ex, Z for S)',
    ],
    examples: [
      'spotify - blend of "spot" + made-up suffix',
      'calendly - "calendar" + trendy "-ly" suffix',
      'shopify - "shop" + tech suffix "-ify"',
      'zapier - invented word from "zap" concept',
      'airtable - compound of "air" + "table"',
      'notion - single real word reimagined',
      'figma - invented word, short and punchy',
      'vercel - invented, sounds like "versatile"',
    ],
    constraints: [
      'MUST be pronounceable (say it out loud test)',
      'Length: 4-10 characters ideal',
      'NO generic descriptive names like "fastapp" or "quickdata"',
      'AVOID real dictionary words unless reimagined',
    ],
  },

  // Self-explanatory, professional compounds.
  descriptive: {
    description: 'Clear, professional names that convey meaning immediately',
    techniques: [
      'Compound words that describe the product',
      'Professional suffixes: -hq, -hub, -base, -stack, -cloud',
      'Action + object patterns (Dropbox, Mailchimp)',
      'Industry term + qualifier (Salesforce, Workday)',
    ],
    examples: [
      'dropbox - action + container',
      'mailchimp - service + mascot',
      'hubspot - central + location',
      'zendesk - philosophy + workspace',
      'basecamp - foundation + project term',
    ],
    constraints: [
      'Should be understandable at first glance',
      'Length: 5-12 characters',
      'Must relate to the product/service',
    ],
  },

  // Very short names.
  short: {
    description: 'Ultra-short, punchy names (4-7 characters max)',
    techniques: [
      'Truncation (removing vowels or syllables)',
      'Single syllable words',
      'Acronym-like patterns',
      'Sound-based (onomatopoeia)',
      'Prefix/suffix removal',
    ],
    examples: [
      'uber - short, powerful',
      'lyft - phonetic spelling, short',
      'snap - one syllable, action',
      'zoom - one syllable, energy',
      'trello - invented, compact',
      'asana - borrowed word, elegant',
      'jira - short, distinctive',
    ],
    constraints: [
      'MAXIMUM 7 characters',
      'MINIMUM 3 characters',
      'Must be easy to type and remember',
      'One or two syllables preferred',
    ],
  },

  // Experimental wordplay.
  creative: {
    description: 'Maximum experimentation - wordplay, unusual sounds, artistic names',
    techniques: [
      'Unusual letter combinations',
      'Phonetic playfulness',
      'Onomatopoeia (sounds like what it does)',
      'Mythological or invented language references',
      'Reversed words or misspellings',
      'Sound symbolism (sharp sounds for speed, soft for comfort)',
    ],
    examples: [
      'skype - sky + type blended',
      'twitch - evocative action word',
      'flickr - vowel removal style',
      'tumblr - vowel removal style',
      'hulu - completely invented, playful',
      'etsy - invented, crafty feel',
      'vimeo - anagram of "movie"',
    ],
    constraints: [
      'Can break conventional rules',
      'Must still be pronounceable',
      'Should evoke emotion or imagery',
    ],
  },
};

// ═══════════════════════════════════════════════════════════════════════════
// TYPES & SCHEMAS
// ═══════════════════════════════════════════════════════════════════════════

/**
 * One domain suggestion produced by the Qwen model: a non-empty `name`, a
 * non-empty `tld`, and an optional free-text `reason`.
 */
export const QwenDomainSchema = z.object({
  name: z.string().min(1),
  tld: z.string().min(1),
  reason: z.optional(z.string()),
});

/** Static type inferred from {@link QwenDomainSchema}. */
export type QwenDomain = z.infer<typeof QwenDomainSchema>;

/**
 * Shape of a request payload for the Qwen inference API.
 *
 * Only `prompt` is required; `style`, `max_tokens` and `temperature` are
 * optional tuning knobs with the bounds declared below.
 *
 * NOTE(review): these bounds are narrower than what the client in this file
 * can produce (its token budget is capped at 1536) - confirm whether the
 * schema or the client is authoritative.
 */
export const QwenRequestSchema = z.object({
  prompt: z.string().min(10).max(1000),
  style: z.optional(z.enum(['brandable', 'descriptive', 'short', 'creative'])),
  max_tokens: z.optional(z.number().int().min(128).max(1024)),
  temperature: z.optional(z.number().min(0.1).max(1.5)),
});

/** Static type inferred from {@link QwenRequestSchema}. */
export type QwenRequest = z.infer<typeof QwenRequestSchema>;

/**
 * Normalised response of one inference call: the parsed suggestions, the raw
 * completion text they were parsed from, the reported inference time in
 * milliseconds, and a flag telling whether the result was served from a cache.
 */
export const QwenResponseSchema = z.object({
  domains: QwenDomainSchema.array(),
  raw_response: z.string(),
  inference_time_ms: z.number(),
  cached: z.boolean(),
});

/** Static type inferred from {@link QwenResponseSchema}. */
export type QwenResponse = z.infer<typeof QwenResponseSchema>;

/**
 * Optional background about the project or idea being named.
 * Every field is optional; whatever is present is used to make the
 * suggestions more relevant.
 */
export interface QwenContext {
  // --- What is being named ---------------------------------------------

  /** Name of the project (e.g. when a codebase is being analysed). */
  projectName?: string;
  /** Free-text description of the project or business. */
  description?: string;
  /** Industry, either detected or given explicitly. */
  industry?: string;
  /** URL of the repository, kept as additional context. */
  repositoryUrl?: string;

  // --- Vocabulary to draw from -----------------------------------------

  /** Keywords that should be blended into or reflected by the names. */
  keywords?: string[];
  /** Words that capture the desired brand feel. */
  brandWords?: string[];

  // --- Length bounds -----------------------------------------------------

  /** Shortest acceptable domain name, in characters. */
  minLength?: number;
  /** Longest acceptable domain name, in characters. */
  maxLength?: number;
}

/**
 * Arguments accepted by a Qwen suggestion request.
 * Only `query` is mandatory.
 */
export interface QwenSuggestOptions {
  /** What the domain names are for. */
  query: string;
  /** Naming style to steer the model with. */
  style?: 'brandable' | 'descriptive' | 'short' | 'creative';
  /** Target top-level domain. */
  tld?: string;
  /** How many suggestions to ask the model for. */
  max_suggestions?: number;
  /** Sampling temperature forwarded to the model. */
  temperature?: number;
  /** Extra project/idea context for more relevant suggestions. */
  context?: QwenContext;
}

/**
 * Error raised for any failed Qwen inference call.
 *
 * `code` classifies the failure; `statusCode` carries the HTTP status when the
 * failure came from an HTTP response and is `undefined` otherwise.
 */
export class QwenInferenceError extends Error {
  /** Machine-readable failure category. */
  public readonly code:
    | 'TIMEOUT'
    | 'CONNECTION_REFUSED'
    | 'INVALID_RESPONSE'
    | 'SERVER_ERROR'
    | 'NOT_CONFIGURED';

  /** HTTP status of the failed response, when there was one. */
  public readonly statusCode?: number;

  constructor(message: string, code: QwenInferenceError['code'], statusCode?: number) {
    super(message);
    this.code = code;
    this.statusCode = statusCode;
    this.name = 'QwenInferenceError';
  }
}

// ═══════════════════════════════════════════════════════════════════════════
// CLIENT IMPLEMENTATION
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Qwen Inference API client with retry logic, caching and a circuit breaker.
 *
 * Sends prompts to an OpenAI-compatible `/v1/completions` endpoint
 * (llama.cpp), parses the completion text into domain suggestions and
 * degrades gracefully: `suggest()` resolves to `null` instead of throwing
 * when the model is unreachable or returns something unusable.
 */
export class QwenInferenceClient {
  /** Token budget for one suggestion line (name + reason), per style. */
  private static readonly TOKENS_PER_SUGGESTION: Record<string, number> = {
    short: 30,       // Ultra-short names, minimal reasons
    brandable: 50,   // Invented names, moderate explanations
    descriptive: 60, // Compound words, detailed reasoning
    creative: 70,    // Wordplay, artistic explanations
  };

  /**
   * Matches one (already trimmed) suggestion line:
   * optional bullet (`-`, `*`, `•`, `1.`, `1)`), `name.tld`, then an optional
   * separator (em dash, en dash, hyphen or colon) followed by the reason.
   * The name may contain inner hyphens but must not start or end with one.
   */
  private static readonly DOMAIN_LINE =
    /^(?:[-*•]|\d+[.)])?\s*([a-z0-9](?:[a-z0-9-]*[a-z0-9])?)\.([a-z]+)(?:\s*[—–:-]\s*(.*))?$/i;

  private readonly endpoint: string;
  private readonly apiKey?: string;
  private readonly timeoutMs: number;
  private readonly maxRetries: number;
  private readonly cache: TtlCache<QwenResponse>;
  private readonly circuitBreaker: CircuitBreaker;

  /**
   * @param endpoint Base URL of the inference server; trailing slashes are stripped.
   * @param options.apiKey Sent as a `Bearer` token when provided.
   * @param options.timeoutMs Per-request timeout; falsy values fall back to 15000.
   * @param options.maxRetries Extra attempts after the first one (default 2).
   *   An explicit `0` disables retries.
   * @param options.cacheTtl Passed to `TtlCache`; falsy values fall back to 3600.
   */
  constructor(
    endpoint: string,
    options: {
      apiKey?: string;
      timeoutMs?: number;
      maxRetries?: number;
      cacheTtl?: number;
    } = {},
  ) {
    this.endpoint = endpoint.replace(/\/+$/, ''); // Remove trailing slash
    this.apiKey = options.apiKey;
    this.timeoutMs = options.timeoutMs || 15000;

    // `??` instead of `||`: 0 is a legitimate "never retry" setting and must
    // not be replaced by the default. Negative/fractional/NaN values are
    // normalised so the retry loop always runs at least one attempt.
    const requestedRetries = options.maxRetries ?? 2;
    this.maxRetries = Number.isFinite(requestedRetries)
      ? Math.max(0, Math.floor(requestedRetries))
      : 2;

    this.cache = new TtlCache<QwenResponse>(options.cacheTtl || 3600, 500);

    // Circuit breaker: 5 failures in 60s → open for 30s
    this.circuitBreaker = new CircuitBreaker({
      name: 'qwen_inference',
      failureThreshold: 5,
      resetTimeoutMs: 30_000,
      failureWindowMs: 60_000,
      successThreshold: 2,
    });
  }

  /**
   * Generate domain suggestions using Qwen model.
   *
   * Returns suggestions or null if Qwen is unavailable.
   * Graceful degradation - caller should fall back to other sources.
   *
   * @param options Query plus optional style, TLD, count, temperature and context.
   * @returns Parsed suggestions (possibly empty), or `null` on any failure.
   */
  async suggest(options: QwenSuggestOptions): Promise<QwenDomain[] | null> {
    const { query, style = 'brandable', tld = 'com', max_suggestions = 10, temperature = 0.7, context } = options;

    // Build enhanced prompt with context
    const prompt = this._buildPrompt(query, style, tld, max_suggestions, context);

    // Check cache first. The prompt already encodes query, style, TLD, count
    // and context, so prompt + temperature identifies the request.
    const cacheKey = `${prompt}:${temperature}`;
    const cached = this.cache.get(cacheKey);
    if (cached) {
      logger.debug('Qwen cache hit', { query, cached_domains: cached.domains.length });
      return cached.domains;
    }

    // Make request with retry + circuit breaker
    try {
      const response = await this.circuitBreaker.execute(() =>
        this._makeRequestWithRetry({
          prompt,
          style,
          max_tokens: this._calculateMaxTokens(max_suggestions, style),
          temperature,
        })
      );

      // Validate response
      const validated = QwenResponseSchema.safeParse(response);
      if (!validated.success) {
        logger.warn('Qwen returned invalid response format', {
          error: validated.error.message,
        });
        return null;
      }

      // Cache only results that actually contain suggestions: a completion
      // nothing could be parsed from should not be pinned for the whole TTL.
      if (validated.data.domains.length > 0) {
        this.cache.set(cacheKey, validated.data);
      }

      logger.info('Qwen inference success', {
        query,
        domains: validated.data.domains.length,
        inference_ms: validated.data.inference_time_ms,
        cached: validated.data.cached,
      });

      return validated.data.domains;
    } catch (error) {
      // Circuit breaker open - fail fast
      if (error instanceof CircuitOpenError) {
        logger.debug('Qwen circuit breaker open, skipping', {
          resetAt: new Date(error.resetAt).toISOString(),
        });
        return null;
      }

      if (error instanceof QwenInferenceError) {
        logger.warn('Qwen inference failed', {
          code: error.code,
          message: error.message,
          statusCode: error.statusCode,
        });
      } else {
        logger.warn('Qwen inference error', {
          error: error instanceof Error ? error.message : String(error),
        });
      }

      return null; // Graceful degradation
    }
  }

  /**
   * Build comprehensive prompt for Qwen model based on style and context.
   *
   * The prompt is assembled from blocks separated by blank lines:
   * system (style/techniques), optional context, task, output format.
   * Unknown styles fall back to the `brandable` configuration.
   */
  private _buildPrompt(
    query: string,
    style: string,
    tld: string,
    maxSuggestions: number,
    context?: QwenContext,
  ): string {
    const styleConfig = STYLE_PROMPTS[style] || STYLE_PROMPTS.brandable;

    // Build the prompt in structured blocks
    const blocks: string[] = [];

    // System block - explains the task and techniques
    blocks.push(this._buildSystemBlock(styleConfig));

    // Context block - project/idea specific information
    if (context) {
      blocks.push(this._buildContextBlock(context));
    }

    // Task block - the actual request
    blocks.push(this._buildTaskBlock(query, tld, maxSuggestions, context));

    // Format block - specifies output format
    blocks.push(this._buildFormatBlock(tld));

    return blocks.join('\n\n');
  }

  /**
   * Build the system instruction block explaining techniques and style.
   * At most five examples are included to keep the prompt compact.
   */
  private _buildSystemBlock(styleConfig: typeof STYLE_PROMPTS.brandable): string {
    const lines: string[] = [
      '=== DOMAIN NAME GENERATION ===',
      '',
      `Style: ${styleConfig?.description || 'Create memorable, brandable domain names'}`,
      '',
      'TECHNIQUES TO USE:',
    ];

    const techniques = styleConfig?.techniques || [];
    for (const technique of techniques) {
      lines.push(`• ${technique}`);
    }

    lines.push('', 'EXAMPLE NAMES (for inspiration, NOT to copy):');
    const examples = styleConfig?.examples || [];
    for (const example of examples.slice(0, 5)) {
      lines.push(`• ${example}`);
    }

    lines.push('', 'CONSTRAINTS:');
    const constraints = styleConfig?.constraints || [];
    for (const constraint of constraints) {
      lines.push(`• ${constraint}`);
    }

    return lines.join('\n');
  }

  /**
   * Build the context block from project/idea information.
   * Absent fields are skipped; the length requirement is always emitted and
   * defaults to 4-12 characters.
   */
  private _buildContextBlock(context: QwenContext): string {
    const lines: string[] = ['=== CONTEXT ==='];

    if (context.projectName) {
      lines.push(`Project Name: ${context.projectName}`);
    }

    if (context.description) {
      lines.push(`Description: ${context.description}`);
    }

    if (context.industry) {
      lines.push(`Industry: ${context.industry}`);
    }

    if (context.keywords && context.keywords.length > 0) {
      lines.push(`Keywords to incorporate: ${context.keywords.join(', ')}`);
    }

    if (context.brandWords && context.brandWords.length > 0) {
      lines.push(`Brand inspiration words: ${context.brandWords.join(', ')}`);
    }

    const minLen = context.minLength || 4;
    const maxLen = context.maxLength || 12;
    lines.push(`Length requirement: ${minLen}-${maxLen} characters`);

    return lines.join('\n');
  }

  /**
   * Build the task block specifying what to generate.
   *
   * Rules are numbered while rendering, so the optional context-specific
   * rules never leave a gap in the sequence (e.g. "5." followed by "7.").
   */
  private _buildTaskBlock(
    query: string,
    tld: string,
    count: number,
    context?: QwenContext,
  ): string {
    const rules: string[] = [
      'INVENT NEW WORDS - do not use common dictionary words directly',
      'Each name must be UNIQUE and CREATIVE',
      'Names must be PRONOUNCEABLE (read it aloud)',
      'NO generic patterns like "fastX", "quickY", "proZ"',
      'Think like a startup founder naming their company',
    ];

    // Add context-specific guidance (only the first three keywords are used)
    if (context?.keywords && context.keywords.length > 0) {
      rules.push(`Try to BLEND or TRANSFORM these keywords: ${context.keywords.slice(0, 3).join(', ')}`);
    }

    if (context?.industry) {
      rules.push(`Names should feel appropriate for the ${context.industry} industry`);
    }

    const lines: string[] = [
      '=== TASK ===',
      `Generate ${count} unique, INVENTED domain names for: "${query}"`,
      `Target TLD: .${tld}`,
      '',
      'IMPORTANT RULES:',
      ...rules.map((rule, index) => `${index + 1}. ${rule}`),
    ];

    return lines.join('\n');
  }

  /**
   * Build the output format specification block.
   * The block ends with "Domains:" so the completion starts with the list.
   */
  private _buildFormatBlock(tld: string): string {
    return [
      '=== OUTPUT FORMAT ===',
      'Return EXACTLY in this format (one domain per line):',
      `- name.${tld} - Brief reason why this name works`,
      '',
      'Example output:',
      `- voxify.${tld} - Blend of "voice" + "-ify", modern tech feel`,
      `- zestora.${tld} - Invented word, energetic "zest" + melodic ending`,
      '',
      'Domains:',
    ].join('\n');
  }

  /**
   * Calculate max_tokens based on number of suggestions and style.
   *
   * Budget = 128 base tokens + per-suggestion budget for the style
   * (50 for unknown styles), capped at 1536.
   */
  private _calculateMaxTokens(maxSuggestions: number, style: string): number {
    const perSuggestion = QwenInferenceClient.TOKENS_PER_SUGGESTION[style] || 50;

    // Reduced base buffer (128 vs 256) since we're style-aware
    // Cap at 1536 tokens (reduced from 2048) for cost efficiency
    return Math.min(128 + maxSuggestions * perSuggestion, 1536);
  }

  /**
   * Parse domain names from model-generated text.
   *
   * Accepts the prompted format (`- domain.tld - Reason`) and common
   * deviations: indented lines, CRLF line endings, `*`/`•`/numbered bullets,
   * em/en dashes or a colon as separator, and lines without a reason.
   * Lines that do not look like a suggestion are ignored.
   *
   * @returns Lower-cased name/TLD pairs in the order they appear.
   */
  private _parseDomainsFromText(text: string): QwenDomain[] {
    const domains: QwenDomain[] = [];

    for (const rawLine of text.split('\n')) {
      // Trimming removes indentation and a trailing "\r" from CRLF output,
      // either of which would otherwise defeat the anchored pattern.
      const line = rawLine.trim();
      if (!line) {
        continue;
      }

      const match = QwenInferenceClient.DOMAIN_LINE.exec(line);
      const name = match?.[1];
      const tld = match?.[2];
      if (!name || !tld) {
        continue;
      }

      domains.push({
        name: name.toLowerCase(),
        tld: tld.toLowerCase(),
        reason: match?.[3]?.trim() || undefined,
      });
    }

    return domains;
  }

  /**
   * Tell whether an error thrown by `fetch` means the server refused the
   * connection. Node's fetch reports network failures as a generic
   * "fetch failed" error whose `cause` carries the system error code, so both
   * the error itself and its cause are inspected.
   */
  private _isConnectionRefused(error: unknown): boolean {
    if (!(error instanceof Error)) {
      return false;
    }
    if (error.message.includes('ECONNREFUSED')) {
      return true;
    }

    const cause: unknown = (error as { cause?: unknown }).cause;
    if (typeof cause !== 'object' || cause === null) {
      return false;
    }

    const { code, message } = cause as { code?: unknown; message?: unknown };
    return (
      code === 'ECONNREFUSED' ||
      (typeof message === 'string' && message.includes('ECONNREFUSED'))
    );
  }

  /**
   * Make HTTP request with timeout and error handling.
   *
   * @returns An object in `QwenResponse` shape (validated by the caller).
   * @throws QwenInferenceError for HTTP errors, malformed responses,
   *   timeouts, refused connections and any other failure.
   */
  private async _makeRequest(payload: QwenRequest): Promise<unknown> {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), this.timeoutMs);

    try {
      const headers: Record<string, string> = {
        'Content-Type': 'application/json',
      };

      if (this.apiKey) {
        headers['Authorization'] = `Bearer ${this.apiKey}`;
      }

      // An explicit temperature of 0 must reach the server unchanged; only a
      // missing or non-finite value falls back to the default.
      const temperature =
        typeof payload.temperature === 'number' && Number.isFinite(payload.temperature)
          ? payload.temperature
          : 0.7;

      // llama.cpp uses OpenAI-compatible /v1/completions endpoint
      const llamaPayload = {
        prompt: payload.prompt,
        // `||` on purpose: a 0 or NaN budget is never meaningful.
        max_tokens: payload.max_tokens || 512,
        temperature,
        stop: ['Query:', '\n\nQuery:'], // Stop when model starts new query
      };

      const response = await fetch(`${this.endpoint}/v1/completions`, {
        method: 'POST',
        headers,
        body: JSON.stringify(llamaPayload),
        signal: controller.signal,
      });

      // Handle non-200 responses
      if (!response.ok) {
        const text = await response.text().catch(() => 'Unknown error');
        throw new QwenInferenceError(
          `HTTP ${response.status}: ${text}`,
          response.status >= 500 ? 'SERVER_ERROR' : 'INVALID_RESPONSE',
          response.status,
        );
      }

      // Parse llama.cpp OpenAI-compatible response
      const json = (await response.json()) as {
        choices?: Array<{ text?: string }>;
        timings?: { predicted_ms?: number };
      };

      // Extract generated text from llama.cpp response
      if (!json.choices || !Array.isArray(json.choices) || json.choices.length === 0) {
        throw new QwenInferenceError(
          'Invalid llama.cpp response: no choices',
          'INVALID_RESPONSE',
        );
      }

      const generatedText = json.choices[0]?.text || '';
      const inferenceTimeMs = json.timings?.predicted_ms || 0;

      // Parse domains from generated text
      const domains = this._parseDomainsFromText(generatedText);

      // Return in expected QwenResponse format
      return {
        domains,
        raw_response: generatedText,
        inference_time_ms: inferenceTimeMs,
        cached: false,
      };
    } catch (error) {
      if (error instanceof QwenInferenceError) {
        throw error;
      }

      // Handle timeout (the abort triggered by the timer above)
      if (error instanceof Error && error.name === 'AbortError') {
        throw new QwenInferenceError(
          `Request timeout after ${this.timeoutMs}ms`,
          'TIMEOUT',
        );
      }

      // Handle connection refused
      if (this._isConnectionRefused(error)) {
        throw new QwenInferenceError(
          'Connection refused - inference server may be down',
          'CONNECTION_REFUSED',
        );
      }

      // Generic error
      throw new QwenInferenceError(
        error instanceof Error ? error.message : String(error),
        'SERVER_ERROR',
      );
    } finally {
      clearTimeout(timeout);
    }
  }

  /**
   * Make request with exponential backoff retry.
   *
   * Retries on 5xx errors, timeouts and other status-less failures; 4xx
   * responses are rethrown immediately. Up to `maxRetries` extra attempts are
   * made, waiting 500ms, 1000ms, 2000ms, ... between them.
   *
   * @throws QwenInferenceError from the last failed attempt.
   */
  private async _makeRequestWithRetry(payload: QwenRequest): Promise<unknown> {
    let lastError: QwenInferenceError | null = null;

    for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
      try {
        return await this._makeRequest(payload);
      } catch (error) {
        if (!(error instanceof QwenInferenceError)) {
          throw error;
        }

        lastError = error;

        // Don't retry on 4xx errors (bad request)
        if (error.statusCode && error.statusCode >= 400 && error.statusCode < 500) {
          throw error;
        }

        // Don't retry on last attempt
        if (attempt === this.maxRetries) {
          break;
        }

        // Exponential backoff: 500ms, 1000ms, 2000ms
        const backoffMs = 500 * Math.pow(2, attempt);
        logger.debug('Qwen request failed, retrying', {
          attempt: attempt + 1,
          maxRetries: this.maxRetries,
          backoffMs,
          error: error.message,
        });

        await new Promise((resolve) => setTimeout(resolve, backoffMs));
      }
    }

    // All retries exhausted. The fallback only guards against the loop not
    // having run at all, so a bare `null` is never thrown.
    throw lastError ?? new QwenInferenceError('Request failed without an error', 'SERVER_ERROR');
  }
}

// ═══════════════════════════════════════════════════════════════════════════
// SINGLETON INSTANCE
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Module-wide client slot.
 * `undefined` = not resolved yet, `null` = Qwen disabled or not configured.
 */
let qwenClient: QwenInferenceClient | null | undefined = undefined;

/**
 * Get Qwen client instance (singleton).
 *
 * The first call decides, based on `config.qwenInference`, whether a client
 * can be created; the outcome (client or `null`) is remembered for all later
 * calls.
 *
 * @returns The shared client, or `null` if Qwen is not configured - caller
 *   should fall back to other sources.
 */
export function getQwenClient(): QwenInferenceClient | null {
  // Already resolved on an earlier call
  if (qwenClient !== undefined) {
    return qwenClient;
  }

  const settings = config.qwenInference;

  // Disabled or no endpoint: remember that there is nothing to create
  if (!settings?.enabled || !settings.endpoint) {
    return (qwenClient = null);
  }

  const { endpoint, apiKey, timeoutMs, maxRetries } = settings;

  const client = new QwenInferenceClient(endpoint, {
    apiKey,
    timeoutMs,
    maxRetries,
    cacheTtl: 3600, // 1 hour cache
  });
  qwenClient = client;

  logger.info('Qwen inference client initialized', {
    endpoint,
    timeoutMs,
    maxRetries,
  });

  return client;
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/dorukardahan/domain-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

qwen-inference.ts • 23.1 KiB

/**
 * Qwen Inference API client.
 *
 * Optional AI-powered domain suggestions using fine-tuned Qwen 2.5-7B model.
 * Falls back gracefully if endpoint is not configured or unavailable.
 *
 * This MCP does NOT require Qwen to function - it's an optional enhancement
 * for self-hosted users who deploy the inference server on their VPS.
 */

import { z } from 'zod';
import { config } from '../config.js';
import { logger } from '../utils/logger.js';
import { TtlCache } from '../utils/cache.js';
import { CircuitBreaker, CircuitOpenError } from '../utils/circuit-breaker.js';

// ═══════════════════════════════════════════════════════════════════════════
// STYLE CONFIGURATIONS
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Prompt ingredients for one naming style: a one-line description plus the
 * technique, example and constraint bullet lists that get rendered into the
 * prompt sent to the model.
 */
interface StylePromptConfig {
  description: string;
  techniques: string[];
  examples: string[];
  constraints: string[];
}

/**
 * Naming-style prompt table, keyed by style name.
 * Every entry steers the model toward a different family of naming techniques.
 */
const STYLE_PROMPTS: Record<string, StylePromptConfig> = {
  // Invented, brand-like words.
  brandable: {
    description: 'Create memorable invented words that sound like real brands',
    techniques: [
      'Portmanteau blending (Instagram = Instant + Telegram)',
      'Modern suffixes: -ly, -ify, -io, -ai, -eo, -va, -ra',
      'Phonetic spellings (Lyft, Fiverr, Tumblr)',
      'Consonant clusters that are pronounceable (Spotify, Stripe)',
      'Neologisms - completely new words that sound natural',
      'Letter substitution (K for C, X for Ex, Z for S)',
    ],
    examples: [
      'spotify - blend of "spot" + made-up suffix',
      'calendly - "calendar" + trendy "-ly" suffix',
      'shopify - "shop" + tech suffix "-ify"',
      'zapier - invented word from "zap" concept',
      'airtable - compound of "air" + "table"',
      'notion - single real word reimagined',
      'figma - invented word, short and punchy',
      'vercel - invented, sounds like "versatile"',
    ],
    constraints: [
      'MUST be pronounceable (say it out loud test)',
      'Length: 4-10 characters ideal',
      'NO generic descriptive names like "fastapp" or "quickdata"',
      'AVOID real dictionary words unless reimagined',
    ],
  },

  // Self-explanatory, professional compounds.
  descriptive: {
    description: 'Clear, professional names that convey meaning immediately',
    techniques: [
      'Compound words that describe the product',
      'Professional suffixes: -hq, -hub, -base, -stack, -cloud',
      'Action + object patterns (Dropbox, Mailchimp)',
      'Industry term + qualifier (Salesforce, Workday)',
    ],
    examples: [
      'dropbox - action + container',
      'mailchimp - service + mascot',
      'hubspot - central + location',
      'zendesk - philosophy + workspace',
      'basecamp - foundation + project term',
    ],
    constraints: [
      'Should be understandable at first glance',
      'Length: 5-12 characters',
      'Must relate to the product/service',
    ],
  },

  // Very short names.
  short: {
    description: 'Ultra-short, punchy names (4-7 characters max)',
    techniques: [
      'Truncation (removing vowels or syllables)',
      'Single syllable words',
      'Acronym-like patterns',
      'Sound-based (onomatopoeia)',
      'Prefix/suffix removal',
    ],
    examples: [
      'uber - short, powerful',
      'lyft - phonetic spelling, short',
      'snap - one syllable, action',
      'zoom - one syllable, energy',
      'trello - invented, compact',
      'asana - borrowed word, elegant',
      'jira - short, distinctive',
    ],
    constraints: [
      'MAXIMUM 7 characters',
      'MINIMUM 3 characters',
      'Must be easy to type and remember',
      'One or two syllables preferred',
    ],
  },

  // Experimental wordplay.
  creative: {
    description: 'Maximum experimentation - wordplay, unusual sounds, artistic names',
    techniques: [
      'Unusual letter combinations',
      'Phonetic playfulness',
      'Onomatopoeia (sounds like what it does)',
      'Mythological or invented language references',
      'Reversed words or misspellings',
      'Sound symbolism (sharp sounds for speed, soft for comfort)',
    ],
    examples: [
      'skype - sky + type blended',
      'twitch - evocative action word',
      'flickr - vowel removal style',
      'tumblr - vowel removal style',
      'hulu - completely invented, playful',
      'etsy - invented, crafty feel',
      'vimeo - anagram of "movie"',
    ],
    constraints: [
      'Can break conventional rules',
      'Must still be pronounceable',
      'Should evoke emotion or imagery',
    ],
  },
};

// ═══════════════════════════════════════════════════════════════════════════
// TYPES & SCHEMAS
// ═══════════════════════════════════════════════════════════════════════════

/**
 * One domain suggestion produced by the Qwen model: a non-empty `name`, a
 * non-empty `tld`, and an optional free-text `reason`.
 */
export const QwenDomainSchema = z.object({
  name: z.string().min(1),
  tld: z.string().min(1),
  reason: z.optional(z.string()),
});

/** Static type inferred from {@link QwenDomainSchema}. */
export type QwenDomain = z.infer<typeof QwenDomainSchema>;

/**
 * Shape of a request payload for the Qwen inference API.
 *
 * Only `prompt` is required; `style`, `max_tokens` and `temperature` are
 * optional tuning knobs with the bounds declared below.
 */
export const QwenRequestSchema = z.object({
  prompt: z.string().min(10).max(1000),
  style: z.optional(z.enum(['brandable', 'descriptive', 'short', 'creative'])),
  max_tokens: z.optional(z.number().int().min(128).max(1024)),
  temperature: z.optional(z.number().min(0.1).max(1.5)),
});

/** Static type inferred from {@link QwenRequestSchema}. */
export type QwenRequest = z.infer<typeof QwenRequestSchema>;

/**
 * Normalised response of one inference call: the parsed suggestions, the raw
 * completion text they were parsed from, the reported inference time in
 * milliseconds, and a flag telling whether the result was served from a cache.
 */
export const QwenResponseSchema = z.object({
  domains: QwenDomainSchema.array(),
  raw_response: z.string(),
  inference_time_ms: z.number(),
  cached: z.boolean(),
});

/** Static type inferred from {@link QwenResponseSchema}. */
export type QwenResponse = z.infer<typeof QwenResponseSchema>;

/**
 * Optional background about the project or idea being named.
 * Every field is optional; whatever is present is used to make the
 * suggestions more relevant.
 */
export interface QwenContext {
  // --- What is being named ---------------------------------------------

  /** Name of the project (e.g. when a codebase is being analysed). */
  projectName?: string;
  /** Free-text description of the project or business. */
  description?: string;
  /** Industry, either detected or given explicitly. */
  industry?: string;
  /** URL of the repository, kept as additional context. */
  repositoryUrl?: string;

  // --- Vocabulary to draw from -----------------------------------------

  /** Keywords that should be blended into or reflected by the names. */
  keywords?: string[];
  /** Words that capture the desired brand feel. */
  brandWords?: string[];

  // --- Length bounds -----------------------------------------------------

  /** Shortest acceptable domain name, in characters. */
  minLength?: number;
  /** Longest acceptable domain name, in characters. */
  maxLength?: number;
}

/**
 * Arguments accepted by a Qwen suggestion request.
 * Only `query` is mandatory.
 */
export interface QwenSuggestOptions {
  /** What the domain names are for. */
  query: string;
  /** Naming style to steer the model with. */
  style?: 'brandable' | 'descriptive' | 'short' | 'creative';
  /** Target top-level domain. */
  tld?: string;
  /** How many suggestions to ask the model for. */
  max_suggestions?: number;
  /** Sampling temperature forwarded to the model. */
  temperature?: number;
  /** Extra project/idea context for more relevant suggestions. */
  context?: QwenContext;
}

/**
 * Error raised for any failed Qwen inference call.
 *
 * `code` classifies the failure; `statusCode` carries the HTTP status when the
 * failure came from an HTTP response and is `undefined` otherwise.
 */
export class QwenInferenceError extends Error {
  /** Machine-readable failure category. */
  public readonly code:
    | 'TIMEOUT'
    | 'CONNECTION_REFUSED'
    | 'INVALID_RESPONSE'
    | 'SERVER_ERROR'
    | 'NOT_CONFIGURED';

  /** HTTP status of the failed response, when there was one. */
  public readonly statusCode?: number;

  constructor(message: string, code: QwenInferenceError['code'], statusCode?: number) {
    super(message);
    this.code = code;
    this.statusCode = statusCode;
    this.name = 'QwenInferenceError';
  }
}

// ═══════════════════════════════════════════════════════════════════════════
// CLIENT IMPLEMENTATION
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Qwen Inference API client with retry logic, caching and a circuit breaker.
 *
 * Sends prompts to an OpenAI-compatible `/v1/completions` endpoint and parses
 * the generated text into domain suggestions. `suggest` converts every failure
 * into a `null` result so callers can fall back to other suggestion sources.
 */
export class QwenInferenceClient {
  /**
   * Matches one bullet line of model output, e.g. `- name.tld - reason`.
   * Accepts `-` or `*` bullets and an em dash, en dash, hyphen or colon as
   * the separator between the domain and its reason.
   */
  private static readonly DOMAIN_LINE_PATTERN =
    /^[-*]\s*([a-z0-9-]+)\.([a-z]+)\s*[—–\-:]\s*(.+)$/i;

  private readonly endpoint: string;
  private readonly apiKey?: string;
  private readonly timeoutMs: number;
  private readonly maxRetries: number;
  private readonly cache: TtlCache<QwenResponse>;
  private readonly circuitBreaker: CircuitBreaker;

  /**
   * @param endpoint - Base URL of the inference server; trailing slashes are stripped.
   * @param options - Optional settings:
   *   - `apiKey`: sent as a Bearer token when present.
   *   - `timeoutMs`: per-request timeout (default 15000).
   *   - `maxRetries`: retries after the first attempt (default 2; `0` disables retries).
   *   - `cacheTtl`: TTL passed to the response cache (default 3600).
   */
  constructor(
    endpoint: string,
    options: {
      apiKey?: string;
      timeoutMs?: number;
      maxRetries?: number;
      cacheTtl?: number;
    } = {},
  ) {
    this.endpoint = endpoint.replace(/\/+$/, ''); // Remove trailing slashes
    this.apiKey = options.apiKey;

    // `||` is deliberate: a zero timeout would abort every request immediately.
    this.timeoutMs = options.timeoutMs || 15000;

    // An explicit 0 must be honoured (retries disabled). Negative or non-finite
    // values fall back to the default so the retry loop always runs at least once.
    const retries = options.maxRetries;
    this.maxRetries =
      typeof retries === 'number' && Number.isFinite(retries) && retries >= 0
        ? Math.floor(retries)
        : 2;

    this.cache = new TtlCache<QwenResponse>(options.cacheTtl || 3600, 500);

    // Circuit breaker: 5 failures in 60s → open for 30s
    this.circuitBreaker = new CircuitBreaker({
      name: 'qwen_inference',
      failureThreshold: 5,
      resetTimeoutMs: 30_000,
      failureWindowMs: 60_000,
      successThreshold: 2,
    });
  }

  /**
   * Generate domain suggestions using Qwen model.
   *
   * Returns suggestions or null if Qwen is unavailable.
   * Graceful degradation - caller should fall back to other sources.
   *
   * @param options - Query, style and optional context; see `QwenSuggestOptions`.
   * @returns Parsed domains (possibly empty), or `null` when the request failed,
   *   the circuit breaker is open, or the response failed validation.
   */
  async suggest(options: QwenSuggestOptions): Promise<QwenDomain[] | null> {
    const { query, style = 'brandable', tld = 'com', max_suggestions = 10, temperature = 0.7, context } = options;

    // Build enhanced prompt with context
    const prompt = this._buildPrompt(query, style, tld, max_suggestions, context);

    // Check cache first. The prompt already encodes query, style, tld, count
    // and context, so prompt + temperature identifies the request.
    const cacheKey = `${prompt}:${temperature}`;
    const cached = this.cache.get(cacheKey);
    if (cached) {
      logger.debug('Qwen cache hit', { query, cached_domains: cached.domains.length });
      return cached.domains;
    }

    // Make request with retry + circuit breaker
    try {
      const response = await this.circuitBreaker.execute(() =>
        this._makeRequestWithRetry({
          prompt,
          style,
          max_tokens: this._calculateMaxTokens(max_suggestions, style),
          temperature,
        })
      );

      // Validate response
      const validated = QwenResponseSchema.safeParse(response);
      if (!validated.success) {
        logger.warn('Qwen returned invalid response format', {
          error: validated.error.message,
        });
        return null;
      }

      // Cache only useful responses: an empty parse is usually a one-off bad
      // generation and should not be replayed until the TTL expires.
      if (validated.data.domains.length > 0) {
        this.cache.set(cacheKey, validated.data);
      }

      logger.info('Qwen inference success', {
        query,
        domains: validated.data.domains.length,
        inference_ms: validated.data.inference_time_ms,
        cached: validated.data.cached,
      });

      return validated.data.domains;
    } catch (error) {
      // Circuit breaker open - fail fast
      if (error instanceof CircuitOpenError) {
        logger.debug('Qwen circuit breaker open, skipping', {
          resetAt: new Date(error.resetAt).toISOString(),
        });
        return null;
      }

      if (error instanceof QwenInferenceError) {
        logger.warn('Qwen inference failed', {
          code: error.code,
          message: error.message,
          statusCode: error.statusCode,
        });
      } else {
        logger.warn('Qwen inference error', {
          error: error instanceof Error ? error.message : String(error),
        });
      }

      return null; // Graceful degradation
    }
  }

  /**
   * Build comprehensive prompt for Qwen model based on style and context.
   *
   * Uses structured blocks to guide the model toward generating
   * truly inventive, brandable domain names. Unknown styles fall back to
   * the `brandable` configuration.
   */
  private _buildPrompt(
    query: string,
    style: string,
    tld: string,
    maxSuggestions: number,
    context?: QwenContext,
  ): string {
    const styleConfig = STYLE_PROMPTS[style] || STYLE_PROMPTS.brandable;

    // Build the prompt in structured blocks
    const blocks: string[] = [];

    // System block - explains the task and techniques
    blocks.push(this._buildSystemBlock(styleConfig));

    // Context block - project/idea specific information
    if (context) {
      blocks.push(this._buildContextBlock(context));
    }

    // Task block - the actual request
    blocks.push(this._buildTaskBlock(query, tld, maxSuggestions, context));

    // Format block - specifies output format
    blocks.push(this._buildFormatBlock(tld));

    return blocks.join('\n\n');
  }

  /**
   * Build the system instruction block explaining techniques and style.
   *
   * Lists every technique and constraint, but only the first five examples.
   */
  private _buildSystemBlock(styleConfig: typeof STYLE_PROMPTS.brandable): string {
    const lines: string[] = [
      '=== DOMAIN NAME GENERATION ===',
      '',
      `Style: ${styleConfig?.description || 'Create memorable, brandable domain names'}`,
      '',
      'TECHNIQUES TO USE:',
    ];

    const techniques = styleConfig?.techniques || [];
    for (const technique of techniques) {
      lines.push(`• ${technique}`);
    }

    lines.push('', 'EXAMPLE NAMES (for inspiration, NOT to copy):');
    const examples = styleConfig?.examples || [];
    for (const example of examples.slice(0, 5)) {
      lines.push(`• ${example}`);
    }

    lines.push('', 'CONSTRAINTS:');
    const constraints = styleConfig?.constraints || [];
    for (const constraint of constraints) {
      lines.push(`• ${constraint}`);
    }

    return lines.join('\n');
  }

  /**
   * Build the context block from project/idea information.
   *
   * Only fields that are present are emitted; the length requirement is
   * always emitted and defaults to 4-12 characters.
   */
  private _buildContextBlock(context: QwenContext): string {
    const lines: string[] = ['=== CONTEXT ==='];

    if (context.projectName) {
      lines.push(`Project Name: ${context.projectName}`);
    }

    if (context.description) {
      lines.push(`Description: ${context.description}`);
    }

    if (context.industry) {
      lines.push(`Industry: ${context.industry}`);
    }

    if (context.keywords && context.keywords.length > 0) {
      lines.push(`Keywords to incorporate: ${context.keywords.join(', ')}`);
    }

    if (context.brandWords && context.brandWords.length > 0) {
      lines.push(`Brand inspiration words: ${context.brandWords.join(', ')}`);
    }

    const minLen = context.minLength || 4;
    const maxLen = context.maxLength || 12;
    lines.push(`Length requirement: ${minLen}-${maxLen} characters`);

    return lines.join('\n');
  }

  /**
   * Build the task block specifying what to generate.
   *
   * Rules are numbered from the final list so the sequence stays contiguous
   * whichever optional, context-specific rules are present.
   */
  private _buildTaskBlock(
    query: string,
    tld: string,
    count: number,
    context?: QwenContext,
  ): string {
    const rules: string[] = [
      'INVENT NEW WORDS - do not use common dictionary words directly',
      'Each name must be UNIQUE and CREATIVE',
      'Names must be PRONOUNCEABLE (read it aloud)',
      'NO generic patterns like "fastX", "quickY", "proZ"',
      'Think like a startup founder naming their company',
    ];

    // Add context-specific guidance
    if (context?.keywords && context.keywords.length > 0) {
      rules.push(`Try to BLEND or TRANSFORM these keywords: ${context.keywords.slice(0, 3).join(', ')}`);
    }

    if (context?.industry) {
      rules.push(`Names should feel appropriate for the ${context.industry} industry`);
    }

    const lines: string[] = [
      '=== TASK ===',
      `Generate ${count} unique, INVENTED domain names for: "${query}"`,
      `Target TLD: .${tld}`,
      '',
      'IMPORTANT RULES:',
      ...rules.map((rule, index) => `${index + 1}. ${rule}`),
    ];

    return lines.join('\n');
  }

  /**
   * Build the output format specification block.
   *
   * The block ends with `Domains:` so the model continues directly with the list.
   */
  private _buildFormatBlock(tld: string): string {
    return `=== OUTPUT FORMAT ===
Return EXACTLY in this format (one domain per line):
- name.${tld} - Brief reason why this name works

Example output:
- voxify.${tld} - Blend of "voice" + "-ify", modern tech feel
- zestora.${tld} - Invented word, energetic "zest" + melodic ending

Domains:`;
  }

  /**
   * Calculate max_tokens based on number of suggestions and style.
   *
   * Token requirements vary by style:
   * - short: ~30 tokens (4-7 char names, brief reasons)
   * - brandable: ~50 tokens (invented names, medium reasons)
   * - descriptive: ~60 tokens (compound words, detailed reasons)
   * - creative: ~70 tokens (wordplay, artistic explanations)
   *
   * Unknown styles use the brandable budget of 50 tokens per suggestion.
   *
   * NOTE(review): the request schema earlier in this file appears to limit
   * `max_tokens` to 1024, while this method can return up to 1536 — confirm
   * which limit is intended.
   */
  private _calculateMaxTokens(maxSuggestions: number, style: string): number {
    // Token budget per suggestion varies by style complexity
    const tokensPerSuggestion: Record<string, number> = {
      short: 30,       // Ultra-short names, minimal reasons
      brandable: 50,   // Invented names, moderate explanations
      descriptive: 60, // Compound words, detailed reasoning
      creative: 70,    // Wordplay, artistic explanations
    };

    const perSuggestion = tokensPerSuggestion[style] || 50;

    // Base buffer of 128 tokens plus the per-suggestion budget, capped at 1536
    return Math.min(128 + maxSuggestions * perSuggestion, 1536);
  }

  /**
   * Parse domain names from model-generated text.
   *
   * Matches the fine-tuned model's output format:
   * - domain.tld — Reason
   * - domain.tld - Reason
   *
   * Each line is trimmed before matching so indented bullets and CRLF line
   * endings are accepted. Lines that do not match are ignored.
   *
   * @returns Lower-cased name/tld pairs with their trimmed reason, in output order.
   */
  private _parseDomainsFromText(text: string): QwenDomain[] {
    const domains: QwenDomain[] = [];
    const lines = text
      .split('\n')
      .map((l) => l.trim())
      .filter((l) => l.length > 0);

    for (const line of lines) {
      const match = line.match(QwenInferenceClient.DOMAIN_LINE_PATTERN);
      if (match && match[1] && match[2]) {
        const name = match[1];
        const tld = match[2];
        const reason = match[3];
        domains.push({
          name: name.toLowerCase(),
          tld: tld.toLowerCase(),
          reason: reason?.trim(),
        });
      }
    }

    return domains;
  }

  /**
   * Report whether an error (or anything in its `cause` chain) is a refused
   * TCP connection.
   *
   * Node's built-in fetch rejects with a generic `TypeError: fetch failed`
   * and puts the system error, including its `code`, on `error.cause`, so
   * checking only the top-level message misses it.
   */
  private _isConnectionRefused(error: unknown): boolean {
    let current: unknown = error;

    // Bounded walk so a cyclic cause chain cannot loop forever.
    for (let depth = 0; depth < 5 && typeof current === 'object' && current !== null; depth++) {
      const { code, message, cause } = current as {
        code?: unknown;
        message?: unknown;
        cause?: unknown;
      };

      if (code === 'ECONNREFUSED') {
        return true;
      }
      if (typeof message === 'string' && message.includes('ECONNREFUSED')) {
        return true;
      }

      current = cause;
    }

    return false;
  }

  /**
   * Make HTTP request with timeout and error handling.
   *
   * @param payload - Prompt and sampling parameters to send.
   * @returns An object in `QwenResponse` shape built from the completion text.
   * @throws QwenInferenceError for every failure: `TIMEOUT` on abort,
   *   `CONNECTION_REFUSED` when the server is unreachable, `INVALID_RESPONSE`
   *   for 4xx replies or a reply without choices, `SERVER_ERROR` otherwise.
   */
  private async _makeRequest(payload: QwenRequest): Promise<unknown> {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), this.timeoutMs);

    try {
      const headers: Record<string, string> = {
        'Content-Type': 'application/json',
      };

      if (this.apiKey) {
        headers['Authorization'] = `Bearer ${this.apiKey}`;
      }

      // llama.cpp uses OpenAI-compatible /v1/completions endpoint
      const llamaPayload = {
        prompt: payload.prompt,
        max_tokens: payload.max_tokens || 512,
        // `??` rather than `||`: a requested temperature of 0 must not be
        // silently replaced by the default.
        temperature: payload.temperature ?? 0.7,
        stop: ['Query:', '\n\nQuery:'], // Stop when model starts new query
      };

      const response = await fetch(`${this.endpoint}/v1/completions`, {
        method: 'POST',
        headers,
        body: JSON.stringify(llamaPayload),
        signal: controller.signal,
      });

      // Handle non-200 responses
      if (!response.ok) {
        const text = await response.text().catch(() => 'Unknown error');
        throw new QwenInferenceError(
          `HTTP ${response.status}: ${text}`,
          response.status >= 500 ? 'SERVER_ERROR' : 'INVALID_RESPONSE',
          response.status,
        );
      }

      // Parse llama.cpp OpenAI-compatible response
      const json = (await response.json()) as {
        choices?: Array<{ text?: string }>;
        timings?: { predicted_ms?: number };
      };

      // Extract generated text from llama.cpp response
      if (!json.choices || !Array.isArray(json.choices) || json.choices.length === 0) {
        throw new QwenInferenceError(
          'Invalid llama.cpp response: no choices',
          'INVALID_RESPONSE',
        );
      }

      const generatedText = json.choices[0]?.text || '';
      const inferenceTimeMs = json.timings?.predicted_ms || 0;

      // Parse domains from generated text
      const domains = this._parseDomainsFromText(generatedText);

      // Return in expected QwenResponse format
      return {
        domains,
        raw_response: generatedText,
        inference_time_ms: inferenceTimeMs,
        cached: false,
      };
    } catch (error) {
      if (error instanceof QwenInferenceError) {
        throw error;
      }

      // Handle timeout
      if (error instanceof Error && error.name === 'AbortError') {
        throw new QwenInferenceError(
          `Request timeout after ${this.timeoutMs}ms`,
          'TIMEOUT',
        );
      }

      // Handle connection refused (including fetch's wrapped `cause`)
      if (this._isConnectionRefused(error)) {
        throw new QwenInferenceError(
          'Connection refused - inference server may be down',
          'CONNECTION_REFUSED',
        );
      }

      // Generic error
      throw new QwenInferenceError(
        error instanceof Error ? error.message : String(error),
        'SERVER_ERROR',
      );
    } finally {
      // The timer stays armed until the body has been read, so the timeout
      // also covers a slow response body.
      clearTimeout(timeout);
    }
  }

  /**
   * Make request with exponential backoff retry.
   *
   * Retries on 5xx errors, timeouts and other failures without a 4xx status;
   * no retry on 4xx errors. Makes at most `maxRetries + 1` attempts.
   *
   * @throws The last QwenInferenceError once attempts are exhausted, or any
   *   other error immediately.
   */
  private async _makeRequestWithRetry(payload: QwenRequest): Promise<unknown> {
    let lastError: QwenInferenceError | null = null;

    for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
      try {
        return await this._makeRequest(payload);
      } catch (error) {
        if (!(error instanceof QwenInferenceError)) {
          throw error;
        }

        lastError = error;

        // Don't retry on 4xx errors (bad request)
        if (error.statusCode && error.statusCode >= 400 && error.statusCode < 500) {
          throw error;
        }

        // Don't retry on last attempt
        if (attempt === this.maxRetries) {
          break;
        }

        // Exponential backoff: 500ms, 1000ms, 2000ms
        const backoffMs = 500 * Math.pow(2, attempt);
        logger.debug('Qwen request failed, retrying', {
          attempt: attempt + 1,
          maxRetries: this.maxRetries,
          backoffMs,
          error: error.message,
        });

        await new Promise((resolve) => setTimeout(resolve, backoffMs));
      }
    }

    // All retries exhausted. The fallback guards against ever throwing `null`.
    throw lastError ?? new QwenInferenceError('Request failed before any attempt was made', 'SERVER_ERROR');
  }
}

// ═══════════════════════════════════════════════════════════════════════════
// SINGLETON INSTANCE
// ═══════════════════════════════════════════════════════════════════════════

// Module-level singleton slot with three states:
//   undefined - getQwenClient() has not run yet
//   null      - Qwen is disabled or has no endpoint configured
//   instance  - the shared client
let qwenClient: QwenInferenceClient | null | undefined = undefined;

/**
 * Lazily create and return the shared Qwen client.
 *
 * The first call decides the outcome and every later call returns it: either
 * a client built from `config.qwenInference`, or `null` when Qwen is disabled
 * or has no endpoint, in which case the caller should use other sources.
 */
export function getQwenClient(): QwenInferenceClient | null {
  // `undefined` means no decision has been made yet; `null` is a valid cached result.
  if (qwenClient === undefined) {
    const settings = config.qwenInference;

    if (settings?.enabled && settings.endpoint) {
      qwenClient = new QwenInferenceClient(settings.endpoint, {
        apiKey: settings.apiKey,
        timeoutMs: settings.timeoutMs,
        maxRetries: settings.maxRetries,
        cacheTtl: 3600, // 1 hour cache
      });

      logger.info('Qwen inference client initialized', {
        endpoint: settings.endpoint,
        timeoutMs: settings.timeoutMs,
        maxRetries: settings.maxRetries,
      });
    } else {
      // Not configured: remember that so the config is not re-checked.
      qwenClient = null;
    }
  }

  return qwenClient;
}