Domain Search MCP

together-inference.ts•19.9 KiB

/**
 * Together.ai Inference API client.
 *
 * Cloud-based AI inference using Together.ai's API.
 * Supports Qwen3-14B, Qwen 2.5-72B, and other models.
 *
 * This is the PRIMARY inference provider ($50/month budget).
 * Falls back to local llama.cpp (qwen-inference.ts) if unavailable.
 *
 * @see https://docs.together.ai/reference/chat-completions
 */

import { z } from 'zod';
import { config } from '../config.js';
import { logger } from '../utils/logger.js';
import { TtlCache } from '../utils/cache.js';
import { CircuitBreaker, CircuitOpenError } from '../utils/circuit-breaker.js';
import type { QwenDomain, QwenContext, QwenSuggestOptions } from './qwen-inference.js';

// ═══════════════════════════════════════════════════════════════════════════
// CONFIGURATION
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Together.ai API base URL.
 */
const TOGETHER_API_URL = 'https://api.together.ai/v1/chat/completions';

/**
 * Available models on Together.ai (sorted by quality).
 *
 * @see https://docs.together.ai/docs/serverless-models
 */
export const TOGETHER_MODELS = {
  // Primary choice based on @alicankiraz0 research (January 2026)
  'qwen3-14b': 'Qwen/Qwen3-14B',
  'qwen3-14b-instruct': 'Qwen/Qwen3-14B-Instruct',

  // Larger alternatives for higher quality
  'qwen2.5-72b': 'Qwen/Qwen2.5-72B-Instruct',
  'qwen2.5-32b': 'Qwen/Qwen2.5-32B-Instruct',

  // Smaller fallbacks
  'qwen2.5-7b': 'Qwen/Qwen2.5-7B-Instruct',

  // Default: best quality-cost balance for domain names
  'default': 'Qwen/Qwen3-14B-Instruct',
} as const;

export type TogetherModelKey = keyof typeof TOGETHER_MODELS;

// ═══════════════════════════════════════════════════════════════════════════
// STYLE CONFIGURATIONS
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Style-specific prompting configurations for domain name generation.
 * Optimized for chat-based models (system + user message format).
 */
const STYLE_CONFIGS: Record<string, {
  systemPrompt: string;
  techniques: string[];
  constraints: string[];
}> = {
  brandable: {
    systemPrompt: `You are a domain name expert specializing in creating brandable, memorable domain names for startups and tech companies.

Your task is to INVENT new words that sound like real brands. Think: Spotify, Calendly, Shopify, Zapier, Figma.

Key techniques:
- Portmanteau blending (Instagram = Instant + Telegram)
- Modern suffixes: -ly, -ify, -io, -ai, -eo, -va, -ra
- Phonetic spellings (Lyft, Fiverr, Tumblr)
- Consonant clusters that are pronounceable (Stripe, Slack)
- Neologisms - completely new words that sound natural

IMPORTANT: Every name must be PRONOUNCEABLE. Say it out loud before suggesting.`,
    techniques: [
      'Portmanteau blending',
      'Modern tech suffixes',
      'Phonetic spellings',
      'Invented neologisms',
    ],
    constraints: [
      'Must be pronounceable',
      '4-10 characters ideal',
      'No generic descriptive names',
      'Avoid dictionary words',
    ],
  },
  descriptive: {
    systemPrompt: `You are a domain name expert specializing in clear, professional names that immediately convey meaning.

Your task is to create compound words or phrases that describe the product/service.

Key techniques:
- Professional suffixes: -hq, -hub, -base, -stack, -cloud
- Action + object patterns (Dropbox, Mailchimp)
- Industry term + qualifier (Salesforce, Workday)

Focus on clarity and professionalism.`,
    techniques: [
      'Professional compound words',
      'Action + object patterns',
      'Industry terminology',
    ],
    constraints: [
      'Understandable at first glance',
      '5-12 characters',
      'Must relate to service',
    ],
  },
  short: {
    systemPrompt: `You are a domain name expert specializing in ultra-short, punchy names.

Your task is to create names that are 7 characters or LESS.

Key techniques:
- Truncation (removing vowels or syllables)
- Single syllable words
- Acronym-like patterns
- Sound-based (onomatopoeia)

Think: Uber, Lyft, Snap, Zoom, Jira.`,
    techniques: [
      'Truncation',
      'Single syllables',
      'Acronym patterns',
    ],
    constraints: [
      'MAXIMUM 7 characters',
      'MINIMUM 3 characters',
      'Easy to type',
      'One or two syllables',
    ],
  },
  creative: {
    systemPrompt: `You are an experimental domain name creator. Maximum creativity allowed.

Your task is to create unusual, artistic, playful names that stand out.

Key techniques:
- Unusual letter combinations
- Phonetic playfulness
- Onomatopoeia (sounds like what it does)
- Mythological or invented language references
- Reversed words or intentional misspellings

Think: Skype, Twitch, Flickr, Hulu, Etsy, Vimeo.`,
    techniques: [
      'Unusual combinations',
      'Phonetic playfulness',
      'Mythology references',
      'Creative misspellings',
    ],
    constraints: [
      'Can break rules',
      'Must be pronounceable',
      'Should evoke emotion',
    ],
  },
};

// ═══════════════════════════════════════════════════════════════════════════
// TYPES
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Together.ai specific options extending base options.
 */
export interface TogetherSuggestOptions extends QwenSuggestOptions {
  /** Model to use (default: qwen3-14b-instruct) */
  model?: TogetherModelKey;
}

/**
 * Custom error for Together.ai inference failures.
 */
export class TogetherInferenceError extends Error {
  constructor(
    message: string,
    public readonly code:
      | 'TIMEOUT'
      | 'RATE_LIMIT'
      | 'INVALID_RESPONSE'
      | 'SERVER_ERROR'
      | 'NOT_CONFIGURED'
      | 'INSUFFICIENT_CREDITS',
    public readonly statusCode?: number,
  ) {
    super(message);
    this.name = 'TogetherInferenceError';
  }
}

// ═══════════════════════════════════════════════════════════════════════════
// CLIENT IMPLEMENTATION
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Together.ai Inference API client with retry logic and caching.
 */
export class TogetherInferenceClient {
  private readonly apiKey: string;
  private readonly timeoutMs: number;
  private readonly maxRetries: number;
  private readonly defaultModel: string;
  private readonly cache: TtlCache<QwenDomain[]>;
  private readonly circuitBreaker: CircuitBreaker;

  constructor(
    apiKey: string,
    options: {
      timeoutMs?: number;
      maxRetries?: number;
      cacheTtl?: number;
      defaultModel?: TogetherModelKey;
    } = {},
  ) {
    this.apiKey = apiKey;
    this.timeoutMs = options.timeoutMs || 30000; // 30s for cloud API
    this.maxRetries = options.maxRetries || 2;
    this.defaultModel = TOGETHER_MODELS[options.defaultModel || 'default'];
    this.cache = new TtlCache<QwenDomain[]>(options.cacheTtl || 3600, 1000);

    // Circuit breaker: 5 failures in 60s → open for 30s
    this.circuitBreaker = new CircuitBreaker({
      name: 'together_ai',
      failureThreshold: 5,
      resetTimeoutMs: 30_000,
      failureWindowMs: 60_000,
      successThreshold: 2,
    });
  }

  /**
   * Generate domain suggestions using Together.ai.
   *
   * Returns suggestions or null if Together.ai is unavailable.
   * Graceful degradation - caller should fall back to local inference.
   */
  async suggest(options: TogetherSuggestOptions): Promise<QwenDomain[] | null> {
    const {
      query,
      style = 'brandable',
      tld = 'com',
      max_suggestions = 10,
      temperature = 0.8,
      context,
      model,
    } = options;

    // Build cache key
    const cacheKey = `together:${query}:${style}:${tld}:${max_suggestions}:${temperature}`;
    const cached = this.cache.get(cacheKey);
    if (cached) {
      logger.debug('Together.ai cache hit', { query, cached_domains: cached.length });
      return cached;
    }

    // Build messages
    const messages = this._buildMessages(query, style, tld, max_suggestions, context);
    const modelId = model ? TOGETHER_MODELS[model] : this.defaultModel;

    try {
      const startTime = Date.now();
      const response = await this.circuitBreaker.execute(() =>
        this._makeRequestWithRetry({
          model: modelId,
          messages,
          max_tokens: this._calculateMaxTokens(max_suggestions, style),
          temperature,
          stop: ['Query:', '\n\nQuery:'],
        })
      );

      const inferenceMs = Date.now() - startTime;

      // Parse domains from response
      const domains = this._parseDomainsFromText(response, tld);

      // Cache successful response
      if (domains.length > 0) {
        this.cache.set(cacheKey, domains);
      }

      logger.info('Together.ai inference success', {
        query,
        model: modelId,
        domains: domains.length,
        inference_ms: inferenceMs,
      });

      return domains;
    } catch (error) {
      // Circuit breaker open - fail fast
      if (error instanceof CircuitOpenError) {
        logger.debug('Together.ai circuit breaker open, skipping', {
          resetAt: new Date(error.resetAt).toISOString(),
        });
        return null;
      }

      if (error instanceof TogetherInferenceError) {
        logger.warn('Together.ai inference failed', {
          code: error.code,
          message: error.message,
          statusCode: error.statusCode,
        });
      } else {
        logger.warn('Together.ai inference error', {
          error: error instanceof Error ? error.message : String(error),
        });
      }

      return null; // Graceful degradation
    }
  }

  /**
   * Build chat messages for Together.ai API.
   */
  private _buildMessages(
    query: string,
    style: string,
    tld: string,
    maxSuggestions: number,
    context?: QwenContext,
  ): Array<{ role: 'system' | 'user'; content: string }> {
    const styleConfig = STYLE_CONFIGS[style] || STYLE_CONFIGS.brandable!;

    // System message with style-specific instructions
    const systemContent = styleConfig!.systemPrompt;

    // Build user message with task details
    const userParts: string[] = [];

    // Add context if provided
    if (context) {
      userParts.push('=== CONTEXT ===');
      if (context.projectName) {
        userParts.push(`Project: ${context.projectName}`);
      }
      if (context.description) {
        userParts.push(`Description: ${context.description}`);
      }
      if (context.industry) {
        userParts.push(`Industry: ${context.industry}`);
      }
      if (context.keywords && context.keywords.length > 0) {
        userParts.push(`Keywords: ${context.keywords.join(', ')}`);
      }
      userParts.push('');
    }

    // Add task
    userParts.push('=== TASK ===');
    userParts.push(`Generate ${maxSuggestions} unique domain names for: "${query}"`);
    userParts.push(`Target TLD: .${tld}`);

    // Length constraints
    const minLen = context?.minLength || 4;
    const maxLen = context?.maxLength || 12;
    userParts.push(`Length: ${minLen}-${maxLen} characters`);
    userParts.push('');

    // Output format
    userParts.push('=== OUTPUT FORMAT ===');
    userParts.push('Return EXACTLY in this format (one per line):');
    userParts.push(`- name.${tld} - Brief reason`);
    userParts.push('');
    userParts.push('Example:');
    userParts.push(`- voxify.${tld} - Blend of "voice" + "-ify", modern feel`);
    userParts.push('');
    userParts.push('Generate now:');

    return [
      { role: 'system', content: systemContent },
      { role: 'user', content: userParts.join('\n') },
    ];
  }

  /**
   * Calculate max_tokens based on number of suggestions and style.
   *
   * Style-aware token allocation reduces API costs by 20-30%.
   */
  private _calculateMaxTokens(maxSuggestions: number, style: string): number {
    // Token budget per suggestion varies by style complexity
    const tokensPerSuggestion: Record<string, number> = {
      short: 30,       // Ultra-short names, minimal reasons
      brandable: 50,   // Invented names, moderate explanations
      descriptive: 60, // Compound words, detailed reasoning
      creative: 70,    // Wordplay, artistic explanations
    };

    const perSuggestion = tokensPerSuggestion[style] || 50;

    // Reduced base buffer (128 vs 200) since we're style-aware
    // Cap at 1536 tokens (reduced from 2048) for cost efficiency
    return Math.min(128 + maxSuggestions * perSuggestion, 1536);
  }

  /**
   * Parse domain names from model-generated text.
   */
  private _parseDomainsFromText(text: string, defaultTld: string): QwenDomain[] {
    const domains: QwenDomain[] = [];
    const seen = new Set<string>();
    const lines = text.split('\n').filter((l) => l.trim());

    for (const line of lines) {
      // Match: "- domain.tld — reason" or "- domain.tld - reason"
      const match = line.match(/^[-*•]\s*([a-z0-9-]+)\.([a-z]+)\s*[—\-:]\s*(.+)$/i);
      if (match && match[1] && match[2]) {
        const name = match[1].toLowerCase();
        const tld = match[2].toLowerCase();
        const reason = match[3]?.trim();

        // Deduplicate
        const key = `${name}.${tld}`;
        if (!seen.has(key)) {
          seen.add(key);
          domains.push({ name, tld, reason });
        }
      }
    }

    return domains;
  }

  /**
   * Make HTTP request with timeout and error handling.
   */
  private async _makeRequest(payload: {
    model: string;
    messages: Array<{ role: 'system' | 'user'; content: string }>;
    max_tokens: number;
    temperature: number;
    stop?: string[];
  }): Promise<string> {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), this.timeoutMs);

    try {
      const response = await fetch(TOGETHER_API_URL, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.apiKey}`,
        },
        body: JSON.stringify(payload),
        signal: controller.signal,
      });

      // Handle non-200 responses
      if (!response.ok) {
        const text = await response.text().catch(() => 'Unknown error');

        // Check for specific error codes
        if (response.status === 429) {
          throw new TogetherInferenceError(
            'Rate limit exceeded',
            'RATE_LIMIT',
            429,
          );
        }

        if (response.status === 402) {
          throw new TogetherInferenceError(
            'Insufficient credits',
            'INSUFFICIENT_CREDITS',
            402,
          );
        }

        throw new TogetherInferenceError(
          `HTTP ${response.status}: ${text}`,
          response.status >= 500 ? 'SERVER_ERROR' : 'INVALID_RESPONSE',
          response.status,
        );
      }

      // Parse OpenAI-compatible response
      const json = (await response.json()) as {
        choices?: Array<{ message?: { content?: string } }>;
        usage?: { total_tokens?: number };
      };

      if (!json.choices || !Array.isArray(json.choices) || json.choices.length === 0) {
        throw new TogetherInferenceError(
          'Invalid response: no choices',
          'INVALID_RESPONSE',
        );
      }

      const content = json.choices[0]?.message?.content || '';

      logger.debug('Together.ai response', {
        model: payload.model,
        tokens: json.usage?.total_tokens,
        content_length: content.length,
      });

      return content;
    } catch (error) {
      if (error instanceof TogetherInferenceError) {
        throw error;
      }

      // Handle timeout
      if (error instanceof Error && error.name === 'AbortError') {
        throw new TogetherInferenceError(
          `Request timeout after ${this.timeoutMs}ms`,
          'TIMEOUT',
        );
      }

      // Generic error
      throw new TogetherInferenceError(
        error instanceof Error ? error.message : String(error),
        'SERVER_ERROR',
      );
    } finally {
      clearTimeout(timeout);
    }
  }

  /**
   * Make request with exponential backoff retry.
   */
  private async _makeRequestWithRetry(payload: {
    model: string;
    messages: Array<{ role: 'system' | 'user'; content: string }>;
    max_tokens: number;
    temperature: number;
    stop?: string[];
  }): Promise<string> {
    let lastError: TogetherInferenceError | null = null;

    for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
      try {
        return await this._makeRequest(payload);
      } catch (error) {
        if (!(error instanceof TogetherInferenceError)) {
          throw error;
        }

        lastError = error;

        // Don't retry on 4xx errors (except rate limit)
        if (
          error.statusCode &&
          error.statusCode >= 400 &&
          error.statusCode < 500 &&
          error.code !== 'RATE_LIMIT'
        ) {
          throw error;
        }

        // Don't retry on last attempt
        if (attempt === this.maxRetries) {
          break;
        }

        // Exponential backoff: 1s, 2s, 4s (longer for cloud API)
        const backoffMs = 1000 * Math.pow(2, attempt);
        logger.debug('Together.ai request failed, retrying', {
          attempt: attempt + 1,
          maxRetries: this.maxRetries,
          backoffMs,
          error: error.message,
        });

        await new Promise((resolve) => setTimeout(resolve, backoffMs));
      }
    }

    throw lastError!;
  }
}

// ═══════════════════════════════════════════════════════════════════════════
// SINGLETON INSTANCE
// ═══════════════════════════════════════════════════════════════════════════

let togetherClient: TogetherInferenceClient | null | undefined = undefined;

/**
 * Get Together.ai client instance (singleton).
 *
 * Returns null if Together.ai is not configured.
 * Caller should fall back to local qwen-inference.ts if null.
 */
export function getTogetherClient(): TogetherInferenceClient | null {
  // Return cached instance
  if (togetherClient !== undefined) {
    return togetherClient;
  }

  // Check if Together.ai is configured
  const apiKey = config.togetherAi?.apiKey;
  if (!config.togetherAi?.enabled || !apiKey) {
    togetherClient = null;
    return null;
  }

  // Create new instance
  togetherClient = new TogetherInferenceClient(apiKey, {
    timeoutMs: config.togetherAi.timeoutMs,
    maxRetries: config.togetherAi.maxRetries,
    defaultModel: config.togetherAi.defaultModel as TogetherModelKey,
    cacheTtl: 3600, // 1 hour cache
  });

  logger.info('Together.ai inference client initialized', {
    model: config.togetherAi.defaultModel || 'qwen3-14b-instruct',
    timeoutMs: config.togetherAi.timeoutMs,
  });

  return togetherClient;
}

/**
 * Check if Together.ai is configured and available.
 */
export function isTogetherConfigured(): boolean {
  return !!(config.togetherAi?.enabled && config.togetherAi.apiKey);
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/dorukardahan/domain-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

together-inference.ts•19.9 KiB

/**
 * Together.ai Inference API client.
 *
 * Cloud-based AI inference using Together.ai's API.
 * Supports Qwen3-14B, Qwen 2.5-72B, and other models.
 *
 * This is the PRIMARY inference provider ($50/month budget).
 * Falls back to local llama.cpp (qwen-inference.ts) if unavailable.
 *
 * @see https://docs.together.ai/reference/chat-completions
 */

import { z } from 'zod';
import { config } from '../config.js';
import { logger } from '../utils/logger.js';
import { TtlCache } from '../utils/cache.js';
import { CircuitBreaker, CircuitOpenError } from '../utils/circuit-breaker.js';
import type { QwenDomain, QwenContext, QwenSuggestOptions } from './qwen-inference.js';

// ═══════════════════════════════════════════════════════════════════════════
// CONFIGURATION
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Together.ai API base URL.
 */
const TOGETHER_API_URL = 'https://api.together.ai/v1/chat/completions';

/**
 * Available models on Together.ai (sorted by quality).
 *
 * @see https://docs.together.ai/docs/serverless-models
 */
export const TOGETHER_MODELS = {
  // Primary choice based on @alicankiraz0 research (January 2026)
  'qwen3-14b': 'Qwen/Qwen3-14B',
  'qwen3-14b-instruct': 'Qwen/Qwen3-14B-Instruct',

  // Larger alternatives for higher quality
  'qwen2.5-72b': 'Qwen/Qwen2.5-72B-Instruct',
  'qwen2.5-32b': 'Qwen/Qwen2.5-32B-Instruct',

  // Smaller fallbacks
  'qwen2.5-7b': 'Qwen/Qwen2.5-7B-Instruct',

  // Default: best quality-cost balance for domain names
  'default': 'Qwen/Qwen3-14B-Instruct',
} as const;

export type TogetherModelKey = keyof typeof TOGETHER_MODELS;

// ═══════════════════════════════════════════════════════════════════════════
// STYLE CONFIGURATIONS
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Style-specific prompting configurations for domain name generation.
 * Optimized for chat-based models (system + user message format).
 */
const STYLE_CONFIGS: Record<string, {
  systemPrompt: string;
  techniques: string[];
  constraints: string[];
}> = {
  brandable: {
    systemPrompt: `You are a domain name expert specializing in creating brandable, memorable domain names for startups and tech companies.

Your task is to INVENT new words that sound like real brands. Think: Spotify, Calendly, Shopify, Zapier, Figma.

Key techniques:
- Portmanteau blending (Instagram = Instant + Telegram)
- Modern suffixes: -ly, -ify, -io, -ai, -eo, -va, -ra
- Phonetic spellings (Lyft, Fiverr, Tumblr)
- Consonant clusters that are pronounceable (Stripe, Slack)
- Neologisms - completely new words that sound natural

IMPORTANT: Every name must be PRONOUNCEABLE. Say it out loud before suggesting.`,
    techniques: [
      'Portmanteau blending',
      'Modern tech suffixes',
      'Phonetic spellings',
      'Invented neologisms',
    ],
    constraints: [
      'Must be pronounceable',
      '4-10 characters ideal',
      'No generic descriptive names',
      'Avoid dictionary words',
    ],
  },
  descriptive: {
    systemPrompt: `You are a domain name expert specializing in clear, professional names that immediately convey meaning.

Your task is to create compound words or phrases that describe the product/service.

Key techniques:
- Professional suffixes: -hq, -hub, -base, -stack, -cloud
- Action + object patterns (Dropbox, Mailchimp)
- Industry term + qualifier (Salesforce, Workday)

Focus on clarity and professionalism.`,
    techniques: [
      'Professional compound words',
      'Action + object patterns',
      'Industry terminology',
    ],
    constraints: [
      'Understandable at first glance',
      '5-12 characters',
      'Must relate to service',
    ],
  },
  short: {
    systemPrompt: `You are a domain name expert specializing in ultra-short, punchy names.

Your task is to create names that are 7 characters or LESS.

Key techniques:
- Truncation (removing vowels or syllables)
- Single syllable words
- Acronym-like patterns
- Sound-based (onomatopoeia)

Think: Uber, Lyft, Snap, Zoom, Jira.`,
    techniques: [
      'Truncation',
      'Single syllables',
      'Acronym patterns',
    ],
    constraints: [
      'MAXIMUM 7 characters',
      'MINIMUM 3 characters',
      'Easy to type',
      'One or two syllables',
    ],
  },
  creative: {
    systemPrompt: `You are an experimental domain name creator. Maximum creativity allowed.

Your task is to create unusual, artistic, playful names that stand out.

Key techniques:
- Unusual letter combinations
- Phonetic playfulness
- Onomatopoeia (sounds like what it does)
- Mythological or invented language references
- Reversed words or intentional misspellings

Think: Skype, Twitch, Flickr, Hulu, Etsy, Vimeo.`,
    techniques: [
      'Unusual combinations',
      'Phonetic playfulness',
      'Mythology references',
      'Creative misspellings',
    ],
    constraints: [
      'Can break rules',
      'Must be pronounceable',
      'Should evoke emotion',
    ],
  },
};

// ═══════════════════════════════════════════════════════════════════════════
// TYPES
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Together.ai specific options extending base options.
 */
export interface TogetherSuggestOptions extends QwenSuggestOptions {
  /** Model to use (default: qwen3-14b-instruct) */
  model?: TogetherModelKey;
}

/**
 * Custom error for Together.ai inference failures.
 */
export class TogetherInferenceError extends Error {
  constructor(
    message: string,
    public readonly code:
      | 'TIMEOUT'
      | 'RATE_LIMIT'
      | 'INVALID_RESPONSE'
      | 'SERVER_ERROR'
      | 'NOT_CONFIGURED'
      | 'INSUFFICIENT_CREDITS',
    public readonly statusCode?: number,
  ) {
    super(message);
    this.name = 'TogetherInferenceError';
  }
}

// ═══════════════════════════════════════════════════════════════════════════
// CLIENT IMPLEMENTATION
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Together.ai Inference API client with retry logic and caching.
 */
export class TogetherInferenceClient {
  private readonly apiKey: string;
  private readonly timeoutMs: number;
  private readonly maxRetries: number;
  private readonly defaultModel: string;
  private readonly cache: TtlCache<QwenDomain[]>;
  private readonly circuitBreaker: CircuitBreaker;

  constructor(
    apiKey: string,
    options: {
      timeoutMs?: number;
      maxRetries?: number;
      cacheTtl?: number;
      defaultModel?: TogetherModelKey;
    } = {},
  ) {
    this.apiKey = apiKey;
    this.timeoutMs = options.timeoutMs || 30000; // 30s for cloud API
    this.maxRetries = options.maxRetries || 2;
    this.defaultModel = TOGETHER_MODELS[options.defaultModel || 'default'];
    this.cache = new TtlCache<QwenDomain[]>(options.cacheTtl || 3600, 1000);

    // Circuit breaker: 5 failures in 60s → open for 30s
    this.circuitBreaker = new CircuitBreaker({
      name: 'together_ai',
      failureThreshold: 5,
      resetTimeoutMs: 30_000,
      failureWindowMs: 60_000,
      successThreshold: 2,
    });
  }

  /**
   * Generate domain suggestions using Together.ai.
   *
   * Returns suggestions or null if Together.ai is unavailable.
   * Graceful degradation - caller should fall back to local inference.
   */
  async suggest(options: TogetherSuggestOptions): Promise<QwenDomain[] | null> {
    const {
      query,
      style = 'brandable',
      tld = 'com',
      max_suggestions = 10,
      temperature = 0.8,
      context,
      model,
    } = options;

    // Build cache key
    const cacheKey = `together:${query}:${style}:${tld}:${max_suggestions}:${temperature}`;
    const cached = this.cache.get(cacheKey);
    if (cached) {
      logger.debug('Together.ai cache hit', { query, cached_domains: cached.length });
      return cached;
    }

    // Build messages
    const messages = this._buildMessages(query, style, tld, max_suggestions, context);
    const modelId = model ? TOGETHER_MODELS[model] : this.defaultModel;

    try {
      const startTime = Date.now();
      const response = await this.circuitBreaker.execute(() =>
        this._makeRequestWithRetry({
          model: modelId,
          messages,
          max_tokens: this._calculateMaxTokens(max_suggestions, style),
          temperature,
          stop: ['Query:', '\n\nQuery:'],
        })
      );

      const inferenceMs = Date.now() - startTime;

      // Parse domains from response
      const domains = this._parseDomainsFromText(response, tld);

      // Cache successful response
      if (domains.length > 0) {
        this.cache.set(cacheKey, domains);
      }

      logger.info('Together.ai inference success', {
        query,
        model: modelId,
        domains: domains.length,
        inference_ms: inferenceMs,
      });

      return domains;
    } catch (error) {
      // Circuit breaker open - fail fast
      if (error instanceof CircuitOpenError) {
        logger.debug('Together.ai circuit breaker open, skipping', {
          resetAt: new Date(error.resetAt).toISOString(),
        });
        return null;
      }

      if (error instanceof TogetherInferenceError) {
        logger.warn('Together.ai inference failed', {
          code: error.code,
          message: error.message,
          statusCode: error.statusCode,
        });
      } else {
        logger.warn('Together.ai inference error', {
          error: error instanceof Error ? error.message : String(error),
        });
      }

      return null; // Graceful degradation
    }
  }

  /**
   * Build chat messages for Together.ai API.
   */
  private _buildMessages(
    query: string,
    style: string,
    tld: string,
    maxSuggestions: number,
    context?: QwenContext,
  ): Array<{ role: 'system' | 'user'; content: string }> {
    const styleConfig = STYLE_CONFIGS[style] || STYLE_CONFIGS.brandable!;

    // System message with style-specific instructions
    const systemContent = styleConfig!.systemPrompt;

    // Build user message with task details
    const userParts: string[] = [];

    // Add context if provided
    if (context) {
      userParts.push('=== CONTEXT ===');
      if (context.projectName) {
        userParts.push(`Project: ${context.projectName}`);
      }
      if (context.description) {
        userParts.push(`Description: ${context.description}`);
      }
      if (context.industry) {
        userParts.push(`Industry: ${context.industry}`);
      }
      if (context.keywords && context.keywords.length > 0) {
        userParts.push(`Keywords: ${context.keywords.join(', ')}`);
      }
      userParts.push('');
    }

    // Add task
    userParts.push('=== TASK ===');
    userParts.push(`Generate ${maxSuggestions} unique domain names for: "${query}"`);
    userParts.push(`Target TLD: .${tld}`);

    // Length constraints
    const minLen = context?.minLength || 4;
    const maxLen = context?.maxLength || 12;
    userParts.push(`Length: ${minLen}-${maxLen} characters`);
    userParts.push('');

    // Output format
    userParts.push('=== OUTPUT FORMAT ===');
    userParts.push('Return EXACTLY in this format (one per line):');
    userParts.push(`- name.${tld} - Brief reason`);
    userParts.push('');
    userParts.push('Example:');
    userParts.push(`- voxify.${tld} - Blend of "voice" + "-ify", modern feel`);
    userParts.push('');
    userParts.push('Generate now:');

    return [
      { role: 'system', content: systemContent },
      { role: 'user', content: userParts.join('\n') },
    ];
  }

  /**
   * Calculate max_tokens based on number of suggestions and style.
   *
   * Style-aware token allocation reduces API costs by 20-30%.
   */
  private _calculateMaxTokens(maxSuggestions: number, style: string): number {
    // Token budget per suggestion varies by style complexity
    const tokensPerSuggestion: Record<string, number> = {
      short: 30,       // Ultra-short names, minimal reasons
      brandable: 50,   // Invented names, moderate explanations
      descriptive: 60, // Compound words, detailed reasoning
      creative: 70,    // Wordplay, artistic explanations
    };

    const perSuggestion = tokensPerSuggestion[style] || 50;

    // Reduced base buffer (128 vs 200) since we're style-aware
    // Cap at 1536 tokens (reduced from 2048) for cost efficiency
    return Math.min(128 + maxSuggestions * perSuggestion, 1536);
  }

  /**
   * Parse domain names from model-generated text.
   */
  private _parseDomainsFromText(text: string, defaultTld: string): QwenDomain[] {
    const domains: QwenDomain[] = [];
    const seen = new Set<string>();
    const lines = text.split('\n').filter((l) => l.trim());

    for (const line of lines) {
      // Match: "- domain.tld — reason" or "- domain.tld - reason"
      const match = line.match(/^[-*•]\s*([a-z0-9-]+)\.([a-z]+)\s*[—\-:]\s*(.+)$/i);
      if (match && match[1] && match[2]) {
        const name = match[1].toLowerCase();
        const tld = match[2].toLowerCase();
        const reason = match[3]?.trim();

        // Deduplicate
        const key = `${name}.${tld}`;
        if (!seen.has(key)) {
          seen.add(key);
          domains.push({ name, tld, reason });
        }
      }
    }

    return domains;
  }

  /**
   * Make HTTP request with timeout and error handling.
   */
  private async _makeRequest(payload: {
    model: string;
    messages: Array<{ role: 'system' | 'user'; content: string }>;
    max_tokens: number;
    temperature: number;
    stop?: string[];
  }): Promise<string> {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), this.timeoutMs);

    try {
      const response = await fetch(TOGETHER_API_URL, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.apiKey}`,
        },
        body: JSON.stringify(payload),
        signal: controller.signal,
      });

      // Handle non-200 responses
      if (!response.ok) {
        const text = await response.text().catch(() => 'Unknown error');

        // Check for specific error codes
        if (response.status === 429) {
          throw new TogetherInferenceError(
            'Rate limit exceeded',
            'RATE_LIMIT',
            429,
          );
        }

        if (response.status === 402) {
          throw new TogetherInferenceError(
            'Insufficient credits',
            'INSUFFICIENT_CREDITS',
            402,
          );
        }

        throw new TogetherInferenceError(
          `HTTP ${response.status}: ${text}`,
          response.status >= 500 ? 'SERVER_ERROR' : 'INVALID_RESPONSE',
          response.status,
        );
      }

      // Parse OpenAI-compatible response
      const json = (await response.json()) as {
        choices?: Array<{ message?: { content?: string } }>;
        usage?: { total_tokens?: number };
      };

      if (!json.choices || !Array.isArray(json.choices) || json.choices.length === 0) {
        throw new TogetherInferenceError(
          'Invalid response: no choices',
          'INVALID_RESPONSE',
        );
      }

      const content = json.choices[0]?.message?.content || '';

      logger.debug('Together.ai response', {
        model: payload.model,
        tokens: json.usage?.total_tokens,
        content_length: content.length,
      });

      return content;
    } catch (error) {
      if (error instanceof TogetherInferenceError) {
        throw error;
      }

      // Handle timeout
      if (error instanceof Error && error.name === 'AbortError') {
        throw new TogetherInferenceError(
          `Request timeout after ${this.timeoutMs}ms`,
          'TIMEOUT',
        );
      }

      // Generic error
      throw new TogetherInferenceError(
        error instanceof Error ? error.message : String(error),
        'SERVER_ERROR',
      );
    } finally {
      clearTimeout(timeout);
    }
  }

  /**
   * Make request with exponential backoff retry.
   */
  private async _makeRequestWithRetry(payload: {
    model: string;
    messages: Array<{ role: 'system' | 'user'; content: string }>;
    max_tokens: number;
    temperature: number;
    stop?: string[];
  }): Promise<string> {
    let lastError: TogetherInferenceError | null = null;

    for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
      try {
        return await this._makeRequest(payload);
      } catch (error) {
        if (!(error instanceof TogetherInferenceError)) {
          throw error;
        }

        lastError = error;

        // Don't retry on 4xx errors (except rate limit)
        if (
          error.statusCode &&
          error.statusCode >= 400 &&
          error.statusCode < 500 &&
          error.code !== 'RATE_LIMIT'
        ) {
          throw error;
        }

        // Don't retry on last attempt
        if (attempt === this.maxRetries) {
          break;
        }

        // Exponential backoff: 1s, 2s, 4s (longer for cloud API)
        const backoffMs = 1000 * Math.pow(2, attempt);
        logger.debug('Together.ai request failed, retrying', {
          attempt: attempt + 1,
          maxRetries: this.maxRetries,
          backoffMs,
          error: error.message,
        });

        await new Promise((resolve) => setTimeout(resolve, backoffMs));
      }
    }

    throw lastError!;
  }
}

// ═══════════════════════════════════════════════════════════════════════════
// SINGLETON INSTANCE
// ═══════════════════════════════════════════════════════════════════════════

let togetherClient: TogetherInferenceClient | null | undefined = undefined;

/**
 * Get Together.ai client instance (singleton).
 *
 * Returns null if Together.ai is not configured.
 * Caller should fall back to local qwen-inference.ts if null.
 */
export function getTogetherClient(): TogetherInferenceClient | null {
  // Return cached instance
  if (togetherClient !== undefined) {
    return togetherClient;
  }

  // Check if Together.ai is configured
  const apiKey = config.togetherAi?.apiKey;
  if (!config.togetherAi?.enabled || !apiKey) {
    togetherClient = null;
    return null;
  }

  // Create new instance
  togetherClient = new TogetherInferenceClient(apiKey, {
    timeoutMs: config.togetherAi.timeoutMs,
    maxRetries: config.togetherAi.maxRetries,
    defaultModel: config.togetherAi.defaultModel as TogetherModelKey,
    cacheTtl: 3600, // 1 hour cache
  });

  logger.info('Together.ai inference client initialized', {
    model: config.togetherAi.defaultModel || 'qwen3-14b-instruct',
    timeoutMs: config.togetherAi.timeoutMs,
  });

  return togetherClient;
}

/**
 * Check if Together.ai is configured and available.
 */
export function isTogetherConfigured(): boolean {
  return !!(config.togetherAi?.enabled && config.togetherAi.apiKey);
}