browser-devtools-mcp

compare-text-embedding.ts•14.6 KiB

import { AMAZON_BEDROCK_ENABLE, AMAZON_BEDROCK_TEXT_EMBED_MODEL_ID, AMAZON_BEDROCK_VISION_MODEL_ID, AWS_PROFILE, AWS_REGION, } from '../../../config'; import { ImageType, ScreenshotInfo } from './types'; import { cosineSimilarity, Vector } from './vector'; import { BedrockRuntimeClient, InvokeModelCommand, } from '@aws-sdk/client-bedrock-runtime'; import { fromIni } from '@aws-sdk/credential-providers'; import sharp from 'sharp'; /** * ----------------------------------------------------------------------------- * Public types * ----------------------------------------------------------------------------- */ export type CompareOptions = { /** * Vision-capable model id used to analyze images and produce * a layout-focused textual description. * * This model must support image + text inputs (multimodal). * * Examples: * - anthropic.claude-3-7-sonnet-20250219-v1:0 (Amazon Bedrock) * - gpt-4.1 / gpt-5 (OpenAI, if used) * * If omitted, a sensible default is selected based on the active * provider and environment configuration. */ visionModelId?: string; /** * Text embedding model id used to embed the generated vision * descriptions for semantic similarity comparison. * * Examples: * - amazon.titan-embed-text-v2:0 * - text-embedding-3-large * * If omitted, a provider-specific default is used. */ textEmbedModelId?: string; /** * If true, L2-normalize text embeddings before cosine similarity. * * Normalization improves numerical stability and makes similarity * scores comparable across different models and runs. * * Default: true */ normalize?: boolean; /** * Optional override for the vision prompt used to describe the UI. * * This allows callers to specialize the comparison strategy, * for example: * - Focus more on layout vs components * - Emphasize navigation structure * - Ignore colors / typography explicitly * * If omitted, a built-in layout-focused prompt is used. */ prompt?: string; /** * Maximum width or height (in pixels) for image preprocessing * before sending images to the vision model. * * The image is resized with aspect ratio preserved so that: * max(width, height) <= maxDim * * This helps: * - Avoid model input size limits * - Reduce noise from very high-resolution screenshots * - Improve consistency across different viewport sizes * * Typical values: * - 512 : fast, very layout-oriented * - 1024 : good default for UI comparison * - 2048+ : rarely useful for semantic layout matching */ maxDim?: number; /** * Image encoding format used when sending images to the vision model. * * PNG is lossless and preferred when fine structural details * (edges, alignment) matter. * * JPEG produces smaller payloads but introduces compression * artifacts that may slightly affect vision understanding. * * Default: 'png' */ imageFormat?: ImageType; /** * JPEG quality used when encoding images before sending them * to the vision model (only relevant when imageFormat = 'jpeg'). * * Lower values reduce payload size but may introduce compression * artifacts. * * Typical values: * - 75–85 : smaller payloads, usually acceptable * - 90 : high quality, recommended default * - 100 : maximum quality, larger payload, usually unnecessary */ jpegQuality?: number; }; export type CompareResult = { score: number; }; const UI_DESCRIBE_PROMPT: string = ` You are analyzing a UI screenshot to compare it against another UI. Your goal is to produce a STRUCTURAL LAYOUT FINGERPRINT that remains stable even when real data, text values, or content change. Write a concise but highly informative description using the rules below. GENERAL RULES - Describe WHAT EXISTS and WHERE IT IS, not how it looks visually. - Prefer explicit structure and hierarchy over natural language. - Be consistent and deterministic in wording. - Do NOT describe colors, fonts, themes, or exact text. - Do NOT include user data, names, numbers, timestamps, or labels. LAYOUT STRUCTURE Describe the UI from top to bottom: 1) PAGE REGIONS - Identify major regions in order: - top header / app bar - left or right sidebar - main content area - footer (if present) 2) REGION DETAILS For EACH region, describe: - Position (top / left / right / center / full-width) - Layout type (row, column, grid, split, stacked) - Whether it is fixed or scrollable - Primary purpose (navigation, content, controls, metadata) 3) COMPONENT INVENTORY List the components that exist, grouped by region: - navigation menus - tabs - tables (rows/columns, header present or not) - lists (vertical/horizontal, item density: sparse/medium/dense) - cards (count: single / few / many) - forms (inline / multi-section) - modals, drawers, overlays (present or not) 4) HIERARCHY & RELATIONSHIPS Explicitly mention: - parent → child relationships - repeated patterns (e.g. "repeating card list", "uniform table rows") - alignment relationships (sidebar + main content, header spanning all columns) 5) ABSENCE IS SIGNAL If something is NOT present, state it explicitly when relevant: - no sidebar - no table - no modal - no pagination FORMAT - Use short bullet-style sentences. - Use consistent phrasing across similar structures. - Avoid synonyms (always say “sidebar”, not sometimes “side panel”). - Keep the output under ~30 lines. Return plain text only. No markdown. `; /** * ----------------------------------------------------------------------------- * Helpers * ----------------------------------------------------------------------------- */ function _resolvePrompt(opt?: CompareOptions): string { return opt?.prompt ?? UI_DESCRIBE_PROMPT.trim(); } function _resolveMaxDim(opt?: CompareOptions): number { return typeof opt?.maxDim === 'number' && opt.maxDim > 0 ? Math.floor(opt.maxDim) : 1024; } function _resolveImageFormat(opt?: CompareOptions): 'png' | 'jpeg' { return opt?.imageFormat === 'jpeg' ? 'jpeg' : 'png'; } function _resolveJpegQuality(opt?: CompareOptions): number { const q: number | undefined = opt?.jpegQuality; return typeof q === 'number' && q >= 50 && q <= 100 ? Math.floor(q) : 90; } async function _preprocessImage( buf: Buffer, opt?: CompareOptions ): Promise<{ bytes: Buffer; mimeType: string }> { const maxDim: number = _resolveMaxDim(opt); const format: 'png' | 'jpeg' = _resolveImageFormat(opt); const jpegQuality: number = _resolveJpegQuality(opt); let img: sharp.Sharp = sharp(buf).resize({ width: maxDim, height: maxDim, fit: 'inside', withoutEnlargement: true, }); let out: Buffer; let mimeType: string; if (format === 'png') { out = await img.png().toBuffer(); mimeType = 'image/png'; } else { out = await img.jpeg({ quality: jpegQuality }).toBuffer(); mimeType = 'image/jpeg'; } return { bytes: out, mimeType }; } /** * ----------------------------------------------------------------------------- * Amazon Bedrock specific embedding implementations * ----------------------------------------------------------------------------- */ //////////////////////////////////////////////////////////////////////////////// const SUPPORTED_AMAZON_BEDROCK_TEXT_EMBED_MODEL_IDS: Set<string> = new Set([ 'amazon.titan-embed-text-v2:0', ]); const DEFAULT_AMAZON_BEDROCK_TEXT_EMBED_MODEL_ID: string = 'amazon.titan-embed-text-v2:0'; const SUPPORTED_AMAZON_BEDROCK_VISION_MODEL_IDS: Set<string> = new Set([ 'anthropic.claude-3-haiku-20240307-v1', 'anthropic.claude-3-sonnet-20240229-v1:0', 'anthropic.claude-3-5-sonnet-20241022-v2:0', 'anthropic.claude-3-7-sonnet-20250219-v1:0', 'anthropic.claude-3-opus-20240229-v1:0', 'anthropic.claude-haiku-4-5-20251001-v1:0', 'anthropic.claude-opus-4-1-20250805-v1:0', 'anthropic.claude-opus-4-5-20251101-v1:0', ]); const DEFAULT_AMAZON_BEDROCK_VISION_MODEL_ID: string = 'anthropic.claude-3-sonnet-20240229-v1:0'; let _bedrockClient: BedrockRuntimeClient | undefined; function _isAwsBedrockActive(): boolean { // Minimal "is Bedrock usable?" check: // - Amazon Bedrock must be enabled // - Region must exist // - Credentials are resolved by AWS SDK default chain or AWS_PROFILE return AMAZON_BEDROCK_ENABLE && Boolean(AWS_REGION); } function _getOrCreateBedrockClient(): BedrockRuntimeClient | undefined { if (_bedrockClient) { return _bedrockClient; } const region: string | undefined = AWS_REGION; if (!region) { return undefined; } const profile: string | undefined = AWS_PROFILE; if (profile) { _bedrockClient = new BedrockRuntimeClient({ region, credentials: fromIni({ profile }), }); return _bedrockClient; } _bedrockClient = new BedrockRuntimeClient({ region }); return _bedrockClient; } async function _invokeBedrock( client: BedrockRuntimeClient, modelId: string, payload: Record<string, unknown> ): Promise<any> { const cmd = new InvokeModelCommand({ modelId, contentType: 'application/json', accept: 'application/json', body: Buffer.from(JSON.stringify(payload)), }); const resp: any = await client.send(cmd); const raw: string = Buffer.from(resp.body as Uint8Array).toString('utf-8'); return JSON.parse(raw); } async function _describeUIWithAmazonBedrockClaude( ss: ScreenshotInfo, opt: CompareOptions | undefined, client: BedrockRuntimeClient, modelId: string ): Promise<string> { const { bytes, mimeType } = await _preprocessImage(ss.image, opt); const prompt: string = _resolvePrompt(opt); const payload = { anthropic_version: 'bedrock-2023-05-31', max_tokens: 10_000, temperature: 0, messages: [ { role: 'user', content: [ { type: 'text', text: prompt }, { type: 'image', source: { type: 'base64', media_type: mimeType, data: bytes.toString('base64'), }, }, ], }, ], }; const parsed: any = await _invokeBedrock(client, modelId, payload); const text: string | undefined = parsed?.content?.[0]?.text ?? parsed?.output_text ?? parsed?.completion; if (!text || !text.trim()) { throw new Error('Amazon Bedrock Claude returned empty description.'); } return text.trim(); } async function _embedTextWithAmazonBedrockTitan( text: string, client: BedrockRuntimeClient, modelId: string ): Promise<Vector> { const payload = { inputText: text, }; const parsed: any = await _invokeBedrock(client, modelId, payload); const emb: unknown = parsed?.embedding; if (!Array.isArray(emb) || typeof emb[0] !== 'number') { throw new Error( 'Unexpected embedding response for Amazon Bedrock Titan text embedding.' ); } return emb as number[]; } async function _describeUIWithAmazonBedrock( ss: ScreenshotInfo, opt: CompareOptions | undefined, client: BedrockRuntimeClient ): Promise<string> { const modelId: string = opt?.visionModelId ?? AMAZON_BEDROCK_VISION_MODEL_ID ?? DEFAULT_AMAZON_BEDROCK_VISION_MODEL_ID; if (!SUPPORTED_AMAZON_BEDROCK_VISION_MODEL_IDS.has(modelId)) { throw new Error( `Unsupported Amazon Bedrock vision model id: ${modelId}` ); } return await _describeUIWithAmazonBedrockClaude(ss, opt, client, modelId); } async function _embedTextWithAmazonBedrock( text: string, opt: CompareOptions | undefined, client: BedrockRuntimeClient ): Promise<Vector> { const modelId: string = opt?.textEmbedModelId ?? AMAZON_BEDROCK_TEXT_EMBED_MODEL_ID ?? DEFAULT_AMAZON_BEDROCK_TEXT_EMBED_MODEL_ID; if (!SUPPORTED_AMAZON_BEDROCK_TEXT_EMBED_MODEL_IDS.has(modelId)) { throw new Error( `Unsupported Amazon Bedrock text embedding model id: ${modelId}` ); } return await _embedTextWithAmazonBedrockTitan(text, client, modelId); } /** * ----------------------------------------------------------------------------- * Routing (LLM-agnostic) * ----------------------------------------------------------------------------- */ async function _describeUI( ss: ScreenshotInfo, opt?: CompareOptions ): Promise<string | undefined> { // Provider A: Amazon Bedrock if (_isAwsBedrockActive()) { const client: BedrockRuntimeClient | undefined = _getOrCreateBedrockClient(); if (!client) { return undefined; } return _describeUIWithAmazonBedrock(ss, opt, client); } // No providers active return undefined; } async function _embedTextVector( text: string, opt?: CompareOptions ): Promise<Vector | undefined> { // Provider A: Amazon Bedrock if (_isAwsBedrockActive()) { const client: BedrockRuntimeClient | undefined = _getOrCreateBedrockClient(); if (!client) { return undefined; } return _embedTextWithAmazonBedrock(text, opt, client); } // No providers active return undefined; } /** * ----------------------------------------------------------------------------- * Public API * ----------------------------------------------------------------------------- */ export async function compare( page: ScreenshotInfo, figma: ScreenshotInfo, options?: CompareOptions ): Promise<CompareResult | undefined> { const normalize: boolean = typeof options?.normalize === 'boolean' ? options.normalize : true; const figmaDesc: string | undefined = await _describeUI(figma, options); if (!figmaDesc) { return undefined; } const pageDesc: string | undefined = await _describeUI(page, options); if (!pageDesc) { return undefined; } const figmaVec: Vector | undefined = await _embedTextVector( figmaDesc, options ); if (!figmaVec) { return undefined; } const pageVec: Vector | undefined = await _embedTextVector( pageDesc, options ); if (!pageVec) { return undefined; } const score: number = cosineSimilarity(figmaVec, pageVec, normalize); return { score }; }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/serkan-ozal/browser-devtools-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

compare-text-embedding.ts•14.6 KiB