Skip to main content
Glama

Better Playwright MCP

by livoras
dom-simhash.ts8.47 kB
/** * DOM SimHash Implementation * 用于检测DOM树结构相似性 */ import type { ElementNode } from '../types/outline.js'; export class DOMSimHash { private readonly HASH_BITS = 32; private hashCache = new Map<ElementNode, number>(); /** * 提取DOM节点的结构特征 */ extractFeatures(node: ElementNode): string[] { const features: string[] = []; // 1. 骨架特征:类型树 features.push(this.getSkeletonSignature(node)); // 2. 形状特征:宽度分布 features.push(this.getShapeSignature(node)); // 3. 类型计数特征 features.push(this.getTypeCountSignature(node)); // 4. 交互特征 if (this.hasInteractiveElements(node)) { features.push('interactive'); } // 5. 深度特征 features.push(`d${this.getMaxDepth(node)}`); return features; } /** * 获取骨架签名 - 类型树的简化表示 */ private getSkeletonSignature(node: ElementNode): string { // 根节点类型 let signature = node.type; // 子节点类型序列(只取前5个) const childTypes = node.children .slice(0, 5) .map(c => c.type) .join('+'); if (childTypes) { signature += `>${childTypes}`; } // 孙节点(只取第一个子节点的子节点) if (node.children.length > 0 && node.children[0].children.length > 0) { const grandchildTypes = node.children[0].children .slice(0, 3) .map(c => c.type) .join('+'); signature += `>${grandchildTypes}`; } return signature; } /** * 获取形状签名 - 树的宽度分布 */ private getShapeSignature(node: ElementNode): string { const widths: number[] = [node.children.length]; // 记录前3个子节点的宽度 for (let i = 0; i < Math.min(3, node.children.length); i++) { widths.push(node.children[i].children.length); } return 'w' + widths.join('-'); } /** * 获取类型计数签名 */ private getTypeCountSignature(node: ElementNode): string { const counts = new Map<string, number>(); function count(n: ElementNode, depth: number) { if (depth > 2) return; // 只统计前3层 const type = n.type; counts.set(type, (counts.get(type) || 0) + 1); n.children.forEach(c => count(c, depth + 1)); } count(node, 0); // 只记录重要类型的首字母和数量 const important = ['button', 'link', 'text', 'img', 'heading', 'checkbox', 'radio']; const sig = important .map(t => { const c = counts.get(t) || 0; return c > 0 ? `${t[0]}${c}` : ''; }) .filter(s => s) .join(''); return sig || 'empty'; } /** * 检查是否有交互元素 */ private hasInteractiveElements(node: ElementNode): boolean { if (node.line.includes('[cursor=pointer]')) { return true; } // 递归检查子节点(限制深度) function check(n: ElementNode, depth: number): boolean { if (depth > 2) return false; if (n.type === 'button' || n.type === 'link' || n.type === 'checkbox' || n.type === 'radio') { return true; } return n.children.some(c => check(c, depth + 1)); } return check(node, 0); } /** * 获取最大深度 */ private getMaxDepth(node: ElementNode): number { if (node.children.length === 0) return 0; return 1 + Math.max(...node.children.map(c => this.getMaxDepth(c))); } /** * DJB2 hash算法 - 简单快速 */ private djb2Hash(str: string): number { let hash = 5381; for (let i = 0; i < str.length; i++) { hash = ((hash << 5) + hash) + str.charCodeAt(i); // hash * 33 + c } return hash >>> 0; // 转为无符号整数 } /** * 特征权重 */ private getWeight(feature: string): number { if (feature.includes('>')) return 5; // 骨架特征最重要 if (feature.startsWith('w')) return 3; // 形状特征 if (feature.startsWith('d')) return 2; // 深度特征 return 1; } /** * 计算SimHash值 */ computeHash(node: ElementNode): number { // 缓存检查 if (this.hashCache.has(node)) { return this.hashCache.get(node)!; } const features = this.extractFeatures(node); const vector = new Array(this.HASH_BITS).fill(0); for (const feature of features) { const hash = this.djb2Hash(feature); const weight = this.getWeight(feature); for (let i = 0; i < this.HASH_BITS; i++) { const bit = (hash >> i) & 1; vector[i] += bit ? weight : -weight; } } // 降维:正数->1, 负数->0 let simhash = 0; for (let i = 0; i < this.HASH_BITS; i++) { if (vector[i] > 0) { simhash |= (1 << i); } } this.hashCache.set(node, simhash); return simhash; } /** * 计算汉明距离 */ hammingDistance(hash1: number, hash2: number): number { let xor = hash1 ^ hash2; let count = 0; while (xor) { count += xor & 1; xor >>>= 1; } return count; } /** * 判断两个节点是否相似 */ areSimilar(node1: ElementNode, node2: ElementNode, threshold = 3): boolean { const hash1 = this.computeHash(node1); const hash2 = this.computeHash(node2); const distance = this.hammingDistance(hash1, hash2); return distance <= threshold; } /** * 查找最大相似子序列 * 不要求从第一个元素开始 */ findSimilarSequence(nodes: ElementNode[]): { start: number; end: number; samples: ElementNode[]; baseHash: number; } | null { if (nodes.length < 3) return null; let maxLen = 0; let bestStart = 0; let bestEnd = 0; let bestBaseHash = 0; // 滑动窗口找最长相似序列 for (let i = 0; i <= nodes.length - 3; i++) { // 修复:允许检查最后3个节点 const baseHash = this.computeHash(nodes[i]); let j = i + 1; let similarCount = 1; // 向后扩展相似序列 while (j < nodes.length) { const hash = this.computeHash(nodes[j]); const distance = this.hammingDistance(baseHash, hash); if (distance <= 3) { // 相似,继续扩展 similarCount++; if (similarCount >= 3 && similarCount > maxLen) { // 修复:只在>=3时更新 maxLen = similarCount; bestStart = i; bestEnd = j; bestBaseHash = baseHash; } } else if (similarCount >= 3) { // 已找到足够长的序列,可以结束 break; } j++; } } if (maxLen >= 3) { return { start: bestStart, end: bestEnd, samples: nodes.slice(bestStart, bestEnd + 1), baseHash: bestBaseHash }; } return null; } /** * 批量查找所有相似序列 */ findAllSimilarSequences(nodes: ElementNode[]): Array<{ start: number; end: number; count: number; sample: ElementNode; }> { const sequences: Array<{ start: number; end: number; count: number; sample: ElementNode; }> = []; const processed = new Set<number>(); // 收集所有未处理节点,不管位置 while (processed.size < nodes.length) { const unprocessed = []; const indexMap = []; for (let i = 0; i < nodes.length; i++) { if (!processed.has(i)) { unprocessed.push(nodes[i]); indexMap.push(i); } } if (unprocessed.length < 3) { break; } const seq = this.findSimilarSequence(unprocessed); if (seq) { const actualStart = indexMap[seq.start]; const actualEnd = indexMap[seq.end]; sequences.push({ start: actualStart, end: actualEnd, count: seq.end - seq.start + 1, sample: nodes[actualStart] }); // 标记已处理的节点 for (let k = seq.start; k <= seq.end; k++) { processed.add(indexMap[k]); } } else { // 没有找到序列,退出避免无限循环 break; } } return sequences; } /** * 清除缓存 */ clearCache(): void { this.hashCache.clear(); } }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/livoras/better-playwright-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server