Skip to main content
Glama
page-utils.ts14.3 kB
/** * Page utilities for extracting responses from NotebookLM web UI * * This module provides functions to: * - Extract latest assistant responses from the page * - Wait for new responses with streaming detection * - Detect placeholders and loading states * - Snapshot existing responses for comparison * * Based on the Python implementation from page_utils.py */ import type { Page } from "patchright"; import { log } from "./logger.js"; // ============================================================================ // Constants // ============================================================================ /** * CSS selectors to find assistant response elements * Ordered by priority (most specific first) */ const RESPONSE_SELECTORS = [ ".to-user-container .message-text-content", "[data-message-author='bot']", "[data-message-author='assistant']", "[data-message-role='assistant']", "[data-author='assistant']", "[data-renderer*='assistant']", "[data-automation-id='response-text']", "[data-automation-id='assistant-response']", "[data-automation-id='chat-response']", "[data-testid*='assistant']", "[data-testid*='response']", "[aria-live='polite']", "[role='listitem'][data-message-author]", ]; // ============================================================================ // Helper Functions // ============================================================================ /** * Simple string hash function (for efficient comparison) */ function hashString(str: string): number { let hash = 0; for (let i = 0; i < str.length; i++) { const char = str.charCodeAt(i); hash = (hash << 5) - hash + char; hash = hash & hash; // Convert to 32bit integer } return hash; } // ============================================================================ // Main Functions // ============================================================================ /** * Snapshot the latest response text currently visible * Returns null if no response found */ export async function snapshotLatestResponse(page: Page): Promise<string | null> { return await extractLatestText(page, new Set(), false, 0); } /** * Snapshot ALL existing assistant response texts * Used to capture visible responses BEFORE submitting a new question */ export async function snapshotAllResponses(page: Page): Promise<string[]> { const allTexts: string[] = []; const primarySelector = ".to-user-container"; try { const containers = await page.$$(primarySelector); if (containers.length > 0) { for (const container of containers) { try { const textElement = await container.$(".message-text-content"); if (textElement) { const text = await textElement.innerText(); if (text && text.trim()) { allTexts.push(text.trim()); } } } catch { continue; } } log.info(`📸 [SNAPSHOT] Captured ${allTexts.length} existing responses`); } } catch (error) { log.warning(`⚠️ [SNAPSHOT] Failed to snapshot responses: ${error}`); } return allTexts; } /** * Count the number of visible assistant response elements */ export async function countResponseElements(page: Page): Promise<number> { let count = 0; for (const selector of RESPONSE_SELECTORS) { try { const elements = await page.$$(selector); if (elements.length > 0) { // Count only visible elements for (const el of elements) { try { const isVisible = await el.isVisible(); if (isVisible) { count++; } } catch { continue; } } // If we found elements with this selector, stop trying others if (count > 0) { break; } } } catch { continue; } } return count; } /** * Wait for a new assistant response with streaming detection * * This function: * 1. Polls the page for new response text * 2. Detects streaming (text changes) vs. complete (text stable) * 3. Requires text to be stable for 3 consecutive polls before returning * 4. Ignores placeholders, question echoes, and known responses * * @param page Playwright page instance * @param options Options for waiting * @returns The new response text, or null if timeout */ export async function waitForLatestAnswer( page: Page, options: { question?: string; timeoutMs?: number; pollIntervalMs?: number; ignoreTexts?: string[]; debug?: boolean; } = {} ): Promise<string | null> { const { question = "", timeoutMs = 120000, pollIntervalMs = 1000, ignoreTexts = [], debug = false, } = options; const deadline = Date.now() + timeoutMs; const sanitizedQuestion = question.trim().toLowerCase(); // Track ALL known texts as HASHES (memory efficient!) const knownHashes = new Set<number>(); for (const text of ignoreTexts) { if (typeof text === "string" && text.trim()) { knownHashes.add(hashString(text.trim())); } } if (debug) { log.debug( `🔍 [DEBUG] Waiting for NEW answer. Ignoring ${knownHashes.size} known responses` ); } let pollCount = 0; let lastCandidate: string | null = null; let stableCount = 0; // Track how many times we see the same text const requiredStablePolls = 3; // Text must be stable for 3 consecutive polls while (Date.now() < deadline) { pollCount++; // Check if NotebookLM is still "thinking" (most reliable indicator) try { const thinkingElement = await page.$('div.thinking-message'); if (thinkingElement) { const isVisible = await thinkingElement.isVisible(); if (isVisible) { if (debug && pollCount % 5 === 0) { log.debug("🔍 [DEBUG] NotebookLM still thinking (div.thinking-message visible)..."); } await page.waitForTimeout(pollIntervalMs); continue; } } } catch { // Ignore errors checking thinking state } // Extract latest NEW text const candidate = await extractLatestText( page, knownHashes, debug, pollCount ); if (candidate) { const normalized = candidate.trim(); if (normalized) { const lower = normalized.toLowerCase(); // Check if it's the question echo if (lower === sanitizedQuestion) { if (debug) { log.debug("🔍 [DEBUG] Found question echo, ignoring"); } knownHashes.add(hashString(normalized)); // Mark as seen await page.waitForTimeout(pollIntervalMs); continue; } // ======================================== // STREAMING DETECTION: Check if text is stable // ======================================== if (normalized === lastCandidate) { // Text hasn't changed - it's stable stableCount++; if (debug && stableCount === requiredStablePolls) { log.debug( `✅ [DEBUG] Text stable for ${stableCount} polls (${normalized.length} chars)` ); } } else { // Text changed - streaming in progress if (debug && lastCandidate) { log.debug( `🔄 [DEBUG] Text changed (${normalized.length} chars, was ${lastCandidate.length})` ); } stableCount = 1; lastCandidate = normalized; } // Only return once text is stable if (stableCount >= requiredStablePolls) { if (debug) { log.debug(`✅ [DEBUG] Returning stable answer (${normalized.length} chars)`); } return normalized; } } } await page.waitForTimeout(pollIntervalMs); } if (debug) { log.debug(`⏱️ [DEBUG] Timeout after ${pollCount} polls`); } return null; } /** * Extract the latest NEW response text from the page * Uses hash-based comparison for efficiency * * @param page Playwright page instance * @param knownHashes Set of hashes of already-seen response texts * @param debug Enable debug logging * @param pollCount Current poll number (for conditional logging) * @returns First NEW response text found, or null */ async function extractLatestText( page: Page, knownHashes: Set<number>, debug: boolean, pollCount: number ): Promise<string | null> { // Try the primary selector first (most specific for NotebookLM) const primarySelector = ".to-user-container"; try { const containers = await page.$$(primarySelector); const totalContainers = containers.length; // Early exit if no new containers possible if (totalContainers <= knownHashes.size) { if (debug && pollCount % 5 === 0) { log.dim( `⏭️ [EXTRACT] No new containers (${totalContainers} total, ${knownHashes.size} known)` ); } return null; } if (containers.length > 0) { // Only log every 5th poll to reduce noise if (debug && pollCount % 5 === 0) { log.dim( `🔍 [EXTRACT] Scanning ${totalContainers} containers (${knownHashes.size} known)` ); } let skipped = 0; let empty = 0; // Scan ALL containers to find the FIRST with NEW text for (let idx = 0; idx < containers.length; idx++) { const container = containers[idx]; try { const textElement = await container.$(".message-text-content"); if (textElement) { const text = await textElement.innerText(); if (text && text.trim()) { // Hash-based comparison (faster & less memory) const textHash = hashString(text.trim()); if (!knownHashes.has(textHash)) { log.success( `✅ [EXTRACT] Found NEW text in container[${idx}]: ${text.trim().length} chars` ); return text.trim(); } else { skipped++; } } else { empty++; } } } catch { continue; } } // Only log summary if debug enabled if (debug && pollCount % 5 === 0) { log.dim( `⏭️ [EXTRACT] No NEW text (skipped ${skipped} known, ${empty} empty)` ); } return null; // Don't fall through to fallback! } else { if (debug) { log.warning("⚠️ [EXTRACT] No containers found"); } } } catch (error) { log.error(`❌ [EXTRACT] Primary selector failed: ${error}`); } // Fallback: Try other selectors (only if primary selector failed/found nothing) if (debug) { log.dim("🔄 [EXTRACT] Trying fallback selectors..."); } for (const selector of RESPONSE_SELECTORS) { try { const elements = await page.$$(selector); if (elements.length === 0) continue; // Scan ALL elements to find the first with NEW text for (const element of elements) { try { // Prefer full container text when available let container = element; try { const closest = await element.evaluateHandle((el) => { return el.closest( "[data-message-author], [data-message-role], [data-author], " + "[data-testid*='assistant'], [data-automation-id*='response'], article, section" ); }); if (closest) { container = closest.asElement() || element; } } catch { container = element; } const text = await container.innerText(); if (text && text.trim() && !knownHashes.has(hashString(text.trim()))) { return text.trim(); } } catch { continue; } } } catch { continue; } } // Final fallback: JavaScript evaluation try { const fallbackText = await page.evaluate((): string | null => { // @ts-expect-error - DOM types available in browser context const unique = new Set<Element>(); // @ts-expect-error - DOM types available in browser context const isVisible = (el: Element): boolean => { // @ts-expect-error - DOM types available in browser context if (!el || !(el as HTMLElement).isConnected) return false; const rect = el.getBoundingClientRect(); if (rect.width === 0 || rect.height === 0) return false; // @ts-expect-error - window available in browser context const style = window.getComputedStyle(el as HTMLElement); if ( style.visibility === "hidden" || style.display === "none" || parseFloat(style.opacity || "1") === 0 ) { return false; } return true; }; const selectors = [ "[data-message-author]", "[data-message-role]", "[data-author]", "[data-renderer*='assistant']", "[data-testid*='assistant']", "[data-automation-id*='response']", ]; const candidates: string[] = []; for (const selector of selectors) { // @ts-expect-error - document available in browser context for (const el of document.querySelectorAll(selector)) { if (!isVisible(el)) continue; if (unique.has(el)) continue; unique.add(el); // @ts-expect-error - DOM types available in browser context const text = (el as HTMLElement).innerText || (el as HTMLElement).textContent || ""; if (!text.trim()) continue; candidates.push(text.trim()); } } if (candidates.length > 0) { return candidates[candidates.length - 1]; } return null; }); if (typeof fallbackText === "string" && fallbackText.trim()) { return fallbackText.trim(); } } catch { // Ignore evaluation errors } return null; } // ============================================================================ // Exports // ============================================================================ export default { snapshotLatestResponse, snapshotAllResponses, countResponseElements, waitForLatestAnswer, };

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/PleasePrompto/notebooklm-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server