NotebookLM MCP Server

page-utils.ts•14 KiB

/** * Page utilities for extracting responses from NotebookLM web UI * * This module provides functions to: * - Extract latest assistant responses from the page * - Wait for new responses with streaming detection * - Detect placeholders and loading states * - Snapshot existing responses for comparison * * Based on the Python implementation from page_utils.py */ import type { Page } from "patchright"; import { log } from "./logger.js"; // ============================================================================ // Constants // ============================================================================ /** * CSS selectors to find assistant response elements * Ordered by priority (most specific first) */ const RESPONSE_SELECTORS = [ ".to-user-container .message-text-content", "[data-message-author='bot']", "[data-message-author='assistant']", "[data-message-role='assistant']", "[data-author='assistant']", "[data-renderer*='assistant']", "[data-automation-id='response-text']", "[data-automation-id='assistant-response']", "[data-automation-id='chat-response']", "[data-testid*='assistant']", "[data-testid*='response']", "[aria-live='polite']", "[role='listitem'][data-message-author]", ]; // ============================================================================ // Helper Functions // ============================================================================ /** * Simple string hash function (for efficient comparison) */ function hashString(str: string): number { let hash = 0; for (let i = 0; i < str.length; i++) { const char = str.charCodeAt(i); hash = (hash << 5) - hash + char; hash = hash & hash; // Convert to 32bit integer } return hash; } // ============================================================================ // Main Functions // ============================================================================ /** * Snapshot the latest response text currently visible * Returns null if no response found */ export async function snapshotLatestResponse(page: Page): Promise<string | null> { return await extractLatestText(page, new Set(), false, 0); } /** * Snapshot ALL existing assistant response texts * Used to capture visible responses BEFORE submitting a new question */ export async function snapshotAllResponses(page: Page): Promise<string[]> { const allTexts: string[] = []; const primarySelector = ".to-user-container"; try { const containers = await page.$$(primarySelector); if (containers.length > 0) { for (const container of containers) { try { const textElement = await container.$(".message-text-content"); if (textElement) { const text = await textElement.innerText(); if (text && text.trim()) { allTexts.push(text.trim()); } } } catch { continue; } } log.info(`📸 [SNAPSHOT] Captured ${allTexts.length} existing responses`); } } catch (error) { log.warning(`⚠️ [SNAPSHOT] Failed to snapshot responses: ${error}`); } return allTexts; } /** * Count the number of visible assistant response elements */ export async function countResponseElements(page: Page): Promise<number> { let count = 0; for (const selector of RESPONSE_SELECTORS) { try { const elements = await page.$$(selector); if (elements.length > 0) { // Count only visible elements for (const el of elements) { try { const isVisible = await el.isVisible(); if (isVisible) { count++; } } catch { continue; } } // If we found elements with this selector, stop trying others if (count > 0) { break; } } } catch { continue; } } return count; } /** * Wait for a new assistant response with streaming detection * * This function: * 1. Polls the page for new response text * 2. Detects streaming (text changes) vs. complete (text stable) * 3. Requires text to be stable for 3 consecutive polls before returning * 4. Ignores placeholders, question echoes, and known responses * * @param page Playwright page instance * @param options Options for waiting * @returns The new response text, or null if timeout */ export async function waitForLatestAnswer( page: Page, options: { question?: string; timeoutMs?: number; pollIntervalMs?: number; ignoreTexts?: string[]; debug?: boolean; } = {} ): Promise<string | null> { const { question = "", timeoutMs = 120000, pollIntervalMs = 1000, ignoreTexts = [], debug = false, } = options; const deadline = Date.now() + timeoutMs; const sanitizedQuestion = question.trim().toLowerCase(); // Track ALL known texts as HASHES (memory efficient!) const knownHashes = new Set<number>(); for (const text of ignoreTexts) { if (typeof text === "string" && text.trim()) { knownHashes.add(hashString(text.trim())); } } if (debug) { log.debug( `🔍 [DEBUG] Waiting for NEW answer. Ignoring ${knownHashes.size} known responses` ); } let pollCount = 0; let lastCandidate: string | null = null; let stableCount = 0; // Track how many times we see the same text const requiredStablePolls = 3; // Text must be stable for 3 consecutive polls while (Date.now() < deadline) { pollCount++; // Check if NotebookLM is still "thinking" (most reliable indicator) try { const thinkingElement = await page.$('div.thinking-message'); if (thinkingElement) { const isVisible = await thinkingElement.isVisible(); if (isVisible) { if (debug && pollCount % 5 === 0) { log.debug("🔍 [DEBUG] NotebookLM still thinking (div.thinking-message visible)..."); } await page.waitForTimeout(pollIntervalMs); continue; } } } catch { // Ignore errors checking thinking state } // Extract latest NEW text const candidate = await extractLatestText( page, knownHashes, debug, pollCount ); if (candidate) { const normalized = candidate.trim(); if (normalized) { const lower = normalized.toLowerCase(); // Check if it's the question echo if (lower === sanitizedQuestion) { if (debug) { log.debug("🔍 [DEBUG] Found question echo, ignoring"); } knownHashes.add(hashString(normalized)); // Mark as seen await page.waitForTimeout(pollIntervalMs); continue; } // ======================================== // STREAMING DETECTION: Check if text is stable // ======================================== if (normalized === lastCandidate) { // Text hasn't changed - it's stable stableCount++; if (debug && stableCount === requiredStablePolls) { log.debug( `✅ [DEBUG] Text stable for ${stableCount} polls (${normalized.length} chars)` ); } } else { // Text changed - streaming in progress if (debug && lastCandidate) { log.debug( `🔄 [DEBUG] Text changed (${normalized.length} chars, was ${lastCandidate.length})` ); } stableCount = 1; lastCandidate = normalized; } // Only return once text is stable if (stableCount >= requiredStablePolls) { if (debug) { log.debug(`✅ [DEBUG] Returning stable answer (${normalized.length} chars)`); } return normalized; } } } await page.waitForTimeout(pollIntervalMs); } if (debug) { log.debug(`⏱️ [DEBUG] Timeout after ${pollCount} polls`); } return null; } /** * Extract the latest NEW response text from the page * Uses hash-based comparison for efficiency * * @param page Playwright page instance * @param knownHashes Set of hashes of already-seen response texts * @param debug Enable debug logging * @param pollCount Current poll number (for conditional logging) * @returns First NEW response text found, or null */ async function extractLatestText( page: Page, knownHashes: Set<number>, debug: boolean, pollCount: number ): Promise<string | null> { // Try the primary selector first (most specific for NotebookLM) const primarySelector = ".to-user-container"; try { const containers = await page.$$(primarySelector); const totalContainers = containers.length; // Early exit if no new containers possible if (totalContainers <= knownHashes.size) { if (debug && pollCount % 5 === 0) { log.dim( `⏭️ [EXTRACT] No new containers (${totalContainers} total, ${knownHashes.size} known)` ); } return null; } if (containers.length > 0) { // Only log every 5th poll to reduce noise if (debug && pollCount % 5 === 0) { log.dim( `🔍 [EXTRACT] Scanning ${totalContainers} containers (${knownHashes.size} known)` ); } let skipped = 0; let empty = 0; // Scan ALL containers to find the FIRST with NEW text for (let idx = 0; idx < containers.length; idx++) { const container = containers[idx]; try { const textElement = await container.$(".message-text-content"); if (textElement) { const text = await textElement.innerText(); if (text && text.trim()) { // Hash-based comparison (faster & less memory) const textHash = hashString(text.trim()); if (!knownHashes.has(textHash)) { log.success( `✅ [EXTRACT] Found NEW text in container[${idx}]: ${text.trim().length} chars` ); return text.trim(); } else { skipped++; } } else { empty++; } } } catch { continue; } } // Only log summary if debug enabled if (debug && pollCount % 5 === 0) { log.dim( `⏭️ [EXTRACT] No NEW text (skipped ${skipped} known, ${empty} empty)` ); } return null; // Don't fall through to fallback! } else { if (debug) { log.warning("⚠️ [EXTRACT] No containers found"); } } } catch (error) { log.error(`❌ [EXTRACT] Primary selector failed: ${error}`); } // Fallback: Try other selectors (only if primary selector failed/found nothing) if (debug) { log.dim("🔄 [EXTRACT] Trying fallback selectors..."); } for (const selector of RESPONSE_SELECTORS) { try { const elements = await page.$$(selector); if (elements.length === 0) continue; // Scan ALL elements to find the first with NEW text for (const element of elements) { try { // Prefer full container text when available let container = element; try { const closest = await element.evaluateHandle((el) => { return el.closest( "[data-message-author], [data-message-role], [data-author], " + "[data-testid*='assistant'], [data-automation-id*='response'], article, section" ); }); if (closest) { container = closest.asElement() || element; } } catch { container = element; } const text = await container.innerText(); if (text && text.trim() && !knownHashes.has(hashString(text.trim()))) { return text.trim(); } } catch { continue; } } } catch { continue; } } // Final fallback: JavaScript evaluation try { const fallbackText = await page.evaluate((): string | null => { // @ts-expect-error - DOM types available in browser context const unique = new Set<Element>(); // @ts-expect-error - DOM types available in browser context const isVisible = (el: Element): boolean => { // @ts-expect-error - DOM types available in browser context if (!el || !(el as HTMLElement).isConnected) return false; const rect = el.getBoundingClientRect(); if (rect.width === 0 || rect.height === 0) return false; // @ts-expect-error - window available in browser context const style = window.getComputedStyle(el as HTMLElement); if ( style.visibility === "hidden" || style.display === "none" || parseFloat(style.opacity || "1") === 0 ) { return false; } return true; }; const selectors = [ "[data-message-author]", "[data-message-role]", "[data-author]", "[data-renderer*='assistant']", "[data-testid*='assistant']", "[data-automation-id*='response']", ]; const candidates: string[] = []; for (const selector of selectors) { // @ts-expect-error - document available in browser context for (const el of document.querySelectorAll(selector)) { if (!isVisible(el)) continue; if (unique.has(el)) continue; unique.add(el); // @ts-expect-error - DOM types available in browser context const text = (el as HTMLElement).innerText || (el as HTMLElement).textContent || ""; if (!text.trim()) continue; candidates.push(text.trim()); } } if (candidates.length > 0) { return candidates[candidates.length - 1]; } return null; }); if (typeof fallbackText === "string" && fallbackText.trim()) { return fallbackText.trim(); } } catch { // Ignore evaluation errors } return null; } // ============================================================================ // Exports // ============================================================================ export default { snapshotLatestResponse, snapshotAllResponses, countResponseElements, waitForLatestAnswer, };

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/PleasePrompto/notebooklm-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

page-utils.ts•14 KiB