get_page
Retrieve a specific page from the authenticated CAEZ Urekin web by providing either a URL or a site-relative path. Returns the page content directly.
Instructions
Obtiene una página concreta del sitio.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| url | No | Absolute URL of the page to retrieve. At least one of `url` or `path` must be provided. | |
| path | No | Site-relative path of the page to retrieve. At least one of `url` or `path` must be provided. | |
Implementation Reference
- src/index.js:264-297 (handler)The main handler for the 'get_page' tool. Accepts optional 'url' or 'path', fetches the page HTML via fetchPage(), extracts readable content, and returns title, URL, path, content_text, content_markdown, and links.
server.tool( 'get_page', 'Obtiene una p�gina concreta del sitio.', { url: z.string().optional(), path: z.string().optional() }, async ({ url, path }) => { try { if (!url && !path) throw new Error('INVALID_URL'); const target = url || path; const { html, finalUrl } = await fetchPage(target); const extracted = extractReadableContent(html, finalUrl); return { content: [ { type: 'text', text: JSON.stringify({ title: extracted.title, url: finalUrl, path: new URL(finalUrl).pathname, content_text: extracted.contentText, content_markdown: `# ${extracted.title}\n\n${extracted.contentText}`, links: extracted.links }, null, 2) } ] }; } catch (error) { return mcpError(error); } } ); - src/index.js:267-270 (schema)Input schema for 'get_page': accepts optional 'url' (string) and 'path' (string) parameters.
{ url: z.string().optional(), path: z.string().optional() }, - src/index.js:264-265 (registration)Registration of 'get_page' tool via server.tool(), with description 'Obtiene una página concreta del sitio.'.
server.tool( 'get_page', - src/index.js:198-207 (helper)Helper function fetchPage() that ensures authentication, makes a GET request, and returns the HTML and final URL.
async function fetchPage(urlOrPath) { await ensureAuthenticated(); const response = await request(urlOrPath, { method: 'GET' }); if (!response.ok) { throw new Error(response.status === 404 ? 'PAGE_NOT_FOUND' : 'INTERNAL_ERROR'); } const finalUrl = response.url || normalizeUrl(urlOrPath); const html = await response.text(); return { html, finalUrl }; } - src/index.js:114-164 (helper)Helper function extractReadableContent() that parses HTML with cheerio, extracts title, sections, links, and cleaned body text.
/**
 * Distill raw HTML into readable pieces: a title, heading-grouped sections,
 * deduplicated in-site links, and whitespace-normalized body text.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} pageUrl - URL the HTML came from; used as the base for
 *   resolving relative links and as the last-resort title.
 * @returns {{title: string, contentText: string, sections: Array<{heading: string, text: string}>, links: Array<{label: string, url: string}>}}
 */
function extractReadableContent(html, pageUrl) {
  const $ = cheerio.load(html);
  // Drop non-content elements before any text extraction.
  $('script, style, noscript').remove();

  const pageTitle =
    $('title').first().text().trim() ||
    $('h1').first().text().trim() ||
    pageUrl;

  // Group the text of each heading's following siblings until the next h1/h2/h3.
  const sections = [];
  $('h1, h2, h3').each((_, node) => {
    const heading = $(node).text().trim();
    const paragraphs = [];
    for (let sibling = $(node).next(); sibling.length; sibling = sibling.next()) {
      if (['h1', 'h2', 'h3'].includes(sibling.get(0)?.tagName)) break;
      const chunk = sibling.text().trim();
      if (chunk) paragraphs.push(chunk);
    }
    if (heading || paragraphs.length) {
      sections.push({ heading, text: paragraphs.join('\n\n') });
    }
  });

  // Collect only links that resolve inside the site (prefix-match on BASE_URL).
  const links = [];
  $('a[href]').each((_, node) => {
    const href = $(node).attr('href');
    if (!href) return;
    const label = $(node).text().trim();
    try {
      const resolved = new URL(href, pageUrl).toString();
      if (resolved.startsWith(BASE_URL)) {
        links.push({ label: label || resolved, url: resolved });
      }
    } catch {
      // Malformed href — ignore it.
    }
  });

  // Collapse stray whitespace around newlines and squeeze blank-line runs.
  const contentText = $('body')
    .text()
    .replace(/\s+\n/g, '\n')
    .replace(/\n\s+/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .trim();

  return { title: pageTitle, contentText, sections, links: dedupeLinks(links) };
}