import axios from 'axios';
import * as cheerio from 'cheerio';
import Database from 'better-sqlite3';
import { mkdirSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const BASE_URL = 'https://app.leg.wa.gov';
const RCW_URL = `${BASE_URL}/RCW/`;
const DB_PATH = join(__dirname, '../../data/washington-laws.db');
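// NOTE: DB_PATH assumes this file lives two levels below the project root
// (e.g. src/scrapers/); the data/ directory is created on first run.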
// Simplified scraper focused on getting the data: walks title -> chapter ->
// section index pages on the Legislature's site and stores each section in SQLite.
class SimpleRCWScraper {
  private db: Database.Database;
  private insertStmt: Database.Statement;
  constructor() {
    // Ensure the data directory exists, then open (or create) the database.
    mkdirSync(dirname(DB_PATH), { recursive: true });
    this.db = new Database(DB_PATH);
    // Create the table on first run; columns mirror the INSERT below.
    this.db.exec(`CREATE TABLE IF NOT EXISTS rcw (
      citation TEXT PRIMARY KEY,
      title_num TEXT, chapter_num TEXT, section_num TEXT,
      title_name TEXT, chapter_name TEXT, section_name TEXT,
      full_text TEXT, updated_at TEXT
    )`);
    this.insertStmt = this.db.prepare(`
      INSERT OR REPLACE INTO rcw (
        citation, title_num, chapter_num, section_num,
        title_name, chapter_name, section_name, full_text,
        updated_at
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
    `);
  }
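  // Promise-based sleep used to rate-limit requests between pages.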
  async delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
  async scrapeAll(): Promise<void> {
    console.log('Starting simplified RCW scraper...');
    // Start with just a few specific titles for testing
    const testTitles = [
      { num: '1', name: 'General Provisions' },
      { num: '9', name: 'Crimes and Punishments' },
      { num: '9A', name: 'Criminal Code' },
      { num: '46', name: 'Motor Vehicles' }
    ];
    for (const title of testTitles) {
      console.log(`\nProcessing Title ${title.num}: ${title.name}`);
      await this.scrapeTitle(title.num, title.name);
      await this.delay(1000); // Be respectful
    }
    this.printStats();
    this.db.close();
  }
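  // Fetch a title's index page and collect the unique chapter citations
  // (e.g. "46.61") from its links, then scrape each chapter in order.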
  async scrapeTitle(titleNum: string, titleName: string): Promise<void> {
    try {
      const url = `${RCW_URL}default.aspx?cite=${titleNum}`;
      const response = await axios.get(url);
      const $ = cheerio.load(response.data);
      // Find all unique chapter numbers
      const chapters = new Set<string>();
      $('a').each((_, element) => {
        const href = $(element).attr('href');
        if (href && href.includes(`cite=${titleNum}.`)) {
          const match = href.match(/cite=([\d.A-Z]+)/i);
          if (match) {
            const cite = match[1];
            // Extract chapter number (e.g., "46.61" from "46.61.502")
            const parts = cite.split('.');
            if (parts.length >= 2) {
              const chapterNum = `${parts[0]}.${parts[1]}`;
              chapters.add(chapterNum);
            }
          }
        }
      });
      console.log(` Found ${chapters.size} chapters`);
      // Process each chapter
      for (const chapterNum of Array.from(chapters).sort()) {
        await this.scrapeChapter(titleNum, titleName, chapterNum);
        await this.delay(500);
      }
    } catch (error) {
      console.error(`Error scraping title ${titleNum}:`, error);
    }
  }
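  // Fetch a chapter's index page, read the chapter name from the <title>
  // tag, collect section citations (skipping PDF links), and scrape each.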
  async scrapeChapter(titleNum: string, titleName: string, chapterNum: string): Promise<void> {
    try {
      const url = `${RCW_URL}default.aspx?cite=${chapterNum}`;
      const response = await axios.get(url);
      const $ = cheerio.load(response.data);
      // Extract chapter name (allow letters in the citation, e.g. "9A.04")
      const chapterName = $('title').text().replace(/^Chapter [\dA-Z.]+ RCW: /i, '').trim() || '';
      // Find all section links; letters must be allowed here too, or a
      // citation like "9A.04.010" is truncated to "9"
      const sections = new Set<string>();
      $('a').each((_, element) => {
        const href = $(element).attr('href');
        if (href && href.includes(`cite=${chapterNum}.`) && !href.includes('pdf=true')) {
          const match = href.match(/cite=([\d.A-Z]+)/i);
          if (match) {
            sections.add(match[1]);
          }
        }
      });
      console.log(` Chapter ${chapterNum}: ${sections.size} sections`);
      // Process each section
      let count = 0;
      for (const sectionNum of Array.from(sections).sort()) {
        await this.scrapeSection(titleNum, titleName, chapterNum, chapterName, sectionNum);
        count++;
        if (count % 10 === 0) {
          console.log(` Processed ${count}/${sections.size} sections`);
        }
        await this.delay(200); // Small delay between sections
      }
    } catch (error) {
      console.error(`Error scraping chapter ${chapterNum}:`, error);
    }
  }
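  // Fetch a single section page, strip navigation chrome, and store the
  // cleaned text keyed by its full citation (e.g. "46.61.502").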
  async scrapeSection(
    titleNum: string,
    titleName: string,
    chapterNum: string,
    chapterName: string,
    sectionNum: string
  ): Promise<void> {
    try {
      const url = `${RCW_URL}default.aspx?cite=${sectionNum}`;
      const response = await axios.get(url);
      const $ = cheerio.load(response.data);
      // Extract section name from the page title (letters allowed in citations)
      const pageTitle = $('title').text();
      const sectionName = pageTitle.replace(/^RCW [\d.A-Z]+/i, '').replace(/[—–-]/, '').trim() || '';
      // Remove all navigation and script elements
      $('script, style, nav, .navigation, .breadcrumb, .footer, .header, .menu').remove();
      // Collapse all whitespace to single spaces so the single-line cleanup
      // regexes below can match across what were originally line breaks
      let fullText = $('body').text()
        .replace(/\s+/g, ' ')
        .trim();
      // Try to extract just the law text (remove menu items, etc.)
      const startMarkers = ['RCW ' + sectionNum, sectionNum + ' >>', 'PDF'];
      for (const marker of startMarkers) {
        const idx = fullText.indexOf(marker);
        if (idx > 0) {
          fullText = fullText.substring(idx);
          break;
        }
      }
      // Clean up common navigation text
      fullText = fullText
        .replace(/Menu Website Search.*?PDF/g, '')
        .replace(/Beginning of Chapter.*?>>/g, '')
        .replace(/<<.*?>>/g, '')
        .trim();
      // Save to database
      this.insertStmt.run(
        sectionNum,
        titleNum,
        chapterNum,
        sectionNum.split('.').pop() || '',
        titleName,
        chapterName,
        sectionName,
        fullText
      );
    } catch (error) {
      console.error(`Error scraping section ${sectionNum}:`, (error as Error).message);
    }
  }
  private printStats(): void {
    const stats = this.db.prepare('SELECT COUNT(*) as count FROM rcw').get() as { count: number };
    console.log(`\nScraping complete! Total RCW sections: ${stats.count}`);
  }
}
// Run the scraper when executed directly (comparing resolved paths is more
// robust than hand-building a file:// URL, which breaks on encoded characters)
if (process.argv[1] === __filename) {
  const scraper = new SimpleRCWScraper();
  scraper.scrapeAll().catch(console.error);
}
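// Usage sketch (the filename is illustrative; substitute this file's actual name):
//   npx tsx simple-rcw-scraper.ts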