// searchQueryParser.ts
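// Parses the search query DSL (e.g. `is:fav -url:example.com #cooking pasta`)
// into a residual free-text part plus a structured `Matcher` tree, built on
// the typescript-parsec combinator library.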
import {
  alt,
  alt_sc,
  apply,
  kleft,
  kmid,
  kright,
  lrec_sc,
  opt,
  rule,
  seq,
  str,
  tok,
  Token,
  TokenPosition,
} from "typescript-parsec";
import { z } from "zod";
import { BookmarkTypes } from "./types/bookmarks";
import { Matcher } from "./types/search";
import { parseRelativeDate } from "./utils/relativeDateUtils";
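// Token kinds emitted by the lexer. The names mirror the DSL's surface
// syntax: qualifiers like `url:`, the `#` tag prefix, quoting, parens, etc.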
enum TokenType {
  And = "AND",
  Or = "OR",
  Qualifier = "QUALIFIER",
  Ident = "IDENT",
  StringLiteral = "STRING_LITERAL",
  LParen = "LPAREN",
  RParen = "RPAREN",
  Space = "SPACE",
  Hash = "HASH",
  Minus = "MINUS",
}
// Rules are attempted in order; the first match wins, so order encodes priority.
// Note: `as const` was dropped from the array literal, as a readonly tuple array
// is not assignable to the mutable annotated type.
const lexerRules: [RegExp, TokenType][] = [
  // The \b keeps the keywords from swallowing the prefix of words like "android".
  [/^\s+and\b/i, TokenType.And],
  [/^\s+or\b/i, TokenType.Or],
  [/^#/, TokenType.Hash],
  [/^(is|url|list|after|before|age|feed|title):/, TokenType.Qualifier],
  [/^"([^"]+)"/, TokenType.StringLiteral],
  [/^\(/, TokenType.LParen],
  [/^\)/, TokenType.RParen],
  [/^\s+/, TokenType.Space],
  [/^-/, TokenType.Minus],
  // This must be last, as it matches almost anything.
  [/^[^ )(]+/, TokenType.Ident],
];
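// typescript-parsec consumes input as a lazy linked list of tokens: the
// parser only ever asks a token for its `kind`, `text`, `pos`, and `next`.
// LexerToken implements that interface directly on top of `lexerRules`,
// tokenizing on demand instead of up front.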
class LexerToken implements Token<TokenType> {
  private constructor(
    private readonly input: string,
    public kind: TokenType,
    public text: string,
    public pos: TokenPosition,
  ) {}

  public static from(input: string): Token<TokenType> | undefined {
    // Seed token: never surfaced to the parser; only its `next` is used.
    const tok = new LexerToken(
      input,
      /* Kind doesn't matter for the seed token */ TokenType.Ident,
      "",
      {
        index: 0,
        rowBegin: 1,
        rowEnd: 1,
        columnBegin: 0,
        columnEnd: 0,
      },
    );
    return tok.next;
  }

  public get next(): Token<TokenType> | undefined {
    if (!this.input.length) {
      return undefined;
    }
    for (const [regex, tokenType] of lexerRules) {
      const matchRes = regex.exec(this.input);
      if (!matchRes) {
        continue;
      }
      const match = matchRes[0];
      // `pos.index` tracks the offset at which the *remaining* input starts,
      // i.e. the offset just past the token being created here.
      return new LexerToken(this.input.slice(match.length), tokenType, match, {
        index: this.pos.index + match.length,
        columnBegin: this.pos.index + 1,
        columnEnd: this.pos.index + 1 + match.length,
        // Our strings are always only one line
        rowBegin: 1,
        rowEnd: 1,
      });
    }
    // No rule matched
    throw new Error(
      `Failed to tokenize the input at position ${this.pos.index}: ${this.input[0]}`,
    );
  }
}
export interface TextAndMatcher {
  text: string;
  matcher?: Matcher;
}
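// Grammar, informally (each matcher may be negated with a leading "-"):
//   MATCHER ::= is:<ident> | <qualifier><value> | #<tag> | <ident> | "(" EXP ")"
//   EXP     ::= MATCHER ((and | or | <space>) MATCHER)*
// MATCHER and EXP are mutually recursive, hence the forward declarations below.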
const MATCHER = rule<TokenType, TextAndMatcher>();
const EXP = rule<TokenType, TextAndMatcher>();
MATCHER.setPattern(
  alt_sc(
    apply(
      seq(opt(str("-")), kright(str("is:"), tok(TokenType.Ident))),
      ([minus, ident]) => {
        switch (ident.text) {
          case "fav":
            return {
              text: "",
              matcher: { type: "favourited", favourited: !minus },
            };
          case "archived":
            return {
              text: "",
              matcher: { type: "archived", archived: !minus },
            };
          case "tagged":
            return {
              text: "",
              matcher: { type: "tagged", tagged: !minus },
            };
          case "inlist":
            return {
              text: "",
              matcher: { type: "inlist", inList: !minus },
            };
          case "link":
            return {
              text: "",
              matcher: {
                type: "type",
                typeName: BookmarkTypes.LINK,
                inverse: !!minus,
              },
            };
          case "text":
            return {
              text: "",
              matcher: {
                type: "type",
                typeName: BookmarkTypes.TEXT,
                inverse: !!minus,
              },
            };
          case "media":
            return {
              text: "",
              matcher: {
                type: "type",
                typeName: BookmarkTypes.ASSET,
                inverse: !!minus,
              },
            };
          default:
            // If the token is not known, emit it as pure text
            return {
              text: `${minus?.text ?? ""}is:${ident.text}`,
              matcher: undefined,
            };
        }
      },
    ),
    apply(
      seq(
        opt(str("-")),
        alt(tok(TokenType.Qualifier), tok(TokenType.Hash)),
        alt(
          apply(tok(TokenType.Ident), (token) => {
            return token.text;
          }),
          apply(tok(TokenType.StringLiteral), (token) => {
            // Strip the surrounding quotes
            return token.text.slice(1, -1);
          }),
        ),
      ),
      ([minus, qualifier, ident]) => {
        switch (qualifier.text) {
          case "url:":
            return {
              text: "",
              matcher: { type: "url", url: ident, inverse: !!minus },
            };
          case "title:":
            return {
              text: "",
              matcher: { type: "title", title: ident, inverse: !!minus },
            };
          case "#":
            return {
              text: "",
              matcher: { type: "tagName", tagName: ident, inverse: !!minus },
            };
          case "list:":
            return {
              text: "",
              matcher: { type: "listName", listName: ident, inverse: !!minus },
            };
          case "feed:":
            return {
              text: "",
              matcher: {
                type: "rssFeedName",
                feedName: ident,
                inverse: !!minus,
              },
            };
          case "after:":
            try {
              return {
                text: "",
                matcher: {
                  type: "dateAfter",
                  dateAfter: z.coerce.date().parse(ident),
                  inverse: !!minus,
                },
              };
            } catch {
              return {
                // If parsing the date fails, emit it as pure text
                text: (minus?.text ?? "") + qualifier.text + ident,
                matcher: undefined,
              };
            }
          case "before:":
            try {
              return {
                text: "",
                matcher: {
                  type: "dateBefore",
                  dateBefore: z.coerce.date().parse(ident),
                  inverse: !!minus,
                },
              };
            } catch {
              return {
                // If parsing the date fails, emit it as pure text
                text: (minus?.text ?? "") + qualifier.text + ident,
                matcher: undefined,
              };
            }
          case "age:":
            try {
              const { direction, amount, unit } = parseRelativeDate(ident);
              return {
                text: "",
                matcher: {
                  type: "age",
                  relativeDate: { direction, amount, unit },
                },
              };
            } catch {
              return {
                // If parsing the relative date fails, emit it as pure text
                text: (minus?.text ?? "") + qualifier.text + ident,
                matcher: undefined,
              };
            }
          default:
            // If the qualifier is not known, emit it as pure text
            return {
              text: (minus?.text ?? "") + qualifier.text + ident,
              matcher: undefined,
            };
        }
      },
    ),
    // A bare identifier or an incomplete qualifier
    apply(alt(tok(TokenType.Ident), tok(TokenType.Qualifier)), (token) => {
      return {
        text: token.text,
        matcher: undefined,
      };
    }),
    // A parenthesized sub-expression
    kmid(tok(TokenType.LParen), EXP, tok(TokenType.RParen)),
  ),
);
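// EXP chains matchers left-associatively via lrec_sc. Plain whitespace
// between two terms is treated as an implicit AND; explicit `and`/`or`
// keywords (whose tokens swallow the space before them) must be followed
// by a space.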
EXP.setPattern(
  lrec_sc(
    MATCHER,
    seq(
      alt(
        tok(TokenType.Space),
        kleft(tok(TokenType.And), tok(TokenType.Space)),
        kleft(tok(TokenType.Or), tok(TokenType.Space)),
      ),
      MATCHER,
    ),
    (acc, next) => {
      switch (next[0].kind) {
        // Whitespace between two terms is an implicit AND
        case TokenType.Space:
        case TokenType.And:
          return {
            text: [acc.text, next[1].text].join(" ").trim(),
            matcher:
              !!acc.matcher || !!next[1].matcher
                ? {
                    type: "and",
                    matchers: [acc.matcher, next[1].matcher].filter(
                      (a) => !!a,
                    ),
                  }
                : undefined,
          };
        case TokenType.Or:
          return {
            text: [acc.text, next[1].text].join(" ").trim(),
            matcher:
              !!acc.matcher || !!next[1].matcher
                ? {
                    type: "or",
                    matchers: [acc.matcher, next[1].matcher].filter(
                      (a) => !!a,
                    ),
                  }
                : undefined,
          };
        default:
          // Unreachable: the alternation above only yields Space/And/Or
          throw new Error(`Unexpected separator token: ${next[0].text}`);
      }
    },
  ),
);
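// The parser emits strictly binary and/or nodes (and singleton wrappers when
// one side is pure text). Flatten same-typed nested nodes into a single
// n-ary matcher, e.g. and(and(a, b), c) -> and(a, b, c).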
function flattenAndsAndOrs(matcher: Matcher): Matcher {
  switch (matcher.type) {
    case "and":
    case "or": {
      if (matcher.matchers.length === 1) {
        return flattenAndsAndOrs(matcher.matchers[0]);
      }
      const flattened: Matcher[] = [];
      for (let m of matcher.matchers) {
        // If the child is another matcher of the same type, inline its children
        m = flattenAndsAndOrs(m);
        if (m.type === matcher.type) {
          flattened.push(...m.matchers);
        } else {
          flattened.push(m);
        }
      }
      matcher.matchers = flattened;
      return matcher;
    }
    default:
      return matcher;
  }
}
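// Debugging helper: dumps the token stream for a query to the console.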
export function _parseAndPrintTokens(query: string) {
  console.log(`PARSING: ${query}`);
  let token = LexerToken.from(query);
  do {
    console.log(token?.kind, token?.text);
    token = token?.next;
  } while (token);
  console.log("DONE");
}
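// Stitches the raw text of `token` and everything after it back together;
// used to recover the unconsumed tail of a partially parsed query.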
function consumeTokenStream(token: Token<TokenType>) {
  // Renamed locals so they don't shadow the imported `str`/`tok` combinators
  let result = "";
  let cur: Token<TokenType> | undefined = token;
  do {
    result += cur.text;
    cur = cur.next;
  } while (cur);
  return result;
}
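// Entry point. Returns the residual free-text part of the query, the parsed
// matcher tree (if any), and whether the parse was "full", "partial" (some
// trailing input could not be parsed), or "invalid" (nothing could be parsed).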
export function parseSearchQuery(
  query: string,
): TextAndMatcher & { result: "full" | "partial" | "invalid" } {
  // _parseAndPrintTokens(query); // Uncomment to debug tokenization
  const parsed = EXP.parse(LexerToken.from(query.trim()));
  if (!parsed.successful || parsed.candidates.length !== 1) {
    // If the query is not valid, return the whole query as pure text
    return {
      text: query,
      result: "invalid",
    };
  }
  const parseCandidate = parsed.candidates[0];
  if (parseCandidate.result.matcher) {
    parseCandidate.result.matcher = flattenAndsAndOrs(
      parseCandidate.result.matcher,
    );
  }
  if (parseCandidate.nextToken) {
    // The parser failed to consume the whole query. This usually happens
    // while the user is still typing. Return the partial result and the
    // remaining query as pure text.
    return {
      text: (
        parseCandidate.result.text +
        consumeTokenStream(parseCandidate.nextToken)
      ).trim(),
      matcher: parseCandidate.result.matcher,
      result: "partial",
    };
  }
  return {
    text: parseCandidate.result.text,
    matcher: parseCandidate.result.matcher,
    result: "full",
  };
}
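
// A minimal usage sketch (illustrative; the exact matcher shapes follow the
// cases handled above):
//
//   parseSearchQuery("is:fav #cooking pasta");
//   // => {
//   //      text: "pasta",
//   //      matcher: {
//   //        type: "and",
//   //        matchers: [
//   //          { type: "favourited", favourited: true },
//   //          { type: "tagName", tagName: "cooking", inverse: false },
//   //        ],
//   //      },
//   //      result: "full",
//   //    }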