#!/bin/bash
# analyze-paper.sh - Extract structured information from research papers
# Version: 1.0.0
# Part of: Paper2Agent Skill
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CACHE_DIR="${MIYABI_PAPER2AGENT_CACHE_DIR:-$HOME/.miyabi/paper2agent}"
PDF_PARSER="${MIYABI_PAPER2AGENT_PDF_PARSER:-pdftotext}"
mkdir -p "$CACHE_DIR"
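# Dependencies: curl, jq, and either pdftotext (poppler) or python3 + pypdf.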
# ============================================================
# Functions
# ============================================================
log() {
    echo "[$(date -Iseconds)] $*" >&2
}
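
# Download a paper PDF to a local path. arXiv abstract URLs are rewritten
# to their direct PDF form first, e.g.
#   https://arxiv.org/abs/2509.06917 -> https://arxiv.org/pdf/2509.06917.pdf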
download_paper() {
    local url="$1"
    local output="$2"
    log "Downloading paper from $url"
    if [[ "$url" =~ ^https://arxiv.org/abs/ ]]; then
        # Convert abs URL to PDF URL
        url="${url/\/abs\//\/pdf\/}.pdf"
    fi
    # -f: fail on HTTP errors so a 404 page is never cached as a PDF.
    # Download to a temp file first so an interrupted transfer does not
    # leave a corrupt file at the final path.
    curl -fL -o "$output.tmp" "$url"
    mv "$output.tmp" "$output"
    log "Downloaded to $output"
}
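
# Extract plain text from a PDF using the parser selected by
# MIYABI_PAPER2AGENT_PDF_PARSER (pdftotext by default, or pypdf).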
extract_text() {
    local pdf_file="$1"
    local txt_file="$2"
    log "Extracting text from PDF"
    case "$PDF_PARSER" in
        pdftotext)
            if ! command -v pdftotext &>/dev/null; then
                log "ERROR: pdftotext not found. Install: brew install poppler"
                exit 1
            fi
            pdftotext -layout "$pdf_file" "$txt_file"
            ;;
        pypdf)
            if ! python3 -c "import pypdf" &>/dev/null; then
                log "ERROR: pypdf not found. Install: pip install pypdf"
                exit 1
            fi
            # Paths are passed via the environment rather than interpolated
            # into the Python source, so quotes or dollar signs in filenames
            # cannot break the heredoc. The heredoc body must stay at column
            # 0; the Python indentation below is significant.
            PDF_FILE="$pdf_file" TXT_FILE="$txt_file" python3 <<'EOF'
import os
from pypdf import PdfReader

reader = PdfReader(os.environ["PDF_FILE"])
with open(os.environ["TXT_FILE"], "w") as f:
    for page in reader.pages:
        f.write(page.extract_text() or "")
EOF
            ;;
        *)
            log "ERROR: Unknown PDF parser: $PDF_PARSER"
            exit 1
            ;;
    esac
    log "Text extracted to $txt_file"
}
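
# Extract the body of a named section from the text dump. Tries several
# heading styles and captures everything up to the next heading-like line
# (a short Title Case line). Example:
#   extract_section paper.txt "Abstract"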
extract_section() {
    local text_file="$1"
    local section_name="$2"
    # Try different section header formats:
    #   "Abstract", "1. Methods", "Methods:"
    local patterns=(
        "^${section_name}[[:space:]]*\$"
        "^[0-9]+\.?[[:space:]]*${section_name}"
        "^${section_name}:"
    )
    for pattern in "${patterns[@]}"; do
        local result
        result=$(awk -v pattern="$pattern" '
            BEGIN { capturing = 0; }
            $0 ~ pattern { capturing = 1; next; }
            # Stop at the next heading-like line: short Title Case text
            # that does not start with a common sentence opener.
            capturing && /^[A-Z][A-Za-z ]+$/ && !/^(The|A|An|In|For|To|Of)/ { exit; }
            capturing { print; }
        ' "$text_file")
        if [ -n "$result" ]; then
            echo "$result"
            return 0
        fi
    done
    return 1
}
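
# Collect the unique GitHub repository URLs mentioned anywhere in the text.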
extract_github_urls() {
    local text_file="$1"
    grep -Eo "https://github\.com/[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+" "$text_file" | sort -u
}

extract_arxiv_id() {
    local text_file="$1"
    # Find the first new-style arXiv ID (e.g., arXiv:2509.06917).
    # Old-style IDs (e.g., arXiv:hep-th/9901001) are not matched.
    grep -Eo "arXiv:[0-9]{4}\.[0-9]{4,5}" "$text_file" | head -1 | sed 's/arXiv://'
}

extract_title() {
    local text_file="$1"
    # Heuristic: the longest of the first 20 lines is usually the title
    head -20 "$text_file" | awk '
        length > max_length { max_length = length; longest = $0; }
        END { print longest; }
    ' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//'
}
generate_json_output() {
    local title="$1"
    local abstract="$2"
    local methods="$3"
    local github_urls="$4"
    local arxiv_id="$5"
    # Escape JSON strings. jq -R emits a fully quoted JSON string, so the
    # results must be spliced into the template without extra quotes.
    title=$(echo "$title" | jq -R .)
    abstract=$(echo "$abstract" | jq -Rs .)
    methods=$(echo "$methods" | jq -Rs .)
    arxiv_id=$(echo "$arxiv_id" | jq -R .)
    # Convert GitHub URLs to a JSON array
    local github_json="[]"
    if [ -n "$github_urls" ]; then
        github_json=$(echo "$github_urls" | jq -R . | jq -s .)
    fi
    cat <<EOF
{
  "title": $title,
  "arxiv_id": $arxiv_id,
  "abstract": $abstract,
  "methods": $methods,
  "code_references": $github_json,
  "extracted_at": "$(date -Iseconds)",
  "parser": "$PDF_PARSER"
}
EOF
}
# ============================================================
# Main Logic
# ============================================================
usage() {
    cat <<EOF
Usage: $0 <paper-url-or-path> [output-json]

Extract structured information from a research paper PDF.

Arguments:
  paper-url-or-path   URL (arXiv, etc.) or local file path
  output-json         Output JSON file (default: stdout)

Environment:
  MIYABI_PAPER2AGENT_CACHE_DIR    Cache directory (default: ~/.miyabi/paper2agent)
  MIYABI_PAPER2AGENT_PDF_PARSER   PDF parser: pdftotext (default) or pypdf

Examples:
  # From arXiv URL
  $0 https://arxiv.org/abs/2509.06917 > analysis.json

  # From local PDF
  $0 ./paper.pdf analysis.json

  # With pypdf parser
  MIYABI_PAPER2AGENT_PDF_PARSER=pypdf $0 paper.pdf
EOF
    exit 1
}

main() {
    if [ $# -lt 1 ]; then
        usage
    fi
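    # jq is required for the JSON escaping in generate_json_output
    if ! command -v jq &>/dev/null; then
        log "ERROR: jq not found. Install: brew install jq"
        exit 1
    fi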
    local input="$1"
    local output_json="${2:--}" # Default to stdout
    local pdf_file
    # Determine whether the input is a URL or a local file
    if [[ "$input" =~ ^https?:// ]]; then
        # URL: download into the cache, keyed by a hash of the URL.
        # md5sum is GNU coreutils; fall back to macOS's md5 -q.
        local cache_name
        if command -v md5sum &>/dev/null; then
            cache_name=$(echo "$input" | md5sum | cut -d' ' -f1)
        else
            cache_name=$(echo "$input" | md5 -q)
        fi
        pdf_file="$CACHE_DIR/$cache_name.pdf"
        if [ ! -f "$pdf_file" ]; then
            download_paper "$input" "$pdf_file"
        else
            log "Using cached PDF: $pdf_file"
        fi
    else
        # Local file
        if [ ! -f "$input" ]; then
            log "ERROR: File not found: $input"
            exit 1
        fi
        pdf_file="$input"
    fi
    # Extract text
    local text_file="$CACHE_DIR/$(basename "$pdf_file" .pdf).txt"
    extract_text "$pdf_file" "$text_file"

    # Extract sections
    log "Extracting sections"
    local title
    title=$(extract_title "$text_file")
    log "Title: $title"
    local abstract
    abstract=$(extract_section "$text_file" "Abstract" || echo "")
    log "Abstract: ${#abstract} chars"
    local methods
    methods=$(extract_section "$text_file" "Methods" ||
        extract_section "$text_file" "Methodology" ||
        extract_section "$text_file" "Approach" || echo "")
    log "Methods: ${#methods} chars"

    # Extract metadata
    local github_urls
    github_urls=$(extract_github_urls "$text_file" || echo "")
    # "echo | wc -l" reports 1 even for an empty string, so only count
    # lines when URLs were actually found.
    local github_count=0
    if [ -n "$github_urls" ]; then
        github_count=$(echo "$github_urls" | wc -l | tr -d ' ')
    fi
    log "GitHub URLs: $github_count found"
    local arxiv_id
    arxiv_id=$(extract_arxiv_id "$text_file" || echo "")
    log "arXiv ID: $arxiv_id"

    # Generate JSON output
    local json_output
    json_output=$(generate_json_output "$title" "$abstract" "$methods" "$github_urls" "$arxiv_id")
    if [ "$output_json" = "-" ]; then
        echo "$json_output"
    else
        echo "$json_output" > "$output_json"
        log "Analysis saved to $output_json"
    fi
}
# ============================================================
# Entry Point
# ============================================================
if [ "${BASH_SOURCE[0]}" == "${0}" ]; then
    main "$@"
fi