cli.py•5.33 kB
"""Command-line interface for the arXiv API client."""
import json
import sys
from pathlib import Path
import click
from arxiv.services import ArxivService
@click.group()
@click.option(
    "--download-dir",
    default="./.arxiv",
    show_default=True,
    help="Directory to store downloaded PDFs",
)
@click.pass_context
def cli(ctx, download_dir):
    """arXiv API command-line interface.

    Search and download papers from arXiv.org.
    """
    # NOTE: the docstring above is rendered by click as the top-level --help
    # text, so it is kept user-facing; implementation notes live in comments.
    #
    # Make sure ctx.obj is a dict, then publish the two values every
    # subcommand reads back: the service instance and the raw directory string.
    ctx.ensure_object(dict)
    ctx.obj.update(
        service=ArxivService(download_dir=download_dir),
        download_dir=download_dir,
    )
@cli.command()
@click.argument("query")
@click.option(
    "--max-results",
    "-n",
    default=10,
    show_default=True,
    help="Maximum number of results to return",
)
@click.option(
    "--start",
    "-s",
    default=0,
    show_default=True,
    help="Starting index for pagination",
)
@click.option(
    "--sort-by",
    default="relevance",
    show_default=True,
    type=click.Choice(["relevance", "lastUpdatedDate", "submittedDate"]),
    help="Sort criterion",
)
@click.option(
    "--sort-order",
    default="descending",
    show_default=True,
    type=click.Choice(["ascending", "descending"]),
    help="Sort order",
)
@click.option("--json", "output_json", is_flag=True, help="Output results as JSON")
@click.pass_context
def search(ctx, query, max_results, start, sort_by, sort_order, output_json):
    """Search arXiv papers.

    QUERY supports field prefixes:

    \b
    - ti: Title
    - au: Author
    - abs: Abstract
    - cat: Category
    - all: All fields (default)

    \b
    Examples:
        arxiv search "ti:machine learning"
        arxiv search "au:Hinton"
        arxiv search "ti:transformer AND au:Vaswani"
        arxiv search "cat:cs.AI" --max-results 20
    """
    # The service is placed on ctx.obj by the parent `cli` group callback.
    arxiv = ctx.obj["service"]

    # Gather the CLI options once; they map one-to-one onto keyword arguments
    # of the service call.
    request = dict(
        query=query,
        max_results=max_results,
        start=start,
        sort_by=sort_by,
        sort_order=sort_order,
    )

    try:
        found = arxiv.search(**request)
        # --json prints the result's JSON dump; otherwise the result object
        # itself is handed to click.echo.
        click.echo(found.model_dump_json(indent=2) if output_json else found)
    except Exception as exc:
        # CLI boundary: report any failure on stderr and exit non-zero.
        click.echo(f"Error: {exc}", err=True)
        sys.exit(1)
@cli.command()
@click.argument("arxiv_id")
@click.option(
    "--no-download",
    is_flag=True,
    help="Don't download the PDF, just show metadata",
)
@click.option(
    "--force",
    is_flag=True,
    help="Force download even if file exists locally",
)
@click.option(
    "--json",
    "output_json",
    is_flag=True,
    help="Output results as JSON",
)
@click.pass_context
def get(ctx, arxiv_id, no_download, force, output_json):
    """Get a specific arXiv paper by ID.

    ARXIV_ID can be in any of these formats:

    \b
    - 2301.12345
    - arXiv:2301.12345
    - 2301.12345v1

    \b
    Examples:
        arxiv get 2301.12345
        arxiv get arXiv:2301.12345 --no-download
        arxiv get 2301.12345 --force
    """
    # The docstring is click's --help text. Each `\b` line stops click from
    # rewrapping the paragraph after it, so the bullet list and the examples
    # keep one item per line.
    #
    # Parameters (all supplied by click):
    #   ctx         -- click context; ctx.obj holds "service" and "download_dir".
    #   arxiv_id    -- identifier string passed through to the service untouched.
    #   no_download -- when set, the service is asked not to fetch the PDF.
    #   force       -- forwarded to the service as force_download.
    #   output_json -- print the entry's JSON dump instead of the entry itself.
    service = ctx.obj["service"]
    try:
        entry = service.get(
            arxiv_id=arxiv_id,
            download_pdf=not no_download,
            force_download=force,
        )
        if output_json:
            # Only the JSON document goes to stdout in this mode, so the
            # output stays machine-parseable.
            click.echo(entry.model_dump_json(indent=2))
        else:
            click.echo(entry)
            click.echo()
            if not no_download and entry.pdf_url:
                # Assumes the service stores the PDF as
                # <download_dir>/<arxiv_id>.pdf -- TODO confirm against
                # ArxivService. The notice is only printed if that file exists.
                pdf_path = Path(ctx.obj["download_dir"]) / f"{entry.arxiv_id}.pdf"
                if pdf_path.exists():
                    click.echo(f"PDF saved to: {pdf_path}")
    except Exception as e:
        # A single handler: ValueError is a subclass of Exception and was
        # previously handled by an identical, redundant clause.
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)
@cli.command()
@click.argument("arxiv_id")
@click.option(
    "--force",
    is_flag=True,
    help="Force download even if file exists locally",
)
@click.pass_context
def download(ctx, arxiv_id, force):
    """Download PDF for a paper by arXiv ID.

    ARXIV_ID can be in any of these formats:

    \b
    - 2301.12345
    - arXiv:2301.12345
    - 2301.12345v1

    \b
    Examples:
        arxiv download 2301.12345
        arxiv download arXiv:2301.12345 --force
    """
    # The docstring is click's --help text. Each `\b` line stops click from
    # rewrapping the paragraph after it, so the bullet list and the examples
    # keep one item per line.
    #
    # Parameters (all supplied by click):
    #   ctx      -- click context; ctx.obj["service"] performs the download.
    #   arxiv_id -- identifier string passed through to the service untouched.
    #   force    -- forwarded to the service as force_download.
    service = ctx.obj["service"]
    try:
        pdf_path = service.download_pdf(arxiv_id=arxiv_id, force_download=force)
        if pdf_path:
            click.echo(f"Downloaded to: {pdf_path}")
        else:
            # A falsy return value is treated as a failed download.
            click.echo("Failed to download PDF", err=True)
            # SystemExit is not an Exception subclass, so the handler below
            # does not intercept this exit.
            sys.exit(1)
    except Exception as e:
        # CLI boundary: report any failure on stderr and exit non-zero.
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)
@cli.command()
@click.pass_context
def list_downloads(ctx):
    """List all downloaded PDFs."""
    # Directory string stored on ctx.obj by the parent group callback.
    folder = Path(ctx.obj["download_dir"])
    if not folder.exists():
        click.echo(f"Download directory does not exist: {folder}")
        return

    # Non-recursive match on *.pdf, ordered by path.
    found = list(folder.glob("*.pdf"))
    found.sort()
    if not found:
        click.echo(f"No PDFs found in {folder}")
        return

    bytes_per_mb = 1024 * 1024

    click.echo(f"PDFs in {folder}:")
    click.echo()
    for item in found:
        megabytes = item.stat().st_size / bytes_per_mb
        click.echo(f" {item.name} ({megabytes:.2f} MB)")
    click.echo()
    click.echo(f"Total: {len(found)} PDFs")
# Script entry point: invoke the click group only when this module is run
# directly, not when it is imported.
if __name__ == "__main__":
    cli()