# Google-Scholar-MCP-Server
# by JackKuo666
# Verified
import requests
from bs4 import BeautifulSoup
import time
from scholarly import scholarly
# Basic keyword search
def google_scholar_search(query, num_results=5, timeout=10):
    """
    Search Google Scholar with a plain keyword query and scrape the results page.

    Only a single results page is requested, so fewer than ``num_results``
    entries may be returned.

    Parameters:
    query (str): The search query (e.g., paper title or author).
    num_results (int): The maximum number of results to return.
    timeout (float): Seconds to wait for the HTTP response before giving up.

    Returns:
    list: A list of dictionaries with the keys 'Title', 'Authors',
        'Abstract' and 'URL'. Empty when the request fails, the server
        answers with a non-200 status, or no result entries are found.
    """
    # Nothing can be collected, so skip the network round trip entirely.
    if num_results <= 0:
        return []

    search_url = "https://scholar.google.com/scholar"
    # Set up headers to mimic a real browser request
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    try:
        # Passing the query through `params` lets requests URL-encode it, so
        # characters such as '&', '#', '+' or non-ASCII text cannot corrupt
        # the query string. The timeout keeps a stalled connection from
        # blocking the caller forever.
        response = requests.get(
            search_url,
            params={'q': query},
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Same failure contract as a bad status code: report and return nothing.
        print(f"Failed to fetch data. Request error: {exc}")
        return []

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to fetch data. HTTP Status code: {response.status_code}")
        return []

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    results = []
    # Each search hit is rendered inside a <div class="gs_ri"> element.
    for item in soup.find_all('div', class_='gs_ri')[:num_results]:
        title_tag = item.find('h3', class_='gs_rt')
        link_tag = title_tag.find('a') if title_tag is not None else None
        authors_tag = item.find('div', class_='gs_a')
        abstract_tag = item.find('div', class_='gs_rs')

        results.append({
            'Title': title_tag.get_text() if title_tag is not None else 'No title available',
            'Authors': authors_tag.get_text() if authors_tag is not None else 'No authors available',
            'Abstract': abstract_tag.get_text() if abstract_tag is not None else 'No abstract available',
            # .get() avoids a KeyError when the anchor carries no href attribute.
            'URL': link_tag.get('href', 'No link available') if link_tag is not None else 'No link available',
        })
    return results
# Advanced search
def advanced_google_scholar_search(query, author=None, year_range=None, num_results=5, timeout=10):
    """
    Search Google Scholar with optional author and publication-year filters.

    Only a single results page is requested, so fewer than ``num_results``
    entries may be returned.

    Parameters:
    query (str): The search query (e.g., paper title or topic).
    author (str): The author's name to filter the results (default is None).
    year_range (tuple): A tuple (start_year, end_year) to filter the results
        by publication year (default is None). Either element may be None
        to leave that side of the range open.
    num_results (int): The maximum number of results to return.
    timeout (float): Seconds to wait for the HTTP response before giving up.

    Returns:
    list: A list of dictionaries with the keys 'Title', 'Authors',
        'Abstract' and 'URL'. Empty when the request fails, the server
        answers with a non-200 status, or no result entries are found.
    """
    # Nothing can be collected, so skip the network round trip entirely.
    if num_results <= 0:
        return []

    search_url = "https://scholar.google.com/scholar"

    # Build the query parameters; requests URL-encodes every value, so names
    # with spaces ("Ian Goodfellow") or special characters are sent intact.
    search_params = {'q': query}
    if author:
        # NOTE(review): Scholar's advanced-search form submits the author
        # filter as 'as_sauthors'; the previously used 'as_auth' is not one
        # of that form's fields.
        search_params['as_sauthors'] = author
    if year_range:
        start_year, end_year = year_range
        if start_year is not None:
            search_params['as_ylo'] = start_year  # Start year
        if end_year is not None:
            search_params['as_yhi'] = end_year  # End year

    # Set up headers to mimic a real browser request
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    try:
        # The timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(
            search_url,
            params=search_params,
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Same failure contract as a bad status code: report and return nothing.
        print(f"Failed to fetch data. Request error: {exc}")
        return []

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to fetch data. HTTP Status code: {response.status_code}")
        return []

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    results = []
    # Each search hit is rendered inside a <div class="gs_ri"> element.
    for item in soup.find_all('div', class_='gs_ri')[:num_results]:
        title_tag = item.find('h3', class_='gs_rt')
        link_tag = title_tag.find('a') if title_tag is not None else None
        authors_tag = item.find('div', class_='gs_a')
        abstract_tag = item.find('div', class_='gs_rs')

        results.append({
            'Title': title_tag.get_text() if title_tag is not None else 'No title available',
            'Authors': authors_tag.get_text() if authors_tag is not None else 'No authors available',
            'Abstract': abstract_tag.get_text() if abstract_tag is not None else 'No abstract available',
            # .get() avoids a KeyError when the anchor carries no href attribute.
            'URL': link_tag.get('href', 'No link available') if link_tag is not None else 'No link available',
        })
    return results
# Example usage:
def _print_results(results):
    """Print title, authors, abstract and URL of each result, then a separator line."""
    for result in results:
        print(f"\nTitle: {result['Title']}")
        print(f"Authors: {result['Authors']}")
        print(f"Abstract: {result['Abstract']}")
        print(f"URL: {result['URL']}")
        print("-" * 80)


def main():
    """Run the demo: a keyword search, an advanced search, then an author lookup via scholarly."""
    # 1. Basic keyword search
    query = "machine learning"
    results = google_scholar_search(query, num_results=5)
    print("Results for keyword search:")
    _print_results(results)

    # 2. Advanced search
    advanced_query = "machine learning"
    advanced_results = advanced_google_scholar_search(
        advanced_query,
        author="Ian Goodfellow",
        year_range=(2010, 2021),
        num_results=5,
    )
    print("\nResults for advanced search:")
    _print_results(advanced_results)

    # Retrieve the author's data, fill-in, and print
    # 3. Get an iterator for the author results
    search_query = scholarly.search_author('Steven A Cholewiak')

    # 4. Retrieve the first result from the iterator; the default avoids an
    #    unhandled StopIteration when the search yields no author.
    first_author_result = next(search_query, None)
    if first_author_result is None:
        print("No author found.")
        return
    scholarly.pprint(first_author_result)

    # 5. Retrieve all the details for the author
    author = scholarly.fill(first_author_result)
    scholarly.pprint(author)

    # 6. Take a closer look at the first publication, if there is one
    publications = author['publications']
    if publications:
        first_publication_filled = scholarly.fill(publications[0])
        scholarly.pprint(first_publication_filled)

    # 7. Print the titles of the author's publications
    publication_titles = [pub['bib']['title'] for pub in publications]
    print(publication_titles)


if __name__ == "__main__":
    main()