import pandas as pd
import json
from typing import List, Dict, Any, Optional
from collections import Counter
from datetime import datetime
class UniversityRankingsAssistant:
    """Query helper over a university discipline-evaluation spreadsheet.

    The sheet is loaded once into ``self.df``; every public query method
    returns a JSON string built by ``_to_json`` (an envelope with
    ``status``, ``timestamp`` and ``data`` keys).
    """

    # Names assigned positionally to the columns of the loaded sheet.
    _COLUMNS = ['_', '一级学科', '评估等级', '院校代码', '专业门类', '专业大类',
                '院校名称', '所在省', '所在市', '院校类别', '一流大学学科', '院校性质']
    # Evaluation grades from best to worst.
    _GRADES = ['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-']
    # Grade -> sort position (0 is best).
    _GRADE_ORDER = dict(zip(_GRADES, range(len(_GRADES))))
    # Grades counted as "A level" in the statistics.
    _A_GRADES = ['A+', 'A', 'A-']
    # Sort position given to any grade that is not in _GRADE_ORDER.
    _UNRANKED = 999

    def __init__(self, excel_file: str):
        """Load the spreadsheet and normalise it into ``self.df``.

        Args:
            excel_file: Path of the Excel workbook to read.

        Raises:
            KeyError: If the sheet has no column named '一级学科'.
            ValueError: If the sheet does not have exactly 12 columns.
        """
        frame = pd.read_excel(excel_file)
        # Drop repeated header rows embedded in the data. This runs before the
        # rename below, so the raw sheet must already expose a '一级学科' column.
        frame = frame[frame['一级学科'] != '一级学科'].reset_index(drop=True)
        # Simplify the column names (assigned by position).
        frame.columns = list(self._COLUMNS)
        # Rows without a discipline or a university name are unusable.
        self.df = frame.dropna(subset=['一级学科', '院校名称'])

    def _to_json(self, data: Any, status: str = "success") -> str:
        """Wrap ``data`` in the response envelope and serialise it.

        Args:
            data: JSON-serialisable payload.
            status: Status label stored in the envelope.

        Returns:
            An indented JSON string; non-ASCII characters are kept as-is and
            the timestamp is the local naive time in ISO format.
        """
        envelope = {
            "status": status,
            "timestamp": datetime.now().isoformat(),
            "data": data
        }
        return json.dumps(envelope, ensure_ascii=False, indent=2)

    @staticmethod
    def _clean(value: Any) -> Any:
        """Map a missing cell (NaN/None) to None so it serialises as ``null``."""
        return None if pd.isna(value) else value

    def _contains(self, column: str, text: str) -> pd.DataFrame:
        """Return the rows whose ``column`` contains ``text`` as a literal substring."""
        # regex=False: names such as "X大学(北京)" must not be parsed as patterns.
        mask = self.df[column].str.contains(text, na=False, regex=False)
        return self.df[mask]

    def _sort_by_grade(self, frame: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``frame`` ordered from best to worst grade.

        A helper column ``grade_rank`` is added. The sort is stable, so rows
        sharing a grade keep their original relative order.
        """
        order = self._GRADE_ORDER
        ranks = frame['评估等级'].map(lambda g: order.get(g, self._UNRANKED))
        return frame.assign(grade_rank=ranks).sort_values('grade_rank', kind='mergesort')

    def search_university(self, university_name: str) -> str:
        """Feature 1: list every discipline of the matching universities.

        Args:
            university_name: Literal substring searched in the university name.

        Returns:
            JSON with one entry per matching row plus a count per grade, or a
            ``not_found`` envelope when nothing matches.
        """
        result_df = self._contains('院校名称', university_name)
        if result_df.empty:
            return self._to_json({"message": f"未找到包含'{university_name}'的高校"}, "not_found")

        disciplines = [
            {
                "discipline_code": self._clean(row['一级学科']),
                "grade": self._clean(row['评估等级']),
                "category": self._clean(row['专业门类']),
                "province": self._clean(row['所在省']),
                "city": self._clean(row['所在市']),
                "is_first_class": not pd.isna(row['一流大学学科'])
            }
            for _, row in result_df.iterrows()
        ]
        data = {
            "university": university_name,
            "total_disciplines": len(result_df),
            "disciplines": disciplines,
            # Number of rows per grade.
            "grade_summary": result_df['评估等级'].value_counts().to_dict()
        }
        return self._to_json(data)

    def search_discipline(self, discipline_name: str) -> str:
        """Feature 2: list the universities offering a discipline, best grade first.

        Args:
            discipline_name: Literal substring searched in the discipline name.

        Returns:
            JSON with the ranked universities and the grade distribution, or a
            ``not_found`` envelope when nothing matches.
        """
        result_df = self._contains('一级学科', discipline_name)
        if result_df.empty:
            return self._to_json({"message": f"未找到包含'{discipline_name}'的学科"}, "not_found")

        result_df = self._sort_by_grade(result_df)
        universities = [
            {
                "rank": rank,
                "university": self._clean(row['院校名称']),
                "grade": self._clean(row['评估等级']),
                "province": self._clean(row['所在省']),
                "university_category": self._clean(row['院校类别']),
                "is_first_class": not pd.isna(row['一流大学学科'])
            }
            for rank, (_, row) in enumerate(result_df.iterrows(), 1)
        ]
        data = {
            "discipline": discipline_name,
            "total_universities": len(result_df),
            "universities": universities,
            "grade_distribution": result_df['评估等级'].value_counts().to_dict()
        }
        return self._to_json(data)

    def filter_by_grade(self, grades: List[str]) -> str:
        """Feature 3: list every discipline entry whose grade is in ``grades``.

        Args:
            grades: Grades to keep, e.g. ``['A+', 'A']``.

        Returns:
            JSON with a count per requested grade and the matching rows, or a
            ``not_found`` envelope when nothing matches.
        """
        result_df = self.df[self.df['评估等级'].isin(grades)]
        if result_df.empty:
            return self._to_json({"message": f"未找到等级为{grades}的学科点"}, "not_found")

        grade_column = result_df['评估等级']
        data = {
            "grades": grades,
            "total_disciplines": len(result_df),
            # Requested grades with no rows are reported with a count of 0.
            "by_grade": {grade: int((grade_column == grade).sum()) for grade in grades},
            "disciplines": [
                {
                    "discipline": self._clean(row['一级学科']),
                    "grade": self._clean(row['评估等级']),
                    "university": self._clean(row['院校名称']),
                    "province": self._clean(row['所在省']),
                    "category": self._clean(row['专业门类'])
                }
                for _, row in result_df.iterrows()
            ]
        }
        return self._to_json(data)

    def browse_by_province(self, province: str) -> str:
        """Feature 4: summarise the disciplines of one province.

        Args:
            province: Province name; compared for exact equality.

        Returns:
            JSON with per-university counts plus grade and category
            distributions, or a ``not_found`` envelope when nothing matches.
        """
        result_df = self.df[self.df['所在省'] == province]
        if result_df.empty:
            return self._to_json({"message": f"未找到位于'{province}'的数据"}, "not_found")

        # One pass over the rows; sort=False keeps first-appearance order.
        universities = [
            {
                "name": name,
                "total_disciplines": len(uni_df),
                "a_grade_count": int(uni_df['评估等级'].isin(self._A_GRADES).sum())
            }
            for name, uni_df in result_df.groupby('院校名称', sort=False)
        ]
        data = {
            "province": province,
            "total_disciplines": len(result_df),
            "universities_count": result_df['院校名称'].nunique(),
            "universities": universities,
            "grade_distribution": result_df['评估等级'].value_counts().to_dict(),
            "category_distribution": result_df['专业门类'].value_counts().to_dict()
        }
        return self._to_json(data)

    def university_ranking(self, rank_by: str = "disciplines") -> str:
        """Feature 5: rank universities and return the top 50.

        Args:
            rank_by: One of 'disciplines', 'a_plus', 'a_grade', 'first_class'.
                Any other value sorts by total disciplines and is described
                as an unknown ordering in the output.

        Returns:
            JSON with the ranked universities (at most 50 entries).
        """
        uni_list = []
        for name, uni_df in self.df.groupby('院校名称', sort=False):
            grade_column = uni_df['评估等级']
            uni_list.append({
                "university": name,
                "total_disciplines": len(uni_df),
                "a_plus_count": int((grade_column == 'A+').sum()),
                "a_grade_count": int(grade_column.isin(self._A_GRADES).sum()),
                "first_class_count": int(uni_df['一流大学学科'].notna().sum()),
                # Province is taken from the university's first row.
                "province": self._clean(uni_df['所在省'].iloc[0])
            })

        rank_key_map = {
            "disciplines": "total_disciplines",
            "a_plus": "a_plus_count",
            "a_grade": "a_grade_count",
            "first_class": "first_class_count"
        }
        sort_key = rank_key_map.get(rank_by, "total_disciplines")
        # list.sort is stable: ties keep first-appearance order.
        uni_list.sort(key=lambda info: info[sort_key], reverse=True)

        descriptions = {
            "disciplines": "按学科总数排序",
            "a_plus": "按A+学科数排序",
            "a_grade": "按A级及以上学科数排序",
            "first_class": "按一流大学学科数排序"
        }
        data = {
            "rank_by": rank_by,
            "rank_description": descriptions.get(rank_by, "未知排序方式"),
            "total_universities": len(uni_list),
            # Keep the top 50 only.
            "rankings": [
                {**info, "rank": rank}
                for rank, info in enumerate(uni_list[:50], 1)
            ]
        }
        return self._to_json(data)

    def discipline_ranking(self, rank_by: str = "universities") -> str:
        """Feature 6: rank disciplines and return the top 50.

        Args:
            rank_by: One of 'universities', 'a_plus', 'a_grade'. Any other
                value sorts by number of entries and is described as an
                unknown ordering in the output.

        Returns:
            JSON with, per discipline, its entry count, A+/A-level counts and
            its three best-graded universities (at most 50 disciplines).
        """
        discipline_list = []
        for name, disc_df in self.df.groupby('一级学科', sort=False):
            grade_column = disc_df['评估等级']
            best = self._sort_by_grade(disc_df).head(3)
            discipline_list.append({
                "discipline": name,
                "total_universities": len(disc_df),
                "a_plus_count": int((grade_column == 'A+').sum()),
                "a_grade_count": int(grade_column.isin(self._A_GRADES).sum()),
                "top_universities": [
                    {"university": self._clean(uni), "grade": self._clean(grade)}
                    for uni, grade in zip(best['院校名称'], best['评估等级'])
                ]
            })

        rank_key_map = {
            "universities": "total_universities",
            "a_plus": "a_plus_count",
            "a_grade": "a_grade_count"
        }
        sort_key = rank_key_map.get(rank_by, "total_universities")
        discipline_list.sort(key=lambda info: info[sort_key], reverse=True)

        descriptions = {
            "universities": "按开设高校数排序",
            "a_plus": "按A+数量排序",
            "a_grade": "按A级及以上数量排序"
        }
        data = {
            "rank_by": rank_by,
            "rank_description": descriptions.get(rank_by, "未知排序方式"),
            "total_disciplines": len(discipline_list),
            "rankings": discipline_list[:50]  # Keep the top 50 only.
        }
        return self._to_json(data)

    def strong_disciplines(self, university: Optional[str] = None,
                           province: Optional[str] = None,
                           grades: Optional[List[str]] = None) -> str:
        """Feature 7: list the strong disciplines of a university or a province.

        Args:
            university: Literal substring of the university name. Takes
                precedence over ``province`` when both are given.
            province: Province name; compared for exact equality.
            grades: Grades regarded as strong; defaults to A+, A and A-.

        Returns:
            JSON grouping the matching disciplines by grade (best first), an
            ``error`` envelope when neither scope is given, or a ``not_found``
            envelope when the scope matches no row.
        """
        if grades is None:
            # Fresh list per call; a list literal as default would be shared.
            grades = list(self._A_GRADES)

        if university:
            result_df = self._contains('院校名称', university)
            scope = f"高校: {university}"
        elif province:
            result_df = self.df[self.df['所在省'] == province]
            scope = f"地区: {province}"
        else:
            return self._to_json({"message": "请提供高校或地区参数"}, "error")

        if result_df.empty:
            return self._to_json({"message": f"未找到数据: {scope}"}, "not_found")

        # Keep only the requested grades.
        result_df = result_df[result_df['评估等级'].isin(grades)]

        # De-duplicate in caller order, then rank on the full grade scale so
        # the output order is deterministic; unknown grades go last.
        ordered_grades = sorted(
            dict.fromkeys(grades),
            key=lambda g: self._GRADE_ORDER.get(g, self._UNRANKED)
        )
        disciplines = []
        for grade in ordered_grades:
            grade_df = result_df[result_df['评估等级'] == grade]
            if grade_df.empty:
                continue
            disciplines.append({
                "grade": grade,
                "count": len(grade_df),
                "disciplines_list": grade_df['一级学科'].unique().tolist()
            })

        data = {
            "scope": scope,
            "grades": grades,
            "total_strong_disciplines": len(result_df),
            "disciplines": disciplines,
            "grade_counts": dict(Counter(result_df['评估等级']))
        }
        return self._to_json(data)

    def grade_analysis(self) -> str:
        """Feature 8: analyse how discipline entries are spread over the grades.

        Returns:
            JSON with the count and percentage of every grade, and for each
            grade that occurs: its totals, its five most frequent disciplines
            and its three most frequent categories.
        """
        total = len(self.df)

        grade_distribution = {}
        for grade, count in self.df['评估等级'].value_counts().items():
            if grade == '等级':  # Skip a stray header row.
                continue
            pct = (count / total) * 100
            grade_distribution[grade] = {
                "count": int(count),
                "percentage": round(pct, 2)
            }

        grade_details = []
        for grade in self._GRADES:
            grade_df = self.df[self.df['评估等级'] == grade]
            if grade_df.empty:
                continue
            grade_details.append({
                "grade": grade,
                "total_count": len(grade_df),
                "unique_disciplines": grade_df['一级学科'].nunique(),
                "unique_universities": grade_df['院校名称'].nunique(),
                # Most frequent disciplines at this grade.
                "top_disciplines": grade_df['一级学科'].value_counts().head(5).to_dict(),
                "discipline_distribution": grade_df['专业门类'].value_counts().head(3).to_dict()
            })

        data = {
            "total_disciplines": total,
            "grade_distribution": grade_distribution,
            "grade_details": grade_details
        }
        return self._to_json(data)

    def export_to_json_file(self, query_result: str, filename: str) -> str:
        """Feature 16: write a query result to a JSON file.

        Args:
            query_result: JSON string, typically returned by a query method.
            filename: Destination path; overwritten, written as UTF-8.

        Returns:
            A success envelope holding the filename and the length of
            ``query_result`` in characters, or an ``error`` envelope carrying
            the exception text when parsing or writing fails.
        """
        try:
            # Parse first so invalid input never creates or truncates the file.
            result_data = json.loads(query_result)
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(result_data, f, ensure_ascii=False, indent=2)
        except Exception as e:
            # Deliberately broad: failures are reported in the envelope
            # instead of being raised to the caller.
            return self._to_json({
                "error": str(e),
                "message": "导出失败"
            }, "error")

        return self._to_json({
            "message": "导出成功",
            "filename": filename,
            "size": len(query_result),
            "timestamp": datetime.now().isoformat()
        })
# Usage example
if __name__ == "__main__":
    # Build the assistant from the evaluation spreadsheet.
    assistant = UniversityRankingsAssistant('第四轮学科评估结果.xlsx')

    banner = "=" * 60
    print(banner)
    print("高校学科评估助手 - 功能演示")
    print(banner)

    # Each demo pairs the heading to print with the query to run; the query
    # is wrapped in a lambda so it only executes after its heading is shown.
    demos = (
        # Feature 1: search by university
        ("\n【功能1】高校专业搜索 - 搜索清华大学",
         lambda: assistant.search_university("清华大学")),
        # Feature 2: search by discipline
        ("\n【功能2】学科信息查询 - 查询计算机科学与技术",
         lambda: assistant.search_discipline("计算机")),
        # Feature 3: filter by grade
        ("\n【功能3】等级筛选查询 - 查询所有A+学科",
         lambda: assistant.filter_by_grade(['A+'])),
        # Feature 4: browse by province
        ("\n【功能4】地区浏览 - 查询北京地区",
         lambda: assistant.browse_by_province("北京")),
        # Feature 5: university ranking
        ("\n【功能5】高校排行榜 - 按A+学科数排序",
         lambda: assistant.university_ranking("a_plus")),
        # Feature 6: discipline ranking
        ("\n【功能6】学科排行榜 - 按开设高校数排序",
         lambda: assistant.discipline_ranking("universities")),
        # Feature 7: strong disciplines
        ("\n【功能7】强势学科分析 - 北京大学的强势学科",
         lambda: assistant.strong_disciplines(university="北京大学")),
        # Feature 8: grade analysis
        ("\n【功能8】评估等级分析 - 全体评估等级分析",
         lambda: assistant.grade_analysis()),
    )
    for heading, run_query in demos:
        print(heading)
        print(run_query())