# coding: utf-8
#
# Copyright 2026 祁筱欣
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
from typing import Type
from huaweicloudsdkcore.exceptions import exceptions
from huaweicloudsdkocr.v1 import *
from huaweicloudsdkocr.v1.region.ocr_region import OcrRegion
from .client import HuaweiCloudClient, ClientClass, RegionClass
class OcrClientManager(HuaweiCloudClient):
    """Client manager for the Huawei Cloud OCR service.

    Supplies the OCR-specific client class, region class and cache key
    through the hooks of ``HuaweiCloudClient``.
    """

    def get_client_class(self) -> Type[ClientClass]:
        """Return the SDK client class used for OCR requests."""
        return OcrClient

    def get_region_class(self) -> Type[RegionClass]:
        """Return the SDK region class for the OCR service."""
        return OcrRegion

    def get_cache_key(self) -> str:
        """Return the cache key string identifying this manager."""
        # How the base class uses this key is not visible in this module.
        return "ocr"
# Module-level manager instance; recognize_web_image obtains its OCR client from it.
_ocr_manager = OcrClientManager()
async def recognize_web_image(
    image_url: str = None,
    image: str = None,
    detect_direction: bool = None,
    pdf_page_number: int = None,
) -> dict:
    """Recognize text in an image with the Huawei Cloud OCR service.

    Exactly one of ``image_url`` and ``image`` must be supplied. The request
    is sent through the general text recognition API with ``language`` set
    to ``"auto"``.

    Args:
        image_url (str, optional): URL of the image (must be publicly
            accessible). Surrounding whitespace is stripped; an empty or
            whitespace-only value is treated as not provided.
        image (str, optional): Base64-encoded image content.
        detect_direction (bool, optional): Whether to detect the text
            direction. Only forwarded when not ``None``.
        pdf_page_number (int, optional): Page number of a PDF input,
            starting at 1. Only meaningful when the input is a PDF file;
            only forwarded when not ``None``.

    Returns:
        dict: On success, the OCR response converted with
        ``to_json_object()``. When the arguments are invalid,
        ``{"error": <message>}``. When the SDK raises
        ``ClientRequestException``, ``{"error": {...}}`` carrying
        ``status_code``, ``request_id``, ``error_code`` and ``error_msg``.

    Note:
        Only ``ClientRequestException`` is converted into an error dict;
        any other exception propagates to the caller.
    """
    # Normalize the URL *before* validating so that a whitespace-only value
    # is rejected here instead of being sent to the service as an empty URL.
    url = image_url.strip() if image_url else ""

    # Argument validation: exactly one image source is required.
    if not url and not image:
        return {"error": "必须提供 image_url 或 image 参数之一"}
    if url and image:
        return {"error": "image_url 和 image 参数不能同时提供"}

    # Build the request body fields; optional flags are only included when
    # the caller set them explicitly.
    body_kwargs: dict = {"language": "auto"}
    if detect_direction is not None:
        body_kwargs["detect_direction"] = detect_direction
    if url:
        body_kwargs["url"] = url
    else:
        body_kwargs["image"] = image
    if pdf_page_number is not None:
        body_kwargs["pdf_page_number"] = pdf_page_number

    try:
        client = _ocr_manager.get_client()
        # General text recognition is used for both URL and Base64 input.
        request = RecognizeGeneralTextRequest()
        request.body = GeneralTextRequestBody(**body_kwargs)
        # NOTE(review): this call is not awaited; if the client is the
        # synchronous SDK client it blocks the event loop - confirm.
        response = client.recognize_general_text(request)  # type: ignore
        return response.to_json_object()
    except exceptions.ClientRequestException as e:
        return {
            "error": {
                "status_code": e.status_code,
                "request_id": e.request_id,
                "error_code": e.error_code,
                "error_msg": e.error_msg,
            }
        }