// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "processors.h"
#include <algorithm>
#include <cmath>
#include <limits>
#include <numeric>
#include <stdexcept>
#include <unordered_map>
#include "src/utils/ilogger.h"
#include "src/utils/utility.h"
absl::StatusOr<int> Resize::GetInterp(const std::string &interp) {
static const std::unordered_map<std::string, int> interp_map = {
{"NEAREST", cv::INTER_NEAREST},
{"LINEAR", cv::INTER_LINEAR},
{"BICUBIC", cv::INTER_CUBIC},
{"AREA", cv::INTER_AREA},
{"LANCZOS4", cv::INTER_LANCZOS4}};
auto it = interp_map.find(interp);
if (it == interp_map.end())
    return absl::InvalidArgumentError("Unsupported interpolation type: " +
                                      interp);
return it->second;
}
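// Computes the rescaled size for keep-ratio resizing: the scale factor is
// chosen so that the longer image side fits the longer target edge and the
// shorter side fits the shorter target edge, whichever is more restrictive.
// Illustrative example (numbers are only an example): with img_size =
// {640, 480} and target_size_ = {1333, 800}, scale = min(1333/640, 800/480)
// ~= 1.667, so the rescaled size is {1067, 800}.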
std::pair<std::vector<int>, double>
Resize::RescaleSize(const std::vector<int> &img_size) const {
int img_w = img_size[0], img_h = img_size[1];
int target_w = target_size_[0], target_h = target_size_[1];
double scale = std::min(static_cast<double>(std::max(target_w, target_h)) /
std::max(img_w, img_h),
static_cast<double>(std::min(target_w, target_h)) /
std::min(img_w, img_h));
std::vector<int> rescaled_size = {
static_cast<int>(std::round(img_w * scale)),
static_cast<int>(std::round(img_h * scale))};
return std::make_pair(rescaled_size, scale);
}
absl::Status Resize::CheckImageSize() const {
if (target_size_.size() != 2) {
return absl::InvalidArgumentError("Size must be a vector of two elements.");
}
if (target_size_[0] <= 0 || target_size_[1] <= 0) {
return absl::InvalidArgumentError("Width and height must be positive.");
}
return absl::OkStatus();
}
Resize::Resize(const std::vector<int> &target_size, bool keep_ratio,
int size_divisor, const std::string &interp)
: keep_ratio_(keep_ratio), size_divisor_(size_divisor) {
if (target_size.size() == 1) {
target_size_ = {target_size[0], target_size[0]};
} else {
target_size_ = target_size;
}
absl::Status status = CheckImageSize();
if (!status.ok()) {
INFOE("image check fail : %s", status.ToString().c_str());
exit(-1);
}
std::string interp_upper = interp;
std::transform(interp_upper.begin(), interp_upper.end(), interp_upper.begin(),
::toupper);
auto interp_value = GetInterp(interp_upper);
if (!interp_value.ok()) {
INFOE("Unknown type: %s", interp_value.status().ToString().c_str());
exit(-1);
}
interp_ = interp_value.value();
}
absl::StatusOr<std::vector<cv::Mat>> Resize::Apply(std::vector<cv::Mat> &input,
const void *param) const {
std::vector<cv::Mat> out_imgs;
for (const auto &img : input) {
auto out = ResizeOne(img);
if (!out.ok())
return out.status();
out_imgs.push_back(std::move(out.value()));
}
return out_imgs;
}
absl::StatusOr<cv::Mat> Resize::ResizeOne(const cv::Mat &img) const {
if (img.empty()) {
return absl::InvalidArgumentError("Input image is empty.");
}
std::vector<int> cur_target = target_size_;
  cv::Size orig_size = img.size();
int orig_w = orig_size.width, orig_h = orig_size.height;
if (keep_ratio_) {
std::vector<int> wh = {orig_w, orig_h};
auto rescale = RescaleSize(wh);
cur_target = rescale.first;
}
if (size_divisor_ > 0) {
for (auto &x : cur_target) {
x = static_cast<int>(std::ceil(static_cast<double>(x) / size_divisor_)) *
size_divisor_;
}
}
cv::Mat out;
cv::resize(img, out, cv::Size(cur_target[0], cur_target[1]), 0, 0, interp_);
return out;
}
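// ResizeByShort scales an image so that its shorter edge equals
// target_short_edge_, optionally rounding both output dimensions up to a
// multiple of size_divisor_. Illustrative example (numbers are only an
// example): a 1200x900 image with target_short_edge_ = 736 and
// size_divisor_ = 32 is scaled by 736/900 to 981x736, then rounded up to
// 992x736.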
ResizeByShort::ResizeByShort(int target_short_edge, int size_divisor,
const std::string &interp)
: target_short_edge_(target_short_edge), size_divisor_(size_divisor) {
std::string interp_upper = interp;
std::transform(interp_upper.begin(), interp_upper.end(), interp_upper.begin(),
::toupper);
auto interp_value = Resize::GetInterp(interp_upper);
if (!interp_value.ok()) {
INFOE("Unknown type: %s", interp_value.status().ToString().c_str());
exit(-1);
}
interp_ = interp_value.value();
}
absl::StatusOr<std::vector<cv::Mat>>
ResizeByShort::Apply(std::vector<cv::Mat> &input, const void *param) const {
std::vector<cv::Mat> out_imgs;
for (auto &image : input) {
auto out = ResizeOne(image);
if (!out.ok())
return out.status();
out_imgs.push_back(std::move(out.value()));
}
return out_imgs;
}
absl::StatusOr<cv::Mat> ResizeByShort::ResizeOne(const cv::Mat &img) const {
if (img.empty()) {
return absl::InvalidArgumentError("Input image is empty.");
}
int h = img.size[0];
int w = img.size[1];
int short_edge = std::min(h, w);
  double scale = static_cast<double>(target_short_edge_) / short_edge;
int h_resize = static_cast<int>(std::round(h * scale));
int w_resize = static_cast<int>(std::round(w * scale));
if (size_divisor_ > 0) {
h_resize = static_cast<int>(std::ceil(h_resize / (float)size_divisor_)) *
size_divisor_;
w_resize = static_cast<int>(std::ceil(w_resize / (float)size_divisor_)) *
size_divisor_;
}
cv::Mat dst;
cv::resize(img, dst, cv::Size(w_resize, h_resize), 0, 0, interp_);
return dst;
}
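// ReadImage converts every input image to the configured target format. The
// inputs are assumed to already be in OpenCV's default BGR channel order
// (e.g. as produced by cv::imread); single-channel inputs are expanded and
// 3-channel inputs are converted or cloned as needed.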
ReadImage::ReadImage(const std::string &format) {
auto fmt = StringToFormat(format);
if (!fmt.ok()) {
    INFOE("%s", fmt.status().ToString().c_str());
exit(-1);
}
format_ = *fmt;
}
absl::StatusOr<std::vector<cv::Mat>>
ReadImage::Apply(std::vector<cv::Mat> &input, const void *param_ptr) const {
if (input.empty()) {
return absl::InvalidArgumentError("Input image vector is empty.");
}
std::vector<cv::Mat> output;
output.reserve(input.size());
for (size_t i = 0; i < input.size(); ++i) {
const cv::Mat &img = input[i];
if (img.empty()) {
return absl::InvalidArgumentError("Image at index " + std::to_string(i) +
" is empty.");
}
cv::Mat converted;
switch (format_) {
case Format::BGR:
if (img.channels() == 3) {
converted = img.clone();
} else if (img.channels() == 1) {
cv::cvtColor(img, converted, cv::COLOR_GRAY2BGR);
} else {
return absl::InvalidArgumentError("Image at index " +
std::to_string(i) +
" channel not supported for BGR.");
}
break;
case Format::RGB:
if (img.channels() == 3) {
cv::cvtColor(img, converted, cv::COLOR_BGR2RGB);
} else if (img.channels() == 1) {
cv::cvtColor(img, converted, cv::COLOR_GRAY2RGB);
} else {
return absl::InvalidArgumentError("Image at index " +
std::to_string(i) +
" channel not supported for RGB.");
}
break;
case Format::GRAY:
if (img.channels() == 3) {
cv::cvtColor(img, converted, cv::COLOR_BGR2GRAY);
} else if (img.channels() == 1) {
converted = img.clone();
} else {
return absl::InvalidArgumentError("Image at index " +
std::to_string(i) +
" channel not supported for GRAY.");
}
break;
default:
return absl::InvalidArgumentError("Unknown format.");
}
output.push_back(std::move(converted));
}
return output;
}
absl::StatusOr<ReadImage::Format>
ReadImage::StringToFormat(const std::string &format) {
if (format == "BGR")
return Format::BGR;
if (format == "RGB")
return Format::RGB;
if (format == "GRAY")
return Format::GRAY;
return absl::InvalidArgumentError("Unsupported format: " + format);
}
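// Converts a batch of HWC images to CHW layout. This overload reads pixels as
// cv::Vec3b, so the inputs must be CV_8UC3; each output is a 2-D CV_32F
// matrix of shape 3 x (rows * cols) holding the three planar channels in the
// input's channel order.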
absl::StatusOr<std::vector<cv::Mat>>
ToCHWImage::operator()(const std::vector<cv::Mat> &imgs_batch) {
  std::vector<cv::Mat> chw_imgs;
for (const auto &img : imgs_batch) {
if (img.empty()) {
return absl::InvalidArgumentError("Input image is empty!");
}
    if (img.channels() != 3 || img.depth() != CV_8U) {
      return absl::InvalidArgumentError(
          "Input image must be CV_8UC3 (3-channel HWC format)!");
    }
cv::Mat chw_img(3, img.rows * img.cols, CV_32F);
float *ptr = chw_img.ptr<float>();
for (int h = 0; h < img.rows; ++h) {
for (int w = 0; w < img.cols; ++w) {
const cv::Vec3b &pixel = img.at<cv::Vec3b>(h, w);
ptr[0 * img.total() + h * img.cols + w] = pixel[0];
ptr[1 * img.total() + h * img.cols + w] = pixel[1];
ptr[2 * img.total() + h * img.cols + w] = pixel[2];
}
}
chw_imgs.push_back(chw_img);
}
return chw_imgs;
}
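// Normalize applies out = (pixel * scale - mean) / std per channel. The
// per-channel affine coefficients are precomputed as alpha = scale / std and
// beta = -mean / std, so the hot path is a single multiply-add per element.
//
// Illustrative usage (the statistics below are the common ImageNet values and
// are only an example, not a project default):
//   Normalize norm(1.0f / 255.0f, {0.485f, 0.456f, 0.406f},
//                  {0.229f, 0.224f, 0.225f});
//   auto normed = norm.Apply(imgs, nullptr);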
Normalize::Normalize(float scale, const std::vector<float> &mean,
const std::vector<float> &std)
: alpha_(CHANNEL), beta_(CHANNEL) {
assert(mean.size() == CHANNEL && std.size() == CHANNEL);
for (size_t i = 0; i < CHANNEL; ++i) {
alpha_[i] = scale / std.at(i);
beta_[i] = -mean.at(i) / std.at(i);
}
}
Normalize::Normalize(float scale, const float &mean, const float &std)
: alpha_(CHANNEL), beta_(CHANNEL) {
for (size_t i = 0; i < CHANNEL; ++i) {
alpha_[i] = scale / std;
beta_[i] = -mean / std;
}
}
absl::StatusOr<cv::Mat> Normalize::NormalizeOne(const cv::Mat &image) const {
if (image.empty()) {
return absl::InvalidArgumentError("Input image is empty.");
}
if (image.channels() != CHANNEL) {
return absl::InvalidArgumentError("Input image must have 3 dims");
}
if (image.depth() != CV_8U && image.depth() != CV_32F) {
return absl::InvalidArgumentError("Input image must be CV_8U or CV_32F.");
}
cv::Mat input;
if (image.depth() == CV_8U) {
image.convertTo(input, CV_32F);
} else {
    // Already CV_32F; clone so the caller's image is left untouched.
    input = image.clone();
}
if (input.channels() == CHANNEL) {
cv::Mat processed = input;
std::vector<cv::Mat> channels(input.channels());
cv::split(processed, channels);
for (int c = 0; c < input.channels(); ++c) {
channels[c] = channels[c] * alpha_[c] + beta_[c];
}
cv::merge(channels, processed);
return processed;
  } else {
    // Defensive path for interleaved N-D float blobs (unreachable after the
    // channel check above).
assert(input.isContinuous());
int total = 1;
for (int i = 0; i < input.dims - 1; i++) {
total *= input.size[i];
}
float *data = input.ptr<float>();
for (int i = 0; i < total; i++) {
float *group = data + i * CHANNEL;
for (int j = 0; j < CHANNEL; j++) {
group[j] = group[j] * alpha_[j] + beta_[j];
}
}
return input;
}
}
absl::StatusOr<std::vector<cv::Mat>>
Normalize::Apply(std::vector<cv::Mat> &input, const void *param) const {
std::vector<cv::Mat> results_norm;
results_norm.reserve(input.size());
for (const auto &img : input) {
auto norm_single = NormalizeOne(img);
if (!norm_single.ok()) {
return norm_single.status();
}
results_norm.emplace_back(norm_single.value());
}
return results_norm;
}
NormalizeImage::NormalizeImage(float scale, const std::vector<float> &mean,
const std::vector<float> &std)
: alpha_(CHANNEL), beta_(CHANNEL) {
assert(mean.size() == CHANNEL && std.size() == CHANNEL);
for (size_t i = 0; i < CHANNEL; ++i) {
alpha_[i] = scale / std.at(i);
beta_[i] = -mean.at(i) / std.at(i);
}
}
absl::StatusOr<cv::Mat> NormalizeImage::Normalize(const cv::Mat &img) const {
if (img.empty()) {
return absl::InvalidArgumentError("Input image is empty.");
}
if (img.channels() != CHANNEL) {
return absl::InvalidArgumentError("Input image must have 3 channels.");
}
if (img.depth() != CV_8U && img.depth() != CV_32F) {
return absl::InvalidArgumentError("Input image must be CV_8U or CV_32F.");
}
cv::Mat input;
if (img.depth() == CV_8U) {
img.convertTo(input, CV_32F);
} else {
input = img.clone();
}
cv::Mat processed = input;
std::vector<cv::Mat> channels(CHANNEL);
cv::split(processed, channels);
for (int c = 0; c < CHANNEL; ++c) {
channels[c] = channels[c] * alpha_[c] + beta_[c];
}
cv::merge(channels, processed);
return processed;
}
absl::StatusOr<std::vector<cv::Mat>>
NormalizeImage::Apply(std::vector<cv::Mat> &imgs, const void *param) const {
std::vector<cv::Mat> results;
results.reserve(imgs.size());
for (const auto &img : imgs) {
auto normed = this->Normalize(img);
if (!normed.ok()) {
return normed.status();
}
results.push_back(std::move(normed).value());
}
return results;
}
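// HWC -> CHW conversion used in the processor chain: split the interleaved
// image into per-channel planes, flatten each plane to a single row, hconcat
// them into one contiguous row, and reshape the result to a 3-D Mat of shape
// {channels, rows, cols}. Unlike operator(), this path keeps the input depth
// (typically CV_32F after Normalize) unchanged.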
absl::StatusOr<std::vector<cv::Mat>>
ToCHWImage::Apply(std::vector<cv::Mat> &input, const void *param) const {
std::vector<cv::Mat> chw_imgs;
for (const auto &img : input) {
if (img.empty()) {
return absl::InvalidArgumentError("Input image is empty!");
}
if (img.channels() != 3) {
return absl::InvalidArgumentError(
"Input image must have 3 channels (HWC format)!");
}
std::vector<cv::Mat> vec_split = {};
cv::split(img, vec_split);
cv::Mat chw_img;
for (auto &split : vec_split)
split = split.reshape(1, 1);
cv::hconcat(vec_split, chw_img);
std::vector<int> shape = {img.channels(), img.size[0], img.size[1]};
chw_img = chw_img.reshape(1, shape);
chw_imgs.push_back(chw_img);
}
return chw_imgs;
}
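// Packs same-sized images into a single 4-D NHWC tensor (CV_32F), converting
// each image to float if necessary. Element access goes through cv::Mat::at
// with a 4-component index, which is simple but not the fastest possible
// packing.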
absl::StatusOr<std::vector<cv::Mat>>
ToBatch::operator()(const std::vector<cv::Mat> &imgs) const {
if (imgs.empty()) {
return absl::InvalidArgumentError("Input image vector is empty.");
}
const int batch = imgs.size();
const int rows = imgs[0].rows;
const int cols = imgs[0].cols;
const int channels = imgs[0].channels();
for (size_t i = 0; i < imgs.size(); ++i) {
if (imgs[i].rows != rows || imgs[i].cols != cols ||
imgs[i].channels() != channels) {
return absl::InvalidArgumentError(
"All images must have the same size and number of channels.");
}
}
std::vector<int> sizes = {batch, rows, cols, channels};
cv::Mat out(4, sizes.data(), CV_32F);
for (int b = 0; b < batch; ++b) {
cv::Mat img_float;
if (imgs[b].depth() != CV_32F) {
imgs[b].convertTo(img_float, CV_32F);
} else {
img_float = imgs[b];
}
for (int r = 0; r < rows; ++r) {
for (int c = 0; c < cols; ++c) {
if (channels == 1) {
float v = img_float.at<float>(r, c);
int idx[4] = {b, r, c, 0};
out.at<float>(idx) = v;
} else if (channels == 3) {
cv::Vec3f v = img_float.at<cv::Vec3f>(r, c);
for (int ch = 0; ch < 3; ++ch) {
int idx[4] = {b, r, c, ch};
out.at<float>(idx) = v[ch];
}
} else {
const float *pix = img_float.ptr<float>(r, c);
for (int ch = 0; ch < channels; ++ch) {
int idx[4] = {b, r, c, ch};
out.at<float>(idx) = pix[ch];
}
}
}
}
}
std::vector<cv::Mat> result{out};
return result;
}
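// Stacks N images of identical shape into one Mat whose first dimension is
// the batch size: every image is viewed as a single row, the rows are stacked
// with cv::vconcat into an N x (total elements) matrix, and the result is
// reshaped to {N, size[0], size[1], ...}. For CHW inputs this yields an NCHW
// blob.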
absl::StatusOr<std::vector<cv::Mat>> ToBatch::Apply(std::vector<cv::Mat> &input,
const void *param) const {
if (input.empty()) {
return absl::InvalidArgumentError("Input image vector is empty.");
}
  std::vector<int> batch_shape = {static_cast<int>(input.size())};
  for (int i = 0; i < input[0].dims; i++) {
    batch_shape.emplace_back(input[0].size[i]);
  }
  for (const auto &image : input) {
    if (image.dims != input[0].dims) {
      return absl::InvalidArgumentError("All images must have the same dims.");
    }
    for (int i = 0; i < input[0].dims; i++) {
      if (image.size[i] != input[0].size[i]) {
        return absl::InvalidArgumentError(
            "All images must have the same size and number of channels.");
      }
    }
  }
  // Flatten each image to a single row without modifying the caller's mats,
  // stack the rows, then reshape back to {batch, ...original dims...}.
  std::vector<cv::Mat> flattened;
  flattened.reserve(input.size());
  for (const auto &image : input) {
    flattened.push_back(image.reshape(1, 1));
  }
  cv::Mat batch_out;
  cv::vconcat(flattened, batch_out);
  batch_out = batch_out.reshape(1, batch_shape);
std::vector<cv::Mat> out = {batch_out};
return out;
}
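// Rotates the image counter-clockwise by `angle` degrees around its center
// and enlarges the canvas so nothing is clipped: the new extent is
// new_w = h*|sin| + w*|cos| and new_h = h*|cos| + w*|sin|, and the rotation
// matrix's translation terms are shifted so the rotated content stays
// centered.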
absl::StatusOr<cv::Mat> ComponentsProcessor::RotateImage(const cv::Mat &image,
int angle) {
if (image.empty() || image.channels() != 3) {
return absl::InvalidArgumentError("image is invalid");
}
if (angle < 0 || angle >= 360) {
return absl::InvalidArgumentError("`angle` should be in range [0, 360)");
}
  if (angle == 0) {
return image.clone();
}
int h = image.rows;
int w = image.cols;
cv::Point2f center(w / 2.0f, h / 2.0f);
double scale = 1.0;
cv::Mat rot_mat = cv::getRotationMatrix2D(center, angle, scale);
double abs_cos = std::abs(rot_mat.at<double>(0, 0));
double abs_sin = std::abs(rot_mat.at<double>(0, 1));
int new_w = int(h * abs_sin + w * abs_cos);
int new_h = int(h * abs_cos + w * abs_sin);
rot_mat.at<double>(0, 2) += (new_w - w) / 2.0;
rot_mat.at<double>(1, 2) += (new_h - h) / 2.0;
cv::Mat rotated;
cv::warpAffine(image, rotated, rot_mat, cv::Size(new_w, new_h),
cv::INTER_CUBIC);
return rotated;
}
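// Sorts quadrilateral boxes into reading order: primary sort by the y
// coordinate of the first corner (top to bottom), then a local pass that
// swaps neighbours whose y coordinates differ by less than 10 pixels so that
// boxes on roughly the same text line end up ordered left to right.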
std::vector<std::vector<cv::Point2f>> ComponentsProcessor::SortQuadBoxes(
const std::vector<std::vector<cv::Point2f>> &dt_polys) {
std::vector<std::vector<cv::Point2f>> dt_boxes = dt_polys;
std::sort(
dt_boxes.begin(), dt_boxes.end(),
[](const std::vector<cv::Point2f> &a, const std::vector<cv::Point2f> &b) {
return (a[0].y < b[0].y) || (a[0].y == b[0].y && a[0].x < b[0].x);
});
  for (size_t i = 0; i + 1 < dt_boxes.size(); ++i) {
for (size_t j = i + 1; j > 0; --j) {
if (std::abs(dt_boxes[j][0].y - dt_boxes[j - 1][0].y) < 10 &&
dt_boxes[j][0].x < dt_boxes[j - 1][0].x) {
std::swap(dt_boxes[j], dt_boxes[j - 1]);
} else {
break;
}
}
}
return dt_boxes;
}
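// Sorts polygon boxes top to bottom by the minimum y coordinate of each
// polygon, using an index-ranking sort so the polygons themselves are copied
// only once into the output order.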
std::vector<std::vector<cv::Point2f>> ComponentsProcessor::SortPolyBoxes(
const std::vector<std::vector<cv::Point2f>> &dt_polys) {
size_t num_boxes = dt_polys.size();
if (num_boxes == 0)
return dt_polys;
  std::vector<float> y_min_list(num_boxes);
  for (size_t i = 0; i < num_boxes; ++i) {
    float y_min = dt_polys[i][0].y;
for (size_t j = 1; j < dt_polys[i].size(); ++j) {
if (dt_polys[i][j].y < y_min) {
y_min = dt_polys[i][j].y;
}
}
y_min_list[i] = y_min;
}
std::vector<size_t> rank(num_boxes);
std::iota(rank.begin(), rank.end(), 0);
std::sort(rank.begin(), rank.end(),
[&](size_t a, size_t b) { return y_min_list[a] < y_min_list[b]; });
std::vector<std::vector<cv::Point2f>> dt_polys_rank(num_boxes);
for (size_t i = 0; i < num_boxes; ++i) {
dt_polys_rank[i] = dt_polys[rank[i]];
}
return dt_polys_rank;
}
std::vector<std::array<float, 4>> ComponentsProcessor::ConvertPointsToBoxes(
const std::vector<std::vector<cv::Point2f>> &dt_polys) {
std::vector<std::array<float, 4>> dt_boxes;
for (const auto &poly : dt_polys) {
if (poly.empty()) {
continue;
}
float left = std::numeric_limits<float>::max();
float right = std::numeric_limits<float>::lowest();
float top = std::numeric_limits<float>::max();
float bottom = std::numeric_limits<float>::lowest();
for (const auto &pt : poly) {
if (pt.x < left)
left = pt.x;
if (pt.x > right)
right = pt.x;
if (pt.y < top)
top = pt.y;
if (pt.y > bottom)
bottom = pt.y;
}
dt_boxes.push_back({left, top, right, bottom});
}
return dt_boxes;
}
CropByPolys::CropByPolys(const std::string &box_type) {
assert(box_type == "quad" || box_type == "poly");
if (box_type == "quad") {
box_type_ = DetBoxType::kQuad;
} else {
box_type_ = DetBoxType::kPoly;
}
}
absl::StatusOr<std::vector<cv::Mat>>
CropByPolys::operator()(const cv::Mat &img,
const std::vector<std::vector<cv::Point2f>> &dt_polys) {
if (img.empty())
return absl::InvalidArgumentError("Input image is empty.");
std::vector<cv::Mat> output_list;
try {
if (box_type_ == DetBoxType::kQuad) {
for (const auto &poly : dt_polys) {
auto out = GetMinAreaRectCrop(img, poly);
if (!out.ok())
return out.status();
output_list.push_back(*out);
}
} else if (box_type_ == DetBoxType::kPoly) {
for (const auto &poly : dt_polys) {
auto out = GetPolyRectCrop(img, poly);
if (!out.ok())
return out.status();
output_list.push_back(*out);
}
} else {
return absl::UnimplementedError("Unknown box type.");
}
} catch (const std::exception &e) {
return absl::InternalError(std::string("Exception: ") + e.what());
}
return output_list;
}
absl::StatusOr<cv::Mat>
CropByPolys::GetMinAreaRectCrop(const cv::Mat &img,
const std::vector<cv::Point2f> &points) const {
if (points.size() < 4)
return absl::InvalidArgumentError("Less than 4 points for min area rect.");
std::vector<cv::Point2f> box = GetMinAreaRectPoints(points);
return GetRotateCropImage(img, box);
}
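// Warps the quadrilateral `box` (ordered top-left, top-right, bottom-right,
// bottom-left) onto an axis-aligned rectangle via a perspective transform. If
// the resulting crop is much taller than wide (height/width >= 1.5) it is
// rotated 90 degrees counter-clockwise, a common heuristic for vertical text.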
absl::StatusOr<cv::Mat>
CropByPolys::GetRotateCropImage(const cv::Mat &img,
const std::vector<cv::Point2f> &box) const {
if (box.size() != 4)
return absl::InvalidArgumentError("Box must have 4 points.");
float widthTop = cv::norm(box[0] - box[1]);
float widthBottom = cv::norm(box[2] - box[3]);
float maxWidth = std::max(widthTop, widthBottom);
float heightLeft = cv::norm(box[0] - box[3]);
float heightRight = cv::norm(box[1] - box[2]);
float maxHeight = std::max(heightLeft, heightRight);
std::vector<cv::Point2f> dst = {
cv::Point2f(0, 0), cv::Point2f(maxWidth - 1, 0),
cv::Point2f(maxWidth - 1, maxHeight - 1), cv::Point2f(0, maxHeight - 1)};
cv::Mat M = cv::getPerspectiveTransform(box, dst);
cv::Mat out;
cv::warpPerspective(img, out, M, cv::Size((int)maxWidth, (int)maxHeight),
cv::INTER_CUBIC, cv::BORDER_REPLICATE);
if (out.rows != 0 && 1.0 * out.rows / out.cols >= 1.5)
cv::rotate(out, out, cv::ROTATE_90_COUNTERCLOCKWISE);
return out;
}
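// Computes the minimum-area rotated rectangle of the polygon and returns its
// four corners reordered as top-left, top-right, bottom-right, bottom-left,
// which is the ordering GetRotateCropImage expects.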
std::vector<cv::Point2f>
CropByPolys::GetMinAreaRectPoints(const std::vector<cv::Point2f> &poly) const {
auto pts = poly;
if (pts.size() < 4)
return {};
cv::RotatedRect minRect = cv::minAreaRect(pts);
std::vector<cv::Point2f> box(4);
minRect.points(box.data());
std::sort(box.begin(), box.end(),
[](const cv::Point2f &a, const cv::Point2f &b) {
return a.x < b.x || (a.x == b.x && a.y < b.y);
});
size_t index_a = 0, index_d = 1;
if (box[1].y > box[0].y) {
index_a = 0;
index_d = 1;
} else {
index_a = 1;
index_d = 0;
}
size_t index_b = 2, index_c = 3;
if (box[3].y > box[2].y) {
index_b = 2;
index_c = 3;
} else {
index_b = 3;
index_c = 2;
}
return {box[index_a], box[index_b], box[index_c], box[index_d]};
}
absl::StatusOr<cv::Mat>
CropByPolys::GetPolyRectCrop(const cv::Mat &img,
const std::vector<cv::Point2f> &poly) const {
if (poly.size() < 4)
return absl::InvalidArgumentError(
"Less than 4 points for GetPolyRectCrop.");
  // Compute the IoU between the polygon and its minimum-area bounding
  // rectangle; a high IoU means the min-area-rect crop is a faithful
  // approximation of the polygon region.
  std::vector<cv::Point2f> minrect = GetMinAreaRectPoints(poly);
  if (minrect.size() != 4)
    return absl::InternalError("Failed to get min-area rect.");
  double iou = IoU(poly, minrect);
  // Currently unused: the direct min-area-rect crop is always returned. If
  // the IoU were low, a more elaborate rectification (e.g. perspective
  // correction or a thin-plate-spline based rectifier) could be applied
  // instead.
  (void)iou;
  auto crop_result = GetRotateCropImage(img, minrect);
  if (!crop_result.ok())
    return crop_result.status();
  return *crop_result;
}
const double CropByPolys::SCALE = 10000.0;
ClipperLib::Path
CropByPolys::CvPolyToClipperPath(const std::vector<cv::Point2f> &poly) {
ClipperLib::Path path;
for (const auto &pt : poly)
path.emplace_back(static_cast<ClipperLib::cInt>(std::round(pt.x * SCALE)),
static_cast<ClipperLib::cInt>(std::round(pt.y * SCALE)));
return path;
}
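// Polygon IoU via Clipper: both polygons are converted to integer paths by
// scaling coordinates with SCALE, intersection and union areas are computed
// with ClipperLib::Area, rescaled back by SCALE*SCALE, and their ratio is
// returned (0 when the union area is negligible).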
double CropByPolys::IoU(const std::vector<cv::Point2f> &poly1,
const std::vector<cv::Point2f> &poly2) {
auto path1 = CvPolyToClipperPath(poly1);
auto path2 = CvPolyToClipperPath(poly2);
ClipperLib::Paths inter_solution, union_solution;
ClipperLib::Clipper c_inter, c_union;
c_inter.AddPath(path1, ClipperLib::ptSubject, true);
c_inter.AddPath(path2, ClipperLib::ptClip, true);
c_inter.Execute(ClipperLib::ctIntersection, inter_solution,
ClipperLib::pftNonZero, ClipperLib::pftNonZero);
double area_inter = 0.0;
for (const auto &p : inter_solution)
area_inter += std::fabs(ClipperLib::Area(p));
c_union.AddPath(path1, ClipperLib::ptSubject, true);
c_union.AddPath(path2, ClipperLib::ptClip, true);
c_union.Execute(ClipperLib::ctUnion, union_solution, ClipperLib::pftNonZero,
ClipperLib::pftNonZero);
double area_union = 0.0;
for (const auto &p : union_solution)
area_union += std::fabs(ClipperLib::Area(p));
area_inter /= (SCALE * SCALE);
area_union /= (SCALE * SCALE);
if (area_union < 1e-8)
return 0.0;
return area_inter / area_union;
}