From d0d9bd20ed03e6559ef61aa2ac1aceaa6245a64a Mon Sep 17 00:00:00 2001 From: Kumataro Date: Thu, 16 Oct 2025 18:03:02 +0900 Subject: [PATCH] Merge pull request #27890 from Kumataro:fix26899 core: support 16 bit LUT #27890 Close https://github.com/opencv/opencv/issues/26899 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- modules/core/include/opencv2/core.hpp | 6 +- modules/core/src/hal_replacement.hpp | 26 +++++ modules/core/src/lut.cpp | 148 +++++++++++++------------- modules/core/test/test_arithm.cpp | 121 +++++++++++++-------- 4 files changed, 176 insertions(+), 125 deletions(-) diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index 153bd17320..7f73c71388 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -540,9 +540,9 @@ The function LUT fills the output array with values from the look-up table. Indi are taken from the input array. That is, the function processes each element of src as follows: \f[\texttt{dst} (I) \leftarrow \texttt{lut(src(I) + d)}\f] where -\f[d = \fork{0}{if \(\texttt{src}\) has depth \(\texttt{CV_8U}\)}{128}{if \(\texttt{src}\) has depth \(\texttt{CV_8S}\)}\f] -@param src input array of 8-bit elements. 
-@param lut look-up table of 256 elements; in case of multi-channel input array, the table should +\f[d = \forkthree{0}{if \(\texttt{src}\) has depth \(\texttt{CV_8U}\) or \(\texttt{CV_16U}\)}{128}{if \(\texttt{src}\) has depth \(\texttt{CV_8S}\)}{32768}{if \(\texttt{src}\) has depth \(\texttt{CV_16S}\)}\f] +@param src input array of 8-bit or 16-bit integer elements. +@param lut look-up table of 256 elements (if src has depth CV_8U or CV_8S) or 65536 elements (if src has depth CV_16U or CV_16S); in case of multi-channel input array, the table should either have a single channel (in this case the same table is used for all channels) or the same number of channels as in the input array. @param dst output array of the same size and number of channels as src, and the same depth as lut. diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp index f809351550..0b4b660667 100644 --- a/modules/core/src/hal_replacement.hpp +++ b/modules/core/src/hal_replacement.hpp @@ -282,6 +282,32 @@ inline int hal_ni_lut(const uchar *src_data, size_t src_step, size_t src_type, c #define cv_hal_lut hal_ni_lut //! @endcond +/** +Lookup table replacement +Table consists of 65536 elements of a size from 1 to 8 bytes having 1 channel or src_channels +For 16s input type 32768 is added to LUT index +Destination should have the same element type and number of channels as lookup table elements +@param src_data Source image data +@param src_step Source image step +@param src_type Source image type +@param lut_data Pointer to lookup table +@param lut_channel_size Size of each channel in bytes +@param lut_channels Number of channels in lookup table +@param dst_data Destination data +@param dst_step Destination step +@param width Width of images +@param height Height of images +@sa LUT +*/ +//! @addtogroup core_hal_interface_lut16 Lookup table for 16 bit index +//! 
@{ +inline int hal_ni_lut16(const ushort *src_data, size_t src_step, size_t src_type, const ushort* lut_data, size_t lut_channel_size, size_t lut_channels, uchar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @} + +//! @cond IGNORED +#define cv_hal_lut16 hal_ni_lut16 +//! @endcond + /** Hamming norm of a vector @param a pointer to vector data diff --git a/modules/core/src/lut.cpp b/modules/core/src/lut.cpp index 090ba50d5e..30958fca92 100644 --- a/modules/core/src/lut.cpp +++ b/modules/core/src/lut.cpp @@ -6,6 +6,7 @@ #include "precomp.hpp" #include "opencl_kernels_core.hpp" #include "convert.hpp" +#include /****************************************************************************************\ * LUT Transform * @@ -14,8 +15,8 @@ namespace cv { -template static void -LUT8u_( const uchar* src, const T* lut, T* dst, int len, int cn, int lutcn ) +template static void +LUT_( const Ti* src, const T* lut, T* dst, const int len, const int cn, const int lutcn ) { if( lutcn == 1 ) { @@ -30,53 +31,45 @@ LUT8u_( const uchar* src, const T* lut, T* dst, int len, int cn, int lutcn ) } } -static void LUT8u_8u( const uchar* src, const uchar* lut, uchar* dst, int len, int cn, int lutcn ) -{ - LUT8u_( src, lut, dst, len, cn, lutcn ); -} - -static void LUT8u_8s( const uchar* src, const schar* lut, schar* dst, int len, int cn, int lutcn ) -{ - LUT8u_( src, lut, dst, len, cn, lutcn ); -} - -static void LUT8u_16u( const uchar* src, const ushort* lut, ushort* dst, int len, int cn, int lutcn ) -{ - LUT8u_( src, lut, dst, len, cn, lutcn ); -} - -static void LUT8u_16s( const uchar* src, const short* lut, short* dst, int len, int cn, int lutcn ) -{ - LUT8u_( src, lut, dst, len, cn, lutcn ); -} - -static void LUT8u_32s( const uchar* src, const int* lut, int* dst, int len, int cn, int lutcn ) -{ - LUT8u_( src, lut, dst, len, cn, lutcn ); -} - -static void LUT8u_16f( const uchar* src, const hfloat* lut, hfloat* dst, int len, int cn, int lutcn 
) -{ - LUT8u_( src, lut, dst, len, cn, lutcn ); -} - -static void LUT8u_32f( const uchar* src, const float* lut, float* dst, int len, int cn, int lutcn ) -{ - LUT8u_( src, lut, dst, len, cn, lutcn ); -} - -static void LUT8u_64f( const uchar* src, const double* lut, double* dst, int len, int cn, int lutcn ) -{ - LUT8u_( src, lut, dst, len, cn, lutcn ); -} - typedef void (*LUTFunc)( const uchar* src, const uchar* lut, uchar* dst, int len, int cn, int lutcn ); -static LUTFunc lutTab[CV_DEPTH_MAX] = +static LUTFunc getLUTFunc(const int srcDepth, const int dstDepth) { - (LUTFunc)LUT8u_8u, (LUTFunc)LUT8u_8s, (LUTFunc)LUT8u_16u, (LUTFunc)LUT8u_16s, - (LUTFunc)LUT8u_32s, (LUTFunc)LUT8u_32f, (LUTFunc)LUT8u_64f, (LUTFunc)LUT8u_16f -}; + LUTFunc ret = nullptr; + if((srcDepth == CV_8U) || (srcDepth == CV_8S)) + { + switch(dstDepth) + { + case CV_8U: ret = (LUTFunc)LUT_; break; + case CV_8S: ret = (LUTFunc)LUT_; break; + case CV_16U: ret = (LUTFunc)LUT_; break; + case CV_16S: ret = (LUTFunc)LUT_; break; + case CV_32S: ret = (LUTFunc)LUT_; break; + case CV_32F: ret = (LUTFunc)LUT_; break; // float + case CV_64F: ret = (LUTFunc)LUT_; break; // double + case CV_16F: ret = (LUTFunc)LUT_; break; // hfloat + default: ret = nullptr; break; + } + } + else if((srcDepth == CV_16U) || (srcDepth == CV_16S)) + { + switch(dstDepth) + { + case CV_8U: ret = (LUTFunc)LUT_; break; + case CV_8S: ret = (LUTFunc)LUT_; break; + case CV_16U: ret = (LUTFunc)LUT_; break; + case CV_16S: ret = (LUTFunc)LUT_; break; + case CV_32S: ret = (LUTFunc)LUT_; break; + case CV_32F: ret = (LUTFunc)LUT_; break; // float + case CV_64F: ret = (LUTFunc)LUT_; break; // double + case CV_16F: ret = (LUTFunc)LUT_; break; // hfloat + default: ret = nullptr; break; + } + } + + CV_CheckTrue(ret != nullptr, "An unexpected type combination was specified."); + return ret; +} #ifdef HAVE_OPENCL @@ -107,24 +100,19 @@ static bool ocl_LUT(InputArray _src, InputArray _lut, OutputArray _dst) class LUTParallelBody : public 
ParallelLoopBody { public: - bool* ok; const Mat& src_; const Mat& lut_; Mat& dst_; - LUTFunc func; + LUTFunc func_; - LUTParallelBody(const Mat& src, const Mat& lut, Mat& dst, bool* _ok) - : ok(_ok), src_(src), lut_(lut), dst_(dst) + LUTParallelBody(const Mat& src, const Mat& lut, Mat& dst, LUTFunc func) + : src_(src), lut_(lut), dst_(dst), func_(func) { - func = lutTab[lut.depth()]; - *ok = (func != NULL); } void operator()( const cv::Range& range ) const CV_OVERRIDE { - CV_Assert(*ok); - const int row0 = range.start; const int row1 = range.end; @@ -140,7 +128,7 @@ public: int len = (int)it.size; for( size_t i = 0; i < it.nplanes; i++, ++it ) - func(ptrs[0], lut_.ptr(), ptrs[1], len, cn, lutcn); + func_(ptrs[0], lut_.ptr(), ptrs[1], len, cn, lutcn); } private: LUTParallelBody(const LUTParallelBody&); @@ -155,39 +143,47 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst ) int cn = _src.channels(), depth = _src.depth(); int lutcn = _lut.channels(); + const size_t lut_size = _lut.total(); - CV_Assert( (lutcn == cn || lutcn == 1) && - _lut.total() == 256 && _lut.isContinuous() && - (depth == CV_8U || depth == CV_8S) ); + CV_Assert( (lutcn == cn || lutcn == 1) && _lut.isContinuous() && + ( + ((lut_size == 256) && ((depth == CV_8U)||(depth == CV_8S))) || + ((lut_size == 65536) && ((depth == CV_16U)||(depth == CV_16S))) + ) + ); - CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2, + CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && (lut_size == 256), ocl_LUT(_src, _lut, _dst)) Mat src = _src.getMat(), lut = _lut.getMat(); _dst.create(src.dims, src.size, CV_MAKETYPE(_lut.depth(), cn)); Mat dst = _dst.getMat(); - CALL_HAL(LUT, cv_hal_lut, src.data, src.step, src.type(), lut.data, - lut.elemSize1(), lutcn, dst.data, dst.step, src.cols, src.rows); + if(lut_size == 256) + { + CALL_HAL(LUT, cv_hal_lut, src.data, src.step, src.type(), lut.data, + lut.elemSize1(), lutcn, dst.data, dst.step, src.cols, src.rows); + } + else + { + CALL_HAL(LUT16, cv_hal_lut16, src.ptr(), 
src.step, src.type(), lut.ptr(), + lut.elemSize1(), lutcn, dst.data, dst.step, src.cols, src.rows); + } + + const LUTFunc func = getLUTFunc(src.depth(), dst.depth()); + CV_Assert( func != nullptr ); if (_src.dims() <= 2) { - bool ok = false; - LUTParallelBody body(src, lut, dst, &ok); - if (ok) - { - Range all(0, dst.rows); - if (dst.total() >= (size_t)(1<<18)) - parallel_for_(all, body, (double)std::max((size_t)1, dst.total()>>16)); - else - body(all); - if (ok) - return; - } - } + LUTParallelBody body(src, lut, dst, func); + Range all(0, dst.rows); + if (dst.total() >= (size_t)(1<<18)) + parallel_for_(all, body, (double)std::max((size_t)1, dst.total()>>16)); + else + body(all); - LUTFunc func = lutTab[lut.depth()]; - CV_Assert( func != 0 ); + return; + } const Mat* arrays[] = {&src, &dst, 0}; uchar* ptrs[2] = {}; diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index 88d646b09f..d2ce1f03fd 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -3221,11 +3221,12 @@ INSTANTIATE_TEST_CASE_P(Core_CartPolar, Core_PolarToCart_inplace, ) ); -CV_ENUM(LutMatType, CV_8U, CV_16U, CV_16F, CV_32S, CV_32F, CV_64F) +CV_ENUM(LutIdxType, CV_8U, CV_8S, CV_16U, CV_16S) +CV_ENUM(LutMatType, CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F, CV_16F) -struct Core_LUT: public testing::TestWithParam +struct Core_LUT: public testing::TestWithParam< std::tuple > { - template + template cv::Mat referenceWithType(cv::Mat input, cv::Mat table) { cv::Mat ref(input.size(), CV_MAKE_TYPE(table.depth(), ch)); @@ -3235,7 +3236,7 @@ struct Core_LUT: public testing::TestWithParam { if(ch == 1) { - ref.at(i, j) = table.at(input.at(i, j)); + ref.at(i, j) = table.at(input.at(i, j)); } else { @@ -3244,11 +3245,11 @@ struct Core_LUT: public testing::TestWithParam { if (same_cn) { - val[k] = table.at>(input.at>(i, j)[k])[k]; + val[k] = table.at>(input.at>(i, j)[k])[k]; } else { - val[k] = table.at(input.at>(i, j)[k]); + val[k] = 
table.at(input.at>(i, j)[k]); } } ref.at>(i, j) = val; @@ -3261,86 +3262,114 @@ struct Core_LUT: public testing::TestWithParam template cv::Mat reference(cv::Mat input, cv::Mat table) { - if (table.depth() == CV_8U) + cv::Mat ret = cv::Mat(); + if ((input.depth() == CV_8U) || (input.depth() == CV_8S)) // Index type for LUT operation { - return referenceWithType(input, table); + switch(table.depth()) // Value type for LUT operation + { + case CV_8U: ret = referenceWithType(input, table); break; + case CV_8S: ret = referenceWithType(input, table); break; + case CV_16U: ret = referenceWithType(input, table); break; + case CV_16S: ret = referenceWithType(input, table); break; + case CV_32S: ret = referenceWithType(input, table); break; + case CV_32F: ret = referenceWithType(input, table); break; + case CV_64F: ret = referenceWithType(input, table); break; + case CV_16F: ret = referenceWithType(input, table); break; + default: ret = cv::Mat(); break; + } } - else if (table.depth() == CV_16U) + else if ((input.depth() == CV_16U) || (input.depth() == CV_16S)) { - return referenceWithType(input, table); - } - else if (table.depth() == CV_16F) - { - return referenceWithType(input, table); - } - else if (table.depth() == CV_32S) - { - return referenceWithType(input, table); - } - else if (table.depth() == CV_32F) - { - return referenceWithType(input, table); - } - else if (table.depth() == CV_64F) - { - return referenceWithType(input, table); + switch(table.depth()) // Value type for LUT operation + { + case CV_8U: ret = referenceWithType(input, table); break; + case CV_8S: ret = referenceWithType(input, table); break; + case CV_16U: ret = referenceWithType(input, table); break; + case CV_16S: ret = referenceWithType(input, table); break; + case CV_32S: ret = referenceWithType(input, table); break; + case CV_32F: ret = referenceWithType(input, table); break; + case CV_64F: ret = referenceWithType(input, table); break; + case CV_16F: ret = referenceWithType(input, table); 
break; + default: ret = cv::Mat(); break; + } } - return cv::Mat(); + return ret; } }; TEST_P(Core_LUT, accuracy) { - int type = GetParam(); - cv::Mat input(117, 113, CV_8UC1); - randu(input, 0, 256); + int idx_type = get<0>(GetParam()); + int value_type = get<1>(GetParam()); - cv::Mat table(1, 256, CV_MAKE_TYPE(type, 1)); - randu(table, 0, getMaxVal(type)); + ASSERT_TRUE((idx_type == CV_8U) || (idx_type == CV_8S) || (idx_type == CV_16U ) || (idx_type == CV_16S)); + const int tableSize = ((idx_type == CV_8U) || (idx_type == CV_8S)) ? 256: 65536; + + cv::Mat input(117, 113, CV_MAKE_TYPE(idx_type, 1)); + randu(input, getMinVal(idx_type), getMaxVal(idx_type)); + + cv::Mat table(1, tableSize, CV_MAKE_TYPE(value_type, 1)); + randu(table, getMinVal(value_type), getMaxVal(value_type)); cv::Mat output; - cv::LUT(input, table, output); + ASSERT_NO_THROW(cv::LUT(input, table, output)); + ASSERT_FALSE(output.empty()); cv::Mat gt = reference(input, table); + ASSERT_FALSE(gt.empty()); ASSERT_EQ(0, cv::norm(output, gt, cv::NORM_INF)); } TEST_P(Core_LUT, accuracy_multi) { - int type = (int)GetParam(); - cv::Mat input(117, 113, CV_8UC3); - randu(input, 0, 256); + int idx_type = get<0>(GetParam()); + int value_type = get<1>(GetParam()); - cv::Mat table(1, 256, CV_MAKE_TYPE(type, 1)); - randu(table, 0, getMaxVal(type)); + ASSERT_TRUE((idx_type == CV_8U) || (idx_type == CV_8S) || (idx_type == CV_16U) || (idx_type == CV_16S)); + const int tableSize = ((idx_type == CV_8U) || (idx_type == CV_8S) ) ? 
256: 65536; + + cv::Mat input(117, 113, CV_MAKE_TYPE(idx_type, 3)); + randu(input, getMinVal(idx_type), getMaxVal(idx_type)); + + cv::Mat table(1, tableSize, CV_MAKE_TYPE(value_type, 1)); + randu(table, getMinVal(value_type), getMaxVal(value_type)); cv::Mat output; - cv::LUT(input, table, output); + ASSERT_NO_THROW(cv::LUT(input, table, output)); + ASSERT_FALSE(output.empty()); cv::Mat gt = reference<3>(input, table); + ASSERT_FALSE(gt.empty()); ASSERT_EQ(0, cv::norm(output, gt, cv::NORM_INF)); } TEST_P(Core_LUT, accuracy_multi2) { - int type = (int)GetParam(); - cv::Mat input(117, 113, CV_8UC3); - randu(input, 0, 256); + int idx_type = get<0>(GetParam()); + int value_type = get<1>(GetParam()); - cv::Mat table(1, 256, CV_MAKE_TYPE(type, 3)); - randu(table, 0, getMaxVal(type)); + ASSERT_TRUE((idx_type == CV_8U) || (idx_type == CV_8S) || (idx_type == CV_16U) || (idx_type == CV_16S)); + const int tableSize = ((idx_type == CV_8U) || (idx_type == CV_8S)) ? 256: 65536; + + cv::Mat input(117, 113, CV_MAKE_TYPE(idx_type, 3)); + randu(input, getMinVal(idx_type), getMaxVal(idx_type)); + + cv::Mat table(1, tableSize, CV_MAKE_TYPE(value_type, 3)); + randu(table, getMinVal(value_type), getMaxVal(value_type)); cv::Mat output; - cv::LUT(input, table, output); + ASSERT_NO_THROW(cv::LUT(input, table, output)); + ASSERT_FALSE(output.empty()); cv::Mat gt = reference<3, true>(input, table); + ASSERT_FALSE(gt.empty()); ASSERT_EQ(0, cv::norm(output, gt, cv::NORM_INF)); } -INSTANTIATE_TEST_CASE_P(/**/, Core_LUT, LutMatType::all()); +INSTANTIATE_TEST_CASE_P(/**/, Core_LUT, testing::Combine( LutIdxType::all(), LutMatType::all())); }} // namespace