Merge pull request #27890 from Kumataro:fix26899

core: support 16 bit LUT #27890 Close https://github.com/opencv/opencv/issues/26899 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
2025-12-06 00:19:46 +01:00 · 2025-10-16 18:03:02 +09:00 · 2025-10-16 18:03:02 +09:00 · d0d9bd20ed
commit d0d9bd20ed
parent c88b3cb11f
4 changed files with 176 additions and 125 deletions
--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@ -540,9 +540,9 @@ The function LUT fills the output array with values from the look-up table. Indi
 are taken from the input array. That is, the function processes each element of src as follows:
 \f[\texttt{dst} (I)  \leftarrow \texttt{lut(src(I) + d)}\f]
 where
-\f[d =  \fork{0}{if \(\texttt{src}\) has depth \(\texttt{CV_8U}\)}{128}{if \(\texttt{src}\) has depth \(\texttt{CV_8S}\)}\f]
+\f[d =  \forkthree{0}{if \(\texttt{src}\) has depth \(\texttt{CV_8U}\) or \(\texttt{CV_16U}\)}{128}{if \(\texttt{src}\) has depth \(\texttt{CV_8S}\)}{32768}{if \(\texttt{src}\) has depth \(\texttt{CV_16S}\)}\f]
-@param src input array of 8-bit elements.
+@param src input array of 8-bit or 16-bit integer elements.
-@param lut look-up table of 256 elements; in case of multi-channel input array, the table should
+@param lut look-up table of 256 elements (if src has depth CV_8U or CV_8S) or 65536 elements (if src has depth CV_16U or CV_16S); in case of multi-channel input array, the table should
 either have a single channel (in this case the same table is used for all channels) or the same
 number of channels as in the input array.
@param dst output array of the same size and number of channels as src, and the same depth as lut.
--- a/modules/core/src/hal_replacement.hpp
+++ b/modules/core/src/hal_replacement.hpp
@ -282,6 +282,32 @@ inline int hal_ni_lut(const uchar *src_data, size_t src_step, size_t src_type, c
 #define cv_hal_lut hal_ni_lut
 //! @endcond
 /**
 Lookup table replacement for 16 bit index
 Table consists of 65536 elements of a size from 1 to 8 bytes having 1 channel or src_channels
 For 16s input types 32768 is added to LUT index
 Destination should have the same element type and number of channels as lookup table elements
 The default implementation below performs no lookup and only returns CV_HAL_ERROR_NOT_IMPLEMENTED
 NOTE(review): lut_data is declared as const ushort* although table elements may be 1 to 8 bytes wide;
 presumably it has to be reinterpreted according to lut_channel_size - confirm against the caller
@param src_data Source image data
@param src_step Source image step
@param src_type Source image type
@param lut_data Pointer to lookup table
@param lut_channel_size Size of each channel in bytes
@param lut_channels Number of channels in lookup table
@param dst_data Destination data
@param dst_step Destination step
@param width Width of images
@param height Height of images
@sa LUT
 */
 //! @addtogroup core_hal_interface_lut16 Lookup table for 16 bit index
 //! @{
 inline int hal_ni_lut16(const ushort *src_data, size_t src_step, size_t src_type, const ushort* lut_data, size_t lut_channel_size, size_t lut_channels, uchar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
 //! @}
 //! @cond IGNORED
 #define cv_hal_lut16 hal_ni_lut16
 //! @endcond
 /**
 Hamming norm of a vector
@param a pointer to vector data
--- a/modules/core/src/lut.cpp
+++ b/modules/core/src/lut.cpp
@ -6,6 +6,7 @@
 #include "precomp.hpp"
 #include "opencl_kernels_core.hpp"
 #include "convert.hpp"
 #include <sys/types.h>
 /****************************************************************************************\
 *                                    LUT Transform                                       *
@ -14,8 +15,8 @@
 namespace cv
 {
-template<typename T> static void
+template<typename Ti, typename T> static void
-LUT8u_( const uchar* src, const T* lut, T* dst, int len, int cn, int lutcn )
+LUT_( const Ti* src, const T* lut, T* dst, const int len, const int cn, const int lutcn )
 {
    if( lutcn == 1 )
    {
@ -30,53 +31,45 @@ LUT8u_( const uchar* src, const T* lut, T* dst, int len, int cn, int lutcn )
    }
 }
 // Non-template entry points, one per table/destination element type. Each takes a
 // uchar source and forwards its arguments unchanged to the LUT8u_ template.
 // NOTE(review): this same change renames the LUT8u_ template to LUT_ (adding an index
 // type parameter) and getLUTFunc() instantiates LUT_ directly - confirm these wrappers
 // are meant to be removed rather than kept.
 static void LUT8u_8u( const uchar* src, const uchar* lut, uchar* dst, int len, int cn, int lutcn )
 {
    LUT8u_( src, lut, dst, len, cn, lutcn );
 }
 static void LUT8u_8s( const uchar* src, const schar* lut, schar* dst, int len, int cn, int lutcn )
 {
    LUT8u_( src, lut, dst, len, cn, lutcn );
 }
 static void LUT8u_16u( const uchar* src, const ushort* lut, ushort* dst, int len, int cn, int lutcn )
 {
    LUT8u_( src, lut, dst, len, cn, lutcn );
 }
 static void LUT8u_16s( const uchar* src, const short* lut, short* dst, int len, int cn, int lutcn )
 {
    LUT8u_( src, lut, dst, len, cn, lutcn );
 }
 static void LUT8u_32s( const uchar* src, const int* lut, int* dst, int len, int cn, int lutcn )
 {
    LUT8u_( src, lut, dst, len, cn, lutcn );
 }
 static void LUT8u_16f( const uchar* src, const hfloat* lut, hfloat* dst, int len, int cn, int lutcn )
 {
    LUT8u_( src, lut, dst, len, cn, lutcn );
 }
 static void LUT8u_32f( const uchar* src, const float* lut, float* dst, int len, int cn, int lutcn )
 {
    LUT8u_( src, lut, dst, len, cn, lutcn );
 }
 static void LUT8u_64f( const uchar* src, const double* lut, double* dst, int len, int cn, int lutcn )
 {
    LUT8u_( src, lut, dst, len, cn, lutcn );
 }
 // Common byte-pointer signature through which every lookup routine is called; the
 // typed function pointers are cast to it.
 typedef void (*LUTFunc)( const uchar* src, const uchar* lut, uchar* dst, int len, int cn, int lutcn );
 // getLUTFunc: picks the LUT_ instantiation for a source depth and a destination depth.
 //  - CV_8U and CV_8S sources share the uint8_t index type; CV_16U and CV_16S share uint16_t.
 //  - CV_32F, CV_64F and CV_16F destinations use the integer instantiation of the same
 //    width (int32_t, int64_t, int16_t); presumably valid because the lookup only copies
 //    elements - confirm against the LUT_ body.
 //  - Any other source or destination depth leaves the result null, which is rejected by
 //    the CV_CheckTrue check before returning.
-static LUTFunc lutTab[CV_DEPTH_MAX] =
+static LUTFunc getLUTFunc(const int srcDepth, const int dstDepth)
 {
-    (LUTFunc)LUT8u_8u, (LUTFunc)LUT8u_8s, (LUTFunc)LUT8u_16u, (LUTFunc)LUT8u_16s,
+    LUTFunc ret = nullptr;
-    (LUTFunc)LUT8u_32s, (LUTFunc)LUT8u_32f, (LUTFunc)LUT8u_64f, (LUTFunc)LUT8u_16f
+    if((srcDepth == CV_8U) || (srcDepth == CV_8S))
-};
+    {
        switch(dstDepth)
        {
            case CV_8U:   ret = (LUTFunc)LUT_<uint8_t, uint8_t>;   break;
            case CV_8S:   ret = (LUTFunc)LUT_<uint8_t, int8_t>;    break;
            case CV_16U:  ret = (LUTFunc)LUT_<uint8_t, uint16_t>;  break;
            case CV_16S:  ret = (LUTFunc)LUT_<uint8_t, int16_t>;   break;
            case CV_32S:  ret = (LUTFunc)LUT_<uint8_t, int32_t>;   break;
            case CV_32F:  ret = (LUTFunc)LUT_<uint8_t, int32_t>;   break; // float
            case CV_64F:  ret = (LUTFunc)LUT_<uint8_t, int64_t>;   break; // double
            case CV_16F:  ret = (LUTFunc)LUT_<uint8_t, int16_t>;   break; // hfloat
            default:      ret = nullptr;                           break;
        }
    }
    else if((srcDepth == CV_16U) || (srcDepth == CV_16S))
    {
        switch(dstDepth)
        {
            case CV_8U:   ret = (LUTFunc)LUT_<uint16_t, uint8_t>;  break;
            case CV_8S:   ret = (LUTFunc)LUT_<uint16_t, int8_t>;   break;
            case CV_16U:  ret = (LUTFunc)LUT_<uint16_t, uint16_t>; break;
            case CV_16S:  ret = (LUTFunc)LUT_<uint16_t, int16_t>;  break;
            case CV_32S:  ret = (LUTFunc)LUT_<uint16_t, int32_t>;  break;
            case CV_32F:  ret = (LUTFunc)LUT_<uint16_t, int32_t>;  break; // float
            case CV_64F:  ret = (LUTFunc)LUT_<uint16_t, int64_t>;  break; // double
            case CV_16F:  ret = (LUTFunc)LUT_<uint16_t, int16_t>;  break; // hfloat
            default:      ret = nullptr;                           break;
        }
    }
    CV_CheckTrue(ret != nullptr, "An unexpected type combination was specified.");
    return ret;
 }
 #ifdef HAVE_OPENCL
@ -107,24 +100,19 @@ static bool ocl_LUT(InputArray _src, InputArray _lut, OutputArray _dst)
 class LUTParallelBody : public ParallelLoopBody
 {
 public:
    bool* ok;
    const Mat& src_;
    const Mat& lut_;
    Mat& dst_;
-    LUTFunc func;
+    LUTFunc func_;
-    LUTParallelBody(const Mat& src, const Mat& lut, Mat& dst, bool* _ok)
+    LUTParallelBody(const Mat& src, const Mat& lut, Mat& dst, LUTFunc func)
-        : ok(_ok), src_(src), lut_(lut), dst_(dst)
+        : src_(src), lut_(lut), dst_(dst), func_(func)
    {
        func = lutTab[lut.depth()];
        *ok = (func != NULL);
    }
    void operator()( const cv::Range& range ) const CV_OVERRIDE
    {
        CV_Assert(*ok);
        const int row0 = range.start;
        const int row1 = range.end;
@ -140,7 +128,7 @@ public:
        int len = (int)it.size;
        for( size_t i = 0; i < it.nplanes; i++, ++it )
-            func(ptrs[0], lut_.ptr(), ptrs[1], len, cn, lutcn);
+            func_(ptrs[0], lut_.ptr(), ptrs[1], len, cn, lutcn);
    }
 private:
    LUTParallelBody(const LUTParallelBody&);
@ -155,39 +143,47 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst )
    int cn = _src.channels(), depth = _src.depth();
    int lutcn = _lut.channels();
    const size_t lut_size = _lut.total();
-    CV_Assert( (lutcn == cn || lutcn == 1) &&
+    CV_Assert( (lutcn == cn || lutcn == 1) && _lut.isContinuous() &&
-        _lut.total() == 256 && _lut.isContinuous() &&
+        (
-        (depth == CV_8U || depth == CV_8S) );
+            ((lut_size == 256) && ((depth == CV_8U)||(depth == CV_8S))) ||
            ((lut_size == 65536) && ((depth == CV_16U)||(depth == CV_16S)))
        )
    );
-    CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
+    CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && (lut_size == 256),
               ocl_LUT(_src, _lut, _dst))
    Mat src = _src.getMat(), lut = _lut.getMat();
    _dst.create(src.dims, src.size, CV_MAKETYPE(_lut.depth(), cn));
    Mat dst = _dst.getMat();
-    CALL_HAL(LUT, cv_hal_lut, src.data, src.step, src.type(), lut.data,
+    if(lut_size == 256)
-             lut.elemSize1(), lutcn, dst.data, dst.step, src.cols, src.rows);
+    {
        CALL_HAL(LUT, cv_hal_lut, src.data, src.step, src.type(), lut.data,
                 lut.elemSize1(), lutcn, dst.data, dst.step, src.cols, src.rows);
    }
    else
    {
        CALL_HAL(LUT16, cv_hal_lut16, src.ptr<ushort>(), src.step, src.type(), lut.ptr<ushort>(),
                 lut.elemSize1(), lutcn, dst.data, dst.step, src.cols, src.rows);
    }
    const LUTFunc func = getLUTFunc(src.depth(), dst.depth());
    CV_Assert( func != nullptr );
    if (_src.dims() <= 2)
    {
-        bool ok = false;
+        LUTParallelBody body(src, lut, dst, func);
-        LUTParallelBody body(src, lut, dst, &ok);
+        Range all(0, dst.rows);
-        if (ok)
+        if (dst.total() >= (size_t)(1<<18))
-        {
+            parallel_for_(all, body, (double)std::max((size_t)1, dst.total()>>16));
-            Range all(0, dst.rows);
+        else
-            if (dst.total() >= (size_t)(1<<18))
+            body(all);
                parallel_for_(all, body, (double)std::max((size_t)1, dst.total()>>16));
            else
                body(all);
            if (ok)
                return;
        }
    }
-    LUTFunc func = lutTab[lut.depth()];
+        return;
-    CV_Assert( func != 0 );
+    }
    const Mat* arrays[] = {&src, &dst, 0};
    uchar* ptrs[2] = {};
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@ -3221,11 +3221,12 @@ INSTANTIATE_TEST_CASE_P(Core_CartPolar, Core_PolarToCart_inplace,
    )
 );
-CV_ENUM(LutMatType, CV_8U, CV_16U, CV_16F, CV_32S, CV_32F, CV_64F)
+CV_ENUM(LutIdxType, CV_8U, CV_8S, CV_16U, CV_16S)
 CV_ENUM(LutMatType, CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F, CV_16F)
-struct Core_LUT: public testing::TestWithParam<LutMatType>
+struct Core_LUT: public testing::TestWithParam< std::tuple<LutIdxType, LutMatType> >
 {
-    template<typename T, int ch, bool same_cn>
+    template<typename Ti, typename T, int ch, bool same_cn>
    cv::Mat referenceWithType(cv::Mat input, cv::Mat table)
    {
        cv::Mat ref(input.size(), CV_MAKE_TYPE(table.depth(), ch));
@ -3235,7 +3236,7 @@ struct Core_LUT: public testing::TestWithParam<LutMatType>
            {
                if(ch == 1)
                {
-                    ref.at<T>(i, j) = table.at<T>(input.at<uchar>(i, j));
+                    ref.at<T>(i, j) = table.at<T>(input.at<Ti>(i, j));
                }
                else
                {
@ -3244,11 +3245,11 @@ struct Core_LUT: public testing::TestWithParam<LutMatType>
                    {
                        if (same_cn)
                        {
-                            val[k] = table.at<Vec<T, ch>>(input.at<Vec<uchar, ch>>(i, j)[k])[k];
+                            val[k] = table.at<Vec<T, ch>>(input.at<Vec<Ti, ch>>(i, j)[k])[k];
                        }
                        else
                        {
-                            val[k] = table.at<T>(input.at<Vec<uchar, ch>>(i, j)[k]);
+                            val[k] = table.at<T>(input.at<Vec<Ti, ch>>(i, j)[k]);
                        }
                    }
                    ref.at<Vec<T, ch>>(i, j) = val;
@ -3261,86 +3262,114 @@ struct Core_LUT: public testing::TestWithParam<LutMatType>
    template<int ch = 1, bool same_cn = false>
    cv::Mat reference(cv::Mat input, cv::Mat table)
    {
-        if (table.depth() == CV_8U)
+        cv::Mat ret = cv::Mat();
        if ((input.depth() == CV_8U) || (input.depth() == CV_8S)) // Index type for LUT operation
        {
-            return referenceWithType<uchar, ch, same_cn>(input, table);
+            switch(table.depth()) // Value type for LUT operation
            {
                case CV_8U:   ret = referenceWithType<uint8_t, uint8_t,  ch, same_cn>(input, table); break;
                case CV_8S:   ret = referenceWithType<uint8_t, int8_t,   ch, same_cn>(input, table); break;
                case CV_16U:  ret = referenceWithType<uint8_t, uint16_t, ch, same_cn>(input, table); break;
                case CV_16S:  ret = referenceWithType<uint8_t, int16_t,  ch, same_cn>(input, table); break;
                case CV_32S:  ret = referenceWithType<uint8_t, int32_t,  ch, same_cn>(input, table); break;
                case CV_32F:  ret = referenceWithType<uint8_t, float,    ch, same_cn>(input, table); break;
                case CV_64F:  ret = referenceWithType<uint8_t, double,   ch, same_cn>(input, table); break;
                case CV_16F:  ret = referenceWithType<uint8_t, uint16_t, ch, same_cn>(input, table); break;
                default:      ret = cv::Mat();                                                       break;
            }
        }
-        else if (table.depth() == CV_16U)
+        else if ((input.depth() == CV_16U) || (input.depth() == CV_16S))
        {
-            return referenceWithType<ushort, ch, same_cn>(input, table);
+            switch(table.depth()) // Value type for LUT operation
-        }
+            {
-        else if (table.depth() == CV_16F)
+                case CV_8U:   ret = referenceWithType<uint16_t, uint8_t,  ch, same_cn>(input, table); break;
-        {
+                case CV_8S:   ret = referenceWithType<uint16_t, int8_t,   ch, same_cn>(input, table); break;
-            return referenceWithType<ushort, ch, same_cn>(input, table);
+                case CV_16U:  ret = referenceWithType<uint16_t, uint16_t, ch, same_cn>(input, table); break;
-        }
+                case CV_16S:  ret = referenceWithType<uint16_t, int16_t,  ch, same_cn>(input, table); break;
-        else if (table.depth() == CV_32S)
+                case CV_32S:  ret = referenceWithType<uint16_t, int32_t,  ch, same_cn>(input, table); break;
-        {
+                case CV_32F:  ret = referenceWithType<uint16_t, float,    ch, same_cn>(input, table); break;
-            return referenceWithType<int, ch, same_cn>(input, table);
+                case CV_64F:  ret = referenceWithType<uint16_t, double,   ch, same_cn>(input, table); break;
-        }
+                case CV_16F:  ret = referenceWithType<uint16_t, uint16_t, ch, same_cn>(input, table); break;
-        else if (table.depth() == CV_32F)
+                default:      ret = cv::Mat();                                                        break;
-        {
+            }
            return referenceWithType<float, ch, same_cn>(input, table);
        }
        else if (table.depth() == CV_64F)
        {
            return referenceWithType<double, ch, same_cn>(input, table);
        }
-        return cv::Mat();
+        return ret;
    }
 };
 // Single-channel input with a single-channel table, run for each (index depth, table
 // depth) parameter pair. The table has 256 entries for 8-bit index depths and 65536
 // otherwise; the cv::LUT output must equal reference() exactly (NORM_INF of 0).
 TEST_P(Core_LUT, accuracy)
 {
-    int type = GetParam();
+    int idx_type = get<0>(GetParam());
-    cv::Mat input(117, 113, CV_8UC1);
+    int value_type = get<1>(GetParam());
    randu(input, 0, 256); // NOTE(review): precedes the declaration of input in this view; looks like a leftover of the old body - confirm it is removed
-    cv::Mat table(1, 256, CV_MAKE_TYPE(type, 1));
+    ASSERT_TRUE((idx_type == CV_8U) || (idx_type == CV_8S) || (idx_type == CV_16U ) || (idx_type == CV_16S));
-    randu(table, 0, getMaxVal(type));
+    const int tableSize = ((idx_type == CV_8U) || (idx_type == CV_8S)) ? 256: 65536;
    cv::Mat input(117, 113, CV_MAKE_TYPE(idx_type, 1));
    randu(input, getMinVal(idx_type), getMaxVal(idx_type));
    cv::Mat table(1, tableSize, CV_MAKE_TYPE(value_type, 1));
    randu(table, getMinVal(value_type), getMaxVal(value_type));
    cv::Mat output;
-    cv::LUT(input, table, output);
+    ASSERT_NO_THROW(cv::LUT(input, table, output));
    ASSERT_FALSE(output.empty());
    cv::Mat gt = reference(input, table);
    ASSERT_FALSE(gt.empty());
    ASSERT_EQ(0, cv::norm(output, gt, cv::NORM_INF));
 }
 // Three-channel input with a single-channel table (the same table is applied to every
 // channel in reference<3>), run for each (index depth, table depth) parameter pair.
 // The cv::LUT output must equal the reference exactly (NORM_INF of 0).
 TEST_P(Core_LUT, accuracy_multi)
 {
-    int type = (int)GetParam();
+    int idx_type = get<0>(GetParam());
-    cv::Mat input(117, 113, CV_8UC3);
+    int value_type = get<1>(GetParam());
    randu(input, 0, 256); // NOTE(review): precedes the declaration of input in this view; looks like a leftover of the old body - confirm it is removed
-    cv::Mat table(1, 256, CV_MAKE_TYPE(type, 1));
+    ASSERT_TRUE((idx_type == CV_8U) || (idx_type == CV_8S) || (idx_type == CV_16U) || (idx_type == CV_16S));
-    randu(table, 0, getMaxVal(type));
+    const int tableSize = ((idx_type == CV_8U) || (idx_type == CV_8S) ) ? 256: 65536;
    cv::Mat input(117, 113, CV_MAKE_TYPE(idx_type, 3));
    randu(input, getMinVal(idx_type), getMaxVal(idx_type));
    cv::Mat table(1, tableSize, CV_MAKE_TYPE(value_type, 1));
    randu(table, getMinVal(value_type), getMaxVal(value_type));
    cv::Mat output;
-    cv::LUT(input, table, output);
+    ASSERT_NO_THROW(cv::LUT(input, table, output));
    ASSERT_FALSE(output.empty());
    cv::Mat gt = reference<3>(input, table);
    ASSERT_FALSE(gt.empty());
    ASSERT_EQ(0, cv::norm(output, gt, cv::NORM_INF));
 }
 // Three-channel input with a three-channel table (reference<3, true> looks each channel
 // up in its own table channel), run for each (index depth, table depth) parameter pair.
 // The cv::LUT output must equal the reference exactly (NORM_INF of 0).
 TEST_P(Core_LUT, accuracy_multi2)
 {
-    int type = (int)GetParam();
+    int idx_type = get<0>(GetParam());
-    cv::Mat input(117, 113, CV_8UC3);
+    int value_type = get<1>(GetParam());
    randu(input, 0, 256); // NOTE(review): precedes the declaration of input in this view; looks like a leftover of the old body - confirm it is removed
-    cv::Mat table(1, 256, CV_MAKE_TYPE(type, 3));
+    ASSERT_TRUE((idx_type == CV_8U) || (idx_type == CV_8S) || (idx_type == CV_16U) || (idx_type == CV_16S));
-    randu(table, 0, getMaxVal(type));
+    const int tableSize = ((idx_type == CV_8U) || (idx_type == CV_8S)) ? 256: 65536;
    cv::Mat input(117, 113, CV_MAKE_TYPE(idx_type, 3));
    randu(input, getMinVal(idx_type), getMaxVal(idx_type));
    cv::Mat table(1, tableSize, CV_MAKE_TYPE(value_type, 3));
    randu(table, getMinVal(value_type), getMaxVal(value_type));
    cv::Mat output;
-    cv::LUT(input, table, output);
+    ASSERT_NO_THROW(cv::LUT(input, table, output));
    ASSERT_FALSE(output.empty());
    cv::Mat gt = reference<3, true>(input, table);
    ASSERT_FALSE(gt.empty());
    ASSERT_EQ(0, cv::norm(output, gt, cv::NORM_INF));
 }
-INSTANTIATE_TEST_CASE_P(/**/, Core_LUT, LutMatType::all());
+INSTANTIATE_TEST_CASE_P(/**/, Core_LUT, testing::Combine( LutIdxType::all(), LutMatType::all()));
 }} // namespace