From d0d9bd20ed03e6559ef61aa2ac1aceaa6245a64a Mon Sep 17 00:00:00 2001
From: Kumataro <Kumataro@users.noreply.github.com>
Date: Thu, 16 Oct 2025 18:03:02 +0900
Subject: [PATCH] Merge pull request #27890 from Kumataro:fix26899

core: support 16 bit LUT #27890

Close https://github.com/opencv/opencv/issues/26899

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
---
 modules/core/include/opencv2/core.hpp |   6 +-
 modules/core/src/hal_replacement.hpp  |  26 +++++
 modules/core/src/lut.cpp              | 148 +++++++++++++-------------
 modules/core/test/test_arithm.cpp     | 121 +++++++++++++--------
 4 files changed, 176 insertions(+), 125 deletions(-)

diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp
index 153bd17320..7f73c71388 100644
--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@@ -540,9 +540,9 @@ The function LUT fills the output array with values from the look-up table. Indi
 are taken from the input array. That is, the function processes each element of src as follows:
 \f[\texttt{dst} (I)  \leftarrow \texttt{lut(src(I) + d)}\f]
 where
-\f[d =  \fork{0}{if \(\texttt{src}\) has depth \(\texttt{CV_8U}\)}{128}{if \(\texttt{src}\) has depth \(\texttt{CV_8S}\)}\f]
-@param src input array of 8-bit elements.
-@param lut look-up table of 256 elements; in case of multi-channel input array, the table should
+\f[d =  \forkthree{0}{if \(\texttt{src}\) has depth \(\texttt{CV_8U}\) or \(\texttt{CV_16U}\)}{128}{if \(\texttt{src}\) has depth \(\texttt{CV_8S}\)}{32768}{if \(\texttt{src}\) has depth \(\texttt{CV_16S}\)}\f]
+@param src input array of 8-bit or 16-bit integer elements.
+@param lut look-up table of 256 elements (if src has depth CV_8U or CV_8S) or 65536 elements (if src has depth CV_16U or CV_16S); in case of multi-channel input array, the table should
 either have a single channel (in this case the same table is used for all channels) or the same
 number of channels as in the input array.
 @param dst output array of the same size and number of channels as src, and the same depth as lut.
diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp
index f809351550..0b4b660667 100644
--- a/modules/core/src/hal_replacement.hpp
+++ b/modules/core/src/hal_replacement.hpp
@@ -282,6 +282,32 @@ inline int hal_ni_lut(const uchar *src_data, size_t src_step, size_t src_type, c
 #define cv_hal_lut hal_ni_lut
 //! @endcond
 
+/**
+Lookup table replacement
+Table consists of 65536 elements of a size from 1 to 8 bytes having 1 channel or src_channels
+For 16s input type 32768 is added to LUT index
+Destination should have the same element type and number of channels as lookup table elements
+@param src_data Source image data
+@param src_step Source image step
+@param src_type Source image type
+@param lut_data Pointer to lookup table
+@param lut_channel_size Size of each channel in bytes
+@param lut_channels Number of channels in lookup table
+@param dst_data Destination data
+@param dst_step Destination step
+@param width Width of images
+@param height Height of images
+@sa LUT
+*/
+//! @addtogroup core_hal_interface_lut16 Lookup table for 16 bit index
+//! @{
+inline int hal_ni_lut16(const ushort *src_data, size_t src_step, size_t src_type, const ushort* lut_data, size_t lut_channel_size, size_t lut_channels, uchar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+//! @}
+
+//! @cond IGNORED
+#define cv_hal_lut16 hal_ni_lut16
+//! @endcond
+
 /**
 Hamming norm of a vector
 @param a pointer to vector data
diff --git a/modules/core/src/lut.cpp b/modules/core/src/lut.cpp
index 090ba50d5e..30958fca92 100644
--- a/modules/core/src/lut.cpp
+++ b/modules/core/src/lut.cpp
@@ -6,6 +6,7 @@
 #include "precomp.hpp"
 #include "opencl_kernels_core.hpp"
 #include "convert.hpp"
+#include <sys/types.h>
 
 /****************************************************************************************\
 *                                    LUT Transform                                       *
@@ -14,8 +15,8 @@
 namespace cv
 {
 
-template<typename T> static void
-LUT8u_( const uchar* src, const T* lut, T* dst, int len, int cn, int lutcn )
+template<typename Ti, typename T> static void
+LUT_( const Ti* src, const T* lut, T* dst, const int len, const int cn, const int lutcn )
 {
     if( lutcn == 1 )
     {
@@ -30,53 +31,45 @@ LUT8u_( const uchar* src, const T* lut, T* dst, int len, int cn, int lutcn )
     }
 }
 
-static void LUT8u_8u( const uchar* src, const uchar* lut, uchar* dst, int len, int cn, int lutcn )
-{
-    LUT8u_( src, lut, dst, len, cn, lutcn );
-}
-
-static void LUT8u_8s( const uchar* src, const schar* lut, schar* dst, int len, int cn, int lutcn )
-{
-    LUT8u_( src, lut, dst, len, cn, lutcn );
-}
-
-static void LUT8u_16u( const uchar* src, const ushort* lut, ushort* dst, int len, int cn, int lutcn )
-{
-    LUT8u_( src, lut, dst, len, cn, lutcn );
-}
-
-static void LUT8u_16s( const uchar* src, const short* lut, short* dst, int len, int cn, int lutcn )
-{
-    LUT8u_( src, lut, dst, len, cn, lutcn );
-}
-
-static void LUT8u_32s( const uchar* src, const int* lut, int* dst, int len, int cn, int lutcn )
-{
-    LUT8u_( src, lut, dst, len, cn, lutcn );
-}
-
-static void LUT8u_16f( const uchar* src, const hfloat* lut, hfloat* dst, int len, int cn, int lutcn )
-{
-    LUT8u_( src, lut, dst, len, cn, lutcn );
-}
-
-static void LUT8u_32f( const uchar* src, const float* lut, float* dst, int len, int cn, int lutcn )
-{
-    LUT8u_( src, lut, dst, len, cn, lutcn );
-}
-
-static void LUT8u_64f( const uchar* src, const double* lut, double* dst, int len, int cn, int lutcn )
-{
-    LUT8u_( src, lut, dst, len, cn, lutcn );
-}
-
 typedef void (*LUTFunc)( const uchar* src, const uchar* lut, uchar* dst, int len, int cn, int lutcn );
 
-static LUTFunc lutTab[CV_DEPTH_MAX] =
+static LUTFunc getLUTFunc(const int srcDepth, const int dstDepth)
 {
-    (LUTFunc)LUT8u_8u, (LUTFunc)LUT8u_8s, (LUTFunc)LUT8u_16u, (LUTFunc)LUT8u_16s,
-    (LUTFunc)LUT8u_32s, (LUTFunc)LUT8u_32f, (LUTFunc)LUT8u_64f, (LUTFunc)LUT8u_16f
-};
+    LUTFunc ret = nullptr;
+    if((srcDepth == CV_8U) || (srcDepth == CV_8S))
+    {
+        switch(dstDepth)
+        {
+            case CV_8U:   ret = (LUTFunc)LUT_<uint8_t, uint8_t>;   break;
+            case CV_8S:   ret = (LUTFunc)LUT_<uint8_t, int8_t>;    break;
+            case CV_16U:  ret = (LUTFunc)LUT_<uint8_t, uint16_t>;  break;
+            case CV_16S:  ret = (LUTFunc)LUT_<uint8_t, int16_t>;   break;
+            case CV_32S:  ret = (LUTFunc)LUT_<uint8_t, int32_t>;   break;
+            case CV_32F:  ret = (LUTFunc)LUT_<uint8_t, int32_t>;   break; // float
+            case CV_64F:  ret = (LUTFunc)LUT_<uint8_t, int64_t>;   break; // double
+            case CV_16F:  ret = (LUTFunc)LUT_<uint8_t, int16_t>;   break; // hfloat
+            default:      ret = nullptr;                           break;
+        }
+    }
+    else if((srcDepth == CV_16U) || (srcDepth == CV_16S))
+    {
+        switch(dstDepth)
+        {
+            case CV_8U:   ret = (LUTFunc)LUT_<uint16_t, uint8_t>;  break;
+            case CV_8S:   ret = (LUTFunc)LUT_<uint16_t, int8_t>;   break;
+            case CV_16U:  ret = (LUTFunc)LUT_<uint16_t, uint16_t>; break;
+            case CV_16S:  ret = (LUTFunc)LUT_<uint16_t, int16_t>;  break;
+            case CV_32S:  ret = (LUTFunc)LUT_<uint16_t, int32_t>;  break;
+            case CV_32F:  ret = (LUTFunc)LUT_<uint16_t, int32_t>;  break; // float
+            case CV_64F:  ret = (LUTFunc)LUT_<uint16_t, int64_t>;  break; // double
+            case CV_16F:  ret = (LUTFunc)LUT_<uint16_t, int16_t>;  break; // hfloat
+            default:      ret = nullptr;                           break;
+        }
+    }
+
+    CV_CheckTrue(ret != nullptr, "An unexpected type combination was specified.");
+    return ret;
+}
 
 #ifdef HAVE_OPENCL
 
@@ -107,24 +100,19 @@ static bool ocl_LUT(InputArray _src, InputArray _lut, OutputArray _dst)
 class LUTParallelBody : public ParallelLoopBody
 {
 public:
-    bool* ok;
     const Mat& src_;
     const Mat& lut_;
     Mat& dst_;
 
-    LUTFunc func;
+    LUTFunc func_;
 
-    LUTParallelBody(const Mat& src, const Mat& lut, Mat& dst, bool* _ok)
-        : ok(_ok), src_(src), lut_(lut), dst_(dst)
+    LUTParallelBody(const Mat& src, const Mat& lut, Mat& dst, LUTFunc func)
+        : src_(src), lut_(lut), dst_(dst), func_(func)
     {
-        func = lutTab[lut.depth()];
-        *ok = (func != NULL);
     }
 
     void operator()( const cv::Range& range ) const CV_OVERRIDE
     {
-        CV_Assert(*ok);
-
         const int row0 = range.start;
         const int row1 = range.end;
 
@@ -140,7 +128,7 @@ public:
         int len = (int)it.size;
 
         for( size_t i = 0; i < it.nplanes; i++, ++it )
-            func(ptrs[0], lut_.ptr(), ptrs[1], len, cn, lutcn);
+            func_(ptrs[0], lut_.ptr(), ptrs[1], len, cn, lutcn);
     }
 private:
     LUTParallelBody(const LUTParallelBody&);
@@ -155,39 +143,47 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst )
 
     int cn = _src.channels(), depth = _src.depth();
     int lutcn = _lut.channels();
+    const size_t lut_size = _lut.total();
 
-    CV_Assert( (lutcn == cn || lutcn == 1) &&
-        _lut.total() == 256 && _lut.isContinuous() &&
-        (depth == CV_8U || depth == CV_8S) );
+    CV_Assert( (lutcn == cn || lutcn == 1) && _lut.isContinuous() &&
+        (
+            ((lut_size == 256) && ((depth == CV_8U)||(depth == CV_8S))) ||
+            ((lut_size == 65536) && ((depth == CV_16U)||(depth == CV_16S)))
+        )
+    );
 
-    CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
+    CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && (lut_size == 256),
                ocl_LUT(_src, _lut, _dst))
 
     Mat src = _src.getMat(), lut = _lut.getMat();
     _dst.create(src.dims, src.size, CV_MAKETYPE(_lut.depth(), cn));
     Mat dst = _dst.getMat();
 
-    CALL_HAL(LUT, cv_hal_lut, src.data, src.step, src.type(), lut.data,
-             lut.elemSize1(), lutcn, dst.data, dst.step, src.cols, src.rows);
+    if(lut_size == 256)
+    {
+        CALL_HAL(LUT, cv_hal_lut, src.data, src.step, src.type(), lut.data,
+                 lut.elemSize1(), lutcn, dst.data, dst.step, src.cols, src.rows);
+    }
+    else
+    {
+        CALL_HAL(LUT16, cv_hal_lut16, src.ptr<ushort>(), src.step, src.type(), lut.ptr<ushort>(),
+                 lut.elemSize1(), lutcn, dst.data, dst.step, src.cols, src.rows);
+    }
+
+    const LUTFunc func = getLUTFunc(src.depth(), dst.depth());
+    CV_Assert( func != nullptr );
 
     if (_src.dims() <= 2)
     {
-        bool ok = false;
-        LUTParallelBody body(src, lut, dst, &ok);
-        if (ok)
-        {
-            Range all(0, dst.rows);
-            if (dst.total() >= (size_t)(1<<18))
-                parallel_for_(all, body, (double)std::max((size_t)1, dst.total()>>16));
-            else
-                body(all);
-            if (ok)
-                return;
-        }
-    }
+        LUTParallelBody body(src, lut, dst, func);
+        Range all(0, dst.rows);
+        if (dst.total() >= (size_t)(1<<18))
+            parallel_for_(all, body, (double)std::max((size_t)1, dst.total()>>16));
+        else
+            body(all);
 
-    LUTFunc func = lutTab[lut.depth()];
-    CV_Assert( func != 0 );
+        return;
+    }
 
     const Mat* arrays[] = {&src, &dst, 0};
     uchar* ptrs[2] = {};
diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp
index 88d646b09f..d2ce1f03fd 100644
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@@ -3221,11 +3221,12 @@ INSTANTIATE_TEST_CASE_P(Core_CartPolar, Core_PolarToCart_inplace,
     )
 );
 
-CV_ENUM(LutMatType, CV_8U, CV_16U, CV_16F, CV_32S, CV_32F, CV_64F)
+CV_ENUM(LutIdxType, CV_8U, CV_8S, CV_16U, CV_16S)
+CV_ENUM(LutMatType, CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F, CV_16F)
 
-struct Core_LUT: public testing::TestWithParam<LutMatType>
+struct Core_LUT: public testing::TestWithParam< std::tuple<LutIdxType, LutMatType> >
 {
-    template<typename T, int ch, bool same_cn>
+    template<typename Ti, typename T, int ch, bool same_cn>
     cv::Mat referenceWithType(cv::Mat input, cv::Mat table)
     {
         cv::Mat ref(input.size(), CV_MAKE_TYPE(table.depth(), ch));
@@ -3235,7 +3236,7 @@ struct Core_LUT: public testing::TestWithParam<LutMatType>
             {
                 if(ch == 1)
                 {
-                    ref.at<T>(i, j) = table.at<T>(input.at<uchar>(i, j));
+                    ref.at<T>(i, j) = table.at<T>(input.at<Ti>(i, j));
                 }
                 else
                 {
@@ -3244,11 +3245,11 @@ struct Core_LUT: public testing::TestWithParam<LutMatType>
                     {
                         if (same_cn)
                         {
-                            val[k] = table.at<Vec<T, ch>>(input.at<Vec<uchar, ch>>(i, j)[k])[k];
+                            val[k] = table.at<Vec<T, ch>>(input.at<Vec<Ti, ch>>(i, j)[k])[k];
                         }
                         else
                         {
-                            val[k] = table.at<T>(input.at<Vec<uchar, ch>>(i, j)[k]);
+                            val[k] = table.at<T>(input.at<Vec<Ti, ch>>(i, j)[k]);
                         }
                     }
                     ref.at<Vec<T, ch>>(i, j) = val;
@@ -3261,86 +3262,114 @@ struct Core_LUT: public testing::TestWithParam<LutMatType>
     template<int ch = 1, bool same_cn = false>
     cv::Mat reference(cv::Mat input, cv::Mat table)
     {
-        if (table.depth() == CV_8U)
+        cv::Mat ret = cv::Mat();
+        if ((input.depth() == CV_8U) || (input.depth() == CV_8S)) // Index type for LUT operation
         {
-            return referenceWithType<uchar, ch, same_cn>(input, table);
+            switch(table.depth()) // Value type for LUT operation
+            {
+                case CV_8U:   ret = referenceWithType<uint8_t, uint8_t,  ch, same_cn>(input, table); break;
+                case CV_8S:   ret = referenceWithType<uint8_t, int8_t,   ch, same_cn>(input, table); break;
+                case CV_16U:  ret = referenceWithType<uint8_t, uint16_t, ch, same_cn>(input, table); break;
+                case CV_16S:  ret = referenceWithType<uint8_t, int16_t,  ch, same_cn>(input, table); break;
+                case CV_32S:  ret = referenceWithType<uint8_t, int32_t,  ch, same_cn>(input, table); break;
+                case CV_32F:  ret = referenceWithType<uint8_t, float,    ch, same_cn>(input, table); break;
+                case CV_64F:  ret = referenceWithType<uint8_t, double,   ch, same_cn>(input, table); break;
+                case CV_16F:  ret = referenceWithType<uint8_t, uint16_t, ch, same_cn>(input, table); break;
+                default:      ret = cv::Mat();                                                       break;
+            }
         }
-        else if (table.depth() == CV_16U)
+        else if ((input.depth() == CV_16U) || (input.depth() == CV_16S))
         {
-            return referenceWithType<ushort, ch, same_cn>(input, table);
-        }
-        else if (table.depth() == CV_16F)
-        {
-            return referenceWithType<ushort, ch, same_cn>(input, table);
-        }
-        else if (table.depth() == CV_32S)
-        {
-            return referenceWithType<int, ch, same_cn>(input, table);
-        }
-        else if (table.depth() == CV_32F)
-        {
-            return referenceWithType<float, ch, same_cn>(input, table);
-        }
-        else if (table.depth() == CV_64F)
-        {
-            return referenceWithType<double, ch, same_cn>(input, table);
+            switch(table.depth()) // Value type for LUT operation
+            {
+                case CV_8U:   ret = referenceWithType<uint16_t, uint8_t,  ch, same_cn>(input, table); break;
+                case CV_8S:   ret = referenceWithType<uint16_t, int8_t,   ch, same_cn>(input, table); break;
+                case CV_16U:  ret = referenceWithType<uint16_t, uint16_t, ch, same_cn>(input, table); break;
+                case CV_16S:  ret = referenceWithType<uint16_t, int16_t,  ch, same_cn>(input, table); break;
+                case CV_32S:  ret = referenceWithType<uint16_t, int32_t,  ch, same_cn>(input, table); break;
+                case CV_32F:  ret = referenceWithType<uint16_t, float,    ch, same_cn>(input, table); break;
+                case CV_64F:  ret = referenceWithType<uint16_t, double,   ch, same_cn>(input, table); break;
+                case CV_16F:  ret = referenceWithType<uint16_t, uint16_t, ch, same_cn>(input, table); break;
+                default:      ret = cv::Mat();                                                        break;
+            }
         }
 
-        return cv::Mat();
+        return ret;
     }
 };
 
 TEST_P(Core_LUT, accuracy)
 {
-    int type = GetParam();
-    cv::Mat input(117, 113, CV_8UC1);
-    randu(input, 0, 256);
+    int idx_type = get<0>(GetParam());
+    int value_type = get<1>(GetParam());
 
-    cv::Mat table(1, 256, CV_MAKE_TYPE(type, 1));
-    randu(table, 0, getMaxVal(type));
+    ASSERT_TRUE((idx_type == CV_8U) || (idx_type == CV_8S) || (idx_type == CV_16U ) || (idx_type == CV_16S));
+    const int tableSize = ((idx_type == CV_8U) || (idx_type == CV_8S)) ? 256: 65536;
+
+    cv::Mat input(117, 113, CV_MAKE_TYPE(idx_type, 1));
+    randu(input, getMinVal(idx_type), getMaxVal(idx_type));
+
+    cv::Mat table(1, tableSize, CV_MAKE_TYPE(value_type, 1));
+    randu(table, getMinVal(value_type), getMaxVal(value_type));
 
     cv::Mat output;
-    cv::LUT(input, table, output);
+    ASSERT_NO_THROW(cv::LUT(input, table, output));
+    ASSERT_FALSE(output.empty());
 
     cv::Mat gt = reference(input, table);
+    ASSERT_FALSE(gt.empty());
 
     ASSERT_EQ(0, cv::norm(output, gt, cv::NORM_INF));
 }
 
 TEST_P(Core_LUT, accuracy_multi)
 {
-    int type = (int)GetParam();
-    cv::Mat input(117, 113, CV_8UC3);
-    randu(input, 0, 256);
+    int idx_type = get<0>(GetParam());
+    int value_type = get<1>(GetParam());
 
-    cv::Mat table(1, 256, CV_MAKE_TYPE(type, 1));
-    randu(table, 0, getMaxVal(type));
+    ASSERT_TRUE((idx_type == CV_8U) || (idx_type == CV_8S) || (idx_type == CV_16U) || (idx_type == CV_16S));
+    const int tableSize = ((idx_type == CV_8U) || (idx_type == CV_8S) ) ? 256: 65536;
+
+    cv::Mat input(117, 113, CV_MAKE_TYPE(idx_type, 3));
+    randu(input, getMinVal(idx_type), getMaxVal(idx_type));
+
+    cv::Mat table(1, tableSize, CV_MAKE_TYPE(value_type, 1));
+    randu(table, getMinVal(value_type), getMaxVal(value_type));
 
     cv::Mat output;
-    cv::LUT(input, table, output);
+    ASSERT_NO_THROW(cv::LUT(input, table, output));
+    ASSERT_FALSE(output.empty());
 
     cv::Mat gt = reference<3>(input, table);
+    ASSERT_FALSE(gt.empty());
 
     ASSERT_EQ(0, cv::norm(output, gt, cv::NORM_INF));
 }
 
 TEST_P(Core_LUT, accuracy_multi2)
 {
-    int type = (int)GetParam();
-    cv::Mat input(117, 113, CV_8UC3);
-    randu(input, 0, 256);
+    int idx_type = get<0>(GetParam());
+    int value_type = get<1>(GetParam());
 
-    cv::Mat table(1, 256, CV_MAKE_TYPE(type, 3));
-    randu(table, 0, getMaxVal(type));
+    ASSERT_TRUE((idx_type == CV_8U) || (idx_type == CV_8S) || (idx_type == CV_16U) || (idx_type == CV_16S));
+    const int tableSize = ((idx_type == CV_8U) || (idx_type == CV_8S)) ? 256: 65536;
+
+    cv::Mat input(117, 113, CV_MAKE_TYPE(idx_type, 3));
+    randu(input, getMinVal(idx_type), getMaxVal(idx_type));
+
+    cv::Mat table(1, tableSize, CV_MAKE_TYPE(value_type, 3));
+    randu(table, getMinVal(value_type), getMaxVal(value_type));
 
     cv::Mat output;
-    cv::LUT(input, table, output);
+    ASSERT_NO_THROW(cv::LUT(input, table, output));
+    ASSERT_FALSE(output.empty());
 
     cv::Mat gt = reference<3, true>(input, table);
+    ASSERT_FALSE(gt.empty());
 
     ASSERT_EQ(0, cv::norm(output, gt, cv::NORM_INF));
 }
 
-INSTANTIATE_TEST_CASE_P(/**/, Core_LUT, LutMatType::all());
+INSTANTIATE_TEST_CASE_P(/**/, Core_LUT, testing::Combine( LutIdxType::all(), LutMatType::all()));
 
 }} // namespace