Merge pull request #27890 from Kumataro:fix26899

core: support 16 bit LUT #27890

Close https://github.com/opencv/opencv/issues/26899

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
Kumataro 2025-10-16 18:03:02 +09:00 committed by GitHub
parent c88b3cb11f
commit d0d9bd20ed
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 176 additions and 125 deletions

View File

@ -540,9 +540,9 @@ The function LUT fills the output array with values from the look-up table. Indi
are taken from the input array. That is, the function processes each element of src as follows:
\f[\texttt{dst} (I) \leftarrow \texttt{lut(src(I) + d)}\f]
where
\f[d = \fork{0}{if \(\texttt{src}\) has depth \(\texttt{CV_8U}\)}{128}{if \(\texttt{src}\) has depth \(\texttt{CV_8S}\)}\f]
@param src input array of 8-bit elements.
@param lut look-up table of 256 elements; in case of multi-channel input array, the table should
\f[d = \forkthree{0}{if \(\texttt{src}\) has depth \(\texttt{CV_8U}\) or \(\texttt{CV_16U}\)}{128}{if \(\texttt{src}\) has depth \(\texttt{CV_8S}\)}{32768}{if \(\texttt{src}\) has depth \(\texttt{CV_16S}\)}\f]
@param src input array of 8-bit or 16-bit integer elements.
@param lut look-up table of 256 elements (if src has depth CV_8U or CV_8S) or 65536 elements (if src has depth CV_16U or CV_16S); in case of multi-channel input array, the table should
either have a single channel (in this case the same table is used for all channels) or the same
number of channels as in the input array.
@param dst output array of the same size and number of channels as src, and the same depth as lut.

View File

@ -282,6 +282,32 @@ inline int hal_ni_lut(const uchar *src_data, size_t src_step, size_t src_type, c
#define cv_hal_lut hal_ni_lut
//! @endcond
/**
Lookup table replacement
Table consists of 65536 elements of a size from 1 to 8 bytes having 1 channel or src_channels
For 16s input type 32768 is added to LUT index
Destination should have the same element type and number of channels as lookup table elements
@param src_data Source image data
@param src_step Source image step
@param src_type Source image type
@param lut_data Pointer to lookup table
@param lut_channel_size Size of each channel in bytes
@param lut_channels Number of channels in lookup table
@param dst_data Destination data
@param dst_step Destination step
@param width Width of images
@param height Height of images
@sa LUT
*/
//! @addtogroup core_hal_interface_lut16 Lookup table for 16 bit index
//! @{
// Placeholder for the 16-bit-index LUT HAL entry point: every argument is ignored and
// CV_HAL_ERROR_NOT_IMPLEMENTED is returned unconditionally.
inline int hal_ni_lut16(const ushort *src_data, size_t src_step, size_t src_type,
                        const ushort* lut_data, size_t lut_channel_size, size_t lut_channels,
                        uchar *dst_data, size_t dst_step, int width, int height)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
//! @}
//! @cond IGNORED
#define cv_hal_lut16 hal_ni_lut16
//! @endcond
/**
Hamming norm of a vector
@param a pointer to vector data

View File

@ -6,6 +6,7 @@
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "convert.hpp"
#include <sys/types.h>
/****************************************************************************************\
* LUT Transform *
@ -14,8 +15,8 @@
namespace cv
{
template<typename T> static void
LUT8u_( const uchar* src, const T* lut, T* dst, int len, int cn, int lutcn )
template<typename Ti, typename T> static void
LUT_( const Ti* src, const T* lut, T* dst, const int len, const int cn, const int lutcn )
{
if( lutcn == 1 )
{
@ -30,53 +31,45 @@ LUT8u_( const uchar* src, const T* lut, T* dst, int len, int cn, int lutcn )
}
}
static void LUT8u_8u( const uchar* src, const uchar* lut, uchar* dst, int len, int cn, int lutcn )
{
LUT8u_( src, lut, dst, len, cn, lutcn );
}
static void LUT8u_8s( const uchar* src, const schar* lut, schar* dst, int len, int cn, int lutcn )
{
LUT8u_( src, lut, dst, len, cn, lutcn );
}
static void LUT8u_16u( const uchar* src, const ushort* lut, ushort* dst, int len, int cn, int lutcn )
{
LUT8u_( src, lut, dst, len, cn, lutcn );
}
static void LUT8u_16s( const uchar* src, const short* lut, short* dst, int len, int cn, int lutcn )
{
LUT8u_( src, lut, dst, len, cn, lutcn );
}
static void LUT8u_32s( const uchar* src, const int* lut, int* dst, int len, int cn, int lutcn )
{
LUT8u_( src, lut, dst, len, cn, lutcn );
}
static void LUT8u_16f( const uchar* src, const hfloat* lut, hfloat* dst, int len, int cn, int lutcn )
{
LUT8u_( src, lut, dst, len, cn, lutcn );
}
static void LUT8u_32f( const uchar* src, const float* lut, float* dst, int len, int cn, int lutcn )
{
LUT8u_( src, lut, dst, len, cn, lutcn );
}
static void LUT8u_64f( const uchar* src, const double* lut, double* dst, int len, int cn, int lutcn )
{
LUT8u_( src, lut, dst, len, cn, lutcn );
}
typedef void (*LUTFunc)( const uchar* src, const uchar* lut, uchar* dst, int len, int cn, int lutcn );
static LUTFunc lutTab[CV_DEPTH_MAX] =
static LUTFunc getLUTFunc(const int srcDepth, const int dstDepth)
{
(LUTFunc)LUT8u_8u, (LUTFunc)LUT8u_8s, (LUTFunc)LUT8u_16u, (LUTFunc)LUT8u_16s,
(LUTFunc)LUT8u_32s, (LUTFunc)LUT8u_32f, (LUTFunc)LUT8u_64f, (LUTFunc)LUT8u_16f
};
LUTFunc ret = nullptr;
if((srcDepth == CV_8U) || (srcDepth == CV_8S))
{
switch(dstDepth)
{
case CV_8U: ret = (LUTFunc)LUT_<uint8_t, uint8_t>; break;
case CV_8S: ret = (LUTFunc)LUT_<uint8_t, int8_t>; break;
case CV_16U: ret = (LUTFunc)LUT_<uint8_t, uint16_t>; break;
case CV_16S: ret = (LUTFunc)LUT_<uint8_t, int16_t>; break;
case CV_32S: ret = (LUTFunc)LUT_<uint8_t, int32_t>; break;
case CV_32F: ret = (LUTFunc)LUT_<uint8_t, int32_t>; break; // float
case CV_64F: ret = (LUTFunc)LUT_<uint8_t, int64_t>; break; // double
case CV_16F: ret = (LUTFunc)LUT_<uint8_t, int16_t>; break; // hfloat
default: ret = nullptr; break;
}
}
else if((srcDepth == CV_16U) || (srcDepth == CV_16S))
{
switch(dstDepth)
{
case CV_8U: ret = (LUTFunc)LUT_<uint16_t, uint8_t>; break;
case CV_8S: ret = (LUTFunc)LUT_<uint16_t, int8_t>; break;
case CV_16U: ret = (LUTFunc)LUT_<uint16_t, uint16_t>; break;
case CV_16S: ret = (LUTFunc)LUT_<uint16_t, int16_t>; break;
case CV_32S: ret = (LUTFunc)LUT_<uint16_t, int32_t>; break;
case CV_32F: ret = (LUTFunc)LUT_<uint16_t, int32_t>; break; // float
case CV_64F: ret = (LUTFunc)LUT_<uint16_t, int64_t>; break; // double
case CV_16F: ret = (LUTFunc)LUT_<uint16_t, int16_t>; break; // hfloat
default: ret = nullptr; break;
}
}
CV_CheckTrue(ret != nullptr, "An unexpected type combination was specified.");
return ret;
}
#ifdef HAVE_OPENCL
@ -107,24 +100,19 @@ static bool ocl_LUT(InputArray _src, InputArray _lut, OutputArray _dst)
class LUTParallelBody : public ParallelLoopBody
{
public:
bool* ok;
const Mat& src_;
const Mat& lut_;
Mat& dst_;
LUTFunc func;
LUTFunc func_;
LUTParallelBody(const Mat& src, const Mat& lut, Mat& dst, bool* _ok)
: ok(_ok), src_(src), lut_(lut), dst_(dst)
LUTParallelBody(const Mat& src, const Mat& lut, Mat& dst, LUTFunc func)
: src_(src), lut_(lut), dst_(dst), func_(func)
{
func = lutTab[lut.depth()];
*ok = (func != NULL);
}
void operator()( const cv::Range& range ) const CV_OVERRIDE
{
CV_Assert(*ok);
const int row0 = range.start;
const int row1 = range.end;
@ -140,7 +128,7 @@ public:
int len = (int)it.size;
for( size_t i = 0; i < it.nplanes; i++, ++it )
func(ptrs[0], lut_.ptr(), ptrs[1], len, cn, lutcn);
func_(ptrs[0], lut_.ptr(), ptrs[1], len, cn, lutcn);
}
private:
LUTParallelBody(const LUTParallelBody&);
@ -155,39 +143,47 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst )
int cn = _src.channels(), depth = _src.depth();
int lutcn = _lut.channels();
const size_t lut_size = _lut.total();
CV_Assert( (lutcn == cn || lutcn == 1) &&
_lut.total() == 256 && _lut.isContinuous() &&
(depth == CV_8U || depth == CV_8S) );
CV_Assert( (lutcn == cn || lutcn == 1) && _lut.isContinuous() &&
(
((lut_size == 256) && ((depth == CV_8U)||(depth == CV_8S))) ||
((lut_size == 65536) && ((depth == CV_16U)||(depth == CV_16S)))
)
);
CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && (lut_size == 256),
ocl_LUT(_src, _lut, _dst))
Mat src = _src.getMat(), lut = _lut.getMat();
_dst.create(src.dims, src.size, CV_MAKETYPE(_lut.depth(), cn));
Mat dst = _dst.getMat();
if(lut_size == 256)
{
CALL_HAL(LUT, cv_hal_lut, src.data, src.step, src.type(), lut.data,
lut.elemSize1(), lutcn, dst.data, dst.step, src.cols, src.rows);
}
else
{
CALL_HAL(LUT16, cv_hal_lut16, src.ptr<ushort>(), src.step, src.type(), lut.ptr<ushort>(),
lut.elemSize1(), lutcn, dst.data, dst.step, src.cols, src.rows);
}
const LUTFunc func = getLUTFunc(src.depth(), dst.depth());
CV_Assert( func != nullptr );
if (_src.dims() <= 2)
{
bool ok = false;
LUTParallelBody body(src, lut, dst, &ok);
if (ok)
{
LUTParallelBody body(src, lut, dst, func);
Range all(0, dst.rows);
if (dst.total() >= (size_t)(1<<18))
parallel_for_(all, body, (double)std::max((size_t)1, dst.total()>>16));
else
body(all);
if (ok)
return;
}
}
LUTFunc func = lutTab[lut.depth()];
CV_Assert( func != 0 );
const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2] = {};

View File

@ -3221,11 +3221,12 @@ INSTANTIATE_TEST_CASE_P(Core_CartPolar, Core_PolarToCart_inplace,
)
);
CV_ENUM(LutMatType, CV_8U, CV_16U, CV_16F, CV_32S, CV_32F, CV_64F)
CV_ENUM(LutIdxType, CV_8U, CV_8S, CV_16U, CV_16S)
CV_ENUM(LutMatType, CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F, CV_16F)
struct Core_LUT: public testing::TestWithParam<LutMatType>
struct Core_LUT: public testing::TestWithParam< std::tuple<LutIdxType, LutMatType> >
{
template<typename T, int ch, bool same_cn>
template<typename Ti, typename T, int ch, bool same_cn>
cv::Mat referenceWithType(cv::Mat input, cv::Mat table)
{
cv::Mat ref(input.size(), CV_MAKE_TYPE(table.depth(), ch));
@ -3235,7 +3236,7 @@ struct Core_LUT: public testing::TestWithParam<LutMatType>
{
if(ch == 1)
{
ref.at<T>(i, j) = table.at<T>(input.at<uchar>(i, j));
ref.at<T>(i, j) = table.at<T>(input.at<Ti>(i, j));
}
else
{
@ -3244,11 +3245,11 @@ struct Core_LUT: public testing::TestWithParam<LutMatType>
{
if (same_cn)
{
val[k] = table.at<Vec<T, ch>>(input.at<Vec<uchar, ch>>(i, j)[k])[k];
val[k] = table.at<Vec<T, ch>>(input.at<Vec<Ti, ch>>(i, j)[k])[k];
}
else
{
val[k] = table.at<T>(input.at<Vec<uchar, ch>>(i, j)[k]);
val[k] = table.at<T>(input.at<Vec<Ti, ch>>(i, j)[k]);
}
}
ref.at<Vec<T, ch>>(i, j) = val;
@ -3261,86 +3262,114 @@ struct Core_LUT: public testing::TestWithParam<LutMatType>
template<int ch = 1, bool same_cn = false>
cv::Mat reference(cv::Mat input, cv::Mat table)
{
if (table.depth() == CV_8U)
cv::Mat ret = cv::Mat();
if ((input.depth() == CV_8U) || (input.depth() == CV_8S)) // Index type for LUT operation
{
return referenceWithType<uchar, ch, same_cn>(input, table);
switch(table.depth()) // Value type for LUT operation
{
case CV_8U: ret = referenceWithType<uint8_t, uint8_t, ch, same_cn>(input, table); break;
case CV_8S: ret = referenceWithType<uint8_t, int8_t, ch, same_cn>(input, table); break;
case CV_16U: ret = referenceWithType<uint8_t, uint16_t, ch, same_cn>(input, table); break;
case CV_16S: ret = referenceWithType<uint8_t, int16_t, ch, same_cn>(input, table); break;
case CV_32S: ret = referenceWithType<uint8_t, int32_t, ch, same_cn>(input, table); break;
case CV_32F: ret = referenceWithType<uint8_t, float, ch, same_cn>(input, table); break;
case CV_64F: ret = referenceWithType<uint8_t, double, ch, same_cn>(input, table); break;
case CV_16F: ret = referenceWithType<uint8_t, uint16_t, ch, same_cn>(input, table); break;
default: ret = cv::Mat(); break;
}
else if (table.depth() == CV_16U)
{
return referenceWithType<ushort, ch, same_cn>(input, table);
}
else if (table.depth() == CV_16F)
else if ((input.depth() == CV_16U) || (input.depth() == CV_16S))
{
return referenceWithType<ushort, ch, same_cn>(input, table);
switch(table.depth()) // Value type for LUT operation
{
case CV_8U: ret = referenceWithType<uint16_t, uint8_t, ch, same_cn>(input, table); break;
case CV_8S: ret = referenceWithType<uint16_t, int8_t, ch, same_cn>(input, table); break;
case CV_16U: ret = referenceWithType<uint16_t, uint16_t, ch, same_cn>(input, table); break;
case CV_16S: ret = referenceWithType<uint16_t, int16_t, ch, same_cn>(input, table); break;
case CV_32S: ret = referenceWithType<uint16_t, int32_t, ch, same_cn>(input, table); break;
case CV_32F: ret = referenceWithType<uint16_t, float, ch, same_cn>(input, table); break;
case CV_64F: ret = referenceWithType<uint16_t, double, ch, same_cn>(input, table); break;
case CV_16F: ret = referenceWithType<uint16_t, uint16_t, ch, same_cn>(input, table); break;
default: ret = cv::Mat(); break;
}
else if (table.depth() == CV_32S)
{
return referenceWithType<int, ch, same_cn>(input, table);
}
else if (table.depth() == CV_32F)
{
return referenceWithType<float, ch, same_cn>(input, table);
}
else if (table.depth() == CV_64F)
{
return referenceWithType<double, ch, same_cn>(input, table);
}
return cv::Mat();
return ret;
}
};
TEST_P(Core_LUT, accuracy)
{
int type = GetParam();
cv::Mat input(117, 113, CV_8UC1);
randu(input, 0, 256);
int idx_type = get<0>(GetParam());
int value_type = get<1>(GetParam());
cv::Mat table(1, 256, CV_MAKE_TYPE(type, 1));
randu(table, 0, getMaxVal(type));
ASSERT_TRUE((idx_type == CV_8U) || (idx_type == CV_8S) || (idx_type == CV_16U ) || (idx_type == CV_16S));
const int tableSize = ((idx_type == CV_8U) || (idx_type == CV_8S)) ? 256: 65536;
cv::Mat input(117, 113, CV_MAKE_TYPE(idx_type, 1));
randu(input, getMinVal(idx_type), getMaxVal(idx_type));
cv::Mat table(1, tableSize, CV_MAKE_TYPE(value_type, 1));
randu(table, getMinVal(value_type), getMaxVal(value_type));
cv::Mat output;
cv::LUT(input, table, output);
ASSERT_NO_THROW(cv::LUT(input, table, output));
ASSERT_FALSE(output.empty());
cv::Mat gt = reference(input, table);
ASSERT_FALSE(gt.empty());
ASSERT_EQ(0, cv::norm(output, gt, cv::NORM_INF));
}
TEST_P(Core_LUT, accuracy_multi)
{
int type = (int)GetParam();
cv::Mat input(117, 113, CV_8UC3);
randu(input, 0, 256);
int idx_type = get<0>(GetParam());
int value_type = get<1>(GetParam());
cv::Mat table(1, 256, CV_MAKE_TYPE(type, 1));
randu(table, 0, getMaxVal(type));
ASSERT_TRUE((idx_type == CV_8U) || (idx_type == CV_8S) || (idx_type == CV_16U) || (idx_type == CV_16S));
const int tableSize = ((idx_type == CV_8U) || (idx_type == CV_8S) ) ? 256: 65536;
cv::Mat input(117, 113, CV_MAKE_TYPE(idx_type, 3));
randu(input, getMinVal(idx_type), getMaxVal(idx_type));
cv::Mat table(1, tableSize, CV_MAKE_TYPE(value_type, 1));
randu(table, getMinVal(value_type), getMaxVal(value_type));
cv::Mat output;
cv::LUT(input, table, output);
ASSERT_NO_THROW(cv::LUT(input, table, output));
ASSERT_FALSE(output.empty());
cv::Mat gt = reference<3>(input, table);
ASSERT_FALSE(gt.empty());
ASSERT_EQ(0, cv::norm(output, gt, cv::NORM_INF));
}
TEST_P(Core_LUT, accuracy_multi2)
{
int type = (int)GetParam();
cv::Mat input(117, 113, CV_8UC3);
randu(input, 0, 256);
int idx_type = get<0>(GetParam());
int value_type = get<1>(GetParam());
cv::Mat table(1, 256, CV_MAKE_TYPE(type, 3));
randu(table, 0, getMaxVal(type));
ASSERT_TRUE((idx_type == CV_8U) || (idx_type == CV_8S) || (idx_type == CV_16U) || (idx_type == CV_16S));
const int tableSize = ((idx_type == CV_8U) || (idx_type == CV_8S)) ? 256: 65536;
cv::Mat input(117, 113, CV_MAKE_TYPE(idx_type, 3));
randu(input, getMinVal(idx_type), getMaxVal(idx_type));
cv::Mat table(1, tableSize, CV_MAKE_TYPE(value_type, 3));
randu(table, getMinVal(value_type), getMaxVal(value_type));
cv::Mat output;
cv::LUT(input, table, output);
ASSERT_NO_THROW(cv::LUT(input, table, output));
ASSERT_FALSE(output.empty());
cv::Mat gt = reference<3, true>(input, table);
ASSERT_FALSE(gt.empty());
ASSERT_EQ(0, cv::norm(output, gt, cv::NORM_INF));
}
INSTANTIATE_TEST_CASE_P(/**/, Core_LUT, LutMatType::all());
INSTANTIATE_TEST_CASE_P(/**/, Core_LUT, testing::Combine( LutIdxType::all(), LutMatType::all()));
}} // namespace