diff --git a/modules/imgproc/src/demosaicing.cpp b/modules/imgproc/src/demosaicing.cpp index 24baf16362..7916d5a319 100644 --- a/modules/imgproc/src/demosaicing.cpp +++ b/modules/imgproc/src/demosaicing.cpp @@ -123,13 +123,13 @@ public: } }; -#if CV_SIMD128 class SIMDBayerInterpolator_8u { public: - int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst, - int width, int bcoeff, int gcoeff, int rcoeff) const + static int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst, + int width, int bcoeff, int gcoeff, int rcoeff) { +#if CV_SIMD #if CV_NEON uint16x8_t masklo = vdupq_n_u16(255); const uchar* bayer_end = bayer + width; @@ -177,102 +177,108 @@ public: vst1_u8(dst + 8, p.val[1]); } #else - v_uint16x8 v255 = v_setall_u16(255); - v_int16x8 v_descale = v_setall_s16(static_cast(1 << 14)); - v_int16x8 dummy; - v_int16x8 cxrb; - v_int16x8 cxg2; - v_zip(v_setall_s16(static_cast(rcoeff)), - v_setall_s16(static_cast(bcoeff)), + v_uint16 v255 = vx_setall_u16(255); + v_int16 v_descale = vx_setall_s16(static_cast(1 << 14)); + v_int16 dummy; + v_int16 cxrb; + v_int16 cxg2; + v_zip(vx_setall_s16(static_cast(rcoeff)), + vx_setall_s16(static_cast(bcoeff)), cxrb, dummy); - v_zip(v_setall_s16(static_cast(gcoeff)), - v_setall_s16(static_cast(2)), + v_zip(vx_setall_s16(static_cast(gcoeff)), + vx_setall_s16(static_cast(2)), cxg2, dummy); const uchar* bayer_end = bayer + width; - for (; bayer < bayer_end - 14; bayer += 14, dst += 14) + const int step = VTraits::vlanes() - 2; + for (; bayer < bayer_end - step; bayer += step, dst += step) { - v_uint16x8 first_line = v_reinterpret_as_u16(v_load(bayer)); - v_uint16x8 second_line = v_reinterpret_as_u16(v_load(bayer + bayer_step)); - v_uint16x8 third_line = v_reinterpret_as_u16(v_load(bayer + bayer_step * 2)); + v_uint16 first_line = v_reinterpret_as_u16(vx_load(bayer)); + v_uint16 second_line = v_reinterpret_as_u16(vx_load(bayer + bayer_step)); + v_uint16 third_line = v_reinterpret_as_u16(vx_load(bayer + bayer_step * 2)); // bayer[0] - v_uint16x8 first_line0 = v_and(first_line, v255); + v_uint16 first_line0 = v_and(first_line, v255); // bayer[bayer_step*2] - v_uint16x8 third_line0 = v_and(third_line, v255); + v_uint16 third_line0 = v_and(third_line, v255); // bayer[0] + bayer[bayer_step*2] - v_uint16x8 first_third_line0 = v_add(first_line0, third_line0); + v_uint16 first_third_line0 = v_add(first_line0, third_line0); // bayer[2] + bayer[bayer_step*2+2] - v_uint16x8 first_third_line2 = v_rotate_right<1>(first_third_line0); + v_uint16 first_third_line2 = v_rotate_right<1>(first_third_line0); // bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2] - v_int16x8 r0 = v_reinterpret_as_s16(v_add(first_third_line0, first_third_line2)); + v_int16 r0 = v_reinterpret_as_s16(v_add(first_third_line0, first_third_line2)); // (bayer[2] + bayer[bayer_step*2+2]) * 2 - v_int16x8 r1 = v_reinterpret_as_s16(v_shl<1>(first_third_line2)); + v_int16 r1 = v_reinterpret_as_s16(v_shl<1>(first_third_line2)); // bayer[bayer_step+1] - v_uint16x8 second_line1 = v_shr<8>(second_line); + v_uint16 second_line1 = v_shr<8>(second_line); // bayer[bayer_step+1] * 4 - v_int16x8 b0 = v_reinterpret_as_s16(v_shl<2>(second_line1)); + v_int16 b0 = v_reinterpret_as_s16(v_shl<2>(second_line1)); // bayer[bayer_step+3] - v_uint16x8 second_line3 = v_rotate_right<1>(second_line1); + v_uint16 second_line3 = v_rotate_right<1>(second_line1); // bayer[bayer_step+1] + bayer[bayer_step+3] - v_uint16x8 second_line13 = v_add(second_line1, second_line3); + v_uint16 second_line13 = v_add(second_line1, second_line3); // (bayer[bayer_step+1] + bayer[bayer_step+3]) * 2 - v_int16x8 b1 = v_reinterpret_as_s16(v_shl(second_line13, 1)); + v_int16 b1 = v_reinterpret_as_s16(v_shl(second_line13, 1)); // bayer[1] - v_uint16x8 first_line1 = v_shr<8>(first_line); + v_uint16 first_line1 = v_shr<8>(first_line); // bayer[bayer_step] - v_uint16x8 second_line0 = v_and(second_line, v255); + v_uint16 second_line0 = v_and(second_line, v255); // bayer[bayer_step+2] - v_uint16x8 second_line2 = v_rotate_right<1>(second_line0); + v_uint16 second_line2 = v_rotate_right<1>(second_line0); // bayer[bayer_step] + bayer[bayer_step+2] - v_uint16x8 second_line02 = v_add(second_line0, second_line2); + v_uint16 second_line02 = v_add(second_line0, second_line2); // bayer[bayer_step*2+1] - v_uint16x8 third_line1 = v_shr<8>(third_line); + v_uint16 third_line1 = v_shr<8>(third_line); // bayer[1] + bayer[bayer_step*2+1] - v_uint16x8 first_third_line1 = v_add(first_line1, third_line1); + v_uint16 first_third_line1 = v_add(first_line1, third_line1); // bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1] - v_int16x8 g0 = v_reinterpret_as_s16(v_add(first_third_line1, second_line02)); + v_int16 g0 = v_reinterpret_as_s16(v_add(first_third_line1, second_line02)); // bayer[bayer_step+2] * 4 - v_int16x8 g1 = v_reinterpret_as_s16(v_shl<2>(second_line2)); + v_int16 g1 = v_reinterpret_as_s16(v_shl<2>(second_line2)); - v_int16x8 rb0; - v_int16x8 rb1; - v_int16x8 rb2; - v_int16x8 rb3; + v_int16 rb0; + v_int16 rb1; + v_int16 rb2; + v_int16 rb3; v_zip(r0, b0, rb0, rb1); v_zip(r1, b1, rb2, rb3); - v_int16x8 gd0; - v_int16x8 gd1; - v_int16x8 gd2; - v_int16x8 gd3; + v_int16 gd0; + v_int16 gd1; + v_int16 gd2; + v_int16 gd3; v_zip(g0, v_descale, gd0, gd1); v_zip(g1, v_descale, gd2, gd3); - v_int32x4 gray_even0 = v_shr<16>(v_add(v_dotprod(rb0, cxrb), v_dotprod(gd0, cxg2))); - v_int32x4 gray_even1 = v_shr<16>(v_add(v_dotprod(rb1, cxrb), v_dotprod(gd1, cxg2))); - v_int32x4 gray_odd0 = v_shr<16>(v_add(v_dotprod(rb2, cxrb), v_dotprod(gd2, cxg2))); - v_int32x4 gray_odd1 = v_shr<16>(v_add(v_dotprod(rb3, cxrb), v_dotprod(gd3, cxg2))); + v_int32 gray_even0 = v_shr<16>(v_add(v_dotprod(rb0, cxrb), v_dotprod(gd0, cxg2))); + v_int32 gray_even1 = v_shr<16>(v_add(v_dotprod(rb1, cxrb), v_dotprod(gd1, cxg2))); + v_int32 gray_odd0 = v_shr<16>(v_add(v_dotprod(rb2, cxrb), v_dotprod(gd2, cxg2))); + v_int32 gray_odd1 = v_shr<16>(v_add(v_dotprod(rb3, cxrb), v_dotprod(gd3, cxg2))); - v_int16x8 gray_even = v_pack(gray_even0, gray_even1); - v_int16x8 gray_odd = v_pack(gray_odd0, gray_odd1); + v_int16 gray_even = v_pack(gray_even0, gray_even1); + v_int16 gray_odd = v_pack(gray_odd0, gray_odd1); - v_int16x8 gray_d0; - v_int16x8 gray_d1; + v_int16 gray_d0; + v_int16 gray_d1; v_zip(gray_even, gray_odd, gray_d0, gray_d1); - v_uint8x16 gray = v_pack(v_reinterpret_as_u16(gray_d0), v_reinterpret_as_u16(gray_d1)); + v_uint8 gray = v_pack(v_reinterpret_as_u16(gray_d0), v_reinterpret_as_u16(gray_d1)); v_store(dst, gray); } #endif return static_cast(bayer - (bayer_end - width)); +#else + CV_UNUSED(bayer); CV_UNUSED(bayer_step); CV_UNUSED(dst); + CV_UNUSED(width); CV_UNUSED(bcoeff); CV_UNUSED(gcoeff); CV_UNUSED(rcoeff); + return 0; +#endif } int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const @@ -282,7 +288,7 @@ public: G R G R | G R G R | G R G R | G R G R B G B G | B G B G | B G B G | B G B G */ - +#if CV_SIMD128 #if CV_NEON uint16x8_t masklo = vdupq_n_u16(255); uint8x16x3_t pix; @@ -405,6 +411,10 @@ public: #endif return (int)(bayer - (bayer_end - width)); +#else + CV_UNUSED(bayer); CV_UNUSED(bayer_step); CV_UNUSED(dst); CV_UNUSED(width); CV_UNUSED(blue); + return 0; +#endif } int bayer2RGBA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue, const uchar alpha) const @@ -415,6 +425,7 @@ public: B G B G | B G B G | B G B G | B G B G */ +#if CV_SIMD128 #if CV_NEON uint16x8_t masklo = vdupq_n_u16(255); uint8x16x4_t pix; @@ -536,10 +547,16 @@ public: #endif return (int)(bayer - (bayer_end - width)); +#else + CV_UNUSED(bayer); CV_UNUSED(bayer_step); CV_UNUSED(dst); + CV_UNUSED(width); CV_UNUSED(blue); CV_UNUSED(alpha); + return 0; +#endif } int bayer2RGB_EA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const { +#if CV_SIMD128 const uchar* bayer_end = bayer + width; v_uint16x8 masklow = v_setall_u16(0x00ff); v_uint16x8 delta1 = v_setall_u16(1), delta2 = v_setall_u16(2); @@ -642,12 +659,12 @@ public: } return int(bayer - (bayer_end - width)); +#else + CV_UNUSED(bayer); CV_UNUSED(bayer_step); CV_UNUSED(dst); CV_UNUSED(width); CV_UNUSED(blue); + return 0; +#endif } }; -#else -typedef SIMDBayerStubInterpolator_ SIMDBayerInterpolator_8u; -#endif - template class Bayer2Gray_Invoker :