mirror of
https://github.com/zebrajr/opencv.git
synced 2025-12-06 12:19:50 +01:00
Merge pull request #27581 from dkurt:d.kuryaev/dlpack
### Pull Request Readiness Checklist resolves #16295 ``` docker run --gpus 0 -v ~/opencv:/opencv -v ~/opencv_contrib:/opencv_contrib -it nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04 apt-get update && apt-get install -y cmake python3-dev python3-pip python3-venv && python3 -m venv .venv && source .venv/bin/activate && pip install -U pip && pip install -U numpy && pip install torch --index-url https://download.pytorch.org/whl/cu128 && cmake \ -DWITH_OPENCL=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DBUILD_DOCS=OFF \ -DWITH_CUDA=ON \ -DOPENCV_DNN_CUDA=ON \ -DOPENCV_EXTRA_MODULES_PATH=/opencv_contrib/modules \ -DBUILD_LIST=ts,cudev,python3 \ -S /opencv -B /opencv_build && cmake --build /opencv_build -j16 export PYTHONPATH=/opencv_build/lib/python3/:$PYTHONPATH ``` See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
1e37d84e3a
commit
ba19416730
201
3rdparty/dlpack/LICENSE
vendored
Normal file
201
3rdparty/dlpack/LICENSE
vendored
Normal file
|
|
@ -0,0 +1,201 @@
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright 2017 by Contributors
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
366
3rdparty/dlpack/include/dlpack/dlpack.h
vendored
Normal file
366
3rdparty/dlpack/include/dlpack/dlpack.h
vendored
Normal file
|
|
@ -0,0 +1,366 @@
|
||||||
|
/*!
|
||||||
|
* Copyright (c) 2017 by Contributors
|
||||||
|
* \file dlpack.h
|
||||||
|
* \brief The common header of DLPack.
|
||||||
|
*/
|
||||||
|
#ifndef DLPACK_DLPACK_H_
|
||||||
|
#define DLPACK_DLPACK_H_
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Compatibility with C++
|
||||||
|
*/
|
||||||
|
#ifdef __cplusplus
|
||||||
|
#define DLPACK_EXTERN_C extern "C"
|
||||||
|
#else
|
||||||
|
#define DLPACK_EXTERN_C
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*! \brief The current major version of dlpack */
|
||||||
|
#define DLPACK_MAJOR_VERSION 1
|
||||||
|
|
||||||
|
/*! \brief The current minor version of dlpack */
|
||||||
|
#define DLPACK_MINOR_VERSION 1
|
||||||
|
|
||||||
|
/*! \brief DLPACK_DLL prefix for windows */
|
||||||
|
#ifdef _WIN32
|
||||||
|
#ifdef DLPACK_EXPORTS
|
||||||
|
#define DLPACK_DLL __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
#define DLPACK_DLL __declspec(dllimport)
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#define DLPACK_DLL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief The DLPack version.
|
||||||
|
*
|
||||||
|
* A change in major version indicates that we have changed the
|
||||||
|
* data layout of the ABI - DLManagedTensorVersioned.
|
||||||
|
*
|
||||||
|
* A change in minor version indicates that we have added new
|
||||||
|
* code, such as a new device type, but the ABI is kept the same.
|
||||||
|
*
|
||||||
|
* If an obtained DLPack tensor has a major version that disagrees
|
||||||
|
* with the version number specified in this header file
|
||||||
|
* (i.e. major != DLPACK_MAJOR_VERSION), the consumer must call the deleter
|
||||||
|
* (and it is safe to do so). It is not safe to access any other fields
|
||||||
|
* as the memory layout will have changed.
|
||||||
|
*
|
||||||
|
* In the case of a minor version mismatch, the tensor can be safely used as
|
||||||
|
* long as the consumer knows how to interpret all fields. Minor version
|
||||||
|
* updates indicate the addition of enumeration values.
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
/*! \brief DLPack major version. */
|
||||||
|
uint32_t major;
|
||||||
|
/*! \brief DLPack minor version. */
|
||||||
|
uint32_t minor;
|
||||||
|
} DLPackVersion;
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief The device type in DLDevice.
|
||||||
|
*/
|
||||||
|
#ifdef __cplusplus
|
||||||
|
typedef enum : int32_t {
|
||||||
|
#else
|
||||||
|
typedef enum {
|
||||||
|
#endif
|
||||||
|
/*! \brief CPU device */
|
||||||
|
kDLCPU = 1,
|
||||||
|
/*! \brief CUDA GPU device */
|
||||||
|
kDLCUDA = 2,
|
||||||
|
/*!
|
||||||
|
* \brief Pinned CUDA CPU memory by cudaMallocHost
|
||||||
|
*/
|
||||||
|
kDLCUDAHost = 3,
|
||||||
|
/*! \brief OpenCL devices. */
|
||||||
|
kDLOpenCL = 4,
|
||||||
|
/*! \brief Vulkan buffer for next generation graphics. */
|
||||||
|
kDLVulkan = 7,
|
||||||
|
/*! \brief Metal for Apple GPU. */
|
||||||
|
kDLMetal = 8,
|
||||||
|
/*! \brief Verilog simulator buffer */
|
||||||
|
kDLVPI = 9,
|
||||||
|
/*! \brief ROCm GPUs for AMD GPUs */
|
||||||
|
kDLROCM = 10,
|
||||||
|
/*!
|
||||||
|
* \brief Pinned ROCm CPU memory allocated by hipMallocHost
|
||||||
|
*/
|
||||||
|
kDLROCMHost = 11,
|
||||||
|
/*!
|
||||||
|
* \brief Reserved extension device type,
|
||||||
|
* used to quickly test an extension device.
|
||||||
|
* The semantics can differ depending on the implementation.
|
||||||
|
*/
|
||||||
|
kDLExtDev = 12,
|
||||||
|
/*!
|
||||||
|
* \brief CUDA managed/unified memory allocated by cudaMallocManaged
|
||||||
|
*/
|
||||||
|
kDLCUDAManaged = 13,
|
||||||
|
/*!
|
||||||
|
* \brief Unified shared memory allocated on a oneAPI non-partitioned
|
||||||
|
* device. Call to oneAPI runtime is required to determine the device
|
||||||
|
* type, the USM allocation type and the sycl context it is bound to.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
kDLOneAPI = 14,
|
||||||
|
/*! \brief GPU support for next generation WebGPU standard. */
|
||||||
|
kDLWebGPU = 15,
|
||||||
|
/*! \brief Qualcomm Hexagon DSP */
|
||||||
|
kDLHexagon = 16,
|
||||||
|
/*! \brief Microsoft MAIA devices */
|
||||||
|
kDLMAIA = 17,
|
||||||
|
} DLDeviceType;
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief A Device for Tensor and operator.
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
/*! \brief The device type used in the device. */
|
||||||
|
DLDeviceType device_type;
|
||||||
|
/*!
|
||||||
|
* \brief The device index.
|
||||||
|
* For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
|
||||||
|
*/
|
||||||
|
int32_t device_id;
|
||||||
|
} DLDevice;
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief The type code options DLDataType.
|
||||||
|
*/
|
||||||
|
typedef enum {
|
||||||
|
/*! \brief signed integer */
|
||||||
|
kDLInt = 0U,
|
||||||
|
/*! \brief unsigned integer */
|
||||||
|
kDLUInt = 1U,
|
||||||
|
/*! \brief IEEE floating point */
|
||||||
|
kDLFloat = 2U,
|
||||||
|
/*!
|
||||||
|
* \brief Opaque handle type, reserved for testing purposes.
|
||||||
|
* Frameworks need to agree on the handle data type for the exchange to be well-defined.
|
||||||
|
*/
|
||||||
|
kDLOpaqueHandle = 3U,
|
||||||
|
/*! \brief bfloat16 */
|
||||||
|
kDLBfloat = 4U,
|
||||||
|
/*!
|
||||||
|
* \brief complex number
|
||||||
|
* (C/C++/Python layout: compact struct per complex number)
|
||||||
|
*/
|
||||||
|
kDLComplex = 5U,
|
||||||
|
/*! \brief boolean */
|
||||||
|
kDLBool = 6U,
|
||||||
|
/*! \brief FP8 data types */
|
||||||
|
kDLFloat8_e3m4 = 7U,
|
||||||
|
kDLFloat8_e4m3 = 8U,
|
||||||
|
kDLFloat8_e4m3b11fnuz = 9U,
|
||||||
|
kDLFloat8_e4m3fn = 10U,
|
||||||
|
kDLFloat8_e4m3fnuz = 11U,
|
||||||
|
kDLFloat8_e5m2 = 12U,
|
||||||
|
kDLFloat8_e5m2fnuz = 13U,
|
||||||
|
kDLFloat8_e8m0fnu = 14U,
|
||||||
|
/*! \brief FP6 data types
|
||||||
|
* Setting bits != 6 is currently unspecified, and the producer must ensure it is set
|
||||||
|
* while the consumer must stop importing if the value is unexpected.
|
||||||
|
*/
|
||||||
|
kDLFloat6_e2m3fn = 15U,
|
||||||
|
kDLFloat6_e3m2fn = 16U,
|
||||||
|
/*! \brief FP4 data types
|
||||||
|
* Setting bits != 4 is currently unspecified, and the producer must ensure it is set
|
||||||
|
* while the consumer must stop importing if the value is unexpected.
|
||||||
|
*/
|
||||||
|
kDLFloat4_e2m1fn = 17U,
|
||||||
|
} DLDataTypeCode;
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief The data type the tensor can hold. The data type is assumed to follow the
|
||||||
|
* native endian-ness. An explicit error message should be raised when attempting to
|
||||||
|
* export an array with non-native endianness
|
||||||
|
*
|
||||||
|
* Examples
|
||||||
|
* - float: type_code = 2, bits = 32, lanes = 1
|
||||||
|
* - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4
|
||||||
|
* - int8: type_code = 0, bits = 8, lanes = 1
|
||||||
|
* - std::complex<float>: type_code = 5, bits = 64, lanes = 1
|
||||||
|
* - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention, the underlying storage size of bool is 8 bits)
|
||||||
|
* - float8_e4m3: type_code = 8, bits = 8, lanes = 1 (packed in memory)
|
||||||
|
* - float6_e3m2fn: type_code = 16, bits = 6, lanes = 1 (packed in memory)
|
||||||
|
* - float4_e2m1fn: type_code = 17, bits = 4, lanes = 1 (packed in memory)
|
||||||
|
*
|
||||||
|
* When a sub-byte type is packed, DLPack requires the data to be in little bit-endian, i.e.,
|
||||||
|
* for a packed data set D ((D >> (i * bits)) & bit_mask) stores the i-th element.
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
/*!
|
||||||
|
* \brief Type code of base types.
|
||||||
|
* We keep it uint8_t instead of DLDataTypeCode for minimal memory
|
||||||
|
* footprint, but the value should be one of DLDataTypeCode enum values.
|
||||||
|
* */
|
||||||
|
uint8_t code;
|
||||||
|
/*!
|
||||||
|
* \brief Number of bits, common choices are 8, 16, 32.
|
||||||
|
*/
|
||||||
|
uint8_t bits;
|
||||||
|
/*! \brief Number of lanes in the type, used for vector types. */
|
||||||
|
uint16_t lanes;
|
||||||
|
} DLDataType;
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Plain C Tensor object, does not manage memory.
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
/*!
|
||||||
|
* \brief The data pointer points to the allocated data. This will be CUDA
|
||||||
|
* device pointer or cl_mem handle in OpenCL. It may be opaque on some device
|
||||||
|
* types. This pointer is always aligned to 256 bytes as in CUDA. The
|
||||||
|
* `byte_offset` field should be used to point to the beginning of the data.
|
||||||
|
*
|
||||||
|
* Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
|
||||||
|
* TVM, perhaps others) do not adhere to this 256 byte alignment requirement
|
||||||
|
* on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
|
||||||
|
* (after which this note will be updated); at the moment it is recommended
|
||||||
|
* to not rely on the data pointer being correctly aligned.
|
||||||
|
*
|
||||||
|
* For given DLTensor, the size of memory required to store the contents of
|
||||||
|
* data is calculated as follows:
|
||||||
|
*
|
||||||
|
* \code{.c}
|
||||||
|
* static inline size_t GetDataSize(const DLTensor* t) {
|
||||||
|
* size_t size = 1;
|
||||||
|
* for (tvm_index_t i = 0; i < t->ndim; ++i) {
|
||||||
|
* size *= t->shape[i];
|
||||||
|
* }
|
||||||
|
* size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
|
||||||
|
* return size;
|
||||||
|
* }
|
||||||
|
* \endcode
|
||||||
|
*
|
||||||
|
* Note that if the tensor is of size zero, then the data pointer should be
|
||||||
|
* set to `NULL`.
|
||||||
|
*/
|
||||||
|
void* data;
|
||||||
|
/*! \brief The device of the tensor */
|
||||||
|
DLDevice device;
|
||||||
|
/*! \brief Number of dimensions */
|
||||||
|
int32_t ndim;
|
||||||
|
/*! \brief The data type of the pointer*/
|
||||||
|
DLDataType dtype;
|
||||||
|
/*! \brief The shape of the tensor */
|
||||||
|
int64_t* shape;
|
||||||
|
/*!
|
||||||
|
* \brief strides of the tensor (in number of elements, not bytes)
|
||||||
|
* can be NULL, indicating tensor is compact and row-major.
|
||||||
|
*/
|
||||||
|
int64_t* strides;
|
||||||
|
/*! \brief The offset in bytes to the beginning pointer to data */
|
||||||
|
uint64_t byte_offset;
|
||||||
|
} DLTensor;
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief C Tensor object, manage memory of DLTensor. This data structure is
|
||||||
|
* intended to facilitate the borrowing of DLTensor by another framework. It is
|
||||||
|
* not meant to transfer the tensor. When the borrowing framework doesn't need
|
||||||
|
* the tensor, it should call the deleter to notify the host that the resource
|
||||||
|
* is no longer needed.
|
||||||
|
*
|
||||||
|
* \note This data structure is used as Legacy DLManagedTensor
|
||||||
|
* in DLPack exchange and is deprecated after DLPack v0.8
|
||||||
|
* Use DLManagedTensorVersioned instead.
|
||||||
|
* This data structure may get renamed or deleted in future versions.
|
||||||
|
*
|
||||||
|
* \sa DLManagedTensorVersioned
|
||||||
|
*/
|
||||||
|
typedef struct DLManagedTensor {
|
||||||
|
/*! \brief DLTensor which is being memory managed */
|
||||||
|
DLTensor dl_tensor;
|
||||||
|
/*! \brief the context of the original host framework of DLManagedTensor in
|
||||||
|
* which DLManagedTensor is used in the framework. It can also be NULL.
|
||||||
|
*/
|
||||||
|
void * manager_ctx;
|
||||||
|
/*!
|
||||||
|
* \brief Destructor - this should be called
|
||||||
|
* to destruct the manager_ctx which backs the DLManagedTensor. It can be
|
||||||
|
* NULL if there is no way for the caller to provide a reasonable destructor.
|
||||||
|
* The destructor deletes the argument self as well.
|
||||||
|
*/
|
||||||
|
void (*deleter)(struct DLManagedTensor * self);
|
||||||
|
} DLManagedTensor;
|
||||||
|
|
||||||
|
// bit masks used in the DLManagedTensorVersioned
|
||||||
|
|
||||||
|
/*! \brief bit mask to indicate that the tensor is read only. */
|
||||||
|
#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief bit mask to indicate that the tensor is a copy made by the producer.
|
||||||
|
*
|
||||||
|
* If set, the tensor is considered solely owned throughout its lifetime by the
|
||||||
|
* consumer, until the producer-provided deleter is invoked.
|
||||||
|
*/
|
||||||
|
#define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL)
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief bit mask to indicate whether a sub-byte type is packed or padded.
|
||||||
|
*
|
||||||
|
* The default for sub-byte types (ex: fp4/fp6) is assumed packed. This flag can
|
||||||
|
* be set by the producer to signal that a tensor of sub-byte type is padded.
|
||||||
|
*/
|
||||||
|
#define DLPACK_FLAG_BITMASK_IS_SUBBYTE_TYPE_PADDED (1UL << 2UL)
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief A versioned and managed C Tensor object, manage memory of DLTensor.
|
||||||
|
*
|
||||||
|
* This data structure is intended to facilitate the borrowing of DLTensor by
|
||||||
|
* another framework. It is not meant to transfer the tensor. When the borrowing
|
||||||
|
* framework doesn't need the tensor, it should call the deleter to notify the
|
||||||
|
* host that the resource is no longer needed.
|
||||||
|
*
|
||||||
|
* \note This is the current standard DLPack exchange data structure.
|
||||||
|
*/
|
||||||
|
struct DLManagedTensorVersioned {
|
||||||
|
/*!
|
||||||
|
* \brief The API and ABI version of the current managed Tensor
|
||||||
|
*/
|
||||||
|
DLPackVersion version;
|
||||||
|
/*!
|
||||||
|
* \brief the context of the original host framework.
|
||||||
|
*
|
||||||
|
* Stores the context in which DLManagedTensorVersioned is used in the
|
||||||
|
* framework. It can also be NULL.
|
||||||
|
*/
|
||||||
|
void *manager_ctx;
|
||||||
|
/*!
|
||||||
|
* \brief Destructor.
|
||||||
|
*
|
||||||
|
* This should be called to destruct manager_ctx which holds the DLManagedTensorVersioned.
|
||||||
|
* It can be NULL if there is no way for the caller to provide a reasonable
|
||||||
|
* destructor. The destructor deletes the argument self as well.
|
||||||
|
*/
|
||||||
|
void (*deleter)(struct DLManagedTensorVersioned *self);
|
||||||
|
/*!
|
||||||
|
* \brief Additional bitmask flags information about the tensor.
|
||||||
|
*
|
||||||
|
* By default the flags should be set to 0.
|
||||||
|
*
|
||||||
|
* \note Future ABI changes should keep everything until this field
|
||||||
|
* stable, to ensure that deleter can be correctly called.
|
||||||
|
*
|
||||||
|
* \sa DLPACK_FLAG_BITMASK_READ_ONLY
|
||||||
|
* \sa DLPACK_FLAG_BITMASK_IS_COPIED
|
||||||
|
*/
|
||||||
|
uint64_t flags;
|
||||||
|
/*! \brief DLTensor which is being memory managed */
|
||||||
|
DLTensor dl_tensor;
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} // DLPACK_EXTERN_C
|
||||||
|
#endif
|
||||||
|
#endif // DLPACK_DLPACK_H_
|
||||||
|
|
@ -640,6 +640,7 @@ ocv_cmake_hook(POST_CMAKE_BUILD_OPTIONS)
|
||||||
# --- Python Support ---
|
# --- Python Support ---
|
||||||
if(NOT IOS AND NOT XROS)
|
if(NOT IOS AND NOT XROS)
|
||||||
include(cmake/OpenCVDetectPython.cmake)
|
include(cmake/OpenCVDetectPython.cmake)
|
||||||
|
include(cmake/OpenCVDetectDLPack.cmake)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
include(cmake/OpenCVCompilerOptions.cmake)
|
include(cmake/OpenCVCompilerOptions.cmake)
|
||||||
|
|
|
||||||
5
cmake/OpenCVDetectDLPack.cmake
Normal file
5
cmake/OpenCVDetectDLPack.cmake
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
# Look for an installed DLPack package first. When none is found, fall back to
# the copy vendored under 3rdparty/dlpack: expose its include directory and
# register its license for installation.
find_package(dlpack QUIET)
if(NOT dlpack_FOUND)
  set(_ocv_dlpack_root "${OpenCV_SOURCE_DIR}/3rdparty/dlpack")
  ocv_include_directories("${_ocv_dlpack_root}/include")
  ocv_install_3rdparty_licenses(dlpack "${_ocv_dlpack_root}/LICENSE")
  unset(_ocv_dlpack_root)
endif()
|
||||||
|
|
@ -3,6 +3,8 @@
|
||||||
|
|
||||||
#ifdef HAVE_OPENCV_CORE
|
#ifdef HAVE_OPENCV_CORE
|
||||||
|
|
||||||
|
#include "dlpack/dlpack.h"
|
||||||
|
|
||||||
static PyObject* pycvMakeType(PyObject* , PyObject* args, PyObject* kw) {
|
static PyObject* pycvMakeType(PyObject* , PyObject* args, PyObject* kw) {
|
||||||
const char *keywords[] = { "depth", "channels", NULL };
|
const char *keywords[] = { "depth", "channels", NULL };
|
||||||
|
|
||||||
|
|
@ -20,6 +22,201 @@ static PyObject* pycvMakeTypeCh(PyObject*, PyObject *value) {
|
||||||
return PyInt_FromLong(CV_MAKETYPE(depth, channels));
|
return PyInt_FromLong(CV_MAKETYPE(depth, channels));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define CV_DLPACK_CAPSULE_NAME "dltensor"
|
||||||
|
#define CV_DLPACK_USED_CAPSULE_NAME "used_dltensor"
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
bool fillDLPackTensor(const T& src, DLManagedTensor* tensor, const DLDevice& device);
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
bool parseDLPackTensor(DLManagedTensor* tensor, T& obj, bool copy);
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
int GetNumDims(const T& src);
|
||||||
|
|
||||||
|
// source: https://github.com/dmlc/dlpack/blob/7f393bbb86a0ddd71fde3e700fc2affa5cdce72d/docs/source/python_spec.rst#L110
|
||||||
|
/**
 * Destructor attached to the PyCapsule that wraps a DLManagedTensor.
 *
 * - If the capsule is valid under CV_DLPACK_USED_CAPSULE_NAME, nothing is done.
 *   (NOTE(review): per the DLPack Python spec this name marks a capsule that
 *   a consumer has already taken over - confirm against the spec.)
 * - Otherwise the pointer stored under CV_DLPACK_CAPSULE_NAME is fetched. If
 *   that fails, the pending error is reported via PyErr_WriteUnraisable.
 * - If the tensor carries a deleter, it is invoked on the tensor itself.
 *
 * @param self the capsule object being destroyed.
 */
static void dlpack_capsule_deleter(PyObject *self)
{
    if (!PyCapsule_IsValid(self, CV_DLPACK_USED_CAPSULE_NAME))
    {
        void* payload = PyCapsule_GetPointer(self, CV_DLPACK_CAPSULE_NAME);
        if (payload == NULL)
        {
            // Called from a destructor context: the error cannot be raised,
            // only reported.
            PyErr_WriteUnraisable(self);
        }
        else
        {
            DLManagedTensor* tensor = (DLManagedTensor*)payload;
            // A NULL deleter is tolerated; there is then nothing to call.
            if (tensor->deleter)
            {
                tensor->deleter(tensor);
            }
        }
    }
}
|
||||||
|
|
||||||
|
// Deleter stored in DLManagedTensor::deleter by to_dlpack().
// Frees the tensor block and drops the reference held on the Python object
// kept in manager_ctx. Does nothing once the interpreter is finalized.
static void array_dlpack_deleter(DLManagedTensor *self)
{
    if (Py_IsInitialized()) {
        // May be invoked from a thread that does not hold the GIL.
        const PyGILState_STATE gil = PyGILState_Ensure();

        // Read the owner before the block that stores it is released.
        PyObject* const owner = (PyObject *)self->manager_ctx;
        PyMem_Free(self);
        Py_XDECREF(owner);

        PyGILState_Release(gil);
    }
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static PyObject* to_dlpack(const T& src, PyObject* self, PyObject* py_args, PyObject* kw)
|
||||||
|
{
|
||||||
|
int stream = 0;
|
||||||
|
PyObject* maxVersion = nullptr;
|
||||||
|
PyObject* dlDevice = nullptr;
|
||||||
|
bool copy = false;
|
||||||
|
const char* keywords[] = { "stream", "max_version", "dl_device", "copy", NULL };
|
||||||
|
if (!PyArg_ParseTupleAndKeywords(py_args, kw, "|iOOp:__dlpack__", (char**)keywords, &stream, &maxVersion, &dlDevice, ©))
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
DLDevice device = {(DLDeviceType)-1, 0};
|
||||||
|
if (dlDevice && dlDevice != Py_None && PyTuple_Check(dlDevice))
|
||||||
|
{
|
||||||
|
device.device_type = static_cast<DLDeviceType>(PyLong_AsLong(PyTuple_GetItem(dlDevice, 0)));
|
||||||
|
device.device_id = PyLong_AsLong(PyTuple_GetItem(dlDevice, 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
int ndim = GetNumDims(src);
|
||||||
|
void* ptr = PyMem_Malloc(sizeof(DLManagedTensor) + sizeof(int64_t) * ndim * 2);
|
||||||
|
if (!ptr) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
DLManagedTensor* tensor = reinterpret_cast<DLManagedTensor*>(ptr);
|
||||||
|
tensor->manager_ctx = self;
|
||||||
|
tensor->deleter = array_dlpack_deleter;
|
||||||
|
tensor->dl_tensor.ndim = ndim;
|
||||||
|
tensor->dl_tensor.shape = reinterpret_cast<int64_t*>(reinterpret_cast<char*>(ptr) + sizeof(DLManagedTensor));
|
||||||
|
tensor->dl_tensor.strides = tensor->dl_tensor.shape + ndim;
|
||||||
|
fillDLPackTensor(src, tensor, device);
|
||||||
|
|
||||||
|
PyObject* capsule = PyCapsule_New(ptr, CV_DLPACK_CAPSULE_NAME, dlpack_capsule_deleter);
|
||||||
|
if (!capsule) {
|
||||||
|
PyMem_Free(ptr);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// the capsule holds a reference
|
||||||
|
Py_INCREF(self);
|
||||||
|
|
||||||
|
return capsule;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static PyObject* from_dlpack(PyObject* py_args, PyObject* kw)
|
||||||
|
{
|
||||||
|
PyObject* arr = nullptr;
|
||||||
|
PyObject* device = nullptr;
|
||||||
|
bool copy = false;
|
||||||
|
const char* keywords[] = { "device", "copy", NULL };
|
||||||
|
if (!PyArg_ParseTupleAndKeywords(py_args, kw, "O|Op:from_dlpack", (char**)keywords, &arr, &device, ©))
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
PyObject* capsule = nullptr;
|
||||||
|
if (PyCapsule_CheckExact(arr))
|
||||||
|
{
|
||||||
|
capsule = arr;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PyGILState_STATE gstate;
|
||||||
|
gstate = PyGILState_Ensure();
|
||||||
|
capsule = PyObject_CallMethodObjArgs(arr, PyString_FromString("__dlpack__"), NULL);
|
||||||
|
PyGILState_Release(gstate);
|
||||||
|
}
|
||||||
|
|
||||||
|
DLManagedTensor* tensor = reinterpret_cast<DLManagedTensor*>(PyCapsule_GetPointer(capsule, CV_DLPACK_CAPSULE_NAME));
|
||||||
|
if (tensor == nullptr)
|
||||||
|
{
|
||||||
|
if (capsule != arr)
|
||||||
|
Py_DECREF(capsule);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
T retval;
|
||||||
|
bool success = parseDLPackTensor(tensor, retval, copy);
|
||||||
|
if (success)
|
||||||
|
{
|
||||||
|
PyCapsule_SetName(capsule, CV_DLPACK_USED_CAPSULE_NAME);
|
||||||
|
}
|
||||||
|
if (capsule != arr)
|
||||||
|
Py_DECREF(capsule);
|
||||||
|
|
||||||
|
return success ? pyopencv_from(retval) : nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the DLPack data type for an OpenCV depth.
// `elemSize1` is the size of one channel element in bytes, `depth` one of the
// CV_* depth constants. Raises a cv error (StsNotImplemented) for depths that
// are not listed below.
static DLDataType GetDLPackType(size_t elemSize1, int depth) {
    DLDataType result;
    result.bits = static_cast<uint8_t>(elemSize1 * 8);
    result.lanes = 1;

    if (depth == CV_8S || depth == CV_16S || depth == CV_32S)
    {
        result.code = kDLInt;
    }
    else if (depth == CV_8U || depth == CV_16U)
    {
        result.code = kDLUInt;
    }
    else if (depth == CV_16F || depth == CV_32F || depth == CV_64F)
    {
        result.code = kDLFloat;
    }
    else
    {
        CV_Error(Error::StsNotImplemented, "__dlpack__ data type");
    }
    return result;
}
|
||||||
|
|
||||||
|
static int DLPackTypeToCVType(const DLDataType& dtype, int channels) {
|
||||||
|
if (dtype.code == kDLInt)
|
||||||
|
{
|
||||||
|
switch (dtype.bits)
|
||||||
|
{
|
||||||
|
case 8: return CV_8SC(channels);
|
||||||
|
case 16: return CV_16SC(channels);
|
||||||
|
case 32: return CV_32SC(channels);
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
PyErr_SetString(PyExc_BufferError,
|
||||||
|
format("Unsupported int dlpack depth: %d", dtype.bits).c_str());
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (dtype.code == kDLUInt)
|
||||||
|
{
|
||||||
|
switch (dtype.bits)
|
||||||
|
{
|
||||||
|
case 8: return CV_8UC(channels);
|
||||||
|
case 16: return CV_16UC(channels);
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
PyErr_SetString(PyExc_BufferError,
|
||||||
|
format("Unsupported uint dlpack depth: %d", dtype.bits).c_str());
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (dtype.code == kDLFloat)
|
||||||
|
{
|
||||||
|
switch (dtype.bits)
|
||||||
|
{
|
||||||
|
case 16: return CV_16FC(channels);
|
||||||
|
case 32: return CV_32FC(channels);
|
||||||
|
case 64: return CV_64FC(channels);
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
PyErr_SetString(PyExc_BufferError,
|
||||||
|
format("Unsupported float dlpack depth: %d", dtype.bits).c_str());
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PyErr_SetString(PyExc_BufferError, format("Unsupported dlpack data type: %d", dtype.code).c_str());
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
#define PYOPENCV_EXTRA_METHODS_CV \
|
#define PYOPENCV_EXTRA_METHODS_CV \
|
||||||
{"CV_MAKETYPE", CV_PY_FN_WITH_KW(pycvMakeType), "CV_MAKETYPE(depth, channels) -> retval"}, \
|
{"CV_MAKETYPE", CV_PY_FN_WITH_KW(pycvMakeType), "CV_MAKETYPE(depth, channels) -> retval"}, \
|
||||||
{"CV_8UC", (PyCFunction)(pycvMakeTypeCh<CV_8U>), METH_O, "CV_8UC(channels) -> retval"}, \
|
{"CV_8UC", (PyCFunction)(pycvMakeTypeCh<CV_8U>), METH_O, "CV_8UC(channels) -> retval"}, \
|
||||||
|
|
|
||||||
|
|
@ -21,17 +21,175 @@ template<> struct pyopencvVecConverter<cuda::GpuMat>
|
||||||
};
|
};
|
||||||
|
|
||||||
CV_PY_TO_CLASS(cuda::GpuMat)
|
CV_PY_TO_CLASS(cuda::GpuMat)
|
||||||
|
CV_PY_TO_CLASS(cuda::GpuMatND)
|
||||||
CV_PY_TO_CLASS(cuda::Stream)
|
CV_PY_TO_CLASS(cuda::Stream)
|
||||||
CV_PY_TO_CLASS(cuda::Event)
|
CV_PY_TO_CLASS(cuda::Event)
|
||||||
CV_PY_TO_CLASS(cuda::HostMem)
|
CV_PY_TO_CLASS(cuda::HostMem)
|
||||||
|
|
||||||
CV_PY_TO_CLASS_PTR(cuda::GpuMat)
|
CV_PY_TO_CLASS_PTR(cuda::GpuMat)
|
||||||
|
CV_PY_TO_CLASS_PTR(cuda::GpuMatND)
|
||||||
CV_PY_TO_CLASS_PTR(cuda::GpuMat::Allocator)
|
CV_PY_TO_CLASS_PTR(cuda::GpuMat::Allocator)
|
||||||
|
|
||||||
CV_PY_FROM_CLASS(cuda::GpuMat)
|
CV_PY_FROM_CLASS(cuda::GpuMat)
|
||||||
|
CV_PY_FROM_CLASS(cuda::GpuMatND)
|
||||||
CV_PY_FROM_CLASS(cuda::Stream)
|
CV_PY_FROM_CLASS(cuda::Stream)
|
||||||
CV_PY_FROM_CLASS(cuda::HostMem)
|
CV_PY_FROM_CLASS(cuda::HostMem)
|
||||||
|
|
||||||
CV_PY_FROM_CLASS_PTR(cuda::GpuMat::Allocator)
|
CV_PY_FROM_CLASS_PTR(cuda::GpuMat::Allocator)
|
||||||
|
|
||||||
|
template<>
|
||||||
|
bool fillDLPackTensor(const Ptr<cv::cuda::GpuMat>& src, DLManagedTensor* tensor, const DLDevice& device)
|
||||||
|
{
|
||||||
|
if ((device.device_type != -1 && device.device_type != kDLCUDA) || device.device_id != 0)
|
||||||
|
{
|
||||||
|
PyErr_SetString(PyExc_BufferError, "GpuMat can be exported only on GPU:0");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
tensor->dl_tensor.data = src->cudaPtr();
|
||||||
|
tensor->dl_tensor.device.device_type = kDLCUDA;
|
||||||
|
tensor->dl_tensor.device.device_id = 0;
|
||||||
|
tensor->dl_tensor.dtype = GetDLPackType(src->elemSize1(), src->depth());
|
||||||
|
tensor->dl_tensor.shape[0] = src->rows;
|
||||||
|
tensor->dl_tensor.shape[1] = src->cols;
|
||||||
|
tensor->dl_tensor.shape[2] = src->channels();
|
||||||
|
tensor->dl_tensor.strides[0] = src->step1();
|
||||||
|
tensor->dl_tensor.strides[1] = src->channels();
|
||||||
|
tensor->dl_tensor.strides[2] = 1;
|
||||||
|
tensor->dl_tensor.byte_offset = 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
bool fillDLPackTensor(const Ptr<cv::cuda::GpuMatND>& src, DLManagedTensor* tensor, const DLDevice& device)
|
||||||
|
{
|
||||||
|
if ((device.device_type != -1 && device.device_type != kDLCUDA) || device.device_id != 0)
|
||||||
|
{
|
||||||
|
PyErr_SetString(PyExc_BufferError, "GpuMatND can be exported only on GPU:0");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
tensor->dl_tensor.data = src->getDevicePtr();
|
||||||
|
tensor->dl_tensor.device.device_type = kDLCUDA;
|
||||||
|
tensor->dl_tensor.device.device_id = 0;
|
||||||
|
tensor->dl_tensor.dtype = GetDLPackType(src->elemSize1(), CV_MAT_DEPTH(src->flags));
|
||||||
|
for (int i = 0; i < src->dims; ++i)
|
||||||
|
tensor->dl_tensor.shape[i] = src->size[i];
|
||||||
|
for (int i = 0; i < src->dims; ++i)
|
||||||
|
tensor->dl_tensor.strides[i] = src->step[i];
|
||||||
|
tensor->dl_tensor.byte_offset = 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
bool parseDLPackTensor(DLManagedTensor* tensor, cv::cuda::GpuMat& obj, bool copy)
|
||||||
|
{
|
||||||
|
if (tensor->dl_tensor.byte_offset != 0)
|
||||||
|
{
|
||||||
|
PyErr_SetString(PyExc_BufferError, "Unimplemented from_dlpack for GpuMat with memory offset");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (tensor->dl_tensor.ndim != 3)
|
||||||
|
{
|
||||||
|
PyErr_SetString(PyExc_BufferError, "cuda_GpuMat.from_dlpack expects a 3D tensor. Use cuda_GpuMatND.from_dlpack instead");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (tensor->dl_tensor.device.device_type != kDLCUDA)
|
||||||
|
{
|
||||||
|
PyErr_SetString(PyExc_BufferError, "cuda_GpuMat.from_dlpack expects a tensor on CUDA device");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (tensor->dl_tensor.strides[1] != tensor->dl_tensor.shape[2] ||
|
||||||
|
tensor->dl_tensor.strides[2] != 1)
|
||||||
|
{
|
||||||
|
PyErr_SetString(PyExc_BufferError, "Unexpected strides for image. Try use GpuMatND");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int type = DLPackTypeToCVType(tensor->dl_tensor.dtype, (int)tensor->dl_tensor.shape[2]);
|
||||||
|
if (type == -1)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
obj = cv::cuda::GpuMat(
|
||||||
|
static_cast<int>(tensor->dl_tensor.shape[0]),
|
||||||
|
static_cast<int>(tensor->dl_tensor.shape[1]),
|
||||||
|
type,
|
||||||
|
tensor->dl_tensor.data,
|
||||||
|
tensor->dl_tensor.strides[0] * tensor->dl_tensor.dtype.bits / 8
|
||||||
|
);
|
||||||
|
if (copy)
|
||||||
|
obj = obj.clone();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
bool parseDLPackTensor(DLManagedTensor* tensor, cv::cuda::GpuMatND& obj, bool copy)
|
||||||
|
{
|
||||||
|
if (tensor->dl_tensor.byte_offset != 0)
|
||||||
|
{
|
||||||
|
PyErr_SetString(PyExc_BufferError, "Unimplemented from_dlpack for GpuMat with memory offset");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (tensor->dl_tensor.device.device_type != kDLCUDA)
|
||||||
|
{
|
||||||
|
PyErr_SetString(PyExc_BufferError, "cuda_GpuMat.from_dlpack expects a tensor on CUDA device");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int type = DLPackTypeToCVType(tensor->dl_tensor.dtype, (int)tensor->dl_tensor.shape[2]);
|
||||||
|
if (type == -1)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
std::vector<size_t> steps(tensor->dl_tensor.ndim - 1);
|
||||||
|
std::vector<int> sizes(tensor->dl_tensor.ndim);
|
||||||
|
for (int i = 0; i < tensor->dl_tensor.ndim - 1; ++i)
|
||||||
|
{
|
||||||
|
steps[i] = tensor->dl_tensor.strides[i] * tensor->dl_tensor.dtype.bits / 8;
|
||||||
|
sizes[i] = static_cast<int>(tensor->dl_tensor.shape[i]);
|
||||||
|
}
|
||||||
|
sizes.back() = static_cast<int>(tensor->dl_tensor.shape[tensor->dl_tensor.ndim - 1]);
|
||||||
|
obj = cv::cuda::GpuMatND(sizes, type, tensor->dl_tensor.data, steps);
|
||||||
|
if (copy)
|
||||||
|
obj = obj.clone();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
int GetNumDims(const Ptr<cv::cuda::GpuMat>& src) { return 3; }
|
||||||
|
|
||||||
|
template<>
|
||||||
|
int GetNumDims(const Ptr<cv::cuda::GpuMatND>& src) { return src->dims; }
|
||||||
|
|
||||||
|
// `cuda_GpuMat.__dlpack__`: resolves `self` to its GpuMat and forwards to to_dlpack().
static PyObject* pyDLPackGpuMat(PyObject* self, PyObject* py_args, PyObject* kw) {
    Ptr<cv::cuda::GpuMat>* matPtr = 0;
    const bool resolved = pyopencv_cuda_GpuMat_getp(self, matPtr);
    if (resolved)
        return to_dlpack(*matPtr, self, py_args, kw);
    return failmsgp("Incorrect type of self (must be 'cuda_GpuMat' or its derivative)");
}
|
||||||
|
|
||||||
|
// `cuda_GpuMatND.__dlpack__`: resolves `self` to its GpuMatND and forwards to to_dlpack().
static PyObject* pyDLPackGpuMatND(PyObject* self, PyObject* py_args, PyObject* kw) {
    Ptr<cv::cuda::GpuMatND>* matPtr = 0;
    const bool resolved = pyopencv_cuda_GpuMatND_getp(self, matPtr);
    if (resolved)
        return to_dlpack(*matPtr, self, py_args, kw);
    return failmsgp("Incorrect type of self (must be 'cuda_GpuMatND' or its derivative)");
}
|
||||||
|
|
||||||
|
// `__dlpack_device__`: always reports the pair (kDLCUDA, 0); all arguments are ignored.
static PyObject* pyDLPackDeviceCUDA(PyObject*, PyObject*, PyObject*) {
    const std::tuple<int, int> cudaDevice(kDLCUDA, 0);
    return pyopencv_from(cudaDevice);
}
|
||||||
|
|
||||||
|
// Static `cuda_GpuMat.from_dlpack`: the first (class/self) argument is unused.
static PyObject* pyGpuMatFromDLPack(PyObject*, PyObject* py_args, PyObject* kw)
{
    PyObject* const imported = from_dlpack<cv::cuda::GpuMat>(py_args, kw);
    return imported;
}
|
||||||
|
|
||||||
|
// Static `cuda_GpuMatND.from_dlpack`: the first (class/self) argument is unused.
static PyObject* pyGpuMatNDFromDLPack(PyObject*, PyObject* py_args, PyObject* kw)
{
    PyObject* const imported = from_dlpack<cv::cuda::GpuMatND>(py_args, kw);
    return imported;
}
|
||||||
|
|
||||||
|
#define PYOPENCV_EXTRA_METHODS_cuda_GpuMat \
|
||||||
|
{"__dlpack__", CV_PY_FN_WITH_KW(pyDLPackGpuMat), ""}, \
|
||||||
|
{"__dlpack_device__", CV_PY_FN_WITH_KW(pyDLPackDeviceCUDA), ""}, \
|
||||||
|
{"from_dlpack", CV_PY_FN_WITH_KW_(pyGpuMatFromDLPack, METH_STATIC), ""}, \
|
||||||
|
|
||||||
|
#define PYOPENCV_EXTRA_METHODS_cuda_GpuMatND \
|
||||||
|
{"__dlpack__", CV_PY_FN_WITH_KW(pyDLPackGpuMatND), ""}, \
|
||||||
|
{"__dlpack_device__", CV_PY_FN_WITH_KW(pyDLPackDeviceCUDA), ""}, \
|
||||||
|
{"from_dlpack", CV_PY_FN_WITH_KW_(pyGpuMatNDFromDLPack, METH_STATIC), ""}, \
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -133,6 +133,9 @@ static PyGetSetDef pyopencv_${name}_getseters[] =
|
||||||
|
|
||||||
static PyMethodDef pyopencv_${name}_methods[] =
|
static PyMethodDef pyopencv_${name}_methods[] =
|
||||||
{
|
{
|
||||||
|
#ifdef PYOPENCV_EXTRA_METHODS_${name}
|
||||||
|
PYOPENCV_EXTRA_METHODS_${name}
|
||||||
|
#endif
|
||||||
${methods_inits}
|
${methods_inits}
|
||||||
{NULL, NULL}
|
{NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -145,5 +145,18 @@ class cuda_test(NewOpenCVTests):
|
||||||
self.assertEqual(True, hasattr(cv.cuda, 'fastNlMeansDenoisingColored'))
|
self.assertEqual(True, hasattr(cv.cuda, 'fastNlMeansDenoisingColored'))
|
||||||
self.assertEqual(True, hasattr(cv.cuda, 'nonLocalMeans'))
|
self.assertEqual(True, hasattr(cv.cuda, 'nonLocalMeans'))
|
||||||
|
|
||||||
|
def test_dlpack_GpuMat(self):
    """Round-trip a GpuMat through DLPack for several depths and channel counts."""
    dtypes = [np.int8, np.uint8, np.int16, np.uint16, np.float16, np.int32, np.float32, np.float64]
    for dtype in dtypes:
        for channels in [2, 3, 5]:
            # subTest keeps going after a failure, so every failing
            # dtype/channels combination is reported, not just the first.
            with self.subTest(dtype=dtype, channels=channels):
                ref = (np.random.random((64, 128, channels)) * 255).astype(dtype)
                src = cv.cuda_GpuMat()
                src.upload(ref)
                dst = cv.cuda_GpuMat.from_dlpack(src)
                test = dst.download()
                self.assertTrue(
                    np.array_equal(ref, test),
                    f"Failed test with dtype {dtype} and {channels} channels",
                )
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
NewOpenCVTests.bootstrap()
|
NewOpenCVTests.bootstrap()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user