// This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. #ifndef OPENCV_CUDEV_PTR2D_TEXTURE_OBJECT_HPP #define OPENCV_CUDEV_PTR2D_TEXTURE_OBJECT_HPP #include #include #include #include #include /** \file texture.hpp */ namespace cv { namespace cudev { //! @addtogroup cudev //! @{ /** @brief Simple lightweight structures that encapsulate information about an image texture on the device. * They are intended to be passed to nvcc-compiled code. */ template struct TexturePtr { typedef R elem_type, value_type; typedef float index_type; __host__ TexturePtr() {}; __host__ TexturePtr(const cudaTextureObject_t tex_) : tex(tex_) {}; __device__ __forceinline__ R operator ()(index_type y, index_type x) const { return tex2D(tex, x, y); } __device__ __forceinline__ R operator ()(index_type x) const { return tex1Dfetch(tex, x); } private: cudaTextureObject_t tex; }; // textures are a maximum of 32 bits wide, 64 bits is read as two 32 bit wide values template struct TexturePtr { typedef float index_type; __host__ TexturePtr() {}; __host__ TexturePtr(const cudaTextureObject_t tex_) : tex(tex_) {}; __device__ __forceinline__ R operator ()(index_type y, index_type x) const { const uint2 retVal = tex2D(tex, x, y); return *(reinterpret_cast(&retVal)); } __device__ __forceinline__ R operator ()(index_type x) const { const uint2 retVal = tex1Dfetch(tex, x); return *(reinterpret_cast(&retVal)); } private: cudaTextureObject_t tex; }; template struct TextureOffPtr { typedef R elem_type; typedef float index_type; __host__ TextureOffPtr(const cudaTextureObject_t tex_, const int yoff_, const int xoff_) : tex(tex_), yoff(yoff_), xoff(xoff_) {}; __device__ __forceinline__ R operator ()(index_type y, index_type x) const { return tex2D(tex, x + xoff, y + yoff); } private: cudaTextureObject_t tex; int xoff = 0; int yoff = 0; }; /** @brief non-copyable smart CUDA texture object * * UniqueTexture is a smart non-sharable wrapper for a cudaTextureObject_t handle which ensures that the handle is destroyed after use. */ template class UniqueTexture { public: __host__ UniqueTexture() noexcept { } __host__ UniqueTexture(UniqueTexture&) = delete; __host__ UniqueTexture(UniqueTexture&& other) noexcept { tex = other.tex; other.tex = 0; } __host__ UniqueTexture(const int rows, const int cols, T* data, const size_t step, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint, const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType) { create(rows, cols, data, step, normalizedCoords, filterMode, addressMode, readMode); } __host__ UniqueTexture(const size_t sizeInBytes, T* data, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint, const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType) { create(sizeInBytes, data, normalizedCoords, filterMode, addressMode, readMode); } __host__ ~UniqueTexture() { if (tex != cudaTextureObject_t()) { try { CV_CUDEV_SAFE_CALL(cudaDestroyTextureObject(tex)); CV_CUDEV_SAFE_CALL(cudaFree(internalSrc)); } catch (const cv::Exception& ex) { std::ostringstream os; os << "Exception caught during CUDA texture object destruction.\n"; os << ex.what(); os << "Exception will be ignored.\n"; CV_LOG_WARNING(0, os.str().c_str()); } } } __host__ UniqueTexture& operator=(const UniqueTexture&) = delete; __host__ UniqueTexture& operator=(UniqueTexture&& other) noexcept { CV_Assert(other); if (&other != this) { UniqueTexture(std::move(*this)); /* destroy current texture object */ tex = other.tex; other.tex = cudaTextureObject_t(); } return *this; } __host__ cudaTextureObject_t get() const noexcept { CV_Assert(tex); return tex; } __host__ explicit operator bool() const noexcept { return tex != cudaTextureObject_t(); } private: __host__ void createTextureObject(cudaResourceDesc texRes, const bool normalizedCoords, const cudaTextureFilterMode filterMode, const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode) { cudaTextureDesc texDescr; std::memset(&texDescr, 0, sizeof(texDescr)); texDescr.normalizedCoords = normalizedCoords; texDescr.filterMode = filterMode; texDescr.addressMode[0] = addressMode; texDescr.addressMode[1] = addressMode; texDescr.addressMode[2] = addressMode; texDescr.readMode = readMode; CV_CUDEV_SAFE_CALL(cudaCreateTextureObject(&tex, &texRes, &texDescr, 0)); } template __host__ void create(const size_t sizeInBytes, T1* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode, const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode) { cudaResourceDesc texRes; std::memset(&texRes, 0, sizeof(texRes)); texRes.resType = cudaResourceTypeLinear; texRes.res.linear.devPtr = data; texRes.res.linear.sizeInBytes = sizeInBytes; texRes.res.linear.desc = cudaCreateChannelDesc(); createTextureObject(texRes, normalizedCoords, filterMode, addressMode, readMode); } __host__ void create(const size_t sizeInBytes, uint64* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode, const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode) { create(sizeInBytes, (uint2*)data, normalizedCoords, filterMode, addressMode, readMode); } template __host__ void create(const int rows, const int cols, T1* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode, const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode) { cudaResourceDesc texRes; std::memset(&texRes, 0, sizeof(texRes)); texRes.resType = cudaResourceTypePitch2D; texRes.res.pitch2D.height = rows; texRes.res.pitch2D.width = cols; // temporary fix for single row/columns until TexturePtr is reworked if (rows == 1 || cols == 1) { size_t dStep = 0; CV_CUDEV_SAFE_CALL(cudaMallocPitch(&internalSrc, &dStep, cols * sizeof(T1), rows)); CV_CUDEV_SAFE_CALL(cudaMemcpy2D(internalSrc, dStep, data, step, cols * sizeof(T1), rows, cudaMemcpyDeviceToDevice)); texRes.res.pitch2D.devPtr = internalSrc; texRes.res.pitch2D.pitchInBytes = dStep; } else { texRes.res.pitch2D.devPtr = data; texRes.res.pitch2D.pitchInBytes = step; } texRes.res.pitch2D.desc = cudaCreateChannelDesc(); createTextureObject(texRes, normalizedCoords, filterMode, addressMode, readMode); } __host__ void create(const int rows, const int cols, uint64* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode, const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode) { create(rows, cols, (uint2*)data, step, normalizedCoords, filterMode, addressMode, readMode); } private: cudaTextureObject_t tex; T* internalSrc = 0; }; /** @brief sharable smart CUDA texture object * * Texture is a smart sharable wrapper for a cudaTextureObject_t handle which ensures that the handle is destroyed after use. */ template class Texture { public: Texture() = default; Texture(const Texture&) = default; Texture(Texture&&) = default; __host__ Texture(const int rows_, const int cols_, T* data, const size_t step, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint, const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType) : rows(rows_), cols(cols_), texture(std::make_shared>(rows, cols, data, step, normalizedCoords, filterMode, addressMode, readMode)) { } __host__ Texture(const size_t sizeInBytes, T* data, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint, const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType) : rows(1), cols(static_cast(sizeInBytes/sizeof(T))), texture(std::make_shared>(sizeInBytes, data, normalizedCoords, filterMode, addressMode, readMode)) { } __host__ Texture(PtrStepSz src, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint, const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType) : Texture(src.rows, src.cols, src.data, src.step, normalizedCoords, filterMode, addressMode, readMode) { } Texture& operator=(const Texture&) = default; Texture& operator=(Texture&&) = default; __host__ explicit operator bool() const noexcept { if (!texture) return false; return texture->operator bool(); } __host__ operator TexturePtr() const { if (texture) return TexturePtr(texture->get()); else return TexturePtr(cudaTextureObject_t()); } int rows = 0; int cols = 0; protected: std::shared_ptr> texture = 0; }; template struct PtrTraits> : PtrTraitsBase, TexturePtr> { }; /** @brief sharable smart CUDA texture object with offset * TextureOff is a smart sharable wrapper for a cudaTextureObject_t handle which ensures that the handle is destroyed after use. */ template class TextureOff { public: TextureOff(const TextureOff&) = default; TextureOff(TextureOff&&) = default; __host__ TextureOff(const int rows, const int cols, T* data, const size_t step, const int yoff_ = 0, const int xoff_ = 0, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint, const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType) : texture(std::make_shared>(rows, cols, data, step, normalizedCoords, filterMode, addressMode, readMode)), xoff(xoff_), yoff(yoff_) { } __host__ TextureOff(PtrStepSz src, const int yoff_ = 0, const int xoff_ = 0, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint, const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType) : TextureOff(src.rows, src.cols, src.data, src.step, yoff_, xoff_, normalizedCoords, filterMode, addressMode, readMode) { } TextureOff& operator=(const TextureOff&) = default; TextureOff& operator=(TextureOff&&) = default; __host__ operator TextureOffPtr() const { return TextureOffPtr(texture->get(), yoff, xoff); } private: int xoff = 0; int yoff = 0; std::shared_ptr> texture = 0; }; }} #endif