core(OpenCL): thread-local OpenCL execution context

This commit is contained in:
Alexander Alekhin
2020-08-11 18:13:52 +00:00
parent 0428dce27d
commit 2129c72bc0
12 changed files with 1719 additions and 252 deletions
@@ -565,6 +565,7 @@ struct CV_EXPORTS UMatData
int allocatorFlags_;
int mapcount;
UMatData* originalUMatData;
std::shared_ptr<void> allocatorContext;
};
CV_ENUM_FLAGS(UMatData::MemoryFlag)
+148 -8
View File
@@ -229,8 +229,15 @@ public:
CV_WRAP static const Device& getDefault();
protected:
/**
* @param d OpenCL handle (cl_device_id). clRetainDevice() is called on success.
*/
static Device fromHandle(void* d);
struct Impl;
inline Impl* getImpl() const { return (Impl*)p; }
inline bool empty() const { return !p; }
protected:
Impl* p;
};
@@ -239,33 +246,55 @@ class CV_EXPORTS Context
{
public:
Context();
explicit Context(int dtype);
explicit Context(int dtype); //!< @deprecated
~Context();
Context(const Context& c);
Context& operator = (const Context& c);
Context& operator= (const Context& c);
/** @deprecated */
bool create();
/** @deprecated */
bool create(int dtype);
size_t ndevices() const;
const Device& device(size_t idx) const;
Device& device(size_t idx) const;
Program getProg(const ProgramSource& prog,
const String& buildopt, String& errmsg);
void unloadProg(Program& prog);
/** Get thread-local OpenCL context (initialize if necessary) */
#if 0 // OpenCV 5.0
static Context& getDefault();
#else
static Context& getDefault(bool initialize = true);
#endif
/** @returns cl_context value */
void* ptr() const;
friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
bool useSVM() const;
void setUseSVM(bool enabled);
/**
* @param context OpenCL handle (cl_context). clRetainContext() is called on success
*/
static Context fromHandle(void* context);
static Context fromDevice(const ocl::Device& device);
static Context create(const std::string& configuration);
void release();
struct Impl;
inline Impl* getImpl() const { return (Impl*)p; }
inline bool empty() const { return !p; }
// TODO OpenCV 5.0
//protected:
Impl* p;
};
/** @deprecated */
class CV_EXPORTS Platform
{
public:
@@ -275,11 +304,14 @@ public:
Platform& operator = (const Platform& p);
void* ptr() const;
/** @deprecated */
static Platform& getDefault();
friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
protected:
struct Impl;
inline Impl* getImpl() const { return (Impl*)p; }
inline bool empty() const { return !p; }
protected:
Impl* p;
};
@@ -319,6 +351,7 @@ CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, in
CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst);
// TODO Move to internal header
/// @deprecated
void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
class CV_EXPORTS Queue
@@ -340,6 +373,7 @@ public:
struct Impl; friend struct Impl;
inline Impl* getImpl() const { return p; }
inline bool empty() const { return !p; }
protected:
Impl* p;
};
@@ -490,6 +524,7 @@ public:
struct Impl; friend struct Impl;
inline Impl* getImpl() const { return (Impl*)p; }
inline bool empty() const { return !p; }
protected:
Impl* p;
public:
@@ -571,6 +606,7 @@ public:
struct Impl; friend struct Impl;
inline Impl* getImpl() const { return (Impl*)p; }
inline bool empty() const { return !p; }
protected:
Impl* p;
};
@@ -579,6 +615,9 @@ class CV_EXPORTS PlatformInfo
{
public:
PlatformInfo();
/**
* @param id pointer cl_platform_id (cl_platform_id*)
*/
explicit PlatformInfo(void* id);
~PlatformInfo();
@@ -591,8 +630,9 @@ public:
int deviceNumber() const;
void getDevice(Device& device, int d) const;
protected:
struct Impl;
bool empty() const { return !p; }
protected:
Impl* p;
};
@@ -689,6 +729,106 @@ private:
CV_EXPORTS MatAllocator* getOpenCLAllocator();
class CV_EXPORTS_W OpenCLExecutionContext
{
public:
OpenCLExecutionContext() = default;
~OpenCLExecutionContext() = default;
OpenCLExecutionContext(const OpenCLExecutionContext& other) = default;
OpenCLExecutionContext(OpenCLExecutionContext&& other) = default;
OpenCLExecutionContext& operator=(const OpenCLExecutionContext& other) = default;
OpenCLExecutionContext& operator=(OpenCLExecutionContext&& other) = default;
/** Get associated ocl::Context */
Context& getContext() const;
/** Get associated ocl::Device */
Device& getDevice() const;
/** Get associated ocl::Queue */
Queue& getQueue() const;
bool useOpenCL() const;
void setUseOpenCL(bool flag);
/** Get OpenCL execution context of current thread.
*
* Initialize OpenCL execution context if it is empty
* - create new
* - reuse context of the main thread (threadID = 0)
*/
static OpenCLExecutionContext& getCurrent();
/** Get OpenCL execution context of current thread (can be empty) */
static OpenCLExecutionContext& getCurrentRef();
/** Bind this OpenCL execution context to current thread.
*
* Context can't be empty.
*
* @note clFinish is not called for queue of previous execution context
*/
void bind() const;
/** Creates new execution context with same OpenCV context and device
*
* @param q OpenCL queue
*/
OpenCLExecutionContext cloneWithNewQueue(const ocl::Queue& q) const;
/** @overload */
OpenCLExecutionContext cloneWithNewQueue() const;
/** @brief Creates OpenCL execution context
* OpenCV will check if available OpenCL platform has platformName name, then assign context to
* OpenCV and call `clRetainContext` function. The deviceID device will be used as target device and
* new command queue will be created.
*
* @note Lifetime of passed handles is transferred to OpenCV wrappers on success
*
* @param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime
* @param platformID ID of platform attached context was created for (cl_platform_id)
* @param context OpenCL context to be attached to OpenCV (cl_context)
* @param deviceID OpenCL device (cl_device_id)
*/
static OpenCLExecutionContext create(const std::string& platformName, void* platformID, void* context, void* deviceID);
/** @brief Creates OpenCL execution context
*
* @param context non-empty OpenCL context
* @param device non-empty OpenCL device (must be a part of context)
* @param queue non-empty OpenCL queue for provided context and device
*/
static OpenCLExecutionContext create(const Context& context, const Device& device, const ocl::Queue& queue);
/** @overload */
static OpenCLExecutionContext create(const Context& context, const Device& device);
struct Impl;
inline bool empty() const { return !p; }
void release();
protected:
std::shared_ptr<Impl> p;
};
class OpenCLExecutionContextScope
{
OpenCLExecutionContext ctx_;
public:
inline OpenCLExecutionContextScope(const OpenCLExecutionContext& ctx)
{
CV_Assert(!ctx.empty());
ctx_ = OpenCLExecutionContext::getCurrentRef();
ctx.bind();
}
inline ~OpenCLExecutionContextScope()
{
if (!ctx_.empty())
{
ctx_.bind();
}
}
};
#ifdef __OPENCV_BUILD
namespace internal {
+63 -12
View File
@@ -458,9 +458,22 @@ Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device)
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for DirectX interop");
}
Context& ctx = Context::getDefault(false);
initializeContextFromHandle(ctx, platforms[found], context, device);
return ctx;
cl_platform_id platform = platforms[found];
std::string platformName = PlatformInfo(platform).name();
OpenCLExecutionContext clExecCtx;
try
{
clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device);
}
catch (...)
{
clReleaseDevice(device);
clReleaseContext(context);
throw;
}
clExecCtx.bind();
return const_cast<Context&>(clExecCtx.getContext());
#endif
}
@@ -565,10 +578,22 @@ Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device)
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for DirectX interop");
}
cl_platform_id platform = platforms[found];
std::string platformName = PlatformInfo(platform).name();
Context& ctx = Context::getDefault(false);
initializeContextFromHandle(ctx, platforms[found], context, device);
return ctx;
OpenCLExecutionContext clExecCtx;
try
{
clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device);
}
catch (...)
{
clReleaseDevice(device);
clReleaseContext(context);
throw;
}
clExecCtx.bind();
return const_cast<Context&>(clExecCtx.getContext());
#endif
}
@@ -675,10 +700,23 @@ Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDev
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for DirectX interop");
}
Context& ctx = Context::getDefault(false);
initializeContextFromHandle(ctx, platforms[found], context, device);
cl_platform_id platform = platforms[found];
std::string platformName = PlatformInfo(platform).name();
OpenCLExecutionContext clExecCtx;
try
{
clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device);
}
catch (...)
{
clReleaseDevice(device);
clReleaseContext(context);
throw;
}
clExecCtx.bind();
g_isDirect3DDevice9Ex = true;
return ctx;
return const_cast<Context&>(clExecCtx.getContext());
#endif
}
@@ -785,10 +823,23 @@ Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for DirectX interop");
}
Context& ctx = Context::getDefault(false);
initializeContextFromHandle(ctx, platforms[found], context, device);
cl_platform_id platform = platforms[found];
std::string platformName = PlatformInfo(platform).name();
OpenCLExecutionContext clExecCtx;
try
{
clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device);
}
catch (...)
{
clReleaseDevice(device);
clReleaseContext(context);
throw;
}
clExecCtx.bind();
g_isDirect3DDevice9Ex = false;
return ctx;
return const_cast<Context&>(clExecCtx.getContext());
#endif
}
+807 -221
View File
File diff suppressed because it is too large Load Diff
+47 -1
View File
@@ -144,6 +144,8 @@ const Device& Device::getDefault()
return dummy;
}
/* static */ Device Device::fromHandle(void* d) { OCL_NOT_AVAILABLE(); }
Context::Context() : p(NULL) { }
Context::Context(int dtype) : p(NULL) { }
@@ -154,7 +156,7 @@ Context& Context::operator=(const Context& c) { return *this; }
bool Context::create() { return false; }
bool Context::create(int dtype) { return false; }
size_t Context::ndevices() const { return 0; }
const Device& Context::device(size_t idx) const { OCL_NOT_AVAILABLE(); }
Device& Context::device(size_t idx) const { OCL_NOT_AVAILABLE(); }
Program Context::getProg(const ProgramSource& prog, const String& buildopt, String& errmsg) { OCL_NOT_AVAILABLE(); }
void Context::unloadProg(Program& prog) { }
@@ -169,6 +171,13 @@ void* Context::ptr() const { return NULL; }
bool Context::useSVM() const { return false; }
void Context::setUseSVM(bool enabled) { }
/* static */ Context Context::fromHandle(void* context) { OCL_NOT_AVAILABLE(); }
/* static */ Context Context::fromDevice(const ocl::Device& device) { OCL_NOT_AVAILABLE(); }
/* static */ Context Context::create(const std::string& configuration) { OCL_NOT_AVAILABLE(); }
void Context::release() { }
Platform::Platform() : p(NULL) { }
Platform::~Platform() { }
Platform::Platform(const Platform&) : p(NULL) { }
@@ -355,6 +364,43 @@ MatAllocator* getOpenCLAllocator() { return NULL; }
internal::ProgramEntry::operator ProgramSource&() const { OCL_NOT_AVAILABLE(); }
struct OpenCLExecutionContext::Impl
{
Impl() = default;
};
Context& OpenCLExecutionContext::getContext() const { OCL_NOT_AVAILABLE(); }
Device& OpenCLExecutionContext::getDevice() const { OCL_NOT_AVAILABLE(); }
Queue& OpenCLExecutionContext::getQueue() const { OCL_NOT_AVAILABLE(); }
bool OpenCLExecutionContext::useOpenCL() const { return false; }
void OpenCLExecutionContext::setUseOpenCL(bool flag) { }
static
OpenCLExecutionContext& getDummyOpenCLExecutionContext()
{
static OpenCLExecutionContext dummy;
return dummy;
}
/* static */
OpenCLExecutionContext& OpenCLExecutionContext::getCurrent() { return getDummyOpenCLExecutionContext(); }
/* static */
OpenCLExecutionContext& OpenCLExecutionContext::getCurrentRef() { return getDummyOpenCLExecutionContext(); }
void OpenCLExecutionContext::bind() const { OCL_NOT_AVAILABLE(); }
OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue(const ocl::Queue& q) const { OCL_NOT_AVAILABLE(); }
OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue() const { OCL_NOT_AVAILABLE(); }
/* static */ OpenCLExecutionContext OpenCLExecutionContext::create(const std::string& platformName, void* platformID, void* context, void* deviceID) { OCL_NOT_AVAILABLE(); }
/* static */ OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device, const ocl::Queue& queue) { OCL_NOT_AVAILABLE(); }
/* static */ OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device) { OCL_NOT_AVAILABLE(); }
void OpenCLExecutionContext::release() { }
}}
#if defined(_MSC_VER)
+8 -3
View File
@@ -1689,9 +1689,14 @@ Context& initializeContextFromGL()
if (found < 0)
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for OpenGL interop");
Context& ctx = Context::getDefault(false);
initializeContextFromHandle(ctx, platforms[found], context, device);
return ctx;
cl_platform_id platform = platforms[found];
std::string platformName = PlatformInfo(platform).name();
OpenCLExecutionContext clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, deviceID);
clReleaseDevice(device);
clReleaseContext(context);
clExecCtx.bind();
return const_cast<Context&>(clExecCtx.getContext());
#endif
}
+3 -3
View File
@@ -322,7 +322,7 @@ struct CoreTLSData
{
CoreTLSData() :
//#ifdef HAVE_OPENCL
device(0), useOpenCL(-1),
oclExecutionContextInitialized(false), useOpenCL(-1),
//#endif
useIPP(-1),
useIPP_NE(-1)
@@ -333,8 +333,8 @@ struct CoreTLSData
RNG rng;
//#ifdef HAVE_OPENCL
int device; // device index of an array of devices in a context, see also Device::getDefault
ocl::Queue oclQueue; // the queue used for running a kernel, see also getQueue, Kernel::run
ocl::OpenCLExecutionContext oclExecutionContext;
bool oclExecutionContextInitialized;
int useOpenCL; // 1 - use, 0 - do not use, -1 - auto/not initialized
//#endif
int useIPP; // 1 - use, 0 - do not use, -1 - auto/not initialized
+18 -4
View File
@@ -106,7 +106,7 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)
CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, 0, NULL, &numDevices);
if ((status != CL_SUCCESS) || !(numDevices > 0))
continue;
numDevices = 1; // initializeContextFromHandle() expects only 1 device
numDevices = 1; // OpenCV expects only 1 device
status = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platforms[i], CL_VA_API_DISPLAY_INTEL, display,
CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, numDevices, &device, NULL);
if (status != CL_SUCCESS)
@@ -135,9 +135,23 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)
if (found >= 0)
{
contextInitialized = true;
Context& ctx = Context::getDefault(false);
initializeContextFromHandle(ctx, platforms[found], context, device);
return ctx;
cl_platform_id platform = platforms[found];
std::string platformName = PlatformInfo(platform).name();
OpenCLExecutionContext clExecCtx;
try
{
clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device);
}
catch (...)
{
clReleaseDevice(device);
clReleaseContext(context);
throw;
}
clExecCtx.bind();
return const_cast<Context&>(clExecCtx.getContext());
}
}
# endif // HAVE_VA_INTEL && HAVE_OPENCL
+191
View File
@@ -0,0 +1,191 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
namespace opencv_test {
namespace ocl {
static void executeUMatCall(bool requireOpenCL = true)
{
UMat a(100, 100, CV_8UC1, Scalar::all(0));
UMat b;
cv::add(a, Scalar::all(1), b);
Mat b_cpu = b.getMat(ACCESS_READ);
EXPECT_EQ(0, cv::norm(b_cpu - 1, NORM_INF));
if (requireOpenCL)
{
EXPECT_TRUE(cv::ocl::useOpenCL());
}
}
TEST(OCL_Context, createFromDevice)
{
bool useOCL = cv::ocl::useOpenCL();
OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent();
if (!useOCL)
{
ASSERT_TRUE(ctx.empty()); // Other tests should not broke global state
throw SkipTestException("OpenCL is not available / disabled");
}
ASSERT_FALSE(ctx.empty());
ocl::Device device = ctx.getDevice();
ASSERT_FALSE(device.empty());
ocl::Context context = ocl::Context::fromDevice(device);
ocl::Context context2 = ocl::Context::fromDevice(device);
EXPECT_TRUE(context.getImpl() == context2.getImpl()) << "Broken cache for OpenCL context (device)";
}
TEST(OCL_OpenCLExecutionContext, basic)
{
bool useOCL = cv::ocl::useOpenCL();
OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent();
if (!useOCL)
{
ASSERT_TRUE(ctx.empty()); // Other tests should not broke global state
throw SkipTestException("OpenCL is not available / disabled");
}
ASSERT_FALSE(ctx.empty());
ocl::Context context = ctx.getContext();
ocl::Context context2 = ocl::Context::getDefault();
EXPECT_TRUE(context.getImpl() == context2.getImpl());
ocl::Device device = ctx.getDevice();
ocl::Device device2 = ocl::Device::getDefault();
EXPECT_TRUE(device.getImpl() == device2.getImpl());
ocl::Queue queue = ctx.getQueue();
ocl::Queue queue2 = ocl::Queue::getDefault();
EXPECT_TRUE(queue.getImpl() == queue2.getImpl());
}
TEST(OCL_OpenCLExecutionContext, createAndBind)
{
bool useOCL = cv::ocl::useOpenCL();
OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent();
if (!useOCL)
{
ASSERT_TRUE(ctx.empty()); // Other tests should not broke global state
throw SkipTestException("OpenCL is not available / disabled");
}
ASSERT_FALSE(ctx.empty());
ocl::Context context = ctx.getContext();
ocl::Device device = ctx.getDevice();
OpenCLExecutionContext ctx2 = OpenCLExecutionContext::create(context, device);
ASSERT_FALSE(ctx2.empty());
try
{
ctx2.bind();
executeUMatCall();
ctx.bind();
executeUMatCall();
}
catch (...)
{
ctx.bind(); // restore
throw;
}
}
TEST(OCL_OpenCLExecutionContext, createGPU)
{
bool useOCL = cv::ocl::useOpenCL();
OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent();
if (!useOCL)
{
ASSERT_TRUE(ctx.empty()); // Other tests should not broke global state
throw SkipTestException("OpenCL is not available / disabled");
}
ASSERT_FALSE(ctx.empty());
ocl::Context context = ocl::Context::create(":GPU:1");
if (context.empty())
{
context = ocl::Context::create(":CPU:");
if (context.empty())
throw SkipTestException("OpenCL GPU1/CPU devices are not available");
}
ocl::Device device = context.device(0);
OpenCLExecutionContext ctx2 = OpenCLExecutionContext::create(context, device);
ASSERT_FALSE(ctx2.empty());
try
{
ctx2.bind();
executeUMatCall();
ctx.bind();
executeUMatCall();
}
catch (...)
{
ctx.bind(); // restore
throw;
}
}
TEST(OCL_OpenCLExecutionContext, ScopeTest)
{
bool useOCL = cv::ocl::useOpenCL();
OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent();
if (!useOCL)
{
ASSERT_TRUE(ctx.empty()); // Other tests should not broke global state
throw SkipTestException("OpenCL is not available / disabled");
}
ASSERT_FALSE(ctx.empty());
ocl::Context context = ocl::Context::create(":GPU:1");
if (context.empty())
{
context = ocl::Context::create(":CPU:");
if (context.empty())
context = ctx.getContext();
}
ocl::Device device = context.device(0);
OpenCLExecutionContext ctx2 = OpenCLExecutionContext::create(context, device);
ASSERT_FALSE(ctx2.empty());
try
{
OpenCLExecutionContextScope ctx_scope(ctx2);
executeUMatCall();
}
catch (...)
{
ctx.bind(); // restore
throw;
}
executeUMatCall();
}
} } // namespace opencv_test::ocl