Merge pull request #16888 from YashasSamaga:cuda4dnn-redundant-act-fusion-check
commit ea34b2fefb
@@ -27,13 +27,21 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn {
     /** @brief exception class for errors thrown by the cuDNN API */
     class cuDNNException : public CUDAException {
     public:
-        using CUDAException::CUDAException;
+        cuDNNException(cudnnStatus_t code, const std::string& msg, const std::string& func, const std::string& file, int line)
+            : CUDAException(Error::GpuApiCallError, msg, func, file, line), cudnnError{code}
+        {
+        }
+
+        cudnnStatus_t getCUDNNStatus() const noexcept { return cudnnError; }
+
+    private:
+        cudnnStatus_t cudnnError;
     };

     namespace detail {
         inline void check(cudnnStatus_t status, const char* func, const char* file, int line) {
             if (status != CUDNN_STATUS_SUCCESS)
-                throw cuDNNException(Error::GpuApiCallError, cudnnGetErrorString(status), func, file, line);
+                throw cuDNNException(status, cudnnGetErrorString(status), func, file, line);
         }

         /** get_data_type<T> returns the equivalent cudnn enumeration constant for type T */
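The status code carried by cuDNNException lets callers react to specific cuDNN failure codes instead of string-matching the error message. A minimal usage sketch, assuming the internal header path and the two callables (both are illustrative, not part of this patch):

    #include <cudnn.h>
    #include "cuda4dnn/csl/cudnn/cudnn.hpp"   // cuDNNException; internal OpenCV header, path assumed

    // Run a fused cuDNN operation and fall back to an unfused path when cuDNN
    // reports that the configuration is not supported. FusedOp/FallbackOp are
    // hypothetical callables standing in for the two code paths.
    template <class FusedOp, class FallbackOp>
    void run_with_fallback(FusedOp run_fused, FallbackOp run_unfused)
    {
        try {
            run_fused();
        } catch (const cv::dnn::cuda4dnn::csl::cudnn::cuDNNException& ex) {
            if (ex.getCUDNNStatus() == CUDNN_STATUS_NOT_SUPPORTED)
                run_unfused();   // configuration is legal but unimplemented in cuDNN
            else
                throw;           // genuine error: propagate
        }
    }

This is the pattern the convolution primitive adopts in the next hunk.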
@@ -261,16 +261,32 @@ namespace cv { namespace dnn { namespace cuda4dnn {
                 input = transformed_input;
             }

+            auto conv_scratchpad = allocator.get_instance();
+
             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
             auto output = output_wrapper->getSpan();

             if (fusion_location == InternalFusionLocation::CUDNN)
             {
-                convoluter.convolve_with_bias_activation(output, input, filtersTensor, biasTensor, allocator.get_instance());
+                try
+                {
+                    convoluter.convolve_with_bias_activation(output, input, filtersTensor, biasTensor, conv_scratchpad);
+                }
+                catch(const csl::cudnn::cuDNNException& ex)
+                {
+                    if (ex.getCUDNNStatus() == CUDNN_STATUS_NOT_SUPPORTED)
+                    {
+                        /* drop cuDNN fusion and use the native fusion path */
+                        fusion_location = InternalFusionLocation::NATIVE;
+                    }
+                    else
+                        throw;
+                }
             }
-            else
+
+            if (fusion_location == InternalFusionLocation::NATIVE)
             {
-                convoluter.convolve(output, input, filtersTensor, allocator.get_instance());
+                convoluter.convolve(output, input, filtersTensor, conv_scratchpad);
                 if (!biasTensor.empty())
                 {
                     std::size_t inner_size = output.size_range(2, output.rank());
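Note the shape of the control flow: the native path is guarded by a second if on fusion_location rather than an else, so when the catch block downgrades fusion_location to NATIVE the very same forward call retries through the unfused path (and, since fusion_location appears to be stored on the op, the downgrade would likely stick for later calls too). A simplified, self-contained sketch of that flow, not the actual OpenCV code:

    enum class InternalFusionLocation { CUDNN, NATIVE };

    // Sketch of the forward pass above: a failed cuDNN fusion attempt only
    // downgrades the strategy; the native branch then runs in the same call.
    void forward_sketch(InternalFusionLocation& fusion_location, bool cudnn_supports_config)
    {
        if (fusion_location == InternalFusionLocation::CUDNN)
        {
            if (!cudnn_supports_config)   // stands in for catching CUDNN_STATUS_NOT_SUPPORTED
                fusion_location = InternalFusionLocation::NATIVE;
            // otherwise the fused cuDNN convolution ran and we are done
        }

        if (fusion_location == InternalFusionLocation::NATIVE)
        {
            // unfused convolution followed by the native bias/activation path
        }
    }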
@@ -2580,16 +2580,6 @@ struct Net::Impl
                     nextData->type != "Power")
                     break;

-                if (IS_DNN_CUDA_TARGET(preferableTarget) &&
-                    nextData->type != "ReLU" &&
-                    nextData->type != "ReLU6" &&
-                    nextData->type != "Power" &&
-                    nextData->type != "TanH" &&
-                    nextData->type != "Sigmoid" &&
-                    nextData->type != "Swish" &&
-                    nextData->type != "Mish")
-                    break;
-
                 Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
                 if (nextActivLayer.empty())
                     break;
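With the runtime fallback above in place, the graph-level activation whitelist for the CUDA target becomes redundant, which is what this hunk deletes: this particular name check no longer filters candidate activations, and unsupported combinations are handled downstream by the backend. A schematic before/after of that decision (illustrative only, not OpenCV API):

    #include <set>
    #include <string>

    // Before: a hard-coded list decided at graph-fusion time which activations
    // the CUDA target was allowed to consider for fusion.
    bool cuda_fusion_allowed_before(const std::string& type)
    {
        static const std::set<std::string> whitelist{
            "ReLU", "ReLU6", "Power", "TanH", "Sigmoid", "Swish", "Mish"};
        return whitelist.count(type) != 0;
    }

    // After: this graph-level name check disappears; the layer/backend decides,
    // and the convolution primitive falls back at run time when fusion fails.
    bool cuda_fusion_allowed_after(const std::string& /*type*/)
    {
        return true;
    }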