diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake
index 58e204094f..27bf79bd8e 100644
--- a/cmake/OpenCVDetectCUDA.cmake
+++ b/cmake/OpenCVDetectCUDA.cmake
@@ -70,6 +70,14 @@ if(CUDA_FOUND)
     unset(CUDA_ARCH_PTX CACHE)
   endif()
 
+  # Command line used to probe the GPUs of the build host: nvcc builds the checker
+  # and runs it right away (--run); the checker prints the detected architectures.
+  set(DETECT_ARCHS_COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run")
+  if(WIN32 AND CMAKE_LINKER) # Workaround for VS cl.exe not being in the env. path
+    get_filename_component(host_compiler_bindir "${CMAKE_LINKER}" DIRECTORY)
+    list(APPEND DETECT_ARCHS_COMMAND "-ccbin" "${host_compiler_bindir}")
+  endif()
+
   set(__cuda_arch_ptx "")
   if(CUDA_GENERATION STREQUAL "Fermi")
     set(__cuda_arch_bin "2.0")
@@ -82,10 +90,11 @@ if(CUDA_FOUND)
   elseif(CUDA_GENERATION STREQUAL "Volta")
     set(__cuda_arch_bin "7.0")
   elseif(CUDA_GENERATION STREQUAL "Auto")
-    execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
+    execute_process( COMMAND ${DETECT_ARCHS_COMMAND}
                      WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
                      RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
                      ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+    string(REGEX REPLACE ".*\n" "" _nvcc_out "${_nvcc_out}") # Keep only the text after the last newline (strips leading warning messages, if any)
     if(NOT _nvcc_res EQUAL 0)
       message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
     else()
@@ -99,10 +108,11 @@ if(CUDA_FOUND)
       set(__cuda_arch_bin "3.2")
       set(__cuda_arch_ptx "")
     elseif(AARCH64)
-      execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
+      execute_process( COMMAND ${DETECT_ARCHS_COMMAND}
                        WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
                        RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
                        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+      string(REGEX REPLACE ".*\n" "" _nvcc_out "${_nvcc_out}") # Keep only the text after the last newline (strips leading warning messages, if any)
       if(NOT _nvcc_res EQUAL 0)
         message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
         set(__cuda_arch_bin "5.3 6.2 7.0")
diff --git a/cmake/checks/OpenCVDetectCudaArch.cu b/cmake/checks/OpenCVDetectCudaArch.cu
index 9d7086cf24..70ca975530 100644
--- a/cmake/checks/OpenCVDetectCudaArch.cu
+++ b/cmake/checks/OpenCVDetectCudaArch.cu
@@ -1,14 +1,31 @@
-#include <stdio.h>
+// Prints the compute capability ("major.minor") of each CUDA device found on
+// the build host, space separated, with duplicates removed. main() returns -1
+// when the device count cannot be queried or when no device is present.
+#include <iostream>
+#include <sstream>
+#include <list>
+#include <string>
+
 int main()
 {
+    std::ostringstream arch;
+    std::list<std::string> archs;
+
     int count = 0;
-    if (cudaSuccess != cudaGetDeviceCount(&count)){return -1;}
-    if (count == 0) {return -1;}
+    if (cudaSuccess != cudaGetDeviceCount(&count)){ return -1; }
+    if (count == 0) { return -1; }
     for (int device = 0; device < count; ++device)
     {
         cudaDeviceProp prop;
-        if (cudaSuccess != cudaGetDeviceProperties(&prop, device)){ continue;}
-        printf("%d.%d ", prop.major, prop.minor);
+        if (cudaSuccess != cudaGetDeviceProperties(&prop, device)){ continue; }
+        arch << prop.major << "." << prop.minor;
+        archs.push_back(arch.str());
+        arch.str("");
     }
+    // std::list::unique() only drops adjacent duplicates, so sort first.
+    archs.sort();
+    archs.unique(); // Some devices might have the same arch
+    for (std::list<std::string>::iterator it=archs.begin(); it!=archs.end(); ++it)
+        std::cout << *it << " ";
     return 0;
 }