diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl index c60b8fcdbb..37aceee983 100644 --- a/modules/dnn/src/opencl/conv_layer_spatial.cl +++ b/modules/dnn/src/opencl/conv_layer_spatial.cl @@ -280,15 +280,6 @@ convolve_simd( in_addr += INPUT_PITCH; - Dtype weight_buf[WEIGHT_PREF]; - int w_idx=0; - - for (int i = 0; i < WEIGHT_PREF; i++) - { - weight_buf[i] = weights[weight_addr]; - weight_addr += SIMD_SIZE; - } - #define BLOCK_IN(n, c) intel_sub_group_shuffle(in_buf[n], (c)) int kr = 0; // kr = Kernel Row @@ -297,20 +288,18 @@ convolve_simd( int kc = 0; // kc = Kernel Column LOOP(KERNEL_WIDTH, kc, { + Dtype weight_value = weights[weight_addr]; + weight_addr += SIMD_SIZE; for (int br=0; br < OUT_BLOCK_HEIGHT; br++) { for(int bc=0; bc < OUT_BLOCK_WIDTH; bc++) { Dtype input = BLOCK_IN((br * STRIDE_Y + kr * DILATION_Y), bc * STRIDE_X + kc * DILATION_X); - out[br * OUT_BLOCK_WIDTH + bc] = mad(weight_buf[w_idx % WEIGHT_PREF], input, out[br * OUT_BLOCK_WIDTH + bc]); + out[br * OUT_BLOCK_WIDTH + bc] = mad(weight_value, input, out[br * OUT_BLOCK_WIDTH + bc]); } } - weight_buf[w_idx % WEIGHT_PREF] = weights[weight_addr]; - weight_addr += SIMD_SIZE; - ++w_idx; }); }); - weight_addr -= WEIGHT_PREF * SIMD_SIZE; } fm = fm % ALIGNED_NUM_FILTERS;