diff --git a/3rdparty/libjpeg-turbo/CMakeLists.txt b/3rdparty/libjpeg-turbo/CMakeLists.txt
index 8da98b6020..901669a4a8 100644
--- a/3rdparty/libjpeg-turbo/CMakeLists.txt
+++ b/3rdparty/libjpeg-turbo/CMakeLists.txt
@@ -4,9 +4,9 @@ ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wsign-compare -Wshorten-6
 
 set(VERSION_MAJOR 2)
 set(VERSION_MINOR 0)
-set(VERSION_REVISION 5)
+set(VERSION_REVISION 6)
 set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_REVISION})
-set(LIBJPEG_TURBO_VERSION_NUMBER 2000005)
+set(LIBJPEG_TURBO_VERSION_NUMBER 2000006)
 
 string(TIMESTAMP BUILD "opencv-${OPENCV_VERSION}-libjpeg-turbo")
 if(CMAKE_BUILD_TYPE STREQUAL "Debug")
diff --git a/3rdparty/libjpeg-turbo/LICENSE.md b/3rdparty/libjpeg-turbo/LICENSE.md
index 5ca512b34d..99c9aadcc4 100644
--- a/3rdparty/libjpeg-turbo/LICENSE.md
+++ b/3rdparty/libjpeg-turbo/LICENSE.md
@@ -91,7 +91,7 @@ best of our understanding.
 The Modified (3-clause) BSD License
 ===================================
 
-Copyright (C)2009-2019 D. R. Commander.  All Rights Reserved.
+Copyright (C)2009-2020 D. R. Commander.  All Rights Reserved.
 Copyright (C)2015 Viktor Szathmáry.  All Rights Reserved.
 
 Redistribution and use in source and binary forms, with or without
diff --git a/3rdparty/libjpeg-turbo/README.ijg b/3rdparty/libjpeg-turbo/README.ijg
index 2e39f965c2..d681cf1273 100644
--- a/3rdparty/libjpeg-turbo/README.ijg
+++ b/3rdparty/libjpeg-turbo/README.ijg
@@ -223,12 +223,12 @@ https://www.iso.org/standard/54989.html and http://www.itu.int/rec/T-REC-T.871.
 A PDF file of the older JFIF 1.02 specification is available at
 http://www.w3.org/Graphics/JPEG/jfif3.pdf.
 
-The TIFF 6.0 file format specification can be obtained by FTP from
-ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz.  The JPEG incorporation scheme
-found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems.
-IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6).
-Instead, we recommend the JPEG design proposed by TIFF Technical Note #2
-(Compression tag 7).  Copies of this Note can be obtained from
+The TIFF 6.0 file format specification can be obtained from
+http://mirrors.ctan.org/graphics/tiff/TIFF6.ps.gz.  The JPEG incorporation
+scheme found in the TIFF 6.0 spec of 3-June-92 has a number of serious
+problems.  IJG does not recommend use of the TIFF 6.0 design (TIFF Compression
+tag 6).  Instead, we recommend the JPEG design proposed by TIFF Technical Note
+#2 (Compression tag 7).  Copies of this Note can be obtained from
 http://www.ijg.org/files/.  It is expected that the next revision
 of the TIFF spec will replace the 6.0 JPEG design with the Note's design.
 Although IJG's own code does not support TIFF/JPEG, the free libtiff library
@@ -243,14 +243,8 @@ The most recent released version can always be found there in
 directory "files".
 
 The JPEG FAQ (Frequently Asked Questions) article is a source of some
-general information about JPEG.
-It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/
-and other news.answers archive sites, including the official news.answers
-archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/.
-If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu
-with body
-        send usenet/news.answers/jpeg-faq/part1
-        send usenet/news.answers/jpeg-faq/part2
+general information about JPEG.  It is available at
+http://www.faqs.org/faqs/jpeg-faq.
 
 
 FILE FORMAT COMPATIBILITY
diff --git a/3rdparty/libjpeg-turbo/README.md b/3rdparty/libjpeg-turbo/README.md
index e7ff743a47..90a4a43ee1 100644
--- a/3rdparty/libjpeg-turbo/README.md
+++ b/3rdparty/libjpeg-turbo/README.md
@@ -2,7 +2,7 @@ Background
 ==========
 
 libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate
-baseline JPEG compression and decompression on x86, x86-64, ARM, PowerPC, and
+baseline JPEG compression and decompression on x86, x86-64, Arm, PowerPC, and
 MIPS systems, as well as progressive JPEG compression on x86 and x86-64
 systems.  On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg,
 all else being equal.  On other types of systems, libjpeg-turbo can still
@@ -179,8 +179,8 @@ supported and which aren't.
 
 NOTE:  As of this writing, extensive research has been conducted into the
 usefulness of DCT scaling as a means of data reduction and SmartScale as a
-means of quality improvement.  The reader is invited to peruse the research at
-<http://www.libjpeg-turbo.org/About/SmartScale> and draw his/her own conclusions,
+means of quality improvement.  Readers are invited to peruse the research at
+<http://www.libjpeg-turbo.org/About/SmartScale> and draw their own conclusions,
 but it is the general belief of our project that these features have not
 demonstrated sufficient usefulness to justify inclusion in libjpeg-turbo.
 
@@ -287,12 +287,13 @@ following reasons:
   (and slightly faster) floating point IDCT algorithm introduced in libjpeg
   v8a as opposed to the algorithm used in libjpeg v6b.  It should be noted,
   however, that this algorithm basically brings the accuracy of the floating
-  point IDCT in line with the accuracy of the slow integer IDCT.  The floating
-  point DCT/IDCT algorithms are mainly a legacy feature, and they do not
-  produce significantly more accuracy than the slow integer algorithms (to put
-  numbers on this, the typical difference in PNSR between the two algorithms
-  is less than 0.10 dB, whereas changing the quality level by 1 in the upper
-  range of the quality scale is typically more like a 1.0 dB difference.)
+  point IDCT in line with the accuracy of the accurate integer IDCT.  The
+  floating point DCT/IDCT algorithms are mainly a legacy feature, and they do
+  not produce significantly more accuracy than the accurate integer algorithms
+  (to put numbers on this, the typical difference in PSNR between the two
+  algorithms is less than 0.10 dB, whereas changing the quality level by 1 in
+  the upper range of the quality scale is typically more like a 1.0 dB
+  difference.)
 
 - If the floating point algorithms in libjpeg-turbo are not implemented using
   SIMD instructions on a particular platform, then the accuracy of the
@@ -340,7 +341,7 @@ The algorithm used by the SIMD-accelerated quantization function cannot produce
 correct results whenever the fast integer forward DCT is used along with a JPEG
 quality of 98-100.  Thus, libjpeg-turbo must use the non-SIMD quantization
 function in those cases.  This causes performance to drop by as much as 40%.
-It is therefore strongly advised that you use the slow integer forward DCT
+It is therefore strongly advised that you use the accurate integer forward DCT
 whenever encoding images with a JPEG quality of 98 or higher.
 
 
diff --git a/3rdparty/libjpeg-turbo/src/jchuff.c b/3rdparty/libjpeg-turbo/src/jchuff.c
index cb05055d99..db85ce114f 100644
--- a/3rdparty/libjpeg-turbo/src/jchuff.c
+++ b/3rdparty/libjpeg-turbo/src/jchuff.c
@@ -34,10 +34,10 @@
  * memory footprint by 64k, which is important for some mobile applications
  * that create many isolated instances of libjpeg-turbo (web browsers, for
  * instance.)  This may improve performance on some mobile platforms as well.
- * This feature is enabled by default only on ARM processors, because some x86
+ * This feature is enabled by default only on Arm processors, because some x86
  * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
  * shown to have a significant performance impact even on the x86 chips that
- * have a fast implementation of it.  When building for ARMv6, you can
+ * have a fast implementation of it.  When building for Armv6, you can
  * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
  * flags (this defines __thumb__).
  */
diff --git a/3rdparty/libjpeg-turbo/src/jcinit.c b/3rdparty/libjpeg-turbo/src/jcinit.c
index 78aa465786..157353a22e 100644
--- a/3rdparty/libjpeg-turbo/src/jcinit.c
+++ b/3rdparty/libjpeg-turbo/src/jcinit.c
@@ -1,8 +1,10 @@
 /*
  * jcinit.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2020, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -19,6 +21,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jpegcomp.h"
 
 
 /*
diff --git a/3rdparty/libjpeg-turbo/src/jcphuff.c b/3rdparty/libjpeg-turbo/src/jcphuff.c
index 8c4efaf16c..a8b94bed84 100644
--- a/3rdparty/libjpeg-turbo/src/jcphuff.c
+++ b/3rdparty/libjpeg-turbo/src/jcphuff.c
@@ -43,10 +43,10 @@
  * memory footprint by 64k, which is important for some mobile applications
  * that create many isolated instances of libjpeg-turbo (web browsers, for
  * instance.)  This may improve performance on some mobile platforms as well.
- * This feature is enabled by default only on ARM processors, because some x86
+ * This feature is enabled by default only on Arm processors, because some x86
  * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
  * shown to have a significant performance impact even on the x86 chips that
- * have a fast implementation of it.  When building for ARMv6, you can
+ * have a fast implementation of it.  When building for Armv6, you can
  * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
  * flags (this defines __thumb__).
  */
diff --git a/3rdparty/libjpeg-turbo/src/jctrans.c b/3rdparty/libjpeg-turbo/src/jctrans.c
index ce70a30940..ab6a2186db 100644
--- a/3rdparty/libjpeg-turbo/src/jctrans.c
+++ b/3rdparty/libjpeg-turbo/src/jctrans.c
@@ -4,8 +4,8 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1998, Thomas G. Lane.
  * Modified 2000-2009 by Guido Vollbeding.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2020, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -17,6 +17,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jpegcomp.h"
 
 
 /* Forward declarations */
diff --git a/3rdparty/libjpeg-turbo/src/jdapistd.c b/3rdparty/libjpeg-turbo/src/jdapistd.c
index 2c808fa564..38bd1110d9 100644
--- a/3rdparty/libjpeg-turbo/src/jdapistd.c
+++ b/3rdparty/libjpeg-turbo/src/jdapistd.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2015-2018, D. R. Commander.
+ * Copyright (C) 2010, 2015-2018, 2020, D. R. Commander.
  * Copyright (C) 2015, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -21,6 +21,8 @@
 #include "jinclude.h"
 #include "jdmainct.h"
 #include "jdcoefct.h"
+#include "jdmaster.h"
+#include "jdmerge.h"
 #include "jdsample.h"
 #include "jmemsys.h"
 
@@ -316,6 +318,8 @@ LOCAL(void)
 read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
 {
   JDIMENSION n;
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+  JSAMPARRAY scanlines = NULL;
   void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                          JDIMENSION input_row, JSAMPARRAY output_buf,
                          int num_rows) = NULL;
@@ -332,8 +336,13 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
     cinfo->cquantize->color_quantize = noop_quantize;
   }
 
+  if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
+    my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+    scanlines = &upsample->spare_row;
+  }
+
   for (n = 0; n < num_lines; n++)
-    jpeg_read_scanlines(cinfo, NULL, 1);
+    jpeg_read_scanlines(cinfo, scanlines, 1);
 
   if (color_convert)
     cinfo->cconvert->color_convert = color_convert;
@@ -353,6 +362,12 @@ increment_simple_rowgroup_ctr(j_decompress_ptr cinfo, JDIMENSION rows)
 {
   JDIMENSION rows_left;
   my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+
+  if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
+    read_and_discard_scanlines(cinfo, rows);
+    return;
+  }
 
   /* Increment the counter to the next row group after the skipped rows. */
   main_ptr->rowgroup_ctr += rows / cinfo->max_v_samp_factor;
@@ -382,21 +397,27 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
 {
   my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
   my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+  my_master_ptr master = (my_master_ptr)cinfo->master;
   my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
   JDIMENSION i, x;
   int y;
   JDIMENSION lines_per_iMCU_row, lines_left_in_iMCU_row, lines_after_iMCU_row;
   JDIMENSION lines_to_skip, lines_to_read;
 
+  /* Two-pass color quantization is not supported. */
+  if (cinfo->quantize_colors && cinfo->two_pass_quantize)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
   if (cinfo->global_state != DSTATE_SCANNING)
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
   /* Do not skip past the bottom of the image. */
   if (cinfo->output_scanline + num_lines >= cinfo->output_height) {
+    num_lines = cinfo->output_height - cinfo->output_scanline;
     cinfo->output_scanline = cinfo->output_height;
     (*cinfo->inputctl->finish_input_pass) (cinfo);
     cinfo->inputctl->eoi_reached = TRUE;
-    return cinfo->output_height - cinfo->output_scanline;
+    return num_lines;
   }
 
   if (num_lines == 0)
@@ -445,8 +466,10 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
     main_ptr->buffer_full = FALSE;
     main_ptr->rowgroup_ctr = 0;
     main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
-    upsample->next_row_out = cinfo->max_v_samp_factor;
-    upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+    if (!master->using_merged_upsample) {
+      upsample->next_row_out = cinfo->max_v_samp_factor;
+      upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+    }
   }
 
   /* Skipping is much simpler when context rows are not required. */
@@ -458,8 +481,10 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
       cinfo->output_scanline += lines_left_in_iMCU_row;
       main_ptr->buffer_full = FALSE;
       main_ptr->rowgroup_ctr = 0;
-      upsample->next_row_out = cinfo->max_v_samp_factor;
-      upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+      if (!master->using_merged_upsample) {
+        upsample->next_row_out = cinfo->max_v_samp_factor;
+        upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+      }
     }
   }
 
@@ -494,7 +519,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
       cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row;
       increment_simple_rowgroup_ctr(cinfo, lines_to_read);
     }
-    upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+    if (!master->using_merged_upsample)
+      upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
     return num_lines;
   }
 
@@ -535,7 +561,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
    * bit odd, since "rows_to_go" seems to be redundantly keeping track of
    * output_scanline.
    */
-  upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+  if (!master->using_merged_upsample)
+    upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
 
   /* Always skip the requested number of lines. */
   return num_lines;
diff --git a/3rdparty/libjpeg-turbo/src/jdcoefct.c b/3rdparty/libjpeg-turbo/src/jdcoefct.c
index 723a9ac2be..2ba6aa11e4 100644
--- a/3rdparty/libjpeg-turbo/src/jdcoefct.c
+++ b/3rdparty/libjpeg-turbo/src/jdcoefct.c
@@ -6,7 +6,7 @@
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2010, 2015-2016, D. R. Commander.
- * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2015, 2020, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -495,11 +495,13 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
       if (first_row && block_row == 0)
         prev_block_row = buffer_ptr;
       else
-        prev_block_row = buffer[block_row - 1];
+        prev_block_row = buffer[block_row - 1] +
+                         cinfo->master->first_MCU_col[ci];
       if (last_row && block_row == block_rows - 1)
         next_block_row = buffer_ptr;
       else
-        next_block_row = buffer[block_row + 1];
+        next_block_row = buffer[block_row + 1] +
+                         cinfo->master->first_MCU_col[ci];
       /* We fetch the surrounding DC values using a sliding-register approach.
        * Initialize all nine here so as to do the right thing on narrow pics.
        */
diff --git a/3rdparty/libjpeg-turbo/src/jdcolor.c b/3rdparty/libjpeg-turbo/src/jdcolor.c
index dc0e3b6c0e..d3ae40c7da 100644
--- a/3rdparty/libjpeg-turbo/src/jdcolor.c
+++ b/3rdparty/libjpeg-turbo/src/jdcolor.c
@@ -571,11 +571,10 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  * RGB565 conversion
  */
 
-#define PACK_SHORT_565_LE(r, g, b)  ((((r) << 8) & 0xF800) | \
-                                     (((g) << 3) & 0x7E0) | ((b) >> 3))
-#define PACK_SHORT_565_BE(r, g, b)  (((r) & 0xF8) | ((g) >> 5) | \
-                                     (((g) << 11) & 0xE000) | \
-                                     (((b) << 5) & 0x1F00))
+#define PACK_SHORT_565_LE(r, g, b) \
+  ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3))
+#define PACK_SHORT_565_BE(r, g, b) \
+  (((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00))
 
 #define PACK_TWO_PIXELS_LE(l, r)    ((r << 16) | l)
 #define PACK_TWO_PIXELS_BE(l, r)    ((l << 16) | r)
diff --git a/3rdparty/libjpeg-turbo/src/jdmerge.c b/3rdparty/libjpeg-turbo/src/jdmerge.c
index dff5a35087..3a456d6581 100644
--- a/3rdparty/libjpeg-turbo/src/jdmerge.c
+++ b/3rdparty/libjpeg-turbo/src/jdmerge.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009, 2011, 2014-2015, D. R. Commander.
+ * Copyright (C) 2009, 2011, 2014-2015, 2020, D. R. Commander.
  * Copyright (C) 2013, Linaro Limited.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -40,41 +40,13 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jdmerge.h"
 #include "jsimd.h"
 #include "jconfigint.h"
 
 #ifdef UPSAMPLE_MERGING_SUPPORTED
 
 
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_upsampler pub;    /* public fields */
-
-  /* Pointer to routine to do actual upsampling/conversion of one row group */
-  void (*upmethod) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                    JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
-
-  /* Private state for YCC->RGB conversion */
-  int *Cr_r_tab;                /* => table for Cr to R conversion */
-  int *Cb_b_tab;                /* => table for Cb to B conversion */
-  JLONG *Cr_g_tab;              /* => table for Cr to G conversion */
-  JLONG *Cb_g_tab;              /* => table for Cb to G conversion */
-
-  /* For 2:1 vertical sampling, we produce two output rows at a time.
-   * We need a "spare" row buffer to hold the second output row if the
-   * application provides just a one-row buffer; we also use the spare
-   * to discard the dummy last row if the image height is odd.
-   */
-  JSAMPROW spare_row;
-  boolean spare_full;           /* T if spare buffer is occupied */
-
-  JDIMENSION out_row_width;     /* samples per output row */
-  JDIMENSION rows_to_go;        /* counts rows remaining in image */
-} my_upsampler;
-
-typedef my_upsampler *my_upsample_ptr;
-
 #define SCALEBITS       16      /* speediest right-shift on some machines */
 #define ONE_HALF        ((JLONG)1 << (SCALEBITS - 1))
 #define FIX(x)          ((JLONG)((x) * (1L << SCALEBITS) + 0.5))
@@ -189,7 +161,7 @@ typedef my_upsampler *my_upsample_ptr;
 LOCAL(void)
 build_ycc_rgb_table(j_decompress_ptr cinfo)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   int i;
   JLONG x;
   SHIFT_TEMPS
@@ -232,7 +204,7 @@ build_ycc_rgb_table(j_decompress_ptr cinfo)
 METHODDEF(void)
 start_pass_merged_upsample(j_decompress_ptr cinfo)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
 
   /* Mark the spare buffer empty */
   upsample->spare_full = FALSE;
@@ -254,7 +226,7 @@ merged_2v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                    JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
 /* 2:1 vertical sampling case: may need a spare row. */
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   JSAMPROW work_ptrs[2];
   JDIMENSION num_rows;          /* number of rows returned to caller */
 
@@ -305,7 +277,7 @@ merged_1v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                    JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
 /* 1:1 vertical sampling case: much easier, never need a spare row. */
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
 
   /* Just do the upsampling. */
   (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
@@ -420,11 +392,10 @@ h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  * RGB565 conversion
  */
 
-#define PACK_SHORT_565_LE(r, g, b)  ((((r) << 8) & 0xF800) | \
-                                     (((g) << 3) & 0x7E0) | ((b) >> 3))
-#define PACK_SHORT_565_BE(r, g, b)  (((r) & 0xF8) | ((g) >> 5) | \
-                                     (((g) << 11) & 0xE000) | \
-                                     (((b) << 5) & 0x1F00))
+#define PACK_SHORT_565_LE(r, g, b) \
+  ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3))
+#define PACK_SHORT_565_BE(r, g, b) \
+  (((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00))
 
 #define PACK_TWO_PIXELS_LE(l, r)    ((r << 16) | l)
 #define PACK_TWO_PIXELS_BE(l, r)    ((l << 16) | r)
@@ -566,11 +537,11 @@ h2v2_merged_upsample_565D(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 GLOBAL(void)
 jinit_merged_upsampler(j_decompress_ptr cinfo)
 {
-  my_upsample_ptr upsample;
+  my_merged_upsample_ptr upsample;
 
-  upsample = (my_upsample_ptr)
+  upsample = (my_merged_upsample_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                sizeof(my_upsampler));
+                                sizeof(my_merged_upsampler));
   cinfo->upsample = (struct jpeg_upsampler *)upsample;
   upsample->pub.start_pass = start_pass_merged_upsample;
   upsample->pub.need_context_rows = FALSE;
diff --git a/3rdparty/libjpeg-turbo/src/jdmerge.h b/3rdparty/libjpeg-turbo/src/jdmerge.h
new file mode 100644
index 0000000000..b583396b10
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jdmerge.h
@@ -0,0 +1,47 @@
+/*
+ * jdmerge.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2020, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ */
+
+#define JPEG_INTERNALS
+#include "jpeglib.h"
+
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+
+
+/* Merged upsampler private state (shared by jdmerge.c and jdapistd.c) */
+
+typedef struct {
+  struct jpeg_upsampler pub;    /* public fields */
+
+  /* Pointer to routine to do actual upsampling/conversion of one row group */
+  void (*upmethod) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                    JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+
+  /* Private state for YCC->RGB conversion */
+  int *Cr_r_tab;                /* => table for Cr to R conversion */
+  int *Cb_b_tab;                /* => table for Cb to B conversion */
+  JLONG *Cr_g_tab;              /* => table for Cr to G conversion */
+  JLONG *Cb_g_tab;              /* => table for Cb to G conversion */
+
+  /* For 2:1 vertical sampling, we produce two output rows at a time.  A
+   * "spare" row buffer holds the second row if the application provides a
+   * one-row buffer and takes the dummy last row of an odd-height image.
+   * read_and_discard_scanlines() in jdapistd.c also reads skipped rows into it.
+   */
+  JSAMPROW spare_row;
+  boolean spare_full;           /* T if spare buffer is occupied */
+
+  JDIMENSION out_row_width;     /* samples per output row */
+  JDIMENSION rows_to_go;        /* counts rows remaining in image */
+} my_merged_upsampler;
+
+typedef my_merged_upsampler *my_merged_upsample_ptr;
+
+#endif /* UPSAMPLE_MERGING_SUPPORTED */
diff --git a/3rdparty/libjpeg-turbo/src/jdmrg565.c b/3rdparty/libjpeg-turbo/src/jdmrg565.c
index 1b87e3718d..53f1e16700 100644
--- a/3rdparty/libjpeg-turbo/src/jdmrg565.c
+++ b/3rdparty/libjpeg-turbo/src/jdmrg565.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright (C) 2013, Linaro Limited.
- * Copyright (C) 2014-2015, 2018, D. R. Commander.
+ * Copyright (C) 2014-2015, 2018, 2020, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -19,7 +19,7 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                                   JDIMENSION in_row_group_ctr,
                                   JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   register int y, cred, cgreen, cblue;
   int cb, cr;
   register JSAMPROW outptr;
@@ -90,7 +90,7 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
                                    JDIMENSION in_row_group_ctr,
                                    JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   register int y, cred, cgreen, cblue;
   int cb, cr;
   register JSAMPROW outptr;
@@ -163,7 +163,7 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                                   JDIMENSION in_row_group_ctr,
                                   JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   register int y, cred, cgreen, cblue;
   int cb, cr;
   register JSAMPROW outptr0, outptr1;
@@ -259,7 +259,7 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
                                    JDIMENSION in_row_group_ctr,
                                    JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   register int y, cred, cgreen, cblue;
   int cb, cr;
   register JSAMPROW outptr0, outptr1;
diff --git a/3rdparty/libjpeg-turbo/src/jdmrgext.c b/3rdparty/libjpeg-turbo/src/jdmrgext.c
index b1c27df56a..c9a44d8219 100644
--- a/3rdparty/libjpeg-turbo/src/jdmrgext.c
+++ b/3rdparty/libjpeg-turbo/src/jdmrgext.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2011, 2015, D. R. Commander.
+ * Copyright (C) 2011, 2015, 2020, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -25,7 +25,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                               JDIMENSION in_row_group_ctr,
                               JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   register int y, cred, cgreen, cblue;
   int cb, cr;
   register JSAMPROW outptr;
@@ -97,7 +97,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                               JDIMENSION in_row_group_ctr,
                               JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   register int y, cred, cgreen, cblue;
   int cb, cr;
   register JSAMPROW outptr0, outptr1;
diff --git a/3rdparty/libjpeg-turbo/src/jdtrans.c b/3rdparty/libjpeg-turbo/src/jdtrans.c
index 56713efe64..d7ec4b83b3 100644
--- a/3rdparty/libjpeg-turbo/src/jdtrans.c
+++ b/3rdparty/libjpeg-turbo/src/jdtrans.c
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1997, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2020, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -16,6 +16,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jpegcomp.h"
 
 
 /* Forward declarations */
diff --git a/3rdparty/libjpeg-turbo/src/jfdctint.c b/3rdparty/libjpeg-turbo/src/jfdctint.c
index b47c3061ac..c95a3a7fb8 100644
--- a/3rdparty/libjpeg-turbo/src/jfdctint.c
+++ b/3rdparty/libjpeg-turbo/src/jfdctint.c
@@ -4,11 +4,11 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015, D. R. Commander.
+ * Copyright (C) 2015, 2020, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
- * This file contains a slow-but-accurate integer implementation of the
+ * This file contains a slower but more accurate integer implementation of the
  * forward DCT (Discrete Cosine Transform).
  *
  * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
diff --git a/3rdparty/libjpeg-turbo/src/jidctint.c b/3rdparty/libjpeg-turbo/src/jidctint.c
index 98425d5fd0..50f385da33 100644
--- a/3rdparty/libjpeg-turbo/src/jidctint.c
+++ b/3rdparty/libjpeg-turbo/src/jidctint.c
@@ -5,11 +5,11 @@
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * Modification developed 2002-2009 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015, D. R. Commander.
+ * Copyright (C) 2015, 2020, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
- * This file contains a slow-but-accurate integer implementation of the
+ * This file contains a slower but more accurate integer implementation of the
  * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
  * must also perform dequantization of the input coefficients.
  *
diff --git a/3rdparty/libjpeg-turbo/src/jmorecfg.h b/3rdparty/libjpeg-turbo/src/jmorecfg.h
index d0b930079a..aa29f0f9f1 100644
--- a/3rdparty/libjpeg-turbo/src/jmorecfg.h
+++ b/3rdparty/libjpeg-turbo/src/jmorecfg.h
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 1997-2009 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2011, 2014-2015, 2018, D. R. Commander.
+ * Copyright (C) 2009, 2011, 2014-2015, 2018, 2020, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -273,9 +273,9 @@ typedef int boolean;
 
 /* Capability options common to encoder and decoder: */
 
-#define DCT_ISLOW_SUPPORTED     /* slow but accurate integer algorithm */
-#define DCT_IFAST_SUPPORTED     /* faster, less accurate integer method */
-#define DCT_FLOAT_SUPPORTED     /* floating-point: accurate, fast on fast HW */
+#define DCT_ISLOW_SUPPORTED     /* accurate integer method */
+#define DCT_IFAST_SUPPORTED     /* less accurate int method [legacy feature] */
+#define DCT_FLOAT_SUPPORTED     /* floating-point method [legacy feature] */
 
 /* Encoder capability options: */
 
diff --git a/3rdparty/libjpeg-turbo/src/jpegcomp.h b/3rdparty/libjpeg-turbo/src/jpegcomp.h
index b32d544bf1..c4834ac0df 100644
--- a/3rdparty/libjpeg-turbo/src/jpegcomp.h
+++ b/3rdparty/libjpeg-turbo/src/jpegcomp.h
@@ -1,7 +1,7 @@
 /*
  * jpegcomp.h
  *
- * Copyright (C) 2010, D. R. Commander.
+ * Copyright (C) 2010, 2020, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -19,6 +19,7 @@
 #define _min_DCT_v_scaled_size  min_DCT_v_scaled_size
 #define _jpeg_width  jpeg_width
 #define _jpeg_height  jpeg_height
+#define JERR_ARITH_NOTIMPL  JERR_NOT_COMPILED
 #else
 #define _DCT_scaled_size  DCT_scaled_size
 #define _DCT_h_scaled_size  DCT_scaled_size
diff --git a/3rdparty/libjpeg-turbo/src/jpeglib.h b/3rdparty/libjpeg-turbo/src/jpeglib.h
index 33f8ad2791..d7664f0630 100644
--- a/3rdparty/libjpeg-turbo/src/jpeglib.h
+++ b/3rdparty/libjpeg-turbo/src/jpeglib.h
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * Modified 2002-2009 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2013-2014, 2016-2017, D. R. Commander.
+ * Copyright (C) 2009-2011, 2013-2014, 2016-2017, 2020, D. R. Commander.
  * Copyright (C) 2015, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -244,9 +244,9 @@ typedef enum {
 /* DCT/IDCT algorithm options. */
 
 typedef enum {
-  JDCT_ISLOW,             /* slow but accurate integer algorithm */
-  JDCT_IFAST,             /* faster, less accurate integer method */
-  JDCT_FLOAT              /* floating-point: accurate, fast on fast HW */
+  JDCT_ISLOW,             /* accurate integer method */
+  JDCT_IFAST,             /* less accurate integer method [legacy feature] */
+  JDCT_FLOAT              /* floating-point method [legacy feature] */
 } J_DCT_METHOD;
 
 #ifndef JDCT_DEFAULT            /* may be overridden in jconfig.h */
diff --git a/3rdparty/libjpeg-turbo/src/jquant2.c b/3rdparty/libjpeg-turbo/src/jquant2.c
index 0ce0ca5472..6570613bb9 100644
--- a/3rdparty/libjpeg-turbo/src/jquant2.c
+++ b/3rdparty/libjpeg-turbo/src/jquant2.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2014-2015, D. R. Commander.
+ * Copyright (C) 2009, 2014-2015, 2020, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -1145,7 +1145,7 @@ start_pass_2_quant(j_decompress_ptr cinfo, boolean is_pre_scan)
   int i;
 
   /* Only F-S dithering or no dithering is supported. */
-  /* If user asks for ordered dither, give him F-S. */
+  /* If user asks for ordered dither, give them F-S. */
   if (cinfo->dither_mode != JDITHER_NONE)
     cinfo->dither_mode = JDITHER_FS;
 
@@ -1263,7 +1263,7 @@ jinit_2pass_quantizer(j_decompress_ptr cinfo)
     cquantize->sv_colormap = NULL;
 
   /* Only F-S dithering or no dithering is supported. */
-  /* If user asks for ordered dither, give him F-S. */
+  /* If user asks for ordered dither, give them F-S. */
   if (cinfo->dither_mode != JDITHER_NONE)
     cinfo->dither_mode = JDITHER_FS;
 
diff --git a/3rdparty/libjpeg-turbo/src/jversion.h b/3rdparty/libjpeg-turbo/src/jversion.h
index ab4a2c5703..4462b94104 100644
--- a/3rdparty/libjpeg-turbo/src/jversion.h
+++ b/3rdparty/libjpeg-turbo/src/jversion.h
@@ -30,23 +30,25 @@
  * NOTE: It is our convention to place the authors in the following order:
  * - libjpeg-turbo authors (2009-) in descending order of the date of their
  *   most recent contribution to the project, then in ascending order of the
- *   date of their first contribution to the project
+ *   date of their first contribution to the project, then in alphabetical
+ *   order
  * - Upstream authors in descending order of the date of the first inclusion of
  *   their code
  */
 
 #define JCOPYRIGHT \
   "Copyright (C) 2009-2020 D. R. Commander\n" \
-  "Copyright (C) 2011-2016 Siarhei Siamashka\n" \
+  "Copyright (C) 2015, 2020 Google, Inc.\n" \
+  "Copyright (C) 2019 Arm Limited\n" \
   "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
+  "Copyright (C) 2011-2016 Siarhei Siamashka\n" \
   "Copyright (C) 2015 Intel Corporation\n" \
-  "Copyright (C) 2015 Google, Inc.\n" \
+  "Copyright (C) 2013-2014 Linaro Limited\n" \
   "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
-  "Copyright (C) 2013 Linaro Limited\n" \
+  "Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \
   "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
-  "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
   "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
-  "Copyright (C) 1991-2016 Thomas G. Lane, Guido Vollbeding"
+  "Copyright (C) 1991-2017 Thomas G. Lane, Guido Vollbeding"
 
 #define JCOPYRIGHT_SHORT \
   "Copyright (C) 1991-2020 The libjpeg-turbo Project and many others"
diff --git a/apps/interactive-calibration/parametersController.cpp b/apps/interactive-calibration/parametersController.cpp
index c76b915c63..3bcf5b86e9 100644
--- a/apps/interactive-calibration/parametersController.cpp
+++ b/apps/interactive-calibration/parametersController.cpp
@@ -32,7 +32,7 @@ bool calib::parametersController::loadFromFile(const std::string &inputFileName)
 
     if(!reader.isOpened()) {
         std::cerr << "Warning: Unable to open " << inputFileName <<
-                     " Applicatioin stated with default advanced parameters" << std::endl;
+                     " Application started with default advanced parameters" << std::endl;
         return true;
     }
 
diff --git a/doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown b/doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown
index 4630c2037f..76cfa3751d 100644
--- a/doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown
+++ b/doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown
@@ -131,7 +131,7 @@ For properties you can read and change look into the documentation of the @ref c
 We want to check just how imperceptible our video converting operation went, therefore we need a
 system to check frame by frame the similarity or differences. The most common algorithm used for
 this is the PSNR (aka **Peak signal-to-noise ratio**). The simplest definition of this starts out
-from the *mean squad error*. Let there be two images: I1 and I2; with a two dimensional size i and
+from the *mean squared error*. Let there be two images: I1 and I2; with a two dimensional size i and
 j, composed of c number of channels.
 
 \f[MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}\f]
diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
index eb3f8693c2..ef2b31ac18 100644
--- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h
+++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
@@ -220,6 +220,11 @@ struct VZeroUpperGuard {
 #  define CV_VSX 1
 #endif
 
+#ifdef __F16C__
+#  include <immintrin.h>
+#  define CV_FP16 1
+#endif
+
 #endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code)
 
 
diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
index 9a9f31116a..d001ebc76c 100644
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@@ -844,7 +844,7 @@ protected:
     float16_t() : w(0) {}
     explicit float16_t(float x)
     {
-    #if CV_AVX2
+    #if CV_FP16
         __m128 v = _mm_load_ss(&x);
         w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0));
     #else
@@ -875,7 +875,7 @@ protected:
 
     operator float() const
     {
-    #if CV_AVX2
+    #if CV_FP16
         float f;
         _mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w)));
         return f;
diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp
index 5dc5bb567d..54e8927192 100644
--- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp
@@ -3121,18 +3121,39 @@ OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float32x8, float, f32, v_uint32x8, un
 OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int64x4, int64, s64, v_uint64x4, uint64, u64)
 OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, uint64, u64)
 
+//
 // FP16
+//
+
 inline v_float32x8 v256_load_expand(const float16_t* ptr)
 {
+#if CV_FP16  // half->float conversion intrinsic is usable: convert all 8 values in one step
     return v_float32x8(_mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
+#else  // no CV_FP16: convert the 8 halves one at a time through float16_t's float conversion
+    float CV_DECL_ALIGNED(32) buf[8];  // staging buffer, declared with CV_DECL_ALIGNED(32) for the aligned load below
+    for (int i = 0; i < 8; i++)
+        buf[i] = (float)ptr[i];
+    return v256_load_aligned(buf);
+#endif
 }
 
 inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
 {
+#if CV_FP16  // float->half conversion intrinsic is usable: convert and store all 8 values at once
     __m128i ah = _mm256_cvtps_ph(a.val, 0);
     _mm_storeu_si128((__m128i*)ptr, ah);
+#else  // no CV_FP16: spill the vector to floats and convert each element with the float16_t constructor
+    float CV_DECL_ALIGNED(32) buf[8];  // staging buffer, declared with CV_DECL_ALIGNED(32) for the aligned store below
+    v_store_aligned(buf, a);
+    for (int i = 0; i < 8; i++)
+        ptr[i] = float16_t(buf[i]);
+#endif
 }
 
+//
+// end of FP16
+//
+
 inline void v256_cleanup() { _mm256_zeroall(); }
 
 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
diff --git a/modules/core/src/convert.simd.hpp b/modules/core/src/convert.simd.hpp
index 001fc78264..5154041b6d 100644
--- a/modules/core/src/convert.simd.hpp
+++ b/modules/core/src/convert.simd.hpp
@@ -5,6 +5,11 @@
 #include "precomp.hpp"
 #include "convert.hpp"
 
+#if !defined(OPENCV_SUPRESS_WARNING_AVX2_WITHOUT_FP16C) && \
+    (defined(__GNUC__) && defined(__AVX2__) && !defined(__F16C__))
+#warning "Non-optimal compiler flags: AVX2 without FP16. Generated code is very slow. Consider adding '-mf16c' compiler option."
+#endif
+
 namespace cv {
 namespace hal {
 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp
index 321fa64264..71d61e14e0 100644
--- a/modules/core/test/test_intrin.cpp
+++ b/modules/core/test/test_intrin.cpp
@@ -126,9 +126,11 @@ DEFINE_SIMD_TESTS(256, AVX512_SKX)
 
 TEST(hal_intrin256, float16x16_FP16)
 {
+#if CV_TRY_FP16  // NOTE(review): presumably set when FP16 code paths are built - confirm against the dispatch headers
     //CV_CPU_CALL_FP16_(test_hal_intrin_float16, ());
     CV_CPU_CALL_AVX2_(test_hal_intrin_float16, ());
-    throw SkipTestException("Unsupported hardware: FP16 is not available");
+#endif  // CV_TRY_FP16
+    throw SkipTestException("Unsupported: FP16 is not available");  // fallback: CV_TRY_FP16 is off or, presumably, the dispatched call above did not run
 }
 
 
@@ -142,8 +144,10 @@ namespace intrin512 {
 
 TEST(hal_intrin512, float16x32_FP16)
 {
+#if CV_TRY_FP16  // NOTE(review): presumably set when FP16 code paths are built - confirm against the dispatch headers
     CV_CPU_CALL_AVX512_SKX_(test_hal_intrin_float16, ());
-    throw SkipTestException("Unsupported hardware: FP16 is not available");
+#endif  // CV_TRY_FP16
+    throw SkipTestException("Unsupported: FP16 is not available");  // fallback: CV_TRY_FP16 is off or, presumably, the dispatched call above did not run
 }
 
 
diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp
index 6731091463..84da496b42 100644
--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@@ -1902,21 +1902,21 @@ void test_hal_intrin_float64()
 #endif
 }
 
-#if CV_FP16
 void test_hal_intrin_float16()
 {
     DUMP_ENTRY(v_float16);
 #if CV_FP16
     TheTest<v_float32>()
         .test_loadstore_fp16_f32()
-#endif
 #if CV_SIMD_FP16
         .test_loadstore_fp16()
         .test_float_cvt_fp16()
 #endif
         ;
-}
+#else  // CV_FP16 is off: the function is still defined, but only reports the skip on stdout
+    std::cout << "SKIP: CV_FP16 is not available" << std::endl;
 #endif
+}
 
 /*#if defined(CV_CPU_DISPATCH_MODE_FP16) && CV_CPU_DISPATCH_MODE == FP16
 void test_hal_intrin_float16()
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index 4f109ed82c..69b71f90ce 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -365,9 +365,12 @@ CV__DNN_INLINE_NS_BEGIN
                                      const int requiredOutputs,
                                      std::vector<MatShape> &outputs,
                                      std::vector<MatShape> &internals) const;
+
         virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                                const std::vector<MatShape> &outputs) const {CV_UNUSED(inputs); CV_UNUSED(outputs); return 0;}
 
+        virtual bool updateMemoryShapes(const std::vector<MatShape> &inputs);
+
         CV_PROP String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
         CV_PROP String type; //!< Type name which was used for creating layer by layer factory.
         CV_PROP int preferableTarget; //!< prefer target for layer forwarding
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index 6ec3fdadbd..a056e8f5b5 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -1182,6 +1182,7 @@ struct Net::Impl : public detail::NetImplBase
         preferableBackend = DNN_BACKEND_DEFAULT;
         preferableTarget = DNN_TARGET_CPU;
         skipInfEngineInit = false;
+        hasDynamicShapes = false;
     }
 
     Ptr<DataLayer> netInputLayer;
@@ -1193,6 +1194,7 @@ struct Net::Impl : public detail::NetImplBase
     int preferableTarget;
     String halideConfigFile;
     bool skipInfEngineInit;
+    bool hasDynamicShapes;
     // Map host data to backend specific wrapper.
     std::map<void*, Ptr<BackendWrapper> > backendWrappers;
 
@@ -3539,6 +3541,46 @@ struct Net::Impl : public detail::NetImplBase
         shapes = inOutShapes[layerId];
     }
 
+    void updateLayersShapes()  // Rebuilds per-layer input shapes from layer 0's current output blobs and notifies each layer.
+    {
+        CV_Assert(!layers[0].outputBlobs.empty());
+        ShapesVec inputShapes;
+        for(int i = 0; i < layers[0].outputBlobs.size(); i++)
+        {
+            Mat& inp = layers[0].outputBlobs[i];
+            CV_Assert(inp.total());  // an empty blob carries no usable shape
+            if (preferableBackend == DNN_BACKEND_OPENCV &&
+                preferableTarget == DNN_TARGET_OPENCL_FP16)
+            {
+                layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);  // same dims/sizes, element type requested as CV_16S
+            }
+            inputShapes.push_back(shape(inp));
+        }
+        LayersShapesMap layersShapes;
+        layersShapes[0].in = inputShapes;  // seed the map; layer 0 is presumably the network input layer - confirm
+        for (MapIdToLayerData::iterator it = layers.begin();
+             it != layers.end(); it++)
+        {
+            int layerId = it->first;
+            std::vector<LayerPin>& inputLayerIds = it->second.inputBlobsId;
+            if (layersShapes[layerId].in.empty())  // only layers without recorded input shapes (layer 0 was seeded above)
+            {
+                for(int i = 0; i < inputLayerIds.size(); i++)
+                {
+                    int inputLayerId = inputLayerIds[i].lid;
+                    LayersShapesMap::iterator inputIt = layersShapes.find(inputLayerId);
+                    if(inputIt == layersShapes.end() || inputIt->second.out.empty())  // producer's output shapes are not known yet
+                    {
+                        getLayerShapesRecursively(inputLayerId, layersShapes);
+                    }
+                    const MatShape& shape = layersShapes[inputLayerId].out[inputLayerIds[i].oid];  // shape of the producer output pin feeding input i
+                    layersShapes[layerId].in.push_back(shape);
+                }
+                it->second.layerInstance->updateMemoryShapes(layersShapes[layerId].in);  // hand the collected input shapes to the layer
+            }
+        }
+    }
+
     LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
     {
         return *std::max_element(pins.begin(), pins.end());
@@ -3952,6 +3994,8 @@ int Net::addLayer(const String &name, const String &type, LayerParams &params)
     int id = ++impl->lastLayerId;
     impl->layerNameToId.insert(std::make_pair(name, id));
     impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));
+    if (params.get<bool>("has_dynamic_shapes", false))
+        impl->hasDynamicShapes = true;
 
     return id;
 }
@@ -4283,8 +4327,13 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons
     bool oldShape = prevShape == blobShape;
 
     blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
-    if (!oldShape)
+    if (!oldShape) {
         ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid];
+        if (impl->hasDynamicShapes)
+        {
+            impl->updateLayersShapes();
+        }
+    }
 
     if (!ld.outputBlobsWrappers[pin.oid].empty())
     {
@@ -5234,6 +5283,10 @@ bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
     return false;
 }
 
+bool Layer::updateMemoryShapes(const std::vector<MatShape> &inputs)
+{
+    CV_UNUSED(inputs); return true;  // default hook: nothing to refresh, always reports success
+}
 //////////////////////////////////////////////////////////////////////////
 
 static Mutex& getLayerFactoryMutex()
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index ef3d10e68b..edf9c1db9a 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -98,6 +98,9 @@ public:
         stride = Size(1, 1);
         pad_t = pad_l = pad_b = pad_r = 0;
 
+        hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
+        shapesInitialized = !hasDynamicShapes;
+
         if (params.has("pool") || params.has("kernel_size") ||
             params.has("kernel_w") || params.has("kernel_h"))
         {
@@ -1191,25 +1194,33 @@ public:
             outShape.push_back(pooledSize.height);
             outShape.push_back(pooledSize.width);
         }
-        else if (padMode.empty())
-        {
-            for (int i = 0; i < local_kernel.size(); i++) {
-                float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
-                outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
-            }
-
-            // If we have padding, ensure that the last pooling starts strictly
-            // inside the image (instead of at the padding); otherwise clip the last.
-            for (int i = 0; i < pads_end.size(); i++) {
-                if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) {
-                    --outShape[2 + i];
-                    CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]);
-                }
-            }
-        }
         else
         {
-            getConvPoolOutParams(inpShape, local_kernel, strides, padMode, std::vector<size_t>(local_kernel.size(), 1), outShape);
+            if (hasDynamicShapes && !shapesInitialized)
+            {
+                //Just copy input shapes for width and height to prevent errors on loading stage
+                for (int i = 0; i < inpShape.size(); i++)
+                    outShape.push_back(inpShape[i]);
+            }
+            else if (padMode.empty())
+            {
+                for (int i = 0; i < local_kernel.size(); i++) {
+                    float dst = (float) (inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
+                    outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
+                }
+
+                // If we have padding, ensure that the last pooling starts strictly
+                // inside the image (instead of at the padding); otherwise clip the last.
+                for (int i = 0; i < pads_end.size(); i++) {
+                    if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) {
+                        --outShape[2 + i];
+                        CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]);
+                    }
+                }
+            } else {
+                getConvPoolOutParams(inpShape, local_kernel, strides, padMode,
+                                     std::vector<size_t>(local_kernel.size(), 1), outShape);
+            }
         }
         if (type == ROI)
         {
@@ -1231,6 +1242,14 @@ public:
         return false;
     }
 
+    bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
+    {   // Validates the first input's two trailing dims, then enables real output-shape computation.
+        const int dims = inputs.empty() ? 0 : (int)inputs[0].size();  // 0 makes the assert fail instead of indexing out of range
+        CV_Assert(dims >= 2 && inputs[0][dims - 1] > 0 && inputs[0][dims - 2] > 0);
+        shapesInitialized = true;  // getMemoryShapes() stops copying the input shape as a placeholder
+        return true;
+    }
+
     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                            const std::vector<MatShape> &outputs) const CV_OVERRIDE
     {
@@ -1262,6 +1281,8 @@ private:
         ROI,   // RoI pooling, https://arxiv.org/pdf/1504.08083.pdf
         PSROI  // Position-sensitive RoI pooling, https://arxiv.org/pdf/1605.06409.pdf
     };
+    bool hasDynamicShapes;
+    bool shapesInitialized;
 };
 
 Ptr<PoolingLayer> PoolingLayer::create(const LayerParams& params)
diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp
index dbea5d55f4..4c603c1ac8 100644
--- a/modules/dnn/src/layers/reshape_layer.cpp
+++ b/modules/dnn/src/layers/reshape_layer.cpp
@@ -170,6 +170,9 @@ public:
         setParamsFrom(params);
         int axis = params.get<int>("axis", 0);
         int numAxes = params.get<int>("num_axes", -1);
+        hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
+        shapesInitialized = !hasDynamicShapes;
+
         CV_Assert(numAxes >= -1);
         newShapeRange = (numAxes == -1) ? Range(axis, INT_MAX) : Range(axis, axis + numAxes);
 
@@ -182,6 +185,25 @@ public:
             for (i = 0; i < dims; i++)
                 newShapeDesc[i] = paramShape.get<int>(i);
         }
+        if (hasDynamicShapes)
+        {
+            dynamicShapes.clear();
+            inputIndices.clear();
+            if (params.has("dynamic_axes")) {
+                CV_Assert(params.has("input_indices"));
+                const DictValue &dynamicAxes = params.get("dynamic_axes");
+                const DictValue &dynamicInputShapes = params.get("input_indices");
+                int i, dims = dynamicAxes.size();
+                CV_Assert(dims == dynamicInputShapes.size());
+                CV_Assert(dims > 0);
+                dynamicShapes.resize(dims);
+                inputIndices.resize(dims);
+                for (i = 0; i < dims; i++) {
+                    dynamicShapes[i] = dynamicAxes.get<int>(i);
+                    inputIndices[i] = dynamicInputShapes.get<int>(i);
+                }
+            }
+        }
     }
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
@@ -196,13 +218,21 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const CV_OVERRIDE
     {
+
         if (inputs.size() == 1 || inputs.size() == requiredOutputs)
         {
             outputs.clear();
             for (size_t i = 0; i < inputs.size(); i++)
             {
-                outputs.push_back(MatShape());
-                computeShapeByReshapeMask(inputs[i], newShapeDesc, newShapeRange, outputs.back());
+                if (hasDynamicShapes && !shapesInitialized)
+                {
+                    outputs.push_back(newShapeDesc);
+                }
+                else
+                {
+                    outputs.push_back(MatShape());
+                    computeShapeByReshapeMask(inputs[i], newShapeDesc, newShapeRange, outputs.back());
+                }
             }
         }
         else
@@ -213,6 +243,19 @@ public:
         return true;
     }
 
+    bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
+    {   // Copies every dynamic axis of the target shape from the first input, then enables real shape inference.
+        for (size_t i = 0; hasDynamicShapes && i < dynamicShapes.size(); ++i)
+        {
+            const int dstAxis = dynamicShapes[i], srcAxis = inputIndices[i];  // both lists are read from layer params
+            CV_Assert(!inputs.empty() && srcAxis >= 0 && srcAxis < (int)inputs[0].size());
+            CV_Assert(dstAxis >= 0 && dstAxis < (int)newShapeDesc.size());  // reject indices that would write out of range
+            newShapeDesc[dstAxis] = inputs[0][srcAxis];
+        }
+        shapesInitialized = true;  // getMemoryShapes() now applies the reshape mask instead of returning newShapeDesc as-is
+        return true;
+    }
+
     void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
     {
         std::vector<Mat> outputs;
@@ -310,6 +353,10 @@ public:
 
 private:
     std::vector<MatShape> outShapes;
+    std::vector<int> dynamicShapes; // Which axes shapes are dynamic and require reinitialization with new input
+    std::vector<int> inputIndices; // Which axes from input are needed to compute correct output shape
+    bool hasDynamicShapes;
+    bool shapesInitialized;
 };
 
 Ptr<ReshapeLayer> ReshapeLayer::create(const LayerParams& params)
diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp
index f73a6440ef..fa2d755b71 100644
--- a/modules/dnn/src/layers/slice_layer.cpp
+++ b/modules/dnn/src/layers/slice_layer.cpp
@@ -72,6 +72,8 @@ public:
         setParamsFrom(params);
         axis = params.get<int>("axis", 1);
         num_split = params.get<int>("num_split", 0);
+        hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
+        shapesInitialized = !hasDynamicShapes;
         if (params.has("slice_point"))
         {
             CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end"));
@@ -150,7 +152,8 @@ public:
                 CV_Assert(sliceRanges[i].size() <= inpShape.size());
                 for (int j = 0; j < sliceRanges[i].size(); ++j)
                 {
-                    outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();
+                    if (shapesInitialized || inpShape[j] > 0)
+                        outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();
                 }
             }
         }
@@ -165,6 +168,12 @@ public:
         return false;
     }
 
+    bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
+    {   // Marks the shapes as known, so getMemoryShapes() clamps every slice range from now on.
+        shapesInitialized = true;
+        CV_UNUSED(inputs); return true;  // the shapes themselves are not needed here
+    }
+
     void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
     {
 #ifdef HAVE_OPENCL
@@ -597,6 +606,8 @@ public:
 protected:
     // The actual non-negative values determined from @p sliceRanges depends on input size.
     std::vector<std::vector<Range> > finalSliceRanges;
+    bool hasDynamicShapes;
+    bool shapesInitialized;
 };
 
 class CropLayerImpl CV_FINAL : public SliceLayerImpl
diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
index 46773220aa..ad3d903d68 100644
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -260,6 +260,40 @@ public:
         addNodeToMatch("Cast", gather);
         setFusedNode("Gather", input, index);
     }
+
+    // Runs the generic subgraph match, then rejects the Gather+Cast fusion when the Cast's
+    // first input (expected to be the Gather output) is also consumed by any other node.
+    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
+                       std::vector<int>& matchedNodesIds,
+                       std::vector<int>& targetNodesIds) CV_OVERRIDE
+    {
+        if (!Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds))
+            return false;
+        const size_t matchedNodesNum = matchedNodesIds.size();
+        if (matchedNodesNum < 2)
+            return true;
+        // The extra check only applies when the last two matched nodes are Gather followed by Cast.
+        const int castId = matchedNodesIds[matchedNodesNum - 1];
+        const int gatherId = matchedNodesIds[matchedNodesNum - 2];
+        const Ptr<ImportNodeWrapper> castNode = net->getNode(castId);
+        if (castNode->getType() != "Cast" || net->getNode(gatherId)->getType() != "Gather")
+            return true;
+
+        const std::string sharedInput = castNode->getInputName(0);
+        const int numNodes = net->getNumNodes();
+        for (int i = 0; i < numNodes; ++i) {
+            if (i == castId)
+                continue;
+            const Ptr<ImportNodeWrapper> other = net->getNode(i);
+            const int numInp = other->getNumInputs();
+            // Compare every input, not only the first: the blob may feed any input slot.
+            for (int inp = 0; inp < numInp; ++inp) {
+                if (sharedInput == other->getInputName(inp))
+                    return false;  // another node has the same input, so it cannot be merged
+            }
+        }
+        return true;
+    }
 };
 
 class ExpandSubgraph : public Subgraph
@@ -513,6 +547,19 @@ Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
         CV_Assert(!field.empty());
         Mat(sizes, CV_64FC1, (void*)field.data()).convertTo(blob, CV_32FC1);
     }
+    else if (datatype == opencv_onnx::TensorProto_DataType_INT32)
+    {
+        if (!tensor_proto.int32_data().empty())
+        {
+            const ::google::protobuf::RepeatedField<int32_t> field = tensor_proto.int32_data();
+            Mat(sizes, CV_32SC1, (void*)field.data()).copyTo(blob);
+        }
+        else
+        {
+            char* val = const_cast<char*>(tensor_proto.raw_data().c_str());
+            Mat(sizes, CV_32SC1, val).copyTo(blob);
+        }
+    }
     else if (datatype == opencv_onnx::TensorProto_DataType_INT64)
     {
         blob.create(sizes, CV_32SC1);
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index ccb0bf1e58..fe9d04ed28 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -64,6 +64,7 @@ public:
     ONNXImporter(Net& net, const char *onnxFile)
         : dstNet(net)
     {
+        hasDynamicShapes = false;
         CV_Assert(onnxFile);
         CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile);
 
@@ -84,6 +85,7 @@ public:
     ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
         : dstNet(net)
     {
+        hasDynamicShapes = false;
         CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");
 
         struct _Buf : public std::streambuf
@@ -115,6 +117,7 @@ protected:
     std::map<std::string, Mat> constBlobs;
 
     std::map<std::string, MatShape> outShapes;  // List of internal blobs shapes.
+    bool hasDynamicShapes;  // Whether the model has inputs with dynamic shapes
     typedef std::map<std::string, MatShape>::iterator IterShape_t;
 
     std::map<std::string, LayerInfo> layer_id;
@@ -413,8 +416,10 @@ void ONNXImporter::populateNet()
         for (int j = 0; j < inpShape.size(); ++j)
         {
             inpShape[j] = tensorShape.dim(j).dim_value();
+            if (!tensorShape.dim(j).dim_param().empty())
+                hasDynamicShapes = true;
         }
-        if (!inpShape.empty())
+        if (!inpShape.empty() && !hasDynamicShapes)
         {
             inpShape[0] = std::max(inpShape[0], 1); // It's OK to have undetermined batch size
         }
@@ -461,6 +466,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
 
         layerParams.name = name;
         layerParams.type = layer_type;
+        layerParams.set("has_dynamic_shapes", hasDynamicShapes);
 
         if (layer_type == "MaxPool")
         {
@@ -551,11 +557,36 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
 
                     CV_Assert(axes.size() <= inpShape.size() - 2);
                     std::vector<int> kernel_size(inpShape.size() - 2, 1);
-                    for (int i = 0; i < axes.size(); i++) {
-                        int axis = clamp(axes.get<int>(i), inpShape.size());
-                        CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
-                        kernel_size[axis - 2] = inpShape[axis];
+                    if (axes.size() == 1 && (clamp(axes.get<int>(0), inpShape.size()) <= 1))  // single reduced axis, and it is axis 0 or 1
+                    {
+                        int axis = clamp(axes.get<int>(0), inpShape.size());
+                        MatShape newShape = inpShape;
+                        newShape[axis + 1] = total(newShape, axis + 1);  // collapse the trailing dims into one entry (assumes total() returns their product - TODO confirm)
+                        newShape.resize(axis + 2);
+                        newShape.insert(newShape.begin(), 2 - axis, 1);  // left-pad with 1s: newShape always ends up with 4 entries
+
+                        LayerParams reshapeLp;
+                        reshapeLp.type = "Reshape";
+                        reshapeLp.name = layerParams.name + "/reshape";
+                        CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
+                        reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size()));
+
+                        node_proto.set_output(0, reshapeLp.name);
+                        addLayer(reshapeLp, node_proto);
+
+                        kernel_size.resize(2, 1);  // 4D blob needs 2 kernel dims; pad with 1, since plain resize(2) appends 0 for a 3D input
+                        kernel_size[0] = inpShape[axis];  // first kernel dim spans the whole reduced axis
+                        node_proto.set_input(0, node_proto.output(0));
                     }
+                    else
+                    {
+                        for (int i = 0; i < axes.size(); i++) {
+                            int axis = clamp(axes.get<int>(i), inpShape.size());
+                            CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
+                            kernel_size[axis - 2] = inpShape[axis];
+                        }
+                    }
+
                     LayerParams poolLp = layerParams;
                     poolLp.name = layerParams.name + "/avg";
                     CV_Assert(layer_id.find(poolLp.name) == layer_id.end());
@@ -1276,6 +1307,20 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
             {
                 layerParams.type = "Reshape";
                 layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
+                if (hasDynamicShapes)  // additionally record axis bookkeeping on the Reshape params
+                {
+                    std::vector<int> dynamicAxes;  // receives every output axis index, 0 .. outShape.size()-1
+                    std::vector<int> inputIndices;  // receives the input axis indices whose maskedAxes entry is false
+                    for (int index = 0; index < inpShape.size(); ++index)
+                    {
+                        if (!maskedAxes[index])
+                            inputIndices.push_back(index);
+                    }
+                    for (int index = 0; index < outShape.size(); ++index)
+                        dynamicAxes.push_back(index);
+                    layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
+                    layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
+                }
             }
             else
                 layerParams.type = "Identity";
@@ -1338,6 +1383,19 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
             outShape.insert(outShape.begin() + axis, 1);
             layerParams.type = "Reshape";
             layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
+            if (hasDynamicShapes)  // additionally record axis bookkeeping on the Reshape params
+            {
+                std::vector<int> dynamicAxes;  // receives every output axis index except the inserted one
+                std::vector<int> inputIndices;  // receives every input axis index, 0 .. inpShape.size()-1
+                for (int index = 0; index < outShape.size(); ++index) {
+                    if (index != axis)  // `axis` is where the size-1 entry was inserted above
+                        dynamicAxes.push_back(index);
+                }
+                for (int index = 0; index < inpShape.size(); ++index)
+                    inputIndices.push_back(index);
+                layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
+                layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
+            }
         }
         else if (layer_type == "Expand")
         {
@@ -1625,6 +1683,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
                 cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size());
                 sliceLp.set("begin", paramBegin);
                 sliceLp.set("end", paramEnd);
+                sliceLp.set("has_dynamic_shapes", hasDynamicShapes);  // set explicitly; sliceLp presumably does not inherit the flag from layerParams - TODO confirm
 
                 if (inpShape.size() > 1)
                 {
@@ -1637,6 +1696,17 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
                     layerParams.type = "Reshape";
                     layerParams.set("axis", 0);
                     layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
+                    if (hasDynamicShapes)  // additionally record axis bookkeeping on the Reshape params
+                    {
+                        std::vector<int> dynamicAxes;  // receives 0 .. inpShape.size()-1
+                        std::vector<int> inputIndices;  // receives the same index sequence as dynamicAxes
+                        for (int index = 0; index < inpShape.size(); ++index)
+                            dynamicAxes.push_back(index);
+                        for (int index = 0; index < inpShape.size(); ++index)
+                            inputIndices.push_back(index);
+                        layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
+                        layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
+                    }
                     node_proto.set_input(0, sliceLp.name);
                 }
                 else
@@ -1676,7 +1746,11 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
             for (int i = 1; i < node_proto.input_size(); i++)
                 CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end());
 
-            String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
+            String interp_mode;  // taken from "coordinate_transformation_mode" when present, else from "mode"
+            if (layerParams.has("coordinate_transformation_mode"))  // the parameter may be missing
+                interp_mode = layerParams.get<String>("coordinate_transformation_mode");
+            else
+                interp_mode = layerParams.get<String>("mode");  // fallback so the lookup above is never done on a missing key
             CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");
 
             layerParams.set("align_corners", interp_mode == "align_corners");
@@ -1688,16 +1762,23 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
                 shapes.convertTo(shapes, CV_32S);
             int height = shapes.at<int>(2);
             int width  = shapes.at<int>(3);
-            if (node_proto.input_size() == 3)
+            if (hasDynamicShapes)  // dynamic shapes: pass the values as zoom factors instead of a fixed output size
             {
-                IterShape_t shapeIt = outShapes.find(node_proto.input(0));
-                CV_Assert(shapeIt != outShapes.end());
-                MatShape scales = shapeIt->second;
-                height *= scales[2];
-                width  *= scales[3];
+                layerParams.set("zoom_factor_x", width);  // NOTE(review): used unscaled regardless of input_size() - confirm this is right when `shapes` holds sizes
+                layerParams.set("zoom_factor_y", height);
+            }
+            else
+            {
+                if (node_proto.input_size() == 3) {  // same multiplication as before, now only on the static-shape path
+                    IterShape_t shapeIt = outShapes.find(node_proto.input(0));
+                    CV_Assert(shapeIt != outShapes.end());
+                    MatShape scales = shapeIt->second;  // despite the name, this is the stored shape of input(0)
+                    height *= scales[2];
+                    width *= scales[3];
+                }
+                layerParams.set("width", width);
+                layerParams.set("height", height);
             }
-            layerParams.set("width", width);
-            layerParams.set("height", height);
 
             if (layerParams.get<String>("mode") == "linear") {
                 layerParams.set("mode", interp_mode == "pytorch_half_pixel" ?
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index f437094a56..c48bd50c06 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -280,9 +280,11 @@ TEST_P(Test_ONNX_layers, ReduceSum)
     testONNXModels("reduce_sum");
 }
 
-TEST_P(Test_ONNX_layers, ReduceMaxGlobal)
+TEST_P(Test_ONNX_layers, ReduceMax)
 {
     testONNXModels("reduce_max");
+    testONNXModels("reduce_max_axis_0");  // extra model; the name suggests a reduction along axis 0
+    testONNXModels("reduce_max_axis_1");  // extra model; the name suggests a reduction along axis 1
 }
 
 TEST_P(Test_ONNX_layers, Scale)
@@ -718,6 +720,10 @@ TEST_P(Test_ONNX_layers, Conv1d_variable_weight)
 
 TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)  // only the nGraph backend is affected
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);  // Myriad target: apply both skip tags
+    }
     String basename = "conv1d_variable_wb";
     Net net = readNetFromONNX(_tf("models/" + basename + ".onnx"));
     ASSERT_FALSE(net.empty());
@@ -738,6 +744,37 @@ TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias)
     normAssert(ref, out, "", default_l1, default_lInf);
 }
 
+TEST_P(Test_ONNX_layers, GatherMultiOutput)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (target == DNN_TARGET_MYRIAD)  // this check is compiled only when INF_ENGINE_RELEASE is defined
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE);
+#endif
+
+    testONNXModels("gather_multi_output");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)  // NN Builder backend: tag the Myriad target
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)  // nGraph backend: same Myriad check with the nGraph tag
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+    testONNXModels("squeeze_and_conv_dynamic_axes");  // each call below runs one *_dynamic_axes model
+    testONNXModels("unsqueeze_and_conv_dynamic_axes");
+    testONNXModels("gather_dynamic_axes");
+    testONNXModels("gather_scalar_dynamic_axes");
+    testONNXModels("slice_dynamic_axes");
+    testONNXModels("slice_opset_11_dynamic_axes");
+    testONNXModels("resize_opset11_torch1.6_dynamic_axes");
+    testONNXModels("average_pooling_dynamic_axes");
+    testONNXModels("maxpooling_sigmoid_dynamic_axes");
+}
+
 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());
 
 class Test_ONNX_nets : public Test_ONNX_layers