diff --git a/3rdparty/carotene/CMakeLists.txt b/3rdparty/carotene/CMakeLists.txt
index dc780fd64583..e229cbc02234 100644
--- a/3rdparty/carotene/CMakeLists.txt
+++ b/3rdparty/carotene/CMakeLists.txt
@@ -42,17 +42,9 @@ endif()
 
 if(WITH_NEON)
     target_compile_definitions(carotene_objs PRIVATE "-DWITH_NEON")
-    if(NOT DEFINED CAROTENE_NEON_ARCH )
-    elseif(CAROTENE_NEON_ARCH EQUAL 8)
-	    target_compile_definitions(carotene_objs PRIVATE "-DCAROTENE_NEON_ARCH=8")
-    elseif(CAROTENE_NEON_ARCH EQUAL 7)
-	    target_compile_definitions(carotene_objs PRIVATE "-DCAROTENE_NEON_ARCH=7")
-    else()
-	    target_compile_definitions(carotene_objs PRIVATE "-DCAROTENE_NEON_ARCH=0")
-    endif()
 endif()
 
- if(MINGW) 
+ if(MINGW)
     target_compile_definitions(carotene_objs PRIVATE "-D_USE_MATH_DEFINES=1")
  endif()
 
diff --git a/3rdparty/carotene/hal/tegra_hal.hpp b/3rdparty/carotene/hal/tegra_hal.hpp
index 7f67ecf1a34a..cb658e8af012 100644
--- a/3rdparty/carotene/hal/tegra_hal.hpp
+++ b/3rdparty/carotene/hal/tegra_hal.hpp
@@ -1857,7 +1857,7 @@ TegraCvtColor_Invoker(bgrx2hsvf, bgrx2hsv, src_data + static_cast<size_t>(range.
 #endif
 
 // The optimized branch was developed for old armv7 processors and leads to perf degradation on armv8
-#if defined(DCAROTENE_NEON_ARCH) && (DCAROTENE_NEON_ARCH == 7)
+#if defined(__ARM_ARCH) && (__ARM_ARCH == 7)
 inline CAROTENE_NS::BORDER_MODE borderCV2Carotene(int borderType)
 {
     switch(borderType)
@@ -1928,7 +1928,7 @@ inline int TEGRA_GaussianBlurBinomial(const uchar* src_data, size_t src_step, uc
 #undef cv_hal_gaussianBlurBinomial
 #define cv_hal_gaussianBlurBinomial TEGRA_GaussianBlurBinomial
 
-#endif // DCAROTENE_NEON_ARCH=7
+#endif // __ARM_ARCH=7
 
 #endif // OPENCV_IMGPROC_HAL_INTERFACE_H
 
diff --git a/3rdparty/carotene/src/common.hpp b/3rdparty/carotene/src/common.hpp
index b9de371a6afe..823ddf1ccf06 100644
--- a/3rdparty/carotene/src/common.hpp
+++ b/3rdparty/carotene/src/common.hpp
@@ -58,17 +58,6 @@
 
 namespace CAROTENE_NS { namespace internal {
 
-#ifndef CAROTENE_NEON_ARCH
-#    if defined(__aarch64__) || defined(__aarch32__)
-#        define CAROTENE_NEON_ARCH 8
-#    else
-#        define CAROTENE_NEON_ARCH 7
-#    endif
-#endif
-#if ( !defined(__aarch64__) && !defined(__aarch32__) ) && (CAROTENE_NEON_ARCH == 8 )
-#    error("ARMv7 doen't support A32/A64 Neon instructions")
-#endif
-
 inline void prefetch(const void *ptr, size_t offset = 32*10)
 {
 #if defined __GNUC__
diff --git a/3rdparty/carotene/src/vround_helper.hpp b/3rdparty/carotene/src/vround_helper.hpp
index 89a62545106f..f931a20984fc 100644
--- a/3rdparty/carotene/src/vround_helper.hpp
+++ b/3rdparty/carotene/src/vround_helper.hpp
@@ -57,7 +57,7 @@ namespace CAROTENE_NS { namespace internal {
 
 inline uint32x4_t vroundq_u32_f32(const float32x4_t val)
 {
-#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
+#if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
     return vcvtnq_u32_f32(val);
 #else
     const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA);
@@ -67,7 +67,7 @@ inline uint32x4_t vroundq_u32_f32(const float32x4_t val)
 
 inline uint32x2_t vround_u32_f32(const float32x2_t val)
 {
-#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
+#if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
     return vcvtn_u32_f32(val);
 #else
     const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA);
@@ -77,7 +77,7 @@ inline uint32x2_t vround_u32_f32(const float32x2_t val)
 
 inline int32x4_t vroundq_s32_f32(const float32x4_t val)
 {
-#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
+#if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
     return vcvtnq_s32_f32(val);
 #else
     const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA);
@@ -87,7 +87,7 @@ inline int32x4_t vroundq_s32_f32(const float32x4_t val)
 
 inline int32x2_t vround_s32_f32(const float32x2_t val)
 {
-#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
+#if defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
     return vcvtn_s32_f32(val);
 #else
     const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA);
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9fd237b0e68c..9320c90dac93 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -121,14 +121,20 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ${ENABLE_PIC})
 ocv_cmake_hook(PRE_CMAKE_BOOTSTRAP)
 
 # Bootstrap CMake system: setup CMAKE_SYSTEM_NAME and other vars
+
+# workaround: https://gitlab.kitware.com/cmake/cmake/-/issues/20989
 if(OPENCV_WORKAROUND_CMAKE_20989)
   set(CMAKE_SYSTEM_PROCESSOR_BACKUP ${CMAKE_SYSTEM_PROCESSOR})
 endif()
-enable_language(CXX C)
+
+project(OpenCV CXX C)
+
 if(OPENCV_WORKAROUND_CMAKE_20989)
   set(CMAKE_SYSTEM_PROCESSOR ${CMAKE_SYSTEM_PROCESSOR_BACKUP})
 endif()
 
+enable_testing()
+
 ocv_cmake_hook(POST_CMAKE_BOOTSTRAP)
 
 if(NOT OPENCV_SKIP_CMAKE_SYSTEM_FILE)
@@ -151,10 +157,6 @@ if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)  # https://cmake.org/cmake/help/
   endif()
 endif()
 
-enable_testing()
-
-project(OpenCV CXX C)
-
 if(MSVC)
   set(CMAKE_USE_RELATIVE_PATHS ON CACHE INTERNAL "" FORCE)
 endif()
@@ -163,70 +165,30 @@ ocv_cmake_eval(DEBUG_PRE ONCE)
 
 ocv_clear_vars(OpenCVModules_TARGETS)
 
-include(cmake/OpenCVDownload.cmake)
-
-set(BUILD_LIST "" CACHE STRING "Build only listed modules (comma-separated, e.g. 'videoio,dnn,ts')")
-
 # ----------------------------------------------------------------------------
-# Break in case of popular CMake configuration mistakes
+#  Autodetect if we are in a GIT repository
 # ----------------------------------------------------------------------------
-if(NOT CMAKE_SIZEOF_VOID_P GREATER 0)
-  message(FATAL_ERROR "CMake fails to determine the bitness of the target platform.
-  Please check your CMake and compiler installation. If you are cross-compiling then ensure that your CMake toolchain file correctly sets the compiler details.")
+find_host_package(Git QUIET)
+
+if(NOT DEFINED OPENCV_VCSVERSION AND GIT_FOUND)
+  ocv_git_describe(OPENCV_VCSVERSION "${OpenCV_SOURCE_DIR}")
+elseif(NOT DEFINED OPENCV_VCSVERSION)
+  # We don't have git:
+  set(OPENCV_VCSVERSION "unknown")
 endif()
 
+include(cmake/OpenCVDownload.cmake)
+
 # ----------------------------------------------------------------------------
 # Detect compiler and target platform architecture
 # ----------------------------------------------------------------------------
 include(cmake/OpenCVDetectCXXCompiler.cmake)
 ocv_cmake_hook(POST_DETECT_COMPILER)
 
-# Add these standard paths to the search paths for FIND_LIBRARY
-# to find libraries from these locations first
-if(UNIX AND NOT ANDROID)
-  if(X86_64 OR CMAKE_SIZEOF_VOID_P EQUAL 8)
-    if(EXISTS /lib64)
-      list(APPEND CMAKE_LIBRARY_PATH /lib64)
-    else()
-      list(APPEND CMAKE_LIBRARY_PATH /lib)
-    endif()
-    if(EXISTS /usr/lib64)
-      list(APPEND CMAKE_LIBRARY_PATH /usr/lib64)
-    else()
-      list(APPEND CMAKE_LIBRARY_PATH /usr/lib)
-    endif()
-  elseif(X86 OR CMAKE_SIZEOF_VOID_P EQUAL 4)
-    if(EXISTS /lib32)
-      list(APPEND CMAKE_LIBRARY_PATH /lib32)
-    else()
-      list(APPEND CMAKE_LIBRARY_PATH /lib)
-    endif()
-    if(EXISTS /usr/lib32)
-      list(APPEND CMAKE_LIBRARY_PATH /usr/lib32)
-    else()
-      list(APPEND CMAKE_LIBRARY_PATH /usr/lib)
-    endif()
-  endif()
-endif()
-
-# Add these standard paths to the search paths for FIND_PATH
-# to find include files from these locations first
-if(MINGW)
-  if(EXISTS /mingw)
-      list(APPEND CMAKE_INCLUDE_PATH /mingw)
-  endif()
-  if(EXISTS /mingw32)
-      list(APPEND CMAKE_INCLUDE_PATH /mingw32)
-  endif()
-  if(EXISTS /mingw64)
-      list(APPEND CMAKE_INCLUDE_PATH /mingw64)
-  endif()
-endif()
-
 # ----------------------------------------------------------------------------
 # OpenCV cmake options
 # ----------------------------------------------------------------------------
-
+set(BUILD_LIST "" CACHE STRING "Build only listed modules (comma-separated, e.g. 'videoio,dnn,ts')")
 OCV_OPTION(OPENCV_ENABLE_NONFREE "Enable non-free algorithms" OFF)
 
 # 3rd party libs
@@ -658,19 +620,6 @@ ocv_include_directories(${OPENCV_CONFIG_FILE_INCLUDE_DIR})
 # ----------------------------------------------------------------------------
 set(OPENCV_EXTRA_MODULES_PATH "" CACHE PATH "Where to look for additional OpenCV modules (can be ;-separated list of paths)")
 
-# ----------------------------------------------------------------------------
-#  Autodetect if we are in a GIT repository
-# ----------------------------------------------------------------------------
-find_host_package(Git QUIET)
-
-if(NOT DEFINED OPENCV_VCSVERSION AND GIT_FOUND)
-  ocv_git_describe(OPENCV_VCSVERSION "${OpenCV_SOURCE_DIR}")
-elseif(NOT DEFINED OPENCV_VCSVERSION)
-  # We don't have git:
-  set(OPENCV_VCSVERSION "unknown")
-endif()
-
-
 # ----------------------------------------------------------------------------
 # OpenCV compiler and linker options
 # ----------------------------------------------------------------------------
@@ -1004,15 +953,7 @@ foreach(hal ${OpenCV_HAL})
     if(";${CPU_BASELINE_FINAL};" MATCHES ";NEON;")
       add_subdirectory(3rdparty/carotene/hal)
       ocv_hal_register(CAROTENE_HAL_LIBRARIES CAROTENE_HAL_HEADERS CAROTENE_HAL_INCLUDE_DIRS)
-
-      if( NOT DEFINED CAROTENE_NEON_ARCH)
-          set(CAROTENE_NEON_MSG "Auto detected")
-      elseif( CAROTENE_NEON_ARCH GREATER 7)
-          set(CAROTENE_NEON_MSG "Force ARMv8+")
-      else()
-          set(CAROTENE_NEON_MSG "Force ARMv7")
-      endif()
-      list(APPEND OpenCV_USED_HAL "carotene (ver ${CAROTENE_HAL_VERSION}, ${CAROTENE_NEON_MSG})")
+      list(APPEND OpenCV_USED_HAL "carotene (ver ${CAROTENE_HAL_VERSION})")
     else()
       message(STATUS "Carotene: NEON is not available, disabling carotene...")
     endif()
diff --git a/cmake/OpenCVDetectCXXCompiler.cmake b/cmake/OpenCVDetectCXXCompiler.cmake
index 448afd46eafb..77c9e5c985e7 100644
--- a/cmake/OpenCVDetectCXXCompiler.cmake
+++ b/cmake/OpenCVDetectCXXCompiler.cmake
@@ -83,6 +83,10 @@ if(NOT DEFINED CMAKE_SIZEOF_VOID_P
     AND NOT OPENCV_SUPPRESS_MESSAGE_MISSING_CMAKE_SIZEOF_VOID_P)
   message(WARNING "OpenCV: CMAKE_SIZEOF_VOID_P is not defined. Perhaps CMake toolchain is broken")
 endif()
+if(NOT CMAKE_SIZEOF_VOID_P GREATER 0)
+  message(FATAL_ERROR "CMake fails to determine the bitness of the target platform.
+  Please check your CMake and compiler installation. If you are cross-compiling then ensure that your CMake toolchain file correctly sets the compiler details.")
+endif()
 
 message(STATUS "Detected processor: ${CMAKE_SYSTEM_PROCESSOR}")
 if(OPENCV_SKIP_SYSTEM_PROCESSOR_DETECTION)
@@ -156,8 +160,10 @@ elseif(MSVC)
     set(OpenCV_ARCH "ARM")
   elseif("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
     set(OpenCV_ARCH "x64")
+  elseif("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4")
+    set(OpenCV_ARCH "x86")
   else()
-    set(OpenCV_ARCH x86)
+    message(FATAL_ERROR "Failed to determine system architecture")
   endif()
 
   if(MSVC_VERSION EQUAL 1400)
diff --git a/cmake/OpenCVDownload.cmake b/cmake/OpenCVDownload.cmake
index 3e4651553771..40e48d7465a2 100644
--- a/cmake/OpenCVDownload.cmake
+++ b/cmake/OpenCVDownload.cmake
@@ -40,11 +40,14 @@ file(REMOVE "${OPENCV_DOWNLOAD_WITH_WGET}")
 ocv_check_environment_variables(OPENCV_DOWNLOAD_MIRROR_ID)
 
 function(ocv_init_download_mirror)
+  if(NOT GIT_FOUND)
+    return()
+  endif()
   if(NOT DEFINED OPENCV_DOWNLOAD_MIRROR_ID)
     # Run `git remote get-url origin` to get remote source
     execute_process(
       COMMAND
-        git remote get-url origin
+        ${GIT_EXECUTABLE} remote get-url origin
       WORKING_DIRECTORY
         ${CMAKE_SOURCE_DIR}
       RESULT_VARIABLE
diff --git a/doc/opencv.bib b/doc/opencv.bib
index be6721b573d8..da31c2a46bb8 100644
--- a/doc/opencv.bib
+++ b/doc/opencv.bib
@@ -1220,7 +1220,7 @@ @inproceedings{Zuliani2014RANSACFD
   title={RANSAC for Dummies With examples using the RANSAC toolbox for Matlab \& Octave and more...},
   author={Marco Zuliani},
   year={2014},
-  url = {https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.475.1243&rep=rep1&type=pdf}
+  url = {http://www.marcozuliani.com/docs/RANSAC4Dummies.pdf}
 }
 @inproceedings{forstner1987fast,
   title={A fast operator for detection and precise location of distincs points, corners and center of circular features},
diff --git a/doc/tutorials/imgproc/imgtrans/remap/remap.markdown b/doc/tutorials/imgproc/imgtrans/remap/remap.markdown
index cbf23998e119..834ca2770c5d 100644
--- a/doc/tutorials/imgproc/imgtrans/remap/remap.markdown
+++ b/doc/tutorials/imgproc/imgtrans/remap/remap.markdown
@@ -45,7 +45,7 @@ Theory
 
     ![](images/Remap_Tutorial_Theory_0.jpg)
 
-    observe how the red circle changes positions with respect to x (considering \f$x\f$ the horizontal
+    observe how the red circle changes positions with respect to \f$x\f$ (considering \f$x\f$ the horizontal
     direction):
 
     ![](images/Remap_Tutorial_Theory_1.jpg)
@@ -62,19 +62,19 @@ Code
     -   Wait for the user to exit the program
 
 @add_toggle_cpp
--   The tutorial code's is shown lines below. You can also download it from
+-   The tutorial code is shown in the lines below. You can also download it from
     [here](https://github.com/opencv/opencv/tree/5.x/samples/cpp/tutorial_code/ImgTrans/Remap_Demo.cpp)
     @include samples/cpp/tutorial_code/ImgTrans/Remap_Demo.cpp
 @end_toggle
 
 @add_toggle_java
--   The tutorial code's is shown lines below. You can also download it from
+-   The tutorial code is shown in the lines below. You can also download it from
     [here](https://github.com/opencv/opencv/tree/5.x/samples/java/tutorial_code/ImgTrans/remap/RemapDemo.java)
     @include samples/java/tutorial_code/ImgTrans/remap/RemapDemo.java
 @end_toggle
 
 @add_toggle_python
--   The tutorial code's is shown lines below. You can also download it from
+-   The tutorial code is shown in the lines below. You can also download it from
     [here](https://github.com/opencv/opencv/tree/5.x/samples/python/tutorial_code/ImgTrans/remap/Remap_Demo.py)
     @include samples/python/tutorial_code/ImgTrans/remap/Remap_Demo.py
 @end_toggle
diff --git a/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.markdown b/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.markdown
index 74b066db5d45..91040416083e 100644
--- a/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.markdown
+++ b/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.markdown
@@ -72,7 +72,7 @@ Theory
 
 -#  We mentioned that an Affine Transformation is basically a **relation**
     between two images. The information about this relation can come, roughly, in two ways:
-    -#  We know both \f$X\f$ and T and we also know that they are related. Then our task is to find \f$M\f$
+    -#  We know both \f$X\f$ and \f$T\f$ and we also know that they are related. Then our task is to find \f$M\f$
     -#  We know \f$M\f$ and \f$X\f$. To obtain \f$T\f$ we only need to apply \f$T = M \cdot X\f$. Our information
         for \f$M\f$ may be explicit (i.e. have the 2-by-3 matrix) or it can come as a geometric relation
         between points.
diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown
index 905d7b95c2be..982a78570868 100644
--- a/doc/tutorials/introduction/config_reference/config_reference.markdown
+++ b/doc/tutorials/introduction/config_reference/config_reference.markdown
@@ -586,7 +586,6 @@ Following options can be used to change installation layout for common scenarios
 | `BUILD_JAVA` | _ON_ | Enable Java wrappers build. Java SDK and Ant must be installed. |
 | `BUILD_FAT_JAVA_LIB` | _ON_ (for static Android builds) | Build single _opencv_java_ dynamic library containing all library functionality bundled with Java bindings. |
 | `BUILD_opencv_python3` | _ON_ | Build python3 bindings. Python with development files and numpy must be installed. |
-| `CAROTENE_NEON_ARCH` | '(auto)' | Switch NEON Arch for Carotene. If it sets nothing, it will be auto-detected. If it sets 8, ARMv8(and later) is used. Otherwise, ARMv7 is used. |
 
 TODO: need separate tutorials covering bindings builds
 
diff --git a/modules/core/include/opencv2/core/cuda_types.hpp b/modules/core/include/opencv2/core/cuda_types.hpp
index b33f06179d13..ddee2f3d591a 100644
--- a/modules/core/include/opencv2/core/cuda_types.hpp
+++ b/modules/core/include/opencv2/core/cuda_types.hpp
@@ -66,6 +66,9 @@
     #define __CV_CUDA_HOST_DEVICE__
 #endif
 
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core.hpp"
+
 namespace cv
 {
     namespace cuda
@@ -124,6 +127,11 @@ namespace cv
 
             int cols;
             int rows;
+
+            CV_NODISCARD_STD __CV_CUDA_HOST_DEVICE__ Size size() const { return {cols, rows}; }
+            CV_NODISCARD_STD __CV_CUDA_HOST_DEVICE__ T& operator ()(const Point &pos)       { return (*this)(pos.y, pos.x); }
+            CV_NODISCARD_STD __CV_CUDA_HOST_DEVICE__ const T& operator ()(const Point &pos) const { return (*this)(pos.y, pos.x); }
+            using PtrStep<T>::operator();
         };
 
         typedef PtrStepSz<unsigned char> PtrStepSzb;
diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
index 3714774cf679..5ce799bb20ad 100644
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@@ -758,7 +758,11 @@ __CV_ENUM_FLAGS_BITWISE_XOR_EQ   (EnumType, EnumType)
 #    define __has_cpp_attribute(__x) 0
 #  endif
 #  if __has_cpp_attribute(nodiscard)
-#    define CV_NODISCARD_STD [[nodiscard]]
+#    if defined(__NVCC__) && __CUDACC_VER_MAJOR__ < 12
+#       define CV_NODISCARD_STD
+#    else
+#       define CV_NODISCARD_STD [[nodiscard]]
+#    endif
 #  elif __cplusplus >= 201703L
 //   available when compiler is C++17 compliant
 #    define CV_NODISCARD_STD [[nodiscard]]
diff --git a/modules/core/include/opencv2/core/hal/intrin_lasx.hpp b/modules/core/include/opencv2/core/hal/intrin_lasx.hpp
index 4a98dbf96ebe..db491cc1375d 100644
--- a/modules/core/include/opencv2/core/hal/intrin_lasx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_lasx.hpp
@@ -650,16 +650,18 @@ inline v_float32x8 v256_shuffle(const v_float32x8 &a)
 template<int m>
 inline v_float64x4 v256_shuffle(const v_float64x4 &a)
 {
-    int imm8 = m & 0b0001;  //0 or 1
-    if (m & 0x0b0010) imm8 |= 0b0100;
-    //else imm8 |= 0b0000;
-    if (m & 0x0b0100) imm8 |= 0b110000;  //2 or 3
-    else imm8 |= 0b100000;
-    if (m & 0x0b1000) imm8 |= 0b11000000;
-    else imm8 |= 0b10000000;
+    const int m1 = m & 0b1;
+    const int m2 = m & 0b10;
+    const int m3 = m & 0b100;
+    const int m4 = m & 0b1000;
+    const int m5 = m2 << 1;
+    const int m6 = m3 << 2;
+    const int m7 = m4 << 3;
+    const int m8 = m1 | m5 | m6 | m7;  // merge the spread selector bits (0,2,4,6); AND-ing these disjoint masks would always yield 0
 
-    return v_float64x4(__lasx_xvpermi_d(*((__m256i*)&a.val), imm8));
+    return v_float64x4(__lasx_xvshuf4i_d(*((__m256i*)&a.val), *((__m256i*)&a.val), m8));
 }
+
 template<typename _Tpvec>
 inline void v256_zip(const _Tpvec& a, const _Tpvec& b, _Tpvec& ab0, _Tpvec& ab1)
 {
@@ -1100,7 +1102,7 @@ inline v_uint8x32 v_rotate_right(const v_uint8x32& a, const v_uint8x32& b)
 template<int imm>
 inline v_uint8x32 v_rotate_left(const v_uint8x32& a)
 {
-    enum {IMM_L = (imm - 16) & 0xFF};
+    enum {IMM_L = ((imm - 16) & 0xFF) > 31 ? 31 : ((imm - 16) & 0xFF)};
     enum {IMM_R = (16 - imm) & 0xFF};
 
     if (imm == 0) return a;
@@ -1117,7 +1119,7 @@ inline v_uint8x32 v_rotate_left(const v_uint8x32& a)
 template<int imm>
 inline v_uint8x32 v_rotate_right(const v_uint8x32& a)
 {
-    enum {IMM_L = (imm - 16) & 0xFF};
+    enum {IMM_L = ((imm - 16) & 0xFF) > 31 ? 31 : ((imm - 16) & 0xFF)};
 
     if (imm == 0) return a;
     if (imm > 32) return v_uint8x32();
diff --git a/modules/core/perf/opencl/perf_arithm.cpp b/modules/core/perf/opencl/perf_arithm.cpp
index 4de20ca2284a..ad5ae671d02d 100644
--- a/modules/core/perf/opencl/perf_arithm.cpp
+++ b/modules/core/perf/opencl/perf_arithm.cpp
@@ -358,7 +358,8 @@ typedef TestBaseWithParam<FlipParams> FlipFixture;
 
 OCL_PERF_TEST_P(FlipFixture, Flip,
             ::testing::Combine(OCL_TEST_SIZES,
-                               OCL_TEST_TYPES, FlipType::all()))
+                               ::testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_32FC1, CV_32FC4),
+                               FlipType::all()))
 {
     const FlipParams params = GetParam();
     const Size srcSize = get<0>(params);
@@ -388,7 +389,9 @@ typedef tuple<Size, MatType, RotateType> RotateParams;
 typedef TestBaseWithParam<RotateParams> RotateFixture;
 
 OCL_PERF_TEST_P(RotateFixture, rotate,
-                ::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES, RotateType::all()))
+                ::testing::Combine(OCL_TEST_SIZES,
+                                   ::testing::Values(CV_8UC1, CV_8UC2, CV_8UC4, CV_32FC1, CV_32FC4),
+                                   RotateType::all()))
 {
     const RotateParams params = GetParam();
     const Size srcSize   = get<0>(params);
diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp
index 2e038e7da2a0..7ba4f87f4fdd 100644
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -1049,6 +1049,13 @@ void cv::add( InputArray src1, InputArray src2, OutputArray dst,
 {
     CV_INSTRUMENT_REGION();
 
+    CV_Assert(src1.empty() == src2.empty());
+    if (src1.empty() && src2.empty())
+    {
+        dst.release();
+        return;
+    }
+
     arithm_op(src1, src2, dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD );
 }
 
@@ -1057,6 +1064,13 @@ void cv::subtract( InputArray _src1, InputArray _src2, OutputArray _dst,
 {
     CV_INSTRUMENT_REGION();
 
+    CV_Assert(_src1.empty() == _src2.empty());
+    if (_src1.empty() && _src2.empty())
+    {
+        _dst.release();
+        return;
+    }
+
     ExtendedTypeFunc subExtFunc = getSubExtFunc(_src1.depth(), _src2.depth(), dtype < 0 ? _dst.depth() : dtype);
     arithm_op(_src1, _src2, _dst, mask, dtype, getSubTab(), false, 0, OCL_OP_SUB,
               /* extendedFunc */ subExtFunc);
@@ -1066,6 +1080,13 @@ void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )
 {
     CV_INSTRUMENT_REGION();
 
+    CV_Assert(src1.empty() == src2.empty());
+    if (src1.empty() && src2.empty())
+    {
+        dst.release();
+        return;
+    }
+
     arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab(), false, 0, OCL_OP_ABSDIFF);
 }
 
@@ -1186,6 +1207,13 @@ void divide(InputArray src1, InputArray src2,
 {
     CV_INSTRUMENT_REGION();
 
+    CV_Assert(src1.empty() == src2.empty());
+    if (src1.empty() && src2.empty())
+    {
+        dst.release();
+        return;
+    }
+
     arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE);
 }
 
@@ -1194,6 +1222,12 @@ void divide(double scale, InputArray src2,
 {
     CV_INSTRUMENT_REGION();
 
+    if (src2.empty())
+    {
+        dst.release();
+        return;
+    }
+
     arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE);
 }
 
@@ -1236,6 +1270,13 @@ void cv::addWeighted( InputArray src1, double alpha, InputArray src2,
 {
     CV_INSTRUMENT_REGION();
 
+    CV_Assert(src1.empty() == src2.empty());
+    if (src1.empty() && src2.empty())
+    {
+        dst.release();
+        return;
+    }
+
     double scalars[] = {alpha, beta, gamma};
     arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars, OCL_OP_ADDW);
 }
diff --git a/modules/core/src/minmax.dispatch.cpp b/modules/core/src/minmax.dispatch.cpp
index 09bcde0559d0..20e7bd275042 100644
--- a/modules/core/src/minmax.dispatch.cpp
+++ b/modules/core/src/minmax.dispatch.cpp
@@ -21,6 +21,8 @@ static MinMaxIdxFunc getMinMaxIdxFunc(int depth)
                     CV_CPU_DISPATCH_MODES_ALL);
 }
 
+// The function expects 1-based indexing for ofs
+// Zero is treated as invalid offset (not found)
 static void ofs2idx(const Mat& a, size_t ofs, int* idx)
 {
     int i, d = a.dims;
@@ -324,9 +326,9 @@ void cv::minMaxIdx(InputArray _src, double* minVal,
         {
             // minIdx[0] and minIdx[0] are always 0 for "flatten" version
             if (minIdx)
-                ofs2idx(src, minIdx[1], minIdx);
+                ofs2idx(src, minIdx[1]+1, minIdx);
             if (maxIdx)
-                ofs2idx(src, maxIdx[1], maxIdx);
+                ofs2idx(src, maxIdx[1]+1, maxIdx);
             return;
         }
         else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED)
diff --git a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
index 3ba5253e9dd9..5c851066dfe2 100644
--- a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
+++ b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
@@ -460,7 +460,7 @@ PERF_TEST_P_(DivPerfTest, TestPerformance)
 
     //This condition need to workaround the #21044 issue in the OpenCV.
     //It reinitializes divider matrix without zero values for CV_16S DST type.
-    if (dtype == CV_16S && dtype != type)
+    if (dtype != type)
         cv::randu(in_mat2, cv::Scalar::all(1), cv::Scalar::all(255));
 
     // OpenCV code ///////////////////////////////////////////////////////////
@@ -552,8 +552,7 @@ PERF_TEST_P_(DivRCPerfTest, TestPerformance)
     initMatsRandU(type, sz, dtype, false);
     //This condition need to workaround the #21044 issue in the OpenCV.
     //It reinitializes divider matrix without zero values for CV_16S DST type.
-    if (dtype == CV_16S || (type == CV_16S && dtype == -1))
-        cv::randu(in_mat1, cv::Scalar::all(1), cv::Scalar::all(255));
+    cv::randu(in_mat1, cv::Scalar::all(1), cv::Scalar::all(255));
 
     // OpenCV code ///////////////////////////////////////////////////////////
     cv::divide(sc, in_mat1, out_mat_ocv, scale, dtype);
diff --git a/modules/imgcodecs/src/grfmt_avif.cpp b/modules/imgcodecs/src/grfmt_avif.cpp
index c1d3682d0c4e..98ddb7336268 100644
--- a/modules/imgcodecs/src/grfmt_avif.cpp
+++ b/modules/imgcodecs/src/grfmt_avif.cpp
@@ -143,6 +143,7 @@ AvifDecoder::AvifDecoder() {
   m_buf_supported = true;
   channels_ = 0;
   decoder_ = avifDecoderCreate();
+  decoder_->strictFlags = AVIF_STRICT_DISABLED;
 }
 
 AvifDecoder::~AvifDecoder() {
@@ -166,6 +167,7 @@ bool AvifDecoder::checkSignature(const String &signature) const {
   std::unique_ptr<avifDecoder, decltype(&avifDecoderDestroy)> decoder(
       avifDecoderCreate(), avifDecoderDestroy);
   if (!decoder) return false;
+  decoder->strictFlags = AVIF_STRICT_DISABLED;
   OPENCV_AVIF_CHECK_STATUS(
       avifDecoderSetIOMemory(
           decoder.get(), reinterpret_cast<const uint8_t *>(signature.c_str()),
diff --git a/modules/imgcodecs/test/test_tiff.cpp b/modules/imgcodecs/test/test_tiff.cpp
index fb607bf18f8a..f9b4edaa1084 100644
--- a/modules/imgcodecs/test/test_tiff.cpp
+++ b/modules/imgcodecs/test/test_tiff.cpp
@@ -1096,7 +1096,6 @@ INSTANTIATE_TEST_CASE_P(AllModes, Imgcodecs_Tiff_Modes, testing::ValuesIn(all_mo
 TEST(Imgcodecs_Tiff_Modes, write_multipage)
 {
     const string root = cvtest::TS::ptr()->get_data_path();
-    const string filename = root + "readwrite/multipage.tif";
     const string page_files[] = {
         "readwrite/multipage_p1.tif",
         "readwrite/multipage_p2.tif",
@@ -1109,7 +1108,7 @@ TEST(Imgcodecs_Tiff_Modes, write_multipage)
     vector<Mat> pages;
     for (size_t i = 0; i < page_count; i++)
     {
-        const Mat page = imread(root + page_files[i]);
+        const Mat page = imread(root + page_files[i], IMREAD_REDUCED_GRAYSCALE_8 + (int)i);
         pages.push_back(page);
     }
 
diff --git a/modules/imgproc/perf/perf_warp.cpp b/modules/imgproc/perf/perf_warp.cpp
index 688d449a55b4..cb4c32c905f2 100644
--- a/modules/imgproc/perf/perf_warp.cpp
+++ b/modules/imgproc/perf/perf_warp.cpp
@@ -12,7 +12,7 @@ CV_ENUM(InterType, INTER_NEAREST, INTER_LINEAR)
 CV_ENUM(InterTypeExtended, INTER_NEAREST, INTER_LINEAR, WARP_RELATIVE_MAP)
 CV_ENUM(RemapMode, HALF_SIZE, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH)
 
-typedef TestBaseWithParam< tuple<Size, InterType, BorderMode> > TestWarpAffine;
+typedef TestBaseWithParam< tuple<MatType, Size, InterType, BorderMode> > TestWarpAffine;
 typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, int> > TestWarpPerspective;
 typedef TestBaseWithParam< tuple<Size, InterType, BorderMode, MatType> > TestWarpPerspectiveNear_t;
 typedef TestBaseWithParam< tuple<MatType, Size, InterTypeExtended, BorderMode, RemapMode> > TestRemap;
@@ -21,6 +21,7 @@ void update_map(const Mat& src, Mat& map_x, Mat& map_y, const int remapMode, boo
 
 PERF_TEST_P( TestWarpAffine, WarpAffine,
              Combine(
+                Values(CV_8UC1, CV_8UC4),
                 Values( szVGA, sz720p, sz1080p ),
                 InterType::all(),
                 BorderMode::all()
@@ -28,13 +29,14 @@ PERF_TEST_P( TestWarpAffine, WarpAffine,
 )
 {
     Size sz, szSrc(512, 512);
-    int borderMode, interType;
-    sz         = get<0>(GetParam());
-    interType  = get<1>(GetParam());
-    borderMode = get<2>(GetParam());
+    int borderMode, interType, dataType;
+    dataType   = get<0>(GetParam());
+    sz         = get<1>(GetParam());
+    interType  = get<2>(GetParam());
+    borderMode = get<3>(GetParam());
     Scalar borderColor = Scalar::all(150);
 
-    Mat src(szSrc,CV_8UC4), dst(sz, CV_8UC4);
+    Mat src(szSrc, dataType), dst(sz, dataType);
     cvtest::fillGradient(src);
     if(borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1);
     Mat warpMat = getRotationMatrix2D(Point2f(src.cols/2.f, src.rows/2.f), 30., 2.2);
@@ -47,6 +49,7 @@ PERF_TEST_P( TestWarpAffine, WarpAffine,
 
 PERF_TEST_P(TestWarpAffine, DISABLED_WarpAffine_ovx,
     Combine(
+        Values(CV_8UC1, CV_8UC4),
         Values(szVGA, sz720p, sz1080p),
         InterType::all(),
         BorderMode::all()
@@ -54,13 +57,16 @@ PERF_TEST_P(TestWarpAffine, DISABLED_WarpAffine_ovx,
 )
 {
     Size sz, szSrc(512, 512);
-    int borderMode, interType;
-    sz = get<0>(GetParam());
-    interType = get<1>(GetParam());
-    borderMode = get<2>(GetParam());
+    int borderMode, interType, dataType;
+
+    dataType   = get<0>(GetParam());
+    sz         = get<1>(GetParam());
+    interType  = get<2>(GetParam());
+    borderMode = get<3>(GetParam());
+
     Scalar borderColor = Scalar::all(150);
 
-    Mat src(szSrc, CV_8UC1), dst(sz, CV_8UC1);
+    Mat src(szSrc, dataType), dst(sz, dataType);
     cvtest::fillGradient(src);
     if (borderMode == BORDER_CONSTANT) cvtest::smoothBorder(src, borderColor, 1);
     Mat warpMat = getRotationMatrix2D(Point2f(src.cols / 2.f, src.rows / 2.f), 30., 2.2);
diff --git a/modules/imgproc/src/color_rgb.simd.hpp b/modules/imgproc/src/color_rgb.simd.hpp
index ca39d8a9083e..40e385446042 100644
--- a/modules/imgproc/src/color_rgb.simd.hpp
+++ b/modules/imgproc/src/color_rgb.simd.hpp
@@ -1088,11 +1088,6 @@ struct mRGBA2RGBA<uchar>
 
             uchar v3_half = v3 / 2;
 
-            dst[0] = (v3==0)? 0 : (v0 * max_val + v3_half) / v3;
-            dst[1] = (v3==0)? 0 : (v1 * max_val + v3_half) / v3;
-            dst[2] = (v3==0)? 0 : (v2 * max_val + v3_half) / v3;
-            dst[3] = v3;
-
             dst[0] = (v3==0)? 0 : saturate_cast<uchar>((v0 * max_val + v3_half) / v3);
             dst[1] = (v3==0)? 0 : saturate_cast<uchar>((v1 * max_val + v3_half) / v3);
             dst[2] = (v3==0)? 0 : saturate_cast<uchar>((v2 * max_val + v3_half) / v3);
diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp
index 28f5b725fe6c..e5bdaa8fc412 100644
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@@ -1983,65 +1983,46 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
         }
         else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
         {
-            if( nninterpolate )
+            #if CV_TRY_SSE4_1
+            if( useSSE4_1 )
+                opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width);
+            else
+            #endif
             {
                 #if CV_SIMD128
-                int span = VTraits<v_float32x4>::vlanes();
                 {
-                    for( ; x <= (size.width << 1) - span * 2; x += span * 2 )
-                        v_store(dst1 + x, v_pack(v_round(v_load(src1f + x)),
-                                                 v_round(v_load(src1f + x + span))));
+                    v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
+                    v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
+                    v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
+                    int span = VTraits<v_uint16x8>::vlanes();
+                    for (; x <= size.width - span; x += span )
+                    {
+                        v_float32x4 v_src0[2], v_src1[2];
+                        v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]);
+                        v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]);
+                        v_int32x4 v_ix0 = v_round(v_mul(v_src0[0], v_scale));
+                        v_int32x4 v_ix1 = v_round(v_mul(v_src1[0], v_scale));
+                        v_int32x4 v_iy0 = v_round(v_mul(v_src0[1], v_scale));
+                        v_int32x4 v_iy1 = v_round(v_mul(v_src1[1], v_scale));
+
+                        v_int16x8 v_dst[2];
+                        v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
+                        v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
+                        v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
+
+                        v_store(dst2 + x, v_pack_u(
+                            v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask))),
+                            v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask)))));
+                    }
                 }
                 #endif
                 for( ; x < size.width; x++ )
                 {
-                    dst1[x*2] = saturate_cast<short>(src1f[x*2]);
-                    dst1[x*2+1] = saturate_cast<short>(src1f[x*2+1]);
-                }
-            }
-            else
-            {
-                #if CV_TRY_SSE4_1
-                if( useSSE4_1 )
-                    opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width);
-                else
-                #endif
-                {
-                    #if CV_SIMD128
-                    {
-                        v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
-                        v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
-                        v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
-                        int span = VTraits<v_uint16x8>::vlanes();
-                        for (; x <= size.width - span; x += span )
-                        {
-                            v_float32x4 v_src0[2], v_src1[2];
-                            v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]);
-                            v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]);
-                            v_int32x4 v_ix0 = v_round(v_mul(v_src0[0], v_scale));
-                            v_int32x4 v_ix1 = v_round(v_mul(v_src1[0], v_scale));
-                            v_int32x4 v_iy0 = v_round(v_mul(v_src0[1], v_scale));
-                            v_int32x4 v_iy1 = v_round(v_mul(v_src1[1], v_scale));
-
-                            v_int16x8 v_dst[2];
-                            v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
-                            v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
-                            v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
-
-                            v_store(dst2 + x, v_pack_u(
-                                v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask))),
-                                v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask)))));
-                        }
-                    }
-                    #endif
-                    for( ; x < size.width; x++ )
-                    {
-                        int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
-                        int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
-                        dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
-                        dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
-                        dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
-                    }
+                    int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
+                    int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
+                    dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
+                    dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
+                    dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
                 }
             }
         }
diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp
index 9b0f72fcfbf5..1b8b85a04b41 100644
--- a/modules/imgproc/src/resize.cpp
+++ b/modules/imgproc/src/resize.cpp
@@ -94,6 +94,10 @@ static void hlineResize(ET* src, int cn, int *ofst, FT* m, FT* dst, int dst_min,
             }
         }
     }
+    // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+    if (i >= dst_width) {
+        return;
+    }
     ET* src_last = src + cn*ofst[dst_width - 1];
     for (; i < dst_width; i++) // Points that fall right from src image so became equal to rightmost src point
     {
@@ -125,6 +129,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 2, true, 1>
             ET* px = src + ofst[i];
             *(dst++) = m[0] * px[0] + m[1] * px[1];
         }
+        // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
         src0 = (src + ofst[dst_width - 1])[0];
         for (; i < dst_width; i++) // Points that fall right from src image so became equal to rightmost src point
         {
@@ -149,6 +157,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 2, true, 2>
             *(dst++) = m[0] * px[0] + m[1] * px[2];
             *(dst++) = m[0] * px[1] + m[1] * px[3];
         }
+        // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
         src0 = (src + 2*ofst[dst_width - 1])[0];
         src1 = (src + 2*ofst[dst_width - 1])[1];
         for (; i < dst_width; i++) // Points that fall right from src image so became equal to rightmost src point
@@ -177,6 +189,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 2, true, 3>
             *(dst++) = m[0] * px[1] + m[1] * px[4];
             *(dst++) = m[0] * px[2] + m[1] * px[5];
         }
+        // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
         src0 = (src + 3*ofst[dst_width - 1])[0];
         src1 = (src + 3*ofst[dst_width - 1])[1];
         src2 = (src + 3*ofst[dst_width - 1])[2];
@@ -209,6 +225,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 2, true, 4>
             *(dst++) = m[0] * px[2] + m[1] * px[6];
             *(dst++) = m[0] * px[3] + m[1] * px[7];
         }
+        // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
         src0 = (src + 4*ofst[dst_width - 1])[0];
         src1 = (src + 4*ofst[dst_width - 1])[1];
         src2 = (src + 4*ofst[dst_width - 1])[2];
@@ -237,6 +257,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 4, true, 1>
             ET* px = src + ofst[i];
             *(dst++) = m[0] * src[0] + m[1] * src[1] + m[2] * src[2] + m[3] * src[3];
         }
+        // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
         src0 = (src + ofst[dst_width - 1])[0];
         for (; i < dst_width; i++) // Points that fall right from src image so became equal to rightmost src point
         {
@@ -261,6 +285,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 4, true, 2>
             *(dst++) = m[0] * src[0] + m[1] * src[2] + m[2] * src[4] + m[3] * src[6];
             *(dst++) = m[0] * src[1] + m[1] * src[3] + m[2] * src[5] + m[3] * src[7];
         }
+        // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
         src0 = (src + 2*ofst[dst_width - 1])[0];
         src1 = (src + 2*ofst[dst_width - 1])[1];
         for (; i < dst_width; i++) // Points that fall right from src image so became equal to rightmost src point
@@ -289,6 +317,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 4, true, 3>
             *(dst++) = m[0] * src[1] + m[1] * src[4] + m[2] * src[7] + m[3] * src[10];
             *(dst++) = m[0] * src[2] + m[1] * src[5] + m[2] * src[8] + m[3] * src[11];
         }
+        // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
         src0 = (src + 3*ofst[dst_width - 1])[0];
         src1 = (src + 3*ofst[dst_width - 1])[1];
         src2 = (src + 3*ofst[dst_width - 1])[2];
@@ -321,6 +353,10 @@ template <typename ET, typename FT> struct hline<ET, FT, 4, true, 4>
             *(dst++) = m[0] * src[2] + m[1] * src[6] + m[2] * src[10] + m[3] * src[14];
             *(dst++) = m[0] * src[3] + m[1] * src[7] + m[2] * src[11] + m[3] * src[15];
         }
+        // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+        if (i >= dst_width) {
+            return;
+        }
         src0 = (src + 4*ofst[dst_width - 1])[0];
         src1 = (src + 4*ofst[dst_width - 1])[1];
         src2 = (src + 4*ofst[dst_width - 1])[2];
@@ -382,6 +418,10 @@ void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 1>(uint8_t* src, int, int *o
         uint8_t* px = src + ofst[i];
         *(dst++) = m[0] * px[0] + m[1] * px[1];
     }
+    // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+    if (i >= dst_width) {
+        return;
+    }
     src_0 = (src + ofst[dst_width - 1])[0];
 #if (CV_SIMD || CV_SIMD_SCALABLE)
     v_src_0 = vx_setall_u16(*((uint16_t*)&src_0));
@@ -438,6 +478,10 @@ void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 2>(uint8_t* src, int, int *o
         *(dst++) = m[0] * px[0] + m[1] * px[2];
         *(dst++) = m[0] * px[1] + m[1] * px[3];
     }
+    // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+    if (i >= dst_width) {
+        return;
+    }
     ((ufixedpoint16*)(srccn.w))[0] = (src + 2 * ofst[dst_width - 1])[0]; ((ufixedpoint16*)(srccn.w))[1] = (src + 2 * ofst[dst_width - 1])[1];
 #if (CV_SIMD || CV_SIMD_SCALABLE)
     v_srccn = v_reinterpret_as_u16(vx_setall_u32(srccn.d));
@@ -510,6 +554,10 @@ void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 3>(uint8_t* src, int, int *o
         *(dst++) = m[0] * px[1] + m[1] * px[4];
         *(dst++) = m[0] * px[2] + m[1] * px[5];
     }
+    // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+    if (i >= dst_width) {
+        return;
+    }
     ((ufixedpoint16*)(srccn.w))[0] = (src + 3*ofst[dst_width - 1])[0];
     ((ufixedpoint16*)(srccn.w))[1] = (src + 3*ofst[dst_width - 1])[1];
     ((ufixedpoint16*)(srccn.w))[2] = (src + 3*ofst[dst_width - 1])[2];
@@ -583,6 +631,10 @@ void hlineResizeCn<uint8_t, ufixedpoint16, 2, true, 4>(uint8_t* src, int, int *o
         *(dst++) = m[0] * px[2] + m[1] * px[6];
         *(dst++) = m[0] * px[3] + m[1] * px[7];
     }
+    // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+    if (i >= dst_width) {
+        return;
+    }
     ((ufixedpoint16*)(srccn.w))[0] = (src + 4 * ofst[dst_width - 1])[0]; ((ufixedpoint16*)(srccn.w))[1] = (src + 4 * ofst[dst_width - 1])[1];
     ((ufixedpoint16*)(srccn.w))[2] = (src + 4 * ofst[dst_width - 1])[2]; ((ufixedpoint16*)(srccn.w))[3] = (src + 4 * ofst[dst_width - 1])[3];
 #if (CV_SIMD || CV_SIMD_SCALABLE)
@@ -634,6 +686,10 @@ void hlineResizeCn<uint16_t, ufixedpoint32, 2, true, 1>(uint16_t* src, int, int
         uint16_t* px = src + ofst[i];
         *(dst++) = m[0] * px[0] + m[1] * px[1];
     }
+    // Return early once i has reached dst_width: ofst[dst_width - 1] may be unset, and reading it would be a random memory read.
+    if (i >= dst_width) {
+        return;
+    }
     src_0 = (src + ofst[dst_width - 1])[0];
 #if (CV_SIMD || CV_SIMD_SCALABLE)
     v_src_0 = vx_setall_u32(*((uint32_t*)&src_0));
diff --git a/modules/imgproc/test/test_color.cpp b/modules/imgproc/test/test_color.cpp
index 3bebb563de8e..a1f70103befa 100644
--- a/modules/imgproc/test/test_color.cpp
+++ b/modules/imgproc/test/test_color.cpp
@@ -455,7 +455,7 @@ void CV_ColorGrayTest::get_test_array_types_and_sizes( int test_case_idx, vector
 double CV_ColorGrayTest::get_success_error_level( int /*test_case_idx*/, int i, int j )
 {
     int depth = test_mat[i][j].depth();
-    return depth == CV_8U ? 2 : depth == CV_16U ? 16 : 1e-5;
+    return depth == CV_8U ? 1 : depth == CV_16U ? 2 : 1e-5;
 }
 
 
@@ -2844,6 +2844,11 @@ void runCvtColorBitExactCheck(ColorConversionCodes code, int inputType, uint32_t
     }
 }
 
+TEST(Imgproc_cvtColor_BE, COLOR_RGB2GRAY)  { runCvtColorBitExactCheck(COLOR_RGB2GRAY,  CV_8UC3, 0x416bd44a); }
+TEST(Imgproc_cvtColor_BE, COLOR_RGBA2GRAY) { runCvtColorBitExactCheck(COLOR_RGBA2GRAY, CV_8UC3, 0x416bd44a); }
+TEST(Imgproc_cvtColor_BE, COLOR_BGR2GRAY)  { runCvtColorBitExactCheck(COLOR_BGR2GRAY,  CV_8UC3, 0x3008c6b8); }
+TEST(Imgproc_cvtColor_BE, COLOR_BGRA2GRAY) { runCvtColorBitExactCheck(COLOR_BGRA2GRAY, CV_8UC3, 0x3008c6b8); }
+
 TEST(Imgproc_cvtColor_BE, COLOR_BGR2YUV) { runCvtColorBitExactCheck(COLOR_BGR2YUV, CV_8UC3, 0xc2cbcfda); }
 TEST(Imgproc_cvtColor_BE, COLOR_RGB2YUV) { runCvtColorBitExactCheck(COLOR_RGB2YUV, CV_8UC3, 0x4e98e757); }
 TEST(Imgproc_cvtColor_BE, COLOR_YUV2BGR) { runCvtColorBitExactCheck(COLOR_YUV2BGR, CV_8UC3, 0xb2c62a3f); }
diff --git a/modules/videoio/src/cap_v4l.cpp b/modules/videoio/src/cap_v4l.cpp
index 5f0ba7fad33c..47b6181598fc 100644
--- a/modules/videoio/src/cap_v4l.cpp
+++ b/modules/videoio/src/cap_v4l.cpp
@@ -1449,11 +1449,11 @@ void CvCaptureCAM_V4L::convertToRgb(const Buffer &currentBuffer)
         return;
     case V4L2_PIX_FMT_NV12:
         cv::cvtColor(cv::Mat(imageSize.height * 3 / 2, imageSize.width, CV_8U, start), frame,
-                     COLOR_YUV2RGB_NV12);
+                     COLOR_YUV2BGR_NV12);
         return;
     case V4L2_PIX_FMT_NV21:
         cv::cvtColor(cv::Mat(imageSize.height * 3 / 2, imageSize.width, CV_8U, start), frame,
-                     COLOR_YUV2RGB_NV21);
+                     COLOR_YUV2BGR_NV21);
         return;
 #ifdef HAVE_JPEG
     case V4L2_PIX_FMT_MJPEG: