From 198df77c93d9813ceaa39db66c2d9393f96c7acd Mon Sep 17 00:00:00 2001
From: Mark Harfouche <mark.harfouche@gmail.com>
Date: Tue, 2 Oct 2018 17:11:48 -0400
Subject: [PATCH 1/5] MAINT: provide an algorithm that blocks matrices with a
 single memory copy.

---
 numpy/core/shape_base.py | 203 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 195 insertions(+), 8 deletions(-)

diff --git a/numpy/core/shape_base.py b/numpy/core/shape_base.py
index fde23076ba4e..a928dc72be8f 100644
--- a/numpy/core/shape_base.py
+++ b/numpy/core/shape_base.py
@@ -3,6 +3,8 @@
 __all__ = ['atleast_1d', 'atleast_2d', 'atleast_3d', 'block', 'hstack',
            'stack', 'vstack']
 
+import functools
+import operator
 
 from . import numeric as _nx
 from .numeric import array, asanyarray, newaxis
@@ -432,6 +434,10 @@ def _block_check_depths_match(arrays, parent_index=[]):
         refer to it, and the last index along the empty axis will be `None`.
     max_arr_ndim : int
         The maximum of the ndims of the arrays nested in `arrays`.
+    final_size: int
+        The number of elements in the final array. This is used to motivate
+        the choice of algorithm, based on benchmarking wisdom.
+
     """
     if type(arrays) is tuple:
         # not strictly necessary, but saves us from:
@@ -450,8 +456,9 @@ def _block_check_depths_match(arrays, parent_index=[]):
         idxs_ndims = (_block_check_depths_match(arr, parent_index + [i])
                       for i, arr in enumerate(arrays))
 
-        first_index, max_arr_ndim = next(idxs_ndims)
-        for index, ndim in idxs_ndims:
+        first_index, max_arr_ndim, final_size = next(idxs_ndims)
+        for index, ndim, size in idxs_ndims:
+            final_size += size
             if ndim > max_arr_ndim:
                 max_arr_ndim = ndim
             if len(index) != len(first_index):
@@ -466,13 +473,15 @@ def _block_check_depths_match(arrays, parent_index=[]):
             # propagate our flag that indicates an empty list at the bottom
             if index[-1] is None:
                 first_index = index
-        return first_index, max_arr_ndim
+
+        return first_index, max_arr_ndim, final_size
     elif type(arrays) is list and len(arrays) == 0:
         # We've 'bottomed out' on an empty list
-        return parent_index + [None], 0
+        return parent_index + [None], 0, 0
     else:
         # We've 'bottomed out' - arrays is either a scalar or an array
-        return parent_index, _nx.ndim(arrays)
+        size = _nx.size(arrays)
+        return parent_index, _nx.ndim(arrays), size
 
 
 def _atleast_nd(a, ndim):
@@ -481,9 +490,132 @@ def _atleast_nd(a, ndim):
     return array(a, ndmin=ndim, copy=False, subok=True)
 
 
+def _accumulate(values):
+    # Helper function because Python 2.7 doesn't have
+    # itertools.accumulate
+    value = 0
+    accumulated = []
+    for v in values:
+        value += v
+        accumulated.append(value)
+    return accumulated
+
+
+def _concatenate_shapes(shapes, axis):
+    """Given array shapes, return the resulting shape and slice prefixes.
+
+    These help in nested concatenation.
+    Returns
+    -------
+    shape: tuple of int
+        This tuple satisfies:
+        ```
+        shape, _ = _concatenate_shapes([arr.shape for arr in arrs], axis)
+        shape == concatenate(arrs, axis).shape
+        ```
+
+    slice_prefixes: tuple of (slice(start, end), )
+        For a list of arrays being concatenated, this returns the slice
+        in the larger array at axis that needs to be sliced into.
+
+        For example, the following holds:
+        ```
+        ret = concatenate([a, b, c], axis)
+        shapes = [a.shape, b.shape, c.shape]
+        _, (sl_a, sl_b, sl_c) = _concatenate_shapes(shapes, axis)
+        ret[(slice(None),) * axis + sl_a] == a
+        ret[(slice(None),) * axis + sl_b] == b
+        ret[(slice(None),) * axis + sl_c] == c
+        ```
+
+        These are called slice prefixes since they are used in the recursive
+        blocking algorithm to compute the left-most slices during the
+        recursion. Therefore, they must be prepended to the rest of the slice
+        that was computed deeper in the recursion.
+
+        These are returned as tuples to ensure that they can quickly be added
+        to an existing slice tuple without creating a new tuple every time.
+
+    """
+    # Cache a result that will be reused.
+    shape_at_axis = [shape[axis] for shape in shapes]
+
+    # Take a shape, any shape
+    first_shape = shapes[0]
+    first_shape_pre = first_shape[:axis]
+    first_shape_post = first_shape[axis+1:]
+
+    if any(shape[:axis] != first_shape_pre or
+           shape[axis+1:] != first_shape_post for shape in shapes):
+        raise ValueError(
+            'Mismatched array shapes in block along axis {}.'.format(axis))
+
+    shape = (first_shape_pre + (sum(shape_at_axis),) + first_shape[axis+1:])
+
+    offsets_at_axis = _accumulate(shape_at_axis)
+    slice_prefixes = [(slice(start, end),)
+                      for start, end in zip([0] + offsets_at_axis,
+                                            offsets_at_axis)]
+    return shape, slice_prefixes
+
+
+def _block_info_recursion(arrays, max_depth, result_ndim, depth=0):
+    """
+    Returns the shape of the final array, along with a list
+    of slices and a list of arrays that can be used for assignment inside the
+    new array
+
+    Parameters
+    ----------
+    arrays : nested list of arrays
+        The arrays to check
+    max_depth : int
+        The number of nested lists
+    result_ndim: int
+        The number of dimensions in the final array.
+
+    Returns
+    -------
+    shape : tuple of int
+        The shape that the final array will take on.
+    slices: list of tuple of slices
+        The slices into the full array required for assignment. These are
+        required to be prepended with ``(Ellipsis, )`` to obtain the correct
+        final index.
+    arrays: list of ndarray
+        The data to assign to each slice of the full array
+
+    """
+    if depth < max_depth:
+        shapes, slices, arrays = zip(
+            *[_block_info_recursion(arr, max_depth, result_ndim, depth+1)
+              for arr in arrays])
+
+        axis = result_ndim - max_depth + depth
+        shape, slice_prefixes = _concatenate_shapes(shapes, axis)
+
+        # Prepend the slice prefix and flatten the slices
+        slices = [slice_prefix + the_slice
+                  for slice_prefix, inner_slices in zip(slice_prefixes, slices)
+                  for the_slice in inner_slices]
+
+        # Flatten the array list
+        arrays = functools.reduce(operator.add, arrays)
+
+        return shape, slices, arrays
+    else:
+        # We've 'bottomed out' - arrays is either a scalar or an array
+        # type(arrays) is not list
+        # Return the slice and the array inside a list to be consistent with
+        # the recursive case.
+        arr = _atleast_nd(arrays, result_ndim)
+        return arr.shape, [()], [arr]
+
+
 def _block(arrays, max_depth, result_ndim, depth=0):
     """
-    Internal implementation of block. `arrays` is the argument passed to
+    Internal implementation of block based on repeated concatenation.
+    `arrays` is the argument passed to
     block. `max_depth` is the depth of nested lists within `arrays` and
     `result_ndim` is the greatest of the dimensions of the arrays in
     `arrays` and the depth of the lists in `arrays` (see block docstring
@@ -648,7 +780,38 @@ def block(arrays):
 
 
     """
-    bottom_index, arr_ndim = _block_check_depths_match(arrays)
+    arrays, list_ndim, result_ndim, final_size = _block_setup(arrays)
+
+    # It was found through benchmarking that making an array of final size
+    # around 256x256 was faster by straight concatenation on an
+    # i7-7700HQ processor and dual channel ram 2400MHz.
+    # The dtype used did not seem to matter much.
+    #
+    # A 2D array using repeated concatenation requires 2 copies of the array.
+    #
+    # The fastest algorithm will depend on the ratio of CPU power to memory
+    # speed.
+    # One can monitor the results of the benchmark
+    # https://pv.github.io/numpy-bench/#bench_shape_base.Block2D.time_block2d
+    # to tune this parameter until a C version of the `_block_info_recursion`
+    # algorithm is implemented which would likely be faster than the python
+    # version.
+    if list_ndim * final_size > (2 * 512 * 512):
+        return _block_slicing(arrays, list_ndim, result_ndim)
+    else:
+        return _block_concatenate(arrays, list_ndim, result_ndim)
+
+
+# These helper functions are mostly used for testing.
+# They allow us to write tests that directly call `_block_slicing`
+# or `_block_concatenate` without blocking large arrays to force the wisdom
+# to trigger the desired path.
+def _block_setup(arrays):
+    """
+    Returns
+    (`arrays`, list_ndim, result_ndim, final_size)
+    """
+    bottom_index, arr_ndim, final_size = _block_check_depths_match(arrays)
     list_ndim = len(bottom_index)
     if bottom_index and bottom_index[-1] is None:
         raise ValueError(
@@ -656,7 +819,31 @@ def block(arrays):
                 _block_format_index(bottom_index)
             )
         )
-    result = _block(arrays, list_ndim, max(arr_ndim, list_ndim))
+    result_ndim = max(arr_ndim, list_ndim)
+    return arrays, list_ndim, result_ndim, final_size
+
+
+def _block_slicing(arrays, list_ndim, result_ndim):
+    shape, slices, arrays = _block_info_recursion(
+        arrays, list_ndim, result_ndim)
+    dtype = _nx.result_type(*[arr.dtype for arr in arrays])
+
+    # Test preferring F only in the case that all input arrays are F
+    F_order = all(arr.flags['F_CONTIGUOUS'] for arr in arrays)
+    C_order =  all(arr.flags['C_CONTIGUOUS'] for arr in arrays)
+    order = 'F' if F_order and not C_order else 'C'
+    result = _nx.empty(shape=shape, dtype=dtype, order=order)
+    # Note: In a C implementation, the function
+    # PyArray_CreateMultiSortedStridePerm could be used for more advanced
+    # guessing of the desired order.
+
+    for the_slice, arr in zip(slices, arrays):
+        result[(Ellipsis,) + the_slice] = arr
+    return result
+
+
+def _block_concatenate(arrays, list_ndim, result_ndim):
+    result = _block(arrays, list_ndim, result_ndim)
     if list_ndim == 0:
         # Catch an edge case where _block returns a view because
         # `arrays` is a single numpy array and not a list of numpy arrays.

From 6d4715e5358bc639faa8ddff9be19ff05823a0c3 Mon Sep 17 00:00:00 2001
From: Mark Harfouche <mark.harfouche@gmail.com>
Date: Tue, 2 Oct 2018 20:43:14 -0400
Subject: [PATCH 2/5] TST: Block test: Trigger both code paths.

---
 numpy/core/tests/test_shape_base.py | 126 ++++++++++++++++++++--------
 1 file changed, 89 insertions(+), 37 deletions(-)

diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py
index df819b73f6ca..f396b25af76f 100644
--- a/numpy/core/tests/test_shape_base.py
+++ b/numpy/core/tests/test_shape_base.py
@@ -6,6 +6,9 @@
     array, arange, atleast_1d, atleast_2d, atleast_3d, block, vstack, hstack,
     newaxis, concatenate, stack
     )
+
+from numpy.core.shape_base import (_block_setup,
+                                   _block_concatenate, _block_slicing)
 from numpy.testing import (
     assert_, assert_raises, assert_array_equal, assert_equal,
     assert_raises_regex, assert_almost_equal
@@ -372,14 +375,63 @@ def test_stack():
                         stack, [np.arange(2), np.arange(3)])
 
 
+# For more information on how to parametrize a whole class, see:
+# https://docs.pytest.org/en/latest/example/parametrize.html#parametrizing-test-methods-through-per-class-configuration
+def pytest_generate_tests(metafunc):
+    # called once per each test function
+    if hasattr(metafunc.cls, 'params'):
+        arglist = metafunc.cls.params
+        argnames = sorted(arglist[0])
+        metafunc.parametrize(argnames,
+                             [[funcargs[name] for name in argnames]
+                              for funcargs in arglist])
+
+
+# Blocking small arrays and large arrays go through different paths.
+# The algorithm is chosen depending on the number of element
+# copies required.
+# We define a test fixture that forces most tests to go through
+# both code paths.
+# Ultimately, this should be removed if a single algorithm is found
+# to be faster for both small and large arrays.
+def _block_force_concatenate(arrays):
+    arrays, list_ndim, result_ndim, _ = _block_setup(arrays)
+    return _block_concatenate(arrays, list_ndim, result_ndim)
+
+
+def _block_force_slicing(arrays):
+    arrays, list_ndim, result_ndim, _ = _block_setup(arrays)
+    return _block_slicing(arrays, list_ndim, result_ndim)
+
+
 class TestBlock(object):
-    def test_returns_copy(self):
+    params = [dict(block=block),
+              dict(block=_block_force_concatenate),
+              dict(block=_block_force_slicing)]
+
+    def test_returns_copy(self, block):
         a = np.eye(3)
-        b = np.block(a)
+        b = block(a)
         b[0, 0] = 2
         assert b[0, 0] != a[0, 0]
 
-    def test_block_simple_row_wise(self):
+    def test_block_total_size_estimate(self, block):
+        _, _, _, total_size = _block_setup([1])
+        assert total_size == 1
+
+        _, _, _, total_size = _block_setup([[1]])
+        assert total_size == 1
+
+        _, _, _, total_size = _block_setup([[1, 1]])
+        assert total_size == 2
+
+        _, _, _, total_size = _block_setup([[1], [1]])
+        assert total_size == 2
+
+        _, _, _, total_size = _block_setup([[1, 2], [3, 4]])
+        assert total_size == 4
+
+    def test_block_simple_row_wise(self, block):
         a_2d = np.ones((2, 2))
         b_2d = 2 * a_2d
         desired = np.array([[1, 1, 2, 2],
@@ -387,7 +439,7 @@ def test_block_simple_row_wise(self):
         result = block([a_2d, b_2d])
         assert_equal(desired, result)
 
-    def test_block_simple_column_wise(self):
+    def test_block_simple_column_wise(self, block):
         a_2d = np.ones((2, 2))
         b_2d = 2 * a_2d
         expected = np.array([[1, 1],
@@ -397,7 +449,7 @@ def test_block_simple_column_wise(self):
         result = block([[a_2d], [b_2d]])
         assert_equal(expected, result)
 
-    def test_block_with_1d_arrays_row_wise(self):
+    def test_block_with_1d_arrays_row_wise(self, block):
         # # # 1-D vectors are treated as row arrays
         a = np.array([1, 2, 3])
         b = np.array([2, 3, 4])
@@ -405,7 +457,7 @@ def test_block_with_1d_arrays_row_wise(self):
         result = block([a, b])
         assert_equal(expected, result)
 
-    def test_block_with_1d_arrays_multiple_rows(self):
+    def test_block_with_1d_arrays_multiple_rows(self, block):
         a = np.array([1, 2, 3])
         b = np.array([2, 3, 4])
         expected = np.array([[1, 2, 3, 2, 3, 4],
@@ -413,7 +465,7 @@ def test_block_with_1d_arrays_multiple_rows(self):
         result = block([[a, b], [a, b]])
         assert_equal(expected, result)
 
-    def test_block_with_1d_arrays_column_wise(self):
+    def test_block_with_1d_arrays_column_wise(self, block):
         # # # 1-D vectors are treated as row arrays
         a_1d = np.array([1, 2, 3])
         b_1d = np.array([2, 3, 4])
@@ -422,7 +474,7 @@ def test_block_with_1d_arrays_column_wise(self):
         result = block([[a_1d], [b_1d]])
         assert_equal(expected, result)
 
-    def test_block_mixed_1d_and_2d(self):
+    def test_block_mixed_1d_and_2d(self, block):
         a_2d = np.ones((2, 2))
         b_1d = np.array([2, 2])
         result = block([[a_2d], [b_1d]])
@@ -431,7 +483,7 @@ def test_block_mixed_1d_and_2d(self):
                              [2, 2]])
         assert_equal(expected, result)
 
-    def test_block_complicated(self):
+    def test_block_complicated(self, block):
         # a bit more complicated
         one_2d = np.array([[1, 1, 1]])
         two_2d = np.array([[2, 2, 2]])
@@ -455,7 +507,7 @@ def test_block_complicated(self):
                         [zero_2d]])
         assert_equal(result, expected)
 
-    def test_nested(self):
+    def test_nested(self, block):
         one = np.array([1, 1, 1])
         two = np.array([[2, 2, 2], [2, 2, 2], [2, 2, 2]])
         three = np.array([3, 3, 3])
@@ -464,9 +516,9 @@ def test_nested(self):
         six = np.array([6, 6, 6, 6, 6])
         zero = np.zeros((2, 6))
 
-        result = np.block([
+        result = block([
             [
-                np.block([
+                block([
                    [one],
                    [three],
                    [four]
@@ -485,7 +537,7 @@ def test_nested(self):
 
         assert_equal(result, expected)
 
-    def test_3d(self):
+    def test_3d(self, block):
         a000 = np.ones((2, 2, 2), int) * 1
 
         a100 = np.ones((3, 2, 2), int) * 2
@@ -498,7 +550,7 @@ def test_3d(self):
 
         a111 = np.ones((3, 3, 3), int) * 8
 
-        result = np.block([
+        result = block([
             [
                 [a000, a001],
                 [a010, a011],
@@ -540,53 +592,53 @@ def test_3d(self):
 
         assert_array_equal(result, expected)
 
-    def test_block_with_mismatched_shape(self):
+    def test_block_with_mismatched_shape(self, block):
         a = np.array([0, 0])
         b = np.eye(2)
-        assert_raises(ValueError, np.block, [a, b])
-        assert_raises(ValueError, np.block, [b, a])
+        assert_raises(ValueError, block, [a, b])
+        assert_raises(ValueError, block, [b, a])
 
-    def test_no_lists(self):
-        assert_equal(np.block(1),         np.array(1))
-        assert_equal(np.block(np.eye(3)), np.eye(3))
+    def test_no_lists(self, block):
+        assert_equal(block(1),         np.array(1))
+        assert_equal(block(np.eye(3)), np.eye(3))
 
-    def test_invalid_nesting(self):
+    def test_invalid_nesting(self, block):
         msg = 'depths are mismatched'
-        assert_raises_regex(ValueError, msg, np.block, [1, [2]])
-        assert_raises_regex(ValueError, msg, np.block, [1, []])
-        assert_raises_regex(ValueError, msg, np.block, [[1], 2])
-        assert_raises_regex(ValueError, msg, np.block, [[], 2])
-        assert_raises_regex(ValueError, msg, np.block, [
+        assert_raises_regex(ValueError, msg, block, [1, [2]])
+        assert_raises_regex(ValueError, msg, block, [1, []])
+        assert_raises_regex(ValueError, msg, block, [[1], 2])
+        assert_raises_regex(ValueError, msg, block, [[], 2])
+        assert_raises_regex(ValueError, msg, block, [
             [[1], [2]],
             [[3, 4]],
             [5]  # missing brackets
         ])
 
-    def test_empty_lists(self):
-        assert_raises_regex(ValueError, 'empty', np.block, [])
-        assert_raises_regex(ValueError, 'empty', np.block, [[]])
-        assert_raises_regex(ValueError, 'empty', np.block, [[1], []])
+    def test_empty_lists(self, block):
+        assert_raises_regex(ValueError, 'empty', block, [])
+        assert_raises_regex(ValueError, 'empty', block, [[]])
+        assert_raises_regex(ValueError, 'empty', block, [[1], []])
 
-    def test_tuple(self):
-        assert_raises_regex(TypeError, 'tuple', np.block, ([1, 2], [3, 4]))
-        assert_raises_regex(TypeError, 'tuple', np.block, [(1, 2), (3, 4)])
+    def test_tuple(self, block):
+        assert_raises_regex(TypeError, 'tuple', block, ([1, 2], [3, 4]))
+        assert_raises_regex(TypeError, 'tuple', block, [(1, 2), (3, 4)])
 
-    def test_different_ndims(self):
+    def test_different_ndims(self, block):
         a = 1.
         b = 2 * np.ones((1, 2))
         c = 3 * np.ones((1, 1, 3))
 
-        result = np.block([a, b, c])
+        result = block([a, b, c])
         expected = np.array([[[1., 2., 2., 3., 3., 3.]]])
 
         assert_equal(result, expected)
 
-    def test_different_ndims_depths(self):
+    def test_different_ndims_depths(self, block):
         a = 1.
         b = 2 * np.ones((1, 2))
         c = 3 * np.ones((1, 2, 3))
 
-        result = np.block([[a, b], [c]])
+        result = block([[a, b], [c]])
         expected = np.array([[[1., 2., 2.],
                               [3., 3., 3.],
                               [3., 3., 3.]]])

From d9824ad5723522b4802e3fc2593d001c6e4580a7 Mon Sep 17 00:00:00 2001
From: Mark Harfouche <mark.harfouche@gmail.com>
Date: Thu, 4 Oct 2018 20:18:17 -0700
Subject: [PATCH 3/5] TST: Add a test to block that checks for mismatched
 shapes in 2D

---
 numpy/core/tests/test_shape_base.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py
index f396b25af76f..7a6cb7c7480d 100644
--- a/numpy/core/tests/test_shape_base.py
+++ b/numpy/core/tests/test_shape_base.py
@@ -598,6 +598,9 @@ def test_block_with_mismatched_shape(self, block):
         assert_raises(ValueError, block, [a, b])
         assert_raises(ValueError, block, [b, a])
 
+        to_block = [[np.ones((2,3)), np.ones((2,2))],
+                    [np.ones((2,2)), np.ones((2,2))]]
+        assert_raises(ValueError, block, to_block)
     def test_no_lists(self, block):
         assert_equal(block(1),         np.array(1))
         assert_equal(block(np.eye(3)), np.eye(3))

From c5f21f6a2279ca7f207f3298c30316f8b886a5d4 Mon Sep 17 00:00:00 2001
From: Mark Harfouche <mark.harfouche@gmail.com>
Date: Fri, 5 Oct 2018 20:35:13 -0400
Subject: [PATCH 4/5] DOC: Add a release note about the slice based blocking
 algorithm

---
 doc/release/1.16.0-notes.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/doc/release/1.16.0-notes.rst b/doc/release/1.16.0-notes.rst
index 599123f97c6a..60980b122914 100644
--- a/doc/release/1.16.0-notes.rst
+++ b/doc/release/1.16.0-notes.rst
@@ -246,6 +246,14 @@ Previously we had a broken default that sometimes would not report underflow,
 overflow, and invalid floating point operations. Now we can support non-glibc
 distrubutions like Alpine Linux as long as they ship `fenv.h`.
 
+Speedup ``np.block`` for large arrays
+-------------------------------------
+Large arrays (more than about ``512 * 512`` elements) now use a blocking
+algorithm based on copying the data directly into the appropriate slice of the
+resulting array. This results in significant speedups for these large arrays,
+particularly for arrays being blocked along more than 2 dimensions.
+
+
 Changes
 =======
 

From f164d2e90cce62d901c1cce881684863fefde91f Mon Sep 17 00:00:00 2001
From: Mark Harfouche <mark.harfouche@gmail.com>
Date: Sat, 20 Oct 2018 20:33:51 -0400
Subject: [PATCH 5/5] TST: Add a test to ensure the memory order is respected
 after a call to ``np.block``.

---
 numpy/core/tests/test_shape_base.py | 30 +++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py
index 7a6cb7c7480d..2c74627befd5 100644
--- a/numpy/core/tests/test_shape_base.py
+++ b/numpy/core/tests/test_shape_base.py
@@ -647,3 +647,33 @@ def test_different_ndims_depths(self, block):
                               [3., 3., 3.]]])
 
         assert_equal(result, expected)
+
+    def test_block_memory_order(self, block):
+        # 3D
+        arr_c = np.zeros((3,)*3, order='C')
+        arr_f = np.zeros((3,)*3, order='F')
+
+        b_c = [[[arr_c, arr_c],
+                [arr_c, arr_c]],
+               [[arr_c, arr_c],
+                [arr_c, arr_c]]]
+
+        b_f = [[[arr_f, arr_f],
+                [arr_f, arr_f]],
+               [[arr_f, arr_f],
+                [arr_f, arr_f]]]
+
+        assert block(b_c).flags['C_CONTIGUOUS']
+        assert block(b_f).flags['F_CONTIGUOUS']
+
+        arr_c = np.zeros((3, 3), order='C')
+        arr_f = np.zeros((3, 3), order='F')
+        # 2D
+        b_c = [[arr_c, arr_c],
+               [arr_c, arr_c]]
+
+        b_f = [[arr_f, arr_f],
+               [arr_f, arr_f]]
+
+        assert block(b_c).flags['C_CONTIGUOUS']
+        assert block(b_f).flags['F_CONTIGUOUS']