-
Notifications
You must be signed in to change notification settings - Fork 46
Open
Description
If I write two CDFs with identical content, one with row majority and one with column majority, cdflib returns different data when I write and then read back a 3D zVariable (3 dimensions including the record dimension). I get the same problem for higher dimensionality. The memory order of the numpy array (i.e. `np.reshape(..., order=...)`) does not matter.
I am not able to say whether this is a problem with writing or reading. cdfdump prints the CDF file content the same way but only as 1D array per record. (Maybe that hints that the reading is wrong, rather than the writing?)
I am using cdflib 1.3.3, Linux.
The following code reproduces the error. It writes a zVariable, reads it back, and compares the result with the data that should have been written.
import cdflib
import math
import numpy as np
def write_cdf(cdf_path, b_column_major: bool):
    """Write a test zVariable to a CDF, read it back, and report mismatches.

    A uint16 array of shape (2, 3, 4) holding the values 0..23 in C order is
    written to ``cdf_path`` as a record-varying zVariable, using either row
    or column majority for the file. The variable is then read back with
    ``cdflib.cdfread`` and compared against the array that was written.

    The written array is always printed. The array read back is printed
    only when its values differ from the written ones.

    Args:
        cdf_path: Path of the CDF file to create.
        b_column_major: ``True`` to write with ``'column_major'`` majority,
            ``False`` to write with ``'row_major'``.

    Raises:
        AssertionError: If ``b_column_major`` is not exactly a ``bool``, or
            if the dtype read back differs from the dtype written.
    """
    # Strict check on purpose: only a genuine bool is accepted.
    assert type(b_column_major) is bool

    cdflib_majority = 'column_major' if b_column_major else 'row_major'

    cdf_type_name = 'CDF_UINT2'
    zvar_name = 'ZV_NAME'

    # Shape (2, 3, 4); axis 0 is passed to cdflib as the record axis
    # (only shape[1:] goes into 'Dim_Sizes' below).
    shape = 2 + np.arange(0, 3)
    n_values = math.prod(shape)
    flat_values = np.arange(0, n_values, dtype='uint16')
    # Same as np.reshape(np.arange(0, 2*3*4, dtype='uint16'), (2, 3, 4)).
    NA_ZV_DATA = np.reshape(flat_values, shape, order='C')

    file_spec = {
        'Majority': cdflib_majority,
        'Checksum': True,
        'Compressed': 0,
    }
    with cdflib.cdfwrite.CDF(cdf_path, delete=True, cdf_spec=file_spec) as cdf:
        zvar_spec = {
            'Variable': zvar_name,
            'Data_Type': getattr(cdf, cdf_type_name),
            'Num_Elements': 1,
            'Rec_Vary': True,
            'Dim_Sizes': NA_ZV_DATA.shape[1:],
            'Var_Type': 'zVariable',
            # NOTE: This is zVar-level compression, not file-level
            # compression. Variable compression should not be used.
            'Compress': 0,
        }
        cdf.write_var(zvar_spec, var_data=NA_ZV_DATA, var_attrs={})

    with cdflib.cdfread.CDF(cdf_path) as cdf:
        na_zv_data = cdf.varget(zvar_name)

    # The f-strings below echo the variable names, so those names are part
    # of the printed output.
    print('#' * 30)
    print('Data written to CDF:')
    print(f'{cdflib_majority=}')
    print(f'{NA_ZV_DATA.shape=}')
    print(NA_ZV_DATA)

    assert na_zv_data.dtype == NA_ZV_DATA.dtype

    # NOTE: np.array_equal() compares values only, not data types; the
    # dtype is covered by the assertion above.
    if np.array_equal(na_zv_data, NA_ZV_DATA):
        return

    print('-' * 30)
    print('Data read from CDF (differs from data written):')
    print(f'{na_zv_data.shape=}')
    print(na_zv_data)
# Run the write/read round trip once per majority setting.
for cdf_file, use_column_major in (
    ('/home/erjo/temp/temp/test_major_row.cdf', False),
    ('/home/erjo/temp/temp/test_major_column.cdf', True),
):
    write_cdf(cdf_file, b_column_major=use_column_major)
Output:
##############################
Data written to CDF:
cdflib_majority='row_major'
NA_ZV_DATA.shape=(2, 3, 4)
[[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[[12 13 14 15]
[16 17 18 19]
[20 21 22 23]]]
##############################
Data written to CDF:
cdflib_majority='column_major'
NA_ZV_DATA.shape=(2, 3, 4)
[[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[[12 13 14 15]
[16 17 18 19]
[20 21 22 23]]]
------------------------------
Data read from CDF (differs from data written):
na_zv_data.shape=(2, 3, 4)
[[[ 0 3 6 9]
[ 1 4 7 10]
[ 2 5 8 11]]
[[12 15 18 21]
[13 16 19 22]
[14 17 20 23]]]
Metadata
Metadata
Assignees
Labels
No labels
Type
Projects
Status
Backlog