Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit a0e2ce1

Browse files
fix
1 parent 31a81c1 commit a0e2ce1

File tree

3 files changed

+49
-12
lines changed

3 files changed

+49
-12
lines changed

data/src/test/java/org/apache/iceberg/data/RandomGenericData.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ public static Iterable<Record> generateDictionaryEncodableRecords(
7373

7474
public static Iterable<Record> generateDictionaryEncodableRecords(
7575
Schema schema, int numRecords, long seed, float nullPercentage) {
76-
return generateIcebergGenerics(schema, numRecords, () -> new DictionaryEncodedGenerator(seed));
76+
return generateIcebergGenerics(
77+
schema, numRecords, () -> new DictionaryEncodedGenerator(seed, nullPercentage));
7778
}
7879

7980
private static Iterable<Record> generateIcebergGenerics(
@@ -131,7 +132,7 @@ private static class DictionaryEncodedGenerator extends RandomRecordGenerator {
131132
super(seed);
132133
}
133134

134-
DictionaryEncodedGenerator(long seed, long nullPercentage) {
135+
DictionaryEncodedGenerator(long seed, float nullPercentage) {
135136
super(seed, nullPercentage);
136137
}
137138

@@ -218,8 +219,7 @@ public Object list(Types.ListType list, Supplier<Object> elementResult) {
218219

219220
List<Object> result = Lists.newArrayListWithExpectedSize(numElements);
220221
for (int i = 0; i < numElements; i += 1) {
221-
// return null 5% of the time when the value is optional
222-
if (list.isElementOptional() && random.nextInt(20) == 1) {
222+
if (list.isElementOptional() && isNull()) {
223223
result.add(null);
224224
} else {
225225
result.add(elementResult.get());
@@ -251,8 +251,7 @@ public Object map(Types.MapType map, Supplier<Object> keyResult, Supplier<Object
251251

252252
keySet.add(key);
253253

254-
// return null 5% of the time when the value is optional
255-
if (map.isValueOptional() && random.nextInt(20) == 1) {
254+
if (map.isValueOptional() && isNull()) {
256255
result.put(key, null);
257256
} else {
258257
result.put(key, valueResult.get());

data/src/test/java/org/apache/iceberg/data/TestLocalScan.java

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import java.nio.ByteOrder;
3737
import java.nio.file.Files;
3838
import java.util.Arrays;
39+
import java.util.Comparator;
3940
import java.util.Iterator;
4041
import java.util.List;
4142
import java.util.Set;
@@ -263,9 +264,37 @@ public void testRandomData() throws IOException {
263264

264265
append.commit();
265266

266-
Set<Record> records = Sets.newHashSet(IcebergGenerics.read(table).build());
267+
RecordComparator comparator = new RecordComparator();
268+
List<Record> records = Lists.newArrayList(IcebergGenerics.read(table).build());
269+
270+
expected.sort(comparator);
271+
records.sort(comparator);
267272
assertThat(records).as("Should produce correct number of records").hasSameSizeAs(expected);
268-
assertThat(records).as("Random record set should match").isEqualTo(Sets.newHashSet(expected));
273+
assertThat(records).as("Random record set should match").isEqualTo(expected);
274+
}
275+
276+
// Compares by ID, then puts null data first, then lexicographically by data
277+
private static class RecordComparator implements Comparator<Record> {
278+
279+
@Override
280+
public int compare(Record r1, Record r2) {
281+
long idComparison = r1.get(0, Long.class) - r2.get(0, Long.class);
282+
if (idComparison < 0) {
283+
return -1;
284+
} else if (idComparison > 0) {
285+
return 1;
286+
}
287+
288+
if (r1.get(1, String.class) == null) {
289+
return -1;
290+
}
291+
292+
if (r2.get(1, String.class) == null) {
293+
return 1;
294+
}
295+
296+
return r1.get(1, String.class).compareTo(r2.get(1, String.class));
297+
}
269298
}
270299

271300
@TestTemplate

spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/RandomData.java

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,9 +241,15 @@ protected Object randomValue(Type.PrimitiveType primitive, Random rand) {
241241

242242
private static class SparkRandomDataGenerator extends TypeUtil.CustomOrderSchemaVisitor<Object> {
243243
private final Random random;
244+
private final float nullPercentage;
244245

245246
private SparkRandomDataGenerator(long seed) {
247+
this(seed, DEFAULT_NULL_PERCENTAGE);
248+
}
249+
250+
private SparkRandomDataGenerator(long seed, float nullPercentage) {
246251
this.random = new Random(seed);
252+
this.nullPercentage = nullPercentage;
247253
}
248254

249255
@Override
@@ -265,12 +271,16 @@ public InternalRow struct(Types.StructType struct, Iterable<Object> fieldResults
265271
@Override
266272
public Object field(Types.NestedField field, Supplier<Object> fieldResult) {
267273
// return null with probability nullPercentage when the value is optional
268-
if (field.isOptional() && random.nextInt(20) == 1) {
274+
if (field.isOptional() && isNull()) {
269275
return null;
270276
}
271277
return fieldResult.get();
272278
}
273279

280+
private boolean isNull() {
281+
return random.nextFloat() < nullPercentage;
282+
}
283+
274284
@Override
275285
public GenericArrayData list(Types.ListType list, Supplier<Object> elementResult) {
276286
int numElements = random.nextInt(20);
@@ -279,7 +289,7 @@ public GenericArrayData list(Types.ListType list, Supplier<Object> elementResult
279289

280290
for (int i = 0; i < numElements; i += 1) {
281291
// return null with probability nullPercentage when the value is optional
282-
if (list.isElementOptional() && random.nextInt(20) == 1) {
292+
if (list.isElementOptional() && isNull()) {
283293
arr[i] = null;
284294
} else {
285295
arr[i] = elementResult.get();
@@ -310,8 +320,7 @@ public Object map(Types.MapType map, Supplier<Object> keyResult, Supplier<Object
310320
keySet.add(key);
311321

312322
keysArr[i] = key;
313-
// return null 5% of the time when the value is optional
314-
if (map.isValueOptional() && random.nextInt(20) == 1) {
323+
if (map.isValueOptional() && isNull()) {
315324
valuesArr[i] = null;
316325
} else {
317326
valuesArr[i] = valueResult.get();

0 commit comments

Comments
 (0)