
Commit 2f1dca1

more cleanup
1 parent 25a1e77 commit 2f1dca1

File tree: 8 files changed (+23, -63 lines)

data/src/test/java/org/apache/iceberg/data/RandomGenericData.java

Lines changed: 4 additions & 7 deletions
@@ -37,6 +37,7 @@
 import java.util.function.Supplier;
 import org.apache.iceberg.RandomVariants;
 import org.apache.iceberg.Schema;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
@@ -53,13 +54,6 @@ public static List<Record> generate(Schema schema, int numRecords, long seed) {
         generateIcebergGenerics(schema, numRecords, () -> new RandomRecordGenerator(seed)));
   }
 
-  public static List<Record> generate(
-      Schema schema, int numRecords, long seed, float nullPercentage) {
-    return Lists.newArrayList(
-        generateIcebergGenerics(
-            schema, numRecords, () -> new RandomRecordGenerator(seed, nullPercentage)));
-  }
-
   public static Iterable<Record> generateFallbackRecords(
       Schema schema, int numRecords, long seed, long numDictRows) {
     return generateIcebergGenerics(
@@ -187,6 +181,9 @@ protected RandomDataGenerator(long seed) {
   }
 
   protected RandomDataGenerator(long seed, float nullPercentage) {
+    Preconditions.checkArgument(
+        0.0f <= nullPercentage && nullPercentage <= 1.0f,
+        "Percentage needs to be in the range (0.0, 1.0)");
     this.random = new Random(seed);
    this.nullPercentage = nullPercentage;
  }
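
Note: Preconditions.checkArgument comes from Guava (Iceberg uses a relocated copy). When the condition is false it throws IllegalArgumentException with the supplied message; otherwise it returns normally. A minimal standalone sketch of the same validation, using plain Guava and a hypothetical demo class:

import com.google.common.base.Preconditions;

// Hypothetical demo of the validation pattern added above.
public class NullPercentageDemo {
  static void validate(float nullPercentage) {
    // Throws IllegalArgumentException when the range check fails.
    Preconditions.checkArgument(
        0.0f <= nullPercentage && nullPercentage <= 1.0f,
        "Percentage needs to be in the range (0.0, 1.0)");
  }

  public static void main(String[] args) {
    validate(0.25f); // in range: returns normally
    try {
      validate(1.5f); // out of range
    } catch (IllegalArgumentException e) {
      System.out.println(e.getMessage()); // prints the message above
    }
  }
}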

data/src/test/java/org/apache/iceberg/data/TestLocalScan.java

Lines changed: 6 additions & 30 deletions
@@ -264,42 +264,18 @@ public void testRandomData() throws IOException {
 
     append.commit();
 
-    RecordComparator comparator = new RecordComparator();
+    Comparator<Record> recordComparator =
+        Comparator.comparing((Record r) -> r.get(0, Long.class))
+            .thenComparing(
+                (Record r) -> r.get(1, String.class), Comparator.nullsFirst(String::compareTo));
     List<Record> records = Lists.newArrayList(IcebergGenerics.read(table).build());
 
-    expected.sort(comparator);
-    records.sort(comparator);
+    expected.sort(recordComparator);
+    records.sort(recordComparator);
     assertThat(records).as("Should produce correct number of records").hasSameSizeAs(expected);
     assertThat(records).as("Random record set should match").isEqualTo(expected);
   }
 
-  private static class RecordComparator implements Comparator<Record> {
-    @Override
-    public int compare(Record r1, Record r2) {
-      // Compare by ID (never null)
-      int idCmp = Long.compare(r1.get(0, Long.class), r2.get(0, Long.class));
-      if (idCmp != 0) {
-        return idCmp;
-      }
-
-      // Compare by data, nulls first
-      String dataFirst = r1.get(1, String.class);
-      String dataSecond = r2.get(1, String.class);
-
-      if (dataFirst == null && dataSecond == null) {
-        return 0;
-      }
-      if (dataFirst == null) {
-        return -1;
-      }
-      if (dataSecond == null) {
-        return 1;
-      }
-
-      return dataFirst.compareTo(dataSecond);
-    }
-  }
-
   @TestTemplate
   public void testFullScan() {
     Iterable<Record> results = IcebergGenerics.read(sharedTable).build();
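
Note: the replacement comparator is built entirely from standard java.util.Comparator combinators, expressing the same "id first, then data with nulls first" ordering as the deleted inner class. A self-contained sketch, with a hypothetical Row record standing in for Iceberg's Record:

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

public class ComparatorDemo {
  // Hypothetical stand-in for the two compared Record fields.
  record Row(long id, String data) {}

  public static void main(String[] args) {
    // Primary key: id (never null); secondary key: data, nulls first.
    Comparator<Row> cmp =
        Comparator.comparingLong(Row::id)
            .thenComparing(Row::data, Comparator.nullsFirst(String::compareTo));

    // Arrays.asList permits null elements, unlike List.of.
    List<Row> rows =
        Arrays.asList(new Row(2L, "b"), new Row(1L, null), new Row(1L, "a"), new Row(2L, null));
    rows.sort(cmp);
    rows.forEach(r -> System.out.println(r.id() + " " + r.data()));
    // Prints: 1 null, 1 a, 2 null, 2 b
  }
}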

spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/data/RandomData.java

Lines changed: 4 additions & 0 deletions
@@ -241,12 +241,16 @@ protected Object randomValue(Type.PrimitiveType primitive, Random rand) {
 
   private static class SparkRandomDataGenerator extends TypeUtil.CustomOrderSchemaVisitor<Object> {
     private final Random random;
+    private final float nullPercentage;
 
     private SparkRandomDataGenerator(long seed) {
       this(seed, DEFAULT_NULL_PERCENTAGE);
     }
 
     private SparkRandomDataGenerator(long seed, float nullPercentage) {
+      Preconditions.checkArgument(
+          0.0f <= nullPercentage && nullPercentage <= 1.0f,
+          "Percentage needs to be in the range (0.0, 1.0)");
       this.random = new Random(seed);
       this.nullPercentage = nullPercentage;
     }

spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java

Lines changed: 1 addition & 18 deletions
@@ -77,24 +77,7 @@ public static void assertEqualsBatch(
     for (int rowId = 0; rowId < batch.numRows(); rowId++) {
       InternalRow row = batch.getRow(rowId);
       Record expectedRecord = expectedRecords.next();
-      Types.StructType expectedRecordType = expectedRecord.struct();
-      List<Types.NestedField> fields = struct.fields();
-
-      for (int readPos = 0; readPos < fields.size(); readPos += 1) {
-        Types.NestedField field = fields.get(readPos);
-        Types.NestedField expectedField = expectedRecordType.field(field.fieldId());
-        Object expectedValue;
-        Object actualValue = row.isNullAt(readPos) ? null : row.get(readPos, convert(field.type()));
-        if (expectedField != null) {
-          expectedValue = expectedRecord.getField(expectedField.name());
-          assertEqualsUnsafe(field.type(), expectedValue, actualValue);
-        } else {
-          assertEqualsUnsafe(
-              field.type(),
-              GenericDataUtil.internalToGeneric(field.type(), field.initialDefault()),
-              actualValue);
-        }
-      }
+      assertEqualsUnsafe(struct, expectedRecord, row);
     }
   }

spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/RandomData.java

Lines changed: 3 additions & 0 deletions
@@ -248,6 +248,9 @@ private SparkRandomDataGenerator(long seed) {
   }
 
   private SparkRandomDataGenerator(long seed, float nullPercentage) {
+    Preconditions.checkArgument(
+        0.0f <= nullPercentage && nullPercentage <= 1.0f,
+        "Percentage needs to be in the range (0.0, 1.0)");
     this.random = new Random(seed);
     this.nullPercentage = nullPercentage;
   }

spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java

Lines changed: 2 additions & 5 deletions
@@ -42,7 +42,6 @@
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
-import org.apache.iceberg.spark.data.RandomData;
 import org.apache.iceberg.spark.data.TestHelpers;
 import org.apache.iceberg.spark.data.vectorized.VectorizedSparkParquetReaders;
 import org.apache.iceberg.types.Types;
@@ -96,16 +95,14 @@ public void testMixedDictionaryNonDictionaryReads() throws IOException {
     File dictionaryEncodedFile = File.createTempFile("junit", null, temp.toFile());
     assertThat(dictionaryEncodedFile.delete()).as("Delete should succeed").isTrue();
     Iterable<Record> dictionaryEncodableData =
-        RandomGenericData.generateDictionaryEncodableRecords(
-            schema, 10000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE);
+        RandomGenericData.generateDictionaryEncodableRecords(schema, 10000, 0L);
     try (FileAppender<Record> writer = getParquetWriter(schema, dictionaryEncodedFile)) {
       writer.addAll(dictionaryEncodableData);
     }
 
     File plainEncodingFile = File.createTempFile("junit", null, temp.toFile());
     assertThat(plainEncodingFile.delete()).as("Delete should succeed").isTrue();
-    Iterable<Record> nonDictionaryData =
-        RandomGenericData.generate(schema, 10000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE);
+    Iterable<Record> nonDictionaryData = RandomGenericData.generate(schema, 10000, 0L);
     try (FileAppender<Record> writer = getParquetWriter(schema, plainEncodingFile)) {
       writer.addAll(nonDictionaryData);
     }
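
Note: both call sites now use the three-argument overloads, which fall back to a default null percentage internally. A hypothetical sketch of that telescoping-overload pattern (the names and the 5% default below are illustrative, not taken from this commit):

public class OverloadDemo {
  // Illustrative default; the real constant lives in the random-data generators.
  static final float DEFAULT_NULL_PERCENTAGE = 0.05f;

  // The short overload delegates to the full one with the default value.
  static void generate(int numRecords, long seed) {
    generate(numRecords, seed, DEFAULT_NULL_PERCENTAGE);
  }

  static void generate(int numRecords, long seed, float nullPercentage) {
    System.out.printf("records=%d seed=%d nulls=%.2f%n", numRecords, seed, nullPercentage);
  }

  public static void main(String[] args) {
    generate(10000, 0L); // same call shape as the updated tests
  }
}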

spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryFallbackToPlainEncodingVectorizedReads.java

Lines changed: 1 addition & 1 deletion
@@ -59,9 +59,9 @@ Iterable<Record> generateData(
   FileAppender<Record> getParquetWriter(Schema schema, File testFile) throws IOException {
     return Parquet.write(Files.localOutput(testFile))
         .schema(schema)
+        .createWriterFunc(GenericParquetWriter::create)
         .named("test")
         .set(TableProperties.PARQUET_DICT_SIZE_BYTES, "512000")
-        .createWriterFunc(GenericParquetWriter::create)
         .build();
   }

spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java

Lines changed: 2 additions & 2 deletions
@@ -143,17 +143,17 @@ Iterable<Record> generateData(
   FileAppender<Record> getParquetWriter(Schema schema, File testFile) throws IOException {
     return Parquet.write(Files.localOutput(testFile))
         .schema(schema)
-        .named("test")
         .createWriterFunc(GenericParquetWriter::create)
+        .named("test")
         .build();
   }
 
   FileAppender<Record> getParquetV2Writer(Schema schema, File testFile) throws IOException {
     return Parquet.write(Files.localOutput(testFile))
         .schema(schema)
+        .createWriterFunc(GenericParquetWriter::create)
         .named("test")
         .writerVersion(ParquetProperties.WriterVersion.PARQUET_2_0)
-        .createWriterFunc(GenericParquetWriter::create)
         .build();
   }
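
Note: a minimal end-to-end sketch of how this fluent writer builder is typically used, assuming the same Iceberg test utilities; the schema and temp-file handling below are illustrative, not part of this commit:

import java.io.File;
import java.io.IOException;
import org.apache.iceberg.Files;
import org.apache.iceberg.Schema;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.data.parquet.GenericParquetWriter;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.types.Types;

public class ParquetWriterSketch {
  public static void main(String[] args) throws IOException {
    // Illustrative two-column schema matching the shape used in these tests.
    Schema schema =
        new Schema(
            Types.NestedField.required(1, "id", Types.LongType.get()),
            Types.NestedField.optional(2, "data", Types.StringType.get()));

    File testFile = File.createTempFile("sketch", ".parquet");
    testFile.delete(); // the writer expects the target not to exist yet

    try (FileAppender<Record> writer =
        Parquet.write(Files.localOutput(testFile))
            .schema(schema)
            .createWriterFunc(GenericParquetWriter::create)
            .named("test")
            .build()) {
      // Records would be appended here, e.g. writer.addAll(records).
    }
  }
}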
