Merged · 34 commits · showing changes from 1 commit
a69ec52  rebase (eric-maynard, Jun 10, 2025)
0bba5ef  lint (eric-maynard, Jun 10, 2025)
9ecc2be  some changes per comments (eric-maynard, Jun 18, 2025)
3cd2819  Merge branch 'main' of ssh://github.com-oss/apache/iceberg into parqu… (eric-maynard, Jun 18, 2025)
8d186fe  javadoc (eric-maynard, Jun 23, 2025)
5ce8913  lint (eric-maynard, Jun 23, 2025)
9fe0bba  create class (eric-maynard, Jun 23, 2025)
6cecf96  remove clash (eric-maynard, Jun 23, 2025)
2ce2590  Merge branch 'parquet-v2-refactor' of ssh://github.com-oss/eric-mayna… (eric-maynard, Jun 23, 2025)
3aed168  refactoring (eric-maynard, Jun 23, 2025)
98d1c5c  clean up (eric-maynard, Jun 23, 2025)
b72e338  wire up (eric-maynard, Jun 23, 2025)
b76cc47  tweak header (eric-maynard, Jun 25, 2025)
ec07775  check in (eric-maynard, Jun 25, 2025)
c79a77c  resolve conflicts (eric-maynard, Jun 26, 2025)
1969466  debugging (eric-maynard, Jun 27, 2025)
d2b173b  debugging (eric-maynard, Jun 27, 2025)
1f219e5  debugging commit (eric-maynard, Jul 1, 2025)
21c11d8  move code (eric-maynard, Jul 1, 2025)
e4bc23f  switch back to floats (eric-maynard, Jul 1, 2025)
a88af2e  clean a bit (eric-maynard, Jul 1, 2025)
c375e99  semistable (eric-maynard, Jul 1, 2025)
f8cfbb2  polish (eric-maynard, Jul 1, 2025)
9d27297  stable: (eric-maynard, Jul 1, 2025)
d75f85e  spotless; polish (eric-maynard, Jul 1, 2025)
03f6395  spotless (eric-maynard, Jul 1, 2025)
c39570d  fix lints (eric-maynard, Jul 2, 2025)
3a73ecc  review comments (eric-maynard, Jul 15, 2025)
44a81ac  amogh comments (eric-maynard, Jul 21, 2025)
d584753  russell comments (eric-maynard, Jul 21, 2025)
d253f1b  spotless (eric-maynard, Jul 21, 2025)
e0b505b  retry docs (eric-maynard, Jul 22, 2025)
97a315e  javadoc fix (eric-maynard, Jul 22, 2025)
dfb7b77  putInt (eric-maynard, Jul 29, 2025)
commit c375e99d0e35b1255d179f51e4449c60a478791c
semistable (eric-maynard, committed Jul 1, 2025)
In the first file:
@@ -30,6 +30,7 @@
import org.apache.iceberg.arrow.vectorized.NullabilityHolder;
import org.apache.iceberg.parquet.ParquetUtil;
import org.apache.parquet.column.Dictionary;
import org.apache.parquet.column.values.ValuesReader;
import org.apache.parquet.column.values.plain.PlainValuesReader;

public final class VectorizedParquetDefinitionLevelReader
@@ -236,7 +237,7 @@ protected void nextRleBatch(
int idx,
int numValues,
byte[] byteArray) {
- setNextNValuesInVector(typeWidth, nullabilityHolder, valuesReader, idx, vector, numValues);
+ setNextNValuesInVector(nullabilityHolder, valuesReader, idx, vector, numValues, this);
}

@Override
@@ -265,6 +266,9 @@ protected void nextPackedBatch(

protected abstract void nextVal(
FieldVector vector, int idx, VectorizedValuesReader valuesReader, Mode mode);

+ public abstract void nextVals(
+ FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total);
}
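
Each concrete reader below implements the new nextVals hook by delegating to a type-specific batch read on the values reader (readLongs, readDoubles, readFloats, readIntegers). As a point of reference, here is a minimal sketch of the batch-read surface that VectorizedValuesReader appears to expose, inferred only from the calls visible in this diff; the real interface in the PR very likely declares more methods.

import org.apache.arrow.vector.FieldVector;

// Hypothetical stand-in for VectorizedValuesReader: method names and
// parameter order mirror only the calls shown in this diff.
interface BatchValuesReader {
  void readIntegers(int total, FieldVector vector, int rowId);
  void readLongs(int total, FieldVector vector, int rowId);
  void readFloats(int total, FieldVector vector, int rowId);
  void readDoubles(int total, FieldVector vector, int rowId);
}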

class LongReader extends NumericBaseReader {
@@ -292,6 +296,11 @@ protected void nextDictEncodedVal(
.setLong((long) idx * typeWidth, dict.decodeToLong(reader.readInteger()));
}
}

+ @Override
+ public void nextVals(FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) {
+ valuesReader.readLongs(total, vector, rowId);
+ }
}

class DoubleReader extends NumericBaseReader {
@@ -319,6 +328,11 @@ protected void nextDictEncodedVal(
.setDouble((long) idx * typeWidth, dict.decodeToDouble(reader.readInteger()));
}
}

+ @Override
+ public void nextVals(FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) {
+ valuesReader.readDoubles(total, vector, rowId);
+ }
}

class FloatReader extends NumericBaseReader {
@@ -346,6 +360,11 @@ protected void nextDictEncodedVal(
.setFloat((long) idx * typeWidth, dict.decodeToFloat(reader.readInteger()));
}
}

+ @Override
+ public void nextVals(FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) {
+ valuesReader.readFloats(total, vector, rowId);
+ }
}

class IntegerReader extends NumericBaseReader {
@@ -375,6 +394,11 @@ protected void nextDictEncodedVal(
.setInt((long) idx * typeWidth, dict.decodeToInt(reader.readInteger()));
}
}

+ @Override
+ public void nextVals(FieldVector vector, int rowId, VectorizedValuesReader valuesReader, int total) {
+ valuesReader.readIntegers(total, vector, rowId);
+ }
}

abstract class BaseReader extends CommonReader {
@@ -662,16 +686,15 @@ private void setNulls(

@SuppressWarnings({"all"})
private void setNextNValuesInVector(
- int typeWidth,
NullabilityHolder nullabilityHolder,
VectorizedValuesReader valuesReader,
int bufferIdx,
FieldVector vector,
- int numValues) {
+ int numValues,
+ NumericBaseReader reader) {
ArrowBuf validityBuffer = vector.getValidityBuffer();
if (currentValue == maxDefLevel) {
- // TODO read the correct type not just hard-coded longs here
- valuesReader.readFloats(numValues, vector, bufferIdx);
+ reader.nextVals(vector, bufferIdx, valuesReader, numValues);
nullabilityHolder.setNotNulls(bufferIdx, numValues);
if (setArrowValidityVector) {
for (int i = 0; i < numValues; i++) {
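The refactor in this file replaces the hard-coded readFloats call (and its TODO) in setNextNValuesInVector with a dispatch through the NumericBaseReader passed in as `this`, so each numeric type supplies its own batch read. A self-contained sketch of that dispatch shape, using hypothetical names rather than the actual Iceberg classes:

// Hypothetical illustration of the dispatch pattern above: the shared
// run-length path calls an abstract per-type hook instead of assuming
// one physical type.
abstract class TypedBatchReader {
  // analogous to nextVals(vector, rowId, valuesReader, total)
  abstract void nextVals(long[] target, int rowId, int total);

  // analogous to setNextNValuesInVector(...): the shared path no longer
  // hard-codes a float read, the value copy is delegated to the subtype
  final void setNextNValues(long[] target, int rowId, int total) {
    nextVals(target, rowId, total);
  }
}

final class LongBatchReader extends TypedBatchReader {
  @Override
  void nextVals(long[] target, int rowId, int total) {
    for (int i = 0; i < total; i++) {
      target[rowId + i] = i; // stand-in for valuesReader.readLongs(total, vector, rowId)
    }
  }
}
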
In a second file:
@@ -175,7 +175,6 @@ public abstract static class RandomDataGenerator<T>

private final Random random;
private final float nullPercentage;
- private int currentInt = 1;

protected RandomDataGenerator(long seed) {
this(seed, DEFAULT_NULL_PERCENTAGE);
@@ -290,10 +289,6 @@ public Object primitive(Type.PrimitiveType primitive) {
} else {
return EPOCH.plus((long) result, NANOS).toLocalDateTime();
}
- case INTEGER:
- return currentInt++;
- case LONG:
- return (long)currentInt++;
default:
return result;
}
In a third file:
@@ -300,11 +300,11 @@ public void testSupportedReadsForParquetV2() throws Exception {
// (i.e. decimals > 8 bytes)
Schema schema =
new Schema(
- optional(102, "float_data", Types.FloatType.get())
- // optional(103, "double_data", Types.DoubleType.get()),
- // optional(104, "decimal_data", Types.DecimalType.of(25, 5)),
- // optional(105, "int_data", Types.IntegerType.get()),
- // optional(106, "long_data", Types.LongType.get())
+ optional(102, "float_data", Types.FloatType.get()),
+ optional(103, "double_data", Types.DoubleType.get()),
+ optional(104, "decimal_data", Types.DecimalType.of(25, 5)),
+ optional(105, "int_data", Types.IntegerType.get()),
+ optional(106, "long_data", Types.LongType.get())
);

File dataFile = File.createTempFile("junit", null, temp.toFile());
@@ -319,8 +319,7 @@

@Test
public void testUnsupportedReadsForParquetV2() throws Exception {
- // Longs, ints, string types etc. use delta encoding and which are not supported for vectorized
- // reads
+ // Some types use delta encoding, which is not supported for vectorized reads
Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields());
File dataFile = File.createTempFile("junit", null, temp.toFile());
assertThat(dataFile.delete()).as("Delete should succeed").isTrue();
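
Both tests above read files written with the Parquet v2 format, where the writer selects different value encodings than v1. As a hedged sketch of where those encodings come from, assuming the parquet-mr ParquetProperties builder (not necessarily how these tests configure their writer):

import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.column.ParquetProperties.WriterVersion;

class ParquetV2WriterProps {
  // Requesting format version 2 makes parquet-mr typically choose delta
  // encodings (e.g. DELTA_BINARY_PACKED for int32/int64, DELTA_BYTE_ARRAY
  // for binary) rather than PLAIN, which is what the vectorized read
  // changes in this PR target.
  static ParquetProperties v2Properties() {
    return ParquetProperties.builder()
        .withWriterVersion(WriterVersion.PARQUET_2_0)
        .build();
  }
}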