Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 53e1db6

Browse files
initial
1 parent 25a1e77 commit 53e1db6

File tree

2 files changed

+81
-0
lines changed

2 files changed

+81
-0
lines changed

arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorHolder.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,13 @@ public PositionVectorHolder(
167167
}
168168
}
169169

170+
public static class RowIdVectorHolder extends VectorHolder {
171+
public RowIdVectorHolder(
172+
FieldVector vector, Types.NestedField icebergField, NullabilityHolder nulls) {
173+
super(vector, icebergField, nulls);
174+
}
175+
}
176+
170177
public static class DeletedVectorHolder extends VectorHolder {
171178
private final int numRows;
172179

arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,10 @@ public static VectorizedArrowReader positionsWithSetArrowValidityVector() {
461461
return new PositionVectorReader(true);
462462
}
463463

464+
public static VectorizedArrowReader rowIds(long firstRowId, VectorizedArrowReader idReader) {
465+
return new RowIdVectorReader(firstRowId, idReader);
466+
}
467+
464468
private static final class NullVectorReader extends VectorizedArrowReader {
465469
private static final NullVectorReader INSTANCE = new NullVectorReader();
466470

@@ -567,6 +571,76 @@ public void close() {
567571
}
568572
}
569573

574+
private static final class RowIdVectorReader extends VectorizedArrowReader {
575+
private static final Field ROW_ID_ARROW_FIELD = ArrowSchemaUtil.convert(MetadataColumns.ROW_ID);
576+
577+
private final long firstRowId;
578+
private final VectorizedArrowReader idReader;
579+
private final VectorizedArrowReader posReader;
580+
private NullabilityHolder nulls;
581+
582+
private RowIdVectorReader(long firstRowId, VectorizedArrowReader idReader) {
583+
this.firstRowId = firstRowId;
584+
this.idReader = idReader;
585+
this.posReader = new PositionVectorReader(true);
586+
}
587+
588+
@Override
589+
public VectorHolder read(VectorHolder reuse, int numValsToRead) {
590+
FieldVector positions = posReader.read(null, numValsToRead).vector();
591+
FieldVector ids = idReader.read(null, numValsToRead).vector();
592+
BigIntVector vec = newVector(numValsToRead);
593+
ArrowBuf dataBuffer = vec.getDataBuffer();
594+
for (int i = 0; i < numValsToRead; i += 1) {
595+
if (ids.isNull(i)) {
596+
long rowId = firstRowId + (Long) positions.getObject(i);
597+
dataBuffer.setLong((long) i * Long.BYTES, rowId);
598+
} else {
599+
dataBuffer.setLong((long) i * Long.BYTES, (Long) ids.getObject(i));
600+
}
601+
}
602+
603+
vec.setValueCount(numValsToRead);
604+
605+
return new VectorHolder.RowIdVectorHolder(vec, MetadataColumns.ROW_POSITION, nulls);
606+
}
607+
608+
@Override
609+
public void setRowGroupInfo(
610+
PageReadStore source, Map<ColumnPath, ColumnChunkMetaData> metadata) {
611+
idReader.setRowGroupInfo(source, metadata);
612+
posReader.setRowGroupInfo(source, metadata);
613+
}
614+
615+
@Override
616+
public void setBatchSize(int batchSize) {
617+
if (nulls == null || nulls.size() < batchSize) {
618+
this.nulls = newNullabilityHolder(batchSize);
619+
}
620+
621+
idReader.setBatchSize(batchSize);
622+
posReader.setBatchSize(batchSize);
623+
}
624+
625+
@Override
626+
public void close() {
627+
// don't close vectors as they are not owned by readers
628+
}
629+
630+
private static BigIntVector newVector(int valueCount) {
631+
BigIntVector vector =
632+
(BigIntVector) ROW_ID_ARROW_FIELD.createVector(ArrowAllocation.rootAllocator());
633+
vector.allocateNew(valueCount);
634+
return vector;
635+
}
636+
637+
private static NullabilityHolder newNullabilityHolder(int size) {
638+
NullabilityHolder nullabilityHolder = new NullabilityHolder(size);
639+
nullabilityHolder.setNotNulls(0, size);
640+
return nullabilityHolder;
641+
}
642+
}
643+
570644
/**
571645
* A Dummy Vector Reader which doesn't actually read files, instead it returns a dummy
572646
* VectorHolder which indicates the constant value which should be used for this column.

0 commit comments

Comments
 (0)