18
18
*/
19
19
package org .apache .iceberg .arrow .vectorized ;
20
20
21
+ import static org .apache .iceberg .arrow .vectorized .ArrowVectorAccessors .getVectorAccessor ;
22
+
21
23
import java .util .Map ;
22
24
import org .apache .arrow .memory .ArrowBuf ;
23
25
import org .apache .arrow .memory .BufferAllocator ;
@@ -461,8 +463,47 @@ public static VectorizedArrowReader positionsWithSetArrowValidityVector() {
461
463
return new PositionVectorReader (true );
462
464
}
463
465
464
- public static VectorizedArrowReader rowIds (long firstRowId , VectorizedArrowReader idReader ) {
465
- return new RowIdVectorReader (firstRowId , idReader );
466
+ public static VectorizedArrowReader rowIds (long baseRowId , VectorizedArrowReader idReader ) {
467
+ return new RowIdVectorReader (baseRowId , idReader );
468
+ }
469
+
470
+ public static VectorizedArrowReader lastUpdated (
471
+ Long baseRowId , Long fileLastUpdated , VectorizedArrowReader seqReader ) {
472
+ if (fileLastUpdated != null && baseRowId != null ) {
473
+ return new LastUpdatedSeqVectorReader (fileLastUpdated , seqReader );
474
+ } else {
475
+ return nulls ();
476
+ }
477
+ }
478
+
479
+ public static VectorizedReader <?> replaceWithMetadataReader (
480
+ Types .NestedField icebergField ,
481
+ VectorizedReader <?> reader ,
482
+ Map <Integer , ?> idToConstant ,
483
+ boolean setArrowValidityVector ) {
484
+ int id = icebergField .fieldId ();
485
+ if (id == MetadataColumns .ROW_ID .fieldId ()) {
486
+ Long baseRowId = (Long ) idToConstant .get (id );
487
+ return rowIds (baseRowId , (VectorizedArrowReader ) reader );
488
+ } else if (id == MetadataColumns .LAST_UPDATED_SEQUENCE_NUMBER .fieldId ()) {
489
+ Long baseRowId = (Long ) idToConstant .get (id );
490
+ Long fileSeqNumber = (Long ) idToConstant .get (id );
491
+ return VectorizedArrowReader .lastUpdated (
492
+ baseRowId , fileSeqNumber , (VectorizedArrowReader ) reader );
493
+ } else if (idToConstant .containsKey (id )) {
494
+ // containsKey is used because the constant may be null
495
+ return new ConstantVectorReader <>(icebergField , idToConstant .get (id ));
496
+ } else if (id == MetadataColumns .ROW_POSITION .fieldId ()) {
497
+ if (setArrowValidityVector ) {
498
+ return positionsWithSetArrowValidityVector ();
499
+ } else {
500
+ return VectorizedArrowReader .positions ();
501
+ }
502
+ } else if (id == MetadataColumns .IS_DELETED .fieldId ()) {
503
+ return new DeletedVectorReader ();
504
+ }
505
+
506
+ return reader ;
466
507
}
467
508
468
509
private static final class NullVectorReader extends VectorizedArrowReader {
@@ -534,12 +575,6 @@ private static BigIntVector newVector(int valueCount) {
534
575
return vector ;
535
576
}
536
577
537
- private static NullabilityHolder newNullabilityHolder (int size ) {
538
- NullabilityHolder nullabilityHolder = new NullabilityHolder (size );
539
- nullabilityHolder .setNotNulls (0 , size );
540
- return nullabilityHolder ;
541
- }
542
-
543
578
@ Override
544
579
public void setRowGroupInfo (
545
580
PageReadStore source , Map <ColumnPath , ColumnChunkMetaData > metadata ) {
@@ -575,34 +610,38 @@ private static final class RowIdVectorReader extends VectorizedArrowReader {
575
610
private static final Field ROW_ID_ARROW_FIELD = ArrowSchemaUtil .convert (MetadataColumns .ROW_ID );
576
611
577
612
private final long firstRowId ;
578
- private final VectorizedArrowReader idReader ;
579
- private final VectorizedArrowReader posReader ;
613
+ private final VectorizedReader < VectorHolder > idReader ;
614
+ private final VectorizedReader < VectorHolder > posReader ;
580
615
private NullabilityHolder nulls ;
581
616
582
617
/**
 * Creates a row ID reader.
 *
 * @param firstRowId value added to a row's position when the row has no stored ID
 * @param idReader reader for stored row IDs; a null reader is substituted when this is null
 */
private RowIdVectorReader(long firstRowId, VectorizedArrowReader idReader) {
  this.firstRowId = firstRowId;
  this.posReader = new PositionVectorReader(true);
  if (idReader == null) {
    this.idReader = nulls();
  } else {
    this.idReader = idReader;
  }
}
587
622
588
623
@ Override
589
624
public VectorHolder read (VectorHolder reuse , int numValsToRead ) {
590
625
FieldVector positions = posReader .read (null , numValsToRead ).vector ();
591
- FieldVector ids = idReader .read (null , numValsToRead ). vector ( );
592
- BigIntVector vec = newVector ( numValsToRead );
626
+ VectorHolder ids = idReader .read (null , numValsToRead );
627
+ BigIntVector vec = newBigIntVector ( ROW_ID_ARROW_FIELD , numValsToRead );
593
628
ArrowBuf dataBuffer = vec .getDataBuffer ();
629
+ boolean isNullReader = ids .vector () == null ;
630
+ ArrowVectorAccessor <?, String , ?, ?> idsAccessor =
631
+ isNullReader ? null : getVectorAccessor (ids );
594
632
for (int i = 0 ; i < numValsToRead ; i += 1 ) {
595
- if (ids .isNull (i )) {
633
+ long bufferOffset = (long ) i * Long .BYTES ;
634
+ if (isNullReader || ids .nullabilityHolder ().isNullAt (i ) == 1 ) {
596
635
long rowId = firstRowId + (Long ) positions .getObject (i );
597
- dataBuffer .setLong (( long ) i * Long . BYTES , rowId );
636
+ dataBuffer .setLong (bufferOffset , rowId );
598
637
} else {
599
- dataBuffer .setLong ((long ) i * Long .BYTES , (Long ) ids .getObject (i ));
638
+ long materializedRowId = idsAccessor .getLong (i );
639
+ dataBuffer .setLong (bufferOffset , materializedRowId );
600
640
}
601
641
}
602
642
603
643
vec .setValueCount (numValsToRead );
604
-
605
- return new VectorHolder .RowIdVectorHolder (vec , MetadataColumns .ROW_POSITION , nulls );
644
+ return VectorHolder .vectorHolder (vec , MetadataColumns .ROW_ID , nulls );
606
645
}
607
646
608
647
@ Override
@@ -626,21 +665,76 @@ public void setBatchSize(int batchSize) {
626
665
public void close () {
627
666
// don't close vectors as they are not owned by readers
628
667
}
668
+ }
629
669
630
- private static BigIntVector newVector (int valueCount ) {
631
- BigIntVector vector =
632
- (BigIntVector ) ROW_ID_ARROW_FIELD .createVector (ArrowAllocation .rootAllocator ());
633
- vector .allocateNew (valueCount );
634
- return vector ;
670
+ private static final class LastUpdatedSeqVectorReader extends VectorizedArrowReader {
671
+ private static final Field LAST_UPDATED_SEQ =
672
+ ArrowSchemaUtil .convert (MetadataColumns .LAST_UPDATED_SEQUENCE_NUMBER );
673
+
674
+ private final long lastUpdatedSeq ;
675
+ private final VectorizedReader <VectorHolder > seqReader ;
676
+ private NullabilityHolder nulls ;
677
+
678
+ private LastUpdatedSeqVectorReader (
679
+ long lastUpdatedSeq , VectorizedReader <VectorHolder > seqReader ) {
680
+ this .lastUpdatedSeq = lastUpdatedSeq ;
681
+ this .seqReader = seqReader == null ? nulls () : seqReader ;
682
+ }
683
+
684
+ @ Override
685
+ public VectorHolder read (VectorHolder reuse , int numValsToRead ) {
686
+ BigIntVector vec = newBigIntVector (LAST_UPDATED_SEQ , numValsToRead );
687
+ ArrowBuf dataBuffer = vec .getDataBuffer ();
688
+ VectorHolder seqNumbers = seqReader .read (null , numValsToRead );
689
+ ArrowVectorAccessor <?, String , ?, ?> accessor =
690
+ seqNumbers .vector () == null ? null : getVectorAccessor (seqNumbers );
691
+ for (int i = 0 ; i < numValsToRead ; i += 1 ) {
692
+ long bufferOffset = (long ) i * Long .BYTES ;
693
+ if (seqNumbers .vector () == null || seqNumbers .nullabilityHolder ().isNullAt (i ) == 1 ) {
694
+ dataBuffer .setLong (bufferOffset , lastUpdatedSeq );
695
+ } else {
696
+ long materializedSeqNumber = accessor .getLong (i );
697
+ dataBuffer .setLong (bufferOffset , materializedSeqNumber );
698
+ }
699
+ }
700
+
701
+ vec .setValueCount (numValsToRead );
702
+ return VectorHolder .vectorHolder (vec , MetadataColumns .LAST_UPDATED_SEQUENCE_NUMBER , nulls );
703
+ }
704
+
705
+ @ Override
706
+ public void setRowGroupInfo (
707
+ PageReadStore source , Map <ColumnPath , ColumnChunkMetaData > metadata ) {
708
+ seqReader .setRowGroupInfo (source , metadata );
709
+ }
710
+
711
+ @ Override
712
+ public void setBatchSize (int batchSize ) {
713
+ if (nulls == null || nulls .size () < batchSize ) {
714
+ this .nulls = newNullabilityHolder (batchSize );
715
+ }
716
+
717
+ seqReader .setBatchSize (batchSize );
635
718
}
636
719
637
- private static NullabilityHolder newNullabilityHolder (int size ) {
638
- NullabilityHolder nullabilityHolder = new NullabilityHolder (size );
639
- nullabilityHolder .setNotNulls (0 , size );
640
- return nullabilityHolder ;
720
+ @ Override
721
+ public void close () {
722
+ // don't close vectors as they are not owned by readers
641
723
}
642
724
}
643
725
726
+ private static BigIntVector newBigIntVector (Field field , int valueCount ) {
727
+ BigIntVector vector = (BigIntVector ) field .createVector (ArrowAllocation .rootAllocator ());
728
+ vector .allocateNew (valueCount );
729
+ return vector ;
730
+ }
731
+
732
+ private static NullabilityHolder newNullabilityHolder (int size ) {
733
+ NullabilityHolder nullabilityHolder = new NullabilityHolder (size );
734
+ nullabilityHolder .setNotNulls (0 , size );
735
+ return nullabilityHolder ;
736
+ }
737
+
644
738
/**
645
739
* A Dummy Vector Reader which doesn't actually read files, instead it returns a dummy
646
740
* VectorHolder which indicates the constant value which should be used for this column.
0 commit comments