@@ -461,6 +461,53 @@ public static VectorizedArrowReader positionsWithSetArrowValidityVector() {
461
461
return new PositionVectorReader (true );
462
462
}
463
463
464
+ public static VectorizedArrowReader rowIds (Long baseRowId , VectorizedArrowReader idReader ) {
465
+ if (baseRowId != null ) {
466
+ return new RowIdVectorReader (baseRowId , idReader );
467
+ } else {
468
+ return nulls ();
469
+ }
470
+ }
471
+
472
+ public static VectorizedArrowReader lastUpdated (
473
+ Long baseRowId , Long fileLastUpdated , VectorizedArrowReader seqReader ) {
474
+ if (fileLastUpdated != null && baseRowId != null ) {
475
+ return new LastUpdatedSeqVectorReader (fileLastUpdated , seqReader );
476
+ } else {
477
+ return nulls ();
478
+ }
479
+ }
480
+
481
+ public static VectorizedReader <?> replaceWithMetadataReader (
482
+ Types .NestedField icebergField ,
483
+ VectorizedReader <?> reader ,
484
+ Map <Integer , ?> idToConstant ,
485
+ boolean setArrowValidityVector ) {
486
+ int id = icebergField .fieldId ();
487
+ if (id == MetadataColumns .ROW_ID .fieldId ()) {
488
+ Long baseRowId = (Long ) idToConstant .get (id );
489
+ return rowIds (baseRowId , (VectorizedArrowReader ) reader );
490
+ } else if (id == MetadataColumns .LAST_UPDATED_SEQUENCE_NUMBER .fieldId ()) {
491
+ Long baseRowId = (Long ) idToConstant .get (MetadataColumns .ROW_ID .fieldId ());
492
+ Long fileSeqNumber = (Long ) idToConstant .get (id );
493
+ return VectorizedArrowReader .lastUpdated (
494
+ baseRowId , fileSeqNumber , (VectorizedArrowReader ) reader );
495
+ } else if (idToConstant .containsKey (id )) {
496
+ // containsKey is used because the constant may be null
497
+ return new ConstantVectorReader <>(icebergField , idToConstant .get (id ));
498
+ } else if (id == MetadataColumns .ROW_POSITION .fieldId ()) {
499
+ if (setArrowValidityVector ) {
500
+ return positionsWithSetArrowValidityVector ();
501
+ } else {
502
+ return VectorizedArrowReader .positions ();
503
+ }
504
+ } else if (id == MetadataColumns .IS_DELETED .fieldId ()) {
505
+ return new DeletedVectorReader ();
506
+ }
507
+
508
+ return reader ;
509
+ }
510
+
464
511
private static final class NullVectorReader extends VectorizedArrowReader {
465
512
private static final NullVectorReader INSTANCE = new NullVectorReader ();
466
513
@@ -530,12 +577,6 @@ private static BigIntVector newVector(int valueCount) {
530
577
return vector ;
531
578
}
532
579
533
// Shown with '-' diff markers (the removed side of this hunk): creates a NullabilityHolder of
// the given size, calls setNotNulls(0, size) on it, and returns it.
- private static NullabilityHolder newNullabilityHolder (int size ) {
534
- NullabilityHolder nullabilityHolder = new NullabilityHolder (size );
535
- nullabilityHolder .setNotNulls (0 , size );
536
- return nullabilityHolder ;
537
- }
538
-
539
580
@ Override
540
581
public void setRowGroupInfo (
541
582
PageReadStore source , Map <ColumnPath , ColumnChunkMetaData > metadata ) {
@@ -567,6 +608,164 @@ public void close() {
567
608
}
568
609
}
569
610
611
+ private static final class RowIdVectorReader extends VectorizedArrowReader {
612
+ private static final Field ROW_ID_ARROW_FIELD = ArrowSchemaUtil .convert (MetadataColumns .ROW_ID );
613
+
614
+ private final long firstRowId ;
615
+ private final VectorizedReader <VectorHolder > idReader ;
616
+ private final VectorizedReader <VectorHolder > posReader ;
617
+ private NullabilityHolder nulls ;
618
+
619
+ private RowIdVectorReader (long firstRowId , VectorizedArrowReader idReader ) {
620
+ this .firstRowId = firstRowId ;
621
+ this .idReader = idReader != null ? idReader : nulls ();
622
+ this .posReader = new PositionVectorReader (true );
623
+ }
624
+
625
+ @ Override
626
+ public VectorHolder read (VectorHolder reuse , int numValsToRead ) {
627
+ FieldVector positions = null ;
628
+ FieldVector ids = null ;
629
+
630
+ try {
631
+ positions = posReader .read (null , numValsToRead ).vector ();
632
+ VectorHolder idsHolder = idReader .read (null , numValsToRead );
633
+ ids = idsHolder .vector ();
634
+ ArrowVectorAccessor <?, String , ?, ?> idsAccessor =
635
+ ids == null ? null : ArrowVectorAccessors .getVectorAccessor (idsHolder );
636
+
637
+ BigIntVector rowIds = allocateBigIntVector (ROW_ID_ARROW_FIELD , numValsToRead );
638
+ ArrowBuf dataBuffer = rowIds .getDataBuffer ();
639
+ for (int i = 0 ; i < numValsToRead ; i += 1 ) {
640
+ long bufferOffset = (long ) i * Long .BYTES ;
641
+ if (idsAccessor == null || isNull (idsHolder , i )) {
642
+ long rowId = firstRowId + (Long ) positions .getObject (i );
643
+ dataBuffer .setLong (bufferOffset , rowId );
644
+ } else {
645
+ long materializedRowId = idsAccessor .getLong (i );
646
+ dataBuffer .setLong (bufferOffset , materializedRowId );
647
+ }
648
+ }
649
+
650
+ rowIds .setValueCount (numValsToRead );
651
+ return VectorHolder .vectorHolder (rowIds , MetadataColumns .ROW_ID , nulls );
652
+ } finally {
653
+ if (positions != null ) {
654
+ positions .close ();
655
+ }
656
+
657
+ if (ids != null ) {
658
+ ids .close ();
659
+ }
660
+ }
661
+ }
662
+
663
+ @ Override
664
+ public void setRowGroupInfo (
665
+ PageReadStore source , Map <ColumnPath , ColumnChunkMetaData > metadata ) {
666
+ idReader .setRowGroupInfo (source , metadata );
667
+ posReader .setRowGroupInfo (source , metadata );
668
+ }
669
+
670
+ @ Override
671
+ public void setBatchSize (int batchSize ) {
672
+ if (nulls == null || nulls .size () < batchSize ) {
673
+ this .nulls = newNullabilityHolder (batchSize );
674
+ }
675
+
676
+ idReader .setBatchSize (batchSize );
677
+ posReader .setBatchSize (batchSize );
678
+ }
679
+
680
+ @ Override
681
+ public void close () {
682
+ // don't close result vectors as they are not owned by readers
683
+ }
684
+ }
685
+
686
+ private static final class LastUpdatedSeqVectorReader extends VectorizedArrowReader {
687
+ private static final Field LAST_UPDATED_SEQ =
688
+ ArrowSchemaUtil .convert (MetadataColumns .LAST_UPDATED_SEQUENCE_NUMBER );
689
+
690
+ private final long lastUpdatedSeq ;
691
+ private final VectorizedReader <VectorHolder > seqReader ;
692
+ private NullabilityHolder nulls ;
693
+
694
+ private LastUpdatedSeqVectorReader (
695
+ long lastUpdatedSeq , VectorizedReader <VectorHolder > seqReader ) {
696
+ this .lastUpdatedSeq = lastUpdatedSeq ;
697
+ this .seqReader = seqReader == null ? nulls () : seqReader ;
698
+ }
699
+
700
+ @ Override
701
+ public VectorHolder read (VectorHolder reuse , int numValsToRead ) {
702
+ FieldVector seqNumbers = null ;
703
+ try {
704
+ VectorHolder seqNumbersHolder = seqReader .read (null , numValsToRead );
705
+ seqNumbers = seqNumbersHolder .vector ();
706
+ ArrowVectorAccessor <?, String , ?, ?> seqAccessor =
707
+ seqNumbers == null ? null : ArrowVectorAccessors .getVectorAccessor (seqNumbersHolder );
708
+
709
+ BigIntVector lastUpdatedSequenceNumbers =
710
+ allocateBigIntVector (LAST_UPDATED_SEQ , numValsToRead );
711
+ ArrowBuf dataBuffer = lastUpdatedSequenceNumbers .getDataBuffer ();
712
+ for (int i = 0 ; i < numValsToRead ; i += 1 ) {
713
+ long bufferOffset = (long ) i * Long .BYTES ;
714
+ if (seqAccessor == null || isNull (seqNumbersHolder , i )) {
715
+ dataBuffer .setLong (bufferOffset , lastUpdatedSeq );
716
+ } else {
717
+ long materializedSeqNumber = seqAccessor .getLong (i );
718
+ dataBuffer .setLong (bufferOffset , materializedSeqNumber );
719
+ }
720
+ }
721
+
722
+ lastUpdatedSequenceNumbers .setValueCount (numValsToRead );
723
+ return VectorHolder .vectorHolder (
724
+ lastUpdatedSequenceNumbers , MetadataColumns .LAST_UPDATED_SEQUENCE_NUMBER , nulls );
725
+ } finally {
726
+ if (seqNumbers != null ) {
727
+ seqNumbers .close ();
728
+ }
729
+ }
730
+ }
731
+
732
+ @ Override
733
+ public void setRowGroupInfo (
734
+ PageReadStore source , Map <ColumnPath , ColumnChunkMetaData > metadata ) {
735
+ seqReader .setRowGroupInfo (source , metadata );
736
+ }
737
+
738
+ @ Override
739
+ public void setBatchSize (int batchSize ) {
740
+ if (nulls == null || nulls .size () < batchSize ) {
741
+ this .nulls = newNullabilityHolder (batchSize );
742
+ }
743
+
744
+ seqReader .setBatchSize (batchSize );
745
+ }
746
+
747
+ @ Override
748
+ public void close () {
749
+ // don't close result vectors as they are not owned by readers
750
+ }
751
+ }
752
+
753
+ private static boolean isNull (VectorHolder holder , int index ) {
754
+ return holder .nullabilityHolder ().isNullAt (index ) == 1 ;
755
+ }
756
+
757
+ private static BigIntVector allocateBigIntVector (Field field , int valueCount ) {
758
+ BigIntVector vector = (BigIntVector ) field .createVector (ArrowAllocation .rootAllocator ());
759
+ vector .allocateNew (valueCount );
760
+ return vector ;
761
+ }
762
+
763
+ private static NullabilityHolder newNullabilityHolder (int size ) {
764
+ NullabilityHolder nullabilityHolder = new NullabilityHolder (size );
765
+ nullabilityHolder .setNotNulls (0 , size );
766
+ return nullabilityHolder ;
767
+ }
768
+
570
769
/**
571
770
* A Dummy Vector Reader which doesn't actually read files, instead it returns a dummy
572
771
* VectorHolder which indicates the constant value which should be used for this column.
0 commit comments