@@ -461,8 +461,43 @@ public static VectorizedArrowReader positionsWithSetArrowValidityVector() {
461
461
return new PositionVectorReader (true );
462
462
}
463
463
464
- public static VectorizedArrowReader rowIds (long firstRowId , VectorizedArrowReader idReader ) {
465
- return new RowIdVectorReader (firstRowId , idReader );
464
+ public static VectorizedArrowReader rowIds (long baseRowId , VectorizedArrowReader idReader ) {
465
+ return new RowIdVectorReader (baseRowId , idReader );
466
+ }
467
+
468
+ public static VectorizedArrowReader lastUpdated (
469
+ Long baseRowId , Long fileLastUpdated , VectorizedArrowReader seqReader ) {
470
+ if (fileLastUpdated != null && baseRowId != null ) {
471
+ return new LastUpdatedSeqVectorReader (fileLastUpdated , seqReader );
472
+ } else {
473
+ return nulls ();
474
+ }
475
+ }
476
+
477
+ public static VectorizedReader <?> replaceWithMetadataReader (
478
+ int id ,
479
+ VectorizedReader <?> reader ,
480
+ Map <Integer , ?> idToConstant ,
481
+ boolean setArrowValidityVector ) {
482
+ if (id == MetadataColumns .ROW_ID .fieldId ()) {
483
+ Long baseRowId = (Long ) idToConstant .get (id );
484
+ return rowIds (baseRowId , (VectorizedArrowReader ) reader );
485
+ } else if (id == MetadataColumns .LAST_UPDATED_SEQUENCE_NUMBER .fieldId ()) {
486
+ Long baseRowId = (Long ) idToConstant .get (id );
487
+ Long fileSeqNumber = (Long ) idToConstant .get (id );
488
+ return VectorizedArrowReader .lastUpdated (
489
+ baseRowId , fileSeqNumber , (VectorizedArrowReader ) reader );
490
+ } else if (id == MetadataColumns .ROW_POSITION .fieldId ()) {
491
+ if (setArrowValidityVector ) {
492
+ return positionsWithSetArrowValidityVector ();
493
+ } else {
494
+ return VectorizedArrowReader .positions ();
495
+ }
496
+ } else if (id == MetadataColumns .IS_DELETED .fieldId ()) {
497
+ return new DeletedVectorReader ();
498
+ }
499
+
500
+ return reader ;
466
501
}
467
502
468
503
private static final class NullVectorReader extends VectorizedArrowReader {
@@ -575,8 +610,8 @@ private static final class RowIdVectorReader extends VectorizedArrowReader {
575
610
private static final Field ROW_ID_ARROW_FIELD = ArrowSchemaUtil .convert (MetadataColumns .ROW_ID );
576
611
577
612
private final long firstRowId ;
578
- private final VectorizedArrowReader idReader ;
579
- private final VectorizedArrowReader posReader ;
613
+ private final VectorizedReader < VectorHolder > idReader ;
614
+ private final VectorizedReader < VectorHolder > posReader ;
580
615
private NullabilityHolder nulls ;
581
616
582
617
private RowIdVectorReader (long firstRowId , VectorizedArrowReader idReader ) {
@@ -588,21 +623,23 @@ private RowIdVectorReader(long firstRowId, VectorizedArrowReader idReader) {
588
623
@ Override
589
624
public VectorHolder read (VectorHolder reuse , int numValsToRead ) {
590
625
FieldVector positions = posReader .read (null , numValsToRead ).vector ();
591
- FieldVector ids = idReader .read (null , numValsToRead ). vector ( );
626
+ VectorHolder ids = idReader .read (null , numValsToRead );
592
627
BigIntVector vec = newVector (numValsToRead );
593
628
ArrowBuf dataBuffer = vec .getDataBuffer ();
594
629
for (int i = 0 ; i < numValsToRead ; i += 1 ) {
595
- if (ids .isNull (i )) {
630
+ long bufferOffset = (long ) i * Long .BYTES ;
631
+ if (ids .nullabilityHolder ().isNullAt (i ) == 1 ) {
596
632
long rowId = firstRowId + (Long ) positions .getObject (i );
597
- dataBuffer .setLong (( long ) i * Long . BYTES , rowId );
633
+ dataBuffer .setLong (bufferOffset , rowId );
598
634
} else {
599
- dataBuffer .setLong ((long ) i * Long .BYTES , (Long ) ids .getObject (i ));
635
+ long materializedRowId = ids .vector ().getDataBuffer ().getLong (bufferOffset );
636
+ dataBuffer .setLong (bufferOffset , materializedRowId );
600
637
}
601
638
}
602
639
603
640
vec .setValueCount (numValsToRead );
604
641
605
- return new VectorHolder .RowIdVectorHolder (vec , MetadataColumns .ROW_POSITION , nulls );
642
+ return new VectorHolder .RowIdVectorHolder (vec , MetadataColumns .ROW_ID , nulls );
606
643
}
607
644
608
645
@ Override
@@ -641,6 +678,74 @@ private static NullabilityHolder newNullabilityHolder(int size) {
641
678
}
642
679
}
643
680
681
+ private static final class LastUpdatedSeqVectorReader extends VectorizedArrowReader {
682
+ private static final Field LAST_UPDATED_SEQ =
683
+ ArrowSchemaUtil .convert (MetadataColumns .LAST_UPDATED_SEQUENCE_NUMBER );
684
+
685
+ private final long lastUpdatedSeq ;
686
+ private final VectorizedReader <VectorHolder > seqReader ;
687
+ private NullabilityHolder nulls ;
688
+
689
+ private LastUpdatedSeqVectorReader (
690
+ long lastUpdatedSeq , VectorizedReader <VectorHolder > seqReader ) {
691
+ this .lastUpdatedSeq = lastUpdatedSeq ;
692
+ this .seqReader = seqReader == null ? nulls () : seqReader ;
693
+ }
694
+
695
+ @ Override
696
+ public VectorHolder read (VectorHolder reuse , int numValsToRead ) {
697
+ VectorHolder seqNumbers = seqReader .read (null , numValsToRead );
698
+ BigIntVector vec = newVector (numValsToRead );
699
+ ArrowBuf dataBuffer = vec .getDataBuffer ();
700
+ for (int i = 0 ; i < numValsToRead ; i += 1 ) {
701
+ long bufferOffset = (long ) i * Long .BYTES ;
702
+ if (seqNumbers .nullabilityHolder ().isNullAt (i ) == 1 ) {
703
+ dataBuffer .setLong (bufferOffset , lastUpdatedSeq );
704
+ } else {
705
+ long materializedRowId = seqNumbers .vector ().getDataBuffer ().getLong (bufferOffset );
706
+ dataBuffer .setLong (bufferOffset , materializedRowId );
707
+ }
708
+ }
709
+
710
+ vec .setValueCount (numValsToRead );
711
+
712
+ return new VectorHolder .RowIdVectorHolder (vec , MetadataColumns .ROW_ID , nulls );
713
+ }
714
+
715
+ @ Override
716
+ public void setRowGroupInfo (
717
+ PageReadStore source , Map <ColumnPath , ColumnChunkMetaData > metadata ) {
718
+ seqReader .setRowGroupInfo (source , metadata );
719
+ }
720
+
721
+ @ Override
722
+ public void setBatchSize (int batchSize ) {
723
+ if (nulls == null || nulls .size () < batchSize ) {
724
+ this .nulls = newNullabilityHolder (batchSize );
725
+ }
726
+
727
+ seqReader .setBatchSize (batchSize );
728
+ }
729
+
730
+ @ Override
731
+ public void close () {
732
+ // don't close vectors as they are not owned by readers
733
+ }
734
+
735
+ private static BigIntVector newVector (int valueCount ) {
736
+ BigIntVector vector =
737
+ (BigIntVector ) LAST_UPDATED_SEQ .createVector (ArrowAllocation .rootAllocator ());
738
+ vector .allocateNew (valueCount );
739
+ return vector ;
740
+ }
741
+
742
+ private static NullabilityHolder newNullabilityHolder (int size ) {
743
+ NullabilityHolder nullabilityHolder = new NullabilityHolder (size );
744
+ nullabilityHolder .setNotNulls (0 , size );
745
+ return nullabilityHolder ;
746
+ }
747
+ }
748
+
644
749
/**
645
750
* A Dummy Vector Reader which doesn't actually read files, instead it returns a dummy
646
751
* VectorHolder which indicates the constant value which should be used for this column.
0 commit comments