@@ -461,6 +461,10 @@ public static VectorizedArrowReader positionsWithSetArrowValidityVector() {
461
461
return new PositionVectorReader (true );
462
462
}
463
463
464
+ public static VectorizedArrowReader rowIds (long firstRowId , VectorizedArrowReader idReader ) {
465
+ return new RowIdVectorReader (firstRowId , idReader );
466
+ }
467
+
464
468
private static final class NullVectorReader extends VectorizedArrowReader {
465
469
private static final NullVectorReader INSTANCE = new NullVectorReader ();
466
470
@@ -567,6 +571,76 @@ public void close() {
567
571
}
568
572
}
569
573
574
+ private static final class RowIdVectorReader extends VectorizedArrowReader {
575
+ private static final Field ROW_ID_ARROW_FIELD = ArrowSchemaUtil .convert (MetadataColumns .ROW_ID );
576
+
577
+ private final long firstRowId ;
578
+ private final VectorizedArrowReader idReader ;
579
+ private final VectorizedArrowReader posReader ;
580
+ private NullabilityHolder nulls ;
581
+
582
+ private RowIdVectorReader (long firstRowId , VectorizedArrowReader idReader ) {
583
+ this .firstRowId = firstRowId ;
584
+ this .idReader = idReader ;
585
+ this .posReader = new PositionVectorReader (true );
586
+ }
587
+
588
+ @ Override
589
+ public VectorHolder read (VectorHolder reuse , int numValsToRead ) {
590
+ FieldVector positions = posReader .read (null , numValsToRead ).vector ();
591
+ FieldVector ids = idReader .read (null , numValsToRead ).vector ();
592
+ BigIntVector vec = newVector (numValsToRead );
593
+ ArrowBuf dataBuffer = vec .getDataBuffer ();
594
+ for (int i = 0 ; i < numValsToRead ; i += 1 ) {
595
+ if (ids .isNull (i )) {
596
+ long rowId = firstRowId + (Long ) positions .getObject (i );
597
+ dataBuffer .setLong ((long ) i * Long .BYTES , rowId );
598
+ } else {
599
+ dataBuffer .setLong ((long ) i * Long .BYTES , (Long ) ids .getObject (i ));
600
+ }
601
+ }
602
+
603
+ vec .setValueCount (numValsToRead );
604
+
605
+ return new VectorHolder .RowIdVectorHolder (vec , MetadataColumns .ROW_POSITION , nulls );
606
+ }
607
+
608
+ @ Override
609
+ public void setRowGroupInfo (
610
+ PageReadStore source , Map <ColumnPath , ColumnChunkMetaData > metadata ) {
611
+ idReader .setRowGroupInfo (source , metadata );
612
+ posReader .setRowGroupInfo (source , metadata );
613
+ }
614
+
615
+ @ Override
616
+ public void setBatchSize (int batchSize ) {
617
+ if (nulls == null || nulls .size () < batchSize ) {
618
+ this .nulls = newNullabilityHolder (batchSize );
619
+ }
620
+
621
+ idReader .setBatchSize (batchSize );
622
+ posReader .setBatchSize (batchSize );
623
+ }
624
+
625
+ @ Override
626
+ public void close () {
627
+ // don't close vectors as they are not owned by readers
628
+ }
629
+
630
+ private static BigIntVector newVector (int valueCount ) {
631
+ BigIntVector vector =
632
+ (BigIntVector ) ROW_ID_ARROW_FIELD .createVector (ArrowAllocation .rootAllocator ());
633
+ vector .allocateNew (valueCount );
634
+ return vector ;
635
+ }
636
+
637
+ private static NullabilityHolder newNullabilityHolder (int size ) {
638
+ NullabilityHolder nullabilityHolder = new NullabilityHolder (size );
639
+ nullabilityHolder .setNotNulls (0 , size );
640
+ return nullabilityHolder ;
641
+ }
642
+ }
643
+
570
644
/**
571
645
* A Dummy Vector Reader which doesn't actually read files, instead it returns a dummy
572
646
* VectorHolder which indicates the constant value which should be used for this column.
0 commit comments