@@ -127,7 +127,7 @@ struct ChunkMeta {
127
127
}
128
128
129
129
/// A mini-block chunk that has been decoded and decompressed
130
- #[ derive( Debug ) ]
130
+ #[ derive( Debug , Clone ) ]
131
131
struct DecodedMiniBlockChunk {
132
132
rep : Option < ScalarBuffer < u16 > > ,
133
133
def : Option < ScalarBuffer < u16 > > ,
@@ -527,13 +527,42 @@ impl DecodePageTask for DecodeMiniBlockTask {
527
527
528
528
// We need to keep track of the offset into repbuf/defbuf that we are building up
529
529
let mut level_offset = 0 ;
530
+
531
+ // Pre-compute, for each instruction, whether the next instruction reads the same chunk (only then is the decoded chunk worth caching)
532
+ let needs_caching: Vec < bool > = self
533
+ . instructions
534
+ . windows ( 2 )
535
+ . map ( |w| w[ 0 ] . 1 . chunk_idx == w[ 1 ] . 1 . chunk_idx )
536
+ . chain ( std:: iter:: once ( false ) ) // the last one never needs caching
537
+ . collect ( ) ;
538
+
539
+ // Cache for storing decoded chunks when beneficial
540
+ let mut chunk_cache: Option < ( usize , DecodedMiniBlockChunk ) > = None ;
541
+
530
542
// Now we iterate through each instruction and process it
531
- for ( instructions, chunk) in self . instructions . iter ( ) {
532
- // TODO: It's very possible that we have duplicate `buf` in self.instructions and we
533
- // don't want to decode the buf again and again on the same thread.
543
+ for ( idx, ( instructions, chunk) ) in self . instructions . iter ( ) . enumerate ( ) {
544
+ let should_cache_this_chunk = needs_caching[ idx] ;
545
+
546
+ let decoded_chunk = match & chunk_cache {
547
+ Some ( ( cached_chunk_idx, ref cached_chunk) )
548
+ if * cached_chunk_idx == chunk. chunk_idx =>
549
+ {
550
+ // Cache hit: reuse the cached chunk by cloning it (much cheaper than decoding again)
551
+ cached_chunk. clone ( )
552
+ }
553
+ _ => {
554
+ // Cache miss, need to decode
555
+ let decoded = self . decode_miniblock_chunk ( & chunk. data , chunk. items_in_chunk ) ?;
556
+
557
+ // Only update cache if this chunk will benefit the next access
558
+ if should_cache_this_chunk {
559
+ chunk_cache = Some ( ( chunk. chunk_idx , decoded. clone ( ) ) ) ;
560
+ }
561
+ decoded
562
+ }
563
+ } ;
534
564
535
- let DecodedMiniBlockChunk { rep, def, values } =
536
- self . decode_miniblock_chunk ( & chunk. data , chunk. items_in_chunk ) ?;
565
+ let DecodedMiniBlockChunk { rep, def, values } = decoded_chunk;
537
566
538
567
// Our instructions tell us which rows we want to take from this chunk
539
568
let row_range_start =
0 commit comments