3
3
4
4
//! Utilities for working with datafusion execution plans
5
5
6
- use std:: {
7
- collections:: HashMap ,
8
- fmt:: { self , Formatter } ,
9
- sync:: { Arc , LazyLock , Mutex } ,
10
- time:: Duration ,
11
- } ;
12
-
13
6
use arrow_array:: RecordBatch ;
14
- use arrow_schema:: Schema as ArrowSchema ;
7
+ use arrow_schema:: { Schema as ArrowSchema , SchemaRef } ;
8
+ use datafusion:: physical_plan:: memory:: MemoryStream ;
15
9
use datafusion:: {
16
10
catalog:: streaming:: StreamingTable ,
17
11
dataframe:: DataFrame ,
@@ -33,6 +27,13 @@ use datafusion::{
33
27
} ;
34
28
use datafusion_common:: { DataFusionError , Statistics } ;
35
29
use datafusion_physical_expr:: { EquivalenceProperties , Partitioning } ;
30
+ use std:: any:: Any ;
31
+ use std:: {
32
+ collections:: HashMap ,
33
+ fmt:: { self , Formatter } ,
34
+ sync:: { Arc , LazyLock , Mutex } ,
35
+ time:: Duration ,
36
+ } ;
36
37
37
38
use futures:: { stream, StreamExt } ;
38
39
use lance_arrow:: SchemaExt ;
@@ -201,6 +202,99 @@ impl ExecutionPlan for OneShotExec {
201
202
}
202
203
}
203
204
205
+ /// A source execution node created from existing record batches.
206
+ pub struct RecordBatchExec {
207
+ batches : Vec < RecordBatch > ,
208
+ schema : SchemaRef ,
209
+ properties : PlanProperties ,
210
+ }
211
+
212
+ impl RecordBatchExec {
213
+ pub fn new ( batches : Vec < RecordBatch > ) -> Result < Self > {
214
+ if batches. is_empty ( ) {
215
+ return Err ( Error :: InvalidInput {
216
+ source : "RecordBatchExec requires at least one batch" . into ( ) ,
217
+ location : location ! ( ) ,
218
+ } ) ;
219
+ }
220
+ let schema = batches[ 0 ] . schema ( ) ;
221
+ Ok ( Self {
222
+ batches,
223
+ schema : schema. clone ( ) ,
224
+ properties : PlanProperties :: new (
225
+ EquivalenceProperties :: new ( schema) ,
226
+ Partitioning :: RoundRobinBatch ( 1 ) ,
227
+ EmissionType :: Incremental ,
228
+ Boundedness :: Bounded ,
229
+ ) ,
230
+ } )
231
+ }
232
+ }
233
+
234
+ impl std:: fmt:: Debug for RecordBatchExec {
235
+ fn fmt ( & self , f : & mut std:: fmt:: Formatter ) -> std:: fmt:: Result {
236
+ write ! ( f, "RecordBatchExec" )
237
+ }
238
+ }
239
+
240
+ impl DisplayAs for RecordBatchExec {
241
+ fn fmt_as ( & self , t : DisplayFormatType , f : & mut Formatter ) -> fmt:: Result {
242
+ match t {
243
+ DisplayFormatType :: Default
244
+ | DisplayFormatType :: Verbose
245
+ | DisplayFormatType :: TreeRender => {
246
+ write ! ( f, "RecordBatchExec" )
247
+ }
248
+ }
249
+ }
250
+ }
251
+
252
+ impl ExecutionPlan for RecordBatchExec {
253
+ fn name ( & self ) -> & str {
254
+ "RecordBatchExec"
255
+ }
256
+
257
+ fn as_any ( & self ) -> & dyn Any {
258
+ self
259
+ }
260
+
261
+ fn properties ( & self ) -> & PlanProperties {
262
+ & self . properties
263
+ }
264
+
265
+ fn schema ( & self ) -> SchemaRef {
266
+ self . schema . clone ( )
267
+ }
268
+
269
+ fn children ( & self ) -> Vec < & Arc < dyn ExecutionPlan > > {
270
+ vec ! [ ]
271
+ }
272
+
273
+ fn with_new_children (
274
+ self : Arc < Self > ,
275
+ children : Vec < Arc < dyn ExecutionPlan > > ,
276
+ ) -> datafusion_common:: Result < Arc < dyn ExecutionPlan > > {
277
+ if !children. is_empty ( ) {
278
+ return Err ( DataFusionError :: Internal (
279
+ "RecordBatchExec does not support children" . to_string ( ) ,
280
+ ) ) ;
281
+ }
282
+ Ok ( self )
283
+ }
284
+
285
+ fn execute (
286
+ & self ,
287
+ _partition : usize ,
288
+ _context : Arc < TaskContext > ,
289
+ ) -> datafusion_common:: Result < SendableRecordBatchStream > {
290
+ Ok ( Box :: pin ( MemoryStream :: try_new (
291
+ self . batches . clone ( ) ,
292
+ self . schema . clone ( ) ,
293
+ None ,
294
+ ) ?) )
295
+ }
296
+ }
297
+
204
298
struct TracedExec {
205
299
input : Arc < dyn ExecutionPlan > ,
206
300
properties : PlanProperties ,
0 commit comments