Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Prev Previous commit
Next Next commit
allow for skip_arrow_metadata
  • Loading branch information
rok committed Sep 24, 2025
commit 2e50f9f64f253adf41a880f104381c9c3d5bb9a9
15 changes: 10 additions & 5 deletions datafusion/datasource-parquet/src/file_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,7 @@ impl FileSink for ParquetSink {
.build()?;
let schema = get_writer_schema(&self.config);
let props = parquet_props.clone();
let skip_arrow_metadata = self.parquet_options.global.skip_arrow_metadata;
let parallel_options_clone = parallel_options.clone();
let pool = Arc::clone(context.memory_pool());
file_write_tasks.spawn(async move {
Expand All @@ -1321,6 +1322,7 @@ impl FileSink for ParquetSink {
rx,
schema,
&props,
skip_arrow_metadata,
parallel_options_clone,
pool,
)
Expand Down Expand Up @@ -1647,7 +1649,8 @@ async fn output_single_parquet_file_parallelized(
object_store_writer: Box<dyn AsyncWrite + Send + Unpin>,
data: Receiver<RecordBatch>,
output_schema: Arc<Schema>,
parquet_props: &WriterProperties,
writer_properties: &WriterProperties,
skip_arrow_metadata: bool,
parallel_options: ParallelParquetWriterOptions,
pool: Arc<dyn MemoryPool>,
) -> Result<FileMetaData> {
Expand All @@ -1657,20 +1660,22 @@ async fn output_single_parquet_file_parallelized(
mpsc::channel::<SpawnedTask<RBStreamSerializeResult>>(max_rowgroups);

let merged_buff = SharedBuffer::new(INITIAL_BUFFER_BYTES);
let writer = ArrowWriter::try_new(
let options = ArrowWriterOptions::new()
.with_properties(writer_properties.clone())
.with_skip_arrow_metadata(skip_arrow_metadata);
let writer = ArrowWriter::try_new_with_options(
merged_buff.clone(),
Arc::clone(&output_schema),
Some(parquet_props.clone()),
options,
)?;
let (writer, row_group_writer_factory) = writer.into_serialized_writer()?;

let arc_props = Arc::new(parquet_props.clone());
let launch_serialization_task = spawn_parquet_parallel_serialization_task(
row_group_writer_factory,
data,
serialize_tx,
Arc::clone(&output_schema),
Arc::clone(&arc_props),
Arc::new(writer_properties.clone()),
parallel_options,
Arc::clone(&pool),
);
Expand Down