Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit db7d038

Browse files
authored
feat(fuse): support per-column STATS_TRUNCATE_LEN for string statistics (#19815)
* feat(fuse): support per-column STATS_TRUNCATE_LEN for string statistics

  Add a STATS_TRUNCATE_LEN column option to CREATE TABLE DDL, allowing users to specify a custom string truncation length (1-4096) for column-level min/max statistics instead of the default 16 bytes.

  Changes:
  - Add STATS_TRUNCATE_LEN keyword to parser and column definition AST
  - Store field_stats_truncate_len in TableMeta (proto field 39)
  - Plumb truncate length through WriteSettings, BlockBuilder, and ColumnStatisticsState to gen_columns_statistics
  - Add trim_string_min/max_with_len helpers for custom-length trimming
  - Show STATS_TRUNCATE_LEN in SHOW CREATE TABLE output
  - Add proto-conv v175 test and sqllogictest coverage

* z
* z
* z
* z
1 parent aacd59f commit db7d038

79 files changed

Lines changed: 737 additions & 64 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/meta/app/src/schema/table/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,9 @@ pub struct TableMeta {
180180
pub updated_on: DateTime<Utc>,
181181
pub comment: String,
182182
pub field_comments: Vec<String>,
183+
/// Per-column string stats truncation length, keyed by ColumnId.
184+
/// Absent means use the default (STATS_STRING_PREFIX_LEN = 16).
185+
pub field_stats_truncate_len: BTreeMap<ColumnId, u64>,
183186
pub virtual_schema: Option<VirtualDataSchema>,
184187

185188
// if used in CreateTableReq, this field MUST set to None.
@@ -401,6 +404,7 @@ impl Default for TableMeta {
401404
updated_on: Utc::now(),
402405
comment: "".to_string(),
403406
field_comments: vec![],
407+
field_stats_truncate_len: BTreeMap::new(),
404408
virtual_schema: Default::default(),
405409
drop_on: None,
406410
statistics: Default::default(),

src/meta/process/src/pb_value_decoder.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@ mod tests {
360360
let got = normalize_timestamps(&decode_pb_value("__fd_table_by_id/1", &buf));
361361
assert_eq!(
362362
got,
363-
r#"TableMeta { schema: TableSchema { fields: [], metadata: {}, next_column_id: 0 }, engine: "FUSE", engine_options: {}, storage_params: None, part_prefix: "", options: {}, cluster_key: None, cluster_key_v2: None, cluster_key_seq: 0, created_on: <TS>, updated_on: <TS>, comment: "", field_comments: [], virtual_schema: None, drop_on: None, statistics: TableStatistics { number_of_rows: 0, data_bytes: 0, compressed_data_bytes: 0, index_data_bytes: 0, bloom_index_size: None, ngram_index_size: None, inverted_index_size: None, vector_index_size: None, virtual_column_size: None, number_of_segments: None, number_of_blocks: None }, column_mask_policy: None, column_mask_policy_columns_ids: {}, row_access_policy: None, row_access_policy_columns_ids: None, indexes: {}, constraints: {} }"#
363+
r#"TableMeta { schema: TableSchema { fields: [], metadata: {}, next_column_id: 0 }, engine: "FUSE", engine_options: {}, storage_params: None, part_prefix: "", options: {}, cluster_key: None, cluster_key_v2: None, cluster_key_seq: 0, created_on: <TS>, updated_on: <TS>, comment: "", field_comments: [], field_stats_truncate_len: {}, virtual_schema: None, drop_on: None, statistics: TableStatistics { number_of_rows: 0, data_bytes: 0, compressed_data_bytes: 0, index_data_bytes: 0, bloom_index_size: None, ngram_index_size: None, inverted_index_size: None, vector_index_size: None, virtual_column_size: None, number_of_segments: None, number_of_blocks: None }, column_mask_policy: None, column_mask_policy_columns_ids: {}, row_access_policy: None, row_access_policy_columns_ids: None, indexes: {}, constraints: {} }"#
364364
);
365365
}
366366

@@ -488,7 +488,7 @@ mod tests {
488488
got1,
489489
concat!(
490490
" txn.if_then[1].put __fd_table_by_id/20:\n",
491-
r#" TableMeta { schema: TableSchema { fields: [], metadata: {}, next_column_id: 0 }, engine: "FUSE", engine_options: {}, storage_params: None, part_prefix: "", options: {}, cluster_key: None, cluster_key_v2: None, cluster_key_seq: 0, created_on: <TS>, updated_on: <TS>, comment: "", field_comments: [], virtual_schema: None, drop_on: None, statistics: TableStatistics { number_of_rows: 0, data_bytes: 0, compressed_data_bytes: 0, index_data_bytes: 0, bloom_index_size: None, ngram_index_size: None, inverted_index_size: None, vector_index_size: None, virtual_column_size: None, number_of_segments: None, number_of_blocks: None }, column_mask_policy: None, column_mask_policy_columns_ids: {}, row_access_policy: None, row_access_policy_columns_ids: None, indexes: {}, constraints: {} }"#,
491+
r#" TableMeta { schema: TableSchema { fields: [], metadata: {}, next_column_id: 0 }, engine: "FUSE", engine_options: {}, storage_params: None, part_prefix: "", options: {}, cluster_key: None, cluster_key_v2: None, cluster_key_seq: 0, created_on: <TS>, updated_on: <TS>, comment: "", field_comments: [], field_stats_truncate_len: {}, virtual_schema: None, drop_on: None, statistics: TableStatistics { number_of_rows: 0, data_bytes: 0, compressed_data_bytes: 0, index_data_bytes: 0, bloom_index_size: None, ngram_index_size: None, inverted_index_size: None, vector_index_size: None, virtual_column_size: None, number_of_segments: None, number_of_blocks: None }, column_mask_policy: None, column_mask_policy_columns_ids: {}, row_access_policy: None, row_access_policy_columns_ids: None, indexes: {}, constraints: {} }"#,
492492
)
493493
);
494494
}

src/meta/proto-conv/src/impls/table.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ impl FromToProto for mt::TableMeta {
223223
drop_on: p.drop_on.from_pb_opt()?,
224224
comment: p.comment,
225225
field_comments: p.field_comments,
226+
field_stats_truncate_len: p.field_stats_truncate_len.into_iter().collect(),
226227
statistics: p
227228
.statistics
228229
.map(FromToProto::from_pb)
@@ -287,6 +288,7 @@ impl FromToProto for mt::TableMeta {
287288
drop_on: self.drop_on.to_pb_opt()?,
288289
comment: self.comment.clone(),
289290
field_comments: self.field_comments.clone(),
291+
field_stats_truncate_len: self.field_stats_truncate_len.clone(),
290292
statistics: Some(self.statistics.to_pb()?),
291293
column_mask_policy: self.column_mask_policy.clone().unwrap_or_default(),
292294
row_access_policy: self.row_access_policy.clone(),

src/meta/proto-conv/src/util.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,8 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[
203203
(171, "2026-04-02: Update: user.proto/CsvFileFormatParams and TextFileFormatParams add encoding and encoding_error_mode"),
204204
(172, "2026-04-09: Update: user.proto/CsvFileFormatParams and TextFileFormatParams add trim_space"),
205205
(173, "2026-04-16: Update: file_format.proto/CsvFileFormatParams add quote_style"),
206-
(174, "2026-04-28: Add: AuthInfo::KeyPair for key-pair authentication")
206+
(174, "2026-04-28: Add: AuthInfo::KeyPair for key-pair authentication"),
207+
(175, "2026-05-08: Add: field_stats_truncate_len per-column string stats truncation in TableMeta")
207208
// Dear developer:
208209
// If you're gonna add a new metadata version, you'll have to add a test for it.
209210
// You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`)

src/meta/proto-conv/tests/it/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,3 +166,4 @@ mod v171_file_format_encoding;
166166
mod v172_file_format_trim_space;
167167
mod v173_csv_quote_style;
168168
mod v174_user_key_pair;
169+
mod v175_field_stats_truncate_len;

src/meta/proto-conv/tests/it/proto_conv.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ fn new_table_meta() -> mt::TableMeta {
149149
updated_on: Utc.with_ymd_and_hms(2014, 11, 29, 12, 0, 10).unwrap(),
150150
comment: s("table_comment"),
151151
field_comments: vec!["c".to_string(); 21],
152+
field_stats_truncate_len: btreemap! {},
152153
virtual_schema: Some(ce::VirtualDataSchema {
153154
fields: vec![ce::VirtualDataField {
154155
name: "field_0".to_string(),

src/meta/proto-conv/tests/it/v002_table_meta.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ fn test_decode_v2_table_meta() -> anyhow::Result<()> {
138138
updated_on: Utc.with_ymd_and_hms(2014, 11, 29, 12, 0, 10).unwrap(),
139139
comment: s("table_comment"),
140140
field_comments: vec!["c".to_string(); 21],
141+
field_stats_truncate_len: btreemap! {},
141142
virtual_schema: None,
142143
drop_on: None,
143144
statistics: Default::default(),

src/meta/proto-conv/tests/it/v010_table_meta.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ fn test_decode_v10_table_meta() -> anyhow::Result<()> {
140140
updated_on: Utc.with_ymd_and_hms(2014, 11, 29, 12, 0, 10).unwrap(),
141141
comment: s("table_comment"),
142142
field_comments: vec!["c".to_string(); 21],
143+
field_stats_truncate_len: btreemap! {},
143144
virtual_schema: None,
144145
drop_on: None,
145146
statistics: Default::default(),

src/meta/proto-conv/tests/it/v012_table_meta.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ fn test_decode_v12_table_meta() -> anyhow::Result<()> {
142142
updated_on: Utc.with_ymd_and_hms(2014, 11, 29, 12, 0, 10).unwrap(),
143143
comment: s("table_comment"),
144144
field_comments: vec!["c".to_string(); 21],
145+
field_stats_truncate_len: btreemap! {},
145146
virtual_schema: None,
146147
drop_on: None,
147148
statistics: Default::default(),

0 commit comments

Comments
 (0)