-
Notifications
You must be signed in to change notification settings - Fork 227
Expand file tree
/
Copy pathdeserialize_seed.rs
More file actions
176 lines (150 loc) · 5.75 KB
/
deserialize_seed.rs
File metadata and controls
176 lines (150 loc) · 5.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
//! Calamine example to demonstrate stateful deserialization using
//! [`RowDeserializer`] and [`serde::de::DeserializeSeed`].
//!
//! Use this approach when:
//! - Column names are only known at runtime (discovered from the header row),
//! or,
//! - The deserialized value depends on context that cannot be expressed with
//! `#[serde(...)]` attributes alone.
//!
//! The sample Excel file `temperature.xlsx` used in this example contains a
//! single sheet named "Sheet1" with the following layout:
//!
//! ```text
//!  ____________________________________________
//! |         ||                |                |
//! |         ||       A        |       B        |
//! |_________||________________|________________|
//! |    1    || label          | value          |
//! |_________||________________|________________|
//! |    2    || celsius        | 22.2222        |
//! |_________||________________|________________|
//! |    3    || fahrenheit     | 72             |
//! |_________||________________|________________|
//! |_          _________________________________|
//!   \ Sheet1 /
//!     ------
//! ```
use std::collections::HashMap;
use calamine::{open_workbook, Reader, RowDeserializer, Xlsx};
use serde::de::{DeserializeSeed, MapAccess, Visitor};
// ---------------------------------------------------------------------------
// Target type. The deserialized value we want to produce.
// ---------------------------------------------------------------------------
/// One deserialized data row of the worksheet.
#[derive(Debug, PartialEq)]
struct Row {
    /// Text taken from the `label` column.
    label: String,

    /// The raw cell value multiplied by `RowSeed::multiplier`.
    value: f64,
}
// ---------------------------------------------------------------------------
// Seed: Carries the runtime context that influences deserialization.
// ---------------------------------------------------------------------------
/// Runtime context handed to the deserializer.
///
/// `multiplier` is a deliberately simple placeholder for any value that is
/// only available at runtime: a unit conversion factor, a per-sheet setting
/// read from another cell, the result of a database look-up, and so on.
struct RowSeed {
    /// Factor applied to the `value` cell while a row is deserialized.
    multiplier: f64,
}
impl<'de> DeserializeSeed<'de> for RowSeed {
type Value = Row;
fn deserialize<D: serde::Deserializer<'de>>(
self,
deserializer: D,
) -> Result<Self::Value, D::Error> {
deserializer.deserialize_map(RowVisitor {
multiplier: self.multiplier,
})
}
}
// ---------------------------------------------------------------------------
// Visitor: walks the map entries produced by RowDeserializer.
// ---------------------------------------------------------------------------
/// Visitor that assembles a `Row` from map entries, scaling the `value`
/// entry by `multiplier`.
struct RowVisitor {
    /// Factor applied to the deserialized `value` cell.
    multiplier: f64,
}
impl<'de> Visitor<'de> for RowVisitor {
type Value = Row;
fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "A map of spreadsheet cells")
}
fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
let mut label: Option<String> = None;
let mut value: Option<f64> = None;
while let Some(key) = map.next_key::<String>()? {
match key.as_str() {
"label" => label = Some(map.next_value()?),
"value" => value = Some(map.next_value::<f64>()? * self.multiplier),
_ => {
map.next_value::<serde::de::IgnoredAny>()?;
}
}
}
Ok(Row {
label: label.ok_or_else(|| serde::de::Error::missing_field("label"))?,
value: value.ok_or_else(|| serde::de::Error::missing_field("value"))?,
})
}
}
// ---------------------------------------------------------------------------
// Main.
// ---------------------------------------------------------------------------
/// Reads `tests/temperature.xlsx`, deserializes every data row of "Sheet1"
/// into a [`Row`] through a [`RowSeed`], checks the first two results and
/// prints all of them.
///
/// # Errors
///
/// Returns an error if the workbook cannot be opened, the worksheet cannot be
/// read, the sheet has no header row, or a data row fails to deserialize.
///
/// # Panics
///
/// Panics if fewer than two data rows were deserialized or if the first two
/// rows do not match the expected values.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR"));
    let mut workbook: Xlsx<_> = open_workbook(path)?;
    let range = workbook.worksheet_range("Sheet1")?;
    let mut rows = range.rows();

    // Read the header row and build a name/index map at runtime.
    // NOTE: a header name that occurs more than once keeps only its last
    // column index, because later map insertions overwrite earlier ones.
    let header_row = rows.next().ok_or("missing header row")?;
    let headers: HashMap<String, usize> = header_row
        .iter()
        .enumerate()
        .map(|(idx, cell)| (cell.to_string(), idx))
        .collect();

    // Build the (index, name) pairs ordered by column, then split them into
    // the two parallel slices that `RowDeserializer` expects. Column indexes
    // are unique here, so an unstable sort yields the same order.
    let mut header_pairs: Vec<(usize, String)> = headers
        .iter()
        .map(|(name, &idx)| (idx, name.clone()))
        .collect();
    header_pairs.sort_unstable_by_key(|&(idx, _)| idx);
    let (column_indexes, header_names): (Vec<usize>, Vec<String>) =
        header_pairs.into_iter().unzip();

    // Some runtime value — here we use a fixed multiplier, but in practice this
    // could come from another cell, a config file, a database, etc.
    let multiplier = 2.0_f64;

    // Deserialize each data row using the seed. The counter starts at 1
    // because the header row was already consumed; counting directly in `u32`
    // avoids a lossy `usize as u32` cast.
    let mut results: Vec<Row> = Vec::new();
    for (row, row_number) in rows.zip(1_u32..) {
        let de = RowDeserializer::new(
            &column_indexes,
            Some(&header_names),
            row,
            (row_number, 0),
        );
        results.push(RowSeed { multiplier }.deserialize(de)?);
    }

    assert_eq!(
        results[0],
        Row {
            label: "celsius".into(),
            value: 22.2222 * multiplier
        }
    );
    assert_eq!(
        results[1],
        Row {
            label: "fahrenheit".into(),
            value: 72.0 * multiplier
        }
    );

    println!(
        "Deserialized {} rows (multiplier = {multiplier}):",
        results.len()
    );
    for row in &results {
        println!(" {row:?}");
    }

    Ok(())
}