Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 92bb655

Browse files
karanabesylvestre
authored and committed
fix(base64): allow padded chunks mid-stream
1 parent 859a1ed commit 92bb655

File tree

3 files changed

+111
-19
lines changed

3 files changed

+111
-19
lines changed

src/uu/base32/src/base_common.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -171,18 +171,16 @@ pub fn get_input(config: &Config) -> UResult<Box<dyn ReadSeek>> {
171171
}
172172
}
173173

174-
/// Determines if the input buffer ends with padding ('=') after trimming trailing whitespace.
174+
/// Determines if the input buffer contains any padding ('=') ignoring trailing whitespace.
175175
fn read_and_has_padding<R: Read>(input: &mut R) -> UResult<(bool, Vec<u8>)> {
176176
let mut buf = Vec::new();
177177
input
178178
.read_to_end(&mut buf)
179179
.map_err(|err| USimpleError::new(1, format_read_error(err.kind())))?;
180180

181-
// Reverse iterator and skip trailing whitespace without extra collections
182-
let has_padding = buf
183-
.iter()
184-
.rfind(|&&byte| !byte.is_ascii_whitespace())
185-
.is_some_and(|&byte| byte == b'=');
181+
// Treat the stream as padded if any '=' exists (GNU coreutils continues decoding
182+
// even when padding bytes are followed by more data).
183+
let has_padding = buf.contains(&b'=');
186184

187185
Ok((has_padding, buf))
188186
}
@@ -665,6 +663,8 @@ mod tests {
665663
("aGVsbG8sIHdvcmxkIQ== \n", true),
666664
("aGVsbG8sIHdvcmxkIQ=", true),
667665
("aGVsbG8sIHdvcmxkIQ= ", true),
666+
("MTIzNA==MTIzNA", true),
667+
("MTIzNA==\nMTIzNA", true),
668668
("aGVsbG8sIHdvcmxkIQ \n", false),
669669
("aGVsbG8sIHdvcmxkIQ", false),
670670
];

src/uucore/src/lib/features/encoding.rs

Lines changed: 75 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,26 @@ pub struct Base64SimdWrapper {
2222
}
2323

2424
impl Base64SimdWrapper {
25+
fn decode_with_standard(input: &[u8], output: &mut Vec<u8>) -> Result<(), ()> {
26+
match base64_simd::STANDARD.decode_to_vec(input) {
27+
Ok(decoded_bytes) => {
28+
output.extend_from_slice(&decoded_bytes);
29+
Ok(())
30+
}
31+
Err(_) => Err(()),
32+
}
33+
}
34+
35+
fn decode_with_no_pad(input: &[u8], output: &mut Vec<u8>) -> Result<(), ()> {
36+
match base64_simd::STANDARD_NO_PAD.decode_to_vec(input) {
37+
Ok(decoded_bytes) => {
38+
output.extend_from_slice(&decoded_bytes);
39+
Ok(())
40+
}
41+
Err(_) => Err(()),
42+
}
43+
}
44+
2545
pub fn new(
2646
use_padding: bool,
2747
valid_decoding_multiple: usize,
@@ -47,22 +67,64 @@ impl SupportsFastDecodeAndEncode for Base64SimdWrapper {
4767
}
4868

4969
fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()> {
50-
let decoded = if self.use_padding {
51-
base64_simd::STANDARD.decode_to_vec(input)
70+
let original_len = output.len();
71+
72+
let decode_result = if self.use_padding {
73+
// GNU coreutils keeps decoding even when '=' appears before the true end
74+
// of the stream (e.g. concatenated padded chunks). Mirror that logic
75+
// by splitting at each '='-containing quantum, decoding those 4-byte
76+
// groups with the padded variant, then letting the remainder fall back
77+
// to whichever alphabet fits.
78+
let mut start = 0usize;
79+
while start < input.len() {
80+
let remaining = &input[start..];
81+
82+
if remaining.is_empty() {
83+
break;
84+
}
85+
86+
if let Some(eq_rel_idx) = remaining.iter().position(|&b| b == b'=') {
87+
let blocks = (eq_rel_idx / 4) + 1;
88+
let segment_len = blocks * 4;
89+
90+
if segment_len > remaining.len() {
91+
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
92+
}
93+
94+
if Self::decode_with_standard(&remaining[..segment_len], output).is_err() {
95+
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
96+
}
97+
98+
start += segment_len;
99+
} else {
100+
// If there are no more '=' bytes the tail might still be padded
101+
// (len % 4 == 0) or purposely unpadded (GNU --ignore-garbage or
102+
// concatenated streams), so select the matching alphabet.
103+
let decoder = if remaining.len() % 4 == 0 {
104+
Self::decode_with_standard
105+
} else {
106+
Self::decode_with_no_pad
107+
};
108+
109+
if decoder(remaining, output).is_err() {
110+
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
111+
}
112+
113+
break;
114+
}
115+
}
116+
117+
Ok(())
52118
} else {
53-
base64_simd::STANDARD_NO_PAD.decode_to_vec(input)
119+
Self::decode_with_no_pad(input, output)
120+
.map_err(|_| USimpleError::new(1, "error: invalid input".to_owned()))
54121
};
55122

56-
match decoded {
57-
Ok(decoded_bytes) => {
58-
output.extend_from_slice(&decoded_bytes);
59-
Ok(())
60-
}
61-
Err(_) => {
62-
// Restore original length on error
63-
output.truncate(output.len());
64-
Err(USimpleError::new(1, "error: invalid input".to_owned()))
65-
}
123+
if let Err(err) = decode_result {
124+
output.truncate(original_len);
125+
Err(err)
126+
} else {
127+
Ok(())
66128
}
67129
}
68130

tests/by-util/test_base64.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
//
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
5+
6+
// spell-checker:ignore unpadded, QUJD
7+
58
#[cfg(target_os = "linux")]
69
use uutests::at_and_ucmd;
710
use uutests::new_ucmd;
@@ -108,6 +111,33 @@ fn test_decode_repeat_flags() {
108111
.stdout_only("hello, world!");
109112
}
110113

114+
#[test]
115+
fn test_decode_padded_block_followed_by_unpadded_tail() {
116+
new_ucmd!()
117+
.arg("--decode")
118+
.pipe_in("MTIzNA==MTIzNA")
119+
.succeeds()
120+
.stdout_only("12341234");
121+
}
122+
123+
#[test]
124+
fn test_decode_padded_block_followed_by_aligned_tail() {
125+
new_ucmd!()
126+
.arg("--decode")
127+
.pipe_in("MTIzNA==QUJD")
128+
.succeeds()
129+
.stdout_only("1234ABC");
130+
}
131+
132+
#[test]
133+
fn test_decode_unpadded_stream_without_equals() {
134+
new_ucmd!()
135+
.arg("--decode")
136+
.pipe_in("MTIzNA")
137+
.succeeds()
138+
.stdout_only("1234");
139+
}
140+
111141
#[test]
112142
fn test_garbage() {
113143
let input = "aGVsbG8sIHdvcmxkIQ==\0"; // spell-checker:disable-line

0 commit comments

Comments
 (0)