|
3 | 3 | // For the full copyright and license information, please view the LICENSE-*
|
4 | 4 | // files that was distributed with this source code.
|
5 | 5 |
|
6 |
| -use std::{ffi::OsString, io::Write}; |
7 |
| - |
8 | 6 | use regex::Regex;
|
| 7 | +use std::{ffi::OsString, io::Write}; |
9 | 8 | use unicode_width::UnicodeWidthStr;
|
10 | 9 |
|
11 | 10 | /// Replace tabs by spaces in the input line.
|
@@ -99,6 +98,15 @@ pub fn report_failure_to_read_input_file(
|
99 | 98 | );
|
100 | 99 | }
|
101 | 100 |
|
/// Truncates a byte string to at most `limiter` bytes.
///
/// The cut is made at a raw byte offset, so it may land in the middle of a
/// multi-byte character and leave the result with an incomplete encoding.
/// When `limiter` is at least `orig.len()`, the whole input is returned.
#[must_use]
pub fn limited_string(orig: &[u8], limiter: usize) -> &[u8] {
    // TODO: Check whether the cut split the encoding of a character.
    // `get` yields `None` only when `limiter` lies past the end of `orig`,
    // in which case there is nothing to trim.
    orig.get(..limiter).unwrap_or(orig)
}
| 109 | + |
102 | 110 | #[cfg(test)]
|
103 | 111 | mod tests {
|
104 | 112 | use super::*;
|
@@ -205,4 +213,64 @@ mod tests {
|
205 | 213 | assert!(m_time > current_time);
|
206 | 214 | }
|
207 | 215 | }
|
| 216 | + |
/// Unit tests for `limited_string`: empty input, limits at or beyond the end
/// of the slice, and cuts that fall inside or between multi-byte UTF-8
/// sequences.
mod limited_string {
    use super::*;
    use std::str;

    #[test]
    fn empty_orig_returns_empty() {
        let orig: &[u8] = b"";
        let result = limited_string(orig, 10);
        assert!(result.is_empty());
    }

    #[test]
    fn zero_limit_returns_empty() {
        let orig: &[u8] = b"foo";
        let result = limited_string(orig, 0);
        assert!(result.is_empty());
    }

    #[test]
    fn limit_longer_than_orig_returns_full() {
        let orig: &[u8] = b"foo";
        let result = limited_string(orig, 10);
        assert_eq!(result, orig);
    }

    #[test]
    fn ascii_limit_in_middle() {
        let orig: &[u8] = b"foobar";
        let result = limited_string(orig, 3);
        assert_eq!(result, b"foo");
        // Every byte is ASCII, so the cut cannot break the encoding.
        assert!(str::from_utf8(result).is_ok());
    }

    #[test]
    fn utf8_multibyte_cut_invalidates() {
        let orig = "áéíóú".as_bytes();
        let result = limited_string(orig, 1);
        // Only the first byte of the multi-byte char survives the cut.
        assert_eq!(result, [0xC3_u8]);
        assert!(str::from_utf8(result).is_err());
    }

    #[test]
    fn utf8_limit_at_codepoint_boundary() {
        let orig = "áéí".as_bytes();

        // A limit equal to the full length cuts nothing.
        let whole = limited_string(orig, orig.len());
        assert_eq!(whole, orig);
        assert!(str::from_utf8(whole).is_ok());

        // A limit that ends exactly after the first char keeps valid UTF-8.
        let first_char = "á".as_bytes();
        let prefix = limited_string(orig, first_char.len());
        assert_eq!(prefix, first_char);
        assert!(str::from_utf8(prefix).is_ok());
    }

    #[test]
    fn works_with_byte_vec_input() {
        let orig_bytes = b"hello".to_vec();
        let result = limited_string(&orig_bytes, 3);
        assert_eq!(result, b"hel");
    }
}
208 | 276 | }
|
0 commit comments