Fix: Show path separator error regardless of directory existence #1876
Fix: Show path separator error regardless of directory existence #1876
Richman018 wants to merge 3 commits into sharkdp:master from
Conversation
Fixes sharkdp#1873.

The error message about patterns containing path separators was only shown when the pattern was also an existing directory. This change makes the error consistent by showing it whenever a pattern contains a path separator (and `--full-path` is not used), regardless of whether the pattern is an existing directory. This ensures users get helpful feedback even when they accidentally use a path separator in a pattern that doesn't exist as a directory.
32fcbed to
e1e7025
Compare
…egex patterns

The previous fix was too aggressive and flagged valid regex patterns like `\Ac` on Windows (where `\A` is a regex anchor). This change adds a heuristic to distinguish between paths and regex escape sequences:
- On Windows: Only flag patterns that look like paths (drive paths like `C:\`, or patterns with backslashes that aren't short regex escapes like `\Ac`)
- On Unix: Continue to flag all patterns with path separators

This fixes the `test_smart_case` failure while still addressing issue sharkdp#1873.
e1e7025 to
3057e24
Compare
tmccombs
left a comment
There was a problem hiding this comment.
I left some feedback on the implementation. But also, before merging this, we'll need to add an entry to the changelog, and add some tests.
| let looks_like_path = if cfg!(windows) { | ||
| // On Windows, check if it's a drive path (C:\) or if the backslash is | ||
| // followed by something that looks like a path component (not a single regex escape) | ||
| let is_drive_path = opts.pattern.len() >= 3 | ||
| && opts | ||
| .pattern | ||
| .chars() | ||
| .next() | ||
| .is_some_and(|c| c.is_ascii_alphabetic()) | ||
| && opts.pattern.chars().nth(1) == Some(':') | ||
| && opts.pattern.chars().nth(2) == Some(std::path::MAIN_SEPARATOR); | ||
| is_drive_path | ||
| || (opts.pattern.matches(std::path::MAIN_SEPARATOR).count() > 0 | ||
| && !is_likely_regex_escape(&opts.pattern)) | ||
| } else { | ||
| // On Unix, if it starts with / or contains /, it's likely a path | ||
| true | ||
| }; |
There was a problem hiding this comment.
| let looks_like_path = if cfg!(windows) { | |
| // On Windows, check if it's a drive path (C:\) or if the backslash is | |
| // followed by something that looks like a path component (not a single regex escape) | |
| let is_drive_path = opts.pattern.len() >= 3 | |
| && opts | |
| .pattern | |
| .chars() | |
| .next() | |
| .is_some_and(|c| c.is_ascii_alphabetic()) | |
| && opts.pattern.chars().nth(1) == Some(':') | |
| && opts.pattern.chars().nth(2) == Some(std::path::MAIN_SEPARATOR); | |
| is_drive_path | |
| || (opts.pattern.matches(std::path::MAIN_SEPARATOR).count() > 0 | |
| && !is_likely_regex_escape(&opts.pattern)) | |
| } else { | |
| // On Unix, if it starts with / or contains /, it's likely a path | |
| true | |
| }; | |
| #[cfg(windows)] | |
| let looks_like_path = { | |
| // On Windows, check if it's a drive path (C:\) or if the backslash is | |
| // followed by something that looks like a path component (not a single regex escape) | |
| let is_drive_path = opts.pattern.len() >= 3 | |
| && opts | |
| .pattern | |
| .chars() | |
| .next() | |
| .is_some_and(|c| c.is_ascii_alphabetic()) | |
| && opts.pattern.chars().nth(1) == Some(':') | |
| && opts.pattern.chars().nth(2) == Some(std::path::MAIN_SEPARATOR); | |
| is_drive_path | |
| || (opts.pattern.matches(std::path::MAIN_SEPARATOR).count() > 0 | |
| && !is_likely_regex_escape(&opts.pattern)) | |
| } | |
| // On Unix, if it starts with / or contains /, it's likely a path | |
| #[cfg(not(windows))] | |
| let looks_like_path = true; |
So that on non-windows we don't even have to compile the code for the path that we'll never encounter.
It would be nice if we could avoid the if statement below as well, but I'm not sure how to do that in a clean way without repeating the error. Maybe if we factored the error message out into a const?
| let is_drive_path = opts.pattern.len() >= 3 | ||
| && opts | ||
| .pattern | ||
| .chars() | ||
| .next() | ||
| .is_some_and(|c| c.is_ascii_alphabetic()) | ||
| && opts.pattern.chars().nth(1) == Some(':') | ||
| && opts.pattern.chars().nth(2) == Some(std::path::MAIN_SEPARATOR); |
There was a problem hiding this comment.
| let is_drive_path = opts.pattern.len() >= 3 | |
| && opts | |
| .pattern | |
| .chars() | |
| .next() | |
| .is_some_and(|c| c.is_ascii_alphabetic()) | |
| && opts.pattern.chars().nth(1) == Some(':') | |
| && opts.pattern.chars().nth(2) == Some(std::path::MAIN_SEPARATOR); | |
| let pattern_bytes = opts.pattern.bytes(); | |
| let is_drive_path = pattern_bytes.len() >= 3 && pattern_bytes[0].is_ascii_alphabetic() | |
| && pattern_bytes[1] == b':' | |
| && pattern_bytes[2].into() == std::path::MAIN_SEPARATOR; |
| /// Check if a pattern is likely a regex escape sequence rather than a path. | ||
| /// This is a heuristic to avoid false positives on Windows where \ is both | ||
| /// a path separator and a regex escape character. | ||
| fn is_likely_regex_escape(pattern: &str) -> bool { |
There was a problem hiding this comment.
| fn is_likely_regex_escape(pattern: &str) -> bool { | |
| #[cfg(windows)] | |
| fn is_likely_regex_escape(pattern: &str) -> bool { |
| /// This is a heuristic to avoid false positives on Windows where \ is both | ||
| /// a path separator and a regex escape character. | ||
| fn is_likely_regex_escape(pattern: &str) -> bool { | ||
| if !cfg!(windows) { |
There was a problem hiding this comment.
get rid of this, and use conditional compilation on the entire function
| // Common regex escape sequences: \A, \z, \b, \d, \s, \w, \1, \2, etc. | ||
| // If the pattern is very short (like "\Ac") and starts with \ followed by | ||
| // a letter or digit, it's likely a regex escape. | ||
| if pattern.len() <= 3 |
There was a problem hiding this comment.
This heuristic will have a lot of false positives. This will prevent you from using escapes in a regex for anything longer than 3 characters on windows.
I'm not really sure what a good path forward is for windows. Honestly, our best option might be to continue using the same heuristic we had before (checking if the path corresponds to a directory). Or maybe just check if the first path component is a directory?
Maybe we could also look for "/" on windows, since that can be a path separator as well.
Fixes #1873
The error message about patterns containing path separators was only shown when the pattern was also an existing directory. This change makes the error consistent by showing it whenever a pattern contains a path separator (and `--full-path` is not used), regardless of whether the pattern is an existing directory.

This ensures users get helpful feedback even when they accidentally use a path separator in a pattern that doesn't exist as a directory.
Changes:
- Removed the `Path::new(&opts.pattern).is_dir()` check from `ensure_search_pattern_is_not_a_path()`

Testing:
- Before: `fd /nonexistent/path` would silently return no results
- After: `fd /nonexistent/path` now shows the helpful error message