diff --git a/Cargo.lock b/Cargo.lock index 0d6a9c2e..64cf9ab3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -258,6 +258,18 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "errno" version = "0.3.10" @@ -307,13 +319,16 @@ dependencies = [ "clap", "faccess", "filetime", + "itertools", "nix 0.30.1", "onig", "predicates", "pretty_assertions", "regex", + "rstest", "serial_test", "tempfile", + "thiserror", "uucore", "walkdir", ] @@ -344,9 +359,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.21" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", "futures-sink", @@ -354,9 +369,9 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.21" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-executor" @@ -371,31 +386,49 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.21" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "futures-sink" -version = "0.3.21" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.21" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" -version = "0.3.21" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-channel", "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -422,6 +455,12 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "hashbrown" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" + [[package]] name = "iana-time-zone" version = "0.1.62" @@ -446,12 +485,31 @@ dependencies = [ "cc", ] +[[package]] +name = "indexmap" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "js-sys" version = "0.3.68" @@ -742,6 +800,15 @@ dependencies = [ "yansi", ] +[[package]] +name = "proc-macro-crate" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.93" @@ -813,6 +880,51 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + +[[package]] +name = "rstest" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fc39292f8613e913f7df8fa892b8944ceb47c247b78e1b1ae2f09e019be789d" +dependencies = [ + "futures-timer", + "futures-util", + "rstest_macros", + "rustc_version", +] + +[[package]] +name = "rstest_macros" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f168d99749d307be9de54d23fd226628d99768225ef08f6ffb52e0182a27746" +dependencies = [ + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn", + "unicode-ident", +] + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "0.38.44" @@ -869,11 +981,31 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b84345e4c9bd703274a082fb80caaa99b7612be48dfaa1dd9266577ec412309d" +[[package]] +name = "semver" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" + [[package]] name = "serde" -version = "1.0.147" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "serial_test" @@ -932,9 +1064,9 @@ checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" [[package]] name = "syn" -version = "2.0.18" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", @@ -964,6 +1096,43 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "thiserror" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "toml_datetime" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" + +[[package]] +name = "toml_edit" +version = "0.22.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + [[package]] name = "treeline" version = "0.1.0" @@ -972,9 +1141,9 @@ checksum = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41" [[package]] name = "unicode-ident" -version = "1.0.9" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-width" @@ -1237,6 +1406,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06928c8748d81b05c9be96aad92e1b6ff01833332f281e8cfca3be4b35fc9ec" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen-rt" version = "0.33.0" diff --git a/Cargo.toml b/Cargo.toml index e4c7056d..a08bb6d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ authors = ["uutils developers"] [dependencies] chrono = "0.4.41" -clap = "4.5" +clap = { version = "4.5", features = ["env"] } faccess = "0.2.4" walkdir = "2.5" regex = "1.11" @@ -19,6 +19,9 @@ onig = { version = "6.4", default-features = false } uucore = { version = "0.0.30", features = ["entries", "fs", "fsext", "mode"] } nix = { version = "0.30", features = ["fs", "user"] } argmax = "0.3.1" +itertools = "0.14.0" +rstest = "0.25.0" +thiserror = "2.0.12" [dev-dependencies] assert_cmd = "2" @@ -33,6 +36,14 @@ pretty_assertions = "1.4.1" name = "find" path = "src/find/main.rs" +[[bin]] +name = "locate" +path = "src/locate/main.rs" + +[[bin]] +name = "updatedb" +path = "src/updatedb/main.rs" + [[bin]] name = "xargs" path = "src/xargs/main.rs" diff --git a/invalid_db b/invalid_db new file mode 100644 index 00000000..f62ba7ec Binary files /dev/null and b/invalid_db differ diff --git a/old_db b/old_db new file mode 100644 index 00000000..3fafa8ee Binary files /dev/null and b/old_db differ diff --git a/src/find/matchers/mod.rs b/src/find/matchers/mod.rs index ca2ed15e..082bdec5 100644 --- a/src/find/matchers/mod.rs +++ b/src/find/matchers/mod.rs @@ -74,6 +74,7 @@ use std::{ use super::{Config, Dependencies}; pub use entry::{FileType, WalkEntry, WalkError}; +pub use regex::RegexType; /// Symlink following mode. #[derive(Clone, Copy, Debug, Eq, PartialEq)] diff --git a/src/lib.rs b/src/lib.rs index 9f959d0e..44e37ac5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,4 +5,7 @@ // https://opensource.org/licenses/MIT. pub mod find; +#[cfg(unix)] +pub mod locate; +pub mod updatedb; pub mod xargs; diff --git a/src/locate/main.rs b/src/locate/main.rs new file mode 100644 index 00000000..ca9f3b54 --- /dev/null +++ b/src/locate/main.rs @@ -0,0 +1,18 @@ +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +#[cfg(not(windows))] +fn main() { + let args = std::env::args().collect::>(); + let strs: Vec<&str> = args.iter().map(std::convert::AsRef::as_ref).collect(); + std::process::exit(findutils::locate::locate_main(strs.as_slice())); +} + +#[cfg(windows)] +fn main() { + // TODO: locate currently uses UNIX-specific OsString APIs. If those can be worked around, locate + // should function normally on Windows. If and when that happens, make sure to make a separate + // windows test database with \ instead of /. + println!("locate is unsupported on Windows"); +} diff --git a/src/locate/mod.rs b/src/locate/mod.rs new file mode 100644 index 00000000..f07a901e --- /dev/null +++ b/src/locate/mod.rs @@ -0,0 +1,653 @@ +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +use std::{ + borrow::Cow, + env, + ffi::{CStr, CString, OsStr}, + fs::{self, File}, + io::{self, stderr, BufRead, BufReader, Read, Write}, + os::unix::{ffi::OsStrExt, fs::MetadataExt}, + path::{Path, PathBuf}, + str::FromStr, +}; + +use chrono::{DateTime, Local, TimeDelta}; +use clap::{self, crate_version, value_parser, Arg, ArgAction, ArgMatches, Command, Id}; +use itertools::Itertools; +use onig::{Regex, RegexOptions, Syntax}; +use thiserror::Error; +use uucore::error::{ClapErrorWrapper, UClapError, UError, UResult}; + +use crate::{find::matchers::RegexType, updatedb::DbFormat}; + +#[derive(Debug)] +pub struct Config { + all: bool, + basename: bool, + mode: Mode, + db: Vec, + existing: ExistenceMode, + follow_symlinks: bool, + ignore_case: bool, + limit: Option, + max_age: usize, + null_bytes: bool, + print: bool, +} + +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub enum Mode { + #[default] + Normal, + Count, + Statistics, +} + +#[derive(Debug, Clone, Copy, Default)] +pub enum ExistenceMode { + #[default] + Any, + Present, + NotPresent, +} + +#[derive(Error, Debug)] +pub enum Error { + #[error("no matches found")] + NoMatches, + #[error("Unknown database type")] + InvalidDbType, + #[error("locate database {0} is corrupt or invalid")] + InvalidDb(String), + #[error("{0}")] + IoErr(#[from] io::Error), + #[error("{0}")] + ClapErr(#[from] ClapErrorWrapper), + /// General copy error + #[error("{0}")] + Error(String), +} + +type LocateResult = Result; + +impl UError for Error { + fn code(&self) -> i32 { + 1 + } +} + +pub struct Statistics { + matches: usize, + total_length: usize, + whitespace: usize, + newlines: usize, + high_bit: usize, +} + +impl Statistics { + fn new() -> Self { + Self { + matches: 0, + total_length: 0, + whitespace: 0, + newlines: 0, + high_bit: 0, + } + } + + fn add_match(&mut self, mat: &CStr) { + let s = mat.to_string_lossy(); + self.matches += 1; + self.total_length += s.len(); + if s.chars().any(char::is_whitespace) { + self.whitespace += 1; + } + if s.chars().any(|c| c == '\n') { + self.newlines += 1; + } + if !s.is_ascii() { + self.high_bit += 1; + } + } + + fn print_header(&self, dbreader: &DbReader) { + println!( + "Database {} is in the {} format.", + dbreader.path.to_string_lossy(), + dbreader.format, + ); + } + + fn print(&self, dbreader: &DbReader) { + if let Ok(metadata) = fs::metadata(&dbreader.path) { + if let Ok(time) = metadata.modified() { + let time: DateTime = time.into(); + println!("Database was last modified at {}", time); + } + println!("Locate database size: {} bytes", metadata.size()); + } + println!("Matching Filenames: {}", self.matches); + println!( + "File names have a cumulative length of {} bytes", + self.total_length + ); + println!("Of those file names,\n"); + println!(" {} contain whitespace,", self.whitespace); + println!(" {} contain newline characters,", self.newlines); + println!( + " and {} contain characters with the high bit set.", + self.high_bit + ); + println!(); + } +} + +#[derive(Debug)] +enum Patterns { + String(Vec), + Regex(Vec), +} + +impl Patterns { + fn any_match(&self, entry: &str) -> bool { + match self { + Self::String(v) => v.iter().any(|s| entry.contains(s)), + Self::Regex(v) => v.iter().any(|r| r.find(entry).is_some()), + } + } + + fn all_match(&self, entry: &str) -> bool { + match self { + Self::String(v) => v.iter().all(|s| entry.contains(s)), + Self::Regex(v) => v.iter().all(|r| r.find(entry).is_some()), + } + } +} + +pub struct ParsedInfo { + patterns: Patterns, + config: Config, +} + +fn make_regex(ty: RegexType, config: &Config, pattern: &str) -> Option { + let syntax = match ty { + RegexType::Emacs => Syntax::emacs(), + RegexType::Grep => Syntax::grep(), + RegexType::PosixBasic => Syntax::posix_basic(), + RegexType::PosixExtended => Syntax::posix_extended(), + }; + + Regex::with_options( + pattern, + if config.ignore_case { + RegexOptions::REGEX_OPTION_IGNORECASE + } else { + RegexOptions::REGEX_OPTION_NONE + }, + syntax, + ) + .ok() +} + +impl From for ParsedInfo { + fn from(value: ArgMatches) -> Self { + let config = Config { + all: value.get_flag("all"), + basename: value.get_flag("basename"), + db: value + .get_one::("database") + .unwrap() + .split(':') + .map(PathBuf::from) + .collect(), + mode: value + .get_many::("mode") + .unwrap_or_default() + .next_back() + .map(|s| match s.as_str() { + "count" => Mode::Count, + "statistics" => Mode::Statistics, + _ => unreachable!(), + }) + .unwrap_or_default(), + existing: value + .get_many::("exist") + .unwrap_or_default() + .next_back() + .map(|s| match s.as_str() { + "existing" => ExistenceMode::Present, + "non-existing" => ExistenceMode::NotPresent, + s => unreachable!("{s}"), + }) + .unwrap_or_default(), + follow_symlinks: value.get_flag("follow") || !value.get_flag("nofollow"), + ignore_case: value.get_flag("ignore-case"), + limit: value.get_one::("limit").copied(), + max_age: *value.get_one::("max-database-age").unwrap(), + null_bytes: value.get_flag("null"), + print: value.get_flag("print"), + }; + let patterns: Vec = value + .get_many::("patterns") + .unwrap() + .cloned() + .collect(); + let patterns = if let Some(ty) = value.get_flag("regex").then(|| { + value + .get_one::("regextype") + .and_then(|s| RegexType::from_str(s.as_str()).ok()) + .unwrap_or(RegexType::Emacs) + }) { + Patterns::Regex( + patterns + .into_iter() + .filter_map(|s| make_regex(ty, &config, &s)) + .collect(), + ) + } else { + Patterns::String(patterns) + }; + Self { patterns, config } + } +} + +fn uu_app() -> Command { + Command::new("locate") + .version(crate_version!()) + .args_override_self(true) + .arg( + Arg::new("all") + .short('a') + .long("all") + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new("basename") + .short('b') + .long("basename") + .action(ArgAction::SetTrue) + .group("name"), + ) + .arg( + Arg::new("count") + .short('c') + .long("count") + .action(ArgAction::SetTrue) + .group("mode"), + ) + .arg( + Arg::new("database") + .short('d') + .long("database") + .env("LOCATE_PATH") + .default_value("/usr/local/var/locatedb") + .action(ArgAction::Set), + ) + .arg( + Arg::new("existing") + .short('e') + .long("existing") + .action(ArgAction::SetTrue) + .group("exist"), + ) + .arg( + Arg::new("non-existing") + .short('E') + .long("non-existing") + .action(ArgAction::SetTrue) + .group("exist"), + ) + .arg( + Arg::new("follow") + .short('L') + .action(ArgAction::SetTrue) + .overrides_with("nofollow"), + ) + .arg( + Arg::new("nofollow") + .short('P') + .short_alias('H') + .action(ArgAction::SetTrue) + .overrides_with("follow"), + ) + .arg( + Arg::new("ignore-case") + .short('i') + .long("ignore-case") + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new("limit") + .short('l') + .long("limit") + .value_parser(value_parser!(usize)) + .action(ArgAction::Set), + ) + .arg( + Arg::new("max-database-age") + .long("max-database-age") + .value_parser(value_parser!(usize)) + .default_value("8") + .action(ArgAction::Set), + ) + .arg( + Arg::new("mmap") + .short('m') + .long("mmap") + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new("null") + .short('0') + .long("null") + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new("print") + .short('p') + .long("print") + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new("wholename") + .short('w') + .long("wholename") + .action(ArgAction::SetFalse) + .group("name"), + ) + .arg( + Arg::new("regex") + .short('r') + .long("regex") + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new("regextype") + .long("regextype") + .value_parser([ + "findutils-default", + "emacs", + "gnu-awk", + "grep", + "posix-awk", + "awk", + "posix-basic", + "posix-egrep", + "egrep", + "posix-extended", + ]) + .action(ArgAction::Set), + ) + .arg( + Arg::new("stdio") + .short('s') + .long("stdio") + .action(ArgAction::SetTrue), + ) + .arg( + Arg::new("statistics") + .short('S') + .long("statistics") + .action(ArgAction::SetTrue) + .group("mode"), + ) + .arg( + Arg::new("patterns") + .num_args(1..) + .action(ArgAction::Append) + .value_parser(value_parser!(String)) + .required(true), + ) +} + +struct DbReader { + reader: BufReader, + prev: Option, + prefix: isize, + format: DbFormat, + path: PathBuf, +} + +impl Iterator for DbReader { + type Item = LocateResult; + + fn next(&mut self) -> Option { + // 1 byte for the prefix delta + let mut buf = [0]; + self.reader.read_exact(&mut buf).ok()?; + // 0x80 - the prefix delta takes the next two bytes + let size = if buf[0] == 0x80 { + let mut buf = [0; 2]; + self.reader.read_exact(&mut buf).ok()?; + i16::from_be_bytes(buf) as isize + } else { + // u8 as isize directly doesn't sign-extend + buf[0] as i8 as isize + }; + self.prefix += size; + // read the actual path fragment + let mut buf = Vec::new(); + self.reader.read_until(b'\0', &mut buf).ok()?; + let prefix = self.prev.as_ref().map(|s| { + s.to_bytes() + .iter() + .take(self.prefix as usize) + .collect::>() + }); + if (prefix.as_ref().map(|v| v.len()).unwrap_or(0) as isize) < size { + return Some(Err(Error::InvalidDb( + self.path.to_string_lossy().to_string(), + ))); + } + let res = CString::from_vec_with_nul( + prefix + .unwrap_or_default() + .into_iter() + .copied() + .chain(buf) + .collect(), + ) + .ok()?; + self.prev = Some(res.clone()); + Some(Ok(res)) + } +} + +impl DbReader { + fn new(path: impl AsRef) -> UResult { + let mut reader = BufReader::new(File::open(path.as_ref())?); + let format = Self::check_db(&mut reader).ok_or(Error::InvalidDbType)?; + Ok(Self { + reader, + prev: None, + prefix: 0, + format, + path: path.as_ref().to_path_buf(), + }) + } + + fn check_db(reader: &mut BufReader) -> Option { + let mut buf = [0]; + let Ok(_) = reader.read_exact(&mut buf) else { + return None; + }; + let mut buf = Vec::new(); + let Ok(_) = reader.read_until(b'\0', &mut buf) else { + return None; + }; + + // drop nul byte when matching + match String::from_utf8_lossy(&buf[..buf.len() - 1]).as_ref() { + "LOCATE02" => Some(DbFormat::Locate02), + _ => None, + } + } +} + +fn match_entry(entry: &CStr, config: &Config, patterns: &Patterns) -> bool { + let buf = Path::new(OsStr::from_bytes(entry.to_bytes())); + let name = if config.basename { + let Some(path) = buf.file_name() else { + return false; + }; + + let c = CString::from_vec_with_nul( + path.as_encoded_bytes() + .iter() + .copied() + .chain([b'\0']) + .collect(), + ) + .unwrap(); + + Cow::Owned(c) + } else { + Cow::Borrowed(entry) + }; + let entry = name.to_string_lossy(); + + let patterns_match = match config.all { + false => { + if entry.chars().any(|c| r"*?[]\".contains(c)) { + // TODO: parse metacharacters + false + } else { + patterns.any_match(entry.as_ref()) + } + } + true => { + if entry.chars().any(|c| r"*?[]\".contains(c)) { + // TODO: parse metacharacters + false + } else { + patterns.all_match(entry.as_ref()) + } + } + }; + + let existence_matches = match config.existing { + ExistenceMode::Any => true, + ExistenceMode::Present => { + PathBuf::from(entry.to_string()).exists() + || config.follow_symlinks + && fs::symlink_metadata(PathBuf::from(entry.to_string())).is_ok() + } + ExistenceMode::NotPresent => { + !PathBuf::from(entry.to_string()).exists() + || config.follow_symlinks + && fs::symlink_metadata(PathBuf::from(entry.to_string())).is_err() + } + }; + + patterns_match && existence_matches +} + +fn do_locate(args: &[&str]) -> LocateResult<()> { + let matches = uu_app().try_get_matches_from(args); + match matches { + Err(e) => { + let mut app = uu_app(); + + match e.kind() { + clap::error::ErrorKind::DisplayHelp => { + app.print_help()?; + } + clap::error::ErrorKind::DisplayVersion => print!("{}", app.render_version()), + _ => return Err(e.with_exit_code(1).into()), + } + } + Ok(matches) => { + let ParsedInfo { patterns, config } = ParsedInfo::from(matches); + let mut stats = Statistics::new(); + + // iterate over each given database + let count = config + .db + .iter() + .filter_map(|p| DbReader::new(p.as_path()).ok()) + .map(|mut dbreader| { + // if we can get the mtime of the file, check it against the current time + if let Ok(metadata) = fs::metadata(&dbreader.path) { + if let Ok(time) = metadata.modified() { + let modified: DateTime = time.into(); + let now = Local::now(); + let delta = now - modified; + if delta + > TimeDelta::days(config.max_age as i64) + { + eprintln!( + "{}: warning: database ‘{}’ is more than {} days old (actual age is {:.1} days)", + args[0], + dbreader.path.to_string_lossy(), + config.max_age, + delta.num_seconds() as f64 / (60 * 60 * 24) as f64 + ); + } + } + } + + // the first line of the statistics description is printed before matches + // (given --print) + if config.mode == Mode::Statistics { + stats.print_header(&dbreader); + } + + // find matches + let count = dbreader + .by_ref() + .process_results(|iter| + iter + .filter(|s| match_entry(s.as_c_str(), &config, &patterns)) + .take(config.limit.unwrap_or(usize::MAX)) + .inspect(|s| { + if config.mode == Mode::Normal || config.print { + if config.null_bytes { + print!("{}\0", s.to_string_lossy()); + } else { + println!("{}", s.to_string_lossy()); + } + } + if config.mode == Mode::Statistics { + stats.add_match(s); + } + }) + .count() + ); + + // print the rest of the statistics description + if config.mode == Mode::Statistics && count.is_ok() { + stats.print(&dbreader); + } + + count + }) + .try_fold(0, |acc, e| e.map(|e| acc + e))?; + + if config.mode == Mode::Count { + println!("{count}"); + } + + // zero matches isn't an error if --statistics is passed + if count == 0 && config.mode != Mode::Statistics { + return Err(Error::NoMatches); + } + } + } + + Ok(()) +} + +pub fn locate_main(args: &[&str]) -> i32 { + match do_locate(args) { + Ok(()) => 0, + Err(e) => { + match e { + Error::NoMatches => {} + _ => writeln!(&mut stderr(), "Error: {e}").unwrap(), + } + e.code() + } + } +} diff --git a/src/updatedb/main.rs b/src/updatedb/main.rs new file mode 100644 index 00000000..d77802af --- /dev/null +++ b/src/updatedb/main.rs @@ -0,0 +1,15 @@ +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +#[cfg(not(windows))] +fn main() { + let args = std::env::args().collect::>(); + let strs: Vec<&str> = args.iter().map(std::convert::AsRef::as_ref).collect(); + std::process::exit(findutils::updatedb::updatedb_main(strs.as_slice())); +} + +#[cfg(windows)] +fn main() { + println!("updatedb is unsupported on Windows"); +} diff --git a/src/updatedb/mod.rs b/src/updatedb/mod.rs new file mode 100644 index 00000000..4dead95d --- /dev/null +++ b/src/updatedb/mod.rs @@ -0,0 +1,333 @@ +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +use std::{ + cell::RefCell, + fmt::Display, + fs::OpenOptions, + io::{stderr, BufRead, BufReader, BufWriter, Write}, + path::PathBuf, + rc::Rc, + str::FromStr, + time::SystemTime, +}; + +use clap::{crate_version, value_parser, Arg, ArgAction, ArgMatches, Command}; +use itertools::Itertools; +use uucore::error::UResult; + +use crate::find::{find_main, Dependencies}; + +// local_user and net_user are currently ignored +#[allow(dead_code)] +pub struct Config { + find_options: String, + local_paths: Vec, + net_paths: Vec, + prune_paths: Vec, + prune_fs: Vec, + output: PathBuf, + local_user: Option, + net_user: String, + db_format: DbFormat, +} + +impl From for Config { + fn from(value: ArgMatches) -> Self { + Self { + find_options: value + .get_one::("findoptions") + .cloned() + .unwrap_or_else(String::new), + local_paths: value + .get_one::("localpaths") + .map(|s| { + s.split_whitespace() + .filter_map(|s| PathBuf::from_str(s).ok()) + .collect() + }) + .unwrap_or_else(|| vec![PathBuf::from("/")]), + net_paths: value + .get_one::("netpaths") + .map(|s| s.split_whitespace().map(|s| s.to_owned()).collect()) + .unwrap_or_default(), + prune_paths: value + .get_one::("prunepaths") + .map(|s| s.split_whitespace().map(PathBuf::from).collect()) + .unwrap_or_else(|| { + ["/tmp", "/usr/tmp", "/var/tmp", "/afs"] + .into_iter() + .map(PathBuf::from) + .collect() + }), + prune_fs: value + .get_one::("prunefs") + .map(|s| s.split_whitespace().map(|s| s.to_owned()).collect()) + .unwrap_or_else(|| { + ["nfs", "NFS", "proc"] + .into_iter() + .map(str::to_string) + .collect() + }), + db_format: value + .get_one::("dbformat") + .copied() + .unwrap_or_default(), + output: value + .get_one::("output") + .cloned() + // FIXME: the default should be platform-dependent + .unwrap_or(PathBuf::from_str("/usr/local/var/locatedb").unwrap()), + local_user: value.get_one::("localuser").cloned(), + net_user: value + .get_one::("netuser") + .cloned() + .unwrap_or(String::from("daemon")), + } + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub enum DbFormat { + #[default] + Locate02, +} + +// used for locate's --statistics +impl Display for DbFormat { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Locate02 => f.write_str("GNU LOCATE02"), + } + } +} + +fn uu_app() -> Command { + Command::new("updatedb") + .version(crate_version!()) + .arg( + Arg::new("findoptions") + .long("findoptions") + .require_equals(true) + .env("FINDOPTIONS") + .action(ArgAction::Append), + ) + .arg( + Arg::new("localpaths") + .long("localpaths") + .require_equals(true) + .action(ArgAction::Set), + ) + .arg( + Arg::new("netpaths") + .long("netpaths") + .require_equals(true) + .env("NETPATHS") + .action(ArgAction::Set), + ) + .arg( + Arg::new("prunepaths") + .long("prunepaths") + .require_equals(true) + .env("PRUNEPATHS") + .action(ArgAction::Set), + ) + .arg( + Arg::new("prunefs") + .long("prunefs") + .require_equals(true) + .env("PRUNEFS") + .action(ArgAction::Set), + ) + .arg( + Arg::new("output") + .long("output") + .require_equals(true) + .value_parser(value_parser!(PathBuf)) + .action(ArgAction::Set), + ) + .arg( + Arg::new("localuser") + .long("localuser") + .require_equals(true) + .env("LOCALUSER") + .action(ArgAction::Set), + ) + .arg( + Arg::new("netuser") + .long("netuser") + .require_equals(true) + .env("NETUSER") + .action(ArgAction::Set), + ) + .arg( + Arg::new("dbformat") + .long("dbformat") + .require_equals(true) + .value_parser(["LOCATE02"]) + .action(ArgAction::Set), + ) +} + +// The LOCATE02 format elides bytes from the path until the first byte that differs from the +// previous entry. It keeps a running total of the prefix length, and uses 1 or 3 bytes to write +// the difference from the previous prefix length. Paths are provided in sorted order by find. +struct Frcoder<'a> { + reader: BufReader<&'a [u8]>, + prev: Option>, + prefix: usize, + ty: DbFormat, +} + +impl<'a> Frcoder<'a> { + fn new(v: &'a [u8], ty: DbFormat) -> Self { + Self { + reader: BufReader::new(v), + prev: None, + prefix: 0, + ty, + } + } + + fn generate_header(&self) -> Vec { + match self.ty { + DbFormat::Locate02 => "\0LOCATE02\0".as_bytes().to_vec(), + } + } +} + +impl Iterator for Frcoder<'_> { + type Item = Vec; + + fn next(&mut self) -> Option { + let mut path = Vec::new(); + // find prints nul bytes after each path + if self.reader.read_until(b'\0', &mut path).ok()? == 0 { + return None; + } + + let prefix = path + .iter() + .zip(self.prev.as_deref().unwrap_or_default()) + .take_while(|(a, b)| a == b) + .count(); + + let diff = prefix as i32 - self.prefix as i32; + + // if the prefix delta exceeds 0x7f, we use 0x80 to signal that the next two bytes comprise + // the delta + let mut out = Vec::new(); + if diff.abs() > 0x7f { + out.push(0x80); + out.extend((diff as i16).to_be_bytes()); + } else { + out.push(diff as u8); + } + + out.extend(path.iter().skip(prefix)); + + self.prefix = prefix; + self.prev = Some(path); + + Some(out) + } +} + +// capture find's stdout +struct CapturedDependencies { + output: Rc>, + now: SystemTime, +} + +impl CapturedDependencies { + fn new(output: Rc>) -> Self { + Self { + output, + now: SystemTime::now(), + } + } +} + +impl Dependencies for CapturedDependencies { + fn get_output(&self) -> &RefCell { + self.output.as_ref() + } + + fn now(&self) -> SystemTime { + self.now + } +} + +fn do_updatedb(args: &[&str]) -> UResult<()> { + let matches = uu_app().try_get_matches_from(args)?; + let config = Config::from(matches); + + // TODO: handle localuser and netuser + // this will likely involve splitting the find logic into two calls + + let mut find_args = vec!["find"]; + find_args.extend(config.local_paths.iter().filter_map(|p| p.to_str())); + find_args.extend(config.net_paths.iter().map(|s| s.as_str())); + find_args.extend(config.find_options.split_whitespace()); + // offload most of the logic to find + let excludes = format!( + "( {} {} ) -prune {} {} {}", + if config.prune_fs.is_empty() { + "" + } else { + "-fstype" + }, + config.prune_fs.iter().join(" -or -fstype "), + if config.prune_paths.is_empty() { + "" + } else { + "-or -regex" + }, + config + .prune_paths + .iter() + .filter_map(|p| p.to_str()) + .join(" -prune -or -regex "), + if config.prune_paths.is_empty() { + "" + } else { + "-prune" + }, + ); + find_args.extend(excludes.split_whitespace()); + find_args.extend(["-or", "-print0", "-sorted"]); + + let output = Rc::new(RefCell::new(Vec::new())); + let deps = CapturedDependencies::new(output.clone()); + find_main(find_args.as_slice(), &deps); + + let mut writer = BufWriter::new( + OpenOptions::new() + .write(true) + .truncate(true) + .create(true) + .open(config.output)?, + ); + + let output = output.borrow(); + let frcoder = Frcoder::new(output.as_slice(), config.db_format); + writer.write_all(&frcoder.generate_header())?; + for v in frcoder { + writer.write_all(v.as_slice())?; + } + + writer.flush()?; + + Ok(()) +} + +pub fn updatedb_main(args: &[&str]) -> i32 { + match do_updatedb(args) { + Ok(()) => 0, + Err(e) => { + writeln!(&mut stderr(), "Error: {e}").unwrap(); + 1 + } + } +} diff --git a/test_data/db/depth/1234567890123456789012345678901234567890/1234567890123456789012345678901234567890/1234567890123456789012345678901234567890/1234567890123456789012345678901234567890/f0 b/test_data/db/depth/1234567890123456789012345678901234567890/1234567890123456789012345678901234567890/1234567890123456789012345678901234567890/1234567890123456789012345678901234567890/f0 new file mode 100644 index 00000000..e69de29b diff --git a/test_data/db/depth/1234567890123456789012345678901234567890/1234567890123456789012345678901234567890/1234567890123456789012345678901234567890/1234567890123456789012345678901234567890/f1 b/test_data/db/depth/1234567890123456789012345678901234567890/1234567890123456789012345678901234567890/1234567890123456789012345678901234567890/1234567890123456789012345678901234567890/f1 new file mode 100644 index 00000000..e69de29b diff --git a/test_data_db b/test_data_db new file mode 100644 index 00000000..3fafa8ee Binary files /dev/null and b/test_data_db differ diff --git a/tests/db_tests.rs b/tests/db_tests.rs new file mode 100644 index 00000000..a4f836e3 --- /dev/null +++ b/tests/db_tests.rs @@ -0,0 +1,196 @@ +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +mod common; + +use std::{fs::File, io, process::Command}; + +use assert_cmd::{assert::OutputAssertExt, cargo::CommandCargoExt}; +use rstest::{fixture, rstest}; + +#[fixture] +fn add_special_files() -> io::Result<()> { + File::create("test_data/db/abc def")?; + File::create("test_data/db/abc\ndef")?; + File::create("test_data/db/✨sparkles✨")?; + Ok(()) +} + +#[cfg(not(windows))] +const DB_FLAG: &str = "--database=test_data_db"; +#[cfg(not(windows))] +const INVALID_DB_FLAG: &str = "--database=invalid_db"; +#[cfg(not(windows))] +const OLD_DB_FLAG: &str = "--database=old_db"; + +#[test] +#[cfg(not(windows))] +fn test_locate_no_matches() { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .args(["usr", DB_FLAG]) + .assert() + .failure(); +} + +#[test] +#[cfg(not(windows))] +fn test_locate_match() { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .args(["test_data", DB_FLAG]) + .assert() + .success(); +} + +#[test] +#[cfg(not(windows))] +fn test_locate_no_matches_basename() { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .args(["test_data1234567890", "--basename", DB_FLAG]) + .assert() + .failure(); +} + +#[test] +#[cfg(not(windows))] +fn test_locate_match_basename() { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .args(["abbbc", "--basename", DB_FLAG]) + .assert() + .success(); +} + +#[test] +#[cfg(not(windows))] +fn test_locate_existing() { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .args(["abbbc", "--existing", DB_FLAG]) + .assert() + .success(); +} + +#[test] +#[cfg(not(windows))] +fn test_locate_non_existing() { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .args(["abbbc", "--non-existing", DB_FLAG]) + .assert() + .failure(); +} + +#[rstest] +#[cfg(not(windows))] +fn test_locate_statistics(add_special_files: io::Result<()>) { + if add_special_files.is_ok() { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .args(["", "--statistics", DB_FLAG]) + .assert() + .success(); + } +} + +#[rstest] +#[case("emacs")] +#[case("grep")] +#[case("posix-basic")] +#[case("posix-extended")] +#[cfg(not(windows))] +fn test_locate_regex(#[case] input: &str) { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .args([ + "abbbc", + "--regex", + format!("--regextype={input}").as_str(), + DB_FLAG, + ]) + .assert() + .success(); +} + +#[test] +#[cfg(not(windows))] +fn test_locate_all() { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .args(["abb", "bbc", "--all", DB_FLAG]) + .assert() + .success(); +} + +#[test] +#[cfg(not(windows))] +fn test_locate_all_regex() { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .args(["abb", "b*c", "--all", "--regex", DB_FLAG]) + .assert() + .success(); +} + +#[test] +#[cfg(not(windows))] +fn test_locate_invalid_db() { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .args(["test_data", INVALID_DB_FLAG]) + .assert() + .failure(); +} + +#[test] +#[cfg(not(windows))] +fn test_locate_outdated_db() { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .args(["test_data", OLD_DB_FLAG]) + .assert() + .success(); +} + +#[test] +#[cfg(not(windows))] +fn test_locate_print_help() { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .arg("--help") + .assert() + .success(); +} + +#[test] +#[cfg(not(windows))] +fn test_locate_invalid_flag() { + Command::cargo_bin("locate") + .expect("couldn't find locate binary") + .arg("--unknown") + .assert() + .failure(); +} + +#[rstest] +#[cfg(not(windows))] +fn test_updatedb(_add_special_files: io::Result<()>) { + Command::cargo_bin("updatedb") + .expect("couldn't find updatedb binary") + .args(["--localpaths=./test_data", "--output=/dev/null"]) + .assert() + .success(); +} + +#[test] +#[cfg(not(windows))] +fn test_updatedb_invalid_flag() { + Command::cargo_bin("updatedb") + .expect("couldn't find updatedb binary") + .args(["--unknown"]) + .assert() + .failure(); +}