Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
294 changes: 253 additions & 41 deletions src/uu/tail/src/follow/files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,81 @@ use crate::text;
use std::collections::HashMap;
use std::collections::hash_map::Keys;
use std::fs::{File, Metadata};
use std::io::{BufRead, BufReader, BufWriter, Write, stdout};
use std::io::{BufRead, BufReader, BufWriter, Read, Seek, SeekFrom, Write, stdout};
use std::path::{Path, PathBuf};
use std::time::Instant;
use uucore::error::UResult;

/// Single trait bundling [`BufRead`], [`Seek`] and [`Send`], so that one boxed
/// reader (`Box<dyn BufReadSeek>`) offers buffered reading and seeking together.
/// Seeking is what lets polling mode detect file growth after a rename.
pub trait BufReadSeek: BufRead + Seek + Send {}

/// Every type that is already `BufRead + Seek + Send` is a [`BufReadSeek`].
impl<T> BufReadSeek for T where T: BufRead + Seek + Send {}

/// Adapter for readers that cannot seek (stdin, for example).
///
/// Reading is forwarded unchanged to the wrapped reader, while [`Seek`] is
/// implemented as a no-op so the wrapper satisfies the `BufReadSeek` trait.
pub struct NonSeekableReader<R: BufRead + Send> {
    inner: R,
}

impl<R> NonSeekableReader<R>
where
    R: BufRead + Send,
{
    /// Wraps `inner`.
    pub fn new(inner: R) -> Self {
        NonSeekableReader { inner }
    }
}

impl<R> Read for NonSeekableReader<R>
where
    R: BufRead + Send,
{
    /// Forwards to the wrapped reader.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        Read::read(&mut self.inner, buf)
    }
}

impl<R> BufRead for NonSeekableReader<R>
where
    R: BufRead + Send,
{
    /// Forwards to the wrapped reader.
    fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
        BufRead::fill_buf(&mut self.inner)
    }

    /// Forwards to the wrapped reader.
    fn consume(&mut self, amt: usize) {
        BufRead::consume(&mut self.inner, amt);
    }
}

impl<R> Seek for NonSeekableReader<R>
where
    R: BufRead + Send,
{
    /// Ignores the requested position and always reports offset 0; the
    /// wrapped reader is never touched.
    fn seek(&mut self, _ignored: SeekFrom) -> std::io::Result<u64> {
        Ok(0)
    }
}

/// Where a file system event was observed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WatchSource {
    /// The watch on the file itself reported the event.
    File,
    /// The watch on the file's parent directory reported the event
    /// (only used in Linux inotify + --follow=name mode).
    ParentDirectory,
}

/// Watch bookkeeping stored alongside a monitored file.
#[derive(Clone, Debug)]
pub struct WatchedPath {
    /// Path of the file being monitored.
    #[allow(dead_code)]
    pub file_path: PathBuf,
    /// Path of the parent directory watch, when one is enabled.
    #[allow(dead_code)]
    pub parent_path: Option<PathBuf>,
}

/// Data structure to keep a handle on files to follow.
/// `last` always holds the path/key of the last file that was printed from.
/// The keys of the [`HashMap`] can point to an existing file path (normal case),
/// or stdin ("-"), or to a non-existing path (--retry).
/// For existing files, all keys in the [`HashMap`] are absolute Paths.
pub struct FileHandling {
map: HashMap<PathBuf, PathData>,
map: HashMap<PathBuf, (PathData, Option<WatchedPath>)>,
last: Option<PathBuf>,
header_printer: HeaderPrinter,
}
Expand All @@ -38,26 +102,81 @@ impl FileHandling {

/// Wrapper for [`HashMap::insert`] using [`Path::canonicalize`]
///
/// Stores `v` under `k` without watch metadata: this simply delegates to
/// `insert_with_watch` with `None` as the watch info. `update_last` is passed
/// through unchanged.
pub fn insert(&mut self, k: &Path, v: PathData, update_last: bool) {
self.insert_with_watch(k, v, None, update_last);
}

/// Insert a file with optional watch metadata.
///
/// The map key is `k` passed through `Self::canonicalize_path`. When
/// `update_last` is set, that key is also recorded as `last`. An entry that
/// is already stored under the same key is replaced.
pub fn insert_with_watch(
    &mut self,
    k: &Path,
    v: PathData,
    watch_info: Option<WatchedPath>,
    update_last: bool,
) {
    let k = Self::canonicalize_path(k);
    if update_last {
        self.last = Some(k.clone());
    }
    // The previous entry (if any) is intentionally discarded.
    let _ = self.map.insert(k, (v, watch_info));
}

/// Wrapper for [`HashMap::remove`] using [`Path::canonicalize`]
///
/// Lookup order:
/// 1. `k` passed through `Self::canonicalize_path` (fast path for existing files),
/// 2. `k` exactly as given (a renamed file may no longer canonicalize),
/// 3. the first key whose final path component equals that of `k` (the file was
///    tracked under its canonical path but the event uses the pre-rename name).
///
/// Returns the [`PathData`] of the removed entry; its watch metadata is dropped.
///
/// # Panics
///
/// Panics if none of the lookups finds an entry.
pub fn remove(&mut self, k: &Path) -> PathData {
    let canonicalized = Self::canonicalize_path(k);

    if let Some((data, _)) = self.map.remove(&canonicalized) {
        return data;
    }

    if let Some((data, _)) = self.map.remove(k) {
        return data;
    }

    // NOTE(review): this compares file names only, so two tracked files with the
    // same name in different directories cannot be told apart here; whichever key
    // the map yields first is removed.
    let matching_key = self
        .map
        .keys()
        .find(|key| match (k.file_name(), key.file_name()) {
            (Some(wanted), Some(candidate)) => wanted == candidate,
            _ => false,
        })
        .cloned();

    match matching_key.and_then(|key| self.map.remove(&key)) {
        Some((data, _)) => data,
        None => panic!("No path was found. about [{}]", k.display()),
    }
}

/// Wrapper for [`HashMap::get`] using [`Path::canonicalize`]
///
/// Returns the [`PathData`] half of the entry stored for `k`.
///
/// # Panics
///
/// Panics if no entry is stored under the canonicalized `k`.
pub fn get(&self, k: &Path) -> &PathData {
    &self.map.get(&Self::canonicalize_path(k)).unwrap().0
}

/// Wrapper for [`HashMap::get_mut`] using [`Path::canonicalize`]
pub fn get_mut(&mut self, k: &Path) -> &mut PathData {
self.map.get_mut(&Self::canonicalize_path(k)).unwrap()
&mut self.map.get_mut(&Self::canonicalize_path(k)).unwrap().0
}

/// Look up the watch metadata stored with `k`.
///
/// Returns `None` when the canonicalized `k` is not in the map, or when the
/// entry was stored without watch metadata.
#[allow(dead_code)]
pub fn get_watch_info(&self, k: &Path) -> Option<&WatchedPath> {
    let key = Self::canonicalize_path(k);
    let (_, watch) = self.map.get(&key)?;
    watch.as_ref()
}

/// Canonicalize `path` if it is not already an absolute path
Expand All @@ -74,16 +193,12 @@ impl FileHandling {
self.get_mut(path).metadata.as_ref()
}

pub fn keys(&self) -> Keys<'_, PathBuf, PathData> {
pub fn keys(&self) -> Keys<'_, PathBuf, (PathData, Option<WatchedPath>)> {
self.map.keys()
}

pub fn contains_key(&self, k: &Path) -> bool {
self.map.contains_key(k)
}

pub fn get_last(&self) -> Option<&PathBuf> {
self.last.as_ref()
self.map.contains_key(&Self::canonicalize_path(k))
}

/// Return true if there is only stdin remaining
Expand Down Expand Up @@ -111,18 +226,41 @@ impl FileHandling {
self.get_mut(path).reader = None;
}

/// Reopen the file at the monitored `path`.
///
/// When the file can be opened, the stored reader is replaced by a fresh
/// [`BufReader`] over the newly opened file. When it cannot be opened, the
/// existing reader is left untouched. The open error is deliberately not
/// reported, so this currently always returns `Ok(())`.
///
/// # Panics
///
/// Panics (via `get_mut`) if the file was opened but `path` is not tracked.
pub fn update_reader(&mut self, path: &Path) -> UResult<()> {
    // Always try to reopen the file to get a fresh file descriptor.
    // This is important when a file is replaced (different inode).
    if let Ok(file) = File::open(path) {
        self.get_mut(path)
            .reader
            .replace(Box::new(BufReader::new(file)));
    }
    // Otherwise the file doesn't exist (e.g., after rename in descriptor mode):
    // keep the existing reader - it may still be valid.
    Ok(())
}

/// Drop the current reader, reopen `path` and install a reader positioned by
/// `bounded_tail` (intended for truncate events).
///
/// The old reader is discarded before the reopen is attempted, so the entry is
/// left without a reader if the file cannot be opened. Always returns `Ok(())`.
pub fn update_reader_with_positioning(
    &mut self,
    path: &Path,
    settings: &Settings,
) -> UResult<()> {
    // Discard the existing reader first.
    drop(self.get_mut(path).reader.take());

    if let Ok(mut file) = File::open(path) {
        // Presumably moves the file cursor to the last N lines/bytes as
        // configured in `settings` - confirm against `bounded_tail`.
        super::super::bounded_tail(&mut file, settings);

        // Buffer the already-positioned file.
        let positioned = BufReader::new(file);
        self.get_mut(path).reader.replace(Box::new(positioned));
    }

    // A file that cannot be opened is not treated as an error.
    Ok(())
}

/// Reload metadata from `path`, or `metadata`
Expand Down Expand Up @@ -170,42 +308,116 @@ impl FileHandling {
false
}
}

/// Poll a single file descriptor for new data.
///
/// Nothing is read, and `Ok(false)` is returned, when the entry is not marked
/// for polling fallback, is not a regular file, was polled less than 50 ms ago,
/// or has no reader. Otherwise everything currently readable is read from the
/// stored reader; if that produced data it is written to stdout (preceded by a
/// header when `needs_header` says so), `last` and the entry's metadata are
/// updated, and `Ok(true)` is returned.
///
/// # Errors
///
/// Propagates errors from reading the file and from writing/flushing stdout.
///
/// # Panics
///
/// Panics (via `get_mut`) if `path` is not tracked.
pub fn poll_fd(&mut self, path: &Path, verbose: bool) -> UResult<bool> {
    // Throttle polling: minimum time between two polls of the same entry.
    const MIN_POLL_INTERVAL_MS: u128 = 50;

    // One lookup serves the flag checks, the throttle and the read.
    let entry = self.get_mut(path);

    // Only poll if marked for fallback and is a regular file.
    if !entry.fallback_to_polling || !entry.is_regular_file {
        return Ok(false);
    }

    let now = Instant::now();
    let polled_recently = entry
        .last_polled
        .is_some_and(|previous| now.duration_since(previous).as_millis() < MIN_POLL_INTERVAL_MS);
    if polled_recently {
        return Ok(false);
    }
    entry.last_polled = Some(now);

    // After a rename, the path no longer exists on disk, but the file descriptor
    // is still valid. We can't use metadata to check file size, so we just try
    // to read from the FD and output whatever is there.
    let Some(reader) = entry.reader.as_mut() else {
        return Ok(false);
    };

    // Read new data (similar to tail_file).
    let mut chunks = BytesChunkBuffer::new(u64::MAX);
    chunks.fill(reader)?;

    if !chunks.has_data() {
        return Ok(false);
    }

    if self.needs_header(path, verbose) {
        let display_name = self.get(path).display_name.clone();
        self.header_printer.print(display_name.as_str());
    }

    let mut writer = BufWriter::new(stdout().lock());
    chunks.print(&mut writer)?;
    writer.flush()?;

    self.last.replace(path.to_owned());
    self.update_metadata(path, None);

    Ok(true)
}

/// Poll all file descriptors marked for polling fallback.
/// Returns Ok(true) if any file made progress.
pub fn poll_all_fds(&mut self, verbose: bool) -> UResult<bool> {
let paths_to_poll: Vec<PathBuf> = self
.map
.iter()
.filter(|(_, (data, _))| data.fallback_to_polling && data.is_regular_file)
.map(|(path, _)| path.clone())
.collect();

let mut any_progress = false;
for path in paths_to_poll {
if self.poll_fd(&path, verbose)? {
any_progress = true;
}
}

Ok(any_progress)
}

/// Returns `true` when at least one tracked entry has `fallback_to_polling` set.
pub fn has_polling_fallback(&self) -> bool {
    self.map
        .values()
        .any(|entry| entry.0.fallback_to_polling)
}
}

/// Data structure to keep a handle on the [`BufReader`], [`Metadata`]
/// and the `display_name` (`header_name`) of files that are being followed.
pub struct PathData {
pub reader: Option<Box<dyn BufRead>>,
pub reader: Option<Box<dyn BufReadSeek>>,
pub metadata: Option<Metadata>,
pub display_name: String,
/// After a rename event in descriptor mode, switch to periodic FD polling
pub fallback_to_polling: bool,
/// Track when we last polled this FD to throttle polling frequency
pub last_polled: Option<Instant>,
/// Whether this is a regular file (skip polling for pipes/sockets)
pub is_regular_file: bool,
}

impl PathData {
pub fn new(
reader: Option<Box<dyn BufRead>>,
reader: Option<Box<dyn BufReadSeek>>,
metadata: Option<Metadata>,
display_name: &str,
) -> Self {
let is_regular_file = metadata.as_ref().map(|m| m.is_file()).unwrap_or(false);

Self {
reader,
metadata,
display_name: display_name.to_owned(),
fallback_to_polling: false,
last_polled: None,
is_regular_file,
}
}
pub fn from_other_with_path(data: Self, path: &Path) -> Self {
// Remove old reader
let old_reader = data.reader;
let reader = if old_reader.is_some() {
// Use old reader with the same file descriptor if there is one
old_reader
} else if let Ok(file) = File::open(path) {
// Open new file tail from start
Some(Box::new(BufReader::new(file)) as Box<dyn BufRead>)
} else {
// Probably file was renamed/moved or removed again
None
};

Self::new(reader, path.metadata().ok(), data.display_name.as_str())
}
}
1 change: 1 addition & 0 deletions src/uu/tail/src/follow/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@
mod files;
mod watch;

pub use files::NonSeekableReader;
pub use watch::{Observer, follow};
Loading
Loading