Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 9f131bd

Browse files
committed
fix(dd): optimize O_DIRECT buffer alignment to reduce syscall overhead
Implement page-aligned buffer allocation and optimize O_DIRECT flag handling to match GNU dd behavior.

Key changes:
- Add allocate_aligned_buffer() for page-aligned memory allocation
- Update buffer allocation to use aligned buffers
- Modify handle_o_direct_write() to only remove O_DIRECT for partial blocks
- Add Output::write_with_o_direct_handling() for proper O_DIRECT handling
- Add comprehensive unit and integration tests

Fixes #6078
1 parent 95b266f commit 9f131bd

File tree

2 files changed

+329
-22
lines changed

2 files changed

+329
-22
lines changed

src/uu/dd/src/dd.rs

Lines changed: 184 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,37 @@ use uucore::{format_usage, show_error};
6565

6666
const BUF_INIT_BYTE: u8 = 0xDD;
6767

68+
/// Helper function to allocate a page-aligned buffer on Linux/Android.
69+
///
70+
/// O_DIRECT requires buffers to be aligned to page boundaries (typically 4096 bytes).
71+
/// This function allocates a Vec<u8> with proper alignment to support O_DIRECT
72+
/// without triggering EINVAL errors.
73+
#[cfg(any(target_os = "linux", target_os = "android"))]
74+
fn allocate_aligned_buffer(size: usize) -> Vec<u8> {
75+
let alignment = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize };
76+
let ptr = unsafe { libc::memalign(alignment, size) as *mut u8 };
77+
78+
assert!(
79+
!ptr.is_null(),
80+
"Failed to allocate aligned buffer of size {size}"
81+
);
82+
83+
// Initialize with BUF_INIT_BYTE
84+
unsafe {
85+
std::ptr::write_bytes(ptr, BUF_INIT_BYTE, size);
86+
}
87+
88+
// Convert raw pointer to Vec<u8>
89+
// SAFETY: We just allocated this memory with memalign, so it's valid
90+
unsafe { Vec::from_raw_parts(ptr, 0, size) }
91+
}
92+
93+
/// Portable variant for targets other than Linux/Android.
///
/// No special alignment is requested here; the buffer is an ordinary `Vec<u8>`
/// holding `size` bytes, each set to `BUF_INIT_BYTE`.
#[cfg(not(any(target_os = "linux", target_os = "android")))]
fn allocate_aligned_buffer(size: usize) -> Vec<u8> {
    let mut buffer = Vec::with_capacity(size);
    buffer.resize(size, BUF_INIT_BYTE);
    buffer
}
98+
6899
/// Final settings after parsing
69100
#[derive(Default)]
70101
struct Settings {
@@ -688,8 +719,17 @@ fn is_sparse(buf: &[u8]) -> bool {
688719

689720
/// Handle O_DIRECT write errors by temporarily removing the flag and retrying.
690721
/// This follows GNU dd behavior for partial block writes with O_DIRECT.
722+
///
723+
/// With proper buffer alignment (page-aligned), O_DIRECT should only fail for
724+
/// partial blocks (size < output_blocksize). This function only removes O_DIRECT
725+
/// when necessary, matching GNU dd behavior and minimizing system call overhead.
691726
#[cfg(any(target_os = "linux", target_os = "android"))]
692-
fn handle_o_direct_write(f: &mut File, buf: &[u8], original_error: io::Error) -> io::Result<usize> {
727+
fn handle_o_direct_write(
728+
f: &mut File,
729+
buf: &[u8],
730+
output_blocksize: usize,
731+
original_error: io::Error,
732+
) -> io::Result<usize> {
693733
use nix::fcntl::{FcntlArg, OFlag, fcntl};
694734

695735
// Get current flags using nix
@@ -698,8 +738,10 @@ fn handle_o_direct_write(f: &mut File, buf: &[u8], original_error: io::Error) ->
698738
Err(_) => return Err(original_error),
699739
};
700740

701-
// If O_DIRECT is set, try removing it temporarily
702-
if oflags.contains(OFlag::O_DIRECT) {
741+
// If O_DIRECT is set, only remove it for partial blocks (size < output_blocksize)
742+
// This matches GNU dd behavior and minimizes system call overhead.
743+
// With proper buffer alignment, full blocks should not fail with EINVAL.
744+
if oflags.contains(OFlag::O_DIRECT) && buf.len() < output_blocksize {
703745
let flags_without_direct = oflags - OFlag::O_DIRECT;
704746

705747
// Remove O_DIRECT flag using nix
@@ -710,7 +752,7 @@ fn handle_o_direct_write(f: &mut File, buf: &[u8], original_error: io::Error) ->
710752
// Retry the write without O_DIRECT
711753
let write_result = f.write(buf);
712754

713-
// Restore O_DIRECT flag using nix (GNU doesn't restore it, but we'll be safer)
755+
// Restore O_DIRECT flag using nix
714756
// Log any restoration errors without failing the operation
715757
if let Err(os_err) = fcntl(&mut *f, FcntlArg::F_SETFL(oflags)) {
716758
// Just log the error, don't fail the whole operation
@@ -719,16 +761,18 @@ fn handle_o_direct_write(f: &mut File, buf: &[u8], original_error: io::Error) ->
719761

720762
write_result
721763
} else {
722-
// O_DIRECT wasn't set, return original error
764+
// O_DIRECT wasn't set or this is a full block, return original error
723765
Err(original_error)
724766
}
725767
}
726768

727769
/// Stub for non-Linux platforms - just return the original error.
728770
#[cfg(not(any(target_os = "linux", target_os = "android")))]
771+
#[allow(dead_code)]
729772
fn handle_o_direct_write(
730773
_f: &mut File,
731774
_buf: &[u8],
775+
_output_blocksize: usize,
732776
original_error: io::Error,
733777
) -> io::Result<usize> {
734778
Err(original_error)
@@ -745,21 +789,7 @@ impl Write for Dest {
745789
f.seek(SeekFrom::Current(seek_amt))?;
746790
Ok(buf.len())
747791
}
748-
Self::File(f, _) => {
749-
// Try the write first
750-
match f.write(buf) {
751-
Ok(len) => Ok(len),
752-
Err(e)
753-
if e.kind() == io::ErrorKind::InvalidInput
754-
&& e.raw_os_error() == Some(libc::EINVAL) =>
755-
{
756-
// This might be an O_DIRECT alignment issue.
757-
// Try removing O_DIRECT temporarily and retry.
758-
handle_o_direct_write(f, buf, e)
759-
}
760-
Err(e) => Err(e),
761-
}
762-
}
792+
Self::File(f, _) => f.write(buf),
763793
Self::Stdout(stdout) => stdout.write(buf),
764794
#[cfg(unix)]
765795
Self::Fifo(f) => f.write(buf),
@@ -922,6 +952,36 @@ impl<'a> Output<'a> {
922952
}
923953
}
924954

955+
/// Write to the destination with O_DIRECT awareness.
956+
///
957+
/// This method handles O_DIRECT write errors by temporarily removing the flag
958+
/// for partial blocks, matching GNU dd behavior.
959+
#[cfg(any(target_os = "linux", target_os = "android"))]
960+
fn write_with_o_direct_handling(&mut self, buf: &[u8]) -> io::Result<usize> {
961+
match self.dst.write(buf) {
962+
Ok(len) => Ok(len),
963+
Err(e)
964+
if e.kind() == io::ErrorKind::InvalidInput
965+
&& e.raw_os_error() == Some(libc::EINVAL) =>
966+
{
967+
// This might be an O_DIRECT alignment issue.
968+
// Try removing O_DIRECT temporarily and retry (only for partial blocks).
969+
if let Dest::File(f, _) = &mut self.dst {
970+
handle_o_direct_write(f, buf, self.settings.obs, e)
971+
} else {
972+
Err(e)
973+
}
974+
}
975+
Err(e) => Err(e),
976+
}
977+
}
978+
979+
/// Variant for targets other than Linux/Android: forwards `buf` straight to
/// the destination's `write` and returns its result unchanged.
#[cfg(not(any(target_os = "linux", target_os = "android")))]
fn write_with_o_direct_handling(&mut self, buf: &[u8]) -> io::Result<usize> {
    Write::write(&mut self.dst, buf)
}
984+
925985
/// writes a block of data. optionally retries when first try didn't complete
926986
///
927987
/// this is needed by gnu-test: tests/dd/stats.s
@@ -932,7 +992,7 @@ impl<'a> Output<'a> {
932992
let full_len = chunk.len();
933993
let mut base_idx = 0;
934994
loop {
935-
match self.dst.write(&chunk[base_idx..]) {
995+
match self.write_with_o_direct_handling(&chunk[base_idx..]) {
936996
Ok(wlen) => {
937997
base_idx += wlen;
938998
// take iflags.fullblock as oflags shall not have this option
@@ -1146,7 +1206,11 @@ fn dd_copy(mut i: Input, o: Output) -> io::Result<()> {
11461206

11471207
// Create a common buffer with a capacity of the block size.
11481208
// This is the max size needed.
1149-
let mut buf = vec![BUF_INIT_BYTE; bsize];
1209+
//
1210+
// On Linux/Android, use an aligned buffer for O_DIRECT support.
1211+
// O_DIRECT requires buffers to be aligned to page boundaries (typically 4096 bytes).
1212+
// This prevents EINVAL errors when writing with oflag=direct.
1213+
let mut buf = allocate_aligned_buffer(bsize);
11501214

11511215
// Spawn a timer thread to provide a scheduled signal indicating when we
11521216
// should send an update of our progress to the reporting thread.
@@ -1596,4 +1660,102 @@ mod tests {
15961660
Output::new_file(Path::new(settings.outfile.as_ref().unwrap()), &settings).is_err()
15971661
);
15981662
}
1663+
1664+
// ===== O_DIRECT Buffer Alignment Tests =====
1665+
1666+
#[test]
#[cfg(any(target_os = "linux", target_os = "android"))]
fn test_aligned_buffer_allocation() {
    // A 4096-byte request must report that capacity and start on a page boundary.
    let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize };
    let buffer = super::allocate_aligned_buffer(4096);

    // The allocation has the requested capacity.
    assert_eq!(buffer.capacity(), 4096);

    // The start address is a multiple of the page size.
    let address = buffer.as_ptr() as usize;
    assert_eq!(address % page_size, 0, "Buffer should be page-aligned");
}
1680+
1681+
#[test]
#[cfg(any(target_os = "linux", target_os = "android"))]
fn test_aligned_buffer_various_sizes() {
    // Page alignment must hold for requests below, at, and above 4096 bytes.
    let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize };

    for size in [512, 1024, 2048, 4096, 8192, 16384] {
        let buffer = super::allocate_aligned_buffer(size);
        let address = buffer.as_ptr() as usize;
        assert_eq!(
            address % page_size,
            0,
            "Buffer of size {size} should be page-aligned"
        );
    }
}
1698+
1699+
#[test]
#[cfg(any(target_os = "linux", target_os = "android"))]
fn test_aligned_buffer_initialization() {
    // Test that every allocated byte is initialized with BUF_INIT_BYTE.
    const SIZE: usize = 1024;
    let buf = super::allocate_aligned_buffer(SIZE);

    assert!(
        buf.capacity() >= SIZE,
        "Buffer should provide at least {SIZE} bytes"
    );

    // Inspect the allocation itself rather than `buf.iter()`: iterating only
    // covers `len()` elements, and a vector whose reported length is smaller
    // than its capacity would make this check pass without testing anything.
    // SAFETY: allocate_aligned_buffer allocates `SIZE` bytes and fills all of
    // them with BUF_INIT_BYTE before returning, so the range is initialized.
    let bytes = unsafe { std::slice::from_raw_parts(buf.as_ptr(), SIZE) };

    let init_byte = super::BUF_INIT_BYTE;
    assert!(
        bytes.iter().all(|&byte| byte == init_byte),
        "Buffer should be initialized with BUF_INIT_BYTE"
    );
}
1714+
1715+
#[test]
#[cfg(not(any(target_os = "linux", target_os = "android")))]
fn test_aligned_buffer_fallback() {
    // On targets other than Linux/Android the buffer is a plain Vec of the
    // requested size.
    let buffer = super::allocate_aligned_buffer(4096);

    // Both the capacity and the length equal the requested size.
    assert_eq!(buffer.capacity(), 4096);
    assert_eq!(buffer.len(), 4096);
}
1725+
1726+
#[test]
fn test_calc_bsize_alignment() {
    // With equal 4096-byte input and output block sizes, the computed buffer
    // size must be a multiple of, and no smaller than, each of them.
    let (ibs, obs) = (4096, 4096);
    let bsize = calc_bsize(ibs, obs);

    for block_size in [ibs, obs] {
        // Divisible by the block size ...
        assert_eq!(bsize % block_size, 0);
        // ... and at least as large as it.
        assert!(bsize >= block_size);
    }
}
1741+
1742+
#[test]
fn test_calc_bsize_lcm() {
    // Each entry is (ibs, obs, expected buffer size); the expected value is
    // the least common multiple of the two block sizes.
    let cases = [
        (512, 512, 512),
        (512, 1024, 1024),
        (1024, 2048, 2048),
        (4096, 4096, 4096),
        (512, 4096, 4096),
    ];

    for (ibs, obs, expected) in cases {
        let bsize = calc_bsize(ibs, obs);
        assert_eq!(
            bsize, expected,
            "calc_bsize({ibs}, {obs}) should be {expected}"
        );
    }
}
15991761
}

0 commit comments

Comments
 (0)