From 3fd980482c551431b9fcd1c965585631800428d8 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Thu, 13 May 2010 22:09:30 +0800 Subject: [PATCH 01/36] cp: Add FIEMAP support for efficient sparse file copy * src/fiemap.h: Add fiemap.h for fiemap ioctl(2) support. Copied from linux's include/linux/fiemap.h, with minor formatting changes. * src/copy.c (copy_reg): Now, when `cp' is invoked with the --sparse=[WHEN] option, we will try to do a FIEMAP-copy if the underlying file system supports it, and fall back to a normal copy if it fails. --- src/copy.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/fiemap.h | 102 +++++++++++++++++++++++++++++++++ 2 files changed, 261 insertions(+) create mode 100644 src/fiemap.h diff --git a/src/copy.c b/src/copy.c index 9a014ad5aa..5ee3f1af5b 100644 --- a/src/copy.c +++ b/src/copy.c @@ -62,6 +62,10 @@ # include "verror.h" #endif +#ifndef HAVE_FIEMAP +# include "fiemap.h" +#endif + #ifndef HAVE_FCHOWN # define HAVE_FCHOWN false # define fchown(fd, uid, gid) (-1) @@ -148,6 +152,141 @@ clone_file (int dest_fd, int src_fd) #endif } +#ifdef __linux__ +# ifndef FS_IOC_FIEMAP +# define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap) +# endif +/* Perform FIEMAP(available in mainline 2.6.27) copy if possible. + Call ioctl(2) with FS_IOC_FIEMAP to efficiently map file allocation + excepts holes. So the overhead to deal with holes with lseek(2) in + normal copy could be saved. This would result in much faster backups + for any kind of sparse file. 
*/ +static bool +fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, + off_t src_total_size, char const *src_name, + char const *dst_name, bool *normal_copy_required) +{ + bool fail = false; + bool last = false; + char fiemap_buf[4096]; + struct fiemap *fiemap = (struct fiemap *)fiemap_buf; + struct fiemap_extent *fm_ext = &fiemap->fm_extents[0]; + uint32_t count = (sizeof (fiemap_buf) - sizeof (*fiemap)) / + sizeof (struct fiemap_extent); + off_t last_ext_logical = 0; + uint64_t last_ext_len = 0; + uint64_t last_read_size = 0; + unsigned int i = 0; + + /* This is required at least to initialize fiemap->fm_start, + but also serves (in May 2010) to appease valgrind, which + appears not to know the semantics of the FIEMAP ioctl. */ + memset (fiemap_buf, 0, sizeof fiemap_buf); + + do + { + fiemap->fm_length = FIEMAP_MAX_OFFSET; + fiemap->fm_extent_count = count; + + /* When ioctl(2) fails, fall back to the normal copy only if it + is the first time we met. */ + if (ioctl (src_fd, FS_IOC_FIEMAP, fiemap) < 0) + { + /* If `i > 0', then at least one ioctl(2) has been performed before. */ + if (i == 0) + *normal_copy_required = true; + return false; + } + + /* If 0 extents are returned, then more ioctls are not needed. 
*/ + if (fiemap->fm_mapped_extents == 0) + break; + + for (i = 0; i < fiemap->fm_mapped_extents; i++) + { + assert (fm_ext[i].fe_logical <= OFF_T_MAX); + + off_t ext_logical = fm_ext[i].fe_logical; + uint64_t ext_len = fm_ext[i].fe_length; + + if (lseek (src_fd, ext_logical, SEEK_SET) < 0LL) + { + error (0, errno, _("cannot lseek %s"), quote (src_name)); + return fail; + } + + if (lseek (dest_fd, ext_logical, SEEK_SET) < 0LL) + { + error (0, errno, _("cannot lseek %s"), quote (dst_name)); + return fail; + } + + if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST) + { + last_ext_logical = ext_logical; + last_ext_len = ext_len; + last = true; + } + + while (ext_len) + { + char buf[buf_size]; + + /* Avoid reading into the holes if the left extent + length is shorter than the buffer size. */ + if (ext_len < buf_size) + buf_size = ext_len; + + ssize_t n_read = read (src_fd, buf, buf_size); + if (n_read < 0) + { +#ifdef EINTR + if (errno == EINTR) + continue; +#endif + error (0, errno, _("reading %s"), quote (src_name)); + return fail; + } + + if (n_read == 0) + { + /* Figure out how many bytes read from the last extent. */ + last_read_size = last_ext_len - ext_len; + break; + } + + if (full_write (dest_fd, buf, n_read) != n_read) + { + error (0, errno, _("writing %s"), quote (dst_name)); + return fail; + } + + ext_len -= n_read; + } + } + + fiemap->fm_start = fm_ext[i - 1].fe_logical + fm_ext[i - 1].fe_length; + + } while (! last); + + /* If a file ends up with holes, the sum of the last extent logical offset + and the read-returned size will be shorter than the actual size of the + file. Use ftruncate to extend the length of the destination file. */ + if (last_ext_logical + last_read_size < src_total_size) + { + if (ftruncate (dest_fd, src_total_size) < 0) + { + error (0, errno, _("extending %s"), quote (dst_name)); + return fail; + } + } + + return ! 
fail; +} +#else +static bool fiemap_copy_ok (ignored) { errno == ENOTSUP; return false; } +#endif + /* FIXME: describe */ /* FIXME: rewrite this to use a hash table so we avoid the quadratic performance hit that's probably noticeable only on trees deeper @@ -676,6 +815,25 @@ copy_reg (char const *src_name, char const *dst_name, #endif } + if (make_holes) + { + bool require_normal_copy = false; + /* Perform efficient FIEMAP copy for sparse files, fall back to the + standard copy only if the ioctl(2) fails. */ + if (fiemap_copy_ok (source_desc, dest_desc, buf_size, + src_open_sb.st_size, src_name, + dst_name, &require_normal_copy)) + goto preserve_metadata; + else + { + if (! require_normal_copy) + { + return_val = false; + goto close_src_and_dst_desc; + } + } + } + /* If not making a sparse file, try to use a more-efficient buffer size. */ if (! make_holes) @@ -804,6 +962,7 @@ copy_reg (char const *src_name, char const *dst_name, } } +preserve_metadata: if (x->preserve_timestamps) { struct timespec timespec[2]; diff --git a/src/fiemap.h b/src/fiemap.h new file mode 100644 index 0000000000..d33293b5c3 --- /dev/null +++ b/src/fiemap.h @@ -0,0 +1,102 @@ +/* FS_IOC_FIEMAP ioctl infrastructure. + Some portions copyright (C) 2007 Cluster File Systems, Inc + Authors: Mark Fasheh + Kalpak Shah + Andreas Dilger . */ + +/* Copy from kernel, modified to respect GNU code style by Jie Liu. */ + +#ifndef _LINUX_FIEMAP_H +# define _LINUX_FIEMAP_H + +# include <linux/types.h> + +struct fiemap_extent +{ + /* Logical offset in bytes for the start of the extent + from the beginning of the file. */ + uint64_t fe_logical; + + /* Physical offset in bytes for the start of the extent + from the beginning of the disk. */ + uint64_t fe_physical; + + /* Length in bytes for this extent. */ + uint64_t fe_length; + + uint64_t fe_reserved64[2]; + + /* FIEMAP_EXTENT_* flags for this extent. 
*/ + uint32_t fe_flags; + + uint32_t fe_reserved[3]; +}; + +struct fiemap +{ + /* Logical offset(inclusive) at which to start mapping(in). */ + uint64_t fm_start; + + /* Logical length of mapping which userspace wants(in). */ + uint64_t fm_length; + + /* FIEMAP_FLAG_* flags for request(in/out). */ + uint32_t fm_flags; + + /* Number of extents that were mapped(out). */ + uint32_t fm_mapped_extents; + + /* Size of fm_extents array(in). */ + uint32_t fm_extent_count; + + uint32_t fm_reserved; + + /* Array of mapped extents(out). */ + struct fiemap_extent fm_extents[0]; +}; + +/* The maximum offset can be mapped for a file. */ +# define FIEMAP_MAX_OFFSET (~0ULL) + +/* Sync file data before map. */ +# define FIEMAP_FLAG_SYNC 0x00000001 + +/* Map extented attribute tree. */ +# define FIEMAP_FLAG_XATTR 0x00000002 + +# define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) + +/* Last extent in file. */ +# define FIEMAP_EXTENT_LAST 0x00000001 + +/* Data location unknown. */ +# define FIEMAP_EXTENT_UNKNOWN 0x00000002 + +/* Location still pending, Sets EXTENT_UNKNOWN. */ +# define FIEMAP_EXTENT_DELALLOC 0x00000004 + +/* Data can not be read while fs is unmounted. */ +# define FIEMAP_EXTENT_ENCODED 0x00000008 + +/* Data is encrypted by fs. Sets EXTENT_NO_BYPASS. */ +# define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 + +/* Extent offsets may not be block aligned. */ +# define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 + +/* Data mixed with metadata. Sets EXTENT_NOT_ALIGNED. */ +# define FIEMAP_EXTENT_DATA_INLINE 0x00000200 + +/* Multiple files in block. Set EXTENT_NOT_ALIGNED. */ +# define FIEMAP_EXTENT_DATA_TAIL 0x00000400 + +/* Space allocated, but not data (i.e. zero). */ +# define FIEMAP_EXTENT_UNWRITTEN 0x00000800 + +/* File does not natively support extents. Result merged for efficiency. */ +# define FIEMAP_EXTENT_MERGED 0x00001000 + +/* Space shared with other files. 
*/ +# define FIEMAP_EXTENT_SHARED 0x00002000 + +#endif From 183f3f94535a5aca0bcef9c82fb1e29fa99fbb43 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Thu, 13 May 2010 22:17:53 +0800 Subject: [PATCH 02/36] tests: add a new test for FIEMAP-copy * tests/cp/sparse-fiemap: Add a new test for FIEMAP-copy against a loopbacked ext4 partition. * tests/Makefile.am (sparse-fiemap): Reference the new test. --- tests/Makefile.am | 1 + tests/cp/sparse-fiemap | 56 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100755 tests/cp/sparse-fiemap diff --git a/tests/Makefile.am b/tests/Makefile.am index 1e4e3009f7..081ae8e34c 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -25,6 +25,7 @@ root_tests = \ cp/special-bits \ cp/cp-mv-enotsup-xattr \ cp/capability \ + cp/sparse-fiemap \ dd/skip-seek-past-dev \ install/install-C-root \ ls/capability \ diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap new file mode 100755 index 0000000000..945c94b121 --- /dev/null +++ b/tests/cp/sparse-fiemap @@ -0,0 +1,56 @@ +#!/bin/sh +# Test cp --sparse=always through fiemap copy + +# Copyright (C) 2006-2010 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +if test "$VERBOSE" = yes; then + set -x + cp --version +fi + +. 
$srcdir/test-lib.sh +require_root_ + +cwd=`pwd` +cleanup_() { cd /; umount "$cwd/mnt"; } + +skip=0 +# Create an ext4 loopback file system +dd if=/dev/zero of=blob bs=8192 count=1000 || skip=1 +mkdir mnt +mkfs -t ext4 -F blob || + skip_test_ "failed to create ext4 file system" +mount -oloop blob mnt || skip=1 +echo test > mnt/f || skip=1 +test -s mnt/f || skip=1 + +test $skip = 1 && + skip_test_ "insufficient mount/ext4 support" + +# Create a 1TiB sparse file +dd if=/dev/zero of=mnt/sparse bs=1k count=1 seek=1G || framework_failure + +cd mnt || fail=1 + +# It takes many minutes to copy this sparse file using the old method. +# By contrast, it takes far less than 1 second using FIEMAP-copy. +timeout 10 cp --sparse=always sparse fiemap || fail=1 + +# Ensure that the sparse file copied through fiemap has the same size +# in bytes as the original. +test $(stat --printf %s sparse) = $(stat --printf %s fiemap) || fail=1 + +Exit $fail From 6f85fe2eeb9fbb551cf6f3b2ad86cd1d9d0edebb Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Fri, 28 May 2010 09:24:15 +0200 Subject: [PATCH 03/36] tests: sparse-fiemap: factor out some set-up * tests/cp/sparse-fiemap: Cd into test directory sooner. 
--- tests/cp/sparse-fiemap | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap index 945c94b121..21b02acac6 100755 --- a/tests/cp/sparse-fiemap +++ b/tests/cp/sparse-fiemap @@ -33,9 +33,10 @@ dd if=/dev/zero of=blob bs=8192 count=1000 || skip=1 mkdir mnt mkfs -t ext4 -F blob || skip_test_ "failed to create ext4 file system" -mount -oloop blob mnt || skip=1 -echo test > mnt/f || skip=1 -test -s mnt/f || skip=1 +mount -oloop blob mnt || skip=1 +cd mnt || skip=1 +echo test > f || skip=1 +test -s f || skip=1 test $skip = 1 && skip_test_ "insufficient mount/ext4 support" @@ -43,7 +44,6 @@ test $skip = 1 && # Create a 1TiB sparse file dd if=/dev/zero of=mnt/sparse bs=1k count=1 seek=1G || framework_failure -cd mnt || fail=1 # It takes many minutes to copy this sparse file using the old method. # By contrast, it takes far less than 1 second using FIEMAP-copy. From 811248697c28284b48833e2fc88a837c763e3642 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Fri, 21 May 2010 18:28:42 +0200 Subject: [PATCH 04/36] tests: exercise more of the new FIEMAP copying code * tests/cp/sparse-fiemap: Ensure that a file with many extents (more than fit in copy.c's internal 4KiB buffer) is copied properly. --- tests/cp/sparse-fiemap | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap index 21b02acac6..3608db3fd3 100755 --- a/tests/cp/sparse-fiemap +++ b/tests/cp/sparse-fiemap @@ -53,4 +53,42 @@ timeout 10 cp --sparse=always sparse fiemap || fail=1 # in bytes as the original. test $(stat --printf %s sparse) = $(stat --printf %s fiemap) || fail=1 +# ================================================= +# Ensure that we exercise the FIEMAP-copying code enough +# to provoke at least two iterations of the do...while loop +# in which it calls ioctl (fd, FS_IOC_FIEMAP,... +# This also verifies that non-trivial extents are preserved. 
+ +$PERL -e 1 || skip_test_ 'skipping part of this test; you lack perl' + +$PERL -e 'BEGIN { $n = 16 * 1024; *F = *STDOUT }' \ + -e 'for (1..100) { sysseek (*F, $n, 1)' \ + -e '&& syswrite (*F, "."x$n) or die "$!"}' > j1 || fail=1 + +cp --sparse=always j1 j2 || fail=1 +cmp j1 j2 || fail=1 + +filefrag j1 | grep extent \ + || skip_test_ 'skipping part of this test; you lack filefrag' + +# Here is sample filefrag output: +# $ perl -e 'BEGIN{$n=16*1024; *F=*STDOUT}' \ +# -e 'for (1..5) { sysseek(*F,$n,1)' \ +# -e '&& syswrite *F,"."x$n or die "$!"}' > j +# $ filefrag -v j +# File system type is: ef53 +# File size of j is 163840 (40 blocks, blocksize 4096) +# ext logical physical expected length flags +# 0 4 6258884 4 +# 1 12 6258892 6258887 4 +# 2 20 6258900 6258895 4 +# 3 28 6258908 6258903 4 +# 4 36 6258916 6258911 4 eof +# j: 6 extents found + +# exclude the physical block numbers; they always differ +filefrag -v j1 | awk '/^ / {print $1,$2,$NF}' > ff1 || fail=1 +filefrag -v j2 | awk '/^ / {print $1,$2,$NF}' > ff2 || fail=1 +compare ff1 ff2 || fail=1 + Exit $fail From 4b131ce4980f19e4deb42ecb67867fe96926e598 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 22 May 2010 10:22:58 +0200 Subject: [PATCH 05/36] tests: require root only if current partition is neither btrfs nor xfs * tests/cp/sparse-fiemap: Don't require root access if current partition is btrfs or xfs. Use init.sh, not test-lib.sh. --- tests/cp/sparse-fiemap | 49 +++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap index 3608db3fd3..1f78671ce5 100755 --- a/tests/cp/sparse-fiemap +++ b/tests/cp/sparse-fiemap @@ -1,7 +1,7 @@ #!/bin/sh # Test cp --sparse=always through fiemap copy -# Copyright (C) 2006-2010 Free Software Foundation, Inc. +# Copyright (C) 2010 Free Software Foundation, Inc. 
# This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,29 +21,34 @@ if test "$VERBOSE" = yes; then cp --version fi -. $srcdir/test-lib.sh -require_root_ - -cwd=`pwd` -cleanup_() { cd /; umount "$cwd/mnt"; } - -skip=0 -# Create an ext4 loopback file system -dd if=/dev/zero of=blob bs=8192 count=1000 || skip=1 -mkdir mnt -mkfs -t ext4 -F blob || - skip_test_ "failed to create ext4 file system" -mount -oloop blob mnt || skip=1 -cd mnt || skip=1 -echo test > f || skip=1 -test -s f || skip=1 - -test $skip = 1 && - skip_test_ "insufficient mount/ext4 support" +. "${srcdir=.}/init.sh"; path_prepend_ ../src + +if df -T -t btrfs -t xfs . ; then + : # Current dir is on a partition with working extents. Good! +else + # It's not; we need to create one, hence we need root access. + require_root_ + + cwd=$PWD + cleanup_() { cd /; umount "$cwd/mnt"; } + + skip=0 + # Create an XFS loopback file system + dd if=/dev/zero of=blob bs=32k count=1000 || skip=1 + mkdir mnt + mkfs -t xfs blob || + skip_test_ "failed to create XFS file system" + mount -oloop blob mnt || skip=1 + cd mnt || skip=1 + echo test > f || skip=1 + test -s f || skip=1 + + test $skip = 1 && + skip_test_ "insufficient mount/XFS support" +fi # Create a 1TiB sparse file -dd if=/dev/zero of=mnt/sparse bs=1k count=1 seek=1G || framework_failure - +dd if=/dev/zero of=sparse bs=1k count=1 seek=1G || framework_failure # It takes many minutes to copy this sparse file using the old method. # By contrast, it takes far less than 1 second using FIEMAP-copy. From 232b71dacba5ee0469bdc3bcc685ff630980e233 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 22 May 2010 10:21:46 +0200 Subject: [PATCH 06/36] tests: test fiemap-enabled cp more thoroughly * tests/cp/sparse-fiemap: More tests. 
--- tests/cp/sparse-fiemap | 61 ++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap index 1f78671ce5..907ccbdbc6 100755 --- a/tests/cp/sparse-fiemap +++ b/tests/cp/sparse-fiemap @@ -66,34 +66,37 @@ test $(stat --printf %s sparse) = $(stat --printf %s fiemap) || fail=1 $PERL -e 1 || skip_test_ 'skipping part of this test; you lack perl' -$PERL -e 'BEGIN { $n = 16 * 1024; *F = *STDOUT }' \ - -e 'for (1..100) { sysseek (*F, $n, 1)' \ - -e '&& syswrite (*F, "."x$n) or die "$!"}' > j1 || fail=1 - -cp --sparse=always j1 j2 || fail=1 -cmp j1 j2 || fail=1 - -filefrag j1 | grep extent \ - || skip_test_ 'skipping part of this test; you lack filefrag' - -# Here is sample filefrag output: -# $ perl -e 'BEGIN{$n=16*1024; *F=*STDOUT}' \ -# -e 'for (1..5) { sysseek(*F,$n,1)' \ -# -e '&& syswrite *F,"."x$n or die "$!"}' > j -# $ filefrag -v j -# File system type is: ef53 -# File size of j is 163840 (40 blocks, blocksize 4096) -# ext logical physical expected length flags -# 0 4 6258884 4 -# 1 12 6258892 6258887 4 -# 2 20 6258900 6258895 4 -# 3 28 6258908 6258903 4 -# 4 36 6258916 6258911 4 eof -# j: 6 extents found - -# exclude the physical block numbers; they always differ -filefrag -v j1 | awk '/^ / {print $1,$2,$NF}' > ff1 || fail=1 -filefrag -v j2 | awk '/^ / {print $1,$2,$NF}' > ff2 || fail=1 -compare ff1 ff2 || fail=1 +for i in $(seq 20); do + for j in 1 2 31 100; do + $PERL -e 'BEGIN { $n = '$i' * 1024; *F = *STDOUT }' \ + -e 'for (1..'$j') { sysseek (*F, $n, 1)' \ + -e '&& syswrite (*F, "."x$n) or die "$!"}' > j1 || fail=1 + + cp --sparse=always j1 j2 || fail=1 + cmp j1 j2 || fail=1 + filefrag -v j1 | grep extent \ + || skip_test_ 'skipping part of this test; you lack filefrag' + + # Here is sample filefrag output: + # $ perl -e 'BEGIN{$n=16*1024; *F=*STDOUT}' \ + # -e 'for (1..5) { sysseek(*F,$n,1)' \ + # -e '&& syswrite *F,"."x$n or die "$!"}' > j + # $ filefrag -v j + # 
File system type is: ef53 + # File size of j is 163840 (40 blocks, blocksize 4096) + # ext logical physical expected length flags + # 0 4 6258884 4 + # 1 12 6258892 6258887 4 + # 2 20 6258900 6258895 4 + # 3 28 6258908 6258903 4 + # 4 36 6258916 6258911 4 eof + # j: 6 extents found + + # exclude the physical block numbers; they always differ + filefrag -v j1 | awk '/^ / {print $1,$2}' > ff1 || fail=1 + filefrag -v j2 | awk '/^ / {print $1,$2}' > ff2 || fail=1 + compare ff1 ff2 || fail=1 + done +done Exit $fail From 14b82a9a380e94aa88b65dc580d98fa57e14d83c Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 29 May 2010 21:22:40 +0200 Subject: [PATCH 07/36] tests: relax the root-tests cross-check * cfg.mk (sc_root_tests): Allow spaces before "require_root_", now that tests/cp/sparse-fiemap has a conditional use. --- cfg.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cfg.mk b/cfg.mk index 4df74f05d0..d0d68cc31c 100644 --- a/cfg.mk +++ b/cfg.mk @@ -80,7 +80,7 @@ sc_root_tests: @if test -d tests \ && grep check-root tests/Makefile.am>/dev/null 2>&1; then \ t1=sc-root.expected; t2=sc-root.actual; \ - grep -nl '^require_root_$$' \ + grep -nl '^ *require_root_$$' \ $$($(VC_LIST) tests) |sed s,tests/,, |sort > $$t1; \ sed -n '/^root_tests =[ ]*\\$$/,/[^\]$$/p' \ $(srcdir)/tests/Makefile.am \ From 0e04def9928095f4916e9c2f4e38202d4a2b8bdc Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sun, 30 May 2010 21:20:30 +0200 Subject: [PATCH 08/36] tests: improve fiemap test to work with 4 FS types; fall back on ext4 * tests/cp/sparse-fiemap: Improve. * tests/filefrag-extent-compare: New file. 
--- tests/cp/sparse-fiemap | 38 ++++++++++++++------ tests/filefrag-extent-compare | 68 +++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 11 deletions(-) create mode 100644 tests/filefrag-extent-compare diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap index 907ccbdbc6..dc0cf60595 100755 --- a/tests/cp/sparse-fiemap +++ b/tests/cp/sparse-fiemap @@ -23,7 +23,7 @@ fi . "${srcdir=.}/init.sh"; path_prepend_ ../src -if df -T -t btrfs -t xfs . ; then +if df -T -t btrfs -t xfs -t ext4 -t ocfs2 . ; then : # Current dir is on a partition with working extents. Good! else # It's not; we need to create one, hence we need root access. @@ -33,18 +33,18 @@ else cleanup_() { cd /; umount "$cwd/mnt"; } skip=0 - # Create an XFS loopback file system + # Create an ext4 loopback file system dd if=/dev/zero of=blob bs=32k count=1000 || skip=1 mkdir mnt - mkfs -t xfs blob || - skip_test_ "failed to create XFS file system" + mkfs -t ext4 -F blob || + skip_test_ "failed to create ext4 file system" mount -oloop blob mnt || skip=1 cd mnt || skip=1 echo test > f || skip=1 test -s f || skip=1 test $skip = 1 && - skip_test_ "insufficient mount/XFS support" + skip_test_ "insufficient mount/ext4 support" fi # Create a 1TiB sparse file @@ -66,13 +66,26 @@ test $(stat --printf %s sparse) = $(stat --printf %s fiemap) || fail=1 $PERL -e 1 || skip_test_ 'skipping part of this test; you lack perl' -for i in $(seq 20); do +# Extract logical block number and length pairs from filefrag -v output. +# The initial sed is to remove the "eof" from the normally-empty "flags" field. +# That is required when that final extent has no number in the "expected" field. +f() +{ + sed 's/ eof$//' $@ \ + | awk '/^ *[0-9]/ {printf "%d %d ", $2 ,NF < 5 ? 
$NF : $5 } END {print ""}' +} + +for i in $(seq 1 2 21); do for j in 1 2 31 100; do $PERL -e 'BEGIN { $n = '$i' * 1024; *F = *STDOUT }' \ -e 'for (1..'$j') { sysseek (*F, $n, 1)' \ - -e '&& syswrite (*F, "."x$n) or die "$!"}' > j1 || fail=1 - + -e '&& syswrite (*F, chr($_)x$n) or die "$!"}' > j1 || fail=1 + # sync cp --sparse=always j1 j2 || fail=1 + # sync + # Technically we may need the 'sync' uses above, but + # uncommenting them makes this test take much longer. + cmp j1 j2 || fail=1 filefrag -v j1 | grep extent \ || skip_test_ 'skipping part of this test; you lack filefrag' @@ -93,10 +106,13 @@ for i in $(seq 20); do # j: 6 extents found # exclude the physical block numbers; they always differ - filefrag -v j1 | awk '/^ / {print $1,$2}' > ff1 || fail=1 - filefrag -v j2 | awk '/^ / {print $1,$2}' > ff2 || fail=1 - compare ff1 ff2 || fail=1 + filefrag -v j1 > ff1 || fail=1 + filefrag -v j2 > ff2 || fail=1 + { f ff1; f ff2; } \ + | $PERL $abs_top_srcdir/tests/filefrag-extent-compare \ + || { fail=1; break; } done + test $fail = 1 && break done Exit $fail diff --git a/tests/filefrag-extent-compare b/tests/filefrag-extent-compare new file mode 100644 index 0000000000..3c095d52f4 --- /dev/null +++ b/tests/filefrag-extent-compare @@ -0,0 +1,68 @@ +eval '(exit $?0)' && eval 'exec perl -wS "$0" ${1+"$@"}' + & eval 'exec perl -wS "$0" $argv:q' + if 0; +# Determine whether two files have the same extents by comparing +# the logical block numbers and lengths from filefrag -v for each. + +# Invoke like this: +# This helper function, f, extracts logical block number and lengths. 
+# f() { awk '/^ *[0-9]/ {printf "%d %d ",$2,NF<5?$NF:$5} END {print ""}'; } +# { filefrag -v j1 | f; filefrag -v j2 | f; } | ./filefrag-extent-compare + +use warnings; +use strict; +(my $ME = $0) =~ s|.*/||; + +my @line = <>; +my $n_lines = @line; +$n_lines == 2 + or die "$ME: expected exactly two input lines; got $n_lines\n"; + +my @A = split ' ', $line[0]; +my @B = split ' ', $line[1]; +@A % 2 || @B % 2 + and die "$ME: unexpected input: odd number of numbers; expected even\n"; + +my @a; +my @b; +foreach my $i (0..@A/2-1) { $a[$i] = { L_BLK => $A[2*$i], LEN => $A[2*$i+1] } }; +foreach my $i (0..@B/2-1) { $b[$i] = { L_BLK => $B[2*$i], LEN => $B[2*$i+1] } }; + +my $i = 0; +my $j = 0; +while (1) + { + !defined $a[$i] && !defined $b[$j] + and exit 0; + defined $a[$i] && defined $b[$j] + or die "\@a and \@b have different lengths, even after adjustment\n"; + ($a[$i]->{L_BLK} == $b[$j]->{L_BLK} + && $a[$i]->{LEN} == $b[$j]->{LEN}) + and next; + ($a[$i]->{LEN} < $b[$j]->{LEN} + && exists $a[$i+1] && $a[$i]->{LEN} + $a[$i+1]->{LEN} == $b[$j]->{LEN}) + and ++$i, next; + exists $b[$j+1] && $a[$i]->{LEN} == $b[$i]->{LEN} + $b[$i+1]->{LEN} + and ++$j, next; + die "differing extent:\n" + . " [$i]=$a[$i]->{L_BLK} $a[$i]->{LEN}\n" + . " [$j]=$b[$j]->{L_BLK} $b[$j]->{LEN}\n" + } +continue + { + ++$i; + ++$j; + } + +### Setup "GNU" style for perl-mode and cperl-mode. +## Local Variables: +## mode: perl +## perl-indent-level: 2 +## perl-continued-statement-offset: 2 +## perl-continued-brace-offset: 0 +## perl-brace-offset: 0 +## perl-brace-imaginary-offset: 0 +## perl-label-offset: -2 +## perl-extra-newline-before-brace: t +## perl-merge-trailing-else: nil +## End: From 30be467038230107249640a2469639d5e981a905 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 5 Jun 2010 10:17:48 +0200 Subject: [PATCH 09/36] copy.c: adjust comments, tweak semantics * src/copy.c (fiemap_copy): Rename from fiemap_copy_ok. Add/improve comments. Remove local, "fail". 
(fiemap_copy): Do not require caller to set "normal_copy_required" before calling fiemap_copy. Report ioctl failure if it's the 2nd or subsequent call. --- src/copy.c | 62 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/src/copy.c b/src/copy.c index 5ee3f1af5b..6bb05068da 100644 --- a/src/copy.c +++ b/src/copy.c @@ -156,30 +156,33 @@ clone_file (int dest_fd, int src_fd) # ifndef FS_IOC_FIEMAP # define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap) # endif -/* Perform FIEMAP(available in mainline 2.6.27) copy if possible. - Call ioctl(2) with FS_IOC_FIEMAP to efficiently map file allocation - excepts holes. So the overhead to deal with holes with lseek(2) in - normal copy could be saved. This would result in much faster backups - for any kind of sparse file. */ +/* Perform a FIEMAP copy, if possible. + Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to + obtain a map of file extents excluding holes. This avoids the + overhead of detecting holes in a hole-introducing/preserving copy, + and thus makes copying sparse files much more efficient. Upon a + successful copy, return true. If the initial ioctl fails, set + *NORMAL_COPY_REQUIRED to true and return false. Upon any other + failure, set *NORMAL_COPY_REQUIRED to false and return false. 
*/ static bool -fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, - off_t src_total_size, char const *src_name, - char const *dst_name, bool *normal_copy_required) +fiemap_copy (int src_fd, int dest_fd, size_t buf_size, + off_t src_total_size, char const *src_name, + char const *dst_name, bool *normal_copy_required) { - bool fail = false; bool last = false; char fiemap_buf[4096]; - struct fiemap *fiemap = (struct fiemap *)fiemap_buf; + struct fiemap *fiemap = (struct fiemap *) fiemap_buf; struct fiemap_extent *fm_ext = &fiemap->fm_extents[0]; - uint32_t count = (sizeof (fiemap_buf) - sizeof (*fiemap)) / - sizeof (struct fiemap_extent); + uint32_t count = ((sizeof fiemap_buf - sizeof (*fiemap)) + / sizeof (struct fiemap_extent)); off_t last_ext_logical = 0; uint64_t last_ext_len = 0; uint64_t last_read_size = 0; unsigned int i = 0; + *normal_copy_required = false; /* This is required at least to initialize fiemap->fm_start, - but also serves (in May 2010) to appease valgrind, which + but also serves (in mid 2010) to appease valgrind, which appears not to know the semantics of the FIEMAP ioctl. */ memset (fiemap_buf, 0, sizeof fiemap_buf); @@ -192,9 +195,16 @@ fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, is the first time we met. */ if (ioctl (src_fd, FS_IOC_FIEMAP, fiemap) < 0) { - /* If `i > 0', then at least one ioctl(2) has been performed before. */ + /* If the first ioctl fails, tell the caller that it is + ok to proceed with a normal copy. */ if (i == 0) *normal_copy_required = true; + else + { + /* If the second or subsequent ioctl fails, diagnose it, + since it ends up causing the entire copy/cp to fail. 
*/ + error (0, errno, _("%s: FIEMAP ioctl failed"), quote (src_name)); + } return false; } @@ -212,13 +222,13 @@ fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, if (lseek (src_fd, ext_logical, SEEK_SET) < 0LL) { error (0, errno, _("cannot lseek %s"), quote (src_name)); - return fail; + return false; } if (lseek (dest_fd, ext_logical, SEEK_SET) < 0LL) { error (0, errno, _("cannot lseek %s"), quote (dst_name)); - return fail; + return false; } if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST) @@ -245,7 +255,7 @@ fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, continue; #endif error (0, errno, _("reading %s"), quote (src_name)); - return fail; + return false; } if (n_read == 0) @@ -258,7 +268,7 @@ fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, if (full_write (dest_fd, buf, n_read) != n_read) { error (0, errno, _("writing %s"), quote (dst_name)); - return fail; + return false; } ext_len -= n_read; @@ -276,15 +286,15 @@ fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, { if (ftruncate (dest_fd, src_total_size) < 0) { - error (0, errno, _("extending %s"), quote (dst_name)); - return fail; + error (0, errno, _("failed to extend %s"), quote (dst_name)); + return false; } } - return ! fail; + return true; } #else -static bool fiemap_copy_ok (ignored) { errno == ENOTSUP; return false; } +static bool fiemap_copy (ignored) { errno == ENOTSUP; return false; } #endif /* FIXME: describe */ @@ -817,12 +827,12 @@ copy_reg (char const *src_name, char const *dst_name, if (make_holes) { - bool require_normal_copy = false; + bool require_normal_copy; /* Perform efficient FIEMAP copy for sparse files, fall back to the standard copy only if the ioctl(2) fails. 
*/ - if (fiemap_copy_ok (source_desc, dest_desc, buf_size, - src_open_sb.st_size, src_name, - dst_name, &require_normal_copy)) + if (fiemap_copy (source_desc, dest_desc, buf_size, + src_open_sb.st_size, src_name, + dst_name, &require_normal_copy)) goto preserve_metadata; else { From 52440e3ca6d6cece99825281efb09ebc32d4427c Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Wed, 9 Jun 2010 08:15:07 +0200 Subject: [PATCH 10/36] copy.c: ensure proper alignment of fiemap buffer * src/copy.c (fiemap_copy): Ensure that our fiemap buffer is large enough and well-aligned. Replace "0LL" with equivalent "0" as 3rd argument to lseek. --- src/copy.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/copy.c b/src/copy.c index 6bb05068da..1d32913ebc 100644 --- a/src/copy.c +++ b/src/copy.c @@ -170,11 +170,12 @@ fiemap_copy (int src_fd, int dest_fd, size_t buf_size, char const *dst_name, bool *normal_copy_required) { bool last = false; - char fiemap_buf[4096]; - struct fiemap *fiemap = (struct fiemap *) fiemap_buf; + union { struct fiemap f; char c[4096]; } fiemap_buf; + struct fiemap *fiemap = &fiemap_buf.f; struct fiemap_extent *fm_ext = &fiemap->fm_extents[0]; - uint32_t count = ((sizeof fiemap_buf - sizeof (*fiemap)) - / sizeof (struct fiemap_extent)); + enum { count = (sizeof fiemap_buf - sizeof *fiemap) / sizeof *fm_ext }; + verify (count != 0); + off_t last_ext_logical = 0; uint64_t last_ext_len = 0; uint64_t last_read_size = 0; @@ -184,7 +185,7 @@ fiemap_copy (int src_fd, int dest_fd, size_t buf_size, /* This is required at least to initialize fiemap->fm_start, but also serves (in mid 2010) to appease valgrind, which appears not to know the semantics of the FIEMAP ioctl. 
*/ - memset (fiemap_buf, 0, sizeof fiemap_buf); + memset (&fiemap_buf, 0, sizeof fiemap_buf); do { @@ -219,13 +220,13 @@ fiemap_copy (int src_fd, int dest_fd, size_t buf_size, off_t ext_logical = fm_ext[i].fe_logical; uint64_t ext_len = fm_ext[i].fe_length; - if (lseek (src_fd, ext_logical, SEEK_SET) < 0LL) + if (lseek (src_fd, ext_logical, SEEK_SET) < 0) { error (0, errno, _("cannot lseek %s"), quote (src_name)); return false; } - if (lseek (dest_fd, ext_logical, SEEK_SET) < 0LL) + if (lseek (dest_fd, ext_logical, SEEK_SET) < 0) { error (0, errno, _("cannot lseek %s"), quote (dst_name)); return false; From f3264230583094a68acd9b1abf938f09548da2b9 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Wed, 9 Jun 2010 08:42:30 +0200 Subject: [PATCH 11/36] fiemap.h: include <stdint.h>, not <linux/types.h> * src/fiemap.h: Include stdint.h, not linux/types.h, now that this file uses only portable type names. --- src/fiemap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fiemap.h b/src/fiemap.h index d33293b5c3..c5d8424b3b 100644 --- a/src/fiemap.h +++ b/src/fiemap.h @@ -9,7 +9,7 @@ #ifndef _LINUX_FIEMAP_H # define _LINUX_FIEMAP_H -# include <linux/types.h> +# include <stdint.h> struct fiemap_extent { From fd7bf0fe65d1c470ff0d9be6e7353df93be01c7f Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Fri, 11 Jun 2010 14:34:03 +0200 Subject: [PATCH 12/36] tests: accommodate varying filefrag -v "flags" output * tests/cp/sparse-fiemap: Accommodate values other than "eof" in the "flags" column of filefrag -v output --- tests/cp/sparse-fiemap | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap index dc0cf60595..b6b1103909 100755 --- a/tests/cp/sparse-fiemap +++ b/tests/cp/sparse-fiemap @@ -68,10 +68,11 @@ $PERL -e 1 || skip_test_ 'skipping part of this test; you lack perl' # Extract logical block number and length pairs from filefrag -v output. # The initial sed is to remove the "eof" from the normally-empty "flags" field.
+# Similarly, remove flags values like "unknown,delalloc,eof". # That is required when that final extent has no number in the "expected" field. f() { - sed 's/ eof$//' $@ \ + sed 's/ [a-z,][a-z,]*$//' $@ \ | awk '/^ *[0-9]/ {printf "%d %d ", $2 ,NF < 5 ? $NF : $5 } END {print ""}' } From 153790eca19fcc92855420ea94893cc95e7b306c Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Fri, 11 Jun 2010 16:29:02 +0800 Subject: [PATCH 13/36] copy.c: add FIEMAP_FLAG_SYNC to fiemap ioctl * src/copy.c (fiemap_copy): Force kernel to sync the source file before mapping. --- src/copy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/copy.c b/src/copy.c index 1d32913ebc..ead15b82f1 100644 --- a/src/copy.c +++ b/src/copy.c @@ -190,6 +190,7 @@ fiemap_copy (int src_fd, int dest_fd, size_t buf_size, do { fiemap->fm_length = FIEMAP_MAX_OFFSET; + fiemap->fm_flags = FIEMAP_FLAG_SYNC; fiemap->fm_extent_count = count; /* When ioctl(2) fails, fall back to the normal copy only if it From 80ed842ab1518aaf996b64d60c6915a2df1b0137 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sun, 13 Jun 2010 16:19:29 +0200 Subject: [PATCH 14/36] build: distribute new file, fiemap.h * src/Makefile.am (noinst_HEADERS): Add fiemap.h. --- src/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Makefile.am b/src/Makefile.am index 91364635fc..986bd217ff 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -145,6 +145,7 @@ noinst_HEADERS = \ copy.h \ cp-hash.h \ dircolors.h \ + fiemap.h \ find-mount-point.h \ fs.h \ group-list.h \ From 63b743902d99aa67adbad795bf11a1ce4d27c39b Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sun, 13 Jun 2010 16:34:42 +0200 Subject: [PATCH 15/36] build: distribute new test script, filefrag-extent-compare * tests/Makefile.am (EXTRA_DIST): Add filefrag-extent-compare. 
--- tests/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/Makefile.am b/tests/Makefile.am index 081ae8e34c..847f18136a 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -10,6 +10,7 @@ EXTRA_DIST = \ CuTmpdir.pm \ check.mk \ envvar-check \ + filefrag-extent-compare \ init.cfg \ init.sh \ lang-default \ From bfda815cf4c889ac8165004fee5678fee67fbaf1 Mon Sep 17 00:00:00 2001 From: "jeff.liu" Date: Wed, 29 Sep 2010 16:11:41 +0800 Subject: [PATCH 16/36] bug#6131: [PATCH]: fiemap support for efficient sparse file copy Jim Meyering wrote: > jeff.liu wrote: >> Sorry for the delay. >> >> This is the new patch to isolate the stuff regarding to extents reading to a new module. and teach >> cp(1) to make use of it. > > Jeff, > > I applied your patch to my rebased fiemap-copy branch. > My first step was to run the usual > > ./bootstrap && ./configure && make && make check > > "make check" failed on due to a double free in your new code: > (x86_64, Fedora 13, ext4 working directory) > > To get details, I made this temporary modification: Hi Jim, I am sorry for the fault, it is fixed in the patch below. Would you please review it at your convenience? Changes: ======== 1. fix write_zeros() as Jim's comments, thanks for pointing this out. 2. remove char const *fname from struct extent_scan. 3. change the signature of open_extent_scan() from "void open_extent_scan(struct extent_scan **scan)" to "void open_extent_scan(struct extent_scan *scan)"; the reason is I'd like to reduce once memory allocation for the extent_scan variable, instead, using stack to save it. 4. remove close_extent_scan() from a function defined at extent-scan.c to extent-scan.h as a macro definition, but it does nothing for now, since initial extent scan defined at stack. 5.
add a macro "free_extents_info()" defined at extent-scan.h to release the memory allocated to extent info which should be called in combination with get_extents_info(), it is just one line, so IMHO, defining it as a macro should be ok. I have done the memory check via `valgrind`, no issue found. make test against cp/sparse-fiemap failed at the extent compare stage, but the file content is identical to each other by comparing those two files "j1/j2" manually. Does it make sense if we verify them through diff(1) since the testing file is in small size? or we have to merge the contig extents from the output of `filefrag', I admit I have not dug into the filefrag-extent-compare at the moment, I need to recall the perl language syntax. :-P. >From 50a3338db06442fa2d789fd65175172d140cc96e Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 29 Sep 2010 15:35:43 +0800 Subject: [PATCH 1/1] cp: add a new module for scanning extents * src/extent-scan.c: Source code for scanning extents. Call open_extent_scan() to initialize extent scan. Call get_extents_info() to get a number of extents for each iteration. * src/extent-scan.h: Header file of extent-scan.c. Wrap free_extents_info() as a macro definition to release the space allocated to extent_info per extent scan. Wrap close_extent_scan() as a macro definition that does nothing at the moment. * src/Makefile.am: Reference it and link it to copy_sources. * src/copy.c: Make use of the new module, replace fiemap_copy() with extent_copy().
Signed-off-by: Jie Liu --- src/Makefile.am | 2 +- src/copy.c | 193 +++++++++++++++++++++++++++------------------- src/extent-scan.c | 113 +++++++++++++++++++++++++++ src/extent-scan.h | 68 ++++++++++++++++ 4 files changed, 296 insertions(+), 80 deletions(-) create mode 100644 src/extent-scan.c create mode 100644 src/extent-scan.h diff --git a/src/Makefile.am b/src/Makefile.am index 986bd217ff..9fd7822149 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -450,7 +450,7 @@ uninstall-local: fi; \ fi -copy_sources = copy.c cp-hash.c +copy_sources = copy.c cp-hash.c extent-scan.c # Use `ginstall' in the definition of PROGRAMS and in dependencies to avoid # confusion with the `install' target. The install rule transforms `ginstall' diff --git a/src/copy.c b/src/copy.c index ead15b82f1..2c3eabdda4 100644 --- a/src/copy.c +++ b/src/copy.c @@ -36,6 +36,7 @@ #include "buffer-lcm.h" #include "copy.h" #include "cp-hash.h" +#include "extent-scan.h" #include "error.h" #include "fcntl--.h" #include "file-set.h" @@ -152,74 +153,79 @@ clone_file (int dest_fd, int src_fd) #endif } -#ifdef __linux__ -# ifndef FS_IOC_FIEMAP -# define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap) -# endif -/* Perform a FIEMAP copy, if possible. - Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to - obtain a map of file extents excluding holes. This avoids the - overhead of detecting holes in a hole-introducing/preserving copy, - and thus makes copying sparse files much more efficient. Upon a - successful copy, return true. If the initial ioctl fails, set - *NORMAL_COPY_REQUIRED to true and return false. Upon any other - failure, set *NORMAL_COPY_REQUIRED to false and return false. 
*/ static bool -fiemap_copy (int src_fd, int dest_fd, size_t buf_size, - off_t src_total_size, char const *src_name, - char const *dst_name, bool *normal_copy_required) +write_zeros (int fd, uint64_t n_bytes) { - bool last = false; - union { struct fiemap f; char c[4096]; } fiemap_buf; - struct fiemap *fiemap = &fiemap_buf.f; - struct fiemap_extent *fm_ext = &fiemap->fm_extents[0]; - enum { count = (sizeof fiemap_buf - sizeof *fiemap) / sizeof *fm_ext }; - verify (count != 0); + static char *zeros; + static size_t nz = IO_BUFSIZE; + + if (zeros == NULL) + { + static char fallback[1024]; + zeros = calloc (nz, 1); + if (zeros == NULL) + { + zeros = fallback; + nz = sizeof fallback; + } + } + + while (n_bytes) + { + uint64_t n = MIN (sizeof nz, n_bytes); + if ((full_write (fd, zeros, n)) != n) + return false; + n_bytes -= n; + } + + return true; +} +/* Perform an efficient extent copy, if possible. This avoids + the overhead of detecting holes in hole-introducing/preserving + copy, and thus makes copying sparse files much more efficient. + Upon a successful copy, return true. If the initial extent scan + fails, set *NORMAL_COPY_REQUIRED to true and return false. + Upon any other failure, set *NORMAL_COPY_REQUIRED to false and + return false. */ +static bool +extent_copy (int src_fd, int dest_fd, size_t buf_size, + off_t src_total_size, bool make_holes, + char const *src_name, char const *dst_name, + bool *require_normal_copy) +{ + struct extent_scan scan; off_t last_ext_logical = 0; uint64_t last_ext_len = 0; uint64_t last_read_size = 0; - unsigned int i = 0; - *normal_copy_required = false; + unsigned int i; + bool ok = true; - /* This is required at least to initialize fiemap->fm_start, - but also serves (in mid 2010) to appease valgrind, which - appears not to know the semantics of the FIEMAP ioctl. 
*/ - memset (&fiemap_buf, 0, sizeof fiemap_buf); + open_extent_scan (src_fd, &scan); do { - fiemap->fm_length = FIEMAP_MAX_OFFSET; - fiemap->fm_flags = FIEMAP_FLAG_SYNC; - fiemap->fm_extent_count = count; - - /* When ioctl(2) fails, fall back to the normal copy only if it - is the first time we met. */ - if (ioctl (src_fd, FS_IOC_FIEMAP, fiemap) < 0) + ok = get_extents_info (&scan); + if (! ok) { - /* If the first ioctl fails, tell the caller that it is - ok to proceed with a normal copy. */ - if (i == 0) - *normal_copy_required = true; - else + if (scan.hit_last_extent) + break; + + if (scan.initial_scan_failed) { - /* If the second or subsequent ioctl fails, diagnose it, - since it ends up causing the entire copy/cp to fail. */ - error (0, errno, _("%s: FIEMAP ioctl failed"), quote (src_name)); + close_extent_scan (&scan); + *require_normal_copy = true; + return false; } + + error (0, errno, _("failed to get extents info %s"), quote (src_name)); return false; } - /* If 0 extents are returned, then more ioctls are not needed. 
*/ - if (fiemap->fm_mapped_extents == 0) - break; - - for (i = 0; i < fiemap->fm_mapped_extents; i++) + for (i = 0; i < scan.ei_count; i++) { - assert (fm_ext[i].fe_logical <= OFF_T_MAX); - - off_t ext_logical = fm_ext[i].fe_logical; - uint64_t ext_len = fm_ext[i].fe_length; + off_t ext_logical = scan.ext_info[i].ext_logical; + uint64_t ext_len = scan.ext_info[i].ext_length; if (lseek (src_fd, ext_logical, SEEK_SET) < 0) { @@ -227,27 +233,37 @@ fiemap_copy (int src_fd, int dest_fd, size_t buf_size, return false; } - if (lseek (dest_fd, ext_logical, SEEK_SET) < 0) + if (make_holes) { - error (0, errno, _("cannot lseek %s"), quote (dst_name)); - return false; + if (lseek (dest_fd, ext_logical, SEEK_SET) < 0) + { + error (0, errno, _("cannot lseek %s"), quote (dst_name)); + return false; + } } - - if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST) + else { - last_ext_logical = ext_logical; - last_ext_len = ext_len; - last = true; + /* If not making a sparse file, write zeros to the destination + file if there is a hole between the last and current extent. */ + if (last_ext_logical + last_ext_len < ext_logical) + { + uint64_t holes_len = ext_logical - last_ext_logical - last_ext_len; + if (! write_zeros (dest_fd, holes_len)) + return false; + } } + last_ext_logical = ext_logical; + last_ext_len = ext_len; + last_read_size = 0; + while (ext_len) { char buf[buf_size]; /* Avoid reading into the holes if the left extent length is shorter than the buffer size. */ - if (ext_len < buf_size) - buf_size = ext_len; + buf_size = MIN (ext_len, buf_size); ssize_t n_read = read (src_fd, buf, buf_size); if (n_read < 0) @@ -257,12 +273,12 @@ fiemap_copy (int src_fd, int dest_fd, size_t buf_size, continue; #endif error (0, errno, _("reading %s"), quote (src_name)); - return false; + return false; } if (n_read == 0) { - /* Figure out how many bytes read from the last extent. */ + /* Figure out how many bytes read from the previous extent. 
*/ last_read_size = last_ext_len - ext_len; break; } @@ -277,27 +293,44 @@ fiemap_copy (int src_fd, int dest_fd, size_t buf_size, } } - fiemap->fm_start = fm_ext[i - 1].fe_logical + fm_ext[i - 1].fe_length; + /* Release the space allocated to scan->ext_info. */ + free_extents_info (&scan); + } while (! scan.hit_last_extent); - } while (! last); + /* Do nothing now. */ + close_extent_scan (&scan); /* If a file ends up with holes, the sum of the last extent logical offset - and the read-returned size will be shorter than the actual size of the - file. Use ftruncate to extend the length of the destination file. */ - if (last_ext_logical + last_read_size < src_total_size) + and the read-returned size or the last extent length will be shorter than + the actual size of the file. Use ftruncate to extend the length of the + destination file if make_holes, or write zeros up to the actual size of the + file. */ + if (make_holes) { - if (ftruncate (dest_fd, src_total_size) < 0) + if (last_ext_logical + last_read_size < src_total_size) { - error (0, errno, _("failed to extend %s"), quote (dst_name)); - return false; + if (ftruncate (dest_fd, src_total_size) < 0) + { + error (0, errno, _("failed to extend %s"), quote (dst_name)); + return false; + } + } + } + else + { + if (last_ext_logical + last_ext_len < src_total_size) + { + uint64_t holes_len = src_total_size - last_ext_logical - last_ext_len; + if (0 < holes_len) + { + if (! write_zeros (dest_fd, holes_len)) + return false; + } } } return true; } -#else -static bool fiemap_copy (ignored) { errno == ENOTSUP; return false; } -#endif /* FIXME: describe */ /* FIXME: rewrite this to use a hash table so we avoid the quadratic @@ -830,11 +863,13 @@ copy_reg (char const *src_name, char const *dst_name, if (make_holes) { bool require_normal_copy; - /* Perform efficient FIEMAP copy for sparse files, fall back to the - standard copy only if the ioctl(2) fails. 
*/ - if (fiemap_copy (source_desc, dest_desc, buf_size, - src_open_sb.st_size, src_name, - dst_name, &require_normal_copy)) + /* Perform efficient extent copy for sparse file, fall back to the + standard copy only if the initial extent scan fails. If the + '--sparse=never' option was specified, we writing all data but + use extent copy if available to efficiently read. */ + if (extent_copy (source_desc, dest_desc, buf_size, + src_open_sb.st_size, make_holes, + src_name, dst_name, &require_normal_copy)) goto preserve_metadata; else { diff --git a/src/extent-scan.c b/src/extent-scan.c new file mode 100644 index 0000000000..f371b8718d --- /dev/null +++ b/src/extent-scan.c @@ -0,0 +1,113 @@ +/* extent-scan.c -- core functions for scanning extents + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Written by Jie Liu (jeff.liu@oracle.com). */ + +#include +#include +#include +#include +#include + +#include "system.h" +#include "extent-scan.h" +#include "error.h" +#include "quote.h" + +#ifndef HAVE_FIEMAP +# include "fiemap.h" +#endif + +/* Allocate space for struct extent_scan, initialize the entries if + necessary and return it as the input argument of get_extents_info(). 
*/ +extern void +open_extent_scan (int src_fd, struct extent_scan *scan) +{ + scan->fd = src_fd; + scan->ei_count = 0; + scan->scan_start = 0; + scan->initial_scan_failed = false; + scan->hit_last_extent = false; +} + +#ifdef __linux__ +# ifndef FS_IOC_FIEMAP +# define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap) +# endif +/* Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to + obtain a map of file extents excluding holes. */ +extern bool +get_extents_info (struct extent_scan *scan) +{ + union { struct fiemap f; char c[4096]; } fiemap_buf; + struct fiemap *fiemap = &fiemap_buf.f; + struct fiemap_extent *fm_extents = &fiemap->fm_extents[0]; + enum { count = (sizeof fiemap_buf - sizeof *fiemap) / sizeof *fm_extents }; + verify (count != 0); + unsigned int i; + + /* This is required at least to initialize fiemap->fm_start, + but also serves (in mid 2010) to appease valgrind, which + appears not to know the semantics of the FIEMAP ioctl. */ + memset (&fiemap_buf, 0, sizeof fiemap_buf); + + fiemap->fm_start = scan->scan_start; + fiemap->fm_flags = FIEMAP_FLAG_SYNC; + fiemap->fm_extent_count = count; + fiemap->fm_length = FIEMAP_MAX_OFFSET - scan->scan_start; + + /* Fall back to the standard copy if call ioctl(2) failed for the + the first time. */ + if (ioctl (scan->fd, FS_IOC_FIEMAP, fiemap) < 0) + { + if (scan->scan_start == 0) + scan->initial_scan_failed = true; + return false; + } + + /* If 0 extents are returned, then more get_extent_table() are not needed. 
*/ + if (fiemap->fm_mapped_extents == 0) + { + scan->hit_last_extent = true; + return false; + } + + scan->ei_count = fiemap->fm_mapped_extents; + scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info)); + + for (i = 0; i < scan->ei_count; i++) + { + assert (fm_extents[i].fe_logical <= OFF_T_MAX); + + scan->ext_info[i].ext_logical = fm_extents[i].fe_logical; + scan->ext_info[i].ext_length = fm_extents[i].fe_length; + scan->ext_info[i].ext_flags = fm_extents[i].fe_flags; + } + + i--; + if (scan->ext_info[i].ext_flags & FIEMAP_EXTENT_LAST) + { + scan->hit_last_extent = true; + return true; + } + + scan->scan_start = fm_extents[i].fe_logical + fm_extents[i].fe_length; + + return true; +} +#else +extern bool get_extents_info (ignored) { errno = ENOTSUP; return false; } +#endif diff --git a/src/extent-scan.h b/src/extent-scan.h new file mode 100644 index 0000000000..07c2e5bc57 --- /dev/null +++ b/src/extent-scan.h @@ -0,0 +1,68 @@ +/* core functions for efficient reading sparse files + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Written by Jie Liu (jeff.liu@oracle.com). */ + +#ifndef EXTENT_SCAN_H +# define EXTENT_SCAN_H + +/* Structure used to reserve information of each extent. */ +struct extent_info +{ + /* Logical offset of an extent. */ + off_t ext_logical; + + /* Extent length. 
*/ + uint64_t ext_length; + + /* Extent flags, use it for FIEMAP only, or set it to zero. */ + uint32_t ext_flags; +}; + +/* Structure used to reserve extent scan information per file. */ +struct extent_scan +{ + /* File descriptor of extent scan run against. */ + int fd; + + /* Next scan start offset. */ + off_t scan_start; + + /* How many extent info returned for a scan. */ + uint32_t ei_count; + + /* If true, fall back to a normal copy, either + set by the failure of ioctl(2) for FIEMAP or + lseek(2) with SEEK_DATA. */ + bool initial_scan_failed; + + /* If ture, the total extent scan per file has been finished. */ + bool hit_last_extent; + + /* Extent information. */ + struct extent_info *ext_info; +}; + +void +open_extent_scan (int src_fd, struct extent_scan *scan); + +bool +get_extents_info (struct extent_scan *scan); + +#define free_extents_info(ext_scan) free ((ext_scan)->ext_info) +#define close_extent_scan(ext_scan) /* empty */ + +#endif /* EXTENT_SCAN_H */ From 8352d19a553d2f68f9e911352fa1f8e62e428eb5 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Mon, 11 Oct 2010 10:39:50 +0200 Subject: [PATCH 17/36] fiemap copy: don't let write failure go unreported; adjust style, etc. * src/copy.c (write_zeros): Add comments. (extent_copy): Move decls of "ok" and "i" down to scope where used. Adjust comments. Rename local: s/holes_len/hole_size/ Print a diagnostic upon failure to write zeros. --- src/copy.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/copy.c b/src/copy.c index 2c3eabdda4..902c6bbea6 100644 --- a/src/copy.c +++ b/src/copy.c @@ -153,12 +153,17 @@ clone_file (int dest_fd, int src_fd) #endif } +/* Write N_BYTES zero bytes to file descriptor FD. Return true if successful. + Upon write failure, set errno and return false. 
*/ static bool write_zeros (int fd, uint64_t n_bytes) { static char *zeros; static size_t nz = IO_BUFSIZE; + /* Attempt to use a relatively large calloc'd source buffer for + efficiency, but if that allocation fails, resort to a smaller + statically allocated one. */ if (zeros == NULL) { static char fallback[1024]; @@ -198,14 +203,12 @@ extent_copy (int src_fd, int dest_fd, size_t buf_size, off_t last_ext_logical = 0; uint64_t last_ext_len = 0; uint64_t last_read_size = 0; - unsigned int i; - bool ok = true; open_extent_scan (src_fd, &scan); do { - ok = get_extents_info (&scan); + bool ok = get_extents_info (&scan); if (! ok) { if (scan.hit_last_extent) @@ -218,10 +221,12 @@ extent_copy (int src_fd, int dest_fd, size_t buf_size, return false; } - error (0, errno, _("failed to get extents info %s"), quote (src_name)); + error (0, errno, _("%s: failed to get extents info"), + quote (src_name)); return false; } + unsigned int i; for (i = 0; i < scan.ei_count; i++) { off_t ext_logical = scan.ext_info[i].ext_logical; @@ -243,13 +248,18 @@ extent_copy (int src_fd, int dest_fd, size_t buf_size, } else { - /* If not making a sparse file, write zeros to the destination - file if there is a hole between the last and current extent. */ + /* We're not inducing holes; write zeros to the destination file + if there is a hole between the last and current extent. */ if (last_ext_logical + last_ext_len < ext_logical) { - uint64_t holes_len = ext_logical - last_ext_logical - last_ext_len; - if (! write_zeros (dest_fd, holes_len)) - return false; + uint64_t hole_size = (ext_logical + - last_ext_logical + - last_ext_len); + if (! 
write_zeros (dest_fd, hole_size)) + { + error (0, errno, _("%s: write failed"), quote (dst_name)); + return false; + } } } From 30535440c807cef9aaeac2277990c9782362c23c Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Mon, 11 Oct 2010 11:19:02 +0200 Subject: [PATCH 18/36] rename extent_scan member * extent-scan.h [struct extent_scan]: Rename member: s/hit_last_extent/hit_final_extent/. "final" is clearer, since "last" can be interpreted as "preceding". --- src/copy.c | 4 ++-- src/extent-scan.c | 6 +++--- src/extent-scan.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/copy.c b/src/copy.c index 902c6bbea6..b309a86438 100644 --- a/src/copy.c +++ b/src/copy.c @@ -211,7 +211,7 @@ extent_copy (int src_fd, int dest_fd, size_t buf_size, bool ok = get_extents_info (&scan); if (! ok) { - if (scan.hit_last_extent) + if (scan.hit_final_extent) break; if (scan.initial_scan_failed) @@ -305,7 +305,7 @@ extent_copy (int src_fd, int dest_fd, size_t buf_size, /* Release the space allocated to scan->ext_info. */ free_extents_info (&scan); - } while (! scan.hit_last_extent); + } while (! scan.hit_final_extent); /* Do nothing now. */ close_extent_scan (&scan); diff --git a/src/extent-scan.c b/src/extent-scan.c index f371b8718d..b0345f5af7 100644 --- a/src/extent-scan.c +++ b/src/extent-scan.c @@ -40,7 +40,7 @@ open_extent_scan (int src_fd, struct extent_scan *scan) scan->ei_count = 0; scan->scan_start = 0; scan->initial_scan_failed = false; - scan->hit_last_extent = false; + scan->hit_final_extent = false; } #ifdef __linux__ @@ -81,7 +81,7 @@ get_extents_info (struct extent_scan *scan) /* If 0 extents are returned, then more get_extent_table() are not needed. 
*/ if (fiemap->fm_mapped_extents == 0) { - scan->hit_last_extent = true; + scan->hit_final_extent = true; return false; } @@ -100,7 +100,7 @@ get_extents_info (struct extent_scan *scan) i--; if (scan->ext_info[i].ext_flags & FIEMAP_EXTENT_LAST) { - scan->hit_last_extent = true; + scan->hit_final_extent = true; return true; } diff --git a/src/extent-scan.h b/src/extent-scan.h index 07c2e5bc57..0c9c199e3c 100644 --- a/src/extent-scan.h +++ b/src/extent-scan.h @@ -50,7 +50,7 @@ struct extent_scan bool initial_scan_failed; /* If ture, the total extent scan per file has been finished. */ - bool hit_last_extent; + bool hit_final_extent; /* Extent information. */ struct extent_info *ext_info; From 58c342c39a10cbc9b3283382729ab9db6c971aec Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Mon, 11 Oct 2010 11:44:12 +0200 Subject: [PATCH 19/36] rename extent-scan functions to start with extent_scan_ --- src/copy.c | 12 +++++------- src/extent-scan.c | 10 +++++----- src/extent-scan.h | 22 ++++++++++++---------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/copy.c b/src/copy.c index b309a86438..30c1b564f1 100644 --- a/src/copy.c +++ b/src/copy.c @@ -204,11 +204,11 @@ extent_copy (int src_fd, int dest_fd, size_t buf_size, uint64_t last_ext_len = 0; uint64_t last_read_size = 0; - open_extent_scan (src_fd, &scan); + extent_scan_init (src_fd, &scan); do { - bool ok = get_extents_info (&scan); + bool ok = extent_scan_read (&scan); if (! ok) { if (scan.hit_final_extent) @@ -216,7 +216,6 @@ extent_copy (int src_fd, int dest_fd, size_t buf_size, if (scan.initial_scan_failed) { - close_extent_scan (&scan); *require_normal_copy = true; return false; } @@ -304,11 +303,10 @@ extent_copy (int src_fd, int dest_fd, size_t buf_size, } /* Release the space allocated to scan->ext_info. */ - free_extents_info (&scan); - } while (! scan.hit_final_extent); + extent_scan_free (&scan); - /* Do nothing now. */ - close_extent_scan (&scan); + } + while (! 
scan.hit_final_extent); /* If a file ends up with holes, the sum of the last extent logical offset and the read-returned size or the last extent length will be shorter than diff --git a/src/extent-scan.c b/src/extent-scan.c index b0345f5af7..97bb792acc 100644 --- a/src/extent-scan.c +++ b/src/extent-scan.c @@ -32,9 +32,9 @@ #endif /* Allocate space for struct extent_scan, initialize the entries if - necessary and return it as the input argument of get_extents_info(). */ + necessary and return it as the input argument of extent_scan_read(). */ extern void -open_extent_scan (int src_fd, struct extent_scan *scan) +extent_scan_init (int src_fd, struct extent_scan *scan) { scan->fd = src_fd; scan->ei_count = 0; @@ -50,14 +50,13 @@ open_extent_scan (int src_fd, struct extent_scan *scan) /* Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to obtain a map of file extents excluding holes. */ extern bool -get_extents_info (struct extent_scan *scan) +extent_scan_read (struct extent_scan *scan) { union { struct fiemap f; char c[4096]; } fiemap_buf; struct fiemap *fiemap = &fiemap_buf.f; struct fiemap_extent *fm_extents = &fiemap->fm_extents[0]; enum { count = (sizeof fiemap_buf - sizeof *fiemap) / sizeof *fm_extents }; verify (count != 0); - unsigned int i; /* This is required at least to initialize fiemap->fm_start, but also serves (in mid 2010) to appease valgrind, which @@ -88,6 +87,7 @@ get_extents_info (struct extent_scan *scan) scan->ei_count = fiemap->fm_mapped_extents; scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info)); + unsigned int i; for (i = 0; i < scan->ei_count; i++) { assert (fm_extents[i].fe_logical <= OFF_T_MAX); @@ -109,5 +109,5 @@ get_extents_info (struct extent_scan *scan) return true; } #else -extern bool get_extents_info (ignored) { errno = ENOTSUP; return false; } +extern bool extent_scan_read (ignored) { errno = ENOTSUP; return false; } #endif diff --git a/src/extent-scan.h b/src/extent-scan.h index 
0c9c199e3c..3119c8df11 100644 --- a/src/extent-scan.h +++ b/src/extent-scan.h @@ -19,7 +19,7 @@ #ifndef EXTENT_SCAN_H # define EXTENT_SCAN_H -/* Structure used to reserve information of each extent. */ +/* Structure used to store information of each extent. */ struct extent_info { /* Logical offset of an extent. */ @@ -44,25 +44,27 @@ struct extent_scan /* How many extent info returned for a scan. */ uint32_t ei_count; - /* If true, fall back to a normal copy, either - set by the failure of ioctl(2) for FIEMAP or - lseek(2) with SEEK_DATA. */ + /* If true, fall back to a normal copy, either set by the + failure of ioctl(2) for FIEMAP or lseek(2) with SEEK_DATA. */ bool initial_scan_failed; - /* If ture, the total extent scan per file has been finished. */ + /* If true, the total extent scan per file has been finished. */ bool hit_final_extent; - /* Extent information. */ + /* Extent information: a malloc'd array of ei_count structs. */ struct extent_info *ext_info; }; void -open_extent_scan (int src_fd, struct extent_scan *scan); +extent_scan_init (int src_fd, struct extent_scan *scan); bool -get_extents_info (struct extent_scan *scan); +extent_scan_read (struct extent_scan *scan); -#define free_extents_info(ext_scan) free ((ext_scan)->ext_info) -#define close_extent_scan(ext_scan) /* empty */ +static inline void +extent_scan_free (struct extent_scan *scan) +{ + free (scan->ext_info); +} #endif /* EXTENT_SCAN_H */ From 3ab88cb2f79accd5ea7de8c24ece8453c241a507 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Mon, 11 Oct 2010 11:55:46 +0200 Subject: [PATCH 20/36] distribute extent-scan.h, too * src/Makefile.am (copy_sources): Also distribute extent-scan.h. 
--- src/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile.am b/src/Makefile.am index 9fd7822149..bf1d60a886 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -450,7 +450,7 @@ uninstall-local: fi; \ fi -copy_sources = copy.c cp-hash.c extent-scan.c +copy_sources = copy.c cp-hash.c extent-scan.c extent-scan.h # Use `ginstall' in the definition of PROGRAMS and in dependencies to avoid # confusion with the `install' target. The install rule transforms `ginstall' From 9d3daa5e170a787ed8abe757e809f3aaee615f16 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Mon, 11 Oct 2010 11:55:58 +0200 Subject: [PATCH 21/36] formatting --- src/extent-scan.c | 7 ++++++- src/extent-scan.h | 6 ++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/extent-scan.c b/src/extent-scan.c index 97bb792acc..51609757f2 100644 --- a/src/extent-scan.c +++ b/src/extent-scan.c @@ -109,5 +109,10 @@ extent_scan_read (struct extent_scan *scan) return true; } #else -extern bool extent_scan_read (ignored) { errno = ENOTSUP; return false; } +extern bool +extent_scan_read (struct extent_scan *scan ATTRIBUTE_UNUSED) +{ + errno = ENOTSUP; + return false; +} #endif diff --git a/src/extent-scan.h b/src/extent-scan.h index 3119c8df11..ac9e5006fc 100644 --- a/src/extent-scan.h +++ b/src/extent-scan.h @@ -55,11 +55,9 @@ struct extent_scan struct extent_info *ext_info; }; -void -extent_scan_init (int src_fd, struct extent_scan *scan); +void extent_scan_init (int src_fd, struct extent_scan *scan); -bool -extent_scan_read (struct extent_scan *scan); +bool extent_scan_read (struct extent_scan *scan); static inline void extent_scan_free (struct extent_scan *scan) From 5cb0ff8a8a50fdaa8888a86c9bbcec4d6b3cb61b Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Mon, 11 Oct 2010 12:00:07 +0200 Subject: [PATCH 22/36] extent-scan.c: don't include error.h or quote.h * src/extent-scan.c: Don't include error.h or quote.h. Neither is used. 
--- src/extent-scan.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/extent-scan.c b/src/extent-scan.c index 51609757f2..3bb0d536ce 100644 --- a/src/extent-scan.c +++ b/src/extent-scan.c @@ -24,8 +24,6 @@ #include "system.h" #include "extent-scan.h" -#include "error.h" -#include "quote.h" #ifndef HAVE_FIEMAP # include "fiemap.h" From ffd02ad91ac22b18c0a07c433e7e9983aed81542 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Tue, 11 Jan 2011 22:49:34 +0100 Subject: [PATCH 23/36] copy.c: shorten a comment to fit in 80 columns --- src/copy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/copy.c b/src/copy.c index 30c1b564f1..270009bbea 100644 --- a/src/copy.c +++ b/src/copy.c @@ -287,7 +287,7 @@ extent_copy (int src_fd, int dest_fd, size_t buf_size, if (n_read == 0) { - /* Figure out how many bytes read from the previous extent. */ + /* Record number of bytes read from the previous extent. */ last_read_size = last_ext_len - ext_len; break; } From f880d4e43c47fa0b08757d911e00c69de07296ab Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 22 Jan 2011 12:30:21 +0100 Subject: [PATCH 24/36] * src/copy.c (copy_reg): Remove useless else-after-goto. --- src/copy.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/copy.c b/src/copy.c index 270009bbea..71da00d74a 100644 --- a/src/copy.c +++ b/src/copy.c @@ -879,13 +879,11 @@ copy_reg (char const *src_name, char const *dst_name, src_open_sb.st_size, make_holes, src_name, dst_name, &require_normal_copy)) goto preserve_metadata; - else + + if (! require_normal_copy) { - if (! require_normal_copy) - { - return_val = false; - goto close_src_and_dst_desc; - } + return_val = false; + goto close_src_and_dst_desc; } } From 237c2325b3d11e1b1a576978b884df3423a075b1 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 22 Jan 2011 12:36:03 +0100 Subject: [PATCH 25/36] copy: call extent_copy also when make_holes is false, ... 
so that we benefit from using extents also when reading a sparse input file with --sparse=never. * src/copy.c (copy_reg): Remove erroneous test of "make_holes" so that we call extent_copy also when make_holes is false. Otherwise, what's the point of that parameter? --- src/copy.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/copy.c b/src/copy.c index 71da00d74a..be7fdba7b7 100644 --- a/src/copy.c +++ b/src/copy.c @@ -868,23 +868,20 @@ copy_reg (char const *src_name, char const *dst_name, #endif } - if (make_holes) + bool require_normal_copy; + /* Perform efficient extent copy for sparse file, fall back to the + standard copy only if the initial extent scan fails. If the + '--sparse=never' option was specified, we writing all data but + use extent copy if available to efficiently read. */ + if (extent_copy (source_desc, dest_desc, buf_size, + src_open_sb.st_size, make_holes, + src_name, dst_name, &require_normal_copy)) + goto preserve_metadata; + + if (! require_normal_copy) { - bool require_normal_copy; - /* Perform efficient extent copy for sparse file, fall back to the - standard copy only if the initial extent scan fails. If the - '--sparse=never' option was specified, we writing all data but - use extent copy if available to efficiently read. */ - if (extent_copy (source_desc, dest_desc, buf_size, - src_open_sb.st_size, make_holes, - src_name, dst_name, &require_normal_copy)) - goto preserve_metadata; - - if (! 
require_normal_copy) - { - return_val = false; - goto close_src_and_dst_desc; - } + return_val = false; + goto close_src_and_dst_desc; } /* If not making a sparse file, try to use a more-efficient From b3dfab326ad8d917ac1eaba10e0852bf695f93ae Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 22 Jan 2011 12:55:58 +0100 Subject: [PATCH 26/36] copy: tweak variable name; improve a comment * src/copy.c (copy_reg): Rename a variable to make more sense from caller's perspective: s/require_normal_copy/normal_copy_required/. This is an output-only variable, and the original name could make it look like an input (or i&o) variable. --- src/copy.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/copy.c b/src/copy.c index be7fdba7b7..fae8dbe0e9 100644 --- a/src/copy.c +++ b/src/copy.c @@ -868,17 +868,17 @@ copy_reg (char const *src_name, char const *dst_name, #endif } - bool require_normal_copy; - /* Perform efficient extent copy for sparse file, fall back to the + bool normal_copy_required; + /* Perform an efficient extent-based copy, falling back to the standard copy only if the initial extent scan fails. If the - '--sparse=never' option was specified, we writing all data but - use extent copy if available to efficiently read. */ + '--sparse=never' option is specified, write all data but use + any extents to read more efficiently. */ if (extent_copy (source_desc, dest_desc, buf_size, src_open_sb.st_size, make_holes, - src_name, dst_name, &require_normal_copy)) + src_name, dst_name, &normal_copy_required)) goto preserve_metadata; - if (! require_normal_copy) + if (! 
normal_copy_required) { return_val = false; goto close_src_and_dst_desc; From bdf7c351a37ed6eeaa6bce98cb82902073bcc6c3 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 22 Jan 2011 13:09:08 +0100 Subject: [PATCH 27/36] copy: don't allocate a separate buffer just for extent-based copy * src/copy.c (copy_reg): Move use of extent_copy to just *after* we allocate the main copying buffer, so we can... (extent_copy): Take a new parameter, BUF, and use that rather than allocating a private buffer. Update caller. --- src/copy.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/copy.c b/src/copy.c index fae8dbe0e9..c9cc2f7379 100644 --- a/src/copy.c +++ b/src/copy.c @@ -194,7 +194,7 @@ write_zeros (int fd, uint64_t n_bytes) Upon any other failure, set *NORMAL_COPY_REQUIRED to false and return false. */ static bool -extent_copy (int src_fd, int dest_fd, size_t buf_size, +extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, off_t src_total_size, bool make_holes, char const *src_name, char const *dst_name, bool *require_normal_copy) @@ -268,8 +268,6 @@ extent_copy (int src_fd, int dest_fd, size_t buf_size, while (ext_len) { - char buf[buf_size]; - /* Avoid reading into the holes if the left extent length is shorter than the buffer size. */ buf_size = MIN (ext_len, buf_size); @@ -868,22 +866,6 @@ copy_reg (char const *src_name, char const *dst_name, #endif } - bool normal_copy_required; - /* Perform an efficient extent-based copy, falling back to the - standard copy only if the initial extent scan fails. If the - '--sparse=never' option is specified, write all data but use - any extents to read more efficiently. */ - if (extent_copy (source_desc, dest_desc, buf_size, - src_open_sb.st_size, make_holes, - src_name, dst_name, &normal_copy_required)) - goto preserve_metadata; - - if (!
normal_copy_required) - { - return_val = false; - goto close_src_and_dst_desc; - } - /* If not making a sparse file, try to use a more-efficient buffer size. */ if (! make_holes) @@ -912,6 +894,22 @@ copy_reg (char const *src_name, char const *dst_name, buf_alloc = xmalloc (buf_size + buf_alignment_slop); buf = ptr_align (buf_alloc, buf_alignment); + bool normal_copy_required; + /* Perform an efficient extent-based copy, falling back to the + standard copy only if the initial extent scan fails. If the + '--sparse=never' option is specified, write all data but use + any extents to read more efficiently. */ + if (extent_copy (source_desc, dest_desc, buf, buf_size, + src_open_sb.st_size, make_holes, + src_name, dst_name, &normal_copy_required)) + goto preserve_metadata; + + if (! normal_copy_required) + { + return_val = false; + goto close_src_and_dst_desc; + } + while (true) { word *wp = NULL; From 8e4f0efd3ad17f1dd7a561369da22dfaf43ab3e8 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Fri, 28 Jan 2011 22:31:23 +0100 Subject: [PATCH 28/36] tests: ensure that FIEMAP-enabled cp copies a sparse file efficiently * tests/cp/fiemap-perf: New file. * tests/Makefile.am (TESTS): Add it. --- tests/Makefile.am | 1 + tests/cp/fiemap-perf | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100755 tests/cp/fiemap-perf diff --git a/tests/Makefile.am b/tests/Makefile.am index 847f18136a..7855ac5197 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -320,6 +320,7 @@ TESTS = \ cp/dir-vs-file \ cp/existing-perm-race \ cp/fail-perm \ + cp/fiemap-perf \ cp/file-perm-race \ cp/into-self \ cp/link \ diff --git a/tests/cp/fiemap-perf b/tests/cp/fiemap-perf new file mode 100755 index 0000000000..429e59beb1 --- /dev/null +++ b/tests/cp/fiemap-perf @@ -0,0 +1,32 @@ +#!/bin/sh +# ensure that a sparse file is copied efficiently, by default + +# Copyright (C) 2011 Free Software Foundation, Inc. 
+ +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. "${srcdir=.}/init.sh"; path_prepend_ ../src +print_ver_ cp + +# Require a fiemap-enabled FS. +df -T -t btrfs -t xfs -t ext4 -t ocfs2 . \ + || skip_ "this file system lacks FIEMAP support" + +# Create a large-but-sparse file. +timeout 1 dd bs=1 seek=1T of=f < /dev/null || framework_failure_ + +# Nothing can read (much less write) that many bytes in so little time. +timeout 3 cp f f2 || framework_failure_ + +Exit $fail From dd380c3d672f78adb4cb907e8658db6b3962a281 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 27 Jan 2011 18:28:25 +0100 Subject: [PATCH 29/36] fiemap copy: rename some locals (extent_copy): Rename locals: s/*ext_logical/*ext_start/ --- src/copy.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/copy.c b/src/copy.c index c9cc2f7379..e164ab7df2 100644 --- a/src/copy.c +++ b/src/copy.c @@ -200,7 +200,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, bool *require_normal_copy) { struct extent_scan scan; - off_t last_ext_logical = 0; + off_t last_ext_start = 0; uint64_t last_ext_len = 0; uint64_t last_read_size = 0; @@ -228,10 +228,10 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, unsigned int i; for (i = 0; i < scan.ei_count; i++) { - off_t ext_logical = scan.ext_info[i].ext_logical; + off_t ext_start = scan.ext_info[i].ext_logical; uint64_t ext_len =
scan.ext_info[i].ext_length; - if (lseek (src_fd, ext_logical, SEEK_SET) < 0) + if (lseek (src_fd, ext_start, SEEK_SET) < 0) { error (0, errno, _("cannot lseek %s"), quote (src_name)); return false; @@ -239,7 +239,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, if (make_holes) { - if (lseek (dest_fd, ext_logical, SEEK_SET) < 0) + if (lseek (dest_fd, ext_start, SEEK_SET) < 0) { error (0, errno, _("cannot lseek %s"), quote (dst_name)); return false; @@ -249,10 +249,10 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, { /* We're not inducing holes; write zeros to the destination file if there is a hole between the last and current extent. */ - if (last_ext_logical + last_ext_len < ext_logical) + if (last_ext_start + last_ext_len < ext_start) { - uint64_t hole_size = (ext_logical - - last_ext_logical + uint64_t hole_size = (ext_start + - last_ext_start - last_ext_len); if (! write_zeros (dest_fd, hole_size)) { @@ -262,7 +262,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, } } - last_ext_logical = ext_logical; + last_ext_start = ext_start; last_ext_len = ext_len; last_read_size = 0; @@ -313,7 +313,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, file. */ if (make_holes) { - if (last_ext_logical + last_read_size < src_total_size) + if (last_ext_start + last_read_size < src_total_size) { if (ftruncate (dest_fd, src_total_size) < 0) { @@ -324,9 +324,9 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, } else { - if (last_ext_logical + last_ext_len < src_total_size) + if (last_ext_start + last_ext_len < src_total_size) { - uint64_t holes_len = src_total_size - last_ext_logical - last_ext_len; + uint64_t holes_len = src_total_size - last_ext_start - last_ext_len; if (0 < holes_len) { if (! 
write_zeros (dest_fd, holes_len)) From d1067e37b0e4b945ab901e98d6eedb249fa2a42c Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 27 Jan 2011 19:00:48 +0100 Subject: [PATCH 30/36] fiemap copy: simplify post-loop logic; improve comments * src/copy.c (extent_copy): Avoid duplication in post-loop extend-to-desired-length code. --- src/copy.c | 46 ++++++++++++++++------------------------------ 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/src/copy.c b/src/copy.c index e164ab7df2..ab18a76b47 100644 --- a/src/copy.c +++ b/src/copy.c @@ -268,8 +268,8 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, while (ext_len) { - /* Avoid reading into the holes if the left extent - length is shorter than the buffer size. */ + /* Don't read from a following hole if EXT_LEN + is smaller than the buffer size. */ buf_size = MIN (ext_len, buf_size); ssize_t n_read = read (src_fd, buf, buf_size); @@ -285,7 +285,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, if (n_read == 0) { - /* Record number of bytes read from the previous extent. */ + /* Record number of bytes read from this extent-at-EOF. */ last_read_size = last_ext_len - ext_len; break; } @@ -306,33 +306,19 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, } while (! scan.hit_final_extent); - /* If a file ends up with holes, the sum of the last extent logical offset - and the read-returned size or the last extent length will be shorter than - the actual size of the file. Use ftruncate to extend the length of the - destination file if make_holes, or write zeros up to the actual size of the - file. 
*/ - if (make_holes) - { - if (last_ext_start + last_read_size < src_total_size) - { - if (ftruncate (dest_fd, src_total_size) < 0) - { - error (0, errno, _("failed to extend %s"), quote (dst_name)); - return false; - } - } - } - else - { - if (last_ext_start + last_ext_len < src_total_size) - { - uint64_t holes_len = src_total_size - last_ext_start - last_ext_len; - if (0 < holes_len) - { - if (! write_zeros (dest_fd, holes_len)) - return false; - } - } + /* When the source file ends with a hole, the sum of the last extent start + offset and (the read-returned size or the last extent length) is smaller + than the actual size of the file. In that case, extend the destination + file to the required length. When MAKE_HOLES is set, use ftruncate; + otherwise, use write_zeros. */ + uint64_t eof_hole_len = (src_total_size - last_ext_start + - (last_read_size ? last_read_size : last_ext_len)); + if (eof_hole_len && (make_holes + ? ftruncate (dest_fd, src_total_size) + : ! write_zeros (dest_fd, eof_hole_len))) + { + error (0, errno, _("failed to extend %s"), quote (dst_name)); + return false; } return true; From 33f4a4a549afb3de94e546091c91586a1ece67ba Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 27 Jan 2011 17:30:08 +0100 Subject: [PATCH 31/36] fiemap copy: avoid a performance hit due to very small buffer * src/copy.c (extent_copy): Don't let what should have been a temporary reduction of buf_size (to handle a short ext_len) become permanent and thus impact the performance of all further iterations. --- src/copy.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/copy.c b/src/copy.c index ab18a76b47..9a3a8f7a6e 100644 --- a/src/copy.c +++ b/src/copy.c @@ -270,9 +270,8 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, { /* Don't read from a following hole if EXT_LEN is smaller than the buffer size. 
*/ - buf_size = MIN (ext_len, buf_size); - - ssize_t n_read = read (src_fd, buf, buf_size); + size_t b_size = MIN (ext_len, buf_size); + ssize_t n_read = read (src_fd, buf, b_size); if (n_read < 0) { #ifdef EINTR From 47c8476ec9629239c82caf50b1c68b7bc58ba2d6 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 27 Jan 2011 17:49:04 +0100 Subject: [PATCH 32/36] fiemap copy: avoid leak-on-error * src/copy.c (extent_copy): Don't leak an extent_scan buffer on failed lseek, read, or write. --- src/copy.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/copy.c b/src/copy.c index 9a3a8f7a6e..208e4635d4 100644 --- a/src/copy.c +++ b/src/copy.c @@ -234,6 +234,8 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, if (lseek (src_fd, ext_start, SEEK_SET) < 0) { error (0, errno, _("cannot lseek %s"), quote (src_name)); + fail: + extent_scan_free (&scan); return false; } @@ -242,7 +244,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, if (lseek (dest_fd, ext_start, SEEK_SET) < 0) { error (0, errno, _("cannot lseek %s"), quote (dst_name)); - return false; + goto fail; } } else @@ -257,7 +259,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, if (! 
write_zeros (dest_fd, hole_size)) { error (0, errno, _("%s: write failed"), quote (dst_name)); - return false; + goto fail; } } } @@ -279,7 +281,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, continue; #endif error (0, errno, _("reading %s"), quote (src_name)); - return false; + goto fail; } if (n_read == 0) @@ -292,7 +294,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, if (full_write (dest_fd, buf, n_read) != n_read) { error (0, errno, _("writing %s"), quote (dst_name)); - return false; + goto fail; } ext_len -= n_read; From c0b7bc3864c06ea12c2740056e28623449fb63a7 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 27 Jan 2011 20:57:17 +0100 Subject: [PATCH 33/36] copy: factor sparse-copying code into its own function, because we're going to have to use it from within extent_copy, too. * src/copy.c (sparse_copy): New function, factored out of... (copy_reg): ...here. Remove now-unused locals. --- src/copy.c | 212 ++++++++++++++++++++++++++++------------------------- 1 file changed, 114 insertions(+), 98 deletions(-) diff --git a/src/copy.c b/src/copy.c index 208e4635d4..cc8f68f146 100644 --- a/src/copy.c +++ b/src/copy.c @@ -134,6 +134,116 @@ utimens_symlink (char const *file, struct timespec const *timespec) return err; } +/* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME, + honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer + BUF for temporary storage. Return true upon successful completion; + print a diagnostic and return false upon error. 
*/ +static bool +sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, + bool make_holes, + char const *src_name, char const *dst_name) +{ + typedef uintptr_t word; + off_t n_read_total = 0; + bool last_write_made_hole = false; + + while (true) + { + word *wp = NULL; + + ssize_t n_read = read (src_fd, buf, buf_size); + if (n_read < 0) + { +#ifdef EINTR + if (errno == EINTR) + continue; +#endif + error (0, errno, _("reading %s"), quote (src_name)); + return false; + } + if (n_read == 0) + break; + + n_read_total += n_read; + + if (make_holes) + { + char *cp; + + /* Sentinel to stop loop. */ + buf[n_read] = '\1'; +#ifdef lint + /* Usually, buf[n_read] is not the byte just before a "word" + (aka uintptr_t) boundary. In that case, the word-oriented + test below (*wp++ == 0) would read some uninitialized bytes + after the sentinel. To avoid false-positive reports about + this condition (e.g., from a tool like valgrind), set the + remaining bytes -- to any value. */ + memset (buf + n_read + 1, 0, sizeof (word) - 1); +#endif + + /* Find first nonzero *word*, or the word with the sentinel. */ + + wp = (word *) buf; + while (*wp++ == 0) + continue; + + /* Find the first nonzero *byte*, or the sentinel. */ + + cp = (char *) (wp - 1); + while (*cp++ == 0) + continue; + + if (cp <= buf + n_read) + /* Clear to indicate that a normal write is needed. */ + wp = NULL; + else + { + /* We found the sentinel, so the whole input block was zero. + Make a hole. */ + if (lseek (dest_fd, n_read, SEEK_CUR) < 0) + { + error (0, errno, _("cannot lseek %s"), quote (dst_name)); + return false; + } + last_write_made_hole = true; + } + } + + if (!wp) + { + size_t n = n_read; + if (full_write (dest_fd, buf, n) != n) + { + error (0, errno, _("writing %s"), quote (dst_name)); + return false; + } + last_write_made_hole = false; + + /* It is tempting to return early here upon a short read from a + regular file. That would save the final read syscall for each + file. 
Unfortunately that doesn't work for certain files in + /proc with linux kernels from at least 2.6.9 .. 2.6.29. */ + } + } + + /* If the file ends with a `hole', we need to do something to record + the length of the file. On modern systems, calling ftruncate does + the job. On systems without native ftruncate support, we have to + write a byte at the ending position. Otherwise the kernel would + truncate the file at the end of the last write operation. */ + if (last_write_made_hole) + { + if (ftruncate (dest_fd, n_read_total) < 0) + { + error (0, errno, _("truncating %s"), quote (dst_name)); + return false; + } + } + + return true; +} + /* Perform the O(1) btrfs clone operation, if possible. Upon success, return 0. Otherwise, return -1 and set errno. */ static inline int @@ -824,7 +934,6 @@ copy_reg (char const *src_name, char const *dst_name, if (data_copy_required) { typedef uintptr_t word; - off_t n_read_total = 0; /* Choose a suitable buffer size; it may be adjusted later. */ size_t buf_alignment = lcm (getpagesize (), sizeof (word)); @@ -832,7 +941,6 @@ copy_reg (char const *src_name, char const *dst_name, size_t buf_size = io_blksize (sb); /* Deal with sparse files. */ - bool last_write_made_hole = false; bool make_holes = false; if (S_ISREG (sb.st_mode)) @@ -897,103 +1005,11 @@ copy_reg (char const *src_name, char const *dst_name, goto close_src_and_dst_desc; } - while (true) + if ( ! sparse_copy (source_desc, dest_desc, buf, buf_size, + make_holes, src_name, dst_name)) { - word *wp = NULL; - - ssize_t n_read = read (source_desc, buf, buf_size); - if (n_read < 0) - { -#ifdef EINTR - if (errno == EINTR) - continue; -#endif - error (0, errno, _("reading %s"), quote (src_name)); - return_val = false; - goto close_src_and_dst_desc; - } - if (n_read == 0) - break; - - n_read_total += n_read; - - if (make_holes) - { - char *cp; - - /* Sentinel to stop loop. 
*/ - buf[n_read] = '\1'; -#ifdef lint - /* Usually, buf[n_read] is not the byte just before a "word" - (aka uintptr_t) boundary. In that case, the word-oriented - test below (*wp++ == 0) would read some uninitialized bytes - after the sentinel. To avoid false-positive reports about - this condition (e.g., from a tool like valgrind), set the - remaining bytes -- to any value. */ - memset (buf + n_read + 1, 0, sizeof (word) - 1); -#endif - - /* Find first nonzero *word*, or the word with the sentinel. */ - - wp = (word *) buf; - while (*wp++ == 0) - continue; - - /* Find the first nonzero *byte*, or the sentinel. */ - - cp = (char *) (wp - 1); - while (*cp++ == 0) - continue; - - if (cp <= buf + n_read) - /* Clear to indicate that a normal write is needed. */ - wp = NULL; - else - { - /* We found the sentinel, so the whole input block was zero. - Make a hole. */ - if (lseek (dest_desc, n_read, SEEK_CUR) < 0) - { - error (0, errno, _("cannot lseek %s"), quote (dst_name)); - return_val = false; - goto close_src_and_dst_desc; - } - last_write_made_hole = true; - } - } - - if (!wp) - { - size_t n = n_read; - if (full_write (dest_desc, buf, n) != n) - { - error (0, errno, _("writing %s"), quote (dst_name)); - return_val = false; - goto close_src_and_dst_desc; - } - last_write_made_hole = false; - - /* It is tempting to return early here upon a short read from a - regular file. That would save the final read syscall for each - file. Unfortunately that doesn't work for certain files in - /proc with linux kernels from at least 2.6.9 .. 2.6.29. */ - } - } - - /* If the file ends with a `hole', we need to do something to record - the length of the file. On modern systems, calling ftruncate does - the job. On systems without native ftruncate support, we have to - write a byte at the ending position. Otherwise the kernel would - truncate the file at the end of the last write operation. 
*/ - - if (last_write_made_hole) - { - if (ftruncate (dest_desc, n_read_total) < 0) - { - error (0, errno, _("truncating %s"), quote (dst_name)); - return_val = false; - goto close_src_and_dst_desc; - } + return_val = false; + goto close_src_and_dst_desc; } } From 80038c3cba2dee9c6c41ab6a28a1233a538ee2ee Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 27 Jan 2011 21:01:07 +0100 Subject: [PATCH 34/36] copy: remove obsolete comment * src/copy.c (sparse_copy): Remove now-obsolete comment about how we used to work around lack of ftruncate. Combine nested if conditions into one. --- src/copy.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/copy.c b/src/copy.c index cc8f68f146..4bfdce68bd 100644 --- a/src/copy.c +++ b/src/copy.c @@ -137,7 +137,10 @@ utimens_symlink (char const *file, struct timespec const *timespec) /* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME, honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer BUF for temporary storage. Return true upon successful completion; - print a diagnostic and return false upon error. */ + print a diagnostic and return false upon error. + Note that for best results, BUF should be "well"-aligned. + BUF must have sizeof(uintptr_t)-1 bytes of additional space + beyond BUF[BUF_SIZE-1]. */ static bool sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, bool make_holes, @@ -227,18 +230,12 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, } } - /* If the file ends with a `hole', we need to do something to record - the length of the file. On modern systems, calling ftruncate does - the job. On systems without native ftruncate support, we have to - write a byte at the ending position. Otherwise the kernel would - truncate the file at the end of the last write operation. */ - if (last_write_made_hole) + /* If the file ends with a `hole', we need to do something to record the + length of the file. 
On modern systems, calling ftruncate does the job. */ + if (last_write_made_hole && ftruncate (dest_fd, n_read_total) < 0) { - if (ftruncate (dest_fd, n_read_total) < 0) - { - error (0, errno, _("truncating %s"), quote (dst_name)); - return false; - } + error (0, errno, _("truncating %s"), quote (dst_name)); + return false; } return true; From f161ba3fcd5832d1344224ec41627cace5d73544 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Fri, 28 Jan 2011 21:19:50 +0100 Subject: [PATCH 35/36] copy: make extent_copy use sparse_copy, rather than its own code * src/copy.c (extent_copy): Before this change, extent_copy would fail to create holes, thus breaking --sparse=auto and --sparse=always. I.e., copying a large enough file of all zeros, cp --sparse=always should introduce a hole, but with extent_copy, it would not. --- src/copy.c | 109 ++++++++++++++++++++++++----------------------------- 1 file changed, 49 insertions(+), 60 deletions(-) diff --git a/src/copy.c b/src/copy.c index 4bfdce68bd..96bb35b135 100644 --- a/src/copy.c +++ b/src/copy.c @@ -136,25 +136,28 @@ utimens_symlink (char const *file, struct timespec const *timespec) /* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME, honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer - BUF for temporary storage. Return true upon successful completion; + BUF for temporary storage. Copy no more than MAX_N_READ bytes. + Return true upon successful completion; print a diagnostic and return false upon error. Note that for best results, BUF should be "well"-aligned. BUF must have sizeof(uintptr_t)-1 bytes of additional space - beyond BUF[BUF_SIZE-1]. */ + beyond BUF[BUF_SIZE-1]. + Set *LAST_WRITE_MADE_HOLE to true if the final operation on + DEST_FD introduced a hole. 
*/ static bool sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, bool make_holes, - char const *src_name, char const *dst_name) + char const *src_name, char const *dst_name, + uintmax_t max_n_read, bool *last_write_made_hole) { typedef uintptr_t word; - off_t n_read_total = 0; - bool last_write_made_hole = false; + *last_write_made_hole = false; - while (true) + while (max_n_read) { word *wp = NULL; - ssize_t n_read = read (src_fd, buf, buf_size); + ssize_t n_read = read (src_fd, buf, MIN (max_n_read, buf_size)); if (n_read < 0) { #ifdef EINTR @@ -166,8 +169,7 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, } if (n_read == 0) break; - - n_read_total += n_read; + max_n_read -= n_read; if (make_holes) { @@ -209,7 +211,7 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, error (0, errno, _("cannot lseek %s"), quote (dst_name)); return false; } - last_write_made_hole = true; + *last_write_made_hole = true; } } @@ -221,7 +223,7 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, error (0, errno, _("writing %s"), quote (dst_name)); return false; } - last_write_made_hole = false; + *last_write_made_hole = false; /* It is tempting to return early here upon a short read from a regular file. That would save the final read syscall for each @@ -230,9 +232,16 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, } } - /* If the file ends with a `hole', we need to do something to record the - length of the file. On modern systems, calling ftruncate does the job. */ - if (last_write_made_hole && ftruncate (dest_fd, n_read_total) < 0) + return true; +} + +/* If the file ends with a `hole' (i.e., if sparse_copy set wrote_hole_at_eof), + call this function to record the length of the output file. 
*/ +static bool +sparse_copy_finalize (int dest_fd, char const *dst_name) +{ + off_t len = lseek (dest_fd, 0, SEEK_CUR); + if (0 <= len && ftruncate (dest_fd, len) < 0) { error (0, errno, _("truncating %s"), quote (dst_name)); return false; @@ -309,10 +318,10 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, struct extent_scan scan; off_t last_ext_start = 0; uint64_t last_ext_len = 0; - uint64_t last_read_size = 0; extent_scan_init (src_fd, &scan); + bool wrote_hole_at_eof = true; do { bool ok = extent_scan_read (&scan); @@ -356,8 +365,9 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, } else { - /* We're not inducing holes; write zeros to the destination file - if there is a hole between the last and current extent. */ + /* When not inducing holes and when there is a hole between + the end of the previous extent and the beginning of the + current one, write zeros to the destination file. */ if (last_ext_start + last_ext_len < ext_start) { uint64_t hole_size = (ext_start @@ -373,39 +383,11 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, last_ext_start = ext_start; last_ext_len = ext_len; - last_read_size = 0; - - while (ext_len) - { - /* Don't read from a following hole if EXT_LEN - is smaller than the buffer size. */ - size_t b_size = MIN (ext_len, buf_size); - ssize_t n_read = read (src_fd, buf, b_size); - if (n_read < 0) - { -#ifdef EINTR - if (errno == EINTR) - continue; -#endif - error (0, errno, _("reading %s"), quote (src_name)); - goto fail; - } - - if (n_read == 0) - { - /* Record number of bytes read from this extent-at-EOF. */ - last_read_size = last_ext_len - ext_len; - break; - } - - if (full_write (dest_fd, buf, n_read) != n_read) - { - error (0, errno, _("writing %s"), quote (dst_name)); - goto fail; - } - ext_len -= n_read; - } + if ( ! 
sparse_copy (src_fd, dest_fd, buf, buf_size, + make_holes, src_name, dst_name, ext_len, + &wrote_hole_at_eof)) + return false; } /* Release the space allocated to scan->ext_info. */ @@ -414,16 +396,19 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, } while (! scan.hit_final_extent); - /* When the source file ends with a hole, the sum of the last extent start - offset and (the read-returned size or the last extent length) is smaller - than the actual size of the file. In that case, extend the destination - file to the required length. When MAKE_HOLES is set, use ftruncate; - otherwise, use write_zeros. */ - uint64_t eof_hole_len = (src_total_size - last_ext_start - - (last_read_size ? last_read_size : last_ext_len)); - if (eof_hole_len && (make_holes - ? ftruncate (dest_fd, src_total_size) - : ! write_zeros (dest_fd, eof_hole_len))) + /* When the source file ends with a hole, we have to do a little more work, + since the above copied only up to and including the final extent. + In order to complete the copy, we may have to insert a hole or write + zeros in the destination corresponding to the source file's hole-at-EOF. + + In addition, if the final extent was a block of zeros at EOF and we've + just converted them to a hole in the destination, we must call ftruncate + here in order to record the proper length in the destination. */ + off_t dest_len = lseek (dest_fd, 0, SEEK_CUR); + if ((dest_len < src_total_size || wrote_hole_at_eof) + && (make_holes + ? ftruncate (dest_fd, src_total_size) + : ! write_zeros (dest_fd, src_total_size - dest_len))) { error (0, errno, _("failed to extend %s"), quote (dst_name)); return false; @@ -1002,8 +987,12 @@ copy_reg (char const *src_name, char const *dst_name, goto close_src_and_dst_desc; } + bool wrote_hole_at_eof; if ( ! sparse_copy (source_desc, dest_desc, buf, buf_size, - make_holes, src_name, dst_name)) + make_holes, src_name, dst_name, UINTMAX_MAX, + &wrote_hole_at_eof) + || (wrote_hole_at_eof && + ! 
sparse_copy_finalize (dest_desc, dst_name))) { return_val = false; goto close_src_and_dst_desc; From 7f154dcfc5641c9616921d4c5ac5005bcb2507eb Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 27 Jan 2011 15:17:42 +0100 Subject: [PATCH 36/36] tests: cp/fiemap: exercise previously-failing parts * tests/cp/fiemap-2: New test. * tests/Makefile.am (TESTS): Add it. --- tests/Makefile.am | 1 + tests/cp/fiemap-2 | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100755 tests/cp/fiemap-2 diff --git a/tests/Makefile.am b/tests/Makefile.am index 7855ac5197..40d35ac7d8 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -321,6 +321,7 @@ TESTS = \ cp/existing-perm-race \ cp/fail-perm \ cp/fiemap-perf \ + cp/fiemap-2 \ cp/file-perm-race \ cp/into-self \ cp/link \ diff --git a/tests/cp/fiemap-2 b/tests/cp/fiemap-2 new file mode 100755 index 0000000000..d40505b704 --- /dev/null +++ b/tests/cp/fiemap-2 @@ -0,0 +1,54 @@ +#!/bin/sh +# Exercise a few more corners of the fiemap-copying code. + +# Copyright (C) 2011 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. "${srcdir=.}/init.sh"; path_prepend_ ../src +print_ver_ cp + +# Require a fiemap-enabled FS. +df -T -t btrfs -t xfs -t ext4 -t ocfs2 . \ + || skip_ "this file system lacks FIEMAP support" + +# Exercise the code that handles a file ending in a hole.
+printf x > k || framework_failure_ +dd bs=1k seek=128 of=k < /dev/null || framework_failure_ + +# The first time through the outer loop, the input file, K, ends with a hole. +# The second time through, we append a byte so that it does not. +for append in no yes; do + test $append = yes && printf y >> k + for i in always never; do + cp --sparse=$i k k2 || fail=1 + cmp k k2 || fail=1 + done +done + +# Ensure that --sparse=always can restore holes. +rm -f k +# Create a file starting with an "x", followed by 256K-1 zero bytes. +printf x > k || framework_failure_ +dd bs=1k seek=1 of=k count=255 < /dev/zero || framework_failure_ + +# cp should detect the all-zero blocks and convert some of them to holes. +# How many it detects/converts currently depends on io_blksize. +# Currently, on my F14/ext4 desktop, this K starts off with size 256KiB, +# (note that the K in the preceding test starts off with size 4KiB). +# cp from coreutils-8.9 with --sparse=always reduces the size to 32KiB. +cp --sparse=always k k2 || fail=1 +test $(stat -c %b k2) -lt $(stat -c %b k) || fail=1 + +Exit $fail