From 1ba1e9ae9401480324e56eb9e68547653687be5f Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Thu, 13 May 2010 22:09:30 +0800 Subject: [PATCH 01/15] cp: Add FIEMAP support for efficient sparse file copy * src/fiemap.h: Add fiemap.h for fiemap ioctl(2) support. Copied from linux's include/linux/fiemap.h, with minor formatting changes. * src/copy.c (copy_reg): Now, when `cp' is invoked with the --sparse=[WHEN] option, we will try to do a FIEMAP-copy if the underlying file system supports it, and fall back to a normal copy if it fails. --- src/copy.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/fiemap.h | 102 +++++++++++++++++++++++++++++++++ 2 files changed, 261 insertions(+) create mode 100644 src/fiemap.h diff --git a/src/copy.c b/src/copy.c index 171499c479..2003380603 100644 --- a/src/copy.c +++ b/src/copy.c @@ -63,6 +63,10 @@ #include +#ifndef HAVE_FIEMAP +# include "fiemap.h" +#endif + #ifndef HAVE_FCHOWN # define HAVE_FCHOWN false # define fchown(fd, uid, gid) (-1) @@ -149,6 +153,141 @@ clone_file (int dest_fd, int src_fd) #endif } +#ifdef __linux__ +# ifndef FS_IOC_FIEMAP +# define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap) +# endif +/* Perform FIEMAP(available in mainline 2.6.27) copy if possible. + Call ioctl(2) with FS_IOC_FIEMAP to efficiently map file allocation + excepts holes. So the overhead to deal with holes with lseek(2) in + normal copy could be saved. This would result in much faster backups + for any kind of sparse file. 
*/ +static bool +fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, + off_t src_total_size, char const *src_name, + char const *dst_name, bool *normal_copy_required) +{ + bool fail = false; + bool last = false; + char fiemap_buf[4096]; + struct fiemap *fiemap = (struct fiemap *)fiemap_buf; + struct fiemap_extent *fm_ext = &fiemap->fm_extents[0]; + uint32_t count = (sizeof (fiemap_buf) - sizeof (*fiemap)) / + sizeof (struct fiemap_extent); + off_t last_ext_logical = 0; + uint64_t last_ext_len = 0; + uint64_t last_read_size = 0; + unsigned int i = 0; + + /* This is required at least to initialize fiemap->fm_start, + but also serves (in May 2010) to appease valgrind, which + appears not to know the semantics of the FIEMAP ioctl. */ + memset (fiemap_buf, 0, sizeof fiemap_buf); + + do + { + fiemap->fm_length = FIEMAP_MAX_OFFSET; + fiemap->fm_extent_count = count; + + /* When ioctl(2) fails, fall back to the normal copy only if it + is the first time we met. */ + if (ioctl (src_fd, FS_IOC_FIEMAP, fiemap) < 0) + { + /* If `i > 0', then at least one ioctl(2) has been performed before. */ + if (i == 0) + *normal_copy_required = true; + return false; + } + + /* If 0 extents are returned, then more ioctls are not needed. 
*/ + if (fiemap->fm_mapped_extents == 0) + break; + + for (i = 0; i < fiemap->fm_mapped_extents; i++) + { + assert (fm_ext[i].fe_logical <= OFF_T_MAX); + + off_t ext_logical = fm_ext[i].fe_logical; + uint64_t ext_len = fm_ext[i].fe_length; + + if (lseek (src_fd, ext_logical, SEEK_SET) < 0LL) + { + error (0, errno, _("cannot lseek %s"), quote (src_name)); + return fail; + } + + if (lseek (dest_fd, ext_logical, SEEK_SET) < 0LL) + { + error (0, errno, _("cannot lseek %s"), quote (dst_name)); + return fail; + } + + if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST) + { + last_ext_logical = ext_logical; + last_ext_len = ext_len; + last = true; + } + + while (ext_len) + { + char buf[buf_size]; + + /* Avoid reading into the holes if the left extent + length is shorter than the buffer size. */ + if (ext_len < buf_size) + buf_size = ext_len; + + ssize_t n_read = read (src_fd, buf, buf_size); + if (n_read < 0) + { +#ifdef EINTR + if (errno == EINTR) + continue; +#endif + error (0, errno, _("reading %s"), quote (src_name)); + return fail; + } + + if (n_read == 0) + { + /* Figure out how many bytes read from the last extent. */ + last_read_size = last_ext_len - ext_len; + break; + } + + if (full_write (dest_fd, buf, n_read) != n_read) + { + error (0, errno, _("writing %s"), quote (dst_name)); + return fail; + } + + ext_len -= n_read; + } + } + + fiemap->fm_start = fm_ext[i - 1].fe_logical + fm_ext[i - 1].fe_length; + + } while (! last); + + /* If a file ends up with holes, the sum of the last extent logical offset + and the read-returned size will be shorter than the actual size of the + file. Use ftruncate to extend the length of the destination file. */ + if (last_ext_logical + last_read_size < src_total_size) + { + if (ftruncate (dest_fd, src_total_size) < 0) + { + error (0, errno, _("extending %s"), quote (dst_name)); + return fail; + } + } + + return ! 
fail; +} +#else +static bool fiemap_copy_ok (ignored) { errno == ENOTSUP; return false; } +#endif + /* FIXME: describe */ /* FIXME: rewrite this to use a hash table so we avoid the quadratic performance hit that's probably noticeable only on trees deeper @@ -679,6 +818,25 @@ copy_reg (char const *src_name, char const *dst_name, #endif } + if (make_holes) + { + bool require_normal_copy = false; + /* Perform efficient FIEMAP copy for sparse files, fall back to the + standard copy only if the ioctl(2) fails. */ + if (fiemap_copy_ok (source_desc, dest_desc, buf_size, + src_open_sb.st_size, src_name, + dst_name, &require_normal_copy)) + goto preserve_metadata; + else + { + if (! require_normal_copy) + { + return_val = false; + goto close_src_and_dst_desc; + } + } + } + /* If not making a sparse file, try to use a more-efficient buffer size. */ if (! make_holes) @@ -807,6 +965,7 @@ copy_reg (char const *src_name, char const *dst_name, } } +preserve_metadata: if (x->preserve_timestamps) { struct timespec timespec[2]; diff --git a/src/fiemap.h b/src/fiemap.h new file mode 100644 index 0000000000..d33293b5c3 --- /dev/null +++ b/src/fiemap.h @@ -0,0 +1,102 @@ +/* FS_IOC_FIEMAP ioctl infrastructure. + Some portions copyright (C) 2007 Cluster File Systems, Inc + Authors: Mark Fasheh + Kalpak Shah + Andreas Dilger . */ + +/* Copy from kernel, modified to respect GNU code style by Jie Liu. */ + +#ifndef _LINUX_FIEMAP_H +# define _LINUX_FIEMAP_H + +# include + +struct fiemap_extent +{ + /* Logical offset in bytes for the start of the extent + from the beginning of the file. */ + uint64_t fe_logical; + + /* Physical offset in bytes for the start of the extent + from the beginning of the disk. */ + uint64_t fe_physical; + + /* Length in bytes for this extent. */ + uint64_t fe_length; + + uint64_t fe_reserved64[2]; + + /* FIEMAP_EXTENT_* flags for this extent. 
*/ + uint32_t fe_flags; + + uint32_t fe_reserved[3]; +}; + +struct fiemap +{ + /* Logical offset(inclusive) at which to start mapping(in). */ + uint64_t fm_start; + + /* Logical length of mapping which userspace wants(in). */ + uint64_t fm_length; + + /* FIEMAP_FLAG_* flags for request(in/out). */ + uint32_t fm_flags; + + /* Number of extents that were mapped(out). */ + uint32_t fm_mapped_extents; + + /* Size of fm_extents array(in). */ + uint32_t fm_extent_count; + + uint32_t fm_reserved; + + /* Array of mapped extents(out). */ + struct fiemap_extent fm_extents[0]; +}; + +/* The maximum offset can be mapped for a file. */ +# define FIEMAP_MAX_OFFSET (~0ULL) + +/* Sync file data before map. */ +# define FIEMAP_FLAG_SYNC 0x00000001 + +/* Map extented attribute tree. */ +# define FIEMAP_FLAG_XATTR 0x00000002 + +# define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) + +/* Last extent in file. */ +# define FIEMAP_EXTENT_LAST 0x00000001 + +/* Data location unknown. */ +# define FIEMAP_EXTENT_UNKNOWN 0x00000002 + +/* Location still pending, Sets EXTENT_UNKNOWN. */ +# define FIEMAP_EXTENT_DELALLOC 0x00000004 + +/* Data can not be read while fs is unmounted. */ +# define FIEMAP_EXTENT_ENCODED 0x00000008 + +/* Data is encrypted by fs. Sets EXTENT_NO_BYPASS. */ +# define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 + +/* Extent offsets may not be block aligned. */ +# define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 + +/* Data mixed with metadata. Sets EXTENT_NOT_ALIGNED. */ +# define FIEMAP_EXTENT_DATA_INLINE 0x00000200 + +/* Multiple files in block. Set EXTENT_NOT_ALIGNED. */ +# define FIEMAP_EXTENT_DATA_TAIL 0x00000400 + +/* Space allocated, but not data (i.e. zero). */ +# define FIEMAP_EXTENT_UNWRITTEN 0x00000800 + +/* File does not natively support extents. Result merged for efficiency. */ +# define FIEMAP_EXTENT_MERGED 0x00001000 + +/* Space shared with other files. 
*/ +# define FIEMAP_EXTENT_SHARED 0x00002000 + +#endif From e3dca50c2a407dd82be58d0b716c1e8873a64752 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Thu, 13 May 2010 22:17:53 +0800 Subject: [PATCH 02/15] tests: add a new test for FIEMAP-copy * tests/cp/sparse-fiemap: Add a new test for FIEMAP-copy against a loopbacked ext4 partition. * tests/Makefile.am (sparse-fiemap): Reference the new test. --- tests/Makefile.am | 1 + tests/cp/sparse-fiemap | 56 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100755 tests/cp/sparse-fiemap diff --git a/tests/Makefile.am b/tests/Makefile.am index c458574fc1..f7840c8bc0 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -25,6 +25,7 @@ root_tests = \ cp/special-bits \ cp/cp-mv-enotsup-xattr \ cp/capability \ + cp/sparse-fiemap \ dd/skip-seek-past-dev \ install/install-C-root \ ls/capability \ diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap new file mode 100755 index 0000000000..945c94b121 --- /dev/null +++ b/tests/cp/sparse-fiemap @@ -0,0 +1,56 @@ +#!/bin/sh +# Test cp --sparse=always through fiemap copy + +# Copyright (C) 2006-2010 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +if test "$VERBOSE" = yes; then + set -x + cp --version +fi + +. 
$srcdir/test-lib.sh +require_root_ + +cwd=`pwd` +cleanup_() { cd /; umount "$cwd/mnt"; } + +skip=0 +# Create an ext4 loopback file system +dd if=/dev/zero of=blob bs=8192 count=1000 || skip=1 +mkdir mnt +mkfs -t ext4 -F blob || + skip_test_ "failed to create ext4 file system" +mount -oloop blob mnt || skip=1 +echo test > mnt/f || skip=1 +test -s mnt/f || skip=1 + +test $skip = 1 && + skip_test_ "insufficient mount/ext4 support" + +# Create a 1TiB sparse file +dd if=/dev/zero of=mnt/sparse bs=1k count=1 seek=1G || framework_failure + +cd mnt || fail=1 + +# It takes many minutes to copy this sparse file using the old method. +# By contrast, it takes far less than 1 second using FIEMAP-copy. +timeout 10 cp --sparse=always sparse fiemap || fail=1 + +# Ensure that the sparse file copied through fiemap has the same size +# in bytes as the original. +test $(stat --printf %s sparse) = $(stat --printf %s fiemap) || fail=1 + +Exit $fail From 9cc9dbaf5bf576b076a4d0b5132bdcf205a792ea Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Fri, 28 May 2010 09:24:15 +0200 Subject: [PATCH 03/15] tests: sparse-fiemap: factor out some set-up * tests/cp/sparse-fiemap: Cd into test directory sooner. 
--- tests/cp/sparse-fiemap | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap index 945c94b121..21b02acac6 100755 --- a/tests/cp/sparse-fiemap +++ b/tests/cp/sparse-fiemap @@ -33,9 +33,10 @@ dd if=/dev/zero of=blob bs=8192 count=1000 || skip=1 mkdir mnt mkfs -t ext4 -F blob || skip_test_ "failed to create ext4 file system" -mount -oloop blob mnt || skip=1 -echo test > mnt/f || skip=1 -test -s mnt/f || skip=1 +mount -oloop blob mnt || skip=1 +cd mnt || skip=1 +echo test > f || skip=1 +test -s f || skip=1 test $skip = 1 && skip_test_ "insufficient mount/ext4 support" @@ -43,7 +44,6 @@ test $skip = 1 && # Create a 1TiB sparse file dd if=/dev/zero of=mnt/sparse bs=1k count=1 seek=1G || framework_failure -cd mnt || fail=1 # It takes many minutes to copy this sparse file using the old method. # By contrast, it takes far less than 1 second using FIEMAP-copy. From 578db289bbaf7ebd4affe1af8fa9a6cf19d48981 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Fri, 21 May 2010 18:28:42 +0200 Subject: [PATCH 04/15] tests: exercise more of the new FIEMAP copying code * tests/cp/sparse-fiemap: Ensure that a file with many extents (more than fit in copy.c's internal 4KiB buffer) is copied properly. --- tests/cp/sparse-fiemap | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap index 21b02acac6..3608db3fd3 100755 --- a/tests/cp/sparse-fiemap +++ b/tests/cp/sparse-fiemap @@ -53,4 +53,42 @@ timeout 10 cp --sparse=always sparse fiemap || fail=1 # in bytes as the original. test $(stat --printf %s sparse) = $(stat --printf %s fiemap) || fail=1 +# ================================================= +# Ensure that we exercise the FIEMAP-copying code enough +# to provoke at least two iterations of the do...while loop +# in which it calls ioctl (fd, FS_IOC_FIEMAP,... +# This also verifies that non-trivial extents are preserved. 
+ +$PERL -e 1 || skip_test_ 'skipping part of this test; you lack perl' + +$PERL -e 'BEGIN { $n = 16 * 1024; *F = *STDOUT }' \ + -e 'for (1..100) { sysseek (*F, $n, 1)' \ + -e '&& syswrite (*F, "."x$n) or die "$!"}' > j1 || fail=1 + +cp --sparse=always j1 j2 || fail=1 +cmp j1 j2 || fail=1 + +filefrag j1 | grep extent \ + || skip_test_ 'skipping part of this test; you lack filefrag' + +# Here is sample filefrag output: +# $ perl -e 'BEGIN{$n=16*1024; *F=*STDOUT}' \ +# -e 'for (1..5) { sysseek(*F,$n,1)' \ +# -e '&& syswrite *F,"."x$n or die "$!"}' > j +# $ filefrag -v j +# File system type is: ef53 +# File size of j is 163840 (40 blocks, blocksize 4096) +# ext logical physical expected length flags +# 0 4 6258884 4 +# 1 12 6258892 6258887 4 +# 2 20 6258900 6258895 4 +# 3 28 6258908 6258903 4 +# 4 36 6258916 6258911 4 eof +# j: 6 extents found + +# exclude the physical block numbers; they always differ +filefrag -v j1 | awk '/^ / {print $1,$2,$NF}' > ff1 || fail=1 +filefrag -v j2 | awk '/^ / {print $1,$2,$NF}' > ff2 || fail=1 +compare ff1 ff2 || fail=1 + Exit $fail From e62e22b798f3f8659a7f366f67eb739e8fdf78ec Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 22 May 2010 10:22:58 +0200 Subject: [PATCH 05/15] tests: require root only if current partition is neither btrfs nor xfs * tests/cp/sparse-fiemap: Don't require root access if current partition is btrfs or xfs. Use init.sh, not test-lib.sh. --- tests/cp/sparse-fiemap | 49 +++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap index 3608db3fd3..1f78671ce5 100755 --- a/tests/cp/sparse-fiemap +++ b/tests/cp/sparse-fiemap @@ -1,7 +1,7 @@ #!/bin/sh # Test cp --sparse=always through fiemap copy -# Copyright (C) 2006-2010 Free Software Foundation, Inc. +# Copyright (C) 2010 Free Software Foundation, Inc. 
# This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,29 +21,34 @@ if test "$VERBOSE" = yes; then cp --version fi -. $srcdir/test-lib.sh -require_root_ - -cwd=`pwd` -cleanup_() { cd /; umount "$cwd/mnt"; } - -skip=0 -# Create an ext4 loopback file system -dd if=/dev/zero of=blob bs=8192 count=1000 || skip=1 -mkdir mnt -mkfs -t ext4 -F blob || - skip_test_ "failed to create ext4 file system" -mount -oloop blob mnt || skip=1 -cd mnt || skip=1 -echo test > f || skip=1 -test -s f || skip=1 - -test $skip = 1 && - skip_test_ "insufficient mount/ext4 support" +. "${srcdir=.}/init.sh"; path_prepend_ ../src + +if df -T -t btrfs -t xfs . ; then + : # Current dir is on a partition with working extents. Good! +else + # It's not; we need to create one, hence we need root access. + require_root_ + + cwd=$PWD + cleanup_() { cd /; umount "$cwd/mnt"; } + + skip=0 + # Create an XFS loopback file system + dd if=/dev/zero of=blob bs=32k count=1000 || skip=1 + mkdir mnt + mkfs -t xfs blob || + skip_test_ "failed to create XFS file system" + mount -oloop blob mnt || skip=1 + cd mnt || skip=1 + echo test > f || skip=1 + test -s f || skip=1 + + test $skip = 1 && + skip_test_ "insufficient mount/XFS support" +fi # Create a 1TiB sparse file -dd if=/dev/zero of=mnt/sparse bs=1k count=1 seek=1G || framework_failure - +dd if=/dev/zero of=sparse bs=1k count=1 seek=1G || framework_failure # It takes many minutes to copy this sparse file using the old method. # By contrast, it takes far less than 1 second using FIEMAP-copy. From 912e7d26c6456b9ed04a10f902c5f506f1148705 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 22 May 2010 10:21:46 +0200 Subject: [PATCH 06/15] tests: test fiemap-enabled cp more thoroughly * tests/cp/sparse-fiemap: More tests. 
--- tests/cp/sparse-fiemap | 61 ++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap index 1f78671ce5..907ccbdbc6 100755 --- a/tests/cp/sparse-fiemap +++ b/tests/cp/sparse-fiemap @@ -66,34 +66,37 @@ test $(stat --printf %s sparse) = $(stat --printf %s fiemap) || fail=1 $PERL -e 1 || skip_test_ 'skipping part of this test; you lack perl' -$PERL -e 'BEGIN { $n = 16 * 1024; *F = *STDOUT }' \ - -e 'for (1..100) { sysseek (*F, $n, 1)' \ - -e '&& syswrite (*F, "."x$n) or die "$!"}' > j1 || fail=1 - -cp --sparse=always j1 j2 || fail=1 -cmp j1 j2 || fail=1 - -filefrag j1 | grep extent \ - || skip_test_ 'skipping part of this test; you lack filefrag' - -# Here is sample filefrag output: -# $ perl -e 'BEGIN{$n=16*1024; *F=*STDOUT}' \ -# -e 'for (1..5) { sysseek(*F,$n,1)' \ -# -e '&& syswrite *F,"."x$n or die "$!"}' > j -# $ filefrag -v j -# File system type is: ef53 -# File size of j is 163840 (40 blocks, blocksize 4096) -# ext logical physical expected length flags -# 0 4 6258884 4 -# 1 12 6258892 6258887 4 -# 2 20 6258900 6258895 4 -# 3 28 6258908 6258903 4 -# 4 36 6258916 6258911 4 eof -# j: 6 extents found - -# exclude the physical block numbers; they always differ -filefrag -v j1 | awk '/^ / {print $1,$2,$NF}' > ff1 || fail=1 -filefrag -v j2 | awk '/^ / {print $1,$2,$NF}' > ff2 || fail=1 -compare ff1 ff2 || fail=1 +for i in $(seq 20); do + for j in 1 2 31 100; do + $PERL -e 'BEGIN { $n = '$i' * 1024; *F = *STDOUT }' \ + -e 'for (1..'$j') { sysseek (*F, $n, 1)' \ + -e '&& syswrite (*F, "."x$n) or die "$!"}' > j1 || fail=1 + + cp --sparse=always j1 j2 || fail=1 + cmp j1 j2 || fail=1 + filefrag -v j1 | grep extent \ + || skip_test_ 'skipping part of this test; you lack filefrag' + + # Here is sample filefrag output: + # $ perl -e 'BEGIN{$n=16*1024; *F=*STDOUT}' \ + # -e 'for (1..5) { sysseek(*F,$n,1)' \ + # -e '&& syswrite *F,"."x$n or die "$!"}' > j + # $ filefrag -v j + # 
File system type is: ef53 + # File size of j is 163840 (40 blocks, blocksize 4096) + # ext logical physical expected length flags + # 0 4 6258884 4 + # 1 12 6258892 6258887 4 + # 2 20 6258900 6258895 4 + # 3 28 6258908 6258903 4 + # 4 36 6258916 6258911 4 eof + # j: 6 extents found + + # exclude the physical block numbers; they always differ + filefrag -v j1 | awk '/^ / {print $1,$2}' > ff1 || fail=1 + filefrag -v j2 | awk '/^ / {print $1,$2}' > ff2 || fail=1 + compare ff1 ff2 || fail=1 + done +done Exit $fail From 50975af729167b064409d073effc519b3c3f748b Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 29 May 2010 21:22:40 +0200 Subject: [PATCH 07/15] tests: relax the root-tests cross-check * cfg.mk (sc_root_tests): Allow spaces before "require_root_", now that tests/cp/sparse-fiemap has a conditional use. --- cfg.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cfg.mk b/cfg.mk index dff5de5d9d..17267cfa22 100644 --- a/cfg.mk +++ b/cfg.mk @@ -80,7 +80,7 @@ sc_root_tests: @if test -d tests \ && grep check-root tests/Makefile.am>/dev/null 2>&1; then \ t1=sc-root.expected; t2=sc-root.actual; \ - grep -nl '^require_root_$$' \ + grep -nl '^ *require_root_$$' \ $$($(VC_LIST) tests) |sed s,tests/,, |sort > $$t1; \ sed -n '/^root_tests =[ ]*\\$$/,/[^\]$$/p' \ $(srcdir)/tests/Makefile.am \ From e955826e4cd93ea0e997af4f3829fd8e794c6b1b Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sun, 30 May 2010 21:20:30 +0200 Subject: [PATCH 08/15] tests: improve fiemap test to work with 4 FS types; fall back on ext4 * tests/cp/sparse-fiemap: Improve. * tests/filefrag-extent-compare: New file. 
--- tests/cp/sparse-fiemap | 38 ++++++++++++++------ tests/filefrag-extent-compare | 68 +++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 11 deletions(-) create mode 100644 tests/filefrag-extent-compare diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap index 907ccbdbc6..dc0cf60595 100755 --- a/tests/cp/sparse-fiemap +++ b/tests/cp/sparse-fiemap @@ -23,7 +23,7 @@ fi . "${srcdir=.}/init.sh"; path_prepend_ ../src -if df -T -t btrfs -t xfs . ; then +if df -T -t btrfs -t xfs -t ext4 -t ocfs2 . ; then : # Current dir is on a partition with working extents. Good! else # It's not; we need to create one, hence we need root access. @@ -33,18 +33,18 @@ else cleanup_() { cd /; umount "$cwd/mnt"; } skip=0 - # Create an XFS loopback file system + # Create an ext4 loopback file system dd if=/dev/zero of=blob bs=32k count=1000 || skip=1 mkdir mnt - mkfs -t xfs blob || - skip_test_ "failed to create XFS file system" + mkfs -t ext4 -F blob || + skip_test_ "failed to create ext4 file system" mount -oloop blob mnt || skip=1 cd mnt || skip=1 echo test > f || skip=1 test -s f || skip=1 test $skip = 1 && - skip_test_ "insufficient mount/XFS support" + skip_test_ "insufficient mount/ext4 support" fi # Create a 1TiB sparse file @@ -66,13 +66,26 @@ test $(stat --printf %s sparse) = $(stat --printf %s fiemap) || fail=1 $PERL -e 1 || skip_test_ 'skipping part of this test; you lack perl' -for i in $(seq 20); do +# Extract logical block number and length pairs from filefrag -v output. +# The initial sed is to remove the "eof" from the normally-empty "flags" field. +# That is required when that final extent has no number in the "expected" field. +f() +{ + sed 's/ eof$//' $@ \ + | awk '/^ *[0-9]/ {printf "%d %d ", $2 ,NF < 5 ? 
$NF : $5 } END {print ""}' +} + +for i in $(seq 1 2 21); do for j in 1 2 31 100; do $PERL -e 'BEGIN { $n = '$i' * 1024; *F = *STDOUT }' \ -e 'for (1..'$j') { sysseek (*F, $n, 1)' \ - -e '&& syswrite (*F, "."x$n) or die "$!"}' > j1 || fail=1 - + -e '&& syswrite (*F, chr($_)x$n) or die "$!"}' > j1 || fail=1 + # sync cp --sparse=always j1 j2 || fail=1 + # sync + # Technically we may need the 'sync' uses above, but + # uncommenting them makes this test take much longer. + cmp j1 j2 || fail=1 filefrag -v j1 | grep extent \ || skip_test_ 'skipping part of this test; you lack filefrag' @@ -93,10 +106,13 @@ for i in $(seq 20); do # j: 6 extents found # exclude the physical block numbers; they always differ - filefrag -v j1 | awk '/^ / {print $1,$2}' > ff1 || fail=1 - filefrag -v j2 | awk '/^ / {print $1,$2}' > ff2 || fail=1 - compare ff1 ff2 || fail=1 + filefrag -v j1 > ff1 || fail=1 + filefrag -v j2 > ff2 || fail=1 + { f ff1; f ff2; } \ + | $PERL $abs_top_srcdir/tests/filefrag-extent-compare \ + || { fail=1; break; } done + test $fail = 1 && break done Exit $fail diff --git a/tests/filefrag-extent-compare b/tests/filefrag-extent-compare new file mode 100644 index 0000000000..3c095d52f4 --- /dev/null +++ b/tests/filefrag-extent-compare @@ -0,0 +1,68 @@ +eval '(exit $?0)' && eval 'exec perl -wS "$0" ${1+"$@"}' + & eval 'exec perl -wS "$0" $argv:q' + if 0; +# Determine whether two files have the same extents by comparing +# the logical block numbers and lengths from filefrag -v for each. + +# Invoke like this: +# This helper function, f, extracts logical block number and lengths. 
+# f() { awk '/^ *[0-9]/ {printf "%d %d ",$2,NF<5?$NF:$5} END {print ""}'; } +# { filefrag -v j1 | f; filefrag -v j2 | f; } | ./filefrag-extent-compare + +use warnings; +use strict; +(my $ME = $0) =~ s|.*/||; + +my @line = <>; +my $n_lines = @line; +$n_lines == 2 + or die "$ME: expected exactly two input lines; got $n_lines\n"; + +my @A = split ' ', $line[0]; +my @B = split ' ', $line[1]; +@A % 2 || @B % 2 + and die "$ME: unexpected input: odd number of numbers; expected even\n"; + +my @a; +my @b; +foreach my $i (0..@A/2-1) { $a[$i] = { L_BLK => $A[2*$i], LEN => $A[2*$i+1] } }; +foreach my $i (0..@B/2-1) { $b[$i] = { L_BLK => $B[2*$i], LEN => $B[2*$i+1] } }; + +my $i = 0; +my $j = 0; +while (1) + { + !defined $a[$i] && !defined $b[$j] + and exit 0; + defined $a[$i] && defined $b[$j] + or die "\@a and \@b have different lengths, even after adjustment\n"; + ($a[$i]->{L_BLK} == $b[$j]->{L_BLK} + && $a[$i]->{LEN} == $b[$j]->{LEN}) + and next; + ($a[$i]->{LEN} < $b[$j]->{LEN} + && exists $a[$i+1] && $a[$i]->{LEN} + $a[$i+1]->{LEN} == $b[$j]->{LEN}) + and ++$i, next; # a[i] and a[i+1] together are as long as b[j] + exists $b[$j+1] && $a[$i]->{LEN} == $b[$j]->{LEN} + $b[$j+1]->{LEN} + and ++$j, next; # b[j] and b[j+1] together are as long as a[i] + die "differing extent:\n" + . " [$i]=$a[$i]->{L_BLK} $a[$i]->{LEN}\n" + . " [$j]=$b[$j]->{L_BLK} $b[$j]->{LEN}\n" + } +continue + { + ++$i; + ++$j; + } + +### Setup "GNU" style for perl-mode and cperl-mode. +## Local Variables: +## mode: perl +## perl-indent-level: 2 +## perl-continued-statement-offset: 2 +## perl-continued-brace-offset: 0 +## perl-brace-offset: 0 +## perl-brace-imaginary-offset: 0 +## perl-label-offset: -2 +## perl-extra-newline-before-brace: t +## perl-merge-trailing-else: nil +## End: From 1a2b6d093887a6089fd3b45e20b9699d540a802a Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 5 Jun 2010 10:17:48 +0200 Subject: [PATCH 09/15] copy.c: adjust comments, tweak semantics * src/copy.c (fiemap_copy): Rename from fiemap_copy_ok. Add/improve comments. Remove local, "fail". 
(fiemap_copy): Do not require caller to set "normal_copy_required" before calling fiemap_copy. Report ioctl failure if it's the 2nd or subsequent call. --- src/copy.c | 62 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/src/copy.c b/src/copy.c index 2003380603..99fdb79fa8 100644 --- a/src/copy.c +++ b/src/copy.c @@ -157,30 +157,33 @@ clone_file (int dest_fd, int src_fd) # ifndef FS_IOC_FIEMAP # define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap) # endif -/* Perform FIEMAP(available in mainline 2.6.27) copy if possible. - Call ioctl(2) with FS_IOC_FIEMAP to efficiently map file allocation - excepts holes. So the overhead to deal with holes with lseek(2) in - normal copy could be saved. This would result in much faster backups - for any kind of sparse file. */ +/* Perform a FIEMAP copy, if possible. + Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to + obtain a map of file extents excluding holes. This avoids the + overhead of detecting holes in a hole-introducing/preserving copy, + and thus makes copying sparse files much more efficient. Upon a + successful copy, return true. If the initial ioctl fails, set + *NORMAL_COPY_REQUIRED to true and return false. Upon any other + failure, set *NORMAL_COPY_REQUIRED to false and return false. 
*/ static bool -fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, - off_t src_total_size, char const *src_name, - char const *dst_name, bool *normal_copy_required) +fiemap_copy (int src_fd, int dest_fd, size_t buf_size, + off_t src_total_size, char const *src_name, + char const *dst_name, bool *normal_copy_required) { - bool fail = false; bool last = false; char fiemap_buf[4096]; - struct fiemap *fiemap = (struct fiemap *)fiemap_buf; + struct fiemap *fiemap = (struct fiemap *) fiemap_buf; struct fiemap_extent *fm_ext = &fiemap->fm_extents[0]; - uint32_t count = (sizeof (fiemap_buf) - sizeof (*fiemap)) / - sizeof (struct fiemap_extent); + uint32_t count = ((sizeof fiemap_buf - sizeof (*fiemap)) + / sizeof (struct fiemap_extent)); off_t last_ext_logical = 0; uint64_t last_ext_len = 0; uint64_t last_read_size = 0; unsigned int i = 0; + *normal_copy_required = false; /* This is required at least to initialize fiemap->fm_start, - but also serves (in May 2010) to appease valgrind, which + but also serves (in mid 2010) to appease valgrind, which appears not to know the semantics of the FIEMAP ioctl. */ memset (fiemap_buf, 0, sizeof fiemap_buf); @@ -193,9 +196,16 @@ fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, is the first time we met. */ if (ioctl (src_fd, FS_IOC_FIEMAP, fiemap) < 0) { - /* If `i > 0', then at least one ioctl(2) has been performed before. */ + /* If the first ioctl fails, tell the caller that it is + ok to proceed with a normal copy. */ if (i == 0) *normal_copy_required = true; + else + { + /* If the second or subsequent ioctl fails, diagnose it, + since it ends up causing the entire copy/cp to fail. 
*/ + error (0, errno, _("%s: FIEMAP ioctl failed"), quote (src_name)); + } return false; } @@ -213,13 +223,13 @@ fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, if (lseek (src_fd, ext_logical, SEEK_SET) < 0LL) { error (0, errno, _("cannot lseek %s"), quote (src_name)); - return fail; + return false; } if (lseek (dest_fd, ext_logical, SEEK_SET) < 0LL) { error (0, errno, _("cannot lseek %s"), quote (dst_name)); - return fail; + return false; } if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST) @@ -246,7 +256,7 @@ fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, continue; #endif error (0, errno, _("reading %s"), quote (src_name)); - return fail; + return false; } if (n_read == 0) @@ -259,7 +269,7 @@ fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, if (full_write (dest_fd, buf, n_read) != n_read) { error (0, errno, _("writing %s"), quote (dst_name)); - return fail; + return false; } ext_len -= n_read; @@ -277,15 +287,15 @@ fiemap_copy_ok (int src_fd, int dest_fd, size_t buf_size, { if (ftruncate (dest_fd, src_total_size) < 0) { - error (0, errno, _("extending %s"), quote (dst_name)); - return fail; + error (0, errno, _("failed to extend %s"), quote (dst_name)); + return false; } } - return ! fail; + return true; } #else -static bool fiemap_copy_ok (ignored) { errno == ENOTSUP; return false; } +static bool fiemap_copy (ignored) { errno == ENOTSUP; return false; } #endif /* FIXME: describe */ @@ -820,12 +830,12 @@ copy_reg (char const *src_name, char const *dst_name, if (make_holes) { - bool require_normal_copy = false; + bool require_normal_copy; /* Perform efficient FIEMAP copy for sparse files, fall back to the standard copy only if the ioctl(2) fails. 
*/ - if (fiemap_copy_ok (source_desc, dest_desc, buf_size, - src_open_sb.st_size, src_name, - dst_name, &require_normal_copy)) + if (fiemap_copy (source_desc, dest_desc, buf_size, + src_open_sb.st_size, src_name, + dst_name, &require_normal_copy)) goto preserve_metadata; else { From f9daf7e7ed97f21fc5d9d4e41e43c8d7b8c49373 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Wed, 9 Jun 2010 08:15:07 +0200 Subject: [PATCH 10/15] copy.c: ensure proper alignment of fiemap buffer * src/copy.c (fiemap_copy): Ensure that our fiemap buffer is large enough and well-aligned. Replace "0LL" with equivalent "0" as 3rd argument to lseek. --- src/copy.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/copy.c b/src/copy.c index 99fdb79fa8..f149be460f 100644 --- a/src/copy.c +++ b/src/copy.c @@ -171,11 +171,12 @@ fiemap_copy (int src_fd, int dest_fd, size_t buf_size, char const *dst_name, bool *normal_copy_required) { bool last = false; - char fiemap_buf[4096]; - struct fiemap *fiemap = (struct fiemap *) fiemap_buf; + union { struct fiemap f; char c[4096]; } fiemap_buf; + struct fiemap *fiemap = &fiemap_buf.f; struct fiemap_extent *fm_ext = &fiemap->fm_extents[0]; - uint32_t count = ((sizeof fiemap_buf - sizeof (*fiemap)) - / sizeof (struct fiemap_extent)); + enum { count = (sizeof fiemap_buf - sizeof *fiemap) / sizeof *fm_ext }; + verify (count != 0); + off_t last_ext_logical = 0; uint64_t last_ext_len = 0; uint64_t last_read_size = 0; @@ -185,7 +186,7 @@ fiemap_copy (int src_fd, int dest_fd, size_t buf_size, /* This is required at least to initialize fiemap->fm_start, but also serves (in mid 2010) to appease valgrind, which appears not to know the semantics of the FIEMAP ioctl. 
*/ - memset (fiemap_buf, 0, sizeof fiemap_buf); + memset (&fiemap_buf, 0, sizeof fiemap_buf); do { @@ -220,13 +221,13 @@ fiemap_copy (int src_fd, int dest_fd, size_t buf_size, off_t ext_logical = fm_ext[i].fe_logical; uint64_t ext_len = fm_ext[i].fe_length; - if (lseek (src_fd, ext_logical, SEEK_SET) < 0LL) + if (lseek (src_fd, ext_logical, SEEK_SET) < 0) { error (0, errno, _("cannot lseek %s"), quote (src_name)); return false; } - if (lseek (dest_fd, ext_logical, SEEK_SET) < 0LL) + if (lseek (dest_fd, ext_logical, SEEK_SET) < 0) { error (0, errno, _("cannot lseek %s"), quote (dst_name)); return false; From 20c1eeec11acd999aad8dfcb1919a86fab16f1a5 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Wed, 9 Jun 2010 08:42:30 +0200 Subject: [PATCH 11/15] fiemap.h: include <stdint.h>, not <linux/types.h> * src/fiemap.h: Include stdint.h, not linux/types.h, now that this file uses only portable type names. --- src/fiemap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fiemap.h b/src/fiemap.h index d33293b5c3..c5d8424b3b 100644 --- a/src/fiemap.h +++ b/src/fiemap.h @@ -9,7 +9,7 @@ #ifndef _LINUX_FIEMAP_H # define _LINUX_FIEMAP_H -# include <linux/types.h> +# include <stdint.h> struct fiemap_extent { From 484903dc41246cb3c774f178f695725561b105a0 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Fri, 11 Jun 2010 14:34:03 +0200 Subject: [PATCH 12/15] tests: accommodate varying filefrag -v "flags" output * tests/cp/sparse-fiemap: Accommodate values other than "eof" in the "flags" column of filefrag -v output --- tests/cp/sparse-fiemap | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap index dc0cf60595..b6b1103909 100755 --- a/tests/cp/sparse-fiemap +++ b/tests/cp/sparse-fiemap @@ -68,10 +68,11 @@ $PERL -e 1 || skip_test_ 'skipping part of this test; you lack perl' # Extract logical block number and length pairs from filefrag -v output. # The initial sed is to remove the "eof" from the normally-empty "flags" field.
+# Similarly, remove flags values like "unknown,delalloc,eof". # That is required when that final extent has no number in the "expected" field. f() { - sed 's/ eof$//' $@ \ + sed 's/ [a-z,][a-z,]*$//' $@ \ | awk '/^ *[0-9]/ {printf "%d %d ", $2 ,NF < 5 ? $NF : $5 } END {print ""}' } From 98b2a24d7f00fba786a81870c9c9c8ffb9b8f9cf Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Fri, 11 Jun 2010 16:29:02 +0800 Subject: [PATCH 13/15] copy.c: add FIEMAP_FLAG_SYNC to fiemap ioctl * src/copy.c (fiemap_copy): Force kernel to sync the source file before mapping. --- src/copy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/copy.c b/src/copy.c index f149be460f..f48c74df73 100644 --- a/src/copy.c +++ b/src/copy.c @@ -191,6 +191,7 @@ fiemap_copy (int src_fd, int dest_fd, size_t buf_size, do { fiemap->fm_length = FIEMAP_MAX_OFFSET; + fiemap->fm_flags = FIEMAP_FLAG_SYNC; fiemap->fm_extent_count = count; /* When ioctl(2) fails, fall back to the normal copy only if it From be5548445e90a36ab5018cac0fb19f2498d0521c Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sun, 13 Jun 2010 16:19:29 +0200 Subject: [PATCH 14/15] build: distribute new file, fiemap.h * src/Makefile.am (noinst_HEADERS): Add fiemap.h. --- src/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Makefile.am b/src/Makefile.am index 0630a069d4..7d563122ab 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -145,6 +145,7 @@ noinst_HEADERS = \ copy.h \ cp-hash.h \ dircolors.h \ + fiemap.h \ fs.h \ group-list.h \ ls.h \ From f25181d32c40f82ee26dea6de6b7f4b385352a14 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sun, 13 Jun 2010 16:34:42 +0200 Subject: [PATCH 15/15] build: distribute new test script, filefrag-extent-compare * tests/Makefile.am (EXTRA_DIST): Add filefrag-extent-compare. 
--- tests/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/Makefile.am b/tests/Makefile.am index f7840c8bc0..61ccf01a10 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -10,6 +10,7 @@ EXTRA_DIST = \ CuTmpdir.pm \ check.mk \ envvar-check \ + filefrag-extent-compare \ init.sh \ lang-default \ other-fs-tmpdir \