diff --git a/cfg.mk b/cfg.mk index 4df74f05d0..d0d68cc31c 100644 --- a/cfg.mk +++ b/cfg.mk @@ -80,7 +80,7 @@ sc_root_tests: @if test -d tests \ && grep check-root tests/Makefile.am>/dev/null 2>&1; then \ t1=sc-root.expected; t2=sc-root.actual; \ - grep -nl '^require_root_$$' \ + grep -nl '^ *require_root_$$' \ $$($(VC_LIST) tests) |sed s,tests/,, |sort > $$t1; \ sed -n '/^root_tests =[ ]*\\$$/,/[^\]$$/p' \ $(srcdir)/tests/Makefile.am \ diff --git a/src/Makefile.am b/src/Makefile.am index 91364635fc..bf1d60a886 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -145,6 +145,7 @@ noinst_HEADERS = \ copy.h \ cp-hash.h \ dircolors.h \ + fiemap.h \ find-mount-point.h \ fs.h \ group-list.h \ @@ -449,7 +450,7 @@ uninstall-local: fi; \ fi -copy_sources = copy.c cp-hash.c +copy_sources = copy.c cp-hash.c extent-scan.c extent-scan.h # Use `ginstall' in the definition of PROGRAMS and in dependencies to avoid # confusion with the `install' target. The install rule transforms `ginstall' diff --git a/src/copy.c b/src/copy.c index 9a014ad5aa..96bb35b135 100644 --- a/src/copy.c +++ b/src/copy.c @@ -36,6 +36,7 @@ #include "buffer-lcm.h" #include "copy.h" #include "cp-hash.h" +#include "extent-scan.h" #include "error.h" #include "fcntl--.h" #include "file-set.h" @@ -62,6 +63,10 @@ # include "verror.h" #endif +#ifndef HAVE_FIEMAP +# include "fiemap.h" +#endif + #ifndef HAVE_FCHOWN # define HAVE_FCHOWN false # define fchown(fd, uid, gid) (-1) @@ -129,6 +134,122 @@ utimens_symlink (char const *file, struct timespec const *timespec) return err; } +/* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME, + honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer + BUF for temporary storage. Copy no more than MAX_N_READ bytes. + Return true upon successful completion; + print a diagnostic and return false upon error. + Note that for best results, BUF should be "well"-aligned. + BUF must have sizeof(uintptr_t)-1 bytes of additional space + beyond BUF[BUF_SIZE-1]. 
+ Set *LAST_WRITE_MADE_HOLE to true if the final operation on + DEST_FD introduced a hole. */ +static bool +sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, + bool make_holes, + char const *src_name, char const *dst_name, + uintmax_t max_n_read, bool *last_write_made_hole) +{ + typedef uintptr_t word; + *last_write_made_hole = false; + + while (max_n_read) + { + word *wp = NULL; + + ssize_t n_read = read (src_fd, buf, MIN (max_n_read, buf_size)); + if (n_read < 0) + { +#ifdef EINTR + if (errno == EINTR) + continue; +#endif + error (0, errno, _("reading %s"), quote (src_name)); + return false; + } + if (n_read == 0) + break; + max_n_read -= n_read; + + if (make_holes) + { + char *cp; + + /* Sentinel to stop loop. */ + buf[n_read] = '\1'; +#ifdef lint + /* Usually, buf[n_read] is not the byte just before a "word" + (aka uintptr_t) boundary. In that case, the word-oriented + test below (*wp++ == 0) would read some uninitialized bytes + after the sentinel. To avoid false-positive reports about + this condition (e.g., from a tool like valgrind), set the + remaining bytes -- to any value. */ + memset (buf + n_read + 1, 0, sizeof (word) - 1); +#endif + + /* Find first nonzero *word*, or the word with the sentinel. */ + + wp = (word *) buf; + while (*wp++ == 0) + continue; + + /* Find the first nonzero *byte*, or the sentinel. */ + + cp = (char *) (wp - 1); + while (*cp++ == 0) + continue; + + if (cp <= buf + n_read) + /* Clear to indicate that a normal write is needed. */ + wp = NULL; + else + { + /* We found the sentinel, so the whole input block was zero. + Make a hole. 
*/ + if (lseek (dest_fd, n_read, SEEK_CUR) < 0) + { + error (0, errno, _("cannot lseek %s"), quote (dst_name)); + return false; + } + *last_write_made_hole = true; + } + } + + if (!wp) + { + size_t n = n_read; + if (full_write (dest_fd, buf, n) != n) + { + error (0, errno, _("writing %s"), quote (dst_name)); + return false; + } + *last_write_made_hole = false; + + /* It is tempting to return early here upon a short read from a + regular file. That would save the final read syscall for each + file. Unfortunately that doesn't work for certain files in + /proc with linux kernels from at least 2.6.9 .. 2.6.29. */ + } + } + + return true; +} + +/* If the file ends with a `hole' (i.e., if sparse_copy set wrote_hole_at_eof), + call this function to record the length of the output file. */ +static bool +sparse_copy_finalize (int dest_fd, char const *dst_name) +{ + off_t len = lseek (dest_fd, 0, SEEK_CUR); + if (0 <= len && ftruncate (dest_fd, len) < 0) + { + error (0, errno, _("truncating %s"), quote (dst_name)); + return false; + } + + return true; +} + /* Perform the O(1) btrfs clone operation, if possible. Upon success, return 0. Otherwise, return -1 and set errno. */ static inline int @@ -148,6 +269,154 @@ clone_file (int dest_fd, int src_fd) #endif } +/* Write N_BYTES zero bytes to file descriptor FD. Return true if successful. + Upon write failure, set errno and return false. */ +static bool +write_zeros (int fd, uint64_t n_bytes) +{ + static char *zeros; + static size_t nz = IO_BUFSIZE; + + /* Attempt to use a relatively large calloc'd source buffer for + efficiency, but if that allocation fails, resort to a smaller + statically allocated one. 
*/ + if (zeros == NULL) + { + static char fallback[1024]; + zeros = calloc (nz, 1); + if (zeros == NULL) + { + zeros = fallback; + nz = sizeof fallback; + } + } + + while (n_bytes) + { + uint64_t n = MIN (nz, n_bytes); /* NZ is the size of ZEROS. */ + if ((full_write (fd, zeros, n)) != n) + return false; + n_bytes -= n; + } + + return true; +} + +/* Perform an efficient extent copy, if possible. This avoids + the overhead of detecting holes in hole-introducing/preserving + copy, and thus makes copying sparse files much more efficient. + Upon a successful copy, return true. If the initial extent scan + fails, set *REQUIRE_NORMAL_COPY to true and return false. + Upon any other failure, return false without touching + *REQUIRE_NORMAL_COPY, so the caller must preset it to false. */ +static bool +extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, + off_t src_total_size, bool make_holes, + char const *src_name, char const *dst_name, + bool *require_normal_copy) +{ + struct extent_scan scan; + off_t last_ext_start = 0; + uint64_t last_ext_len = 0; + + extent_scan_init (src_fd, &scan); + + bool wrote_hole_at_eof = true; + do + { + bool ok = extent_scan_read (&scan); + if (! 
ok) + { + if (scan.hit_final_extent) + break; + + if (scan.initial_scan_failed) + { + *require_normal_copy = true; + return false; + } + + error (0, errno, _("%s: failed to get extents info"), + quote (src_name)); + return false; + } + + unsigned int i; + for (i = 0; i < scan.ei_count; i++) + { + off_t ext_start = scan.ext_info[i].ext_logical; + uint64_t ext_len = scan.ext_info[i].ext_length; + + if (lseek (src_fd, ext_start, SEEK_SET) < 0) + { + error (0, errno, _("cannot lseek %s"), quote (src_name)); + fail: + extent_scan_free (&scan); + return false; + } + + if (make_holes) + { + if (lseek (dest_fd, ext_start, SEEK_SET) < 0) + { + error (0, errno, _("cannot lseek %s"), quote (dst_name)); + goto fail; + } + } + else + { + /* When not inducing holes and when there is a hole between + the end of the previous extent and the beginning of the + current one, write zeros to the destination file. */ + if (last_ext_start + last_ext_len < ext_start) + { + uint64_t hole_size = (ext_start + - last_ext_start + - last_ext_len); + if (! write_zeros (dest_fd, hole_size)) + { + error (0, errno, _("%s: write failed"), quote (dst_name)); + goto fail; + } + } + } + + last_ext_start = ext_start; + last_ext_len = ext_len; + + if ( ! sparse_copy (src_fd, dest_fd, buf, buf_size, + make_holes, src_name, dst_name, ext_len, + &wrote_hole_at_eof)) + goto fail; + } + + /* Release the space allocated to scan->ext_info. */ + extent_scan_free (&scan); + + } + while (! scan.hit_final_extent); + + /* When the source file ends with a hole, we have to do a little more work, + since the above copied only up to and including the final extent. + In order to complete the copy, we may have to insert a hole or write + zeros in the destination corresponding to the source file's hole-at-EOF. + + In addition, if the final extent was a block of zeros at EOF and we've + just converted them to a hole in the destination, we must call ftruncate + here in order to record the proper length in the destination. 
*/ + off_t dest_len = lseek (dest_fd, 0, SEEK_CUR); + if ((dest_len < src_total_size || wrote_hole_at_eof) + && (make_holes + ? ftruncate (dest_fd, src_total_size) + : ! write_zeros (dest_fd, src_total_size - dest_len))) + { + error (0, errno, _("failed to extend %s"), quote (dst_name)); + return false; + } + + return true; +} + /* FIXME: describe */ /* FIXME: rewrite this to use a hash table so we avoid the quadratic performance hit that's probably noticeable only on trees deeper @@ -647,7 +916,6 @@ copy_reg (char const *src_name, char const *dst_name, if (data_copy_required) { typedef uintptr_t word; - off_t n_read_total = 0; /* Choose a suitable buffer size; it may be adjusted later. */ size_t buf_alignment = lcm (getpagesize (), sizeof (word)); @@ -655,7 +923,6 @@ copy_reg (char const *src_name, char const *dst_name, size_t buf_size = io_blksize (sb); /* Deal with sparse files. */ - bool last_write_made_hole = false; bool make_holes = false; if (S_ISREG (sb.st_mode)) @@ -704,106 +971,35 @@ copy_reg (char const *src_name, char const *dst_name, buf_alloc = xmalloc (buf_size + buf_alignment_slop); buf = ptr_align (buf_alloc, buf_alignment); - while (true) + bool normal_copy_required = false; + /* Perform an efficient extent-based copy, falling back to the + standard copy only if the initial extent scan fails. If the + '--sparse=never' option is specified, write all data but use + any extents to read more efficiently. */ + if (extent_copy (source_desc, dest_desc, buf, buf_size, + src_open_sb.st_size, make_holes, + src_name, dst_name, &normal_copy_required)) + goto preserve_metadata; + + if (! 
normal_copy_required) { - word *wp = NULL; - - ssize_t n_read = read (source_desc, buf, buf_size); - if (n_read < 0) - { -#ifdef EINTR - if (errno == EINTR) - continue; -#endif - error (0, errno, _("reading %s"), quote (src_name)); - return_val = false; - goto close_src_and_dst_desc; - } - if (n_read == 0) - break; - - n_read_total += n_read; - - if (make_holes) - { - char *cp; - - /* Sentinel to stop loop. */ - buf[n_read] = '\1'; -#ifdef lint - /* Usually, buf[n_read] is not the byte just before a "word" - (aka uintptr_t) boundary. In that case, the word-oriented - test below (*wp++ == 0) would read some uninitialized bytes - after the sentinel. To avoid false-positive reports about - this condition (e.g., from a tool like valgrind), set the - remaining bytes -- to any value. */ - memset (buf + n_read + 1, 0, sizeof (word) - 1); -#endif - - /* Find first nonzero *word*, or the word with the sentinel. */ - - wp = (word *) buf; - while (*wp++ == 0) - continue; - - /* Find the first nonzero *byte*, or the sentinel. */ - - cp = (char *) (wp - 1); - while (*cp++ == 0) - continue; - - if (cp <= buf + n_read) - /* Clear to indicate that a normal write is needed. */ - wp = NULL; - else - { - /* We found the sentinel, so the whole input block was zero. - Make a hole. */ - if (lseek (dest_desc, n_read, SEEK_CUR) < 0) - { - error (0, errno, _("cannot lseek %s"), quote (dst_name)); - return_val = false; - goto close_src_and_dst_desc; - } - last_write_made_hole = true; - } - } - - if (!wp) - { - size_t n = n_read; - if (full_write (dest_desc, buf, n) != n) - { - error (0, errno, _("writing %s"), quote (dst_name)); - return_val = false; - goto close_src_and_dst_desc; - } - last_write_made_hole = false; - - /* It is tempting to return early here upon a short read from a - regular file. That would save the final read syscall for each - file. Unfortunately that doesn't work for certain files in - /proc with linux kernels from at least 2.6.9 .. 2.6.29. 
*/ - } + return_val = false; + goto close_src_and_dst_desc; } - /* If the file ends with a `hole', we need to do something to record - the length of the file. On modern systems, calling ftruncate does - the job. On systems without native ftruncate support, we have to - write a byte at the ending position. Otherwise the kernel would - truncate the file at the end of the last write operation. */ - - if (last_write_made_hole) + bool wrote_hole_at_eof; + if ( ! sparse_copy (source_desc, dest_desc, buf, buf_size, + make_holes, src_name, dst_name, UINTMAX_MAX, + &wrote_hole_at_eof) + || (wrote_hole_at_eof && + ! sparse_copy_finalize (dest_desc, dst_name))) { - if (ftruncate (dest_desc, n_read_total) < 0) - { - error (0, errno, _("truncating %s"), quote (dst_name)); - return_val = false; - goto close_src_and_dst_desc; - } + return_val = false; + goto close_src_and_dst_desc; } } +preserve_metadata: if (x->preserve_timestamps) { struct timespec timespec[2]; diff --git a/src/extent-scan.c b/src/extent-scan.c new file mode 100644 index 0000000000..3bb0d536ce --- /dev/null +++ b/src/extent-scan.c @@ -0,0 +1,116 @@ +/* extent-scan.c -- core functions for scanning extents + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Written by Jie Liu (jeff.liu@oracle.com). 
*/ + +#include +#include +#include +#include +#include + +#include "system.h" +#include "extent-scan.h" + +#ifndef HAVE_FIEMAP +# include "fiemap.h" +#endif + +/* Initialize the caller-supplied *SCAN for scanning the extents of SRC_FD, + so that it can be passed to extent_scan_read(). SCAN->ext_info is not set. */ +extern void +extent_scan_init (int src_fd, struct extent_scan *scan) +{ + scan->fd = src_fd; + scan->ei_count = 0; + scan->scan_start = 0; + scan->initial_scan_failed = false; + scan->hit_final_extent = false; +} + +#ifdef __linux__ +# ifndef FS_IOC_FIEMAP +# define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap) +# endif +/* Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to + obtain a map of file extents excluding holes. */ +extern bool +extent_scan_read (struct extent_scan *scan) +{ + union { struct fiemap f; char c[4096]; } fiemap_buf; + struct fiemap *fiemap = &fiemap_buf.f; + struct fiemap_extent *fm_extents = &fiemap->fm_extents[0]; + enum { count = (sizeof fiemap_buf - sizeof *fiemap) / sizeof *fm_extents }; + verify (count != 0); + + /* This is required at least to initialize fiemap->fm_start, + but also serves (in mid 2010) to appease valgrind, which + appears not to know the semantics of the FIEMAP ioctl. */ + memset (&fiemap_buf, 0, sizeof fiemap_buf); + + fiemap->fm_start = scan->scan_start; + fiemap->fm_flags = FIEMAP_FLAG_SYNC; + fiemap->fm_extent_count = count; + fiemap->fm_length = FIEMAP_MAX_OFFSET - scan->scan_start; + + /* If the very first ioctl(2) call fails, record that so the caller + can fall back to the standard copy. */ + if (ioctl (scan->fd, FS_IOC_FIEMAP, fiemap) < 0) + { + if (scan->scan_start == 0) + scan->initial_scan_failed = true; + return false; + } + + /* If 0 extents are returned, no further extent_scan_read() is needed. 
*/ + if (fiemap->fm_mapped_extents == 0) + { + scan->hit_final_extent = true; + return false; + } + + scan->ei_count = fiemap->fm_mapped_extents; + scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info)); + + unsigned int i; + for (i = 0; i < scan->ei_count; i++) + { + assert (fm_extents[i].fe_logical <= OFF_T_MAX); + + scan->ext_info[i].ext_logical = fm_extents[i].fe_logical; + scan->ext_info[i].ext_length = fm_extents[i].fe_length; + scan->ext_info[i].ext_flags = fm_extents[i].fe_flags; + } + + i--; + if (scan->ext_info[i].ext_flags & FIEMAP_EXTENT_LAST) + { + scan->hit_final_extent = true; + return true; + } + + scan->scan_start = fm_extents[i].fe_logical + fm_extents[i].fe_length; + + return true; +} +#else +extern bool +extent_scan_read (struct extent_scan *scan ATTRIBUTE_UNUSED) +{ + errno = ENOTSUP; + return false; +} +#endif diff --git a/src/extent-scan.h b/src/extent-scan.h new file mode 100644 index 0000000000..ac9e5006fc --- /dev/null +++ b/src/extent-scan.h @@ -0,0 +1,68 @@ +/* core functions for efficient reading sparse files + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Written by Jie Liu (jeff.liu@oracle.com). */ + +#ifndef EXTENT_SCAN_H +# define EXTENT_SCAN_H + +/* Structure used to store information of each extent. */ +struct extent_info +{ + /* Logical offset of an extent. */ + off_t ext_logical; + + /* Extent length. 
*/ + uint64_t ext_length; + + /* Extent flags, use it for FIEMAP only, or set it to zero. */ + uint32_t ext_flags; +}; + +/* Structure used to reserve extent scan information per file. */ +struct extent_scan +{ + /* File descriptor of extent scan run against. */ + int fd; + + /* Next scan start offset. */ + off_t scan_start; + + /* How many extent info returned for a scan. */ + uint32_t ei_count; + + /* If true, fall back to a normal copy, either set by the + failure of ioctl(2) for FIEMAP or lseek(2) with SEEK_DATA. */ + bool initial_scan_failed; + + /* If true, the total extent scan per file has been finished. */ + bool hit_final_extent; + + /* Extent information: a malloc'd array of ei_count structs. */ + struct extent_info *ext_info; +}; + +void extent_scan_init (int src_fd, struct extent_scan *scan); + +bool extent_scan_read (struct extent_scan *scan); + +static inline void +extent_scan_free (struct extent_scan *scan) +{ + free (scan->ext_info); +} + +#endif /* EXTENT_SCAN_H */ diff --git a/src/fiemap.h b/src/fiemap.h new file mode 100644 index 0000000000..c5d8424b3b --- /dev/null +++ b/src/fiemap.h @@ -0,0 +1,102 @@ +/* FS_IOC_FIEMAP ioctl infrastructure. + Some portions copyright (C) 2007 Cluster File Systems, Inc + Authors: Mark Fasheh + Kalpak Shah + Andreas Dilger . */ + +/* Copy from kernel, modified to respect GNU code style by Jie Liu. */ + +#ifndef _LINUX_FIEMAP_H +# define _LINUX_FIEMAP_H + +# include + +struct fiemap_extent +{ + /* Logical offset in bytes for the start of the extent + from the beginning of the file. */ + uint64_t fe_logical; + + /* Physical offset in bytes for the start of the extent + from the beginning of the disk. */ + uint64_t fe_physical; + + /* Length in bytes for this extent. */ + uint64_t fe_length; + + uint64_t fe_reserved64[2]; + + /* FIEMAP_EXTENT_* flags for this extent. */ + uint32_t fe_flags; + + uint32_t fe_reserved[3]; +}; + +struct fiemap +{ + /* Logical offset(inclusive) at which to start mapping(in). 
*/ + uint64_t fm_start; + + /* Logical length of mapping which userspace wants(in). */ + uint64_t fm_length; + + /* FIEMAP_FLAG_* flags for request(in/out). */ + uint32_t fm_flags; + + /* Number of extents that were mapped(out). */ + uint32_t fm_mapped_extents; + + /* Size of fm_extents array(in). */ + uint32_t fm_extent_count; + + uint32_t fm_reserved; + + /* Array of mapped extents(out). */ + struct fiemap_extent fm_extents[0]; +}; + +/* The maximum offset can be mapped for a file. */ +# define FIEMAP_MAX_OFFSET (~0ULL) + +/* Sync file data before map. */ +# define FIEMAP_FLAG_SYNC 0x00000001 + +/* Map extented attribute tree. */ +# define FIEMAP_FLAG_XATTR 0x00000002 + +# define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) + +/* Last extent in file. */ +# define FIEMAP_EXTENT_LAST 0x00000001 + +/* Data location unknown. */ +# define FIEMAP_EXTENT_UNKNOWN 0x00000002 + +/* Location still pending, Sets EXTENT_UNKNOWN. */ +# define FIEMAP_EXTENT_DELALLOC 0x00000004 + +/* Data can not be read while fs is unmounted. */ +# define FIEMAP_EXTENT_ENCODED 0x00000008 + +/* Data is encrypted by fs. Sets EXTENT_NO_BYPASS. */ +# define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 + +/* Extent offsets may not be block aligned. */ +# define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 + +/* Data mixed with metadata. Sets EXTENT_NOT_ALIGNED. */ +# define FIEMAP_EXTENT_DATA_INLINE 0x00000200 + +/* Multiple files in block. Set EXTENT_NOT_ALIGNED. */ +# define FIEMAP_EXTENT_DATA_TAIL 0x00000400 + +/* Space allocated, but not data (i.e. zero). */ +# define FIEMAP_EXTENT_UNWRITTEN 0x00000800 + +/* File does not natively support extents. Result merged for efficiency. */ +# define FIEMAP_EXTENT_MERGED 0x00001000 + +/* Space shared with other files. 
*/ +# define FIEMAP_EXTENT_SHARED 0x00002000 + +#endif diff --git a/tests/Makefile.am b/tests/Makefile.am index 1e4e3009f7..40d35ac7d8 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -10,6 +10,7 @@ EXTRA_DIST = \ CuTmpdir.pm \ check.mk \ envvar-check \ + filefrag-extent-compare \ init.cfg \ init.sh \ lang-default \ @@ -25,6 +26,7 @@ root_tests = \ cp/special-bits \ cp/cp-mv-enotsup-xattr \ cp/capability \ + cp/sparse-fiemap \ dd/skip-seek-past-dev \ install/install-C-root \ ls/capability \ @@ -318,6 +320,8 @@ TESTS = \ cp/dir-vs-file \ cp/existing-perm-race \ cp/fail-perm \ + cp/fiemap-perf \ + cp/fiemap-2 \ cp/file-perm-race \ cp/into-self \ cp/link \ diff --git a/tests/cp/fiemap-2 b/tests/cp/fiemap-2 new file mode 100755 index 0000000000..d40505b704 --- /dev/null +++ b/tests/cp/fiemap-2 @@ -0,0 +1,54 @@ +#!/bin/sh +# Exercise a few more corners of the fiemap-copying code. + +# Copyright (C) 2011 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +. "${srcdir=.}/init.sh"; path_prepend_ ../src +print_ver_ cp + +# Require a fiemap-enabled FS. +df -T -t btrfs -t xfs -t ext4 -t ocfs2 . \ + || skip_ "this file system lacks FIEMAP support" + +# Exercise the code that handles a file ending in a hole. +printf x > k || framework_failure_ +dd bs=1k seek=128 of=k < /dev/null || framework_failure_ + +# The first time through the outer loop, the input file, K, ends with a hole. 
+# The second time through, we append a byte so that it does not. +for append in no yes; do + test $append = yes && printf y >> k + for i in always never; do + cp --sparse=$i k k2 || fail=1 + cmp k k2 || fail=1 + done +done + +# Ensure that --sparse=always can restore holes. +rm -f k +# Create a file starting with an "x", followed by 257K-1 0 bytes. +printf x > k || framework_failure_ +dd bs=1k seek=1 of=k count=255 < /dev/zero || framework_failure_ + +# cp should detect the all-zero blocks and convert some of them to holes. +# How many it detects/converts currently depends on io_blksize. +# Currently, on my F14/ext4 desktop, this K starts off with size 256KiB, +# (note that the K in the preceding test starts off with size 4KiB). +# cp from coreutils-8.9 with --sparse=always reduces the size to 32KiB. +cp --sparse=always k k2 || fail=1 +test $(stat -c %b k2) -lt $(stat -c %b k) || fail=1 + +Exit $fail diff --git a/tests/cp/fiemap-perf b/tests/cp/fiemap-perf new file mode 100755 index 0000000000..429e59beb1 --- /dev/null +++ b/tests/cp/fiemap-perf @@ -0,0 +1,32 @@ +#!/bin/sh +# ensure that a sparse file is copied efficiently, by default + +# Copyright (C) 2011 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +. "${srcdir=.}/init.sh"; path_prepend_ ../src +print_ver_ cp + +# Require a fiemap-enabled FS. +df -T -t btrfs -t xfs -t ext4 -t ocfs2 . 
\ + || skip_ "this file system lacks FIEMAP support" + +# Create a large-but-sparse file. +timeout 1 dd bs=1 seek=1T of=f < /dev/null || framework_failure_ + +# Nothing can read (much less write) that many bytes in so little time. +timeout 3 cp f f2 || framework_failure_ + +Exit $fail diff --git a/tests/cp/sparse-fiemap b/tests/cp/sparse-fiemap new file mode 100755 index 0000000000..b6b1103909 --- /dev/null +++ b/tests/cp/sparse-fiemap @@ -0,0 +1,119 @@ +#!/bin/sh +# Test cp --sparse=always through fiemap copy + +# Copyright (C) 2010 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +if test "$VERBOSE" = yes; then + set -x + cp --version +fi + +. "${srcdir=.}/init.sh"; path_prepend_ ../src + +if df -T -t btrfs -t xfs -t ext4 -t ocfs2 . ; then + : # Current dir is on a partition with working extents. Good! +else + # It's not; we need to create one, hence we need root access. 
+ require_root_ + + cwd=$PWD + cleanup_() { cd /; umount "$cwd/mnt"; } + + skip=0 + # Create an ext4 loopback file system + dd if=/dev/zero of=blob bs=32k count=1000 || skip=1 + mkdir mnt + mkfs -t ext4 -F blob || + skip_test_ "failed to create ext4 file system" + mount -oloop blob mnt || skip=1 + cd mnt || skip=1 + echo test > f || skip=1 + test -s f || skip=1 + + test $skip = 1 && + skip_test_ "insufficient mount/ext4 support" +fi + +# Create a 1TiB sparse file +dd if=/dev/zero of=sparse bs=1k count=1 seek=1G || framework_failure + +# It takes many minutes to copy this sparse file using the old method. +# By contrast, it takes far less than 1 second using FIEMAP-copy. +timeout 10 cp --sparse=always sparse fiemap || fail=1 + +# Ensure that the sparse file copied through fiemap has the same size +# in bytes as the original. +test $(stat --printf %s sparse) = $(stat --printf %s fiemap) || fail=1 + +# ================================================= +# Ensure that we exercise the FIEMAP-copying code enough +# to provoke at least two iterations of the do...while loop +# in which it calls ioctl (fd, FS_IOC_FIEMAP,... +# This also verifies that non-trivial extents are preserved. + +$PERL -e 1 || skip_test_ 'skipping part of this test; you lack perl' + +# Extract logical block number and length pairs from filefrag -v output. +# The initial sed is to remove the "eof" from the normally-empty "flags" field. +# Similarly, remove flags values like "unknown,delalloc,eof". +# That is required when that final extent has no number in the "expected" field. +f() +{ + sed 's/ [a-z,][a-z,]*$//' $@ \ + | awk '/^ *[0-9]/ {printf "%d %d ", $2 ,NF < 5 ? 
$NF : $5 } END {print ""}' +} + +for i in $(seq 1 2 21); do + for j in 1 2 31 100; do + $PERL -e 'BEGIN { $n = '$i' * 1024; *F = *STDOUT }' \ + -e 'for (1..'$j') { sysseek (*F, $n, 1)' \ + -e '&& syswrite (*F, chr($_)x$n) or die "$!"}' > j1 || fail=1 + # sync + cp --sparse=always j1 j2 || fail=1 + # sync + # Technically we may need the 'sync' uses above, but + # uncommenting them makes this test take much longer. + + cmp j1 j2 || fail=1 + filefrag -v j1 | grep extent \ + || skip_test_ 'skipping part of this test; you lack filefrag' + + # Here is sample filefrag output: + # $ perl -e 'BEGIN{$n=16*1024; *F=*STDOUT}' \ + # -e 'for (1..5) { sysseek(*F,$n,1)' \ + # -e '&& syswrite *F,"."x$n or die "$!"}' > j + # $ filefrag -v j + # File system type is: ef53 + # File size of j is 163840 (40 blocks, blocksize 4096) + # ext logical physical expected length flags + # 0 4 6258884 4 + # 1 12 6258892 6258887 4 + # 2 20 6258900 6258895 4 + # 3 28 6258908 6258903 4 + # 4 36 6258916 6258911 4 eof + # j: 6 extents found + + # exclude the physical block numbers; they always differ + filefrag -v j1 > ff1 || fail=1 + filefrag -v j2 > ff2 || fail=1 + { f ff1; f ff2; } \ + | $PERL $abs_top_srcdir/tests/filefrag-extent-compare \ + || { fail=1; break; } + done + test $fail = 1 && break +done + +Exit $fail diff --git a/tests/filefrag-extent-compare b/tests/filefrag-extent-compare new file mode 100644 index 0000000000..3c095d52f4 --- /dev/null +++ b/tests/filefrag-extent-compare @@ -0,0 +1,68 @@ +eval '(exit $?0)' && eval 'exec perl -wS "$0" ${1+"$@"}' + & eval 'exec perl -wS "$0" $argv:q' + if 0; +# Determine whether two files have the same extents by comparing +# the logical block numbers and lengths from filefrag -v for each. + +# Invoke like this: +# This helper function, f, extracts logical block number and lengths. 
+# f() { awk '/^ *[0-9]/ {printf "%d %d ",$2,NF<5?$NF:$5} END {print ""}'; } +# { filefrag -v j1 | f; filefrag -v j2 | f; } | ./filefrag-extent-compare + +use warnings; +use strict; +(my $ME = $0) =~ s|.*/||; + +my @line = <>; +my $n_lines = @line; +$n_lines == 2 + or die "$ME: expected exactly two input lines; got $n_lines\n"; + +my @A = split ' ', $line[0]; +my @B = split ' ', $line[1]; +@A % 2 || @B % 2 + and die "$ME: unexpected input: odd number of numbers; expected even\n"; + +my @a; +my @b; +foreach my $i (0..@A/2-1) { $a[$i] = { L_BLK => $A[2*$i], LEN => $A[2*$i+1] } }; +foreach my $i (0..@B/2-1) { $b[$i] = { L_BLK => $B[2*$i], LEN => $B[2*$i+1] } }; + +my $i = 0; +my $j = 0; +while (1) + { + !defined $a[$i] && !defined $b[$j] + and exit 0; + defined $a[$i] && defined $b[$j] + or die "\@a and \@b have different lengths, even after adjustment\n"; + ($a[$i]->{L_BLK} == $b[$j]->{L_BLK} + && $a[$i]->{LEN} == $b[$j]->{LEN}) + and next; + ($a[$i]->{LEN} < $b[$j]->{LEN} + && exists $a[$i+1] && $a[$i]->{LEN} + $a[$i+1]->{LEN} == $b[$j]->{LEN}) + and ++$i, next; + exists $b[$j+1] && $a[$i]->{LEN} == $b[$j]->{LEN} + $b[$j+1]->{LEN} + and ++$j, next; + die "differing extent:\n" + . " [$i]=$a[$i]->{L_BLK} $a[$i]->{LEN}\n" + . " [$j]=$b[$j]->{L_BLK} $b[$j]->{LEN}\n" + } +continue + { + ++$i; + ++$j; + } + +### Setup "GNU" style for perl-mode and cperl-mode. +## Local Variables: +## mode: perl +## perl-indent-level: 2 +## perl-continued-statement-offset: 2 +## perl-continued-brace-offset: 0 +## perl-brace-offset: 0 +## perl-brace-imaginary-offset: 0 +## perl-label-offset: -2 +## perl-extra-newline-before-brace: t +## perl-merge-trailing-else: nil +## End: