commit 16d74be6473033f4320ee5ea16d1e84e28d504e3
Author: Tim Woodall <tim@woodall.me.uk>
Date:   Sun Sep 29 19:21:06 2024 +0100

    Much better handling of the restore of sparse files.
    Do not seek on every sparse block.

--- a/configure.ac
+++ b/configure.ac
@@ -271,6 +271,14 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#in
  AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
 
 dnl
+dnl Check if SEEK_HOLE and SEEK_DATA are supported by lseek
+dnl
+AC_MSG_CHECKING(if SEEK_HOLE and SEEK_DATA is supported by lseek)
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <unistd.h>]],
+[[int sh = SEEK_HOLE; int sd = SEEK_DATA;]])],[AC_DEFINE([HAVE_SEEK_HOLE],1,[Define if SEEK_HOLE and SEEK_DATA are supported by lseek.])
+ AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
+
+dnl
 dnl Check for blkid headers libraries
 dnl
 AC_ARG_ENABLE([blkid], [AS_HELP_STRING([--disable-blkid],[enable blkid support])])
--- a/restore/dirs.c
+++ b/restore/dirs.c
@@ -153,7 +153,7 @@ static void		 dcvt (struct odirect *, st
 static void		 flushent (void);
 static struct inotab	*inotablookup (dump_ino_t);
 static RST_DIR		*opendirfile (const char *);
-static void		 putdir (char *, size_t);
+static void		 putdir (char *, off_t, size_t);
 static void		 putent (struct direct *);
 static void		rst_seekdir (RST_DIR *, off_t, off_t);
 static off_t		rst_telldir (RST_DIR *);
@@ -389,7 +389,7 @@ searchdir(dump_ino_t inum, char *name)
  * Put the directory entries in the directory file
  */
 static void
-putdir(char *buf, size_t size)
+putdir(char *buf, UNUSED(off_t pos), size_t size)
 {
 	struct direct cvtbuf;
 	struct odirect *odp;
--- a/restore/extern.h
+++ b/restore/extern.h
@@ -71,7 +71,7 @@ void		 freeentry (struct entry *);
 void		 freename (char *);
 int	 	 genliteraldir (char *, dump_ino_t);
 char		*gentempname (struct entry *);
-void		 getfile (void (*)(char *, size_t), void (*)(char *, size_t));
+void		 getfile (void (*)(char *, off_t, size_t), void (*)(char *, off_t, size_t));
 void		 getvol (long);
 void		 initsymtable (char *);
 int	 	 inodetype (dump_ino_t);
@@ -116,7 +116,7 @@ void		 swabst (u_char *, u_char *);
 void	 	 treescan (char *, dump_ino_t, long (*)(char *, dump_ino_t, int));
 dump_ino_t	 upperbnd (dump_ino_t);
 long		 verifyfile (char *, dump_ino_t, int);
-void		 xtrnull (char *, size_t);
+void		 xtrnull (char *, off_t, size_t);
 
 /* From ../dump/dumprmt.c */
 void		rmtclose (void);
--- a/restore/tape.c
+++ b/restore/tape.c
@@ -148,7 +148,7 @@ static void	 converttapebuf (struct tape
 static void	 readtape (char *);
 static void	 setdumpnum (void);
 #ifdef DUMP_MACOSX
-static void	 xtrfilefinderinfo (char *, size_t);
+static void	 xtrfilefinderinfo (char *, off_t, size_t);
 #endif
 
 static u_int	 swabi (u_int);
@@ -159,13 +159,13 @@ static u_char	*swab64 (u_char *, int);
 static u_char	*swab32 (u_char *, int);
 static u_char	*swab16 (u_char *, int);
 static void	 terminateinput (void);
-static void	 xtrfile (char *, size_t);
-static void	 xtrlnkfile (char *, size_t);
-static void	 xtrlnkskip (char *, size_t);
-static void	 xtrmap (char *, size_t);
-static void	 xtrmapskip (char *, size_t);
-static void	 xtrskip (char *, size_t);
-static void	 xtrxattr (char *, size_t);
+static void	 xtrfile (char *, off_t, size_t);
+static void	 xtrlnkfile (char *, off_t, size_t);
+static void	 xtrlnkskip (char *, off_t, size_t);
+static void	 xtrmap (char *, off_t, size_t);
+static void	 xtrmapskip (char *, off_t, size_t);
+static void	 xtrskip (char *, off_t, size_t);
+static void	 xtrxattr (char *, off_t, size_t);
 static void	 setmagtapein (void);
 static int	 extractattr (char *);
 static void	 compareattr (char *);
@@ -188,8 +188,8 @@ static int	read_a_block (int, char *, si
 #if COMPARE_ONTHEFLY
 static int	ifile;		/* input file for compare */
 static int	cmperror;	/* compare error */
-static void	xtrcmpfile (char *, size_t);
-static void	xtrcmpskip (char *, size_t);
+static void	xtrcmpfile (char *, off_t, size_t);
+static void	xtrcmpskip (char *, off_t, size_t);
 #endif
 
 static int readmapflag;
@@ -1053,7 +1053,7 @@ extractfile(struct entry *ep, int doremo
 				memcpy(inline_data, curfile.dip->di_db, INLINE_DATA_MAX_INODE_SIZE);
 				getfile(xtrfile, xtrnull);
 				extractattr(name);
-				xtrfile(inline_data, inline_filesize);
+				xtrfile(inline_data, 0, inline_filesize);
 			} else {
 				getfile(xtrfile, xtrskip);
 				extractattr(name);
@@ -1369,16 +1369,18 @@ skipxattr(void)
  * to the skip function.
  */
 void
-getfile(void (*fill) (char *, size_t), void (*skip) (char *, size_t))
+getfile(void (*fill) (char *, off_t, size_t), void (*skip) (char *, off_t, size_t))
 {
 	int i;
 	volatile int curblk = 0;
 	volatile int64_t size = spcl.c_dinode.di_size;
+	off_t pos = 0;
 	volatile int last_write_was_hole = 0;
-	int64_t origsize = size;
 	static char clearedbuf[MAXBSIZE];
 	char buf[MAXBSIZE / TP_BSIZE][TP_BSIZE];
 	char junk[TP_BSIZE];
+	int missing_blocks;
+	int has_data;
 
 	if (spcl.c_type == TS_END)
 		panic("ran off end of tape\n");
@@ -1387,25 +1389,31 @@ getfile(void (*fill) (char *, size_t), v
 	if (!gettingfile && setjmp(restart) != 0)
 		return;
 	gettingfile++;
+	missing_blocks = 0;
+	has_data = 0;
 loop:
 	for (i = 0; i < spcl.c_count; i++) {
 		if (readmapflag || spcl.c_addr[i]) {
-			readtape(&buf[curblk++][0]);
 			if (curblk == fssize / TP_BSIZE) {
-				(*fill)((char *)buf, (size_t)(size > TP_BSIZE ?
-				     fssize : (curblk - 1) * TP_BSIZE + size));
+				size_t bytes = curblk * TP_BSIZE;
+				(*fill)((char *)buf, pos, bytes);
+				pos += bytes;
 				curblk = 0;
 				last_write_was_hole = 0;
+				has_data = 1;
 			}
+			readtape(&buf[curblk++][0]);
 		} else {
 			if (curblk > 0) {
-				(*fill)((char *)buf, (size_t)(size > TP_BSIZE ?
-				     curblk * TP_BSIZE :
-				     (curblk - 1) * TP_BSIZE + size));
+				size_t bytes = (size_t)(size > TP_BSIZE ? curblk * TP_BSIZE : (curblk - 1) * TP_BSIZE + size);
+				(*fill)((char *)buf, pos, bytes);
+				pos += bytes;
 				curblk = 0;
+				has_data = 1;
 			}
-			(*skip)(clearedbuf, (long)(size > TP_BSIZE ?
-				TP_BSIZE : size));
+			size_t sbytes = (size_t)(size > TP_BSIZE ?  TP_BSIZE : size);
+			(*skip)(clearedbuf, pos, sbytes);
+			pos += sbytes;
 			last_write_was_hole = 1;
 		}
 		if ((size -= TP_BSIZE) <= 0) {
@@ -1422,31 +1430,55 @@ loop:
	if (size > 0) {
		if (spcl.c_type == TS_ADDR)
			goto loop;
-		Dprintf(stdout,
-			"Missing address (header) block for %s at %ld blocks\n",
-			curfile.name, blksread);
+		missing_blocks = (size+TP_BSIZE-1)/TP_BSIZE;
+		if (missing_blocks <= 12) {
+			size -= missing_blocks * TP_BSIZE;
+			fprintf(stderr, "%s: Found %d missing blocks - assuming version 0.4b42/43 dump sparse file bug\n", curfile.name, missing_blocks);
+		} else {
+			missing_blocks = 0;
+			size = 0;
+			fprintf(stderr, "Missing blocks at the end of %s. File likely corrupted\n", curfile.name);
+			Dprintf(stdout,
+				"Missing address (header) block for %s at %ld blocks\n",
+				curfile.name, blksread);
+		}
	}
	if (curblk > 0) {
 		size_t bytes = (size_t)((curblk * TP_BSIZE) + size);
 		(*fill)((char *)buf, pos, bytes);
		pos += bytes;
		last_write_was_hole = 0;
+		has_data = 1;
	}
-	if (size > 0) {
-		fprintf(stderr, "Missing blocks at the end of %s, assuming hole\n", curfile.name);
-		while (size > 0) {
-			size_t skp = size > TP_BSIZE ? TP_BSIZE : size;
-			(*skip)(clearedbuf, skp);
-			size -= skp;
+	if (missing_blocks && ofile >= 0) {
+		int fallocate_needs = (missing_blocks % (fssize / TP_BSIZE));
+		int blksz = 1;
+		while (blksz < fallocate_needs) {	/* reduce fallocate_needs to its lowest set bit */
+			if (fallocate_needs & blksz)
+				fallocate_needs = blksz;
+			blksz <<= 1;
+		}
+		/* Bug in dump versions 0.4b42/43: the first 12 blocks of a hole were missing from the start of the file (or the entire hole if shorter). */
+		if (!has_data) {
+			pos += missing_blocks * TP_BSIZE;
+		} else if (fallocate_needs) {
+			fprintf(stderr, "Restore file to a filesystem with a block size no more than %d to get a fixed file\n", fallocate_needs*TP_BSIZE);
+			missing_blocks = 0;
+		} else if (fallocate(ofile, FALLOC_FL_INSERT_RANGE, 0, missing_blocks * TP_BSIZE) == -1) {
+			fprintf(stderr, "%s: Attempt to insert %d bytes at start of %s failed (%s)\n", curfile.name, missing_blocks * TP_BSIZE, curfile.name, strerror(errno));
+		} else {
+			pos += missing_blocks * TP_BSIZE;
+			if (lseek(ofile, pos, SEEK_SET) == -1) {
+				fprintf(stderr, "%s: lseek to %lld failed (%s)\n", curfile.name, (long long)pos, strerror(errno));
+			}
		}
-		last_write_was_hole = 1;
	}
	if (last_write_was_hole) {
		/*
		 * Do not attempt a truncate if running an on-the-fly compare, as there
		 * IS no file to truncate in that case!
		 */
-		if (ofile >= 0 && ftruncate(ofile, origsize) < 0)
+		if (ofile >= 0 && ftruncate(ofile, pos) < 0)
			warn("%s: ftruncate", curfile.name);
	}
	if (!readingmaps)
@@ -1454,23 +1486,33 @@ loop:
 	gettingfile = 0;
 }
 
+static size_t hashole = 0;	/* set by xtrskip() when a hole is skipped; xtrfile() then seeks to its block offset before writing and clears it */
+
 /*
  * Write out the next block of a file.
  */
 static void
-xtrfile(char *buf, size_t size)
+xtrfile(char *buf, off_t pos, size_t size)
 {
 
	if (Nflag)
		return;
+
+	if (hashole) {	/* a hole was skipped since the last write: reposition to this block's offset */
+		if (lseek(ofile, pos, SEEK_SET) == -1)
+			err(1, "seek error extracting inode %lu, name %s",
+					(unsigned long)curfile.ino, curfile.name);
+		hashole = 0;
+	}
+
	if (write(ofile, buf, (int) size) == -1)
-		err(1, "write error extracting inode %lu, name %s\nwrite",
+		err(1, "write error extracting inode %lu, name %s",
			(unsigned long)curfile.ino, curfile.name);
 }
 
 #ifdef DUMP_MACOSX
 static void
-xtrfilefinderinfo(char *buf, size_t size)
+xtrfilefinderinfo(char *buf, UNUSED(off_t pos), size_t size)
 {
 	bcopy(buf, &gFndrInfo, size);
 }
@@ -1481,19 +1523,19 @@ xtrfilefinderinfo(char *buf, size_t size
  */
 /* ARGSUSED */
 static void
-xtrskip(UNUSED(char *buf), size_t size)
+xtrskip(UNUSED(char *buf), UNUSED(off_t pos), UNUSED(size_t size))
 {
+	if (Nflag)
+		return;
 
-	if (lseek(ofile, (off_t)size, SEEK_CUR) == -1)
-		err(1, "seek error extracting inode %lu, name %s\nlseek",
-			(unsigned long)curfile.ino, curfile.name);
+	hashole = 1;	/* no seek here: xtrfile() seeks to its own offset before the next write */
 }
 
 /*
  * Collect the next block of a symbolic link.
  */
 static void
-xtrlnkfile(char *buf, size_t size)
+xtrlnkfile(char *buf, UNUSED(off_t pos), size_t size)
 {
 
 	pathlen += size;
@@ -1511,7 +1553,7 @@ xtrlnkfile(char *buf, size_t size)
  */
 /* ARGSUSED */
 static void
-xtrlnkskip(UNUSED(char *buf), UNUSED(size_t size))
+xtrlnkskip(UNUSED(char *buf), UNUSED(off_t pos), UNUSED(size_t size))
 {
 
 	errx(1, "unallocated block in symbolic link %s", curfile.name);
@@ -1521,7 +1563,7 @@ xtrlnkskip(UNUSED(char *buf), UNUSED(siz
  * Collect the next block of a bit map.
  */
 static void
-xtrmap(char *buf, size_t size)
+xtrmap(char *buf, UNUSED(off_t pos), size_t size)
 {
 
 	memmove(map, buf, size);
@@ -1533,7 +1575,7 @@ xtrmap(char *buf, size_t size)
  */
 /* ARGSUSED */
 static void
-xtrmapskip(UNUSED(char *buf), size_t size)
+xtrmapskip(UNUSED(char *buf), UNUSED(off_t pos), size_t size)
 {
 
 	panic("hole in map\n");
@@ -1545,7 +1587,7 @@ xtrmapskip(UNUSED(char *buf), size_t siz
  */
 /* ARGSUSED */
 void
-xtrnull(UNUSED(char *buf), UNUSED(size_t size))
+xtrnull(UNUSED(char *buf), UNUSED(off_t pos), UNUSED(size_t size))
 {
 
 	return;
@@ -1555,14 +1597,50 @@ xtrnull(UNUSED(char *buf), UNUSED(size_t
 /*
  * Compare the next block of a file.
  */
+
+#ifdef HAVE_SEEK_HOLE
+static off_t cmpfile_data = 0;  // Offset of the next data as last reported by SEEK_DATA; -1 indicates that the remainder of the file is a hole
+static off_t cmpfile_hole = 0;  // Offset of the next hole as last reported by SEEK_HOLE
+static off_t cmpfile_curr = 0;  // End offset (pos + size) of the last block passed to xtrcmpfile/xtrcmpskip
+#endif
+
 static void
-xtrcmpfile(char *buf, size_t size)
+xtrcmpfile(char *buf, off_t pos, size_t size)
 {
 	static char cmpbuf[MAXBSIZE];
 
 	if (cmperror)
 		return;
+#ifdef HAVE_SEEK_HOLE
+	cmpfile_curr = pos + size;
+
+	if (cmpfile_data > pos || cmpfile_data == -1) {
+		fprintf(stderr, "%s: tape has data but disk has hole\n", curfile.name);
+		cmperror = 1;
+		return;
+	}
 
+	// Find the next hole if we haven't already.
+	if (cmpfile_hole < (off_t)(pos + size)) {
+		if ((cmpfile_hole = lseek(ifile, pos, SEEK_HOLE)) == -1 ||
+				lseek(ifile, pos, SEEK_SET) == -1) {
+			fprintf(stderr, "%s: size has changed.\n",
+				curfile.name);
+			cmperror = 1;
+			return;
+		}
+	}
+	/* ext4 uninit look like sparse files to SEEK_HOLE/SEEK_DATA */
+	/*
+	if (cmpfile_hole < (off_t)(pos + size)) {
+		fprintf(stderr, "%s: tape has data but disk has hole\n", curfile.name);
+		cmperror = 1;
+		return;
+	}
+	*/
+#else
+	(void)pos;
+#endif
 	if (read(ifile, cmpbuf, size) != (ssize_t)size) {
 		fprintf(stderr, "%s: size has changed.\n",
 			curfile.name);
@@ -1582,13 +1660,39 @@ xtrcmpfile(char *buf, size_t size)
  * Skip over a hole in a file.
  */
 static void
-xtrcmpskip(UNUSED(char *buf), size_t size)
+xtrcmpskip(UNUSED(char *buf), off_t pos, size_t size)
 {
-	static char cmpbuf[MAXBSIZE];
-	int i;
-
 	if (cmperror)
 		return;
+#ifdef HAVE_SEEK_HOLE
+	cmpfile_curr = pos + size;
+
+	if (cmpfile_hole > pos) {
+		fprintf(stderr, "%s: tape has hole but disk has data\n", curfile.name);
+		cmperror = 1;
+		return;
+	}
+
+	// Trivial case - we're already in a hole
+	if (cmpfile_data > (off_t)(pos + size) || cmpfile_data == -1)
+		return;
+
+	if ((cmpfile_data = lseek(ifile, pos, SEEK_DATA)) == -1) {
+		/* There is no more data in the file */
+		return;
+	}
+
+	if (cmpfile_data >= (off_t)(pos + size)) {
+		return;
+	}
+
+	fprintf(stderr, "%s: tape has hole but disk has data\n", curfile.name);
+	cmperror = 1;
+	return;
+#else
+	(void)pos;
+	static char cmpbuf[MAXBSIZE];
+	int i;
 
 	if (read(ifile, cmpbuf, size) != (ssize_t)size) {
 		fprintf(stderr, "%s: size has changed.\n",
@@ -1604,11 +1708,12 @@ xtrcmpskip(UNUSED(char *buf), size_t siz
 			cmperror = 1;
 			return;
 		}
+#endif /* HAVE_SEEK_HOLE */
 }
 #endif /* COMPARE_ONTHEFLY */
 
 static void
-xtrxattr(char *buf, size_t size)
+xtrxattr(char *buf, UNUSED(off_t pos), size_t size)
 {
 	if (xattrlen + size > XATTR_MAXSIZE) {
 		fprintf(stderr, "EA size too big (%ld)", (long)xattrlen + size);
@@ -1960,13 +2065,32 @@ comparefile(char *name)
				memcpy(inline_data, curfile.dip->di_db, INLINE_DATA_MAX_INODE_SIZE);
+#ifdef HAVE_SEEK_HOLE
+				/* reset hole/data tracking: values left by the previous file would make xtrcmpfile misreport a hole */
+				cmpfile_data = 0;
+				cmpfile_hole = 0;
+				cmpfile_curr = 0;
+#endif
				getfile(xtrcmpfile, xtrnull);
				compareattr(name);
-				xtrcmpfile(inline_data, inline_filesize);
+				xtrcmpfile(inline_data, 0, inline_filesize);
			} else {
+#ifdef HAVE_SEEK_HOLE
+				cmpfile_data = 0;
+				cmpfile_hole = 0;
+				cmpfile_curr = 0;
+#endif
				getfile(xtrcmpfile, xtrcmpskip);
				compareattr(name);
			}
			if (!cmperror) {
				char c;
+#ifdef HAVE_SEEK_HOLE
+				if (lseek(ifile, cmpfile_curr, SEEK_SET) == -1) {
+					fprintf(stderr, "%s: size has changed.\n",
+						name);
+					cmperror = 1;
+				}
+				else
+#endif
				if (read(ifile, &c, 1) != 0) {
					fprintf(stderr, "%s: size has changed.\n",
						name);
@@ -1977,7 +2101,7 @@ comparefile(char *name)
				do_compare_error;
			close(ifile);
		}
-#else
+#else /* COMPARE_ONTHEFLY */
		if (tmpfile == NULL) {
			/* argument to mktemp() must not be in RO space: */
			snprintf(tmpfilename, sizeof(tmpfilename), "%s/restoreCXXXXXX", tmpdir);
