Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F157144800
D5333.id13441.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
32 KB
Referenced Files
None
Subscribers
None
D5333.id13441.diff
View Options
Index: share/man/man4/geom_uzip.4
===================================================================
--- share/man/man4/geom_uzip.4
+++ share/man/man4/geom_uzip.4
@@ -30,7 +30,7 @@
.Os
.Sh NAME
.Nm geom_uzip
-.Nd "GEOM based compressed disk images"
+.Nd "GEOM based compressed disk images and partitions"
.Sh SYNOPSIS
To compile this driver into the kernel,
place the following line in your
@@ -51,7 +51,7 @@
framework provides support for compressed read only
disk images.
This allows significant storage savings at the expense of
-a little CPU time on each read.
+some CPU time on each read.
Data written in the GEOM label area allows
.Nm
to detect compressed images which have been created with
@@ -65,15 +65,53 @@
.Pp
The
.Nm
+is not limited to support
+.Xr md 4
+images only.
+The image can also be copied to a block device, either read-write such as
+disk drive, flash drive or flash card, or read-only such as DVD-ROM.
+When that disk partition is probed or media is inserted the appropriate
+device node with the
+.Pa .uzip
+suffix will appear:
+.Bd -literal -offset indent
+# gpart show da0
+=> 0 7833600 da0 BSD (3.7G)
+ 0 2097152 1 freebsd-ufs (1.0G)
+ 2097152 5736448 - free - (2.7G)
+# gpart add -t freebsd-ufs -s 1G da0
+da0b added
+# dd if=/tmp/20160217_dcomp_zcomp.uzip bs=256k of=/dev/da0b
+3190+1 records in
+3190+1 records out
+836331008 bytes transferred in 111.021489 secs (7533055 bytes/sec)
+# fsck -t ffs /dev/da0b.uzip
+** /dev/da0b.uzip (NO WRITE)
+** Last Mounted on /mnt
+** Phase 1 - Check Blocks and Sizes
+** Phase 2 - Check Pathnames
+** Phase 3 - Check Connectivity
+** Phase 4 - Check Reference Counts
+** Phase 5 - Check Cyl groups
+97455 files, 604242 used, 184741 free (2349 frags, 22799 blocks,
+ 0.3% fragmentation)
+# mount -o ro /dev/da0b.uzip /mnt
+# df /dev/da0b.uzip
+Filesystem 1K-blocks Used Avail Capacity Mounted on
+/dev/da0b.uzip 3155932 2416968 738964 77% /mnt
+.Ed
+.Pp
+The
+.Nm
device is subsequently used by the
.Fx
-kernel to access the disk images.
+kernel to access the compressed data.
The
.Nm
driver does not allow write operations to the underlying disk image.
To check which
-.Xr md 4
-devices match a given
+.Dq providers
+match a given
.Nm
device:
.Bd -literal -offset indent
@@ -83,13 +121,46 @@
1. Name: md1.uzip
Mediasize: 22003712 (21M)
Sectorsize: 512
- Mode: r1w0e1
Consumers:
1. Name: md1
Mediasize: 9563648 (9.1M)
Sectorsize: 512
- Mode: r1w0e1
+
+Geom name: da0b.uzip
+Providers:
+1. Name: da0b.uzip
+ Mediasize: 3355443200 (3.1G)
+ Sectorsize: 512
+Consumers:
+1. Name: da0b
+ Mediasize: 1073741824 (1.0G)
+ Sectorsize: 512
.Ed
+.Pp
+The
+.Nm
+allows mounting the root file system from a compressed preloaded image or disk
+partition by setting
+.Dv vfs.root.mountfrom
+kernel environment parameter.
+See
+.Xr loader.conf 5
+for details.
+.Sh DIAGNOSTICS
+Several flags are provided for tracing
+.Nm
+I/O operations and TOC parsing via the following sysctls.
+.Bl -tag -width indent
+.It Va kern.geom.uzip.debug
+Log level.
+Setting it to a value greater than 0 enables debug logging in key
+places of the
+.Nm
+code.
+Supported levels are from 0 (no logging) to 3 (maximum amount of logging).
+.It Va kern.geom.uzip.debug_block
+Log key operations involving the specified compressed cluster number.
+.El
.Sh SEE ALSO
.Xr GEOM 4 ,
.Xr md 4 ,
@@ -101,5 +172,9 @@
.Nm
driver was written by
.An Max Khon Aq Mt fjoe@FreeBSD.org .
+The block de-duplication code as well as some
+.Nm
+driver optimizations have been contributed by
+.An Maxim Sobolev Aq Mt sobomax@FreeBSD.org .
This manual page was written by
.An Ceri Davies Aq Mt ceri@FreeBSD.org .
Index: sys/geom/uzip/g_uzip.c
===================================================================
--- sys/geom/uzip/g_uzip.c
+++ sys/geom/uzip/g_uzip.c
@@ -44,11 +44,40 @@
FEATURE(geom_uzip, "GEOM uzip read-only compressed disks support");
+struct g_uzip_blk { /* one table-of-contents entry per cluster */
+ uint64_t offset; /* where the cluster's data starts, as read from the on-disk TOC */
+ uint32_t blen; /* stored length in bytes; 0 marks an all-zero cluster */
+#define BLEN_UNDEF UINT32_MAX /* sentinel: blen has not been computed yet */
+};
+
+#ifndef ABS
+#define ABS(a) ((a) < 0 ? -(a) : (a))
+#endif
+
+#define BLK_IN_RANGE(cn1, cn2, cr) (((cn2) != BLEN_UNDEF) && \
+ (ABS((int64_t)(cn1) - (int64_t)(cn2)) < (int64_t)(cr)))
+
+SYSCTL_DECL(_kern_geom);
+SYSCTL_NODE(_kern_geom, OID_AUTO, uzip, CTLFLAG_RW, 0, "GEOM_UZIP stuff");
+static u_int g_uzip_debug = 0;
+SYSCTL_UINT(_kern_geom_uzip, OID_AUTO, debug, CTLFLAG_RWTUN, &g_uzip_debug, 0,
+ "Debug level");
+static u_int g_uzip_debug_block = BLEN_UNDEF;
+SYSCTL_UINT(_kern_geom_uzip, OID_AUTO, debug_block, CTLFLAG_RWTUN,
+ &g_uzip_debug_block, 0, "Debug operations around specific cluster#");
+
#undef GEOM_UZIP_DEBUG
#ifdef GEOM_UZIP_DEBUG
-#define DPRINTF(a) printf a
+#define DPRINTF(lvl, a) printf a
+#define DPRINTF_BLK(lvl, cn, a) printf a
#else
-#define DPRINTF(a)
+#define DPRINTF(lvl, a) if ((lvl) <= g_uzip_debug) { \
+ printf a; \
+ }
+#define DPRINTF_BLK(lvl, cn, a) if ((lvl) <= g_uzip_debug || \
+ BLK_IN_RANGE(cn, g_uzip_debug_block, 10)) { \
+ printf a; \
+ }
#endif
static MALLOC_DEFINE(M_GEOM_UZIP, "geom_uzip", "GEOM UZIP data structures");
@@ -78,7 +107,7 @@
struct g_uzip_softc {
uint32_t blksz; /* block size */
uint32_t nblocks; /* number of blocks */
- uint64_t *offsets;
+ struct g_uzip_blk *toc; /* table of contents */
struct mtx last_mtx;
uint32_t last_blk; /* last blk no */
@@ -94,12 +123,12 @@
{
if (gp != NULL) {
- DPRINTF(("%s: %d requests, %d cached\n",
+ DPRINTF(1, ("%s: %d requests, %d cached\n",
gp->name, sc->req_total, sc->req_cached));
}
- if (sc->offsets != NULL) {
- free(sc->offsets, M_GEOM_UZIP);
- sc->offsets = NULL;
+ if (sc->toc != NULL) {
+ free(sc->toc, M_GEOM_UZIP);
+ sc->toc = NULL;
}
mtx_destroy(&sc->last_mtx);
free(sc->last_buf, M_GEOM_UZIP);
@@ -144,7 +173,7 @@
sc->req_cached++;
mtx_unlock(&sc->last_mtx);
- DPRINTF(("%s/%s: %p: offset=%jd: got %jd bytes from cache\n",
+ DPRINTF(1, ("%s/%s: %p: offset=%jd: got %jd bytes from cache\n",
__func__, gp->name, bp, (intmax_t)ofs, (intmax_t)usz));
bp->bio_completed += usz;
@@ -160,6 +189,18 @@
return (0);
}
+#define BLK_ENDS(sc, bi) ((sc)->toc[(bi)].offset + \
+ (sc)->toc[(bi)].blen) /* offset one past the last stored byte of cluster bi */
+
+#define BLK_IS_CONT(sc, bi) (BLK_ENDS((sc), (bi) - 1) == \
+ (sc)->toc[(bi)].offset) /* true if bi starts right where bi - 1 ends; reads entry bi - 1 */
+
+#define TOFF_2_BOFF(sc, pp, bi) ((sc)->toc[(bi)].offset - \
+ (sc)->toc[(bi)].offset % (pp)->sectorsize) /* cluster offset rounded down to a sector boundary */
+#define TLEN_2_BLEN(sc, pp, bp, ei) ((BLK_ENDS((sc), (ei)) - \
+ (bp)->bio_offset + (pp)->sectorsize - 1) / \
+ (pp)->sectorsize * (pp)->sectorsize) /* span from bp's offset to the end of cluster ei, rounded up to whole sectors */
+
static int
g_uzip_request(struct g_geom *gp, struct bio *bp)
{
@@ -168,20 +209,13 @@
struct g_consumer *cp;
struct g_provider *pp;
off_t ofs;
- size_t start_blk, end_blk;
+ size_t i, start_blk, end_blk, total_len;
if (g_uzip_cached(gp, bp) != 0)
return (1);
sc = gp->softc;
- bp2 = g_clone_bio(bp);
- if (bp2 == NULL) {
- g_io_deliver(bp, ENOMEM);
- return (1);
- }
- bp2->bio_done = g_uzip_done;
-
cp = LIST_FIRST(&gp->consumer);
pp = cp->provider;
@@ -191,17 +225,48 @@
end_blk = (ofs + bp->bio_resid + sc->blksz - 1) / sc->blksz;
KASSERT(end_blk <= sc->nblocks, ("end_blk out of range"));
- DPRINTF(("%s/%s: %p: start=%u (%jd), end=%u (%jd)\n",
+ total_len = 0;
+ for (i = start_blk; i < end_blk; i++) {
+ total_len += sc->toc[i].blen;
+ }
+ if (total_len == 0) {
+ /*
+ * All zero request, don't need to put it through the
+ * whole "clone request, get it done", fill in
+ * immediately.
+ */
+ DPRINTF(1, ("%s/%s: %p/%ju: returning %ju zero bytes\n",
+ __func__, gp->name, gp, (uintmax_t)bp->bio_completed,
+ (uintmax_t)bp->bio_resid));
+ bzero(bp->bio_data + bp->bio_completed, bp->bio_resid);
+ bp->bio_completed += bp->bio_resid;
+ bp->bio_resid = 0;
+ g_io_deliver(bp, 0);
+ return (1);
+ }
+
+ for (i = start_blk + 1; i < end_blk; i++) {
+ if (!BLK_IS_CONT(sc, i)) {
+ end_blk = i;
+ break;
+ }
+ }
+
+ DPRINTF(1, ("%s/%s: %p: start=%u (%ju), end=%u (%ju)\n",
__func__, gp->name, bp,
- (u_int)start_blk, (intmax_t)sc->offsets[start_blk],
- (u_int)end_blk, (intmax_t)sc->offsets[end_blk]));
+ (u_int)start_blk, (uintmax_t)sc->toc[start_blk].offset,
+ (u_int)end_blk, (uintmax_t)BLK_ENDS(sc, end_blk - 1)));
+
+ bp2 = g_clone_bio(bp);
+ if (bp2 == NULL) {
+ g_io_deliver(bp, ENOMEM);
+ return (1);
+ }
+ bp2->bio_done = g_uzip_done;
- bp2->bio_offset = sc->offsets[start_blk] -
- sc->offsets[start_blk] % pp->sectorsize;
+ bp2->bio_offset = TOFF_2_BOFF(sc, pp, start_blk);
while (1) {
- bp2->bio_length = sc->offsets[end_blk] - bp2->bio_offset;
- bp2->bio_length = (bp2->bio_length + pp->sectorsize - 1) /
- pp->sectorsize * pp->sectorsize;
+ bp2->bio_length = TLEN_2_BLEN(sc, pp, bp2, end_blk - 1);
if (bp2->bio_length <= MAXPHYS)
break;
@@ -215,7 +280,7 @@
return (1);
}
- DPRINTF(("%s/%s: %p: reading %jd bytes from offset %jd\n",
+ DPRINTF(1, ("%s/%s: %p: reading %jd bytes from offset %jd\n",
__func__, gp->name, bp,
(intmax_t)bp2->bio_length, (intmax_t)bp2->bio_offset));
@@ -234,7 +299,7 @@
struct g_uzip_softc *sc;
char *data, *data2;
off_t ofs;
- size_t blk, blkofs, len, ulen;
+ size_t blk, blkofs, len, ulen, firstblk;
bp2 = bp->bio_parent;
gp = bp2->bio_to->geom;
@@ -257,19 +322,27 @@
zs.zfree = z_free;
if (inflateInit(&zs) != Z_OK) {
bp2->bio_error = EILSEQ;
+ DPRINTF(1, ("%s/%s: inflateInit(%p) failed\n", __func__,
+ gp->name, &zs));
goto done;
}
ofs = bp2->bio_offset + bp2->bio_completed;
- blk = ofs / sc->blksz;
+ firstblk = blk = ofs / sc->blksz;
blkofs = ofs % sc->blksz;
- data = bp->bio_data + sc->offsets[blk] % pp->sectorsize;
+ data = bp->bio_data + sc->toc[blk].offset % pp->sectorsize;
data2 = bp2->bio_data + bp2->bio_completed;
while (bp->bio_completed && bp2->bio_resid) {
+ if (blk > firstblk && !BLK_IS_CONT(sc, blk)) {
+ DPRINTF_BLK(1, blk, ("%s/%s: %p: backref'ed cluster "
+ "#%u requested, looping around\n", __func__,
+ gp->name, bp2, (u_int)blk));
+ goto inflate_done;
+ }
ulen = MIN(sc->blksz - blkofs, bp2->bio_resid);
- len = sc->offsets[blk + 1] - sc->offsets[blk];
- DPRINTF(("%s/%s: %p/%ju: data2=%p, ulen=%u, data=%p, len=%u\n",
- __func__, gp->name, gp, bp->bio_completed,
+ len = sc->toc[blk].blen;
+ DPRINTF(1, ("%s/%s: %p/%ju: data2=%p, ulen=%u, data=%p, "
+ "len=%u\n", __func__, gp->name, gp, bp->bio_completed,
data2, (u_int)ulen, data, (u_int)len));
if (len == 0) {
/* All zero block: no cache update */
@@ -285,6 +358,8 @@
mtx_unlock(&sc->last_mtx);
inflateEnd(&zs);
bp2->bio_error = EILSEQ;
+ DPRINTF(1, ("%s/%s: inflate(%p) failed\n",
+ __func__, gp->name, &zs));
goto done;
}
sc->last_blk = blk;
@@ -293,6 +368,8 @@
if (inflateReset(&zs) != Z_OK) {
inflateEnd(&zs);
bp2->bio_error = EILSEQ;
+ DPRINTF(1, ("%s/%s: inflateReset(%p) failed\n",
+ __func__, gp->name, &zs));
goto done;
}
data += len;
@@ -307,6 +384,7 @@
blk++;
}
+inflate_done:
if (inflateEnd(&zs) != Z_OK)
bp2->bio_error = EILSEQ;
@@ -330,7 +408,7 @@
pp = bp->bio_to;
gp = pp->geom;
- DPRINTF(("%s/%s: %p: cmd=%d, offset=%jd, length=%jd, buffer=%p\n",
+ DPRINTF(1, ("%s/%s: %p: cmd=%d, offset=%jd, length=%jd, buffer=%p\n",
__func__, gp->name, bp, bp->bio_cmd, (intmax_t)bp->bio_offset,
(intmax_t)bp->bio_length, bp->bio_data));
@@ -392,6 +470,75 @@
g_wither_geom(gp, ENXIO);
}
+/* Derive each cluster's stored length from the TOC; 0 if sound, -1 if not. */
+static int
+g_uzip_parse_toc(struct g_uzip_softc *sc, struct g_provider *pp,
+    struct g_geom *gp)
+{
+	uint32_t i, j, backref_to;
+	uint64_t max_offset;
+
+	max_offset = sc->toc[0].offset - 1;
+	for (i = 0; i < sc->nblocks; i++) {
+		DPRINTF_BLK(3, i, ("%s: cluster #%u sc->toc[i].offset=%ju "
+		    "max_offset=%ju\n", gp->name, (u_int)i,
+		    (uintmax_t)sc->toc[i].offset, (uintmax_t)max_offset));
+		backref_to = BLEN_UNDEF;
+		if (sc->toc[i].offset < max_offset) {
+			/*
+			 * Backref'ed block: reuse the length of an earlier
+			 * entry that starts at the same offset.  Entry i is
+			 * still BLEN_UNDEF, so it must not match itself.
+			 */
+			for (j = 0; j < i; j++) {
+				if (sc->toc[j].offset == sc->toc[i].offset &&
+				    sc->toc[j].blen > 0) {
+					break;
+				}
+			}
+			if (j == i) {
+				DPRINTF(1, ("%s: cannot match backref'ed "
+				    "offset at cluster #%u\n", gp->name, i));
+				return (-1);
+			}
+			sc->toc[i].blen = sc->toc[j].blen;
+			backref_to = j;
+		} else {
+			/*
+			 * For the "normal blocks" seek forward until we hit
+			 * block whose offset is larger than ours and assume
+			 * it's going to be the next one.
+			 */
+			for (j = i + 1; j < sc->nblocks; j++) {
+				if (sc->toc[j].offset > max_offset) {
+					break;
+				}
+			}
+			sc->toc[i].blen = sc->toc[j].offset -
+			    sc->toc[i].offset;
+			if (BLK_ENDS(sc, i) > pp->mediasize) {
+				DPRINTF(1, ("%s: cluster #%u extends past "
+				    "media boundary (%ju > %ju)\n", gp->name,
+				    (u_int)i, (uintmax_t)BLK_ENDS(sc, i),
+				    (intmax_t)pp->mediasize));
+				return (-1);
+			}
+			KASSERT(max_offset <= sc->toc[i].offset, (
+			    "%s: max_offset is incorrect: %ju",
+			    gp->name, (uintmax_t)max_offset));
+			max_offset = BLK_ENDS(sc, i) - 1;
+		}
+		DPRINTF_BLK(3, i, ("%s: cluster #%u, original %u bytes, in %u "
+		    "bytes", gp->name, i, sc->blksz, sc->toc[i].blen));
+		if (backref_to != BLEN_UNDEF) {
+			DPRINTF_BLK(3, i, (" (->#%u)",
+			    (u_int)backref_to));
+		}
+		DPRINTF_BLK(3, i, ("\n"));
+	}
+	return (0);
+}
+
static struct g_geom *
g_uzip_taste(struct g_class *mp, struct g_provider *pp, int flags)
{
@@ -433,7 +580,7 @@
* Read cloop header, look for CLOOP magic, perform
* other validity checks.
*/
- DPRINTF(("%s: media sectorsize %u, mediasize %jd\n",
+ DPRINTF(1, ("%s: media sectorsize %u, mediasize %jd\n",
gp->name, pp->sectorsize, (intmax_t)pp->mediasize));
buf = g_read_data(cp, 0, pp->sectorsize, NULL);
if (buf == NULL)
@@ -441,11 +588,11 @@
header = (struct cloop_header *) buf;
if (strncmp(header->magic, CLOOP_MAGIC_START,
sizeof(CLOOP_MAGIC_START) - 1) != 0) {
- DPRINTF(("%s: no CLOOP magic\n", gp->name));
+ DPRINTF(1, ("%s: no CLOOP magic\n", gp->name));
goto err;
}
if (header->magic[0x0b] != 'V' || header->magic[0x0c] < '2') {
- DPRINTF(("%s: image version too old\n", gp->name));
+ DPRINTF(1, ("%s: image version too old\n", gp->name));
goto err;
}
@@ -472,13 +619,15 @@
gp->name, sc->nblocks);
goto err;
}
- sc->offsets = malloc(
- total_offsets * sizeof(uint64_t), M_GEOM_UZIP, M_WAITOK);
+ sc->toc = malloc(
+ total_offsets * sizeof(struct g_uzip_blk), M_GEOM_UZIP, M_WAITOK);
offsets_read = MIN(total_offsets,
(pp->sectorsize - sizeof(*header)) / sizeof(uint64_t));
- for (i = 0; i < offsets_read; i++)
- sc->offsets[i] = be64toh(((uint64_t *) (header + 1))[i]);
- DPRINTF(("%s: %u offsets in the first sector\n",
+ for (i = 0; i < offsets_read; i++) {
+ sc->toc[i].offset = be64toh(((uint64_t *) (header + 1))[i]);
+ sc->toc[i].blen = BLEN_UNDEF;
+ }
+ DPRINTF(1, ("%s: %u offsets in the first sector\n",
gp->name, offsets_read));
for (blk = 1; offsets_read < total_offsets; blk++) {
uint32_t nread;
@@ -490,16 +639,31 @@
goto err;
nread = MIN(total_offsets - offsets_read,
pp->sectorsize / sizeof(uint64_t));
- DPRINTF(("%s: %u offsets read from sector %d\n",
+ DPRINTF(2, ("%s: %u offsets read from sector %d\n",
gp->name, nread, blk));
for (i = 0; i < nread; i++) {
- sc->offsets[offsets_read + i] =
+ sc->toc[offsets_read + i].offset =
be64toh(((uint64_t *) buf)[i]);
+ sc->toc[offsets_read + i].blen = BLEN_UNDEF;
}
offsets_read += nread;
}
free(buf, M_GEOM);
- DPRINTF(("%s: done reading offsets\n", gp->name));
+ buf = NULL;
+ offsets_read -= 1;
+ DPRINTF(1, ("%s: done reading %u block offsets from %u sectors\n",
+ gp->name, offsets_read, blk));
+ if (sc->nblocks != offsets_read) {
+ DPRINTF(1, ("%s: read %s offsets than expected blocks\n",
+ gp->name,
+ sc->nblocks < offsets_read ? "more" : "less"));
+ goto err;
+ }
+ /* Massage TOC (table of contents), make sure it is sound */
+ if (g_uzip_parse_toc(sc, pp, gp) != 0) {
+ DPRINTF(1, ("%s: TOC error\n", gp->name));
+ goto err;
+ }
mtx_init(&sc->last_mtx, "geom_uzip cache", NULL, MTX_DEF);
sc->last_blk = -1;
sc->last_buf = malloc(sc->blksz, M_GEOM_UZIP, M_WAITOK);
@@ -515,11 +679,11 @@
g_error_provider(pp2, 0);
g_access(cp, -1, 0, 0);
- DPRINTF(("%s: taste ok (%d, %jd), (%d, %d), %x\n",
+ DPRINTF(1, ("%s: taste ok (%d, %jd), (%d, %d), %x\n",
gp->name,
pp2->sectorsize, (intmax_t)pp2->mediasize,
pp2->stripeoffset, pp2->stripesize, pp2->flags));
- DPRINTF(("%s: %u x %u blocks\n", gp->name, sc->nblocks, sc->blksz));
+ DPRINTF(1, ("%s: %u x %u blocks\n", gp->name, sc->nblocks, sc->blksz));
return (gp);
err:
@@ -547,7 +711,7 @@
g_topology_assert();
if (gp->softc == NULL) {
- DPRINTF(("%s(%s): gp->softc == NULL\n", __func__, gp->name));
+ DPRINTF(1, ("%s(%s): gp->softc == NULL\n", __func__, gp->name));
return (ENXIO);
}
Index: usr.bin/mkuzip/Makefile
===================================================================
--- usr.bin/mkuzip/Makefile
+++ usr.bin/mkuzip/Makefile
@@ -2,7 +2,8 @@
PROG= mkuzip
MAN= mkuzip.8
+SRCS= mkuzip.c mkuz_blockcache.c
-LIBADD= z
+LIBADD= z md
.include <bsd.prog.mk>
Index: usr.bin/mkuzip/mkuz_blockcache.h
===================================================================
--- /dev/null
+++ usr.bin/mkuzip/mkuz_blockcache.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+struct mkuz_blkcache_hit { /* what a cache lookup reports about the earlier, identical block */
+ uint64_t offset; /* the offset value passed in when that block was registered */
+ ssize_t len; /* length of the registered data in bytes */
+ uint32_t blkno; /* block number passed in when that block was registered */
+};
+
+struct mkuz_blkcache_hit *mkuz_blkcache_regblock(int, uint32_t, off_t, ssize_t,
+ void *);
Index: usr.bin/mkuzip/mkuz_blockcache.c
===================================================================
--- /dev/null
+++ usr.bin/mkuzip/mkuz_blockcache.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/types.h>
+#include <md5.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#if defined(MKUZ_DEBUG)
+# include <stdio.h>
+#endif
+
+#include "mkuz_blockcache.h"
+
+struct mkuz_blkcache { /* one remembered block; entries form a singly linked list */
+ struct mkuz_blkcache_hit hit; /* returned to the caller when a later block matches */
+ off_t data_offset; /* position of the fd (lseek SEEK_CUR) at registration time */
+ unsigned char digest[16]; /* MD5 digest of the block's data */
+ struct mkuz_blkcache *next; /* next entry, NULL at the end of the list */
+};
+
+static struct mkuz_blkcache blkcache;
+
+/* Register a block by (len, MD5); return the earlier hit for a duplicate. */
+struct mkuz_blkcache_hit *
+mkuz_blkcache_regblock(int fd, uint32_t blkno, off_t offset, ssize_t len,
+    void *data)
+{
+	struct mkuz_blkcache *ent;
+	MD5_CTX ctx;
+	off_t fpos;
+	unsigned char md[16];
+
+	/* Current position of fd; nothing is registered if it is unknown. */
+	fpos = lseek(fd, 0, SEEK_CUR);
+	if (fpos < 0)
+		return (NULL);
+	MD5Init(&ctx);
+	MD5Update(&ctx, data, len);
+	MD5Final(md, &ctx);
+	/* The static head doubles as the first entry; len == 0 means unused. */
+	ent = &blkcache;
+	if (blkcache.hit.len != 0) {
+		/* A block is a duplicate when both length and digest match. */
+		while (ent != NULL && (ent->hit.len != len ||
+		    memcmp(md, ent->digest, sizeof(md)) != 0))
+			ent = ent->next;
+		if (ent != NULL) {
+#if defined(MKUZ_DEBUG)
+			printf("cache hit %d, %d, %d\n", (int)ent->hit.offset, (int)fpos, (int)len);
+#endif
+			return (&ent->hit);
+		}
+		/* Unseen content: link a fresh entry right behind the head. */
+		ent = malloc(sizeof(struct mkuz_blkcache));
+		if (ent == NULL)
+			return (NULL);
+		memset(ent, '\0', sizeof(struct mkuz_blkcache));
+		ent->next = blkcache.next;
+		blkcache.next = ent;
+	}
+	/* Record the block; NULL means "no earlier copy" (or a failure above). */
+	memcpy(ent->digest, md, sizeof(md));
+	ent->data_offset = fpos;
+	ent->hit.offset = offset;
+	ent->hit.len = len;
+	ent->hit.blkno = blkno;
+	return (NULL);
+}
Index: usr.bin/mkuzip/mkuzip.8
===================================================================
--- usr.bin/mkuzip/mkuzip.8
+++ usr.bin/mkuzip/mkuzip.8
@@ -1,9 +1,27 @@
-.\" ----------------------------------------------------------------------------
-.\" "THE BEER-WARE LICENSE" (Revision 42):
-.\" <sobomax@FreeBSD.org> wrote this file. As long as you retain this notice you
-.\" can do whatever you want with this stuff. If we meet some day, and you think
-.\" this stuff is worth it, you can buy me a beer in return. Maxim Sobolev
-.\" ----------------------------------------------------------------------------
+.\"-
+.\" Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
.\"
.\" $FreeBSD: head/usr.bin/mkuzip/mkuzip.8 267773 2014-06-23 08:23:05Z bapt $
.\"
@@ -61,6 +79,27 @@
should be a multiple of 512 bytes.
.It Fl v
Display verbose messages.
+.It Fl Z
+Disable zero-blocks detection and elimination.
+When this option is set, the
+.Nm
+would compress empty blocks (i.e. clusters that consist of only zero bytes)
+just as it would any other block.
+When the option is not set, the
+.Nm
+detects such blocks and skips them from the output.
+Setting
+.Fl Z
+results in a slight increase of compressed image size, typically less than 0.1%
+of a final size of the compressed image.
+.It Fl d
+Enable de-duplication.
+When the option is enabled the
+.Nm
+detects identical blocks in the input and replaces each subsequent occurrence
+of such block with pointer to the very first one in the output.
+Setting this option results in a moderate decrease of compressed image size,
+typically around 3-5% of a final size of the compressed image.
.El
.Sh NOTES
The compression ratio largely depends on the cluster size used.
@@ -92,6 +131,20 @@
and automatically mount it using
.Xr mount_cd9660 8
on the mount point provided as the first argument to the script.
+.Pp
+The de-duplication is a
+.Fx
+specific feature and while it does not require any changes to on-disk
+compressed image format, it did require some matching changes to the
+.Xr geom_uzip 4
+to handle resulting images correctly.
+It is possible that other implementations may not have the logic necessary
+for proper handling of such images and an attempt to attach de-duplicated
+images on such systems could result in undefined behaviour including system
+crash or data loss.
+The same applies to the older versions of the
+.Fx
+kernel as well.
.Sh EXIT STATUS
.Ex -std
.Sh SEE ALSO
Index: usr.bin/mkuzip/mkuzip.c
===================================================================
--- usr.bin/mkuzip/mkuzip.c
+++ usr.bin/mkuzip/mkuzip.c
@@ -1,12 +1,27 @@
/*
- * ----------------------------------------------------------------------------
- * "THE BEER-WARE LICENSE" (Revision 42):
- * <sobomax@FreeBSD.ORG> wrote this file. As long as you retain this notice you
- * can do whatever you want with this stuff. If we meet some day, and you think
- * this stuff is worth it, you can buy me a beer in return. Maxim Sobolev
- * ----------------------------------------------------------------------------
+ * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
*
- * $FreeBSD: head/usr.bin/mkuzip/mkuzip.c 221832 2011-05-13 09:55:48Z ru $
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*
*/
@@ -26,6 +41,8 @@
#include <string.h>
#include <unistd.h>
+#include "mkuz_blockcache.h"
+
#define CLSTSIZE 16384
#define DEFAULT_SUFX ".uzip"
@@ -38,6 +55,7 @@
static void usage(void);
static void *safe_malloc(size_t);
static void cleanup(void);
+static int memvcmp(const void *, unsigned char, size_t);
static char *cleanfile = NULL;
@@ -45,24 +63,27 @@
{
char *iname, *oname, *obuf, *ibuf;
uint64_t *toc;
- int fdr, fdw, i, opt, verbose, tmp;
+ int fdr, fdw, i, opt, verbose, no_zcomp, tmp, en_dcomp;
struct iovec iov[2];
struct stat sb;
uLongf destlen;
- uint64_t offset;
+ uint64_t offset, last_offset;
struct cloop_header {
char magic[CLOOP_MAGIC_LEN]; /* cloop magic */
uint32_t blksz; /* block size */
uint32_t nblocks; /* number of blocks */
} hdr;
+ struct mkuz_blkcache_hit *chit;
memset(&hdr, 0, sizeof(hdr));
hdr.blksz = CLSTSIZE;
strcpy(hdr.magic, CLOOP_MAGIC_START);
oname = NULL;
verbose = 0;
+ no_zcomp = 0;
+ en_dcomp = 0;
- while((opt = getopt(argc, argv, "o:s:v")) != -1) {
+ while((opt = getopt(argc, argv, "o:s:vZd")) != -1) {
switch(opt) {
case 'o':
oname = optarg;
@@ -91,6 +112,14 @@
verbose = 1;
break;
+ case 'Z':
+ no_zcomp = 1;
+ break;
+
+ case 'd':
+ en_dcomp = 1;
+ break;
+
default:
usage();
/* Not reached */
@@ -180,37 +209,75 @@
for(i = 0; i == 0 || ibuf != NULL; i++) {
ibuf = readblock(fdr, ibuf, hdr.blksz);
if (ibuf != NULL) {
- destlen = compressBound(hdr.blksz);
- if (compress2(obuf, &destlen, ibuf, hdr.blksz,
- Z_BEST_COMPRESSION) != Z_OK) {
- errx(1, "can't compress data: compress2() "
- "failed");
- /* Not reached */
+ if (no_zcomp == 0 && \
+ memvcmp(ibuf, '\0', hdr.blksz) != 0) {
+ /* All zeroes block */
+ destlen = 0;
+ } else {
+ destlen = compressBound(hdr.blksz);
+ if (compress2(obuf, &destlen, ibuf, hdr.blksz,
+ Z_BEST_COMPRESSION) != Z_OK) {
+ errx(1, "can't compress data: "
+ "compress2() failed");
+ /* Not reached */
+ }
}
- if (verbose != 0)
- fprintf(stderr, "cluster #%d, in %u bytes, "
- "out %lu bytes\n", i, hdr.blksz, destlen);
} else {
destlen = DEV_BSIZE - (offset % DEV_BSIZE);
memset(obuf, 0, destlen);
if (verbose != 0)
- fprintf(stderr, "padding data with %lu bytes so "
- "that file size is multiple of %d\n", destlen,
- DEV_BSIZE);
+ fprintf(stderr, "padding data with %lu bytes "
+ "so that file size is multiple of %d\n",
+ destlen, DEV_BSIZE);
}
- if (write(fdw, obuf, destlen) < 0) {
- err(1, "write(%s)", oname);
- /* Not reached */
+ if (destlen > 0 && en_dcomp != 0) {
+ chit = mkuz_blkcache_regblock(fdw, i, offset, destlen,
+ obuf);
+ /*
+ * There should be at least one non-empty block
+ * between us and the backref'ed offset, otherwise
+ * we won't be able to parse that sequence correctly
+ * as it would be indistinguishable from another
+ * empty block.
+ */
+ if (chit != NULL && chit->offset == last_offset) {
+ chit = NULL;
+ }
+ } else {
+ chit = NULL;
+ }
+ if (chit != NULL) {
+ toc[i] = htobe64(chit->offset);
+ } else {
+ if (destlen > 0 && write(fdw, obuf, destlen) < 0) {
+ err(1, "write(%s)", oname);
+ /* Not reached */
+ }
+ toc[i] = htobe64(offset);
+ last_offset = offset;
+ offset += destlen;
+ }
+ if (ibuf != NULL && verbose != 0) {
+ fprintf(stderr, "cluster #%d, in %u bytes, "
+ "out len=%lu offset=%lu", i, hdr.blksz,
+ chit == NULL ? destlen : 0,
+ (u_long)be64toh(toc[i]));
+ if (chit != NULL) {
+ fprintf(stderr, " (backref'ed to #%d)",
+ chit->blkno);
+ }
+ fprintf(stderr, "\n");
+
}
- toc[i] = htobe64(offset);
- offset += destlen;
}
close(fdr);
if (verbose != 0)
fprintf(stderr, "compressed data to %ju bytes, saved %lld "
- "bytes, %.2f%% decrease.\n", offset, (long long)(sb.st_size - offset),
- 100.0 * (long long)(sb.st_size - offset) / (float)sb.st_size);
+ "bytes, %.2f%% decrease.\n", offset,
+ (long long)(sb.st_size - offset),
+ 100.0 * (long long)(sb.st_size - offset) /
+ (float)sb.st_size);
/* Convert to big endian */
hdr.blksz = htonl(hdr.blksz);
@@ -248,7 +315,8 @@
usage(void)
{
- fprintf(stderr, "usage: mkuzip [-v] [-o outfile] [-s cluster_size] infile\n");
+ fprintf(stderr, "usage: mkuzip [-vZd] [-o outfile] [-s cluster_size] "
+ "infile\n");
exit(1);
}
@@ -272,3 +340,12 @@
if (cleanfile != NULL)
unlink(cleanfile);
}
+
+/* Non-zero iff all `size' bytes at `memory' equal `val' (true when empty). */
+static int
+memvcmp(const void *memory, unsigned char val, size_t size)
+{
+	const unsigned char *mm = memory;
+	return (size == 0 ||
+	    (*mm == val && memcmp(mm, mm + 1, size - 1) == 0));
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, May 19, 6:12 PM (18 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33324405
Default Alt Text
D5333.id13441.diff (32 KB)
Attached To
Mode
D5333: Improve mkuzip(8) and geom_uzip(4), merge in LZMA support from mkulzma(8) and geom_uncompress(4)
Attached
Detach File
Event Timeline
Log In to Comment