/*
Copyright (C) 2006-2016 Con Kolivas
Copyright (C) 1998 Andrew Tridgell
Modified to use flat hash, memory limit and variable hash culling
by Rusty Russell copyright (C) 2003.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* rzip compression algorithm */
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#ifdef HAVE_SYS_MMAN_H
# include <sys/mman.h>
#endif
#include <sys/types.h>
#ifdef HAVE_SYS_STAT_H
# include <sys/stat.h>
#endif
#ifdef HAVE_SYS_TIME_H
# include <sys/time.h>
#endif
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#include <sys/statvfs.h>
#include <pthread.h>
#ifdef HAVE_ERRNO_H
# include <errno.h>
#endif
#ifdef HAVE_ENDIAN_H
# include <endian.h>
#elif HAVE_SYS_ENDIAN_H
# include <sys/endian.h>
#endif
#ifdef HAVE_ARPA_INET_H
# include <arpa/inet.h>
#endif
#include <lzo/lzo1x.h>
#include "md5.h"
#include "stream.h"
#include "util.h"
#include "lrzip_core.h"
/* needed for CRC routines */
#include "lzma/C/7zCrc.h"
#ifndef MAP_ANONYMOUS
# define MAP_ANONYMOUS MAP_ANON
#endif
#define CHUNK_MULTIPLE (100 * 1024 * 1024)
#define CKSUM_CHUNK 1024*1024
#define GREAT_MATCH 1024
#define MINIMUM_MATCH 31
/* Hash table works as follows. We start by throwing tags at every
* offset into the table. As it fills, we start eliminating tags
* which don't have lower bits set to one (ie. first we eliminate all
* even tags, then all tags divisible by four, etc.). This ensures
* that on average, all parts of the file are covered by the hash, if
* sparsely. */
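/* e.g. once the tag mask has grown to 0x1f, only offsets whose tag has the
* five lowest bits set survive in (or are admitted to) the table, roughly one
* offset in thirty-two. */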
/* All zero means empty. We might miss the first chunk this way. */
struct hash_entry {
i64 offset;
tag t;
};
/* Levels control the hash table size, the initial tag frequency and the
* maximum hash chain length. */
static struct level {
unsigned long mb_used;
unsigned initial_freq;
unsigned max_chain_len;
} levels[10] = {
{ 1, 4, 1 },
{ 2, 4, 2 },
{ 4, 4, 2 },
{ 8, 4, 2 },
{ 16, 4, 3 },
{ 32, 4, 4 },
{ 32, 2, 6 },
{ 64, 1, 16 }, /* More MB makes sense, but need bigger test files */
{ 64, 1, 32 },
{ 64, 1, 128 },
};
static void remap_low_sb(rzip_control *control, struct sliding_buffer *sb)
{
i64 new_offset;
new_offset = sb->offset_search;
round_to_page(&new_offset);
print_maxverbose("Sliding main buffer to offset %lld\n", new_offset);
if (unlikely(munmap(sb->buf_low, sb->size_low)))
failure("Failed to munmap in remap_low_sb\n");
if (new_offset + sb->size_low > sb->orig_size)
sb->size_low = sb->orig_size - new_offset;
sb->offset_low = new_offset;
sb->buf_low = (uchar *)mmap(sb->buf_low, sb->size_low, PROT_READ, MAP_SHARED, sb->fd, sb->orig_offset + sb->offset_low);
if (unlikely(sb->buf_low == MAP_FAILED))
failure("Failed to re mmap in remap_low_sb\n");
}
static inline void remap_high_sb(rzip_control *control, struct sliding_buffer *sb, i64 p)
{
if (unlikely(munmap(sb->buf_high, sb->size_high)))
failure("Failed to munmap in remap_high_sb\n");
sb->size_high = sb->high_length; /* In case we shrunk it when we hit the end of the file */
sb->offset_high = p;
/* Make sure the total offset (offset_high + orig_offset) is rounded down to a page boundary */
sb->offset_high -= (sb->offset_high + sb->orig_offset) % control->page_size;
if (unlikely(sb->offset_high + sb->size_high > sb->orig_size))
sb->size_high = sb->orig_size - sb->offset_high;
sb->buf_high = (uchar *)mmap(sb->buf_high, sb->size_high, PROT_READ, MAP_SHARED, sb->fd, sb->orig_offset + sb->offset_high);
if (unlikely(sb->buf_high == MAP_FAILED))
failure("Failed to re mmap in remap_high_sb\n");
}
/* We use a "sliding mmap" to effectively read more than we can fit into the
* compression window. This is done by using a maximally sized lower mmap at
* the beginning of the block which slides up once the hash search moves beyond
* it, and a small page-sized mmap that slides up and down as required for any
* offsets outside the range of the lower one. This is much slower than a
* single flat mmap but makes unlimited sized compression windows possible.
* We use a pointer to the function we actually want to use, and only enable
* the sliding mmap version if we need sliding mmap functionality, as this is
* a hot function during the rzip phase */
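/* e.g. buf_low covers offsets [offset_low, offset_low + size_low) and
* buf_high covers a single page-aligned window anywhere in the chunk;
* sliding_get_sb(p) returns a pointer into whichever mapping holds p,
* remapping buf_high when neither does. */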
static uchar *sliding_get_sb(rzip_control *control, i64 p)
{
struct sliding_buffer *sb = &control->sb;
i64 sbo;
sbo = sb->offset_low;
if (p >= sbo && p < sbo + sb->size_low)
return (sb->buf_low + p - sbo);
sbo = sb->offset_high;
if (p >= sbo && p < (sbo + sb->size_high))
return (sb->buf_high + (p - sbo));
/* p is not within the low or high buffer range */
remap_high_sb(control, &control->sb, p);
/* Use sb->offset_high directly since it will have changed */
return (sb->buf_high + (p - sb->offset_high));
}
/* The length of the continuous range of the sliding buffer,
* starting from the offset P.
*/
static inline i64 sliding_get_sb_range(rzip_control *control, i64 p)
{
struct sliding_buffer *sb = &control->sb;
i64 sbo, sbs;
sbo = sb->offset_low;
sbs = sb->size_low;
if (p >= sbo && p < sbo + sbs)
return (sbs - (p - sbo));
sbo = sb->offset_high;
sbs = sb->size_high;
if (likely(p >= sbo && p < (sbo + sbs)))
return (sbs - (p - sbo));
fatal_return(("sliding_get_sb_range: the pointer is out of range\n"), 0);
}
/* The sliding get_sb only gives us access to one byte position at a time, so
* sliding_mcpy copies in as few memcpy calls as possible, since one large
* memcpy is much faster than numerous 1 byte memcpys */
static void single_mcpy(rzip_control *control, unsigned char *buf, i64 offset, i64 len)
{
memcpy(buf, control->sb.buf_low + offset, len);
}
static void sliding_mcpy(rzip_control *control, unsigned char *buf, i64 offset, i64 len)
{
i64 n = 0;
while (n < len) {
uchar *srcbuf = sliding_get_sb(control, offset + n);
i64 m = MIN(sliding_get_sb_range(control, offset + n), len - n);
memcpy(buf + n, srcbuf, m);
n += m;
}
}
/* All put_u8/u32/vchars go to stream 0 */
static inline void put_u8(rzip_control *control, void *ss, uchar b)
{
write_stream(control, ss, 0, &b, 1);
}
static inline void put_u32(rzip_control *control, void *ss, uint32_t s)
{
s = htole32(s);
write_stream(control, ss, 0, (uchar *)&s, 4);
}
/* Put a variable number of bytes, dependent on how big the chunk is */
static void put_vchars(rzip_control *control, void *ss, i64 s, i64 length)
{
s = htole64(s);
write_stream(control, ss, 0, (uchar *)&s, length);
}
static void put_header(rzip_control *control, void *ss, uchar head, i64 len)
{
put_u8(control, ss, head);
put_vchars(control, ss, len, 2);
}
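/* Record layout on stream 0: a match is a header byte of 1, a two byte
* little endian length (at most 0xFFFF) and a chunk_bytes wide little endian
* offset; a literal run is a header byte of 0 and a two byte length, with the
* literal data itself written to stream 1. */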
static inline void put_match(rzip_control *control, struct rzip_state *st,
i64 p, i64 offset, i64 len)
{
do {
i64 ofs;
i64 n = len;
if (n > 0xFFFF)
n = 0xFFFF;
ofs = (p - offset);
put_header(control, st->ss, 1, n);
put_vchars(control, st->ss, ofs, st->chunk_bytes);
st->stats.matches++;
st->stats.match_bytes += n;
len -= n;
p += n;
offset += n;
} while (len);
}
/* Write some mmapped data to a stream, flushing the stream buffer whenever it fills */
static inline void write_sbstream(rzip_control *control, void *ss, int stream,
i64 p, i64 len)
{
struct stream_info *sinfo = ss;
while (len) {
i64 n = MIN(sinfo->bufsize - sinfo->s[stream].buflen, len);
control->do_mcpy(control, sinfo->s[stream].buf + sinfo->s[stream].buflen, p, n);
sinfo->s[stream].buflen += n;
p += n;
len -= n;
if (sinfo->s[stream].buflen == sinfo->bufsize)
flush_buffer(control, sinfo, stream);
}
}
static void put_literal(rzip_control *control, struct rzip_state *st, i64 last, i64 p)
{
do {
i64 len = p - last;
if (len > 0xFFFF)
len = 0xFFFF;
st->stats.literals++;
st->stats.literal_bytes += len;
put_header(control, st->ss, 0, len);
if (len)
write_sbstream(control, st->ss, 1, last, len);
last += len;
} while (p > last);
}
/* Could give false positive on offset 0. Who cares. */
static inline bool empty_hash(struct hash_entry *he)
{
return !(he->offset | he->t);
}
static i64 primary_hash(struct rzip_state *st, tag t)
{
return t & ((1 << st->hash_bits) - 1);
}
static inline tag increase_mask(tag tag_mask)
{
/* Get more precise. */
return (tag_mask << 1) | 1;
}
static inline bool minimum_bitness(struct rzip_state *st, tag t)
{
tag better_than_min = increase_mask(st->minimum_tag_mask);
if ((t & better_than_min) != better_than_min)
return true;
return false;
}
/* Is a going to be cleaned before b? ie. does a have fewer low bits
* set than b? */
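/* e.g. a ending in binary 0111 (three low bits set) is cleaned before b
* ending in 1111 (four low bits set), since ffsll(~a) == 4 < ffsll(~b) == 5. */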
static inline bool lesser_bitness(tag a, tag b)
{
a ^= 0xffffffffffffffff;
b ^= 0xffffffffffffffff;
return (ffsll(a) < ffsll(b));
}
/* If hash bucket is taken, we spill into next bucket(s). Secondary hashing
works better in theory, but modern caches make this 20% faster. */
static void insert_hash(struct rzip_state *st, tag t, i64 offset)
{
i64 h, victim_h = 0, round = 0;
/* If we need to kill one, this will be it. */
static i64 victim_round = 0;
struct hash_entry *he;
h = primary_hash(st, t);
he = &st->hash_table[h];
while (!empty_hash(he)) {
/* If this is due for cleaning anyway, just replace it:
rehashing might move it behind tag_clean_ptr. */
if (minimum_bitness(st, he->t)) {
st->hash_count--;
break;
}
/* If we are better than current occupant, we can't
jump over it: it will be cleaned before us, and
no one would then find us in the hash table. Rehash
it, then take its place. */
if (lesser_bitness(he->t, t)) {
insert_hash(st, he->t,
he->offset);
break;
}
/* If we have lots of identical patterns, we end up
with lots of the same hash number. Discard one at random. */
if (he->t == t) {
if (round == victim_round)
victim_h = h;
if (++round == st->level->max_chain_len) {
h = victim_h;
he = &st->hash_table[h];
st->hash_count--;
victim_round++;
if (victim_round == st->level->max_chain_len)
victim_round = 0;
break;
}
}
h++;
h &= ((1 << st->hash_bits) - 1);
he = &st->hash_table[h];
}
he->t = t;
he->offset = offset;
}
/* Eliminate one hash entry with minimum number of lower bits set.
Returns tag requirement for any new entries. */
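/* e.g. with minimum_tag_mask 0x0f the sweep removes the next entry whose tag
lacks bit 4, and 0x1f becomes the requirement for new insertions. */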
static inline tag clean_one_from_hash(rzip_control *control, struct rzip_state *st)
{
struct hash_entry *he;
tag better_than_min;
again:
better_than_min = increase_mask(st->minimum_tag_mask);
if (!st->tag_clean_ptr)
print_maxverbose("Starting sweep for mask %u\n", (unsigned int)st->minimum_tag_mask);
for (; st->tag_clean_ptr < (1U << st->hash_bits); st->tag_clean_ptr++) {
he = &st->hash_table[st->tag_clean_ptr];
if (empty_hash(he))
continue;
if ((he->t & better_than_min) != better_than_min) {
he->offset = 0;
he->t = 0;
st->hash_count--;
return better_than_min;
}
}
/* We hit the end: everything in hash satisfies the better mask. */
st->minimum_tag_mask = better_than_min;
st->tag_clean_ptr = 0;
goto again;
}
static void single_next_tag(rzip_control *control, struct rzip_state *st, i64 p, tag *t)
{
uchar u;
u = control->sb.buf_low[p - 1];
*t ^= st->hash_index[u];
u = control->sb.buf_low[p + MINIMUM_MATCH - 1];
*t ^= st->hash_index[u];
}
static void sliding_next_tag(rzip_control *control, struct rzip_state *st, i64 p, tag *t)
{
uchar *u;
u = sliding_get_sb(control, p - 1);
*t ^= st->hash_index[*u];
u = sliding_get_sb(control, p + MINIMUM_MATCH - 1);
*t ^= st->hash_index[*u];
}
static tag single_full_tag(rzip_control *control, struct rzip_state *st, i64 p)
{
tag ret = 0;
int i;
uchar u;
for (i = 0; i < MINIMUM_MATCH; i++) {
u = control->sb.buf_low[p + i];
ret ^= st->hash_index[u];
}
return ret;
}
static tag sliding_full_tag(rzip_control *control, struct rzip_state *st, i64 p)
{
tag ret = 0;
int i;
uchar *u;
for (i = 0; i < MINIMUM_MATCH; i++) {
u = sliding_get_sb(control, p + i);
ret ^= st->hash_index[*u];
}
return ret;
}
static i64
single_match_len(rzip_control *control, struct rzip_state *st, i64 p0, i64 op,
i64 end, i64 *rev)
{
i64 p, len;
if (op >= p0)
return 0;
p = p0;
while (p < end && control->sb.buf_low[p] == control->sb.buf_low[op]) {
p++;
op++;
}
len = p - p0;
p = p0;
op -= len;
end = MAX(0, st->last_match);
while (p > end && op > 0 && control->sb.buf_low[op - 1] == control->sb.buf_low[p - 1]) {
op--;
p--;
}
len += *rev = p0 - p;
if (len < MINIMUM_MATCH)
return 0;
return len;
}
static i64
sliding_match_len(rzip_control *control, struct rzip_state *st, i64 p0, i64 op,
i64 end, i64 *rev)
{
i64 p, len;
if (op >= p0)
return 0;
p = p0;
while (p < end && *sliding_get_sb(control, p) == *sliding_get_sb(control, op)) {
p++;
op++;
}
len = p - p0;
p = p0;
op -= len;
end = MAX(0, st->last_match);
while (p > end && op > 0 && *sliding_get_sb(control, op - 1) == *sliding_get_sb(control, p - 1)) {
op--;
p--;
}
len += *rev = p0 - p;
if (len < MINIMUM_MATCH)
return 0;
return len;
}
static inline i64
find_best_match(rzip_control *control, struct rzip_state *st, tag t, i64 p,
i64 end, i64 *offset, i64 *reverse)
{
struct hash_entry *he;
i64 length = 0;
i64 rev;
i64 h;
rev = 0;
*reverse = 0;
/* Could optimise: if lesser goodness, can stop search. But
* chains are usually short anyway. */
h = primary_hash(st, t);
he = &st->hash_table[h];
while (!empty_hash(he)) {
i64 mlen;
if (t == he->t) {
mlen = control->match_len(control, st, p, he->offset, end,
&rev);
if (mlen) {
if (mlen > length) {
length = mlen;
(*offset) = he->offset - rev;
(*reverse) = rev;
}
st->stats.tag_hits++;
} else
st->stats.tag_misses++;
}
h++;
h &= ((1 << st->hash_bits) - 1);
he = &st->hash_table[h];
}
return length;
}
static void show_distrib(rzip_control *control, struct rzip_state *st)
{
struct hash_entry *he;
i64 primary = 0;
i64 total = 0;
i64 i;
for (i = 0; i < (1U << st->hash_bits); i++) {
he = &st->hash_table[i];
if (empty_hash(he))
continue;
total++;
if (primary_hash(st, he->t) == i)
primary++;
}
if (total != st->hash_count)
print_err("WARNING: hash_count says total %lld\n", st->hash_count);
if (!total)
print_output("0 total hashes\n");
else {
print_output("%lld total hashes -- %lld in primary bucket (%-2.3f%%)\n",
total, primary, primary * 100.0 / total);
}
}
/* Perform all checksumming in a separate thread to speed up the hash search. */
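/* The cksumsem semaphore acts as a binary lock around control->checksum:
* hash_search waits on it before filling in buf/len/cksum, and this thread
* posts it again once the buffer has been processed and freed. */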
static void *cksumthread(void *data)
{
rzip_control *control = (rzip_control *)data;
pthread_detach(pthread_self());
*control->checksum.cksum = CrcUpdate(*control->checksum.cksum, control->checksum.buf, control->checksum.len);
if (!NO_MD5)
md5_process_bytes(control->checksum.buf, control->checksum.len, &control->ctx);
free(control->checksum.buf);
cksem_post(control, &control->cksumsem);
return NULL;
}
static inline void cksum_update(rzip_control *control)
{
pthread_t thread;
create_pthread(control, &thread, NULL, cksumthread, control);
}
static inline void hash_search(rzip_control *control, struct rzip_state *st,
double pct_base, double pct_multiple)
{
i64 cksum_limit = 0, p, end, cksum_chunks, cksum_remains, i;
tag t = 0, tag_mask = (1 << st->level->initial_freq) - 1;
struct sliding_buffer *sb = &control->sb;
int lastpct = 0, last_chunkpct = 0;
struct {
i64 p;
i64 ofs;
i64 len;
} current;
if (st->hash_table)
memset(st->hash_table, 0, sizeof(st->hash_table[0]) * (1 << st->hash_bits));
else {
i64 hashsize = st->level->mb_used *
(1024 * 1024 / sizeof(st->hash_table[0]));
for (st->hash_bits = 0; (1U << st->hash_bits) < hashsize; st->hash_bits++);
print_maxverbose("hashsize = %lld. bits = %lld. %luMB\n",
hashsize, st->hash_bits, st->level->mb_used);
/* 66% full at max. */
st->hash_limit = (1 << st->hash_bits) / 3 * 2;
st->hash_table = calloc(sizeof(st->hash_table[0]), (1 << st->hash_bits));
if (unlikely(!st->hash_table))
failure("Failed to allocate hash table in hash_search\n");
}
st->minimum_tag_mask = tag_mask;
st->tag_clean_ptr = 0;
st->cksum = 0;
st->hash_count = 0;
p = 0;
end = st->chunk_size - MINIMUM_MATCH;
st->last_match = p;
current.len = 0;
current.p = p;
current.ofs = 0;
if (likely(end > 0))
t = control->full_tag(control, st, p);
while (p < end) {
i64 reverse, mlen, offset;
sb->offset_search = ++p;
if (unlikely(sb->offset_search > sb->offset_low + sb->size_low))
remap_low_sb(control, &control->sb);
if (unlikely(p % 128 == 0 && st->chunk_size)) {
i64 chunk_pct;
int pct;
pct = pct_base + (pct_multiple * (100.0 * p) / st->chunk_size );
chunk_pct = p * 100 / end;
if (pct != lastpct || chunk_pct != last_chunkpct) {
if (!STDIN || st->stdin_eof)
print_progress("Total: %2d%% ", pct);
print_progress("Chunk: %2d%%\r", chunk_pct);
if (control->info_cb)
control->info_cb(control->info_data,
(!STDIN || st->stdin_eof) ? pct : -1, chunk_pct);
lastpct = pct;
last_chunkpct = chunk_pct;
}
}
control->next_tag(control, st, p, &t);
/* Don't look for a match if there are no tags with
this number of bits in the hash table. */
if ((t & st->minimum_tag_mask) != st->minimum_tag_mask)
continue;
offset = 0;
mlen = find_best_match(control, st, t, p, end, &offset, &reverse);
/* Only insert occasionally into hash. */
if ((t & tag_mask) == tag_mask) {
st->stats.inserts++;
st->hash_count++;
insert_hash(st, t, p);
if (st->hash_count > st->hash_limit)
tag_mask = clean_one_from_hash(control, st);
}
if (mlen > current.len) {
current.p = p - reverse;
current.len = mlen;
current.ofs = offset;
}
if ((current.len >= GREAT_MATCH || p >= current.p + MINIMUM_MATCH)
&& current.len >= MINIMUM_MATCH) {
if (st->last_match < current.p)
put_literal(control, st, st->last_match, current.p);
put_match(control, st, current.p, current.ofs, current.len);
st->last_match = current.p + current.len;
current.p = p = st->last_match;
current.len = 0;
t = control->full_tag(control, st, p);
}
if (p > cksum_limit) {
/* We take the cksumsem semaphore here and it is posted again in the
* cksumthread. This protects all the data in
* control->checksum.
*/
cksem_wait(control, &control->cksumsem);
control->checksum.len = MIN(st->chunk_size - p, control->page_size);
control->checksum.buf = malloc(control->checksum.len);
if (unlikely(!control->checksum.buf))
failure("Failed to malloc ckbuf in hash_search\n");
control->do_mcpy(control, control->checksum.buf, cksum_limit, control->checksum.len);
control->checksum.cksum = &st->cksum;
cksum_update(control);
cksum_limit += control->checksum.len;
}
}
if (MAX_VERBOSE)
show_distrib(control, st);
if (st->last_match < st->chunk_size)
put_literal(control, st, st->last_match, st->chunk_size);
if (st->chunk_size > cksum_limit) {
i64 cksum_len = control->maxram;
void *buf;
while (42) {
round_to_page(&cksum_len);
buf = malloc(cksum_len);
if (buf) {
print_maxverbose("Malloced %"PRId64" for checksum ckbuf\n", cksum_len);
break;
}
cksum_len = cksum_len / 3 * 2;
if (cksum_len < control->page_size)
failure("Failed to malloc any ram for checksum ckbuf\n");
}
/* Compute checksum. If the entire chunk is longer than maxram,
* do it "per-partes" */
cksem_wait(control, &control->cksumsem);
control->checksum.buf = buf;
control->checksum.len = st->chunk_size - cksum_limit;
cksum_chunks = control->checksum.len / cksum_len;
cksum_remains = control->checksum.len % cksum_len;
for (i = 0; i < cksum_chunks; i++) {
control->do_mcpy(control, control->checksum.buf, cksum_limit, cksum_len);
cksum_limit += cksum_len;
st->cksum = CrcUpdate(st->cksum, control->checksum.buf, cksum_len);
if (!NO_MD5)
md5_process_bytes(control->checksum.buf, cksum_len, &control->ctx);
}
/* Process end of the checksum buffer */
control->do_mcpy(control, control->checksum.buf, cksum_limit, cksum_remains);
st->cksum = CrcUpdate(st->cksum, control->checksum.buf, cksum_remains);
if (!NO_MD5)
md5_process_bytes(control->checksum.buf, cksum_remains, &control->ctx);
free(control->checksum.buf);
cksem_post(control, &control->cksumsem);
} else {
cksem_wait(control, &control->cksumsem);
cksem_post(control, &control->cksumsem);
}
put_literal(control, st, 0, 0);
put_u32(control, st->ss, st->cksum);
}
static inline void init_hash_indexes(struct rzip_state *st)
{
int i;
for (i = 0; i < 256; i++)
st->hash_index[i] = ((random() << 16) ^ random());
}
#if !defined(__linux)
# define mremap fake_mremap
static inline void *fake_mremap(void *old_address, size_t old_size, size_t new_size, int flags __UNUSED__)
{
if (new_size > old_size) {
fprintf(stderr, "fake_mremap: This should only be used to shrink things. I'm not bothering with this.\n");
exit(1);
} else {
/* new_size occupies N pages; old_size occupies M > N pages;
we want to unmap the M - N pages at the end.
note the idiom: ceiling(n/k) = (n+k-1) div k */
size_t kept_n = (new_size + PAGE_SIZE - 1) / PAGE_SIZE;
int ret = munmap(old_address + (kept_n * PAGE_SIZE), old_size - (kept_n * PAGE_SIZE));
if (ret < 0)
return MAP_FAILED;
return old_address;
}
}
#endif
/* stdin is not file backed so we have to emulate the mmap by mapping
* anonymous ram and reading stdin into it. It means the maximum ram
* we can use will be less but we will already have determined this in
* rzip_chunk */
static inline void mmap_stdin(rzip_control *control, uchar *buf,
struct rzip_state *st)
{
i64 len = st->chunk_size;
uchar *offset_buf = buf;
ssize_t ret;
i64 total;
total = 0;
while (len > 0) {
ret = MIN(len, one_g);
ret = read(fileno(control->inFILE), offset_buf, (size_t)ret);
if (unlikely(ret < 0))
failure("Failed to read in mmap_stdin\n");
total += ret;
if (ret == 0) {
/* Should be EOF */
print_maxverbose("Shrinking chunk to %lld\n", total);
if (likely(total)) {
buf = (uchar *)mremap(buf, st->chunk_size, total, 0);
st->mmap_size = st->chunk_size = total;
} else {
/* Empty file */
buf = (uchar *)mremap(buf, st->chunk_size, control->page_size, 0);
st->mmap_size = control->page_size;
st->chunk_size = 0;
}
if (unlikely(buf == MAP_FAILED))
failure("Failed to remap to smaller buf in mmap_stdin\n");
control->eof = st->stdin_eof = 1;
break;
}
offset_buf += ret;
len -= ret;
}
control->st_size += total;
}
static inline void
init_sliding_mmap(rzip_control *control, struct rzip_state *st, int fd_in,
i64 offset)
{
struct sliding_buffer *sb = &control->sb;
/* Initialise the high buffer. One page size is fastest to manipulate */
if (!STDIN) {
sb->high_length = control->page_size;
sb->buf_high = (uchar *)mmap(NULL, sb->high_length, PROT_READ, MAP_SHARED, fd_in, offset);
if (unlikely(sb->buf_high == MAP_FAILED))
failure("Unable to mmap buf_high in init_sliding_mmap\n");
sb->size_high = sb->high_length;
sb->offset_high = 0;
}
sb->offset_low = 0;
sb->offset_search = 0;
sb->size_low = st->mmap_size;
sb->orig_size = st->chunk_size;
sb->fd = fd_in;
}
/* compress a chunk of an open file. Assumes that the file is able to
be mmap'd and is seekable */
static inline void
rzip_chunk(rzip_control *control, struct rzip_state *st, int fd_in, int fd_out,
i64 offset, double pct_base, double pct_multiple)
{
struct sliding_buffer *sb = &control->sb;
init_sliding_mmap(control, st, fd_in, offset);
st->ss = open_stream_out(control, fd_out, NUM_STREAMS, st->chunk_size, st->chunk_bytes);
if (unlikely(!st->ss))
failure("Failed to open streams in rzip_chunk\n");
print_verbose("Beginning rzip pre-processing phase\n");
hash_search(control, st, pct_base, pct_multiple);
/* unmap buffer before closing and reallocating streams */
if (unlikely(munmap(sb->buf_low, sb->size_low))) {
close_stream_out(control, st->ss);
failure("Failed to munmap in rzip_chunk\n");
}
if (!STDIN) {
if (unlikely(munmap(sb->buf_high, sb->size_high))) {
close_stream_out(control, st->ss);
failure("Failed to munmap in rzip_chunk\n");
}
}
if (unlikely(close_stream_out(control, st->ss)))
failure("Failed to flush/close streams in rzip_chunk\n");
}
/* compress a whole file chunks at a time */
void rzip_fd(rzip_control *control, int fd_in, int fd_out)
{
struct sliding_buffer *sb = &control->sb;
/* add timers for ETA estimates
* Base it off the file size and number of iterations required
* depending on compression window size
* Track elapsed time and estimated time to go
* If file size < compression window, can't do
*/
struct timeval current, start, last;
i64 len = 0, last_chunk = 0;
int pass = 0, passes, j;
double chunkmbs, tdiff;
struct rzip_state *st;
struct statvfs fbuf;
struct stat s, s2;
i64 free_space;
init_mutex(control, &control->control_lock);
if (!NO_MD5)
md5_init_ctx(&control->ctx);
cksem_init(control, &control->cksumsem);
cksem_post(control, &control->cksumsem);
st = calloc(sizeof(*st), 1);
if (unlikely(!st))
failure("Failed to allocate control state in rzip_fd\n");
if (LZO_COMPRESS) {
if (unlikely(lzo_init() != LZO_E_OK)) {
free(st);
failure("lzo_init() failed\n");
}
}
if (unlikely(fstat(fd_in, &s))) {
free(st);
failure("Failed to stat fd_in in rzip_fd\n");
}
if (!STDIN) {
len = control->st_size = s.st_size;
print_verbose("File size: %lld\n", len);
} else
control->st_size = 0;
if (!STDOUT) {
/* Check if there's enough free space on the device chosen to fit the
* compressed file, based on the compressed file being as large as the
* uncompressed file. */
if (unlikely(fstatvfs(fd_out, &fbuf))) {
free(st);
failure("Failed to fstatvfs in compress_file\n");
}
free_space = (i64)fbuf.f_bsize * (i64)fbuf.f_bavail;
if (free_space < control->st_size) {
if (FORCE_REPLACE)
print_err("Warning, possibly inadequate free space detected, but attempting to compress due to -f option being used.\n");
else {
free(st);
failure("Possibly inadequate free space to compress file, use -f to override.\n");
}
}
}
/* Optimal use of ram involves using no more than 2/3 of it, so we
* allocate 1/3 of it to the main buffer and use a sliding mmap
* buffer to work on 2/3 ram size, leaving enough ram for the
* compression backends */
control->max_mmap = control->maxram;
round_to_page(&control->max_mmap);
/* Set maximum chunk size to 2/3 of ram if not unlimited or specified
* by a control window. When it's smaller than the file size, round it
* to page size for efficiency. */
if (UNLIMITED)
control->max_chunk = control->st_size;
else if (control->window)
control->max_chunk = control->window * CHUNK_MULTIPLE;
else
control->max_chunk = control->ramsize / 3 * 2;
control->max_mmap = MIN(control->max_mmap, control->max_chunk);
if (control->max_chunk < control->st_size)
round_to_page(&control->max_chunk);
if (!STDIN)
st->chunk_size = MIN(control->max_chunk, len);
else
st->chunk_size = control->max_mmap;
if (st->chunk_size < len)
round_to_page(&st->chunk_size);
st->level = &levels[control->compression_level];
st->fd_in = fd_in;
st->fd_out = fd_out;
st->stdin_eof = 0;
init_hash_indexes(st);
passes = 0;
/* set timers and chunk counter */
last.tv_sec = last.tv_usec = 0;
gettimeofday(&start, NULL);
prepare_streamout_threads(control);
control->do_mcpy = single_mcpy;
control->next_tag = &single_next_tag;
control->full_tag = &single_full_tag;
control->match_len = &single_match_len;
while (!pass || len > 0 || (STDIN && !st->stdin_eof)) {
double pct_base, pct_multiple;
i64 offset = s.st_size - len;
int bits = 8;
st->chunk_size = control->max_chunk;
st->mmap_size = control->max_mmap;
if (!STDIN) {
st->chunk_size = MIN(st->chunk_size, len);
if (likely(st->chunk_size))
st->mmap_size = MIN(st->mmap_size, len);
else
st->mmap_size = control->page_size;
}
retry:
if (STDIN) {
/* NOTE the buf is saved here for STDIN mode */
sb->buf_low = mmap(NULL, st->mmap_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
/* Better to shrink the window to the largest size that works than fail */
if (sb->buf_low == MAP_FAILED) {
if (unlikely(errno != ENOMEM)) {
close_streamout_threads(control);
free(st->hash_table);
free(st);
failure("Failed to mmap %s\n", control->infile);
}
st->mmap_size = st->mmap_size / 10 * 9;
round_to_page(&st->mmap_size);
if (unlikely(!st->mmap_size)) {
close_streamout_threads(control);
free(st->hash_table);
free(st);
failure("Unable to mmap any ram\n");
}
goto retry;
}
st->chunk_size = st->mmap_size;
mmap_stdin(control, sb->buf_low, st);
} else {
/* NOTE The buf is saved here for !STDIN mode */
sb->buf_low = (uchar *)mmap(sb->buf_low, st->mmap_size, PROT_READ, MAP_SHARED, fd_in, offset);
if (sb->buf_low == MAP_FAILED) {
if (unlikely(errno != ENOMEM)) {
close_streamout_threads(control);
free(st->hash_table);
free(st);
failure("Failed to mmap %s\n", control->infile);
}
st->mmap_size = st->mmap_size / 10 * 9;
round_to_page(&st->mmap_size);
if (unlikely(!st->mmap_size)) {
close_streamout_threads(control);
free(st->hash_table);
free(st);
failure("Unable to mmap any ram\n");
}
goto retry;
}
if (st->mmap_size < st->chunk_size) {
print_maxverbose("Enabling sliding mmap mode and using mmap of %lld bytes with window of %lld bytes\n", st->mmap_size, st->chunk_size);
control->do_mcpy = &sliding_mcpy;
control->next_tag = &sliding_next_tag;
control->full_tag = &sliding_full_tag;
control->match_len = &sliding_match_len;
}
}
print_maxverbose("Succeeded in testing %lld sized mmap for rzip pre-processing\n", st->mmap_size);
if (st->chunk_size > control->ramsize)
print_verbose("Compression window is larger than ram, will proceed with unlimited mode possibly much slower\n");
if (!passes && !STDIN && st->chunk_size) {
passes = s.st_size / st->chunk_size + !!(s.st_size % st->chunk_size);
if (passes == 1)
print_verbose("Will take 1 pass\n");
else
print_verbose("Will take %d passes\n", passes);
}
sb->orig_offset = offset;
print_maxverbose("Chunk size: %lld\n", st->chunk_size);
/* Determine the chunk byte width to write to the file
* This allows archives of different chunk sizes to have
* optimal byte width entries. When working with stdin we
* won't know in advance how big it is so it will always be
* rounded up to the window size. */
while (st->chunk_size >> bits > 0)
bits++;
st->chunk_bytes = bits / 8;
if (bits % 8)
st->chunk_bytes++;
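/* e.g. a 200MB chunk needs 28 significant bits, so match offsets for this
chunk are written as 4 byte values */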
print_maxverbose("Byte width: %d\n", st->chunk_bytes);
if (STDIN)
pct_base = (100.0 * -len) / control->st_size;
else
pct_base = (100.0 * (control->st_size - len)) / control->st_size;
pct_multiple = ((double)st->chunk_size) / control->st_size;
pass++;
if (st->stdin_eof)
passes = pass;
gettimeofday(&current, NULL);
/* this will count only when size > window */
if (last.tv_sec > 0 && pct_base > 100) {
unsigned int eta_hours, eta_minutes, eta_seconds, elapsed_time, finish_time,
elapsed_hours, elapsed_minutes, elapsed_seconds, diff_seconds;
elapsed_time = current.tv_sec - start.tv_sec;
finish_time = elapsed_time / (pct_base / 100.0);
elapsed_hours = elapsed_time / 3600;
elapsed_minutes = (elapsed_time / 60) % 60;
elapsed_seconds = elapsed_time % 60;
diff_seconds = finish_time - elapsed_time;
eta_hours = diff_seconds / 3600;
eta_minutes = (diff_seconds / 60) % 60;
eta_seconds = diff_seconds % 60;
chunkmbs = (last_chunk / 1024 / 1024) / (double)(current.tv_sec-last.tv_sec);
if (!STDIN || st->stdin_eof)
print_verbose("\nPass %d / %d -- Elapsed Time: %02d:%02d:%02d. ETA: %02d:%02d:%02d. Compress Speed: %3.3fMB/s.\n",
pass, passes, elapsed_hours, elapsed_minutes, elapsed_seconds,
eta_hours, eta_minutes, eta_seconds, chunkmbs);
else
print_verbose("\nPass %d -- Elapsed Time: %02d:%02d:%02d. Compress Speed: %3.3fMB/s.\n",
pass, elapsed_hours, elapsed_minutes, elapsed_seconds, chunkmbs);
}
last.tv_sec = current.tv_sec;
last.tv_usec = current.tv_usec;
if (st->chunk_size == len)
control->eof = 1;
rzip_chunk(control, st, fd_in, fd_out, offset, pct_base, pct_multiple);
/* st->chunk_size may be shrunk in rzip_chunk */
last_chunk = st->chunk_size;
len -= st->chunk_size;
if (unlikely(len > 0 && control->eof)) {
close_streamout_threads(control);
free(st->hash_table);
free(st);
failure("Wrote EOF to file yet chunk_size was shrunk, corrupting archive.\n");
}
}
if (likely(st->hash_table))
free(st->hash_table);
if (unlikely(!close_streamout_threads(control))) {
free(st);
failure("Failed to close_streamout_threads in rzip_fd\n");
}
if (!NO_MD5) {
/* Temporary workaround till someone fixes apple md5 */
md5_finish_ctx(&control->ctx, control->md5_resblock);
if (HASH_CHECK || MAX_VERBOSE) {
print_output("MD5: ");
for (j = 0; j < MD5_DIGEST_SIZE; j++)
print_output("%02x", control->md5_resblock[j] & 0xFF);
print_output("\n");
}
/* When encrypting data, we encrypt the MD5 value as well */
if (ENCRYPT)
if (unlikely(!lrz_encrypt(control, control->md5_resblock, MD5_DIGEST_SIZE, control->salt_pass))) {
free(st);
failure("Failed to lrz_encrypt in rzip_fd\n");
}
if (unlikely(write_1g(control, control->md5_resblock, MD5_DIGEST_SIZE) != MD5_DIGEST_SIZE)) {
free(st);
failure("Failed to write md5 in rzip_fd\n");
}
}
if (TMP_OUTBUF) {
if (unlikely(!flush_tmpoutbuf(control))) {
free(st);
failure("Failed to flush_tmpoutbuf in rzip_fd\n");
}
}
gettimeofday(&current, NULL);
if (STDIN)
s.st_size = control->st_size;
tdiff = current.tv_sec - start.tv_sec;
if (!tdiff)
tdiff = 1;
chunkmbs = (s.st_size / 1024 / 1024) / tdiff;
fstat(fd_out, &s2);
print_maxverbose("matches=%u match_bytes=%u\n",
(unsigned int)st->stats.matches, (unsigned int)st->stats.match_bytes);
print_maxverbose("literals=%u literal_bytes=%u\n",
(unsigned int)st->stats.literals, (unsigned int)st->stats.literal_bytes);
print_maxverbose("true_tag_positives=%u false_tag_positives=%u\n",
(unsigned int)st->stats.tag_hits, (unsigned int)st->stats.tag_misses);
print_maxverbose("inserts=%u match %.3f\n",
(unsigned int)st->stats.inserts,
(1.0 + st->stats.match_bytes) / st->stats.literal_bytes);
if (!STDIN)
print_progress("%s - ", control->infile);
print_progress("Compression Ratio: %.3f. Average Compression Speed: %6.3fMB/s.\n",
1.0 * s.st_size / s2.st_size, chunkmbs);
free(st);
}
void rzip_control_free(rzip_control *control)
{
size_t x;
if (!control)
return;
free(control->tmpdir);
free(control->outname);
free(control->outdir);
if (control->suffix && control->suffix[0]) free(control->suffix);
for (x = 0; x < control->sinfo_idx; x++) {
free(control->sinfo_queue[x]->s);
free(control->sinfo_queue[x]);
}
free(control->sinfo_queue);
free(control);
}