Add original UC2 compression engine with LZ77+Huffman coding
Implement a compressor that produces bitstreams compatible with the existing Bobrowski decompressor. The engine uses LZ77 sliding-window match finding with hash chains, Huffman entropy coding, and delta-coded tree serialization matching the original UC2 format exactly. New files: - lib/src/compress.c: LZ77+Huffman compressor (~950 lines) - lib/src/uc2_internal.h: shared constants, types, checksums - lib/src/uc2_tables.c: vval/ivval delta tables, default Huffman tree - tests/src/test_roundtrip.c: compress→archive→decompress→verify tests Key details: - 4 compression levels (Fast/Normal/Tight/Ultra) with tunable search - Lazy evaluation for better match selection at higher levels - Delta-coded Huffman tree serialization with RLE - Fletcher/XOR checksum computation - Round-trip test covers 8 patterns × 4 levels (32 test cases) Fixed 28 errors in the hand-computed ivval inverse delta table (rows 9-13) that caused the decompressor to reconstruct wrong Huffman trees from compressor output.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
# libuc2 — UC2 decompression library
|
||||
|
||||
set(LIBUC2_SOURCES src/decompress.c)
|
||||
set(LIBUC2_SOURCES src/decompress.c src/compress.c src/uc2_tables.c)
|
||||
|
||||
# Embed super.bin: use .S with .incbin on GCC/Clang, generated C array on MSVC
|
||||
if(MSVC)
|
||||
|
||||
@@ -83,6 +83,22 @@ UC2_API int uc2_extract(
|
||||
|
||||
UC2_API const char *uc2_message(uc2_handle, int ret);
|
||||
|
||||
/* Compress raw data into a UC2 bitstream (no archive framing).
|
||||
level: 2=Fast, 3=Normal, 4=Tight(default), 5=Ultra.
|
||||
read() should return bytes read (0 at EOF, <0 on error).
|
||||
write() should return <0 on error.
|
||||
Returns 0 on success, negative UC2_* error code on failure. */
|
||||
UC2_API int uc2_compress(
|
||||
int level,
|
||||
int (*read)(void *context, void *buf, unsigned len),
|
||||
void *read_ctx,
|
||||
int (*write)(void *context, const void *ptr, unsigned len),
|
||||
void *write_ctx,
|
||||
unsigned size,
|
||||
unsigned short *checksum_out,
|
||||
unsigned *compressed_size_out
|
||||
);
|
||||
|
||||
struct uc2_io {
|
||||
/* Read len bytes from the archive at offset pos into buf.
|
||||
Return number of bytes read, or less if eof.
|
||||
|
||||
947
lib/src/compress.c
Normal file
947
lib/src/compress.c
Normal file
@@ -0,0 +1,947 @@
|
||||
/* UC2 LZ77+Huffman compressor.
|
||||
Produces bitstreams compatible with Bobrowski's decompressor (decompress.c).
|
||||
|
||||
Algorithm: LZ77 sliding window with hash-chain match finding,
|
||||
Huffman entropy coding, delta-coded tree serialization.
|
||||
|
||||
Copyright (c) 2026 Eremey Valetov
|
||||
License: GPL-3.0 */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "uc2/libuc2.h"
|
||||
#include "uc2_internal.h"
|
||||
|
||||
/* ---------- bitstream output ---------- */
|
||||
|
||||
struct bits_out {
|
||||
u8 *buf;
|
||||
unsigned pos; /* byte position in output */
|
||||
unsigned capacity;
|
||||
u16 word; /* accumulator */
|
||||
int bits_left; /* bits remaining in word (16 = empty) */
|
||||
|
||||
int (*write)(void *ctx, const void *ptr, unsigned len);
|
||||
void *ctx;
|
||||
};
|
||||
|
||||
static void bitsout_init(struct bits_out *bo,
|
||||
int (*write)(void *ctx, const void *ptr, unsigned len),
|
||||
void *ctx, u8 *buf, unsigned capacity)
|
||||
{
|
||||
bo->buf = buf;
|
||||
bo->pos = 0;
|
||||
bo->capacity = capacity;
|
||||
bo->word = 0;
|
||||
bo->bits_left = 16;
|
||||
bo->write = write;
|
||||
bo->ctx = ctx;
|
||||
}
|
||||
|
||||
static int bitsout_flush_buf(struct bits_out *bo)
|
||||
{
|
||||
if (bo->pos > 0) {
|
||||
int r = bo->write(bo->ctx, bo->buf, bo->pos);
|
||||
if (r < 0) return r;
|
||||
bo->pos = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bitsout_emit_word(struct bits_out *bo, u16 w)
|
||||
{
|
||||
if (bo->pos + 2 > bo->capacity) {
|
||||
int r = bitsout_flush_buf(bo);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
bo->buf[bo->pos++] = w & 0xff;
|
||||
bo->buf[bo->pos++] = w >> 8;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Put bits MSB-first into 16-bit LE words */
|
||||
static int bitsout_put(struct bits_out *bo, unsigned val, int nbits)
|
||||
{
|
||||
assert(nbits <= 16);
|
||||
int left = bo->bits_left;
|
||||
if (nbits <= left) {
|
||||
bo->word |= (u16)(val << (left - nbits));
|
||||
bo->bits_left = left - nbits;
|
||||
if (bo->bits_left == 0) {
|
||||
int r = bitsout_emit_word(bo, bo->word);
|
||||
if (r < 0) return r;
|
||||
bo->word = 0;
|
||||
bo->bits_left = 16;
|
||||
}
|
||||
} else {
|
||||
/* Split across word boundary */
|
||||
int first = left;
|
||||
int second = nbits - first;
|
||||
bo->word |= (u16)(val >> second);
|
||||
int r = bitsout_emit_word(bo, bo->word);
|
||||
if (r < 0) return r;
|
||||
bo->word = (u16)(val << (16 - second));
|
||||
bo->bits_left = 16 - second;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bitsout_finish(struct bits_out *bo)
|
||||
{
|
||||
if (bo->bits_left < 16) {
|
||||
int r = bitsout_emit_word(bo, bo->word);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
return bitsout_flush_buf(bo);
|
||||
}
|
||||
|
||||
/* ---------- Huffman tree generation ---------- */
|
||||
|
||||
/* Generate canonical Huffman code lengths from symbol frequencies.
|
||||
Enforces MaxCodeBits (13) limit via RepairLengths. */
|
||||
|
||||
struct heap_node {
|
||||
u32 freq;
|
||||
int sym; /* >= 0: leaf, < 0: internal */
|
||||
int left, right;
|
||||
};
|
||||
|
||||
static void treegen(const u32 *freqs, int nsym, u8 *lengths)
|
||||
{
|
||||
struct heap_node nodes[2 * NumSymbols];
|
||||
int heap[NumSymbols + 1];
|
||||
int heap_size = 0;
|
||||
int next_internal = nsym;
|
||||
|
||||
memset(lengths, 0, nsym);
|
||||
|
||||
/* Build initial heap of non-zero frequency symbols */
|
||||
for (int i = 0; i < nsym; i++) {
|
||||
if (freqs[i] > 0) {
|
||||
nodes[i].freq = freqs[i];
|
||||
nodes[i].sym = i;
|
||||
nodes[i].left = nodes[i].right = -1;
|
||||
heap_size++;
|
||||
heap[heap_size] = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (heap_size == 0)
|
||||
return;
|
||||
|
||||
if (heap_size == 1) {
|
||||
lengths[nodes[heap[1]].sym] = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Heapify (min-heap by frequency) */
|
||||
for (int i = heap_size / 2; i >= 1; i--) {
|
||||
int k = i;
|
||||
int v = heap[k];
|
||||
while (k * 2 <= heap_size) {
|
||||
int j = k * 2;
|
||||
if (j + 1 <= heap_size && nodes[heap[j + 1]].freq < nodes[heap[j]].freq)
|
||||
j++;
|
||||
if (nodes[v].freq <= nodes[heap[j]].freq)
|
||||
break;
|
||||
heap[k] = heap[j];
|
||||
k = j;
|
||||
}
|
||||
heap[k] = v;
|
||||
}
|
||||
|
||||
/* Extract min helper */
|
||||
#define EXTRACT_MIN(out) do { \
|
||||
out = heap[1]; \
|
||||
heap[1] = heap[heap_size--]; \
|
||||
int k_ = 1; \
|
||||
while (k_ * 2 <= heap_size) { \
|
||||
int j_ = k_ * 2; \
|
||||
if (j_ + 1 <= heap_size && nodes[heap[j_ + 1]].freq < nodes[heap[j_]].freq) j_++; \
|
||||
if (nodes[heap[k_]].freq <= nodes[heap[j_]].freq) break; \
|
||||
int t_ = heap[k_]; heap[k_] = heap[j_]; heap[j_] = t_; \
|
||||
k_ = j_; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define INSERT(idx) do { \
|
||||
heap[++heap_size] = idx; \
|
||||
int k_ = heap_size; \
|
||||
while (k_ > 1 && nodes[heap[k_]].freq < nodes[heap[k_/2]].freq) { \
|
||||
int t_ = heap[k_]; heap[k_] = heap[k_/2]; heap[k_/2] = t_; \
|
||||
k_ /= 2; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Build tree */
|
||||
while (heap_size > 1) {
|
||||
int a, b;
|
||||
EXTRACT_MIN(a);
|
||||
EXTRACT_MIN(b);
|
||||
int n = next_internal++;
|
||||
nodes[n].freq = nodes[a].freq + nodes[b].freq;
|
||||
nodes[n].sym = -1;
|
||||
nodes[n].left = a;
|
||||
nodes[n].right = b;
|
||||
INSERT(n);
|
||||
}
|
||||
|
||||
#undef EXTRACT_MIN
|
||||
#undef INSERT
|
||||
|
||||
/* Walk tree to compute code lengths */
|
||||
int stack[64];
|
||||
int depths[64];
|
||||
int sp = 0;
|
||||
stack[sp] = heap[1];
|
||||
depths[sp] = 0;
|
||||
|
||||
while (sp >= 0) {
|
||||
int idx = stack[sp];
|
||||
int depth = depths[sp];
|
||||
sp--;
|
||||
if (nodes[idx].left == -1) {
|
||||
/* Leaf */
|
||||
lengths[nodes[idx].sym] = (u8)(depth > MaxCodeBits ? MaxCodeBits : depth);
|
||||
} else {
|
||||
sp++;
|
||||
stack[sp] = nodes[idx].left;
|
||||
depths[sp] = depth + 1;
|
||||
sp++;
|
||||
stack[sp] = nodes[idx].right;
|
||||
depths[sp] = depth + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* RepairLengths: enforce MaxCodeBits limit while maintaining Kraft inequality.
|
||||
If any code length exceeds MaxCodeBits, redistribute by shortening deep
|
||||
codes and lengthening short codes. */
|
||||
for (;;) {
|
||||
u32 kraft = 0;
|
||||
int max_len = 0;
|
||||
for (int i = 0; i < nsym; i++) {
|
||||
if (lengths[i] > 0) {
|
||||
kraft += 1u << (MaxCodeBits - lengths[i]);
|
||||
if (lengths[i] > max_len) max_len = lengths[i];
|
||||
}
|
||||
}
|
||||
if (max_len <= MaxCodeBits && kraft == (1u << MaxCodeBits))
|
||||
break;
|
||||
|
||||
/* Fix: find longest code and shorten it */
|
||||
if (max_len > MaxCodeBits) {
|
||||
for (int i = 0; i < nsym; i++)
|
||||
if (lengths[i] > MaxCodeBits)
|
||||
lengths[i] = MaxCodeBits;
|
||||
}
|
||||
|
||||
/* Recalculate kraft sum */
|
||||
kraft = 0;
|
||||
for (int i = 0; i < nsym; i++)
|
||||
if (lengths[i] > 0)
|
||||
kraft += 1u << (MaxCodeBits - lengths[i]);
|
||||
|
||||
if (kraft == (1u << MaxCodeBits))
|
||||
break;
|
||||
|
||||
/* Kraft sum too large: lengthen shortest codes */
|
||||
while (kraft > (1u << MaxCodeBits)) {
|
||||
/* Find shortest non-zero code */
|
||||
int min_len = MaxCodeBits + 1, min_i = -1;
|
||||
for (int i = 0; i < nsym; i++)
|
||||
if (lengths[i] > 0 && lengths[i] < min_len) {
|
||||
min_len = lengths[i];
|
||||
min_i = i;
|
||||
}
|
||||
if (min_i < 0 || min_len >= MaxCodeBits) break;
|
||||
kraft -= 1u << (MaxCodeBits - lengths[min_i]);
|
||||
lengths[min_i]++;
|
||||
kraft += 1u << (MaxCodeBits - lengths[min_i]);
|
||||
}
|
||||
|
||||
/* Kraft sum too small: shorten longest codes */
|
||||
while (kraft < (1u << MaxCodeBits)) {
|
||||
int max_l = 0, max_i = -1;
|
||||
for (int i = 0; i < nsym; i++)
|
||||
if (lengths[i] > max_l) { max_l = lengths[i]; max_i = i; }
|
||||
if (max_i < 0) break;
|
||||
u32 freed = 1u << (MaxCodeBits - lengths[max_i]);
|
||||
u32 needed = 1u << (MaxCodeBits - (lengths[max_i] - 1));
|
||||
if (kraft - freed + needed <= (1u << MaxCodeBits)) {
|
||||
kraft -= freed;
|
||||
lengths[max_i]--;
|
||||
kraft += needed;
|
||||
} else break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Generate canonical Huffman codes from lengths (sorted by length, then symbol) */
|
||||
static void codegen(const u8 *lengths, int nsym, u16 *codes)
|
||||
{
|
||||
u16 code = 0;
|
||||
for (int len = 1; len <= MaxCodeBits; len++) {
|
||||
for (int i = 0; i < nsym; i++) {
|
||||
if (lengths[i] == len)
|
||||
codes[i] = code++;
|
||||
}
|
||||
code <<= 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------- Huffman tree encoding (delta-coded, as in TREEENC.CPP) ---------- */
|
||||
|
||||
/* Encode a Huffman tree to the bitstream using delta coding against
|
||||
the previous block's tree (stored in symprev).
|
||||
|
||||
Bitstream format (matches decompress.c ht_dec):
|
||||
tree-changed:1 — 0 = use default tree, 1 = new tree follows
|
||||
t:2 — (has_hi<<1)|has_lo
|
||||
tlengths:15×3 — tree-encoding tree (delta/repeat code lengths)
|
||||
stream:var — delta-coded symbol lengths via tree-encoding tree
|
||||
|
||||
The present[] array must exactly match the decoder's rle[][] table:
|
||||
t=0: {9,10,12,13, 32..127, 256..343} = 188 symbols
|
||||
t=1: {0..127, 256..343} = 216 symbols
|
||||
t=2: {9,10,12,13, 32..343} = 316 symbols
|
||||
t=3: {0..343} = 344 symbols
|
||||
|
||||
RLE repeat code: RepeatCode followed by count c.
|
||||
Decoder emits c + MinRepeat - 1 copies of the previous value. */
|
||||
static int tree_enc(struct bits_out *bo, const u8 lengths[NumSymbols], u8 symprev[NumSymbols])
|
||||
{
|
||||
int r;
|
||||
|
||||
/* tree-changed = 0 means "use default tree" (resets symprev) */
|
||||
u8 def_tree[NumSymbols];
|
||||
uc2_default_lengths(def_tree);
|
||||
if (memcmp(lengths, def_tree, NumSymbols) == 0) {
|
||||
r = bitsout_put(bo, 0, 1);
|
||||
if (r < 0) return r;
|
||||
memcpy(symprev, def_tree, NumSymbols);
|
||||
return 0;
|
||||
}
|
||||
|
||||
r = bitsout_put(bo, 1, 1); /* new tree */
|
||||
if (r < 0) return r;
|
||||
|
||||
/* has_lo: need full 0..127 encoding?
|
||||
Symbols 9,10,12,13 (tab,LF,FF,CR) are always individually coded.
|
||||
has_lo must be set if ANY of {0..8, 11, 14..31} have non-zero length. */
|
||||
int has_lo = 0;
|
||||
for (int i = 0; i <= 8 && !has_lo; i++)
|
||||
if (lengths[i] > 0) has_lo = 1;
|
||||
if (!has_lo && lengths[11] > 0) has_lo = 1;
|
||||
if (!has_lo)
|
||||
for (int i = 14; i <= 31; i++)
|
||||
if (lengths[i] > 0) { has_lo = 1; break; }
|
||||
|
||||
int has_hi = 0;
|
||||
for (int i = 128; i < 256; i++)
|
||||
if (lengths[i] > 0) { has_hi = 1; break; }
|
||||
|
||||
int t = (has_hi << 1) | has_lo;
|
||||
r = bitsout_put(bo, t, 2);
|
||||
if (r < 0) return r;
|
||||
|
||||
/* Build present[] to exactly match decoder's rle[][] regions */
|
||||
int present[NumSymbols];
|
||||
memset(present, 0, sizeof present);
|
||||
switch (t) {
|
||||
case 0: /* no lo, no hi */
|
||||
present[9] = present[10] = 1;
|
||||
present[12] = present[13] = 1;
|
||||
for (int i = 32; i < 128; i++) present[i] = 1;
|
||||
for (int i = 256; i < NumSymbols; i++) present[i] = 1;
|
||||
break;
|
||||
case 1: /* has_lo */
|
||||
for (int i = 0; i < 128; i++) present[i] = 1;
|
||||
for (int i = 256; i < NumSymbols; i++) present[i] = 1;
|
||||
break;
|
||||
case 2: /* has_hi */
|
||||
present[9] = present[10] = 1;
|
||||
present[12] = present[13] = 1;
|
||||
for (int i = 32; i < NumSymbols; i++) present[i] = 1;
|
||||
break;
|
||||
case 3: /* both */
|
||||
for (int i = 0; i < NumSymbols; i++) present[i] = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Generate delta stream for present symbols */
|
||||
u8 stream[NumSymbols];
|
||||
int stream_len = 0;
|
||||
for (int i = 0; i < NumSymbols; i++)
|
||||
if (present[i])
|
||||
stream[stream_len++] = ivval[symprev[i]][lengths[i]];
|
||||
|
||||
/* Compute frequencies of delta/repeat codes for the tree-encoding tree.
|
||||
Always emit first value of a run as non-repeat (sets decoder's val),
|
||||
then use RepeatCode for the remaining copies. Each RepeatCode+c
|
||||
encodes c + MinRepeat - 1 copies. */
|
||||
u32 tfreqs[NumLenCodes];
|
||||
memset(tfreqs, 0, sizeof tfreqs);
|
||||
for (int i = 0; i < stream_len; ) {
|
||||
int run = 1;
|
||||
while (i + run < stream_len && stream[i + run] == stream[i])
|
||||
run++;
|
||||
if (run >= (int)MinRepeat) {
|
||||
tfreqs[stream[i]]++; /* first as non-repeat */
|
||||
int reps = run - 1;
|
||||
while (reps >= (int)(MinRepeat - 1)) {
|
||||
int copies = reps;
|
||||
if (copies > (int)(NumDeltaCodes - 1 + MinRepeat - 1))
|
||||
copies = NumDeltaCodes - 1 + MinRepeat - 1;
|
||||
tfreqs[RepeatCode]++;
|
||||
tfreqs[copies - (MinRepeat - 1)]++;
|
||||
reps -= copies;
|
||||
}
|
||||
for (int j = 0; j < reps; j++)
|
||||
tfreqs[stream[i]]++;
|
||||
i += run;
|
||||
} else {
|
||||
for (int j = 0; j < run; j++)
|
||||
tfreqs[stream[i + j]]++;
|
||||
i += run;
|
||||
}
|
||||
}
|
||||
|
||||
/* Generate tree-encoding tree (15 symbols, 3-bit length field → max 7) */
|
||||
u8 tlengths[NumLenCodes];
|
||||
treegen(tfreqs, NumLenCodes, tlengths);
|
||||
|
||||
/* Enforce 7-bit limit and repair Kraft inequality */
|
||||
for (int i = 0; i < NumLenCodes; i++)
|
||||
if (tlengths[i] > 7) tlengths[i] = 7;
|
||||
for (;;) {
|
||||
u32 kraft = 0;
|
||||
for (int i = 0; i < NumLenCodes; i++)
|
||||
if (tlengths[i] > 0)
|
||||
kraft += 1u << (MaxCodeBits - tlengths[i]);
|
||||
if (kraft <= (1u << MaxCodeBits))
|
||||
break;
|
||||
int min_len = 8, min_i = -1;
|
||||
for (int i = 0; i < NumLenCodes; i++)
|
||||
if (tlengths[i] > 0 && tlengths[i] < min_len) {
|
||||
min_len = tlengths[i];
|
||||
min_i = i;
|
||||
}
|
||||
if (min_i < 0 || min_len >= 7) break;
|
||||
tlengths[min_i]++;
|
||||
}
|
||||
|
||||
u16 tcodes[NumLenCodes];
|
||||
codegen(tlengths, NumLenCodes, tcodes);
|
||||
|
||||
/* Write tree-encoding tree lengths (15 × 3 bits) */
|
||||
for (int i = 0; i < NumLenCodes; i++) {
|
||||
r = bitsout_put(bo, tlengths[i], 3);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
|
||||
/* Write delta-coded symbol stream with RLE */
|
||||
for (int i = 0; i < stream_len; ) {
|
||||
int run = 1;
|
||||
while (i + run < stream_len && stream[i + run] == stream[i])
|
||||
run++;
|
||||
if (run >= (int)MinRepeat) {
|
||||
/* Emit first value as non-repeat (sets decoder's val) */
|
||||
r = bitsout_put(bo, tcodes[stream[i]], tlengths[stream[i]]);
|
||||
if (r < 0) return r;
|
||||
int reps = run - 1;
|
||||
while (reps >= (int)(MinRepeat - 1)) {
|
||||
int copies = reps;
|
||||
if (copies > (int)(NumDeltaCodes - 1 + MinRepeat - 1))
|
||||
copies = NumDeltaCodes - 1 + MinRepeat - 1;
|
||||
int c = copies - (MinRepeat - 1);
|
||||
r = bitsout_put(bo, tcodes[RepeatCode], tlengths[RepeatCode]);
|
||||
if (r < 0) return r;
|
||||
r = bitsout_put(bo, tcodes[c], tlengths[c]);
|
||||
if (r < 0) return r;
|
||||
reps -= copies;
|
||||
}
|
||||
for (int j = 0; j < reps; j++) {
|
||||
r = bitsout_put(bo, tcodes[stream[i]], tlengths[stream[i]]);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
i += run;
|
||||
} else {
|
||||
for (int j = 0; j < run; j++) {
|
||||
r = bitsout_put(bo, tcodes[stream[i + j]], tlengths[stream[i + j]]);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
i += run;
|
||||
}
|
||||
}
|
||||
|
||||
/* Update symprev for next block */
|
||||
for (int i = 0; i < NumSymbols; i++)
|
||||
symprev[i] = lengths[i];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ---------- LZ77 compressor core ---------- */
|
||||
|
||||
struct compressor {
|
||||
/* Sliding window (64KB circular buffer, u16 index wraps naturally) */
|
||||
u8 data[UC2_BUF_SIZE];
|
||||
|
||||
/* Hash chains */
|
||||
u16 head[8192]; /* hash -> most recent position */
|
||||
u16 prev[UC2_BUF_SIZE]; /* position -> previous position with same hash */
|
||||
|
||||
/* Current position and limits */
|
||||
u16 pos; /* current compression position */
|
||||
u16 end; /* end of valid data */
|
||||
unsigned data_len; /* total bytes loaded so far */
|
||||
|
||||
/* Intermediate buffer for literals/distances/lengths */
|
||||
u16 ibuf[32768];
|
||||
unsigned ibuf_pos;
|
||||
|
||||
/* Frequency counts for current block */
|
||||
u32 bd_freq[NumByteSym + NumDistSym];
|
||||
u32 l_freq[NumLenSym];
|
||||
|
||||
/* Previous tree lengths (for delta coding) */
|
||||
u8 symprev[NumSymbols];
|
||||
|
||||
/* Output bitstream */
|
||||
struct bits_out bo;
|
||||
u8 outbuf[4096];
|
||||
|
||||
/* Compression parameters */
|
||||
unsigned max_search;
|
||||
unsigned lazy_depth;
|
||||
unsigned lazy_limit;
|
||||
unsigned give_up;
|
||||
|
||||
/* Total compressed bytes written */
|
||||
unsigned compressed_bytes;
|
||||
};
|
||||
|
||||
static inline u16 hash3(const u8 *p)
|
||||
{
|
||||
return (u16)(p[0] ^ (p[1] << 3) ^ ((0x7f & p[2]) << 6));
|
||||
}
|
||||
|
||||
/* Find longest match at current position. Returns match length (0 if none). */
|
||||
static unsigned find_match(struct compressor *c, u16 pos, unsigned max_depth,
|
||||
unsigned give_up, unsigned *match_dist)
|
||||
{
|
||||
unsigned best_len = UC2_MIN_MATCH - 1;
|
||||
unsigned best_dist = 0;
|
||||
|
||||
u16 h = hash3(c->data + pos);
|
||||
u16 chain = c->head[h];
|
||||
unsigned depth = 0;
|
||||
|
||||
while (depth < max_depth) {
|
||||
u16 dist = (u16)(pos - chain);
|
||||
if (dist == 0 || dist > UC2_MAX_DIST)
|
||||
break;
|
||||
|
||||
/* Quick filter: check byte at best_len position first */
|
||||
if (c->data[(u16)(chain + best_len)] == c->data[(u16)(pos + best_len)]) {
|
||||
/* Full comparison */
|
||||
unsigned len = 0;
|
||||
unsigned max_len = UC2_MAX_LEN;
|
||||
u16 avail = (u16)(c->end - pos);
|
||||
if (max_len > avail) max_len = avail;
|
||||
|
||||
while (len < max_len &&
|
||||
c->data[(u16)(chain + len)] == c->data[(u16)(pos + len)])
|
||||
len++;
|
||||
|
||||
if (len > best_len) {
|
||||
best_len = len;
|
||||
best_dist = dist;
|
||||
if (len >= give_up)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
chain = c->prev[chain];
|
||||
depth++;
|
||||
if ((u16)(pos - chain) > UC2_MAX_DIST)
|
||||
break;
|
||||
}
|
||||
|
||||
*match_dist = best_dist;
|
||||
return best_len >= UC2_MIN_MATCH ? best_len : 0;
|
||||
}
|
||||
|
||||
static void hash_enter(struct compressor *c, u16 pos)
|
||||
{
|
||||
u16 h = hash3(c->data + pos);
|
||||
c->prev[pos] = c->head[h];
|
||||
c->head[h] = pos;
|
||||
}
|
||||
|
||||
/* Encode a distance into the intermediate buffer */
|
||||
static void encode_dist(struct compressor *c, unsigned dist)
|
||||
{
|
||||
unsigned sym, extra, nbits;
|
||||
|
||||
if (dist <= 15) {
|
||||
sym = dist - 1 + NumByteSym; /* symbols 256..270 */
|
||||
c->bd_freq[sym]++;
|
||||
c->ibuf[c->ibuf_pos++] = (u16)(dist + 256 - 1);
|
||||
return;
|
||||
}
|
||||
if (dist <= 255) {
|
||||
unsigned slot = (dist - 16) / 16;
|
||||
sym = slot + 15 + NumByteSym; /* symbols 271..285 */
|
||||
extra = (dist - 16) % 16;
|
||||
nbits = 4;
|
||||
} else if (dist <= 4095) {
|
||||
unsigned slot = (dist - 256) / 256;
|
||||
sym = slot + 30 + NumByteSym; /* symbols 286..300 */
|
||||
extra = (dist - 256) % 256;
|
||||
nbits = 8;
|
||||
} else {
|
||||
unsigned slot = (dist - 4096) / 4096;
|
||||
sym = slot + 45 + NumByteSym; /* symbols 301..315 */
|
||||
extra = (dist - 4096) % 4096;
|
||||
nbits = 12;
|
||||
}
|
||||
|
||||
c->bd_freq[sym]++;
|
||||
c->ibuf[c->ibuf_pos++] = (u16)(sym - NumByteSym + 256);
|
||||
c->ibuf[c->ibuf_pos++] = (u16)extra;
|
||||
(void)nbits;
|
||||
}
|
||||
|
||||
static void encode_len(struct compressor *c, unsigned len)
|
||||
{
|
||||
unsigned sym, extra;
|
||||
|
||||
if (len <= 10) {
|
||||
sym = len - 3;
|
||||
c->l_freq[sym]++;
|
||||
c->ibuf[c->ibuf_pos++] = (u16)len;
|
||||
return;
|
||||
}
|
||||
if (len <= 26) {
|
||||
sym = (len - 11) / 2 + 8;
|
||||
extra = (len - 11) % 2;
|
||||
} else if (len <= 90) {
|
||||
sym = (len - 27) / 8 + 16;
|
||||
extra = (len - 27) % 8;
|
||||
} else if (len <= 154) {
|
||||
sym = 24;
|
||||
extra = len - 91;
|
||||
} else if (len <= 666) {
|
||||
sym = 25;
|
||||
extra = len - 155;
|
||||
} else if (len <= 2714) {
|
||||
sym = 26;
|
||||
extra = len - 667;
|
||||
} else {
|
||||
sym = 27;
|
||||
extra = len - 2715;
|
||||
}
|
||||
|
||||
c->l_freq[sym]++;
|
||||
c->ibuf[c->ibuf_pos++] = (u16)len;
|
||||
(void)extra;
|
||||
}
|
||||
|
||||
/* Distance/length encoding tables (for Huffman encoding phase) */
|
||||
static const struct { u16 base; u8 bits; } dist_enc[] = {
|
||||
/* 0..14: dist 1..15, 0 extra bits */
|
||||
{1,0},{2,0},{3,0},{4,0},{5,0},{6,0},{7,0},{8,0},
|
||||
{9,0},{10,0},{11,0},{12,0},{13,0},{14,0},{15,0},
|
||||
/* 15..29: dist 16..240 base, 4 extra bits */
|
||||
{16,4},{32,4},{48,4},{64,4},{80,4},{96,4},{112,4},{128,4},
|
||||
{144,4},{160,4},{176,4},{192,4},{208,4},{224,4},{240,4},
|
||||
/* 30..44: dist 256..3840 base, 8 extra bits */
|
||||
{256,8},{512,8},{768,8},{1024,8},{1280,8},{1536,8},{1792,8},{2048,8},
|
||||
{2304,8},{2560,8},{2816,8},{3072,8},{3328,8},{3584,8},{3840,8},
|
||||
/* 45..59: dist 4096..61440 base, 12 extra bits */
|
||||
{4096,12},{8192,12},{12288,12},{16384,12},{20480,12},{24576,12},
|
||||
{28672,12},{32768,12},{36864,12},{40960,12},{45056,12},{49152,12},
|
||||
{53248,12},{57344,12},{61440,12},
|
||||
};
|
||||
|
||||
static const struct { u16 base; u8 bits; } len_enc[] = {
|
||||
{3,0},{4,0},{5,0},{6,0},{7,0},{8,0},{9,0},{10,0},
|
||||
{11,1},{13,1},{15,1},{17,1},{19,1},{21,1},{23,1},{25,1},
|
||||
{27,3},{35,3},{43,3},{51,3},{59,3},{67,3},{75,3},{83,3},
|
||||
{91,6},{155,9},{667,11},{2715,15},
|
||||
};
|
||||
|
||||
/* Find the distance symbol for a given distance */
|
||||
static int dist_to_sym(unsigned dist)
|
||||
{
|
||||
for (int i = NumDistSym - 1; i >= 0; i--)
|
||||
if (dist >= dist_enc[i].base)
|
||||
return i;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Find the length symbol for a given length */
|
||||
static int len_to_sym(unsigned len)
|
||||
{
|
||||
for (int i = NumLenSym - 1; i >= 0; i--)
|
||||
if (len >= len_enc[i].base)
|
||||
return i;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Flush intermediate buffer: generate Huffman trees and encode data */
|
||||
static int flush_block(struct compressor *c, int is_last)
|
||||
{
|
||||
int r;
|
||||
|
||||
/* Generate Huffman trees from frequency data */
|
||||
u8 lengths[NumSymbols];
|
||||
|
||||
/* BD tree (literals + distances) */
|
||||
treegen(c->bd_freq, NumByteSym + NumDistSym, lengths);
|
||||
|
||||
/* Length tree */
|
||||
treegen(c->l_freq, NumLenSym, lengths + NumByteSym + NumDistSym);
|
||||
|
||||
/* Emit block-present flag */
|
||||
r = bitsout_put(&c->bo, 1, 1);
|
||||
if (r < 0) return r;
|
||||
|
||||
/* Encode and emit Huffman tree */
|
||||
r = tree_enc(&c->bo, lengths, c->symprev);
|
||||
if (r < 0) return r;
|
||||
|
||||
/* Generate canonical codes */
|
||||
u16 bd_codes[NumByteSym + NumDistSym];
|
||||
u16 l_codes[NumLenSym];
|
||||
codegen(lengths, NumByteSym + NumDistSym, bd_codes);
|
||||
codegen(lengths + NumByteSym + NumDistSym, NumLenSym, l_codes);
|
||||
|
||||
/* Encode buffered literals/matches.
|
||||
ibuf format: literal = byte (0..255), distance = sym_idx + 256,
|
||||
followed by extra value (if dist_enc[sym_idx].bits > 0),
|
||||
followed by raw length value. */
|
||||
unsigned i = 0;
|
||||
while (i < c->ibuf_pos) {
|
||||
u16 val = c->ibuf[i++];
|
||||
if (val < 256) {
|
||||
/* Literal byte */
|
||||
r = bitsout_put(&c->bo, bd_codes[val], lengths[val]);
|
||||
if (r < 0) return r;
|
||||
} else {
|
||||
/* Distance: val - 256 is the distance symbol index */
|
||||
int dsym = val - 256;
|
||||
r = bitsout_put(&c->bo, bd_codes[NumByteSym + dsym],
|
||||
lengths[NumByteSym + dsym]);
|
||||
if (r < 0) return r;
|
||||
if (dist_enc[dsym].bits > 0) {
|
||||
u16 extra = c->ibuf[i++];
|
||||
r = bitsout_put(&c->bo, extra, dist_enc[dsym].bits);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
|
||||
/* Length follows distance */
|
||||
u16 len = c->ibuf[i++];
|
||||
int lsym = len_to_sym(len);
|
||||
r = bitsout_put(&c->bo, l_codes[lsym],
|
||||
lengths[NumByteSym + NumDistSym + lsym]);
|
||||
if (r < 0) return r;
|
||||
if (len_enc[lsym].bits > 0) {
|
||||
r = bitsout_put(&c->bo, len - len_enc[lsym].base,
|
||||
len_enc[lsym].bits);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Emit end-of-block marker: distance = EOB_MARK, length = 3 */
|
||||
{
|
||||
int dsym = dist_to_sym(UC2_EOB_MARK);
|
||||
r = bitsout_put(&c->bo, bd_codes[NumByteSym + dsym],
|
||||
lengths[NumByteSym + dsym]);
|
||||
if (r < 0) return r;
|
||||
if (dist_enc[dsym].bits > 0) {
|
||||
r = bitsout_put(&c->bo, UC2_EOB_MARK - dist_enc[dsym].base,
|
||||
dist_enc[dsym].bits);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
/* Length = 3 (symbol 0) */
|
||||
r = bitsout_put(&c->bo, l_codes[0],
|
||||
lengths[NumByteSym + NumDistSym]);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
|
||||
/* Reset intermediate buffer and frequencies */
|
||||
c->ibuf_pos = 0;
|
||||
memset(c->bd_freq, 0, sizeof c->bd_freq);
|
||||
memset(c->l_freq, 0, sizeof c->l_freq);
|
||||
|
||||
/* If last block, emit end-of-stream (block-present = 0) */
|
||||
if (is_last) {
|
||||
r = bitsout_put(&c->bo, 0, 1);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ---------- Public compression API ---------- */
|
||||
|
||||
struct compress_ctx {
|
||||
struct compressor comp;
|
||||
struct csum csum;
|
||||
unsigned total_in;
|
||||
unsigned total_out;
|
||||
int finished;
|
||||
};
|
||||
|
||||
/* Counting writer: wraps user writer to track compressed size */
|
||||
struct count_writer {
|
||||
int (*write)(void *ctx, const void *ptr, unsigned len);
|
||||
void *ctx;
|
||||
unsigned *count;
|
||||
};
|
||||
|
||||
static int count_write(void *ctx, const void *ptr, unsigned len)
|
||||
{
|
||||
struct count_writer *cw = ctx;
|
||||
*cw->count += len;
|
||||
return cw->write(cw->ctx, ptr, len);
|
||||
}
|
||||
|
||||
int uc2_compress(
|
||||
int level,
|
||||
int (*read)(void *context, void *buf, unsigned len),
|
||||
void *read_ctx,
|
||||
int (*write)(void *context, const void *ptr, unsigned len),
|
||||
void *write_ctx,
|
||||
unsigned size,
|
||||
unsigned short *checksum_out,
|
||||
unsigned *compressed_size_out)
|
||||
{
|
||||
struct compress_ctx *ctx = calloc(1, sizeof *ctx);
|
||||
if (!ctx) return UC2_UserFault;
|
||||
|
||||
struct compressor *c = &ctx->comp;
|
||||
|
||||
/* Set compression parameters based on level */
|
||||
switch (level) {
|
||||
case 2: c->max_search = 15; c->lazy_depth = 2; c->lazy_limit = 15; c->give_up = 25; break;
|
||||
case 3: c->max_search = 70; c->lazy_depth = 10; c->lazy_limit = 30; c->give_up = 50; break;
|
||||
case 5: c->max_search = 10000; c->lazy_depth = 5000; c->lazy_limit = 200; c->give_up = 100; break;
|
||||
default: /* level 4 = Tight, default */
|
||||
c->max_search = 600; c->lazy_depth = 50; c->lazy_limit = 40; c->give_up = 100; break;
|
||||
}
|
||||
|
||||
/* Initialize */
|
||||
csum_init(&ctx->csum);
|
||||
uc2_default_lengths(c->symprev);
|
||||
memset(c->head, 0, sizeof c->head);
|
||||
memset(c->bd_freq, 0, sizeof c->bd_freq);
|
||||
memset(c->l_freq, 0, sizeof c->l_freq);
|
||||
c->ibuf_pos = 0;
|
||||
|
||||
struct count_writer cw = { .write = write, .ctx = write_ctx, .count = &ctx->total_out };
|
||||
bitsout_init(&c->bo, count_write, &cw, c->outbuf, sizeof c->outbuf);
|
||||
|
||||
/* Read all input data into circular buffer and compress */
|
||||
unsigned remaining = size;
|
||||
u16 load_pos = 0;
|
||||
|
||||
/* Pre-count EOB distance symbol frequency so the tree includes it */
|
||||
c->bd_freq[NumByteSym + dist_to_sym(UC2_EOB_MARK)]++;
|
||||
c->l_freq[0]++; /* length = 3 for EOB marker */
|
||||
|
||||
while (remaining > 0) {
|
||||
/* Load a chunk into the circular buffer */
|
||||
unsigned chunk = remaining;
|
||||
if (chunk > UC2_READ_SIZE) chunk = UC2_READ_SIZE;
|
||||
|
||||
int nread = read(read_ctx, c->data + load_pos, chunk);
|
||||
if (nread <= 0) break;
|
||||
|
||||
csum_update(&ctx->csum, c->data + load_pos, nread);
|
||||
remaining -= nread;
|
||||
|
||||
load_pos = (u16)(load_pos + nread);
|
||||
c->end = load_pos;
|
||||
|
||||
/* Compress loaded data */
|
||||
while ((u16)(c->end - c->pos) >= UC2_MIN_MATCH) {
|
||||
/* Enter current position into hash */
|
||||
if ((u16)(c->end - c->pos) >= 3)
|
||||
hash_enter(c, c->pos);
|
||||
|
||||
unsigned dist;
|
||||
unsigned len = find_match(c, c->pos, c->max_search, c->give_up, &dist);
|
||||
|
||||
if (len == 0) {
|
||||
/* Literal */
|
||||
c->bd_freq[c->data[c->pos]]++;
|
||||
c->ibuf[c->ibuf_pos++] = c->data[c->pos];
|
||||
c->pos++;
|
||||
} else {
|
||||
/* Lazy evaluation: if match is short, check next position */
|
||||
if (len < c->lazy_limit && (u16)(c->end - c->pos) > len) {
|
||||
unsigned dist2;
|
||||
if ((u16)(c->end - (u16)(c->pos + 1)) >= 3)
|
||||
hash_enter(c, (u16)(c->pos + 1));
|
||||
unsigned len2 = find_match(c, (u16)(c->pos + 1),
|
||||
c->lazy_depth, c->give_up, &dist2);
|
||||
if (len2 > len) {
|
||||
/* Better match at next position — emit literal */
|
||||
c->bd_freq[c->data[c->pos]]++;
|
||||
c->ibuf[c->ibuf_pos++] = c->data[c->pos];
|
||||
c->pos++;
|
||||
len = len2;
|
||||
dist = dist2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Emit match */
|
||||
encode_dist(c, dist);
|
||||
encode_len(c, len);
|
||||
|
||||
/* Enter skipped positions into hash */
|
||||
for (unsigned j = 1; j < len && (u16)(c->end - (u16)(c->pos + j)) >= 3; j++)
|
||||
hash_enter(c, (u16)(c->pos + j));
|
||||
c->pos = (u16)(c->pos + len);
|
||||
}
|
||||
|
||||
/* Flush block if intermediate buffer is getting full */
|
||||
if (c->ibuf_pos > 27000) {
|
||||
int r = flush_block(c, 0);
|
||||
if (r < 0) { free(ctx); return r; }
|
||||
/* Re-add EOB marker frequency for next block */
|
||||
c->bd_freq[NumByteSym + dist_to_sym(UC2_EOB_MARK)]++;
|
||||
c->l_freq[0]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle trailing bytes (less than MIN_MATCH) */
|
||||
while (c->pos != c->end) {
|
||||
c->bd_freq[c->data[c->pos]]++;
|
||||
c->ibuf[c->ibuf_pos++] = c->data[c->pos];
|
||||
c->pos++;
|
||||
}
|
||||
|
||||
/* Flush final block */
|
||||
int r = flush_block(c, 1);
|
||||
if (r < 0) { free(ctx); return r; }
|
||||
|
||||
r = bitsout_finish(&c->bo);
|
||||
if (r < 0) { free(ctx); return r; }
|
||||
|
||||
if (checksum_out)
|
||||
*checksum_out = csum_get(&ctx->csum);
|
||||
if (compressed_size_out)
|
||||
*compressed_size_out = ctx->total_out;
|
||||
|
||||
free(ctx);
|
||||
return 0;
|
||||
}
|
||||
@@ -1117,7 +1117,7 @@ enum {
|
||||
NumLenCodes = NumDeltaCodes + NumExtraCodes,
|
||||
};
|
||||
|
||||
const u8 vval[NumDeltaCodes][NumDeltaCodes] = {
|
||||
static const u8 vval[NumDeltaCodes][NumDeltaCodes] = {
|
||||
{ 0,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1},
|
||||
{ 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0},
|
||||
{ 2, 1, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0},
|
||||
|
||||
103
lib/src/uc2_internal.h
Normal file
103
lib/src/uc2_internal.h
Normal file
@@ -0,0 +1,103 @@
|
||||
/* UC2 format constants and shared types.
|
||||
Used by both the compressor and decompressor. */
|
||||
|
||||
#ifndef UC2_INTERNAL_H
|
||||
#define UC2_INTERNAL_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef uint8_t u8;
|
||||
typedef uint16_t u16;
|
||||
typedef uint32_t u32;
|
||||
|
||||
/* Huffman tree parameters */
|
||||
enum {
|
||||
MaxCodeBits = 13,
|
||||
LookupSize = 1 << MaxCodeBits, /* 8192 */
|
||||
|
||||
NumByteSym = 256,
|
||||
NumDistSym = 60,
|
||||
NumLenSym = 28,
|
||||
NumSymbols = NumByteSym + NumDistSym + NumLenSym, /* 344 */
|
||||
|
||||
NumLoAsciiSym = 28, /* symbols 4..31 (0-3 are control) */
|
||||
NumHiByteSym = 128, /* symbols 128..255 */
|
||||
|
||||
NumDeltaCodes = MaxCodeBits + 1, /* 14 (code lengths 0..13) */
|
||||
NumExtraCodes = 1, /* repeat code */
|
||||
NumLenCodes = NumDeltaCodes + NumExtraCodes, /* 15 */
|
||||
RepeatCode = MaxCodeBits + 1, /* 14 */
|
||||
MinRepeat = 6,
|
||||
};
|
||||
|
||||
/* LZ77 parameters */
|
||||
enum {
|
||||
UC2_MAX_DIST = 125 * 512, /* 64000 */
|
||||
UC2_READ_SIZE = 512,
|
||||
UC2_BUF_SIZE = 65536, /* circular buffer: u16 index wraps */
|
||||
UC2_EOB_MARK = 125 * 512 + 1, /* 64001 — end-of-block distance */
|
||||
UC2_MIN_MATCH = 3,
|
||||
UC2_MAX_LEN = 200, /* direct match limit */
|
||||
UC2_MAX_XLEN = 32760, /* extended match limit */
|
||||
};
|
||||
|
||||
/* Distance encoding: 60 codes in 4 tiers.
|
||||
tier 0: dist 1..15 (15 codes, 0 extra bits)
|
||||
tier 1: dist 16..255 (15 codes, 4 extra bits)
|
||||
tier 2: dist 256..4095 (15 codes, 8 extra bits)
|
||||
tier 3: dist 4096..64000 (15 codes, 12 extra bits) */
|
||||
|
||||
/* Length encoding: 28 codes.
|
||||
0..7: len 3..10 (0 extra bits)
|
||||
8..15: len 11..26 (1 extra bit)
|
||||
16..23: len 27..90 (3 extra bits)
|
||||
24: len 91..154 (6 extra bits)
|
||||
25: len 155..666 (9 extra bits)
|
||||
26: len 667..2714 (11 extra bits)
|
||||
27: len 2715..35482 (15 extra bits) */
|
||||
|
||||
/* Delta-to-absolute table for tree decoding (from decompress.c).
|
||||
vval[prev_length][delta_code] = absolute_length */
|
||||
extern const u8 vval[NumDeltaCodes][NumDeltaCodes];
|
||||
|
||||
/* Inverse: absolute-to-delta table for tree encoding.
|
||||
ivval[prev_length][abs_length] = delta_code */
|
||||
extern const u8 ivval[NumDeltaCodes][NumDeltaCodes];
|
||||
|
||||
/* Default Huffman code lengths for the first block */
|
||||
void uc2_default_lengths(u8 d[NumSymbols]);
|
||||
|
||||
/* Little-endian record types */
|
||||
typedef struct u16le { u8 b[2]; } u16le;
|
||||
typedef struct u32le { u8 b[4]; } u32le;
|
||||
|
||||
static inline u16 get16(u16le v) { return v.b[0] | v.b[1] << 8; }
|
||||
static inline u32 get32(u32le v) { return v.b[0] | v.b[1] << 8 | v.b[2] << 16 | (u32)v.b[3] << 24; }
|
||||
static inline u16le put16(u16 v) { return (u16le){{v & 0xff, v >> 8}}; }
|
||||
static inline u32le put32(u32 v) { return (u32le){{v & 0xff, v >> 8 & 0xff, v >> 16 & 0xff, v >> 24}}; }
|
||||
|
||||
/* Fletcher checksum (XOR-based, as used by UC2) */
|
||||
struct csum { u32 value; };
|
||||
|
||||
static inline void csum_init(struct csum *cs) { cs->value = 0xA55A; }
|
||||
|
||||
static inline void csum_update(struct csum *cs, const u8 *p, unsigned n)
|
||||
{
|
||||
if (!n) return;
|
||||
u32 v = cs->value;
|
||||
const u8 *e = p + n - 1;
|
||||
if (v > 0xffff)
|
||||
v ^= *p++ << 8;
|
||||
while (p < e) {
|
||||
v ^= p[0] | p[1] << 8;
|
||||
p += 2;
|
||||
}
|
||||
v &= 0xffff;
|
||||
if (p == e)
|
||||
v ^= *p | 0x10000;
|
||||
cs->value = v;
|
||||
}
|
||||
|
||||
static inline u16 csum_get(struct csum *cs) { return (u16)cs->value; }
|
||||
|
||||
#endif
|
||||
58
lib/src/uc2_tables.c
Normal file
58
lib/src/uc2_tables.c
Normal file
@@ -0,0 +1,58 @@
|
||||
/* UC2 shared tables: Huffman delta coding and default tree lengths. */
|
||||
|
||||
#include "uc2_internal.h"
|
||||
|
||||
/* Delta-to-absolute lookup: vval[prev][delta_code] = absolute_length.
|
||||
Used by the decompressor's ht_dec(). */
|
||||
const u8 vval[NumDeltaCodes][NumDeltaCodes] = {
|
||||
{ 0,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1},
|
||||
{ 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0},
|
||||
{ 2, 1, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0},
|
||||
{ 3, 2, 4, 1, 5, 6, 7, 8, 9,10,11,12,13, 0},
|
||||
{ 4, 3, 5, 2, 6, 1, 7, 8, 9,10,11,12,13, 0},
|
||||
{ 5, 4, 6, 3, 7, 2, 8, 1, 9,10,11,12,13, 0},
|
||||
{ 6, 5, 7, 4, 8, 3, 9, 2,10, 1,11,12,13, 0},
|
||||
{ 7, 6, 8, 5, 9, 4,10, 3,11, 2,12, 1,13, 0},
|
||||
{ 8, 7, 9, 6,10, 5,11, 4,12, 3,13, 2, 0, 1},
|
||||
{ 9, 8,10, 7,11, 6,12, 5,13, 4, 0, 3, 2, 1},
|
||||
{10, 9,11, 8,12, 7,13, 6, 0, 5, 4, 3, 2, 1},
|
||||
{11,10,12, 9,13, 8, 0, 7, 6, 5, 4, 3, 2, 1},
|
||||
{12,11,13,10, 0, 9, 8, 7, 6, 5, 4, 3, 2, 1},
|
||||
{13,12, 0,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1}
|
||||
};
|
||||
|
||||
/* Inverse delta table: ivval[prev][absolute] = delta_code.
|
||||
Used by the compressor's tree encoder. */
|
||||
const u8 ivval[NumDeltaCodes][NumDeltaCodes] = {
|
||||
{ 0,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1},
|
||||
{13, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12},
|
||||
{13, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12},
|
||||
{13, 3, 1, 0, 2, 4, 5, 6, 7, 8, 9,10,11,12},
|
||||
{13, 5, 3, 1, 0, 2, 4, 6, 7, 8, 9,10,11,12},
|
||||
{13, 7, 5, 3, 1, 0, 2, 4, 6, 8, 9,10,11,12},
|
||||
{13, 9, 7, 5, 3, 1, 0, 2, 4, 6, 8,10,11,12},
|
||||
{13,11, 9, 7, 5, 3, 1, 0, 2, 4, 6, 8,10,12},
|
||||
{12,13,11, 9, 7, 5, 3, 1, 0, 2, 4, 6, 8,10},
|
||||
{10,13,12,11, 9, 7, 5, 3, 1, 0, 2, 4, 6, 8},
|
||||
{ 8,13,12,11,10, 9, 7, 5, 3, 1, 0, 2, 4, 6},
|
||||
{ 6,13,12,11,10, 9, 8, 7, 5, 3, 1, 0, 2, 4},
|
||||
{ 4,13,12,11,10, 9, 8, 7, 6, 5, 3, 1, 0, 2},
|
||||
{ 2,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 1, 0}
|
||||
};
|
||||
|
||||
void uc2_default_lengths(u8 d[NumSymbols])
|
||||
{
|
||||
static const u8 rle[] = {
|
||||
10,9, 1,7, 1,9, 1,7, 19,9, 1,7, 13,8, 1,7, 11,8, 1,7,
|
||||
33,8, 1,7, 35,8, 128,10, 16,6, 12,7, 6,8, 10,9, 16,10,
|
||||
9,4, 9,5, 10,6, 0
|
||||
};
|
||||
const u8 *s = rle;
|
||||
u8 n = s[0];
|
||||
do {
|
||||
u8 v = s[1];
|
||||
s += 2;
|
||||
do { *d++ = v; } while (--n);
|
||||
n = *s;
|
||||
} while (n);
|
||||
}
|
||||
@@ -14,3 +14,9 @@ add_test(NAME extract COMMAND test_extract
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/archives"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/corpus"
|
||||
)
|
||||
|
||||
add_executable(test_roundtrip src/test_roundtrip.c)
|
||||
target_link_libraries(test_roundtrip PRIVATE uc2)
|
||||
target_include_directories(test_roundtrip PRIVATE "${PROJECT_BINARY_DIR}/lib")
|
||||
target_compile_features(test_roundtrip PRIVATE c_std_99)
|
||||
add_test(NAME roundtrip COMMAND test_roundtrip)
|
||||
|
||||
476
tests/src/test_roundtrip.c
Normal file
476
tests/src/test_roundtrip.c
Normal file
@@ -0,0 +1,476 @@
|
||||
/* Round-trip test: compress with uc2_compress, wrap in a minimal UC2 archive,
|
||||
decompress with the library's existing decompressor, verify byte identity.
|
||||
|
||||
The archive format requires a compressed central directory, so we use
|
||||
uc2_compress for both the file data and the cdir. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <uc2/libuc2.h>
|
||||
|
||||
static int failures;
|
||||
|
||||
/* --- Fletcher checksum (must match decompress.c / uc2_internal.h) --- */
|
||||
|
||||
static unsigned short fletcher_checksum(const unsigned char *data, unsigned len)
|
||||
{
|
||||
if (!len) return 0xA55A;
|
||||
unsigned v = 0xA55A;
|
||||
const unsigned char *p = data;
|
||||
const unsigned char *e = p + len - 1;
|
||||
if (v > 0xFFFF)
|
||||
v ^= *p++ << 8;
|
||||
while (p < e) {
|
||||
v ^= p[0] | p[1] << 8;
|
||||
p += 2;
|
||||
}
|
||||
v &= 0xFFFF;
|
||||
if (p == e)
|
||||
v ^= *p | 0x10000;
|
||||
return (unsigned short)(v & 0xFFFF);
|
||||
}
|
||||
|
||||
/* --- Growable buffer --- */
|
||||
|
||||
struct membuf {
|
||||
unsigned char *data;
|
||||
unsigned len, cap;
|
||||
};
|
||||
|
||||
static void membuf_init(struct membuf *b)
|
||||
{
|
||||
b->data = NULL;
|
||||
b->len = b->cap = 0;
|
||||
}
|
||||
|
||||
static void membuf_free(struct membuf *b)
|
||||
{
|
||||
free(b->data);
|
||||
membuf_init(b);
|
||||
}
|
||||
|
||||
static int membuf_write(void *ctx, const void *ptr, unsigned len)
|
||||
{
|
||||
struct membuf *buf = ctx;
|
||||
if (buf->len + len > buf->cap) {
|
||||
unsigned newcap = buf->cap ? buf->cap * 2 : 4096;
|
||||
while (newcap < buf->len + len)
|
||||
newcap *= 2;
|
||||
unsigned char *p = realloc(buf->data, newcap);
|
||||
if (!p) return -1;
|
||||
buf->data = p;
|
||||
buf->cap = newcap;
|
||||
}
|
||||
memcpy(buf->data + buf->len, ptr, len);
|
||||
buf->len += len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void membuf_append(struct membuf *b, const void *data, unsigned len)
|
||||
{
|
||||
membuf_write(b, data, len);
|
||||
}
|
||||
|
||||
/* --- Compressor read callback --- */
|
||||
|
||||
struct mem_reader {
|
||||
const unsigned char *data;
|
||||
unsigned pos, len;
|
||||
};
|
||||
|
||||
static int mem_read(void *ctx, void *buf, unsigned len)
|
||||
{
|
||||
struct mem_reader *mr = ctx;
|
||||
unsigned avail = mr->len - mr->pos;
|
||||
if (len > avail) len = avail;
|
||||
if (len > 0) {
|
||||
memcpy(buf, mr->data + mr->pos, len);
|
||||
mr->pos += len;
|
||||
}
|
||||
return (int)len;
|
||||
}
|
||||
|
||||
/* --- Library I/O callbacks (read from membuf at offset) --- */
|
||||
|
||||
static int archive_read(void *ctx, unsigned pos, void *buf, unsigned len)
|
||||
{
|
||||
struct membuf *mb = ctx;
|
||||
if (pos >= mb->len) return 0;
|
||||
unsigned avail = mb->len - pos;
|
||||
if (len > avail) len = avail;
|
||||
memcpy(buf, mb->data + pos, len);
|
||||
return (int)len;
|
||||
}
|
||||
|
||||
static void *my_alloc(void *ctx, unsigned size) { (void)ctx; return malloc(size); }
|
||||
static void my_free(void *ctx, void *ptr) { (void)ctx; free(ptr); }
|
||||
|
||||
/* --- Little-endian helpers --- */
|
||||
|
||||
static void put_u16le(unsigned char *p, unsigned v)
|
||||
{
|
||||
p[0] = v & 0xFF;
|
||||
p[1] = (v >> 8) & 0xFF;
|
||||
}
|
||||
|
||||
static void put_u32le(unsigned char *p, unsigned v)
|
||||
{
|
||||
p[0] = v & 0xFF;
|
||||
p[1] = (v >> 8) & 0xFF;
|
||||
p[2] = (v >> 16) & 0xFF;
|
||||
p[3] = (v >> 24) & 0xFF;
|
||||
}
|
||||
|
||||
/* --- Build a minimal UC2 archive containing one file --- */
|
||||
|
||||
static int compress_data(const unsigned char *data, unsigned len, int level,
|
||||
struct membuf *out, unsigned short *csum_out)
|
||||
{
|
||||
struct mem_reader mr = { .data = data, .pos = 0, .len = len };
|
||||
unsigned csize = 0;
|
||||
membuf_init(out);
|
||||
int ret = uc2_compress(level, mem_read, &mr, membuf_write, out,
|
||||
len, csum_out, &csize);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int build_archive(const unsigned char *file_compressed, unsigned file_csize,
|
||||
unsigned file_orig_size, unsigned short file_csum,
|
||||
int level, struct membuf *archive)
|
||||
{
|
||||
/*
|
||||
* Archive layout:
|
||||
* [0] FHEAD (13 bytes)
|
||||
* [13] XHEAD (16 bytes)
|
||||
* [29] file compressed bitstream
|
||||
* [29+fc] COMPRESS record for cdir (10 bytes)
|
||||
* [29+fc+10] cdir compressed bitstream
|
||||
*
|
||||
* Raw cdir layout:
|
||||
* OHEAD(type=2) 1 byte
|
||||
* OSMETA(parent4,attr1,time4,name11,hid1,tag1) 22 bytes
|
||||
* FILEMETA(length4,fletch2) 6 bytes
|
||||
* COMPRESS(clen4,method2,master4) 10 bytes
|
||||
* LOCATION(volume4,offset4) 8 bytes
|
||||
* OHEAD(type=4 EndOfCdir) 1 byte
|
||||
* XTAIL(beta1,lock1,serial4,label11) 17 bytes
|
||||
* aserial(4) 4 bytes
|
||||
* Total: 69 bytes
|
||||
*/
|
||||
|
||||
unsigned file_data_offset = 29;
|
||||
|
||||
/* Build raw cdir */
|
||||
unsigned char raw_cdir[69];
|
||||
unsigned char *p = raw_cdir;
|
||||
|
||||
/* OHEAD: FileEntry */
|
||||
*p++ = 2;
|
||||
|
||||
/* OSMETA */
|
||||
put_u32le(p, 0); p += 4; /* parent = root */
|
||||
*p++ = 0x20; /* attrib = archive */
|
||||
put_u32le(p, 0); p += 4; /* time */
|
||||
memcpy(p, "TEST DAT", 11); p += 11; /* name */
|
||||
*p++ = 0; /* hidden */
|
||||
*p++ = 0; /* tag */
|
||||
|
||||
/* FILEMETA */
|
||||
put_u32le(p, file_orig_size); p += 4; /* length */
|
||||
put_u16le(p, file_csum); p += 2; /* fletch */
|
||||
|
||||
/* COMPRESS */
|
||||
put_u32le(p, file_csize); p += 4; /* compressedLength */
|
||||
put_u16le(p, 1); p += 2; /* method = ultra */
|
||||
put_u32le(p, 1); p += 4; /* masterPrefix = NoMaster */
|
||||
|
||||
/* LOCATION */
|
||||
put_u32le(p, 1); p += 4; /* volume = 1 */
|
||||
put_u32le(p, file_data_offset); p += 4; /* offset */
|
||||
|
||||
/* EndOfCdir */
|
||||
*p++ = 4;
|
||||
|
||||
/* XTAIL */
|
||||
*p++ = 0; /* beta */
|
||||
*p++ = 0; /* lock */
|
||||
put_u32le(p, 0); p += 4; /* serial */
|
||||
memset(p, ' ', 11); p += 11; /* label */
|
||||
|
||||
/* aserial */
|
||||
put_u32le(p, 0); p += 4;
|
||||
|
||||
unsigned raw_cdir_len = (unsigned)(p - raw_cdir);
|
||||
|
||||
/* Compute Fletcher checksum of raw cdir */
|
||||
unsigned short cdir_csum = fletcher_checksum(raw_cdir, raw_cdir_len);
|
||||
|
||||
/* Compress the cdir */
|
||||
struct membuf cdir_compressed;
|
||||
unsigned short cdir_compress_csum = 0;
|
||||
int ret = compress_data(raw_cdir, raw_cdir_len, level,
|
||||
&cdir_compressed, &cdir_compress_csum);
|
||||
if (ret < 0) {
|
||||
membuf_free(&cdir_compressed);
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned cdir_compress_offset = 29 + file_csize;
|
||||
unsigned total_size = cdir_compress_offset + 10 + cdir_compressed.len;
|
||||
|
||||
/* Assemble archive */
|
||||
membuf_init(archive);
|
||||
unsigned char header[29 + 10]; /* FHEAD + XHEAD + room for COMPRESS */
|
||||
|
||||
/* FHEAD (13 bytes) */
|
||||
unsigned component_length = total_size - 13;
|
||||
put_u32le(header + 0, 0x1A324355); /* "UC2\x1a" */
|
||||
put_u32le(header + 4, component_length); /* componentLength */
|
||||
put_u32le(header + 8, component_length + 0x01B2C3D4); /* componentLength2 */
|
||||
header[12] = 0; /* damageProtected */
|
||||
|
||||
/* XHEAD (16 bytes) */
|
||||
put_u32le(header + 13, 1); /* cdir.volume */
|
||||
put_u32le(header + 17, cdir_compress_offset); /* cdir.offset */
|
||||
put_u16le(header + 21, cdir_csum); /* fletch */
|
||||
header[23] = 0; /* busy */
|
||||
put_u16le(header + 24, 200); /* versionMadeBy = 2.00 */
|
||||
put_u16le(header + 26, 200); /* versionNeededToExtract = 2.00 */
|
||||
header[28] = 0; /* dummy */
|
||||
|
||||
membuf_append(archive, header, 29);
|
||||
|
||||
/* File compressed bitstream */
|
||||
membuf_append(archive, file_compressed, file_csize);
|
||||
|
||||
/* COMPRESS record for cdir (10 bytes) */
|
||||
unsigned char cdir_compress_rec[10];
|
||||
put_u32le(cdir_compress_rec + 0, cdir_compressed.len); /* compressedLength */
|
||||
put_u16le(cdir_compress_rec + 4, 1); /* method = ultra */
|
||||
put_u32le(cdir_compress_rec + 6, 1); /* masterPrefix = NoMaster */
|
||||
membuf_append(archive, cdir_compress_rec, 10);
|
||||
|
||||
/* Cdir compressed bitstream */
|
||||
membuf_append(archive, cdir_compressed.data, cdir_compressed.len);
|
||||
|
||||
membuf_free(&cdir_compressed);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void test_roundtrip(const char *name, const unsigned char *input,
|
||||
unsigned input_len, int level)
|
||||
{
|
||||
printf(" %s (level %d, %u bytes): ", name, level, input_len);
|
||||
|
||||
/* Compress file data */
|
||||
struct membuf file_compressed;
|
||||
unsigned short file_csum = 0;
|
||||
int ret = compress_data(input, input_len, level, &file_compressed, &file_csum);
|
||||
if (ret < 0) {
|
||||
printf("FAIL (compress returned %d)\n", ret);
|
||||
failures++;
|
||||
membuf_free(&file_compressed);
|
||||
return;
|
||||
}
|
||||
printf("compressed %u -> %u, ", input_len, file_compressed.len);
|
||||
|
||||
/* Build archive */
|
||||
struct membuf archive;
|
||||
ret = build_archive(file_compressed.data, file_compressed.len,
|
||||
input_len, file_csum, level, &archive);
|
||||
membuf_free(&file_compressed);
|
||||
if (ret < 0) {
|
||||
printf("FAIL (build_archive returned %d)\n", ret);
|
||||
failures++;
|
||||
membuf_free(&archive);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Open archive with the library */
|
||||
struct uc2_io io = {
|
||||
.read = archive_read,
|
||||
.alloc = my_alloc,
|
||||
.free = my_free
|
||||
};
|
||||
uc2_handle uc2 = uc2_open(&io, &archive);
|
||||
if (!uc2) {
|
||||
printf("FAIL (uc2_open)\n");
|
||||
failures++;
|
||||
membuf_free(&archive);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Read cdir */
|
||||
struct uc2_entry entry;
|
||||
ret = uc2_read_cdir(uc2, &entry);
|
||||
if (ret < 0) {
|
||||
printf("FAIL (read_cdir: %s)\n", uc2_message(uc2, ret));
|
||||
failures++;
|
||||
uc2_close(uc2);
|
||||
membuf_free(&archive);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Skip tags */
|
||||
while (ret == UC2_TaggedEntry) {
|
||||
char *tag; void *data; unsigned size;
|
||||
ret = uc2_get_tag(uc2, &entry, &tag, &data, &size);
|
||||
if (ret < 0) break;
|
||||
}
|
||||
|
||||
/* Read to end */
|
||||
struct uc2_entry dummy;
|
||||
ret = uc2_read_cdir(uc2, &dummy);
|
||||
if (ret != UC2_End) {
|
||||
printf("FAIL (expected UC2_End, got %d)\n", ret);
|
||||
failures++;
|
||||
uc2_close(uc2);
|
||||
membuf_free(&archive);
|
||||
return;
|
||||
}
|
||||
|
||||
char label[12];
|
||||
uc2_finish_cdir(uc2, label);
|
||||
|
||||
/* Extract */
|
||||
struct membuf output;
|
||||
membuf_init(&output);
|
||||
ret = uc2_extract(uc2, &entry.xi, entry.size, membuf_write, &output);
|
||||
uc2_close(uc2);
|
||||
membuf_free(&archive);
|
||||
|
||||
if (ret < 0) {
|
||||
printf("FAIL (extract: error %d)\n", ret);
|
||||
failures++;
|
||||
membuf_free(&output);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Verify */
|
||||
if (output.len != input_len) {
|
||||
printf("FAIL (size: got %u, expected %u)\n", output.len, input_len);
|
||||
failures++;
|
||||
} else if (input_len > 0 && memcmp(output.data, input, input_len) != 0) {
|
||||
unsigned diff = 0;
|
||||
while (diff < input_len && output.data[diff] == input[diff])
|
||||
diff++;
|
||||
printf("FAIL (mismatch at byte %u: got 0x%02x, expected 0x%02x)\n",
|
||||
diff, output.data[diff], input[diff]);
|
||||
failures++;
|
||||
} else {
|
||||
printf("OK\n");
|
||||
}
|
||||
|
||||
membuf_free(&output);
|
||||
}
|
||||
|
||||
/* Test data generators */
|
||||
|
||||
static unsigned char *gen_zeros(unsigned len)
|
||||
{
|
||||
return calloc(1, len ? len : 1);
|
||||
}
|
||||
|
||||
static unsigned char *gen_random(unsigned len, unsigned seed)
|
||||
{
|
||||
unsigned char *data = malloc(len ? len : 1);
|
||||
unsigned s = seed;
|
||||
for (unsigned i = 0; i < len; i++) {
|
||||
s = s * 1103515245 + 12345;
|
||||
data[i] = (unsigned char)(s >> 16);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
static unsigned char *gen_text(unsigned *out_len)
|
||||
{
|
||||
const char *text =
|
||||
"The quick brown fox jumps over the lazy dog. "
|
||||
"Pack my box with five dozen liquor jugs. "
|
||||
"How vexingly quick daft zebras jump. ";
|
||||
unsigned base = (unsigned)strlen(text);
|
||||
unsigned len = base * 15;
|
||||
unsigned char *data = malloc(len);
|
||||
for (unsigned i = 0; i < len; i++)
|
||||
data[i] = (unsigned char)text[i % base];
|
||||
*out_len = len;
|
||||
return data;
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int levels[] = {2, 3, 4, 5};
|
||||
int nlev = (int)(sizeof levels / sizeof *levels);
|
||||
|
||||
for (int li = 0; li < nlev; li++) {
|
||||
int level = levels[li];
|
||||
printf("Level %d:\n", level);
|
||||
|
||||
/* Empty */
|
||||
{
|
||||
unsigned char empty = 0;
|
||||
test_roundtrip("empty", &empty, 0, level);
|
||||
}
|
||||
|
||||
/* Single byte */
|
||||
{
|
||||
unsigned char one = 'A';
|
||||
test_roundtrip("single_byte", &one, 1, level);
|
||||
}
|
||||
|
||||
/* Short string */
|
||||
{
|
||||
const unsigned char *hi = (const unsigned char *)"Hi";
|
||||
test_roundtrip("short_2", hi, 2, level);
|
||||
}
|
||||
|
||||
/* All zeros 1K */
|
||||
{
|
||||
unsigned char *z = gen_zeros(1024);
|
||||
test_roundtrip("zeros_1k", z, 1024, level);
|
||||
free(z);
|
||||
}
|
||||
|
||||
/* All zeros 64K */
|
||||
{
|
||||
unsigned char *z = gen_zeros(65536);
|
||||
test_roundtrip("zeros_64k", z, 65536, level);
|
||||
free(z);
|
||||
}
|
||||
|
||||
/* Random data (incompressible) */
|
||||
{
|
||||
unsigned char *r = gen_random(1024, 42);
|
||||
test_roundtrip("random_1k", r, 1024, level);
|
||||
free(r);
|
||||
}
|
||||
|
||||
/* Repeated text */
|
||||
{
|
||||
unsigned tlen;
|
||||
unsigned char *t = gen_text(&tlen);
|
||||
test_roundtrip("text_2k", t, tlen, level);
|
||||
free(t);
|
||||
}
|
||||
|
||||
/* Pattern */
|
||||
{
|
||||
unsigned len = 4096;
|
||||
unsigned char *d = malloc(len);
|
||||
for (unsigned i = 0; i < len; i++)
|
||||
d[i] = (unsigned char)(i % 17);
|
||||
test_roundtrip("pattern_4k", d, len, level);
|
||||
free(d);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if (failures) {
|
||||
fprintf(stderr, "%d test(s) FAILED\n", failures);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
printf("test_roundtrip: all tests passed\n");
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
Reference in New Issue
Block a user