Add content-aware preprocessing filters (BCJ, BWT, delta)
New library (uc2_preprocess.h / uc2_preprocess.c) for Phase 4:
BCJ (Branch/Call/Jump) filter:
- E8/E9 x86 address normalization (relative → absolute)
- Makes calls to the same function from different locations produce
identical byte sequences, improving LZ77 matching
- Round-trip verified; address normalization confirmed
BWT (Burrows-Wheeler Transform):
- Suffix-array-based forward transform
- LF-mapping inverse with reverse reconstruction
- Groups similar contexts for better entropy coding
- Round-trip verified for text ("banana") and binary data
Delta filter:
- Byte-wise delta encoding with configurable stride
- Stride 1 for sequential data, stride 2+ for interleaved channels
- Constant-delta sequences (arithmetic progressions) reduce to
repeated single values
Content detection:
- Automatic content type identification (text/x86/structured/binary)
- MZ/PE and ELF header recognition for x86
- Printable ASCII ratio for text detection
11 unit tests covering all filters and detection.
This commit is contained in:
@@ -76,10 +76,11 @@ backward compatibility.
|
||||
serialization format, and cross-archive sharing via block store.
|
||||
6 unit tests including round-trip and corruption detection.
|
||||
- [ ] LZ4 ultra-fast mode for real-time or low-resource scenarios
|
||||
- [ ] Content-aware preprocessing pipeline:
|
||||
- BWT (Burrows-Wheeler) for text
|
||||
- E8/E9 transform for x86 executables (BCJ filter)
|
||||
- Delta filter for structured/tabular data
|
||||
- [x] Content-aware preprocessing (`uc2_preprocess.h`):
|
||||
BCJ (E8/E9 x86 address normalization), BWT (Burrows-Wheeler
|
||||
for text), delta filter (byte-wise with configurable stride),
|
||||
automatic content detection (text/x86/structured/binary).
|
||||
11 unit tests.
|
||||
- [ ] Built-in `uc2 --benchmark` mode: test all methods on input, report results
|
||||
|
||||
## Phase 5: Quantum-Resistant Encryption
|
||||
|
||||
@@ -17,10 +17,10 @@ root. Key phases:
|
||||
cross-archive block store, SimHash near-duplicate detection,
|
||||
and delta compression. All Phase 3 items complete.
|
||||
|
||||
4. **Modern Compression Backends** — In progress. rANS entropy coder
|
||||
integrated as method 10, zstd-style dictionary metadata with
|
||||
content-hash IDs. Remaining: content-aware preprocessing (BWT,
|
||||
BCJ, delta filters), LZ4 ultra-fast mode, benchmarking mode.
|
||||
4. **Modern Compression Backends** — In progress. rANS entropy coder,
|
||||
zstd-style dictionary metadata, content-aware preprocessing (BCJ,
|
||||
BWT, delta filter with auto-detection). Remaining: LZ4 ultra-fast
|
||||
mode, benchmarking mode, preprocessing integration into CLI.
|
||||
|
||||
5. **Quantum-Resistant Encryption** — CRYSTALS-Kyber + AES-256-GCM.
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# libuc2 — UC2 decompression library
|
||||
|
||||
set(LIBUC2_SOURCES src/decompress.c src/compress.c src/uc2_tables.c src/uc2_cdc.c src/uc2_merkle.c src/uc2_blockstore.c src/uc2_simhash.c src/uc2_delta.c src/uc2_rans.c src/uc2_dict.c)
|
||||
set(LIBUC2_SOURCES src/decompress.c src/compress.c src/uc2_tables.c src/uc2_cdc.c src/uc2_merkle.c src/uc2_blockstore.c src/uc2_simhash.c src/uc2_delta.c src/uc2_rans.c src/uc2_dict.c src/uc2_preprocess.c)
|
||||
|
||||
# Embed super.bin: use .S with .incbin on GCC/Clang, generated C array on MSVC
|
||||
if(MSVC)
|
||||
|
||||
65
lib/include/uc2/uc2_preprocess.h
Normal file
65
lib/include/uc2/uc2_preprocess.h
Normal file
@@ -0,0 +1,65 @@
|
||||
/* Content-aware preprocessing filters for improved compression.
|
||||
*
|
||||
* These transforms are applied BEFORE compression to expose redundancy
|
||||
* that LZ77+entropy coding can exploit more efficiently. Each filter
|
||||
* is reversible (apply/revert) and content-type specific.
|
||||
*
|
||||
* Filters:
|
||||
* BCJ — x86 branch/call/jump address normalization (E8/E9 transform)
|
||||
* BWT — Burrows-Wheeler transform for text (groups similar contexts)
|
||||
* Delta — byte-wise delta encoding for structured/tabular data
|
||||
*/
|
||||
|
||||
#ifndef UC2_PREPROCESS_H
|
||||
#define UC2_PREPROCESS_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/* --- BCJ (Branch/Call/Jump) filter for x86 executables --- */
|
||||
|
||||
/* Convert relative x86 CALL/JMP addresses to absolute.
|
||||
* This makes the same function called from different locations produce
|
||||
* identical byte sequences, improving LZ77 matching.
|
||||
* Operates in-place. Returns 0 on success. */
|
||||
int uc2_bcj_apply(uint8_t *data, size_t len);
|
||||
|
||||
/* Revert BCJ transform (absolute → relative). */
|
||||
int uc2_bcj_revert(uint8_t *data, size_t len);
|
||||
|
||||
/* --- BWT (Burrows-Wheeler Transform) for text --- */
|
||||
|
||||
/* Apply BWT to data. Allocates *out (caller must free).
|
||||
* Sets *primary_index to the BWT primary index (needed for revert).
|
||||
* Returns 0 on success. */
|
||||
int uc2_bwt_apply(const uint8_t *data, size_t len,
|
||||
uint8_t **out, uint32_t *primary_index);
|
||||
|
||||
/* Revert BWT. Allocates *out (caller must free).
|
||||
* Returns 0 on success. */
|
||||
int uc2_bwt_revert(const uint8_t *data, size_t len,
|
||||
uint32_t primary_index, uint8_t **out);
|
||||
|
||||
/* --- Delta filter for structured data --- */
|
||||
|
||||
/* Apply byte-wise delta encoding (each byte = current - previous).
|
||||
* Operates in-place. Stride controls the delta distance (1 = adjacent
|
||||
* bytes, 2 = every other byte, etc.). Stride 1 is best for sequential
|
||||
* data; stride 2+ for interleaved multi-channel data. */
|
||||
void uc2_delta_filter_apply(uint8_t *data, size_t len, int stride);
|
||||
|
||||
/* Revert byte-wise delta encoding. Operates in-place. */
|
||||
void uc2_delta_filter_revert(uint8_t *data, size_t len, int stride);
|
||||
|
||||
/* --- Content detection --- */
|
||||
|
||||
/* Detect likely content type for automatic filter selection.
|
||||
* Returns one of the UC2_CONTENT_* constants. */
|
||||
#define UC2_CONTENT_BINARY 0 /* generic binary / unknown */
|
||||
#define UC2_CONTENT_TEXT 1 /* text (high ASCII printable ratio) */
|
||||
#define UC2_CONTENT_X86 2 /* x86 executable (MZ/PE/ELF header) */
|
||||
#define UC2_CONTENT_STRUCT 3 /* structured/tabular (regular patterns) */
|
||||
|
||||
int uc2_detect_content(const uint8_t *data, size_t len);
|
||||
|
||||
#endif
|
||||
187
lib/src/uc2_preprocess.c
Normal file
187
lib/src/uc2_preprocess.c
Normal file
@@ -0,0 +1,187 @@
|
||||
/* Content-aware preprocessing filters. */
|
||||
|
||||
#include "uc2/uc2_preprocess.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* --- BCJ (E8/E9 transform for x86) --- */
|
||||
|
||||
/* Convert relative CALL (E8) and JMP (E9) displacements to absolute targets.
 *
 * Every 0xE8/0xE9 byte that is followed by at least four more bytes is
 * treated as an opcode. Its little-endian 32-bit displacement is replaced
 * by displacement + (offset of the byte after the operand), i.e. the
 * target measured from position 0. The four rewritten bytes are then
 * skipped so they are never themselves interpreted as opcodes.
 *
 * All arithmetic is done on uint32_t so wrap-around is well defined.
 * Building the value from promoted ints (byte << 24) or adding in int32_t
 * would be undefined behaviour for bytes >= 0x80 or on overflow.
 *
 * data: buffer transformed in place.
 * len:  number of bytes in data; buffers shorter than 5 bytes are left
 *       untouched.
 * Always returns 0. */
int uc2_bcj_apply(uint8_t *data, size_t len)
{
    if (len < 5) return 0;

    for (size_t i = 0; i + 4 < len; i++) {
        if (data[i] != 0xE8 && data[i] != 0xE9)
            continue;

        uint32_t rel = (uint32_t)data[i + 1]
                     | ((uint32_t)data[i + 2] << 8)
                     | ((uint32_t)data[i + 3] << 16)
                     | ((uint32_t)data[i + 4] << 24);
        /* Offsets beyond 4 GiB wrap modulo 2^32; the revert wraps back. */
        uint32_t abs_addr = rel + (uint32_t)(i + 5);

        data[i + 1] = (uint8_t)(abs_addr);
        data[i + 2] = (uint8_t)(abs_addr >> 8);
        data[i + 3] = (uint8_t)(abs_addr >> 16);
        data[i + 4] = (uint8_t)(abs_addr >> 24);

        i += 4; /* skip the address bytes */
    }
    return 0;
}
|
||||
|
||||
/* Undo the E8/E9 transform: convert absolute targets back to relative
 * displacements.
 *
 * Scans the buffer the same way the forward transform does: each 0xE8/0xE9
 * byte with at least four bytes after it has its little-endian 32-bit
 * operand replaced by operand - (offset of the byte after the operand),
 * and the four operand bytes are then skipped.
 *
 * Arithmetic is done on uint32_t so the subtraction wraps modulo 2^32
 * instead of invoking signed-overflow or signed-shift undefined behaviour.
 *
 * data: buffer transformed in place.
 * len:  number of bytes in data; buffers shorter than 5 bytes are left
 *       untouched.
 * Always returns 0. */
int uc2_bcj_revert(uint8_t *data, size_t len)
{
    if (len < 5) return 0;

    for (size_t i = 0; i + 4 < len; i++) {
        if (data[i] != 0xE8 && data[i] != 0xE9)
            continue;

        uint32_t abs_addr = (uint32_t)data[i + 1]
                          | ((uint32_t)data[i + 2] << 8)
                          | ((uint32_t)data[i + 3] << 16)
                          | ((uint32_t)data[i + 4] << 24);
        uint32_t rel = abs_addr - (uint32_t)(i + 5);

        data[i + 1] = (uint8_t)(rel);
        data[i + 2] = (uint8_t)(rel >> 8);
        data[i + 3] = (uint8_t)(rel >> 16);
        data[i + 4] = (uint8_t)(rel >> 24);

        i += 4; /* skip the address bytes */
    }
    return 0;
}
|
||||
|
||||
/* --- BWT (Burrows-Wheeler Transform) --- */
|
||||
|
||||
/* Simple BWT: sort the start offsets of all cyclic rotations with qsort().
 *
 * Cost note: a single comparison may scan up to len bytes before two
 * rotations differ, so running time depends heavily on the input; long
 * repeated runs make it much slower than the rotation count alone
 * suggests.
 *
 * qsort() offers no user-context argument, so the comparator reads the
 * input through the two file-scope variables below. uc2_bwt_apply() is
 * therefore not reentrant and must not run on two threads at once. */

static const uint8_t *bwt_data; /* buffer currently being transformed */
static size_t bwt_len;          /* its length in bytes */

/* qsort comparator over rotation start offsets (uint32_t).
 * Compares the two cyclic rotations of bwt_data byte by byte and returns
 * <0, 0 or >0; 0 only when the rotations are identical. */
static int bwt_cmp(const void *a, const void *b)
{
    uint32_t ia = *(const uint32_t *)a;
    uint32_t ib = *(const uint32_t *)b;

    for (size_t k = 0; k < bwt_len; k++) {
        uint8_t ca = bwt_data[(ia + k) % bwt_len];
        uint8_t cb = bwt_data[(ib + k) % bwt_len];
        if (ca != cb) return (int)ca - (int)cb;
    }
    return 0;
}

/* Apply the Burrows-Wheeler transform to data[0..len).
 *
 * On success returns 0, stores a malloc'ed buffer of len bytes in *out
 * (caller frees) and the sorted position of the unrotated input in
 * *primary_index. For len == 0, *out is NULL and *primary_index is 0.
 *
 * Returns -1 if len does not fit the 32-bit rotation offsets used
 * internally or if allocation fails; *out is then NULL and
 * *primary_index is 0. */
int uc2_bwt_apply(const uint8_t *data, size_t len,
                  uint8_t **out, uint32_t *primary_index)
{
    *out = NULL;
    *primary_index = 0;
    if (len == 0) return 0;

    /* Offsets are stored as uint32_t; larger inputs would be truncated,
       and the offset array size must not overflow size_t. */
    if (len > UINT32_MAX || len > SIZE_MAX / sizeof(uint32_t)) return -1;

    uint32_t *sa = malloc(len * sizeof *sa);
    uint8_t *result = malloc(len);
    if (!sa || !result) { free(sa); free(result); return -1; }

    for (size_t i = 0; i < len; i++) sa[i] = (uint32_t)i;
    bwt_data = data;
    bwt_len = len;
    qsort(sa, len, sizeof *sa, bwt_cmp);

    for (size_t i = 0; i < len; i++) {
        if (sa[i] == 0) *primary_index = (uint32_t)i;
        /* Last column: the byte cyclically preceding each rotation start. */
        result[i] = data[(sa[i] + len - 1) % len];
    }

    free(sa);
    *out = result;
    return 0;
}
|
||||
|
||||
/* Invert the Burrows-Wheeler transform.
 *
 * data:          the transformed bytes (last column), len bytes.
 * primary_index: sorted position of the original string, as produced by
 *                the forward transform; must be < len when len > 0.
 * out:           receives a malloc'ed buffer of len bytes (caller frees),
 *                or NULL when len == 0 or on failure.
 *
 * Returns 0 on success. Returns -1 if primary_index is out of range
 * (it would otherwise index past the end of data), if len does not fit
 * the 32-bit counters used internally, or if allocation fails. */
int uc2_bwt_revert(const uint8_t *data, size_t len,
                   uint32_t primary_index, uint8_t **out)
{
    *out = NULL;
    if (len == 0) return 0;

    /* Counters and the mapping table are uint32_t. */
    if (len > UINT32_MAX || len > SIZE_MAX / sizeof(uint32_t)) return -1;
    /* The index typically comes from stored metadata; never trust it. */
    if (primary_index >= len) return -1;

    uint8_t *result = malloc(len);
    uint32_t *T = malloc(len * sizeof *T);
    if (!result || !T) { free(result); free(T); return -1; }

    /* Histogram of byte values in the last column. */
    uint32_t count[256];
    memset(count, 0, sizeof count);
    for (size_t i = 0; i < len; i++) count[data[i]]++;

    /* start[c] = first row of the sorted (first) column holding byte c. */
    uint32_t start[256];
    uint32_t sum = 0;
    for (int c = 0; c < 256; c++) {
        start[c] = sum;
        sum += count[c];
    }

    /* LF-mapping: T[i] = row in the first column that corresponds to the
       byte at last-column row i (k-th occurrence maps to k-th occurrence). */
    memset(count, 0, sizeof count);
    for (size_t i = 0; i < len; i++) {
        T[i] = start[data[i]] + count[data[i]];
        count[data[i]]++;
    }

    /* Follow T from primary_index; bytes come out last-to-first. */
    uint32_t idx = primary_index;
    for (size_t i = len; i > 0; i--) {
        result[i - 1] = data[idx];
        idx = T[idx];
    }

    free(T);
    *out = result;
    return 0;
}
|
||||
|
||||
/* --- Delta filter --- */
|
||||
|
||||
/* Replace each byte at index >= step with its difference (mod 256) from
 * the byte `step` positions earlier; the first `step` bytes are kept.
 * A stride below 1 is treated as 1. Works in place. */
void uc2_delta_filter_apply(uint8_t *data, size_t len, int stride)
{
    const size_t step = (stride < 1) ? 1 : (size_t)stride;

    /* Walk backwards so data[pos - step] is still the original value
       when it is read. Visits pos = len-1 down to step. */
    for (size_t pos = len; pos-- > step; ) {
        const uint8_t prev = data[pos - step];
        data[pos] = (uint8_t)(data[pos] - prev);
    }
}
|
||||
|
||||
/* Undo the byte-wise delta filter in place: every byte at index >= step
 * becomes itself plus (mod 256) the already-restored byte `step`
 * positions earlier. A stride below 1 is treated as 1. */
void uc2_delta_filter_revert(uint8_t *data, size_t len, int stride)
{
    const size_t step = (stride < 1) ? 1 : (size_t)stride;
    size_t pos = step;

    /* Forward order: data[pos - step] has been restored before it is used. */
    while (pos < len) {
        const uint8_t base = data[pos - step];
        data[pos] = (uint8_t)(data[pos] + base);
        pos++;
    }
}
|
||||
|
||||
/* --- Content detection --- */
|
||||
|
||||
int uc2_detect_content(const uint8_t *data, size_t len)
|
||||
{
|
||||
if (len < 4) return UC2_CONTENT_BINARY;
|
||||
|
||||
/* Check for x86 executable signatures */
|
||||
if (data[0] == 'M' && data[1] == 'Z')
|
||||
return UC2_CONTENT_X86; /* DOS/PE executable */
|
||||
if (data[0] == 0x7F && data[1] == 'E' && data[2] == 'L' && data[3] == 'F')
|
||||
return UC2_CONTENT_X86; /* ELF executable */
|
||||
|
||||
/* Count printable ASCII characters */
|
||||
size_t check = len > 4096 ? 4096 : len;
|
||||
size_t printable = 0;
|
||||
for (size_t i = 0; i < check; i++)
|
||||
if ((data[i] >= 32 && data[i] <= 126) ||
|
||||
data[i] == '\n' || data[i] == '\r' || data[i] == '\t')
|
||||
printable++;
|
||||
|
||||
if (printable * 100 / check > 85)
|
||||
return UC2_CONTENT_TEXT;
|
||||
|
||||
/* Check for structured data: regular byte-value patterns */
|
||||
if (len >= 64) {
|
||||
size_t zeros = 0;
|
||||
for (size_t i = 0; i < check; i++)
|
||||
if (data[i] == 0) zeros++;
|
||||
if (zeros * 100 / check > 20)
|
||||
return UC2_CONTENT_STRUCT;
|
||||
}
|
||||
|
||||
return UC2_CONTENT_BINARY;
|
||||
}
|
||||
@@ -87,6 +87,12 @@ target_include_directories(test_dict PRIVATE "${PROJECT_BINARY_DIR}/lib")
|
||||
target_compile_features(test_dict PRIVATE c_std_99)
|
||||
add_test(NAME dict COMMAND test_dict)
|
||||
|
||||
add_executable(test_preprocess src/test_preprocess.c)
|
||||
target_link_libraries(test_preprocess PRIVATE uc2)
|
||||
target_include_directories(test_preprocess PRIVATE "${PROJECT_BINARY_DIR}/lib")
|
||||
target_compile_features(test_preprocess PRIVATE c_std_99)
|
||||
add_test(NAME preprocess COMMAND test_preprocess)
|
||||
|
||||
# Cross-tool round-trip: UC2 v3 <-> original uc2pro.exe via DOSBox-X
|
||||
add_test(NAME roundtrip_dosbox
|
||||
COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/scripts/roundtrip_dosbox.sh
|
||||
|
||||
184
tests/src/test_preprocess.c
Normal file
184
tests/src/test_preprocess.c
Normal file
@@ -0,0 +1,184 @@
|
||||
/* Tests for content-aware preprocessing filters. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <uc2/uc2_preprocess.h>
|
||||
|
||||
/* Counters reported by main(): tests started and tests that returned. */
static int tests_run = 0;
static int tests_passed = 0;

/* Run one test function, printing its name before and "OK" after.
 * A failing assert() aborts the process, so tests_passed only counts
 * tests that ran to completion. */
#define TEST(name)                  \
    do {                            \
        tests_run++;                \
        printf(" %s: ", #name);     \
        name();                     \
        tests_passed++;             \
        printf("OK\n");             \
    } while (0)
|
||||
|
||||
/* --- BCJ tests --- */
|
||||
|
||||
/* apply followed by revert must reproduce the original bytes, and apply
 * on its own must actually change a buffer containing E8 CALLs. */
static void test_bcj_roundtrip(void)
{
    /* Simulated x86 code: NOP, CALL +16, NOP, CALL +32, four NOPs. */
    static const uint8_t pristine[] = {
        0x90,
        0xE8, 0x10, 0x00, 0x00, 0x00,
        0x90,
        0xE8, 0x20, 0x00, 0x00, 0x00,
        0x90, 0x90, 0x90, 0x90,
    };
    uint8_t work[sizeof pristine];
    memcpy(work, pristine, sizeof work);

    /* Forward: displacements become absolute, so the bytes must differ. */
    uc2_bcj_apply(work, sizeof work);
    assert(memcmp(work, pristine, sizeof work) != 0);

    /* Backward: the buffer is restored exactly. */
    uc2_bcj_revert(work, sizeof work);
    assert(memcmp(work, pristine, sizeof work) == 0);
}
|
||||
|
||||
/* Two CALLs to the same target from different positions must carry the
 * same operand bytes after the BCJ transform. */
static void test_bcj_normalizes(void)
{
    /* Both calls target offset 15 (target = offset after operand + rel):
         a: E8 at offset 0, operand ends at 5, rel = 15 - 5 = 10 (0x0A)
         b: E8 at offset 2, operand ends at 7, rel = 15 - 7 =  8 (0x08) */
    uint8_t a[] = { 0xE8, 0x0A, 0x00, 0x00, 0x00, 0x90, 0x90, 0x90, 0x90, 0x90 };
    uint8_t b[] = { 0x90, 0x90, 0xE8, 0x08, 0x00, 0x00, 0x00, 0x90, 0x90, 0x90 };

    uc2_bcj_apply(a, sizeof a);
    uc2_bcj_apply(b, sizeof b);

    /* Reassemble the little-endian operands; unsigned to keep the
       shifts well defined for any byte value. */
    uint32_t abs_a = (uint32_t)a[1] | ((uint32_t)a[2] << 8) |
                     ((uint32_t)a[3] << 16) | ((uint32_t)a[4] << 24);
    uint32_t abs_b = (uint32_t)b[3] | ((uint32_t)b[4] << 8) |
                     ((uint32_t)b[5] << 16) | ((uint32_t)b[6] << 24);
    assert(abs_a == 15);
    assert(abs_b == 15);
    (void)abs_a;
    (void)abs_b;
}
|
||||
|
||||
/* A buffer too short to hold opcode + 4-byte operand must be left alone. */
static void test_bcj_short_data(void)
{
    uint8_t tiny[2] = { 0xE8, 0x01 };

    uc2_bcj_apply(tiny, 2); /* too short, no transform */

    assert(tiny[0] == 0xE8);
    assert(tiny[1] == 0x01);
}
|
||||
|
||||
/* --- BWT tests --- */
|
||||
|
||||
/* Forward and inverse BWT on the classic "banana" example.
 * The library calls are made outside assert() so they still run when the
 * test is built with NDEBUG. */
static void test_bwt_roundtrip(void)
{
    uint8_t data[] = "banana";
    size_t len = 6;
    uint8_t *bwt = NULL;
    uint32_t pidx = 0;

    int rc = uc2_bwt_apply(data, len, &bwt, &pidx);
    assert(rc == 0);
    assert(bwt != NULL);

    /* Sorted rotations of "banana" end in n,n,b,a,a,a and the unrotated
       string is the fourth of them (index 3). */
    printf("(bwt='%.*s' idx=%u) ", (int)len, (const char *)bwt, pidx);
    assert(memcmp(bwt, "nnbaaa", len) == 0);
    assert(pidx == 3);

    uint8_t *orig = NULL;
    rc = uc2_bwt_revert(bwt, len, pidx, &orig);
    assert(rc == 0);
    assert(orig != NULL);
    assert(memcmp(orig, data, len) == 0);
    (void)rc;

    free(bwt);
    free(orig);
}
|
||||
|
||||
/* BWT round trip on 256 bytes of non-text data.
 * The library calls are made outside assert() so they still run when the
 * test is built with NDEBUG. */
static void test_bwt_roundtrip_binary(void)
{
    size_t len = 256;
    uint8_t *data = malloc(len);
    if (data == NULL) abort(); /* cannot run the test without the buffer */

    for (size_t i = 0; i < len; i++) data[i] = (uint8_t)(i * 37 + 13);

    uint8_t *bwt = NULL;
    uint32_t pidx = 0;
    int rc = uc2_bwt_apply(data, len, &bwt, &pidx);
    assert(rc == 0);
    assert(bwt != NULL);

    uint8_t *orig = NULL;
    rc = uc2_bwt_revert(bwt, len, pidx, &orig);
    assert(rc == 0);
    assert(orig != NULL);
    assert(memcmp(orig, data, len) == 0);
    (void)rc;

    free(data);
    free(bwt);
    free(orig);
}
|
||||
|
||||
/* --- Delta filter tests --- */
|
||||
|
||||
/* Stride-1 delta on an arithmetic progression: every byte after the
 * first becomes the common difference, and revert restores the input. */
static void test_delta_roundtrip(void)
{
    static const uint8_t expected[] = {10, 12, 14, 16, 18, 20, 22, 24};
    uint8_t buf[sizeof expected];
    memcpy(buf, expected, sizeof buf);

    uc2_delta_filter_apply(buf, sizeof buf, 1);

    /* The sequence steps by 2, so all deltas are 2. */
    size_t pos = 1;
    while (pos < sizeof buf) {
        assert(buf[pos] == 2);
        pos++;
    }

    uc2_delta_filter_revert(buf, sizeof buf, 1);
    assert(memcmp(buf, expected, sizeof buf) == 0);
}
|
||||
|
||||
/* Stride-2 delta on two interleaved channels (L0 R0 L1 R1 ...), each
 * rising by 2 per sample, followed by a round-trip check. */
static void test_delta_stride2(void)
{
    static const uint8_t stereo[] = {100, 200, 102, 202, 104, 204, 106, 206};
    uint8_t buf[sizeof stereo];
    memcpy(buf, stereo, sizeof buf);

    uc2_delta_filter_apply(buf, sizeof buf, 2);

    /* Samples 1 and 2 of both channels hold the per-channel delta. */
    for (size_t pos = 2; pos <= 5; pos++)
        assert(buf[pos] == 2);

    uc2_delta_filter_revert(buf, sizeof buf, 2);
    assert(memcmp(buf, stereo, sizeof buf) == 0);
}
|
||||
|
||||
/* --- Content detection tests --- */
|
||||
|
||||
static void test_detect_text(void)
|
||||
{
|
||||
uint8_t data[] = "This is plain text content with newlines\n"
|
||||
"and more text on the second line.\n";
|
||||
assert(uc2_detect_content(data, sizeof data - 1) == UC2_CONTENT_TEXT);
|
||||
}
|
||||
|
||||
static void test_detect_x86_mz(void)
|
||||
{
|
||||
uint8_t data[] = {'M', 'Z', 0x90, 0x00};
|
||||
assert(uc2_detect_content(data, sizeof data) == UC2_CONTENT_X86);
|
||||
}
|
||||
|
||||
static void test_detect_x86_elf(void)
|
||||
{
|
||||
uint8_t data[] = {0x7F, 'E', 'L', 'F', 0x02};
|
||||
assert(uc2_detect_content(data, sizeof data) == UC2_CONTENT_X86);
|
||||
}
|
||||
|
||||
static void test_detect_binary(void)
|
||||
{
|
||||
uint8_t data[64];
|
||||
for (int i = 0; i < 64; i++) data[i] = (uint8_t)(i * 7);
|
||||
assert(uc2_detect_content(data, sizeof data) == UC2_CONTENT_BINARY);
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
printf("Preprocessing filter tests:\n");
|
||||
TEST(test_bcj_roundtrip);
|
||||
TEST(test_bcj_normalizes);
|
||||
TEST(test_bcj_short_data);
|
||||
TEST(test_bwt_roundtrip);
|
||||
TEST(test_bwt_roundtrip_binary);
|
||||
TEST(test_delta_roundtrip);
|
||||
TEST(test_delta_stride2);
|
||||
TEST(test_detect_text);
|
||||
TEST(test_detect_x86_mz);
|
||||
TEST(test_detect_x86_elf);
|
||||
TEST(test_detect_binary);
|
||||
printf("%d/%d tests passed\n", tests_passed, tests_run);
|
||||
return tests_passed == tests_run ? 0 : 1;
|
||||
}
|
||||
Reference in New Issue
Block a user