libarchive plugin: directory paths, round-trip test (M5-M6)
The read handler now composes full directory paths from the cdir's directory ids rather than emitting bare leaf names: build_dir_path walks the parent chain (root dirid 0, depth-capped against cyclic cdirs), so multi-file archives with subdirectories list correctly. Master-block resolution (M4) and tagged long names (M6) already work through libuc2's extract and tag paths; this adds a libarchive round-trip test that creates archives at Huffman and rANS levels and verifies every byte back through libarchive's public API. Documents the plugin build recipe (libarchive source tree + static lib). Verified against libarchive 3.7.7; round-trip clean under valgrind.
This commit is contained in:
@@ -2,12 +2,16 @@
|
||||
|
||||
/* libarchive read handler for UC2 v3 archives.
|
||||
*
|
||||
* Status: milestones 1-3.
|
||||
* Status: milestones 1-6.
|
||||
* M1 -- bid() with UC2 magic check.
|
||||
* M2 -- read_header iterates uc2_read_cdir, maps each cdir entry to
|
||||
* libarchive's archive_entry shape (name, size, mode, mtime).
|
||||
* M3 -- read_data uses uc2_extract to decompress an entry, buffers
|
||||
* the result, then yields it via libarchive's pull-style API.
|
||||
* M4 -- master blocks resolve inside libuc2 during uc2_extract.
|
||||
* M5 -- multi-file archives with full directory paths composed from
|
||||
* the cdir's directory ids (parent-before-child not assumed).
|
||||
* M6 -- tagged entries (Win95 long names) resolved via uc2_get_tag.
|
||||
*
|
||||
* Strategy: on the first read_header call we slurp the entire archive
|
||||
* into memory through __archive_read_ahead, then drive libuc2 against
|
||||
@@ -51,6 +55,7 @@ struct uc2_la_state {
|
||||
/* Cached cdir entries. uc2_read_cdir is single-pass; we capture
|
||||
* everything on the first read_header call. */
|
||||
struct uc2_entry *entries;
|
||||
char **paths; /* composed full path per entry */
|
||||
int n_entries;
|
||||
int n_capacity;
|
||||
int next_entry;
|
||||
@@ -306,6 +311,79 @@ collect_entries(struct archive_read *a, struct uc2_la_state *st)
|
||||
return (ARCHIVE_OK);
|
||||
}
|
||||
|
||||
/* Append the full path of directory `id` (with a trailing slash) to
|
||||
* buf. Returns the new offset, or -1 on overflow. UC2 directory ids
|
||||
* are archive-global; root is 0. The depth cap breaks cycles in
|
||||
* damaged directories. */
|
||||
static int
|
||||
build_dir_path(struct uc2_la_state *st, unsigned id,
|
||||
char *buf, size_t cap, int depth)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (id == 0)
|
||||
return (0);
|
||||
if (depth > 64)
|
||||
return (-1); /* cyclic or pathologically deep: corrupt cdir */
|
||||
for (i = 0; i < st->n_entries; i++) {
|
||||
struct uc2_entry *d = &st->entries[i];
|
||||
if (d->is_dir && d->id == id) {
|
||||
int off = build_dir_path(st, d->dirid, buf, cap,
|
||||
depth + 1);
|
||||
int n;
|
||||
if (off < 0)
|
||||
return (-1);
|
||||
n = snprintf(buf + off, cap - off, "%s/", d->name);
|
||||
if (n < 0 || (size_t)n >= cap - off)
|
||||
return (-1);
|
||||
return (off + n);
|
||||
}
|
||||
}
|
||||
return (0); /* unknown parent: fall back to root */
|
||||
}
|
||||
|
||||
/* Compose a full path for every entry: parent directories joined with
|
||||
* '/', directories themselves carrying a trailing slash. */
|
||||
static int
|
||||
compose_paths(struct archive_read *a, struct uc2_la_state *st)
|
||||
{
|
||||
int i;
|
||||
|
||||
st->paths = (char **)calloc((size_t)st->n_entries,
|
||||
sizeof *st->paths);
|
||||
if (st->paths == NULL && st->n_entries > 0) {
|
||||
archive_set_error(&a->archive, ENOMEM,
|
||||
"UC2: out of memory composing paths");
|
||||
return (ARCHIVE_FATAL);
|
||||
}
|
||||
|
||||
for (i = 0; i < st->n_entries; i++) {
|
||||
struct uc2_entry *e = &st->entries[i];
|
||||
char buf[2048];
|
||||
int off = build_dir_path(st, e->dirid, buf, sizeof buf, 0);
|
||||
int n;
|
||||
if (off < 0) {
|
||||
archive_set_error(&a->archive, EINVAL,
|
||||
"UC2: directory path too long");
|
||||
return (ARCHIVE_FATAL);
|
||||
}
|
||||
n = snprintf(buf + off, sizeof buf - off, "%s%s",
|
||||
e->name, e->is_dir ? "/" : "");
|
||||
if (n < 0 || (size_t)n >= sizeof buf - off) {
|
||||
archive_set_error(&a->archive, EINVAL,
|
||||
"UC2: entry path too long");
|
||||
return (ARCHIVE_FATAL);
|
||||
}
|
||||
st->paths[i] = strdup(buf);
|
||||
if (st->paths[i] == NULL) {
|
||||
archive_set_error(&a->archive, ENOMEM,
|
||||
"UC2: out of memory composing paths");
|
||||
return (ARCHIVE_FATAL);
|
||||
}
|
||||
}
|
||||
return (ARCHIVE_OK);
|
||||
}
|
||||
|
||||
static int
|
||||
uc2_la_read_header(struct archive_read *a, struct archive_entry *entry)
|
||||
{
|
||||
@@ -321,6 +399,9 @@ uc2_la_read_header(struct archive_read *a, struct archive_entry *entry)
|
||||
|
||||
r = collect_entries(a, st);
|
||||
if (r != ARCHIVE_OK) return r;
|
||||
|
||||
r = compose_paths(a, st);
|
||||
if (r != ARCHIVE_OK) return r;
|
||||
}
|
||||
|
||||
if (st->next_entry >= st->n_entries)
|
||||
@@ -332,7 +413,7 @@ uc2_la_read_header(struct archive_read *a, struct archive_entry *entry)
|
||||
st->entry_len = 0;
|
||||
st->entry_yielded = 0;
|
||||
|
||||
archive_entry_set_pathname(entry, e->name);
|
||||
archive_entry_set_pathname(entry, st->paths[st->next_entry - 1]);
|
||||
archive_entry_set_size(entry, (la_int64_t)e->size);
|
||||
archive_entry_set_mtime(entry, dos_to_unix_time(e->dos_time), 0);
|
||||
|
||||
@@ -409,6 +490,12 @@ uc2_la_cleanup(struct archive_read *a)
|
||||
return (ARCHIVE_OK);
|
||||
if (st->handle)
|
||||
uc2_close(st->handle);
|
||||
if (st->paths) {
|
||||
int i;
|
||||
for (i = 0; i < st->n_entries; i++)
|
||||
free(st->paths[i]);
|
||||
free(st->paths);
|
||||
}
|
||||
free(st->data);
|
||||
free(st->entries);
|
||||
free(st->entry_data);
|
||||
|
||||
@@ -41,6 +41,43 @@ Cross-compile from a Linux host using the DJGPP toolchain:
|
||||
|
||||
This produces a DOS executable suitable for DOSBox or real hardware.
|
||||
|
||||
libarchive Read Plugin
|
||||
----------------------
|
||||
|
||||
The optional libarchive read handler (``contrib/libarchive/``) lets any
|
||||
libarchive consumer — ``bsdtar``, file managers, language bindings —
|
||||
list and extract ``.uc2`` archives. It uses libarchive's internal
|
||||
read-format API, so it builds against a libarchive **source tree**
|
||||
rather than an installed ``-devel`` package.
|
||||
|
||||
Unpack a libarchive release and build a static library (a
|
||||
dependency-free configuration is enough for the plugin and its test):
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
curl -LO https://github.com/libarchive/libarchive/releases/download/v3.7.7/libarchive-3.7.7.tar.gz
|
||||
tar xzf libarchive-3.7.7.tar.gz
|
||||
cmake -S libarchive-3.7.7 -B larch-build -DCMAKE_BUILD_TYPE=Release \
|
||||
-DBUILD_SHARED_LIBS=OFF -DENABLE_TEST=OFF
|
||||
cmake --build larch-build --target archive_static
|
||||
|
||||
Then configure UC2 with the plugin enabled, pointing at the source tree
|
||||
and the static library:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cmake -B build -DCMAKE_BUILD_TYPE=Release \
|
||||
-DUC2_BUILD_LIBARCHIVE_PLUGIN=ON \
|
||||
-DLIBARCHIVE_SOURCE_DIR=$PWD/libarchive-3.7.7 \
|
||||
-DLIBARCHIVE_LIBRARY=$PWD/larch-build/libarchive/libarchive.a
|
||||
cmake --build build
|
||||
|
||||
This builds ``libuc2_libarchive.a`` and the ``libarchive_roundtrip``
|
||||
test, which creates archives at multiple compression levels and reads
|
||||
them back through libarchive's public API, verifying every byte. The
|
||||
plugin handles multi-file archives with directory paths, master-block
|
||||
deduplication, and Win95 long names.
|
||||
|
||||
Build Options
|
||||
-------------
|
||||
|
||||
@@ -54,6 +91,9 @@ Build Options
|
||||
* - ``UC2_BUILD_TESTS``
|
||||
- ``ON``
|
||||
- Build test programs
|
||||
* - ``UC2_BUILD_LIBARCHIVE_PLUGIN``
|
||||
- ``OFF``
|
||||
- Build the libarchive read handler (needs ``LIBARCHIVE_SOURCE_DIR``; its round-trip test also needs ``LIBARCHIVE_LIBRARY``)
|
||||
* - ``CMAKE_BUILD_TYPE``
|
||||
- (none)
|
||||
- ``Release``, ``Debug``, ``RelWithDebInfo``
|
||||
|
||||
@@ -155,6 +155,26 @@ if(Python3_Interpreter_FOUND)
|
||||
)
|
||||
endif()
|
||||
|
||||
# Round-trip test for the libarchive plugin.  Needs
# -DUC2_BUILD_LIBARCHIVE_PLUGIN=ON, and is only configured when the
# plugin target exists and both -DLIBARCHIVE_SOURCE_DIR=<source tree>
# and -DLIBARCHIVE_LIBRARY=<built libarchive.a> were given (a
# deps-disabled static build is enough; see docs).
if(TARGET uc2_libarchive
   AND DEFINED LIBARCHIVE_SOURCE_DIR
   AND DEFINED LIBARCHIVE_LIBRARY)
  # Verifier executable: reads an archive back through libarchive.
  add_executable(test_libarchive_uc2 src/test_libarchive_uc2.c)
  target_compile_features(test_libarchive_uc2 PRIVATE c_std_99)
  target_include_directories(test_libarchive_uc2
    PRIVATE "${LIBARCHIVE_SOURCE_DIR}/libarchive")
  target_link_libraries(test_libarchive_uc2
    PRIVATE uc2_libarchive "${LIBARCHIVE_LIBRARY}" uc2)

  # The driver script creates the archives with the CLI and then runs
  # the verifier on each of them.
  add_test(
    NAME libarchive_roundtrip
    COMMAND
      ${CMAKE_COMMAND}
      -DUC2_CLI=$<TARGET_FILE:uc2-cli>
      -DLA_TEST=$<TARGET_FILE:test_libarchive_uc2>
      -DTEST_DIR=${CMAKE_CURRENT_BINARY_DIR}/libarchive_test
      -P ${CMAKE_CURRENT_SOURCE_DIR}/test_cli_libarchive.cmake
  )
endif()
|
||||
|
||||
# Cross-tool round-trip: UC2 v3 <-> original uc2pro.exe via DOSBox-X
|
||||
add_test(NAME roundtrip_dosbox
|
||||
COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/scripts/roundtrip_dosbox.sh
|
||||
|
||||
134
tests/src/test_libarchive_uc2.c
Normal file
134
tests/src/test_libarchive_uc2.c
Normal file
@@ -0,0 +1,134 @@
|
||||
/* Round-trip verification of the libarchive UC2 read plugin.
|
||||
*
|
||||
* Usage: test_libarchive_uc2 <archive.uc2> <originals-dir>
|
||||
*
|
||||
* Opens the archive through libarchive's public API with the UC2
|
||||
* format registered, walks every entry, extracts the data, and
|
||||
* compares it byte-for-byte against <originals-dir>/<entry-name>.
|
||||
* Exit 0 only if every file entry matches.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
|
||||
extern int archive_read_support_format_uc2(struct archive *);
|
||||
|
||||
/* Read the whole file at `path` into a freshly malloc'd buffer.
 *
 * On success the byte count is stored in *out_len and the buffer is
 * returned; it is never NULL (an empty file is backed by a 1-byte
 * allocation) and the caller frees it.
 *
 * Every failure -- open, seek, tell, allocation, or a read that
 * delivers fewer bytes than the measured size -- prints a "FAIL:" line
 * to stderr and exits with status 1, so an unreadable reference file
 * is reported as an I/O problem rather than surfacing later as a
 * content mismatch. */
static unsigned char *slurp(const char *path, size_t *out_len)
{
    FILE *f = fopen(path, "rb");
    if (!f) {
        fprintf(stderr, "FAIL: cannot open original %s\n", path);
        exit(1);
    }

    /* Measure the file by seeking to its end. */
    if (fseek(f, 0, SEEK_END) != 0) {
        fprintf(stderr, "FAIL: fseek %s\n", path);
        exit(1);
    }
    long n = ftell(f);
    if (n < 0) {
        fprintf(stderr, "FAIL: ftell %s\n", path);
        exit(1);
    }
    if (fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "FAIL: fseek %s\n", path);
        exit(1);
    }

    size_t want = (size_t)n;
    /* malloc(0) may return NULL; always ask for at least one byte. */
    unsigned char *buf = malloc(want > 0 ? want : 1);
    if (!buf) {
        fprintf(stderr, "FAIL: malloc\n");
        exit(1);
    }

    size_t got = fread(buf, 1, want, f);
    if (got != want) {
        fprintf(stderr, "FAIL: short read %s (%zu of %zu bytes)\n",
                path, got, want);
        exit(1);
    }
    fclose(f);

    *out_len = got;
    return buf;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc != 3) {
|
||||
fprintf(stderr, "usage: %s <archive.uc2> <originals-dir>\n",
|
||||
argv[0]);
|
||||
return 2;
|
||||
}
|
||||
|
||||
struct archive *a = archive_read_new();
|
||||
if (!a) return 2;
|
||||
if (archive_read_support_format_uc2(a) != ARCHIVE_OK) {
|
||||
fprintf(stderr, "FAIL: cannot register UC2 format: %s\n",
|
||||
archive_error_string(a));
|
||||
return 1;
|
||||
}
|
||||
if (archive_read_open_filename(a, argv[1], 65536) != ARCHIVE_OK) {
|
||||
fprintf(stderr, "FAIL: open %s: %s\n", argv[1],
|
||||
archive_error_string(a));
|
||||
return 1;
|
||||
}
|
||||
|
||||
int nfiles = 0, ndirs = 0, bad = 0;
|
||||
struct archive_entry *e;
|
||||
int r;
|
||||
while ((r = archive_read_next_header(a, &e)) == ARCHIVE_OK) {
|
||||
const char *name = archive_entry_pathname(e);
|
||||
if (archive_entry_filetype(e) == AE_IFDIR) {
|
||||
ndirs++;
|
||||
continue;
|
||||
}
|
||||
la_int64_t want = archive_entry_size(e);
|
||||
|
||||
size_t cap = want > 0 ? (size_t)want : 1;
|
||||
unsigned char *got = malloc(cap);
|
||||
if (!got) {
|
||||
fprintf(stderr, "FAIL: malloc\n");
|
||||
return 1;
|
||||
}
|
||||
size_t got_len = 0;
|
||||
for (;;) {
|
||||
la_ssize_t n = archive_read_data(a, got + got_len,
|
||||
cap - got_len);
|
||||
if (n < 0) {
|
||||
fprintf(stderr, "FAIL: read_data %s: %s\n",
|
||||
name, archive_error_string(a));
|
||||
return 1;
|
||||
}
|
||||
if (n == 0)
|
||||
break;
|
||||
got_len += (size_t)n;
|
||||
if (got_len == cap)
|
||||
break;
|
||||
}
|
||||
|
||||
if ((la_int64_t)got_len != want) {
|
||||
fprintf(stderr, "BAD: %s: size %zu, header said %lld\n",
|
||||
name, got_len, (long long)want);
|
||||
bad++;
|
||||
free(got);
|
||||
nfiles++;
|
||||
continue;
|
||||
}
|
||||
|
||||
char opath[4096];
|
||||
snprintf(opath, sizeof opath, "%s/%s", argv[2], name);
|
||||
size_t ref_len;
|
||||
unsigned char *ref = slurp(opath, &ref_len);
|
||||
if (ref_len != got_len || memcmp(ref, got, got_len) != 0) {
|
||||
fprintf(stderr, "BAD: %s: content mismatch "
|
||||
"(%zu vs %zu bytes)\n", name, got_len, ref_len);
|
||||
bad++;
|
||||
}
|
||||
free(ref);
|
||||
free(got);
|
||||
nfiles++;
|
||||
}
|
||||
if (r != ARCHIVE_EOF) {
|
||||
fprintf(stderr, "FAIL: next_header: %s\n",
|
||||
archive_error_string(a));
|
||||
return 1;
|
||||
}
|
||||
archive_read_free(a);
|
||||
|
||||
printf("libarchive round-trip: %d files (%d dirs), %d bad\n",
|
||||
nfiles, ndirs, bad);
|
||||
if (nfiles == 0) {
|
||||
fprintf(stderr, "FAIL: no file entries found\n");
|
||||
return 1;
|
||||
}
|
||||
return bad ? 1 : 0;
|
||||
}
|
||||
39
tests/test_cli_libarchive.cmake
Normal file
39
tests/test_cli_libarchive.cmake
Normal file
@@ -0,0 +1,39 @@
|
||||
# Round-trip test for the libarchive UC2 read plugin: the uc2 CLI
# creates archives (Huffman and rANS), then test_libarchive_uc2 reads
# them back through libarchive's public API and verifies every byte.
#
# Run in script mode (cmake -P) with these variables passed via -D:
#   UC2_CLI   path to the uc2 command-line tool
#   LA_TEST   path to the test_libarchive_uc2 executable
#   TEST_DIR  scratch directory; deleted and recreated on every run

# TEST_DIR is removed recursively below, so refuse to run when any
# input is missing or empty instead of operating on a bogus path.
foreach(REQUIRED_VAR UC2_CLI LA_TEST TEST_DIR)
  string(LENGTH "${${REQUIRED_VAR}}" REQUIRED_LEN)
  if(REQUIRED_LEN EQUAL 0)
    message(FATAL_ERROR
      "${REQUIRED_VAR} is not set (pass -D${REQUIRED_VAR}=<value>)")
  endif()
endforeach()

# Start from a clean fixture tree.
file(REMOVE_RECURSE "${TEST_DIR}")
file(MAKE_DIRECTORY "${TEST_DIR}/input/subdir")

# Fixture files: short text, highly repetitive text, a pseudo-random
# blob, a file inside a subdirectory, and an empty file.
file(WRITE "${TEST_DIR}/input/hello.txt" "Hello from libarchive!\n")
string(REPEAT "The quick brown fox jumps over the lazy dog.\n" 200 REPEATED)
file(WRITE "${TEST_DIR}/input/repeated.txt" "${REPEATED}")
string(RANDOM LENGTH 8192 RANDOM_SEED 99 BLOB)
file(WRITE "${TEST_DIR}/input/blob.dat" "${BLOB}")
file(WRITE "${TEST_DIR}/input/subdir/nested_long_file_name.txt"
  "nested content with a long name\n")
file(WRITE "${TEST_DIR}/input/empty.dat" "")

foreach(LEVEL 4 6)
  set(ARCHIVE "${TEST_DIR}/la${LEVEL}.uc2")

  # Create the archive; names are relative to the input directory.
  execute_process(
    COMMAND "${UC2_CLI}" -q -w -L ${LEVEL} "${ARCHIVE}"
            hello.txt repeated.txt blob.dat empty.dat subdir
    WORKING_DIRECTORY "${TEST_DIR}/input"
    RESULT_VARIABLE RC
    OUTPUT_VARIABLE CLI_OUT
    ERROR_VARIABLE CLI_OUT
  )
  if(NOT RC EQUAL 0)
    # Include whatever the archiver printed so the failure is diagnosable.
    message(FATAL_ERROR "uc2 -w -L ${LEVEL} failed: ${RC}\n${CLI_OUT}")
  endif()

  # Read it back through libarchive and compare against the inputs.
  execute_process(
    COMMAND "${LA_TEST}" "${ARCHIVE}" "${TEST_DIR}/input"
    RESULT_VARIABLE RC
    OUTPUT_VARIABLE OUT
    ERROR_VARIABLE OUT
  )
  message(STATUS "L${LEVEL}: ${OUT}")
  if(NOT RC EQUAL 0)
    message(FATAL_ERROR "libarchive round-trip failed at -L ${LEVEL}")
  endif()
endforeach()
|
||||
Reference in New Issue
Block a user