;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2014, 2015, 2016, 2018 Ludovic Courtès ;;; Copyright © 2016, 2021 Mark H Weaver ;;; ;;; This file is part of GNU Guix. ;;; ;;; GNU Guix is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; GNU Guix is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with GNU Guix. If not, see . (define-module (guix build graft) #:use-module (guix build utils) #:use-module (guix build debug-link) #:use-module (rnrs bytevectors) #:use-module (ice-9 vlist) #:use-module (ice-9 match) #:use-module (ice-9 threads) #:use-module (ice-9 binary-ports) #:use-module (srfi srfi-1) ; list library #:use-module (srfi srfi-26) ; cut and cute #:export (replace-store-references rewrite-directory graft)) ;;; Commentary: ;;; ;;; This module supports "grafts". Grafting a directory means rewriting it, ;;; with references to some specific items replaced by references to other ;;; store items---the grafts. ;;; ;;; This method is used to provide fast security updates as only the leaves of ;;; the dependency graph need to be grafted, even when the security updates ;;; affect a core component such as Bash or libc. It is based on the idea of ;;; 'replace-dependency' implemented by Shea Levy in Nixpkgs. ;;; ;;; Code: (define-constant hash-length %store-hash-string-length) (define nix-base32-char? (cute char-set-contains? ;; ASCII digits and lower case letters except e o t u (string->char-set "0123456789abcdfghijklmnpqrsvwxyz") <>)) (define (nix-base32-char-or-nul? c) "Return true if C is a nix-base32 character or NUL, otherwise return false." (or (nix-base32-char? c) (char=? c #\nul))) (define (possible-utf16-hash? buffer i w) "Return true if (I - W) is large enough to hold a UTF-16 encoded nix-base32 hash and if BUFFER contains NULs in all positions where NULs are to be expected in a UTF-16 encoded hash+dash pattern whose dash is found at position I. Otherwise, return false." (and (<= (* 2 hash-length) (- i w)) (let loop ((j (+ 1 (- i (* 2 hash-length))))) (or (>= j i) (and (zero? (bytevector-u8-ref buffer j)) (loop (+ j 2))))))) (define (possible-utf32-hash? buffer i w) "Return true if (I - W) is large enough to hold a UTF-32 encoded nix-base32 hash and if BUFFER contains NULs in all positions where NULs are to be expected in a UTF-32 encoded hash+dash pattern whose dash is found at position I. Otherwise, return false." (and (<= (* 4 hash-length) (- i w)) (let loop ((j (+ 1 (- i (* 4 hash-length))))) (or (>= j i) (and (zero? (bytevector-u8-ref buffer j)) (zero? (bytevector-u8-ref buffer (+ j 1))) (zero? (bytevector-u8-ref buffer (+ j 2))) (loop (+ j 4))))))) (define (insert-nuls char-size bv) "Given a bytevector BV, return a bytevector containing the same bytes but with (CHAR-SIZE - 1) NULs inserted between every two adjacent bytes from BV. For example, (insert-nuls 4 #u8(1 2 3)) => #u8(1 0 0 0 2 0 0 0 3)." (if (= char-size 1) bv (let* ((len (bytevector-length bv)) (bv* (make-bytevector (+ 1 (* char-size (- len 1))) 0))) (let loop ((i 0)) (when (< i len) (bytevector-u8-set! bv* (* i char-size) (bytevector-u8-ref bv i)) (loop (+ i 1)))) bv*))) (define* (replace-store-references input output replacement-table #:optional (store (%store-directory))) "Read data from INPUT, replacing store references according to REPLACEMENT-TABLE, and writing the result to OUTPUT. REPLACEMENT-TABLE is a vhash that maps strings (original hashes) to bytevectors (replacement strings comprising the replacement hash, a dash, and a string). Note: We use string keys to work around the fact that guile-2.0 hashes all bytevectors to the same value." (define (lookup-replacement s) (match (vhash-assoc s replacement-table) ((origin . replacement) replacement) (#f #f))) (define (optimize-u8-predicate pred) (cute vector-ref (list->vector (map pred (iota 256))) <>)) (define nix-base32-byte-or-nul? (optimize-u8-predicate (compose nix-base32-char-or-nul? integer->char))) (define (dash? byte) (= byte 45)) (define request-size (expt 2 20)) ; 1 MiB ;; We scan the file for the following 33-byte pattern: 32 bytes of ;; nix-base32 characters followed by a dash. When we find such a pattern ;; whose hash is in REPLACEMENT-TABLE, we perform the required rewrite and ;; continue scanning. ;; ;; To support UTF-16 and UTF-32 store references, the 33 bytes comprising ;; this hash+dash pattern may optionally be interspersed by extra NUL bytes. ;; This simple approach works because the characters we are looking for are ;; restricted to ASCII. UTF-16 hashes are interspersed with single NUL ;; bytes ("\0"), and UTF-32 hashes are interspersed with triplets of NULs ;; ("\0\0\0"). Note that we require NULs to be present only *between* the ;; other bytes, and not at either end, in order to be insensitive to byte ;; order. ;; ;; To accommodate large files, we do not read the entire file at once, but ;; instead work on buffers of up to REQUEST-SIZE bytes. To ensure that ;; every hash+dash pattern appears in its entirety in at least one buffer, ;; adjacent buffers must overlap by one byte less than the maximum size of a ;; hash+dash pattern. We accomplish this by "ungetting" a suffix of each ;; buffer before reading the next buffer, unless we know that we've reached ;; the end-of-file. (let ((buffer (make-bytevector request-size))) (define-syntax-rule (byte-at i) (bytevector-u8-ref buffer i)) (let outer-loop () (match (get-bytevector-n! input buffer 0 request-size) ((? eof-object?) 'done) (end (define (scan-from i w) ;; Scan the buffer for dashes that might be preceded by nix hashes, ;; where I is the minimum position where such a dash might be ;; found, and W is the number of bytes in the buffer that have been ;; written so far. We assume that I - W >= HASH-LENGTH. ;; ;; The key optimization here is that whenever we find a byte at ;; position I that cannot occur within a nix hash (because it's ;; neither a nix-base32 character nor NUL), we can infer that the ;; earliest position where the next hash could start is at I + 1, ;; and therefore the earliest position for the following dash is ;; (+ I 1 HASH-LENGTH), which is I + 33. ;; ;; Since nix-base32-or-nul characters comprise only about 1/8 of ;; the 256 possible byte values, and exclude some of the most ;; common letters in English text (e t o u), we can advance 33 ;; positions much of the time. (if (< i end) (let ((byte (byte-at i))) (cond ((dash? byte) (found-dash i w)) ((nix-base32-byte-or-nul? byte) (scan-from (+ i 1) w)) (else (not-part-of-hash i w)))) (finish-buffer i w))) (define (not-part-of-hash i w) ;; Position I is known to not be within a nix hash that we must ;; rewrite. Therefore, the earliest position where the next hash ;; might start is I + 1, and therefore the earliest position of ;; the following dash is (+ I 1 HASH-LENGTH). (scan-from (+ i 1 hash-length) w)) (define (found-dash i w) ;; We know that there is a dash '-' at position I, and that ;; I - W >= HASH-LENGTH. The immediately preceding bytes *might* ;; contain a nix-base32 hash, but that is not yet known. Here, ;; we rule out all but one possible encoding (ASCII, UTF-16, ;; UTF-32) by counting how many NULs precede the dash. (cond ((not (zero? (byte-at (- i 1)))) ;; The dash is *not* preceded by a NUL, therefore it ;; cannot possibly be a UTF-16 or UTF-32 hash. Proceed ;; to check for an ASCII hash. (found-possible-hash 1 i w)) ((not (zero? (byte-at (- i 2)))) ;; The dash is preceded by exactly one NUL, therefore it ;; cannot be an ASCII or UTF-32 hash. Proceed to check ;; for a UTF-16 hash. (if (possible-utf16-hash? buffer i w) (found-possible-hash 2 i w) (not-part-of-hash i w))) (else ;; The dash is preceded by at least two NULs, therefore ;; it cannot be an ASCII or UTF-16 hash. Proceed to ;; check for a UTF-32 hash. (if (possible-utf32-hash? buffer i w) (found-possible-hash 4 i w) (not-part-of-hash i w))))) (define (found-possible-hash char-size i w) ;; We know that there is a dash '-' at position I, that ;; I - W >= CHAR-SIZE * HASH-LENGTH, and that the only ;; possible encoding for the preceding hash is as indicated by ;; CHAR-SIZE. Here we check to see if the given hash is in ;; REPLACEMENT-TABLE, and if so, we perform the required ;; rewrite. (let* ((hash (string-tabulate (lambda (j) (integer->char (byte-at (- i (* char-size (- hash-length j)))))) hash-length)) (replacement* (lookup-replacement hash)) (replacement (and replacement* (insert-nuls char-size replacement*)))) (cond ((not replacement) (not-part-of-hash i w)) (else ;; We've found a hash that needs to be replaced. ;; First, write out all bytes preceding the hash ;; that have not yet been written. (put-bytevector output buffer w (- i (* char-size hash-length) w)) ;; Now write the replacement string. (put-bytevector output replacement) ;; Now compute the new values of W and I and continue. (let ((w (+ (- i (* char-size hash-length)) (bytevector-length replacement)))) (scan-from (+ w hash-length) w)))))) (define (finish-buffer i w) ;; We have finished scanning the buffer. Now we determine how many ;; bytes have not yet been written, and how many bytes to "unget". ;; If END is less than REQUEST-SIZE then we read less than we asked ;; for, which indicates that we are at EOF, so we needn't unget ;; anything. Otherwise, we unget up to (* 4 HASH-LENGTH) bytes. ;; However, we must be careful not to unget bytes that have already ;; been written, because that would cause them to be written again ;; from the next buffer. In practice, this case occurs when a ;; replacement is made near or beyond the end of the buffer. When ;; REPLACEMENT went beyond END, we consume the extra bytes from ;; INPUT. (if (> w end) (get-bytevector-n! input buffer 0 (- w end)) (let* ((unwritten (- end w)) (unget-size (if (= end request-size) (min (* 4 hash-length) unwritten) 0)) (write-size (- unwritten unget-size))) (put-bytevector output buffer w write-size) (unget-bytevector input buffer (+ w write-size) unget-size))) (outer-loop)) (scan-from hash-length 0)))))) (define (rename-matching-files directory mapping) "Apply MAPPING to the names of all the files in DIRECTORY, where MAPPING is a list of store file name pairs." (let* ((mapping (map (match-lambda ((source . target) (cons (basename source) (basename target)))) mapping)) (matches (find-files directory (lambda (file stat) (assoc-ref mapping (basename file))) #:directories? #t))) ;; XXX: This is not quite correct: if MAPPING contains "foo", and ;; DIRECTORY contains "bar/foo/foo", we first rename "bar/foo" and then ;; "bar/foo/foo" no longer exists so we fail. Oh well, surely that's good ;; enough! (for-each (lambda (file) (let ((target (assoc-ref mapping (basename file)))) (rename-file file (string-append (dirname file) "/" target)))) matches))) (define* (rewrite-directory directory output mapping #:optional (store (%store-directory))) "Copy DIRECTORY to OUTPUT, replacing strings according to MAPPING, a list of file name pairs." (define hash-mapping ;; List of hash/replacement pairs, where the hash is a nix-base32 string ;; and the replacement is a string that includes the replacement's name, ;; like "r837zajjc1q8z9hph4b6860a9c05blyy-openssl-1.0.2j". (let* ((prefix (string-append store "/")) (start (string-length prefix)) (end (+ start hash-length))) (define (valid-hash? h) (every nix-base32-char? (string->list h))) (define (hash+rest s) (and (< end (string-length s)) (let ((hash (substring s start end)) (all (substring s start))) (and (string-prefix? prefix s) (valid-hash? hash) (eqv? #\- (string-ref s end)) (list hash all))))) (map (match-lambda (((= hash+rest (origin-hash origin-string)) . (= hash+rest (replacement-hash replacement-string))) (unless (= (string-length origin-string) (string-length replacement-string)) (error "replacement length differs from the original length" origin-string replacement-string)) (cons origin-hash (string->utf8 replacement-string))) ((origin . replacement) (error "invalid replacement" origin replacement))) mapping))) (define replacement-table (alist->vhash hash-mapping)) (define prefix-len (string-length directory)) (define (destination file) (string-append output (string-drop file prefix-len))) (define (rewrite-leaf file) (let ((stat (lstat file)) (dest (destination file))) (unless (file-exists? (dirname dest)) (mkdir-p (dirname dest))) (case (stat:type stat) ((symlink) (let ((target (readlink file))) (symlink (call-with-output-string (lambda (output) (replace-store-references (open-input-string target) output replacement-table store))) dest))) ((regular) (call-with-input-file file (lambda (input) (call-with-output-file dest (lambda (output) (replace-store-references input output replacement-table store) (chmod output (stat:perms stat))))))) ((directory) (mkdir-p dest)) (else (error "unsupported file type" stat))))) ;; n-par-for-each can lead to segfaults in the grafting code? (for-each rewrite-leaf (find-files directory (const #t) #:directories? #t)) (rename-matching-files output mapping)) (define %graft-hooks ;; Default list of hooks run after grafting. (list graft-debug-links)) (define* (graft old-outputs new-outputs mapping #:key (log-port (current-output-port)) (hooks %graft-hooks)) "Apply the grafts described by MAPPING on OLD-OUTPUTS, leading to NEW-OUTPUTS. MAPPING must be a list of file name pairs; OLD-OUTPUTS and NEW-OUTPUTS are lists of output name/file name pairs." (for-each (lambda (input output) (format log-port "grafting '~a' -> '~a'...~%" input output) (force-output) (rewrite-directory input output mapping)) (match old-outputs (((names . files) ...) files)) (match new-outputs (((names . files) ...) files))) (for-each (lambda (hook) (hook old-outputs new-outputs mapping #:log-port log-port)) hooks)) ;;; graft.scm ends here