guix-play/guix/build/graft.scm
Ludovic Courtès 94e86a6b67
graft: Correctly replace references near the end of the scan buffer.
Fixes <http://bugs.gnu.org/28212>.
Reported by Leo Famulari <leo@famulari.name>.

* guix/build/graft.scm (replace-store-references): When I >= END, check
whether WRITTEN > END and call 'get-bytevector-n!' when it is.
* tests/grafts.scm (buffer-size): New variable.
("replace-store-references, <http://bugs.gnu.org/28212>"): New test.
2017-08-24 23:55:51 +02:00

324 lines
14 KiB
Scheme

;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2014, 2015, 2016 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2016 Mark H Weaver <mhw@netris.org>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
(define-module (guix build graft)
#:use-module (guix build utils)
#:use-module (rnrs bytevectors)
#:use-module (ice-9 vlist)
#:use-module (ice-9 match)
#:use-module (ice-9 threads)
#:use-module (ice-9 binary-ports)
#:use-module (srfi srfi-1) ; list library
#:use-module (srfi srfi-26) ; cut and cute
#:export (replace-store-references
rewrite-directory))
;;; Commentary:
;;;
;;; This module supports "grafts". Grafting a directory means rewriting it,
;;; with references to some specific items replaced by references to other
;;; store items---the grafts.
;;;
;;; This method is used to provide fast security updates as only the leaves of
;;; the dependency graph need to be grafted, even when the security updates
;;; affect a core component such as Bash or libc. It is based on the idea of
;;; 'replace-dependency' implemented by Shea Levy in Nixpkgs.
;;;
;;; Code:
(define-syntax-rule (define-inline name val)
(define-syntax name (identifier-syntax val)))
(define-inline hash-length 32)
(define nix-base32-char?
(cute char-set-contains?
;; ASCII digits and lower case letters except e o t u
(string->char-set "0123456789abcdfghijklmnpqrsvwxyz")
<>))
(define* (replace-store-references input output replacement-table
#:optional (store (%store-directory)))
"Read data from INPUT, replacing store references according to
REPLACEMENT-TABLE, and writing the result to OUTPUT. REPLACEMENT-TABLE is a
vhash that maps strings (original hashes) to bytevectors (replacement strings
comprising the replacement hash, a dash, and a string).
Note: We use string keys to work around the fact that guile-2.0 hashes all
bytevectors to the same value."
(define (lookup-replacement s)
(match (vhash-assoc s replacement-table)
((origin . replacement)
replacement)
(#f #f)))
(define (optimize-u8-predicate pred)
(cute vector-ref
(list->vector (map pred (iota 256)))
<>))
(define nix-base32-byte?
(optimize-u8-predicate
(compose nix-base32-char?
integer->char)))
(define (dash? byte) (= byte 45))
(define request-size (expt 2 20)) ; 1 MiB
;; We scan the file for the following 33-byte pattern: 32 bytes of
;; nix-base32 characters followed by a dash. To accommodate large files,
;; we do not read the entire file, but instead work on buffers of up to
;; 'request-size' bytes. To ensure that every 33-byte sequence appears
;; entirely within exactly one buffer, adjacent buffers must overlap,
;; i.e. they must share 32 byte positions. We accomplish this by
;; "ungetting" the last 32 bytes of each buffer before reading the next
;; buffer, unless we know that we've reached the end-of-file.
(let ((buffer (make-bytevector request-size)))
(let loop ()
;; Note: We avoid 'get-bytevector-n' to work around
;; <http://bugs.gnu.org/17466>.
(match (get-bytevector-n! input buffer 0 request-size)
((? eof-object?) 'done)
(end
;; We scan the buffer for dashes that might be preceded by a
;; nix-base32 hash. The key optimization here is that whenever we
;; find a NON-nix-base32 character at position 'i', we know that it
;; cannot be part of a hash, so the earliest position where the next
;; hash could start is i+1 with the following dash at position i+33.
;;
;; Since nix-base32 characters comprise only 1/8 of the 256 possible
;; byte values, and exclude some of the most common letters in
;; English text (e t o u), in practice we can advance by 33 positions
;; most of the time.
(let scan-from ((i hash-length) (written 0))
;; 'i' is the first position where we look for a dash. 'written'
;; is the number of bytes in the buffer that have already been
;; written.
(if (< i end)
(let ((byte (bytevector-u8-ref buffer i)))
(cond ((and (dash? byte)
;; We've found a dash. Note that we do not know
;; whether the preceeding 32 bytes are nix-base32
;; characters, but we do not need to know. If
;; they are not, the following lookup will fail.
(lookup-replacement
(string-tabulate (lambda (j)
(integer->char
(bytevector-u8-ref buffer
(+ j (- i hash-length)))))
hash-length)))
=> (lambda (replacement)
;; We've found a hash that needs to be replaced.
;; First, write out all bytes preceding the hash
;; that have not yet been written.
(put-bytevector output buffer written
(- i hash-length written))
;; Now write the replacement string.
(put-bytevector output replacement)
;; Since the byte at position 'i' is a dash,
;; which is not a nix-base32 char, the earliest
;; position where the next hash might start is
;; i+1, and the earliest position where the
;; following dash might start is (+ i 1
;; hash-length). Also, increase the write
;; position to account for REPLACEMENT.
(let ((len (bytevector-length replacement)))
(scan-from (+ i 1 len)
(+ i (- len hash-length))))))
;; If the byte at position 'i' is a nix-base32 char,
;; then the dash we're looking for might be as early as
;; the following byte, so we can only advance by 1.
((nix-base32-byte? byte)
(scan-from (+ i 1) written))
;; If the byte at position 'i' is NOT a nix-base32
;; char, then the earliest position where the next hash
;; might start is i+1, with the following dash at
;; position (+ i 1 hash-length).
(else
(scan-from (+ i 1 hash-length) written))))
;; We have finished scanning the buffer. Now we determine how
;; many bytes have not yet been written, and how many bytes to
;; "unget". If 'end' is less than 'request-size' then we read
;; less than we asked for, which indicates that we are at EOF,
;; so we needn't unget anything. Otherwise, we unget up to
;; 'hash-length' bytes (32 bytes). However, we must be careful
;; not to unget bytes that have already been written, because
;; that would cause them to be written again from the next
;; buffer. In practice, this case occurs when a replacement is
;; made near or beyond the end of the buffer. When REPLACEMENT
;; went beyond END, we consume the extra bytes from INPUT.
(begin
(if (> written end)
(get-bytevector-n! input buffer 0 (- written end))
(let* ((unwritten (- end written))
(unget-size (if (= end request-size)
(min hash-length unwritten)
0))
(write-size (- unwritten unget-size)))
(put-bytevector output buffer written write-size)
(unget-bytevector input buffer (+ written write-size)
unget-size)))
(loop)))))))))
(define (rename-matching-files directory mapping)
"Apply MAPPING to the names of all the files in DIRECTORY, where MAPPING is
a list of store file name pairs."
(let* ((mapping (map (match-lambda
((source . target)
(cons (basename source) (basename target))))
mapping))
(matches (find-files directory
(lambda (file stat)
(assoc-ref mapping (basename file)))
#:directories? #t)))
;; XXX: This is not quite correct: if MAPPING contains "foo", and
;; DIRECTORY contains "bar/foo/foo", we first rename "bar/foo" and then
;; "bar/foo/foo" no longer exists so we fail. Oh well, surely that's good
;; enough!
(for-each (lambda (file)
(let ((target (assoc-ref mapping (basename file))))
(rename-file file
(string-append (dirname file) "/" target))))
matches)))
(define (exit-on-exception proc)
"Return a procedure that wraps PROC so that 'primitive-exit' is called when
an exception is caught."
(lambda (arg)
(catch #t
(lambda ()
(proc arg))
(lambda (key . args)
;; Since ports are not thread-safe as of Guile 2.0, reopen stderr.
(let ((port (fdopen 2 "w0")))
(print-exception port #f key args)
(primitive-exit 1))))))
(define* (mkdir-p* dir #:optional (mode #o755))
"This is a variant of 'mkdir-p' that works around
<http://bugs.gnu.org/24659> by passing MODE explicitly in each 'mkdir' call."
(define absolute?
(string-prefix? "/" dir))
(define not-slash
(char-set-complement (char-set #\/)))
(let loop ((components (string-tokenize dir not-slash))
(root (if absolute?
""
".")))
(match components
((head tail ...)
(let ((path (string-append root "/" head)))
(catch 'system-error
(lambda ()
(mkdir path mode)
(loop tail path))
(lambda args
(if (= EEXIST (system-error-errno args))
(loop tail path)
(apply throw args))))))
(() #t))))
(define* (rewrite-directory directory output mapping
#:optional (store (%store-directory)))
"Copy DIRECTORY to OUTPUT, replacing strings according to MAPPING, a list of
file name pairs."
(define hash-mapping
;; List of hash/replacement pairs, where the hash is a nix-base32 string
;; and the replacement is a string that includes the replacement's name,
;; like "r837zajjc1q8z9hph4b6860a9c05blyy-openssl-1.0.2j".
(let* ((prefix (string-append store "/"))
(start (string-length prefix))
(end (+ start hash-length)))
(define (valid-hash? h)
(every nix-base32-char? (string->list h)))
(define (hash+rest s)
(and (< end (string-length s))
(let ((hash (substring s start end))
(all (substring s start)))
(and (string-prefix? prefix s)
(valid-hash? hash)
(eqv? #\- (string-ref s end))
(list hash all)))))
(map (match-lambda
(((= hash+rest (origin-hash origin-string))
.
(= hash+rest (replacement-hash replacement-string)))
(unless (= (string-length origin-string)
(string-length replacement-string))
(error "replacement length differs from the original length"
origin-string replacement-string))
(cons origin-hash (string->utf8 replacement-string)))
((origin . replacement)
(error "invalid replacement" origin replacement)))
mapping)))
(define replacement-table
(alist->vhash hash-mapping))
(define prefix-len
(string-length directory))
(define (destination file)
(string-append output (string-drop file prefix-len)))
(define (rewrite-leaf file)
(let ((stat (lstat file))
(dest (destination file)))
(mkdir-p* (dirname dest))
(case (stat:type stat)
((symlink)
(let ((target (readlink file)))
(symlink (call-with-output-string
(lambda (output)
(replace-store-references (open-input-string target)
output replacement-table
store)))
dest)))
((regular)
(call-with-input-file file
(lambda (input)
(call-with-output-file dest
(lambda (output)
(replace-store-references input output replacement-table
store)
(chmod output (stat:perms stat)))))))
((directory)
(mkdir-p* dest))
(else
(error "unsupported file type" stat)))))
;; Use 'exit-on-exception' to force an exit upon I/O errors, given that
;; 'n-par-for-each' silently swallows exceptions.
;; See <http://bugs.gnu.org/23581>.
(n-par-for-each (parallel-job-count)
(exit-on-exception rewrite-leaf)
(find-files directory (const #t)
#:directories? #t))
(rename-matching-files output mapping))
;;; graft.scm ends here