offload: Use (guix inferior) instead of (ssh dist node).

Using inferiors and thus 'guix repl' simplifies setup on build
machines (no need to worry about GUILE_LOAD_PATH etc.)

Furthermore, the 'guix repl -t machine' protocol running in a remote
pipe addresses several issues with the current implementation of nodes
and RREPLs in Guile-SSH: fewer round trips, doesn't leave a 'guile
--listen' process behind it, stateless (since a new process is started
each time), more efficient (the SSH channel can be reused), more
reliable (no 'pgrep', 'pkill', and shellology; see
<https://github.com/artyom-poptsov/guile-ssh/issues/11> as an example.)

* guix/ssh.scm (inferior-remote-eval): New procedure.
(send-files): Use it instead of 'make-node' and 'node-eval'.
* guix/scripts/offload.scm (node-guile-version): New procedure.
(node-free-disk-space, transfer-and-offload, node-load)
(choose-build-machine, assert-node-has-guix): Use 'remote-inferior'
instead of 'make-node' and 'inferior-eval' instead of 'node-eval'.
(assert-node-can-import, assert-node-can-export): Likewise, and add
'session' parameter.
(check-machine-availability): Likewise, and add calls to
'close-inferior' and 'disconnect!'.
(check-machine-status): Likewise.
* doc/guix.texi (Daemon Offload Setup): Remove bit related to 'guile' in
$PATH and $GUILE_LOAD_PATH; mention 'guix' alone.
This commit is contained in:
Ludovic Courtès 2018-12-24 15:40:04 +01:00
parent af15fe13b6
commit ed7b44370f
No known key found for this signature in database
GPG Key ID: 090B11993D9AEBB5
3 changed files with 80 additions and 63 deletions

View File

@ -1051,13 +1051,11 @@ name, and they will be scheduled on matching build machines.
@end table @end table
@end deftp @end deftp
The @code{guile} command must be in the search path on the build The @command{guix} command must be in the search path on the build
machines. In addition, the Guix modules must be in machines. You can check whether this is the case by running:
@code{$GUILE_LOAD_PATH} on the build machine---you can check whether
this is the case by running:
@example @example
ssh build-machine guile -c "'(use-modules (guix config))'" ssh build-machine guix repl --version
@end example @end example
There is one last thing to do once @file{machines.scm} is in place. As There is one last thing to do once @file{machines.scm} is in place. As

View File

@ -23,13 +23,12 @@
#:use-module (ssh session) #:use-module (ssh session)
#:use-module (ssh channel) #:use-module (ssh channel)
#:use-module (ssh popen) #:use-module (ssh popen)
#:use-module (ssh dist)
#:use-module (ssh dist node)
#:use-module (ssh version) #:use-module (ssh version)
#:use-module (guix config) #:use-module (guix config)
#:use-module (guix records) #:use-module (guix records)
#:use-module (guix ssh) #:use-module (guix ssh)
#:use-module (guix store) #:use-module (guix store)
#:use-module (guix inferior)
#:use-module (guix derivations) #:use-module (guix derivations)
#:use-module ((guix serialization) #:use-module ((guix serialization)
#:select (nar-error? nar-error-file)) #:select (nar-error? nar-error-file))
@ -321,12 +320,15 @@ hook."
(set-port-revealed! port 1) (set-port-revealed! port 1)
port)) port))
(define (node-guile-version node)
(inferior-eval '(version) node))
(define (node-free-disk-space node) (define (node-free-disk-space node)
"Return the free disk space, in bytes, in NODE's store." "Return the free disk space, in bytes, in NODE's store."
(node-eval node (inferior-eval `(begin
`(begin (use-modules (guix build syscalls))
(use-modules (guix build syscalls)) (free-disk-space ,(%store-prefix)))
(free-disk-space ,(%store-prefix))))) node))
(define* (transfer-and-offload drv machine (define* (transfer-and-offload drv machine
#:key #:key
@ -367,8 +369,12 @@ MACHINE."
(derivation-file-name drv) (derivation-file-name drv)
(build-machine-name machine) (build-machine-name machine)
(nix-protocol-error-message c)) (nix-protocol-error-message c))
(let* ((space (false-if-exception (let* ((inferior (false-if-exception (remote-inferior session)))
(node-free-disk-space (make-node session))))) (space (false-if-exception
(node-free-disk-space inferior))))
(when inferior
(close-inferior inferior))
;; Use exit code 100 for a permanent build failure. The daemon ;; Use exit code 100 for a permanent build failure. The daemon
;; interprets other non-zero codes as transient build failures. ;; interprets other non-zero codes as transient build failures.
@ -417,11 +423,11 @@ of free disk space on '~a'~%")
(define (node-load node) (define (node-load node)
"Return the load on NODE. Return +∞ if NODE is misbehaving." "Return the load on NODE. Return +∞ if NODE is misbehaving."
(let ((line (node-eval node (let ((line (inferior-eval '(begin
'(begin (use-modules (ice-9 rdelim))
(use-modules (ice-9 rdelim)) (call-with-input-file "/proc/loadavg"
(call-with-input-file "/proc/loadavg" read-string))
read-string))))) node)))
(if (eof-object? line) (if (eof-object? line)
+inf.0 ;MACHINE does not respond, so assume it is infinitely loaded +inf.0 ;MACHINE does not respond, so assume it is infinitely loaded
(match (string-tokenize line) (match (string-tokenize line)
@ -508,9 +514,10 @@ slot (which must later be released with 'release-build-slot'), or #f and #f."
;; Note: We call 'node-load' only as a last resort because it is ;; Note: We call 'node-load' only as a last resort because it is
;; too costly to call it once for every machine. ;; too costly to call it once for every machine.
(let* ((session (false-if-exception (open-ssh-session best))) (let* ((session (false-if-exception (open-ssh-session best)))
(node (and session (make-node session))) (node (and session (remote-inferior session)))
(load (and node (normalized-load best (node-load node)))) (load (and node (normalized-load best (node-load node))))
(space (and node (node-free-disk-space node)))) (space (and node (node-free-disk-space node))))
(when node (close-inferior node))
(when session (disconnect! session)) (when session (disconnect! session))
(if (and node (< load 2.) (>= space %minimum-disk-space)) (if (and node (< load 2.) (>= space %minimum-disk-space))
(match others (match others
@ -613,18 +620,17 @@ If TIMEOUT is #f, simply evaluate EXP..."
(#f (#f
(report-guile-error name)) (report-guile-error name))
((? string? version) ((? string? version)
;; Note: The version string already contains the word "Guile". (info (G_ "'~a' is running GNU Guile ~a~%")
(info (G_ "'~a' is running ~a~%")
name (node-guile-version node))))) name (node-guile-version node)))))
(define (assert-node-has-guix node name) (define (assert-node-has-guix node name)
"Bail out if NODE lacks the (guix) module, or if its daemon is not running." "Bail out if NODE lacks the (guix) module, or if its daemon is not running."
(catch 'node-repl-error (catch 'node-repl-error
(lambda () (lambda ()
(match (node-eval node (match (inferior-eval '(begin
'(begin (use-modules (guix))
(use-modules (guix)) (and add-text-to-store 'alright))
(and add-text-to-store 'alright))) node)
('alright #t) ('alright #t)
(_ (report-module-error name)))) (_ (report-module-error name))))
(lambda (key . args) (lambda (key . args)
@ -632,12 +638,12 @@ If TIMEOUT is #f, simply evaluate EXP..."
(catch 'node-repl-error (catch 'node-repl-error
(lambda () (lambda ()
(match (node-eval node (match (inferior-eval '(begin
'(begin (use-modules (guix))
(use-modules (guix)) (with-store store
(with-store store (add-text-to-store store "test"
(add-text-to-store store "test" "Hello, build machine!")))
"Hello, build machine!")))) node)
((? string? str) ((? string? str)
(info (G_ "Guix is usable on '~a' (test returned ~s)~%") (info (G_ "Guix is usable on '~a' (test returned ~s)~%")
name str)) name str))
@ -656,25 +662,23 @@ If TIMEOUT is #f, simply evaluate EXP..."
(string-append name "-" (string-append name "-"
(number->string (random 1000000 (force %random-state))))) (number->string (random 1000000 (force %random-state)))))
(define (assert-node-can-import node name daemon-socket) (define (assert-node-can-import session node name daemon-socket)
"Bail out if NODE refuses to import our archives." "Bail out if NODE refuses to import our archives."
(let ((session (node-session node))) (with-store store
(with-store store (let* ((item (add-text-to-store store "export-test" (nonce)))
(let* ((item (add-text-to-store store "export-test" (nonce))) (remote (connect-to-remote-daemon session daemon-socket)))
(remote (connect-to-remote-daemon session daemon-socket))) (with-store local
(with-store local (send-files local (list item) remote))
(send-files local (list item) remote))
(if (valid-path? remote item) (if (valid-path? remote item)
(info (G_ "'~a' successfully imported '~a'~%") (info (G_ "'~a' successfully imported '~a'~%")
name item) name item)
(leave (G_ "'~a' was not properly imported on '~a'~%") (leave (G_ "'~a' was not properly imported on '~a'~%")
item name)))))) item name)))))
(define (assert-node-can-export node name daemon-socket) (define (assert-node-can-export session node name daemon-socket)
"Bail out if we cannot import signed archives from NODE." "Bail out if we cannot import signed archives from NODE."
(let* ((session (node-session node)) (let* ((remote (connect-to-remote-daemon session daemon-socket))
(remote (connect-to-remote-daemon session daemon-socket))
(item (add-text-to-store remote "import-test" (nonce name)))) (item (add-text-to-store remote "import-test" (nonce name))))
(with-store store (with-store store
(if (and (retrieve-files store (list item) remote) (if (and (retrieve-files store (list item) remote)
@ -701,11 +705,13 @@ machine."
(let* ((names (map build-machine-name machines)) (let* ((names (map build-machine-name machines))
(sockets (map build-machine-daemon-socket machines)) (sockets (map build-machine-daemon-socket machines))
(sessions (map open-ssh-session machines)) (sessions (map open-ssh-session machines))
(nodes (map make-node sessions))) (nodes (map remote-inferior sessions)))
(for-each assert-node-repl nodes names) (for-each assert-node-repl nodes names)
(for-each assert-node-has-guix nodes names) (for-each assert-node-has-guix nodes names)
(for-each assert-node-can-import nodes names sockets) (for-each assert-node-can-import sessions nodes names sockets)
(for-each assert-node-can-export nodes names sockets)))) (for-each assert-node-can-export sessions nodes names sockets)
(for-each close-inferior nodes)
(for-each disconnect! sessions))))
(define (check-machine-status machine-file pred) (define (check-machine-status machine-file pred)
"Print the load of each machine matching PRED in MACHINE-FILE." "Print the load of each machine matching PRED in MACHINE-FILE."
@ -722,10 +728,11 @@ machine."
(length machines) machine-file) (length machines) machine-file)
(for-each (lambda (machine) (for-each (lambda (machine)
(let* ((session (open-ssh-session machine)) (let* ((session (open-ssh-session machine))
(node (make-node session)) (inferior (remote-inferior session))
(uts (node-eval node '(uname))) (uts (inferior-eval '(uname) inferior))
(load (node-load node)) (load (node-load inferior))
(free (node-free-disk-space node))) (free (node-free-disk-space inferior)))
(close-inferior inferior)
(disconnect! session) (disconnect! session)
(format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\ (format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\
host name: ~a~% normalized load: ~a~% free disk space: ~,2f MiB~%" host name: ~a~% normalized load: ~a~% free disk space: ~,2f MiB~%"

View File

@ -27,8 +27,6 @@
#:use-module (ssh channel) #:use-module (ssh channel)
#:use-module (ssh popen) #:use-module (ssh popen)
#:use-module (ssh session) #:use-module (ssh session)
#:use-module (ssh dist)
#:use-module (ssh dist node)
#:use-module (srfi srfi-1) #:use-module (srfi srfi-1)
#:use-module (srfi srfi-11) #:use-module (srfi srfi-11)
#:use-module (srfi srfi-26) #:use-module (srfi srfi-26)
@ -102,6 +100,20 @@ Throw an error on failure."
"guix" "repl" "-t" "machine"))) "guix" "repl" "-t" "machine")))
(port->inferior pipe))) (port->inferior pipe)))
(define (inferior-remote-eval exp session)
"Evaluate EXP in a new inferior running in SESSION, and close the inferior
right away."
(let ((inferior (remote-inferior session)))
(dynamic-wind
(const #t)
(lambda ()
(inferior-eval exp inferior))
(lambda ()
;; Close INFERIOR right away to prevent finalization from happening in
;; another thread at the wrong time (see
;; <https://bugs.gnu.org/26976>.)
(close-inferior inferior)))))
(define* (remote-daemon-channel session (define* (remote-daemon-channel session
#:optional #:optional
(socket-name (socket-name
@ -277,15 +289,15 @@ Return the list of store items actually sent."
;; Compute the subset of FILES missing on SESSION and send them. ;; Compute the subset of FILES missing on SESSION and send them.
(let* ((files (if recursive? (requisites local files) files)) (let* ((files (if recursive? (requisites local files) files))
(session (channel-get-session (nix-server-socket remote))) (session (channel-get-session (nix-server-socket remote)))
(node (make-node session)) (missing (inferior-remote-eval
(missing (node-eval node `(begin
`(begin (use-modules (guix)
(use-modules (guix) (srfi srfi-1) (srfi srfi-26))
(srfi srfi-1) (srfi srfi-26))
(with-store store (with-store store
(remove (cut valid-path? store <>) (remove (cut valid-path? store <>)
',files))))) ',files)))
session))
(count (length missing)) (count (length missing))
(sizes (map (lambda (item) (sizes (map (lambda (item)
(path-info-nar-size (query-path-info local item))) (path-info-nar-size (query-path-info local item)))