2018-11-19 09:40:21 -05:00
|
|
|
|
;;; GNU Guix --- Functional package management for GNU
|
2021-01-20 05:35:10 -05:00
|
|
|
|
;;; Copyright © 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
|
2020-02-23 06:06:31 -05:00
|
|
|
|
;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
|
2018-11-19 09:40:21 -05:00
|
|
|
|
;;;
|
|
|
|
|
;;; This file is part of GNU Guix.
|
|
|
|
|
;;;
|
|
|
|
|
;;; GNU Guix is free software; you can redistribute it and/or modify it
|
|
|
|
|
;;; under the terms of the GNU General Public License as published by
|
|
|
|
|
;;; the Free Software Foundation; either version 3 of the License, or (at
|
|
|
|
|
;;; your option) any later version.
|
|
|
|
|
;;;
|
|
|
|
|
;;; GNU Guix is distributed in the hope that it will be useful, but
|
|
|
|
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
;;; GNU General Public License for more details.
|
|
|
|
|
;;;
|
|
|
|
|
;;; You should have received a copy of the GNU General Public License
|
|
|
|
|
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
|
|
(define-module (guix swh)
|
|
|
|
|
#:use-module (guix base16)
|
|
|
|
|
#:use-module (guix build utils)
|
|
|
|
|
#:use-module ((guix build syscalls) #:select (mkdtemp!))
|
2019-08-29 09:59:16 -04:00
|
|
|
|
#:use-module (web uri)
|
2018-11-19 09:40:21 -05:00
|
|
|
|
#:use-module (web client)
|
|
|
|
|
#:use-module (web response)
|
|
|
|
|
#:use-module (json)
|
|
|
|
|
#:use-module (srfi srfi-1)
|
|
|
|
|
#:use-module (srfi srfi-9)
|
|
|
|
|
#:use-module (srfi srfi-11)
|
|
|
|
|
#:use-module (srfi srfi-19)
|
|
|
|
|
#:use-module (ice-9 match)
|
|
|
|
|
#:use-module (ice-9 regex)
|
|
|
|
|
#:use-module (ice-9 popen)
|
|
|
|
|
#:use-module ((ice-9 ftw) #:select (scandir))
|
2019-07-20 14:13:39 -04:00
|
|
|
|
#:export (%swh-base-url
|
2020-07-09 11:19:52 -04:00
|
|
|
|
%verify-swh-certificate?
|
2019-08-29 09:59:16 -04:00
|
|
|
|
%allow-request?
|
|
|
|
|
|
|
|
|
|
request-rate-limit-reached?
|
2019-07-20 14:13:39 -04:00
|
|
|
|
|
|
|
|
|
origin?
|
2018-11-19 09:40:21 -05:00
|
|
|
|
origin-type
|
|
|
|
|
origin-url
|
|
|
|
|
origin-visits
|
|
|
|
|
lookup-origin
|
|
|
|
|
|
|
|
|
|
visit?
|
|
|
|
|
visit-date
|
|
|
|
|
visit-origin
|
|
|
|
|
visit-url
|
|
|
|
|
visit-snapshot-url
|
|
|
|
|
visit-status
|
|
|
|
|
visit-number
|
|
|
|
|
visit-snapshot
|
|
|
|
|
|
|
|
|
|
branch?
|
|
|
|
|
branch-name
|
|
|
|
|
branch-target
|
|
|
|
|
|
|
|
|
|
release?
|
|
|
|
|
release-id
|
|
|
|
|
release-name
|
|
|
|
|
release-message
|
|
|
|
|
release-target
|
|
|
|
|
|
|
|
|
|
revision?
|
|
|
|
|
revision-id
|
|
|
|
|
revision-date
|
|
|
|
|
revision-directory
|
|
|
|
|
lookup-revision
|
|
|
|
|
lookup-origin-revision
|
|
|
|
|
|
|
|
|
|
content?
|
|
|
|
|
content-checksums
|
|
|
|
|
content-data-url
|
|
|
|
|
content-length
|
|
|
|
|
lookup-content
|
|
|
|
|
|
|
|
|
|
directory-entry?
|
|
|
|
|
directory-entry-name
|
|
|
|
|
directory-entry-type
|
|
|
|
|
directory-entry-checksums
|
|
|
|
|
directory-entry-length
|
|
|
|
|
directory-entry-permissions
|
|
|
|
|
lookup-directory
|
|
|
|
|
directory-entry-target
|
|
|
|
|
|
2018-11-27 04:11:52 -05:00
|
|
|
|
save-reply?
|
|
|
|
|
save-reply-origin-url
|
|
|
|
|
save-reply-origin-type
|
|
|
|
|
save-reply-request-date
|
|
|
|
|
save-reply-request-status
|
|
|
|
|
save-reply-task-status
|
|
|
|
|
save-origin
|
|
|
|
|
save-origin-status
|
|
|
|
|
|
2018-11-19 09:40:21 -05:00
|
|
|
|
vault-reply?
|
|
|
|
|
vault-reply-id
|
|
|
|
|
vault-reply-fetch-url
|
|
|
|
|
vault-reply-object-id
|
|
|
|
|
vault-reply-object-type
|
|
|
|
|
vault-reply-progress-message
|
|
|
|
|
vault-reply-status
|
|
|
|
|
query-vault
|
|
|
|
|
request-cooking
|
|
|
|
|
vault-fetch
|
|
|
|
|
|
2019-08-29 10:02:29 -04:00
|
|
|
|
commit-id?
|
|
|
|
|
|
2021-03-18 16:49:40 -04:00
|
|
|
|
swh-download-directory
|
2018-11-19 09:40:21 -05:00
|
|
|
|
swh-download))
|
|
|
|
|
|
|
|
|
|
;;; Commentary:
|
|
|
|
|
;;;
|
|
|
|
|
;;; This module provides bindings to the HTTP interface of Software Heritage.
|
|
|
|
|
;;; It allows you to browse the archive, look up revisions (such as SHA1
|
|
|
|
|
;;; commit IDs), "origins" (code hosting URLs), content (files), etc. See
|
|
|
|
|
;;; <https://archive.softwareheritage.org/api/> for more information.
|
|
|
|
|
;;;
|
|
|
|
|
;;; The high-level 'swh-download' procedure allows you to download a Git
|
|
|
|
|
;;; revision from Software Heritage, provided it is available.
|
|
|
|
|
;;;
|
|
|
|
|
;;; Code:
|
|
|
|
|
|
|
|
|
|
(define %swh-base-url
|
|
|
|
|
;; Presumably we won't need to change it.
|
2019-07-20 14:13:39 -04:00
|
|
|
|
(make-parameter "https://archive.softwareheritage.org"))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
|
2020-07-09 11:19:52 -04:00
|
|
|
|
(define %verify-swh-certificate?
|
|
|
|
|
;; Whether to verify the X.509 HTTPS certificate for %SWH-BASE-URL.
|
|
|
|
|
(make-parameter #t))
|
|
|
|
|
|
2018-11-19 09:40:21 -05:00
|
|
|
|
(define (swh-url path . rest)
|
2020-02-23 06:06:31 -05:00
|
|
|
|
;; URLs returned by the API may be relative or absolute. This has changed
|
|
|
|
|
;; without notice before. Handle both cases by detecting whether the path
|
|
|
|
|
;; starts with a domain.
|
|
|
|
|
(define root
|
|
|
|
|
(if (string-prefix? "/" path)
|
|
|
|
|
(string-append (%swh-base-url) path)
|
|
|
|
|
path))
|
|
|
|
|
|
2018-11-19 09:40:21 -05:00
|
|
|
|
(define url
|
2020-02-23 06:06:31 -05:00
|
|
|
|
(string-append root (string-join rest "/" 'prefix)))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
|
|
|
|
|
;; Ensure there's a trailing slash or we get a redirect.
|
|
|
|
|
(if (string-suffix? "/" url)
|
|
|
|
|
url
|
|
|
|
|
(string-append url "/")))
|
|
|
|
|
|
2021-05-26 16:30:31 -04:00
|
|
|
|
;; XXX: Work around a bug in Guile 3.0.2 where #:verify-certificate? would
|
|
|
|
|
;; be ignored (<https://bugs.gnu.org/40486>).
|
|
|
|
|
(define* (http-get* uri #:rest rest)
|
|
|
|
|
(apply http-request uri #:method 'GET rest))
|
|
|
|
|
(define* (http-post* uri #:rest rest)
|
|
|
|
|
(apply http-request uri #:method 'POST rest))
|
2020-07-09 11:19:52 -04:00
|
|
|
|
|
2018-11-19 09:40:21 -05:00
|
|
|
|
(define %date-regexp
|
|
|
|
|
;; Match strings like "2014-11-17T22:09:38+01:00" or
|
|
|
|
|
;; "2018-09-30T23:20:07.815449+00:00"".
|
|
|
|
|
(make-regexp "^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})((\\.[0-9]+)?)([+-][0-9]{2}):([0-9]{2})$"))
|
|
|
|
|
|
|
|
|
|
(define (string->date* str)
|
|
|
|
|
"Return a SRFI-19 date parsed from STR, a date string as returned by
|
|
|
|
|
Software Heritage."
|
|
|
|
|
;; We can't use 'string->date' because of the timezone format: SWH returns
|
|
|
|
|
;; "+01:00" when the '~z' template expects "+0100". So we roll our own!
|
|
|
|
|
(or (and=> (regexp-exec %date-regexp str)
|
|
|
|
|
(lambda (match)
|
|
|
|
|
(define (ref n)
|
|
|
|
|
(string->number (match:substring match n)))
|
|
|
|
|
|
|
|
|
|
(make-date (let ((ns (match:substring match 8)))
|
|
|
|
|
(if ns
|
|
|
|
|
(string->number (string-drop ns 1))
|
|
|
|
|
0))
|
|
|
|
|
(ref 6) (ref 5) (ref 4)
|
|
|
|
|
(ref 3) (ref 2) (ref 1)
|
|
|
|
|
(+ (* 3600 (ref 9)) ;time zone
|
|
|
|
|
(if (< (ref 9) 0)
|
|
|
|
|
(- (ref 10))
|
|
|
|
|
(ref 10))))))
|
|
|
|
|
str)) ;oops!
|
|
|
|
|
|
2019-08-28 05:31:18 -04:00
|
|
|
|
(define string*
|
|
|
|
|
;; Converts "string or #nil" coming from JSON to "string or #f".
|
|
|
|
|
(match-lambda
|
|
|
|
|
((? string? str) str)
|
2020-06-29 18:15:35 -04:00
|
|
|
|
((? null?) #f) ;Guile-JSON 3.x
|
|
|
|
|
('null #f))) ;Guile-JSON 4.x
|
2019-08-28 05:31:18 -04:00
|
|
|
|
|
2019-08-29 09:59:16 -04:00
|
|
|
|
(define %allow-request?
|
|
|
|
|
;; Takes a URL and method (e.g., the 'http-get' procedure) and returns true
|
2020-07-09 11:19:52 -04:00
|
|
|
|
;; to keep going. This can be used to disallow requests when
|
2019-08-29 09:59:16 -04:00
|
|
|
|
;; 'request-rate-limit-reached?' returns true, for instance.
|
|
|
|
|
(make-parameter (const #t)))
|
|
|
|
|
|
|
|
|
|
;; The time when the rate limit for "/origin/save" POST requests and that of
|
|
|
|
|
;; other requests will be reset.
|
|
|
|
|
;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
|
|
|
|
|
(define %save-rate-limit-reset-time 0)
|
|
|
|
|
(define %general-rate-limit-reset-time 0)
|
|
|
|
|
|
|
|
|
|
(define (request-rate-limit-reached? url method)
|
|
|
|
|
"Return true if the rate limit has been reached for URI."
|
|
|
|
|
(define uri
|
|
|
|
|
(string->uri url))
|
|
|
|
|
|
|
|
|
|
(define reset-time
|
2020-07-09 11:19:52 -04:00
|
|
|
|
(if (and (eq? method http-post*)
|
2019-08-29 09:59:16 -04:00
|
|
|
|
(string-prefix? "/api/1/origin/save/" (uri-path uri)))
|
|
|
|
|
%save-rate-limit-reset-time
|
|
|
|
|
%general-rate-limit-reset-time))
|
|
|
|
|
|
|
|
|
|
(< (car (gettimeofday)) reset-time))
|
|
|
|
|
|
|
|
|
|
(define (update-rate-limit-reset-time! url method response)
|
|
|
|
|
"Update the rate limit reset time for URL and METHOD based on the headers in
|
|
|
|
|
RESPONSE."
|
|
|
|
|
(let ((uri (string->uri url)))
|
|
|
|
|
(match (assq-ref (response-headers response) 'x-ratelimit-reset)
|
|
|
|
|
((= string->number (? number? reset))
|
2020-07-09 11:19:52 -04:00
|
|
|
|
(if (and (eq? method http-post*)
|
2019-08-29 09:59:16 -04:00
|
|
|
|
(string-prefix? "/api/1/origin/save/" (uri-path uri)))
|
|
|
|
|
(set! %save-rate-limit-reset-time reset)
|
|
|
|
|
(set! %general-rate-limit-reset-time reset)))
|
|
|
|
|
(_
|
|
|
|
|
#f))))
|
|
|
|
|
|
2020-07-09 11:19:52 -04:00
|
|
|
|
(define* (call url decode #:optional (method http-get*)
|
2018-11-19 09:40:21 -05:00
|
|
|
|
#:key (false-if-404? #t))
|
|
|
|
|
"Invoke the endpoint at URL using METHOD. Decode the resulting JSON body
|
|
|
|
|
using DECODE, a one-argument procedure that takes an input port. When
|
|
|
|
|
FALSE-IF-404? is true, return #f upon 404 responses."
|
2019-08-29 09:59:16 -04:00
|
|
|
|
(and ((%allow-request?) url method)
|
|
|
|
|
(let*-values (((response port)
|
2020-07-09 11:19:52 -04:00
|
|
|
|
(method url #:streaming? #t
|
|
|
|
|
#:verify-certificate?
|
|
|
|
|
(%verify-swh-certificate?))))
|
2019-08-29 09:59:16 -04:00
|
|
|
|
;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
|
|
|
|
|
(match (assq-ref (response-headers response) 'x-ratelimit-remaining)
|
|
|
|
|
(#f #t)
|
|
|
|
|
((? (compose zero? string->number))
|
|
|
|
|
(update-rate-limit-reset-time! url method response)
|
|
|
|
|
(throw 'swh-error url method response))
|
|
|
|
|
(_ #t))
|
|
|
|
|
|
|
|
|
|
(cond ((= 200 (response-code response))
|
|
|
|
|
(let ((result (decode port)))
|
|
|
|
|
(close-port port)
|
|
|
|
|
result))
|
|
|
|
|
((and false-if-404?
|
|
|
|
|
(= 404 (response-code response)))
|
|
|
|
|
(close-port port)
|
|
|
|
|
#f)
|
|
|
|
|
(else
|
|
|
|
|
(close-port port)
|
|
|
|
|
(throw 'swh-error url method response))))))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
|
|
|
|
|
(define-syntax define-query
|
|
|
|
|
(syntax-rules (path)
|
|
|
|
|
"Define a procedure that performs a Software Heritage query."
|
|
|
|
|
((_ (name args ...) docstring (path components ...)
|
|
|
|
|
json->value)
|
|
|
|
|
(define (name args ...)
|
|
|
|
|
docstring
|
|
|
|
|
(call (swh-url components ...) json->value)))))
|
|
|
|
|
|
2019-12-25 18:19:39 -05:00
|
|
|
|
;; <https://archive.softwareheritage.org/api/1/origin/https://github.com/guix-mirror/guix/get>
|
2018-11-19 09:40:21 -05:00
|
|
|
|
(define-json-mapping <origin> make-origin origin?
|
|
|
|
|
json->origin
|
|
|
|
|
(visits-url origin-visits-url "origin_visits_url")
|
|
|
|
|
(type origin-type)
|
|
|
|
|
(url origin-url))
|
|
|
|
|
|
|
|
|
|
;; <https://archive.softwareheritage.org/api/1/origin/52181937/visits/>
|
|
|
|
|
(define-json-mapping <visit> make-visit visit?
|
|
|
|
|
json->visit
|
|
|
|
|
(date visit-date "date" string->date*)
|
|
|
|
|
(origin visit-origin)
|
|
|
|
|
(url visit-url "origin_visit_url")
|
2019-08-28 05:31:18 -04:00
|
|
|
|
(snapshot-url visit-snapshot-url "snapshot_url" string*) ;string | #f
|
|
|
|
|
(status visit-status "status" string->symbol) ;'full | 'partial | 'ongoing
|
2018-11-19 09:40:21 -05:00
|
|
|
|
(number visit-number "visit"))
|
|
|
|
|
|
|
|
|
|
;; <https://archive.softwareheritage.org/api/1/snapshot/4334c3ed4bb208604ed780d8687fe523837f1bd1/>
|
|
|
|
|
(define-json-mapping <snapshot> make-snapshot snapshot?
|
|
|
|
|
json->snapshot
|
|
|
|
|
(branches snapshot-branches "branches" json->branches))
|
|
|
|
|
|
|
|
|
|
;; This is used for the "branches" field of snapshots.
|
|
|
|
|
(define-record-type <branch>
|
|
|
|
|
(make-branch name target-type target-url)
|
|
|
|
|
branch?
|
|
|
|
|
(name branch-name)
|
|
|
|
|
(target-type branch-target-type) ;release | revision
|
|
|
|
|
(target-url branch-target-url))
|
|
|
|
|
|
|
|
|
|
(define (json->branches branches)
|
2019-07-21 17:05:54 -04:00
|
|
|
|
(map (match-lambda
|
|
|
|
|
((key . value)
|
|
|
|
|
(make-branch key
|
|
|
|
|
(string->symbol
|
|
|
|
|
(assoc-ref value "target_type"))
|
|
|
|
|
(assoc-ref value "target_url"))))
|
|
|
|
|
branches))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
|
|
|
|
|
;; <https://archive.softwareheritage.org/api/1/release/1f44934fb6e2cefccbecd4fa347025349fa9ff76/>
|
|
|
|
|
(define-json-mapping <release> make-release release?
|
|
|
|
|
json->release
|
|
|
|
|
(id release-id)
|
|
|
|
|
(name release-name)
|
|
|
|
|
(message release-message)
|
|
|
|
|
(target-type release-target-type "target_type" string->symbol)
|
|
|
|
|
(target-url release-target-url "target_url"))
|
|
|
|
|
|
|
|
|
|
;; <https://archive.softwareheritage.org/api/1/revision/359fdda40f754bbf1b5dc261e7427b75463b59be/>
|
|
|
|
|
(define-json-mapping <revision> make-revision revision?
|
|
|
|
|
json->revision
|
|
|
|
|
(id revision-id)
|
|
|
|
|
(date revision-date "date" string->date*)
|
|
|
|
|
(directory revision-directory)
|
|
|
|
|
(directory-url revision-directory-url "directory_url"))
|
|
|
|
|
|
|
|
|
|
;; <https://archive.softwareheritage.org/api/1/content/>
|
|
|
|
|
(define-json-mapping <content> make-content content?
|
|
|
|
|
json->content
|
|
|
|
|
(checksums content-checksums "checksums" json->checksums)
|
|
|
|
|
(data-url content-data-url "data_url")
|
|
|
|
|
(file-type-url content-file-type-url "filetype_url")
|
|
|
|
|
(language-url content-language-url "language_url")
|
|
|
|
|
(length content-length)
|
|
|
|
|
(license-url content-license-url "license_url"))
|
|
|
|
|
|
|
|
|
|
(define (json->checksums checksums)
|
2019-07-21 17:05:54 -04:00
|
|
|
|
(map (match-lambda
|
|
|
|
|
((key . value)
|
|
|
|
|
(cons key (base16-string->bytevector value))))
|
|
|
|
|
checksums))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
|
|
|
|
|
;; <https://archive.softwareheritage.org/api/1/directory/27c69c5d298a43096a53affbf881e7b13f17bdcd/>
|
|
|
|
|
(define-json-mapping <directory-entry> make-directory-entry directory-entry?
|
|
|
|
|
json->directory-entry
|
|
|
|
|
(name directory-entry-name)
|
|
|
|
|
(type directory-entry-type "type"
|
|
|
|
|
(match-lambda
|
|
|
|
|
("dir" 'directory)
|
|
|
|
|
(str (string->symbol str))))
|
|
|
|
|
(checksums directory-entry-checksums "checksums"
|
|
|
|
|
(match-lambda
|
|
|
|
|
(#f #f)
|
2021-01-20 05:35:10 -05:00
|
|
|
|
((? unspecified?) #f)
|
2018-11-19 09:40:21 -05:00
|
|
|
|
(lst (json->checksums lst))))
|
|
|
|
|
(id directory-entry-id "dir_id")
|
|
|
|
|
(length directory-entry-length)
|
|
|
|
|
(permissions directory-entry-permissions "perms")
|
|
|
|
|
(target-url directory-entry-target-url "target_url"))
|
|
|
|
|
|
|
|
|
|
;; <https://archive.softwareheritage.org/api/1/origin/save/>
|
|
|
|
|
(define-json-mapping <save-reply> make-save-reply save-reply?
|
|
|
|
|
json->save-reply
|
|
|
|
|
(origin-url save-reply-origin-url "origin_url")
|
|
|
|
|
(origin-type save-reply-origin-type "origin_type")
|
|
|
|
|
(request-date save-reply-request-date "save_request_date"
|
|
|
|
|
string->date*)
|
|
|
|
|
(request-status save-reply-request-status "save_request_status"
|
|
|
|
|
string->symbol)
|
|
|
|
|
(task-status save-reply-task-status "save_task_status"
|
|
|
|
|
(match-lambda
|
|
|
|
|
("not created" 'not-created)
|
|
|
|
|
((? string? str) (string->symbol str)))))
|
|
|
|
|
|
|
|
|
|
;; <https://docs.softwareheritage.org/devel/swh-vault/api.html#vault-api-ref>
|
|
|
|
|
(define-json-mapping <vault-reply> make-vault-reply vault-reply?
|
|
|
|
|
json->vault-reply
|
|
|
|
|
(id vault-reply-id)
|
|
|
|
|
(fetch-url vault-reply-fetch-url "fetch_url")
|
|
|
|
|
(object-id vault-reply-object-id "obj_id")
|
|
|
|
|
(object-type vault-reply-object-type "obj_type" string->symbol)
|
|
|
|
|
(progress-message vault-reply-progress-message "progress_message")
|
|
|
|
|
(status vault-reply-status "status" string->symbol))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;;
|
|
|
|
|
;;; RPCs.
|
|
|
|
|
;;;
|
|
|
|
|
|
|
|
|
|
(define-query (lookup-origin url)
|
|
|
|
|
"Return an origin for URL."
|
2019-12-15 16:00:56 -05:00
|
|
|
|
(path "/api/1/origin" url "get")
|
2018-11-19 09:40:21 -05:00
|
|
|
|
json->origin)
|
|
|
|
|
|
|
|
|
|
(define-query (lookup-content hash type)
|
|
|
|
|
"Return a content for HASH, of the given TYPE--e.g., \"sha256\"."
|
|
|
|
|
(path "/api/1/content"
|
|
|
|
|
(string-append type ":"
|
|
|
|
|
(bytevector->base16-string hash)))
|
|
|
|
|
json->content)
|
|
|
|
|
|
|
|
|
|
(define-query (lookup-revision id)
|
|
|
|
|
"Return the revision with the given ID, typically a Git commit SHA1."
|
|
|
|
|
(path "/api/1/revision" id)
|
|
|
|
|
json->revision)
|
|
|
|
|
|
|
|
|
|
(define-query (lookup-directory id)
|
|
|
|
|
"Return the directory with the given ID."
|
|
|
|
|
(path "/api/1/directory" id)
|
|
|
|
|
json->directory-entries)
|
|
|
|
|
|
|
|
|
|
(define (json->directory-entries port)
|
2019-07-21 17:05:54 -04:00
|
|
|
|
(map json->directory-entry
|
|
|
|
|
(vector->list (json->scm port))))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
|
|
|
|
|
(define (origin-visits origin)
|
|
|
|
|
"Return the list of visits of ORIGIN, a record as returned by
|
|
|
|
|
'lookup-origin'."
|
|
|
|
|
(call (swh-url (origin-visits-url origin))
|
|
|
|
|
(lambda (port)
|
2019-07-21 17:05:54 -04:00
|
|
|
|
(map json->visit (vector->list (json->scm port))))))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
|
|
|
|
|
(define (visit-snapshot visit)
|
2019-08-28 05:31:18 -04:00
|
|
|
|
"Return the snapshot corresponding to VISIT or #f if no snapshot is
|
|
|
|
|
available."
|
|
|
|
|
(and (visit-snapshot-url visit)
|
|
|
|
|
(call (swh-url (visit-snapshot-url visit))
|
|
|
|
|
json->snapshot)))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
|
|
|
|
|
(define (branch-target branch)
|
|
|
|
|
"Return the target of BRANCH, either a <revision> or a <release>."
|
|
|
|
|
(match (branch-target-type branch)
|
|
|
|
|
('release
|
|
|
|
|
(call (swh-url (branch-target-url branch))
|
|
|
|
|
json->release))
|
|
|
|
|
('revision
|
|
|
|
|
(call (swh-url (branch-target-url branch))
|
|
|
|
|
json->revision))))
|
|
|
|
|
|
|
|
|
|
(define (lookup-origin-revision url tag)
|
|
|
|
|
"Return a <revision> corresponding to the given TAG for the repository
|
|
|
|
|
coming from URL. Example:
|
|
|
|
|
|
2019-08-28 05:31:18 -04:00
|
|
|
|
(lookup-origin-revision \"https://github.com/guix-mirror/guix/\" \"v0.8\")
|
2018-11-19 09:40:21 -05:00
|
|
|
|
=> #<<revision> id: \"44941…\" …>
|
|
|
|
|
|
|
|
|
|
The information is based on the latest visit of URL available. Return #f if
|
|
|
|
|
URL could not be found."
|
|
|
|
|
(match (lookup-origin url)
|
|
|
|
|
(#f #f)
|
|
|
|
|
(origin
|
2021-05-14 17:21:45 -04:00
|
|
|
|
(match (filter (lambda (visit)
|
|
|
|
|
;; Return #f if (visit-snapshot VISIT) would return #f.
|
|
|
|
|
(and (visit-snapshot-url visit)
|
|
|
|
|
(eq? 'full (visit-status visit))))
|
|
|
|
|
(origin-visits origin))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
((visit . _)
|
|
|
|
|
(let ((snapshot (visit-snapshot visit)))
|
|
|
|
|
(match (and=> (find (lambda (branch)
|
|
|
|
|
(string=? (string-append "refs/tags/" tag)
|
|
|
|
|
(branch-name branch)))
|
|
|
|
|
(snapshot-branches snapshot))
|
|
|
|
|
branch-target)
|
|
|
|
|
((? release? release)
|
|
|
|
|
(release-target release))
|
|
|
|
|
((? revision? revision)
|
|
|
|
|
revision)
|
|
|
|
|
(#f ;tag not found
|
|
|
|
|
#f))))
|
|
|
|
|
(()
|
|
|
|
|
#f)))))
|
|
|
|
|
|
|
|
|
|
(define (release-target release)
|
|
|
|
|
"Return the revision that is the target of RELEASE."
|
|
|
|
|
(match (release-target-type release)
|
|
|
|
|
('revision
|
|
|
|
|
(call (swh-url (release-target-url release))
|
|
|
|
|
json->revision))))
|
|
|
|
|
|
|
|
|
|
(define (directory-entry-target entry)
|
|
|
|
|
"If ENTRY, a directory entry, has type 'directory, return its list of
|
|
|
|
|
directory entries; if it has type 'file, return its <content> object."
|
|
|
|
|
(call (swh-url (directory-entry-target-url entry))
|
|
|
|
|
(match (directory-entry-type entry)
|
|
|
|
|
('file json->content)
|
|
|
|
|
('directory json->directory-entries))))
|
|
|
|
|
|
|
|
|
|
(define* (save-origin url #:optional (type "git"))
|
|
|
|
|
"Request URL to be saved."
|
|
|
|
|
(call (swh-url "/api/1/origin/save" type "url" url) json->save-reply
|
2020-07-09 11:19:52 -04:00
|
|
|
|
http-post*))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
|
|
|
|
|
(define-query (save-origin-status url type)
|
|
|
|
|
"Return the status of a /save request for URL and TYPE (e.g., \"git\")."
|
|
|
|
|
(path "/api/1/origin/save" type "url" url)
|
|
|
|
|
json->save-reply)
|
|
|
|
|
|
|
|
|
|
(define-query (query-vault id kind)
|
|
|
|
|
"Ask the availability of object ID and KIND to the vault, where KIND is
|
|
|
|
|
'directory or 'revision. Return #f if it could not be found, or a
|
|
|
|
|
<vault-reply> on success."
|
|
|
|
|
;; <https://docs.softwareheritage.org/devel/swh-vault/api.html#vault-api-ref>
|
|
|
|
|
;; There's a single format supported for directories and revisions and for
|
|
|
|
|
;; now, the "/format" bit of the URL *must* be omitted.
|
|
|
|
|
(path "/api/1/vault" (symbol->string kind) id)
|
|
|
|
|
json->vault-reply)
|
|
|
|
|
|
|
|
|
|
(define (request-cooking id kind)
|
|
|
|
|
"Request the cooking of object ID and KIND (one of 'directory or 'revision)
|
|
|
|
|
to the vault. Return a <vault-reply>."
|
|
|
|
|
(call (swh-url "/api/1/vault" (symbol->string kind) id)
|
|
|
|
|
json->vault-reply
|
2020-07-09 11:19:52 -04:00
|
|
|
|
http-post*))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
|
|
|
|
|
(define* (vault-fetch id kind
|
|
|
|
|
#:key (log-port (current-error-port)))
|
|
|
|
|
"Return an input port from which a bundle of the object with the given ID
|
|
|
|
|
and KIND (one of 'directory or 'revision) can be retrieved, or #f if the
|
|
|
|
|
object could not be found.
|
|
|
|
|
|
|
|
|
|
For a directory, the returned stream is a gzip-compressed tarball. For a
|
|
|
|
|
revision, it is a gzip-compressed stream for 'git fast-import'."
|
|
|
|
|
(let loop ((reply (query-vault id kind)))
|
|
|
|
|
(match reply
|
|
|
|
|
(#f
|
|
|
|
|
(and=> (request-cooking id kind) loop))
|
|
|
|
|
(_
|
|
|
|
|
(match (vault-reply-status reply)
|
|
|
|
|
('done
|
|
|
|
|
;; Fetch the bundle.
|
|
|
|
|
(let-values (((response port)
|
2020-07-09 11:19:52 -04:00
|
|
|
|
(http-get* (swh-url (vault-reply-fetch-url reply))
|
|
|
|
|
#:streaming? #t
|
|
|
|
|
#:verify-certificate?
|
|
|
|
|
(%verify-swh-certificate?))))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
(if (= (response-code response) 200)
|
|
|
|
|
port
|
|
|
|
|
(begin ;shouldn't happen
|
|
|
|
|
(close-port port)
|
|
|
|
|
#f))))
|
|
|
|
|
('failed
|
|
|
|
|
;; Upon failure, we're supposed to try again.
|
|
|
|
|
(format log-port "SWH vault: failure: ~a~%"
|
|
|
|
|
(vault-reply-progress-message reply))
|
|
|
|
|
(format log-port "SWH vault: retrying...~%")
|
|
|
|
|
(loop (request-cooking id kind)))
|
|
|
|
|
((and (or 'new 'pending) status)
|
|
|
|
|
;; Wait until the bundle shows up.
|
|
|
|
|
(let ((message (vault-reply-progress-message reply)))
|
|
|
|
|
(when (eq? 'new status)
|
|
|
|
|
(format log-port "SWH vault: \
|
|
|
|
|
requested bundle cooking, waiting for completion...~%"))
|
|
|
|
|
(when (string? message)
|
|
|
|
|
(format log-port "SWH vault: ~a~%" message))
|
|
|
|
|
|
|
|
|
|
;; Wait long enough so we don't exhaust our maximum number of
|
|
|
|
|
;; requests per hour too fast (as of this writing, the limit is 60
|
|
|
|
|
;; requests per hour per IP address.)
|
|
|
|
|
(sleep (if (eq? status 'new) 60 30))
|
|
|
|
|
|
|
|
|
|
(loop (query-vault id kind)))))))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;;
|
|
|
|
|
;;; High-level interface.
|
|
|
|
|
;;;
|
|
|
|
|
|
|
|
|
|
(define (call-with-temporary-directory proc) ;FIXME: factorize
|
|
|
|
|
"Call PROC with a name of a temporary directory; close the directory and
|
|
|
|
|
delete it when leaving the dynamic extent of this call."
|
|
|
|
|
(let* ((directory (or (getenv "TMPDIR") "/tmp"))
|
|
|
|
|
(template (string-append directory "/guix-directory.XXXXXX"))
|
|
|
|
|
(tmp-dir (mkdtemp! template)))
|
|
|
|
|
(dynamic-wind
|
|
|
|
|
(const #t)
|
|
|
|
|
(lambda ()
|
|
|
|
|
(proc tmp-dir))
|
|
|
|
|
(lambda ()
|
|
|
|
|
(false-if-exception (delete-file-recursively tmp-dir))))))
|
|
|
|
|
|
2021-03-18 16:49:40 -04:00
|
|
|
|
(define* (swh-download-directory id output
|
|
|
|
|
#:key (log-port (current-error-port)))
|
|
|
|
|
"Download from Software Heritage the directory with the given ID, and
|
|
|
|
|
unpack it to OUTPUT. Return #t on success and #f on failure"
|
|
|
|
|
(call-with-temporary-directory
|
|
|
|
|
(lambda (directory)
|
|
|
|
|
(match (vault-fetch id 'directory #:log-port log-port)
|
|
|
|
|
(#f
|
|
|
|
|
(format log-port
|
|
|
|
|
"SWH: directory ~a could not be fetched from the vault~%"
|
|
|
|
|
id)
|
|
|
|
|
#f)
|
|
|
|
|
((? port? input)
|
|
|
|
|
(let ((tar (open-pipe* OPEN_WRITE "tar" "-C" directory "-xzvf" "-")))
|
|
|
|
|
(dump-port input tar)
|
|
|
|
|
(close-port input)
|
|
|
|
|
(let ((status (close-pipe tar)))
|
|
|
|
|
(unless (zero? status)
|
|
|
|
|
(error "tar extraction failure" status)))
|
|
|
|
|
|
|
|
|
|
(match (scandir directory)
|
|
|
|
|
(("." ".." sub-directory)
|
|
|
|
|
(copy-recursively (string-append directory "/" sub-directory)
|
|
|
|
|
output
|
|
|
|
|
#:log (%make-void-port "w"))
|
|
|
|
|
#t))))))))
|
|
|
|
|
|
|
|
|
|
(define (commit-id? reference)
|
|
|
|
|
"Return true if REFERENCE is likely a commit ID, false otherwise---e.g., if
|
|
|
|
|
it is a tag name. This is based on a simple heuristic so use with care!"
|
|
|
|
|
(and (= (string-length reference) 40)
|
|
|
|
|
(string-every char-set:hex-digit reference)))
|
|
|
|
|
|
2019-08-28 05:10:55 -04:00
|
|
|
|
(define* (swh-download url reference output
|
|
|
|
|
#:key (log-port (current-error-port)))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
"Download from Software Heritage a checkout of the Git tag or commit
|
|
|
|
|
REFERENCE originating from URL, and unpack it in OUTPUT. Return #t on success
|
|
|
|
|
and #f on failure.
|
|
|
|
|
|
|
|
|
|
This procedure uses the \"vault\", which contains \"cooked\" directories in
|
|
|
|
|
the form of tarballs. If the requested directory is not cooked yet, it will
|
|
|
|
|
wait until it becomes available, which could take several minutes."
|
|
|
|
|
(match (if (commit-id? reference)
|
|
|
|
|
(lookup-revision reference)
|
|
|
|
|
(lookup-origin-revision url reference))
|
|
|
|
|
((? revision? revision)
|
2019-08-28 05:10:55 -04:00
|
|
|
|
(format log-port "SWH: found revision ~a with directory at '~a'~%"
|
|
|
|
|
(revision-id revision)
|
|
|
|
|
(swh-url (revision-directory-url revision)))
|
2021-03-18 16:49:40 -04:00
|
|
|
|
(swh-download-directory (revision-directory revision) output
|
|
|
|
|
#:log-port log-port))
|
2018-11-19 09:40:21 -05:00
|
|
|
|
(#f
|
|
|
|
|
#f)))
|