2021-03-01 15:35:47 -05:00
|
|
|
package render
|
2020-06-20 00:09:01 -04:00
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
2020-07-10 15:33:39 -04:00
|
|
|
"io"
|
2020-06-20 00:09:01 -04:00
|
|
|
"mime"
|
2020-11-23 21:09:48 -05:00
|
|
|
"os"
|
2020-06-20 00:09:01 -04:00
|
|
|
"strings"
|
2020-12-20 15:54:47 -05:00
|
|
|
"time"
|
2020-06-20 00:09:01 -04:00
|
|
|
|
|
|
|
"github.com/makeworld-the-better-one/amfora/structs"
|
|
|
|
"github.com/makeworld-the-better-one/go-gemini"
|
2020-07-10 15:33:39 -04:00
|
|
|
"github.com/spf13/viper"
|
2020-06-20 00:09:01 -04:00
|
|
|
"golang.org/x/text/encoding/ianaindex"
|
|
|
|
)
|
|
|
|
|
2020-07-10 15:33:39 -04:00
|
|
|
// Sentinel errors describing why a response could not be turned into a page.
var (
	// ErrTooLarge means the response body is bigger than the page size limit.
	ErrTooLarge = errors.New("page content would be too large")

	// ErrTimedOut means reading the response body hit a timeout.
	ErrTimedOut = errors.New("page download timed out")

	// ErrCantDisplay means the response is not something that can be shown as a page.
	ErrCantDisplay = errors.New("invalid content for a page")

	// ErrBadEncoding means the charset declared by the response has no usable decoder.
	ErrBadEncoding = errors.New("unsupported encoding")

	// ErrBadMediatype means a mediatype was accepted as displayable but no
	// code path handles it. Seeing this error indicates a programming mistake.
	ErrBadMediatype = errors.New("displayable mediatype is not handled in the code, implementation error")
)
|
2020-07-10 15:33:39 -04:00
|
|
|
|
2020-06-20 00:09:01 -04:00
|
|
|
// isUTF8 reports whether charset names an encoding that can be treated as
// UTF-8 as-is, so the content needs no decoding step. The name is lowercased
// before matching, and an empty charset is accepted as well.
func isUTF8(charset string) bool {
	switch strings.ToLower(charset) {
	case "", "utf-8", "us-ascii":
		return true
	default:
		return false
	}
}
|
|
|
|
|
2021-02-17 14:58:55 -05:00
|
|
|
// decodeMeta parses a response META string with mime.ParseMediaType and
// returns the mediatype, its parameters, and any parse error.
//
// Two cases are handled specially:
//   - An empty META yields "text/gemini" with no parameters, since the spec
//     defines an empty META as "text/gemini; charset=utf-8".
//   - If the mediatype itself parsed but the parameters did not, the
//     mediatype is returned with an empty parameter map and a nil error.
func decodeMeta(meta string) (string, map[string]string, error) {
	if len(meta) == 0 {
		return "text/gemini", map[string]string{}, nil
	}

	mediatype, params, err := mime.ParseMediaType(meta)
	switch {
	case err == nil:
		return mediatype, params, nil
	case mediatype != "":
		// Only the params are broken; keep the mediatype and drop them.
		return mediatype, map[string]string{}, nil
	default:
		return mediatype, params, err
	}
}
|
|
|
|
|
2020-06-20 00:09:01 -04:00
|
|
|
// CanDisplay returns true if the response is supported by Amfora
|
|
|
|
// for displaying on the screen.
|
|
|
|
// It also doubles as a function to detect whether something can be stored in a Page struct.
|
|
|
|
func CanDisplay(res *gemini.Response) bool {
|
|
|
|
if gemini.SimplifyStatus(res.Status) != 20 {
|
|
|
|
// No content
|
|
|
|
return false
|
|
|
|
}
|
2021-02-04 18:06:56 -05:00
|
|
|
mediatype, params, err := decodeMeta(res.Meta)
|
2020-06-20 00:09:01 -04:00
|
|
|
if err != nil {
|
|
|
|
return false
|
|
|
|
}
|
2020-06-21 23:49:43 -04:00
|
|
|
if !strings.HasPrefix(mediatype, "text/") {
|
2020-06-20 00:09:01 -04:00
|
|
|
// Amfora doesn't support other filetypes
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if isUTF8(params["charset"]) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
enc, err := ianaindex.MIME.Encoding(params["charset"]) // Lowercasing is done inside
|
|
|
|
// Encoding sometimes returns nil, see #3 on this repo and golang/go#19421
|
|
|
|
return err == nil && enc != nil
|
|
|
|
}
|
|
|
|
|
2020-07-02 23:55:24 -04:00
|
|
|
// MakePage creates a formatted, rendered Page from the given network response and params.
|
|
|
|
// You must set the Page.Width value yourself.
|
2021-02-17 14:17:13 -05:00
|
|
|
func MakePage(url string, res *gemini.Response, width int, proxied bool) (*structs.Page, error) {
|
2020-06-20 00:09:01 -04:00
|
|
|
if !CanDisplay(res) {
|
2020-08-25 21:03:21 -04:00
|
|
|
return nil, ErrCantDisplay
|
2020-06-20 00:09:01 -04:00
|
|
|
}
|
|
|
|
|
2021-05-15 23:48:47 -04:00
|
|
|
// Create page with initial known values
|
|
|
|
page := structs.Page{
|
|
|
|
URL: url,
|
|
|
|
MadeAt: time.Now(),
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err := io.CopyN(&structs.BytesWriter{&page.Raw}, res.Body, viper.GetInt64("a-general.page_max_size")+1)
|
2020-12-24 23:40:55 -05:00
|
|
|
|
2020-07-10 15:33:39 -04:00
|
|
|
if err == nil {
|
2020-07-26 12:12:54 -04:00
|
|
|
// Content was larger than max size
|
2020-07-10 15:33:39 -04:00
|
|
|
return nil, ErrTooLarge
|
|
|
|
} else if err != io.EOF {
|
2020-11-23 21:26:44 -05:00
|
|
|
if os.IsTimeout(err) {
|
|
|
|
// I would use
|
|
|
|
// errors.Is(err, os.ErrDeadlineExceeded)
|
|
|
|
// but that isn't supported before Go 1.15.
|
|
|
|
|
2020-07-10 15:33:39 -04:00
|
|
|
return nil, ErrTimedOut
|
|
|
|
}
|
|
|
|
// Some other error
|
2020-06-20 00:09:01 -04:00
|
|
|
return nil, err
|
|
|
|
}
|
2020-07-10 15:33:39 -04:00
|
|
|
// Otherwise, the error is EOF, which is what we want.
|
2020-06-20 00:09:01 -04:00
|
|
|
|
2021-02-04 18:06:56 -05:00
|
|
|
mediatype, params, _ := decodeMeta(res.Meta)
|
2020-06-20 00:09:01 -04:00
|
|
|
|
2021-05-15 23:48:47 -04:00
|
|
|
page.RawMediatype = mediatype
|
|
|
|
|
2020-06-20 00:09:01 -04:00
|
|
|
// Convert content first
|
2021-05-15 23:48:47 -04:00
|
|
|
if !isUTF8(params["charset"]) {
|
2020-06-20 00:09:01 -04:00
|
|
|
encoding, err := ianaindex.MIME.Encoding(params["charset"])
|
|
|
|
if encoding == nil || err != nil {
|
|
|
|
// Some encoding doesn't exist and wasn't caught in CanDisplay()
|
2020-08-25 21:03:21 -04:00
|
|
|
return nil, ErrBadEncoding
|
2020-06-20 00:09:01 -04:00
|
|
|
}
|
2021-05-15 23:48:47 -04:00
|
|
|
page.Raw, err = encoding.NewDecoder().Bytes(page.Raw)
|
2020-06-20 00:09:01 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if mediatype == "text/gemini" {
|
2021-05-15 23:48:47 -04:00
|
|
|
page.Mediatype = structs.TextGemini
|
2020-06-21 23:49:43 -04:00
|
|
|
} else if strings.HasPrefix(mediatype, "text/") {
|
2020-07-28 16:58:32 -04:00
|
|
|
if mediatype == "text/x-ansi" || strings.HasSuffix(url, ".ans") || strings.HasSuffix(url, ".ansi") {
|
2020-07-10 17:45:14 -04:00
|
|
|
// ANSI
|
2021-05-15 23:48:47 -04:00
|
|
|
page.Mediatype = structs.TextAnsi
|
2020-07-10 17:45:14 -04:00
|
|
|
}
|
2020-08-25 21:03:21 -04:00
|
|
|
|
|
|
|
// Treated as plaintext
|
2021-05-15 23:48:47 -04:00
|
|
|
page.Mediatype = structs.TextPlain
|
|
|
|
} else {
|
|
|
|
// Not text
|
|
|
|
return nil, ErrBadMediatype
|
2020-06-20 00:09:01 -04:00
|
|
|
}
|
|
|
|
|
2021-05-15 23:48:47 -04:00
|
|
|
return &page, nil
|
2020-06-20 00:09:01 -04:00
|
|
|
}
|