1
0
mirror of https://github.com/makew0rld/amfora.git synced 2024-12-04 14:46:29 -05:00

🚧 Initial work on supporting many charsets

This commit is contained in:
makeworld 2020-06-19 13:26:10 -04:00
parent 41075c8c41
commit 0d5c38e060
3 changed files with 26 additions and 9 deletions

1
go.mod
View File

@ -16,6 +16,7 @@ require (
github.com/spf13/viper v1.7.0 github.com/spf13/viper v1.7.0
github.com/stretchr/testify v1.6.0 github.com/stretchr/testify v1.6.0
gitlab.com/tslocum/cview v1.4.8-0.20200614211415-f477be8ba472 gitlab.com/tslocum/cview v1.4.8-0.20200614211415-f477be8ba472
golang.org/x/text v0.3.3
gopkg.in/ini.v1 v1.57.0 // indirect gopkg.in/ini.v1 v1.57.0 // indirect
gopkg.in/yaml.v3 v3.0.0-20200603094226-e3079894b1e8 // indirect gopkg.in/yaml.v3 v3.0.0-20200603094226-e3079894b1e8 // indirect
) )

2
go.sum
View File

@ -290,6 +290,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

View File

@ -17,6 +17,7 @@ import (
"github.com/makeworld-the-better-one/go-gemini" "github.com/makeworld-the-better-one/go-gemini"
"github.com/spf13/viper" "github.com/spf13/viper"
"gitlab.com/tslocum/cview" "gitlab.com/tslocum/cview"
"golang.org/x/text/encoding/ianaindex"
) )
// CanDisplay returns true if the response is supported by Amfora // CanDisplay returns true if the response is supported by Amfora
@ -31,15 +32,16 @@ func CanDisplay(res *gemini.Response) bool {
if err != nil { if err != nil {
return false return false
} }
if strings.ToLower(params["charset"]) != "utf-8" && strings.ToLower(params["charset"]) != "us-ascii" && params["charset"] != "" {
// Amfora doesn't support other charsets
return false
}
if mediatype != "text/gemini" && mediatype != "text/plain" { if mediatype != "text/gemini" && mediatype != "text/plain" {
// Amfora doesn't support other filetypes // Amfora doesn't support other filetypes
return false return false
} }
return true // Check if there is an encoding for the charset already
if params["charset"] == "" {
return true // Means UTF-8
}
_, err = ianaindex.MIME.Encoding(params["charset"]) // Lowercasing is done inside
return err == nil
} }
// convertRegularGemini converts non-preformatted blocks of text/gemini // convertRegularGemini converts non-preformatted blocks of text/gemini
@ -207,23 +209,35 @@ func MakePage(url string, res *gemini.Response) (*structs.Page, error) {
return nil, errors.New("not valid content for a Page") return nil, errors.New("not valid content for a Page")
} }
content, err := ioutil.ReadAll(res.Body) // TODO: Don't use all memory on large pages rawText, err := ioutil.ReadAll(res.Body) // TODO: Don't use all memory on large pages
if err != nil { if err != nil {
return nil, err return nil, err
} }
res.Body.Close() res.Body.Close()
mediatype, _, _ := mime.ParseMediaType(res.Meta) mediatype, params, _ := mime.ParseMediaType(res.Meta)
// Convert content first
var utfText string
if params["charset"] == "" || strings.ToLower(params["charset"]) == "us-ascii" {
utfText = string(rawText)
} else {
encoding, _ := ianaindex.MIME.Encoding(params["charset"])
utfText, err = encoding.NewDecoder().String(string(rawText))
if err != nil {
return nil, err
}
}
if mediatype == "text/plain" { if mediatype == "text/plain" {
return &structs.Page{ return &structs.Page{
Url: url, Url: url,
Content: string(content), Content: utfText,
Links: []string{}, // Plaintext has no links Links: []string{}, // Plaintext has no links
}, nil }, nil
} }
if mediatype == "text/gemini" { if mediatype == "text/gemini" {
rendered, links := RenderGemini(string(content)) rendered, links := RenderGemini(utfText)
return &structs.Page{ return &structs.Page{
Url: url, Url: url,
Content: rendered, Content: rendered,