2020-09-11 10:48:39 -04:00
|
|
|
// Copyright 2020 The Gitea Authors. All rights reserved.
|
2022-11-27 13:20:29 -05:00
|
|
|
// SPDX-License-Identifier: MIT
|
2020-09-11 10:48:39 -04:00
|
|
|
|
|
|
|
package markdown
|
|
|
|
|
|
|
|
import (
|
2022-09-13 12:33:37 -04:00
|
|
|
"bytes"
|
2020-09-11 10:48:39 -04:00
|
|
|
"errors"
|
2022-09-13 12:33:37 -04:00
|
|
|
"unicode"
|
|
|
|
"unicode/utf8"
|
2020-09-11 10:48:39 -04:00
|
|
|
|
2022-09-13 12:33:37 -04:00
|
|
|
"gopkg.in/yaml.v3"
|
2020-09-11 10:48:39 -04:00
|
|
|
)
|
|
|
|
|
2022-09-13 12:33:37 -04:00
|
|
|
// isYAMLSeparator reports whether line is a YAML frontmatter separator:
// optional leading whitespace, three or more '-' characters, and optional
// trailing whitespace. line must not contain the terminating newline.
//
// Among ASCII bytes only ' ' counts as whitespace; among multi-byte runes
// anything accepted by unicode.IsSpace counts. Invalid UTF-8 decodes to
// utf8.RuneError, which is not a space, so such lines are rejected.
func isYAMLSeparator(line []byte) bool {
	idx := 0

	// Skip leading whitespace. The index is advanced explicitly, exactly once
	// per rune, so the byte following a multi-byte space is never skipped.
	for idx < len(line) {
		if line[idx] >= utf8.RuneSelf {
			r, sz := utf8.DecodeRune(line[idx:])
			if !unicode.IsSpace(r) {
				return false
			}
			idx += sz
			continue
		}
		if line[idx] != ' ' {
			break
		}
		idx++
	}

	// Count the run of dashes; a separator needs at least three.
	dashCount := 0
	for idx < len(line) && line[idx] == '-' {
		dashCount++
		idx++
	}
	if dashCount < 3 {
		return false
	}

	// Only whitespace may follow the dashes.
	for idx < len(line) {
		if line[idx] >= utf8.RuneSelf {
			r, sz := utf8.DecodeRune(line[idx:])
			if !unicode.IsSpace(r) {
				return false
			}
			idx += sz
			continue
		}
		if line[idx] != ' ' {
			return false
		}
		idx++
	}
	return true
}
|
|
|
|
|
|
|
|
// ExtractMetadata consumes a markdown file, parses YAML frontmatter,
|
|
|
|
// and returns the frontmatter metadata separated from the markdown content
|
2023-07-04 14:36:08 -04:00
|
|
|
func ExtractMetadata(contents string, out any) (string, error) {
|
2022-09-13 12:33:37 -04:00
|
|
|
body, err := ExtractMetadataBytes([]byte(contents), out)
|
|
|
|
return string(body), err
|
|
|
|
}
|
|
|
|
|
|
|
|
// ExtractMetadata consumes a markdown file, parses YAML frontmatter,
|
|
|
|
// and returns the frontmatter metadata separated from the markdown content
|
2023-07-04 14:36:08 -04:00
|
|
|
func ExtractMetadataBytes(contents []byte, out any) ([]byte, error) {
|
2022-09-13 12:33:37 -04:00
|
|
|
var front, body []byte
|
|
|
|
|
|
|
|
start, end := 0, len(contents)
|
|
|
|
idx := bytes.IndexByte(contents[start:], '\n')
|
|
|
|
if idx >= 0 {
|
|
|
|
end = start + idx
|
|
|
|
}
|
|
|
|
line := contents[start:end]
|
|
|
|
|
|
|
|
if !isYAMLSeparator(line) {
|
|
|
|
return contents, errors.New("frontmatter must start with a separator line")
|
|
|
|
}
|
|
|
|
frontMatterStart := end + 1
|
|
|
|
for start = frontMatterStart; start < len(contents); start = end + 1 {
|
|
|
|
end = len(contents)
|
|
|
|
idx := bytes.IndexByte(contents[start:], '\n')
|
|
|
|
if idx >= 0 {
|
|
|
|
end = start + idx
|
2020-09-11 10:48:39 -04:00
|
|
|
}
|
2022-09-13 12:33:37 -04:00
|
|
|
line := contents[start:end]
|
2020-09-11 10:48:39 -04:00
|
|
|
if isYAMLSeparator(line) {
|
2022-09-13 12:33:37 -04:00
|
|
|
front = contents[frontMatterStart:start]
|
2022-10-05 14:55:36 -04:00
|
|
|
if end+1 < len(contents) {
|
|
|
|
body = contents[end+1:]
|
|
|
|
}
|
2020-09-12 21:48:47 -04:00
|
|
|
break
|
2020-09-11 10:48:39 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-12 21:48:47 -04:00
|
|
|
if len(front) == 0 {
|
2022-09-13 12:33:37 -04:00
|
|
|
return contents, errors.New("could not determine metadata")
|
2020-09-11 10:48:39 -04:00
|
|
|
}
|
|
|
|
|
2022-09-13 12:33:37 -04:00
|
|
|
if err := yaml.Unmarshal(front, out); err != nil {
|
|
|
|
return contents, err
|
2020-09-11 10:48:39 -04:00
|
|
|
}
|
2022-09-13 12:33:37 -04:00
|
|
|
return body, nil
|
2020-09-11 10:48:39 -04:00
|
|
|
}
|