2022-06-25 13:06:01 -04:00
|
|
|
// Copyright 2022 The Gitea Authors. All rights reserved.
|
2022-11-27 13:20:29 -05:00
|
|
|
// SPDX-License-Identifier: MIT
|
2022-06-25 13:06:01 -04:00
|
|
|
|
|
|
|
package sitemap
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"encoding/xml"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"time"
|
|
|
|
)
|
|
|
|
|
2022-12-30 10:31:00 -05:00
|
|
|
const (
	// sitemapFileLimit is the maximum size, in bytes, of a sitemap file (50 MiB).
	sitemapFileLimit = 50 << 20
	// urlsLimit is the maximum number of entries WriteTo accepts; it is applied
	// separately to the URL list and to the sub-sitemap list.
	urlsLimit = 50000

	// schemaURL is the namespace placed in the root element's xmlns attribute.
	schemaURL = "http://www.sitemaps.org/schemas/sitemap/0.9"
	// urlsetName is the root element name of a plain sitemap.
	urlsetName = "urlset"
	// sitemapindexName is the root element name of a sitemap index.
	sitemapindexName = "sitemapindex"
)
|
|
|
|
|
|
|
|
// URL is one entry of a sitemap or of a sitemap index.
type URL struct {
	// URL is the entry's address; it is encoded as the <loc> child element.
	URL string `xml:"loc"`
	// LastMod is the optional last-modification time, encoded as <lastmod>.
	// A nil pointer leaves the element out of the output.
	LastMod *time.Time `xml:"lastmod,omitempty"`
}
|
|
|
|
|
2022-12-30 10:31:00 -05:00
|
|
|
// Sitemap represents a sitemap
|
2022-06-25 13:06:01 -04:00
|
|
|
type Sitemap struct {
|
|
|
|
XMLName xml.Name
|
|
|
|
Namespace string `xml:"xmlns,attr"`
|
|
|
|
|
2022-12-30 10:31:00 -05:00
|
|
|
URLs []URL `xml:"url"`
|
|
|
|
Sitemaps []URL `xml:"sitemap"`
|
2022-06-25 13:06:01 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewSitemap creates a sitemap
|
|
|
|
func NewSitemap() *Sitemap {
|
|
|
|
return &Sitemap{
|
2022-12-30 10:31:00 -05:00
|
|
|
XMLName: xml.Name{Local: urlsetName},
|
|
|
|
Namespace: schemaURL,
|
2022-06-25 13:06:01 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-30 10:31:00 -05:00
|
|
|
// NewSitemapIndex creates a sitemap index.
|
2022-06-25 13:06:01 -04:00
|
|
|
func NewSitemapIndex() *Sitemap {
|
|
|
|
return &Sitemap{
|
2022-12-30 10:31:00 -05:00
|
|
|
XMLName: xml.Name{Local: sitemapindexName},
|
|
|
|
Namespace: schemaURL,
|
2022-06-25 13:06:01 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add adds a URL to the sitemap
|
|
|
|
func (s *Sitemap) Add(u URL) {
|
2022-12-30 10:31:00 -05:00
|
|
|
if s.XMLName.Local == sitemapindexName {
|
|
|
|
s.Sitemaps = append(s.Sitemaps, u)
|
|
|
|
} else {
|
|
|
|
s.URLs = append(s.URLs, u)
|
|
|
|
}
|
2022-06-25 13:06:01 -04:00
|
|
|
}
|
|
|
|
|
2022-12-30 10:31:00 -05:00
|
|
|
// WriteTo writes the sitemap to a response
|
2022-06-25 13:06:01 -04:00
|
|
|
func (s *Sitemap) WriteTo(w io.Writer) (int64, error) {
|
2022-12-30 10:31:00 -05:00
|
|
|
if l := len(s.URLs); l > urlsLimit {
|
|
|
|
return 0, fmt.Errorf("The sitemap contains %d URLs, but only %d are allowed", l, urlsLimit)
|
|
|
|
}
|
|
|
|
if l := len(s.Sitemaps); l > urlsLimit {
|
|
|
|
return 0, fmt.Errorf("The sitemap contains %d sub-sitemaps, but only %d are allowed", l, urlsLimit)
|
2022-06-25 13:06:01 -04:00
|
|
|
}
|
|
|
|
buf := bytes.NewBufferString(xml.Header)
|
|
|
|
if err := xml.NewEncoder(buf).Encode(s); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if err := buf.WriteByte('\n'); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if buf.Len() > sitemapFileLimit {
|
2022-12-30 10:31:00 -05:00
|
|
|
return 0, fmt.Errorf("The sitemap has %d bytes, but only %d are allowed", buf.Len(), sitemapFileLimit)
|
2022-06-25 13:06:01 -04:00
|
|
|
}
|
|
|
|
return buf.WriteTo(w)
|
|
|
|
}
|