1
0
mirror of https://github.com/makew0rld/amfora.git synced 2024-12-04 14:46:29 -05:00

🚧 Sorting, update funcs, mutexes

This commit is contained in:
makeworld 2020-08-16 17:42:45 -04:00
parent 4e91ad87bd
commit d254917b36
3 changed files with 212 additions and 29 deletions

View File

@ -39,7 +39,7 @@ func main() {
}
err = feeds.Init()
if err != nil {
fmt.Fprintf(os.Stderr, "Config error: %v\n", err)
fmt.Fprintf(os.Stderr, "feeds.json error: %v\n", err)
os.Exit(1)
}

View File

@ -1,45 +1,32 @@
package feeds
import (
"crypto/sha256"
"encoding/json"
"errors"
"fmt"
"io"
"mime"
"os"
"path"
"sort"
"strings"
"sync"
"time"
"github.com/makeworld-the-better-one/amfora/client"
"github.com/makeworld-the-better-one/amfora/config"
"github.com/makeworld-the-better-one/go-gemini"
"github.com/mmcdole/gofeed"
)
/*
Example JSON.
{
"feeds": {
"url1": <gofeed.Feed>,
"url2: <gofeed.Feed>"
},
"pages": {
"url1": "hash",
"url2": "hash"
}
}
var (
ErrSaving = errors.New("couldn't save JSON to disk")
ErrNotSuccess = errors.New("status 20 not returned")
ErrNotFeed = errors.New("not a valid feed")
)
"pages" are the pages tracked for changes that aren't feeds.
The hash is SHA-256.
*/
// Decoded JSON
type feedJson struct {
Feeds map[string]*gofeed.Feed `json:"feeds"`
Pages map[string]string `json:"pages"`
}
var data feedJson
var ErrSaving = errors.New("couldn't save JSON to disk")
var writeMu = sync.Mutex{}
// Init should be called after config.Init.
func Init() error {
@ -51,20 +38,28 @@ func Init() error {
return fmt.Errorf("feeds json is corrupted: %v", err)
}
return nil
// TODO: Start pulling all feeds in another thread
}
// IsTracked returns true if the feed/page URL is already being tracked.
func IsTracked(url string) bool {
	// Direct map lookups replace the previous O(n) iteration over
	// each map — key membership is what's being tested.
	data.feedMu.RLock()
	_, isFeed := data.Feeds[url]
	data.feedMu.RUnlock()
	if isFeed {
		return true
	}

	data.pageMu.RLock()
	_, isPage := data.Pages[url]
	data.pageMu.RUnlock()
	return isPage
}
@ -79,10 +74,16 @@ func GetFeed(mediatype, filename string, r io.Reader) (*gofeed.Feed, bool) {
return nil, false
}
feed, err := gofeed.NewParser().Parse(r)
if feed == nil {
return nil, false
}
return feed, err == nil
}
func writeJson() error {
writeMu.Lock()
defer writeMu.Unlock()
f, err := os.OpenFile(config.FeedPath, os.O_WRONLY|os.O_CREATE, 0666)
if err != nil {
return err
@ -91,27 +92,126 @@ func writeJson() error {
enc := json.NewEncoder(f)
enc.SetEscapeHTML(false)
enc.SetIndent("", " ")
data.feedMu.Lock()
data.pageMu.Lock()
err = enc.Encode(&data)
data.feedMu.Unlock()
data.pageMu.Unlock()
return err
}
// AddFeed stores a feed.
// It can be used to update a feed for a URL, although the package
// will handle that on its own.
// It returns ErrSaving if the feed could not be persisted to disk.
func AddFeed(url string, feed *gofeed.Feed) error {
	if feed == nil {
		// Programmer error — callers must never pass a nil feed.
		panic("feed is nil")
	}
	sort.Sort(feed)

	// Remove any content to save memory and disk space
	for _, item := range feed.Items {
		item.Content = ""
	}

	data.feedMu.Lock()
	oldFeed, hadOld := data.Feeds[url]
	data.Feeds[url] = feed
	data.feedMu.Unlock()

	if err := writeJson(); err != nil {
		// Don't use in-memory if it couldn't be saved.
		// Restore the previous entry (if any) rather than deleting it —
		// a blind delete would drop an already-tracked feed when an
		// update fails to save.
		data.feedMu.Lock()
		if hadOld {
			data.Feeds[url] = oldFeed
		} else {
			delete(data.Feeds, url)
		}
		data.feedMu.Unlock()
		return ErrSaving
	}
	return nil
}
// AddPage stores a page URL to track for changes.
// Do not use it to update a page, as it only resets the hash.
// It returns ErrSaving if the change could not be persisted to disk.
func AddPage(url string) error {
	data.pageMu.Lock()
	oldPage, hadOld := data.Pages[url]
	data.Pages[url] = &pageJson{} // No hash yet
	data.pageMu.Unlock()

	if err := writeJson(); err != nil {
		// Don't use in-memory if it couldn't be saved.
		// Restore the previous entry (if any) rather than deleting it,
		// so a failed save doesn't drop an already-tracked page.
		data.pageMu.Lock()
		if hadOld {
			data.Pages[url] = oldPage
		} else {
			delete(data.Pages, url)
		}
		data.pageMu.Unlock()
		return ErrSaving
	}
	return nil
}
// updateFeed fetches url, parses the response as a feed, and stores it
// via AddFeed. It returns ErrNotSuccess for non-20 responses and
// ErrNotFeed when the body can't be parsed as a feed.
func updateFeed(url string) error {
	res, err := client.Fetch(url)
	if res != nil {
		defer res.Body.Close()
	}
	if err != nil {
		return err
	}

	if res.Status != gemini.StatusSuccess {
		return ErrNotSuccess
	}

	mediatype, _, err := mime.ParseMediaType(res.Meta)
	if err != nil {
		return err
	}

	feed, ok := GetFeed(mediatype, path.Base(url), res.Body)
	if !ok {
		return ErrNotFeed
	}
	return AddFeed(url, feed)
}
// updatePage fetches url, hashes the response body with SHA-256, and
// records the new hash and update time for the page.
func updatePage(url string) error {
	res, err := client.Fetch(url)
	if err != nil {
		if res != nil {
			res.Body.Close()
		}
		return err
	}
	defer res.Body.Close()

	if res.Status != gemini.StatusSuccess {
		return ErrNotSuccess
	}

	h := sha256.New()
	if _, err := io.Copy(h, res.Body); err != nil {
		return err
	}

	data.pageMu.Lock()
	oldPage, hadOld := data.Pages[url]
	data.Pages[url] = &pageJson{
		Hash:    fmt.Sprintf("%x", h.Sum(nil)),
		Updated: time.Now().UTC(),
	}
	data.pageMu.Unlock()

	if err := writeJson(); err != nil {
		// Don't use in-memory if it couldn't be saved.
		// Restore the previous entry (if any) rather than deleting it —
		// deleting would untrack a page that was tracked before this call.
		data.pageMu.Lock()
		if hadOld {
			data.Pages[url] = oldPage
		} else {
			delete(data.Pages, url)
		}
		data.pageMu.Unlock()
		return err
	}
	return nil
}
// updateAll updates all feeds and pages.
// It should run in goroutine at a regular interval.
func updateAll() {
	// TODO: not implemented yet — presumably will iterate over data.Feeds
	// and data.Pages, calling updateFeed/updatePage for each URL.
}

83
feeds/structs.go Normal file
View File

@ -0,0 +1,83 @@
package feeds
import (
"sync"
"time"
"github.com/mmcdole/gofeed"
)
/*
Example JSON.
{
"feeds": {
"url1": <gofeed.Feed>,
"url2": <gofeed.Feed>,
},
"pages": {
"url1": {
"hash": <hash>,
"updated": <time>
},
"url2": {
"hash": <hash>,
"updated": <time>
}
}
}
"pages" are the pages tracked for changes that aren't feeds.
The hash used is SHA-256.
The time is in RFC 3339 format, preferably in the UTC timezone.
*/
// jsonData is the decoded feeds JSON file, kept as a single in-memory copy.
// The unexported mutexes guard their respective maps and are ignored by
// encoding/json; only the exported map fields are (un)marshaled.
type jsonData struct {
	feedMu sync.RWMutex // guards Feeds
	pageMu sync.RWMutex // guards Pages
	Feeds  map[string]*gofeed.Feed `json:"feeds,omitempty"`
	Pages  map[string]*pageJson    `json:"pages,omitempty"`
}

// pageJson records the last-seen state of a tracked (non-feed) page.
type pageJson struct {
	Hash    string    `json:"hash"`    // SHA-256 of the page body, hex-encoded
	Updated time.Time `json:"updated"` // when the hash was last computed (UTC)
}

var data jsonData // Global instance of jsonData - loaded from JSON and used
// PageEntry is a single item on a feed page.
// It is used both for tracked feeds and pages.
type PageEntry struct {
	Author    string
	Title     string
	URL       string
	Published time.Time // used as the sort key by PageEntries.Less
}
// PageEntries is new-to-old list of Entry structs, used to create a feed page.
// It should always be assumed to be sorted when used in other packages.
//
// NOTE(review): Less orders by ascending Published time (oldest first),
// which appears to contradict "new-to-old" above — confirm the intended
// order with callers.
type PageEntries struct {
	sync.RWMutex
	Entries []*PageEntry
}

// Implement sort.Interface

// Len returns the number of entries, under a read lock.
func (e *PageEntries) Len() int {
	e.RLock()
	defer e.RUnlock()
	return len(e.Entries)
}

// Less reports whether entry i was published before entry j.
// NOTE(review): each method locks/unlocks individually, so a sort.Sort
// on a PageEntries shared with concurrent writers is not atomic as a
// whole — callers presumably have exclusive access during the sort;
// verify that assumption.
func (e *PageEntries) Less(i, j int) bool {
	e.RLock()
	defer e.RUnlock()
	return e.Entries[i].Published.Before(e.Entries[j].Published)
}

// Swap exchanges entries i and j, under a write lock.
func (e *PageEntries) Swap(i, j int) {
	e.Lock()
	e.Entries[i], e.Entries[j] = e.Entries[j], e.Entries[i]
	e.Unlock()
}