redesign flat file structure to key/value store using buntdb

This commit is contained in:
Wouter Groeneveld 2021-04-18 15:39:29 +02:00
parent c246081729
commit 8cd3cb1f1e
19 changed files with 442 additions and 275 deletions

View File

@ -24,10 +24,13 @@ func (wm Mention) String() string {
return fmt.Sprintf("source: %s, target: %s", wm.Source, wm.Target)
}
func (wm Mention) AsPath(conf *common.Config) string {
filename := fmt.Sprintf("%x", md5.Sum([]byte("source="+wm.Source+",target="+wm.Target)))
func (wm Mention) Domain(conf *common.Config) string {
domain, _ := conf.FetchDomain(wm.Target)
return conf.DataPath + "/" + domain + "/" + filename + ".json"
return domain
}
func (wm Mention) Key() string {
return fmt.Sprintf("%x", md5.Sum([]byte("source="+wm.Source+",target="+wm.Target)))
}
func (wm Mention) SourceUrl() *url.URL {

View File

@ -23,7 +23,7 @@ type IndiewebDataResult struct {
Data []*IndiewebData `json:"json"`
}
func WrapResult(data []*IndiewebData) IndiewebDataResult {
func ResultSuccess(data []*IndiewebData) IndiewebDataResult {
return IndiewebDataResult{
Status: "success",
Data: data,

View File

@ -4,6 +4,7 @@ import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/app/webmention/recv"
"brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db"
"brainbaking.com/go-jamming/rest"
"encoding/xml"
"fmt"
@ -12,7 +13,7 @@ import (
"net/http"
)
func HandlePost(conf *common.Config) http.HandlerFunc {
func HandlePost(conf *common.Config, db db.MentionRepo) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
body, err := ioutil.ReadAll(r.Body)
if err != nil {
@ -38,6 +39,7 @@ func HandlePost(conf *common.Config) http.HandlerFunc {
receiver := &recv.Receiver{
RestClient: &rest.HttpClient{},
Conf: conf,
Repo: db,
}
go receiver.Receive(wm)
pingbackSuccess(w)

View File

@ -10,11 +10,12 @@ import (
// not that contempt with passing conf, but can't create receivers on non-local types, and won't move specifics into package app
// https://blog.questionable.services/article/http-handler-error-handling-revisited/ is the better idea, but more work
func (s *server) routes() {
cnf := s.conf
c := s.conf
db := s.repo
s.router.HandleFunc("/", index.Handle(cnf)).Methods("GET")
s.router.HandleFunc("/pingback", pingback.HandlePost(cnf)).Methods("POST")
s.router.HandleFunc("/webmention", webmention.HandlePost(cnf)).Methods("POST")
s.router.HandleFunc("/webmention/{domain}/{token}", s.authorizedOnly(webmention.HandleGet(cnf))).Methods("GET")
s.router.HandleFunc("/webmention/{domain}/{token}", s.authorizedOnly(webmention.HandlePut(cnf))).Methods("PUT")
s.router.HandleFunc("/", index.Handle(c)).Methods("GET")
s.router.HandleFunc("/pingback", pingback.HandlePost(c, db)).Methods("POST")
s.router.HandleFunc("/webmention", webmention.HandlePost(c, db)).Methods("POST")
s.router.HandleFunc("/webmention/{domain}/{token}", s.authorizedOnly(webmention.HandleGet(db))).Methods("GET")
s.router.HandleFunc("/webmention/{domain}/{token}", s.authorizedOnly(webmention.HandlePut(c, db))).Methods("PUT")
}

View File

@ -1,6 +1,7 @@
package app
import (
"brainbaking.com/go-jamming/db"
"brainbaking.com/go-jamming/rest"
"github.com/MagnusFrater/helmet"
"net/http"
@ -16,6 +17,7 @@ import (
type server struct {
router *mux.Router
conf *common.Config
repo db.MentionRepo
}
func (s *server) authorizedOnly(h http.HandlerFunc) http.HandlerFunc {
@ -44,9 +46,10 @@ func ipFrom(r *http.Request) string {
func Start() {
r := mux.NewRouter()
config := common.Configure()
repo := db.NewMentionRepo(config)
helmet := helmet.Default()
server := &server{router: r, conf: config}
server := &server{router: r, conf: config, repo: repo}
server.routes()
http.Handle("/", r)

View File

@ -2,9 +2,9 @@ package webmention
import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/app/webmention/load"
"brainbaking.com/go-jamming/app/webmention/recv"
"brainbaking.com/go-jamming/app/webmention/send"
"brainbaking.com/go-jamming/db"
"github.com/gorilla/mux"
"net/http"
@ -16,18 +16,14 @@ var (
httpClient = &rest.HttpClient{}
)
func HandleGet(conf *common.Config) http.HandlerFunc {
func HandleGet(repo db.MentionRepo) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
domain := mux.Vars(r)["domain"]
conf.Lock(domain)
defer conf.Unlock(domain)
result := load.FromDisk(domain, conf.DataPath)
rest.Json(w, result)
rest.Json(w, repo.GetAll(domain))
}
}
func HandlePut(conf *common.Config) http.HandlerFunc {
func HandlePut(conf *common.Config, repo db.MentionRepo) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
since := getSinceQueryParam(r)
domain := mux.Vars(r)["domain"]
@ -35,6 +31,7 @@ func HandlePut(conf *common.Config) http.HandlerFunc {
snder := &send.Sender{
RestClient: httpClient,
Conf: conf,
Repo: repo,
}
go snder.Send(domain, since)
rest.Accept(w)
@ -50,7 +47,7 @@ func getSinceQueryParam(r *http.Request) string {
return since
}
func HandlePost(conf *common.Config) http.HandlerFunc {
func HandlePost(conf *common.Config, repo db.MentionRepo) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
r.ParseForm()
if !validate(r, r.Header, conf) {
@ -71,6 +68,7 @@ func HandlePost(conf *common.Config) http.HandlerFunc {
recv := &recv.Receiver{
RestClient: httpClient,
Conf: conf,
Repo: repo,
}
go recv.Receive(wm)

View File

@ -1,39 +0,0 @@
package load
import (
"brainbaking.com/go-jamming/app/mf"
"io/ioutil"
"path"
)
// FromDisk assumes that params have already been validated.
func FromDisk(domain string, dataPath string) mf.IndiewebDataResult {
loadPath := path.Join(dataPath, domain)
info, _ := ioutil.ReadDir(loadPath)
amountOfFiles := len(info)
sema := make(chan struct{}, 20)
results := make(chan *mf.IndiewebData, amountOfFiles)
for _, file := range info {
go func(fileName string) {
sema <- struct{}{}
defer func() { <-sema }()
results <- mf.RequireFromFile(path.Join(loadPath, fileName))
}(file.Name())
}
indiewebResults := gather(amountOfFiles, results)
return mf.WrapResult(indiewebResults)
}
func gather(amount int, results <-chan *mf.IndiewebData) []*mf.IndiewebData {
var indiewebResults []*mf.IndiewebData
for i := 0; i < amount; i++ {
result := <-results
if !result.IsEmpty() {
indiewebResults = append(indiewebResults, result)
}
}
return indiewebResults
}

View File

@ -1,69 +0,0 @@
package load
import (
"fmt"
"github.com/stretchr/testify/assert"
"io/ioutil"
"os"
"sort"
"strings"
"testing"
)
// stress tests to see what concurrent disk access is like. Runs fine, even with 5000 runs and 100 files.
// this means worker pools do not have to be implemented in FromDisk().
// However, if runs := 10000, some results are empty. At other times, even ioutil.ReadDir() panics...
// The rate limiter should catch this, combined with a domain read lock in the caller.
// Furthermore, a run of 1 and files of 50k breaks the OS without using a semaphore to limit the nr. of open files!
func TestFromDiskStressTest(t *testing.T) {
runs := 100
files := 100
os.MkdirAll("testdata/somedomain", os.ModePerm)
defer os.RemoveAll("testdata")
for i := 0; i < files; i++ {
json := `{"author":{"name":"Jef Klakveld","picture":"https://brainbaking.com/img/avatar.jpg"},"name":"I much prefer Sonic Mania's Lock On to Belgium's t...","content":"I much prefer Sonic Manias Lock On to Belgiums third Lock Down. Sigh. At least 16-bit 2D platformers make me smile: https://jefklakscodex.com/articles/reviews/sonic-mania/\n\n\n\nEnclosed Toot image","published":"2021-03-25T10:45:00","url":"https://brainbaking.com/notes/2021/03/25h10m45s09/","type":"mention","source":"https://brainbaking.com/notes/2021/03/25h10m45s09/","target":"https://jefklakscodex.com/articles/reviews/sonic-mania/"}`
ioutil.WriteFile(fmt.Sprintf("testdata/somedomain/%d.json", i), []byte(json), os.ModePerm)
}
amounts := make(chan int, runs)
for i := 0; i < runs; i++ {
go func(nr int) {
data := FromDisk("somedomain", "testdata")
itms := len(data.Data)
fmt.Printf("From disk #%d - found %d items\n", nr, itms)
amounts <- itms
}(i)
}
fmt.Println("Asserting...")
for i := 0; i < runs; i++ {
actual := <-amounts
assert.Equal(t, files, actual)
}
}
func TestFromDiskReturnsAllJsonFilesFromDiskWrappedInResult(t *testing.T) {
os.MkdirAll("testdata/somedomain", os.ModePerm)
defer os.RemoveAll("testdata")
json1 := `{"author":{"name":"Wouter Groeneveld","picture":"https://brainbaking.com/img/avatar.jpg"},"name":"I much prefer Sonic Mania's Lock On to Belgium's t...","content":"I much prefer Sonic Manias Lock On to Belgiums third Lock Down. Sigh. At least 16-bit 2D platformers make me smile: https://jefklakscodex.com/articles/reviews/sonic-mania/\n\n\n\nEnclosed Toot image","published":"2021-03-25T10:45:00","url":"https://brainbaking.com/notes/2021/03/25h10m45s09/","type":"mention","source":"https://brainbaking.com/notes/2021/03/25h10m45s09/","target":"https://jefklakscodex.com/articles/reviews/sonic-mania/"}`
json2 := `{"author":{"name":"Jef Klakveld","picture":"https://brainbaking.com/img/avatar.jpg"},"name":"I much prefer Sonic Mania's Lock On to Belgium's t...","content":"I much prefer Sonic Manias Lock On to Belgiums third Lock Down. Sigh. At least 16-bit 2D platformers make me smile: https://jefklakscodex.com/articles/reviews/sonic-mania/\n\n\n\nEnclosed Toot image","published":"2021-03-25T10:45:00","url":"https://brainbaking.com/notes/2021/03/25h10m45s09/","type":"mention","source":"https://brainbaking.com/notes/2021/03/25h10m45s09/","target":"https://jefklakscodex.com/articles/reviews/sonic-mania/"}`
ioutil.WriteFile("testdata/somedomain/testjson1.json", []byte(json1), os.ModePerm)
ioutil.WriteFile("testdata/somedomain/testjson2.json", []byte(json2), os.ModePerm)
result := FromDisk("somedomain", "testdata")
sort.SliceStable(result.Data, func(i, j int) bool {
comp := strings.Compare(result.Data[i].Author.Name, result.Data[j].Author.Name)
if comp > 0 {
return false
}
return true
})
assert.Equal(t, "success", result.Status)
assert.Equal(t, "Jef Klakveld", result.Data[0].Author.Name)
assert.Equal(t, "Wouter Groeneveld", result.Data[1].Author.Name)
}

View File

@ -3,11 +3,8 @@ package recv
import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db"
"brainbaking.com/go-jamming/rest"
"encoding/json"
"io/fs"
"io/ioutil"
"os"
"regexp"
"strings"
@ -20,6 +17,7 @@ import (
type Receiver struct {
RestClient rest.Client
Conf *common.Config
Repo db.MentionRepo
}
func (recv *Receiver) Receive(wm mf.Mention) {
@ -28,18 +26,13 @@ func (recv *Receiver) Receive(wm mf.Mention) {
if geterr != nil {
log.Warn().Err(geterr).Msg(" ABORT: invalid url")
recv.deletePossibleOlderWebmention(wm)
recv.Repo.Delete(wm)
return
}
recv.processSourceBody(body, wm)
}
// Deletes a possible webmention. Ignores remove errors.
func (recv *Receiver) deletePossibleOlderWebmention(wm mf.Mention) {
os.Remove(wm.AsPath(recv.Conf))
}
func (recv *Receiver) processSourceBody(body string, wm mf.Mention) {
if !strings.Contains(body, wm.Target) {
log.Warn().Str("target", wm.Target).Msg("ABORT: no mention of target found in html src of source!")
@ -49,10 +42,11 @@ func (recv *Receiver) processSourceBody(body string, wm mf.Mention) {
data := microformats.Parse(strings.NewReader(body), wm.SourceUrl())
indieweb := recv.convertBodyToIndiewebData(body, wm, mf.HEntry(data))
if err := recv.saveWebmentionToDisk(wm, indieweb); err != nil {
log.Err(err).Msg("Unable to save Webmention to disk")
key, err := recv.Repo.Save(wm, indieweb)
if err != nil {
log.Error().Err(err).Stringer("wm", wm).Msg("processSourceBody: failed to save json to db")
}
log.Info().Str("file", wm.AsPath(recv.Conf)).Msg("OK: Webmention processed.")
log.Info().Str("key", key).Msg("OK: Webmention processed.")
}
func (recv *Receiver) convertBodyToIndiewebData(body string, wm mf.Mention, hEntry *microformats.Microformat) *mf.IndiewebData {
@ -62,21 +56,6 @@ func (recv *Receiver) convertBodyToIndiewebData(body string, wm mf.Mention, hEnt
return recv.parseBodyAsIndiewebSite(hEntry, wm)
}
func (recv *Receiver) saveWebmentionToDisk(wm mf.Mention, indieweb *mf.IndiewebData) error {
domain, _ := recv.Conf.FetchDomain(wm.Target)
recv.Conf.Lock(domain)
defer recv.Conf.Unlock(domain)
jsonData, jsonErr := json.Marshal(indieweb)
if jsonErr != nil {
return jsonErr
}
err := ioutil.WriteFile(wm.AsPath(recv.Conf), jsonData, fs.ModePerm)
if err != nil {
return err
}
return nil
}
// see https://github.com/willnorris/microformats/blob/main/microformats.go
func (recv *Receiver) parseBodyAsIndiewebSite(hEntry *microformats.Microformat, wm mf.Mention) *mf.IndiewebData {
return &mf.IndiewebData{

View File

@ -2,11 +2,11 @@ package recv
import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/db"
"encoding/json"
"errors"
"github.com/stretchr/testify/assert"
"io/ioutil"
"net/http"
"os"
"testing"
"time"
@ -19,25 +19,7 @@ var conf = &common.Config{
"jefklakscodex.com",
"brainbaking.com",
},
DataPath: "testdata",
}
func TestConvertWebmentionToPath(t *testing.T) {
wm := mf.Mention{
Source: "https://brainbaking.com",
Target: "https://jefklakscodex.com/articles",
}
result := wm.AsPath(conf)
if result != "testdata/jefklakscodex.com/99be66594fdfcf482545fead8e7e4948.json" {
t.Fatalf("md5 hash check failed, got " + result)
}
}
func writeSomethingTo(filename string) {
file, _ := os.Create(filename)
file.WriteString("lolz")
defer file.Close()
Connection: ":memory:",
}
func TestReceive(t *testing.T) {
@ -100,15 +82,14 @@ func TestReceive(t *testing.T) {
for _, tc := range cases {
t.Run(tc.label, func(t *testing.T) {
os.MkdirAll("testdata/brainbaking.com", os.ModePerm)
os.MkdirAll("testdata/jefklakscodex.com", os.ModePerm)
defer os.RemoveAll("testdata")
common.Now = func() time.Time {
return time.Date(2020, time.January, 1, 12, 30, 0, 0, time.UTC)
}
repo := db.NewMentionRepo(conf)
receiver := &Receiver{
Conf: common.NewConfig(conf),
Conf: conf,
Repo: repo,
RestClient: &mocks.RestClientMock{
GetBodyFunc: mocks.RelPathGetBodyFunc(t, "../../../mocks/"),
},
@ -116,22 +97,23 @@ func TestReceive(t *testing.T) {
receiver.Receive(tc.wm)
actualJson, _ := ioutil.ReadFile(tc.wm.AsPath(conf))
actual := repo.Get(tc.wm)
actualJson, _ := json.Marshal(actual)
assert.JSONEq(t, tc.json, string(actualJson))
})
}
}
func TestReceiveTargetDoesNotExistAnymoreDeletesPossiblyOlderWebmention(t *testing.T) {
os.MkdirAll("testdata/jefklakscodex.com", os.ModePerm)
defer os.RemoveAll("testdata")
repo := db.NewMentionRepo(conf)
wm := mf.Mention{
Source: "https://brainbaking.com",
Target: "https://jefklakscodex.com/articles",
}
filename := wm.AsPath(conf)
writeSomethingTo(filename)
repo.Save(wm, &mf.IndiewebData{
Name: "something something",
})
client := &mocks.RestClientMock{
GetBodyFunc: func(url string) (http.Header, string, error) {
@ -139,12 +121,14 @@ func TestReceiveTargetDoesNotExistAnymoreDeletesPossiblyOlderWebmention(t *testi
},
}
receiver := &Receiver{
Conf: common.NewConfig(conf),
Conf: conf,
RestClient: client,
Repo: repo,
}
receiver.Receive(wm)
assert.NoFileExists(t, filename)
indb := repo.Get(wm)
assert.Empty(t, indb)
}
func TestReceiveTargetThatDoesNotPointToTheSourceDoesNothing(t *testing.T) {
@ -152,32 +136,31 @@ func TestReceiveTargetThatDoesNotPointToTheSourceDoesNothing(t *testing.T) {
Source: "https://brainbaking.com/valid-indieweb-source.html",
Target: "https://brainbaking.com/valid-indieweb-source.html",
}
filename := wm.AsPath(conf)
writeSomethingTo(filename)
repo := db.NewMentionRepo(conf)
receiver := &Receiver{
Conf: common.NewConfig(conf),
Conf: conf,
Repo: repo,
RestClient: &mocks.RestClientMock{
GetBodyFunc: mocks.RelPathGetBodyFunc(t, "../../../mocks/"),
},
}
receiver.Receive(wm)
assert.NoFileExists(t, filename)
assert.Empty(t, repo.GetAll("brainbaking.com").Data)
}
func TestProcessSourceBodyAbortsIfNoMentionOfTargetFoundInSourceHtml(t *testing.T) {
os.MkdirAll("testdata/jefklakscodex.com", os.ModePerm)
defer os.RemoveAll("testdata")
wm := mf.Mention{
Source: "https://brainbaking.com",
Target: "https://jefklakscodex.com/articles",
}
repo := db.NewMentionRepo(conf)
receiver := &Receiver{
Conf: common.NewConfig(conf),
Conf: conf,
Repo: repo,
}
receiver.processSourceBody("<html>my nice body</html>", wm)
assert.NoFileExists(t, wm.AsPath(conf))
assert.Empty(t, repo.Get(wm))
}

View File

@ -4,11 +4,9 @@ import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/app/pingback/send"
"brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db"
"brainbaking.com/go-jamming/rest"
"fmt"
"github.com/rs/zerolog/log"
"io/fs"
"io/ioutil"
"strings"
"sync"
"time"
@ -17,27 +15,27 @@ import (
type Sender struct {
RestClient rest.Client
Conf *common.Config
Repo db.MentionRepo
}
func (snder *Sender) sinceForDomain(domain string, since string) time.Time {
if since != "" {
return common.IsoToTime(since)
}
sinceConf, err := ioutil.ReadFile(fmt.Sprintf("%s/%s-since.txt", snder.Conf.DataPath, domain))
sinceConf, err := snder.Repo.Since(domain)
if err != nil {
log.Warn().Str("since", since).Msg("No query param, and no config found. Reverting to beginning of time...")
log.Warn().Str("domain", domain).Msg("No query param, and no config found. Reverting to beginning of time...")
return time.Time{}
}
return common.IsoToTime(string(sinceConf))
return sinceConf
}
func (snder *Sender) saveSinceForDomain(domain string, since time.Time) {
ioutil.WriteFile(fmt.Sprintf("%s/%s-since.txt", snder.Conf.DataPath, domain), []byte(common.TimeToIso(since)), fs.ModePerm)
func (snder *Sender) updateSinceForDomain(domain string) {
snder.Repo.UpdateSince(domain, common.Now())
}
func (snder *Sender) Send(domain string, since string) {
snder.Conf.Lock(domain)
defer snder.Conf.Unlock(domain)
timeSince := snder.sinceForDomain(domain, since)
log.Info().Str("domain", domain).Time("since", timeSince).Msg(` OK: someone wants to send mentions`)
feedUrl := "https://" + domain + "/index.xml"
@ -52,7 +50,7 @@ func (snder *Sender) Send(domain string, since string) {
return
}
snder.saveSinceForDomain(domain, timeSince)
snder.updateSinceForDomain(domain)
}
func (snder *Sender) parseRssFeed(feed string, since time.Time) error {

View File

@ -3,24 +3,30 @@ package send
import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db"
"brainbaking.com/go-jamming/mocks"
"brainbaking.com/go-jamming/rest"
"fmt"
"github.com/stretchr/testify/assert"
"io/ioutil"
"net/http"
"net/url"
"os"
"sync"
"testing"
"time"
)
var conf = &common.Config{
Connection: ":memory:",
AllowedWebmentionSources: []string{
"domain",
},
}
func TestSinceForDomain(t *testing.T) {
cases := []struct {
label string
sinceInParam string
sinceInFile string
sinceInDb string
expected time.Time
}{
{
@ -43,19 +49,14 @@ func TestSinceForDomain(t *testing.T) {
},
}
snder := Sender{
Conf: &common.Config{
DataPath: "testdata",
},
}
for _, tc := range cases {
t.Run(tc.label, func(t *testing.T) {
os.MkdirAll("testdata", os.ModePerm)
defer os.RemoveAll("testdata")
if tc.sinceInFile != "" {
ioutil.WriteFile("testdata/domain-since.txt", []byte(tc.sinceInFile), os.ModePerm)
snder := Sender{
Conf: conf,
Repo: db.NewMentionRepo(conf),
}
if tc.sinceInDb != "" {
snder.Repo.UpdateSince("domain", common.IsoToTime(tc.sinceInDb))
}
actual := snder.sinceForDomain("domain", tc.sinceInParam)
@ -95,10 +96,9 @@ func TestSendMentionAsWebmention(t *testing.T) {
// The rate limiter fixes this, and in reality, we never send out 10k links anyway.
func TestSendMentionIntegrationStressTest(t *testing.T) {
snder := Sender{
Conf: common.Configure(),
Conf: conf,
RestClient: &rest.HttpClient{},
}
defer os.RemoveAll("data")
runs := 100
responses := make(chan bool, runs)
@ -143,10 +143,10 @@ func TestSendMentionIntegrationStressTest(t *testing.T) {
func TestSendIntegrationTestCanSendBothWebmentionsAndPingbacks(t *testing.T) {
posted := map[string]interface{}{}
var lock = sync.Mutex{}
defer os.RemoveAll("data")
snder := Sender{
Conf: common.Configure(),
Conf: conf,
Repo: db.NewMentionRepo(conf),
RestClient: &mocks.RestClientMock{
GetBodyFunc: mocks.RelPathGetBodyFunc(t, "./../../../mocks/"),
PostFunc: func(url string, contentType string, body string) error {

View File

@ -3,12 +3,9 @@ package common
import (
"encoding/json"
"errors"
"io/ioutil"
"os"
"strings"
"sync"
"github.com/rs/zerolog/log"
"io/ioutil"
"strings"
)
type Config struct {
@ -16,23 +13,9 @@ type Config struct {
Token string `json:"token"`
UtcOffset int `json:"utcOffset"`
DataPath string `json:"dataPath"`
Connection string `json:"conString"`
AllowedWebmentionSources []string `json:"allowedWebmentionSources"`
DisallowedWebmentionDomains []string `json:"disallowedWebmentionDomains"`
domainLocks map[string]*sync.RWMutex
}
func (c *Config) Lock(domain string) {
c.domainLocks[domain].Lock()
}
func (c *Config) RLock(domain string) {
c.domainLocks[domain].RLock()
}
func (c *Config) RUnLock(domain string) {
c.domainLocks[domain].RUnlock()
}
func (c *Config) Unlock(domain string) {
c.domainLocks[domain].Unlock()
}
func (c *Config) missingKeys() []string {
@ -79,34 +62,12 @@ func (c *Config) FetchDomain(url string) (string, error) {
return "", errors.New("no allowed domain found for url " + url)
}
func NewConfig(c *Config) *Config {
conf := &Config{
Port: c.Port,
Token: c.Token,
UtcOffset: c.UtcOffset,
DataPath: c.DataPath,
AllowedWebmentionSources: c.AllowedWebmentionSources,
DisallowedWebmentionDomains: c.DisallowedWebmentionDomains,
domainLocks: map[string]*sync.RWMutex{},
}
initConfig(conf)
return conf
}
func Configure() *Config {
conf := config()
initConfig(conf)
return conf
}
func initConfig(conf *Config) {
conf.domainLocks = map[string]*sync.RWMutex{}
for _, domain := range conf.AllowedWebmentionSources {
conf.domainLocks[domain] = &sync.RWMutex{}
os.MkdirAll(conf.DataPath+"/"+domain, os.ModePerm)
log.Info().Str("allowedDomain", domain).Msg("Configured")
}
return conf
}
func config() *Config {
@ -136,6 +97,7 @@ func defaultConfig() *Config {
Token: "miauwkes",
UtcOffset: 60,
DataPath: "data",
Connection: "data/mentions.db",
AllowedWebmentionSources: []string{"brainbaking.com", "jefklakscodex.com"},
DisallowedWebmentionDomains: []string{"youtube.com"},
}

146
db/repo.go Normal file
View File

@ -0,0 +1,146 @@
// A database wrapper package for BuntDB that persists indieweb (meta)data.
// Most functions silently suppress errors as with consistent states, it would be impossible.
package db
import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/common"
"encoding/json"
"fmt"
"github.com/rs/zerolog/log"
"github.com/tidwall/buntdb"
"time"
)
type MentionRepoBunt struct {
db *buntdb.DB
conf *common.Config
}
type MentionRepo interface {
Save(key mf.Mention, data *mf.IndiewebData) (string, error)
Delete(key mf.Mention)
Since(domain string) (time.Time, error)
UpdateSince(domain string, since time.Time)
Get(key mf.Mention) *mf.IndiewebData
GetAll(domain string) mf.IndiewebDataResult
}
// UpdateSince updates the since timestamp to now. Logs but ignores errors.
func (r *MentionRepoBunt) UpdateSince(domain string, since time.Time) {
err := r.db.Update(func(tx *buntdb.Tx) error {
_, _, err := tx.Set(fmt.Sprintf("%s:since", domain), common.TimeToIso(since), nil)
return err
})
if err != nil {
log.Error().Err(err).Msg("UpdateSince: unable to save")
}
}
// Since fetches the last timestamp of the mf send.
// Returns converted found instance, or an error if none found.
func (r *MentionRepoBunt) Since(domain string) (time.Time, error) {
var since string
err := r.db.View(func(tx *buntdb.Tx) error {
val, err := tx.Get(fmt.Sprintf("%s:since", domain))
since = val
return err
})
if err != nil {
return time.Time{}, err
}
return common.IsoToTime(since), nil
}
// Delete removes a possibly present mention by key. Ignores possible errors.
func (r *MentionRepoBunt) Delete(wm mf.Mention) {
key := r.mentionToKey(wm)
r.db.Update(func(tx *buntdb.Tx) error {
tx.Delete(key)
return nil
})
}
// Save saves the mention by marshalling data. Returns the key or a marshal/persist error.
func (r *MentionRepoBunt) Save(wm mf.Mention, data *mf.IndiewebData) (string, error) {
jsonData, err := json.Marshal(data)
if err != nil {
return "", err
}
key := r.mentionToKey(wm)
err = r.db.Update(func(tx *buntdb.Tx) error {
_, _, err := tx.Set(key, string(jsonData), nil)
return err
})
if err != nil {
return "", err
}
return key, nil
}
func (r *MentionRepoBunt) mentionToKey(wm mf.Mention) string {
return fmt.Sprintf("%s:%s", wm.Key(), wm.Domain(r.conf))
}
// Get returns a single unmarshalled json value based on the mention key.
// It returns the unmarshalled result or nil if something went wrong.
func (r *MentionRepoBunt) Get(wm mf.Mention) *mf.IndiewebData {
var data mf.IndiewebData
key := r.mentionToKey(wm)
err := r.db.View(func(tx *buntdb.Tx) error {
val, err := tx.Get(key)
if err != nil {
return err
}
err = json.Unmarshal([]byte(val), &data)
if err != nil {
return err
}
return nil
})
if err != nil {
log.Error().Err(err).Str("key", key).Msg("repo get: unable to retrieve key")
return nil
}
return &data
}
// GetAll returns a wrapped data result for all mentions for a particular domain.
// Intentionally ignores marshal errors, db should be consistent!
// Warning, this will potentially marshall 10k strings!
func (r *MentionRepoBunt) GetAll(domain string) mf.IndiewebDataResult {
var data []*mf.IndiewebData
err := r.db.View(func(tx *buntdb.Tx) error {
return tx.Ascend(domain, func(key, value string) bool {
var result mf.IndiewebData
json.Unmarshal([]byte(value), &result)
data = append(data, &result)
return true
})
})
if err != nil {
log.Error().Err(err).Msg("get all: failed to ascend from view")
return mf.IndiewebDataResult{}
}
return mf.ResultSuccess(data)
}
// NewMentionRepo opens a database connection using default buntdb settings.
// It also creates necessary indexes based on the passed domain config.
// This panics if it cannot open the db.
func NewMentionRepo(c *common.Config) *MentionRepoBunt {
repo := &MentionRepoBunt{
conf: c,
}
db, err := buntdb.Open(c.Connection)
if err != nil {
log.Fatal().Str("constr", c.Connection).Msg("new mention repo: cannot open db")
}
repo.db = db
for _, domain := range c.AllowedWebmentionSources {
db.CreateIndex(domain, fmt.Sprintf("*:%s", domain), buntdb.IndexString)
}
return repo
}

128
db/repo_test.go Normal file
View File

@ -0,0 +1,128 @@
package db
import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/common"
"fmt"
"github.com/stretchr/testify/assert"
"github.com/tidwall/buntdb"
"os"
"testing"
"time"
)
var (
conf = &common.Config{
Connection: ":memory:",
AllowedWebmentionSources: []string{
"pussycat.com",
},
}
)
func TestDelete(t *testing.T) {
db := NewMentionRepo(conf)
wm := mf.Mention{
Target: "pussycat.com/coolpussy.html",
}
db.Save(wm, &mf.IndiewebData{
Name: "lolz",
})
db.Delete(wm)
results := db.GetAll("pussycat.com")
assert.Equal(t, 0, len(results.Data))
}
func TestUpdateSince(t *testing.T) {
db := NewMentionRepo(conf)
nowStamp := time.Date(2020, 10, 13, 14, 15, 0, 0, time.UTC)
db.UpdateSince("pussycat.com", nowStamp)
since, err := db.Since("pussycat.com")
assert.NoError(t, err)
assert.Equal(t, nowStamp, since)
}
func TestSinceFirstTimeIsEmptytime(t *testing.T) {
db := NewMentionRepo(conf)
since, err := db.Since("pussycat.com")
assert.Equal(t, buntdb.ErrNotFound, err)
assert.Equal(t, time.Time{}, since)
}
func TestGet(t *testing.T) {
db := NewMentionRepo(conf)
wm := mf.Mention{
Target: "pussycat.com/coolpussy.html",
}
db.Save(wm, &mf.IndiewebData{
Name: "lolz",
})
result := db.Get(wm)
assert.Equal(t, "lolz", result.Name)
}
func BenchmarkMentionRepoBunt_GetAll(b *testing.B) {
defer os.Remove("test.db")
db := NewMentionRepo(&common.Config{
Connection: "test.db",
AllowedWebmentionSources: []string{
"pussycat.com",
},
})
items := 10000
fmt.Printf(" -- Saving %d items\n", items)
for n := 0; n < items; n++ {
db.Save(mf.Mention{
Source: fmt.Sprintf("https://blahsource.com/%d/ding.html", n),
Target: fmt.Sprintf("https://pussycat.com/%d/ding.html", n),
}, &mf.IndiewebData{
Name: fmt.Sprintf("benchmark %d", n),
Author: mf.IndiewebAuthor{
Name: fmt.Sprintf("author %d", n),
},
})
}
b.Run(fmt.Sprintf(" -- Benchmark Get All for #%d\n", b.N), func(b *testing.B) {
for n := 0; n < b.N; n++ {
db.GetAll("pussycat.com")
}
})
}
func TestGetAllAndSaveSomeJson(t *testing.T) {
db := NewMentionRepo(conf)
db.Save(mf.Mention{
Target: "pussycat.com/coolpussy.html",
}, &mf.IndiewebData{
Name: "lolz",
})
results := db.GetAll("pussycat.com")
assert.Equal(t, 1, len(results.Data))
assert.Equal(t, "lolz", results.Data[0].Name)
}
func TestGetFiltersBasedOnDomain(t *testing.T) {
db := NewMentionRepo(conf)
db.Save(mf.Mention{
Target: "pussycat.com/coolpussy.html",
}, &mf.IndiewebData{
Name: "lolz",
})
db.Save(mf.Mention{
Target: "dingeling.com/dogshateus.html",
}, &mf.IndiewebData{
Name: "amaigat",
})
results := db.GetAll("pussycat.com")
assert.Equal(t, 1, len(results.Data))
assert.Equal(t, "lolz", results.Data[0].Name)
}

1
go.mod
View File

@ -9,6 +9,7 @@ require (
github.com/hashicorp/go-retryablehttp v0.6.8
github.com/rs/zerolog v1.21.0
github.com/stretchr/testify v1.7.0
github.com/tidwall/buntdb v1.2.3 // indirect
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba
willnorris.com/go/microformats v1.1.1
)

17
go.sum
View File

@ -28,6 +28,23 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tidwall/btree v0.4.2 h1:aLwwJlG+InuFzdAPuBf9YCAR1LvSQ9zhC5aorFPlIPs=
github.com/tidwall/btree v0.4.2/go.mod h1:huei1BkDWJ3/sLXmO+bsCNELL+Bp2Kks9OLyQFkzvA8=
github.com/tidwall/buntdb v1.2.3 h1:AoGVe4yrhKmnEPHrPrW5EUOATHOCIk4VtFvd8xn/ZtU=
github.com/tidwall/buntdb v1.2.3/go.mod h1:+i/gBwYOHWG19wLgwMXFLkl00twh9+VWkkaOhuNQ4PA=
github.com/tidwall/gjson v1.7.4 h1:19cchw8FOxkG5mdLRkGf9jqIqEyqdZhPqW60XfyFxk8=
github.com/tidwall/gjson v1.7.4/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk=
github.com/tidwall/grect v0.1.1 h1:+kMEkxhoqB7rniVXzMEIA66XwU07STgINqxh+qVIndY=
github.com/tidwall/grect v0.1.1/go.mod h1:CzvbGiFbWUwiJ1JohXLb28McpyBsI00TK9Y6pDWLGRQ=
github.com/tidwall/lotsa v1.0.2/go.mod h1:X6NiU+4yHA3fE3Puvpnn1XMDrFZrE9JO2/w+UMuqgR8=
github.com/tidwall/match v1.0.3 h1:FQUVvBImDutD8wJLN6c5eMzWtjgONK9MwIBCOrUJKeE=
github.com/tidwall/match v1.0.3/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.1.0 h1:K3hMW5epkdAVwibsQEfR/7Zj0Qgt4DxtNumTq/VloO8=
github.com/tidwall/pretty v1.1.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/tidwall/rtred v0.1.2 h1:exmoQtOLvDoO8ud++6LwVsAMTu0KPzLTUrMln8u1yu8=
github.com/tidwall/rtred v0.1.2/go.mod h1:hd69WNXQ5RP9vHd7dqekAz+RIdtfBogmglkZSRxCHFQ=
github.com/tidwall/tinyqueue v0.1.1 h1:SpNEvEggbpyN5DIReaJ2/1ndroY8iyEGxPYxoSaymYE=
github.com/tidwall/tinyqueue v0.1.1/go.mod h1:O/QNHwrnjqr6IHItYrzoHAKYhBkLI67Q096fQP5zMYw=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=

59
migrate-db.go Normal file
View File

@ -0,0 +1,59 @@
package main
import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db"
"encoding/json"
"fmt"
"github.com/rs/zerolog/log"
"io/ioutil"
"os"
)
func mai() {
cnf := common.Configure()
os.Remove(cnf.Connection)
repo := db.NewMentionRepo(cnf)
log.Info().Str("dbconfig", cnf.Connection).Msg("Starting migration...")
for _, domain := range cnf.AllowedWebmentionSources {
fmt.Printf("Processing domain %s...\n", domain)
entries, err := os.ReadDir(fmt.Sprintf("%s/%s", cnf.DataPath, domain))
if err != nil {
log.Fatal().Err(err).Msg("Error while reading import path")
}
for _, file := range entries {
filename := fmt.Sprintf("%s/%s/%s", cnf.DataPath, domain, file.Name())
data, err := ioutil.ReadFile(filename)
if err != nil {
log.Fatal().Str("file", filename).Err(err).Msg("Error while reading file")
}
var indiewebData mf.IndiewebData
json.Unmarshal(data, &indiewebData)
mention := mf.Mention{
Source: indiewebData.Source,
Target: indiewebData.Target,
}
log.Info().Stringer("wm", mention).Str("file", filename).Msg("Re-saving entry")
repo.Save(mention, &indiewebData)
}
}
log.Info().Str("dbconfig", cnf.Connection).Msg("Checking for since files...")
for _, domain := range cnf.AllowedWebmentionSources {
since, err := ioutil.ReadFile(fmt.Sprintf("%s/%s-since.txt", cnf.DataPath, domain))
if err != nil {
log.Warn().Str("domain", domain).Msg("No since found, skipping")
continue
}
log.Info().Str("domain", domain).Str("since", string(since)).Msg("Saving since")
repo.UpdateSince(domain, common.IsoToTime(string(since)))
}
log.Info().Str("dbconfig", cnf.Connection).Msg("Done! Check db")
}

View File

@ -1,5 +0,0 @@
package main
func mainz() {
//time.Tick()
}