diff --git a/app/mf/mention.go b/app/mf/mention.go index 551cd95..5c3080e 100644 --- a/app/mf/mention.go +++ b/app/mf/mention.go @@ -24,10 +24,13 @@ func (wm Mention) String() string { return fmt.Sprintf("source: %s, target: %s", wm.Source, wm.Target) } -func (wm Mention) AsPath(conf *common.Config) string { - filename := fmt.Sprintf("%x", md5.Sum([]byte("source="+wm.Source+",target="+wm.Target))) +func (wm Mention) Domain(conf *common.Config) string { domain, _ := conf.FetchDomain(wm.Target) - return conf.DataPath + "/" + domain + "/" + filename + ".json" + return domain +} + +func (wm Mention) Key() string { + return fmt.Sprintf("%x", md5.Sum([]byte("source="+wm.Source+",target="+wm.Target))) } func (wm Mention) SourceUrl() *url.URL { diff --git a/app/mf/microformats.go b/app/mf/microformats.go index 5e219d6..60794b3 100644 --- a/app/mf/microformats.go +++ b/app/mf/microformats.go @@ -23,7 +23,7 @@ type IndiewebDataResult struct { Data []*IndiewebData `json:"json"` } -func WrapResult(data []*IndiewebData) IndiewebDataResult { +func ResultSuccess(data []*IndiewebData) IndiewebDataResult { return IndiewebDataResult{ Status: "success", Data: data, diff --git a/app/pingback/handler.go b/app/pingback/handler.go index e81aee7..7ee7749 100644 --- a/app/pingback/handler.go +++ b/app/pingback/handler.go @@ -4,6 +4,7 @@ import ( "brainbaking.com/go-jamming/app/mf" "brainbaking.com/go-jamming/app/webmention/recv" "brainbaking.com/go-jamming/common" + "brainbaking.com/go-jamming/db" "brainbaking.com/go-jamming/rest" "encoding/xml" "fmt" @@ -12,7 +13,7 @@ import ( "net/http" ) -func HandlePost(conf *common.Config) http.HandlerFunc { +func HandlePost(conf *common.Config, db db.MentionRepo) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { body, err := ioutil.ReadAll(r.Body) if err != nil { @@ -38,6 +39,7 @@ func HandlePost(conf *common.Config) http.HandlerFunc { receiver := &recv.Receiver{ RestClient: &rest.HttpClient{}, Conf: conf, + Repo: db, } go receiver.Receive(wm) pingbackSuccess(w) diff --git a/app/routes.go b/app/routes.go index 3a78358..eab0112 100644 --- a/app/routes.go +++ b/app/routes.go @@ -10,11 +10,12 @@ import ( // not that contempt with passing conf, but can't create receivers on non-local types, and won't move specifics into package app // https://blog.questionable.services/article/http-handler-error-handling-revisited/ is the better idea, but more work func (s *server) routes() { - cnf := s.conf + c := s.conf + db := s.repo - s.router.HandleFunc("/", index.Handle(cnf)).Methods("GET") - s.router.HandleFunc("/pingback", pingback.HandlePost(cnf)).Methods("POST") - s.router.HandleFunc("/webmention", webmention.HandlePost(cnf)).Methods("POST") - s.router.HandleFunc("/webmention/{domain}/{token}", s.authorizedOnly(webmention.HandleGet(cnf))).Methods("GET") - s.router.HandleFunc("/webmention/{domain}/{token}", s.authorizedOnly(webmention.HandlePut(cnf))).Methods("PUT") + s.router.HandleFunc("/", index.Handle(c)).Methods("GET") + s.router.HandleFunc("/pingback", pingback.HandlePost(c, db)).Methods("POST") + s.router.HandleFunc("/webmention", webmention.HandlePost(c, db)).Methods("POST") + s.router.HandleFunc("/webmention/{domain}/{token}", s.authorizedOnly(webmention.HandleGet(db))).Methods("GET") + s.router.HandleFunc("/webmention/{domain}/{token}", s.authorizedOnly(webmention.HandlePut(c, db))).Methods("PUT") } diff --git a/app/server.go b/app/server.go index 537acef..f68cf9c 100644 --- a/app/server.go +++ b/app/server.go @@ -1,6 +1,7 @@ package app import ( + "brainbaking.com/go-jamming/db" "brainbaking.com/go-jamming/rest" "github.com/MagnusFrater/helmet" "net/http" @@ -16,6 +17,7 @@ import ( type server struct { router *mux.Router conf *common.Config + repo db.MentionRepo } func (s *server) authorizedOnly(h http.HandlerFunc) http.HandlerFunc { @@ -44,9 +46,10 @@ func ipFrom(r *http.Request) string { func Start() { r := mux.NewRouter() config := common.Configure() + repo := db.NewMentionRepo(config) helmet := helmet.Default() - server := &server{router: r, conf: config} + server := &server{router: r, conf: config, repo: repo} server.routes() http.Handle("/", r) diff --git a/app/webmention/handler.go b/app/webmention/handler.go index 70b089f..305688d 100644 --- a/app/webmention/handler.go +++ b/app/webmention/handler.go @@ -2,9 +2,9 @@ package webmention import ( "brainbaking.com/go-jamming/app/mf" - "brainbaking.com/go-jamming/app/webmention/load" "brainbaking.com/go-jamming/app/webmention/recv" "brainbaking.com/go-jamming/app/webmention/send" + "brainbaking.com/go-jamming/db" "github.com/gorilla/mux" "net/http" @@ -16,18 +16,14 @@ var ( httpClient = &rest.HttpClient{} ) -func HandleGet(conf *common.Config) http.HandlerFunc { +func HandleGet(repo db.MentionRepo) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { domain := mux.Vars(r)["domain"] - conf.Lock(domain) - defer conf.Unlock(domain) - result := load.FromDisk(domain, conf.DataPath) - - rest.Json(w, result) + rest.Json(w, repo.GetAll(domain)) } } -func HandlePut(conf *common.Config) http.HandlerFunc { +func HandlePut(conf *common.Config, repo db.MentionRepo) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { since := getSinceQueryParam(r) domain := mux.Vars(r)["domain"] @@ -35,6 +31,7 @@ func HandlePut(conf *common.Config) http.HandlerFunc { snder := &send.Sender{ RestClient: httpClient, Conf: conf, + Repo: repo, } go snder.Send(domain, since) rest.Accept(w) @@ -50,7 +47,7 @@ func getSinceQueryParam(r *http.Request) string { return since } -func HandlePost(conf *common.Config) http.HandlerFunc { +func HandlePost(conf *common.Config, repo db.MentionRepo) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { r.ParseForm() if !validate(r, r.Header, conf) { @@ -71,6 +68,7 @@ func HandlePost(conf *common.Config) http.HandlerFunc { recv := &recv.Receiver{ RestClient: httpClient, Conf: conf, + Repo: repo, } go recv.Receive(wm) diff --git a/app/webmention/load/loader.go b/app/webmention/load/loader.go deleted file mode 100644 index b1549f0..0000000 --- a/app/webmention/load/loader.go +++ /dev/null @@ -1,39 +0,0 @@ -package load - -import ( - "brainbaking.com/go-jamming/app/mf" - "io/ioutil" - "path" -) - -// FromDisk assumes that params have already been validated. -func FromDisk(domain string, dataPath string) mf.IndiewebDataResult { - loadPath := path.Join(dataPath, domain) - info, _ := ioutil.ReadDir(loadPath) - amountOfFiles := len(info) - - sema := make(chan struct{}, 20) - results := make(chan *mf.IndiewebData, amountOfFiles) - - for _, file := range info { - go func(fileName string) { - sema <- struct{}{} - defer func() { <-sema }() - results <- mf.RequireFromFile(path.Join(loadPath, fileName)) - }(file.Name()) - } - - indiewebResults := gather(amountOfFiles, results) - return mf.WrapResult(indiewebResults) -} - -func gather(amount int, results <-chan *mf.IndiewebData) []*mf.IndiewebData { - var indiewebResults []*mf.IndiewebData - for i := 0; i < amount; i++ { - result := <-results - if !result.IsEmpty() { - indiewebResults = append(indiewebResults, result) - } - } - return indiewebResults -} diff --git a/app/webmention/load/loader_test.go b/app/webmention/load/loader_test.go deleted file mode 100644 index 2ffe846..0000000 --- a/app/webmention/load/loader_test.go +++ /dev/null @@ -1,69 +0,0 @@ -package load - -import ( - "fmt" - "github.com/stretchr/testify/assert" - "io/ioutil" - "os" - "sort" - "strings" - "testing" -) - -// stress tests to see what concurrent disk access is like. Runs fine, even with 5000 runs and 100 files. -// this means worker pools do not have to be implemented in FromDisk(). -// However, if runs := 10000, some results are empty. At other times, even ioutil.ReadDir() panics... -// The rate limiter should catch this, combined with a domain read lock in the caller. -// Furthermore, a run of 1 and files of 50k breaks the OS without using a semaphore to limit the nr. of open files! -func TestFromDiskStressTest(t *testing.T) { - runs := 100 - files := 100 - - os.MkdirAll("testdata/somedomain", os.ModePerm) - defer os.RemoveAll("testdata") - - for i := 0; i < files; i++ { - json := `{"author":{"name":"Jef Klakveld","picture":"https://brainbaking.com/img/avatar.jpg"},"name":"I much prefer Sonic Mania's Lock On to Belgium's t...","content":"I much prefer Sonic Mania’s Lock On to Belgium’s third Lock Down. Sigh. At least 16-bit 2D platformers make me smile: https://jefklakscodex.com/articles/reviews/sonic-mania/\n\n\n\nEnclosed Toot image","published":"2021-03-25T10:45:00","url":"https://brainbaking.com/notes/2021/03/25h10m45s09/","type":"mention","source":"https://brainbaking.com/notes/2021/03/25h10m45s09/","target":"https://jefklakscodex.com/articles/reviews/sonic-mania/"}` - ioutil.WriteFile(fmt.Sprintf("testdata/somedomain/%d.json", i), []byte(json), os.ModePerm) - } - - amounts := make(chan int, runs) - for i := 0; i < runs; i++ { - go func(nr int) { - data := FromDisk("somedomain", "testdata") - itms := len(data.Data) - - fmt.Printf("From disk #%d - found %d items\n", nr, itms) - amounts <- itms - }(i) - } - - fmt.Println("Asserting...") - for i := 0; i < runs; i++ { - actual := <-amounts - assert.Equal(t, files, actual) - } -} - -func TestFromDiskReturnsAllJsonFilesFromDiskWrappedInResult(t *testing.T) { - os.MkdirAll("testdata/somedomain", os.ModePerm) - defer os.RemoveAll("testdata") - - json1 := `{"author":{"name":"Wouter Groeneveld","picture":"https://brainbaking.com/img/avatar.jpg"},"name":"I much prefer Sonic Mania's Lock On to Belgium's t...","content":"I much prefer Sonic Mania’s Lock On to Belgium’s third Lock Down. Sigh. At least 16-bit 2D platformers make me smile: https://jefklakscodex.com/articles/reviews/sonic-mania/\n\n\n\nEnclosed Toot image","published":"2021-03-25T10:45:00","url":"https://brainbaking.com/notes/2021/03/25h10m45s09/","type":"mention","source":"https://brainbaking.com/notes/2021/03/25h10m45s09/","target":"https://jefklakscodex.com/articles/reviews/sonic-mania/"}` - json2 := `{"author":{"name":"Jef Klakveld","picture":"https://brainbaking.com/img/avatar.jpg"},"name":"I much prefer Sonic Mania's Lock On to Belgium's t...","content":"I much prefer Sonic Mania’s Lock On to Belgium’s third Lock Down. Sigh. At least 16-bit 2D platformers make me smile: https://jefklakscodex.com/articles/reviews/sonic-mania/\n\n\n\nEnclosed Toot image","published":"2021-03-25T10:45:00","url":"https://brainbaking.com/notes/2021/03/25h10m45s09/","type":"mention","source":"https://brainbaking.com/notes/2021/03/25h10m45s09/","target":"https://jefklakscodex.com/articles/reviews/sonic-mania/"}` - ioutil.WriteFile("testdata/somedomain/testjson1.json", []byte(json1), os.ModePerm) - ioutil.WriteFile("testdata/somedomain/testjson2.json", []byte(json2), os.ModePerm) - - result := FromDisk("somedomain", "testdata") - sort.SliceStable(result.Data, func(i, j int) bool { - comp := strings.Compare(result.Data[i].Author.Name, result.Data[j].Author.Name) - if comp > 0 { - return false - } - return true - }) - - assert.Equal(t, "success", result.Status) - assert.Equal(t, "Jef Klakveld", result.Data[0].Author.Name) - assert.Equal(t, "Wouter Groeneveld", result.Data[1].Author.Name) -} diff --git a/app/webmention/recv/receive.go b/app/webmention/recv/receive.go index c8f4a50..77f182f 100644 --- a/app/webmention/recv/receive.go +++ b/app/webmention/recv/receive.go @@ -3,11 +3,8 @@ package recv import ( "brainbaking.com/go-jamming/app/mf" "brainbaking.com/go-jamming/common" + "brainbaking.com/go-jamming/db" "brainbaking.com/go-jamming/rest" - "encoding/json" - "io/fs" - "io/ioutil" - "os" "regexp" "strings" @@ -20,6 +17,7 @@ import ( type Receiver struct { RestClient rest.Client Conf *common.Config + Repo db.MentionRepo } func (recv *Receiver) Receive(wm mf.Mention) { @@ -28,18 +26,13 @@ func (recv *Receiver) Receive(wm mf.Mention) { if geterr != nil { log.Warn().Err(geterr).Msg(" ABORT: invalid url") - recv.deletePossibleOlderWebmention(wm) + recv.Repo.Delete(wm) return } recv.processSourceBody(body, wm) } -// Deletes a possible webmention. Ignores remove errors. -func (recv *Receiver) deletePossibleOlderWebmention(wm mf.Mention) { - os.Remove(wm.AsPath(recv.Conf)) -} - func (recv *Receiver) processSourceBody(body string, wm mf.Mention) { if !strings.Contains(body, wm.Target) { log.Warn().Str("target", wm.Target).Msg("ABORT: no mention of target found in html src of source!") @@ -49,10 +42,11 @@ func (recv *Receiver) processSourceBody(body string, wm mf.Mention) { data := microformats.Parse(strings.NewReader(body), wm.SourceUrl()) indieweb := recv.convertBodyToIndiewebData(body, wm, mf.HEntry(data)) - if err := recv.saveWebmentionToDisk(wm, indieweb); err != nil { - log.Err(err).Msg("Unable to save Webmention to disk") + key, err := recv.Repo.Save(wm, indieweb) + if err != nil { + log.Error().Err(err).Stringer("wm", wm).Msg("processSourceBody: failed to save json to db") } - log.Info().Str("file", wm.AsPath(recv.Conf)).Msg("OK: Webmention processed.") + log.Info().Str("key", key).Msg("OK: Webmention processed.") } func (recv *Receiver) convertBodyToIndiewebData(body string, wm mf.Mention, hEntry *microformats.Microformat) *mf.IndiewebData { @@ -62,21 +56,6 @@ func (recv *Receiver) convertBodyToIndiewebData(body string, wm mf.Mention, hEnt return recv.parseBodyAsIndiewebSite(hEntry, wm) } -func (recv *Receiver) saveWebmentionToDisk(wm mf.Mention, indieweb *mf.IndiewebData) error { - domain, _ := recv.Conf.FetchDomain(wm.Target) - recv.Conf.Lock(domain) - defer recv.Conf.Unlock(domain) - jsonData, jsonErr := json.Marshal(indieweb) - if jsonErr != nil { - return jsonErr - } - err := ioutil.WriteFile(wm.AsPath(recv.Conf), jsonData, fs.ModePerm) - if err != nil { - return err - } - return nil -} - // see https://github.com/willnorris/microformats/blob/main/microformats.go func (recv *Receiver) parseBodyAsIndiewebSite(hEntry *microformats.Microformat, wm mf.Mention) *mf.IndiewebData { return &mf.IndiewebData{ diff --git a/app/webmention/recv/receive_test.go b/app/webmention/recv/receive_test.go index 1b2f9a4..27e5d36 100644 --- a/app/webmention/recv/receive_test.go +++ b/app/webmention/recv/receive_test.go @@ -2,11 +2,11 @@ package recv import ( "brainbaking.com/go-jamming/app/mf" + "brainbaking.com/go-jamming/db" + "encoding/json" "errors" "github.com/stretchr/testify/assert" - "io/ioutil" "net/http" - "os" "testing" "time" @@ -19,25 +19,7 @@ var conf = &common.Config{ "jefklakscodex.com", "brainbaking.com", }, - DataPath: "testdata", -} - -func TestConvertWebmentionToPath(t *testing.T) { - wm := mf.Mention{ - Source: "https://brainbaking.com", - Target: "https://jefklakscodex.com/articles", - } - - result := wm.AsPath(conf) - if result != "testdata/jefklakscodex.com/99be66594fdfcf482545fead8e7e4948.json" { - t.Fatalf("md5 hash check failed, got " + result) - } -} - -func writeSomethingTo(filename string) { - file, _ := os.Create(filename) - file.WriteString("lolz") - defer file.Close() + Connection: ":memory:", } func TestReceive(t *testing.T) { @@ -100,15 +82,14 @@ func TestReceive(t *testing.T) { for _, tc := range cases { t.Run(tc.label, func(t *testing.T) { - os.MkdirAll("testdata/brainbaking.com", os.ModePerm) - os.MkdirAll("testdata/jefklakscodex.com", os.ModePerm) - defer os.RemoveAll("testdata") common.Now = func() time.Time { return time.Date(2020, time.January, 1, 12, 30, 0, 0, time.UTC) } + repo := db.NewMentionRepo(conf) receiver := &Receiver{ - Conf: common.NewConfig(conf), + Conf: conf, + Repo: repo, RestClient: &mocks.RestClientMock{ GetBodyFunc: mocks.RelPathGetBodyFunc(t, "../../../mocks/"), }, @@ -116,22 +97,23 @@ func TestReceive(t *testing.T) { receiver.Receive(tc.wm) - actualJson, _ := ioutil.ReadFile(tc.wm.AsPath(conf)) + actual := repo.Get(tc.wm) + actualJson, _ := json.Marshal(actual) assert.JSONEq(t, tc.json, string(actualJson)) }) } } func TestReceiveTargetDoesNotExistAnymoreDeletesPossiblyOlderWebmention(t *testing.T) { - os.MkdirAll("testdata/jefklakscodex.com", os.ModePerm) - defer os.RemoveAll("testdata") + repo := db.NewMentionRepo(conf) wm := mf.Mention{ Source: "https://brainbaking.com", Target: "https://jefklakscodex.com/articles", } - filename := wm.AsPath(conf) - writeSomethingTo(filename) + repo.Save(wm, &mf.IndiewebData{ + Name: "something something", + }) client := &mocks.RestClientMock{ GetBodyFunc: func(url string) (http.Header, string, error) { @@ -139,12 +121,14 @@ func TestReceiveTargetDoesNotExistAnymoreDeletesPossiblyOlderWebmention(t *testi }, } receiver := &Receiver{ - Conf: common.NewConfig(conf), + Conf: conf, RestClient: client, + Repo: repo, } receiver.Receive(wm) - assert.NoFileExists(t, filename) + indb := repo.Get(wm) + assert.Empty(t, indb) } func TestReceiveTargetThatDoesNotPointToTheSourceDoesNothing(t *testing.T) { @@ -152,32 +136,31 @@ func TestReceiveTargetThatDoesNotPointToTheSourceDoesNothing(t *testing.T) { Source: "https://brainbaking.com/valid-indieweb-source.html", Target: "https://brainbaking.com/valid-indieweb-source.html", } - filename := wm.AsPath(conf) - writeSomethingTo(filename) + repo := db.NewMentionRepo(conf) receiver := &Receiver{ - Conf: common.NewConfig(conf), + Conf: conf, + Repo: repo, RestClient: &mocks.RestClientMock{ GetBodyFunc: mocks.RelPathGetBodyFunc(t, "../../../mocks/"), }, } receiver.Receive(wm) - assert.NoFileExists(t, filename) + assert.Empty(t, repo.GetAll("brainbaking.com").Data) } func TestProcessSourceBodyAbortsIfNoMentionOfTargetFoundInSourceHtml(t *testing.T) { - os.MkdirAll("testdata/jefklakscodex.com", os.ModePerm) - defer os.RemoveAll("testdata") - wm := mf.Mention{ Source: "https://brainbaking.com", Target: "https://jefklakscodex.com/articles", } + repo := db.NewMentionRepo(conf) receiver := &Receiver{ - Conf: common.NewConfig(conf), + Conf: conf, + Repo: repo, } receiver.processSourceBody("my nice body", wm) - assert.NoFileExists(t, wm.AsPath(conf)) + assert.Empty(t, repo.Get(wm)) } diff --git a/app/webmention/send/send.go b/app/webmention/send/send.go index e156379..e98bf50 100644 --- a/app/webmention/send/send.go +++ b/app/webmention/send/send.go @@ -4,11 +4,9 @@ import ( "brainbaking.com/go-jamming/app/mf" "brainbaking.com/go-jamming/app/pingback/send" "brainbaking.com/go-jamming/common" + "brainbaking.com/go-jamming/db" "brainbaking.com/go-jamming/rest" - "fmt" "github.com/rs/zerolog/log" - "io/fs" - "io/ioutil" "strings" "sync" "time" @@ -17,27 +15,27 @@ import ( type Sender struct { RestClient rest.Client Conf *common.Config + Repo db.MentionRepo } func (snder *Sender) sinceForDomain(domain string, since string) time.Time { if since != "" { return common.IsoToTime(since) } - sinceConf, err := ioutil.ReadFile(fmt.Sprintf("%s/%s-since.txt", snder.Conf.DataPath, domain)) + + sinceConf, err := snder.Repo.Since(domain) if err != nil { - log.Warn().Str("since", since).Msg("No query param, and no config found. Reverting to beginning of time...") + log.Warn().Str("domain", domain).Msg("No query param, and no config found. Reverting to beginning of time...") return time.Time{} } - return common.IsoToTime(string(sinceConf)) + return sinceConf } -func (snder *Sender) saveSinceForDomain(domain string, since time.Time) { - ioutil.WriteFile(fmt.Sprintf("%s/%s-since.txt", snder.Conf.DataPath, domain), []byte(common.TimeToIso(since)), fs.ModePerm) +func (snder *Sender) updateSinceForDomain(domain string) { + snder.Repo.UpdateSince(domain, common.Now()) } func (snder *Sender) Send(domain string, since string) { - snder.Conf.Lock(domain) - defer snder.Conf.Unlock(domain) timeSince := snder.sinceForDomain(domain, since) log.Info().Str("domain", domain).Time("since", timeSince).Msg(` OK: someone wants to send mentions`) feedUrl := "https://" + domain + "/index.xml" @@ -52,7 +50,7 @@ func (snder *Sender) Send(domain string, since string) { return } - snder.saveSinceForDomain(domain, timeSince) + snder.updateSinceForDomain(domain) } func (snder *Sender) parseRssFeed(feed string, since time.Time) error { diff --git a/app/webmention/send/send_test.go b/app/webmention/send/send_test.go index 9655613..186c90c 100644 --- a/app/webmention/send/send_test.go +++ b/app/webmention/send/send_test.go @@ -3,24 +3,30 @@ package send import ( "brainbaking.com/go-jamming/app/mf" "brainbaking.com/go-jamming/common" + "brainbaking.com/go-jamming/db" "brainbaking.com/go-jamming/mocks" "brainbaking.com/go-jamming/rest" "fmt" "github.com/stretchr/testify/assert" - "io/ioutil" "net/http" "net/url" - "os" "sync" "testing" "time" ) +var conf = &common.Config{ + Connection: ":memory:", + AllowedWebmentionSources: []string{ + "domain", + }, +} + func TestSinceForDomain(t *testing.T) { cases := []struct { label string sinceInParam string - sinceInFile string + sinceInDb string expected time.Time }{ { @@ -43,19 +49,14 @@ func TestSinceForDomain(t *testing.T) { }, } - snder := Sender{ - Conf: &common.Config{ - DataPath: "testdata", - }, - } - for _, tc := range cases { t.Run(tc.label, func(t *testing.T) { - os.MkdirAll("testdata", os.ModePerm) - defer os.RemoveAll("testdata") - - if tc.sinceInFile != "" { - ioutil.WriteFile("testdata/domain-since.txt", []byte(tc.sinceInFile), os.ModePerm) + snder := Sender{ + Conf: conf, + Repo: db.NewMentionRepo(conf), + } + if tc.sinceInDb != "" { + snder.Repo.UpdateSince("domain", common.IsoToTime(tc.sinceInDb)) } actual := snder.sinceForDomain("domain", tc.sinceInParam) @@ -95,10 +96,9 @@ func TestSendMentionAsWebmention(t *testing.T) { // The rate limiter fixes this, and in reality, we never send out 10k links anyway. func TestSendMentionIntegrationStressTest(t *testing.T) { snder := Sender{ - Conf: common.Configure(), + Conf: conf, RestClient: &rest.HttpClient{}, } - defer os.RemoveAll("data") runs := 100 responses := make(chan bool, runs) @@ -143,10 +143,10 @@ func TestSendMentionIntegrationStressTest(t *testing.T) { func TestSendIntegrationTestCanSendBothWebmentionsAndPingbacks(t *testing.T) { posted := map[string]interface{}{} var lock = sync.Mutex{} - defer os.RemoveAll("data") snder := Sender{ - Conf: common.Configure(), + Conf: conf, + Repo: db.NewMentionRepo(conf), RestClient: &mocks.RestClientMock{ GetBodyFunc: mocks.RelPathGetBodyFunc(t, "./../../../mocks/"), PostFunc: func(url string, contentType string, body string) error { diff --git a/common/config.go b/common/config.go index f01ed88..98de6ae 100644 --- a/common/config.go +++ b/common/config.go @@ -3,12 +3,9 @@ package common import ( "encoding/json" "errors" - "io/ioutil" - "os" - "strings" - "sync" - "github.com/rs/zerolog/log" + "io/ioutil" + "strings" ) type Config struct { @@ -16,23 +13,9 @@ type Config struct { Token string `json:"token"` UtcOffset int `json:"utcOffset"` DataPath string `json:"dataPath"` + Connection string `json:"conString"` AllowedWebmentionSources []string `json:"allowedWebmentionSources"` DisallowedWebmentionDomains []string `json:"disallowedWebmentionDomains"` - - domainLocks map[string]*sync.RWMutex -} - -func (c *Config) Lock(domain string) { - c.domainLocks[domain].Lock() -} -func (c *Config) RLock(domain string) { - c.domainLocks[domain].RLock() -} -func (c *Config) RUnLock(domain string) { - c.domainLocks[domain].RUnlock() -} -func (c *Config) Unlock(domain string) { - c.domainLocks[domain].Unlock() } func (c *Config) missingKeys() []string { @@ -79,34 +62,12 @@ func (c *Config) FetchDomain(url string) (string, error) { return "", errors.New("no allowed domain found for url " + url) } -func NewConfig(c *Config) *Config { - conf := &Config{ - Port: c.Port, - Token: c.Token, - UtcOffset: c.UtcOffset, - DataPath: c.DataPath, - AllowedWebmentionSources: c.AllowedWebmentionSources, - DisallowedWebmentionDomains: c.DisallowedWebmentionDomains, - domainLocks: map[string]*sync.RWMutex{}, - } - initConfig(conf) - return conf -} - func Configure() *Config { conf := config() - initConfig(conf) - return conf -} - -func initConfig(conf *Config) { - conf.domainLocks = map[string]*sync.RWMutex{} for _, domain := range conf.AllowedWebmentionSources { - conf.domainLocks[domain] = &sync.RWMutex{} - os.MkdirAll(conf.DataPath+"/"+domain, os.ModePerm) - log.Info().Str("allowedDomain", domain).Msg("Configured") } + return conf } func config() *Config { @@ -136,6 +97,7 @@ func defaultConfig() *Config { Token: "miauwkes", UtcOffset: 60, DataPath: "data", + Connection: "data/mentions.db", AllowedWebmentionSources: []string{"brainbaking.com", "jefklakscodex.com"}, DisallowedWebmentionDomains: []string{"youtube.com"}, } diff --git a/db/repo.go b/db/repo.go new file mode 100644 index 0000000..022fb19 --- /dev/null +++ b/db/repo.go @@ -0,0 +1,146 @@ +// A database wrapper package for BuntDB that persists indieweb (meta)data. +// Most functions silently suppress errors as with consistent states, it would be impossible. +package db + +import ( + "brainbaking.com/go-jamming/app/mf" + "brainbaking.com/go-jamming/common" + "encoding/json" + "fmt" + "github.com/rs/zerolog/log" + "github.com/tidwall/buntdb" + "time" +) + +type MentionRepoBunt struct { + db *buntdb.DB + conf *common.Config +} + +type MentionRepo interface { + Save(key mf.Mention, data *mf.IndiewebData) (string, error) + Delete(key mf.Mention) + Since(domain string) (time.Time, error) + UpdateSince(domain string, since time.Time) + Get(key mf.Mention) *mf.IndiewebData + GetAll(domain string) mf.IndiewebDataResult +} + +// UpdateSince updates the since timestamp to now. Logs but ignores errors. +func (r *MentionRepoBunt) UpdateSince(domain string, since time.Time) { + err := r.db.Update(func(tx *buntdb.Tx) error { + _, _, err := tx.Set(fmt.Sprintf("%s:since", domain), common.TimeToIso(since), nil) + return err + }) + if err != nil { + log.Error().Err(err).Msg("UpdateSince: unable to save") + } +} + +// Since fetches the last timestamp of the mf send. +// Returns converted found instance, or an error if none found. +func (r *MentionRepoBunt) Since(domain string) (time.Time, error) { + var since string + err := r.db.View(func(tx *buntdb.Tx) error { + val, err := tx.Get(fmt.Sprintf("%s:since", domain)) + since = val + return err + }) + if err != nil { + return time.Time{}, err + } + return common.IsoToTime(since), nil +} + +// Delete removes a possibly present mention by key. Ignores possible errors. +func (r *MentionRepoBunt) Delete(wm mf.Mention) { + key := r.mentionToKey(wm) + r.db.Update(func(tx *buntdb.Tx) error { + tx.Delete(key) + return nil + }) +} + +// Save saves the mention by marshalling data. Returns the key or a marshal/persist error. +func (r *MentionRepoBunt) Save(wm mf.Mention, data *mf.IndiewebData) (string, error) { + jsonData, err := json.Marshal(data) + if err != nil { + return "", err + } + key := r.mentionToKey(wm) + err = r.db.Update(func(tx *buntdb.Tx) error { + _, _, err := tx.Set(key, string(jsonData), nil) + return err + }) + if err != nil { + return "", err + } + return key, nil +} + +func (r *MentionRepoBunt) mentionToKey(wm mf.Mention) string { + return fmt.Sprintf("%s:%s", wm.Key(), wm.Domain(r.conf)) +} + +// Get returns a single unmarshalled json value based on the mention key. +// It returns the unmarshalled result or nil if something went wrong. +func (r *MentionRepoBunt) Get(wm mf.Mention) *mf.IndiewebData { + var data mf.IndiewebData + key := r.mentionToKey(wm) + err := r.db.View(func(tx *buntdb.Tx) error { + val, err := tx.Get(key) + if err != nil { + return err + } + err = json.Unmarshal([]byte(val), &data) + if err != nil { + return err + } + return nil + }) + if err != nil { + log.Error().Err(err).Str("key", key).Msg("repo get: unable to retrieve key") + return nil + } + return &data +} + +// GetAll returns a wrapped data result for all mentions for a particular domain. +// Intentionally ignores marshal errors, db should be consistent! +// Warning, this will potentially marshall 10k strings! +func (r *MentionRepoBunt) GetAll(domain string) mf.IndiewebDataResult { + var data []*mf.IndiewebData + err := r.db.View(func(tx *buntdb.Tx) error { + return tx.Ascend(domain, func(key, value string) bool { + var result mf.IndiewebData + json.Unmarshal([]byte(value), &result) + data = append(data, &result) + return true + }) + }) + if err != nil { + log.Error().Err(err).Msg("get all: failed to ascend from view") + return mf.IndiewebDataResult{} + } + return mf.ResultSuccess(data) +} + +// NewMentionRepo opens a database connection using default buntdb settings. +// It also creates necessary indexes based on the passed domain config. +// This panics if it cannot open the db. +func NewMentionRepo(c *common.Config) *MentionRepoBunt { + repo := &MentionRepoBunt{ + conf: c, + } + db, err := buntdb.Open(c.Connection) + if err != nil { + log.Fatal().Str("constr", c.Connection).Msg("new mention repo: cannot open db") + } + repo.db = db + + for _, domain := range c.AllowedWebmentionSources { + db.CreateIndex(domain, fmt.Sprintf("*:%s", domain), buntdb.IndexString) + } + + return repo +} diff --git a/db/repo_test.go b/db/repo_test.go new file mode 100644 index 0000000..693fdfc --- /dev/null +++ b/db/repo_test.go @@ -0,0 +1,128 @@ +package db + +import ( + "brainbaking.com/go-jamming/app/mf" + "brainbaking.com/go-jamming/common" + "fmt" + "github.com/stretchr/testify/assert" + "github.com/tidwall/buntdb" + "os" + "testing" + "time" +) + +var ( + conf = &common.Config{ + Connection: ":memory:", + AllowedWebmentionSources: []string{ + "pussycat.com", + }, + } +) + +func TestDelete(t *testing.T) { + db := NewMentionRepo(conf) + wm := mf.Mention{ + Target: "pussycat.com/coolpussy.html", + } + db.Save(wm, &mf.IndiewebData{ + Name: "lolz", + }) + db.Delete(wm) + + results := db.GetAll("pussycat.com") + assert.Equal(t, 0, len(results.Data)) +} + +func TestUpdateSince(t *testing.T) { + db := NewMentionRepo(conf) + nowStamp := time.Date(2020, 10, 13, 14, 15, 0, 0, time.UTC) + + db.UpdateSince("pussycat.com", nowStamp) + since, err := db.Since("pussycat.com") + + assert.NoError(t, err) + assert.Equal(t, nowStamp, since) +} + +func TestSinceFirstTimeIsEmptytime(t *testing.T) { + db := NewMentionRepo(conf) + since, err := db.Since("pussycat.com") + + assert.Equal(t, buntdb.ErrNotFound, err) + assert.Equal(t, time.Time{}, since) +} + +func TestGet(t *testing.T) { + db := NewMentionRepo(conf) + wm := mf.Mention{ + Target: "pussycat.com/coolpussy.html", + } + db.Save(wm, &mf.IndiewebData{ + Name: "lolz", + }) + + result := db.Get(wm) + assert.Equal(t, "lolz", result.Name) +} + +func BenchmarkMentionRepoBunt_GetAll(b *testing.B) { + defer os.Remove("test.db") + db := NewMentionRepo(&common.Config{ + Connection: "test.db", + AllowedWebmentionSources: []string{ + "pussycat.com", + }, + }) + + items := 10000 + fmt.Printf(" -- Saving %d items\n", items) + for n := 0; n < items; n++ { + db.Save(mf.Mention{ + Source: fmt.Sprintf("https://blahsource.com/%d/ding.html", n), + Target: fmt.Sprintf("https://pussycat.com/%d/ding.html", n), + }, &mf.IndiewebData{ + Name: fmt.Sprintf("benchmark %d", n), + Author: mf.IndiewebAuthor{ + Name: fmt.Sprintf("author %d", n), + }, + }) + } + + b.Run(fmt.Sprintf(" -- Benchmark Get All for #%d\n", b.N), func(b *testing.B) { + for n := 0; n < b.N; n++ { + db.GetAll("pussycat.com") + } + }) +} + +func TestGetAllAndSaveSomeJson(t *testing.T) { + db := NewMentionRepo(conf) + db.Save(mf.Mention{ + Target: "pussycat.com/coolpussy.html", + }, &mf.IndiewebData{ + Name: "lolz", + }) + + results := db.GetAll("pussycat.com") + assert.Equal(t, 1, len(results.Data)) + assert.Equal(t, "lolz", results.Data[0].Name) +} + +func TestGetFiltersBasedOnDomain(t *testing.T) { + db := NewMentionRepo(conf) + db.Save(mf.Mention{ + Target: "pussycat.com/coolpussy.html", + }, &mf.IndiewebData{ + Name: "lolz", + }) + db.Save(mf.Mention{ + Target: "dingeling.com/dogshateus.html", + }, &mf.IndiewebData{ + Name: "amaigat", + }) + + results := db.GetAll("pussycat.com") + assert.Equal(t, 1, len(results.Data)) + assert.Equal(t, "lolz", results.Data[0].Name) +} diff --git a/go.mod b/go.mod index 5215333..0dda4fc 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/hashicorp/go-retryablehttp v0.6.8 github.com/rs/zerolog v1.21.0 github.com/stretchr/testify v1.7.0 + github.com/tidwall/buntdb v1.2.3 // indirect golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba willnorris.com/go/microformats v1.1.1 ) diff --git a/go.sum b/go.sum index 6b61e27..e4b1d47 100644 --- a/go.sum +++ b/go.sum @@ -28,6 +28,23 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/tidwall/btree v0.4.2 h1:aLwwJlG+InuFzdAPuBf9YCAR1LvSQ9zhC5aorFPlIPs= +github.com/tidwall/btree v0.4.2/go.mod h1:huei1BkDWJ3/sLXmO+bsCNELL+Bp2Kks9OLyQFkzvA8= +github.com/tidwall/buntdb v1.2.3 h1:AoGVe4yrhKmnEPHrPrW5EUOATHOCIk4VtFvd8xn/ZtU= +github.com/tidwall/buntdb v1.2.3/go.mod h1:+i/gBwYOHWG19wLgwMXFLkl00twh9+VWkkaOhuNQ4PA= +github.com/tidwall/gjson v1.7.4 h1:19cchw8FOxkG5mdLRkGf9jqIqEyqdZhPqW60XfyFxk8= +github.com/tidwall/gjson v1.7.4/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk= +github.com/tidwall/grect v0.1.1 h1:+kMEkxhoqB7rniVXzMEIA66XwU07STgINqxh+qVIndY= +github.com/tidwall/grect v0.1.1/go.mod h1:CzvbGiFbWUwiJ1JohXLb28McpyBsI00TK9Y6pDWLGRQ= +github.com/tidwall/lotsa v1.0.2/go.mod h1:X6NiU+4yHA3fE3Puvpnn1XMDrFZrE9JO2/w+UMuqgR8= +github.com/tidwall/match v1.0.3 h1:FQUVvBImDutD8wJLN6c5eMzWtjgONK9MwIBCOrUJKeE= +github.com/tidwall/match v1.0.3/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.1.0 h1:K3hMW5epkdAVwibsQEfR/7Zj0Qgt4DxtNumTq/VloO8= +github.com/tidwall/pretty v1.1.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= +github.com/tidwall/rtred v0.1.2 h1:exmoQtOLvDoO8ud++6LwVsAMTu0KPzLTUrMln8u1yu8= +github.com/tidwall/rtred v0.1.2/go.mod h1:hd69WNXQ5RP9vHd7dqekAz+RIdtfBogmglkZSRxCHFQ= +github.com/tidwall/tinyqueue v0.1.1 h1:SpNEvEggbpyN5DIReaJ2/1ndroY8iyEGxPYxoSaymYE= +github.com/tidwall/tinyqueue v0.1.1/go.mod h1:O/QNHwrnjqr6IHItYrzoHAKYhBkLI67Q096fQP5zMYw= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= diff --git a/migrate-db.go b/migrate-db.go new file mode 100644 index 0000000..4c85e1f --- /dev/null +++ b/migrate-db.go @@ -0,0 +1,59 @@ +package main + +import ( + "brainbaking.com/go-jamming/app/mf" + "brainbaking.com/go-jamming/common" + "brainbaking.com/go-jamming/db" + "encoding/json" + "fmt" + "github.com/rs/zerolog/log" + "io/ioutil" + "os" +) + +func mai() { + cnf := common.Configure() + os.Remove(cnf.Connection) + repo := db.NewMentionRepo(cnf) + + log.Info().Str("dbconfig", cnf.Connection).Msg("Starting migration...") + for _, domain := range cnf.AllowedWebmentionSources { + fmt.Printf("Processing domain %s...\n", domain) + entries, err := os.ReadDir(fmt.Sprintf("%s/%s", cnf.DataPath, domain)) + if err != nil { + log.Fatal().Err(err).Msg("Error while reading import path") + } + + for _, file := range entries { + filename := fmt.Sprintf("%s/%s/%s", cnf.DataPath, domain, file.Name()) + data, err := ioutil.ReadFile(filename) + if err != nil { + log.Fatal().Str("file", filename).Err(err).Msg("Error while reading file") + } + + var indiewebData mf.IndiewebData + json.Unmarshal(data, &indiewebData) + mention := mf.Mention{ + Source: indiewebData.Source, + Target: indiewebData.Target, + } + + log.Info().Stringer("wm", mention).Str("file", filename).Msg("Re-saving entry") + repo.Save(mention, &indiewebData) + } + } + + log.Info().Str("dbconfig", cnf.Connection).Msg("Checking for since files...") + for _, domain := range cnf.AllowedWebmentionSources { + since, err := ioutil.ReadFile(fmt.Sprintf("%s/%s-since.txt", cnf.DataPath, domain)) + if err != nil { + log.Warn().Str("domain", domain).Msg("No since found, skipping") + continue + } + + log.Info().Str("domain", domain).Str("since", string(since)).Msg("Saving since") + repo.UpdateSince(domain, common.IsoToTime(string(since))) + } + + log.Info().Str("dbconfig", cnf.Connection).Msg("Done! Check db") +} diff --git a/playground.go b/playground.go deleted file mode 100644 index a34932b..0000000 --- a/playground.go +++ /dev/null @@ -1,5 +0,0 @@ -package main - -func mainz() { - //time.Tick() -}