From 989495a18e7c58b865a4bcf84f9824696d13a6ad Mon Sep 17 00:00:00 2001 From: wgroeneveld Date: Wed, 22 Jun 2022 15:19:53 +0200 Subject: [PATCH] webmention.io import functionality implementation --- app/external/importer.go | 42 ++++++++--- app/external/importer_test.go | 56 +++++++++++++++ app/external/webmentionio.go | 9 +-- app/external/webmentionio_test.go | 86 +++++++++++++++++++++++ app/mf/microformats.go | 13 +--- {mocks => app/notifier}/stringnotifier.go | 2 +- app/webmention/recv/receive.go | 16 ++--- app/webmention/recv/receive_test.go | 5 +- common/strings.go | 8 +++ main.go | 15 +++- 10 files changed, 218 insertions(+), 34 deletions(-) create mode 100644 app/external/importer_test.go rename {mocks => app/notifier}/stringnotifier.go (96%) create mode 100644 common/strings.go diff --git a/app/external/importer.go b/app/external/importer.go index 2796404..919c3df 100644 --- a/app/external/importer.go +++ b/app/external/importer.go @@ -2,6 +2,11 @@ package external import ( "brainbaking.com/go-jamming/app/mf" + "brainbaking.com/go-jamming/app/notifier" + "brainbaking.com/go-jamming/app/webmention/recv" + "brainbaking.com/go-jamming/common" + "brainbaking.com/go-jamming/db" + "brainbaking.com/go-jamming/rest" "github.com/rs/zerolog/log" "os" "reflect" @@ -11,9 +16,13 @@ type Importer interface { TryImport(data []byte) ([]*mf.IndiewebData, error) } -func Import(file string) { - log.Info().Str("file", file).Msg("Starting import...") +type ImportBootstrapper struct { + RestClient rest.Client + Conf *common.Config + Repo db.MentionRepo +} +func (ib *ImportBootstrapper) Import(file string) { bytes, err := os.ReadFile(file) if err != nil { log.Err(err).Msg("Unable to read file") @@ -28,9 +37,10 @@ func Import(file string) { for _, i := range importers { convertedData, err = i.TryImport(bytes) - if err != nil { - log.Warn().Str("importType", reflect.TypeOf(i).String()).Msg("Importer failed: ") + if err != nil || len(convertedData) == 0 { + 
log.Warn().Str("importType", reflect.TypeOf(i).String()).Msg("Importer failed or returned zero entries") } else { + log.Info().Str("importType", reflect.TypeOf(i).String()).Msg("Suitable converter found!") break } } @@ -40,8 +50,24 @@ func Import(file string) { return } - // TODO store author pictures locally (and mutate wm for local URL) - // TODO strip content + trim? - // TODO save converted data in db - // TODO whitelist domains? + log.Info().Msg("Conversion succeeded, persisting to data storage...") + recv := &recv.Receiver{ + RestClient: ib.RestClient, + Conf: ib.Conf, + Repo: ib.Repo, + Notifier: &notifier.StringNotifier{}, + } + + for _, wm := range convertedData { + mention := mf.Mention{ + Source: wm.Source, + Target: wm.Target, + } + ib.Conf.AddToWhitelist(mention.SourceDomain()) + + recv.ProcessAuthorPicture(wm) + recv.ProcessWhitelistedMention(mention, wm) + } + + log.Info().Msg("All done, enjoy your go-jammed mentions!") } diff --git a/app/external/importer_test.go b/app/external/importer_test.go new file mode 100644 index 0000000..e16910b --- /dev/null +++ b/app/external/importer_test.go @@ -0,0 +1,56 @@ +package external + +import ( + "brainbaking.com/go-jamming/common" + "brainbaking.com/go-jamming/db" + "brainbaking.com/go-jamming/mocks" + "github.com/stretchr/testify/assert" + "os" + "sort" + "testing" +) + +var ( + cnf = &common.Config{ + BaseURL: "http://localhost:1337/", + Port: 1337, + Token: "miauwkes", + AllowedWebmentionSources: []string{"chrisburnell.com"}, + Blacklist: []string{}, + Whitelist: []string{"chrisburnell.com"}, + } +) + +func TestImport(t *testing.T) { + repo := db.NewMentionRepo(cnf) + bootstrapper := ImportBootstrapper{ + Conf: cnf, + Repo: repo, + RestClient: &mocks.RestClientMock{ + // this will make sure each picture GET fails + // otherwise this test is REALLY slow. 
It will fallback to anonymous pictures + GetBodyFunc: mocks.RelPathGetBodyFunc("../../../mocks/"), + }, + } + + t.Cleanup(func() { + os.Remove("config.json") + db.Purge() + }) + + bootstrapper.Import("../../mocks/external/wmio.json") + + entries := repo.GetAll("chrisburnell.com") + assert.Equal(t, 25, len(entries.Data)) + sort.Slice(entries.Data, func(i, j int) bool { + return entries.Data[i].PublishedDate().After(entries.Data[j].PublishedDate()) + }) + + assert.Equal(t, "https://chrisburnell.com/note/1655219889/", entries.Data[0].Source) + assert.Equal(t, "/pictures/anonymous", entries.Data[0].Author.Picture) + assert.Equal(t, "", entries.Data[10].Name) + assert.Equal(t, "https://jacky.wtf/2022/5/BRQo liked a post https://chrisburnell.com/article/changing-with-the-times/", entries.Data[20].Content) + + assert.Contains(t, cnf.Whitelist, "jacky.wtf") + assert.Contains(t, cnf.Whitelist, "martymcgui.re") +} diff --git a/app/external/webmentionio.go b/app/external/webmentionio.go index ccd958b..7b421f2 100644 --- a/app/external/webmentionio.go +++ b/app/external/webmentionio.go @@ -106,17 +106,18 @@ func convert(wmio WebmentionIOMention) *mf.IndiewebData { } func nameOf(wmio WebmentionIOMention, iType mf.MfType) string { - if iType == mf.TypeReply { + if (iType == mf.TypeReply || iType == mf.TypeLike) && wmio.Data.Name == "" { return wmio.Data.Content } return wmio.Data.Name } func contentOf(wmio WebmentionIOMention, iType mf.MfType) string { - if iType == mf.TypeReply { - return wmio.Activity.Sentence + content := wmio.Data.Content + if iType == mf.TypeReply || (iType == mf.TypeLike && content == "") { + content = wmio.Activity.Sentence } - return wmio.Data.Content + return common.Shorten(content) } // typeOf returns the mf.MfType from a wmio mention. 
diff --git a/app/external/webmentionio_test.go b/app/external/webmentionio_test.go index 3fd58a2..508b73f 100644 --- a/app/external/webmentionio_test.go +++ b/app/external/webmentionio_test.go @@ -4,8 +4,94 @@ import ( "brainbaking.com/go-jamming/app/mf" "github.com/stretchr/testify/assert" "testing" + "time" ) +func TestTryImportPublishedDates(t *testing.T) { + wmio := &WebmentionIOImporter{} + cases := []struct { + label string + mention string + expectedDate string + }{ + { + "no dates reverts to first", + `{ "links": [ { } ] }`, + time.Time{}.Format(mf.DateFormatWithTimeZone), + }, + { + "no published date reverts to verified date", + `{ "links": [ { "verified_date": "2022-05-25T14:28:10+00:00" } ] }`, + "2022-05-25T14:28:10+00:00", + }, + { + "published date present takes preference over rest", + `{ "links": [ { "data": { "published": "2020-01-25T14:28:10+00:00" }, "verified_date": "2022-05-25T14:28:10+00:00" } ] }`, + "2020-01-25T14:28:10+00:00", + }, + } + + for _, tc := range cases { + t.Run(tc.label, func(t *testing.T) { + res, err := wmio.TryImport([]byte(tc.mention)) + assert.NoError(t, err) + + assert.Equal(t, tc.expectedDate, res[0].Published) + }) + } +} + +func TestTryImportErrorIfInvalidFormat(t *testing.T) { + wmio := &WebmentionIOImporter{} + mention := `haha` + + _, err := wmio.TryImport([]byte(mention)) + assert.Error(t, err) +} + +func TestTryImportForLikeWithMissingAuthor(t *testing.T) { + wmio := &WebmentionIOImporter{} + mention := `{ "links": [ +{ + "source": "https://jacky.wtf/2022/5/BRQo", + "verified": true, + "verified_date": "2022-05-25T14:28:10+00:00", + "id": 1404286, + "private": false, + "data": { + "url": "https://jacky.wtf/2022/5/BRQo", + "name": null, + "content": null, + "published": "2022-05-25T14:26:12+00:00", + "published_ts": 1653488772 + }, + "activity": { + "type": "like", + "sentence": "https://jacky.wtf/2022/5/BRQo liked a post https://chrisburnell.com/article/changing-with-the-times/", + "sentence_html": "someone liked 
a post https://chrisburnell.com/article/changing-with-the-times/" + }, + "target": "https://chrisburnell.com/article/changing-with-the-times/" + } +] }` + + res, err := wmio.TryImport([]byte(mention)) + assert.NoError(t, err) + assert.Equal(t, 1, len(res)) + result := res[0] + + assert.Equal(t, "https://chrisburnell.com/article/changing-with-the-times/", result.Target) + assert.Equal(t, "https://jacky.wtf/2022/5/BRQo", result.Source) + + assert.Equal(t, mf.TypeLike, result.IndiewebType) + assert.Equal(t, "https://jacky.wtf/2022/5/BRQo liked a post https://chrisburnell.com/article/changing-with-the-times/", result.Content) + assert.Equal(t, "", result.Name) + assert.Equal(t, "https://jacky.wtf/2022/5/BRQo", result.Url) + assert.Equal(t, "2022-05-25T14:26:12+00:00", result.Published) + + assert.Equal(t, "", result.Author.Name) + assert.Equal(t, "", result.Author.Picture) +} + func TestTryImportForReply(t *testing.T) { wmio := &WebmentionIOImporter{} mention := `{ "links": [ diff --git a/app/mf/microformats.go b/app/mf/microformats.go index 1166a49..9198c69 100644 --- a/app/mf/microformats.go +++ b/app/mf/microformats.go @@ -100,13 +100,6 @@ func PublishedNow() string { return common.Now().UTC().Format(DateFormatWithTimeZone) } -func shorten(txt string) string { - if len(txt) <= 250 { - return txt - } - return txt[:250] + "..." -} - // Go stuff: entry.Properties["name"][0].(string), // JS stuff: hEntry.properties?.name?.[0] // The problem: convoluted syntax and no optional chaining! 
@@ -273,12 +266,12 @@ func Url(hEntry *microformats.Microformat, source string) string { func Content(hEntry *microformats.Microformat) string { bridgyTwitterContent := Str(hEntry, "bridgy-twitter-content") if bridgyTwitterContent != "" { - return shorten(bridgyTwitterContent) + return common.Shorten(bridgyTwitterContent) } summary := Str(hEntry, "summary") if summary != "" { - return shorten(summary) + return common.Shorten(summary) } contentEntry := Map(hEntry, "content")["value"] - return shorten(contentEntry) + return common.Shorten(contentEntry) } diff --git a/mocks/stringnotifier.go b/app/notifier/stringnotifier.go similarity index 96% rename from mocks/stringnotifier.go rename to app/notifier/stringnotifier.go index 1d42d79..f4fabd5 100644 --- a/mocks/stringnotifier.go +++ b/app/notifier/stringnotifier.go @@ -1,4 +1,4 @@ -package mocks +package notifier import ( "brainbaking.com/go-jamming/app/mf" diff --git a/app/webmention/recv/receive.go b/app/webmention/recv/receive.go index 8b8f1da..ef47a8d 100644 --- a/app/webmention/recv/receive.go +++ b/app/webmention/recv/receive.go @@ -28,7 +28,7 @@ var ( errPicUnableToDownload = errors.New("Unable to download author picture") errPicNoRealImage = errors.New("Downloaded author picture is not a real image") errPicUnableToSave = errors.New("Unable to save downloaded author picture") - errWontDownloadBecauseOfPrivacy = errors.New("Will not save locally because it's form a silo domain") + errWontDownloadBecauseOfPrivacy = errors.New("Will not save locally because it's from a silo domain") ) func (recv *Receiver) Receive(wm mf.Mention) { @@ -62,16 +62,16 @@ func (recv *Receiver) processSourceBody(body string, wm mf.Mention) { data := microformats.Parse(strings.NewReader(body), wm.SourceUrl()) indieweb := recv.convertBodyToIndiewebData(body, wm, data) - recv.processAuthorPicture(indieweb) + recv.ProcessAuthorPicture(indieweb) if recv.Conf.IsWhitelisted(wm.Source) { - recv.processWhitelistedMention(wm, indieweb) + 
recv.ProcessWhitelistedMention(wm, indieweb) } else { - recv.processMentionInModeration(wm, indieweb) + recv.ProcessMentionInModeration(wm, indieweb) } } -func (recv *Receiver) processMentionInModeration(wm mf.Mention, indieweb *mf.IndiewebData) { +func (recv *Receiver) ProcessMentionInModeration(wm mf.Mention, indieweb *mf.IndiewebData) { key, err := recv.Repo.InModeration(wm, indieweb) if err != nil { log.Error().Err(err).Stringer("wm", wm).Msg("Failed to save new mention to in moderation db") @@ -83,7 +83,7 @@ func (recv *Receiver) processMentionInModeration(wm mf.Mention, indieweb *mf.Ind log.Info().Str("key", key).Msg("OK: Webmention processed, in moderation.") } -func (recv *Receiver) processWhitelistedMention(wm mf.Mention, indieweb *mf.IndiewebData) { +func (recv *Receiver) ProcessWhitelistedMention(wm mf.Mention, indieweb *mf.IndiewebData) { key, err := recv.Repo.Save(wm, indieweb) if err != nil { log.Error().Err(err).Stringer("wm", wm).Msg("Failed to save new mention to db") @@ -95,7 +95,7 @@ func (recv *Receiver) processWhitelistedMention(wm mf.Mention, indieweb *mf.Indi log.Info().Str("key", key).Msg("OK: Webmention processed, in whitelist.") } -func (recv *Receiver) processAuthorPicture(indieweb *mf.IndiewebData) { +func (recv *Receiver) ProcessAuthorPicture(indieweb *mf.IndiewebData) { if indieweb.Author.Picture != "" { err := recv.saveAuthorPictureLocally(indieweb) if err != nil { @@ -153,7 +153,7 @@ func (recv *Receiver) parseBodyAsNonIndiewebSite(body string, wm mf.Mention) *mf // saveAuthorPictureLocally tries to download the author picture and checks if it's valid based on img header. // If it succeeds, it alters the picture path to a local /pictures/x one. // If it fails, it returns an error. -// This refuses to download from silo sources such as brid.gy because of privacy concerns. +// It refuses to download from silo sources such as brid.gy because of privacy concerns. 
func (recv *Receiver) saveAuthorPictureLocally(indieweb *mf.IndiewebData) error { srcDomain := rest.Domain(indieweb.Source) if common.Includes(rest.SiloDomains, srcDomain) { diff --git a/app/webmention/recv/receive_test.go b/app/webmention/recv/receive_test.go index 259008b..375ff0e 100644 --- a/app/webmention/recv/receive_test.go +++ b/app/webmention/recv/receive_test.go @@ -2,6 +2,7 @@ package recv import ( "brainbaking.com/go-jamming/app/mf" + "brainbaking.com/go-jamming/app/notifier" "brainbaking.com/go-jamming/db" "encoding/json" "errors" @@ -255,7 +256,7 @@ func TestReceiveFromNotInWhitelistSavesInModerationAndNotifies(t *testing.T) { } repo := db.NewMentionRepo(cnf) t.Cleanup(db.Purge) - notifierMock := &mocks.StringNotifier{ + notifierMock := &notifier.StringNotifier{ Conf: cnf, Output: "", } @@ -415,7 +416,7 @@ func TestProcessAuthorPictureAnonymizesIfEmpty(t *testing.T) { Picture: "", }, } - recv.processAuthorPicture(indieweb) + recv.ProcessAuthorPicture(indieweb) assert.Equal(t, "/pictures/anonymous", indieweb.Author.Picture) } diff --git a/common/strings.go b/common/strings.go new file mode 100644 index 0000000..465362a --- /dev/null +++ b/common/strings.go @@ -0,0 +1,8 @@ +package common + +func Shorten(txt string) string { + if len(txt) <= 250 { + return txt + } + return txt[:250] + "..." 
+} diff --git a/main.go b/main.go index efb133c..a9a0deb 100644 --- a/main.go +++ b/main.go @@ -4,6 +4,7 @@ import ( "brainbaking.com/go-jamming/app/external" "brainbaking.com/go-jamming/common" "brainbaking.com/go-jamming/db" + "brainbaking.com/go-jamming/rest" "flag" "os" @@ -42,7 +43,7 @@ func main() { } if importing { - external.Import(*importFile) + importWebmentionFile(*importFile) os.Exit(0) } @@ -50,6 +51,18 @@ func main() { app.Start() } +func importWebmentionFile(file string) { + log.Info().Str("file", file).Msg("Starting import...") + + config := common.Configure() + bootstrapper := external.ImportBootstrapper{ + RestClient: &rest.HttpClient{}, + Conf: config, + Repo: db.NewMentionRepo(config), + } + bootstrapper.Import(file) +} + func blacklistDomain(domain string) { log.Info().Str("domain", domain).Msg("Blacklisting...") config := common.Configure()