webmention.io import functionality implementation

This commit is contained in:
Wouter Groeneveld 2022-06-22 15:19:53 +02:00
parent fb5ba20292
commit 989495a18e
10 changed files with 218 additions and 34 deletions

View File

@ -2,6 +2,11 @@ package external
import ( import (
"brainbaking.com/go-jamming/app/mf" "brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/app/notifier"
"brainbaking.com/go-jamming/app/webmention/recv"
"brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db"
"brainbaking.com/go-jamming/rest"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"os" "os"
"reflect" "reflect"
@ -11,9 +16,13 @@ type Importer interface {
TryImport(data []byte) ([]*mf.IndiewebData, error) TryImport(data []byte) ([]*mf.IndiewebData, error)
} }
func Import(file string) { type ImportBootstrapper struct {
log.Info().Str("file", file).Msg("Starting import...") RestClient rest.Client
Conf *common.Config
Repo db.MentionRepo
}
func (ib *ImportBootstrapper) Import(file string) {
bytes, err := os.ReadFile(file) bytes, err := os.ReadFile(file)
if err != nil { if err != nil {
log.Err(err).Msg("Unable to read file") log.Err(err).Msg("Unable to read file")
@ -28,9 +37,10 @@ func Import(file string) {
for _, i := range importers { for _, i := range importers {
convertedData, err = i.TryImport(bytes) convertedData, err = i.TryImport(bytes)
if err != nil { if err != nil || len(convertedData) == 0 {
log.Warn().Str("importType", reflect.TypeOf(i).String()).Msg("Importer failed: ") log.Warn().Str("importType", reflect.TypeOf(i).String()).Msg("Importer failed or returned zero entries")
} else { } else {
log.Info().Str("importType", reflect.TypeOf(i).String()).Msg("Suitable converter found!")
break break
} }
} }
@ -40,8 +50,24 @@ func Import(file string) {
return return
} }
// TODO store author pictures locally (and mutate wm for local URL) log.Info().Msg("Conversion succeeded, persisting to data storage...")
// TODO strip content + trim? recv := &recv.Receiver{
// TODO save converted data in db RestClient: ib.RestClient,
// TODO whitelist domains? Conf: ib.Conf,
Repo: ib.Repo,
Notifier: &notifier.StringNotifier{},
}
for _, wm := range convertedData {
mention := mf.Mention{
Source: wm.Source,
Target: wm.Target,
}
ib.Conf.AddToWhitelist(mention.SourceDomain())
recv.ProcessAuthorPicture(wm)
recv.ProcessWhitelistedMention(mention, wm)
}
log.Info().Msg("All done, enjoy your go-jammed mentions!")
} }

56
app/external/importer_test.go vendored Normal file
View File

@ -0,0 +1,56 @@
package external
import (
"brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db"
"brainbaking.com/go-jamming/mocks"
"github.com/stretchr/testify/assert"
"os"
"sort"
"testing"
)
// cnf is the configuration fixture used by TestImport. Only chrisburnell.com
// is whitelisted up front; TestImport asserts that further source domains
// show up in Whitelist after the import has run.
var (
cnf = &common.Config{
BaseURL: "http://localhost:1337/",
Port: 1337,
Token: "miauwkes",
AllowedWebmentionSources: []string{"chrisburnell.com"},
Blacklist: []string{},
Whitelist: []string{"chrisburnell.com"},
}
)
// TestImport runs a complete import of the webmention.io fixture file and
// verifies both the whitelist held by cnf and the mentions that were stored
// in the repository for chrisburnell.com.
func TestImport(t *testing.T) {
	repo := db.NewMentionRepo(cnf)
	bootstrapper := ImportBootstrapper{
		Conf: cnf,
		Repo: repo,
		RestClient: &mocks.RestClientMock{
			// this will make sure each picture GET fails
			// otherwise this test is REALLY slow. It will fallback to anonymous pictures
			GetBodyFunc: mocks.RelPathGetBodyFunc("../../../mocks/"),
		},
	}

	t.Cleanup(func() {
		// Best-effort cleanup: the file may not exist, so the error is
		// deliberately ignored.
		// NOTE(review): config.json is presumably written when the whitelist
		// is changed during the import - confirm.
		_ = os.Remove("config.json")
		db.Purge()
	})

	bootstrapper.Import("../../mocks/external/wmio.json")

	// The whitelist checks do not depend on the stored entries, so run them
	// before the length guard below can end the test early.
	assert.Contains(t, cnf.Whitelist, "jacky.wtf")
	assert.Contains(t, cnf.Whitelist, "martymcgui.re")

	entries := repo.GetAll("chrisburnell.com")
	// assert.Equal does not stop the test; without this guard a wrong entry
	// count would surface as an index-out-of-range panic on the fixed indexes
	// used below instead of a readable assertion failure.
	if !assert.Equal(t, 25, len(entries.Data)) {
		return
	}

	// Newest first, so the indexes below are deterministic.
	sort.Slice(entries.Data, func(i, j int) bool {
		return entries.Data[i].PublishedDate().After(entries.Data[j].PublishedDate())
	})

	assert.Equal(t, "https://chrisburnell.com/note/1655219889/", entries.Data[0].Source)
	assert.Equal(t, "/pictures/anonymous", entries.Data[0].Author.Picture)
	assert.Equal(t, "", entries.Data[10].Name)
	assert.Equal(t, "https://jacky.wtf/2022/5/BRQo liked a post https://chrisburnell.com/article/changing-with-the-times/", entries.Data[20].Content)
}

View File

@ -106,17 +106,18 @@ func convert(wmio WebmentionIOMention) *mf.IndiewebData {
} }
func nameOf(wmio WebmentionIOMention, iType mf.MfType) string { func nameOf(wmio WebmentionIOMention, iType mf.MfType) string {
if iType == mf.TypeReply { if (iType == mf.TypeReply || iType == mf.TypeLike) && wmio.Data.Name == "" {
return wmio.Data.Content return wmio.Data.Content
} }
return wmio.Data.Name return wmio.Data.Name
} }
func contentOf(wmio WebmentionIOMention, iType mf.MfType) string { func contentOf(wmio WebmentionIOMention, iType mf.MfType) string {
if iType == mf.TypeReply { content := wmio.Data.Content
return wmio.Activity.Sentence if iType == mf.TypeReply || (iType == mf.TypeLike && content == "") {
content = wmio.Activity.Sentence
} }
return wmio.Data.Content return common.Shorten(content)
} }
// typeOf returns the mf.MfType from a wmio mention. // typeOf returns the mf.MfType from a wmio mention.

View File

@ -4,8 +4,94 @@ import (
"brainbaking.com/go-jamming/app/mf" "brainbaking.com/go-jamming/app/mf"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"testing" "testing"
"time"
) )
// TestTryImportPublishedDates checks which date ends up in Published for the
// first imported mention: the published date when present, otherwise the
// verified date, otherwise the formatted zero time.
func TestTryImportPublishedDates(t *testing.T) {
	wmio := &WebmentionIOImporter{}

	cases := []struct {
		label        string
		mention      string
		expectedDate string
	}{
		{
			"no dates reverts to first",
			`{ "links": [ { } ] }`,
			time.Time{}.Format(mf.DateFormatWithTimeZone),
		},
		{
			"no published date reverts to verified date",
			`{ "links": [ { "verified_date": "2022-05-25T14:28:10+00:00" } ] }`,
			"2022-05-25T14:28:10+00:00",
		},
		{
			"published date present takes preference over rest",
			`{ "links": [ { "data": { "published": "2020-01-25T14:28:10+00:00" }, "verified_date": "2022-05-25T14:28:10+00:00" } ] }`,
			"2020-01-25T14:28:10+00:00",
		},
	}

	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			res, err := wmio.TryImport([]byte(tc.mention))
			assert.NoError(t, err)
			// assert.NoError does not abort the subtest; guard the index so
			// an empty result is reported as a failure rather than a panic.
			if !assert.NotEmpty(t, res) {
				return
			}
			assert.Equal(t, tc.expectedDate, res[0].Published)
		})
	}
}
// TestTryImportErrorIfInvalidFormat verifies that TryImport reports an error
// when the payload is not in the expected format.
func TestTryImportErrorIfInvalidFormat(t *testing.T) {
	importer := &WebmentionIOImporter{}

	_, importErr := importer.TryImport([]byte(`haha`))

	assert.Error(t, importErr)
}
// TestTryImportForLikeWithMissingAuthor imports a single "like" whose data
// has null name and content and no author information. It expects the
// activity sentence to be used as content while the name and author fields
// stay empty.
func TestTryImportForLikeWithMissingAuthor(t *testing.T) {
	wmio := &WebmentionIOImporter{}
	mention := `{ "links": [
{
"source": "https://jacky.wtf/2022/5/BRQo",
"verified": true,
"verified_date": "2022-05-25T14:28:10+00:00",
"id": 1404286,
"private": false,
"data": {
"url": "https://jacky.wtf/2022/5/BRQo",
"name": null,
"content": null,
"published": "2022-05-25T14:26:12+00:00",
"published_ts": 1653488772
},
"activity": {
"type": "like",
"sentence": "https://jacky.wtf/2022/5/BRQo liked a post https://chrisburnell.com/article/changing-with-the-times/",
"sentence_html": "<a href=\"https://jacky.wtf/2022/5/BRQo\">someone</a> liked a post <a href=\"https://chrisburnell.com/article/changing-with-the-times/\">https://chrisburnell.com/article/changing-with-the-times/</a>"
},
"target": "https://chrisburnell.com/article/changing-with-the-times/"
}
] }`

	res, err := wmio.TryImport([]byte(mention))
	assert.NoError(t, err)
	// assert.Equal does not abort the test; bail out here so that an
	// unexpected result size fails cleanly instead of panicking on res[0].
	if !assert.Equal(t, 1, len(res)) {
		return
	}

	result := res[0]
	assert.Equal(t, "https://chrisburnell.com/article/changing-with-the-times/", result.Target)
	assert.Equal(t, "https://jacky.wtf/2022/5/BRQo", result.Source)
	assert.Equal(t, mf.TypeLike, result.IndiewebType)
	assert.Equal(t, "https://jacky.wtf/2022/5/BRQo liked a post https://chrisburnell.com/article/changing-with-the-times/", result.Content)
	assert.Equal(t, "", result.Name)
	assert.Equal(t, "https://jacky.wtf/2022/5/BRQo", result.Url)
	assert.Equal(t, "2022-05-25T14:26:12+00:00", result.Published)
	assert.Equal(t, "", result.Author.Name)
	assert.Equal(t, "", result.Author.Picture)
}
func TestTryImportForReply(t *testing.T) { func TestTryImportForReply(t *testing.T) {
wmio := &WebmentionIOImporter{} wmio := &WebmentionIOImporter{}
mention := `{ "links": [ mention := `{ "links": [

View File

@ -100,13 +100,6 @@ func PublishedNow() string {
return common.Now().UTC().Format(DateFormatWithTimeZone) return common.Now().UTC().Format(DateFormatWithTimeZone)
} }
func shorten(txt string) string {
if len(txt) <= 250 {
return txt
}
return txt[:250] + "..."
}
// Go stuff: entry.Properties["name"][0].(string), // Go stuff: entry.Properties["name"][0].(string),
// JS stuff: hEntry.properties?.name?.[0] // JS stuff: hEntry.properties?.name?.[0]
// The problem: convoluted syntax and no optional chaining! // The problem: convoluted syntax and no optional chaining!
@ -273,12 +266,12 @@ func Url(hEntry *microformats.Microformat, source string) string {
func Content(hEntry *microformats.Microformat) string { func Content(hEntry *microformats.Microformat) string {
bridgyTwitterContent := Str(hEntry, "bridgy-twitter-content") bridgyTwitterContent := Str(hEntry, "bridgy-twitter-content")
if bridgyTwitterContent != "" { if bridgyTwitterContent != "" {
return shorten(bridgyTwitterContent) return common.Shorten(bridgyTwitterContent)
} }
summary := Str(hEntry, "summary") summary := Str(hEntry, "summary")
if summary != "" { if summary != "" {
return shorten(summary) return common.Shorten(summary)
} }
contentEntry := Map(hEntry, "content")["value"] contentEntry := Map(hEntry, "content")["value"]
return shorten(contentEntry) return common.Shorten(contentEntry)
} }

View File

@ -1,4 +1,4 @@
package mocks package notifier
import ( import (
"brainbaking.com/go-jamming/app/mf" "brainbaking.com/go-jamming/app/mf"

View File

@ -28,7 +28,7 @@ var (
errPicUnableToDownload = errors.New("Unable to download author picture") errPicUnableToDownload = errors.New("Unable to download author picture")
errPicNoRealImage = errors.New("Downloaded author picture is not a real image") errPicNoRealImage = errors.New("Downloaded author picture is not a real image")
errPicUnableToSave = errors.New("Unable to save downloaded author picture") errPicUnableToSave = errors.New("Unable to save downloaded author picture")
errWontDownloadBecauseOfPrivacy = errors.New("Will not save locally because it's form a silo domain") errWontDownloadBecauseOfPrivacy = errors.New("Will not save locally because it's from a silo domain")
) )
func (recv *Receiver) Receive(wm mf.Mention) { func (recv *Receiver) Receive(wm mf.Mention) {
@ -62,16 +62,16 @@ func (recv *Receiver) processSourceBody(body string, wm mf.Mention) {
data := microformats.Parse(strings.NewReader(body), wm.SourceUrl()) data := microformats.Parse(strings.NewReader(body), wm.SourceUrl())
indieweb := recv.convertBodyToIndiewebData(body, wm, data) indieweb := recv.convertBodyToIndiewebData(body, wm, data)
recv.processAuthorPicture(indieweb) recv.ProcessAuthorPicture(indieweb)
if recv.Conf.IsWhitelisted(wm.Source) { if recv.Conf.IsWhitelisted(wm.Source) {
recv.processWhitelistedMention(wm, indieweb) recv.ProcessWhitelistedMention(wm, indieweb)
} else { } else {
recv.processMentionInModeration(wm, indieweb) recv.ProcessMentionInModeration(wm, indieweb)
} }
} }
func (recv *Receiver) processMentionInModeration(wm mf.Mention, indieweb *mf.IndiewebData) { func (recv *Receiver) ProcessMentionInModeration(wm mf.Mention, indieweb *mf.IndiewebData) {
key, err := recv.Repo.InModeration(wm, indieweb) key, err := recv.Repo.InModeration(wm, indieweb)
if err != nil { if err != nil {
log.Error().Err(err).Stringer("wm", wm).Msg("Failed to save new mention to in moderation db") log.Error().Err(err).Stringer("wm", wm).Msg("Failed to save new mention to in moderation db")
@ -83,7 +83,7 @@ func (recv *Receiver) processMentionInModeration(wm mf.Mention, indieweb *mf.Ind
log.Info().Str("key", key).Msg("OK: Webmention processed, in moderation.") log.Info().Str("key", key).Msg("OK: Webmention processed, in moderation.")
} }
func (recv *Receiver) processWhitelistedMention(wm mf.Mention, indieweb *mf.IndiewebData) { func (recv *Receiver) ProcessWhitelistedMention(wm mf.Mention, indieweb *mf.IndiewebData) {
key, err := recv.Repo.Save(wm, indieweb) key, err := recv.Repo.Save(wm, indieweb)
if err != nil { if err != nil {
log.Error().Err(err).Stringer("wm", wm).Msg("Failed to save new mention to db") log.Error().Err(err).Stringer("wm", wm).Msg("Failed to save new mention to db")
@ -95,7 +95,7 @@ func (recv *Receiver) processWhitelistedMention(wm mf.Mention, indieweb *mf.Indi
log.Info().Str("key", key).Msg("OK: Webmention processed, in whitelist.") log.Info().Str("key", key).Msg("OK: Webmention processed, in whitelist.")
} }
func (recv *Receiver) processAuthorPicture(indieweb *mf.IndiewebData) { func (recv *Receiver) ProcessAuthorPicture(indieweb *mf.IndiewebData) {
if indieweb.Author.Picture != "" { if indieweb.Author.Picture != "" {
err := recv.saveAuthorPictureLocally(indieweb) err := recv.saveAuthorPictureLocally(indieweb)
if err != nil { if err != nil {
@ -153,7 +153,7 @@ func (recv *Receiver) parseBodyAsNonIndiewebSite(body string, wm mf.Mention) *mf
// saveAuthorPictureLocally tries to download the author picture and checks if it's valid based on img header. // saveAuthorPictureLocally tries to download the author picture and checks if it's valid based on img header.
// If it succeeds, it alters the picture path to a local /pictures/x one. // If it succeeds, it alters the picture path to a local /pictures/x one.
// If it fails, it returns an error. // If it fails, it returns an error.
// This refuses to download from silo sources such as brid.gy because of privacy concerns. // If strict is true, this refuses to download from silo sources such as brid.gy because of privacy concerns.
func (recv *Receiver) saveAuthorPictureLocally(indieweb *mf.IndiewebData) error { func (recv *Receiver) saveAuthorPictureLocally(indieweb *mf.IndiewebData) error {
srcDomain := rest.Domain(indieweb.Source) srcDomain := rest.Domain(indieweb.Source)
if common.Includes(rest.SiloDomains, srcDomain) { if common.Includes(rest.SiloDomains, srcDomain) {

View File

@ -2,6 +2,7 @@ package recv
import ( import (
"brainbaking.com/go-jamming/app/mf" "brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/app/notifier"
"brainbaking.com/go-jamming/db" "brainbaking.com/go-jamming/db"
"encoding/json" "encoding/json"
"errors" "errors"
@ -255,7 +256,7 @@ func TestReceiveFromNotInWhitelistSavesInModerationAndNotifies(t *testing.T) {
} }
repo := db.NewMentionRepo(cnf) repo := db.NewMentionRepo(cnf)
t.Cleanup(db.Purge) t.Cleanup(db.Purge)
notifierMock := &mocks.StringNotifier{ notifierMock := &notifier.StringNotifier{
Conf: cnf, Conf: cnf,
Output: "", Output: "",
} }
@ -415,7 +416,7 @@ func TestProcessAuthorPictureAnonymizesIfEmpty(t *testing.T) {
Picture: "", Picture: "",
}, },
} }
recv.processAuthorPicture(indieweb) recv.ProcessAuthorPicture(indieweb)
assert.Equal(t, "/pictures/anonymous", indieweb.Author.Picture) assert.Equal(t, "/pictures/anonymous", indieweb.Author.Picture)
} }

8
common/strings.go Normal file
View File

@ -0,0 +1,8 @@
package common
// Shorten limits txt to roughly 250 bytes of content.
//
// Text of at most 250 bytes is returned unchanged. Longer text is cut at the
// last UTF-8 character boundary at or before byte 250 and "..." is appended,
// so the result never ends in a partially encoded character. For pure ASCII
// input the cut is exactly at byte 250.
func Shorten(txt string) string {
	const maxLen = 250
	if len(txt) <= maxLen {
		return txt
	}

	// txt[cut] is the first byte that gets dropped. If it is a UTF-8
	// continuation byte (bit pattern 10xxxxxx), the character it belongs to
	// started earlier and would be split, so move the cut back to its start.
	// A character is at most 4 bytes long, hence at most 3 steps back; the
	// bound also keeps malformed input from being trimmed excessively.
	cut := maxLen
	for back := 0; back < 3 && cut > 0 && txt[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return txt[:cut] + "..."
}

15
main.go
View File

@ -4,6 +4,7 @@ import (
"brainbaking.com/go-jamming/app/external" "brainbaking.com/go-jamming/app/external"
"brainbaking.com/go-jamming/common" "brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db" "brainbaking.com/go-jamming/db"
"brainbaking.com/go-jamming/rest"
"flag" "flag"
"os" "os"
@ -42,7 +43,7 @@ func main() {
} }
if importing { if importing {
external.Import(*importFile) importWebmentionFile(*importFile)
os.Exit(0) os.Exit(0)
} }
@ -50,6 +51,18 @@ func main() {
app.Start() app.Start()
} }
// importWebmentionFile loads the application configuration, wires an
// external.ImportBootstrapper with an HTTP client and a mention repository
// built from that configuration, and lets it import the given file.
func importWebmentionFile(file string) {
	log.Info().Str("file", file).Msg("Starting import...")

	cfg := common.Configure()
	importer := &external.ImportBootstrapper{
		RestClient: &rest.HttpClient{},
		Conf:       cfg,
		Repo:       db.NewMentionRepo(cfg),
	}
	importer.Import(file)
}
func blacklistDomain(domain string) { func blacklistDomain(domain string) {
log.Info().Str("domain", domain).Msg("Blacklisting...") log.Info().Str("domain", domain).Msg("Blacklisting...")
config := common.Configure() config := common.Configure()