webmention.io import functionality implementation

This commit is contained in:
Wouter Groeneveld 2022-06-22 15:19:53 +02:00
parent fb5ba20292
commit 989495a18e
10 changed files with 218 additions and 34 deletions

View File

@ -2,6 +2,11 @@ package external
import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/app/notifier"
"brainbaking.com/go-jamming/app/webmention/recv"
"brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db"
"brainbaking.com/go-jamming/rest"
"github.com/rs/zerolog/log"
"os"
"reflect"
@ -11,9 +16,13 @@ type Importer interface {
TryImport(data []byte) ([]*mf.IndiewebData, error)
}
func Import(file string) {
log.Info().Str("file", file).Msg("Starting import...")
type ImportBootstrapper struct {
RestClient rest.Client
Conf *common.Config
Repo db.MentionRepo
}
func (ib *ImportBootstrapper) Import(file string) {
bytes, err := os.ReadFile(file)
if err != nil {
log.Err(err).Msg("Unable to read file")
@ -28,9 +37,10 @@ func Import(file string) {
for _, i := range importers {
convertedData, err = i.TryImport(bytes)
if err != nil {
log.Warn().Str("importType", reflect.TypeOf(i).String()).Msg("Importer failed: ")
if err != nil || len(convertedData) == 0 {
log.Warn().Str("importType", reflect.TypeOf(i).String()).Msg("Importer failed or returned zero entries")
} else {
log.Info().Str("importType", reflect.TypeOf(i).String()).Msg("Suitable converter found!")
break
}
}
@ -40,8 +50,24 @@ func Import(file string) {
return
}
// TODO store author pictures locally (and mutate wm for local URL)
// TODO strip content + trim?
// TODO save converted data in db
// TODO whitelist domains?
log.Info().Msg("Conversion succeeded, persisting to data storage...")
recv := &recv.Receiver{
RestClient: ib.RestClient,
Conf: ib.Conf,
Repo: ib.Repo,
Notifier: &notifier.StringNotifier{},
}
for _, wm := range convertedData {
mention := mf.Mention{
Source: wm.Source,
Target: wm.Target,
}
ib.Conf.AddToWhitelist(mention.SourceDomain())
recv.ProcessAuthorPicture(wm)
recv.ProcessWhitelistedMention(mention, wm)
}
log.Info().Msg("All done, enjoy your go-jammed mentions!")
}

56
app/external/importer_test.go vendored Normal file
View File

@ -0,0 +1,56 @@
package external
import (
"brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db"
"brainbaking.com/go-jamming/mocks"
"github.com/stretchr/testify/assert"
"os"
"sort"
"testing"
)
// cnf is the package-level configuration handed to the repository and the
// ImportBootstrapper in TestImport. It starts out with chrisburnell.com as the
// only allowed webmention source and the only whitelisted domain.
// NOTE(review): TestImport asserts on cnf.Whitelist after the import has run, so
// this value appears to be mutated by the code under test - confirm before
// sharing it with other tests.
var (
cnf = &common.Config{
BaseURL: "http://localhost:1337/",
Port: 1337,
Token: "miauwkes",
AllowedWebmentionSources: []string{"chrisburnell.com"},
Blacklist: []string{},
Whitelist: []string{"chrisburnell.com"},
}
)
// TestImport feeds the webmention.io fixture through ImportBootstrapper and
// verifies that the converted mentions are stored in the repository and that
// the source domains of the imported mentions show up in the whitelist.
func TestImport(t *testing.T) {
	repo := db.NewMentionRepo(cnf)

	// The relative path handed to the mock makes every author picture GET fail.
	// Without that this test is REALLY slow; with it, pictures fall back to the
	// anonymous one.
	client := &mocks.RestClientMock{
		GetBodyFunc: mocks.RelPathGetBodyFunc("../../../mocks/"),
	}
	importer := ImportBootstrapper{
		RestClient: client,
		Conf:       cnf,
		Repo:       repo,
	}

	// Remove config.json (presumably written while whitelisting - verify) and
	// wipe the database once the test is over.
	t.Cleanup(func() {
		os.Remove("config.json")
		db.Purge()
	})

	importer.Import("../../mocks/external/wmio.json")

	mentions := repo.GetAll("chrisburnell.com").Data
	assert.Equal(t, 25, len(mentions))

	// Order newest first so the index-based assertions below are stable.
	sort.Slice(mentions, func(a, b int) bool {
		return mentions[a].PublishedDate().After(mentions[b].PublishedDate())
	})

	newest := mentions[0]
	assert.Equal(t, "https://chrisburnell.com/note/1655219889/", newest.Source)
	assert.Equal(t, "/pictures/anonymous", newest.Author.Picture)
	assert.Equal(t, "", mentions[10].Name)
	assert.Equal(t, "https://jacky.wtf/2022/5/BRQo liked a post https://chrisburnell.com/article/changing-with-the-times/", mentions[20].Content)

	for _, domain := range []string{"jacky.wtf", "martymcgui.re"} {
		assert.Contains(t, cnf.Whitelist, domain)
	}
}

View File

@ -106,17 +106,18 @@ func convert(wmio WebmentionIOMention) *mf.IndiewebData {
}
func nameOf(wmio WebmentionIOMention, iType mf.MfType) string {
if iType == mf.TypeReply {
if (iType == mf.TypeReply || iType == mf.TypeLike) && wmio.Data.Name == "" {
return wmio.Data.Content
}
return wmio.Data.Name
}
func contentOf(wmio WebmentionIOMention, iType mf.MfType) string {
if iType == mf.TypeReply {
return wmio.Activity.Sentence
content := wmio.Data.Content
if iType == mf.TypeReply || (iType == mf.TypeLike && content == "") {
content = wmio.Activity.Sentence
}
return wmio.Data.Content
return common.Shorten(content)
}
// typeOf returns the mf.MfType from a wmio mention.

View File

@ -4,8 +4,94 @@ import (
"brainbaking.com/go-jamming/app/mf"
"github.com/stretchr/testify/assert"
"testing"
"time"
)
func TestTryImportPublishedDates(t *testing.T) {
wmio := &WebmentionIOImporter{}
cases := []struct {
label string
mention string
expectedDate string
}{
{
"no dates reverts to first",
`{ "links": [ { } ] }`,
time.Time{}.Format(mf.DateFormatWithTimeZone),
},
{
"no published date reverts to verified date",
`{ "links": [ { "verified_date": "2022-05-25T14:28:10+00:00" } ] }`,
"2022-05-25T14:28:10+00:00",
},
{
"published date present takes preference over rest",
`{ "links": [ { "data": { "published": "2020-01-25T14:28:10+00:00" }, "verified_date": "2022-05-25T14:28:10+00:00" } ] }`,
"2020-01-25T14:28:10+00:00",
},
}
for _, tc := range cases {
t.Run(tc.label, func(t *testing.T) {
res, err := wmio.TryImport([]byte(tc.mention))
assert.NoError(t, err)
assert.Equal(t, tc.expectedDate, res[0].Published)
})
}
}
// TestTryImportErrorIfInvalidFormat verifies that TryImport reports an error
// when the input is not in the expected format.
func TestTryImportErrorIfInvalidFormat(t *testing.T) {
	importer := &WebmentionIOImporter{}

	_, err := importer.TryImport([]byte("haha"))

	assert.Error(t, err)
}
// TestTryImportForLikeWithMissingAuthor imports a single "like" whose data has
// null name/content and no author information. It expects the activity sentence
// to be used as content while name and author fields stay empty.
func TestTryImportForLikeWithMissingAuthor(t *testing.T) {
	const (
		likeSource = "https://jacky.wtf/2022/5/BRQo"
		likeTarget = "https://chrisburnell.com/article/changing-with-the-times/"
	)

	// The fixture is kept verbatim; only the Go code around it is laid out differently.
	payload := []byte(`{ "links": [
{
"source": "https://jacky.wtf/2022/5/BRQo",
"verified": true,
"verified_date": "2022-05-25T14:28:10+00:00",
"id": 1404286,
"private": false,
"data": {
"url": "https://jacky.wtf/2022/5/BRQo",
"name": null,
"content": null,
"published": "2022-05-25T14:26:12+00:00",
"published_ts": 1653488772
},
"activity": {
"type": "like",
"sentence": "https://jacky.wtf/2022/5/BRQo liked a post https://chrisburnell.com/article/changing-with-the-times/",
"sentence_html": "<a href=\"https://jacky.wtf/2022/5/BRQo\">someone</a> liked a post <a href=\"https://chrisburnell.com/article/changing-with-the-times/\">https://chrisburnell.com/article/changing-with-the-times/</a>"
},
"target": "https://chrisburnell.com/article/changing-with-the-times/"
}
] }`)

	importer := &WebmentionIOImporter{}
	imported, err := importer.TryImport(payload)
	assert.NoError(t, err)
	assert.Equal(t, 1, len(imported))

	like := imported[0]
	assert.Equal(t, likeTarget, like.Target)
	assert.Equal(t, likeSource, like.Source)
	assert.Equal(t, mf.TypeLike, like.IndiewebType)
	assert.Equal(t, "https://jacky.wtf/2022/5/BRQo liked a post https://chrisburnell.com/article/changing-with-the-times/", like.Content)
	assert.Equal(t, "", like.Name)
	assert.Equal(t, likeSource, like.Url)
	assert.Equal(t, "2022-05-25T14:26:12+00:00", like.Published)

	// No author information in the payload: both author fields stay empty.
	assert.Equal(t, "", like.Author.Name)
	assert.Equal(t, "", like.Author.Picture)
}
func TestTryImportForReply(t *testing.T) {
wmio := &WebmentionIOImporter{}
mention := `{ "links": [

View File

@ -100,13 +100,6 @@ func PublishedNow() string {
return common.Now().UTC().Format(DateFormatWithTimeZone)
}
func shorten(txt string) string {
if len(txt) <= 250 {
return txt
}
return txt[:250] + "..."
}
// Go stuff: entry.Properties["name"][0].(string),
// JS stuff: hEntry.properties?.name?.[0]
// The problem: convoluted syntax and no optional chaining!
@ -273,12 +266,12 @@ func Url(hEntry *microformats.Microformat, source string) string {
func Content(hEntry *microformats.Microformat) string {
bridgyTwitterContent := Str(hEntry, "bridgy-twitter-content")
if bridgyTwitterContent != "" {
return shorten(bridgyTwitterContent)
return common.Shorten(bridgyTwitterContent)
}
summary := Str(hEntry, "summary")
if summary != "" {
return shorten(summary)
return common.Shorten(summary)
}
contentEntry := Map(hEntry, "content")["value"]
return shorten(contentEntry)
return common.Shorten(contentEntry)
}

View File

@ -1,4 +1,4 @@
package mocks
package notifier
import (
"brainbaking.com/go-jamming/app/mf"

View File

@ -28,7 +28,7 @@ var (
errPicUnableToDownload = errors.New("Unable to download author picture")
errPicNoRealImage = errors.New("Downloaded author picture is not a real image")
errPicUnableToSave = errors.New("Unable to save downloaded author picture")
errWontDownloadBecauseOfPrivacy = errors.New("Will not save locally because it's form a silo domain")
errWontDownloadBecauseOfPrivacy = errors.New("Will not save locally because it's from a silo domain")
)
func (recv *Receiver) Receive(wm mf.Mention) {
@ -62,16 +62,16 @@ func (recv *Receiver) processSourceBody(body string, wm mf.Mention) {
data := microformats.Parse(strings.NewReader(body), wm.SourceUrl())
indieweb := recv.convertBodyToIndiewebData(body, wm, data)
recv.processAuthorPicture(indieweb)
recv.ProcessAuthorPicture(indieweb)
if recv.Conf.IsWhitelisted(wm.Source) {
recv.processWhitelistedMention(wm, indieweb)
recv.ProcessWhitelistedMention(wm, indieweb)
} else {
recv.processMentionInModeration(wm, indieweb)
recv.ProcessMentionInModeration(wm, indieweb)
}
}
func (recv *Receiver) processMentionInModeration(wm mf.Mention, indieweb *mf.IndiewebData) {
func (recv *Receiver) ProcessMentionInModeration(wm mf.Mention, indieweb *mf.IndiewebData) {
key, err := recv.Repo.InModeration(wm, indieweb)
if err != nil {
log.Error().Err(err).Stringer("wm", wm).Msg("Failed to save new mention to in moderation db")
@ -83,7 +83,7 @@ func (recv *Receiver) processMentionInModeration(wm mf.Mention, indieweb *mf.Ind
log.Info().Str("key", key).Msg("OK: Webmention processed, in moderation.")
}
func (recv *Receiver) processWhitelistedMention(wm mf.Mention, indieweb *mf.IndiewebData) {
func (recv *Receiver) ProcessWhitelistedMention(wm mf.Mention, indieweb *mf.IndiewebData) {
key, err := recv.Repo.Save(wm, indieweb)
if err != nil {
log.Error().Err(err).Stringer("wm", wm).Msg("Failed to save new mention to db")
@ -95,7 +95,7 @@ func (recv *Receiver) processWhitelistedMention(wm mf.Mention, indieweb *mf.Indi
log.Info().Str("key", key).Msg("OK: Webmention processed, in whitelist.")
}
func (recv *Receiver) processAuthorPicture(indieweb *mf.IndiewebData) {
func (recv *Receiver) ProcessAuthorPicture(indieweb *mf.IndiewebData) {
if indieweb.Author.Picture != "" {
err := recv.saveAuthorPictureLocally(indieweb)
if err != nil {
@ -153,7 +153,7 @@ func (recv *Receiver) parseBodyAsNonIndiewebSite(body string, wm mf.Mention) *mf
// saveAuthorPictureLocally tries to download the author picture and checks if it's valid based on img header.
// If it succeeds, it alters the picture path to a local /pictures/x one.
// If it fails, it returns an error.
// This refuses to download from silo sources such as brid.gy because of privacy concerns.
// If strict is true, this refuses to download from silo sources such as brid.gy because of privacy concerns.
func (recv *Receiver) saveAuthorPictureLocally(indieweb *mf.IndiewebData) error {
srcDomain := rest.Domain(indieweb.Source)
if common.Includes(rest.SiloDomains, srcDomain) {

View File

@ -2,6 +2,7 @@ package recv
import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/app/notifier"
"brainbaking.com/go-jamming/db"
"encoding/json"
"errors"
@ -255,7 +256,7 @@ func TestReceiveFromNotInWhitelistSavesInModerationAndNotifies(t *testing.T) {
}
repo := db.NewMentionRepo(cnf)
t.Cleanup(db.Purge)
notifierMock := &mocks.StringNotifier{
notifierMock := &notifier.StringNotifier{
Conf: cnf,
Output: "",
}
@ -415,7 +416,7 @@ func TestProcessAuthorPictureAnonymizesIfEmpty(t *testing.T) {
Picture: "",
},
}
recv.processAuthorPicture(indieweb)
recv.ProcessAuthorPicture(indieweb)
assert.Equal(t, "/pictures/anonymous", indieweb.Author.Picture)
}

8
common/strings.go Normal file
View File

@ -0,0 +1,8 @@
package common
// Shorten limits txt to at most 250 bytes of content and appends "..." when it
// had to cut. Text of 250 bytes or fewer is returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 character: if byte 250 is a
// continuation byte, the cut moves back to the start of that character, so the
// result may hold slightly fewer than 250 bytes of the original text. Plain
// ASCII input yields exactly the first 250 bytes followed by "...".
func Shorten(txt string) string {
	const maxLen = 250
	if len(txt) <= maxLen {
		return txt
	}

	cut := maxLen
	// UTF-8 continuation bytes look like 10xxxxxx. A character is at most four
	// bytes long, so stepping back over at most three continuation bytes is
	// enough to reach its leading byte; the bound also keeps malformed input
	// from walking the cut back indefinitely.
	for back := 0; back < 3 && txt[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return txt[:cut] + "..."
}

15
main.go
View File

@ -4,6 +4,7 @@ import (
"brainbaking.com/go-jamming/app/external"
"brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db"
"brainbaking.com/go-jamming/rest"
"flag"
"os"
@ -42,7 +43,7 @@ func main() {
}
if importing {
external.Import(*importFile)
importWebmentionFile(*importFile)
os.Exit(0)
}
@ -50,6 +51,18 @@ func main() {
app.Start()
}
// importWebmentionFile loads the application configuration, wires an
// external.ImportBootstrapper with an HTTP client and a mention repository
// built from that configuration, and lets it import the given file.
func importWebmentionFile(file string) {
	log.Info().Str("file", file).Msg("Starting import...")

	cnf := common.Configure()
	repo := db.NewMentionRepo(cnf)

	importer := external.ImportBootstrapper{
		Conf:       cnf,
		Repo:       repo,
		RestClient: &rest.HttpClient{},
	}
	importer.Import(file)
}
func blacklistDomain(domain string) {
log.Info().Str("domain", domain).Msg("Blacklisting...")
config := common.Configure()