receive webmention json processing + testify

This commit is contained in:
Wouter Groeneveld 2021-04-09 10:12:14 +02:00
parent bacc66872d
commit 97be3c8323
7 changed files with 275 additions and 80 deletions

View File

@ -0,0 +1,12 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="GrazieInspection" enabled="false" level="TYPO" enabled_by_default="false" />
<inspection_tool class="LanguageDetectionInspection" enabled="false" level="WARNING" enabled_by_default="false" />
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
<option name="processCode" value="true" />
<option name="processLiterals" value="true" />
<option name="processComments" value="true" />
</inspection_tool>
</profile>
</component>

View File

@ -1,11 +1,46 @@
package webmention
import "willnorris.com/go/microformats"
import (
"time"
"willnorris.com/go/microformats"
)
// DateFormat is a time layout in Go reference-time notation: date and time
// with second precision, separated by "T", without any zone suffix.
// publishedNow formats its result with this same layout string.
const (
DateFormat = "2006-01-02T15:04:05"
)
// indiewebAuthor describes who wrote the mentioning page.
// Fields are exported so encoding/json includes them under the tagged keys.
type indiewebAuthor struct {
// Name is the author's display name; for pages without an h-entry the
// source URL is stored here instead.
Name string `json:"name"`
// Picture is the author's photo value; left empty when none is available.
Picture string `json:"picture"`
}
// indiewebData is the record stored for a received webmention. It is
// marshalled to JSON using the lower-case keys given in the field tags.
type indiewebData struct {
// Author is the writer of the mentioning page.
Author indiewebAuthor `json:"author"`
// Name is the entry name, or the page title for non-indieweb sources.
Name string `json:"name"`
// Content is the (possibly shortened) text of the mention.
Content string `json:"content"`
// Published is kept as a string: either the entry's own "published"
// property verbatim or a timestamp generated by publishedNow.
Published string `json:"published"`
// Url is the permalink of the mentioning entry; falls back to the source URL.
Url string `json:"url"`
// IndiewebType is serialized as "type" and holds "like", "bookmark" or "mention".
IndiewebType string `json:"type"`
// Source is the URL of the page that sent the webmention.
Source string `json:"source"`
// Target is the URL of the page being mentioned.
Target string `json:"target"`
}
// now supplies the current time to publishedNow. It is a package-level
// variable so tests can swap in a fixed clock.
var now = time.Now

// publishedNow returns the current instant, converted to UTC and then shifted
// by utcOffset minutes, rendered as "YYYY-MM-DDTHH:MM:SS" without a zone suffix.
func publishedNow(utcOffset int) string {
	shift := time.Duration(utcOffset) * time.Minute
	stamp := now().UTC().Add(shift)
	return stamp.Format("2006-01-02T15:04:05")
}
// shorten limits txt to at most 250 bytes and appends "..." when something was
// cut off. Text of 250 bytes or fewer is returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 character: if byte 250 falls in
// the middle of one, the whole character is dropped, so the result stays valid
// UTF-8 whenever the input was. For plain ASCII input the cut is at byte 250.
func shorten(txt string) string {
	const maxLen = 250
	if len(txt) <= maxLen {
		return txt
	}
	// Ranging over a string yields the byte offset at which each character
	// starts; remember the last such offset that does not exceed the limit.
	cut := 0
	for i := range txt {
		if i > maxLen {
			break
		}
		cut = i
	}
	return txt[:cut] + "..."
}
// Go stuff: entry.Properties["name"][0].(string),
// JS stuff: hEntry.properties?.name?.[0]
// The problem: convoluted syntax and no optional chaining!
func mfstr(mf *microformats.Microformat, key string) string {
func mfStr(mf *microformats.Microformat, key string) string {
val := mf.Properties[key]
if len(val) == 0 {
return ""
@ -24,7 +59,7 @@ func mfstr(mf *microformats.Microformat, key string) string {
return str
}
func mfmap(mf *microformats.Microformat, key string) map[string]string {
func mfMap(mf *microformats.Microformat, key string) map[string]string {
val := mf.Properties[key]
if len(val) == 0 {
return map[string]string{}
@ -36,7 +71,7 @@ func mfmap(mf *microformats.Microformat, key string) map[string]string {
return mapVal
}
func mfprop(mf *microformats.Microformat, key string) *microformats.Microformat {
func mfProp(mf *microformats.Microformat, key string) *microformats.Microformat {
val := mf.Properties[key]
if len(val) == 0 {
return &microformats.Microformat{

View File

@ -2,14 +2,17 @@
package webmention
import (
"fmt"
"net/url"
"strings"
"os"
"crypto/md5"
"encoding/json"
"fmt"
"github.com/wgroeneveld/go-jamming/common"
"github.com/wgroeneveld/go-jamming/rest"
"io/fs"
"io/ioutil"
"net/url"
"os"
"regexp"
"strings"
"github.com/rs/zerolog/log"
"willnorris.com/go/microformats"
@ -68,21 +71,6 @@ func getHEntry(data *microformats.Data) *microformats.Microformat {
return nil
}
type indiewebAuthor struct {
name string
picture string
}
type indiewebData struct {
author indiewebAuthor
name string
content string
published string // TODO to a date
url string
dateType string // TODO json property "type"
source string
target string
}
func (recv *receiver) processSourceBody(body string, wm webmention) {
if !strings.Contains(body, wm.target) {
@ -95,63 +83,119 @@ func (recv *receiver) processSourceBody(body string, wm webmention) {
hEntry := getHEntry(data)
var indieweb *indiewebData
if hEntry == nil {
indieweb = parseBodyAsNonIndiewebSite(body, wm)
indieweb = recv.parseBodyAsNonIndiewebSite(body, wm)
} else {
indieweb = parseBodyAsIndiewebSite(hEntry, wm)
indieweb = recv.parseBodyAsIndiewebSite(hEntry, wm)
}
saveWebmentionToDisk(wm, indieweb)
recv.saveWebmentionToDisk(wm, indieweb)
log.Info().Str("file", wm.asPath(recv.conf)).Msg("OK: webmention processed.")
}
func saveWebmentionToDisk(wm webmention, indieweb *indiewebData) {
// saveWebmentionToDisk serializes indieweb to JSON and writes it to the file
// path returned by wm.asPath(recv.conf). Failures are logged, not returned.
func (recv *receiver) saveWebmentionToDisk(wm webmention, indieweb *indiewebData) {
	jsonData, err := json.Marshal(indieweb)
	if err != nil {
		log.Err(err).Msg("Unable to serialize webmention into JSON")
		// There is nothing valid to persist; writing the nil payload would
		// leave an empty file behind that looks like a stored webmention.
		return
	}

	if err := ioutil.WriteFile(wm.asPath(recv.conf), jsonData, fs.ModePerm); err != nil {
		log.Err(err).Msg("Unable to save webmention to disk")
	}
}
// TODO I'm smelling very unstable code, apply https://golang.org/doc/effective_go#recover here?
// see https://github.com/willnorris/microformats/blob/main/microformats.go
func parseBodyAsIndiewebSite(hEntry *microformats.Microformat, wm webmention) *indiewebData {
name := mfstr(hEntry, "name")
authorName := mfstr(mfprop(hEntry, "author"), "name")
if authorName == "" {
authorName = mfprop(hEntry, "author").Value
}
// TODO sometimes it's picture.value??
pic := mfstr(mfprop(hEntry, "author"), "photo")
summary := mfstr(hEntry, "summary")
contentEntry := mfmap(hEntry, "content")["value"]
bridgyTwitterContent := mfstr(hEntry, "bridgy-twitter-content")
func (recv *receiver) parseBodyAsIndiewebSite(hEntry *microformats.Microformat, wm webmention) *indiewebData {
name := mfStr(hEntry, "name")
pic := mfStr(mfProp(hEntry, "author"), "photo")
mfType := determineMfType(hEntry)
return &indiewebData{
name: name,
author: indiewebAuthor{
name: authorName,
picture: pic,
Name: name,
Author: indiewebAuthor{
Name: determineAuthorName(hEntry),
Picture: pic,
},
content: determineContent(summary, contentEntry, bridgyTwitterContent),
source: wm.source,
target: wm.target,
Content: determineContent(hEntry),
Url: determineUrl(hEntry, wm.source),
Published: determinePublishedDate(hEntry, recv.conf.UtcOffset),
Source: wm.source,
Target: wm.target,
IndiewebType: mfType,
}
//len(entry.Properties["hoopw"])
}
func shorten(txt string) string {
if len(txt) <= 250 {
return txt
func determinePublishedDate(hEntry *microformats.Microformat, utcOffset int) string {
publishedDate := mfStr(hEntry, "published")
if publishedDate == "" {
return publishedNow(utcOffset)
}
return txt[0:250] + "..."
return publishedDate
}
func determineContent(summary string, contentEntry string, bridgyTwitterContent string) string {
// determineAuthorName returns the "name" property of the entry's "author"
// property. When that name is empty it falls back to the author's plain Value.
func determineAuthorName(hEntry *microformats.Microformat) string {
	author := mfProp(hEntry, "author")
	if name := mfStr(author, "name"); name != "" {
		return name
	}
	return author.Value
}
// determineMfType classifies the entry: "like" when it has a non-empty
// "like-of" property, otherwise "bookmark" when it has a non-empty
// "bookmark-of" property, and "mention" in every other case.
func determineMfType(hEntry *microformats.Microformat) string {
	switch {
	case mfStr(hEntry, "like-of") != "":
		return "like"
	case mfStr(hEntry, "bookmark-of") != "":
		return "bookmark"
	default:
		return "mention"
	}
}
// determineUrl picks the permalink to store for the entry, in this order:
//  1. the "uid" property, but only when it starts with "http" (Mastodon uids
//     start with "tag:server" and are skipped, while indieweb uids from other
//     sources are wanted);
//  2. the "url" property, when non-empty;
//  3. the webmention source.
func determineUrl(hEntry *microformats.Microformat, source string) string {
	// An empty uid can never have the "http" prefix, so one check covers both.
	if uid := mfStr(hEntry, "uid"); strings.HasPrefix(uid, "http") {
		return uid
	}
	if entryURL := mfStr(hEntry, "url"); entryURL != "" {
		return entryURL
	}
	return source
}
func determineContent(hEntry *microformats.Microformat) string {
bridgyTwitterContent := mfStr(hEntry, "bridgy-twitter-content")
if bridgyTwitterContent != "" {
return shorten(bridgyTwitterContent)
} else if summary != "" {
}
summary := mfStr(hEntry, "summary")
if summary != "" {
return shorten(summary)
}
contentEntry := mfMap(hEntry, "content")["value"]
return shorten(contentEntry)
}
func parseBodyAsNonIndiewebSite(body string, wm webmention) *indiewebData {
return nil
// htmlTitlePattern captures the text of the first <title> element. It is
// compiled once at package level rather than on every call. The flags make
// the match case-insensitive (i) and let "." span newlines (s), so titles
// written across several lines or as <TITLE> are found as well.
var htmlTitlePattern = regexp.MustCompile(`(?is)<title>(.*?)</title>`)

// parseBodyAsNonIndiewebSite builds the stored record for a source page that
// has no h-entry. The page title, trimmed of surrounding whitespace, is used as
// both name and content; when no non-blank title is found the source URL is
// used instead. The author name and url are the source URL, the type is always
// "mention", and the published date is generated from the current time using
// the configured UTC offset.
func (recv *receiver) parseBodyAsNonIndiewebSite(body string, wm webmention) *indiewebData {
	title := wm.source
	if match := htmlTitlePattern.FindStringSubmatch(body); match != nil {
		if found := strings.TrimSpace(match[1]); found != "" {
			title = found
		}
	}

	return &indiewebData{
		Author: indiewebAuthor{
			Name: wm.source,
		},
		Name:         title,
		Content:      title,
		Published:    publishedNow(recv.conf.UtcOffset),
		Url:          wm.source,
		IndiewebType: "mention",
		Source:       wm.source,
		Target:       wm.target,
	}
}

View File

@ -3,8 +3,11 @@ package webmention
import (
"errors"
"github.com/stretchr/testify/assert"
"io/ioutil"
"os"
"testing"
"time"
"github.com/wgroeneveld/go-jamming/common"
"github.com/wgroeneveld/go-jamming/mocks"
@ -36,24 +39,86 @@ func writeSomethingTo(filename string) {
defer file.Close()
}
func TestReceiveTargetExistsSavesWebmentionToDisk(t *testing.T) {
os.MkdirAll("testdata/jefklakscodex.com", os.ModePerm)
//defer os.RemoveAll("testdata")
wm := webmention{
source: "https://brainbaking.com",
target: "https://jefklakscodex.com/articles",
}
//filename := wm.asPath(conf)
receiver := &receiver {
conf: conf,
restClient: &mocks.RestClientMock{
GetBodyFunc: mocks.BodyFunc(t, "../../mocks/valid-indieweb-source.html"),
func TestReceive(t *testing.T) {
cases := []struct {
label string
wm webmention
json string
} {
{
label: "receive a webmention bookmark via twitter",
wm: webmention{
source: "https://brainbaking.com/valid-bridgy-twitter-source.html",
target: "https://brainbaking.com/post/2021/03/the-indieweb-mixed-bag",
},
json: `{"author":{"name":"Jamie Tanna","picture":"https://www.jvt.me/img/profile.png"},"name":"","content":"Recommended read:\nThe IndieWeb Mixed Bag - Thoughts about the (d)evolution of blog interactions\nhttps://brainbaking.com/post/2021/03/the-indieweb-mixed-bag/","published":"2021-03-15T12:42:00+0000","url":"https://brainbaking.com/mf2/2021/03/1bkre/","type":"bookmark","source":"https://brainbaking.com/valid-bridgy-twitter-source.html","target":"https://brainbaking.com/post/2021/03/the-indieweb-mixed-bag"}`,
},
{
label: "receive a brid.gy webmention like",
wm: webmention{
source: "https://brainbaking.com/valid-bridgy-like.html",
// wrapped in a a class="u-like-of" tag
target: "https://brainbaking.com/valid-indieweb-target.html",
},
// no dates in bridgy-to-mastodon likes...
json: `{"author":{"name":"Stampeding Longhorn","picture":"https://cdn.social.linux.pizza/v1/AUTH_91eb37814936490c95da7b85993cc2ff/sociallinuxpizza/accounts/avatars/000/185/996/original/9e36da0c093cfc9b.png"},"name":"","content":"","published":"2020-01-01T12:30:00","url":"https://chat.brainbaking.com/notice/A4nx1rFwKUJYSe4TqK#favorited-by-A4nwg4LYyh4WgrJOXg","type":"like","source":"https://brainbaking.com/valid-bridgy-like.html","target":"https://brainbaking.com/valid-indieweb-target.html"}`,
},
{
label: "receive a brid.gy webmention that has a url and photo without value",
wm: webmention{
source: "https://brainbaking.com/valid-bridgy-source.html",
target: "https://brainbaking.com/valid-indieweb-target.html",
},
json: `{"author":{"name":"Stampeding Longhorn", "picture":"https://cdn.social.linux.pizza/v1/AUTH_91eb37814936490c95da7b85993cc2ff/sociallinuxpizza/accounts/avatars/000/185/996/original/9e36da0c093cfc9b.png"}, "content":"@wouter The cat pictures are awesome. for jest tests!", "name":"@wouter The cat pictures are awesome. for jest tests!", "published":"2021-03-02T16:17:18.000Z", "source":"https://brainbaking.com/valid-bridgy-source.html", "target":"https://brainbaking.com/valid-indieweb-target.html", "type":"mention", "url":"https://social.linux.pizza/@StampedingLonghorn/105821099684887793"}`,
},
{
label: "receive saves a JSON file of indieweb-metadata if all is valid",
wm: webmention{
source: "https://brainbaking.com/valid-indieweb-source.html",
target: "https://jefklakscodex.com/articles",
},
json: `{"author":{"name":"Wouter Groeneveld","picture":"https://brainbaking.com//img/avatar.jpg"},"name":"I just learned about https://www.inklestudios.com/...","content":"This is cool, I just found out about valid indieweb target - so cool","published":"2021-03-06T12:41:00","url":"https://brainbaking.com/notes/2021/03/06h12m41s48/","type":"mention","source":"https://brainbaking.com/valid-indieweb-source.html","target":"https://jefklakscodex.com/articles"}`,
},
{
label: "receive saves a JSON file of indieweb-metadata with summary as content if present",
wm: webmention{
source: "https://brainbaking.com/valid-indieweb-source-with-summary.html",
target: "https://brainbaking.com/valid-indieweb-target.html",
},
json: `{"author":{"name":"Wouter Groeneveld", "picture":"https://brainbaking.com//img/avatar.jpg"}, "content":"This is cool, this is a summary!", "name":"I just learned about https://www.inklestudios.com/...", "published":"2021-03-06T12:41:00", "source":"https://brainbaking.com/valid-indieweb-source-with-summary.html", "target":"https://brainbaking.com/valid-indieweb-target.html", "type":"mention", "url":"https://brainbaking.com/notes/2021/03/06h12m41s48/"}`,
},
{
label: "receive saves a JSON file of non-indieweb-data such as title if all is valid",
wm: webmention{
source: "https://brainbaking.com/valid-nonindieweb-source.html",
target: "https://brainbaking.com/valid-indieweb-target.html",
},
json: `{"author":{"name":"https://brainbaking.com/valid-nonindieweb-source.html", "picture":""}, "content":"Diablo 2 Twenty Years Later: A Retrospective | Jefklaks Codex", "name":"Diablo 2 Twenty Years Later: A Retrospective | Jefklaks Codex", "published":"2020-01-01T12:30:00", "source":"https://brainbaking.com/valid-nonindieweb-source.html", "target":"https://brainbaking.com/valid-indieweb-target.html", "type":"mention", "url":"https://brainbaking.com/valid-nonindieweb-source.html"}`,
},
}
receiver.receive(wm)
for _, tc := range cases {
t.Run(tc.label, func(t *testing.T) {
os.MkdirAll("testdata/brainbaking.com", os.ModePerm)
os.MkdirAll("testdata/jefklakscodex.com", os.ModePerm)
defer os.RemoveAll("testdata")
now = func() time.Time {
return time.Date(2020, time.January, 1, 12, 30, 0, 0, time.UTC)
}
receiver := &receiver {
conf: conf,
restClient: &mocks.RestClientMock{
GetBodyFunc: mocks.RelPathGetBodyFunc(t),
},
}
receiver.receive(tc.wm)
actualJson, _ := ioutil.ReadFile(tc.wm.asPath(conf))
assert.JSONEq(t, tc.json, string(actualJson))
})
}
}
func TestReceiveTargetDoesNotExistAnymoreDeletesPossiblyOlderWebmention(t *testing.T) {
@ -78,9 +143,26 @@ func TestReceiveTargetDoesNotExistAnymoreDeletesPossiblyOlderWebmention(t *testi
}
receiver.receive(wm)
if _, err := os.Stat(filename); err == nil {
t.Fatalf("Expected possibly older webmention to be deleted, but it wasn't!")
}
assert.NoFileExists(t, filename)
}
// TestReceiveTargetThatDoesNotPointToTheSourceDoesNothing pre-creates a file at
// the webmention's storage path, runs receive for a webmention whose source and
// target are the same mock page, and asserts that the file is gone afterwards.
func TestReceiveTargetThatDoesNotPointToTheSourceDoesNothing(t *testing.T) {
	mention := webmention{
		source: "https://brainbaking.com/valid-indieweb-source.html",
		target: "https://brainbaking.com/valid-indieweb-source.html",
	}
	storedAt := mention.asPath(conf)
	writeSomethingTo(storedAt)

	// Named recv rather than receiver so the receiver type is not shadowed.
	recv := &receiver{
		conf: conf,
		restClient: &mocks.RestClientMock{
			GetBodyFunc: mocks.RelPathGetBodyFunc(t),
		},
	}
	recv.receive(mention)

	assert.NoFileExists(t, storedAt)
}
func TestProcessSourceBodyAbortsIfNoMentionOfTargetFoundInSourceHtml(t *testing.T) {
@ -91,15 +173,11 @@ func TestProcessSourceBodyAbortsIfNoMentionOfTargetFoundInSourceHtml(t *testing.
source: "https://brainbaking.com",
target: "https://jefklakscodex.com/articles",
}
filename := wm.asPath(conf)
receiver := &receiver {
conf: conf,
}
receiver.processSourceBody("<html>my nice body</html>", wm)
if _, err := os.Stat(filename); err == nil {
t.Fatalf("Expected no file to be created!")
}
assert.NoFileExists(t, wm.asPath(conf))
}

1
go.mod
View File

@ -5,5 +5,6 @@ go 1.16
require (
github.com/gorilla/mux v1.8.0
github.com/rs/zerolog v1.21.0
github.com/stretchr/testify v1.7.0
willnorris.com/go/microformats v1.1.1
)

11
go.sum
View File

@ -1,14 +1,22 @@
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=
github.com/rs/zerolog v1.21.0 h1:Q3vdXlfLNT+OftyBHsU0Y445MD+8m8axjKgf2si0QcM=
github.com/rs/zerolog v1.21.0/go.mod h1:ZPhntP/xmq1nnND05hhpAh2QMhSsA4UN3MGZ6O2J3hM=
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
@ -36,5 +44,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
willnorris.com/go/microformats v1.1.1 h1:h5tk2luq6KBIRcwMGdksxdeea4GGuWrRFie5460OAbo=
willnorris.com/go/microformats v1.1.1/go.mod h1:kvVnWrkkEscVAIITCEoiTX66Hcyg59C7q0E49mb9TJ0=

View File

@ -2,6 +2,7 @@
package mocks
import (
"strings"
"testing"
"io/ioutil"
"net/http"
@ -21,6 +22,19 @@ func (m *RestClientMock) GetBody(url string) (string, error) {
return m.GetBodyFunc(url)
}
// RelPathGetBodyFunc returns a GetBody stub that serves mock HTML from disk.
// The leading "https://brainbaking.com/" of the requested URL is stripped and
// the remainder is resolved relative to "../../mocks/", for example:
//
//	https://brainbaking.com/something-something.html
//	-> ../../mocks/something-something.html
//
// When the mock file cannot be read, the failure is reported on t and also
// returned to the caller, so the code under test does not mistake a missing
// fixture for a successfully fetched empty body.
func RelPathGetBodyFunc(t *testing.T) func(string) (string, error) {
	return func(url string) (string, error) {
		// Only the leading host part is removed; the rest of the URL is kept as is.
		mockfile := "../../mocks/" + strings.TrimPrefix(url, "https://brainbaking.com/")
		html, err := ioutil.ReadFile(mockfile)
		if err != nil {
			t.Error(err)
			return "", err
		}
		return string(html), nil
	}
}
func BodyFunc(t *testing.T, mockfile string) func(string) (string, error) {
html, err := ioutil.ReadFile(mockfile)
if err != nil {