From 97be3c8323a5ef3bbd4cdd1c8dcd71255c5c23b9 Mon Sep 17 00:00:00 2001 From: wgroeneveld Date: Fri, 9 Apr 2021 10:12:14 +0200 Subject: [PATCH] receive webmention json processing + testify --- .idea/inspectionProfiles/Project_Default.xml | 12 ++ app/webmention/microformats.go | 43 +++++- app/webmention/receive.go | 150 ++++++++++++------- app/webmention/receive_test.go | 124 ++++++++++++--- go.mod | 1 + go.sum | 11 ++ mocks/restclient.go | 14 ++ 7 files changed, 275 insertions(+), 80 deletions(-) create mode 100644 .idea/inspectionProfiles/Project_Default.xml diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..09dbac8 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,12 @@ + + + + \ No newline at end of file diff --git a/app/webmention/microformats.go b/app/webmention/microformats.go index e69827c..176ae9b 100644 --- a/app/webmention/microformats.go +++ b/app/webmention/microformats.go @@ -1,11 +1,46 @@ package webmention -import "willnorris.com/go/microformats" +import ( + "time" + "willnorris.com/go/microformats" +) + +const ( + DateFormat = "2006-01-02T15:04:05" +) + +type indiewebAuthor struct { + Name string `json:"name"` + Picture string `json:"picture"` +} + +type indiewebData struct { + Author indiewebAuthor `json:"author"` + Name string `json:"name"` + Content string `json:"content"` + Published string `json:"published"` + Url string `json:"url"` + IndiewebType string `json:"type"` + Source string `json:"source"` + Target string `json:"target"` +} + +var now = time.Now +func publishedNow(utcOffset int) string { + return now().UTC().Add(time.Duration(utcOffset) * time.Minute).Format("2006-01-02T15:04:05") +} + +func shorten(txt string) string { + if len(txt) <= 250 { + return txt + } + return txt[0:250] + "..." 
+} // Go stuff: entry.Properties["name"][0].(string), // JS stuff: hEntry.properties?.name?.[0] // The problem: convoluted syntax and no optional chaining! -func mfstr(mf *microformats.Microformat, key string) string { +func mfStr(mf *microformats.Microformat, key string) string { val := mf.Properties[key] if len(val) == 0 { return "" @@ -24,7 +59,7 @@ func mfstr(mf *microformats.Microformat, key string) string { return str } -func mfmap(mf *microformats.Microformat, key string) map[string]string { +func mfMap(mf *microformats.Microformat, key string) map[string]string { val := mf.Properties[key] if len(val) == 0 { return map[string]string{} @@ -36,7 +71,7 @@ func mfmap(mf *microformats.Microformat, key string) map[string]string { return mapVal } -func mfprop(mf *microformats.Microformat, key string) *microformats.Microformat { +func mfProp(mf *microformats.Microformat, key string) *microformats.Microformat { val := mf.Properties[key] if len(val) == 0 { return &microformats.Microformat{ diff --git a/app/webmention/receive.go b/app/webmention/receive.go index e0d5b6a..8d75a73 100644 --- a/app/webmention/receive.go +++ b/app/webmention/receive.go @@ -2,14 +2,17 @@ package webmention import ( - "fmt" - "net/url" - "strings" - "os" "crypto/md5" - + "encoding/json" + "fmt" "github.com/wgroeneveld/go-jamming/common" "github.com/wgroeneveld/go-jamming/rest" + "io/fs" + "io/ioutil" + "net/url" + "os" + "regexp" + "strings" "github.com/rs/zerolog/log" "willnorris.com/go/microformats" @@ -68,21 +71,6 @@ func getHEntry(data *microformats.Data) *microformats.Microformat { return nil } -type indiewebAuthor struct { - name string - picture string -} - -type indiewebData struct { - author indiewebAuthor - name string - content string - published string // TODO to a date - url string - dateType string // TODO json property "type" - source string - target string -} func (recv *receiver) processSourceBody(body string, wm webmention) { if !strings.Contains(body, wm.target) { @@ -95,63 
+83,119 @@ func (recv *receiver) processSourceBody(body string, wm webmention) { hEntry := getHEntry(data) var indieweb *indiewebData if hEntry == nil { - indieweb = parseBodyAsNonIndiewebSite(body, wm) + indieweb = recv.parseBodyAsNonIndiewebSite(body, wm) } else { - indieweb = parseBodyAsIndiewebSite(hEntry, wm) + indieweb = recv.parseBodyAsIndiewebSite(hEntry, wm) } - saveWebmentionToDisk(wm, indieweb) + recv.saveWebmentionToDisk(wm, indieweb) log.Info().Str("file", wm.asPath(recv.conf)).Msg("OK: webmention processed.") } -func saveWebmentionToDisk(wm webmention, indieweb *indiewebData) { - +func (recv *receiver) saveWebmentionToDisk(wm webmention, indieweb *indiewebData) { + jsonData, jsonErr := json.Marshal(indieweb) + if jsonErr != nil { + log.Err(jsonErr).Msg("Unable to serialize webmention into JSON") + } + err := ioutil.WriteFile(wm.asPath(recv.conf), jsonData, fs.ModePerm) + if err != nil { + log.Err(err).Msg("Unable to save webmention to disk") + } } // TODO I'm smelling very unstable code, apply https://golang.org/doc/effective_go#recover here? // see https://github.com/willnorris/microformats/blob/main/microformats.go -func parseBodyAsIndiewebSite(hEntry *microformats.Microformat, wm webmention) *indiewebData { - name := mfstr(hEntry, "name") - authorName := mfstr(mfprop(hEntry, "author"), "name") - if authorName == "" { - authorName = mfprop(hEntry, "author").Value - } - // TODO sometimes it's picture.value?? 
- pic := mfstr(mfprop(hEntry, "author"), "photo") - summary := mfstr(hEntry, "summary") - contentEntry := mfmap(hEntry, "content")["value"] - bridgyTwitterContent := mfstr(hEntry, "bridgy-twitter-content") +func (recv *receiver) parseBodyAsIndiewebSite(hEntry *microformats.Microformat, wm webmention) *indiewebData { + name := mfStr(hEntry, "name") + pic := mfStr(mfProp(hEntry, "author"), "photo") + mfType := determineMfType(hEntry) return &indiewebData{ - name: name, - author: indiewebAuthor{ - name: authorName, - picture: pic, + Name: name, + Author: indiewebAuthor{ + Name: determineAuthorName(hEntry), + Picture: pic, }, - content: determineContent(summary, contentEntry, bridgyTwitterContent), - source: wm.source, - target: wm.target, + Content: determineContent(hEntry), + Url: determineUrl(hEntry, wm.source), + Published: determinePublishedDate(hEntry, recv.conf.UtcOffset), + Source: wm.source, + Target: wm.target, + IndiewebType: mfType, } - - //len(entry.Properties["hoopw"]) } -func shorten(txt string) string { - if len(txt) <= 250 { - return txt +func determinePublishedDate(hEntry *microformats.Microformat, utcOffset int) string { + publishedDate := mfStr(hEntry, "published") + if publishedDate == "" { + return publishedNow(utcOffset) } - return txt[0:250] + "..." 
+ return publishedDate } -func determineContent(summary string, contentEntry string, bridgyTwitterContent string) string { +func determineAuthorName(hEntry *microformats.Microformat) string { + authorName := mfStr(mfProp(hEntry, "author"), "name") + if authorName == "" { + return mfProp(hEntry, "author").Value + } + return authorName +} + +func determineMfType(hEntry *microformats.Microformat) string { + likeOf := mfStr(hEntry, "like-of") + if likeOf != "" { + return "like" + } + bookmarkOf := mfStr(hEntry, "bookmark-of") + if bookmarkOf != "" { + return "bookmark" + } + return "mention" +} + +// Mastodon uids start with "tag:server", but we do want indieweb uids from other sources +func determineUrl(hEntry *microformats.Microformat, source string) string { + uid := mfStr(hEntry, "uid") + if uid != "" && strings.HasPrefix(uid, "http") { + return uid + } + url := mfStr(hEntry, "url") + if url != "" { + return url + } + return source +} + +func determineContent(hEntry *microformats.Microformat) string { + bridgyTwitterContent := mfStr(hEntry, "bridgy-twitter-content") if bridgyTwitterContent != "" { return shorten(bridgyTwitterContent) - } else if summary != "" { + } + summary := mfStr(hEntry, "summary") + if summary != "" { return shorten(summary) } + contentEntry := mfMap(hEntry, "content")["value"] return shorten(contentEntry) } -func parseBodyAsNonIndiewebSite(body string, wm webmention) *indiewebData { - return nil +func (recv *receiver) parseBodyAsNonIndiewebSite(body string, wm webmention) *indiewebData { + r := regexp.MustCompile(`<title>(.*?)<\/title>`) + titleMatch := r.FindStringSubmatch(body) + title := wm.source + if titleMatch != nil { + title = titleMatch[1] + } + return &indiewebData{ + Author: indiewebAuthor{ + Name: wm.source, + }, + Name: title, + Content: title, + Published: publishedNow(recv.conf.UtcOffset), + Url: wm.source, + IndiewebType: "mention", + Source: wm.source, + Target: wm.target, + } } diff --git a/app/webmention/receive_test.go 
b/app/webmention/receive_test.go index fb27b40..4a61092 100644 --- a/app/webmention/receive_test.go +++ b/app/webmention/receive_test.go @@ -3,8 +3,11 @@ package webmention import ( "errors" + "github.com/stretchr/testify/assert" + "io/ioutil" "os" "testing" + "time" "github.com/wgroeneveld/go-jamming/common" "github.com/wgroeneveld/go-jamming/mocks" @@ -36,24 +39,86 @@ func writeSomethingTo(filename string) { defer file.Close() } -func TestReceiveTargetExistsSavesWebmentionToDisk(t *testing.T) { - os.MkdirAll("testdata/jefklakscodex.com", os.ModePerm) - //defer os.RemoveAll("testdata") - - wm := webmention{ - source: "https://brainbaking.com", - target: "https://jefklakscodex.com/articles", - } - //filename := wm.asPath(conf) - - receiver := &receiver { - conf: conf, - restClient: &mocks.RestClientMock{ - GetBodyFunc: mocks.BodyFunc(t, "../../mocks/valid-indieweb-source.html"), +func TestReceive(t *testing.T) { + cases := []struct { + label string + wm webmention + json string + } { + { + label: "receive a webmention bookmark via twitter", + wm: webmention{ + source: "https://brainbaking.com/valid-bridgy-twitter-source.html", + target: "https://brainbaking.com/post/2021/03/the-indieweb-mixed-bag", + }, + json: `{"author":{"name":"Jamie Tanna","picture":"https://www.jvt.me/img/profile.png"},"name":"","content":"Recommended read:\nThe IndieWeb Mixed Bag - Thoughts about the (d)evolution of blog interactions\nhttps://brainbaking.com/post/2021/03/the-indieweb-mixed-bag/","published":"2021-03-15T12:42:00+0000","url":"https://brainbaking.com/mf2/2021/03/1bkre/","type":"bookmark","source":"https://brainbaking.com/valid-bridgy-twitter-source.html","target":"https://brainbaking.com/post/2021/03/the-indieweb-mixed-bag"}`, + }, + { + label: "receive a brid.gy webmention like", + wm: webmention{ + source: "https://brainbaking.com/valid-bridgy-like.html", + // wrapped in a a class="u-like-of" tag + target: "https://brainbaking.com/valid-indieweb-target.html", + }, + // no 
dates in bridgy-to-mastodon likes... + json: `{"author":{"name":"Stampeding Longhorn","picture":"https://cdn.social.linux.pizza/v1/AUTH_91eb37814936490c95da7b85993cc2ff/sociallinuxpizza/accounts/avatars/000/185/996/original/9e36da0c093cfc9b.png"},"name":"","content":"","published":"2020-01-01T12:30:00","url":"https://chat.brainbaking.com/notice/A4nx1rFwKUJYSe4TqK#favorited-by-A4nwg4LYyh4WgrJOXg","type":"like","source":"https://brainbaking.com/valid-bridgy-like.html","target":"https://brainbaking.com/valid-indieweb-target.html"}`, + }, + { + label: "receive a brid.gy webmention that has a url and photo without value", + wm: webmention{ + source: "https://brainbaking.com/valid-bridgy-source.html", + target: "https://brainbaking.com/valid-indieweb-target.html", + }, + json: `{"author":{"name":"Stampeding Longhorn", "picture":"https://cdn.social.linux.pizza/v1/AUTH_91eb37814936490c95da7b85993cc2ff/sociallinuxpizza/accounts/avatars/000/185/996/original/9e36da0c093cfc9b.png"}, "content":"@wouter The cat pictures are awesome. for jest tests!", "name":"@wouter The cat pictures are awesome. 
for jest tests!", "published":"2021-03-02T16:17:18.000Z", "source":"https://brainbaking.com/valid-bridgy-source.html", "target":"https://brainbaking.com/valid-indieweb-target.html", "type":"mention", "url":"https://social.linux.pizza/@StampedingLonghorn/105821099684887793"}`, + }, + { + label: "receive saves a JSON file of indieweb-metadata if all is valid", + wm: webmention{ + source: "https://brainbaking.com/valid-indieweb-source.html", + target: "https://jefklakscodex.com/articles", + }, + json: `{"author":{"name":"Wouter Groeneveld","picture":"https://brainbaking.com//img/avatar.jpg"},"name":"I just learned about https://www.inklestudios.com/...","content":"This is cool, I just found out about valid indieweb target - so cool","published":"2021-03-06T12:41:00","url":"https://brainbaking.com/notes/2021/03/06h12m41s48/","type":"mention","source":"https://brainbaking.com/valid-indieweb-source.html","target":"https://jefklakscodex.com/articles"}`, + }, + { + label: "receive saves a JSON file of indieweb-metadata with summary as content if present", + wm: webmention{ + source: "https://brainbaking.com/valid-indieweb-source-with-summary.html", + target: "https://brainbaking.com/valid-indieweb-target.html", + }, + json: `{"author":{"name":"Wouter Groeneveld", "picture":"https://brainbaking.com//img/avatar.jpg"}, "content":"This is cool, this is a summary!", "name":"I just learned about https://www.inklestudios.com/...", "published":"2021-03-06T12:41:00", "source":"https://brainbaking.com/valid-indieweb-source-with-summary.html", "target":"https://brainbaking.com/valid-indieweb-target.html", "type":"mention", "url":"https://brainbaking.com/notes/2021/03/06h12m41s48/"}`, + }, + { + label: "receive saves a JSON file of non-indieweb-data such as title if all is valid", + wm: webmention{ + source: "https://brainbaking.com/valid-nonindieweb-source.html", + target: "https://brainbaking.com/valid-indieweb-target.html", + }, + json: 
`{"author":{"name":"https://brainbaking.com/valid-nonindieweb-source.html", "picture":""}, "content":"Diablo 2 Twenty Years Later: A Retrospective | Jefklaks Codex", "name":"Diablo 2 Twenty Years Later: A Retrospective | Jefklaks Codex", "published":"2020-01-01T12:30:00", "source":"https://brainbaking.com/valid-nonindieweb-source.html", "target":"https://brainbaking.com/valid-indieweb-target.html", "type":"mention", "url":"https://brainbaking.com/valid-nonindieweb-source.html"}`, }, } - receiver.receive(wm) + for _, tc := range cases { + t.Run(tc.label, func(t *testing.T) { + os.MkdirAll("testdata/brainbaking.com", os.ModePerm) + os.MkdirAll("testdata/jefklakscodex.com", os.ModePerm) + defer os.RemoveAll("testdata") + now = func() time.Time { + return time.Date(2020, time.January, 1, 12, 30, 0, 0, time.UTC) + } + + receiver := &receiver { + conf: conf, + restClient: &mocks.RestClientMock{ + GetBodyFunc: mocks.RelPathGetBodyFunc(t), + }, + } + + receiver.receive(tc.wm) + + actualJson, _ := ioutil.ReadFile(tc.wm.asPath(conf)) + assert.JSONEq(t, tc.json, string(actualJson)) + }) + } } func TestReceiveTargetDoesNotExistAnymoreDeletesPossiblyOlderWebmention(t *testing.T) { @@ -78,9 +143,26 @@ func TestReceiveTargetDoesNotExistAnymoreDeletesPossiblyOlderWebmention(t *testi } receiver.receive(wm) - if _, err := os.Stat(filename); err == nil { - t.Fatalf("Expected possibly older webmention to be deleted, but it wasn't!") - } + assert.NoFileExists(t, filename) +} + +func TestReceiveTargetThatDoesNotPointToTheSourceDoesNothing(t *testing.T) { + wm := webmention{ + source: "https://brainbaking.com/valid-indieweb-source.html", + target: "https://brainbaking.com/valid-indieweb-source.html", + } + filename := wm.asPath(conf) + writeSomethingTo(filename) + + receiver := &receiver { + conf: conf, + restClient: &mocks.RestClientMock{ + GetBodyFunc: mocks.RelPathGetBodyFunc(t), + }, + } + + receiver.receive(wm) + assert.NoFileExists(t, filename) } func 
TestProcessSourceBodyAbortsIfNoMentionOfTargetFoundInSourceHtml(t *testing.T) { @@ -91,15 +173,11 @@ func TestProcessSourceBodyAbortsIfNoMentionOfTargetFoundInSourceHtml(t *testing. source: "https://brainbaking.com", target: "https://jefklakscodex.com/articles", } - filename := wm.asPath(conf) - receiver := &receiver { conf: conf, } receiver.processSourceBody("<html>my nice body</html>", wm) - if _, err := os.Stat(filename); err == nil { - t.Fatalf("Expected no file to be created!") - } + assert.NoFileExists(t, wm.asPath(conf)) } diff --git a/go.mod b/go.mod index 5328479..8acbdaf 100644 --- a/go.mod +++ b/go.mod @@ -5,5 +5,6 @@ go 1.16 require ( github.com/gorilla/mux v1.8.0 github.com/rs/zerolog v1.21.0 + github.com/stretchr/testify v1.7.0 // indirect willnorris.com/go/microformats v1.1.1 // indirect ) diff --git a/go.sum b/go.sum index d4bb43a..87eef9f 100644 --- a/go.sum +++ b/go.sum @@ -1,14 +1,22 @@ github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= github.com/rs/zerolog v1.21.0 h1:Q3vdXlfLNT+OftyBHsU0Y445MD+8m8axjKgf2si0QcM= github.com/rs/zerolog v1.21.0/go.mod h1:ZPhntP/xmq1nnND05hhpAh2QMhSsA4UN3MGZ6O2J3hM= +github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -36,5 +44,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= willnorris.com/go/microformats v1.1.1 h1:h5tk2luq6KBIRcwMGdksxdeea4GGuWrRFie5460OAbo= willnorris.com/go/microformats v1.1.1/go.mod h1:kvVnWrkkEscVAIITCEoiTX66Hcyg59C7q0E49mb9TJ0= diff --git a/mocks/restclient.go b/mocks/restclient.go index 31f5329..8d02693 100644 --- a/mocks/restclient.go +++ 
b/mocks/restclient.go @@ -2,6 +2,7 @@ package mocks import ( + "strings" "testing" "io/ioutil" "net/http" @@ -21,6 +22,19 @@ func (m *RestClientMock) GetBody(url string) (string, error) { return m.GetBodyFunc(url) } +func RelPathGetBodyFunc(t *testing.T) func(string) (string, error) { + return func(url string) (string, error) { + // url: https://brainbaking.com/something-something.html + // want: ../../mocks/something-something.html + mockfile := "../../mocks/" + strings.ReplaceAll(url, "https://brainbaking.com/", "") + html, err := ioutil.ReadFile(mockfile) + if err != nil { + t.Error(err) + } + return string(html), nil + } +} + func BodyFunc(t *testing.T, mockfile string) func(string) (string, error) { html, err := ioutil.ReadFile(mockfile) if err != nil {