From 8779eb01ee7a01f502fa8403d56b3642454fec51 Mon Sep 17 00:00:00 2001 From: wgroeneveld Date: Sun, 25 Apr 2021 12:18:31 +0200 Subject: [PATCH] webmention.rocks unit tests and fixes --- app/webmention/handler.go | 21 ++++++-- app/webmention/send/discoverer.go | 43 +++++++++++++--- app/webmention/send/discoverer_test.go | 44 +++++++++++++++- app/webmention/send/rsslinkcollector.go | 7 +-- app/webmention/send/rsslinkcollector_test.go | 5 ++ app/webmention/send/send.go | 28 +++++++++- app/webmention/send/send_test.go | 38 ++++++++++++++ mocks/blank/webmention-rocks-22.html | 23 +++++++++ mocks/restclient.go | 23 ++++----- mocks/single-send-test.html | 11 ++++ mocks/webmention-rocks-1-headers.json | 3 ++ mocks/webmention-rocks-1.html | 54 ++++++++++++++++++++ mocks/webmention-rocks-11-headers.json | 3 ++ mocks/webmention-rocks-11.html | 53 +++++++++++++++++++ mocks/webmention-rocks-15.html | 54 ++++++++++++++++++++ mocks/webmention-rocks-18-headers.json | 4 ++ mocks/webmention-rocks-18.html | 1 + mocks/webmention-rocks-19-headers.json | 3 ++ mocks/webmention-rocks-19.html | 1 + mocks/webmention-rocks-20.html | 53 +++++++++++++++++++ rest/client.go | 4 +- rest/client_test.go | 23 +++++++++ 22 files changed, 471 insertions(+), 28 deletions(-) create mode 100644 mocks/blank/webmention-rocks-22.html create mode 100644 mocks/single-send-test.html create mode 100644 mocks/webmention-rocks-1-headers.json create mode 100644 mocks/webmention-rocks-1.html create mode 100644 mocks/webmention-rocks-11-headers.json create mode 100644 mocks/webmention-rocks-11.html create mode 100644 mocks/webmention-rocks-15.html create mode 100644 mocks/webmention-rocks-18-headers.json create mode 100644 mocks/webmention-rocks-18.html create mode 100644 mocks/webmention-rocks-19-headers.json create mode 100644 mocks/webmention-rocks-19.html create mode 100644 mocks/webmention-rocks-20.html diff --git a/app/webmention/handler.go b/app/webmention/handler.go index ed31675..d5ee04a 100644 --- a/app/webmention/handler.go +++ b/app/webmention/handler.go @@ -25,20 +25,35 @@ func HandleGet(repo db.MentionRepo) http.HandlerFunc { func HandlePut(conf *common.Config, repo db.MentionRepo) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { - since := getSinceQueryParam(r) + since := sinceQueryParam(r) domain := mux.Vars(r)["domain"] + source := sourceQueryParam(r) snder := &send.Sender{ RestClient: httpClient, Conf: conf, Repo: repo, } - go snder.Send(domain, since) + + if source != "" { + go snder.SendSingle(domain, source) + } else { + go snder.Send(domain, since) + } + rest.Accept(w) } } -func getSinceQueryParam(r *http.Request) string { +func sourceQueryParam(r *http.Request) string { + sourceParam := r.URL.Query()["source"] + if len(sourceParam) > 0 { + return sourceParam[0] + } + return "" +} + +func sinceQueryParam(r *http.Request) string { sinceParam := r.URL.Query()["since"] since := "" if len(sinceParam) > 0 { diff --git a/app/webmention/send/discoverer.go b/app/webmention/send/discoverer.go index f51e37f..94d900f 100644 --- a/app/webmention/send/discoverer.go +++ b/app/webmention/send/discoverer.go @@ -2,7 +2,10 @@ package send import ( "brainbaking.com/go-jamming/rest" + "fmt" "github.com/rs/zerolog/log" + "net/url" + "regexp" "strings" "willnorris.com/go/microformats" ) @@ -13,6 +16,10 @@ const ( typePingback string = "pingback" ) +var ( + relWebmention = regexp.MustCompile(`rel="??'??webmention`) +) + func (sndr *Sender) discover(target string) (link string, mentionType string) { mentionType = typeUnknown header, body, err := sndr.RestClient.GetBody(target) @@ -20,19 +27,29 @@ func (sndr *Sender) discover(target string) (link string, mentionType string) { log.Warn().Str("target", target).Msg("Failed to discover possible endpoint, aborting send") return } + link = header.Get(rest.RequestUrl) // default to a possible redirect of the target - if strings.Contains(header.Get("link"), typeWebmention) { - return buildWebmentionHeaderLink(header.Get("link")), typeWebmention + // prefer links in the header over the html itself. + for _, possibleLink := range header.Values("link") { + if relWebmention.MatchString(possibleLink) { + return buildWebmentionHeaderLink(possibleLink, rest.BaseUrlOf(link)), typeWebmention + } } if header.Get("X-Pingback") != "" { return header.Get("X-Pingback"), typePingback } // this also complies with w3.org regulations: relative endpoint could be possible - format := microformats.Parse(strings.NewReader(body), rest.BaseUrlOf(target)) + baseUrl, _ := url.Parse(link) + format := microformats.Parse(strings.NewReader(body), baseUrl) if len(format.Rels[typeWebmention]) > 0 { mentionType = typeWebmention - link = format.Rels[typeWebmention][0] + for _, possibleWm := range format.Rels[typeWebmention] { + if possibleWm != link { + link = possibleWm + return + } + } } else if len(format.Rels[typePingback]) > 0 { mentionType = typePingback link = format.Rels[typePingback][0] @@ -41,8 +58,22 @@ func (sndr *Sender) discover(target string) (link string, mentionType string) { return } +// buildWebmentionHeaderLink tries to extract the link from the link header. // e.g. Link: ; rel="webmention" -func buildWebmentionHeaderLink(link string) string { +// could also be comma-separated, e.g. ; rel="other", ; rel="webmention" +func buildWebmentionHeaderLink(link string, baseUrl *url.URL) (wm string) { + if strings.Contains(link, ",") { + for _, possibleLink := range strings.Split(link, ",") { + if relWebmention.MatchString(possibleLink) { + link = strings.TrimSpace(possibleLink) + } + } + } raw := strings.Split(link, ";")[0][1:] - return raw[:len(raw)-1] + wm = raw[:len(raw)-1] + if strings.HasPrefix(wm, "/") { + wm = fmt.Sprintf("%s%s", baseUrl, wm) + } + + return } diff --git a/app/webmention/send/discoverer_test.go b/app/webmention/send/discoverer_test.go index babb891..baaaf75 100644 --- a/app/webmention/send/discoverer_test.go +++ b/app/webmention/send/discoverer_test.go @@ -22,7 +22,7 @@ func TestDiscover(t *testing.T) { { "discover 'unknown' if no link is present", "https://brainbaking.com/link-discover-test-none.html", - "", + "https://brainbaking.com/link-discover-test-none.html", typeUnknown, }, { @@ -55,6 +55,48 @@ func TestDiscover(t *testing.T) { "http://aaronpk.example/webmention-endpoint", typeWebmention, }, + { + "webmentions: https://webmention.rocks/test/1 relative path in header", + "https://brainbaking.com/webmention-rocks-1.html", + "https://brainbaking.com/test/1/webmention?head=true", + typeWebmention, + }, + { + "webmentions: https://webmention.rocks/test/11 prefer links in the header even if in head or body also present", + "https://brainbaking.com/webmention-rocks-11.html", + "https://brainbaking.com/test/11/webmention", + typeWebmention, + }, + { + "webmentions: https://webmention.rocks/test/15 empty link rel means it is its own endpoint", + "https://brainbaking.com/webmention-rocks-15.html", + "https://brainbaking.com/webmention-rocks-15.html", + typeWebmention, + }, + { + "webmentions: https://webmention.rocks/test/18 discover link if multiple present in multiple headers", + "https://brainbaking.com/webmention-rocks-18.html", + "https://webmention.rocks/test/18/webmention?head=true", + typeWebmention, + }, + { + "webmentions: https://webmention.rocks/test/19 discover link if multiple present comma-separated in single header", + "https://brainbaking.com/webmention-rocks-19.html", + "https://webmention.rocks/test/19/webmention?head=true", + typeWebmention, + }, + { + "webmentions: https://webmention.rocks/test/20 discover link in body href if header is empty", + "https://brainbaking.com/webmention-rocks-20.html", + "https://brainbaking.com/test/20/webmention", + typeWebmention, + }, + { + "webmentions: https://webmention.rocks/test/22 discover link relative to the page instead of the domain", + "https://brainbaking.com/blank/webmention-rocks-22.html", + "https://brainbaking.com/blank/22/webmention", + typeWebmention, + }, { "webmentions: discover link if sole entry somewhere in html", "https://brainbaking.com/link-discover-test-single.html", diff --git a/app/webmention/send/rsslinkcollector.go b/app/webmention/send/rsslinkcollector.go index 5eb8547..962d041 100644 --- a/app/webmention/send/rsslinkcollector.go +++ b/app/webmention/send/rsslinkcollector.go @@ -4,6 +4,7 @@ import ( "brainbaking.com/go-jamming/app/rss" "brainbaking.com/go-jamming/common" "regexp" + "strings" "time" ) @@ -47,7 +48,7 @@ func (snder *Sender) Collect(xml string, since time.Time) ([]RSSItem, error) { if since.IsZero() || since.Before(rssitem.PubDateAsTime()) { items = append(items, RSSItem{ link: rssitem.Link, - hrefs: snder.collectUniqueHrefsFromDescription(rssitem.Description), + hrefs: snder.collectUniqueHrefsFromHtml(rssitem.Description), }) } } @@ -59,12 +60,12 @@ var ( extRegexp = regexp.MustCompile(`\.(gif|zip|rar|bz2|gz|7z|jpe?g|tiff?|png|webp|bmp)$`) ) -func (snder *Sender) collectUniqueHrefsFromDescription(html string) []string { +func (snder *Sender) collectUniqueHrefsFromHtml(html string) []string { urlmap := common.NewSet() for _, match := range hrefRegexp.FindAllStringSubmatch(html, -1) { url := match[1] // [0] is the match of the entire expression, [1] is the capture group - if !extRegexp.MatchString(url) && !snder.Conf.ContainsDisallowedDomain(url) { + if !extRegexp.MatchString(url) && !snder.Conf.ContainsDisallowedDomain(url) && !strings.HasPrefix(url, "#") { urlmap.Add(url) } } diff --git a/app/webmention/send/rsslinkcollector_test.go b/app/webmention/send/rsslinkcollector_test.go index bcf1b7e..ea301af 100644 --- a/app/webmention/send/rsslinkcollector_test.go +++ b/app/webmention/send/rsslinkcollector_test.go @@ -31,6 +31,11 @@ func TestCollectSuite(t *testing.T) { suite.Run(t, new(CollectSuite)) } +func (s *CollectSuite) TestCollectUniqueHrefsFromHtmlShouldNotContainInlineLinks() { + links := s.snder.collectUniqueHrefsFromHtml(`sup`) + assert.Empty(s.T(), links) +} + func (s *CollectSuite) TestCollectShouldNotContainHrefsFromBlockedDomains() { items, err := s.snder.Collect(s.xml, common.IsoToTime("2021-03-10T00:00:00.000Z")) assert.NoError(s.T(), err) diff --git a/app/webmention/send/send.go b/app/webmention/send/send.go index e98bf50..7ff4529 100644 --- a/app/webmention/send/send.go +++ b/app/webmention/send/send.go @@ -6,6 +6,7 @@ import ( "brainbaking.com/go-jamming/common" "brainbaking.com/go-jamming/db" "brainbaking.com/go-jamming/rest" + "fmt" "github.com/rs/zerolog/log" "strings" "sync" @@ -35,10 +36,35 @@ func (snder *Sender) updateSinceForDomain(domain string) { snder.Repo.UpdateSince(domain, common.Now()) } +// SendSingle sends out webmentions serially for a single source. +// It does validate the relative path against the domain, which is supposed to be served using https. +func (snder *Sender) SendSingle(domain string, relSource string) { + source := fmt.Sprintf("https://%s/%s", domain, relSource) + log.Info().Str("url", source).Msg(` OK: someone wants to send a single mention`) + + _, html, err := snder.RestClient.GetBody(source) + if err != nil { + log.Err(err).Str("url", source).Msg("Unable to validate source, send aborted") + return + } + + for _, href := range snder.collectUniqueHrefsFromHtml(html) { + if strings.HasPrefix(href, "http") { + snder.sendMention(mf.Mention{ + Source: source, + Target: href, + }) + } + } +} + +// Send sends out multiple webmentions based on since and what's posted in the RSS feed. +// It first GETs domain/index.xml and goes from there. func (snder *Sender) Send(domain string, since string) { timeSince := snder.sinceForDomain(domain, since) - log.Info().Str("domain", domain).Time("since", timeSince).Msg(` OK: someone wants to send mentions`) feedUrl := "https://" + domain + "/index.xml" + + log.Info().Str("domain", domain).Time("since", timeSince).Msg(` OK: someone wants to send mentions`) _, feed, err := snder.RestClient.GetBody(feedUrl) if err != nil { log.Err(err).Str("url", feedUrl).Msg("Unable to retrieve RSS feed, send aborted") diff --git a/app/webmention/send/send_test.go b/app/webmention/send/send_test.go index f9984fd..0af8625 100644 --- a/app/webmention/send/send_test.go +++ b/app/webmention/send/send_test.go @@ -70,6 +70,44 @@ func TestSinceForDomain(t *testing.T) { } } +func TestSendSingleDoesNotSendIfRelPathNotFound(t *testing.T) { + var postedSomething bool + snder := Sender{ + Conf: conf, + RestClient: &mocks.RestClientMock{ + GetBodyFunc: mocks.RelPathGetBodyFunc("../../../mocks/"), + PostFormFunc: func(endpt string, formValues url.Values) error { + postedSomething = true + return nil + }, + }, + } + + snder.SendSingle("brainbaking.com", "unknown-file") + assert.False(t, postedSomething) +} + +func TestSendSingleSendsMentionsBasedOnRelativeDomain(t *testing.T) { + passedFormValues := url.Values{} + var endpoint string + snder := Sender{ + Conf: conf, + RestClient: &mocks.RestClientMock{ + GetBodyFunc: mocks.RelPathGetBodyFunc("../../../mocks/"), + PostFormFunc: func(endpt string, formValues url.Values) error { + passedFormValues = formValues + endpoint = endpt + return nil + }, + }, + } + + snder.SendSingle("brainbaking.com", "single-send-test.html") + assert.Equal(t, "http://aaronpk.example/webmention-endpoint-body", endpoint) + assert.Equal(t, "https://brainbaking.com/single-send-test.html", passedFormValues.Get("source")) + assert.Equal(t, "https://brainbaking.com/link-discover-test-single.html", passedFormValues.Get("target")) +} + func TestSendMentionAsWebmention(t *testing.T) { passedFormValues := url.Values{} snder := Sender{ diff --git a/mocks/blank/webmention-rocks-22.html b/mocks/blank/webmention-rocks-22.html new file mode 100644 index 0000000..6c663cc --- /dev/null +++ b/mocks/blank/webmention-rocks-22.html @@ -0,0 +1,23 @@ + + + + + + + + Webmention Rocks! + + + + + + + + + + + + +This post's Webmention endpoint is relative to the page rather than relative to the host. + + diff --git a/mocks/restclient.go b/mocks/restclient.go index 27c872b..70d6841 100644 --- a/mocks/restclient.go +++ b/mocks/restclient.go @@ -1,13 +1,13 @@ package mocks import ( + "brainbaking.com/go-jamming/rest" "encoding/json" "github.com/rs/zerolog/log" "io/ioutil" "net/http" "net/url" "strings" - "testing" ) // neat trick! https://medium.com/@matryer/meet-moq-easily-mock-interfaces-in-go-476444187d10 @@ -37,6 +37,9 @@ func (m *RestClientMock) Post(url string, contentType string, body string) error func toHttpHeader(header map[string]interface{}) http.Header { httpHeader := http.Header{} for key, value := range header { + if key == "link1" || key == "link2" { + key = "link" + } httpHeader.Add(key, value.(string)) } return httpHeader @@ -55,21 +58,15 @@ func RelPathGetBodyFunc(relPath string) func(string) (http.Header, string, error headerData, headerFileErr := ioutil.ReadFile(strings.ReplaceAll(mockfile, ".html", "-headers.json")) if headerFileErr != nil { - return http.Header{}, string(html), nil + header := http.Header{} + header.Set(rest.RequestUrl, url) // mimic actual implementation to track possible redirects + return header, string(html), nil } headerJson := map[string]interface{}{} json.Unmarshal(headerData, &headerJson) - return toHttpHeader(headerJson), string(html), nil - } -} - -func BodyFunc(t *testing.T, mockfile string) func(string) (string, error) { - html, err := ioutil.ReadFile(mockfile) - if err != nil { - t.Error(err) - } - return func(url string) (string, error) { - return string(html), nil + header := toHttpHeader(headerJson) + header.Set(rest.RequestUrl, url) // mimic actual implementation to track possible redirects + return header, string(html), nil } } diff --git a/mocks/single-send-test.html b/mocks/single-send-test.html new file mode 100644 index 0000000..98ae801 --- /dev/null +++ b/mocks/single-send-test.html @@ -0,0 +1,11 @@ + + + + + +This is a cool blog post. +Let's add a link to send stuff to: here. + +GG! + + diff --git a/mocks/webmention-rocks-1-headers.json b/mocks/webmention-rocks-1-headers.json new file mode 100644 index 0000000..1db65d5 --- /dev/null +++ b/mocks/webmention-rocks-1-headers.json @@ -0,0 +1,3 @@ +{ + "link": "; rel=webmention" +} \ No newline at end of file diff --git a/mocks/webmention-rocks-1.html b/mocks/webmention-rocks-1.html new file mode 100644 index 0000000..fa27bab --- /dev/null +++ b/mocks/webmention-rocks-1.html @@ -0,0 +1,54 @@ + + + + + + + + Webmention Rocks! + + + + + + + + + + + + + + +
+
+
+ + Webmention Rocks! + +
+
+

Discovery Test #15

+
This post has a <link> tag where the href value is an empty string, meaning the page is its own Webmention endpoint. This tests the relative URL resolver of the sender to ensure an empty string is resolved to the page's URL.
+ +
+
+ + +
+ + + +
+ + + + diff --git a/mocks/webmention-rocks-11-headers.json b/mocks/webmention-rocks-11-headers.json new file mode 100644 index 0000000..dbc66b6 --- /dev/null +++ b/mocks/webmention-rocks-11-headers.json @@ -0,0 +1,3 @@ +{ + "link": "; rel=\"webmention\"" +} \ No newline at end of file diff --git a/mocks/webmention-rocks-11.html b/mocks/webmention-rocks-11.html new file mode 100644 index 0000000..617145a --- /dev/null +++ b/mocks/webmention-rocks-11.html @@ -0,0 +1,53 @@ + + + + + + + + Webmention Rocks! + + + + + + + + + + + + + + +
+
+
+ + Webmention Rocks! + +
+
+

Discovery Test #11

+
This post advertises its Webmention endpoint in the HTTP Link header, HTML <link> tag, as well as an <a> tag. Your Webmention client must only send a Webmention to the one in the Link header.
+ +
+
+ +
+ + + +
+ + + + diff --git a/mocks/webmention-rocks-15.html b/mocks/webmention-rocks-15.html new file mode 100644 index 0000000..fa27bab --- /dev/null +++ b/mocks/webmention-rocks-15.html @@ -0,0 +1,54 @@ + + + + + + + + Webmention Rocks! + + + + + + + + + + + + + + +
+
+
+ + Webmention Rocks! + +
+
+

Discovery Test #15

+
This post has a <link> tag where the href value is an empty string, meaning the page is its own Webmention endpoint. This tests the relative URL resolver of the sender to ensure an empty string is resolved to the page's URL.
+ +
+
+ + +
+ + + +
+ + + + diff --git a/mocks/webmention-rocks-18-headers.json b/mocks/webmention-rocks-18-headers.json new file mode 100644 index 0000000..797d3ed --- /dev/null +++ b/mocks/webmention-rocks-18-headers.json @@ -0,0 +1,4 @@ +{ + "link1": "; rel=\"other\"", + "link2": "; rel=\"webmention\"" +} \ No newline at end of file diff --git a/mocks/webmention-rocks-18.html b/mocks/webmention-rocks-18.html new file mode 100644 index 0000000..989f51a --- /dev/null +++ b/mocks/webmention-rocks-18.html @@ -0,0 +1 @@ +test! diff --git a/mocks/webmention-rocks-19-headers.json b/mocks/webmention-rocks-19-headers.json new file mode 100644 index 0000000..3e2395f --- /dev/null +++ b/mocks/webmention-rocks-19-headers.json @@ -0,0 +1,3 @@ +{ + "link": "; rel=\"other\", ; rel=\"webmention\"" +} \ No newline at end of file diff --git a/mocks/webmention-rocks-19.html b/mocks/webmention-rocks-19.html new file mode 100644 index 0000000..989f51a --- /dev/null +++ b/mocks/webmention-rocks-19.html @@ -0,0 +1 @@ +test! diff --git a/mocks/webmention-rocks-20.html b/mocks/webmention-rocks-20.html new file mode 100644 index 0000000..d8952da --- /dev/null +++ b/mocks/webmention-rocks-20.html @@ -0,0 +1,53 @@ + + + + + + + + Webmention Rocks! + + + + + + + + + + + + + + +
+
+
+ + Webmention Rocks! + +
+
+

Discovery Test #20

+
This post has a <link> tag which has no href attribute. Your Webmention client should not find this link tag, and should send the webmention to this endpoint instead.
+ +
+
+ +
+ + + +
+ + + + diff --git a/rest/client.go b/rest/client.go index 4a45957..7606f2d 100644 --- a/rest/client.go +++ b/rest/client.go @@ -23,7 +23,8 @@ type HttpClient struct { } const ( - MaxBytes = 5000000 // 5 MiB + MaxBytes = 5000000 // 5 MiB + RequestUrl string = "requestUrl" ) var ( @@ -80,6 +81,7 @@ func (client *HttpClient) GetBody(url string) (http.Header, string, error) { if readerr != nil { return nil, "", fmt.Errorf("GET from %s: unable to read body: %w", url, readerr) } + resp.Header.Set(RequestUrl, resp.Request.URL.String()) return resp.Header, string(body), nil } diff --git a/rest/client_test.go b/rest/client_test.go index 771587f..3f9576a 100644 --- a/rest/client_test.go +++ b/rest/client_test.go @@ -32,6 +32,29 @@ func TestGetBodyWithinLimitsReturnsHeadersAndBodyString(t *testing.T) { assert.Contains(t, body, "