webmention.rocks unit tests and fixes

This commit is contained in:
Wouter Groeneveld 2021-04-25 12:18:31 +02:00
parent ceabec241a
commit 8779eb01ee
22 changed files with 471 additions and 28 deletions

View File

@ -25,20 +25,35 @@ func HandleGet(repo db.MentionRepo) http.HandlerFunc {
func HandlePut(conf *common.Config, repo db.MentionRepo) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
since := getSinceQueryParam(r)
since := sinceQueryParam(r)
domain := mux.Vars(r)["domain"]
source := sourceQueryParam(r)
snder := &send.Sender{
RestClient: httpClient,
Conf: conf,
Repo: repo,
}
go snder.Send(domain, since)
if source != "" {
go snder.SendSingle(domain, source)
} else {
go snder.Send(domain, since)
}
rest.Accept(w)
}
}
func getSinceQueryParam(r *http.Request) string {
// sourceQueryParam returns the first value of the "source" query parameter,
// or an empty string when the request carries no such parameter.
func sourceQueryParam(r *http.Request) string {
	return r.URL.Query().Get("source")
}
func sinceQueryParam(r *http.Request) string {
sinceParam := r.URL.Query()["since"]
since := ""
if len(sinceParam) > 0 {

View File

@ -2,7 +2,10 @@ package send
import (
"brainbaking.com/go-jamming/rest"
"fmt"
"github.com/rs/zerolog/log"
"net/url"
"regexp"
"strings"
"willnorris.com/go/microformats"
)
@ -13,6 +16,10 @@ const (
typePingback string = "pingback"
)
var (
relWebmention = regexp.MustCompile(`rel="??'??webmention`)
)
func (sndr *Sender) discover(target string) (link string, mentionType string) {
mentionType = typeUnknown
header, body, err := sndr.RestClient.GetBody(target)
@ -20,19 +27,29 @@ func (sndr *Sender) discover(target string) (link string, mentionType string) {
log.Warn().Str("target", target).Msg("Failed to discover possible endpoint, aborting send")
return
}
link = header.Get(rest.RequestUrl) // default to a possible redirect of the target
if strings.Contains(header.Get("link"), typeWebmention) {
return buildWebmentionHeaderLink(header.Get("link")), typeWebmention
// prefer links in the header over the html itself.
for _, possibleLink := range header.Values("link") {
if relWebmention.MatchString(possibleLink) {
return buildWebmentionHeaderLink(possibleLink, rest.BaseUrlOf(link)), typeWebmention
}
}
if header.Get("X-Pingback") != "" {
return header.Get("X-Pingback"), typePingback
}
// this also complies with w3.org regulations: relative endpoint could be possible
format := microformats.Parse(strings.NewReader(body), rest.BaseUrlOf(target))
baseUrl, _ := url.Parse(link)
format := microformats.Parse(strings.NewReader(body), baseUrl)
if len(format.Rels[typeWebmention]) > 0 {
mentionType = typeWebmention
link = format.Rels[typeWebmention][0]
for _, possibleWm := range format.Rels[typeWebmention] {
if possibleWm != link {
link = possibleWm
return
}
}
} else if len(format.Rels[typePingback]) > 0 {
mentionType = typePingback
link = format.Rels[typePingback][0]
@ -41,8 +58,22 @@ func (sndr *Sender) discover(target string) (link string, mentionType string) {
return
}
// buildWebmentionHeaderLink tries to extract the link from the link header.
// e.g. Link: <http://aaronpk.example/webmention-endpoint>; rel="webmention"
func buildWebmentionHeaderLink(link string) string {
// could also be comma-separated, e.g. <https://webmention.rocks/test/19/webmention/error>; rel="other", <https://webmention.rocks/test/19/webmention?head=true>; rel="webmention"
func buildWebmentionHeaderLink(link string, baseUrl *url.URL) (wm string) {
if strings.Contains(link, ",") {
for _, possibleLink := range strings.Split(link, ",") {
if relWebmention.MatchString(possibleLink) {
link = strings.TrimSpace(possibleLink)
}
}
}
raw := strings.Split(link, ";")[0][1:]
return raw[:len(raw)-1]
wm = raw[:len(raw)-1]
if strings.HasPrefix(wm, "/") {
wm = fmt.Sprintf("%s%s", baseUrl, wm)
}
return
}

View File

@ -22,7 +22,7 @@ func TestDiscover(t *testing.T) {
{
"discover 'unknown' if no link is present",
"https://brainbaking.com/link-discover-test-none.html",
"",
"https://brainbaking.com/link-discover-test-none.html",
typeUnknown,
},
{
@ -55,6 +55,48 @@ func TestDiscover(t *testing.T) {
"http://aaronpk.example/webmention-endpoint",
typeWebmention,
},
{
"webmentions: https://webmention.rocks/test/1 relative path in header",
"https://brainbaking.com/webmention-rocks-1.html",
"https://brainbaking.com/test/1/webmention?head=true",
typeWebmention,
},
{
"webmentions: https://webmention.rocks/test/11 prefer links in the header even if in head or body also present",
"https://brainbaking.com/webmention-rocks-11.html",
"https://brainbaking.com/test/11/webmention",
typeWebmention,
},
{
"webmentions: https://webmention.rocks/test/15 empty link rel means it is its own endpoint",
"https://brainbaking.com/webmention-rocks-15.html",
"https://brainbaking.com/webmention-rocks-15.html",
typeWebmention,
},
{
"webmentions: https://webmention.rocks/test/18 discover link if multiple present in multiple headers",
"https://brainbaking.com/webmention-rocks-18.html",
"https://webmention.rocks/test/18/webmention?head=true",
typeWebmention,
},
{
"webmentions: https://webmention.rocks/test/19 discover link if multiple present comma-separated in single header",
"https://brainbaking.com/webmention-rocks-19.html",
"https://webmention.rocks/test/19/webmention?head=true",
typeWebmention,
},
{
"webmentions: https://webmention.rocks/test/20 discover link in body href if header is empty",
"https://brainbaking.com/webmention-rocks-20.html",
"https://brainbaking.com/test/20/webmention",
typeWebmention,
},
{
"webmentions: https://webmention.rocks/test/22 discover link relative to the page instead of the domain",
"https://brainbaking.com/blank/webmention-rocks-22.html",
"https://brainbaking.com/blank/22/webmention",
typeWebmention,
},
{
"webmentions: discover link if sole entry somewhere in html",
"https://brainbaking.com/link-discover-test-single.html",

View File

@ -4,6 +4,7 @@ import (
"brainbaking.com/go-jamming/app/rss"
"brainbaking.com/go-jamming/common"
"regexp"
"strings"
"time"
)
@ -47,7 +48,7 @@ func (snder *Sender) Collect(xml string, since time.Time) ([]RSSItem, error) {
if since.IsZero() || since.Before(rssitem.PubDateAsTime()) {
items = append(items, RSSItem{
link: rssitem.Link,
hrefs: snder.collectUniqueHrefsFromDescription(rssitem.Description),
hrefs: snder.collectUniqueHrefsFromHtml(rssitem.Description),
})
}
}
@ -59,12 +60,12 @@ var (
extRegexp = regexp.MustCompile(`\.(gif|zip|rar|bz2|gz|7z|jpe?g|tiff?|png|webp|bmp)$`)
)
func (snder *Sender) collectUniqueHrefsFromDescription(html string) []string {
func (snder *Sender) collectUniqueHrefsFromHtml(html string) []string {
urlmap := common.NewSet()
for _, match := range hrefRegexp.FindAllStringSubmatch(html, -1) {
url := match[1] // [0] is the match of the entire expression, [1] is the capture group
if !extRegexp.MatchString(url) && !snder.Conf.ContainsDisallowedDomain(url) {
if !extRegexp.MatchString(url) && !snder.Conf.ContainsDisallowedDomain(url) && !strings.HasPrefix(url, "#") {
urlmap.Add(url)
}
}

View File

@ -31,6 +31,11 @@ func TestCollectSuite(t *testing.T) {
suite.Run(t, new(CollectSuite))
}
// An href that only points at an anchor ("#...") must not end up in the collected links.
func (s *CollectSuite) TestCollectUniqueHrefsFromHtmlShouldNotContainInlineLinks() {
	const page = `<html><body><a href="#inline">sup</a></body></html>`

	collected := s.snder.collectUniqueHrefsFromHtml(page)

	assert.Empty(s.T(), collected)
}
func (s *CollectSuite) TestCollectShouldNotContainHrefsFromBlockedDomains() {
items, err := s.snder.Collect(s.xml, common.IsoToTime("2021-03-10T00:00:00.000Z"))
assert.NoError(s.T(), err)

View File

@ -6,6 +6,7 @@ import (
"brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db"
"brainbaking.com/go-jamming/rest"
"fmt"
"github.com/rs/zerolog/log"
"strings"
"sync"
@ -35,10 +36,35 @@ func (snder *Sender) updateSinceForDomain(domain string) {
snder.Repo.UpdateSince(domain, common.Now())
}
// SendSingle sends out webmentions serially for a single source.
// The source URL is built from domain and relSource, a path relative to that domain,
// and is always fetched over https. A leading slash in relSource is tolerated, so
// "post/x.html" and "/post/x.html" resolve to the same URL.
// Only hrefs starting with "http" found in the fetched html are used as targets.
// If the source cannot be fetched, the error is logged and nothing is sent.
func (snder *Sender) SendSingle(domain string, relSource string) {
	// Trim one leading slash to avoid building "https://domain//path".
	source := fmt.Sprintf("https://%s/%s", domain, strings.TrimPrefix(relSource, "/"))
	log.Info().Str("url", source).Msg(` OK: someone wants to send a single mention`)

	_, html, err := snder.RestClient.GetBody(source)
	if err != nil {
		log.Err(err).Str("url", source).Msg("Unable to validate source, send aborted")
		return
	}

	for _, href := range snder.collectUniqueHrefsFromHtml(html) {
		if !strings.HasPrefix(href, "http") {
			// Skip hrefs that do not start with http, e.g. relative links.
			continue
		}
		snder.sendMention(mf.Mention{
			Source: source,
			Target: href,
		})
	}
}
// Send sends out multiple webmentions based on since and what's posted in the RSS feed.
// It first GETs domain/index.xml and goes from there.
func (snder *Sender) Send(domain string, since string) {
timeSince := snder.sinceForDomain(domain, since)
log.Info().Str("domain", domain).Time("since", timeSince).Msg(` OK: someone wants to send mentions`)
feedUrl := "https://" + domain + "/index.xml"
log.Info().Str("domain", domain).Time("since", timeSince).Msg(` OK: someone wants to send mentions`)
_, feed, err := snder.RestClient.GetBody(feedUrl)
if err != nil {
log.Err(err).Str("url", feedUrl).Msg("Unable to retrieve RSS feed, send aborted")

View File

@ -70,6 +70,44 @@ func TestSinceForDomain(t *testing.T) {
}
}
func TestSendSingleDoesNotSendIfRelPathNotFound(t *testing.T) {
var postedSomething bool
snder := Sender{
Conf: conf,
RestClient: &mocks.RestClientMock{
GetBodyFunc: mocks.RelPathGetBodyFunc("../../../mocks/"),
PostFormFunc: func(endpt string, formValues url.Values) error {
postedSomething = true
return nil
},
},
}
snder.SendSingle("brainbaking.com", "unknown-file")
assert.False(t, postedSomething)
}
func TestSendSingleSendsMentionsBasedOnRelativeDomain(t *testing.T) {
passedFormValues := url.Values{}
var endpoint string
snder := Sender{
Conf: conf,
RestClient: &mocks.RestClientMock{
GetBodyFunc: mocks.RelPathGetBodyFunc("../../../mocks/"),
PostFormFunc: func(endpt string, formValues url.Values) error {
passedFormValues = formValues
endpoint = endpt
return nil
},
},
}
snder.SendSingle("brainbaking.com", "single-send-test.html")
assert.Equal(t, "http://aaronpk.example/webmention-endpoint-body", endpoint)
assert.Equal(t, "https://brainbaking.com/single-send-test.html", passedFormValues.Get("source"))
assert.Equal(t, "https://brainbaking.com/link-discover-test-single.html", passedFormValues.Get("target"))
}
func TestSendMentionAsWebmention(t *testing.T) {
passedFormValues := url.Values{}
snder := Sender{

View File

@ -0,0 +1,23 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0">
<title>Webmention Rocks!</title>
<link href="/assets/semantic.min.css" rel="stylesheet">
<link href="/assets/style.css" rel="stylesheet">
<link href="/assets/test.css" rel="stylesheet">
<script src="/assets/jquery-1.11.3.min.js"></script>
<script src="/assets/semantic.min.js"></script>
<script src="/assets/script.js"></script>
<link rel="webmention" href="22/webmention">
</head>
<body>
This post's Webmention endpoint is relative to the page rather than relative to the host.
</body>
</html>

View File

@ -1,13 +1,13 @@
package mocks
import (
"brainbaking.com/go-jamming/rest"
"encoding/json"
"github.com/rs/zerolog/log"
"io/ioutil"
"net/http"
"net/url"
"strings"
"testing"
)
// neat trick! https://medium.com/@matryer/meet-moq-easily-mock-interfaces-in-go-476444187d10
@ -37,6 +37,9 @@ func (m *RestClientMock) Post(url string, contentType string, body string) error
func toHttpHeader(header map[string]interface{}) http.Header {
httpHeader := http.Header{}
for key, value := range header {
if key == "link1" || key == "link2" {
key = "link"
}
httpHeader.Add(key, value.(string))
}
return httpHeader
@ -55,21 +58,15 @@ func RelPathGetBodyFunc(relPath string) func(string) (http.Header, string, error
headerData, headerFileErr := ioutil.ReadFile(strings.ReplaceAll(mockfile, ".html", "-headers.json"))
if headerFileErr != nil {
return http.Header{}, string(html), nil
header := http.Header{}
header.Set(rest.RequestUrl, url) // mimic actual implementation to track possible redirects
return header, string(html), nil
}
headerJson := map[string]interface{}{}
json.Unmarshal(headerData, &headerJson)
return toHttpHeader(headerJson), string(html), nil
}
}
func BodyFunc(t *testing.T, mockfile string) func(string) (string, error) {
html, err := ioutil.ReadFile(mockfile)
if err != nil {
t.Error(err)
}
return func(url string) (string, error) {
return string(html), nil
header := toHttpHeader(headerJson)
header.Set(rest.RequestUrl, url) // mimic actual implementation to track possible redirects
return header, string(html), nil
}
}

View File

@ -0,0 +1,11 @@
<html>
<head>
</head>
<body>
This is a cool blog post.
Let's add a link to send stuff to: <a href="https://brainbaking.com/link-discover-test-single.html">here</a>.
GG!
</body>
</html>

View File

@ -0,0 +1,3 @@
{
"link": "</test/1/webmention?head=true>; rel=webmention"
}

View File

@ -0,0 +1,54 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0">
<title>Webmention Rocks!</title>
<link href="/assets/semantic.min.css" rel="stylesheet">
<link href="/assets/style.css" rel="stylesheet">
<link href="/assets/test.css" rel="stylesheet">
<script src="/assets/jquery-1.11.3.min.js"></script>
<script src="/assets/semantic.min.js"></script>
<script src="/assets/script.js"></script>
<link rel="webmention" href="">
</head>
<body>
<div class="post-container h-entry">
<div class="post-main has-responses">
<div class="left p-author h-card">
<a href="/">
<img src="/assets/webmention-rocks-icon.png" width="80" class="u-photo" alt="Webmention Rocks!">
</a>
</div>
<div class="right">
<h1 class="p-name"><a href="/test/15">Discovery Test #15</a></h1>
<div class="e-content">This post has a &lt;link&gt; tag where the href value is an empty string, meaning the page is its own Webmention endpoint. This tests the relative URL resolver of the sender to ensure an empty string is resolved to the page's URL.</div>
<div class="meta">
<a href="/test/15" class="u-url">
Published:
<time class="dt-published" datetime="2016-04-14T23:26:48+02:00">
Thursday April 14, 2016 11:26pm +02:00 </time>
</a>
</div>
</div>
</div>
</div>
<div class="post-footer">
<p>Responses are stored for 48 hours and may be deleted after that time.</p>
</div>
</div>
<div id="test-num" data-num="15"></div>
<script src="/assets/streaming.js"></script>
</body>
</html>

View File

@ -0,0 +1,3 @@
{
"link": "</test/11/webmention>; rel=\"webmention\""
}

View File

@ -0,0 +1,53 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0">
<title>Webmention Rocks!</title>
<link href="/assets/semantic.min.css" rel="stylesheet">
<link href="/assets/style.css" rel="stylesheet">
<link href="/assets/test.css" rel="stylesheet">
<script src="/assets/jquery-1.11.3.min.js"></script>
<script src="/assets/semantic.min.js"></script>
<script src="/assets/script.js"></script>
<link rel="webmention" href="/test/11/webmention/error">
</head>
<body>
<div class="post-container h-entry">
<div class="post-main has-responses">
<div class="left p-author h-card">
<a href="/">
<img src="/assets/webmention-rocks-icon.png" width="80" class="u-photo" alt="Webmention Rocks!">
</a>
</div>
<div class="right">
<h1 class="p-name"><a href="/test/11">Discovery Test #11</a></h1>
<div class="e-content">This post advertises its Webmention endpoint in the HTTP Link header, HTML &lt;link&gt; tag, as well as an <a href="/test/11/webmention/error" rel="webmention">&lt;a&gt; tag</a>. Your Webmention client must only send a Webmention to the one in the Link header.</div>
<div class="meta">
<a href="/test/11" class="u-url">
Published:
<time class="dt-published" datetime="2016-04-14T17:05:16+02:00">
Thursday April 14, 2016 5:05pm +02:00 </time>
</a>
</div>
</div>
</div>
</div>
<div class="post-footer">
<p>Responses are stored for 48 hours and may be deleted after that time.</p>
</div>
</div>
<div id="test-num" data-num="11"></div>
<script src="/assets/streaming.js"></script>
</body>
</html>

View File

@ -0,0 +1,54 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0">
<title>Webmention Rocks!</title>
<link href="/assets/semantic.min.css" rel="stylesheet">
<link href="/assets/style.css" rel="stylesheet">
<link href="/assets/test.css" rel="stylesheet">
<script src="/assets/jquery-1.11.3.min.js"></script>
<script src="/assets/semantic.min.js"></script>
<script src="/assets/script.js"></script>
<link rel="webmention" href="">
</head>
<body>
<div class="post-container h-entry">
<div class="post-main has-responses">
<div class="left p-author h-card">
<a href="/">
<img src="/assets/webmention-rocks-icon.png" width="80" class="u-photo" alt="Webmention Rocks!">
</a>
</div>
<div class="right">
<h1 class="p-name"><a href="/test/15">Discovery Test #15</a></h1>
<div class="e-content">This post has a &lt;link&gt; tag where the href value is an empty string, meaning the page is its own Webmention endpoint. This tests the relative URL resolver of the sender to ensure an empty string is resolved to the page's URL.</div>
<div class="meta">
<a href="/test/15" class="u-url">
Published:
<time class="dt-published" datetime="2016-04-14T23:26:48+02:00">
Thursday April 14, 2016 11:26pm +02:00 </time>
</a>
</div>
</div>
</div>
</div>
<div class="post-footer">
<p>Responses are stored for 48 hours and may be deleted after that time.</p>
</div>
</div>
<div id="test-num" data-num="15"></div>
<script src="/assets/streaming.js"></script>
</body>
</html>

View File

@ -0,0 +1,4 @@
{
"link1": "<https://webmention.rocks/test/18/webmention/error>; rel=\"other\"",
"link2": "<https://webmention.rocks/test/18/webmention?head=true>; rel=\"webmention\""
}

View File

@ -0,0 +1 @@
test!

View File

@ -0,0 +1,3 @@
{
"link": "<https://webmention.rocks/test/19/webmention/error>; rel=\"other\", <https://webmention.rocks/test/19/webmention?head=true>; rel=\"webmention\""
}

View File

@ -0,0 +1 @@
test!

View File

@ -0,0 +1,53 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0">
<title>Webmention Rocks!</title>
<link href="/assets/semantic.min.css" rel="stylesheet">
<link href="/assets/style.css" rel="stylesheet">
<link href="/assets/test.css" rel="stylesheet">
<script src="/assets/jquery-1.11.3.min.js"></script>
<script src="/assets/semantic.min.js"></script>
<script src="/assets/script.js"></script>
</head>
<body>
<div class="post-container h-entry">
<div class="post-main has-responses">
<div class="left p-author h-card">
<a href="/">
<img src="/assets/webmention-rocks-icon.png" width="80" class="u-photo" alt="Webmention Rocks!">
</a>
</div>
<div class="right">
<h1 class="p-name"><a href="/test/20">Discovery Test #20</a></h1>
<div class="e-content">This post has a &lt;link&gt; tag <link rel="webmention"> which has no href attribute. Your Webmention client should not find this link tag, and should send the webmention to <a href="/test/20/webmention" rel="webmention">this endpoint</a> instead.</div>
<div class="meta">
<a href="/test/20" class="u-url">
Published:
<time class="dt-published" datetime="2016-04-15T11:03:35+02:00">
Friday April 15, 2016 11:03am +02:00 </time>
</a>
</div>
</div>
</div>
</div>
<div class="post-footer">
<p>Responses are stored for 48 hours and may be deleted after that time.</p>
</div>
</div>
<div id="test-num" data-num="20"></div>
<script src="/assets/streaming.js"></script>
</body>
</html>

View File

@ -23,7 +23,8 @@ type HttpClient struct {
}
const (
MaxBytes = 5000000 // 5 MiB
MaxBytes = 5000000 // 5 MiB
RequestUrl string = "requestUrl"
)
var (
@ -80,6 +81,7 @@ func (client *HttpClient) GetBody(url string) (http.Header, string, error) {
if readerr != nil {
return nil, "", fmt.Errorf("GET from %s: unable to read body: %w", url, readerr)
}
resp.Header.Set(RequestUrl, resp.Request.URL.String())
return resp.Header, string(body), nil
}

View File

@ -32,6 +32,29 @@ func TestGetBodyWithinLimitsReturnsHeadersAndBodyString(t *testing.T) {
assert.Contains(t, body, "<rss")
}
// TestGetBodyFollowsRedirect serves a 302 on /1 that points to /2 and checks that GetBody
// returns the body of /2 and reports the final URL under the RequestUrl header key.
// NOTE(review): the server is started in a goroutine and the request is issued right away,
// so the request may race with the listener coming up — confirm, or consider httptest.NewServer.
func TestGetBodyFollowsRedirect(t *testing.T) {
	routes := http.NewServeMux()
	routes.HandleFunc("/1", func(w http.ResponseWriter, r *http.Request) {
		// relative Location: resolved against the request URL, i.e. /2
		w.Header().Set("location", "2")
		w.WriteHeader(http.StatusFound)
	})
	routes.HandleFunc("/2", func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
		w.Write([]byte("nice!"))
	})

	server := &http.Server{Addr: ":6666", Handler: routes}
	defer server.Close()
	go server.ListenAndServe()

	respHeaders, respBody, err := client.GetBody("http://localhost:6666/1")

	assert.NoError(t, err)
	assert.Equal(t, "http://localhost:6666/2", respHeaders.Get(RequestUrl))
	assert.Equal(t, "nice!", respBody)
}
func TestGetBodyOf404ReturnsError(t *testing.T) {
mux := http.NewServeMux()
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {