implement auto-discovering of rss endpoints besides /index.xml

This commit is contained in:
Wouter Groeneveld 2021-05-20 16:09:46 +02:00
parent f503ea8c27
commit e9817549f6
8 changed files with 181 additions and 13 deletions

View File

@ -117,6 +117,19 @@ This means if you made changes in-between, and they appear in the RSS feed as re
No. The server will automatically store the latest push, and if it's called again, it will not send out anything if nothing more recent was found in your RSS feed based on the last published link. Providing the parameter merely lets you override the behavior.
**Which RSS feed will it use?**
It will attempt to auto-discover them with a HEAD call, in the following order:
1. `/all/index.xml`
2. `/index.xml`
3. `/feed`
4. `/feed/index.xml`
If none of them returns a 2xx status with content-type `text/xml`, it will abort and log an error.
Note that this _requires your site to be on HTTPS_!!
#### 1.4 `DELETE /webmention/:domain/:token?source=x&target=y`
Deletes a webmention or logs a warning if no relevant mention found.

View File

@ -2,6 +2,7 @@ package send
import (
"brainbaking.com/go-jamming/rest"
"fmt"
"github.com/rs/zerolog/log"
"net/url"
"regexp"
@ -17,9 +18,30 @@ const (
var (
// relWebmention matches `rel=` followed by an optional double and/or single
// quote and a value that starts with "webmention".
relWebmention = regexp.MustCompile(`rel="??'??webmention`)
// possibleFeedEndpoints lists the paths, relative to the domain root, that
// discoverRssFeed probes. The order matters: the first one that qualifies wins.
possibleFeedEndpoints = []string{
"all/index.xml",
"index.xml",
"feed",
"feed/index.xml",
}
)
func (sndr *Sender) discover(target string) (link string, mentionType string) {
// feedContentType matches the Content-Type values accepted for a feed:
// text/xml, application/xml, application/rss+xml and application/atom+xml.
// The match is case-insensitive and tolerates trailing parameters such as
// "; charset=utf-8", which many servers append.
var feedContentType = regexp.MustCompile(`(?i)^\s*(?:text/xml|application/(?:xml|rss\+xml|atom\+xml))\s*(?:;.*)?$`)

// discoverRssFeed probes every entry of possibleFeedEndpoints on the given
// domain (over HTTPS, using HEAD requests) and returns the URL of the first one
// that answers with a 2xx status and an XML/feed content type.
//
// It returns an error when none of the candidates qualifies.
func (sndr *Sender) discoverRssFeed(domain string) (string, error) {
	for _, endpt := range possibleFeedEndpoints {
		feedUrl := fmt.Sprintf("https://%s/%s", domain, endpt)
		resp, err := sndr.RestClient.Head(feedUrl)
		if err != nil || resp == nil {
			continue
		}
		// A HEAD response has no payload, but its body must still be closed to
		// release the underlying connection. Test doubles may leave Body nil.
		if resp.Body != nil {
			// Nothing useful can be done with a close error on an empty body.
			_ = resp.Body.Close()
		}
		if !rest.IsStatusOk(resp) || !feedContentType.MatchString(resp.Header.Get("Content-Type")) {
			continue
		}
		return feedUrl, nil
	}
	return "", fmt.Errorf("Unable to discover RSS feed for domain %s", domain)
}
func (sndr *Sender) discoverMentionEndpoint(target string) (link string, mentionType string) {
mentionType = typeUnknown
header, body, err := sndr.RestClient.GetBody(target)
if err != nil {

View File

@ -3,24 +3,90 @@ package send
import (
"brainbaking.com/go-jamming/mocks"
"brainbaking.com/go-jamming/rest"
"fmt"
"github.com/stretchr/testify/assert"
"net/http"
"strings"
"testing"
)
func TestDiscoverE2EWithRedirect(t *testing.T) {
// TestDiscoverMentionEndpointE2EWithRedirect runs endpoint discovery against the
// live webmention.rocks test page using a real rest.HttpClient. It is skipped
// unconditionally by the t.Skip call on its first line.
// NOTE(review): the skip message still mentions the previous test name
// (TestDiscoverE2EWithRedirect) — confirm whether it should be updated.
func TestDiscoverMentionEndpointE2EWithRedirect(t *testing.T) {
t.Skip("Skipping TestDiscoverE2EWithRedirect, webmention.rocks is slow.")
var sender = &Sender{
RestClient: &rest.HttpClient{},
}
link, wmType := sender.discover("https://webmention.rocks/test/23/page")
link, wmType := sender.discoverMentionEndpoint("https://webmention.rocks/test/23/page")
assert.Equal(t, typeWebmention, wmType)
expectedUrl := "https://webmention.rocks/test/23/page/webmention-endpoint/"
// Only the prefix is compared, so anything appended to the endpoint URL is tolerated.
assert.Truef(t, strings.HasPrefix(link, expectedUrl), "should start with %s, but was %s", expectedUrl, link)
}
func TestDiscover(t *testing.T) {
// TestDisccoverRssFeedPrefersFirstEntriesOverLater checks that when several
// candidate endpoints look like valid feeds, the earliest candidate is returned.
func TestDisccoverRssFeedPrefersFirstEntriesOverLater(t *testing.T) {
	// Every URL ending in /index.xml answers like a valid feed; anything else fails.
	headStub := func(target string) (*http.Response, error) {
		if !strings.HasSuffix(target, "/index.xml") {
			return nil, fmt.Errorf("BOOM")
		}
		xmlOk := &http.Response{
			StatusCode: 200,
			Header: map[string][]string{
				"Content-Type": {"text/xml"},
			},
		}
		return xmlOk, nil
	}
	sender := &Sender{
		RestClient: &mocks.RestClientMock{HeadFunc: headStub},
	}

	discovered, err := sender.discoverRssFeed("blah.com")

	assert.NoError(t, err)
	assert.Equal(t, "https://blah.com/all/index.xml", discovered)
}
// TestDiscoverRssFeedNoneFoundReturnsError checks that discovery reports an
// error when every HEAD request fails.
func TestDiscoverRssFeedNoneFoundReturnsError(t *testing.T) {
	alwaysFailing := func(string) (*http.Response, error) {
		return nil, fmt.Errorf("BOOM")
	}
	sender := &Sender{
		RestClient: &mocks.RestClientMock{HeadFunc: alwaysFailing},
	}

	_, discoverErr := sender.discoverRssFeed("blah.com")

	assert.Error(t, discoverErr)
}
// TestDiscoverRssFeedFirstNotXmlReturnsSecondWorkingOne checks that a candidate
// answering 200 with a non-XML content type is passed over, and that the next
// candidate that does serve XML is returned instead.
func TestDiscoverRssFeedFirstNotXmlReturnsSecondWorkingOne(t *testing.T) {
	// okWith builds a fresh 200 response carrying the given content type.
	okWith := func(contentType string) *http.Response {
		return &http.Response{
			StatusCode: 200,
			Header: map[string][]string{
				"Content-Type": {contentType},
			},
		}
	}
	headStub := func(target string) (*http.Response, error) {
		switch {
		case strings.HasSuffix(target, "/all/index.xml"):
			// Reachable, but serves HTML rather than a feed.
			return okWith("text/html"), nil
		case strings.HasSuffix(target, "/feed"):
			return okWith("text/xml"), nil
		default:
			return nil, fmt.Errorf("BOOM")
		}
	}
	sender := &Sender{
		RestClient: &mocks.RestClientMock{HeadFunc: headStub},
	}

	discovered, err := sender.discoverRssFeed("blah.com")

	assert.NoError(t, err)
	assert.Equal(t, "https://blah.com/feed", discovered)
}
func TestDiscoverMentionEndpoint(t *testing.T) {
var sender = &Sender{
RestClient: &mocks.RestClientMock{
GetBodyFunc: mocks.RelPathGetBodyFunc("../../../mocks/"),
@ -126,7 +192,7 @@ func TestDiscover(t *testing.T) {
}
for _, tc := range cases {
t.Run(tc.label, func(t *testing.T) {
link, mentionType := sender.discover(tc.url)
link, mentionType := sender.discoverMentionEndpoint(tc.url)
assert.Equal(t, tc.expectedLink, link)
assert.Equal(t, tc.expectedType, mentionType)
})

View File

@ -44,10 +44,15 @@ func (snder *Sender) SendSingle(domain string, relSource string) {
// It first discovers the RSS feed of the domain (see discoverRssFeed), GETs it, and goes from there.
func (snder *Sender) Send(domain string) {
lastSent := snder.Repo.LastSentMention(domain)
feedUrl := "https://" + domain + "/index.xml"
log.Info().Str("domain", domain).Str("lastsent", lastSent).Msg(` OK: someone wants to send mentions`)
feedUrl, err := snder.discoverRssFeed(domain)
if err != nil {
log.Err(err).Str("url", feedUrl).Msg("Unable to retrieve RSS feed, send aborted")
return
}
_, feed, err := snder.RestClient.GetBody(feedUrl)
// just to be sure. Should not produce an error due to check above, but you never know.
if err != nil {
log.Err(err).Str("url", feedUrl).Msg("Unable to retrieve RSS feed, send aborted")
return
@ -60,7 +65,7 @@ func (snder *Sender) Send(domain string) {
}
snder.Repo.UpdateLastSentMention(domain, lastSent)
log.Info().Str("domain", domain).Str("lastsent", lastSent).Msg(` OK: send processed.`)
log.Info().Str("feed", feedUrl).Str("lastsent", lastSent).Msg(` OK: send processed.`)
}
func (snder *Sender) parseRssFeed(feed string, lastSentLink string) (string, error) {
@ -106,7 +111,7 @@ var mentionFuncs = map[string]func(snder *Sender, mention mf.Mention, endpoint s
}
// sendMention discovers the endpoint and mention type for mention.Target and
// hands the mention to the function registered for that type in mentionFuncs.
func (snder *Sender) sendMention(mention mf.Mention) {
endpoint, mentionType := snder.discover(mention.Target)
endpoint, mentionType := snder.discoverMentionEndpoint(mention.Target)
// NOTE(review): assumes mentionFuncs holds an entry for every type discovery
// can return; a missing key would invoke a nil func — confirm.
mentionFuncs[mentionType](snder, mention, endpoint)
}

View File

@ -141,6 +141,7 @@ func TestSendIntegrationTestCanSendBothWebmentionsAndPingbacks(t *testing.T) {
Repo: db.NewMentionRepo(conf),
RestClient: &mocks.RestClientMock{
GetBodyFunc: mocks.RelPathGetBodyFunc("./../../../mocks/"),
HeadFunc: mocks.Head200ContentXml(),
PostFunc: func(url string, contentType string, body string) error {
lock.Lock()
defer lock.Unlock()

41
mocks/all/index.xml Normal file
View File

@ -0,0 +1,41 @@
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>Brain Baking</title>
<link>https://brainbaking.com/</link>
<description>Recent content on Brain Baking</description>
<generator>Hugo -- gohugo.io</generator>
<language>en-us</language>
<managingEditor>Wouter Groeneveld</managingEditor>
<webMaster>Wouter Groeneveld</webMaster>
<lastBuildDate>Tue, 16 Mar 2021 17:07:14 +0000</lastBuildDate>
<atom:link href="https://brainbaking.com/index.xml" rel="self" type="application/rss+xml" />
<item>
<title>@celia @kev I have read both you and Kev&#39;s post on...</title>
<link>https://brainbaking.com/notes/2021/03/16h17m07s14/</link>
<comments>https://brainbaking.com/notes/2021/03/16h17m07s14/#commento</comments>
<pubDate>Tue, 16 Mar 2021 17:07:14 +0000</pubDate>
<author>Wouter Groeneveld</author>
<guid isPermaLink="true">https://brainbaking.com/notes/2021/03/16h17m07s14/</guid>
<description>
<![CDATA[
<p>hi there! test discovering: <a href="https://brainbaking.com/link-discover-test-single.html">single</a>. Nice!</p>
<p>this one is a pingback-only <a href="https://brainbaking.com/pingback-discover-test-single.html">single</a> one. Not good!</p>
<p>another cool link: <a href="https://brainbaking.com/link-discover-test-multiple.html">multiple</a></p>
<p>last but not least: <a href="https://brainbaking.com/link-discover-test-none.html">Nothin!</a> What a shame.</p>
]]>
</description>
</item>
</channel>
</rss>

View File

@ -12,16 +12,20 @@ import (
// neat trick! https://medium.com/@matryer/meet-moq-easily-mock-interfaces-in-go-476444187d10
// RestClientMock is a test double whose behaviour is supplied per test through
// function fields; each method forwards to the field of the matching name.
type RestClientMock struct {
// HeadFunc is invoked by Head.
HeadFunc func(string) (*http.Response, error)
// GetFunc is invoked by Get.
GetFunc func(string) (*http.Response, error)
// GetBodyFunc is invoked by GetBody.
GetBodyFunc func(string) (http.Header, string, error)
// PostFunc presumably backs Post (url, content type, body) — method not shown here.
PostFunc func(string, string, string) error
// PostFormFunc presumably backs PostForm (url, form data) — method not shown here.
PostFormFunc func(string, url.Values) error
}
// although these are still requied to match the rest.Client interface.
// although these are still required to match the rest.Client interface.
// Get forwards the call to the configured GetFunc and returns its result as is.
func (m *RestClientMock) Get(url string) (*http.Response, error) {
	stub := m.GetFunc
	return stub(url)
}
// Head forwards the call to the configured HeadFunc and returns its result as is.
func (m *RestClientMock) Head(url string) (*http.Response, error) {
	stub := m.HeadFunc
	return stub(url)
}
// GetBody forwards the call to the configured GetBodyFunc and returns its
// header, body and error as is.
func (m *RestClientMock) GetBody(url string) (http.Header, string, error) {
	stub := m.GetBodyFunc
	return stub(url)
}
@ -45,6 +49,17 @@ func toHttpHeader(header map[string]interface{}) http.Header {
return httpHeader
}
// Head200ContentXml returns a HEAD stub that ignores the requested URL and
// always answers with a fresh 200 response whose Content-Type is text/xml.
func Head200ContentXml() func(string) (*http.Response, error) {
	return func(_ string) (*http.Response, error) {
		xmlHeader := http.Header{}
		xmlHeader.Set("Content-Type", "text/xml")
		return &http.Response{
			StatusCode: http.StatusOK,
			Header:     xmlHeader,
		}, nil
	}
}
func RelPathGetBodyFunc(relPath string) func(string) (http.Header, string, error) {
return func(url string) (http.Header, string, error) {
log.Debug().Str("url", url).Msg(" - GET call")

View File

@ -14,6 +14,7 @@ import (
type Client interface {
Get(url string) (*http.Response, error)
Head(url string) (*http.Response, error)
Post(url string, contentType string, body string) error
GetBody(url string) (http.Header, string, error)
PostForm(url string, formData url.Values) error
@ -42,12 +43,16 @@ var (
ResponseAboveLimit = errors.New("response bigger than limit")
)
// Head issues a HEAD request for url through the package-level jammingHttp
// client and returns its response and error unchanged.
func (client *HttpClient) Head(url string) (*http.Response, error) {
	resp, err := jammingHttp.Head(url)
	return resp, err
}
func (client *HttpClient) PostForm(url string, formData url.Values) error {
resp, err := jammingHttp.PostForm(url, formData)
if err != nil {
return fmt.Errorf("POST Form to %s: %v", url, err)
}
if !isStatusOk(resp) {
if !IsStatusOk(resp) {
return fmt.Errorf("POST Form to %s: Status code is not OK (%d)", url, resp.StatusCode)
}
return nil
@ -58,7 +63,7 @@ func (client *HttpClient) Post(url string, contenType string, body string) error
if err != nil {
return fmt.Errorf("POST to %s: %v", url, err)
}
if !isStatusOk(resp) {
if !IsStatusOk(resp) {
return fmt.Errorf("POST to %s: Status code is not OK (%d)", url, resp.StatusCode)
}
return nil
@ -72,7 +77,7 @@ func (client *HttpClient) GetBody(url string) (http.Header, string, error) {
return nil, "", fmt.Errorf("GET from %s: %w", url, geterr)
}
if !isStatusOk(resp) {
if !IsStatusOk(resp) {
return nil, "", fmt.Errorf("GET from %s: Status code is not OK (%d)", url, resp.StatusCode)
}
@ -85,7 +90,7 @@ func (client *HttpClient) GetBody(url string) (http.Header, string, error) {
return resp.Header, string(body), nil
}
func isStatusOk(resp *http.Response) bool {
// IsStatusOk reports whether the response carries a 2xx status code.
func IsStatusOk(resp *http.Response) bool {
	code := resp.StatusCode
	if code < http.StatusOK {
		return false
	}
	return code < http.StatusMultipleChoices
}