From e9817549f6b727f66938583d9a055332e1a68798 Mon Sep 17 00:00:00 2001 From: wgroeneveld Date: Thu, 20 May 2021 16:09:46 +0200 Subject: [PATCH] implement auto-discovering of rss endpoints besides /index.xml --- README.md | 13 +++++ app/webmention/send/discoverer.go | 24 ++++++++- app/webmention/send/discoverer_test.go | 74 ++++++++++++++++++++++++-- app/webmention/send/send.go | 11 ++-- app/webmention/send/send_test.go | 1 + mocks/all/index.xml | 41 ++++++++++++++ mocks/restclient.go | 17 +++++- rest/client.go | 13 +++-- 8 files changed, 181 insertions(+), 13 deletions(-) create mode 100644 mocks/all/index.xml diff --git a/README.md b/README.md index 701a3f1..51808c5 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,19 @@ This means if you made changes in-between, and they appear in the RSS feed as re No. The server will automatically store the latest push, and if it's called again, it will not send out anything if nothing more recent was found in your RSS feed based on the last published link. Providing the parameter merely lets you override the behavior. +**Which RSS feed will it use?** + +It will attempt to auto-discover it with a HEAD call, in the following order: + +1. `/all/index.xml` +2. `/index.xml` +3. `/feed` +4. `/feed/index.xml` + +If none of them returns a 2xx status with content-type `text/xml`, it will abort and log an error. + +Note that this _requires your site to be on HTTPS_!! + #### 1.4 `DELETE /webmention/:domain/:token?source=x&target=y` Deletes a webmention or logs a warning if no relevant mention found. 
diff --git a/app/webmention/send/discoverer.go b/app/webmention/send/discoverer.go index 06de32a..19ad9c0 100644 --- a/app/webmention/send/discoverer.go +++ b/app/webmention/send/discoverer.go @@ -2,6 +2,7 @@ package send import ( "brainbaking.com/go-jamming/rest" + "fmt" "github.com/rs/zerolog/log" "net/url" "regexp" @@ -17,9 +18,30 @@ const ( var ( relWebmention = regexp.MustCompile(`rel="??'??webmention`) + + possibleFeedEndpoints = []string{ + "all/index.xml", + "index.xml", + "feed", + "feed/index.xml", + } ) -func (sndr *Sender) discover(target string) (link string, mentionType string) { +func (sndr *Sender) discoverRssFeed(domain string) (string, error) { + for _, endpt := range possibleFeedEndpoints { + feedUrl := fmt.Sprintf("https://%s/%s", domain, endpt) + resp, err := sndr.RestClient.Head(feedUrl) + + if err != nil || !rest.IsStatusOk(resp) || resp.Header.Get("Content-Type") != "text/xml" { + continue + } + + return feedUrl, nil + } + return "", fmt.Errorf("Unable to discover RSS feed for domain %s", domain) +} + +func (sndr *Sender) discoverMentionEndpoint(target string) (link string, mentionType string) { mentionType = typeUnknown header, body, err := sndr.RestClient.GetBody(target) if err != nil { diff --git a/app/webmention/send/discoverer_test.go b/app/webmention/send/discoverer_test.go index 9b9987f..36c35cf 100644 --- a/app/webmention/send/discoverer_test.go +++ b/app/webmention/send/discoverer_test.go @@ -3,24 +3,90 @@ package send import ( "brainbaking.com/go-jamming/mocks" "brainbaking.com/go-jamming/rest" + "fmt" "github.com/stretchr/testify/assert" + "net/http" "strings" "testing" ) -func TestDiscoverE2EWithRedirect(t *testing.T) { +func TestDiscoverMentionEndpointE2EWithRedirect(t *testing.T) { t.Skip("Skipping TestDiscoverE2EWithRedirect, webmention.rocks is slow.") var sender = &Sender{ RestClient: &rest.HttpClient{}, } - link, wmType := sender.discover("https://webmention.rocks/test/23/page") + link, wmType := 
sender.discoverMentionEndpoint("https://webmention.rocks/test/23/page") assert.Equal(t, typeWebmention, wmType) expectedUrl := "https://webmention.rocks/test/23/page/webmention-endpoint/" assert.Truef(t, strings.HasPrefix(link, expectedUrl), "should start with %s, but was %s", expectedUrl, link) } -func TestDiscover(t *testing.T) { +func TestDisccoverRssFeedPrefersFirstEntriesOverLater(t *testing.T) { + var snder = &Sender{ + RestClient: &mocks.RestClientMock{ + HeadFunc: func(s string) (*http.Response, error) { + if strings.HasSuffix(s, "/index.xml") { + return &http.Response{ + Header: map[string][]string{ + "Content-Type": {"text/xml"}, + }, + StatusCode: 200, + }, nil + } + return nil, fmt.Errorf("BOOM") + }, + }, + } + + feed, err := snder.discoverRssFeed("blah.com") + assert.NoError(t, err) + assert.Equal(t, "https://blah.com/all/index.xml", feed) +} + +func TestDiscoverRssFeedNoneFoundReturnsError(t *testing.T) { + var snder = &Sender{ + RestClient: &mocks.RestClientMock{ + HeadFunc: func(s string) (*http.Response, error) { + return nil, fmt.Errorf("BOOM") + }, + }, + } + + _, err := snder.discoverRssFeed("blah.com") + assert.Error(t, err) +} +func TestDiscoverRssFeedFirstNotXmlReturnsSecondWorkingOne(t *testing.T) { + var snder = &Sender{ + RestClient: &mocks.RestClientMock{ + HeadFunc: func(s string) (*http.Response, error) { + if strings.HasSuffix(s, "/all/index.xml") { + return &http.Response{ + Header: map[string][]string{ + "Content-Type": {"text/html"}, + }, + StatusCode: 200, + }, nil + } + if strings.HasSuffix(s, "/feed") { + return &http.Response{ + Header: map[string][]string{ + "Content-Type": {"text/xml"}, + }, + StatusCode: 200, + }, nil + } + return nil, fmt.Errorf("BOOM") + }, + }, + } + + feed, err := snder.discoverRssFeed("blah.com") + assert.NoError(t, err) + assert.Equal(t, "https://blah.com/feed", feed) +} + +func TestDiscoverMentionEndpoint(t *testing.T) { var sender = &Sender{ RestClient: &mocks.RestClientMock{ GetBodyFunc: 
mocks.RelPathGetBodyFunc("../../../mocks/"), @@ -126,7 +192,7 @@ func TestDiscover(t *testing.T) { } for _, tc := range cases { t.Run(tc.label, func(t *testing.T) { - link, mentionType := sender.discover(tc.url) + link, mentionType := sender.discoverMentionEndpoint(tc.url) assert.Equal(t, tc.expectedLink, link) assert.Equal(t, tc.expectedType, mentionType) }) diff --git a/app/webmention/send/send.go b/app/webmention/send/send.go index defef1a..a8a0740 100644 --- a/app/webmention/send/send.go +++ b/app/webmention/send/send.go @@ -44,10 +44,15 @@ func (snder *Sender) SendSingle(domain string, relSource string) { // It first GETs domain/index.xml and goes from there. func (snder *Sender) Send(domain string) { lastSent := snder.Repo.LastSentMention(domain) - feedUrl := "https://" + domain + "/index.xml" log.Info().Str("domain", domain).Str("lastsent", lastSent).Msg(` OK: someone wants to send mentions`) + feedUrl, err := snder.discoverRssFeed(domain) + if err != nil { + log.Err(err).Str("url", feedUrl).Msg("Unable to retrieve RSS feed, send aborted") + return + } _, feed, err := snder.RestClient.GetBody(feedUrl) + // just to be sure. Should not produce an error due to check above, but you never know. 
if err != nil { log.Err(err).Str("url", feedUrl).Msg("Unable to retrieve RSS feed, send aborted") return @@ -60,7 +65,7 @@ func (snder *Sender) Send(domain string) { } snder.Repo.UpdateLastSentMention(domain, lastSent) - log.Info().Str("domain", domain).Str("lastsent", lastSent).Msg(` OK: send processed.`) + log.Info().Str("feed", feedUrl).Str("lastsent", lastSent).Msg(` OK: send processed.`) } func (snder *Sender) parseRssFeed(feed string, lastSentLink string) (string, error) { @@ -106,7 +111,7 @@ var mentionFuncs = map[string]func(snder *Sender, mention mf.Mention, endpoint s } func (snder *Sender) sendMention(mention mf.Mention) { - endpoint, mentionType := snder.discover(mention.Target) + endpoint, mentionType := snder.discoverMentionEndpoint(mention.Target) mentionFuncs[mentionType](snder, mention, endpoint) } diff --git a/app/webmention/send/send_test.go b/app/webmention/send/send_test.go index 5336a54..b0d5175 100644 --- a/app/webmention/send/send_test.go +++ b/app/webmention/send/send_test.go @@ -141,6 +141,7 @@ func TestSendIntegrationTestCanSendBothWebmentionsAndPingbacks(t *testing.T) { Repo: db.NewMentionRepo(conf), RestClient: &mocks.RestClientMock{ GetBodyFunc: mocks.RelPathGetBodyFunc("./../../../mocks/"), + HeadFunc: mocks.Head200ContentXml(), PostFunc: func(url string, contentType string, body string) error { lock.Lock() defer lock.Unlock() diff --git a/mocks/all/index.xml b/mocks/all/index.xml new file mode 100644 index 0000000..09afc9b --- /dev/null +++ b/mocks/all/index.xml @@ -0,0 +1,41 @@ + + + + Brain Baking + https://brainbaking.com/ + Recent content on Brain Baking + Hugo -- gohugo.io + en-us + Wouter Groeneveld + Wouter Groeneveld + Tue, 16 Mar 2021 17:07:14 +0000 + + + + + + @celia @kev I have read both you and Kev's post on... 
+ https://brainbaking.com/notes/2021/03/16h17m07s14/ + https://brainbaking.com/notes/2021/03/16h17m07s14/#commento + Tue, 16 Mar 2021 17:07:14 +0000 + Wouter Groeneveld + https://brainbaking.com/notes/2021/03/16h17m07s14/ + + + + + hi there! test discovering: single. Nice!

+ +

this one is a pingback-only single one. Not good!

+ +

another cool link: multiple

+ +

last but not least: Nothin! What a shame.

+ + ]]> +
+
+
+
diff --git a/mocks/restclient.go b/mocks/restclient.go index 70d6841..2a3bc19 100644 --- a/mocks/restclient.go +++ b/mocks/restclient.go @@ -12,16 +12,20 @@ import ( // neat trick! https://medium.com/@matryer/meet-moq-easily-mock-interfaces-in-go-476444187d10 type RestClientMock struct { + HeadFunc func(string) (*http.Response, error) GetFunc func(string) (*http.Response, error) GetBodyFunc func(string) (http.Header, string, error) PostFunc func(string, string, string) error PostFormFunc func(string, url.Values) error } -// although these are still requied to match the rest.Client interface. +// although these are still required to match the rest.Client interface. func (m *RestClientMock) Get(url string) (*http.Response, error) { return m.GetFunc(url) } +func (m *RestClientMock) Head(url string) (*http.Response, error) { + return m.HeadFunc(url) +} func (m *RestClientMock) GetBody(url string) (http.Header, string, error) { return m.GetBodyFunc(url) } @@ -45,6 +49,17 @@ func toHttpHeader(header map[string]interface{}) http.Header { return httpHeader } +func Head200ContentXml() func(string) (*http.Response, error) { + return func(s string) (*http.Response, error) { + return &http.Response{ + Header: map[string][]string{ + "Content-Type": {"text/xml"}, + }, + StatusCode: 200, + }, nil + } +} + func RelPathGetBodyFunc(relPath string) func(string) (http.Header, string, error) { return func(url string) (http.Header, string, error) { log.Debug().Str("url", url).Msg(" - GET call") diff --git a/rest/client.go b/rest/client.go index 7606f2d..e84527b 100644 --- a/rest/client.go +++ b/rest/client.go @@ -14,6 +14,7 @@ import ( type Client interface { Get(url string) (*http.Response, error) + Head(url string) (*http.Response, error) Post(url string, contentType string, body string) error GetBody(url string) (http.Header, string, error) PostForm(url string, formData url.Values) error @@ -42,12 +43,16 @@ var ( ResponseAboveLimit = errors.New("response bigger than limit") ) +func 
(client *HttpClient) Head(url string) (*http.Response, error) { + return jammingHttp.Head(url) +} + func (client *HttpClient) PostForm(url string, formData url.Values) error { resp, err := jammingHttp.PostForm(url, formData) if err != nil { return fmt.Errorf("POST Form to %s: %v", url, err) } - if !isStatusOk(resp) { + if !IsStatusOk(resp) { return fmt.Errorf("POST Form to %s: Status code is not OK (%d)", url, resp.StatusCode) } return nil @@ -58,7 +63,7 @@ func (client *HttpClient) Post(url string, contenType string, body string) error if err != nil { return fmt.Errorf("POST to %s: %v", url, err) } - if !isStatusOk(resp) { + if !IsStatusOk(resp) { return fmt.Errorf("POST to %s: Status code is not OK (%d)", url, resp.StatusCode) } return nil @@ -72,7 +77,7 @@ func (client *HttpClient) GetBody(url string) (http.Header, string, error) { return nil, "", fmt.Errorf("GET from %s: %w", url, geterr) } - if !isStatusOk(resp) { + if !IsStatusOk(resp) { return nil, "", fmt.Errorf("GET from %s: Status code is not OK (%d)", url, resp.StatusCode) } @@ -85,7 +90,7 @@ func (client *HttpClient) GetBody(url string) (http.Header, string, error) { return resp.Header, string(body), nil } -func isStatusOk(resp *http.Response) bool { +func IsStatusOk(resp *http.Response) bool { return resp.StatusCode >= 200 && resp.StatusCode <= 299 }