diff --git a/app/mf/microformats.go b/app/mf/microformats.go index 77b5a00..d559887 100644 --- a/app/mf/microformats.go +++ b/app/mf/microformats.go @@ -34,7 +34,7 @@ type IndiewebData struct { Content string `json:"content"` Published string `json:"published"` Url string `json:"url"` - IndiewebType string `json:"type"` + IndiewebType MfType `json:"type"` Source string `json:"source"` Target string `json:"target"` } @@ -47,7 +47,7 @@ func shorten(txt string) string { if len(txt) <= 250 { return txt } - return txt[0:250] + "..." + return txt[:250] + "..." } // Go stuff: entry.Properties["name"][0].(string), @@ -103,7 +103,7 @@ func Prop(mf *microformats.Microformat, key string) *microformats.Microformat { return val[0].(*microformats.Microformat) } -func DeterminePublishedDate(hEntry *microformats.Microformat, utcOffset int) string { +func Published(hEntry *microformats.Microformat, utcOffset int) string { publishedDate := Str(hEntry, "published") if publishedDate == "" { return PublishedNow(utcOffset) @@ -119,20 +119,28 @@ func DetermineAuthorName(hEntry *microformats.Microformat) string { return authorName } -func DetermineType(hEntry *microformats.Microformat) string { +type MfType string + +const ( + TypeLike MfType = "like" + TypeBookmark MfType = "bookmark" + TypeMention MfType = "mention" +) + +func Type(hEntry *microformats.Microformat) MfType { likeOf := Str(hEntry, "like-of") if likeOf != "" { - return "like" + return TypeLike } bookmarkOf := Str(hEntry, "bookmark-of") if bookmarkOf != "" { - return "bookmark" + return TypeBookmark } - return "mention" + return TypeMention } // Mastodon uids start with "tag:server", but we do want indieweb uids from other sources -func DetermineUrl(hEntry *microformats.Microformat, source string) string { +func Url(hEntry *microformats.Microformat, source string) string { uid := Str(hEntry, "uid") if uid != "" && strings.HasPrefix(uid, "http") { return uid @@ -144,7 +152,7 @@ func DetermineUrl(hEntry *microformats.Microformat, source string) string { return source } -func DetermineContent(hEntry *microformats.Microformat) string { +func Content(hEntry *microformats.Microformat) string { bridgyTwitterContent := Str(hEntry, "bridgy-twitter-content") if bridgyTwitterContent != "" { return shorten(bridgyTwitterContent) diff --git a/app/pingback/handler.go b/app/pingback/handler.go index 17dfbd3..ba7b836 100644 --- a/app/pingback/handler.go +++ b/app/pingback/handler.go @@ -34,7 +34,7 @@ func HandlePost(conf *common.Config) http.HandlerFunc { Source: rpc.Source(), Target: rpc.Target(), } - receiver := recv.Receiver{ + receiver := &recv.Receiver{ RestClient: &rest.HttpClient{}, Conf: conf, } diff --git a/app/pingback/send/send.go b/app/pingback/send/send.go index bad6bd0..3df64e2 100644 --- a/app/pingback/send/send.go +++ b/app/pingback/send/send.go @@ -44,8 +44,8 @@ type Sender struct { func (sender *Sender) SendPingbackToEndpoint(endpoint string, mention mf.Mention) { err := sender.RestClient.Post(endpoint, "text/xml", body.fill(mention)) if err != nil { - log.Err(err).Str("endpoint", endpoint).Str("wm", mention.String()).Msg("Unable to send pingback") + log.Err(err).Stringer("wm", mention).Msg("Unable to send pingback") return } - log.Info().Str("endpoint", endpoint).Str("wm", mention.String()).Msg("Pingback sent") + log.Info().Str("endpoint", endpoint).Stringer("wm", mention).Msg("Pingback sent") } diff --git a/app/rss/feed.go b/app/rss/feed.go index 4be1d66..727e4b9 100644 --- a/app/rss/feed.go +++ b/app/rss/feed.go @@ -8,7 +8,7 @@ import ( "time" ) -// someone already did this for me, yay! https://siongui.github.io/2015/03/03/go-parse-web-feed-rss-atom/ +// someone already did this for me, yay! type Rss2 struct { XMLName xml.Name `xml:"rss"` Version string `xml:"version,attr"` @@ -61,6 +61,7 @@ type Entry struct { Author Author `xml:"author"` } +// Based on https://siongui.github.io/2015/03/03/go-parse-web-feed-rss-atom/ func ParseFeed(content []byte) (*Rss2, error) { v := &Rss2{} err := xml.Unmarshal(content, v) @@ -77,5 +78,5 @@ func ParseFeed(content []byte) (*Rss2, error) { return v, nil } - return v, errors.New("not RSS 2.0") + return v, errors.New("ParseFeed: not RSS 2.0") } diff --git a/app/webmention/handler.go b/app/webmention/handler.go index b0f76a5..0f822ec 100644 --- a/app/webmention/handler.go +++ b/app/webmention/handler.go @@ -12,7 +12,9 @@ import ( "brainbaking.com/go-jamming/rest" ) -var httpClient = &rest.HttpClient{} +var ( + httpClient = &rest.HttpClient{} +) func HandleGet(conf *common.Config) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { diff --git a/app/webmention/recv/receive.go b/app/webmention/recv/receive.go index c29e150..b13cfa7 100644 --- a/app/webmention/recv/receive.go +++ b/app/webmention/recv/receive.go @@ -23,11 +23,11 @@ type Receiver struct { } func (recv *Receiver) Receive(wm mf.Mention) { - log.Info().Str("Webmention", wm.String()).Msg("OK: looks valid") + log.Info().Stringer("wm", wm).Msg("OK: looks valid") _, body, geterr := recv.RestClient.GetBody(wm.Source) if geterr != nil { - log.Warn().Str("source", wm.Source).Msg(" ABORT: invalid url") + log.Warn().Err(geterr).Msg(" ABORT: invalid url") recv.deletePossibleOlderWebmention(wm) return } @@ -35,6 +35,7 @@ func (recv *Receiver) Receive(wm mf.Mention) { recv.processSourceBody(body, wm) } +// Deletes a possible webmention. Ignores remove errors. func (recv *Receiver) deletePossibleOlderWebmention(wm mf.Mention) { os.Remove(wm.AsPath(recv.Conf)) } @@ -48,7 +49,9 @@ func (recv *Receiver) processSourceBody(body string, wm mf.Mention) { data := microformats.Parse(strings.NewReader(body), wm.SourceUrl()) indieweb := recv.convertBodyToIndiewebData(body, wm, mf.HEntry(data)) - recv.saveWebmentionToDisk(wm, indieweb) + if err := recv.saveWebmentionToDisk(wm, indieweb); err != nil { + log.Err(err).Msg("Unable to save Webmention to disk") + } log.Info().Str("file", wm.AsPath(recv.Conf)).Msg("OK: Webmention processed.") } @@ -59,46 +62,41 @@ func (recv *Receiver) convertBodyToIndiewebData(body string, wm mf.Mention, hEnt return recv.parseBodyAsIndiewebSite(hEntry, wm) } -func (recv *Receiver) saveWebmentionToDisk(wm mf.Mention, indieweb *mf.IndiewebData) { +func (recv *Receiver) saveWebmentionToDisk(wm mf.Mention, indieweb *mf.IndiewebData) error { jsonData, jsonErr := json.Marshal(indieweb) if jsonErr != nil { - log.Err(jsonErr).Msg("Unable to serialize Webmention into JSON") + return jsonErr } err := ioutil.WriteFile(wm.AsPath(recv.Conf), jsonData, fs.ModePerm) if err != nil { - log.Err(err).Msg("Unable to save Webmention to disk") + return err } + return nil } -// TODO I'm smelling very unstable code, apply https://golang.org/doc/effective_go#recover here? // see https://github.com/willnorris/microformats/blob/main/microformats.go func (recv *Receiver) parseBodyAsIndiewebSite(hEntry *microformats.Microformat, wm mf.Mention) *mf.IndiewebData { - name := mf.Str(hEntry, "name") - pic := mf.Str(mf.Prop(hEntry, "author"), "photo") - mfType := mf.DetermineType(hEntry) - return &mf.IndiewebData{ - Name: name, + Name: mf.Str(hEntry, "name"), Author: mf.IndiewebAuthor{ Name: mf.DetermineAuthorName(hEntry), - Picture: pic, + Picture: mf.Str(mf.Prop(hEntry, "author"), "photo"), }, - Content: mf.DetermineContent(hEntry), - Url: mf.DetermineUrl(hEntry, wm.Source), - Published: mf.DeterminePublishedDate(hEntry, recv.Conf.UtcOffset), + Content: mf.Content(hEntry), + Url: mf.Url(hEntry, wm.Source), + Published: mf.Published(hEntry, recv.Conf.UtcOffset), Source: wm.Source, Target: wm.Target, - IndiewebType: mfType, + IndiewebType: mf.Type(hEntry), } } +var ( + titleRegexp = regexp.MustCompile(`(.*?)<\/title>`) +) + func (recv *Receiver) parseBodyAsNonIndiewebSite(body string, wm mf.Mention) *mf.IndiewebData { - r := regexp.MustCompile(`<title>(.*?)<\/title>`) - titleMatch := r.FindStringSubmatch(body) - title := wm.Source - if titleMatch != nil { - title = titleMatch[1] - } + title := nonIndiewebTitle(body, wm) return &mf.IndiewebData{ Author: mf.IndiewebAuthor{ Name: wm.Source, @@ -107,8 +105,17 @@ func (recv *Receiver) parseBodyAsNonIndiewebSite(body string, wm mf.Mention) *mf Content: title, Published: mf.PublishedNow(recv.Conf.UtcOffset), Url: wm.Source, - IndiewebType: "mention", + IndiewebType: mf.TypeMention, Source: wm.Source, Target: wm.Target, } } + +func nonIndiewebTitle(body string, wm mf.Mention) string { + titleMatch := titleRegexp.FindStringSubmatch(body) + title := wm.Source + if titleMatch != nil { + title = titleMatch[1] + } + return title +} diff --git a/app/webmention/send/discoverer.go b/app/webmention/send/discoverer.go index 1dc3ae6..f51e37f 100644 --- a/app/webmention/send/discoverer.go +++ b/app/webmention/send/discoverer.go @@ -8,41 +8,41 @@ import ( ) const ( - TypeWebmention string = "webmention" - TypeUnknown string = "unknown" - TypePingback string = "pingback" + typeWebmention string = "webmention" + typeUnknown string = "unknown" + typePingback string = "pingback" ) func (sndr *Sender) discover(target string) (link string, mentionType string) { - mentionType = TypeUnknown + mentionType = typeUnknown header, body, err := sndr.RestClient.GetBody(target) if err != nil { log.Warn().Str("target", target).Msg("Failed to discover possible endpoint, aborting send") return } - if strings.Contains(header.Get("link"), TypeWebmention) { - return buildWebmentionHeaderLink(header.Get("link")), TypeWebmention + if strings.Contains(header.Get("link"), typeWebmention) { + return buildWebmentionHeaderLink(header.Get("link")), typeWebmention } if header.Get("X-Pingback") != "" { - return header.Get("X-Pingback"), TypePingback + return header.Get("X-Pingback"), typePingback } // this also complies with w3.org regulations: relative endpoint could be possible format := microformats.Parse(strings.NewReader(body), rest.BaseUrlOf(target)) - if len(format.Rels[TypeWebmention]) > 0 { - mentionType = TypeWebmention - link = format.Rels[TypeWebmention][0] - } else if len(format.Rels[TypePingback]) > 0 { - mentionType = TypePingback - link = format.Rels[TypePingback][0] + if len(format.Rels[typeWebmention]) > 0 { + mentionType = typeWebmention + link = format.Rels[typeWebmention][0] + } else if len(format.Rels[typePingback]) > 0 { + mentionType = typePingback + link = format.Rels[typePingback][0] } return } +// e.g. Link: <http://aaronpk.example/webmention-endpoint>; rel="webmention" func buildWebmentionHeaderLink(link string) string { - // e.g. Link: <http://aaronpk.example/webmention-endpoint>; rel="webmention" raw := strings.Split(link, ";")[0][1:] return raw[:len(raw)-1] } diff --git a/app/webmention/send/discoverer_test.go b/app/webmention/send/discoverer_test.go index 4df0d71..abfe7e4 100644 --- a/app/webmention/send/discoverer_test.go +++ b/app/webmention/send/discoverer_test.go @@ -23,49 +23,49 @@ func TestDiscover(t *testing.T) { "discover 'unknown' if no link is present", "https://brainbaking.com/link-discover-test-none.html", "", - TypeUnknown, + typeUnknown, }, { "prefer webmentions over pingbacks if both links are present", "https://brainbaking.com/link-discover-bothtypes.html", "http://aaronpk.example/webmention-endpoint", - TypeWebmention, + typeWebmention, }, { "pingbacks: discover link if present in header", "https://brainbaking.com/pingback-discover-test.html", "http://aaronpk.example/pingback-endpoint", - TypePingback, + typePingback, }, { "pingbacks: discover link if sole entry somewhere in html", "https://brainbaking.com/pingback-discover-test-single.html", "http://aaronpk.example/pingback-endpoint-body", - TypePingback, + typePingback, }, { "pingbacks: use link in header if multiple present in html", "https://brainbaking.com/pingback-discover-test-multiple.html", "http://aaronpk.example/pingback-endpoint-header", - TypePingback, + typePingback, }, { "webmentions: discover link if present in header", "https://brainbaking.com/link-discover-test.html", "http://aaronpk.example/webmention-endpoint", - TypeWebmention, + typeWebmention, }, { "webmentions: discover link if sole entry somewhere in html", "https://brainbaking.com/link-discover-test-single.html", "http://aaronpk.example/webmention-endpoint-body", - TypeWebmention, + typeWebmention, }, { "webmentions: use link in header if multiple present in html", "https://brainbaking.com/link-discover-test-multiple.html", "http://aaronpk.example/webmention-endpoint-header", - TypeWebmention, + typeWebmention, }, } for _, tc := range cases { diff --git a/app/webmention/send/rsslinkcollector.go b/app/webmention/send/rsslinkcollector.go index 3f4efef..5eb8547 100644 --- a/app/webmention/send/rsslinkcollector.go +++ b/app/webmention/send/rsslinkcollector.go @@ -54,14 +54,17 @@ func (snder *Sender) Collect(xml string, since time.Time) ([]RSSItem, error) { return items, nil } +var ( + hrefRegexp = regexp.MustCompile(`href="(.+?)"`) + extRegexp = regexp.MustCompile(`\.(gif|zip|rar|bz2|gz|7z|jpe?g|tiff?|png|webp|bmp)$`) +) + func (snder *Sender) collectUniqueHrefsFromDescription(html string) []string { - r := regexp.MustCompile(`href="(.+?)"`) - ext := regexp.MustCompile(`\.(gif|zip|rar|bz2|gz|7z|jpe?g|tiff?|png|webp|bmp)$`) urlmap := common.NewSet() - for _, match := range r.FindAllStringSubmatch(html, -1) { + for _, match := range hrefRegexp.FindAllStringSubmatch(html, -1) { url := match[1] // [0] is the match of the entire expression, [1] is the capture group - if !ext.MatchString(url) && !snder.Conf.ContainsDisallowedDomain(url) { + if !extRegexp.MatchString(url) && !snder.Conf.ContainsDisallowedDomain(url) { urlmap.Add(url) } } diff --git a/app/webmention/send/send.go b/app/webmention/send/send.go index c6efc54..6d62bb2 100644 --- a/app/webmention/send/send.go +++ b/app/webmention/send/send.go @@ -17,21 +17,22 @@ type Sender struct { func (snder *Sender) Send(domain string, since string) { log.Info().Str("domain", domain).Str("since", since).Msg(` OK: someone wants to send mentions`) - _, feed, err := snder.RestClient.GetBody("https://" + domain + "/index.xml") + feedUrl := "https://" + domain + "/index.xml" + _, feed, err := snder.RestClient.GetBody(feedUrl) if err != nil { - log.Err(err).Str("domain", domain).Msg("Unable to retrieve RSS feed, aborting send") + log.Err(err).Str("url", feedUrl).Msg("Unable to retrieve RSS feed, send aborted") return } - snder.parseRssFeed(feed, common.IsoToTime(since)) - log.Info().Str("domain", domain).Str("since", since).Msg(` OK: sending done.`) + if err = snder.parseRssFeed(feed, common.IsoToTime(since)); err != nil { + log.Err(err).Str("url", feedUrl).Msg("Unable to parse RSS feed, send aborted") + } } -func (snder *Sender) parseRssFeed(feed string, since time.Time) { +func (snder *Sender) parseRssFeed(feed string, since time.Time) error { items, err := snder.Collect(feed, since) if err != nil { - log.Err(err).Msg("Unable to parse RSS fed, aborting send") - return + return err } var wg sync.WaitGroup @@ -51,12 +52,13 @@ func (snder *Sender) parseRssFeed(feed string, since time.Time) { } } wg.Wait() + return nil } var mentionFuncs = map[string]func(snder *Sender, mention mf.Mention, endpoint string){ - TypeUnknown: func(snder *Sender, mention mf.Mention, endpoint string) {}, - TypeWebmention: sendMentionAsWebmention, - TypePingback: sendMentionAsPingback, + typeUnknown: func(snder *Sender, mention mf.Mention, endpoint string) {}, + typeWebmention: sendMentionAsWebmention, + typePingback: sendMentionAsPingback, } func (snder *Sender) sendMention(mention mf.Mention) { @@ -67,10 +69,10 @@ func (snder *Sender) sendMention(mention mf.Mention) { func sendMentionAsWebmention(snder *Sender, mention mf.Mention, endpoint string) { err := snder.RestClient.PostForm(endpoint, mention.AsFormValues()) if err != nil { - log.Err(err).Str("endpoint", endpoint).Str("wm", mention.String()).Msg("Webmention send failed") + log.Err(err).Str("endpoint", endpoint).Stringer("wm", mention).Msg("Webmention send failed") return } - log.Info().Str("endpoint", endpoint).Str("wm", mention.String()).Msg("OK: webmention sent.") + log.Info().Str("endpoint", endpoint).Stringer("wm", mention).Msg("OK: webmention sent.") } func sendMentionAsPingback(snder *Sender, mention mf.Mention, endpoint string) { diff --git a/rest/client.go b/rest/client.go index 651592b..b682b24 100644 --- a/rest/client.go +++ b/rest/client.go @@ -37,7 +37,7 @@ var ( func (client *HttpClient) PostForm(url string, formData url.Values) error { resp, err := jammingHttp.PostForm(url, formData) if err != nil { - return err + return fmt.Errorf("POST Form to %s: %v", url, err) } if !isStatusOk(resp) { return fmt.Errorf("POST Form to %s: Status code is not OK (%d)", url, resp.StatusCode) @@ -48,7 +48,7 @@ func (client *HttpClient) PostForm(url string, formData url.Values) error { func (client *HttpClient) Post(url string, contenType string, body string) error { resp, err := jammingHttp.Post(url, contenType, strings.NewReader(body)) if err != nil { - return err + return fmt.Errorf("POST to %s: %v", url, err) } if !isStatusOk(resp) { return fmt.Errorf("POST to %s: Status code is not OK (%d)", url, resp.StatusCode) @@ -60,28 +60,19 @@ func (client *HttpClient) Post(url string, contenType string, body string) error func (client *HttpClient) GetBody(url string) (http.Header, string, error) { resp, geterr := client.Get(url) if geterr != nil { - return nil, "", geterr + return nil, "", fmt.Errorf("GET from %s: %v", url, geterr) } - body, err := ReadBodyFromResponse(resp) - if err != nil { - return nil, "", err - } - - return resp.Header, body, nil -} - -func ReadBodyFromResponse(resp *http.Response) (string, error) { if !isStatusOk(resp) { - return "", fmt.Errorf("Status code is not OK (%d)", resp.StatusCode) + return nil, "", fmt.Errorf("GET from %s: Status code is not OK (%d)", url, resp.StatusCode) } body, readerr := ioutil.ReadAll(resp.Body) defer resp.Body.Close() if readerr != nil { - return "", readerr + return nil, "", fmt.Errorf("GET from %s: unable to read body: %v", url, readerr) } - return string(body), nil + return resp.Header, string(body), nil } func isStatusOk(resp *http.Response) bool {