2021-04-09 18:04:04 +02:00
|
|
|
package recv
|
2021-04-07 17:31:23 +02:00
|
|
|
|
|
|
|
import (
|
2021-04-09 18:04:04 +02:00
|
|
|
"brainbaking.com/go-jamming/app/mf"
|
|
|
|
"brainbaking.com/go-jamming/common"
|
2021-04-18 15:39:29 +02:00
|
|
|
"brainbaking.com/go-jamming/db"
|
2021-04-09 18:04:04 +02:00
|
|
|
"brainbaking.com/go-jamming/rest"
|
2021-04-22 16:02:59 +02:00
|
|
|
"errors"
|
2021-04-19 20:22:38 +02:00
|
|
|
"fmt"
|
2021-04-09 10:12:14 +02:00
|
|
|
"regexp"
|
|
|
|
"strings"
|
2021-04-07 17:31:23 +02:00
|
|
|
|
|
|
|
"github.com/rs/zerolog/log"
|
2021-04-08 12:50:15 +02:00
|
|
|
"willnorris.com/go/microformats"
|
2021-04-07 17:31:23 +02:00
|
|
|
)
|
|
|
|
|
2021-04-09 12:40:37 +02:00
|
|
|
type Receiver struct {
|
|
|
|
RestClient rest.Client
|
|
|
|
Conf *common.Config
|
2021-04-18 15:39:29 +02:00
|
|
|
Repo db.MentionRepo
|
2021-04-08 09:54:47 +02:00
|
|
|
}
|
|
|
|
|
2021-04-22 16:02:59 +02:00
|
|
|
var (
	// titleRegexp captures the text between <title> and </title>. The match is
	// non-greedy and, because "." does not match a newline by default, only
	// titles that sit on a single line are found.
	titleRegexp = regexp.MustCompile(`<title>(.*?)<\/title>`)

	// Sentinel errors returned by saveAuthorPictureLocally.
	errPicUnableToDownload          = errors.New("Unable to download author picture")
	errPicNoRealImage               = errors.New("Downloaded author picture is not a real image")
	errPicUnableToSave              = errors.New("Unable to save downloaded author picture")
	errWontDownloadBecauseOfPrivacy = errors.New("Will not save locally because it's from a silo domain")

	// siloDomains lists source domains whose author pictures are never
	// downloaded, for privacy reasons.
	siloDomains = []string{"brid.gy", "twitter.com"}
)
|
|
|
|
|
2021-04-09 14:21:25 +02:00
|
|
|
func (recv *Receiver) Receive(wm mf.Mention) {
|
2021-04-13 09:10:32 +02:00
|
|
|
log.Info().Stringer("wm", wm).Msg("OK: looks valid")
|
2021-04-11 09:50:27 +02:00
|
|
|
_, body, geterr := recv.RestClient.GetBody(wm.Source)
|
2021-04-07 17:31:23 +02:00
|
|
|
|
|
|
|
if geterr != nil {
|
2021-04-13 09:10:32 +02:00
|
|
|
log.Warn().Err(geterr).Msg(" ABORT: invalid url")
|
2021-04-18 15:39:29 +02:00
|
|
|
recv.Repo.Delete(wm)
|
2021-04-07 17:31:23 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2021-04-08 09:54:47 +02:00
|
|
|
recv.processSourceBody(body, wm)
|
2021-04-07 17:31:23 +02:00
|
|
|
}
|
|
|
|
|
2021-04-09 14:21:25 +02:00
|
|
|
func (recv *Receiver) processSourceBody(body string, wm mf.Mention) {
|
2021-04-09 12:40:37 +02:00
|
|
|
if !strings.Contains(body, wm.Target) {
|
|
|
|
log.Warn().Str("target", wm.Target).Msg("ABORT: no mention of target found in html src of source!")
|
2021-04-08 12:50:15 +02:00
|
|
|
return
|
|
|
|
}
|
2021-04-25 15:45:00 +02:00
|
|
|
defer func() {
|
|
|
|
if r := recover(); r != nil {
|
|
|
|
log.Error().Str("panic", fmt.Sprintf("%q", r)).Stringer("wm", wm).Msg("ABORT: panic recovery while processing wm")
|
|
|
|
}
|
|
|
|
}()
|
2021-04-08 12:50:15 +02:00
|
|
|
|
2021-04-09 14:21:25 +02:00
|
|
|
data := microformats.Parse(strings.NewReader(body), wm.SourceUrl())
|
2021-04-25 15:11:16 +02:00
|
|
|
indieweb := recv.convertBodyToIndiewebData(body, wm, data)
|
2021-05-01 20:37:52 +02:00
|
|
|
recv.processAuthorPicture(indieweb)
|
|
|
|
|
|
|
|
key, err := recv.Repo.Save(wm, indieweb)
|
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err).Stringer("wm", wm).Msg("Failed to save new mention to db")
|
|
|
|
}
|
|
|
|
log.Info().Str("key", key).Msg("OK: Webmention processed.")
|
|
|
|
}
|
|
|
|
|
|
|
|
func (recv *Receiver) processAuthorPicture(indieweb *mf.IndiewebData) {
|
2021-04-19 20:22:38 +02:00
|
|
|
if indieweb.Author.Picture != "" {
|
2021-04-22 16:02:59 +02:00
|
|
|
err := recv.saveAuthorPictureLocally(indieweb)
|
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err).Str("url", indieweb.Author.Picture).Msg("Failed to save picture. Reverting to anonymous")
|
2021-05-01 20:37:52 +02:00
|
|
|
indieweb.Author.AnonymizePicture()
|
2021-05-01 20:22:56 +02:00
|
|
|
|
|
|
|
if err == errWontDownloadBecauseOfPrivacy {
|
|
|
|
indieweb.Author.AnonymizeName()
|
|
|
|
}
|
2021-04-22 16:02:59 +02:00
|
|
|
}
|
2021-05-01 20:37:52 +02:00
|
|
|
} else {
|
|
|
|
indieweb.Author.AnonymizePicture()
|
2021-04-19 20:22:38 +02:00
|
|
|
}
|
2021-04-09 12:40:37 +02:00
|
|
|
}
|
|
|
|
|
2021-04-25 15:11:16 +02:00
|
|
|
func (recv *Receiver) convertBodyToIndiewebData(body string, wm mf.Mention, mfRoot *microformats.Data) *mf.IndiewebData {
|
|
|
|
hEntry := mf.HEntry(mfRoot)
|
|
|
|
hCard := mf.HCard(mfRoot)
|
2021-04-08 16:37:04 +02:00
|
|
|
if hEntry == nil {
|
2021-04-09 12:40:37 +02:00
|
|
|
return recv.parseBodyAsNonIndiewebSite(body, wm)
|
2021-04-08 16:37:04 +02:00
|
|
|
}
|
2021-04-25 15:11:16 +02:00
|
|
|
return recv.parseBodyAsIndiewebSite(hEntry, hCard, wm)
|
2021-04-08 16:37:04 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// see https://github.com/willnorris/microformats/blob/main/microformats.go
|
2021-04-25 15:11:16 +02:00
|
|
|
func (recv *Receiver) parseBodyAsIndiewebSite(hEntry *microformats.Microformat, hCard *microformats.Microformat, wm mf.Mention) *mf.IndiewebData {
|
2021-04-09 14:21:25 +02:00
|
|
|
return &mf.IndiewebData{
|
2021-04-25 15:11:16 +02:00
|
|
|
Name: mf.Str(hEntry, "name"),
|
|
|
|
Author: mf.NewAuthor(hEntry, hCard),
|
2021-04-13 09:10:32 +02:00
|
|
|
Content: mf.Content(hEntry),
|
|
|
|
Url: mf.Url(hEntry, wm.Source),
|
2021-04-26 11:15:11 +02:00
|
|
|
Published: mf.Published(hEntry, recv.Conf.Zone()),
|
2021-04-09 21:00:54 +02:00
|
|
|
Source: wm.Source,
|
|
|
|
Target: wm.Target,
|
2021-04-13 09:10:32 +02:00
|
|
|
IndiewebType: mf.Type(hEntry),
|
2021-04-08 16:37:04 +02:00
|
|
|
}
|
2021-04-09 10:12:14 +02:00
|
|
|
}
|
|
|
|
|
2021-04-09 14:21:25 +02:00
|
|
|
func (recv *Receiver) parseBodyAsNonIndiewebSite(body string, wm mf.Mention) *mf.IndiewebData {
|
2021-04-13 09:10:32 +02:00
|
|
|
title := nonIndiewebTitle(body, wm)
|
2021-04-09 14:21:25 +02:00
|
|
|
return &mf.IndiewebData{
|
|
|
|
Author: mf.IndiewebAuthor{
|
2021-04-09 12:40:37 +02:00
|
|
|
Name: wm.Source,
|
2021-04-09 10:12:14 +02:00
|
|
|
},
|
2021-04-09 21:00:54 +02:00
|
|
|
Name: title,
|
|
|
|
Content: title,
|
2021-04-26 11:15:11 +02:00
|
|
|
Published: mf.PublishedNow(recv.Conf.Zone()),
|
2021-04-09 21:00:54 +02:00
|
|
|
Url: wm.Source,
|
2021-04-13 09:10:32 +02:00
|
|
|
IndiewebType: mf.TypeMention,
|
2021-04-09 21:00:54 +02:00
|
|
|
Source: wm.Source,
|
|
|
|
Target: wm.Target,
|
2021-04-09 10:12:14 +02:00
|
|
|
}
|
2021-04-07 17:31:23 +02:00
|
|
|
}
|
2021-04-13 09:10:32 +02:00
|
|
|
|
2021-04-22 16:02:59 +02:00
|
|
|
// saveAuthorPictureLocally tries to download the author picture and checks if it's valid based on img header.
|
2021-04-19 21:38:53 +02:00
|
|
|
// If it succeeds, it alters the picture path to a local /pictures/x one.
|
2021-04-22 16:02:59 +02:00
|
|
|
// If it fails, it returns an error.
|
2021-05-01 20:22:56 +02:00
|
|
|
// This refuses to download from silo sources such as brid.gy because of privacy concerns.
|
2021-04-22 16:02:59 +02:00
|
|
|
func (recv *Receiver) saveAuthorPictureLocally(indieweb *mf.IndiewebData) error {
|
2021-05-01 20:22:56 +02:00
|
|
|
srcDomain := rest.Domain(indieweb.Source)
|
|
|
|
for _, siloDomain := range siloDomains {
|
|
|
|
if srcDomain == siloDomain {
|
|
|
|
return errWontDownloadBecauseOfPrivacy
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-19 20:22:38 +02:00
|
|
|
_, picData, err := recv.RestClient.GetBody(indieweb.Author.Picture)
|
|
|
|
if err != nil {
|
2021-04-22 16:02:59 +02:00
|
|
|
return errPicUnableToDownload
|
2021-04-19 20:22:38 +02:00
|
|
|
}
|
2021-04-22 16:02:59 +02:00
|
|
|
if len(picData) < 8 || !rest.IsRealImage([]byte(picData[0:8])) {
|
|
|
|
return errPicNoRealImage
|
|
|
|
}
|
|
|
|
|
2021-04-19 20:22:38 +02:00
|
|
|
_, dberr := recv.Repo.SavePicture(picData, srcDomain)
|
|
|
|
if dberr != nil {
|
2021-04-22 16:02:59 +02:00
|
|
|
return errPicUnableToSave
|
2021-04-19 20:22:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
indieweb.Author.Picture = fmt.Sprintf("/pictures/%s", srcDomain)
|
2021-04-22 16:02:59 +02:00
|
|
|
return nil
|
2021-04-19 20:22:38 +02:00
|
|
|
}
|
|
|
|
|
2021-04-13 09:10:32 +02:00
|
|
|
func nonIndiewebTitle(body string, wm mf.Mention) string {
|
|
|
|
titleMatch := titleRegexp.FindStringSubmatch(body)
|
|
|
|
title := wm.Source
|
|
|
|
if titleMatch != nil {
|
|
|
|
title = titleMatch[1]
|
|
|
|
}
|
|
|
|
return title
|
|
|
|
}
|