anonymize mentions coming from silos such as brid.gy

This commit is contained in:
Wouter Groeneveld 2021-05-01 20:22:56 +02:00
parent 40ae44b2fd
commit c5dcb45de9
5 changed files with 62 additions and 6 deletions

View File

@ -224,3 +224,8 @@ That's pretty flexible. I have not taken the trouble to put this into the config
A separate goroutine cleans up IPs every 2 minutes; the TTL is 5 minutes. See `limiter.go`.
Database migrations are run using the `-migrate` flag.
## TODOs
- Pictures are bound to domain names only. That means every mention coming from `brid.gy` ends up sharing a single picture. Perhaps keying pictures on the combination of domain + user would be more appropriate?

View File

@ -19,6 +19,8 @@ const (
)
var (
// This is similar to Hugo's string-to-date casting system
// See https://github.com/spf13/cast/blob/master/caste.go
supportedFormats = []string{
dateFormatWithTimeZone,
dateFormatWithAbsoluteTimeZone,
@ -38,6 +40,10 @@ func (ia *IndiewebAuthor) Anonymize() {
ia.Picture = fmt.Sprintf("/pictures/%s", Anonymous)
}
// AnonymizeName overwrites the author's name with the fixed placeholder "Anonymous".
// It only touches the Name field.
func (ia *IndiewebAuthor) AnonymizeName() {
ia.Name = "Anonymous"
}
type IndiewebDataResult struct {
Status string `json:"status"`
Data []*IndiewebData `json:"json"`

View File

@ -18,12 +18,16 @@ func init() {
}
}
// bridgy is the brid.gy domain name, compared against the requested picture domain.
const bridgy = "brid.gy"
// Handle handles picture GET calls.
// It does not validate the picture query as it's part of a composite key anyway.
func Handle(repo db.MentionRepo) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
picDomain := mux.Vars(r)["picture"]
if picDomain == mf.Anonymous {
if picDomain == mf.Anonymous || picDomain == bridgy {
servePicture(w, anonymous)
return
}

View File

@ -23,9 +23,12 @@ type Receiver struct {
var (
	// titleRegexp captures the text found between <title> and </title>.
	titleRegexp = regexp.MustCompile(`<title>(.*?)<\/title>`)

	// Sentinel errors describing why an author picture was not stored locally.
	// Each one is declared exactly once; callers compare against them by identity.
	errPicUnableToDownload          = errors.New("Unable to download author picture")
	errPicNoRealImage               = errors.New("Downloaded author picture is not a real image")
	errPicUnableToSave              = errors.New("Unable to save downloaded author picture")
	errWontDownloadBecauseOfPrivacy = errors.New("Will not save locally because it's from a silo domain")

	// siloDomains lists the source domains that are treated as silos.
	siloDomains = []string{"brid.gy", "twitter.com"}
)
func (recv *Receiver) Receive(wm mf.Mention) {
@ -59,6 +62,10 @@ func (recv *Receiver) processSourceBody(body string, wm mf.Mention) {
if err != nil {
log.Error().Err(err).Str("url", indieweb.Author.Picture).Msg("Failed to save picture. Reverting to anonymous")
indieweb.Author.Anonymize()
if err == errWontDownloadBecauseOfPrivacy {
indieweb.Author.AnonymizeName()
}
}
}
@ -111,7 +118,15 @@ func (recv *Receiver) parseBodyAsNonIndiewebSite(body string, wm mf.Mention) *mf
// saveAuthorPictureLocally tries to download the author picture and checks if it's valid based on img header.
// If it succeeds, it alters the picture path to a local /pictures/x one.
// If it fails, it returns an error.
// This refuses to download from silo sources such as brid.gy because of privacy concerns.
func (recv *Receiver) saveAuthorPictureLocally(indieweb *mf.IndiewebData) error {
srcDomain := rest.Domain(indieweb.Source)
for _, siloDomain := range siloDomains {
if srcDomain == siloDomain {
return errWontDownloadBecauseOfPrivacy
}
}
_, picData, err := recv.RestClient.GetBody(indieweb.Author.Picture)
if err != nil {
return errPicUnableToDownload
@ -120,7 +135,6 @@ func (recv *Receiver) saveAuthorPictureLocally(indieweb *mf.IndiewebData) error
return errPicNoRealImage
}
srcDomain := rest.Domain(indieweb.Source)
_, dberr := recv.Repo.SavePicture(picData, srcDomain)
if dberr != nil {
return errPicUnableToSave

View File

@ -37,6 +37,12 @@ func TestSaveAuthorPictureLocally(t *testing.T) {
"/pictures/brainbaking.com",
nil,
},
{
"Refuses to download if it's from a silo domain and possibly involves GDPR privacy issues",
"https://brid.gy/picture.jpg",
"https://brid.gy/picture.jpg",
errWontDownloadBecauseOfPrivacy,
},
{
"Absolute URL does not get replaced but error if no valid image",
"https://brainbaking.com/index.xml",
@ -63,7 +69,7 @@ func TestSaveAuthorPictureLocally(t *testing.T) {
}
indieweb := &mf.IndiewebData{
Source: "https://brainbaking.com",
Source: tc.pictureUrl,
Author: mf.IndiewebAuthor{
Picture: tc.pictureUrl,
},
@ -242,6 +248,27 @@ func TestReceiveTargetThatDoesNotPointToTheSourceDoesNothing(t *testing.T) {
assert.Empty(t, repo.GetAll("brainbaking.com").Data)
}
// TestProcessSourceBodyAnonymizesBothAuthorPictureAndNameIfComingFromSilo feeds a
// brid.gy-sourced mention through processSourceBody and checks that the stored
// mention carries the anonymous author name as well as the anonymous picture path.
func TestProcessSourceBodyAnonymizesBothAuthorPictureAndNameIfComingFromSilo(t *testing.T) {
	// Arrange: a mention whose source lives on a silo domain.
	mention := mf.Mention{
		Source: "https://brid.gy/post/twitter/ChrisAldrich/1387130900962443264",
		Target: "https://brainbaking.com/",
	}
	mentionRepo := db.NewMentionRepo(conf)
	recv := &Receiver{Conf: conf, Repo: mentionRepo}

	html, readErr := ioutil.ReadFile("../../../mocks/valid-bridgy-source.html")
	assert.NoError(t, readErr)

	// Act.
	recv.processSourceBody(string(html), mention)

	// Assert: both the name and the picture were anonymized.
	stored := mentionRepo.Get(mention)
	assert.Equal(t, "Anonymous", stored.Author.Name)
	assert.Equal(t, "/pictures/anonymous", stored.Author.Picture)
}
func TestProcessSourceBodyAbortsIfNoMentionOfTargetFoundInSourceHtml(t *testing.T) {
wm := mf.Mention{
Source: "https://brainbaking.com",