diff --git a/README.md b/README.md index efa837c..f6b3f33 100644 --- a/README.md +++ b/README.md @@ -224,3 +224,8 @@ That's pretty flexible. I have not taken the trouble to put this into the config A separate goroutine cleans up ips each 2 minutes, the TTL is 5 minutes. See `limiter.go`. Database migrations are run using the `-migrate` flag. + +## TODOs + +- Pictures are bound to domain names only. That means `brid.gy` will net a single picture. Perhaps the combination domain + user would be more appropriate? + diff --git a/app/mf/microformats.go b/app/mf/microformats.go index 94343a0..1b1d8d7 100644 --- a/app/mf/microformats.go +++ b/app/mf/microformats.go @@ -19,6 +19,8 @@ const ( ) var ( + // This is similar to Hugo's string-to-date casting system + // See https://github.com/spf13/cast/blob/master/caste.go supportedFormats = []string{ dateFormatWithTimeZone, dateFormatWithAbsoluteTimeZone, @@ -38,6 +40,10 @@ func (ia *IndiewebAuthor) Anonymize() { ia.Picture = fmt.Sprintf("/pictures/%s", Anonymous) } +func (ia *IndiewebAuthor) AnonymizeName() { + ia.Name = "Anonymous" +} + type IndiewebDataResult struct { Status string `json:"status"` Data []*IndiewebData `json:"json"` diff --git a/app/pictures/handler.go b/app/pictures/handler.go index 017de24..59644d3 100644 --- a/app/pictures/handler.go +++ b/app/pictures/handler.go @@ -18,12 +18,16 @@ func init() { } } +const ( + bridgy = "brid.gy" +) + // Handle handles picture GET calls. // It does not validate the picture query as it's part of a composite key anyway. 
func Handle(repo db.MentionRepo) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { picDomain := mux.Vars(r)["picture"] - if picDomain == mf.Anonymous { + if picDomain == mf.Anonymous || picDomain == bridgy { servePicture(w, anonymous) return } diff --git a/app/webmention/recv/receive.go b/app/webmention/recv/receive.go index f70cd6a..860aa7c 100644 --- a/app/webmention/recv/receive.go +++ b/app/webmention/recv/receive.go @@ -23,9 +23,12 @@ type Receiver struct { var ( titleRegexp = regexp.MustCompile(`<title>(.*?)<\/title>`) - errPicUnableToDownload = errors.New("Unable to download author picture") - errPicNoRealImage = errors.New("Downloaded author picture is not a real image") - errPicUnableToSave = errors.New("Unable to save downloaded author picture") + errPicUnableToDownload = errors.New("Unable to download author picture") + errPicNoRealImage = errors.New("Downloaded author picture is not a real image") + errPicUnableToSave = errors.New("Unable to save downloaded author picture") + errWontDownloadBecauseOfPrivacy = errors.New("Will not save locally because it's from a silo domain") + + siloDomains = []string{"brid.gy", "twitter.com"} ) func (recv *Receiver) Receive(wm mf.Mention) { @@ -59,6 +62,10 @@ func (recv *Receiver) processSourceBody(body string, wm mf.Mention) { if err != nil { log.Error().Err(err).Str("url", indieweb.Author.Picture).Msg("Failed to save picture. Reverting to anonymous") indieweb.Author.Anonymize() + + if err == errWontDownloadBecauseOfPrivacy { + indieweb.Author.AnonymizeName() + } } } @@ -111,7 +118,15 @@ func (recv *Receiver) parseBodyAsNonIndiewebSite(body string, wm mf.Mention) *mf // saveAuthorPictureLocally tries to download the author picture and checks if it's valid based on img header. // If it succeeds, it alters the picture path to a local /pictures/x one. // If it fails, it returns an error. +// This refuses to download from silo sources such as brid.gy because of privacy concerns. 
func (recv *Receiver) saveAuthorPictureLocally(indieweb *mf.IndiewebData) error { + srcDomain := rest.Domain(indieweb.Source) + for _, siloDomain := range siloDomains { + if srcDomain == siloDomain { + return errWontDownloadBecauseOfPrivacy + } + } + _, picData, err := recv.RestClient.GetBody(indieweb.Author.Picture) if err != nil { return errPicUnableToDownload @@ -120,7 +135,6 @@ func (recv *Receiver) saveAuthorPictureLocally(indieweb *mf.IndiewebData) error return errPicNoRealImage } - srcDomain := rest.Domain(indieweb.Source) _, dberr := recv.Repo.SavePicture(picData, srcDomain) if dberr != nil { return errPicUnableToSave diff --git a/app/webmention/recv/receive_test.go b/app/webmention/recv/receive_test.go index d97c3e6..bf17065 100644 --- a/app/webmention/recv/receive_test.go +++ b/app/webmention/recv/receive_test.go @@ -37,6 +37,12 @@ func TestSaveAuthorPictureLocally(t *testing.T) { "/pictures/brainbaking.com", nil, }, + { + "Refuses to download if it's from a silo domain and possibly involves GDPR privacy issues", + "https://brid.gy/picture.jpg", + "https://brid.gy/picture.jpg", + errWontDownloadBecauseOfPrivacy, + }, { "Absolute URL does not get replaced but error if no valid image", "https://brainbaking.com/index.xml", @@ -63,7 +69,7 @@ func TestSaveAuthorPictureLocally(t *testing.T) { } indieweb := &mf.IndiewebData{ - Source: "https://brainbaking.com", + Source: tc.pictureUrl, Author: mf.IndiewebAuthor{ Picture: tc.pictureUrl, }, @@ -242,6 +248,27 @@ func TestReceiveTargetThatDoesNotPointToTheSourceDoesNothing(t *testing.T) { assert.Empty(t, repo.GetAll("brainbaking.com").Data) } +func TestProcessSourceBodyAnonymizesBothAuthorPictureAndNameIfComingFromSilo(t *testing.T) { + wm := mf.Mention{ + Source: "https://brid.gy/post/twitter/ChrisAldrich/1387130900962443264", + Target: "https://brainbaking.com/", + } + repo := db.NewMentionRepo(conf) + receiver := &Receiver{ + Conf: conf, + Repo: repo, + } + + src, err := 
ioutil.ReadFile("../../../mocks/valid-bridgy-source.html") + assert.NoError(t, err) + + receiver.processSourceBody(string(src), wm) + savedMention := repo.Get(wm) + + assert.Equal(t, "Anonymous", savedMention.Author.Name) + assert.Equal(t, "/pictures/anonymous", savedMention.Author.Picture) +} + func TestProcessSourceBodyAbortsIfNoMentionOfTargetFoundInSourceHtml(t *testing.T) { wm := mf.Mention{ Source: "https://brainbaking.com",