diff --git a/app/mf/microformats.go b/app/mf/microformats.go index f9edf47..a8de66a 100644 --- a/app/mf/microformats.go +++ b/app/mf/microformats.go @@ -2,6 +2,7 @@ package mf import ( "brainbaking.com/go-jamming/common" + "fmt" "strings" "time" "willnorris.com/go/microformats" @@ -9,6 +10,7 @@ import ( const ( DateFormat = "2006-01-02T15:04:05" + Anonymous = "anonymous" ) type IndiewebAuthor struct { @@ -16,6 +18,10 @@ type IndiewebAuthor struct { Picture string `json:"picture"` } +func (ia IndiewebAuthor) Anonymize() { + ia.Picture = fmt.Sprintf("/pictures/%s", Anonymous) +} + type IndiewebDataResult struct { Status string `json:"status"` Data []*IndiewebData `json:"json"` diff --git a/app/pictures/handler.go b/app/pictures/handler.go index 79be4f1..017de24 100644 --- a/app/pictures/handler.go +++ b/app/pictures/handler.go @@ -1,6 +1,7 @@ package pictures import ( + "brainbaking.com/go-jamming/app/mf" "brainbaking.com/go-jamming/db" _ "embed" "github.com/gorilla/mux" @@ -11,10 +12,6 @@ import ( //go:embed anonymous.jpg var anonymous []byte -const ( - Anonymous = "anonymous" -) - func init() { if anonymous == nil { log.Fatal().Msg("embedded anonymous image missing?") @@ -26,7 +23,7 @@ func init() { func Handle(repo db.MentionRepo) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { picDomain := mux.Vars(r)["picture"] - if picDomain == Anonymous { + if picDomain == mf.Anonymous { servePicture(w, anonymous) return } diff --git a/app/webmention/recv/receive.go b/app/webmention/recv/receive.go index 8329fcc..63b4eae 100644 --- a/app/webmention/recv/receive.go +++ b/app/webmention/recv/receive.go @@ -2,10 +2,10 @@ package recv import ( "brainbaking.com/go-jamming/app/mf" - "brainbaking.com/go-jamming/app/pictures" "brainbaking.com/go-jamming/common" "brainbaking.com/go-jamming/db" "brainbaking.com/go-jamming/rest" + "errors" "fmt" "regexp" "strings" @@ -20,6 +20,12 @@ type Receiver struct { Repo db.MentionRepo } +var ( + errPicUnableToDownload = errors.New("Unable to download author picture") + errPicNoRealImage = errors.New("Downloaded author picture is not a real image") + errPicUnableToSave = errors.New("Unable to save downloaded author picture") +) + func (recv *Receiver) Receive(wm mf.Mention) { log.Info().Stringer("wm", wm).Msg("OK: looks valid") _, body, geterr := recv.RestClient.GetBody(wm.Source) @@ -42,12 +48,16 @@ func (recv *Receiver) processSourceBody(body string, wm mf.Mention) { data := microformats.Parse(strings.NewReader(body), wm.SourceUrl()) indieweb := recv.convertBodyToIndiewebData(body, wm, mf.HEntry(data)) if indieweb.Author.Picture != "" { - recv.saveAuthorPictureLocally(indieweb) + err := recv.saveAuthorPictureLocally(indieweb) + if err != nil { + log.Error().Err(err).Str("url", indieweb.Author.Picture).Msg("Failed to save picture. Reverting to anonymous") + indieweb.Author.Anonymize() + } } key, err := recv.Repo.Save(wm, indieweb) if err != nil { - log.Error().Err(err).Stringer("wm", wm).Msg("processSourceBody: failed to save json to db") + log.Error().Err(err).Stringer("wm", wm).Msg("Failed to save new mention to db") } log.Info().Str("key", key).Msg("OK: Webmention processed.") } @@ -96,26 +106,26 @@ func (recv *Receiver) parseBodyAsNonIndiewebSite(body string, wm mf.Mention) *mf } } -// saveAuthorPictureLocally tries to download the author picture. +// saveAuthorPictureLocally tries to download the author picture and checks if it's valid based on img header. // If it succeeds, it alters the picture path to a local /pictures/x one. -// If it fails, it falls back to a local default image. -// This *should* also validate image byte headers, like https://stackoverflow.com/questions/670546/determine-if-file-is-an-image -func (recv *Receiver) saveAuthorPictureLocally(indieweb *mf.IndiewebData) { +// If it fails, it returns an error. +func (recv *Receiver) saveAuthorPictureLocally(indieweb *mf.IndiewebData) error { _, picData, err := recv.RestClient.GetBody(indieweb.Author.Picture) if err != nil { - log.Warn().Err(err).Str("url", indieweb.Author.Picture).Msg("Unable to download author picture. Reverting to anonymous.") - indieweb.Author.Picture = fmt.Sprintf("/pictures/%s", pictures.Anonymous) - return + return errPicUnableToDownload } + if len(picData) < 8 || !rest.IsRealImage([]byte(picData[0:8])) { + return errPicNoRealImage + } + srcDomain := rest.Domain(indieweb.Source) _, dberr := recv.Repo.SavePicture(picData, srcDomain) if dberr != nil { - log.Warn().Err(err).Str("url", indieweb.Author.Picture).Msg("Unable to save downloaded author picture. Reverting to anonymous.") - indieweb.Author.Picture = fmt.Sprintf("/pictures/%s", pictures.Anonymous) - return + return errPicUnableToSave } indieweb.Author.Picture = fmt.Sprintf("/pictures/%s", srcDomain) + return nil } func nonIndiewebTitle(body string, wm mf.Mention) string { diff --git a/app/webmention/recv/receive_test.go b/app/webmention/recv/receive_test.go index 3da7d11..0f4cbc5 100644 --- a/app/webmention/recv/receive_test.go +++ b/app/webmention/recv/receive_test.go @@ -27,16 +27,25 @@ func TestSaveAuthorPictureLocally(t *testing.T) { label string pictureUrl string expectedPictureUrl string + expectedError error }{ { "Absolute URL gets 'downloaded' and replaced by relative", "https://brainbaking.com/picture.jpg", "/pictures/brainbaking.com", + nil, }, { - "Absolute URL gets replaced by anonymous if download fails", + "Absolute URL does not get replaced but error if no valid image", + "https://brainbaking.com/index.xml", + "https://brainbaking.com/index.xml", + errPicNoRealImage, + }, + { + "Absolute URL does not get replaced but error if download fails", "https://brainbaking.com/thedogatemypic-nowitsmissing-shiii.png", - "/pictures/anonymous", + "https://brainbaking.com/thedogatemypic-nowitsmissing-shiii.png", + errPicUnableToDownload, }, } @@ -57,9 +66,10 @@ func TestSaveAuthorPictureLocally(t *testing.T) { Picture: tc.pictureUrl, }, } - recv.saveAuthorPictureLocally(indieweb) + err := recv.saveAuthorPictureLocally(indieweb) assert.Equal(t, tc.expectedPictureUrl, indieweb.Author.Picture) + assert.Equal(t, tc.expectedError, err) }) } } diff --git a/mocks/picture.bmp b/mocks/picture.bmp new file mode 100644 index 0000000..c4bd6c8 Binary files /dev/null and b/mocks/picture.bmp differ diff --git a/mocks/picture.gif b/mocks/picture.gif new file mode 100644 index 0000000..3474200 Binary files /dev/null and b/mocks/picture.gif differ diff --git a/mocks/picture.png b/mocks/picture.png new file mode 100644 index 0000000..aa4df1b Binary files /dev/null and b/mocks/picture.png differ diff --git a/mocks/picture.tiff b/mocks/picture.tiff new file mode 100644 index 0000000..1961484 Binary files /dev/null and b/mocks/picture.tiff differ diff --git a/mocks/picture.webp b/mocks/picture.webp new file mode 100644 index 0000000..88a284b Binary files /dev/null and b/mocks/picture.webp differ diff --git a/rest/utils.go b/rest/utils.go index 740a8ef..b667b6e 100644 --- a/rest/utils.go +++ b/rest/utils.go @@ -33,6 +33,40 @@ func Domain(target string) string { return fmt.Sprintf("%s.%s", split[1], split[2]) } +type imageType []byte + +var ( + jpg = imageType{0xFF, 0xD8} + bmp = imageType{0x42, 0x4D} + gif = imageType{0x47, 0x49, 0x46} + png = imageType{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A} + tiffI = imageType{0x49, 0x49, 0x2A, 0x00} + tiffM = imageType{0x4D, 0x4D, 0x00, 0x2A} + webp = imageType{0x52, 0x49, 0x46, 0x46} // RIFF 32 bits + supportedImageTypes = []imageType{jpg, png, gif, bmp, webp, tiffI, tiffM} +) + +// IsRealImage checks the first few bytes of the provided data to see if it's a real image. +// Image headers supported: gif/jpg/png/webp/bmp +func IsRealImage(data []byte) bool { + if len(data) < 8 { + return false + } + + for _, imgType := range supportedImageTypes { + checkedBits := 0 + for i, bit := range imgType { + if data[i] == bit { + checkedBits++ + } + } + if checkedBits == len(imgType) { + return true + } + } + return false +} + func Json(w http.ResponseWriter, data interface{}) { w.WriteHeader(200) bytes, _ := json.MarshalIndent(data, "", " ") diff --git a/rest/utils_test.go b/rest/utils_test.go index 9e69df1..af5683e 100644 --- a/rest/utils_test.go +++ b/rest/utils_test.go @@ -1,10 +1,70 @@ package rest import ( + "fmt" "github.com/stretchr/testify/assert" + "io/ioutil" "testing" ) +func TestIsRealImage(t *testing.T) { + cases := []struct { + label string + imgpath string + expected bool + }{ + { + "jpeg is a valid image", + "../mocks/picture.jpg", + true, + }, + { + "bmp is a valid image", + "../mocks/picture.bmp", + true, + }, + { + "xml is not a valid image", + "../mocks/index.xml", + false, + }, + { + "empty data is not a valid image", + "", + false, + }, + { + "png is a valid image", + "../mocks/picture.png", + true, + }, + { + "gif is a valid image", + "../mocks/picture.gif", + true, + }, + { + "webp is a valid image", + "../mocks/picture.webp", + true, + }, + { + "tiff is a valid image", + "../mocks/picture.tiff", + true, + }, + } + + for _, tc := range cases { + t.Run(tc.label, func(t *testing.T) { + data, _ := ioutil.ReadFile(tc.imgpath) + fmt.Printf("Path: %s, Data: % x\n", tc.imgpath, data) + + assert.Equal(t, tc.expected, IsRealImage(data)) + }) + } +} + func TestDomainParseFromTarget(t *testing.T) { cases := []struct { label string