built-in genuine image checker based on byte headers

This commit is contained in:
Wouter Groeneveld 2021-04-22 16:02:59 +02:00
parent 3d3f17590e
commit f90af3b076
11 changed files with 138 additions and 21 deletions

View File

@ -2,6 +2,7 @@ package mf
import (
"brainbaking.com/go-jamming/common"
"fmt"
"strings"
"time"
"willnorris.com/go/microformats"
@ -9,6 +10,7 @@ import (
const (
DateFormat = "2006-01-02T15:04:05"
Anonymous = "anonymous"
)
type IndiewebAuthor struct {
@ -16,6 +18,10 @@ type IndiewebAuthor struct {
Picture string `json:"picture"`
}
func (ia IndiewebAuthor) Anonymize() {
ia.Picture = fmt.Sprintf("/pictures/%s", Anonymous)
}
type IndiewebDataResult struct {
Status string `json:"status"`
Data []*IndiewebData `json:"json"`

View File

@ -1,6 +1,7 @@
package pictures
import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/db"
_ "embed"
"github.com/gorilla/mux"
@ -11,10 +12,6 @@ import (
//go:embed anonymous.jpg
var anonymous []byte
const (
Anonymous = "anonymous"
)
func init() {
if anonymous == nil {
log.Fatal().Msg("embedded anonymous image missing?")
@ -26,7 +23,7 @@ func init() {
func Handle(repo db.MentionRepo) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
picDomain := mux.Vars(r)["picture"]
if picDomain == Anonymous {
if picDomain == mf.Anonymous {
servePicture(w, anonymous)
return
}

View File

@ -2,10 +2,10 @@ package recv
import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/app/pictures"
"brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db"
"brainbaking.com/go-jamming/rest"
"errors"
"fmt"
"regexp"
"strings"
@ -20,6 +20,12 @@ type Receiver struct {
Repo db.MentionRepo
}
var (
errPicUnableToDownload = errors.New("Unable to download author picture")
errPicNoRealImage = errors.New("Downloaded author picture is not a real image")
errPicUnableToSave = errors.New("Unable to save downloaded author picture")
)
func (recv *Receiver) Receive(wm mf.Mention) {
log.Info().Stringer("wm", wm).Msg("OK: looks valid")
_, body, geterr := recv.RestClient.GetBody(wm.Source)
@ -42,12 +48,16 @@ func (recv *Receiver) processSourceBody(body string, wm mf.Mention) {
data := microformats.Parse(strings.NewReader(body), wm.SourceUrl())
indieweb := recv.convertBodyToIndiewebData(body, wm, mf.HEntry(data))
if indieweb.Author.Picture != "" {
recv.saveAuthorPictureLocally(indieweb)
err := recv.saveAuthorPictureLocally(indieweb)
if err != nil {
log.Error().Err(err).Str("url", indieweb.Author.Picture).Msg("Failed to save picture. Reverting to anonymous")
indieweb.Author.Anonymize()
}
}
key, err := recv.Repo.Save(wm, indieweb)
if err != nil {
log.Error().Err(err).Stringer("wm", wm).Msg("processSourceBody: failed to save json to db")
log.Error().Err(err).Stringer("wm", wm).Msg("Failed to save new mention to db")
}
log.Info().Str("key", key).Msg("OK: Webmention processed.")
}
@ -96,26 +106,26 @@ func (recv *Receiver) parseBodyAsNonIndiewebSite(body string, wm mf.Mention) *mf
}
}
// saveAuthorPictureLocally tries to download the author picture.
// saveAuthorPictureLocally tries to download the author picture and checks if it's valid based on img header.
// If it succeeds, it alters the picture path to a local /pictures/x one.
// If it fails, it falls back to a local default image.
// This *should* also validate image byte headers, like https://stackoverflow.com/questions/670546/determine-if-file-is-an-image
func (recv *Receiver) saveAuthorPictureLocally(indieweb *mf.IndiewebData) {
// If it fails, it returns an error.
func (recv *Receiver) saveAuthorPictureLocally(indieweb *mf.IndiewebData) error {
_, picData, err := recv.RestClient.GetBody(indieweb.Author.Picture)
if err != nil {
log.Warn().Err(err).Str("url", indieweb.Author.Picture).Msg("Unable to download author picture. Reverting to anonymous.")
indieweb.Author.Picture = fmt.Sprintf("/pictures/%s", pictures.Anonymous)
return
return errPicUnableToDownload
}
if len(picData) < 8 || !rest.IsRealImage([]byte(picData[0:8])) {
return errPicNoRealImage
}
srcDomain := rest.Domain(indieweb.Source)
_, dberr := recv.Repo.SavePicture(picData, srcDomain)
if dberr != nil {
log.Warn().Err(err).Str("url", indieweb.Author.Picture).Msg("Unable to save downloaded author picture. Reverting to anonymous.")
indieweb.Author.Picture = fmt.Sprintf("/pictures/%s", pictures.Anonymous)
return
return errPicUnableToSave
}
indieweb.Author.Picture = fmt.Sprintf("/pictures/%s", srcDomain)
return nil
}
func nonIndiewebTitle(body string, wm mf.Mention) string {

View File

@ -27,16 +27,25 @@ func TestSaveAuthorPictureLocally(t *testing.T) {
label string
pictureUrl string
expectedPictureUrl string
expectedError error
}{
{
"Absolute URL gets 'downloaded' and replaced by relative",
"https://brainbaking.com/picture.jpg",
"/pictures/brainbaking.com",
nil,
},
{
"Absolute URL gets replaced by anonymous if download fails",
"Absolute URL does not get replaced but error if no valid image",
"https://brainbaking.com/index.xml",
"https://brainbaking.com/index.xml",
errPicNoRealImage,
},
{
"Absolute URL does not get replaced but error if download fails",
"https://brainbaking.com/thedogatemypic-nowitsmissing-shiii.png",
"/pictures/anonymous",
"https://brainbaking.com/thedogatemypic-nowitsmissing-shiii.png",
errPicUnableToDownload,
},
}
@ -57,9 +66,10 @@ func TestSaveAuthorPictureLocally(t *testing.T) {
Picture: tc.pictureUrl,
},
}
recv.saveAuthorPictureLocally(indieweb)
err := recv.saveAuthorPictureLocally(indieweb)
assert.Equal(t, tc.expectedPictureUrl, indieweb.Author.Picture)
assert.Equal(t, tc.expectedError, err)
})
}
}

BIN
mocks/picture.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.1 KiB

BIN
mocks/picture.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

BIN
mocks/picture.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

BIN
mocks/picture.tiff Normal file

Binary file not shown.

BIN
mocks/picture.webp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1002 B

View File

@ -33,6 +33,40 @@ func Domain(target string) string {
return fmt.Sprintf("%s.%s", split[1], split[2])
}
type imageType []byte
var (
jpg = imageType{0xFF, 0xD8}
bmp = imageType{0x42, 0x4D}
gif = imageType{0x47, 0x49, 0x46}
png = imageType{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}
tiffI = imageType{0x49, 0x49, 0x2A, 0x00}
tiffM = imageType{0x4D, 0x4D, 0x00, 0x2A}
webp = imageType{0x52, 0x49, 0x46, 0x46} // RIFF 32 bits
supportedImageTypes = []imageType{jpg, png, gif, bmp, webp, tiffI, tiffM}
)
// IsRealImage checks the first few bytes of the provided data to see if it's a real image.
// Image headers supported: gif/jpg/png/webp/bmp
func IsRealImage(data []byte) bool {
if len(data) < 8 {
return false
}
for _, imgType := range supportedImageTypes {
checkedBits := 0
for i, bit := range imgType {
if data[i] == bit {
checkedBits++
}
}
if checkedBits == len(imgType) {
return true
}
}
return false
}
func Json(w http.ResponseWriter, data interface{}) {
w.WriteHeader(200)
bytes, _ := json.MarshalIndent(data, "", " ")

View File

@ -1,10 +1,70 @@
package rest
import (
"fmt"
"github.com/stretchr/testify/assert"
"io/ioutil"
"testing"
)
func TestIsRealImage(t *testing.T) {
cases := []struct {
label string
imgpath string
expected bool
}{
{
"jpeg is a valid image",
"../mocks/picture.jpg",
true,
},
{
"bmp is a valid image",
"../mocks/picture.bmp",
true,
},
{
"xml is not a valid image",
"../mocks/index.xml",
false,
},
{
"empty data is not a valid image",
"",
false,
},
{
"png is a valid image",
"../mocks/picture.png",
true,
},
{
"gif is a valid image",
"../mocks/picture.gif",
true,
},
{
"webp is a valid image",
"../mocks/picture.webp",
true,
},
{
"tiff is a valid image",
"../mocks/picture.tiff",
true,
},
}
for _, tc := range cases {
t.Run(tc.label, func(t *testing.T) {
data, _ := ioutil.ReadFile(tc.imgpath)
fmt.Printf("Path: %s, Data: % x\n", tc.imgpath, data)
assert.Equal(t, tc.expected, IsRealImage(data))
})
}
}
func TestDomainParseFromTarget(t *testing.T) {
cases := []struct {
label string