From d0eddc30471509f2bc7629684253694322728026 Mon Sep 17 00:00:00 2001 From: wgroeneveld Date: Mon, 18 Apr 2022 10:18:00 +0200 Subject: [PATCH] another attempt at fighting spam --- README.md | 15 +++++++++ common/config.go | 19 +++++++++++ common/config_test.go | 73 +++++++++++++++++++++++++++++++++++++++++++ db/repo.go | 15 +++++++++ db/repo_test.go | 28 +++++++++++++++++ main.go | 24 +++++++++++++- 6 files changed, 173 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 51808c5..f95aea1 100644 --- a/README.md +++ b/README.md @@ -181,3 +181,18 @@ That's pretty flexible. I have not taken the trouble to put this into the config A separate goroutine cleans up ips each 2 minutes, the TTL is 5 minutes. See `limiter.go`. Database migrations are run using the `-migrate` flag. + +--- + +## Fighting spam + +Since Go-jamming still supports Pingbacks, spam could be an issue. However, if the URL doesn't contain a genuine link, the mention will be immediately dropped. + +Still, spammers always find a way and sometimes even create fake blog posts with real links to your blog. In that case, simply add the domain to the `blacklist` in `config.json`. + +Adding the domain **manually** will not remove existing spam from your DB! The `-blacklist` flag is there to: + +1. Automatically add the domain to the `blacklist` array in the config file; +2. Automatically go through the mentions stored for each allowed domain and remove every one whose URL contains a blacklisted domain (plain substring match on the URL). + +How to use: `./go-jamming -blacklist annoyingspam.com`. The program exits after performing the above actions; afterwards, simply restart the server with `./go-jamming`.
\ No newline at end of file diff --git a/common/config.go b/common/config.go index ccb07e8..296f85d 100644 --- a/common/config.go +++ b/common/config.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "github.com/rs/zerolog/log" + "io/fs" "io/ioutil" "strings" "time" @@ -70,6 +71,24 @@ func Configure() *Config { return c } +func (c *Config) AddToBlacklist(domain string) { + for _, d := range c.Blacklist { + if d == domain { + return + } + } + + c.Blacklist = append(c.Blacklist, domain) +} + +func (c *Config) Save() { + bytes, _ := json.Marshal(c) // we assume a correct internal state here + err := ioutil.WriteFile("config.json", bytes, fs.ModePerm) + if err != nil { + log.Err(err).Msg("Unable to save config.json to disk!") + } +} + func config() *Config { confData, err := ioutil.ReadFile("config.json") if err != nil { diff --git a/common/config_test.go b/common/config_test.go index 5709547..7887f2f 100644 --- a/common/config_test.go +++ b/common/config_test.go @@ -2,9 +2,82 @@ package common import ( "github.com/stretchr/testify/assert" + "io/fs" + "io/ioutil" + "os" "testing" ) +func TestReadFromJsonMalformedReversToDefaults(t *testing.T) { + err := ioutil.WriteFile("config.json", []byte("dinges"), fs.ModePerm) + if err != nil { + assert.Failf(t, "Error writing test config.json: %s", err.Error()) + } + + config := Configure() + assert.Contains(t, config.AllowedWebmentionSources, "brainbaking.com") + os.Remove("config.json") +} + +func TestReadFromJsonWithCorrectJsonData(t *testing.T) { + confString := `{ + "port": 1337, + "host": "localhost", + "token": "miauwkes", + "conString": "mentions.db", + "utcOffset": 60, + "allowedWebmentionSources": [ + "snoopy.be" + ], + "blacklist": [ + "youtube.com" + ] + }` + err := ioutil.WriteFile("config.json", []byte(confString), fs.ModePerm) + if err != nil { + assert.Failf(t, "Error writing test config.json: %s", err.Error()) + } + + config := Configure() + assert.Contains(t, config.AllowedWebmentionSources, "snoopy.be") + 
assert.Equal(t, 1, len(config.AllowedWebmentionSources)) + os.Remove("config.json") +} + +func TestSaveAfterAddingANewBlacklistEntry(t *testing.T) { + config := Configure() + config.AddToBlacklist("somethingnew.be") + config.Save() + + newConfig := Configure() + assert.Contains(t, newConfig.Blacklist, "somethingnew.be") + os.Remove("config.json") +} + +func TestAddToBlacklistNotYetAddsToList(t *testing.T) { + conf := Config{ + Blacklist: []string{ + "youtube.com", + }, + } + + conf.AddToBlacklist("dinges.be") + assert.Contains(t, conf.Blacklist, "dinges.be") + assert.Equal(t, 2, len(conf.Blacklist)) +} + +func TestAddToBlacklistAlreadyAddedDoNotAddAgain(t *testing.T) { + conf := Config{ + Blacklist: []string{ + "youtube.com", + }, + } + + conf.AddToBlacklist("youtube.com") + assert.Contains(t, conf.Blacklist, "youtube.com") + assert.Equal(t, 1, len(conf.Blacklist)) +} + func TestIsBlacklisted(t *testing.T) { cases := []struct { label string diff --git a/db/repo.go b/db/repo.go index fe8eb27..9cdb6b5 100644 --- a/db/repo.go +++ b/db/repo.go @@ -9,6 +9,7 @@ import ( "fmt" "github.com/rs/zerolog/log" "github.com/tidwall/buntdb" + "strings" ) type MentionRepoBunt struct { @@ -19,6 +20,7 @@ type MentionRepo interface { Save(key mf.Mention, data *mf.IndiewebData) (string, error) SavePicture(bytes string, domain string) (string, error) Delete(key mf.Mention) + CleanupSpam(domain string, blacklist []string) LastSentMention(domain string) string UpdateLastSentMention(domain string, lastSent string) Get(key mf.Mention) *mf.IndiewebData @@ -26,6 +28,17 @@ type MentionRepo interface { GetAll(domain string) mf.IndiewebDataResult } +// CleanupSpam removes potential blacklisted spam from the webmention database by checking the url of each entry. 
+func (r *MentionRepoBunt) CleanupSpam(domain string, blacklist []string) { + for _, mention := range r.GetAll(domain).Data { + for _, blacklisted := range blacklist { + if strings.Contains(mention.Url, blacklisted) { + r.Delete(mention.AsMention()) + } + } + } +} + // UpdateLastSentMention updates the last sent mention link. Logs but ignores errors. func (r *MentionRepoBunt) UpdateLastSentMention(domain string, lastSentMentionLink string) { err := r.db.Update(func(tx *buntdb.Tx) error { @@ -65,6 +78,8 @@ func (r *MentionRepoBunt) Delete(wm mf.Mention) { }) if err != nil { log.Warn().Err(err).Str("key", key).Stringer("wm", wm).Msg("Unable to delete") + } else { + log.Debug().Str("key", key).Stringer("wm", wm).Msg("Deleted.") } } diff --git a/db/repo_test.go b/db/repo_test.go index 157eb93..050d201 100644 --- a/db/repo_test.go +++ b/db/repo_test.go @@ -32,6 +32,34 @@ func TestSaveAndGetPicture(t *testing.T) { assert.Equal(t, data, picDataAfterSave) } +func TestCleanupSpam(t *testing.T) { + db := NewMentionRepo(conf) + db.Save(mf.Mention{ + Source: "https://naar.hier/jup", + Target: "https://pussycat.com/coolpussy.html", + }, &mf.IndiewebData{ + Name: "lolz", + Target: "https://pussycat.com/coolpussy.html", + Source: "https://naar.hier/jup", + Url: "https://naar.hier", + }) + db.Save(mf.Mention{ + Source: "https://spam.be/malicious", + Target: "https://pussycat.com/dinges", + }, &mf.IndiewebData{ + Name: "kapot", + Target: "https://pussycat.com/dinges", + Source: "https://spam.be/malicious", + Url: "https://spam.be", + }) + + db.CleanupSpam("pussycat.com", []string{"spam.be", "jaak.com"}) + + results := db.GetAll("pussycat.com") + assert.Equal(t, 1, len(results.Data)) + assert.Equal(t, "lolz", results.Data[0].Name) +} + func TestDelete(t *testing.T) { db := NewMentionRepo(conf) wm := mf.Mention{ diff --git a/main.go b/main.go index 8d4878a..e8f9de7 100644 --- a/main.go +++ b/main.go @@ -1,6 +1,7 @@ package main import ( + "brainbaking.com/go-jamming/common" 
"brainbaking.com/go-jamming/db" "flag" "os" @@ -16,11 +17,13 @@ func main() { verboseFlag := flag.Bool("verbose", false, "Verbose mode (pretty print log, debug level)") migrateFlag := flag.Bool("migrate", false, "Run migration scripts for the DB and exit.") + blacklist := flag.String("blacklist", "", "Blacklist a domain name (also cleans spam from DB)") flag.Parse() + blacklisting := len(*blacklist) > 1 // logs by default to Stderr (/var/log/syslog). Rolling files possible via lumberjack. zerolog.SetGlobalLevel(zerolog.InfoLevel) - if *verboseFlag || *migrateFlag { + if *verboseFlag || *migrateFlag || blacklisting { log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}) zerolog.SetGlobalLevel(zerolog.DebugLevel) } @@ -30,10 +33,29 @@ func main() { os.Exit(0) } + if blacklisting { + blacklistDomain(*blacklist) + os.Exit(0) + } + log.Debug().Msg("Let's a go!") app.Start() } +func blacklistDomain(domain string) { + log.Info().Str("domain", domain).Msg("Blacklisting...") + config := common.Configure() + config.AddToBlacklist(domain) + config.Save() + + repo := db.NewMentionRepo(config) + for _, domain := range config.AllowedWebmentionSources { + repo.CleanupSpam(domain, config.Blacklist) + } + + log.Info().Msg("Blacklist done, exiting.") +} + func migrate() { log.Info().Msg("Starting db migration...") db.Migrate()