implement blacklisting and wm deleting if needed

This commit is contained in:
Wouter Groeneveld 2021-05-02 11:40:45 +02:00
parent 255fea17e0
commit b7a12b427f
15 changed files with 204 additions and 61 deletions

View File

@ -82,7 +82,7 @@ Place a `config.json` file in the same directory that looks like this: (below ar
"brainbaking.com",
"jefklakscodex.com"
],
"disallowedWebmentionDomains": [
"blacklist": [
"youtube.com"
]
}
@ -90,7 +90,7 @@ Place a `config.json` file in the same directory that looks like this: (below ar
- port, host: http server params
- token, allowedWebmentionSources: see below, used for authentication
- disallowedWebmentionDomains: if an URL from that domain is encountered in your feed, ignore it. Does not send mentions to it.
- blacklist: domains that we do NOT send mentions to or accept mentions from.
- utcOffset: offset in minutes for date processing, starting from UTC time.
- conString: file path to store all mentions and author avatars in a simple key/value store, based on [buntdb](https://github.com/tidwall/buntdb).
@ -181,6 +181,10 @@ This means if you made changes in-between, and they appear in the RSS feed as re
No. The server will automatically store the latest push, and if it's called again, it will not send out anything if nothing more recent was found in your RSS feed based on the last published link. Providing the parameter merely lets you override the behavior.
#### 1.4 `DELETE /webmention/:domain/:token?source=x&target=y`
Deletes a webmention, or logs a warning if no relevant mention is found.
### 2. Pingbacks
Pingbacks are in here for two reasons:

View File

@ -20,4 +20,5 @@ func (s *server) routes() {
s.router.HandleFunc("/webmention", webmention.HandlePost(c, db)).Methods("POST")
s.router.HandleFunc("/webmention/{domain}/{token}", s.authorizedOnly(webmention.HandleGet(db))).Methods("GET")
s.router.HandleFunc("/webmention/{domain}/{token}", s.authorizedOnly(webmention.HandlePut(c, db))).Methods("PUT")
s.router.HandleFunc("/webmention/{domain}/{token}", s.authorizedOnly(webmention.HandleDelete(db))).Methods("DELETE")
}

View File

@ -23,6 +23,20 @@ func HandleGet(repo db.MentionRepo) http.HandlerFunc {
}
}
// HandleDelete returns a handler that deletes a possible webmention identified
// by the "source" and "target" request parameters. It does not verify source/target.
// A request whose form data cannot be parsed is rejected with 400 Bad Request.
// Missing parameters are passed on as empty strings; reporting a mention that
// could not be deleted is left to repo.Delete.
func HandleDelete(repo db.MentionRepo) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Do not silently continue with half-parsed parameters.
		if err := r.ParseForm(); err != nil {
			http.Error(w, "unable to parse request parameters", http.StatusBadRequest)
			return
		}

		repo.Delete(mf.Mention{
			Source: r.FormValue("source"),
			Target: r.FormValue("target"),
		})
	}
}
func HandlePut(conf *common.Config, repo db.MentionRepo) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
domain := mux.Vars(r)["domain"]

View File

@ -1,8 +1,10 @@
package webmention
import (
"brainbaking.com/go-jamming/app/mf"
"brainbaking.com/go-jamming/common"
"brainbaking.com/go-jamming/db"
"fmt"
"github.com/stretchr/testify/assert"
"io/ioutil"
"net/http"
@ -29,6 +31,32 @@ func init() {
repo = db.NewMentionRepo(cnf)
}
// TestHandleDelete stores a mention, sends a DELETE request with its source and
// target as query parameters to a test server running HandleDelete, and checks
// that no mentions remain for brainbaking.com afterwards.
func TestHandleDelete(t *testing.T) {
	wm := mf.Mention{
		Source: "https://infos.by/markdown-v-nauke/",
		Target: "https://brainbaking.com/post/2021/02/writing-academic-papers-in-markdown/",
	}
	_, err := repo.Save(wm, &mf.IndiewebData{
		Source: wm.Source,
		Target: wm.Target,
		Name:   "mytest",
	})
	assert.NoError(t, err)
	// Precondition: the mention must be present before it can be deleted.
	assert.NotEmpty(t, repo.GetAll("brainbaking.com").Data)

	ts := httptest.NewServer(HandleDelete(repo))
	defer ts.Close()

	client := &http.Client{}
	req, err := http.NewRequest("DELETE", fmt.Sprintf("%s?source=%s&target=%s", ts.URL, wm.Source, wm.Target), nil)
	assert.NoError(t, err)

	resp, err := client.Do(req)
	assert.NoError(t, err)
	if resp != nil {
		// Close the body so the connection is released; resp is nil when Do failed.
		defer resp.Body.Close()
	}

	assert.Empty(t, repo.GetAll("brainbaking.com").Data)
}
func TestHandlePostWithInvalidUrlsShouldReturnBadRequest(t *testing.T) {
ts := httptest.NewServer(HandlePost(cnf, repo))
defer ts.Close()

View File

@ -30,6 +30,11 @@ var (
)
func (recv *Receiver) Receive(wm mf.Mention) {
if recv.Conf.IsBlacklisted(wm.Source) {
log.Warn().Stringer("wm", wm).Msg(" ABORT: source url comes from blacklisted domain!")
return
}
log.Info().Stringer("wm", wm).Msg("OK: looks valid")
_, body, geterr := recv.RestClient.GetBody(wm.Source)

View File

@ -22,6 +22,9 @@ var conf = &common.Config{
"brainbaking.com",
},
ConString: ":memory:",
Blacklist: []string{
"blacklisted.com",
},
}
func TestSaveAuthorPictureLocally(t *testing.T) {
@ -229,6 +232,22 @@ func TestReceiveTargetDoesNotExistAnymoreDeletesPossiblyOlderWebmention(t *testi
assert.Empty(t, indb)
}
func TestReceiveFromBlacklistedDomainDoesNothing(t *testing.T) {
wm := mf.Mention{
Source: "https://blacklisted.com/whoops",
Target: "https://brainbaking.com/valid-indieweb-source.html",
}
repo := db.NewMentionRepo(conf)
receiver := &Receiver{
Conf: conf,
Repo: repo,
}
receiver.Receive(wm)
assert.Empty(t, repo.GetAll("brainbaking.com").Data)
}
func TestReceiveTargetThatDoesNotPointToTheSourceDoesNothing(t *testing.T) {
wm := mf.Mention{
Source: "https://brainbaking.com/valid-indieweb-source.html",

View File

@ -65,7 +65,7 @@ func (snder *Sender) collectUniqueHrefsFromHtml(html string) []string {
for _, match := range hrefRegexp.FindAllStringSubmatch(html, -1) {
url := match[1] // [0] is the match of the entire expression, [1] is the capture group
if !extRegexp.MatchString(url) && !snder.Conf.ContainsDisallowedDomain(url) && !strings.HasPrefix(url, "#") {
if !extRegexp.MatchString(url) && !snder.Conf.IsBlacklisted(url) && !strings.HasPrefix(url, "#") {
urlmap.Add(url)
}
}

View File

@ -19,7 +19,7 @@ func (s *CollectSuite) SetupTest() {
s.xml = string(file)
s.snder = &Sender{
Conf: &common.Config{
DisallowedWebmentionDomains: []string{
Blacklist: []string{
"youtube.com",
},
},

View File

@ -0,0 +1,34 @@
package common
import (
"github.com/stretchr/testify/assert"
"testing"
)
func TestIncludes(t *testing.T) {
cases := []struct {
label string
arr []string
searchstr string
expected bool
}{
{
"element in array",
[]string{"one", "two"},
"two",
true,
},
{
"element not in array",
[]string{"one", "two"},
"three",
false,
},
}
for _, tc := range cases {
t.Run(tc.label, func(t *testing.T) {
assert.Equal(t, tc.expected, Includes(tc.arr, tc.searchstr))
})
}
}

View File

@ -1,6 +1,7 @@
package common
import (
"brainbaking.com/go-jamming/rest"
"encoding/json"
"errors"
"github.com/rs/zerolog/log"
@ -10,13 +11,21 @@ import (
)
type Config struct {
Port int `json:"port"`
Token string `json:"token"`
UtcOffset int `json:"utcOffset"`
DataPath string `json:"dataPath"`
ConString string `json:"conString"`
AllowedWebmentionSources []string `json:"allowedWebmentionSources"`
DisallowedWebmentionDomains []string `json:"disallowedWebmentionDomains"`
Port int `json:"port"`
Token string `json:"token"`
UtcOffset int `json:"utcOffset"`
DataPath string `json:"dataPath"`
ConString string `json:"conString"`
AllowedWebmentionSources []string `json:"allowedWebmentionSources"`
Blacklist []string `json:"blacklist"`
}
// IsBlacklisted reports whether the domain of url, as extracted by rest.Domain,
// is listed in c.Blacklist. URLs that do not start with "http" (for example
// relative links) are never considered blacklisted.
func (c *Config) IsBlacklisted(url string) bool {
	// Short-circuit keeps rest.Domain from ever seeing a non-http(s) string.
	return strings.HasPrefix(url, "http") && Includes(c.Blacklist, rest.Domain(url))
}
func (c *Config) Zone() *time.Location {
@ -40,22 +49,8 @@ func (c *Config) missingKeys() []string {
return keys
}
// ContainsDisallowedDomain reports whether url contains any entry of
// c.DisallowedWebmentionDomains as a substring.
func (c *Config) ContainsDisallowedDomain(url string) bool {
	disallowed := c.DisallowedWebmentionDomains
	for i := range disallowed {
		// Plain substring match: the entry may occur anywhere in url.
		if strings.Contains(url, disallowed[i]) {
			return true
		}
	}
	return false
}
func (c *Config) IsAnAllowedDomain(url string) bool {
for _, domain := range c.AllowedWebmentionSources {
if domain == url {
return true
}
}
return false
func (c *Config) IsAnAllowedDomain(domain string) bool {
return Includes(c.AllowedWebmentionSources, domain)
}
func (c *Config) FetchDomain(url string) (string, error) {
@ -98,11 +93,11 @@ func config() *Config {
func defaultConfig() *Config {
return &Config{
Port: 1337,
Token: "miauwkes",
UtcOffset: 60,
ConString: "data/mentions.db",
AllowedWebmentionSources: []string{"brainbaking.com", "jefklakscodex.com"},
DisallowedWebmentionDomains: []string{"youtube.com"},
Port: 1337,
Token: "miauwkes",
UtcOffset: 60,
ConString: "mentions.db",
AllowedWebmentionSources: []string{"brainbaking.com", "jefklakscodex.com"},
Blacklist: []string{"youtube.com"},
}
}

46
common/config_test.go Normal file
View File

@ -0,0 +1,46 @@
package common
import (
"github.com/stretchr/testify/assert"
"testing"
)
// TestIsBlacklisted is a table-driven check of Config.IsBlacklisted with
// "youtube.com" as the only blacklisted domain. It covers http and https URLs
// on that domain, a URL that only mentions the domain in its path, and a
// relative URL.
func TestIsBlacklisted(t *testing.T) {
	conf := Config{
		Blacklist: []string{"youtube.com"},
	}

	type scenario struct {
		label    string
		url      string
		expected bool
	}
	scenarios := []scenario{
		{
			label:    "do not blacklist if domain is part of relative url",
			url:      "https://brainbaking.com/post/youtube.com-sucks",
			expected: false,
		},
		{
			label:    "blacklist if https domain is on the list",
			url:      "https://youtube.com/stuff",
			expected: true,
		},
		{
			label:    "blacklist if http domain is on the list",
			url:      "http://youtube.com/stuff",
			expected: true,
		},
		{
			label:    "do not blacklist if relative url",
			url:      "/youtube.com",
			expected: false,
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.label, func(t *testing.T) {
			got := conf.IsBlacklisted(sc.url)
			assert.Equal(t, sc.expected, got)
		})
	}
}

View File

@ -1,21 +0,0 @@
package common
import "testing"
// TestIncludesElemInArrayTrue verifies that Includes finds a value that is
// present in the slice.
func TestIncludesElemInArrayTrue(t *testing.T) {
	if found := Includes([]string{"one", "two"}, "two"); !found {
		t.Error("Should be in there")
	}
}
// TestIncludesElemNotInArrayFalse verifies that Includes does not report a
// value that is absent from the slice.
func TestIncludesElemNotInArrayFalse(t *testing.T) {
	if found := Includes([]string{"one", "two"}, "three"); found {
		t.Error("Should NOT be in there")
	}
}

View File

@ -56,13 +56,16 @@ func lastSentKey(domain string) string {
return fmt.Sprintf("%s:lastsent", domain)
}
// Delete removes a possibly present mention by key. Ignores possible errors.
// Delete removes a possibly present mention by key. Ignores but logs possible errors.
func (r *MentionRepoBunt) Delete(wm mf.Mention) {
key := r.mentionToKey(wm)
r.db.Update(func(tx *buntdb.Tx) error {
tx.Delete(key)
return nil
err := r.db.Update(func(tx *buntdb.Tx) error {
_, err := tx.Delete(key)
return err
})
if err != nil {
log.Warn().Err(err).Str("key", key).Stringer("wm", wm).Msg("Unable to delete")
}
}
func (r *MentionRepoBunt) SavePicture(bytes string, domain string) (string, error) {

View File

@ -23,11 +23,16 @@ func Unauthorized(w http.ResponseWriter) {
// Domain parses the target url to extract the domain as part of the allowed webmention targets.
// This is the same as conf.FetchDomain(wm.Target), only without config, and without error handling.
// Assumes http(s) protocol, which should have been validated by now.
// Assumes http(s) protocol, which should have been validated before calling this.
func Domain(target string) string {
withPossibleSubdomain := strings.Split(target, "/")[2]
slashes := strings.Split(target, "/")
if len(slashes) < 3 {
return target
}
withPossibleSubdomain := slashes[2]
split := strings.Split(withPossibleSubdomain, ".")
if len(split) == 2 {
if len(split) <= 2 {
return withPossibleSubdomain // that was the extension, not the subdomain.
}
return fmt.Sprintf("%s.%s", split[1], split[2])

View File

@ -91,6 +91,16 @@ func TestDomainParseFromTarget(t *testing.T) {
"https://mayonaise.frit.be/patatjes/zijn/lekker",
"frit.be",
},
{
"parse from localhost domain without extension",
"https://localhost:1313/stuff",
"localhost:1313",
},
{
"malformed http string with too little slashes simply returns same URL",
"https:*groovy.bla/stuff",
"https:*groovy.bla/stuff",
},
}
for _, tc := range cases {