diff --git a/go.mod b/go.mod index 5a03be1..aabce95 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/rs/zerolog v1.21.0 github.com/stretchr/testify v1.7.0 github.com/tidwall/buntdb v1.2.3 + golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4 golang.org/x/time v0.0.0-20220411224347-583f2d630306 willnorris.com/go/microformats v1.1.1 ) diff --git a/go.sum b/go.sum index 52ee545..33a7055 100644 --- a/go.sum +++ b/go.sum @@ -56,18 +56,21 @@ golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201021035429-f5854403a974 h1:IX6qOQeG5uLjB/hjjwjedwfjND0hgjPMMyO1RoIXQNI= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4 h1:HVyaeDAYux4pnY+D/SiwmLOR36ewZ4iGQIIrtnuCjFA= +golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba h1:O8mE0/t419eoIwhTFpKVkHiTs/Igowgfkj25AcZrtiE= -golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/time v0.0.0-20220411224347-583f2d630306 h1:+gHMid33q6pen7kv9xvT+JRinntgeXO2AeZVd0AWD3w= golang.org/x/time v0.0.0-20220411224347-583f2d630306/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/rest/utils.go b/rest/utils.go index b88ea11..0d301fc 100644 --- a/rest/utils.go +++ b/rest/utils.go @@ -2,7 +2,7 @@ package rest import ( "encoding/json" - "fmt" + "golang.org/x/net/publicsuffix" "net/http" "net/url" "strings" @@ -25,17 +25,24 @@ func Unauthorized(w http.ResponseWriter) { // This is the same as conf.FetchDomain(wm.Target), only without config, and without error handling. // Assumes http(s) protocol, which should have been validated before calling this. func Domain(target string) string { - slashes := strings.Split(target, "/") - if len(slashes) < 3 { + url, err := url.Parse(target) + if err != nil { + return target + } + host := url.Hostname() + if host == "" { return target } - withPossibleSubdomain := slashes[2] + suffix, _ := publicsuffix.PublicSuffix(host) + withPossibleSubdomain := strings.ReplaceAll(host, "."+suffix, "") + split := strings.Split(withPossibleSubdomain, ".") - if len(split) <= 2 { - return withPossibleSubdomain // that was the extension, not the subdomain. + if len(split) <= 1 { + return host } - return fmt.Sprintf("%s.%s", split[1], split[2]) + + return strings.Join(split[1:], ".") + "." + suffix } type imageType []byte @@ -52,7 +59,13 @@ var ( // SiloDomains are domains where mentions of multiple individuals may come from. // These are privacy issues and will be anonymized as such. - SiloDomains = []string{"brid.gy", "twitter.com", "facebook.com"} + SiloDomains = []string{ + "brid.gy", + "twitter.com", + "facebook.com", + "indieweb.social", + "mastodon.social", + } ) // IsRealImage checks the first few bytes of the provided data to see if it's a real image. diff --git a/rest/utils_test.go b/rest/utils_test.go index 61e476a..18e2a19 100644 --- a/rest/utils_test.go +++ b/rest/utils_test.go @@ -94,7 +94,17 @@ func TestDomainParseFromTarget(t *testing.T) { { "parse from localhost domain without extension", "https://localhost:1313/stuff", - "localhost:1313", + "localhost", + }, + { + "UK domain with two dots after the name", + "https://minutestomidnight.co.uk/blog/article.html", + "minutestomidnight.co.uk", + }, + { + "UK domain with subdomain", + "https://www.minutestomidnight.co.uk/blog/article.html", + "minutestomidnight.co.uk", }, { "malformed http string with too little slashes simply returns same URL",