fix domain extraction from .co.uk using publicsuffix
This commit is contained in:
parent
522cc3d746
commit
f61bda5c5a
1
go.mod
1
go.mod
|
@ -10,6 +10,7 @@ require (
|
|||
github.com/rs/zerolog v1.21.0
|
||||
github.com/stretchr/testify v1.7.0
|
||||
github.com/tidwall/buntdb v1.2.3
|
||||
golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4
|
||||
golang.org/x/time v0.0.0-20220411224347-583f2d630306
|
||||
willnorris.com/go/microformats v1.1.1
|
||||
)
|
||||
|
|
9
go.sum
9
go.sum
|
@ -56,18 +56,21 @@ golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73r
|
|||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974 h1:IX6qOQeG5uLjB/hjjwjedwfjND0hgjPMMyO1RoIXQNI=
|
||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||
golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4 h1:HVyaeDAYux4pnY+D/SiwmLOR36ewZ4iGQIIrtnuCjFA=
|
||||
golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba h1:O8mE0/t419eoIwhTFpKVkHiTs/Igowgfkj25AcZrtiE=
|
||||
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/time v0.0.0-20220411224347-583f2d630306 h1:+gHMid33q6pen7kv9xvT+JRinntgeXO2AeZVd0AWD3w=
|
||||
golang.org/x/time v0.0.0-20220411224347-583f2d630306/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
|
|
|
@ -2,7 +2,7 @@ package rest
|
|||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"golang.org/x/net/publicsuffix"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
@ -25,17 +25,24 @@ func Unauthorized(w http.ResponseWriter) {
|
|||
// This is the same as conf.FetchDomain(wm.Target), only without config, and without error handling.
|
||||
// Assumes http(s) protocol, which should have been validated before calling this.
|
||||
func Domain(target string) string {
|
||||
slashes := strings.Split(target, "/")
|
||||
if len(slashes) < 3 {
|
||||
url, err := url.Parse(target)
|
||||
if err != nil {
|
||||
return target
|
||||
}
|
||||
host := url.Hostname()
|
||||
if host == "" {
|
||||
return target
|
||||
}
|
||||
|
||||
withPossibleSubdomain := slashes[2]
|
||||
suffix, _ := publicsuffix.PublicSuffix(host)
|
||||
withPossibleSubdomain := strings.ReplaceAll(host, "."+suffix, "")
|
||||
|
||||
split := strings.Split(withPossibleSubdomain, ".")
|
||||
if len(split) <= 2 {
|
||||
return withPossibleSubdomain // that was the extension, not the subdomain.
|
||||
if len(split) <= 1 {
|
||||
return host
|
||||
}
|
||||
return fmt.Sprintf("%s.%s", split[1], split[2])
|
||||
|
||||
return strings.Join(split[1:], ".") + "." + suffix
|
||||
}
|
||||
|
||||
type imageType []byte
|
||||
|
@ -52,7 +59,13 @@ var (
|
|||
|
||||
// SiloDomains are domains where mentions of multiple individuals may come from.
|
||||
// These are privacy issues and will be anonymized as such.
|
||||
SiloDomains = []string{"brid.gy", "twitter.com", "facebook.com"}
|
||||
SiloDomains = []string{
|
||||
"brid.gy",
|
||||
"twitter.com",
|
||||
"facebook.com",
|
||||
"indieweb.social",
|
||||
"mastodon.social",
|
||||
}
|
||||
)
|
||||
|
||||
// IsRealImage checks the first few bytes of the provided data to see if it's a real image.
|
||||
|
|
|
@ -94,7 +94,17 @@ func TestDomainParseFromTarget(t *testing.T) {
|
|||
{
|
||||
"parse from localhost domain without extension",
|
||||
"https://localhost:1313/stuff",
|
||||
"localhost:1313",
|
||||
"localhost",
|
||||
},
|
||||
{
|
||||
"UK domain with two dots after the name",
|
||||
"https://minutestomidnight.co.uk/blog/article.html",
|
||||
"minutestomidnight.co.uk",
|
||||
},
|
||||
{
|
||||
"UK domain with subdomain",
|
||||
"https://www.minutestomidnight.co.uk/blog/article.html",
|
||||
"minutestomidnight.co.uk",
|
||||
},
|
||||
{
|
||||
"malformed http string with too little slashes simply returns same URL",
|
||||
|
|
Loading…
Reference in New Issue