why am I writing my own set implementation? geez

Wouter Groeneveld 2021-04-10 16:16:08 +02:00
parent 98695223ca
commit 044483f646
5 changed files with 93 additions and 30 deletions

@@ -5,7 +5,6 @@ import (
 	"encoding/xml"
 	"errors"
 	"github.com/rs/zerolog/log"
-	"html/template"
 	"time"
 )
@@ -24,13 +23,13 @@ type Rss2 struct {
 type Item struct {
 	// Required
 	Title string `xml:"title"`
 	Link string `xml:"link"`
-	Description template.HTML `xml:"description"`
+	Description string `xml:"description"` // could also be template.HTML, not interested in that
 	// Optional
-	Content template.HTML `xml:"encoded"`
+	Content string `xml:"encoded"`
 	PubDate string `xml:"pubDate"`
 	Comments string `xml:"comments"`
 }

 func (itm Item) PubDateAsTime() time.Time {
@@ -62,9 +61,9 @@ type Entry struct {
 	Author Author `xml:"author"`
 }

-func ParseFeed(content []byte) (Rss2, error) {
-	v := Rss2{}
-	err := xml.Unmarshal(content, &v)
+func ParseFeed(content []byte) (*Rss2, error) {
+	v := &Rss2{}
+	err := xml.Unmarshal(content, v)
 	if err != nil {
 		return v, err
 	}

@@ -2,6 +2,8 @@ package send
 import (
 	"brainbaking.com/go-jamming/app/rss"
+	"brainbaking.com/go-jamming/common"
+	"regexp"
 	"time"
 )
@@ -35,7 +37,7 @@ type RSSItem struct {
 	' '
 }
 **/
-func Collect(xml string, since time.Time) ([]RSSItem, error) {
+func (snder *Sender) Collect(xml string, since time.Time) ([]RSSItem, error) {
 	feed, err := rss.ParseFeed([]byte(xml))
 	if err != nil {
 		return nil, err
@@ -44,9 +46,25 @@ func Collect(xml string, since time.Time) ([]RSSItem, error) {
 	for _, rssitem := range feed.ItemList {
 		if since.Before(rssitem.PubDateAsTime()) {
 			items = append(items, RSSItem{
 				link: rssitem.Link,
+				hrefs: snder.collectUniqueHrefsFromDescription(rssitem.Description),
 			})
 		}
 	}
 	return items, nil
 }
+
+func (snder *Sender) collectUniqueHrefsFromDescription(html string) []string {
+	r := regexp.MustCompile(`href="(.+?)"`)
+	ext := regexp.MustCompile(`\.(gif|zip|rar|bz2|gz|7z|jpe?g|tiff?|png|webp|bmp)$`)
+
+	urlmap := common.NewSet()
+	for _, match := range r.FindAllStringSubmatch(html, -1) {
+		url := match[1] // [0] is the match of the entire expression, [1] is the capture group
+		if !ext.MatchString(url) && !snder.Conf.ContainsDisallowedDomain(url) {
+			urlmap.Add(url)
+		}
+	}
+	return urlmap.Keys()
+}
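For context, a standalone sketch of what collectUniqueHrefsFromDescription does: the first regex captures every href value (match[1] is the capture group), the extension regex drops links to images and archives, and a map deduplicates the rest. This sketch uses a plain map[string]struct{} plus a hard-coded youtube.com check instead of common.Set and snder.Conf, and the sample HTML is made up.

package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	// Made-up description snippet: one link twice, one blocked domain, one image.
	html := `<p><a href="https://dog.estate/@eli_oat">toot</a>
<a href="https://dog.estate/@eli_oat">same toot</a>
<a href="https://youtube.com/watch?v=xyz">video</a>
<a href="/img/photo.jpg">photo</a></p>`

	href := regexp.MustCompile(`href="(.+?)"`)
	ext := regexp.MustCompile(`\.(gif|zip|rar|bz2|gz|7z|jpe?g|tiff?|png|webp|bmp)$`)

	seen := map[string]struct{}{} // poor man's set
	for _, match := range href.FindAllStringSubmatch(html, -1) {
		url := match[1] // [0] is the whole match, [1] the capture group
		if ext.MatchString(url) || strings.Contains(url, "youtube.com") {
			continue // skip images/archives and blocked domains
		}
		seen[url] = struct{}{}
	}
	fmt.Println(len(seen)) // 1: only the deduplicated dog.estate link remains
}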

@@ -10,12 +10,20 @@ import (
 type CollectSuite struct {
 	suite.Suite
 	xml string
+	snder *Sender
 }

 func (s *CollectSuite) SetupTest() {
 	file, _ := ioutil.ReadFile("../../../mocks/samplerss.xml")
 	s.xml = string(file)
+	s.snder = &Sender{
+		Conf: &common.Config{
+			DisallowedWebmentionDomains: []string{
+				"youtube.com",
+			},
+		},
+	}
 }

 func TestCollectSuite(t *testing.T) {
@@ -23,32 +31,21 @@ func TestCollectSuite(t *testing.T) {
 }

 func (s *CollectSuite) TestCollectShouldNotContainHrefsFromBlockedDomains() {
-	items, err := Collect(s.xml, common.IsoToTime("2021-03-10T00:00:00.000Z"))
+	items, err := s.snder.Collect(s.xml, common.IsoToTime("2021-03-10T00:00:00.000Z"))
 	assert.NoError(s.T(), err)
 	last := items[len(items)-1]
 	assert.Equal(s.T(), "https://brainbaking.com/notes/2021/03/10h16m24s22/", last.link)
-	/*
-		assert.Equal(s.T(), []string{
-			"https://dog.estate/@eli_oat",
-			"https://twitter.com/olesovhcom/status/1369478732247932929",
-			"/aobut",
-		}, last.hrefs)
-	*/
+	assert.ElementsMatch(s.T(), []string{
+		"https://dog.estate/@eli_oat",
+		"https://twitter.com/olesovhcom/status/1369478732247932929",
+		"/about",
+	}, last.hrefs)
 }

 func (s *CollectSuite) TestCollectShouldNotContainHrefsThatPointToImages() {
 }
-
-func (s *CollectSuite) TestCollectIfTimeTagFoundInContextThatActsAsAnUpdateStamp() {
-}
-
-func (s *CollectSuite) TestCollectsNotIfTimeTagFoundInContextButStillOlderThanSince() {
-}

 func (s *CollectSuite) TestCollectNothingIfDateInFutureAndSinceNothingNewInFeed() {
 }
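One note on the test above: the old, commented-out assertion used assert.Equal on last.hrefs, but Keys() on the new Set ranges over a map, and Go deliberately randomizes map iteration order, so the slice can come back in any order. assert.ElementsMatch only requires that both slices hold the same elements. A minimal sketch of the difference, not part of the commit, assuming the test file sits in package send like collect.go:

package send

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// Hypothetical example: ElementsMatch ignores ordering, Equal does not.
func TestElementsMatchIgnoresOrder(t *testing.T) {
	got := []string{"/about", "https://dog.estate/@eli_oat"}
	want := []string{"https://dog.estate/@eli_oat", "/about"}

	// assert.Equal(t, want, got) would fail here: same elements, different order.
	assert.ElementsMatch(t, want, got) // passes
}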

common/collections.go (new file, 40 lines)

@@ -0,0 +1,40 @@
+package common
+
+type EmptySetVal struct{}
+
+var member EmptySetVal
+
+type Set struct {
+	data map[string]EmptySetVal
+}
+
+func NewSet() *Set {
+	return &Set{
+		data: map[string]EmptySetVal{},
+	}
+}
+
+func (set *Set) Add(val string) {
+	set.data[val] = member
+}
+
+func (set *Set) Del(val string) {
+	delete(set.data, val)
+}
+
+func (set *Set) Len() int {
+	return len(set.data)
+}
+
+func (set *Set) HasKey(key string) bool {
+	_, exists := set.data[key]
+	return exists
+}
+
+func (set *Set) Keys() []string {
+	keys := make([]string, 0, len(set.data))
+	for key := range set.data {
+		keys = append(keys, key)
+	}
+	return keys
+}
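The Set above is a thin wrapper around the usual Go idiom for sets, a map[string]struct{} whose zero-byte values (EmptySetVal/member) carry no data. A minimal usage sketch, assuming the module path brainbaking.com/go-jamming seen in the imports earlier:

package main

import (
	"fmt"

	"brainbaking.com/go-jamming/common"
)

func main() {
	set := common.NewSet()
	set.Add("https://brainbaking.com")
	set.Add("https://brainbaking.com") // duplicate, absorbed by the map
	set.Add("/about")

	fmt.Println(set.Len())            // 2
	fmt.Println(set.HasKey("/about")) // true
	fmt.Println(set.Keys())           // both entries, in no guaranteed order

	set.Del("/about")
	fmt.Println(set.Len()) // 1
}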

@@ -18,6 +18,15 @@ type Config struct {
 	DisallowedWebmentionDomains []string
 }

+func (c *Config) ContainsDisallowedDomain(url string) bool {
+	for _, domain := range c.DisallowedWebmentionDomains {
+		if strings.Contains(url, domain) {
+			return true
+		}
+	}
+	return false
+}
+
 func (c *Config) IsAnAllowedDomain(url string) bool {
 	for _, domain := range c.AllowedWebmentionSources {
 		if domain == url {