why am I writing my own set implementation? geez

This commit is contained in:
Wouter Groeneveld 2021-04-10 16:16:08 +02:00
parent 98695223ca
commit 044483f646
5 changed files with 93 additions and 30 deletions

View File

@ -5,7 +5,6 @@ import (
"encoding/xml"
"errors"
"github.com/rs/zerolog/log"
"html/template"
"time"
)
@ -24,13 +23,13 @@ type Rss2 struct {
// Item maps the child elements of a feed item (title, link, description,
// encoded, pubDate, comments) onto fields through their xml struct tags.
// NOTE(review): every field is listed twice in this hunk; it appears to show
// the earlier declarations (template.HTML for Description and Content) next to
// the plain string ones — confirm which set is the live one.
type Item struct {
// Required
Title string `xml:"title"`
Link string `xml:"link"`
Description template.HTML `xml:"description"`
Title string `xml:"title"`
Link string `xml:"link"`
Description string `xml:"description"` // could also be template.HTML, not interested in that
// Optional
Content template.HTML `xml:"encoded"`
PubDate string `xml:"pubDate"`
Comments string `xml:"comments"`
Content string `xml:"encoded"`
PubDate string `xml:"pubDate"`
Comments string `xml:"comments"`
}
func (itm Item) PubDateAsTime() time.Time {
@ -62,9 +61,9 @@ type Entry struct {
Author Author `xml:"author"`
}
func ParseFeed(content []byte) (Rss2, error) {
v := Rss2{}
err := xml.Unmarshal(content, &v)
func ParseFeed(content []byte) (*Rss2, error) {
v := &Rss2{}
err := xml.Unmarshal(content, v)
if err != nil {
return v, err
}

View File

@ -2,6 +2,8 @@ package send
import (
"brainbaking.com/go-jamming/app/rss"
"brainbaking.com/go-jamming/common"
"regexp"
"time"
)
@ -35,7 +37,7 @@ type RSSItem struct {
' '
}
**/
func Collect(xml string, since time.Time) ([]RSSItem, error) {
func (snder *Sender) Collect(xml string, since time.Time) ([]RSSItem, error) {
feed, err := rss.ParseFeed([]byte(xml))
if err != nil {
return nil, err
@ -44,9 +46,25 @@ func Collect(xml string, since time.Time) ([]RSSItem, error) {
for _, rssitem := range feed.ItemList {
if since.Before(rssitem.PubDateAsTime()) {
items = append(items, RSSItem{
link: rssitem.Link,
link: rssitem.Link,
hrefs: snder.collectUniqueHrefsFromDescription(rssitem.Description),
})
}
}
return items, nil
}
// Patterns used by collectUniqueHrefsFromDescription. They are compiled once
// at package initialisation instead of on every call.
var (
	// hrefAttrPattern captures the value of each double-quoted href attribute.
	hrefAttrPattern = regexp.MustCompile(`href="(.+?)"`)
	// ignoredExtPattern matches URLs that end in an image or archive extension.
	ignoredExtPattern = regexp.MustCompile(`\.(gif|zip|rar|bz2|gz|7z|jpe?g|tiff?|png|webp|bmp)$`)
)

// collectUniqueHrefsFromDescription extracts the distinct link targets found
// in href="..." attributes of the given HTML fragment.
//
// A URL is skipped when it ends in one of the ignored file extensions or when
// snder.Conf.ContainsDisallowedDomain reports true for it. Every remaining URL
// is returned exactly once, in order of first appearance, so the output is
// deterministic for a given input. The returned slice is never nil.
func (snder *Sender) collectUniqueHrefsFromDescription(html string) []string {
	seen := common.NewSet()
	hrefs := make([]string, 0) // non-nil even when nothing matches
	for _, match := range hrefAttrPattern.FindAllStringSubmatch(html, -1) {
		url := match[1] // [0] is the match of the entire expression, [1] is the capture group
		if seen.HasKey(url) || ignoredExtPattern.MatchString(url) || snder.Conf.ContainsDisallowedDomain(url) {
			continue
		}
		seen.Add(url)
		hrefs = append(hrefs, url)
	}
	return hrefs
}

View File

@ -10,12 +10,20 @@ import (
// CollectSuite is the test suite around Sender.Collect. It embeds suite.Suite
// and carries the feed contents (xml) and the Sender under test (snder), both
// of which are assigned in SetupTest.
// NOTE(review): the xml field is listed twice in this hunk, apparently the
// same declaration before and after realignment — confirm.
type CollectSuite struct {
suite.Suite
xml string
xml string
snder *Sender
}
// SetupTest prepares the fixtures used by the tests of this suite: it loads
// the sample RSS feed into s.xml and builds a Sender whose configuration
// disallows the domain "youtube.com".
func (s *CollectSuite) SetupTest() {
	file, err := ioutil.ReadFile("../../../mocks/samplerss.xml")
	// Fail right away when the fixture cannot be read; otherwise every test
	// would silently run against an empty feed.
	s.Require().NoError(err)
	s.xml = string(file)
	s.snder = &Sender{
		Conf: &common.Config{
			DisallowedWebmentionDomains: []string{
				"youtube.com",
			},
		},
	}
}
func TestCollectSuite(t *testing.T) {
@ -23,32 +31,21 @@ func TestCollectSuite(t *testing.T) {
}
// TestCollectShouldNotContainHrefsFromBlockedDomains collects the sample feed
// with 2021-03-10T00:00:00.000Z as the "since" time, then checks that no error
// occurred, that the last collected item has the expected link, and that its
// hrefs consist of exactly the three expected URLs in any order.
// NOTE(review): this hunk shows both a package-level Collect call and the
// s.snder.Collect call, plus a commented-out assert.Equal next to the
// assert.ElementsMatch — it appears to interleave an older and a newer version.
func (s *CollectSuite) TestCollectShouldNotContainHrefsFromBlockedDomains() {
items, err := Collect(s.xml, common.IsoToTime("2021-03-10T00:00:00.000Z"))
items, err := s.snder.Collect(s.xml, common.IsoToTime("2021-03-10T00:00:00.000Z"))
assert.NoError(s.T(), err)
last := items[len(items)-1]
assert.Equal(s.T(), "https://brainbaking.com/notes/2021/03/10h16m24s22/", last.link)
/*
assert.Equal(s.T(), []string{
"https://dog.estate/@eli_oat",
"https://twitter.com/olesovhcom/status/1369478732247932929",
"/aobut",
}, last.hrefs)
*/
assert.ElementsMatch(s.T(), []string{
"https://dog.estate/@eli_oat",
"https://twitter.com/olesovhcom/status/1369478732247932929",
"/about",
}, last.hrefs)
}
// TestCollectShouldNotContainHrefsThatPointToImages is a placeholder: its body
// is empty, so it currently asserts nothing.
func (s *CollectSuite) TestCollectShouldNotContainHrefsThatPointToImages() {
}
// TestCollectIfTimeTagFoundInContextThatActsAsAnUpdateStamp is a placeholder:
// its body is empty, so it currently asserts nothing.
func (s *CollectSuite) TestCollectIfTimeTagFoundInContextThatActsAsAnUpdateStamp() {
}
// TestCollectsNotIfTimeTagFoundInContextButStillOlderThanSince is a
// placeholder: its body is empty, so it currently asserts nothing.
func (s *CollectSuite) TestCollectsNotIfTimeTagFoundInContextButStillOlderThanSince() {
}
// TestCollectNothingIfDateInFutureAndSinceNothingNewInFeed is a placeholder:
// its body is empty, so it currently asserts nothing.
func (s *CollectSuite) TestCollectNothingIfDateInFutureAndSinceNothingNewInFeed() {
}

40
common/collections.go Normal file
View File

@ -0,0 +1,40 @@
package common
// EmptySetVal is the zero-width value stored for every member of a Set.
type EmptySetVal struct{}

// member is the single value written under every key of a Set.
var member EmptySetVal

// Set is a collection of unique strings backed by a map.
//
// The zero value is an empty set that is ready to use; NewSet returns a
// pointer to a pre-allocated one. The methods perform no locking.
type Set struct {
	data map[string]EmptySetVal
}

// NewSet returns a pointer to a new, empty Set.
func NewSet() *Set {
	return &Set{
		data: make(map[string]EmptySetVal),
	}
}

// Add inserts val into the set. Adding a value that is already present has no
// effect.
func (s *Set) Add(val string) {
	if s.data == nil {
		// Writing to a nil map panics, so allocate lazily for zero-value Sets.
		s.data = make(map[string]EmptySetVal)
	}
	s.data[val] = member
}

// Del removes val from the set. Removing a value that is not present has no
// effect.
func (s *Set) Del(val string) {
	delete(s.data, val)
}

// Len returns the number of values in the set.
func (s *Set) Len() int {
	return len(s.data)
}

// HasKey reports whether key is a member of the set.
func (s *Set) HasKey(key string) bool {
	_, exists := s.data[key]
	return exists
}

// Keys returns all members of the set as a new, non-nil slice. The order
// follows map iteration and is therefore unspecified; sort the result when a
// stable order is needed.
func (s *Set) Keys() []string {
	keys := make([]string, 0, len(s.data))
	for key := range s.data {
		keys = append(keys, key)
	}
	return keys
}

View File

@ -18,6 +18,15 @@ type Config struct {
DisallowedWebmentionDomains []string
}
// ContainsDisallowedDomain reports whether url contains any entry of
// c.DisallowedWebmentionDomains as a substring.
//
// The check is a plain substring search over the whole URL, so an entry also
// matches when it occurs inside a longer host name, a path or a query string.
func (c *Config) ContainsDisallowedDomain(url string) bool {
	blocked := c.DisallowedWebmentionDomains
	for i := 0; i < len(blocked); i++ {
		if !strings.Contains(url, blocked[i]) {
			continue
		}
		return true
	}
	return false
}
func (c *Config) IsAnAllowedDomain(url string) bool {
for _, domain := range c.AllowedWebmentionSources {
if domain == url {