why am I writing my own set implementation? geez
This commit is contained in:
parent
98695223ca
commit
044483f646
|
@ -5,7 +5,6 @@ import (
|
|||
"encoding/xml"
|
||||
"errors"
|
||||
"github.com/rs/zerolog/log"
|
||||
"html/template"
|
||||
"time"
|
||||
)
|
||||
|
||||
|
@ -26,9 +25,9 @@ type Item struct {
|
|||
// Required
|
||||
Title string `xml:"title"`
|
||||
Link string `xml:"link"`
|
||||
Description template.HTML `xml:"description"`
|
||||
Description string `xml:"description"` // could also be template.HTML, not interested in that
|
||||
// Optional
|
||||
Content template.HTML `xml:"encoded"`
|
||||
Content string `xml:"encoded"`
|
||||
PubDate string `xml:"pubDate"`
|
||||
Comments string `xml:"comments"`
|
||||
}
|
||||
|
@ -62,9 +61,9 @@ type Entry struct {
|
|||
Author Author `xml:"author"`
|
||||
}
|
||||
|
||||
func ParseFeed(content []byte) (Rss2, error) {
|
||||
v := Rss2{}
|
||||
err := xml.Unmarshal(content, &v)
|
||||
func ParseFeed(content []byte) (*Rss2, error) {
|
||||
v := &Rss2{}
|
||||
err := xml.Unmarshal(content, v)
|
||||
if err != nil {
|
||||
return v, err
|
||||
}
|
||||
|
|
|
@ -2,6 +2,8 @@ package send
|
|||
|
||||
import (
|
||||
"brainbaking.com/go-jamming/app/rss"
|
||||
"brainbaking.com/go-jamming/common"
|
||||
"regexp"
|
||||
"time"
|
||||
)
|
||||
|
||||
|
@ -35,7 +37,7 @@ type RSSItem struct {
|
|||
' '
|
||||
}
|
||||
**/
|
||||
func Collect(xml string, since time.Time) ([]RSSItem, error) {
|
||||
func (snder *Sender) Collect(xml string, since time.Time) ([]RSSItem, error) {
|
||||
feed, err := rss.ParseFeed([]byte(xml))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -45,8 +47,24 @@ func Collect(xml string, since time.Time) ([]RSSItem, error) {
|
|||
if since.Before(rssitem.PubDateAsTime()) {
|
||||
items = append(items, RSSItem{
|
||||
link: rssitem.Link,
|
||||
hrefs: snder.collectUniqueHrefsFromDescription(rssitem.Description),
|
||||
})
|
||||
}
|
||||
}
|
||||
return items, nil
|
||||
}
|
||||
|
||||
func (snder *Sender) collectUniqueHrefsFromDescription(html string) []string {
|
||||
r := regexp.MustCompile(`href="(.+?)"`)
|
||||
ext := regexp.MustCompile(`\.(gif|zip|rar|bz2|gz|7z|jpe?g|tiff?|png|webp|bmp)$`)
|
||||
urlmap := common.NewSet()
|
||||
|
||||
for _, match := range r.FindAllStringSubmatch(html, -1) {
|
||||
url := match[1] // [0] is the match of the entire expression, [1] is the capture group
|
||||
if !ext.MatchString(url) && !snder.Conf.ContainsDisallowedDomain(url) {
|
||||
urlmap.Add(url)
|
||||
}
|
||||
}
|
||||
|
||||
return urlmap.Keys()
|
||||
}
|
||||
|
|
|
@ -11,11 +11,19 @@ import (
|
|||
type CollectSuite struct {
|
||||
suite.Suite
|
||||
xml string
|
||||
snder *Sender
|
||||
}
|
||||
|
||||
func (s *CollectSuite) SetupTest() {
|
||||
file, _ := ioutil.ReadFile("../../../mocks/samplerss.xml")
|
||||
s.xml = string(file)
|
||||
s.snder = &Sender{
|
||||
Conf: &common.Config{
|
||||
DisallowedWebmentionDomains: []string{
|
||||
"youtube.com",
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func TestCollectSuite(t *testing.T) {
|
||||
|
@ -23,32 +31,21 @@ func TestCollectSuite(t *testing.T) {
|
|||
}
|
||||
|
||||
func (s *CollectSuite) TestCollectShouldNotContainHrefsFromBlockedDomains() {
|
||||
items, err := Collect(s.xml, common.IsoToTime("2021-03-10T00:00:00.000Z"))
|
||||
items, err := s.snder.Collect(s.xml, common.IsoToTime("2021-03-10T00:00:00.000Z"))
|
||||
assert.NoError(s.T(), err)
|
||||
last := items[len(items)-1]
|
||||
assert.Equal(s.T(), "https://brainbaking.com/notes/2021/03/10h16m24s22/", last.link)
|
||||
/*
|
||||
assert.Equal(s.T(), []string{
|
||||
assert.ElementsMatch(s.T(), []string{
|
||||
"https://dog.estate/@eli_oat",
|
||||
"https://twitter.com/olesovhcom/status/1369478732247932929",
|
||||
"/aobut",
|
||||
"/about",
|
||||
}, last.hrefs)
|
||||
|
||||
*/
|
||||
}
|
||||
|
||||
func (s *CollectSuite) TestCollectShouldNotContainHrefsThatPointToImages() {
|
||||
|
||||
}
|
||||
|
||||
func (s *CollectSuite) TestCollectIfTimeTagFoundInContextThatActsAsAnUpdateStamp() {
|
||||
|
||||
}
|
||||
|
||||
func (s *CollectSuite) TestCollectsNotIfTimeTagFoundInContextButStillOlderThanSince() {
|
||||
|
||||
}
|
||||
|
||||
func (s *CollectSuite) TestCollectNothingIfDateInFutureAndSinceNothingNewInFeed() {
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
package common
|
||||
|
||||
// EmptySetVal is the zero-width value type stored for every key of a Set.
type EmptySetVal struct{}

// member is the single value stored for every key of a Set.
var member EmptySetVal

// Set is a collection of unique strings backed by a map.
// The zero value is an empty set that is ready to use.
type Set struct {
	data map[string]EmptySetVal
}

// NewSet returns an empty Set.
func NewSet() *Set {
	return &Set{
		data: map[string]EmptySetVal{},
	}
}

// Add inserts val into the set. Adding a value that is already present
// leaves the set unchanged.
func (s *Set) Add(val string) {
	if s.data == nil {
		// A Set created without NewSet has a nil map; writing to it would panic.
		s.data = map[string]EmptySetVal{}
	}
	s.data[val] = member
}

// Del removes val from the set. Removing a value that is not present is a no-op.
func (s *Set) Del(val string) {
	delete(s.data, val)
}

// Len returns the number of values in the set.
func (s *Set) Len() int {
	return len(s.data)
}

// HasKey reports whether key is in the set.
func (s *Set) HasKey(key string) bool {
	_, exists := s.data[key]
	return exists
}

// Keys returns the values of the set as a new slice. The slice is built by
// ranging over a map, so its order is not defined and may differ between calls.
func (s *Set) Keys() []string {
	keys := make([]string, 0, len(s.data))
	for key := range s.data {
		keys = append(keys, key)
	}
	return keys
}
|
|
@ -18,6 +18,15 @@ type Config struct {
|
|||
DisallowedWebmentionDomains []string
|
||||
}
|
||||
|
||||
func (c *Config) ContainsDisallowedDomain(url string) bool {
|
||||
for _, domain := range c.DisallowedWebmentionDomains {
|
||||
if strings.Contains(url, domain) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (c *Config) IsAnAllowedDomain(url string) bool {
|
||||
for _, domain := range c.AllowedWebmentionSources {
|
||||
if domain == url {
|
||||
|
|
Loading…
Reference in New Issue