forked from wgroeneveld/go-jamming
why am I writing my own set implementation? geez
This commit is contained in:
parent
98695223ca
commit
044483f646
|
@ -5,7 +5,6 @@ import (
|
||||||
"encoding/xml"
|
"encoding/xml"
|
||||||
"errors"
|
"errors"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
"html/template"
|
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -24,13 +23,13 @@ type Rss2 struct {
|
||||||
|
|
||||||
type Item struct {
|
type Item struct {
|
||||||
// Required
|
// Required
|
||||||
Title string `xml:"title"`
|
Title string `xml:"title"`
|
||||||
Link string `xml:"link"`
|
Link string `xml:"link"`
|
||||||
Description template.HTML `xml:"description"`
|
Description string `xml:"description"` // could also be template.HTML, not interested in that
|
||||||
// Optional
|
// Optional
|
||||||
Content template.HTML `xml:"encoded"`
|
Content string `xml:"encoded"`
|
||||||
PubDate string `xml:"pubDate"`
|
PubDate string `xml:"pubDate"`
|
||||||
Comments string `xml:"comments"`
|
Comments string `xml:"comments"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (itm Item) PubDateAsTime() time.Time {
|
func (itm Item) PubDateAsTime() time.Time {
|
||||||
|
@ -62,9 +61,9 @@ type Entry struct {
|
||||||
Author Author `xml:"author"`
|
Author Author `xml:"author"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseFeed(content []byte) (Rss2, error) {
|
func ParseFeed(content []byte) (*Rss2, error) {
|
||||||
v := Rss2{}
|
v := &Rss2{}
|
||||||
err := xml.Unmarshal(content, &v)
|
err := xml.Unmarshal(content, v)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return v, err
|
return v, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,8 @@ package send
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"brainbaking.com/go-jamming/app/rss"
|
"brainbaking.com/go-jamming/app/rss"
|
||||||
|
"brainbaking.com/go-jamming/common"
|
||||||
|
"regexp"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -35,7 +37,7 @@ type RSSItem struct {
|
||||||
' '
|
' '
|
||||||
}
|
}
|
||||||
**/
|
**/
|
||||||
func Collect(xml string, since time.Time) ([]RSSItem, error) {
|
func (snder *Sender) Collect(xml string, since time.Time) ([]RSSItem, error) {
|
||||||
feed, err := rss.ParseFeed([]byte(xml))
|
feed, err := rss.ParseFeed([]byte(xml))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -44,9 +46,25 @@ func Collect(xml string, since time.Time) ([]RSSItem, error) {
|
||||||
for _, rssitem := range feed.ItemList {
|
for _, rssitem := range feed.ItemList {
|
||||||
if since.Before(rssitem.PubDateAsTime()) {
|
if since.Before(rssitem.PubDateAsTime()) {
|
||||||
items = append(items, RSSItem{
|
items = append(items, RSSItem{
|
||||||
link: rssitem.Link,
|
link: rssitem.Link,
|
||||||
|
hrefs: snder.collectUniqueHrefsFromDescription(rssitem.Description),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return items, nil
|
return items, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (snder *Sender) collectUniqueHrefsFromDescription(html string) []string {
|
||||||
|
r := regexp.MustCompile(`href="(.+?)"`)
|
||||||
|
ext := regexp.MustCompile(`\.(gif|zip|rar|bz2|gz|7z|jpe?g|tiff?|png|webp|bmp)$`)
|
||||||
|
urlmap := common.NewSet()
|
||||||
|
|
||||||
|
for _, match := range r.FindAllStringSubmatch(html, -1) {
|
||||||
|
url := match[1] // [0] is the match of the entire expression, [1] is the capture group
|
||||||
|
if !ext.MatchString(url) && !snder.Conf.ContainsDisallowedDomain(url) {
|
||||||
|
urlmap.Add(url)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return urlmap.Keys()
|
||||||
|
}
|
||||||
|
|
|
@ -10,12 +10,20 @@ import (
|
||||||
|
|
||||||
type CollectSuite struct {
|
type CollectSuite struct {
|
||||||
suite.Suite
|
suite.Suite
|
||||||
xml string
|
xml string
|
||||||
|
snder *Sender
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CollectSuite) SetupTest() {
|
func (s *CollectSuite) SetupTest() {
|
||||||
file, _ := ioutil.ReadFile("../../../mocks/samplerss.xml")
|
file, _ := ioutil.ReadFile("../../../mocks/samplerss.xml")
|
||||||
s.xml = string(file)
|
s.xml = string(file)
|
||||||
|
s.snder = &Sender{
|
||||||
|
Conf: &common.Config{
|
||||||
|
DisallowedWebmentionDomains: []string{
|
||||||
|
"youtube.com",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCollectSuite(t *testing.T) {
|
func TestCollectSuite(t *testing.T) {
|
||||||
|
@ -23,32 +31,21 @@ func TestCollectSuite(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CollectSuite) TestCollectShouldNotContainHrefsFromBlockedDomains() {
|
func (s *CollectSuite) TestCollectShouldNotContainHrefsFromBlockedDomains() {
|
||||||
items, err := Collect(s.xml, common.IsoToTime("2021-03-10T00:00:00.000Z"))
|
items, err := s.snder.Collect(s.xml, common.IsoToTime("2021-03-10T00:00:00.000Z"))
|
||||||
assert.NoError(s.T(), err)
|
assert.NoError(s.T(), err)
|
||||||
last := items[len(items)-1]
|
last := items[len(items)-1]
|
||||||
assert.Equal(s.T(), "https://brainbaking.com/notes/2021/03/10h16m24s22/", last.link)
|
assert.Equal(s.T(), "https://brainbaking.com/notes/2021/03/10h16m24s22/", last.link)
|
||||||
/*
|
assert.ElementsMatch(s.T(), []string{
|
||||||
assert.Equal(s.T(), []string{
|
"https://dog.estate/@eli_oat",
|
||||||
"https://dog.estate/@eli_oat",
|
"https://twitter.com/olesovhcom/status/1369478732247932929",
|
||||||
"https://twitter.com/olesovhcom/status/1369478732247932929",
|
"/about",
|
||||||
"/aobut",
|
}, last.hrefs)
|
||||||
}, last.hrefs)
|
|
||||||
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CollectSuite) TestCollectShouldNotContainHrefsThatPointToImages() {
|
func (s *CollectSuite) TestCollectShouldNotContainHrefsThatPointToImages() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CollectSuite) TestCollectIfTimeTagFoundInContextThatActsAsAnUpdateStamp() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *CollectSuite) TestCollectsNotIfTimeTagFoundInContextButStillOlderThanSince() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *CollectSuite) TestCollectNothingIfDateInFutureAndSinceNothingNewInFeed() {
|
func (s *CollectSuite) TestCollectNothingIfDateInFutureAndSinceNothingNewInFeed() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
package common
|
||||||
|
|
||||||
|
// EmptySetVal is the zero-width value stored against every key of a Set.
type EmptySetVal struct{}

// member is the single shared value assigned to every key on Add.
var member EmptySetVal

// Set is an unordered collection of unique strings backed by a map.
//
// The zero value is an empty set that is ready to use. Set performs no
// locking of its own.
type Set struct {
	data map[string]EmptySetVal
}

// NewSet returns a pointer to a new, empty Set.
func NewSet() *Set {
	return &Set{
		data: map[string]EmptySetVal{},
	}
}

// Add inserts val into the set. Adding a value that is already present is a
// no-op.
func (set *Set) Add(val string) {
	if set.data == nil {
		// Lazily create the map so that a zero-value Set does not panic
		// with "assignment to entry in nil map".
		set.data = map[string]EmptySetVal{}
	}
	set.data[val] = member
}

// Del removes val from the set. Removing a value that is not present is a
// no-op.
func (set *Set) Del(val string) {
	delete(set.data, val)
}

// Len returns the number of distinct values in the set.
func (set *Set) Len() int {
	return len(set.data)
}

// HasKey reports whether key is a member of the set.
func (set *Set) HasKey(key string) bool {
	_, exists := set.data[key]
	return exists
}

// Keys returns all members of the set as a freshly allocated, non-nil slice.
// The order of the returned values follows map iteration and is therefore
// unspecified; callers that need a stable order must sort the result.
func (set *Set) Keys() []string {
	keys := make([]string, 0, len(set.data))
	for key := range set.data {
		keys = append(keys, key)
	}
	return keys
}
|
|
@ -18,6 +18,15 @@ type Config struct {
|
||||||
DisallowedWebmentionDomains []string
|
DisallowedWebmentionDomains []string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Config) ContainsDisallowedDomain(url string) bool {
|
||||||
|
for _, domain := range c.DisallowedWebmentionDomains {
|
||||||
|
if strings.Contains(url, domain) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func (c *Config) IsAnAllowedDomain(url string) bool {
|
func (c *Config) IsAnAllowedDomain(url string) bool {
|
||||||
for _, domain := range c.AllowedWebmentionSources {
|
for _, domain := range c.AllowedWebmentionSources {
|
||||||
if domain == url {
|
if domain == url {
|
||||||
|
|
Loading…
Reference in New Issue