forked from wgroeneveld/go-jamming
rss link collecting impl
This commit is contained in:
parent
d4c854ef81
commit
98695223ca
|
@ -1,6 +1,7 @@
|
|||
<component name="InspectionProjectProfileManager">
|
||||
<profile version="1.0">
|
||||
<option name="myName" value="Project Default" />
|
||||
<inspection_tool class="GoCommentStart" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
|
||||
<inspection_tool class="GrazieInspection" enabled="false" level="TYPO" enabled_by_default="false" />
|
||||
<inspection_tool class="LanguageDetectionInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
||||
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
package rss
|
||||
|
||||
import (
|
||||
"brainbaking.com/go-jamming/common"
|
||||
"encoding/xml"
|
||||
"errors"
|
||||
"github.com/rs/zerolog/log"
|
||||
"html/template"
|
||||
"time"
|
||||
)
|
||||
|
||||
// someone already did this for me, yay! https://siongui.github.io/2015/03/03/go-parse-web-feed-rss-atom/
|
||||
type Rss2 struct {
|
||||
XMLName xml.Name `xml:"rss"`
|
||||
Version string `xml:"version,attr"`
|
||||
// Required
|
||||
Title string `xml:"channel>title"`
|
||||
Link string `xml:"channel>link"`
|
||||
Description string `xml:"channel>description"`
|
||||
// Optional
|
||||
PubDate string `xml:"channel>pubDate"`
|
||||
ItemList []Item `xml:"channel>item"`
|
||||
}
|
||||
|
||||
// Item is a single <item> element of an RSS channel.
type Item struct {
	// Required elements.
	Title       string        `xml:"title"`
	Link        string        `xml:"link"`
	Description template.HTML `xml:"description"`

	// Optional elements.

	// Content is filled from an element whose local name is "encoded"
	// (presumably <content:encoded> - confirm against the feeds in use).
	Content template.HTML `xml:"encoded"`
	// PubDate is the raw date string; see PubDateAsTime for the parsed form.
	PubDate  string `xml:"pubDate"`
	Comments string `xml:"comments"`
}
|
||||
|
||||
func (itm Item) PubDateAsTime() time.Time {
|
||||
// format: Tue, 16 Mar 2021 17:07:14 +0000
|
||||
t, err := time.Parse("Mon, 02 Jan 2006 15:04:05 +0000", itm.PubDate)
|
||||
if err != nil {
|
||||
log.Warn().Str("pubDate", itm.PubDate).Msg("Incorrectly formatted RSS date, reverting to now")
|
||||
return common.Now()
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// Link captures the href attribute of a <link> element.
type Link struct {
	Href string `xml:"href,attr"`
}

// Author holds the <name> and <email> children of an <author> element.
type Author struct {
	Name  string `xml:"name"`
	Email string `xml:"email"`
}

// Entry is the decoding target for a single feed entry made up of
// title/summary/content/id/updated elements plus a link and an author.
// NOTE(review): nothing in the visible source decodes into Entry yet.
type Entry struct {
	Title   string `xml:"title"`
	Summary string `xml:"summary"`
	Content string `xml:"content"`
	Id      string `xml:"id"`
	Updated string `xml:"updated"`

	Link   Link   `xml:"link"`
	Author Author `xml:"author"`
}
|
||||
|
||||
func ParseFeed(content []byte) (Rss2, error) {
|
||||
v := Rss2{}
|
||||
err := xml.Unmarshal(content, &v)
|
||||
if err != nil {
|
||||
return v, err
|
||||
}
|
||||
|
||||
if v.Version == "2.0" {
|
||||
for i, _ := range v.ItemList {
|
||||
if v.ItemList[i].Content != "" {
|
||||
v.ItemList[i].Description = v.ItemList[i].Content
|
||||
}
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
|
||||
return v, errors.New("not RSS 2.0")
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
package rss
|
||||
|
||||
import (
|
||||
"brainbaking.com/go-jamming/common"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestPubDateAsTimeIncorrectRevertsToNow(t *testing.T) {
|
||||
common.Now = func() time.Time {
|
||||
return time.Date(2020, time.January, 1, 12, 30, 0, 0, time.UTC)
|
||||
}
|
||||
itm := Item{
|
||||
PubDate: "frutselbolletjes",
|
||||
}
|
||||
|
||||
theTime := itm.PubDateAsTime()
|
||||
assert.Equal(t, 2020, theTime.Year())
|
||||
assert.Equal(t, time.January, theTime.Month())
|
||||
}
|
||||
|
||||
func TestPubDateAsTime(t *testing.T) {
|
||||
itm := Item{
|
||||
PubDate: "Tue, 16 Mar 2021 17:07:14 +0000",
|
||||
}
|
||||
theTime := itm.PubDateAsTime()
|
||||
assert.Equal(t, 2021, theTime.Year())
|
||||
assert.Equal(t, time.March, theTime.Month())
|
||||
assert.Equal(t, 16, theTime.Day())
|
||||
assert.Equal(t, 17, theTime.Hour())
|
||||
assert.Equal(t, 7, theTime.Minute())
|
||||
assert.Equal(t, 14, theTime.Second())
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
package send
|
||||
|
||||
import (
|
||||
"brainbaking.com/go-jamming/app/rss"
|
||||
"time"
|
||||
)
|
||||
|
||||
// RSSItem is one collected feed item: the item's own link plus a list of
// hrefs associated with it.
type RSSItem struct {
	// link is the URL of the feed item itself.
	link string
	// hrefs is a list of href strings for the item.
	// NOTE(review): presumably the links found inside the item body - confirm
	// once the collecting code fills it.
	hrefs []string
}
|
||||
|
||||
/**
|
||||
* a typical RSS item looks like this:
|
||||
-- if <time/> found in body, assume it's a lastmod update timestamp!
|
||||
{
|
||||
title: '@celia @kev I have read both you and Kev's post on...',
|
||||
link: 'https://brainbaking.com/notes/2021/03/16h17m07s14/',
|
||||
comments: 'https://brainbaking.com/notes/2021/03/16h17m07s14/#commento',
|
||||
pubDate: 'Tue, 16 Mar 2021 17:07:14 +0000',
|
||||
author: 'Wouter Groeneveld',
|
||||
guid: {
|
||||
'#text': 'https://brainbaking.com/notes/2021/03/16h17m07s14/',
|
||||
'@_isPermaLink': 'true'
|
||||
},
|
||||
description: ' \n' +
|
||||
' \n' +
|
||||
'\n' +
|
||||
' <p><span class="h-card"><a class="u-url mention" data-user="A5GVjIHI6MH82H6iLQ" href="https://fosstodon.org/@celia" rel="ugc">@<span>celia</span></a></span> <span class="h-card"><a class="u-url mention" data-user="A54b8g0RBaIgjzczMu" href="https://fosstodon.org/@kev" rel="ugc">@<span>kev</span></a></span> I have read both you and Kev’s post on this and agree on some points indeed! But I’m not yet ready to give up webmentions. As an academic, the idea of citing/mentioning each other is very alluring 🤓. Plus, I needed an excuse to fiddle some more with JS… <br><br>As much as I loved using Wordpress before, I can’t imagine going back to writing stuff in there instead of in markdown. Gotta keep the workflow short, though. Hope it helps you focus on what matters - content!</p>\n' +
|
||||
'\n' +
|
||||
'\n' +
|
||||
' <p>\n' +
|
||||
' By <a href="/about">Wouter Groeneveld</a> on <time datetime='2021-03-20'>20 March 2021</time>.\n' +
|
||||
' </p>\n' +
|
||||
' '
|
||||
}
|
||||
**/
|
||||
func Collect(xml string, since time.Time) ([]RSSItem, error) {
|
||||
feed, err := rss.ParseFeed([]byte(xml))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var items []RSSItem
|
||||
for _, rssitem := range feed.ItemList {
|
||||
if since.Before(rssitem.PubDateAsTime()) {
|
||||
items = append(items, RSSItem{
|
||||
link: rssitem.Link,
|
||||
})
|
||||
}
|
||||
}
|
||||
return items, nil
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
package send
|
||||
|
||||
import (
|
||||
"brainbaking.com/go-jamming/common"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/suite"
|
||||
"io/ioutil"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type CollectSuite struct {
|
||||
suite.Suite
|
||||
xml string
|
||||
}
|
||||
|
||||
func (s *CollectSuite) SetupTest() {
|
||||
file, _ := ioutil.ReadFile("../../../mocks/samplerss.xml")
|
||||
s.xml = string(file)
|
||||
}
|
||||
|
||||
func TestCollectSuite(t *testing.T) {
|
||||
suite.Run(t, new(CollectSuite))
|
||||
}
|
||||
|
||||
func (s *CollectSuite) TestCollectShouldNotContainHrefsFromBlockedDomains() {
|
||||
items, err := Collect(s.xml, common.IsoToTime("2021-03-10T00:00:00.000Z"))
|
||||
assert.NoError(s.T(), err)
|
||||
last := items[len(items)-1]
|
||||
assert.Equal(s.T(), "https://brainbaking.com/notes/2021/03/10h16m24s22/", last.link)
|
||||
/*
|
||||
assert.Equal(s.T(), []string{
|
||||
"https://dog.estate/@eli_oat",
|
||||
"https://twitter.com/olesovhcom/status/1369478732247932929",
|
||||
"/aobut",
|
||||
}, last.hrefs)
|
||||
|
||||
*/
|
||||
}
|
||||
|
||||
func (s *CollectSuite) TestCollectShouldNotContainHrefsThatPointToImages() {
|
||||
|
||||
}
|
||||
|
||||
func (s *CollectSuite) TestCollectIfTimeTagFoundInContextThatActsAsAnUpdateStamp() {
|
||||
|
||||
}
|
||||
|
||||
func (s *CollectSuite) TestCollectsNotIfTimeTagFoundInContextButStillOlderThanSince() {
|
||||
|
||||
}
|
||||
|
||||
func (s *CollectSuite) TestCollectNothingIfDateInFutureAndSinceNothingNewInFeed() {
|
||||
|
||||
}
|
||||
|
||||
func (s *CollectSuite) TestCollectLatestXLinksWhenASinceParameterIsProvided() {
|
||||
|
||||
}
|
||||
|
||||
func (s *CollectSuite) TestCollectEveryExternalLinkWithoutAValidSinceDate() {
|
||||
|
||||
}
|
|
@ -6,6 +6,7 @@ import (
|
|||
"brainbaking.com/go-jamming/common"
|
||||
"brainbaking.com/go-jamming/rest"
|
||||
"github.com/rs/zerolog/log"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Sender struct {
|
||||
|
@ -15,6 +16,17 @@ type Sender struct {
|
|||
|
||||
func (snder *Sender) Send(domain string, since string) {
|
||||
log.Info().Str("domain", domain).Str("since", since).Msg(` OK: someone wants to send mentions`)
|
||||
feed, err := snder.RestClient.GetBody("https://" + domain + "/index.xml")
|
||||
if err != nil {
|
||||
log.Err(err).Str("domain", domain).Msg("Unable to retrieve RSS feed, aborting send")
|
||||
return
|
||||
}
|
||||
|
||||
snder.parseRssFeed(feed, common.IsoToTime(since))
|
||||
}
|
||||
|
||||
func (snder *Sender) parseRssFeed(feed string, since time.Time) {
|
||||
|
||||
}
|
||||
|
||||
func mention() {
|
||||
|
|
|
@ -5,3 +5,13 @@ import "time"
|
|||
// https://labs.yulrizka.com/en/stubbing-time-dot-now-in-golang/
|
||||
// None of the above are very appealing. For now, just use the lazy way.
|
||||
var Now = time.Now

// IsoToTime converts an ISO 8601 / RFC 3339 timestamp into a time.Time.
//
// since is normally the ISO string produced by clients using day.js, e.g.
// "2021-04-09T15:51:43.732Z". Any RFC 3339 timestamp is accepted: the
// fractional seconds may be absent or of any length, and the zone may be "Z"
// or a numeric offset such as "+01:00".
//
// If since cannot be parsed (including the empty string), Now() is returned.
func IsoToTime(since string) time.Time {
	// When parsing, time.RFC3339 also accepts an optional fractional-second
	// field after the seconds, so millisecond inputs keep working.
	t, err := time.Parse(time.RFC3339, since)
	if err != nil {
		return Now()
	}
	return t
}
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
package common
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/suite"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
type TimeSuite struct {
|
||||
suite.Suite
|
||||
nowtime time.Time
|
||||
}
|
||||
|
||||
func (s *TimeSuite) SetupTest() {
|
||||
s.nowtime = time.Date(2020, time.January, 1, 12, 30, 0, 0, time.UTC)
|
||||
Now = func() time.Time {
|
||||
return s.nowtime
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendSuite(t *testing.T) {
|
||||
suite.Run(t, new(TimeSuite))
|
||||
}
|
||||
|
||||
func (s *TimeSuite) TestIsoToTimeInISOString() {
|
||||
expectedtime := time.Date(2021, time.March, 9, 15, 51, 43, 732, time.UTC)
|
||||
since := IsoToTime("2021-03-09T15:51:43.732Z")
|
||||
assert.Equal(s.T(), expectedtime.Year(), since.Year())
|
||||
assert.Equal(s.T(), expectedtime.Month(), since.Month())
|
||||
assert.Equal(s.T(), expectedtime.Day(), since.Day())
|
||||
assert.Equal(s.T(), expectedtime.Hour(), since.Hour())
|
||||
assert.Equal(s.T(), expectedtime.Minute(), since.Minute())
|
||||
assert.Equal(s.T(), expectedtime.Second(), since.Second())
|
||||
}
|
||||
|
||||
func (s *TimeSuite) TestIsoToTimeInvalidStringReturnsNow() {
|
||||
since := IsoToTime("woef ik ben een hondje")
|
||||
assert.Equal(s.T(), s.nowtime, since)
|
||||
}
|
||||
|
||||
func (s *TimeSuite) TestIsoToTimeEmptyReturnsNow() {
|
||||
since := IsoToTime("")
|
||||
assert.Equal(s.T(), s.nowtime, since)
|
||||
}
|
Loading…
Reference in New Issue