forked from wgroeneveld/go-jamming
first jab at mf2 parsing and testing
This commit is contained in:
parent
955871741a
commit
bacc66872d
|
@ -0,0 +1,8 @@
|
||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
|
@ -0,0 +1,8 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="WEB_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
|
@ -0,0 +1,8 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/go-jamming.iml" filepath="$PROJECT_DIR$/.idea/go-jamming.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
|
@ -0,0 +1,6 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
|
@ -0,0 +1,47 @@
|
||||||
|
package webmention
|
||||||
|
|
||||||
|
import "willnorris.com/go/microformats"
|
||||||
|
|
||||||
|
// Go stuff: entry.Properties["name"][0].(string),
|
||||||
|
// JS stuff: hEntry.properties?.name?.[0]
|
||||||
|
// The problem: convoluted syntax and no optional chaining!
|
||||||
|
func mfstr(mf *microformats.Microformat, key string) string {
|
||||||
|
val := mf.Properties[key]
|
||||||
|
if len(val) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
str, ok := val[0].(string)
|
||||||
|
if !ok {
|
||||||
|
// in very weird cases, it could be a map holding a value, like in mf2's "photo"
|
||||||
|
valMap, ok2 := val[0].(map[string]string)
|
||||||
|
if !ok2 {
|
||||||
|
str = ""
|
||||||
|
}
|
||||||
|
str = valMap["value"]
|
||||||
|
}
|
||||||
|
|
||||||
|
return str
|
||||||
|
}
|
||||||
|
|
||||||
|
func mfmap(mf *microformats.Microformat, key string) map[string]string {
|
||||||
|
val := mf.Properties[key]
|
||||||
|
if len(val) == 0 {
|
||||||
|
return map[string]string{}
|
||||||
|
}
|
||||||
|
mapVal, ok := val[0].(map[string]string)
|
||||||
|
if !ok {
|
||||||
|
return map[string]string{}
|
||||||
|
}
|
||||||
|
return mapVal
|
||||||
|
}
|
||||||
|
|
||||||
|
func mfprop(mf *microformats.Microformat, key string) *microformats.Microformat {
|
||||||
|
val := mf.Properties[key]
|
||||||
|
if len(val) == 0 {
|
||||||
|
return µformats.Microformat{
|
||||||
|
Properties: map[string][]interface{}{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return val[0].(*microformats.Microformat)
|
||||||
|
}
|
|
@ -3,6 +3,7 @@ package webmention
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
"os"
|
"os"
|
||||||
"crypto/md5"
|
"crypto/md5"
|
||||||
|
@ -29,6 +30,11 @@ func (wm *webmention) asPath(conf *common.Config) string {
|
||||||
return conf.DataPath + "/" + domain + "/" + filename + ".json"
|
return conf.DataPath + "/" + domain + "/" + filename + ".json"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (wm *webmention) sourceUrl() *url.URL {
|
||||||
|
url, _ := url.Parse(wm.source)
|
||||||
|
return url
|
||||||
|
}
|
||||||
|
|
||||||
// used as a "class" to inject dependencies, just to be able to test. Do NOT like this.
|
// used as a "class" to inject dependencies, just to be able to test. Do NOT like this.
|
||||||
// Is there a better way? e.g. in validate, I just pass rest.Client as an arg. Not great either.
|
// Is there a better way? e.g. in validate, I just pass rest.Client as an arg. Not great either.
|
||||||
type receiver struct {
|
type receiver struct {
|
||||||
|
@ -53,16 +59,99 @@ func (recv *receiver) deletePossibleOlderWebmention(wm webmention) {
|
||||||
os.Remove(wm.asPath(recv.conf))
|
os.Remove(wm.asPath(recv.conf))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getHEntry(data *microformats.Data) *microformats.Microformat {
|
||||||
|
for _, itm := range data.Items {
|
||||||
|
if common.Includes(itm.Type, "h-entry") {
|
||||||
|
return itm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// indiewebAuthor holds the author name and picture stored inside an
// indiewebData.
type indiewebAuthor struct {
	name, picture string
}
|
||||||
|
|
||||||
|
type indiewebData struct {
|
||||||
|
author indiewebAuthor
|
||||||
|
name string
|
||||||
|
content string
|
||||||
|
published string // TODO to a date
|
||||||
|
url string
|
||||||
|
dateType string // TODO json property "type"
|
||||||
|
source string
|
||||||
|
target string
|
||||||
|
}
|
||||||
|
|
||||||
func (recv *receiver) processSourceBody(body string, wm webmention) {
|
func (recv *receiver) processSourceBody(body string, wm webmention) {
|
||||||
if strings.Index(body, wm.target) == -1 {
|
if !strings.Contains(body, wm.target) {
|
||||||
log.Warn().Str("target", wm.target).Msg("ABORT: no mention of target found in html src of source!")
|
log.Warn().Str("target", wm.target).Msg("ABORT: no mention of target found in html src of source!")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
r := strings.NewReader(body)
|
r := strings.NewReader(body)
|
||||||
data := microformats.Parse(r, nil)
|
data := microformats.Parse(r, wm.sourceUrl())
|
||||||
|
hEntry := getHEntry(data)
|
||||||
fmt.Println(data.Items[0].Type[0]) // h-entry
|
var indieweb *indiewebData
|
||||||
// then: .Properties on Items[0]
|
if hEntry == nil {
|
||||||
// see https://github.com/willnorris/microformats/blob/main/microformats.go
|
indieweb = parseBodyAsNonIndiewebSite(body, wm)
|
||||||
|
} else {
|
||||||
|
indieweb = parseBodyAsIndiewebSite(hEntry, wm)
|
||||||
|
}
|
||||||
|
|
||||||
|
saveWebmentionToDisk(wm, indieweb)
|
||||||
|
log.Info().Str("file", wm.asPath(recv.conf)).Msg("OK: webmention processed.")
|
||||||
|
}
|
||||||
|
|
||||||
|
// saveWebmentionToDisk is currently an empty stub: it accepts the webmention
// and its parsed data but does nothing with them.
// NOTE(review): presumably this should write indieweb to wm.asPath(...) — TODO implement.
func saveWebmentionToDisk(wm webmention, indieweb *indiewebData) {
|
||||||
|

|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO I'm smelling very unstable code, apply https://golang.org/doc/effective_go#recover here?
|
||||||
|
// see https://github.com/willnorris/microformats/blob/main/microformats.go
|
||||||
|
func parseBodyAsIndiewebSite(hEntry *microformats.Microformat, wm webmention) *indiewebData {
|
||||||
|
name := mfstr(hEntry, "name")
|
||||||
|
authorName := mfstr(mfprop(hEntry, "author"), "name")
|
||||||
|
if authorName == "" {
|
||||||
|
authorName = mfprop(hEntry, "author").Value
|
||||||
|
}
|
||||||
|
// TODO sometimes it's picture.value??
|
||||||
|
pic := mfstr(mfprop(hEntry, "author"), "photo")
|
||||||
|
summary := mfstr(hEntry, "summary")
|
||||||
|
contentEntry := mfmap(hEntry, "content")["value"]
|
||||||
|
bridgyTwitterContent := mfstr(hEntry, "bridgy-twitter-content")
|
||||||
|
|
||||||
|
return &indiewebData{
|
||||||
|
name: name,
|
||||||
|
author: indiewebAuthor{
|
||||||
|
name: authorName,
|
||||||
|
picture: pic,
|
||||||
|
},
|
||||||
|
content: determineContent(summary, contentEntry, bridgyTwitterContent),
|
||||||
|
source: wm.source,
|
||||||
|
target: wm.target,
|
||||||
|
}
|
||||||
|
|
||||||
|
//len(entry.Properties["hoopw"])
|
||||||
|
}
|
||||||
|
|
||||||
|
// shorten limits txt to at most 250 bytes of text followed by "...".
// Strings of 250 bytes or fewer are returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 character: when byte 250 is a
// continuation byte, the cut moves back (by at most 3 bytes) to the start of
// that character, so valid UTF-8 input always yields valid UTF-8 output.
func shorten(txt string) string {
	const limit = 250
	if len(txt) <= limit {
		return txt
	}

	cut := limit
	// UTF-8 continuation bytes look like 10xxxxxx. A character has at most
	// three of them, so the back-off is bounded even for malformed input.
	for cut > limit-3 && txt[cut]&0xC0 == 0x80 {
		cut--
	}
	return txt[:cut] + "..."
}
|
||||||
|
|
||||||
|
func determineContent(summary string, contentEntry string, bridgyTwitterContent string) string {
|
||||||
|
if bridgyTwitterContent != "" {
|
||||||
|
return shorten(bridgyTwitterContent)
|
||||||
|
} else if summary != "" {
|
||||||
|
return shorten(summary)
|
||||||
|
}
|
||||||
|
return shorten(contentEntry)
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseBodyAsNonIndiewebSite(body string, wm webmention) *indiewebData {
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,9 +2,9 @@
|
||||||
package webmention
|
package webmention
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
|
||||||
"os"
|
|
||||||
"errors"
|
"errors"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
"github.com/wgroeneveld/go-jamming/common"
|
"github.com/wgroeneveld/go-jamming/common"
|
||||||
"github.com/wgroeneveld/go-jamming/mocks"
|
"github.com/wgroeneveld/go-jamming/mocks"
|
||||||
|
@ -36,6 +36,26 @@ func writeSomethingTo(filename string) {
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestReceiveTargetExistsSavesWebmentionToDisk(t *testing.T) {
|
||||||
|
os.MkdirAll("testdata/jefklakscodex.com", os.ModePerm)
|
||||||
|
//defer os.RemoveAll("testdata")
|
||||||
|
|
||||||
|
wm := webmention{
|
||||||
|
source: "https://brainbaking.com",
|
||||||
|
target: "https://jefklakscodex.com/articles",
|
||||||
|
}
|
||||||
|
//filename := wm.asPath(conf)
|
||||||
|
|
||||||
|
receiver := &receiver {
|
||||||
|
conf: conf,
|
||||||
|
restClient: &mocks.RestClientMock{
|
||||||
|
GetBodyFunc: mocks.BodyFunc(t, "../../mocks/valid-indieweb-source.html"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
receiver.receive(wm)
|
||||||
|
}
|
||||||
|
|
||||||
func TestReceiveTargetDoesNotExistAnymoreDeletesPossiblyOlderWebmention(t *testing.T) {
|
func TestReceiveTargetDoesNotExistAnymoreDeletesPossiblyOlderWebmention(t *testing.T) {
|
||||||
os.MkdirAll("testdata/jefklakscodex.com", os.ModePerm)
|
os.MkdirAll("testdata/jefklakscodex.com", os.ModePerm)
|
||||||
defer os.RemoveAll("testdata")
|
defer os.RemoveAll("testdata")
|
||||||
|
|
|
@ -21,7 +21,7 @@ type Config struct {
|
||||||
|
|
||||||
func (c *Config) FetchDomain(url string) (string, error) {
|
func (c *Config) FetchDomain(url string) (string, error) {
|
||||||
for _, domain := range c.AllowedWebmentionSources {
|
for _, domain := range c.AllowedWebmentionSources {
|
||||||
if strings.Index(url, domain) != -1 {
|
if strings.Contains(url, domain) {
|
||||||
return domain, nil
|
return domain, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
package common
|
||||||
|
|
||||||
|
// Includes reports whether elem is equal to at least one element of slice.
// A nil or empty slice never includes anything.
func Includes(slice []string, elem string) bool {
	for i := 0; i < len(slice); i++ {
		if slice[i] == elem {
			return true
		}
	}
	return false
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
package common
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestIncludesElemInArrayTrue(t *testing.T) {
|
||||||
|
arr := []string{ "one", "two"}
|
||||||
|
|
||||||
|
result := Includes(arr, "two")
|
||||||
|
if result != true {
|
||||||
|
t.Error("Should be in there")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIncludesElemNotInArrayFalse(t *testing.T) {
|
||||||
|
arr := []string{ "one", "two"}
|
||||||
|
|
||||||
|
result := Includes(arr, "three")
|
||||||
|
if result != false {
|
||||||
|
t.Error("Should NOT be in there")
|
||||||
|
}
|
||||||
|
}
|
|
@ -2,6 +2,8 @@
|
||||||
package mocks
|
package mocks
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"testing"
|
||||||
|
"io/ioutil"
|
||||||
"net/http"
|
"net/http"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -19,3 +21,12 @@ func (m *RestClientMock) GetBody(url string) (string, error) {
|
||||||
return m.GetBodyFunc(url)
|
return m.GetBodyFunc(url)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BodyFunc returns a stub suitable for RestClientMock.GetBodyFunc that
// answers every URL with the contents of mockfile and a nil error.
//
// The file is read once, when BodyFunc is called. If it cannot be read the
// calling test is failed and stopped right away, instead of carrying on with
// an empty body that would only cause confusing failures later on.
// BodyFunc must therefore be called from the test goroutine.
func BodyFunc(t *testing.T, mockfile string) func(string) (string, error) {
	t.Helper()

	html, err := ioutil.ReadFile(mockfile)
	if err != nil {
		t.Fatalf("reading mock file %q: %v", mockfile, err)
	}

	body := string(html)
	return func(url string) (string, error) {
		return body, nil
	}
}
|
||||||
|
|
|
@ -133,6 +133,7 @@
|
||||||
|
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
Check this out: <a href="https://jefklakscodex.com/articles">cool stuff</a>!
|
||||||
<svg width="0" height="0" class="hidden">
|
<svg width="0" height="0" class="hidden">
|
||||||
<symbol viewBox="0 0 24 24" id="lock" xmlns="http://www.w3.org/2000/svg">
|
<symbol viewBox="0 0 24 24" id="lock" xmlns="http://www.w3.org/2000/svg">
|
||||||
<path fill="currentColor" d="M14 15C14 16.11 13.11 17 12 17C10.89 17 10 16.1 10 15C10 13.89 10.89 13 12 13C13.11 13 14 13.9 14 15M13.09 20C13.21 20.72 13.46 21.39 13.81 22H6C4.89 22 4 21.1 4 20V10C4 8.89 4.89 8 6 8H7V6C7 3.24 9.24 1 12 1S17 3.24 17 6V8H18C19.11 8 20 8.9 20 10V13.09C19.67 13.04 19.34 13 19 13C18.66 13 18.33 13.04 18 13.09V10H6V20H13.09M9 8H15V6C15 4.34 13.66 3 12 3S9 4.34 9 6V8M21.34 15.84L17.75 19.43L16.16 17.84L15 19L17.75 22L22.5 17.25L21.34 15.84Z" />
|
<path fill="currentColor" d="M14 15C14 16.11 13.11 17 12 17C10.89 17 10 16.1 10 15C10 13.89 10.89 13 12 13C13.11 13 14 13.9 14 15M13.09 20C13.21 20.72 13.46 21.39 13.81 22H6C4.89 22 4 21.1 4 20V10C4 8.89 4.89 8 6 8H7V6C7 3.24 9.24 1 12 1S17 3.24 17 6V8H18C19.11 8 20 8.9 20 10V13.09C19.67 13.04 19.34 13 19 13C18.66 13 18.33 13.04 18 13.09V10H6V20H13.09M9 8H15V6C15 4.34 13.66 3 12 3S9 4.34 9 6V8M21.34 15.84L17.75 19.43L16.16 17.84L15 19L17.75 22L22.5 17.25L21.34 15.84Z" />
|
Loading…
Reference in New Issue