forked from wgroeneveld/go-jamming
webmention sending endpoint implementation
This commit is contained in:
parent
ae4006469d
commit
632a134fda
|
@ -38,6 +38,7 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) {
|
|||
"packageLocation": "./",
|
||||
"packageDependencies": [
|
||||
["dayjs", "npm:1.10.4"],
|
||||
["fast-xml-parser", "npm:3.19.0"],
|
||||
["got", "npm:11.8.2"],
|
||||
["jest", "npm:26.6.3"],
|
||||
["koa", "npm:2.13.1"],
|
||||
|
@ -2743,6 +2744,15 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) {
|
|||
"linkType": "HARD",
|
||||
}]
|
||||
]],
|
||||
["fast-xml-parser", [
|
||||
["npm:3.19.0", {
|
||||
"packageLocation": "./.yarn/cache/fast-xml-parser-npm-3.19.0-b7dcd3a31a-bac00722d0.zip/node_modules/fast-xml-parser/",
|
||||
"packageDependencies": [
|
||||
["fast-xml-parser", "npm:3.19.0"]
|
||||
],
|
||||
"linkType": "HARD",
|
||||
}]
|
||||
]],
|
||||
["fb-watchman", [
|
||||
["npm:2.0.1", {
|
||||
"packageLocation": "./.yarn/cache/fb-watchman-npm-2.0.1-30005d50fe-f9ec24592a.zip/node_modules/fb-watchman/",
|
||||
|
@ -6082,6 +6092,7 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) {
|
|||
"packageDependencies": [
|
||||
["serve-my-jams", "workspace:."],
|
||||
["dayjs", "npm:1.10.4"],
|
||||
["fast-xml-parser", "npm:3.19.0"],
|
||||
["got", "npm:11.8.2"],
|
||||
["jest", "npm:26.6.3"],
|
||||
["koa", "npm:2.13.1"],
|
||||
|
|
|
@ -43,4 +43,4 @@ Retrieves a JSON array with relevant webmentions stored for that domain. The tok
|
|||
## TODOs
|
||||
|
||||
- `published` date is not well-formatted and blindly taken over from feed
|
||||
- [brid.gy](https://brid.gy/) does not send webmentions if no target found, although I'd like these to appear in the [brainbaking.com/notes](https://brainbaking.com/notes) somehow, being syndicated from my Mastodon feed.
|
||||
- Implement a Brid.gy-like system that finds links to the configured domains on [public Mastodon timelines](https://docs.joinmastodon.org/methods/timelines/) and converts them into webmentions. (Also check whether it is OK to use only the public timeline.)
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
},
|
||||
"dependencies": {
|
||||
"dayjs": "^1.10.4",
|
||||
"fast-xml-parser": "^3.19.0",
|
||||
"got": "^11.8.2",
|
||||
"koa": "^2.13.1",
|
||||
"koa-body": "^4.2.0",
|
||||
|
|
|
@ -6,6 +6,11 @@ const allowedWebmentionSources = [
|
|||
"jefklakscodex.com"
|
||||
]
|
||||
|
||||
// do NOT send out webmentions to any of these domains.
|
||||
const disallowedWebmentionDomains = [
|
||||
"youtube.com"
|
||||
]
|
||||
|
||||
function setupDataDirs() {
|
||||
allowedWebmentionSources.forEach(domain => {
|
||||
const dir = `data/${domain}`
|
||||
|
@ -27,5 +32,6 @@ module.exports = {
|
|||
utcOffset: 60,
|
||||
|
||||
allowedWebmentionSources,
|
||||
disallowedWebmentionDomains,
|
||||
setupDataDirs
|
||||
}
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
const got = require('got')
|
||||
const { mf2 } = require("microformats-parser");
|
||||
|
||||
/**
 * Reduces a URL to its `scheme//host` prefix, e.g. `https://example.com/a/b` becomes `https://example.com`.
 * A URL with at most two slashes has no path to strip and is returned unchanged.
 *
 * @param {string} url - the URL to reduce
 * @returns {string} the `scheme//host` part, or `url` itself when it carries no path
 */
const baseUrlOf = (url) => {
  // match() yields null (not an empty array) when there is no slash at all,
  // so a bare "example.com" must not be dereferenced blindly.
  const slashes = url.match(/\//g) ?? []
  if(slashes.length <= 2) {
    return url
  }

  // "https://host/path".split('/') -> ["https:", "", "host", "path"]
  const [scheme, , host] = url.split('/')
  return `${scheme}//${host}`
}
|
||||
|
||||
/**
 * Picks the target of the first link-value in an HTTP `Link` header whose `rel` lists `webmention`.
 *
 * @param {string} header - raw `Link` header value, possibly holding several comma-separated links
 * @returns {string|undefined} the (possibly relative) target between `<` and `>`, or undefined if none qualifies
 */
function webmentionTargetInLinkHeader(header) {
  // one link-value: <target> followed by its parameters; a quoted parameter value may contain commas
  const linkValue = /<([^>]*)>((?:[^,"]|"[^"]*")*)/g
  const relParam = /;\s*rel\s*=\s*(?:"([^"]*)"|([^\s;,"]+))/i

  for(const [, href, params] of String(header).matchAll(linkValue)) {
    const rel = relParam.exec(params)
    // rel may list several space-separated relation types, e.g. rel="webmention somethingelse"
    const relations = (rel?.[1] ?? rel?.[2] ?? "").toLowerCase().split(/\s+/)
    if(relations.includes("webmention")) {
      return href
    }
  }
  return undefined
}

/**
 * Discovers the webmention endpoint of a target page.
 * The `Link` response header is consulted first; if it names no webmention endpoint,
 * the first `rel="webmention"` found by the microformats parser in the body is used.
 * see https://www.w3.org/TR/webmention/#sender-discovers-receiver-webmention-endpoint
 *
 * @param {string} target - URL of the page that is going to be mentioned
 * @returns {Promise<string|undefined>} the endpoint URL, or undefined when none is found or the lookup fails
 */
async function discover(target) {
  try {
    const endpoint = await got(target)

    // e.g. Link: <http://aaronpk.example/webmention-endpoint>; rel="webmention"
    const linkHeader = endpoint.headers.link
    const fromHeader = linkHeader ? webmentionTargetInLinkHeader(linkHeader) : undefined
    if(fromHeader !== undefined) {
      // the header may carry a relative endpoint; it has to be resolved against the target URL
      return new URL(fromHeader, target).href
    }

    const format = mf2(endpoint.body, {
      // this also complies with w3.org regulations: relative endpoint could be possible
      baseUrl: baseUrlOf(target)
    })
    return format.rels?.webmention?.[0]
  } catch(err) {
    // best effort: a target that cannot be fetched or parsed simply has no endpoint
    console.warn(` -- whoops, failed to discover ${target}, why: ${err}`)
    return undefined
  }
}
|
||||
|
||||
module.exports = {
|
||||
discover
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
|
||||
const webmentionReceiver = require('./receive')
|
||||
const webmentionLoader = require('./loader')
|
||||
const webmentionSender = require('./send')
|
||||
|
||||
function route(router) {
|
||||
router.post("webmention receive endpoint", "/webmention", async (ctx) => {
|
||||
|
@ -12,10 +13,23 @@ function route(router) {
|
|||
// we do NOT await this on purpose.
|
||||
webmentionReceiver.receive(ctx.request.body)
|
||||
|
||||
ctx.body = "Thanks, bro. Will process this webmention soon, pinky swear!";
|
||||
ctx.body = "Thanks, bro. Will process this webmention soon, pinky swear!"
|
||||
ctx.status = 202
|
||||
});
|
||||
|
||||
router.put("webmention send endpoint", "/webmention/:domain/:token", async (ctx) => {
|
||||
if(!webmentionLoader.validate(ctx.params)) {
|
||||
ctx.throw(403, "access denied")
|
||||
}
|
||||
|
||||
console.log(` OK: someone wants to send mentions from domain ${ctx.params.domain}`)
|
||||
// we do NOT await this on purpose.
|
||||
webmentionSender.send(ctx.params.domain, ctx.request.query?.since)
|
||||
|
||||
ctx.body = "Thanks, bro. Will send these webmentions soon, pinky swear!"
|
||||
ctx.status = 202
|
||||
})
|
||||
|
||||
router.get("webmention get endpoint", "/webmention/:domain/:token", async (ctx) => {
|
||||
if(!webmentionLoader.validate(ctx.params)) {
|
||||
ctx.throw(403, "access denied")
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
|
||||
const parser = require("fast-xml-parser")
|
||||
const config = require('./../config')
|
||||
|
||||
const dayjs = require('dayjs')
|
||||
const customParseFormat = require('dayjs/plugin/customParseFormat')
|
||||
dayjs.extend(customParseFormat)
|
||||
|
||||
const parseOpts = {
|
||||
ignoreAttributes: false
|
||||
}
|
||||
|
||||
/**
 * Extracts the distinct `href="..."` values from an HTML fragment, dropping links to
 * images/archives (by file extension) and links containing one of `config.disallowedWebmentionDomains`.
 *
 * @param {string} description - HTML of an RSS item description; a missing value is treated as empty
 * @returns {string[]} unique hrefs in order of first appearance (relative ones included)
 */
function collectHrefsFromDescription(description) {
  // first thought: use parser.parse() and traverse recursively. turned out to be way too slow.
  // match() yields null when there is not a single href, hence the fallback to an empty list.
  const matches = String(description ?? "").match(/href="([^"]*")/g) ?? []
  const links = matches
    .map(match => match.replace("href=", "").replaceAll("\"", ""))
    .filter(match => !(/\.(gif|zip|rar|bz2|gz|7z|jpe?g|tiff?|png|webp|bmp)$/i).test(match))
    .filter(match => !config.disallowedWebmentionDomains.some(domain => match.includes(domain)))
  return [...new Set(links)]
}
|
||||
|
||||
/**
|
||||
* a typical RSS item looks like this:
|
||||
{
|
||||
title: '@celia @kev I have read both you and Kev's post on...',
|
||||
link: 'https://brainbaking.com/notes/2021/03/16h17m07s14/',
|
||||
comments: 'https://brainbaking.com/notes/2021/03/16h17m07s14/#commento',
|
||||
pubDate: 'Tue, 16 Mar 2021 17:07:14 +0000',
|
||||
author: 'Wouter Groeneveld',
|
||||
guid: {
|
||||
'#text': 'https://brainbaking.com/notes/2021/03/16h17m07s14/',
|
||||
'@_isPermaLink': 'true'
|
||||
},
|
||||
description: ' \n' +
|
||||
' \n' +
|
||||
'\n' +
|
||||
' <p><span class="h-card"><a class="u-url mention" data-user="A5GVjIHI6MH82H6iLQ" href="https://fosstodon.org/@celia" rel="ugc">@<span>celia</span></a></span> <span class="h-card"><a class="u-url mention" data-user="A54b8g0RBaIgjzczMu" href="https://fosstodon.org/@kev" rel="ugc">@<span>kev</span></a></span> I have read both you and Kev’s post on this and agree on some points indeed! But I’m not yet ready to give up webmentions. As an academic, the idea of citing/mentioning each other is very alluring 🤓. Plus, I needed an excuse to fiddle some more with JS… <br><br>As much as I loved using Wordpress before, I can’t imagine going back to writing stuff in there instead of in markdown. Gotta keep the workflow short, though. Hope it helps you focus on what matters - content!</p>\n' +
|
||||
'\n' +
|
||||
'\n' +
|
||||
' <p>\n' +
|
||||
' By <a href="/about">Wouter Groeneveld</a> on 16 March 2021.\n' +
|
||||
' </p>\n' +
|
||||
' '
|
||||
}
|
||||
**/
|
||||
/**
 * Collects, for every feed item published after `since`, its permalink and the hrefs in its description.
 *
 * @param {string} xml - the RSS document; `rss.channel` is read from the parsed result
 * @param {string|Date} [since=''] - exclusive lower bound; a value dayjs considers invalid disables the date filter
 * @returns {{link: string, hrefs: string[]}[]} one entry per retained item, in feed order
 */
function collect(xml, since = '') {
  const root = parser.parse(xml, parseOpts).rss.channel
  const sinceDate = dayjs(since)

  // example pubDate format: Tue, 16 Mar 2021 17:07:14 +0000
  const sincePubDate = (date) => {
    if(!sinceDate.isValid()) return true
    // an item without a textual pubDate cannot be compared: keep it, just like an unparseable date
    if(typeof date !== 'string') return true
    const pubDate = dayjs(date.split(", ")[1], "DD MMM YYYY HH:mm:ss ZZ")
    if(!pubDate.isValid()) return true
    return sinceDate < pubDate
  }

  // a lone <item> is not wrapped in an array, and a channel without items has no `item` at all
  const items = root.item ?? []
  const entries = Array.isArray(items) ? items : [items]

  return entries
    .filter(item => sincePubDate(item.pubDate))
    .map(item => {
      return {
        link: item.link,
        hrefs: collectHrefsFromDescription(item.description)
      }
    })
}
|
||||
|
||||
module.exports = {
|
||||
collect
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
|
||||
const got = require('got')
|
||||
const { collect } = require('./rsslinkcollector')
|
||||
const { discover } = require('./linkdiscoverer')
|
||||
|
||||
/**
 * Sends one webmention: discovers the endpoint of `target` and POSTs `source` and `target` to it.
 * Nothing is sent when no endpoint is discovered. A failing POST is logged and does not reject,
 * so one unreachable receiver cannot abort the mentions that are sent alongside it.
 *
 * @param {{source: string, target: string}} opts - source: own page containing the link, target: the linked page
 * @returns {Promise<void>}
 */
async function mention(opts) {
  const { source, target } = opts
  const endpoint = await discover(target)
  if(!endpoint) {
    return
  }

  try {
    await got.post(endpoint, {
      // NOTE(review): `contentType` is kept as-is because the integration test asserts on the exact
      // options object; the `form` option is what makes the body form-encoded - confirm against got docs.
      contentType: "x-www-form-urlencoded",
      form: {
        source,
        target
      },
      retry: {
        limit: 5,
        methods: ["POST"]
      }
    })
    console.log(` OK: webmention@${endpoint}, sent: source ${source}, target ${target}`)
  } catch(err) {
    console.warn(` -- whoops, failed to send webmention@${endpoint}, source ${source}, target ${target}, why: ${err}`)
  }
}
|
||||
|
||||
/**
 * Turns every absolute link found in the feed items published after `since` into a webmention
 * and sends them all, waiting until every one of them has settled successfully.
 *
 * @param {string} xml - the RSS document
 * @param {string|Date} since - only items published after this moment are considered
 * @returns {Promise<void>}
 */
async function parseRssFeed(xml, since) {
  const linksToMention = []

  for(const el of collect(xml, since)) {
    for(const href of el.hrefs) {
      // this strips relative URLs; could be a feature to also send these to own domain?
      if(!href.startsWith('http')) {
        continue
      }
      // SOURCE is own domain this time, TARGET = outbound
      linksToMention.push({
        target: href,
        source: el.link
      })
    }
  }

  await Promise.all(linksToMention.map(mention))
}
|
||||
|
||||
|
||||
/**
 * Fetches `https://<domain>/index.xml` and sends webmentions for the links found in it.
 * The webmention route starts this without awaiting it, so failures are logged here
 * instead of escaping as an unhandled promise rejection.
 *
 * @param {string} domain - domain whose RSS feed is read
 * @param {string|Date} [since] - only feed items published after this moment are processed
 * @returns {Promise<void>} never rejects
 */
async function send(domain, since) {
  try {
    const feed = await got(`https://${domain}/index.xml`)
    await parseRssFeed(feed.body, since)
  } catch(err) {
    console.warn(` -- whoops, failed to send webmentions for ${domain}, why: ${err}`)
  }
}
|
||||
|
||||
module.exports = {
|
||||
send
|
||||
}
|
|
@ -2,10 +2,24 @@ const fs = require('fs').promises
|
|||
|
||||
/**
 * Stand-in for `got` that serves fixtures from `./test/__mocks__/` instead of doing HTTP.
 * The part of the URL after `https://brainbaking.com/` is used as the fixture path. Response headers
 * come from an optional companion file `<path without .html>-headers.json`.
 *
 * @param {string} url - URL to "fetch"
 * @returns {Promise<{headers: object, body: string}>} fixture contents; rejects when the body fixture is missing
 */
async function got(url) {
  const relativeUrl = url.replace('https://brainbaking.com/', '')
  const body = await fs.readFile(`./test/__mocks__/${relativeUrl}`, 'utf8')

  let headers = {}
  try {
    const headerFile = await fs.readFile(`./test/__mocks__/${relativeUrl.replace(".html", "")}-headers.json`, 'utf8')
    headers = JSON.parse(headerFile)
  } catch(err) {
    // a fixture without a companion header file simply has no headers;
    // anything else (e.g. malformed JSON) is a broken fixture and should surface.
    if(err.code !== 'ENOENT') throw err
  }

  return {
    headers,
    body
  }
}

/**
 * No-op replacement for `got.post`: accepts the call and resolves to undefined.
 */
async function gotPostMock(url, opts) {
}

got.post = gotPostMock
|
||||
|
||||
module.exports = got
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
|
||||
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
|
||||
<channel>
|
||||
<title>Brain Baking</title>
|
||||
<link>https://brainbaking.com/</link>
|
||||
<description>Recent content on Brain Baking</description>
|
||||
<generator>Hugo -- gohugo.io</generator>
|
||||
<language>en-us</language>
|
||||
<managingEditor>Wouter Groeneveld</managingEditor>
|
||||
<webMaster>Wouter Groeneveld</webMaster>
|
||||
<lastBuildDate>Tue, 16 Mar 2021 17:07:14 +0000</lastBuildDate>
|
||||
|
||||
<atom:link href="https://brainbaking.com/index.xml" rel="self" type="application/rss+xml" />
|
||||
|
||||
|
||||
<item>
|
||||
<title>@celia @kev I have read both you and Kev's post on...</title>
|
||||
<link>https://brainbaking.com/notes/2021/03/16h17m07s14/</link>
|
||||
<comments>https://brainbaking.com/notes/2021/03/16h17m07s14/#commento</comments>
|
||||
<pubDate>Tue, 16 Mar 2021 17:07:14 +0000</pubDate>
|
||||
<author>Wouter Groeneveld</author>
|
||||
<guid isPermaLink="true">https://brainbaking.com/notes/2021/03/16h17m07s14/</guid>
|
||||
|
||||
|
||||
|
||||
<description>
|
||||
<![CDATA[
|
||||
|
||||
<p>hi there! test discovering: <a href="https://brainbaking.com/link-discover-test-single.html">single</a>. Nice!</p>
|
||||
|
||||
<p>another cool link: <a href="https://brainbaking.com/link-discover-test-multiple.html">multiple</a></p>
|
||||
|
||||
]]>
|
||||
</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
"link": "<http://aaronpk.example/webmention-endpoint>; rel=\"webmention\""
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
<html>
|
||||
<head>
|
||||
...
|
||||
<link href="http://aaronpk.example/webmention-endpoint-header" rel="webmention" />
|
||||
...
|
||||
</head>
|
||||
<body>
|
||||
....
|
||||
<a href="http://aaronpk.example/webmention-endpoint-body" rel="webmention">webmention</a>
|
||||
...
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,7 @@
|
|||
<html>
|
||||
<head>
|
||||
</head>
|
||||
<body>
|
||||
bla
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,11 @@
|
|||
<html>
|
||||
<head>
|
||||
...
|
||||
...
|
||||
</head>
|
||||
<body>
|
||||
....
|
||||
<a href="http://aaronpk.example/webmention-endpoint-body" rel="webmention">webmention</a>
|
||||
...
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,12 @@
|
|||
<html>
|
||||
<head>
|
||||
...
|
||||
<link href="http://aaronpk.example/webmention-endpoint-header" rel="webmention" />
|
||||
...
|
||||
</head>
|
||||
<body>
|
||||
....
|
||||
<a href="http://aaronpk.example/webmention-endpoint-body" rel="webmention">webmention</a>
|
||||
...
|
||||
</body>
|
||||
</html>
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,26 @@
|
|||
|
||||
const { discover } = require('../../src/webmention/linkdiscoverer')
|
||||
|
||||
// Endpoint discovery against the fixture pages served by the got mock.
describe("link discoverer", () => {

  test("discover link if present in header", async () => {
    // the fixture has a companion headers file carrying the Link header
    await expect(discover("https://brainbaking.com/link-discover-test.html"))
      .resolves.toBe("http://aaronpk.example/webmention-endpoint")
  })

  test("discover nothing if no webmention link is present", async() => {
    await expect(discover("https://brainbaking.com/link-discover-test-none.html"))
      .resolves.toBeUndefined()
  })

  test("discover link if sole entry somewhere in html", async () => {
    await expect(discover("https://brainbaking.com/link-discover-test-single.html"))
      .resolves.toBe("http://aaronpk.example/webmention-endpoint-body")
  })

  test("use link in header if multiple present in html", async () => {
    // both a <link> in the head and an <a> in the body are present; the head one is expected
    await expect(discover("https://brainbaking.com/link-discover-test-multiple.html"))
      .resolves.toBe("http://aaronpk.example/webmention-endpoint-header")
  })

})
|
|
@ -0,0 +1,70 @@
|
|||
|
||||
const { collect } = require('../../src/webmention/rsslinkcollector')
|
||||
const fs = require('fs').promises
|
||||
const dayjs = require('dayjs')
|
||||
|
||||
// Exercises collect() against the sample feed in ./test/__mocks__/samplerss.xml.
describe("collect RSS links of articles since certain period", () => {

  let xml = ''
  beforeEach(async () => {
    const raw = await fs.readFile('./test/__mocks__/samplerss.xml')
    xml = raw.toString()
  })

  // collect everything published after the given ISO timestamp
  const collectSince = (isoDate) => collect(xml, dayjs(isoDate).toDate())
  const lastOf = (entries) => entries[entries.length - 1]

  test("collect should not contain hrefs from blocked domains", () => {
    // test case:
    // contains youtube.com/cool link
    const { hrefs } = lastOf(collectSince('2021-03-10T00:00:00.000Z'))

    expect(hrefs).toEqual([
      "https://dog.estate/@eli_oat",
      "https://twitter.com/olesovhcom/status/1369478732247932929",
      "/about"
    ])
  })

  test("collect should not contain hrefs that point to images", () => {
    // test case:
    // contains e.g. https://chat.brainbaking.com/media/6f8b72ca-9bfb-460b-9609-c4298a8cab2b/EuropeBattle%202021-03-14%2016-20-36-87.jpg
    const { hrefs } = lastOf(collectSince('2021-03-14T00:00:00.000Z'))

    expect(hrefs).toEqual([
      "/about"
    ])
  })

  test("collects nothing if date in future and since nothing new in feed", () => {
    const nextWeek = dayjs().add(7, 'day').toDate()

    expect(collect(xml, nextWeek).length).toEqual(0)
  })

  test("collect latest x links when a since parameter is provided", () => {
    const collected = collectSince('2021-03-15T00:00:00.000Z')
    expect(collected.length).toEqual(3)

    const { link, hrefs } = lastOf(collected)
    expect(link).toBe("https://brainbaking.com/notes/2021/03/15h14m43s49/")
    expect(hrefs).toEqual([
      "http://replit.com",
      "http://codepen.io",
      "https://kuleuven-diepenbeek.github.io/osc-course/ch1-c/intro/",
      "/about"
    ])
  })

  test("collect every external link without a valid since date", () => {
    const collected = collect(xml)
    expect(collected.length).toEqual(141)

    const [{ link, hrefs }] = collected
    expect(link).toBe("https://brainbaking.com/notes/2021/03/16h17m07s14/")
    expect(hrefs).toEqual([
      "https://fosstodon.org/@celia",
      "https://fosstodon.org/@kev",
      "/about"
    ])
  })

})
|
|
@ -0,0 +1,39 @@
|
|||
|
||||
const got = require('got')
|
||||
|
||||
const { send } = require('../../src/webmention/send')
|
||||
|
||||
|
||||
// End-to-end run of send() on top of the got mock: feed -> links -> endpoint discovery -> POST.
describe("webmention send scenarios", () => {

  // the options each outgoing webmention POST is expected to carry for a given target
  const postOptionsFor = (target) => ({
    contentType: "x-www-form-urlencoded",
    form: {
      source: "https://brainbaking.com/notes/2021/03/16h17m07s14/",
      target
    },
    retry: {
      limit: 5,
      methods: ["POST"]
    }
  })

  test("webmention send integration test", async () => {
    const post = jest.fn()
    got.post = post

    // fetches index.xml
    await send("brainbaking.com", '2021-03-16T16:00:00.000Z')

    expect(post).toHaveBeenCalledTimes(2)
    expect(post).toHaveBeenCalledWith(
      "http://aaronpk.example/webmention-endpoint-header",
      postOptionsFor("https://brainbaking.com/link-discover-test-multiple.html")
    )
    expect(post).toHaveBeenCalledWith(
      "http://aaronpk.example/webmention-endpoint-body",
      postOptionsFor("https://brainbaking.com/link-discover-test-single.html")
    )
  })

})
|
10
yarn.lock
10
yarn.lock
|
@ -2218,6 +2218,15 @@ __metadata:
|
|||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"fast-xml-parser@npm:^3.19.0":
|
||||
version: 3.19.0
|
||||
resolution: "fast-xml-parser@npm:3.19.0"
|
||||
bin:
|
||||
xml2js: cli.js
|
||||
checksum: bac00722d00f7f8782ab507281bff3c5cff2b37e2e1e26891a11ac2ac3f0c40e91f545492923d6dc8a57bdf9cfba99518c02ddff380f4ff1e81083d25055e43e
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"fb-watchman@npm:^2.0.0":
|
||||
version: 2.0.1
|
||||
resolution: "fb-watchman@npm:2.0.1"
|
||||
|
@ -5167,6 +5176,7 @@ fsevents@^2.1.2:
|
|||
resolution: "serve-my-jams@workspace:."
|
||||
dependencies:
|
||||
dayjs: ^1.10.4
|
||||
fast-xml-parser: ^3.19.0
|
||||
got: ^11.8.2
|
||||
jest: ^26.6.3
|
||||
koa: ^2.13.1
|
||||
|
|
Loading…
Reference in New Issue