webmention sending endpoint implementation

This commit is contained in:
Wouter Groeneveld 2021-03-17 18:10:40 +01:00
parent ae4006469d
commit 632a134fda
20 changed files with 18145 additions and 3 deletions

11
.pnp.js generated
View File

@ -38,6 +38,7 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) {
"packageLocation": "./",
"packageDependencies": [
["dayjs", "npm:1.10.4"],
["fast-xml-parser", "npm:3.19.0"],
["got", "npm:11.8.2"],
["jest", "npm:26.6.3"],
["koa", "npm:2.13.1"],
@ -2743,6 +2744,15 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) {
"linkType": "HARD",
}]
]],
["fast-xml-parser", [
["npm:3.19.0", {
"packageLocation": "./.yarn/cache/fast-xml-parser-npm-3.19.0-b7dcd3a31a-bac00722d0.zip/node_modules/fast-xml-parser/",
"packageDependencies": [
["fast-xml-parser", "npm:3.19.0"]
],
"linkType": "HARD",
}]
]],
["fb-watchman", [
["npm:2.0.1", {
"packageLocation": "./.yarn/cache/fb-watchman-npm-2.0.1-30005d50fe-f9ec24592a.zip/node_modules/fb-watchman/",
@ -6082,6 +6092,7 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) {
"packageDependencies": [
["serve-my-jams", "workspace:."],
["dayjs", "npm:1.10.4"],
["fast-xml-parser", "npm:3.19.0"],
["got", "npm:11.8.2"],
["jest", "npm:26.6.3"],
["koa", "npm:2.13.1"],

View File

@ -43,4 +43,4 @@ Retrieves a JSON array with relevant webmentions stored for that domain. The tok
## TODOs
- `published` date is not well-formatted and blindly taken over from feed
- [brid.gy](https://brid.gy/) does not send webmentions if no target found, although I'd like these to appear in the [brainbaking.com/notes](https://brainbaking.com/notes) somehow, being syndicated from my Mastodon feed.
- Implement a Brid.gy-like system that converts links from the domains in the config, as found on [public Mastodon timelines](https://docs.joinmastodon.org/methods/timelines/), into webmentions. (And check whether it is OK to use only the public timeline.)

View File

@ -18,6 +18,7 @@
},
"dependencies": {
"dayjs": "^1.10.4",
"fast-xml-parser": "^3.19.0",
"got": "^11.8.2",
"koa": "^2.13.1",
"koa-body": "^4.2.0",

View File

@ -6,6 +6,11 @@ const allowedWebmentionSources = [
"jefklakscodex.com"
]
// do NOT send out webmentions to any of these domains.
// NOTE(review): entries look like they are matched as plain substrings of outbound
// hrefs by the RSS link collector — confirm before adding very short names.
const disallowedWebmentionDomains = [
"youtube.com"
]
function setupDataDirs() {
allowedWebmentionSources.forEach(domain => {
const dir = `data/${domain}`
@ -27,5 +32,6 @@ module.exports = {
utcOffset: 60,
allowedWebmentionSources,
disallowedWebmentionDomains,
setupDataDirs
}

View File

@ -0,0 +1,37 @@
const got = require('got')
const { mf2 } = require("microformats-parser");
/**
 * Reduces an absolute URL to its scheme-and-host prefix, e.g.
 * "https://example.com/a/b" becomes "https://example.com".
 *
 * @param {string} url - the URL to reduce
 * @returns {string} `url` unchanged when it contains at most two slashes
 *   (nothing follows the host), otherwise everything before the third slash
 */
const baseUrlOf = (url) => {
  // String#match yields null (not an empty array) when there is no slash at all
  const slashCount = (url.match(/\//g) ?? []).length
  if(slashCount <= 2) {
    return url
  }
  const [scheme, , host] = url.split('/')
  return `${scheme}//${host}`
}
// see https://www.w3.org/TR/webmention/#sender-discovers-receiver-webmention-endpoint
/**
 * Discovers the webmention endpoint advertised by a target page.
 *
 * The HTTP `Link` header is consulted first; when it carries no link with a
 * `webmention` rel value, the response body is handed to the microformats
 * parser and the first `webmention` rel found there is used.
 *
 * @param {string} target - URL of the page that is about to be mentioned
 * @returns {Promise<string|undefined>} the endpoint URL, or undefined when
 *   none is advertised or when fetching/parsing failed (the failure is logged)
 */
async function discover(target) {
  try {
    const endpoint = await got(target)
    const fromHeader = webmentionHrefFromLinkHeader(endpoint.headers.link)
    if(fromHeader !== undefined) {
      // e.g. Link: <http://aaronpk.example/webmention-endpoint>; rel="webmention"
      // the advertised endpoint may be relative, so resolve it against the target
      return new URL(fromHeader, target).href
    }

    const format = mf2(endpoint.body, {
      // this also complies with w3.org regulations: relative endpoint could be possible
      baseUrl: baseUrlOf(target)
    })
    return format.rels?.webmention?.[0]
  } catch(err) {
    console.warn(` -- whoops, failed to discover ${target}, why: ${err}`)
    return undefined
  }
}

// Returns the URL of the first link in a Link header value whose rel list contains
// "webmention", or undefined when there is no such link.
function webmentionHrefFromLinkHeader(linkHeader) {
  const raw = Array.isArray(linkHeader) ? linkHeader.join(", ") : linkHeader
  if(typeof raw !== "string") {
    return undefined
  }
  // each link is "<url>" followed by its ";"-separated parameters, up to the next "<"
  for(const [, href, params] of raw.matchAll(/<([^>]*)>([^<]*)/g)) {
    const rel = params.match(/;\s*rel\s*=\s*(?:"([^"]*)"|([^\s;,]+))/i)
    const relValues = (rel?.[1] ?? rel?.[2] ?? "").toLowerCase().split(/\s+/)
    if(relValues.includes("webmention")) {
      return href.trim()
    }
  }
  return undefined
}
module.exports = {
discover
}

View File

@ -1,6 +1,7 @@
const webmentionReceiver = require('./receive')
const webmentionLoader = require('./loader')
const webmentionSender = require('./send')
function route(router) {
router.post("webmention receive endpoint", "/webmention", async (ctx) => {
@ -12,10 +13,23 @@ function route(router) {
// we do NOT await this on purpose.
webmentionReceiver.receive(ctx.request.body)
ctx.body = "Thanks, bro. Will process this webmention soon, pinky swear!";
ctx.body = "Thanks, bro. Will process this webmention soon, pinky swear!"
ctx.status = 202
});
router.put("webmention send endpoint", "/webmention/:domain/:token", async (ctx) => {
if(!webmentionLoader.validate(ctx.params)) {
ctx.throw(403, "access denied")
}
console.log(` OK: someone wants to send mentions from domain ${ctx.params.domain}`)
// we do NOT await this on purpose.
webmentionSender.send(ctx.params.domain, ctx.request.query?.since)
ctx.body = "Thanks, bro. Will send these webmentions soon, pinky swear!"
ctx.status = 202
})
router.get("webmention get endpoint", "/webmention/:domain/:token", async (ctx) => {
if(!webmentionLoader.validate(ctx.params)) {
ctx.throw(403, "access denied")

View File

@ -0,0 +1,72 @@
const parser = require("fast-xml-parser")
const config = require('./../config')
const dayjs = require('dayjs')
const customParseFormat = require('dayjs/plugin/customParseFormat')
dayjs.extend(customParseFormat)
const parseOpts = {
ignoreAttributes: false
}
/**
 * Extracts the distinct link targets from the HTML of an RSS item description.
 *
 * Links whose value ends in an image/archive extension, or that contain one of
 * `config.disallowedWebmentionDomains`, are left out.
 *
 * @param {string} description - HTML fragment of a feed item
 * @returns {string[]} unique href values in order of first appearance; empty
 *   when the description is not a string or contains no `href="..."` attribute
 */
function collectHrefsFromDescription(description) {
  // first thought: use parser.parse() and traverse recursively. turned out to be way too slow.
  if(typeof description !== "string") {
    return []
  }
  const ignoredExtensions = /\.(gif|zip|rar|bz2|gz|7z|jpe?g|tiff?|png|webp|bmp)$/i
  // matchAll never yields null, so a description without links simply gives []
  const hrefs = Array.from(description.matchAll(/href="([^"]*)"/g), ([, href]) => href)
    .filter(href => !ignoredExtensions.test(href))
    .filter(href => !config.disallowedWebmentionDomains.some(domain => href.indexOf(domain) >= 0))
  return [...new Set(hrefs)]
}
/**
* a typical RSS item looks like this:
{
title: '@celia @kev I have read both you and Kev&#39;s post on...',
link: 'https://brainbaking.com/notes/2021/03/16h17m07s14/',
comments: 'https://brainbaking.com/notes/2021/03/16h17m07s14/#commento',
pubDate: 'Tue, 16 Mar 2021 17:07:14 +0000',
author: 'Wouter Groeneveld',
guid: {
'#text': 'https://brainbaking.com/notes/2021/03/16h17m07s14/',
'@_isPermaLink': 'true'
},
description: ' \n' +
' \n' +
'\n' +
' <p><span class="h-card"><a class="u-url mention" data-user="A5GVjIHI6MH82H6iLQ" href="https://fosstodon.org/@celia" rel="ugc">@<span>celia</span></a></span> <span class="h-card"><a class="u-url mention" data-user="A54b8g0RBaIgjzczMu" href="https://fosstodon.org/@kev" rel="ugc">@<span>kev</span></a></span> I have read both you and Kev&rsquo;s post on this and agree on some points indeed! But I&rsquo;m not yet ready to give up webmentions. As an academic, the idea of citing/mentioning each other is very alluring 🤓. Plus, I needed an excuse to fiddle some more with JS&hellip; <br><br>As much as I loved using Wordpress before, I can&rsquo;t imagine going back to writing stuff in there instead of in markdown. Gotta keep the workflow short, though. Hope it helps you focus on what matters - content!</p>\n' +
'\n' +
'\n' +
' <p>\n' +
' By <a href="/about">Wouter Groeneveld</a> on 16 March 2021.\n' +
' </p>\n' +
' '
}
**/
/**
 * Collects, per RSS item, the item link and the hrefs found in its description.
 *
 * @param {string} xml - the RSS document
 * @param {string|Date} [since=''] - only items published strictly after this
 *   moment are returned; when it does not yield a valid dayjs date every item
 *   is returned
 * @returns {{link: string, hrefs: string[]}[]} one entry per retained item;
 *   empty when the document has no `rss.channel` or the channel has no items
 */
function collect(xml, since = '') {
  const root = parser.parse(xml, parseOpts)?.rss?.channel
  // nothing to mention in a document without a channel or without items
  if(root?.item == null) {
    return []
  }
  const sinceDate = dayjs(since)

  // example pubDate format: Tue, 16 Mar 2021 17:07:14 +0000
  const sincePubDate = (date) => {
    if(!sinceDate.isValid()) return true
    // drop the leading weekday; a missing or oddly shaped pubDate is treated
    // like an unparsable one: the item is kept
    const stamp = typeof date === "string" ? date.split(", ")[1] : undefined
    if(stamp === undefined) return true
    const pubDate = dayjs(stamp, "DD MMM YYYY HH:mm:ss ZZ")
    if(!pubDate.isValid()) return true
    return sinceDate < pubDate
  }

  // the parser yields a bare object instead of an array for a single <item>
  const entries = Array.isArray(root.item) ? root.item : [root.item]
  return entries
    .filter(item => sincePubDate(item.pubDate))
    .map(item => {
      return {
        link: item.link,
        hrefs: collectHrefsFromDescription(item.description)
      }
    })
}
module.exports = {
collect
}

50
src/webmention/send.js Normal file
View File

@ -0,0 +1,50 @@
const got = require('got')
const { collect } = require('./rsslinkcollector')
const { discover } = require('./linkdiscoverer')
/**
 * Sends one webmention: looks up the endpoint of `opts.target` and, when one
 * is found, POSTs the source/target pair to it as form data.
 *
 * @param {{source: string, target: string}} opts - the page doing the
 *   mentioning and the page being mentioned
 * @returns {Promise<void>} resolves once the POST finished, or immediately
 *   when the target advertises no endpoint
 */
async function mention(opts) {
  const { source, target } = opts
  const endpoint = await discover(target)
  if(!endpoint) {
    return
  }

  const request = {
    contentType: "x-www-form-urlencoded",
    form: { source, target },
    retry: { limit: 5, methods: ["POST"] }
  }
  await got.post(endpoint, request)
  console.log(` OK: webmention@${endpoint}, sent: source ${source}, target ${target}`)
}
/**
 * Sends a webmention for every absolute link found in the feed's items.
 *
 * Every link is attempted independently: a failure for one target is logged
 * and does not make the whole batch reject.
 *
 * @param {string} xml - the RSS document
 * @param {string|Date} since - forwarded to `collect` to select the items
 * @returns {Promise<void>} resolves once every mention attempt has settled
 */
async function parseRssFeed(xml, since) {
  const linksToMention = collect(xml, since)
    .flatMap(el => el.hrefs
      // this strips relative URLs; could be a feature to also send these to own domain?
      .filter(href => href.startsWith('http'))
      // SOURCE is own domain this time, TARGET = outbound
      .map(href => ({ target: href, source: el.link })))

  // allSettled instead of all: callers fire-and-forget this work, so a single
  // unreachable endpoint must not surface as an unhandled rejection
  const outcomes = await Promise.allSettled(linksToMention.map(link => mention(link)))
  outcomes.forEach((outcome, index) => {
    if(outcome.status === "rejected") {
      const { source, target } = linksToMention[index]
      console.warn(` -- whoops, failed to send webmention from ${source} to ${target}, why: ${outcome.reason}`)
    }
  })
}
/**
 * Fetches `https://<domain>/index.xml` and sends webmentions for the links
 * in that feed.
 *
 * @param {string} domain - host name whose feed is fetched
 * @param {string|Date} since - forwarded to the feed processing to select items
 * @returns {Promise<void>}
 */
async function send(domain, since) {
  const feedUrl = `https://${domain}/index.xml`
  const { body } = await got(feedUrl)
  await parseRssFeed(body, since)
}
module.exports = {
send
}

View File

@ -2,10 +2,24 @@ const fs = require('fs').promises
/**
 * Test double for `got`: serves fixture files from ./test/__mocks__ instead of
 * performing an HTTP request.
 *
 * The `https://brainbaking.com/` prefix is stripped from `url` and the rest is
 * used as the fixture path. Response headers come from an optional sibling
 * file named `<path without ".html">-headers.json`.
 *
 * @param {string} url - the URL that would have been requested
 * @returns {Promise<{headers: object, body: string}>} fixture-backed response
 */
async function got(url) {
  const relativeUrl = url.replace('https://brainbaking.com/', '')
  const body = await fs.readFile(`./test/__mocks__/${relativeUrl}`, 'utf8')

  let headers = {}
  try {
    const headerFile = await fs.readFile(`./test/__mocks__/${relativeUrl.replace(".html", "")}-headers.json`, 'utf8')
    headers = JSON.parse(headerFile)
  } catch(err) {
    // a headers fixture is optional; anything but "file not found" is a broken fixture
    if(err.code !== 'ENOENT') {
      throw err
    }
  }

  return {
    headers,
    body
  }
}
// No-op replacement for got.post: takes the same (url, opts) arguments, does
// nothing with them and resolves to undefined.
async function gotPostMock(url, opts) {
}
got.post = gotPostMock
module.exports = got

37
test/__mocks__/index.xml Normal file
View File

@ -0,0 +1,37 @@
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>Brain Baking</title>
<link>https://brainbaking.com/</link>
<description>Recent content on Brain Baking</description>
<generator>Hugo -- gohugo.io</generator>
<language>en-us</language>
<managingEditor>Wouter Groeneveld</managingEditor>
<webMaster>Wouter Groeneveld</webMaster>
<lastBuildDate>Tue, 16 Mar 2021 17:07:14 +0000</lastBuildDate>
<atom:link href="https://brainbaking.com/index.xml" rel="self" type="application/rss+xml" />
<item>
<title>@celia @kev I have read both you and Kev&#39;s post on...</title>
<link>https://brainbaking.com/notes/2021/03/16h17m07s14/</link>
<comments>https://brainbaking.com/notes/2021/03/16h17m07s14/#commento</comments>
<pubDate>Tue, 16 Mar 2021 17:07:14 +0000</pubDate>
<author>Wouter Groeneveld</author>
<guid isPermaLink="true">https://brainbaking.com/notes/2021/03/16h17m07s14/</guid>
<description>
<![CDATA[
<p>hi there! test discovering: <a href="https://brainbaking.com/link-discover-test-single.html">single</a>. Nice!</p>
<p>another cool link: <a href="https://brainbaking.com/link-discover-test-multiple.html">multiple</a></p>
]]>
</description>
</item>
</channel>
</rss>

View File

@ -0,0 +1,3 @@
{
"link": "<http://aaronpk.example/webmention-endpoint>; rel=\"webmention\""
}

View File

@ -0,0 +1,12 @@
<html>
<head>
...
<link href="http://aaronpk.example/webmention-endpoint-header" rel="webmention" />
...
</head>
<body>
....
<a href="http://aaronpk.example/webmention-endpoint-body" rel="webmention">webmention</a>
...
</body>
</html>

View File

@ -0,0 +1,7 @@
<html>
<head>
</head>
<body>
bla
</body>
</html>

View File

@ -0,0 +1,11 @@
<html>
<head>
...
...
</head>
<body>
....
<a href="http://aaronpk.example/webmention-endpoint-body" rel="webmention">webmention</a>
...
</body>
</html>

View File

@ -0,0 +1,12 @@
<html>
<head>
...
<link href="http://aaronpk.example/webmention-endpoint-header" rel="webmention" />
...
</head>
<body>
....
<a href="http://aaronpk.example/webmention-endpoint-body" rel="webmention">webmention</a>
...
</body>
</html>

17710
test/__mocks__/samplerss.xml Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,26 @@
const { discover } = require('../../src/webmention/linkdiscoverer')
// Exercises discover() against fixture pages; the URLs below are not fetched
// over the network when the got mock from test/__mocks__ is in effect.
describe("link discoverer", () => {
// endpoint advertised through the HTTP Link header
// NOTE(review): presumably backed by a "-headers.json" fixture next to the page — confirm
test("discover link if present in header", async () => {
const result = await discover("https://brainbaking.com/link-discover-test.html")
expect(result).toBe("http://aaronpk.example/webmention-endpoint")
})
// neither header nor markup advertises an endpoint
test("discover nothing if no webmention link is present", async() => {
const result = await discover("https://brainbaking.com/link-discover-test-none.html")
expect(result).toBeUndefined()
})
// a single rel="webmention" element in the document
test("discover link if sole entry somewhere in html", async () => {
const result = await discover("https://brainbaking.com/link-discover-test-single.html")
expect(result).toBe("http://aaronpk.example/webmention-endpoint-body")
})
// several rel="webmention" elements: the expected endpoint is the "-header" one
test("use link in header if multiple present in html", async () => {
const result = await discover("https://brainbaking.com/link-discover-test-multiple.html")
expect(result).toBe("http://aaronpk.example/webmention-endpoint-header")
})
})

View File

@ -0,0 +1,70 @@
const { collect } = require('../../src/webmention/rsslinkcollector')
const fs = require('fs').promises
const dayjs = require('dayjs')
// Exercises collect() against the samplerss.xml fixture with different "since" values.
describe("collect RSS links of articles since certain period", () => {
let xml = ''
// the fixture is read again before every test
beforeEach(async () => {
xml = (await fs.readFile('./test/__mocks__/samplerss.xml')).toString()
})
test("collect should not contain hrefs from blocked domains", () => {
const collected = collect(xml, dayjs('2021-03-10T00:00:00.000Z').toDate())
// test case:
// contains youtube.com/cool link
const last = collected[collected.length - 1]
expect(last.hrefs).toEqual([
"https://dog.estate/@eli_oat",
"https://twitter.com/olesovhcom/status/1369478732247932929",
"/about"
])
})
test("collect should not contain hrefs that point to images", () => {
const collected = collect(xml, dayjs('2021-03-14T00:00:00.000Z').toDate())
// test case:
// contains e.g. https://chat.brainbaking.com/media/6f8b72ca-9bfb-460b-9609-c4298a8cab2b/EuropeBattle%202021-03-14%2016-20-36-87.jpg
const last = collected[collected.length - 1]
expect(last.hrefs).toEqual([
"/about"
])
})
// "since" lies a week ahead of the current time, so no item can be newer
test("collects nothing if date in future and since nothing new in feed", () => {
const collected = collect(xml, dayjs().add(7, 'day').toDate())
expect(collected.length).toEqual(0)
})
test("collect latest x links when a since parameter is provided", () => {
const collected = collect(xml, dayjs('2021-03-15T00:00:00.000Z').toDate())
expect(collected.length).toEqual(3)
const last = collected[collected.length - 1]
expect(last.link).toBe("https://brainbaking.com/notes/2021/03/15h14m43s49/")
expect(last.hrefs).toEqual([
"http://replit.com",
"http://codepen.io",
"https://kuleuven-diepenbeek.github.io/osc-course/ch1-c/intro/",
"/about"
])
})
// no "since" argument at all: the date filter is not applied
// NOTE(review): 141 is presumably the number of items in samplerss.xml — confirm
test("collect every external link without a valid since date", () => {
const collected = collect(xml)
expect(collected.length).toEqual(141)
const first = collected[0]
expect(first.link).toBe("https://brainbaking.com/notes/2021/03/16h17m07s14/")
expect(first.hrefs).toEqual([
"https://fosstodon.org/@celia",
"https://fosstodon.org/@kev",
"/about"
])
})
})

View File

@ -0,0 +1,39 @@
const got = require('got')
const { send } = require('../../src/webmention/send')
// End-to-end check of send(): feed fetch, link collection, endpoint discovery and
// the resulting POSTs.
describe("webmention send scenarios", () => {
test("webmention send integration test", async () => {
// replace got.post with a jest spy so the outgoing POSTs can be inspected
got.post = jest.fn()
// fetches index.xml
await send("brainbaking.com", '2021-03-16T16:00:00.000Z')
// one POST per link expected from the feed item
expect(got.post).toHaveBeenCalledTimes(2)
expect(got.post).toHaveBeenCalledWith("http://aaronpk.example/webmention-endpoint-header", {
contentType: "x-www-form-urlencoded",
form: {
source: "https://brainbaking.com/notes/2021/03/16h17m07s14/",
target: "https://brainbaking.com/link-discover-test-multiple.html"
},
retry: {
limit: 5,
methods: ["POST"]
}
})
expect(got.post).toHaveBeenCalledWith("http://aaronpk.example/webmention-endpoint-body", {
contentType: "x-www-form-urlencoded",
form: {
source: "https://brainbaking.com/notes/2021/03/16h17m07s14/",
target: "https://brainbaking.com/link-discover-test-single.html"
},
retry: {
limit: 5,
methods: ["POST"]
}
})
})
})

View File

@ -2218,6 +2218,15 @@ __metadata:
languageName: node
linkType: hard
"fast-xml-parser@npm:^3.19.0":
version: 3.19.0
resolution: "fast-xml-parser@npm:3.19.0"
bin:
xml2js: cli.js
checksum: bac00722d00f7f8782ab507281bff3c5cff2b37e2e1e26891a11ac2ac3f0c40e91f545492923d6dc8a57bdf9cfba99518c02ddff380f4ff1e81083d25055e43e
languageName: node
linkType: hard
"fb-watchman@npm:^2.0.0":
version: 2.0.1
resolution: "fb-watchman@npm:2.0.1"
@ -5167,6 +5176,7 @@ fsevents@^2.1.2:
resolution: "serve-my-jams@workspace:."
dependencies:
dayjs: ^1.10.4
fast-xml-parser: ^3.19.0
got: ^11.8.2
jest: ^26.6.3
koa: ^2.13.1