
166 lines
4.8 KiB
Raw Normal View History

2021-03-07 20:30:14 +01:00
const got = require('got')
const config = require('./../config')
const fsp = require('fs').promises
const md5 = require('md5')
const { mf2 } = require("microformats-parser");
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
2021-04-05 17:44:27 +02:00
const log = require('pino')()
2021-03-07 20:30:14 +01:00
/**
 * Checks whether a value looks like an absolute http(s) URL.
 *
 * @param {*} url - candidate value, typically a field of a form-encoded body
 * @returns {boolean} true only for strings starting with "http://" or "https://"
 */
function isValidUrl(url) {
  // typeof guard instead of `!== undefined`: null, numbers and arrays
  // have no startsWith() and would otherwise throw a TypeError.
  return typeof url === "string" &&
    (url.startsWith("http://") || url.startsWith("https://"))
}
/**
 * Tells whether `url` contains one of the domains listed in
 * `config.allowedWebmentionSources`.
 *
 * NOTE(review): this is a plain substring match, so a configured domain that
 * appears anywhere in the URL (path, query string) also passes. Confirm
 * whether a hostname comparison is wanted instead.
 *
 * @param {string} url - URL to check
 * @returns {boolean} true when at least one configured domain occurs in `url`
 */
function isValidDomain(url) {
  return config.allowedWebmentionSources.some(domain => url.includes(domain))
}
2021-03-07 20:30:14 +01:00
// Remember: TARGET is our own domain, SOURCE is the article to process.
2021-03-07 20:30:14 +01:00
// Example request:
//   POST /webmention-endpoint HTTP/1.1
//   Host: aaronpk.example
//   Content-Type: application/x-www-form-urlencoded
// Example response:
//   HTTP/1.1 202 Accepted
/**
 * Validates an incoming webmention request.
 *
 * A request passes when it is form-encoded, has a body, carries two distinct
 * http(s) URLs as `source` and `target`, and the target points at one of the
 * configured domains.
 *
 * NOTE(review): the last operand of this condition was missing from the
 * reviewed source (it ended on a dangling `&&`). `isValidDomain(target)` is a
 * reconstruction based on the otherwise unused helper — confirm.
 *
 * @param {object} request - object exposing `type` (content type) and `body`
 * @returns {boolean} whether the request may be processed
 */
function validate(request) {
  const source = request?.body?.source
  const target = request?.body?.target

  return request.type === "application/x-www-form-urlencoded" &&
    request.body !== undefined &&
    isValidUrl(source) &&
    isValidUrl(target) &&
    source !== target &&
    isValidDomain(target)
}
/**
 * Probes the target URL by requesting it with `got`.
 *
 * @param {string} target - URL that the webmention claims to mention
 * @returns {Promise<boolean>} resolves to true when the request succeeds,
 *   false (after logging a warning) when `got` rejects
 */
async function isValidTargetUrl(target) {
  try {
    await got(target)
    return true
  } catch {
    // Any failure to fetch the target means the mention is not processed.
    log.warn(` ABORT: invalid target url`)
    return false
  }
}
/**
 * Builds the on-disk location of the webmention for a source/target pair.
 *
 * The file name is the md5 hash of both URLs; the directory is the first
 * entry of `config.allowedWebmentionSources` that occurs in `target`.
 *
 * NOTE(review): when no configured domain occurs in `target` the result is
 * `data/undefined/<hash>.json`; callers appear to rely on prior validation.
 *
 * @param {string} source - URL of the mentioning article
 * @param {string} target - URL of the mentioned page
 * @returns {string} relative path of the form `data/<domain>/<md5>.json`
 */
function asPath(source, target) {
  const filename = md5(`source=${source},target=${target}`)
  const domain = config.allowedWebmentionSources.find(d => target.includes(d))
  return `data/${domain}/${filename}.json`
}
/**
 * Removes a previously stored webmention for this source/target pair, if any.
 *
 * Best-effort: this never throws. A missing file is the expected case and is
 * ignored silently; any other failure is logged so it does not go unnoticed.
 *
 * @param {string} source - URL of the mentioning article
 * @param {string} target - URL of the mentioned page
 * @returns {Promise<void>}
 */
async function deletePossibleOlderWebmention(source, target) {
  try {
    await fsp.unlink(asPath(source, target))
  } catch (e) {
    // ENOENT does not matter: the file was simply not there.
    if (e?.code !== "ENOENT") {
      log.warn(` could not delete older webmention: ${e}`)
    }
  }
}
/**
 * Writes the serialized webmention to the path derived from source and target.
 *
 * @param {string} source - URL of the mentioning article
 * @param {string} target - URL of the mentioned page
 * @param {string} mentiondata - serialized webmention, written as UTF-8
 * @returns {Promise<void>} rejects when the file cannot be written
 */
async function saveWebmentionToDisk(source, target, mentiondata) {
  const destination = asPath(source, target)
  await fsp.writeFile(destination, mentiondata, 'utf-8')
}
/**
 * Formats the current moment, shifted by `config.utcOffset`, as
 * "YYYY-MM-DDTHH:mm:ss".
 *
 * NOTE(review): `dayjs.utc()` needs the dayjs utc plugin to be registered;
 * that registration is not visible in the reviewed source — confirm.
 *
 * @returns {string} timestamp used when a mention carries no published date
 */
function publishedNow() {
  const now = dayjs.utc().utcOffset(config.utcOffset)
  return now.format("YYYY-MM-DDTHH:mm:ss")
}
/**
 * Converts a parsed microformats h-entry into the stored webmention shape.
 *
 * - author name: the author's `name` property, else the author's plain value
 * - picture: the photo's `value` when the photo is an object, else the photo
 * - content: bridgy-twitter-content, else summary, else content value,
 *   capped at 250 characters followed by "..."
 * - published: the entry's date, else the current time
 * - url: the uid when it is an http(s) URL, else the entry url, else `source`
 * - type: "like" for like-of, "bookmark" for bookmark-of, otherwise "mention"
 *
 * @param {string} source - URL of the mentioning article (fallback for `url`)
 * @param {string} target - URL of the mentioned page (not used here)
 * @param {object} hEntry - h-entry item as produced by the microformats parser
 * @returns {{author: {name: *, picture: *}, name: *, content: *,
 *   published: string, url: string, type: string}}
 */
function parseBodyAsIndiewebSite(source, target, hEntry) {
  // Caps text at 250 characters; non-string values are passed through as-is.
  function shorten(txt) {
    if (typeof txt !== "string" || txt.length <= 250) return txt
    return txt.substring(0, 250) + "..."
  }

  const props = hEntry.properties
  const author = props?.author?.[0]

  const name = props?.name?.[0]
  const authorPropName = author?.properties?.name?.[0]
  const authorValue = author?.value
  const picture = author?.properties?.photo?.[0]
  const summary = props?.summary?.[0]
  const contentEntry = props?.content?.[0]?.value
  const bridgyTwitterContent = props?.["bridgy-twitter-content"]?.[0]
  const publishedDate = props?.published?.[0]
  const uid = props?.uid?.[0]
  const url = props?.url?.[0]
  const type = props?.["like-of"]?.length
    ? "like"
    : (props?.["bookmark-of"]?.length ? "bookmark" : "mention")

  // Mastodon uids start with "tag:server", but we do want indieweb uids from other sources.
  // The typeof guard keeps a non-string uid from throwing on startsWith().
  const hasHttpUid = typeof uid === "string" && uid.startsWith("http")

  return {
    author: {
      name: authorPropName || authorValue,
      picture: picture?.value || picture
    },
    name,
    content: shorten(bridgyTwitterContent || summary || contentEntry),
    published: publishedDate || publishedNow(),
    url: hasHttpUid ? uid : (url || source),
    type
  }
}
/**
 * Builds a minimal webmention for a page without an h-entry, using the
 * page's <title> as both name and content.
 *
 * The title lookup is case-insensitive, tolerates attributes on the tag and
 * line breaks inside it, and trims surrounding whitespace. Without a title,
 * `name` and `content` are undefined.
 *
 * @param {string} source - URL of the mentioning article
 * @param {string} target - URL of the mentioned page (not used here)
 * @param {string} body - raw HTML of the source page
 * @returns {{author: {name: string}, name: (string|undefined),
 *   content: (string|undefined), published: string, url: string, type: string}}
 */
function parseBodyAsNonIndiewebSite(source, target, body) {
  const title = body.match(/<title[^>]*>([\s\S]*?)<\/title>/i)?.[1]?.trim()

  return {
    author: {
      name: source
    },
    name: title,
    content: title,
    published: publishedNow(),
    url: source,
    type: "mention",
  }
}
/**
 * Turns the fetched source page into a stored webmention.
 *
 * Aborts with a warning, without saving anything, when the page does not
 * contain the target URL. Otherwise the page is parsed for microformats: the
 * first h-entry found is converted, and pages without one fall back to the
 * title-based conversion. The result is saved as JSON.
 *
 * @param {string} body - raw HTML of the source page
 * @param {string} source - URL of the mentioning article
 * @param {string} target - URL of the mentioned page
 * @returns {Promise<void>}
 */
async function processSourceBody(body, source, target) {
  if (!body.includes(target)) {
    log.warn(` ABORT: no mention of ${target} found in html src of source`)
    // Stop here: without the early return the mention was saved anyway.
    return
  }

  // fiddle: https://aimee-gm.github.io/microformats-parser/
  const microformat = mf2(body, {
    // WHY? crashes on relative URL, should be injected using Jest. Don't care.
    baseUrl: source.startsWith("http") ? source : `http://localhost/${source}`
  })

  const hEntry = microformat.items.find(itm => itm?.type?.includes("h-entry"))
  const data = hEntry
    ? parseBodyAsIndiewebSite(source, target, hEntry)
    : parseBodyAsNonIndiewebSite(source, target, body)

  await saveWebmentionToDisk(source, target, JSON.stringify(data))
  log.info(` OK: webmention processed`)
}
2021-03-07 20:30:14 +01:00
/**
 * Entry point for an accepted webmention: checks the target, fetches the
 * source and hands the page over for processing.
 *
 * When the source cannot be fetched, a previously stored mention for the
 * same source/target pair is removed and nothing new is saved.
 *
 * @param {{source: string, target: string}} body - webmention form fields
 * @returns {Promise<void>}
 */
async function receive(body) {
  // isValidTargetUrl is async: without `await` the returned promise is
  // always truthy and this guard would never fire.
  if (!(await isValidTargetUrl(body.target))) return

  let src
  try {
    src = await got(body.source)
  } catch (unknownSource) {
    log.warn(` ABORT: invalid source url: ` + unknownSource)
    await deletePossibleOlderWebmention(body.source, body.target)
    return
  }

  await processSourceBody(src.body, body.source, body.target)
}
2021-03-07 20:30:14 +01:00
module.exports = {