extract context if explicitly mentioned @http... url
This commit is contained in:
parent
b43eaa347c
commit
513ed565dc
|
@ -28,7 +28,7 @@ function convertAtomItemToMd(item, notesdir) {
|
|||
|
||||
let mddata = ejs.render(templates.markdown, { item })
|
||||
|
||||
if(item.media.length > 0) {
|
||||
if(item.media?.length > 0) {
|
||||
mddata += '\n' + ejs.render(templates.enclosures, { images: item.media }, { rmWhitespace: true })
|
||||
}
|
||||
|
||||
|
@ -44,6 +44,23 @@ function trimIfNeeded(title, count, prefix) {
|
|||
return prefix + title
|
||||
}
|
||||
|
||||
/**
 * Determine the URL a feed entry is replying to (its "context").
 *
 * Two sources are checked, in order:
 *  1. the `thr:in-reply-to` element of the parsed entry, whose `@_ref`
 *     attribute is returned as-is;
 *  2. an explicit mention written in the text as `@<a ... href="...">`,
 *     in which case the value of that `href` attribute is returned.
 *
 * @param {object} item - parsed feed entry
 * @param {string} content - decoded HTML content of the entry
 * @returns {string} the reply-to URL, or "" when none could be detected
 */
function detectContext(item, content) {
  // format: <thr:in-reply-to ref='https://social.linux.pizza/users/StampedingLonghorn/statuses/105821099684887793' href='https://social.linux.pizza/users/StampedingLonghorn/statuses/105821099684887793'/>
  if(item['thr:in-reply-to']) {
    return item['thr:in-reply-to']['@_ref']
  }

  // could also be: manually in text "@[<a href...]"
  // match() yields null when "@<a" is present without a following href="...",
  // so test the result itself instead of reading .length on it.
  const res = typeof content === 'string'
    ? content.match(/@<a\s(.*?)href="(.*?)".*?>/)
    : null
  if(res) {
    // group 1 = attributes before href, group 2 = the href value
    return res[2]
  }

  return ""
}
|
||||
|
||||
// opts:
|
||||
// notesdir = `${__dirname}/content/notes`
|
||||
// url = "https://chat.brainbaking.com/users/wouter/feed";
|
||||
|
@ -74,24 +91,26 @@ async function parseMastoFeed(options) {
|
|||
const entries = root.feed.entry.map ? root.feed.entry : [root.feed.entry]
|
||||
|
||||
const items = entries.map(item => {
|
||||
const content = ent.decode(ent.decode(item.content['#text'])) // format: <span class="h-card....
|
||||
const date = dayjs.utc(item.published).utcOffset(utcOffset)
|
||||
const year = date.format("YYYY")
|
||||
const month = date.format("MM")
|
||||
const day = date.format("DD")
|
||||
// format: <thr:in-reply-to ref='https://social.linux.pizza/users/StampedingLonghorn/statuses/105821099684887793' href='https://social.linux.pizza/users/StampedingLonghorn/statuses/105821099684887793'/>
|
||||
const context = item['thr:in-reply-to'] ? item['thr:in-reply-to']['@_ref'] : ""
|
||||
const context = detectContext(item, content)
|
||||
const title = escQuotes(ent.decode(ent.decode(item.title)))
|
||||
|
||||
const media = item.link?.filter(l =>
|
||||
l['@_rel'] === 'enclosure' &&
|
||||
l['@_type'] === 'image/jpeg').map(l => l['@_href'])
|
||||
|
||||
|
||||
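// WHY double decode? The feed encodes a quote (") as "&amp;#34;": the first decode turns "&amp;" into "&", the second decodes the resulting "&#34;" into the actual character.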
|
||||
return {
|
||||
title: trimIfNeeded(title, titleCount, titlePrefix), // summary (cut-off) of content
|
||||
content: ent.decode(ent.decode(item.content['#text'])), // format: <span class="h-card....
|
||||
content,
|
||||
url: escQuotes(item.id), // format: https://chat.brainbaking.com/objects/0707fd54-185d-4ee7-9204-be370d57663c
|
||||
context: escQuotes(context),
|
||||
contextFromMastodon: item['thr:in-reply-to'],
|
||||
id: stripBeforeLastSlash(item.id),
|
||||
media,
|
||||
hash: `${day}h${date.format("HH")}m${date.format("mm")}s${date.format("ss")}`,
|
||||
|
@ -101,7 +120,7 @@ async function parseMastoFeed(options) {
|
|||
day
|
||||
}
|
||||
})
|
||||
.filter(itm => ignoreReplies ? !itm.context : true)
|
||||
.filter(itm => ignoreReplies ? !itm.contextFromMastodon : true)
|
||||
.filter(itm => !notes.includes(`${itm.year}/${itm.month}/${itm.hash}`))
|
||||
.forEach(itm => convertAtomItemToMd(itm, notesdir))
|
||||
}
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
|
||||
const markdown = `---
|
||||
source: "<%- item.url %>"
|
||||
<% if (item.context) { -%>
|
||||
context: "<%- item.context %>"
|
||||
<% } -%>
|
||||
title: "<%- item.title %>"
|
||||
date: "<%- item.year %>-<%- item.month %>-<%- item.day %>T<%- item.date.format('HH:mm:ss') %>"
|
||||
---
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<feed
|
||||
xmlns="http://www.w3.org/2005/Atom"
|
||||
xmlns:thr="http://purl.org/syndication/thread/1.0"
|
||||
xmlns:activity="http://activitystrea.ms/spec/1.0/"
|
||||
xmlns:poco="http://portablecontacts.net/spec/1.0"
|
||||
xmlns:ostatus="http://ostatus.org/schema/1.0">
|
||||
|
||||
<id>https://chat.brainbaking.com/users/wouter/feed.atom</id>
|
||||
<title>wouter's timeline</title>
|
||||
<updated>2021-03-02T16:18:46</updated>
|
||||
<logo>https://chat.brainbaking.com/media/f39bcd85-5098-45e2-b395-e274b712d512/headshot_2020.jpg</logo>
|
||||
<link rel="self" href="https://chat.brainbaking.com/users/wouter/feed.atom" type="application/atom+xml"/>
|
||||
|
||||
<author>
|
||||
<id>https://chat.brainbaking.com/users/wouter</id>
|
||||
<activity:object>http://activitystrea.ms/schema/1.0/person</activity:object>
|
||||
<uri>https://chat.brainbaking.com/users/wouter</uri>
|
||||
<poco:preferredUsername>wouter</poco:preferredUsername>
|
||||
<poco:displayName>Wouter Groeneveld</poco:displayName>
|
||||
<poco:note>Level 35 Brain Baker. Loving the smell of freshly baked thoughts (and bread) in the morning 🍞. Sometimes convincing others to bake their brain (and bread) too 🧠. </poco:note>
|
||||
<summary>Level 35 Brain Baker. Loving the smell of freshly baked thoughts (and bread) in the morning 🍞. Sometimes convincing others to bake their brain (and bread) too 🧠. </summary>
|
||||
<name>wouter</name>
|
||||
<link rel="avatar" href="https://chat.brainbaking.com/media/f39bcd85-5098-45e2-b395-e274b712d512/headshot_2020.jpg"/>
|
||||
|
||||
<link rel="header" href="https://chat.brainbaking.com/media/3399cd78-4fd4-40ab-a174-c7805576a826/boekcover2.jpg"/>
|
||||
|
||||
|
||||
<ap_enabled>true</ap_enabled>
|
||||
|
||||
</author>
|
||||
|
||||
|
||||
|
||||
<link rel="next" href="https://chat.brainbaking.com/users/wouter/feed.atom?max_id=A4fIjNa6N1OJmaSMAS" type="application/atom+xml"/>
|
||||
|
||||
<entry>
|
||||
<activity:object-type>http://activitystrea.ms/schema/1.0/note</activity:object-type>
|
||||
<activity:verb>http://activitystrea.ms/schema/1.0/post</activity:verb>
|
||||
<id>https://chat.brainbaking.com/objects/b5b67e88-eda8-45dd-ab8f-54443b62e250</id>
|
||||
<title>some title</title>
|
||||
<content type="html"><span class="h-card">@<a class="u-url mention" href="https://reply-to-stuff" rel="ugc"> in reply to previous url test</content>
|
||||
<published>2021-03-20T11:12:08.955177Z</published>
|
||||
<updated>2021-03-20T11:12:08.955177Z</updated>
|
||||
<ostatus:conversation ref="tag:mastodon.social,2021-03-20:objectId=227433498:objectType=Conversation">
|
||||
tag:mastodon.social,2021-03-20:objectId=227433498:objectType=Conversation
|
||||
</ostatus:conversation>
|
||||
|
||||
</entry>
|
||||
</feed>
|
|
@ -1,9 +1,20 @@
|
|||
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
||||
|
||||
exports[`mastodon feed parser tests parse creates MD with context if in-reply-to 1`] = `
|
||||
"---
|
||||
source: \\"https://chat.brainbaking.com/objects/2e58289c-f5f0-415c-b2e1-62c74662aa16\\"
|
||||
context: \\"https://social.linux.pizza/users/StampedingLonghorn/statuses/105821099684887793\\"
|
||||
title: \\"@StampedingLonghorn I tried to chase him away, but you know how that turned out... 😼 There's ...\\"
|
||||
date: \\"2021-03-02T16:18:46\\"
|
||||
---
|
||||
|
||||
<span class=\\"h-card\\"><a class=\\"u-url mention\\" data-user=\\"A4nwg4LYyh4WgrJOXg\\" href=\\"https://social.linux.pizza/@StampedingLonghorn\\" rel=\\"ugc\\">@<span>StampedingLonghorn</span></a></span> I tried to chase him away, but you know how that turned out... 😼 There's even cat hair inside the cases... (to be clear: also unintentional)
|
||||
"
|
||||
`;
|
||||
|
||||
exports[`mastodon feed parser tests parse creates correct MD structure 1`] = `
|
||||
"---
|
||||
source: \\"https://chat.brainbaking.com/objects/77a3ecfb-47e1-4d7a-a24a-8b779d80a8ac\\"
|
||||
context: \\"\\"
|
||||
title: \\"I pulled the Google plug and installed LineageOS: https://brainbaking.com/post/2021/03/getting-ri...\\"
|
||||
date: \\"2021-03-01T19:03:35\\"
|
||||
---
|
||||
|
@ -15,7 +26,6 @@ I pulled the Google plug and installed LineageOS: <a href=\\"https://brainbaking
|
|||
exports[`mastodon feed parser tests parse embedded images 1`] = `
|
||||
"---
|
||||
source: \\"https://chat.brainbaking.com/objects/a51e13ce-d618-4602-84f7-f398126510ff\\"
|
||||
context: \\"\\"
|
||||
title: \\"Enjoyed an afternoon of oldskool Diablo II on the ...\\"
|
||||
date: \\"2021-03-14T17:41:53\\"
|
||||
---
|
||||
|
|
|
@ -29,6 +29,16 @@ describe("mastodon feed parser tests", () => {
|
|||
dir = await fsp.readdir(`${dumpdir}/2021/03`, { withFileTypes: true })
|
||||
expect(dir.length).toBe(1)
|
||||
})
|
||||
test("does not ignore explicit '@url' replies if ignoreReplies is set to true", async () => {
|
||||
await parseMastoFeed({
|
||||
url: "masto-feed-at-url",
|
||||
notesdir: dumpdir,
|
||||
ignoreReplies: true
|
||||
})
|
||||
|
||||
dir = await fsp.readdir(`${dumpdir}/2021/03`, { withFileTypes: true })
|
||||
expect(dir.length).toBe(1)
|
||||
})
|
||||
test("does not ignore replies if ignoreReplies is set to false", async () => {
|
||||
await parseMastoFeed({
|
||||
url: "masto-feed-with-replies",
|
||||
|
@ -127,9 +137,25 @@ describe("mastodon feed parser tests", () => {
|
|||
titleCount: 5000
|
||||
})
|
||||
|
||||
const actualMd = await fsp.readFile(`${dumpdir}/2021/03/02h16m18s46.md`)
|
||||
const actualMd = (await fsp.readFile(`${dumpdir}/2021/03/02h16m18s46.md`)).toString()
|
||||
expect(actualMd).toMatchSnapshot()
|
||||
const expectedReplyTo = "https://social.linux.pizza/users/StampedingLonghorn/statuses/105821099684887793"
|
||||
|
||||
const md = frontMatterParser.parseSync(actualMd)
|
||||
expect(md.data.context).toBe(expectedReplyTo)
|
||||
})
|
||||
|
||||
test("parse creates MD with context if @http(s) URL", async () => {
|
||||
await parseMastoFeed({
|
||||
url: "masto-feed-at-url",
|
||||
notesdir: dumpdir,
|
||||
utcOffset: 0,
|
||||
titleCount: 5000
|
||||
})
|
||||
|
||||
const actualMd = await fsp.readFile(`${dumpdir}/2021/03/20h11m12s08.md`)
|
||||
const expectedReplyTo = "https://reply-to-stuff"
|
||||
|
||||
const md = frontMatterParser.parseSync(actualMd.toString())
|
||||
expect(md.data.context).toBe(expectedReplyTo)
|
||||
})
|
||||
|
|
|
@ -24,7 +24,15 @@ describe("mastodon feed parser end to end scenario test", () => {
|
|||
notesdir: dumpdir
|
||||
})
|
||||
|
||||
let dir = await fsp.readdir(`${dumpdir}/2021/03`, { withFileTypes: true })
|
||||
const dirroot = await fsp.readdir(`${dumpdir}`, { withFileTypes: true })
|
||||
expect(dirroot.length).toBe(1)
|
||||
const year = dirroot[0].name
|
||||
|
||||
const dirmonth = await fsp.readdir(`${dumpdir}/${year}`, { withFileTypes: true })
|
||||
expect(dirmonth.length).toBe(1)
|
||||
const month = dirmonth[0].name
|
||||
|
||||
const dir = await fsp.readdir(`${dumpdir}/${year}/${month}`, { withFileTypes: true })
|
||||
expect(dir.length).not.toBe(0)
|
||||
})
|
||||
|
||||
|
|
Loading…
Reference in New Issue