From 513ed565dcc3be3efcf7866a981d71ad2e196383 Mon Sep 17 00:00:00 2001 From: wgroeneveld Date: Sat, 24 Apr 2021 20:03:58 +0200 Subject: [PATCH] extract context if explicitly mentioned @http... url --- src/mastodon/feed-parser.js | 29 +++++++++-- src/mastodon/templates.js | 2 + test/__mocks__/masto-feed-at-url.xml | 50 +++++++++++++++++++ .../__snapshots__/feed-parser.test.js.snap | 14 +++++- test/mastodon/feed-parser.test.js | 28 ++++++++++- test/mastodon/mastodon-e2e.test.js | 10 +++- 6 files changed, 124 insertions(+), 9 deletions(-) create mode 100644 test/__mocks__/masto-feed-at-url.xml diff --git a/src/mastodon/feed-parser.js b/src/mastodon/feed-parser.js index 9ab94de..bad1471 100644 --- a/src/mastodon/feed-parser.js +++ b/src/mastodon/feed-parser.js @@ -28,7 +28,7 @@ function convertAtomItemToMd(item, notesdir) { let mddata = ejs.render(templates.markdown, { item }) - if(item.media.length > 0) { + if(item.media?.length > 0) { mddata += '\n' + ejs.render(templates.enclosures, { images: item.media }, { rmWhitespace: true }) } @@ -44,6 +44,23 @@ function trimIfNeeded(title, count, prefix) { return prefix + title } +function detectContext(item, content) { + // format: + if(item['thr:in-reply-to']) { + return item['thr:in-reply-to']['@_ref'] + } + + // could also be: manually in text "@[= 0) { + const res = content.match(/@/) + if(res.length == 3) { + return res[2] + } + } + + return "" +} + // opts: // notesdir = `${__dirname}/content/notes` // url = "https://chat.brainbaking.com/users/wouter/feed"; @@ -74,24 +91,26 @@ async function parseMastoFeed(options) { const entries = root.feed.entry.map ? root.feed.entry : [root.feed.entry] const items = entries.map(item => { + const content = ent.decode(ent.decode(item.content['#text'])) // format: <span class="h-card.... const date = dayjs.utc(item.published).utcOffset(utcOffset) const year = date.format("YYYY") const month = date.format("MM") const day = date.format("DD") - // format: - const context = item['thr:in-reply-to'] ? item['thr:in-reply-to']['@_ref'] : "" + const context = detectContext(item, content) const title = escQuotes(ent.decode(ent.decode(item.title))) const media = item.link?.filter(l => l['@_rel'] === 'enclosure' && l['@_type'] === 'image/jpeg').map(l => l['@_href']) + // WHY double decode? " = &#34; - first decode '&', then the other char.' return { title: trimIfNeeded(title, titleCount, titlePrefix), // summary (cut-off) of content - content: ent.decode(ent.decode(item.content['#text'])), // format: <span class="h-card.... + content, url: escQuotes(item.id), // format: https://chat.brainbaking.com/objects/0707fd54-185d-4ee7-9204-be370d57663c context: escQuotes(context), + contextFromMastodon: item['thr:in-reply-to'], id: stripBeforeLastSlash(item.id), media, hash: `${day}h${date.format("HH")}m${date.format("mm")}s${date.format("ss")}`, @@ -101,7 +120,7 @@ async function parseMastoFeed(options) { day } }) - .filter(itm => ignoreReplies ? !itm.context : true) + .filter(itm => ignoreReplies ? !itm.contextFromMastodon : true) .filter(itm => !notes.includes(`${itm.year}/${itm.month}/${itm.hash}`)) .forEach(itm => convertAtomItemToMd(itm, notesdir)) } diff --git a/src/mastodon/templates.js b/src/mastodon/templates.js index c695112..1dcdd52 100644 --- a/src/mastodon/templates.js +++ b/src/mastodon/templates.js @@ -1,7 +1,9 @@ const markdown = `--- source: "<%- item.url %>" +<% if (item.context) { -%> context: "<%- item.context %>" +<% } -%> title: "<%- item.title %>" date: "<%- item.year %>-<%- item.month %>-<%- item.day %>T<%- item.date.format('HH:mm:ss') %>" --- diff --git a/test/__mocks__/masto-feed-at-url.xml b/test/__mocks__/masto-feed-at-url.xml new file mode 100644 index 0000000..ec232ab --- /dev/null +++ b/test/__mocks__/masto-feed-at-url.xml @@ -0,0 +1,50 @@ + + + + https://chat.brainbaking.com/users/wouter/feed.atom + wouter's timeline + 2021-03-02T16:18:46 + https://chat.brainbaking.com/media/f39bcd85-5098-45e2-b395-e274b712d512/headshot_2020.jpg + + + + https://chat.brainbaking.com/users/wouter + http://activitystrea.ms/schema/1.0/person + https://chat.brainbaking.com/users/wouter + wouter + Wouter Groeneveld + Level 35 Brain Baker. Loving the smell of freshly baked thoughts (and bread) in the morning 🍞. Sometimes convincing others to bake their brain (and bread) too 🧠. + Level 35 Brain Baker. Loving the smell of freshly baked thoughts (and bread) in the morning 🍞. Sometimes convincing others to bake their brain (and bread) too 🧠. + wouter + + + + + + true + + + + + + + + + http://activitystrea.ms/schema/1.0/note + http://activitystrea.ms/schema/1.0/post + https://chat.brainbaking.com/objects/b5b67e88-eda8-45dd-ab8f-54443b62e250 + some title + <span class="h-card">@<a class="u-url mention" href="https://reply-to-stuff" rel="ugc"> in reply to previous url test + 2021-03-20T11:12:08.955177Z + 2021-03-20T11:12:08.955177Z + + tag:mastodon.social,2021-03-20:objectId=227433498:objectType=Conversation + + + + diff --git a/test/mastodon/__snapshots__/feed-parser.test.js.snap b/test/mastodon/__snapshots__/feed-parser.test.js.snap index 0886f38..2d18d42 100644 --- a/test/mastodon/__snapshots__/feed-parser.test.js.snap +++ b/test/mastodon/__snapshots__/feed-parser.test.js.snap @@ -1,9 +1,20 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP +exports[`mastodon feed parser tests parse creates MD with context if in-reply-to 1`] = ` +"--- +source: \\"https://chat.brainbaking.com/objects/2e58289c-f5f0-415c-b2e1-62c74662aa16\\" +context: \\"https://social.linux.pizza/users/StampedingLonghorn/statuses/105821099684887793\\" +title: \\"@StampedingLonghorn I tried to chase him away, but you know how that turned out... 😼 There's ...\\" +date: \\"2021-03-02T16:18:46\\" +--- + +@StampedingLonghorn I tried to chase him away, but you know how that turned out... 😼 There's even cat hair inside the cases... (to be clear: also unintentional) +" +`; + exports[`mastodon feed parser tests parse creates correct MD structure 1`] = ` "--- source: \\"https://chat.brainbaking.com/objects/77a3ecfb-47e1-4d7a-a24a-8b779d80a8ac\\" -context: \\"\\" title: \\"I pulled the Google plug and installed LineageOS: https://brainbaking.com/post/2021/03/getting-ri...\\" date: \\"2021-03-01T19:03:35\\" --- @@ -15,7 +26,6 @@ I pulled the Google plug and installed LineageOS: { dir = await fsp.readdir(`${dumpdir}/2021/03`, { withFileTypes: true }) expect(dir.length).toBe(1) }) + test("does not ignore explicit '@url' replies if ignoreReplies is set to true", async () => { + await parseMastoFeed({ + url: "masto-feed-at-url", + notesdir: dumpdir, + ignoreReplies: true + }) + + dir = await fsp.readdir(`${dumpdir}/2021/03`, { withFileTypes: true }) + expect(dir.length).toBe(1) + }) test("does not ignore replies if ignoreReplies is set to false", async () => { await parseMastoFeed({ url: "masto-feed-with-replies", @@ -127,9 +137,25 @@ describe("mastodon feed parser tests", () => { titleCount: 5000 }) - const actualMd = await fsp.readFile(`${dumpdir}/2021/03/02h16m18s46.md`) + const actualMd = (await fsp.readFile(`${dumpdir}/2021/03/02h16m18s46.md`)).toString() + expect(actualMd).toMatchSnapshot() const expectedReplyTo = "https://social.linux.pizza/users/StampedingLonghorn/statuses/105821099684887793" + const md = frontMatterParser.parseSync(actualMd) + expect(md.data.context).toBe(expectedReplyTo) + }) + + test("parse creates MD with context if @http(s) URL", async () => { + await parseMastoFeed({ + url: "masto-feed-at-url", + notesdir: dumpdir, + utcOffset: 0, + titleCount: 5000 + }) + + const actualMd = await fsp.readFile(`${dumpdir}/2021/03/20h11m12s08.md`) + const expectedReplyTo = "https://reply-to-stuff" + const md = frontMatterParser.parseSync(actualMd.toString()) expect(md.data.context).toBe(expectedReplyTo) }) diff --git a/test/mastodon/mastodon-e2e.test.js b/test/mastodon/mastodon-e2e.test.js index fc0873d..3c8559c 100644 --- a/test/mastodon/mastodon-e2e.test.js +++ b/test/mastodon/mastodon-e2e.test.js @@ -24,7 +24,15 @@ describe("mastodon feed parser end to end scenario test", () => { notesdir: dumpdir }) - let dir = await fsp.readdir(`${dumpdir}/2021/03`, { withFileTypes: true }) + const dirroot = await fsp.readdir(`${dumpdir}`, { withFileTypes: true }) + expect(dirroot.length).toBe(1) + const year = dirroot[0].name + + const dirmonth = await fsp.readdir(`${dumpdir}/${year}`, { withFileTypes: true }) + expect(dirmonth.length).toBe(1) + const month = dirmonth[0].name + + const dir = await fsp.readdir(`${dumpdir}/${year}/${month}`, { withFileTypes: true }) expect(dir.length).not.toBe(0) })