implement image enclosure parsing

This commit is contained in:
Wouter Groeneveld 2021-03-14 21:21:47 +01:00
parent 24f6d1a8e3
commit 15f779e63f
10 changed files with 262 additions and 17 deletions

44
.pnp.js generated
View File

@ -38,6 +38,7 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) {
"packageLocation": "./",
"packageDependencies": [
["dayjs", "npm:1.10.4"],
["ejs", "npm:3.1.6"],
["ent", "npm:2.2.0"],
["fast-xml-parser", "npm:3.18.0"],
["got", "npm:11.8.2"],
@ -1305,6 +1306,15 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) {
"linkType": "HARD",
}]
]],
["async", [
["npm:0.9.2", {
"packageLocation": "./.yarn/cache/async-npm-0.9.2-d8cafe6cc3-78c0aad8ad.zip/node_modules/async/",
"packageDependencies": [
["async", "npm:0.9.2"]
],
"linkType": "HARD",
}]
]],
["asynckit", [
["npm:0.4.0", {
"packageLocation": "./.yarn/cache/asynckit-npm-0.4.0-c718858525-a024000b9d.zip/node_modules/asynckit/",
@ -2307,6 +2317,16 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) {
"linkType": "HARD",
}]
]],
["ejs", [
["npm:3.1.6", {
"packageLocation": "./.yarn/cache/ejs-npm-3.1.6-03db39fd15-cb77a9368e.zip/node_modules/ejs/",
"packageDependencies": [
["ejs", "npm:3.1.6"],
["jake", "npm:10.8.2"]
],
"linkType": "HARD",
}]
]],
["electron-to-chromium", [
["npm:1.3.680", {
"packageLocation": "./.yarn/cache/electron-to-chromium-npm-1.3.680-a4b621ac31-681c4f4350.zip/node_modules/electron-to-chromium/",
@ -2653,6 +2673,16 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) {
"linkType": "HARD",
}]
]],
["filelist", [
["npm:1.0.2", {
"packageLocation": "./.yarn/cache/filelist-npm-1.0.2-d98495ab20-f8bf29d317.zip/node_modules/filelist/",
"packageDependencies": [
["filelist", "npm:1.0.2"],
["minimatch", "npm:3.0.4"]
],
"linkType": "HARD",
}]
]],
["fill-range", [
["npm:4.0.0", {
"packageLocation": "./.yarn/cache/fill-range-npm-4.0.0-95a6e45784-4a1491ee29.zip/node_modules/fill-range/",
@ -3523,12 +3553,26 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) {
"linkType": "HARD",
}]
]],
["jake", [
["npm:10.8.2", {
"packageLocation": "./.yarn/cache/jake-npm-10.8.2-e211473cb9-c60d3f491c.zip/node_modules/jake/",
"packageDependencies": [
["jake", "npm:10.8.2"],
["async", "npm:0.9.2"],
["chalk", "npm:2.4.2"],
["filelist", "npm:1.0.2"],
["minimatch", "npm:3.0.4"]
],
"linkType": "HARD",
}]
]],
["jam-my-stack", [
["workspace:.", {
"packageLocation": "./",
"packageDependencies": [
["jam-my-stack", "workspace:."],
["dayjs", "npm:1.10.4"],
["ejs", "npm:3.1.6"],
["ent", "npm:2.2.0"],
["fast-xml-parser", "npm:3.18.0"],
["got", "npm:11.8.2"],

View File

@ -82,6 +82,10 @@ I pulled the Google plug and installed LineageOS: <a href="https://brainbaking.c
See implementation for more details and features.
**Also parses**:
- `<link rel="enclosure"/>` image types: these are rendered through the `render-enclosures.ejs` [ejs template](https://ejs.co/), and the result is appended to the Markdown file if any are found. Styling is up to you...
### 2. Goodreads
#### 2.1 `createWidget`

View File

@ -1,6 +1,6 @@
{
"name": "jam-my-stack",
"version": "1.0.9",
"version": "1.0.10",
"repository": {
"url": "https://github.com/wgroeneveld/jam-my-stack",
"type": "git"
@ -13,6 +13,7 @@
},
"dependencies": {
"dayjs": "^1.10.4",
"ejs": "^3.1.6",
"ent": "^2.2.0",
"fast-xml-parser": "^3.18.0",
"got": "^11.8.2",

View File

@ -1,11 +1,12 @@
const ejs = require('ejs');
const got = require("got");
const parser = require("fast-xml-parser");
const { writeFileSync, existsSync, mkdirSync } = require('fs');
const ent = require('ent')
const { readFileSync, writeFileSync, existsSync, mkdirSync } = require('fs');
const ent = require('ent');
const { getFiles } = require('./../file-utils');
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
dayjs.extend(utc)
const dayjs = require('dayjs');
const utc = require('dayjs/plugin/utc');
dayjs.extend(utc);
function stripBeforeThirdSlash(str) {
const splitted = str.split('/')
@ -16,12 +17,19 @@ function stripBeforeLastSlash(str) {
return str.substring(str.lastIndexOf('/') + 1, str.length)
}
/**
 * Renders an EJS template file with the supplied data.
 *
 * @param {string} filename - path of the template file; it is read synchronously on every call
 * @param {object} data - values made available to the template
 * @returns {string} the output produced by `ejs.render`
 */
function tmpl(filename, data) {
  const source = readFileSync(filename).toString()
  // rmWhitespace asks ejs to strip the whitespace it considers safe to remove
  const renderOptions = { rmWhitespace: true }
  return ejs.render(source, data, renderOptions)
}
function convertAtomItemToMd(item, notesdir) {
const path = `${notesdir}/${item.year}/${item.month}`
if(!existsSync(`${notesdir}/${item.year}`)) mkdirSync(`${notesdir}/${item.year}`)
if(!existsSync(path)) mkdirSync(path)
const mddata = `---
let mddata = `---
source: "${item.url}"
context: "${item.context}"
title: "${item.title}"
@ -29,7 +37,10 @@ date: "${item.year}-${item.month}-${item.day}T${item.date.format("HH:mm:ss")}"
---
${item.content}
`
`
if(item.media.length > 0) {
mddata += '\n' + tmpl('./src/mastodon/render-enclosures.ejs', { images: item.media })
}
writeFileSync(`${path}/${item.hash}.md`, mddata, 'utf-8')
}
@ -65,7 +76,11 @@ async function parseMastoFeed(options) {
const root = parser.parse(buffer.toString(), {
ignoreAttributes: false
})
const items = root.feed.entry.map(item => {
// in case a single item is in a feed instead of an array, wrap it ourselves
const entries = root.feed.entry.map ? root.feed.entry : [root.feed.entry]
const items = entries.map(item => {
const date = dayjs.utc(item.published).utcOffset(utcOffset)
const year = date.format("YYYY")
const month = date.format("MM")
@ -73,6 +88,10 @@ async function parseMastoFeed(options) {
// format: <thr:in-reply-to ref='https://social.linux.pizza/users/StampedingLonghorn/statuses/105821099684887793' href='https://social.linux.pizza/users/StampedingLonghorn/statuses/105821099684887793'/>
const context = item['thr:in-reply-to'] ? item['thr:in-reply-to']['@_ref'] : ""
// Collect the URLs of all JPEG enclosures attached to this entry.
// item.link is not guaranteed to be an array: it is absent when the entry has no <link>,
// and (like feed.entry) it may be a single object when the entry has exactly one <link>.
// [].concat() normalises all three shapes to an array, so media is always an array
// and later length checks on it cannot throw.
const media = [].concat(item.link ?? [])
  .filter(l => l['@_rel'] === 'enclosure' && l['@_type'] === 'image/jpeg')
  .map(l => l['@_href'])
// WHY double decode? &#34; = &amp;#34; - first decode '&', then the other char.'
return {
title: trimIfNeeded(ent.decode(ent.decode(item.title)), titleCount, titlePrefix), // summary (cut-off) of content
@ -80,6 +99,7 @@ async function parseMastoFeed(options) {
url: item.id, // format: https://chat.brainbaking.com/objects/0707fd54-185d-4ee7-9204-be370d57663c
context,
id: stripBeforeLastSlash(item.id),
media,
hash: `${day}h${date.format("HH")}m${date.format("mm")}s${date.format("ss")}`,
date, // format: 2021-03-02T16:18:46.658056Z
year,

View File

@ -0,0 +1,9 @@
<div class="flex">
<% images.forEach(function(image){ %>
<div>
<a class="lbox" href="<%= image %>">
<img src="<%= image %>" alt="Enclosed Toot image">
</a>
</div>
<% }); %>
</div>

View File

@ -10,7 +10,7 @@ async function got(url) {
}
}
const result = await fs.readFile(`./test/__mocks__/masto-feed-sample.xml`, 'utf8');
const result = await fs.readFile(`./test/__mocks__/${url}.xml`, 'utf8');
return result
}

View File

@ -0,0 +1,87 @@
<?xml version="1.0" encoding="UTF-8"?>
<feed
xmlns="http://www.w3.org/2005/Atom"
xmlns:thr="http://purl.org/syndication/thread/1.0"
xmlns:activity="http://activitystrea.ms/spec/1.0/"
xmlns:poco="http://portablecontacts.net/spec/1.0"
xmlns:ostatus="http://ostatus.org/schema/1.0">
<id>https://chat.brainbaking.com/users/wouter/feed.atom</id>
<title>wouter's timeline</title>
<updated>2021-03-02T16:18:46</updated>
<logo>https://chat.brainbaking.com/media/f39bcd85-5098-45e2-b395-e274b712d512/headshot_2020.jpg</logo>
<link rel="self" href="https://chat.brainbaking.com/users/wouter/feed.atom" type="application/atom+xml"/>
<author>
<id>https://chat.brainbaking.com/users/wouter</id>
<activity:object>http://activitystrea.ms/schema/1.0/person</activity:object>
<uri>https://chat.brainbaking.com/users/wouter</uri>
<poco:preferredUsername>wouter</poco:preferredUsername>
<poco:displayName>Wouter Groeneveld</poco:displayName>
<poco:note>Level 35 Brain Baker. Loving the smell of freshly baked thoughts (and bread) in the morning 🍞. Sometimes convincing others to bake their brain (and bread) too 🧠. </poco:note>
<summary>Level 35 Brain Baker. Loving the smell of freshly baked thoughts (and bread) in the morning 🍞. Sometimes convincing others to bake their brain (and bread) too 🧠. </summary>
<name>wouter</name>
<link rel="avatar" href="https://chat.brainbaking.com/media/f39bcd85-5098-45e2-b395-e274b712d512/headshot_2020.jpg"/>
<link rel="header" href="https://chat.brainbaking.com/media/3399cd78-4fd4-40ab-a174-c7805576a826/boekcover2.jpg"/>
<ap_enabled>true</ap_enabled>
</author>
<link rel="next" href="https://chat.brainbaking.com/users/wouter/feed.atom?max_id=A4fIjNa6N1OJmaSMAS" type="application/atom+xml"/>
<entry>
<activity:object-type>http://activitystrea.ms/schema/1.0/note</activity:object-type>
<activity:verb>http://activitystrea.ms/schema/1.0/post</activity:verb>
<id>https://chat.brainbaking.com/objects/a51e13ce-d618-4602-84f7-f398126510ff</id>
<title>Enjoyed an afternoon of oldskool Diablo II on the Europebattle servers. We did a few Mephisto run...</title>
<content type="html">Enjoyed an afternoon of oldskool Diablo II on the Europebattle servers. We did a few Mephisto runs, managed to hit Hell, and I re-converetd my druid into a windy one. Good times!</content>
<published>2021-03-14T16:41:53.518661Z</published>
<updated>2021-03-14T16:41:53.518661Z</updated>
<ostatus:conversation ref="https://chat.brainbaking.com/contexts/6c75527a-613c-47a4-9922-47bac2cb9ee5">
https://chat.brainbaking.com/contexts/6c75527a-613c-47a4-9922-47bac2cb9ee5
</ostatus:conversation>
<link href="https://chat.brainbaking.com/contexts/6c75527a-613c-47a4-9922-47bac2cb9ee5" rel="ostatus:conversation"/>
<summary></summary>
<link type="application/atom+xml" href='https://chat.brainbaking.com/objects/a51e13ce-d618-4602-84f7-f398126510ff' rel="self"/>
<link type="text/html" href='https://chat.brainbaking.com/objects/a51e13ce-d618-4602-84f7-f398126510ff' rel="alternate"/>
<link rel="enclosure" href="https://chat.brainbaking.com/media/6f8b72ca-9bfb-460b-9609-c4298a8cab2b/EuropeBattle%202021-03-14%2016-20-36-87.jpg" type="image/jpeg"/>
<link rel="enclosure" href="https://chat.brainbaking.com/media/3dbcb044-2acc-4ace-a4f6-37ce94c3f2b1/EuropeBattle%202021-03-14%2015-35-01-56.jpg" type="image/jpeg"/>
<link rel="mentioned" ostatus:object-type="http://activitystrea.ms/schema/1.0/collection" href="http://activityschema.org/collection/public"/>
<link rel="mentioned" ostatus:object-type="http://activitystrea.ms/schema/1.0/person" href="https://chat.brainbaking.com/users/wouter"/>
</entry>
</feed>

View File

@ -9,5 +9,32 @@ date: \\"2021-03-01T19:03:35\\"
---
I pulled the Google plug and installed LineageOS: <a href=\\"https://brainbaking.com/post/2021/03/getting-rid-of-tracking-using-lineageos/\\" rel=\\"ugc\\">https://brainbaking.com/post/2021/03/getting-rid-of-tracking-using-lineageos/</a> Very impressed so far! Also rely on my own CalDAV server to replace GCalendar. Any others here running <a class=\\"hashtag\\" data-tag=\\"lineageos\\" href=\\"https://chat.brainbaking.com/tag/lineageos\\" rel=\\"tag ugc\\">#lineageos</a> for privacy reasons?
"
"
`;
exports[`mastodon feed parser tests parse embedded images 1`] = `
"---
source: \\"https://chat.brainbaking.com/objects/a51e13ce-d618-4602-84f7-f398126510ff\\"
context: \\"\\"
title: \\"Enjoyed an afternoon of oldskool Diablo II on the ...\\"
date: \\"2021-03-14T17:41:53\\"
---
Enjoyed an afternoon of oldskool Diablo II on the Europebattle servers. We did a few Mephisto runs, managed to hit Hell, and I re-converetd my druid into a windy one. Good times!
<div class=\\"flex\\">
<div>
<a class=\\"lbox\\" href=\\"https://chat.brainbaking.com/media/6f8b72ca-9bfb-460b-9609-c4298a8cab2b/EuropeBattle%202021-03-14%2016-20-36-87.jpg\\">
<img src=\\"https://chat.brainbaking.com/media/6f8b72ca-9bfb-460b-9609-c4298a8cab2b/EuropeBattle%202021-03-14%2016-20-36-87.jpg\\" alt=\\"Enclosed Toot image\\">
</a>
</div>
<div>
<a class=\\"lbox\\" href=\\"https://chat.brainbaking.com/media/3dbcb044-2acc-4ace-a4f6-37ce94c3f2b1/EuropeBattle%202021-03-14%2015-35-01-56.jpg\\">
<img src=\\"https://chat.brainbaking.com/media/3dbcb044-2acc-4ace-a4f6-37ce94c3f2b1/EuropeBattle%202021-03-14%2015-35-01-56.jpg\\" alt=\\"Enclosed Toot image\\">
</a>
</div>
</div>"
`;

View File

@ -18,9 +18,20 @@ describe("mastodon feed parser tests", () => {
fs.mkdirSync(dumpdir)
});
// Verifies that image enclosures of a feed entry end up in the generated Markdown note.
// The mocked `got` resolves the url to ./test/__mocks__/<url>.xml, so "masto-feed-images"
// selects the fixture of that name (presumably the feed whose entry carries two JPEG enclosures).
test("parse embedded images", async () => {
await parseMastoFeed({
url: "masto-feed-images",
notesdir: dumpdir,
utcOffset: 1
})
// The note's file name is built from the entry's publish time shifted by utcOffset:
// the snapshot shows 17:41:53 on 2021-03-14, hence 2021/03/14h17m41s53.md.
const actualMd = (await fsp.readFile(`${dumpdir}/2021/03/14h17m41s53.md`)).toString()
expect(actualMd).toMatchSnapshot()
})
test("parse trims title according to config and adds three dots", async () => {
await parseMastoFeed({
url: "invalid",
url: "masto-feed-sample",
notesdir: dumpdir,
utcOffset: 0,
titleCount: 5,
@ -35,7 +46,7 @@ describe("mastodon feed parser tests", () => {
test("parse does not trim if titleCount > title length and does not add three dots", async () => {
await parseMastoFeed({
url: "invalid",
url: "masto-feed-sample",
notesdir: dumpdir,
utcOffset: 0,
titleCount: 5000
@ -49,7 +60,7 @@ describe("mastodon feed parser tests", () => {
test("parse creates separate notes in each month subdir", async () => {
await parseMastoFeed({
url: "invalid",
url: "masto-feed-sample",
notesdir: dumpdir
})
@ -61,7 +72,7 @@ describe("mastodon feed parser tests", () => {
test("parse creates correct MD structure", async () => {
await parseMastoFeed({
url: "invalid",
url: "masto-feed-sample",
notesdir: dumpdir,
utcOffset: 0,
titleCount: 5000
@ -74,7 +85,7 @@ describe("mastodon feed parser tests", () => {
test("parse creates MD with context if in-reply-to", async () => {
//https://aus.social/users/aussocialadmin/statuses/105817435308293091
await parseMastoFeed({
url: "invalid",
url: "masto-feed-sample",
notesdir: dumpdir,
utcOffset: 0,
titleCount: 5000

View File

@ -1008,6 +1008,13 @@ __metadata:
languageName: node
linkType: hard
"async@npm:0.9.x":
version: 0.9.2
resolution: "async@npm:0.9.2"
checksum: 78c0aad8add0b84ccf9bde90d20a9cd20146e3734a4c9ac9bfb3a30d1b7df12b7d95c13119af825a89480210c02f7ffee38ac07c13ac43abd6636691b982b591
languageName: node
linkType: hard
"asynckit@npm:^0.4.0":
version: 0.4.0
resolution: "asynckit@npm:0.4.0"
@ -1311,7 +1318,7 @@ __metadata:
languageName: node
linkType: hard
"chalk@npm:^2.0.0":
"chalk@npm:^2.0.0, chalk@npm:^2.4.2":
version: 2.4.2
resolution: "chalk@npm:2.4.2"
dependencies:
@ -1849,6 +1856,17 @@ __metadata:
languageName: node
linkType: hard
"ejs@npm:^3.1.6":
version: 3.1.6
resolution: "ejs@npm:3.1.6"
dependencies:
jake: ^10.6.1
bin:
ejs: ./bin/cli.js
checksum: cb77a9368e50c7b11cb8d64c911397a7bf8f9f6d155474bc64154fd1d0ccf896de28a01d5e6ce135470ede038ca7dc02a83b8c0a904277eaef605cce4ab46dfb
languageName: node
linkType: hard
"electron-to-chromium@npm:^1.3.649":
version: 1.3.680
resolution: "electron-to-chromium@npm:1.3.680"
@ -2160,6 +2178,15 @@ __metadata:
languageName: node
linkType: hard
"filelist@npm:^1.0.1":
version: 1.0.2
resolution: "filelist@npm:1.0.2"
dependencies:
minimatch: ^3.0.4
checksum: f8bf29d31779dd04ee9e5d225e5a669920d5443f40b38fb5ecc8442ac29848a6f8bc3c937cb8ba78f5ae0d819dad52f4750d23c4dd1b2a51370aa67027d95c0c
languageName: node
linkType: hard
"fill-range@npm:^4.0.0":
version: 4.0.0
resolution: "fill-range@npm:4.0.0"
@ -2943,11 +2970,26 @@ fsevents@^2.1.2:
languageName: node
linkType: hard
"jake@npm:^10.6.1":
version: 10.8.2
resolution: "jake@npm:10.8.2"
dependencies:
async: 0.9.x
chalk: ^2.4.2
filelist: ^1.0.1
minimatch: ^3.0.4
bin:
jake: ./bin/cli.js
checksum: c60d3f491ce59bba09b8a2ee351122eb6dd19a6826347de5ec4e94ddee5c5e70e14120c14ce6fc2efa360d0db12d7a28a1044d7aa084414dd695ce154fb87d9e
languageName: node
linkType: hard
"jam-my-stack@workspace:.":
version: 0.0.0-use.local
resolution: "jam-my-stack@workspace:."
dependencies:
dayjs: ^1.10.4
ejs: ^3.1.6
ent: ^2.2.0
fast-xml-parser: ^3.18.0
got: ^11.8.2