From 707a71d99466c6a111b99780a6d3afed85996db3 Mon Sep 17 00:00:00 2001 From: wgroeneveld Date: Thu, 4 Mar 2021 21:06:28 +0100 Subject: [PATCH] lunr script added --- src/file-utils.js | 16 +++++++++ src/index.js | 6 ++-- src/lunr-index-builder.js | 48 +++++++++++++++++++++++++ src/masto-feed-parser.js | 11 +----- test/lunr-index-builder.test.js | 24 +++++++++++++ test/postsstub1/2021/03/some-article.md | 8 +++++ test/postsstub2/2021/03/some-article.md | 10 ++++++ 7 files changed, 111 insertions(+), 12 deletions(-) create mode 100644 src/file-utils.js create mode 100644 src/lunr-index-builder.js create mode 100644 test/lunr-index-builder.test.js create mode 100644 test/postsstub1/2021/03/some-article.md create mode 100644 test/postsstub2/2021/03/some-article.md diff --git a/src/file-utils.js b/src/file-utils.js new file mode 100644 index 0000000..85df69e --- /dev/null +++ b/src/file-utils.js @@ -0,0 +1,16 @@ +const fs = require('fs').promises; +const { resolve } = require('path'); + +// https://stackoverflow.com/questions/5827612/node-js-fs-readdir-recursive-directory-search +async function getFiles(dir) { + const dirents = await fs.readdir(dir, { withFileTypes: true }); + const files = await Promise.all(dirents.map((dirent) => { + const res = resolve(dir, dirent.name); + return dirent.isDirectory() ? getFiles(res) : res; + })); + return Array.prototype.concat(...files); +} + +module.exports = { + getFiles +} diff --git a/src/index.js b/src/index.js index 2d55fd2..3f9b41a 100644 --- a/src/index.js +++ b/src/index.js @@ -1,7 +1,9 @@ const { parseMastoFeed } = require('./masto-feed-parser') const { widgetify } = require('./goodreads-widgetify.js') +const { buildIndex } = require('./lunr-index-builder.js') module.exports = { - parseMastoFeed, - goodreadsWidgetify: widgetify + mastodonParseFeed: parseMastoFeed, + goodreadsWidgetify: widgetify, + lunrBuildIndex: buildIndex }; diff --git a/src/lunr-index-builder.js b/src/lunr-index-builder.js new file mode 100644 index 0000000..6b97d52 --- /dev/null +++ b/src/lunr-index-builder.js @@ -0,0 +1,48 @@ +const fs = require('fs').promises; +const { getFiles } = require('./file-utils'); +const { promisify } = require('util'); +const frontMatterParser = require('parser-front-matter'); + +const parse = promisify(frontMatterParser.parse.bind(frontMatterParser)); + + +async function loadPostsWithFrontMatter(postsDirectoryPath) { + const postNames = await getFiles(postsDirectoryPath); + const posts = await Promise.all( + // could be .DS_Store stuff found using recursive function above... + postNames.filter(name => name.endsWith('.md')).map(async fileName => { + const fileContent = await fs.readFile(fileName, 'utf8'); + const {content, data} = await parse(fileContent); + return { + content: content.slice(0, 3000), + ...data + }; + }) + ); + return posts; +} + +const lunrjs = require('lunr'); + +function makeIndex(posts) { + return lunrjs(function() { + this.ref('title'); + this.field('title'); + this.field('content'); + this.field('tags'); + posts.forEach(p => { + this.add(p); + }); + }); +} + +async function run(contentDirs) { + const posts = await Promise.all(contentDirs.map(async (dir) => { + return await loadPostsWithFrontMatter(dir) + })) + return makeIndex(posts.flat()); +} + +module.exports = { + buildIndex: run +} diff --git a/src/masto-feed-parser.js b/src/masto-feed-parser.js index f4c808f..1e7afe9 100644 --- a/src/masto-feed-parser.js +++ b/src/masto-feed-parser.js @@ -4,18 +4,9 @@ const fs = require('fs').promises; const { writeFileSync, existsSync, mkdirSync } = require('fs'); const { resolve } = require('path'); const ent = require('ent') +const { getFiles } = require('./file-utils'); -// https://stackoverflow.com/questions/5827612/node-js-fs-readdir-recursive-directory-search -async function getFiles(dir) { - const dirents = await fs.readdir(dir, { withFileTypes: true }); - const files = await Promise.all(dirents.map((dirent) => { - const res = resolve(dir, dirent.name); - return dirent.isDirectory() ? getFiles(res) : res; - })); - return Array.prototype.concat(...files); -} - function stripBeforeThirdSlash(str) { const splitted = str.split('/') return splitted.slice(splitted.length - 3).join('/') diff --git a/test/lunr-index-builder.test.js b/test/lunr-index-builder.test.js new file mode 100644 index 0000000..738f01b --- /dev/null +++ b/test/lunr-index-builder.test.js @@ -0,0 +1,24 @@ +const { buildIndex } = require('../src/lunr-index-builder') + +let result = null + +beforeEach(async () => { + result = await buildIndex([ + `${__dirname}/postsstub1`, + `${__dirname}/postsstub2`]) +}) + +test('lunr inverted index stuffed with loads of goodies from both dirs', async() => { + expect(result.invertedIndex.cool).not.toBe(undefined) + expect(result.invertedIndex.gravediggaz).not.toBe(undefined) + expect(result.invertedIndex.wu).not.toBe(undefined) + expect(result.invertedIndex.tang).not.toBe(undefined) + expect(result.invertedIndex.east).not.toBe(undefined) + expect(result.invertedIndex.side).not.toBe(undefined) +}) + +test('lunr index builder fields are title, content, tags', async () => { + // Do not forget to add JSON.Stringify() when calling this in production + expect(result.fields).toEqual(["title", "content", "tags"]) +}) + diff --git a/test/postsstub1/2021/03/some-article.md b/test/postsstub1/2021/03/some-article.md new file mode 100644 index 0000000..da48c66 --- /dev/null +++ b/test/postsstub1/2021/03/some-article.md @@ -0,0 +1,8 @@ +--- +title: some cool article +date: 2021-03-02 +--- + +Hi gayz whas goin' on gravediggaz style! + +kkthxxbbye diff --git a/test/postsstub2/2021/03/some-article.md b/test/postsstub2/2021/03/some-article.md new file mode 100644 index 0000000..ab67f40 --- /dev/null +++ b/test/postsstub2/2021/03/some-article.md @@ -0,0 +1,10 @@ +--- +title: another cool article east-side +date: 2021-03-03 +--- + +It's just a hobby that I picked up in the lobby + +--- + +The W, Wu-tang Clan yoo