From 528fab6c873e94a997e721585b4be9280124d841 Mon Sep 17 00:00:00 2001 From: NGPixel Date: Sun, 4 Sep 2016 01:12:42 -0400 Subject: [PATCH] Early work on background agent for search --- agent.js | 63 +++++++++++++++++++++++++++++++++++++++++----- config.sample.yml | 7 +++--- gulpfile.js | 19 ++++++++------ models/entries.js | 61 ++++++++++++++++++++++---------------------- models/git.js | 16 +++--------- models/markdown.js | 40 +++++++++++++++++++++++++++-- models/search.js | 42 +++++++++++++++++++++++++++++++ package.json | 1 + server.js | 8 +----- 9 files changed, 189 insertions(+), 68 deletions(-) create mode 100644 models/search.js diff --git a/agent.js b/agent.js index 067d6d26..a1b623d9 100644 --- a/agent.js +++ b/agent.js @@ -11,17 +11,20 @@ global.ROOTPATH = __dirname; // ---------------------------------------- global.winston = require('winston'); -winston.info('[AGENT] Requarks Wiki BgAgent is initializing...'); +winston.info('[AGENT] Background Agent is initializing...'); var appconfig = require('./models/config')('./config.yml'); -global.git = require('./models/git').init(appconfig, true); +global.git = require('./models/git').init(appconfig); global.entries = require('./models/entries').init(appconfig); global.mark = require('./models/markdown'); +global.search = require('./models/search').init(appconfig); var _ = require('lodash'); var moment = require('moment'); var Promise = require('bluebird'); +var fs = Promise.promisifyAll(require("fs-extra")); +var path = require('path'); var cron = require('cron').CronJob; // ---------------------------------------- @@ -44,6 +47,7 @@ var job = new cron({ // Prepare async job collector let jobs = []; + let repoPath = path.resolve(ROOTPATH, appconfig.datadir.repo); // ---------------------------------------- // Compile Jobs @@ -51,12 +55,58 @@ var job = new cron({ //-> Resync with Git remote - jobs.push(git.resync().then(() => { + jobs.push(git.onReady.then(() => { + return git.resync().then(() => { - //-> Purge outdated cache + //-> Stream all documents - return entries.purgeStaleCache(); + let cacheJobs = []; + fs.walk(repoPath).on('data', function (item) { + if(path.extname(item.path) === '.md') { + + let entryPath = entries.parsePath(entries.getEntryPathFromFullPath(item.path)); + let cachePath = entries.getCachePath(entryPath); + + //-> Purge outdated cache + + cacheJobs.push( + fs.statAsync(cachePath).then((st) => { + return moment(st.mtime).isBefore(item.stats.mtime) ? 'expired' : 'active'; + }).catch((err) => { + return (err.code !== 'EEXIST') ? err : 'new'; + }).then((fileStatus) => { + + //-> Delete expired cache file + + if(fileStatus === 'expired') { + return fs.unlinkAsync(cachePath).return(fileStatus); + } + + return fileStatus; + + }).then((fileStatus) => { + + //-> Update search index + + if(fileStatus !== 'active') { + return entries.fetchTextVersion(entryPath).then((content) => { + console.log(content); + }); + } + + return true; + + }) + + ); + + } + }); + + return Promise.all(cacheJobs); + + }); })); // ---------------------------------------- @@ -73,7 +123,8 @@ var job = new cron({ }, start: true, - timeZone: 'UTC' + timeZone: 'UTC', + runOnInit: true }); // ---------------------------------------- diff --git a/config.sample.yml b/config.sample.yml index b6aa879f..ff4c6a6b 100644 --- a/config.sample.yml +++ b/config.sample.yml @@ -1,6 +1,7 @@ ################################################### # REQUARKS WIKI - CONFIGURATION # ################################################### +# Full explanation + examples in the documentation (https://requarks-wiki.readme.io/) # ------------------------------------------------- # Title of this site @@ -32,7 +33,6 @@ datadir: # ------------------------------------------------- # Git Connection Info # ------------------------------------------------- -# Full explanation + examples in the documentation (https://requarks-wiki.readme.io/) git: url: https://github.com/Organization/Repo @@ -68,7 +68,8 @@ sessionSecret: 1234567890abcdefghijklmnopqrstuvxyz admin: admin@company.com # ------------------------------------------------- -# Default page for Home +# Site UI Language # ------------------------------------------------- +# Possible values: en, fr -homepage: Home.md \ No newline at end of file +lang: en \ No newline at end of file diff --git a/gulpfile.js b/gulpfile.js index 41d25349..5e5a966e 100644 --- a/gulpfile.js +++ b/gulpfile.js @@ -55,13 +55,18 @@ var paths = { '!./node_modules/font-awesome/fonts/*-webfont.svg' ], deploypackage: [ - './**/*', - '!node_modules', '!node_modules/**', - '!coverage', '!coverage/**', - '!client/js', '!client/js/**', - '!dist', '!dist/**', - '!tests', '!tests/**', - '!gulpfile.js', '!inch.json', '!config.yml', '!wiki.sublime-project' + './assets/**/*', + './client/content/**/*', + './controllers/**/*', + './locales/**/*', + './middlewares/**/*', + './models/**/*', + './views/**/*', + './LICENSE', + './agent.js', + './server.js', + './package.json', + './config.sample.yml' ] }; diff --git a/models/entries.js b/models/entries.js index 31325925..abcfd688 100644 --- a/models/entries.js +++ b/models/entries.js @@ -27,8 +27,8 @@ module.exports = { let self = this; - self._repoPath = appconfig.datadir.repo; - self._cachePath = path.join(appconfig.datadir.db, 'cache'); + self._repoPath = path.resolve(ROOTPATH, appconfig.datadir.repo); + self._cachePath = path.resolve(ROOTPATH, appconfig.datadir.db, 'cache'); return self; @@ -177,6 +177,32 @@ module.exports = { }, + /** + * Fetches a text version of a Markdown-formatted document + * + * @param {String} entryPath The entry path + * @return {String} Text-only version + */ + fetchTextVersion(entryPath) { + + let self = this; + + return self.fetchOriginal(entryPath, { + parseMarkdown: false, + parseMeta: true, + parseTree: false, + includeMarkdown: true, + includeParentInfo: false, + cache: false + }).then((pageData) => { + return { + meta: pageData.meta, + text: mark.removeMarkdown(pageData.markdown) + }; + }); + + }, + /** * Parse raw url path and make it safe * @@ -341,6 +367,8 @@ module.exports = { }, + + /** * Generate a starter page content based on the entry path * @@ -356,35 +384,6 @@ module.exports = { return _.replace(contents, new RegExp('{TITLE}', 'g'), formattedTitle); }); - }, - - purgeStaleCache() { - - let self = this; - - let cacheJobs = []; - - fs.walk(self._repoPath) - .on('data', function (item) { - if(path.extname(item.path) === '.md') { - - let entryPath = self.parsePath(self.getEntryPathFromFullPath(item.path)); - let cachePath = self.getCachePath(entryPath); - - cacheJobs.push(fs.statAsync(cachePath).then((st) => { - if(moment(st.mtime).isBefore(item.stats.mtime)) { - return fs.unlinkAsync(cachePath); - } else { - return true; - } - }).catch((err) => { - return (err.code !== 'EEXIST') ? err : true; - })); - } - }); - - return Promise.all(cacheJobs); - } }; \ No newline at end of file diff --git a/models/git.js b/models/git.js index ac54a923..b29fc9bb 100644 --- a/models/git.js +++ b/models/git.js @@ -19,8 +19,7 @@ module.exports = { _repo: { path: '', branch: 'master', - exists: false, - sync: true + exists: false }, _signature: { name: 'Wiki', @@ -30,6 +29,7 @@ module.exports = { clone: {}, push: {} }, + onReady: null, /** * Initialize Git model @@ -37,12 +37,10 @@ module.exports = { * @param {Object} appconfig The application config * @return {Object} Git model instance */ - init(appconfig, sync) { + init(appconfig) { let self = this; - self._repo.sync = sync; - //-> Build repository path if(_.isEmpty(appconfig.datadir.repo)) { @@ -53,13 +51,7 @@ module.exports = { //-> Initialize repository - self._initRepo(appconfig).then((repo) => { - - if(self._repo.sync) { - self.resync(); - } - - }); + self.onReady = self._initRepo(appconfig); // Define signature diff --git a/models/markdown.js b/models/markdown.js index ef14b938..ea40ce8b 100644 --- a/models/markdown.js +++ b/models/markdown.js @@ -12,7 +12,8 @@ var Promise = require('bluebird'), mdAttrs = require('markdown-it-attrs'), hljs = require('highlight.js'), cheerio = require('cheerio'), - _ = require('lodash'); + _ = require('lodash'), + mdRemove = require('remove-markdown'); // Load plugins @@ -157,6 +158,12 @@ const parseContent = (content) => { }; +/** + * Parse meta-data tags from content + * + * @param {String} content Markdown content + * @return {Object} Properties found in the content and their values + */ const parseMeta = (content) => { let commentMeta = new RegExp('','g'); @@ -171,6 +178,12 @@ const parseMeta = (content) => { module.exports = { + /** + * Parse content and return all data + * + * @param {String} content Markdown-formatted content + * @return {Object} Object containing meta, html and tree data + */ parse(content) { return { meta: parseMeta(content), @@ -181,6 +194,29 @@ module.exports = { parseContent, parseMeta, - parseTree + parseTree, + + /** + * Strips non-text elements from Markdown content + * + * @param {String} content Markdown-formatted content + * @return {String} Text-only version + */ + removeMarkdown(content) { + return mdRemove(_.chain(content) + .replace(//g, '') + .replace(/```[^`]+```/g, '') + .replace(/`[^`]+`/g, '') + .replace(new RegExp('(?!mailto:)(?:(?:http|https|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?', 'g'), '') + .replace(/\r?\n|\r/g, ' ') + .deburr() + .toLower() + .replace(/(\b([^a-z]+)\b)/g, ' ') + .replace(/[^a-z]+/g, ' ') + .replace(/(\b(\w{1,2})\b(\W|$))/g, '') + .replace(/\s\s+/g, ' ') + .value() + ); + } }; \ No newline at end of file diff --git a/models/search.js b/models/search.js new file mode 100644 index 00000000..a55a5299 --- /dev/null +++ b/models/search.js @@ -0,0 +1,42 @@ +"use strict"; + +var Promise = require('bluebird'), + _ = require('lodash'), + path = require('path'), + searchIndex = Promise.promisifyAll(require('search-index')), + stopWord = require('stopword'); + +/** + * Search Model + */ +module.exports = { + + _si: null, + + /** + * Initialize Search model + * + * @param {Object} appconfig The application config + * @return {Object} Search model instance + */ + init(appconfig) { + + let dbPath = path.resolve(ROOTPATH, appconfig.datadir.db, 'search-index'); + + this._si = searchIndex({ + deletable: true, + fieldedSearch: true, + indexPath: dbPath, + logLevel: 'error', + stopwords: stopWord.getStopwords(appconfig.lang).sort() + }, (err, si) => { + if(err) { + winston.error('Failed to initialize search-index.', err); + } + }); + + } + + + +}; \ No newline at end of file diff --git a/package.json b/package.json index 5b325aa2..2a3d1d13 100644 --- a/package.json +++ b/package.json @@ -72,6 +72,7 @@ "passport": "^0.3.2", "passport-local": "^1.0.0", "pug": "^2.0.0-beta6", + "remove-markdown": "^0.1.0", "search-index": "^0.8.15", "serve-favicon": "^2.3.0", "simplemde": "^1.11.2", diff --git a/server.js b/server.js index d6c5e554..431705eb 100644 --- a/server.js +++ b/server.js @@ -52,7 +52,6 @@ var ctrl = autoload(path.join(ROOTPATH, '/controllers')); // ---------------------------------------- global.app = express(); -global.ROOTPATH = __dirname; var _isDebug = (app.get('env') === 'development'); // ---------------------------------------- @@ -127,7 +126,6 @@ app.use(express.static(path.join(ROOTPATH, 'assets'))); app.locals._ = require('lodash'); app.locals.moment = require('moment'); app.locals.appconfig = appconfig; -//app.locals.appdata = require('./data.json'); app.use(mw.flash); // ---------------------------------------- @@ -195,16 +193,12 @@ server.on('listening', () => { }); // ---------------------------------------- -// Start Background Agent +// Start Agents // ---------------------------------------- var fork = require('child_process').fork; var bgAgent = fork('agent.js'); -bgAgent.on('message', (m) => { - -}); - process.on('exit', (code) => { bgAgent.disconnect(); }); \ No newline at end of file