Early work on background agent for search

This commit is contained in:
NGPixel 2016-09-04 01:12:42 -04:00
parent 576ba7fde2
commit 528fab6c87
9 changed files with 189 additions and 68 deletions

View File

@ -11,17 +11,20 @@ global.ROOTPATH = __dirname;
// ---------------------------------------- // ----------------------------------------
global.winston = require('winston'); global.winston = require('winston');
winston.info('[AGENT] Requarks Wiki BgAgent is initializing...'); winston.info('[AGENT] Background Agent is initializing...');
var appconfig = require('./models/config')('./config.yml'); var appconfig = require('./models/config')('./config.yml');
global.git = require('./models/git').init(appconfig, true); global.git = require('./models/git').init(appconfig);
global.entries = require('./models/entries').init(appconfig); global.entries = require('./models/entries').init(appconfig);
global.mark = require('./models/markdown'); global.mark = require('./models/markdown');
global.search = require('./models/search').init(appconfig);
var _ = require('lodash'); var _ = require('lodash');
var moment = require('moment'); var moment = require('moment');
var Promise = require('bluebird'); var Promise = require('bluebird');
var fs = Promise.promisifyAll(require("fs-extra"));
var path = require('path');
var cron = require('cron').CronJob; var cron = require('cron').CronJob;
// ---------------------------------------- // ----------------------------------------
@ -44,6 +47,7 @@ var job = new cron({
// Prepare async job collector // Prepare async job collector
let jobs = []; let jobs = [];
let repoPath = path.resolve(ROOTPATH, appconfig.datadir.repo);
// ---------------------------------------- // ----------------------------------------
// Compile Jobs // Compile Jobs
@ -51,12 +55,58 @@ var job = new cron({
//-> Resync with Git remote //-> Resync with Git remote
jobs.push(git.resync().then(() => { jobs.push(git.onReady.then(() => {
return git.resync().then(() => {
//-> Stream all documents
let cacheJobs = [];
fs.walk(repoPath).on('data', function (item) {
if(path.extname(item.path) === '.md') {
let entryPath = entries.parsePath(entries.getEntryPathFromFullPath(item.path));
let cachePath = entries.getCachePath(entryPath);
//-> Purge outdated cache //-> Purge outdated cache
return entries.purgeStaleCache(); cacheJobs.push(
fs.statAsync(cachePath).then((st) => {
return moment(st.mtime).isBefore(item.stats.mtime) ? 'expired' : 'active';
}).catch((err) => {
return (err.code !== 'EEXIST') ? err : 'new';
}).then((fileStatus) => {
//-> Delete expired cache file
if(fileStatus === 'expired') {
return fs.unlinkAsync(cachePath).return(fileStatus);
}
return fileStatus;
}).then((fileStatus) => {
//-> Update search index
if(fileStatus !== 'active') {
return entries.fetchTextVersion(entryPath).then((content) => {
console.log(content);
});
}
return true;
})
);
}
});
return Promise.all(cacheJobs);
});
})); }));
// ---------------------------------------- // ----------------------------------------
@ -73,7 +123,8 @@ var job = new cron({
}, },
start: true, start: true,
timeZone: 'UTC' timeZone: 'UTC',
runOnInit: true
}); });
// ---------------------------------------- // ----------------------------------------

View File

@ -1,6 +1,7 @@
################################################### ###################################################
# REQUARKS WIKI - CONFIGURATION # # REQUARKS WIKI - CONFIGURATION #
################################################### ###################################################
# Full explanation + examples in the documentation (https://requarks-wiki.readme.io/)
# ------------------------------------------------- # -------------------------------------------------
# Title of this site # Title of this site
@ -32,7 +33,6 @@ datadir:
# ------------------------------------------------- # -------------------------------------------------
# Git Connection Info # Git Connection Info
# ------------------------------------------------- # -------------------------------------------------
# Full explanation + examples in the documentation (https://requarks-wiki.readme.io/)
git: git:
url: https://github.com/Organization/Repo url: https://github.com/Organization/Repo
@ -68,7 +68,8 @@ sessionSecret: 1234567890abcdefghijklmnopqrstuvxyz
admin: admin@company.com admin: admin@company.com
# ------------------------------------------------- # -------------------------------------------------
# Default page for Home # Site UI Language
# ------------------------------------------------- # -------------------------------------------------
# Possible values: en, fr
homepage: Home.md lang: en

View File

@ -55,13 +55,18 @@ var paths = {
'!./node_modules/font-awesome/fonts/*-webfont.svg' '!./node_modules/font-awesome/fonts/*-webfont.svg'
], ],
deploypackage: [ deploypackage: [
'./**/*', './assets/**/*',
'!node_modules', '!node_modules/**', './client/content/**/*',
'!coverage', '!coverage/**', './controllers/**/*',
'!client/js', '!client/js/**', './locales/**/*',
'!dist', '!dist/**', './middlewares/**/*',
'!tests', '!tests/**', './models/**/*',
'!gulpfile.js', '!inch.json', '!config.yml', '!wiki.sublime-project' './views/**/*',
'./LICENSE',
'./agent.js',
'./server.js',
'./package.json',
'./config.sample.yml'
] ]
}; };

View File

@ -27,8 +27,8 @@ module.exports = {
let self = this; let self = this;
self._repoPath = appconfig.datadir.repo; self._repoPath = path.resolve(ROOTPATH, appconfig.datadir.repo);
self._cachePath = path.join(appconfig.datadir.db, 'cache'); self._cachePath = path.resolve(ROOTPATH, appconfig.datadir.db, 'cache');
return self; return self;
@ -177,6 +177,32 @@ module.exports = {
}, },
/**
* Fetches a text version of a Markdown-formatted document
*
* @param {String} entryPath The entry path
* @return {String} Text-only version
*/
fetchTextVersion(entryPath) {
let self = this;
return self.fetchOriginal(entryPath, {
parseMarkdown: false,
parseMeta: true,
parseTree: false,
includeMarkdown: true,
includeParentInfo: false,
cache: false
}).then((pageData) => {
return {
meta: pageData.meta,
text: mark.removeMarkdown(pageData.markdown)
};
});
},
/** /**
* Parse raw url path and make it safe * Parse raw url path and make it safe
* *
@ -341,6 +367,8 @@ module.exports = {
}, },
/** /**
* Generate a starter page content based on the entry path * Generate a starter page content based on the entry path
* *
@ -356,35 +384,6 @@ module.exports = {
return _.replace(contents, new RegExp('{TITLE}', 'g'), formattedTitle); return _.replace(contents, new RegExp('{TITLE}', 'g'), formattedTitle);
}); });
},
/**
 * Delete cache files that are older than their Markdown source
 *
 * Walks the repository path and, for every `.md` file, compares the
 * modification time of the matching cache file with that of the source.
 *
 * @return {Promise} Promise.all over the per-file jobs collected so far
 */
purgeStaleCache() {
let self = this;
let cacheJobs = [];
// NOTE(review): jobs are pushed from inside the 'data' handler, while
// Promise.all(cacheJobs) below runs right after the handler is registered.
// Whether any job has been collected by then depends on when fs.walk emits
// its events - verify against the fs-extra walk documentation.
fs.walk(self._repoPath)
.on('data', function (item) {
// Only Markdown documents have a cache counterpart
if(path.extname(item.path) === '.md') {
let entryPath = self.parsePath(self.getEntryPathFromFullPath(item.path));
let cachePath = self.getCachePath(entryPath);
cacheJobs.push(fs.statAsync(cachePath).then((st) => {
// Cache older than the source file: remove it
if(moment(st.mtime).isBefore(item.stats.mtime)) {
return fs.unlinkAsync(cachePath);
} else {
return true;
}
}).catch((err) => {
// Errors are turned into resolved values here, never re-thrown.
// NOTE(review): a stat on a missing path normally reports ENOENT, not
// EEXIST - confirm which code was intended.
return (err.code !== 'EEXIST') ? err : true;
}));
}
});
return Promise.all(cacheJobs);
} }
}; };

View File

@ -19,8 +19,7 @@ module.exports = {
_repo: { _repo: {
path: '', path: '',
branch: 'master', branch: 'master',
exists: false, exists: false
sync: true
}, },
_signature: { _signature: {
name: 'Wiki', name: 'Wiki',
@ -30,6 +29,7 @@ module.exports = {
clone: {}, clone: {},
push: {} push: {}
}, },
onReady: null,
/** /**
* Initialize Git model * Initialize Git model
@ -37,12 +37,10 @@ module.exports = {
* @param {Object} appconfig The application config * @param {Object} appconfig The application config
* @return {Object} Git model instance * @return {Object} Git model instance
*/ */
init(appconfig, sync) { init(appconfig) {
let self = this; let self = this;
self._repo.sync = sync;
//-> Build repository path //-> Build repository path
if(_.isEmpty(appconfig.datadir.repo)) { if(_.isEmpty(appconfig.datadir.repo)) {
@ -53,13 +51,7 @@ module.exports = {
//-> Initialize repository //-> Initialize repository
self._initRepo(appconfig).then((repo) => { self.onReady = self._initRepo(appconfig);
if(self._repo.sync) {
self.resync();
}
});
// Define signature // Define signature

View File

@ -12,7 +12,8 @@ var Promise = require('bluebird'),
mdAttrs = require('markdown-it-attrs'), mdAttrs = require('markdown-it-attrs'),
hljs = require('highlight.js'), hljs = require('highlight.js'),
cheerio = require('cheerio'), cheerio = require('cheerio'),
_ = require('lodash'); _ = require('lodash'),
mdRemove = require('remove-markdown');
// Load plugins // Load plugins
@ -157,6 +158,12 @@ const parseContent = (content) => {
}; };
/**
* Parse meta-data tags from content
*
* @param {String} content Markdown content
* @return {Object} Properties found in the content and their values
*/
const parseMeta = (content) => { const parseMeta = (content) => {
let commentMeta = new RegExp('<!-- ?([a-zA-Z]+):(.*)-->','g'); let commentMeta = new RegExp('<!-- ?([a-zA-Z]+):(.*)-->','g');
@ -171,6 +178,12 @@ const parseMeta = (content) => {
module.exports = { module.exports = {
/**
* Parse content and return all data
*
* @param {String} content Markdown-formatted content
* @return {Object} Object containing meta, html and tree data
*/
parse(content) { parse(content) {
return { return {
meta: parseMeta(content), meta: parseMeta(content),
@ -181,6 +194,29 @@ module.exports = {
parseContent, parseContent,
parseMeta, parseMeta,
parseTree parseTree,
/**
* Strips non-text elements from Markdown content
*
* @param {String} content Markdown-formatted content
* @return {String} Text-only version
*/
removeMarkdown(content) {
return mdRemove(_.chain(content)
.replace(/<!-- ?([a-zA-Z]+):(.*)-->/g, '')
.replace(/```[^`]+```/g, '')
.replace(/`[^`]+`/g, '')
.replace(new RegExp('(?!mailto:)(?:(?:http|https|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?', 'g'), '')
.replace(/\r?\n|\r/g, ' ')
.deburr()
.toLower()
.replace(/(\b([^a-z]+)\b)/g, ' ')
.replace(/[^a-z]+/g, ' ')
.replace(/(\b(\w{1,2})\b(\W|$))/g, '')
.replace(/\s\s+/g, ' ')
.value()
);
}
}; };

42
models/search.js Normal file
View File

@ -0,0 +1,42 @@
"use strict";
var Promise = require('bluebird'),
_ = require('lodash'),
path = require('path'),
searchIndex = Promise.promisifyAll(require('search-index')),
stopWord = require('stopword');
/**
* Search Model
*/
module.exports = {
_si: null,
/**
* Initialize Search model
*
* @param {Object} appconfig The application config
* @return {Object} Search model instance
*/
init(appconfig) {
let dbPath = path.resolve(ROOTPATH, appconfig.datadir.db, 'search-index');
this._si = searchIndex({
deletable: true,
fieldedSearch: true,
indexPath: dbPath,
logLevel: 'error',
stopwords: stopWord.getStopwords(appconfig.lang).sort()
}, (err, si) => {
if(err) {
winston.error('Failed to initialize search-index.', err);
}
});
}
};

View File

@ -72,6 +72,7 @@
"passport": "^0.3.2", "passport": "^0.3.2",
"passport-local": "^1.0.0", "passport-local": "^1.0.0",
"pug": "^2.0.0-beta6", "pug": "^2.0.0-beta6",
"remove-markdown": "^0.1.0",
"search-index": "^0.8.15", "search-index": "^0.8.15",
"serve-favicon": "^2.3.0", "serve-favicon": "^2.3.0",
"simplemde": "^1.11.2", "simplemde": "^1.11.2",

View File

@ -52,7 +52,6 @@ var ctrl = autoload(path.join(ROOTPATH, '/controllers'));
// ---------------------------------------- // ----------------------------------------
global.app = express(); global.app = express();
global.ROOTPATH = __dirname;
var _isDebug = (app.get('env') === 'development'); var _isDebug = (app.get('env') === 'development');
// ---------------------------------------- // ----------------------------------------
@ -127,7 +126,6 @@ app.use(express.static(path.join(ROOTPATH, 'assets')));
app.locals._ = require('lodash'); app.locals._ = require('lodash');
app.locals.moment = require('moment'); app.locals.moment = require('moment');
app.locals.appconfig = appconfig; app.locals.appconfig = appconfig;
//app.locals.appdata = require('./data.json');
app.use(mw.flash); app.use(mw.flash);
// ---------------------------------------- // ----------------------------------------
@ -195,16 +193,12 @@ server.on('listening', () => {
}); });
// ---------------------------------------- // ----------------------------------------
// Start Background Agent // Start Agents
// ---------------------------------------- // ----------------------------------------
var fork = require('child_process').fork; var fork = require('child_process').fork;
var bgAgent = fork('agent.js'); var bgAgent = fork('agent.js');
bgAgent.on('message', (m) => {
});
process.on('exit', (code) => { process.on('exit', (code) => {
bgAgent.disconnect(); bgAgent.disconnect();
}); });