From f5aa726cd54fd96b003d79441f05cd0099728c2c Mon Sep 17 00:00:00 2001 From: NGPixel Date: Sun, 14 May 2017 11:24:05 -0400 Subject: [PATCH] fix: search not indexing properly + cjk/arabic validators --- server/app/regex.js | 4 ++-- server/controllers/uploads.js | 2 +- server/helpers/entry.js | 2 +- server/helpers/security.js | 2 +- server/libs/entries.js | 4 ++++ server/libs/local.js | 2 +- server/libs/markdown.js | 15 +++++++-------- server/libs/search.js | 4 ++-- server/views/pages/admin/profile.pug | 4 ++-- server/views/pages/admin/settings.pug | 2 +- server/views/pages/admin/users-edit.pug | 4 ++-- server/views/pages/admin/users.pug | 4 ++-- 12 files changed, 26 insertions(+), 23 deletions(-) diff --git a/server/app/regex.js b/server/app/regex.js index f3b4490e..490e4b3e 100644 --- a/server/app/regex.js +++ b/server/app/regex.js @@ -1,8 +1,8 @@ 'use strict' module.exports = { - arabic: /([\u0600-\u06ff]|[\u0750-\u077f]|[\ufb50-\ufc3f]|[\ufe70-\ufefc])/, - cjk: /([\u4E00-\u9FBF]|[\u3040-\u309F\u30A0-\u30FF]|[ㄱ-ㅎ가-힣ㅏ-ㅣ])/, + arabic: '\u0600-\u06ff\u0750-\u077f\ufb50-\ufc3f\ufe70-\ufefc', + cjk: '\u4E00-\u9FBF\u3040-\u309F\u30A0-\u30FFㄱ-ㅎ가-힣ㅏ-ㅣ', youtube: /(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/)|(?:(?:watch)?\?v(?:i)?=|&v(?:i)?=))([^#&?]*).*/, vimeo: /vimeo.com\/(?:channels\/(?:\w+\/)?|groups\/(?:[^/]*)\/videos\/|album\/(?:\d+)\/video\/|)(\d+)(?:$|\/|\?)/, dailymotion: /(?:dailymotion\.com(?:\/embed)?(?:\/video|\/hub)|dai\.ly)\/([0-9a-z]+)(?:[-_0-9a-zA-Z]+(?:#video=)?([a-z0-9]+)?)?/ diff --git a/server/controllers/uploads.js b/server/controllers/uploads.js index ddcc92d2..a2a3f2f2 100644 --- a/server/controllers/uploads.js +++ b/server/controllers/uploads.js @@ -12,7 +12,7 @@ const fs = Promise.promisifyAll(require('fs-extra')) const path = require('path') const _ = require('lodash') -const validPathRe = new RegExp('^(([a-z0-9/-]|' + appdata.regex.cjk.source + ')+\\.[a-z0-9]+)$') +const validPathRe = new RegExp('^([a-z0-9/-' + appdata.regex.cjk + appdata.regex.arabic + ']+\\.[a-z0-9]+)$') const validPathThumbsRe = new RegExp('^([a-z0-9]+\\.png)$') // ========================================== diff --git a/server/helpers/entry.js b/server/helpers/entry.js index b32b133b..c476cc5e 100644 --- a/server/helpers/entry.js +++ b/server/helpers/entry.js @@ -14,7 +14,7 @@ module.exports = { */ parsePath (urlPath) { urlPath = qs.unescape(urlPath) - let wlist = new RegExp('(?!([^a-z0-9]|' + appdata.regex.cjk.source + '|[/-]))', 'g') + let wlist = new RegExp('[^a-z0-9' + appdata.regex.cjk + appdata.regex.arabic + '/-]', 'g') urlPath = _.toLower(urlPath).replace(wlist, '') diff --git a/server/helpers/security.js b/server/helpers/security.js index c82b30ed..ba78a552 100644 --- a/server/helpers/security.js +++ b/server/helpers/security.js @@ -6,7 +6,7 @@ const _ = require('lodash') module.exports = { sanitizeCommitUser (user) { - let wlist = new RegExp('(?!([^a-zA-Z0-9-_.\',& ]|' + appdata.regex.cjk.source + '))', 'g') + let wlist = new RegExp('[^a-zA-Z0-9-_.\',& ' + appdata.regex.cjk + appdata.regex.arabic + ']', 'g') return { name: _.chain(user.name).replace(wlist, '').trim().value(), email: appconfig.git.showUserEmail ? user.email : appconfig.git.serverEmail diff --git a/server/libs/entries.js b/server/libs/entries.js index 020390ab..1f765088 100644 --- a/server/libs/entries.js +++ b/server/libs/entries.js @@ -266,6 +266,10 @@ module.exports = { }, { new: true, upsert: true + }).then(result => { + let plainResult = result.toObject() + plainResult.text = content.text + return plainResult }) }).then(result => { return self.updateTreeInfo().then(() => { diff --git a/server/libs/local.js b/server/libs/local.js index 381041ae..3053e63b 100644 --- a/server/libs/local.js +++ b/server/libs/local.js @@ -154,7 +154,7 @@ module.exports = { */ validateUploadsFilename (f, fld, isImage) { let fObj = path.parse(f) - let fname = _.chain(fObj.name).trim().toLower().kebabCase().value().replace(new RegExp('(?!([^a-z0-9-]|' + appdata.regex.cjk.source + '))', 'g'), '') + let fname = _.chain(fObj.name).trim().toLower().kebabCase().value().replace(new RegExp('[^a-z0-9-' + appdata.regex.cjk + appdata.regex.arabic + ']', 'g'), '') let fext = _.toLower(fObj.ext) if (isImage && !_.includes(['.jpg', '.jpeg', '.png', '.gif', '.webp'], fext)) { diff --git a/server/libs/markdown.js b/server/libs/markdown.js index 012b2e0a..c5b79056 100644 --- a/server/libs/markdown.js +++ b/server/libs/markdown.js @@ -87,6 +87,10 @@ const videoRules = [ } ] +// Non-markdown filter + +const textRegex = new RegExp('\\b[a-z0-9-.,' + appdata.regex.cjk + appdata.regex.arabic + ']+\\b', 'g') + /** * Parse markdown content and build TOC tree * @@ -290,20 +294,15 @@ const parseMeta = (content) => { * @return {String} Text-only version */ const removeMarkdown = (content) => { - return mdRemove(_.chain(content) + return _.join(mdRemove(_.chain(content) .replace(//g, '') - .replace(/```[^`]+```/g, '') + .replace(/```([^`]|`)+?```/g, '') .replace(/`[^`]+`/g, '') .replace(new RegExp('(?!mailto:)(?:(?:http|https|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?', 'g'), '') - .replace(/\r?\n|\r/g, ' ') .deburr() .toLower() - .replace(/(\b([^a-z]+)\b)/g, ' ') - .replace(/[^a-z]+/g, ' ') - .replace(/(\b(\w{1,2})\b(\W|$))/g, '') - .replace(/\s\s+/g, ' ') .value() - ) + ).replace(/\r?\n|\r/g, ' ').match(textRegex), ' ') } module.exports = { diff --git a/server/libs/search.js b/server/libs/search.js index 724886c1..ff1b6e80 100644 --- a/server/libs/search.js +++ b/server/libs/search.js @@ -92,7 +92,7 @@ module.exports = { title: content.title, subtitle: content.subtitle || '', parent: content.parent || '', - content: content.content || '' + content: content.text || '' }]).then(() => { winston.log('verbose', 'Entry ' + content._id + ' added/updated to search index.') return true @@ -160,7 +160,7 @@ module.exports = { .deburr() .toLower() .trim() - .replace(/[^a-z0-9 ]/g, '') + .replace(/[^a-z0-9 ]/g, ' ') .value() let arrTerms = _.chain(terms) .split(' ') diff --git a/server/views/pages/admin/profile.pug b/server/views/pages/admin/profile.pug index 1c54549f..b32a5154 100644 --- a/server/views/pages/admin/profile.pug +++ b/server/views/pages/admin/profile.pug @@ -45,9 +45,9 @@ block adminContent default: i.icon-warning = t('auth:providers.' + user.provider) label.label= t('admin:profile.membersince') - p.control= userMoment(user.createdAt).format('LL') + p.control= moment(user.createdAt).format('LL') label.label= t('admin:profile.lastprofileupdate') - p.control= userMoment(user.updatedAt).format('LL') + p.control= moment(user.updatedAt).format('LL') script(type='text/javascript'). var usrDataName = "!{user.name}"; diff --git a/server/views/pages/admin/settings.pug b/server/views/pages/admin/settings.pug index afba7807..29601be8 100644 --- a/server/views/pages/admin/settings.pug +++ b/server/views/pages/admin/settings.pug @@ -12,7 +12,7 @@ block adminContent .section-block p #{t('admin:settings.currentversion')}: #[strong= sysversion.current] if sysversion.latest - p #{t('admin:settings.latestversion')}: #[strong= sysversion.latest] #[em (Published #{userMoment(sysversion.latestPublishedAt).fromNow()})] + p #{t('admin:settings.latestversion')}: #[strong= sysversion.latest] #[em (Published #{moment(sysversion.latestPublishedAt).fromNow()})] p if sysversion.current !== sysversion.latest button.button.is-deep-orange(v-on:click='upgrade')= t('admin:settings.upgrade') diff --git a/server/views/pages/admin/users-edit.pug b/server/views/pages/admin/users-edit.pug index 1c777a37..e48c7ae6 100644 --- a/server/views/pages/admin/users-edit.pug +++ b/server/views/pages/admin/users-edit.pug @@ -34,8 +34,8 @@ block adminContent when 'ldap': i.icon-arrow-repeat-outline default: i.icon-warning = t('auth:providers.' + usr.provider) - td.is-centered= userMoment(usr.createdAt).format('lll') - td.is-centered= userMoment(usr.updatedAt).format('lll') + td.is-centered= moment(usr.createdAt).format('lll') + td.is-centered= moment(usr.updatedAt).format('lll') .form-sections section label.label= t('admin:profile.email') diff --git a/server/views/pages/admin/users.pug b/server/views/pages/admin/users.pug index 251641f9..48acd039 100644 --- a/server/views/pages/admin/users.pug +++ b/server/views/pages/admin/users.pug @@ -41,7 +41,7 @@ block adminContent when 'ldap': i.icon-arrow-repeat-outline default: i.icon-warning = t('auth:providers.' + usr.provider) - td.is-centered= userMoment(usr.createdAt).format('lll') - td.is-centered= userMoment(usr.updatedAt).format('lll') + td.is-centered= moment(usr.createdAt).format('lll') + td.is-centered= moment(usr.updatedAt).format('lll') include ../../modals/admin-createuser.pug