fix: search not indexing properly; add CJK/Arabic support to validators

2017-05-14 11:24:05 -04:00
parent 8f974a3ac7
commit f5aa726cd5
12 changed files with 26 additions and 23 deletions
--- a/server/libs/entries.js
+++ b/server/libs/entries.js
@@ -266,6 +266,10 @@ module.exports = {
      }, {
        new: true,
        upsert: true
+      }).then(result => {
+        let plainResult = result.toObject()
+        plainResult.text = content.text
+        return plainResult
      })
    }).then(result => {
      return self.updateTreeInfo().then(() => {
--- a/server/libs/local.js
+++ b/server/libs/local.js
@@ -154,7 +154,7 @@ module.exports = {
   */
  validateUploadsFilename (f, fld, isImage) {
    let fObj = path.parse(f)
-    let fname = _.chain(fObj.name).trim().toLower().kebabCase().value().replace(new RegExp('(?!([^a-z0-9-]|' + appdata.regex.cjk.source + '))', 'g'), '')
+    let fname = _.chain(fObj.name).trim().toLower().kebabCase().value().replace(new RegExp('[^a-z0-9-' + appdata.regex.cjk + appdata.regex.arabic + ']', 'g'), '')
    let fext = _.toLower(fObj.ext)

    if (isImage && !_.includes(['.jpg', '.jpeg', '.png', '.gif', '.webp'], fext)) {
--- a/server/libs/markdown.js
+++ b/server/libs/markdown.js
@@ -87,6 +87,10 @@ const videoRules = [
  }
 ]

+// Non-markdown text filter: matches runs of a-z, 0-9, "-", ".", "," plus the CJK and Arabic ranges from appdata.regex
+
+const textRegex = new RegExp('\\b[a-z0-9-.,' + appdata.regex.cjk + appdata.regex.arabic + ']+\\b', 'g')
+
 /**
 * Parse markdown content and build TOC tree
 *
@@ -290,20 +294,15 @@ const parseMeta = (content) => {
 * @return     {String}  Text-only version
 */
 const removeMarkdown = (content) => {
-  return mdRemove(_.chain(content)
+  return _.join(mdRemove(_.chain(content)
    .replace(/<!-- ?([a-zA-Z]+):(.*)-->/g, '')
-    .replace(/```[^`]+```/g, '')
+    .replace(/```([^`]|`)+?```/g, '')
    .replace(/`[^`]+`/g, '')
    .replace(new RegExp('(?!mailto:)(?:(?:http|https|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?', 'g'), '')
-    .replace(/\r?\n|\r/g, ' ')
    .deburr()
    .toLower()
-    .replace(/(\b([^a-z]+)\b)/g, ' ')
-    .replace(/[^a-z]+/g, ' ')
-    .replace(/(\b(\w{1,2})\b(\W|$))/g, '')
-    .replace(/\s\s+/g, ' ')
    .value()
-  )
+  ).replace(/\r?\n|\r/g, ' ').match(textRegex), ' ')
 }

 module.exports = {
--- a/server/libs/search.js
+++ b/server/libs/search.js
@@ -92,7 +92,7 @@ module.exports = {
          title: content.title,
          subtitle: content.subtitle || '',
          parent: content.parent || '',
-          content: content.content || ''
+          content: content.text || ''
        }]).then(() => {
          winston.log('verbose', 'Entry ' + content._id + ' added/updated to search index.')
          return true
@@ -160,7 +160,7 @@ module.exports = {
              .deburr()
              .toLower()
              .trim()
-              .replace(/[^a-z0-9 ]/g, '')
+              .replace(/[^a-z0-9 ]/g, ' ')
              .value()
    let arrTerms = _.chain(terms)
                    .split(' ')