fix: restore unicode chars for search content

This commit is contained in:
Nick 2019-09-21 10:36:09 -04:00
parent 89754ca7fc
commit 744e6e3248
3 changed files with 7 additions and 4 deletions

View File

@@ -76,6 +76,7 @@
"graphql-rate-limit-directive": "1.1.0", "graphql-rate-limit-directive": "1.1.0",
"graphql-subscriptions": "1.1.0", "graphql-subscriptions": "1.1.0",
"graphql-tools": "4.0.5", "graphql-tools": "4.0.5",
"he": "1.2.0",
"highlight.js": "9.15.10", "highlight.js": "9.15.10",
"i18next": "17.0.15", "i18next": "17.0.15",
"i18next-express-middleware": "1.8.2", "i18next-express-middleware": "1.8.2",

View File

@@ -7,6 +7,7 @@ const fs = require('fs-extra')
const yaml = require('js-yaml') const yaml = require('js-yaml')
const striptags = require('striptags') const striptags = require('striptags')
const emojiRegex = require('emoji-regex') const emojiRegex = require('emoji-regex')
const he = require('he')
/* global WIKI */ /* global WIKI */
@@ -17,7 +18,7 @@ const frontmatterRegex = {
} }
const punctuationRegex = /[!,:;/\\_+\-=()&#@<>$~%^*[\]{}"'|]+|(\.\s)|(\s\.)/ig const punctuationRegex = /[!,:;/\\_+\-=()&#@<>$~%^*[\]{}"'|]+|(\.\s)|(\s\.)/ig
const htmlEntitiesRegex = /(&#[0-9]{3};)|(&#x[a-zA-Z0-9]{2};)/ig // const htmlEntitiesRegex = /(&#[0-9]{3};)|(&#x[a-zA-Z0-9]{2};)/ig
/** /**
* Pages model * Pages model
@@ -663,9 +664,10 @@ module.exports = class Page extends Model {
* @returns {string} Cleaned Content Text * @returns {string} Cleaned Content Text
*/ */
static cleanHTML(rawHTML = '') { static cleanHTML(rawHTML = '') {
return striptags(rawHTML || '') let data = striptags(rawHTML || '')
.replace(emojiRegex(), '') .replace(emojiRegex(), '')
.replace(htmlEntitiesRegex, '') // .replace(htmlEntitiesRegex, '')
return he.decode(data)
.replace(punctuationRegex, ' ') .replace(punctuationRegex, ' ')
.replace(/(\r\n|\n|\r)/gm, ' ') .replace(/(\r\n|\n|\r)/gm, ' ')
.replace(/\s\s+/g, ' ') .replace(/\s\s+/g, ' ')

View File

@@ -6215,7 +6215,7 @@ hash.js@^1.0.0, hash.js@^1.0.3, hash.js@^1.1.3:
inherits "^2.0.3" inherits "^2.0.3"
minimalistic-assert "^1.0.1" minimalistic-assert "^1.0.1"
he@^1.1.0, he@^1.2.0: he@1.2.0, he@^1.1.0, he@^1.2.0:
version "1.2.0" version "1.2.0"
resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f" resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f"
integrity sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw== integrity sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==