wikijs-fork/server/modules/search/elasticsearch/engine.js

375 lines
10 KiB
JavaScript
Raw Normal View History

2019-03-23 22:18:36 +00:00
const _ = require('lodash')
const stream = require('stream')
const Promise = require('bluebird')
const fs = require('fs')
const pipeline = Promise.promisify(stream.pipeline)
2018-09-01 03:42:14 +00:00
2019-03-23 22:18:36 +00:00
/* global WIKI */
2018-09-01 03:42:14 +00:00
2019-03-23 22:18:36 +00:00
module.exports = {
async activate() {
// not used
2018-09-01 03:42:14 +00:00
},
2019-03-23 22:18:36 +00:00
async deactivate() {
// not used
2018-09-01 03:42:14 +00:00
},
2019-03-23 22:18:36 +00:00
/**
* INIT
*/
async init() {
WIKI.logger.info(`(SEARCH/ELASTICSEARCH) Initializing...`)
switch (this.config.apiVersion) {
case '7.x':
const { Client: Client7 } = require('elasticsearch7')
this.client = new Client7({
nodes: this.config.hosts.split(',').map(_.trim),
sniffOnStart: this.config.sniffOnStart,
sniffInterval: (this.config.sniffInterval > 0) ? this.config.sniffInterval : false,
ssl: getTlsOptions(this.config),
name: 'wiki-js'
})
break
case '6.x':
const { Client: Client6 } = require('elasticsearch6')
this.client = new Client6({
nodes: this.config.hosts.split(',').map(_.trim),
sniffOnStart: this.config.sniffOnStart,
sniffInterval: (this.config.sniffInterval > 0) ? this.config.sniffInterval : false,
ssl: getTlsOptions(this.config),
name: 'wiki-js'
})
break
default:
throw new Error('Unsupported version of elasticsearch! Update your settings in the Administration Area.')
}
2018-09-01 03:42:14 +00:00
2019-03-23 22:18:36 +00:00
// -> Create Search Index
await this.createIndex()
2018-09-01 03:42:14 +00:00
2019-03-23 22:18:36 +00:00
WIKI.logger.info(`(SEARCH/ELASTICSEARCH) Initialization completed.`)
2018-09-01 03:42:14 +00:00
},
2019-03-23 22:18:36 +00:00
/**
* Create Index
*/
async createIndex() {
try {
const indexExists = await this.client.indices.exists({ index: this.config.indexName })
if (!indexExists.body) {
WIKI.logger.info(`(SEARCH/ELASTICSEARCH) Creating index...`)
try {
const idxBody = {
properties: {
suggest: { type: 'completion' },
title: { type: 'text', boost: 10.0 },
description: { type: 'text', boost: 3.0 },
content: { type: 'text', boost: 1.0 },
locale: { type: 'keyword' },
path: { type: 'text' },
tags: { type: 'text', boost: 8.0 }
2019-03-23 22:18:36 +00:00
}
}
await this.client.indices.create({
index: this.config.indexName,
body: {
mappings: (this.config.apiVersion === '6.x') ? {
_doc: idxBody
} : idxBody,
settings: {
analysis: {
analyzer: {
default: {
type: this.config.analyzer
}
}
}
}
}
})
} catch (err) {
WIKI.logger.error(`(SEARCH/ELASTICSEARCH) Create Index Error: `, _.get(err, 'meta.body.error', err))
2019-03-23 22:18:36 +00:00
}
}
} catch (err) {
WIKI.logger.error(`(SEARCH/ELASTICSEARCH) Index Check Error: `, _.get(err, 'meta.body.error', err))
2019-03-23 22:18:36 +00:00
}
2018-09-01 03:42:14 +00:00
},
2019-03-23 22:18:36 +00:00
/**
* QUERY
*
* @param {String} q Query
* @param {Object} opts Additional options
*/
async query(q, opts) {
try {
const results = await this.client.search({
index: this.config.indexName,
body: {
query: {
simple_query_string: {
query: `*${q}*`,
fields: ['title^20', 'description^3', 'tags^8', 'content^1'],
default_operator: 'and',
analyze_wildcard: true
2019-03-23 22:18:36 +00:00
}
},
from: 0,
size: 50,
_source: ['title', 'description', 'path', 'locale'],
suggest: {
suggestions: {
text: q,
completion: {
field: 'suggest',
size: 5,
skip_duplicates: true,
fuzzy: true
}
}
}
}
})
return {
results: _.get(results, 'body.hits.hits', []).map(r => ({
2019-03-23 22:18:36 +00:00
id: r._id,
locale: r._source.locale,
path: r._source.path,
title: r._source.title,
description: r._source.description
})),
suggestions: _.reject(_.get(results, 'suggest.suggestions', []).map(s => _.get(s, 'options[0].text', false)), s => !s),
totalHits: _.get(results, 'body.hits.total.value', _.get(results, 'body.hits.total', 0))
2019-03-23 22:18:36 +00:00
}
} catch (err) {
WIKI.logger.warn('Search Engine Error: ', _.get(err, 'meta.body.error', err))
2019-03-23 22:18:36 +00:00
}
},
/**
* Build tags field
* @param id
* @returns {Promise<*|*[]>}
*/
async buildTags(id) {
const tags = await WIKI.models.pages.query().findById(id).select('*').withGraphJoined('tags')
return (tags.tags && tags.tags.length > 0) ? tags.tags.map(function (tag) {
return tag.title
}) : []
},
2019-03-23 22:18:36 +00:00
/**
* Build suggest field
*/
buildSuggest(page) {
return _.reject(_.uniq(_.concat(
2019-03-23 22:18:36 +00:00
page.title.split(' ').map(s => ({
input: s,
weight: 10
2019-03-23 22:18:36 +00:00
})),
page.description.split(' ').map(s => ({
input: s,
weight: 3
})),
page.safeContent.split(' ').map(s => ({
2019-03-23 22:18:36 +00:00
input: s,
weight: 1
}))
)), ['input', ''])
2019-03-23 22:18:36 +00:00
},
/**
* CREATE
*
* @param {Object} page Page to create
*/
async created(page) {
await this.client.index({
index: this.config.indexName,
type: '_doc',
id: page.hash,
body: {
suggest: this.buildSuggest(page),
locale: page.localeCode,
path: page.path,
title: page.title,
description: page.description,
content: page.safeContent,
tags: await this.buildTags(page.id)
2019-03-23 22:18:36 +00:00
},
refresh: true
})
},
/**
* UPDATE
*
* @param {Object} page Page to update
*/
async updated(page) {
await this.client.index({
index: this.config.indexName,
type: '_doc',
id: page.hash,
body: {
suggest: this.buildSuggest(page),
locale: page.localeCode,
path: page.path,
title: page.title,
description: page.description,
content: page.safeContent,
tags: await this.buildTags(page.id)
2019-03-23 22:18:36 +00:00
},
refresh: true
})
2018-09-01 03:42:14 +00:00
},
2019-03-23 22:18:36 +00:00
/**
* DELETE
*
* @param {Object} page Page to delete
*/
async deleted(page) {
await this.client.delete({
index: this.config.indexName,
type: '_doc',
id: page.hash,
refresh: true
})
},
/**
* RENAME
*
* @param {Object} page Page to rename
*/
async renamed(page) {
await this.client.delete({
index: this.config.indexName,
type: '_doc',
2019-10-13 23:59:50 +00:00
id: page.hash,
2019-03-23 22:18:36 +00:00
refresh: true
})
await this.client.index({
index: this.config.indexName,
type: '_doc',
id: page.destinationHash,
body: {
suggest: this.buildSuggest(page),
2019-10-13 23:59:50 +00:00
locale: page.destinationLocaleCode,
2019-03-23 22:18:36 +00:00
path: page.destinationPath,
title: page.title,
description: page.description,
content: page.safeContent,
tags: await this.buildTags(page.id)
2019-03-23 22:18:36 +00:00
},
refresh: true
})
},
/**
* REBUILD INDEX
*/
async rebuild() {
WIKI.logger.info(`(SEARCH/ELASTICSEARCH) Rebuilding Index...`)
await this.client.indices.delete({ index: this.config.indexName })
await this.createIndex()
const MAX_INDEXING_BYTES = 10 * Math.pow(2, 20) - Buffer.from('[').byteLength - Buffer.from(']').byteLength // 10 MB
const MAX_INDEXING_COUNT = 1000
const COMMA_BYTES = Buffer.from(',').byteLength
let chunks = []
let bytes = 0
const processDocument = async (cb, doc) => {
try {
if (doc) {
const docBytes = Buffer.from(JSON.stringify(doc)).byteLength
doc['tags'] = await this.buildTags(doc.realId)
2019-03-23 22:18:36 +00:00
// -> Current batch exceeds size limit, flush
if (docBytes + COMMA_BYTES + bytes >= MAX_INDEXING_BYTES) {
await flushBuffer()
}
if (chunks.length > 0) {
bytes += COMMA_BYTES
}
bytes += docBytes
chunks.push(doc)
// -> Current batch exceeds count limit, flush
if (chunks.length >= MAX_INDEXING_COUNT) {
await flushBuffer()
}
} else {
// -> End of stream, flush
await flushBuffer()
}
cb()
} catch (err) {
cb(err)
}
}
const flushBuffer = async () => {
WIKI.logger.info(`(SEARCH/ELASTICSEARCH) Sending batch of ${chunks.length}...`)
try {
await this.client.bulk({
index: this.config.indexName,
body: _.reduce(chunks, (result, doc) => {
result.push({
index: {
_index: this.config.indexName,
_type: '_doc',
_id: doc.id
}
})
doc.safeContent = WIKI.models.pages.cleanHTML(doc.render)
2019-03-23 22:18:36 +00:00
result.push({
suggest: this.buildSuggest(doc),
tags: doc.tags,
2019-03-23 22:18:36 +00:00
locale: doc.locale,
path: doc.path,
title: doc.title,
description: doc.description,
content: doc.safeContent
2019-03-23 22:18:36 +00:00
})
return result
}, []),
refresh: true
})
} catch (err) {
WIKI.logger.warn('(SEARCH/ELASTICSEARCH) Failed to send batch to elasticsearch: ', err)
}
chunks.length = 0
bytes = 0
}
2018-09-01 03:42:14 +00:00
// Added real id in order to fetch page tags from the query
2019-03-23 22:18:36 +00:00
await pipeline(
WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'render', { realId: 'id' }).select().from('pages').where({
2019-03-23 22:18:36 +00:00
isPublished: true,
isPrivate: false
}).stream(),
new stream.Transform({
2019-03-23 22:18:36 +00:00
objectMode: true,
transform: async (chunk, enc, cb) => processDocument(cb, chunk),
flush: async (cb) => processDocument(cb)
})
)
WIKI.logger.info(`(SEARCH/ELASTICSEARCH) Index rebuilt successfully.`)
2018-09-01 03:42:14 +00:00
}
}
/**
 * Builds the `ssl` options object handed to the Elasticsearch client.
 *
 * Without `conf.tlsCertPath` only `rejectUnauthorized` is set. With it, a `ca`
 * list is added: it holds the contents of that file when
 * `conf.verifyTLSCertificate` is truthy and is empty otherwise.
 *
 * @param {Object} conf Engine configuration (`tlsCertPath`, `verifyTLSCertificate`)
 * @returns {Object} `{ rejectUnauthorized }` or `{ rejectUnauthorized, ca }`
 */
function getTlsOptions(conf) {
  const hasCertPath = Boolean(conf.tlsCertPath)
  const tlsOptions = { rejectUnauthorized: conf.verifyTLSCertificate }
  if (hasCertPath) {
    // The certificate file is read synchronously, and only when verification is enabled.
    tlsOptions.ca = conf.verifyTLSCertificate ? [fs.readFileSync(conf.tlsCertPath)] : []
  }
  return tlsOptions
}