fix: postgres indexing (#791) + deactivate handler

This commit is contained in:
Nick
2019-03-24 00:27:42 -04:00
parent 2a950575e2
commit 7c77cb0815
12 changed files with 167 additions and 43 deletions

View File

@@ -1,6 +1,8 @@
const _ = require('lodash')
const algoliasearch = require('algoliasearch')
const { pipeline, Transform } = require('stream')
const stream = require('stream')
const Promise = require('bluebird')
const pipeline = Promise.promisify(stream.pipeline)
/* global WIKI */
@@ -77,7 +79,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.content
content: page.safeContent
})
},
/**
@@ -90,7 +92,7 @@ module.exports = {
objectID: page.hash,
title: page.title,
description: page.description,
content: page.content
content: page.safeContent
})
},
/**
@@ -114,7 +116,7 @@ module.exports = {
path: page.destinationPath,
title: page.title,
description: page.description,
content: page.content
content: page.safeContent
})
},
/**
@@ -176,7 +178,7 @@ module.exports = {
path: doc.path,
title: doc.title,
description: doc.description,
content: doc.content
content: WIKI.models.pages.cleanHTML(doc.render)
}))
)
} catch (err) {
@@ -187,11 +189,11 @@ module.exports = {
}
await pipeline(
WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'content').select().from('pages').where({
WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'render').select().from('pages').where({
isPublished: true,
isPrivate: false
}).stream(),
new Transform({
new stream.Transform({
objectMode: true,
transform: async (chunk, enc, cb) => processDocument(cb, chunk),
flush: async (cb) => processDocument(cb)

View File

@@ -1,6 +1,8 @@
const _ = require('lodash')
const AWS = require('aws-sdk')
const { pipeline, Transform } = require('stream')
const stream = require('stream')
const Promise = require('bluebird')
const pipeline = Promise.promisify(stream.pipeline)
/* global WIKI */
@@ -197,7 +199,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.content
content: page.safeContent
}
}
])
@@ -220,7 +222,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.content
content: page.safeContent
}
}
])
@@ -268,7 +270,7 @@ module.exports = {
path: page.destinationPath,
title: page.title,
description: page.description,
content: page.content
content: page.safeContent
}
}
])
@@ -335,7 +337,7 @@ module.exports = {
path: doc.path,
title: doc.title,
description: doc.description,
content: doc.content
content: WIKI.models.pages.cleanHTML(doc.render)
}
})))
}).promise()
@@ -347,11 +349,11 @@ module.exports = {
}
await pipeline(
WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'content').select().from('pages').where({
WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'render').select().from('pages').where({
isPublished: true,
isPrivate: false
}).stream(),
new Transform({
new stream.Transform({
objectMode: true,
transform: async (chunk, enc, cb) => processDocument(cb, chunk),
flush: async (cb) => processDocument(cb)

View File

@@ -1,7 +1,9 @@
const _ = require('lodash')
const { SearchService, QueryType } = require('azure-search-client')
const request = require('request-promise')
const { pipeline } = require('stream')
const stream = require('stream')
const Promise = require('bluebird')
const pipeline = Promise.promisify(stream.pipeline)
/* global WIKI */
@@ -146,7 +148,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.content
content: page.safeContent
}
])
},
@@ -163,7 +165,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.content
content: page.safeContent
}
])
},
@@ -199,7 +201,7 @@ module.exports = {
path: page.destinationPath,
title: page.title,
description: page.description,
content: page.content
content: page.safeContent
}
])
},
@@ -209,10 +211,23 @@ module.exports = {
async rebuild() {
WIKI.logger.info(`(SEARCH/AZURE) Rebuilding Index...`)
await pipeline(
WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'content').select().from('pages').where({
WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'render').select().from('pages').where({
isPublished: true,
isPrivate: false
}).stream(),
new stream.Transform({
objectMode: true,
transform: (chunk, enc, cb) => {
cb(null, {
id: chunk.id,
path: chunk.path,
locale: chunk.locale,
title: chunk.title,
description: chunk.description,
content: WIKI.models.pages.cleanHTML(chunk.render)
})
}
}),
this.client.indexes.use(this.config.indexName).createIndexingStream()
)
WIKI.logger.info(`(SEARCH/AZURE) Index rebuilt successfully.`)

View File

@@ -1,4 +1,4 @@
const _ = require('lodash')
/* global WIKI */
module.exports = {
activate() {
@@ -32,7 +32,7 @@ module.exports = {
}
// TODO: Add user permissions filtering
builder.andWhere(builder => {
switch(WIKI.config.db.type) {
switch (WIKI.config.db.type) {
case 'postgres':
builder.where('title', 'ILIKE', `%${q}%`)
builder.orWhere('description', 'ILIKE', `%${q}%`)

View File

@@ -1,6 +1,8 @@
const _ = require('lodash')
const elasticsearch = require('elasticsearch')
const { pipeline, Transform } = require('stream')
const stream = require('stream')
const Promise = require('bluebird')
const pipeline = Promise.promisify(stream.pipeline)
/* global WIKI */
@@ -116,7 +118,7 @@ module.exports = {
input: s,
weight: 3
})),
page.content.split(' ').map(s => ({
page.safeContent.split(' ').map(s => ({
input: s,
weight: 1
}))
@@ -138,7 +140,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.content
content: page.safeContent
},
refresh: true
})
@@ -159,7 +161,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.content
content: page.safeContent
},
refresh: true
})
@@ -199,7 +201,7 @@ module.exports = {
path: page.destinationPath,
title: page.title,
description: page.description,
content: page.content
content: page.safeContent
},
refresh: true
})
@@ -262,13 +264,14 @@ module.exports = {
_id: doc.id
}
})
doc.safeContent = WIKI.models.pages.cleanHTML(doc.render)
result.push({
suggest: this.buildSuggest(doc),
locale: doc.locale,
path: doc.path,
title: doc.title,
description: doc.description,
content: doc.content
content: doc.safeContent
})
return result
}, []),
@@ -282,11 +285,11 @@ module.exports = {
}
await pipeline(
WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'content').select().from('pages').where({
WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'render').select().from('pages').where({
isPublished: true,
isPrivate: false
}).stream(),
new Transform({
new stream.Transform({
objectMode: true,
transform: async (chunk, enc, cb) => processDocument(cb, chunk),
flush: async (cb) => processDocument(cb)

View File

@@ -1,5 +1,9 @@
const _ = require('lodash')
const tsquery = require('pg-tsquery')()
const stream = require('stream')
const Promise = require('bluebird')
const pipeline = Promise.promisify(stream.pipeline)
/* global WIKI */
module.exports = {
async activate() {
@@ -8,7 +12,10 @@ module.exports = {
}
},
async deactivate() {
// not used
WIKI.logger.info(`(SEARCH/POSTGRES) Dropping index tables...`)
await WIKI.models.knex.schema.dropTable('pagesWords')
await WIKI.models.knex.schema.dropTable('pagesVector')
WIKI.logger.info(`(SEARCH/POSTGRES) Index tables have been dropped.`)
},
/**
* INIT
@@ -27,6 +34,7 @@ module.exports = {
table.string('title')
table.string('description')
table.specificType('tokens', 'TSVECTOR')
table.text('content')
})
}
// -> Create Words Index
@@ -71,7 +79,6 @@ module.exports = {
WIKI.logger.warn('Search Engine Error:')
WIKI.logger.warn(err)
}
},
/**
* CREATE
@@ -80,10 +87,10 @@ module.exports = {
*/
async created(page) {
await WIKI.models.knex.raw(`
INSERT INTO "pagesVector" (path, locale, title, description, tokens) VALUES (
'?', '?', '?', '?', (setweight(to_tsvector('${this.config.dictLanguage}', '?'), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', '?'), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', '?'), 'C'))
INSERT INTO "pagesVector" (path, locale, title, description, "tokens") VALUES (
?, ?, ?, ?, (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C'))
)
`, [page.path, page.localeCode, page.title, page.description, page.title, page.description, page.content])
`, [page.path, page.localeCode, page.title, page.description, page.title, page.description, page.safeContent])
},
/**
* UPDATE
@@ -99,7 +106,7 @@ module.exports = {
setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') ||
setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C'))
WHERE path = ? AND locale = ?
`, [page.title, page.description, page.title, page.description, page.content, page.path, page.localeCode])
`, [page.title, page.description, page.title, page.description, page.safeContent, page.path, page.localeCode])
},
/**
* DELETE
@@ -132,14 +139,34 @@ module.exports = {
async rebuild() {
WIKI.logger.info(`(SEARCH/POSTGRES) Rebuilding Index...`)
await WIKI.models.knex('pagesVector').truncate()
await WIKI.models.knex('pagesWords').truncate()
await pipeline(
WIKI.models.knex.column('path', 'localeCode', 'title', 'description', 'render').select().from('pages').where({
isPublished: true,
isPrivate: false
}).stream(),
new stream.Transform({
objectMode: true,
transform: async (page, enc, cb) => {
const content = WIKI.models.pages.cleanHTML(page.render)
await WIKI.models.knex.raw(`
INSERT INTO "pagesVector" (path, locale, title, description, "tokens", content) VALUES (
?, ?, ?, ?, (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C')), ?
)
`, [page.path, page.localeCode, page.title, page.description, page.title, page.description, content, content])
cb()
}
})
)
await WIKI.models.knex.raw(`
INSERT INTO "pagesVector" (path, locale, title, description, "tokens")
SELECT path, "localeCode" AS locale, title, description,
(setweight(to_tsvector('${this.config.dictLanguage}', title), 'A') ||
setweight(to_tsvector('${this.config.dictLanguage}', description), 'B') ||
setweight(to_tsvector('${this.config.dictLanguage}', content), 'C')) AS tokens
FROM "pages"
WHERE pages."isPublished" AND NOT pages."isPrivate"`)
INSERT INTO "pagesWords" (word)
SELECT word FROM ts_stat(
'SELECT to_tsvector(''simple'', "title") || to_tsvector(''simple'', "description") || to_tsvector(''simple'', "content") FROM "pagesVector"'
)
`)
WIKI.logger.info(`(SEARCH/POSTGRES) Index rebuilt successfully.`)
}
}