"use strict";
var Promise = require('bluebird'),
md = require('markdown-it'),
mdEmoji = require('markdown-it-emoji'),
mdTaskLists = require('markdown-it-task-lists'),
mdAbbr = require('markdown-it-abbr'),
mdAnchor = require('markdown-it-anchor'),
mdFootnote = require('markdown-it-footnote'),
mdExternalLinks = require('markdown-it-external-links'),
mdExpandTabs = require('markdown-it-expand-tabs'),
mdAttrs = require('markdown-it-attrs'),
hljs = require('highlight.js'),
cheerio = require('cheerio'),
_ = require('lodash'),
mdRemove = require('remove-markdown');
// Load plugins
// Shared markdown-it instance used for HTML rendering, with plugins chained on.
// NOTE(review): as it stands this statement is not syntactically complete — the
// closing braces/parentheses of highlight(), of the option objects and of the
// .use(...) calls are absent. Restore them from version control before use.
var mkdown = md({
html: true,
linkify: true,
// NOTE(review): markdown-it documents this option as `typographer`; confirm
// whether `typography` actually has any effect.
typography: true,
// Fenced-code highlighter. It returns the complete <pre><code> wrapper itself.
highlight(str, lang) {
// Highlight only when a language is given and highlight.js recognises it.
if (lang && hljs.getLanguage(lang)) {
try {
return '<pre class="hljs"><code>' + hljs.highlight(lang, str, true).value + '</code></pre>';
} catch (err) {
// Highlighting threw: fall back to the source text without highlighting.
return '<pre><code>' + str + '</code></pre>';
// No language, or one highlight.js does not know: emit the text as-is.
// NOTE(review): `str` is concatenated without HTML escaping in both fallback
// returns; markdown-it's own example uses md.utils.escapeHtml(str) — confirm.
return '<pre><code>' + str + '</code></pre>';
// Heading anchors: slugs come from lodash kebabCase; the permalink uses the
// 'toc-anchor' class and the '#' symbol (the same class is excluded by the
// 'a:not(.toc-anchor)' selector in parseContent).
.use(mdAnchor, {
slugify: _.kebabCase,
permalink: true,
permalinkClass: 'toc-anchor',
permalinkSymbol: '#',
permalinkBefore: true
// Class names applied to external and internal links respectively.
.use(mdExternalLinks, {
externalClassName: 'external-link',
internalClassName: 'internal-link'
// Tab expansion configured with a width of 4.
.use(mdExpandTabs, {
tabWidth: 4
// Rendering rules

/**
 * Render an emoji token as an empty <i> element whose class list is
 * "twa twa-<name>", where <name> is the token's markup with every
 * underscore replaced by a hyphen.
 *
 * @param {Array} token Tokens being rendered
 * @param {Number} idx Index of the emoji token inside that list
 * @return {String} HTML for the emoji
 */
mkdown.renderer.rules.emoji = function(token, idx) {
  return '<i class="twa twa-' + _.replace(token[idx].markup, /_/g, '-') + '"></i>';
};
// Video rules
//
// Each rule describes one kind of video link that parseContent turns into an
// embed:
//   selector - CSS selector used to find the candidate links
//   regexp   - pattern applied to the link's href; the last non-empty match
//              entry replaces the href value. `false` means "use the href
//              unchanged".
//   output   - embed markup; `{0}` is replaced with the resulting value
const videoRules = [
  {
    // YouTube-style URLs (youtu.be/, embed/, watch?v=, ...).
    // NOTE(review): the selector is empty here while the sibling rules use
    // 'a.<provider>' — confirm the intended selector.
    selector: '',
    regexp: /(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*/i,
    // NOTE(review): src consists of the captured value only; confirm whether
    // an embed URL prefix is expected in front of `{0}`.
    output: '<iframe width="640" height="360" src="{0}?rel=0" frameborder="0" allowfullscreen></iframe>'
  },
  {
    // Vimeo URLs: captures the numeric video id.
    selector: 'a.vimeo',
    regexp: /\/(?:channels\/(?:\w+\/)?|groups\/(?:[^\/]*)\/videos\/|album\/(?:\d+)\/video\/|)(\d+)(?:$|\/|\?)/i,
    // NOTE(review): src consists of the captured value only — confirm.
    output: '<iframe src="{0}" width="640" height="360" frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'
  },
  {
    // Dailymotion URLs (dailymotion.com/video/..., dai.ly/...).
    selector: 'a.dailymotion',
    regexp: /(?:dailymotion\.com(?:\/embed)?(?:\/video|\/hub)|dai\.ly)\/([0-9a-z]+)(?:[\-_0-9a-zA-Z]+(?:#video=)?([a-z0-9]+)?)?/i,
    // NOTE(review): src is '//' followed by the captured value — confirm.
    output: '<iframe width="640" height="360" src="//{0}?endscreen-enable=false" frameborder="0" allowfullscreen></iframe>'
  },
  {
    // Direct video file: no pattern, the href itself becomes the <source> URL.
    // NOTE(review): the selector is empty here as well — confirm.
    selector: '',
    regexp: false,
    output: '<video width="640" height="360" controls preload="metadata"><source src="{0}" type="video/mp4"></video>'
  }
];
/**
 * Parse markdown content and build TOC tree
 *
 * @param {(Function|string)} content Markdown content
 * @return {Array} TOC tree
 */
// Builds the table-of-contents tree for a markdown document: headings are
// collected into a flat list, entries deeper than level 2 are dropped, and the
// rest is folded into an array of { content, anchor, nodes } objects.
// NOTE(review): this block is damaged. Several closing braces and statements
// are absent, and the bare timestamp lines (e.g. "2016-08-24 17:17:59 -04:00")
// are version-control residue, not JavaScript. Restore from history before use.
const parseTree = (content) => {
// Tokenize with a fresh, plugin-less markdown-it instance (not `mkdown`).
let tokens = md().parse(content, {});
let tocArray = [];
//-> Extract headings and their respective levels
for (let i = 0; i < tokens.length; i++) {
// NOTE(review): the body of this guard is absent. The lines below treat
// tokens[i] as the heading_close token, so presumably other token types were
// skipped here — confirm.
if (tokens[i].type !== "heading_close") {
// The token right before heading_close is read as the heading's content.
const heading = tokens[i - 1];
const heading_close = tokens[i];
if (heading.type === "inline") {
2016-08-24 17:17:59 -04:00
let content = "";
let anchor = "";
// Heading whose first child token opens a link: use the second child's
// content as the title and derive the anchor from it.
// NOTE(review): `heading.children[0]` is read without a length check.
if (heading.children && heading.children[0].type === "link_open") {
2016-08-23 21:09:09 -04:00
content = heading.children[1].content;
anchor = _.kebabCase(content);
2016-08-24 17:17:59 -04:00
} else {
2016-10-17 19:52:04 -04:00
// Otherwise the title is the heading's own content, and the anchor is the
// kebab-cased concatenation of every child token's content.
content = heading.content;
anchor = _.kebabCase(heading.children.reduce((acc, t) => acc + t.content, ""));
2016-08-24 17:17:59 -04:00
2016-08-24 17:17:59 -04:00
// Numeric level taken from the second character of the closing tag name.
// NOTE(review): this is a lone object property; the statement it belonged to
// (presumably an entry appended to tocArray with content/anchor/level) is
// absent — confirm.
level: +heading_close.tag.substr(1, 1)
2016-08-24 17:17:59 -04:00
//-> Exclude levels deeper than 2
_.remove(tocArray, (n) => { return n.level > 2; });
//-> Build tree from flat array
return _.reduce(tocArray, (tree, v) => {
// Index of the most recently added top-level entry (-1 while tree is empty).
let treeLength = tree.length - 1;
// Level below 2: a new top-level entry with an empty child list.
// NOTE(review): the call receiving this object literal is absent.
if(v.level < 2) {
content: v.content,
anchor: v.anchor,
nodes: []
} else {
// Deeper heading: compute a lodash path (starting at 'nodes') inside the last
// top-level entry that points at the child list to extend.
let lastNodeLevel = 1;
let GetNodePath = (startPos) => {
// First call passes no argument, so the path starts at 'nodes'.
if(_.isEmpty(startPos)) {
startPos = 'nodes';
// Stop once the tracked level equals the heading's level.
// NOTE(review): nothing visible here changes lastNodeLevel between calls;
// the statement that advanced it appears to be absent — confirm.
if(lastNodeLevel === v.level) {
return startPos;
} else {
// Otherwise descend one level by appending '[<index>].nodes' to the path.
// NOTE(review): the index expression below is garbled (unbalanced
// parentheses, missing callee) and cannot be interpreted as written.
return GetNodePath(startPos + '[' + ([treeLength], startPos).length - 1) + '].nodes');
let lastNodePath = GetNodePath();
// Look the child list up on the last top-level entry; _.get yields undefined
// when that entry or the path does not exist.
let lastNode = _.get(tree[treeLength], lastNodePath);
if(lastNode) {
// NOTE(review): the call receiving this object literal is absent.
content: v.content,
anchor: v.anchor,
nodes: []
// Write the child list back at the same path.
_.set(tree[treeLength], lastNodePath, lastNode);
return tree;
}, []);
/**
 * Parse markdown content to HTML
 *
 * @param {String} content Markdown content
 * @return {String} HTML formatted content
 */
// Renders markdown with the shared `mkdown` instance, loads the result into
// cheerio, post-processes the DOM in several passes, and returns the
// serialized HTML.
// NOTE(review): this block is damaged. Closing braces and most of the
// statements that mutate the DOM are absent, and the bare timestamp lines are
// version-control residue, not JavaScript. Restore from history before use.
const parseContent = (content) => {
let output = mkdown.render(content);
let cr = cheerio.load(output);
2016-11-20 20:09:50 -05:00
//-> Check for empty first element
// Raw node of the first child of the document root.
// NOTE(review): presumably undefined when the rendered output has no
// elements, in which case the `.type` access below would throw — confirm.
let firstElm = cr.root().children().first()[0];
// NOTE(review): the second condition lacks its left operand; by analogy with
// the `.name === 'img'` check below it was presumably `firstElm.name`.
if(firstElm.type === 'tag' && === 'p') {
let firstElmChildren = firstElm.children;
// Two cases are distinguished: a paragraph with no children, and a paragraph
// whose only child is an <img>. The action taken in each branch is absent.
if(firstElmChildren.length < 1) {
} else if(firstElmChildren.length === 1 && firstElmChildren[0].type === 'tag' && firstElmChildren[0].name === 'img') {
2016-11-20 20:09:50 -05:00
//-> Remove links in headers
// Targets links that are direct children of h1–h3, except the generated
// '.toc-anchor' permalinks, and reads each link's text.
// NOTE(review): the statement that uses txtLink is absent.
cr('h1 > a:not(.toc-anchor), h2 > a:not(.toc-anchor), h3 > a:not(.toc-anchor)').each((i, elm) => {
let txtLink = cr(elm).text();
2016-12-05 21:42:41 -05:00
//-> Re-attach blockquote styling classes to their parents
// Only top-level blockquotes that have children are considered; the class
// attribute of the blockquote's last child is read.
// NOTE(review): the statements acting on those classes are absent.
cr.root().children('blockquote').each((i, elm) => {
if(cr(elm).children().length > 0) {
let bqLastChild = cr(elm).children().last()[0];
let bqLastChildClasses = cr(bqLastChild).attr('class');
2016-12-08 01:46:52 -05:00
if(bqLastChildClasses && bqLastChildClasses.length > 0) {
2016-12-05 21:42:41 -05:00
2016-12-08 01:46:52 -05:00
//-> Enclose content below headers
// For every h2: collect the siblings that follow it up to the next h1/h2,
// insert an empty div.indent-h2 right after the heading, and select that div.
// NOTE(review): the loop body is absent; presumably each collected sibling is
// moved into the container — confirm.
cr('h2').each((i, elm) => {
let subH2Content = cr(elm).nextUntil('h1, h2');
cr(elm).after('<div class="indent-h2"></div>');
let subH2Container = cr(elm).next('.indent-h2');
_.forEach(subH2Content, (ch) => {
// Same treatment for h3, bounded by the next h1/h2/h3, using div.indent-h3.
cr('h3').each((i, elm) => {
let subH3Content = cr(elm).nextUntil('h1, h2, h3');
cr(elm).after('<div class="indent-h3"></div>');
let subH3Container = cr(elm).next('.indent-h3');
_.forEach(subH3Content, (ch) => {
2017-01-02 22:20:24 -05:00
// Replace video links with embeds
// Each rule's selector picks the links to process.
_.forEach(videoRules, (vrule) => {
cr(vrule.selector).each((i, elm) => {
// NOTE(review): attr('href') is presumably undefined for an element without
// an href, which would make the .match() call below throw — confirm.
let originLink = cr(elm).attr('href');
// Rules with a pattern reduce the href to the last non-empty string in the
// match result; rules with `regexp: false` keep the href unchanged.
if(vrule.regexp) {
let vidMatches = originLink.match(vrule.regexp);
if((vidMatches && _.isArray(vidMatches))) {
vidMatches = _.filter(vidMatches, (f) => {
return f && _.isString(f);
originLink = _.last(vidMatches);
// Substitute the value for the '{0}' placeholder in the rule's markup.
// NOTE(review): the statement that inserts processedLink into the document
// is absent.
let processedLink = _.replace(vrule.output, '{0}', originLink);
// Serialize the modified document back to an HTML string.
output = cr.html();
return output;
/**
 * Parse meta-data tags from content
 *
 * Meta-data lives in HTML comments of the form `<!-- key: value -->`. Keys are
 * lower-cased and values are trimmed; when a key occurs more than once, the
 * last occurrence wins.
 *
 * @param {String} content Markdown content
 * @return {Object} Properties found in the content and their values
 */
const parseMeta = (content) => {
  // Lazy `(.*?)` so that two tags on the same line are captured separately
  // instead of the second one being swallowed into the first tag's value.
  const commentMeta = /<!-- ?([a-zA-Z]+):(.*?)-->/g;
  const results = {};
  let match;
  while ((match = commentMeta.exec(content)) !== null) {
    results[match[1].toLowerCase()] = match[2].trim();
  }
  return results;
};
module.exports = {
/**
 * Parse content and return all data
 *
 * @param {String} content Markdown-formatted content
 * @return {Object} Object containing meta, html and tree data
 */
parse(content) {
return {
2016-08-24 17:17:59 -04:00
meta: parseMeta(content),
html: parseContent(content),
tree: parseTree(content)
2016-08-27 21:46:10 -04:00
/**
 * Strips non-text elements from Markdown content
 *
 * @param {String} content Markdown-formatted content
 * @return {String} Text-only version
 */
removeMarkdown(content) {
return mdRemove(_.chain(content)
.replace(/<!-- ?([a-zA-Z]+):(.*)-->/g, '')
.replace(/```[^`]+```/g, '')
.replace(/`[^`]+`/g, '')
.replace(new RegExp('(?!mailto:)(?:(?:http|https|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?', 'g'), '')
.replace(/\r?\n|\r/g, ' ')
.replace(/(\b([^a-z]+)\b)/g, ' ')
.replace(/[^a-z]+/g, ' ')
.replace(/(\b(\w{1,2})\b(\W|$))/g, '')
.replace(/\s\s+/g, ' ')