// blob: 7b503ab82c609661e88f36d3f540634f86f44905 [file] [log] [blame]
const TurndownService = require('turndown');
const turndownPluginGfm = require('turndown-plugin-gfm');
/**
 * Kind classes checked on an `.admonitionblock` element when no label can be
 * found inside its icon cell.
 */
const ADMONITION_KINDS = ['note', 'tip', 'important', 'warning', 'caution'];

/**
 * Works out the label to print for an admonition block.
 *
 * The label is looked up in this order, using the first non-empty value:
 *   1. the `title` attribute of a font icon (`.icon i`),
 *   2. the `alt` attribute of an image icon (`.icon img`),
 *   3. the text of a plain label (`.icon .title`),
 *   4. the kind class on the block itself (`note`, `tip`, ...), capitalized.
 * Without the later steps, blocks rendered without font icons would all be
 * labelled "Note" regardless of their real kind.
 *
 * @param {Element} node - The `.admonitionblock` element.
 * @returns {string} The label, or 'Note' when nothing identifies the kind.
 */
function getAdmonitionLabel(node) {
  const fontIcon = node.querySelector('.icon i');
  const fontIconTitle = fontIcon ? fontIcon.getAttribute('title') : '';
  if (fontIconTitle) {
    return fontIconTitle;
  }

  const imageIcon = node.querySelector('.icon img');
  const imageAlt = imageIcon ? imageIcon.getAttribute('alt') : '';
  if (imageAlt) {
    return imageAlt;
  }

  const textLabel = node.querySelector('.icon .title');
  const labelText = textLabel ? (textLabel.textContent || '').trim() : '';
  if (labelText) {
    return labelText;
  }

  const kind = ADMONITION_KINDS.find((name) => node.classList.contains(name));
  return kind ? `${kind.charAt(0).toUpperCase()}${kind.slice(1)}` : 'Note';
}

/**
 * Prefixes every line of a Markdown string with a blockquote marker.
 * Empty lines become a bare `>` so the quote is not interrupted.
 *
 * @param {string} markdown - Markdown text to quote.
 * @returns {string} The quoted text, without leading or trailing newlines added.
 */
function quoteLines(markdown) {
  return markdown
    .split('\n')
    .map((line) => (line ? `> ${line}` : '>'))
    .join('\n');
}

/**
 * Creates and configures a TurndownService instance for converting HTML to Markdown.
 *
 * On top of the GFM plugin, three custom rules are registered:
 *   - `admonitionBlocks`: turns `.admonitionblock` elements into a blockquote
 *     headed by the bold admonition label.
 *   - `tableWrapper`: replaces `.table-wrapper` elements with the Markdown of
 *     the table they contain.
 *   - `removeIcons`: drops elements with class `icon` or `anchor`, and `<i>`
 *     elements with class `fa`.
 *
 * @returns {TurndownService} Configured Turndown service instance
 */
function createTurndownService() {
  const turndownService = new TurndownService({
    headingStyle: 'atx',
    codeBlockStyle: 'fenced',
    bulletListMarker: '-',
    emDelimiter: '_',
  });

  // Use GitHub-flavored Markdown for tables, strikethrough, etc.
  turndownService.use(turndownPluginGfm.gfm);

  // Convert admonition blocks to a labelled blockquote.
  turndownService.addRule('admonitionBlocks', {
    filter: (node) => Boolean(node.classList && node.classList.contains('admonitionblock')),
    replacement: (content, node) => {
      const contentElement = node.querySelector('.content');
      if (!contentElement) {
        return '';
      }

      // Convert only the content cell; the already-converted `content`
      // argument would also include the surrounding layout table.
      const body = turndownService.turndown(contentElement.innerHTML);
      return `\n> **${getAdmonitionLabel(node)}**\n${quoteLines(body)}\n\n`;
    },
  });

  // Unwrap table-wrapper elements, keeping only the table inside.
  turndownService.addRule('tableWrapper', {
    filter: (node) => Boolean(node.classList && node.classList.contains('table-wrapper')),
    replacement: (content, node) => {
      const table = node.querySelector('table');
      if (!table) {
        return content;
      }

      // The nested conversion comes back trimmed. Pad it with blank lines, as
      // block-level rules do, so the table is not glued to neighbouring output
      // (e.g. two adjacent wrappers would otherwise merge into one broken table).
      return `\n\n${turndownService.turndown(table.outerHTML)}\n\n`;
    },
  });

  // Strip icon elements and other UI-only content.
  turndownService.addRule('removeIcons', {
    filter: (node) => {
      const classes = node.classList;
      if (!classes) {
        return false;
      }
      return (
        classes.contains('icon') ||
        classes.contains('anchor') ||
        (node.tagName === 'I' && classes.contains('fa'))
      );
    },
    replacement: () => '',
  });

  return turndownService;
}
// Public API of this module: only the factory function is exported.
module.exports = { createTurndownService };