| #!/usr/bin/env node |
| |
/**
 * Documentation Sync Script for Apache Mahout
 *
 * This script syncs documentation from the source /docs directory
 * into the Docusaurus website. It:
 * 1. Cleans each destination directory (preserving only .gitignore)
 * 2. Copies all non-excluded files from /docs to website/docs
 *    (/docs/blog is excluded here and handled in step 3)
 * 3. Copies all blog posts from /docs/blog to website/blog
 * 4. Transforms Markdown frontmatter and links for Docusaurus compatibility
 *
 * /docs is the SINGLE SOURCE OF TRUTH for all documentation and blog posts.
 * website/docs and website/blog are build artifacts that should not be edited directly.
 */
| |
| const fs = require('fs'); |
| const path = require('path'); |
| |
// Configuration: absolute source and destination locations, all derived from
// the directory this script lives in.
const SOURCE_DIR = path.join(__dirname, '..', '..', 'docs');
const DEST_DIR = path.join(__dirname, '..', 'docs');
// The blog source is a sub-directory of the docs source.
const BLOG_SOURCE_DIR = path.join(SOURCE_DIR, 'blog');
const BLOG_DEST_DIR = path.join(__dirname, '..', 'blog');
| |
// Entries in a destination directory that cleaning must leave in place.
const PRESERVE_FILES = ['.gitignore'];

// Entry names (not full paths) skipped while syncing. Each pattern is tested
// against the bare name of every file and directory that is visited.
const EXCLUDE_PATTERNS = [
  // Hidden files
  /^\./,
  // Dependency and Python build residue
  /^node_modules$/,
  /\.pyc$/,
  /^__pycache__$/,
  // Blog is synced separately to website/blog
  /^blog$/,
];
| |
/**
 * Decide whether a directory entry must be left out of the sync.
 *
 * @param {string} name - Bare entry name (not a full path).
 * @returns {boolean} `true` when at least one exclusion pattern matches.
 */
function shouldExclude(name) {
  for (const rule of EXCLUDE_PATTERNS) {
    if (rule.test(name)) {
      return true;
    }
  }
  return false;
}
| |
/**
 * Make sure a directory is present on disk.
 *
 * Nothing happens when the path already exists; otherwise the directory and
 * any missing parents are created.
 *
 * @param {string} dirPath - Directory to create if absent.
 * @returns {void}
 */
function ensureDir(dirPath) {
  if (fs.existsSync(dirPath)) {
    return;
  }

  fs.mkdirSync(dirPath, { recursive: true });
}
| |
/**
 * Empty a destination directory ahead of a sync.
 *
 * A missing directory is simply created. Otherwise every top-level entry is
 * removed recursively, except regular files whose name is listed in
 * PRESERVE_FILES (such as .gitignore).
 *
 * @param {string} destDir - Directory to empty.
 * @returns {void}
 */
function cleanDestination(destDir) {
  const alreadyThere = fs.existsSync(destDir);
  if (!alreadyThere) {
    ensureDir(destDir);
    return;
  }

  fs.readdirSync(destDir, { withFileTypes: true }).forEach((item) => {
    // Only plain files are kept; a directory sharing a preserved name is removed.
    const keep = PRESERVE_FILES.includes(item.name) && item.isFile();
    if (keep) {
      console.log(` Preserving: ${item.name}`);
      return;
    }

    fs.rmSync(path.join(destDir, item.name), { recursive: true, force: true });
  });
}
| |
/**
 * Parse YAML frontmatter from markdown content.
 *
 * Only flat `key: value` lines are understood; this is not a full YAML parser.
 * Lines without a colon (or with the colon in first position) are ignored, as
 * are YAML comment lines. Values keep their raw text except that one pair of
 * matching surrounding quotes is removed.
 *
 * Accepts LF and CRLF line endings, a leading UTF-8 byte-order mark, an empty
 * frontmatter block, and a closing `---` that is the last line of the file.
 *
 * @param {string} content - Full text of a markdown file.
 * @returns {{frontmatter: Object<string, string>, body: string}} Parsed keys
 *   and the text after the frontmatter block. When no block is found,
 *   `frontmatter` is empty and `body` is the content (without any BOM).
 */
function parseFrontmatter(content) {
  // A byte-order mark would stop the `^---` anchor from matching, and would
  // end up in the middle of the output once new frontmatter is prepended.
  const text = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;

  // Opening `---` line, optional block content, closing `---` line that is
  // followed by a line break or the end of the input.
  const frontmatterRegex = /^---[ \t]*\r?\n(?:([\s\S]*?)\r?\n)?---[ \t]*(?:\r?\n|$)/;
  const match = frontmatterRegex.exec(text);

  if (!match) {
    return { frontmatter: {}, body: text };
  }

  // The capture group is undefined for an empty block (`---` directly after `---`).
  const frontmatterStr = match[1] || '';
  const body = text.slice(match[0].length);

  // Simple YAML parsing (key: value pairs)
  const frontmatter = {};
  for (const line of frontmatterStr.split(/\r?\n/)) {
    if (line.trimStart().startsWith('#')) {
      continue; // YAML comment, even if it happens to contain a colon
    }

    const colonIndex = line.indexOf(':');
    if (colonIndex <= 0) {
      continue;
    }

    const key = line.slice(0, colonIndex).trim();
    let value = line.slice(colonIndex + 1).trim();

    // Remove quotes if present. The length check keeps a lone quote character
    // from being treated as an (empty) quoted string.
    const quote = value[0];
    if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
      value = value.slice(1, -1);
    }

    frontmatter[key] = value;
  }

  return { frontmatter, body };
}
| |
/**
 * Generate a YAML frontmatter string from a flat key/value object.
 *
 * String values containing `:` or `#` are emitted as double-quoted scalars,
 * because both characters are significant in plain YAML scalars. Inside such a
 * value, any double quote that is not already backslash-escaped is escaped, so
 * the emitted scalar is not terminated early. Existing backslashes are left
 * untouched, so a value that already carries `\"` sequences is emitted as-is.
 * All other values are written verbatim.
 *
 * @param {Object<string, *>} frontmatter - Keys and values to serialize, in
 *   insertion order.
 * @returns {string} Text starting with a `---` line and ending with `---\n`.
 */
function generateFrontmatter(frontmatter) {
  const lines = ['---'];

  for (const [key, value] of Object.entries(frontmatter)) {
    const needsQuotes =
      typeof value === 'string' && (value.includes(':') || value.includes('#'));

    if (needsQuotes) {
      // An even run of preceding backslashes means the quote itself is bare;
      // an odd run means it is already escaped and must not be escaped again.
      const escaped = value.replace(/(\\*)"/g, (whole, slashes) =>
        (slashes.length % 2 === 0 ? `${slashes}\\"` : whole));
      lines.push(`${key}: "${escaped}"`);
    } else {
      lines.push(`${key}: ${value}`);
    }
  }

  lines.push('---\n');
  return lines.join('\n');
}
| |
/**
 * Adapt parsed frontmatter for Docusaurus.
 *
 * Returns a new object without the Jekyll-only `layout` and `permalink` keys.
 * When no truthy `title` is present and the file is not an `index.md`, a
 * title is derived from the file name: underscores and hyphens become spaces
 * and the first letter of each word is upper-cased.
 *
 * @param {Object<string, string>} frontmatter - Parsed frontmatter; not mutated.
 * @param {string} filePath - Path of the markdown file the frontmatter came from.
 * @returns {Object<string, string>} The adapted frontmatter.
 */
function transformFrontmatter(frontmatter, filePath) {
  // Remove Jekyll-specific fields while copying
  const jekyllOnly = ['layout', 'permalink'];
  const transformed = Object.fromEntries(
    Object.entries(frontmatter).filter(([field]) => !jekyllOnly.includes(field)),
  );

  if (transformed.title) {
    return transformed;
  }

  const stem = path.basename(filePath, '.md');
  if (stem === 'index') {
    return transformed;
  }

  // Generate title from filename
  const spaced = stem.replace(/[_-]/g, ' ');
  transformed.title = spaced.replace(/\b\w/g, (ch) => ch.toUpperCase());
  return transformed;
}
| |
/**
 * Transform markdown links for Docusaurus.
 *
 * For every inline `[text](url)` link whose target is local, the `.md`
 * extension of the linked file is removed. The extension is recognized at the
 * end of the target and also directly before a `#fragment`, a `?query`, or
 * whitespace (as in `page.md "Title"`), and whatever follows it is kept.
 *
 * Left untouched: targets that start with a URI scheme (`https:`, `mailto:`,
 * ...), protocol-relative `//` targets, and pure `#anchor` targets.
 *
 * @param {string} content - Markdown text.
 * @returns {string} The text with local `.md` link targets rewritten.
 */
function transformLinks(content) {
  // Match markdown links: [text](url)
  const linkRegex = /\[([^\]]*)\]\(([^)]+)\)/g;
  // Any "scheme:" prefix or a protocol-relative "//" marks an external target.
  const externalRegex = /^(?:[a-z][a-z0-9+.-]*:|\/\/)/i;
  // ".md" that ends the path part of the target.
  const mdExtensionRegex = /\.md(?=[#?\s]|$)/;

  return content.replace(linkRegex, (match, text, url) => {
    // Skip external links and anchor links
    if (externalRegex.test(url) || url.startsWith('#')) {
      return match;
    }

    // Remove .md extension for Docusaurus, keeping any fragment, query or title
    return `[${text}](${url.replace(mdExtensionRegex, '')})`;
  });
}
| |
/**
 * Convert one markdown file and write the result to its destination.
 *
 * The source is read as UTF-8 and split into frontmatter and body. The
 * frontmatter is adapted for Docusaurus and the links in the body are
 * rewritten. A frontmatter block is written only when at least one key
 * remains after the transformation.
 *
 * @param {string} srcPath - Markdown file to read.
 * @param {string} destPath - File to write (overwritten if present).
 * @returns {void}
 */
function processMarkdownFile(srcPath, destPath) {
  const raw = fs.readFileSync(srcPath, 'utf-8');

  const parsed = parseFrontmatter(raw);
  const meta = transformFrontmatter(parsed.frontmatter, srcPath);
  const text = transformLinks(parsed.body);

  // Only add frontmatter if there's something to add
  const header = Object.keys(meta).length > 0 ? generateFrontmatter(meta) : '';

  fs.writeFileSync(destPath, header + text);
}
| |
/**
 * Copy a file verbatim, with no content transformation.
 *
 * Used for every synced file that is not markdown.
 *
 * @param {string} srcPath - File to copy.
 * @param {string} destPath - Target path (overwritten if present).
 * @returns {void}
 */
function copyFile(srcPath, destPath) {
  fs.copyFileSync(srcPath, destPath);
}
| |
/**
 * Mirror a source tree into a destination tree, recursively.
 *
 * Entries whose name matches an exclusion pattern are skipped. Markdown files
 * (`.md`) are transformed on the way; every other regular file is copied
 * unchanged. Entries that are neither directories nor regular files are
 * ignored. A missing source directory is reported and leaves `stats` as is.
 *
 * @param {string} srcDir - Directory to read from.
 * @param {string} destDir - Directory to write to (created when missing).
 * @param {{files: number, dirs: number}} [stats] - Running totals, shared
 *   across the recursion and updated in place.
 * @returns {{files: number, dirs: number}} The same `stats` object.
 */
function syncDirectory(srcDir, destDir, stats = { files: 0, dirs: 0 }) {
  const sourceMissing = !fs.existsSync(srcDir);
  if (sourceMissing) {
    console.log(` Source directory does not exist: ${srcDir}`);
    return stats;
  }

  ensureDir(destDir);

  const children = fs
    .readdirSync(srcDir, { withFileTypes: true })
    .filter((child) => !shouldExclude(child.name));

  children.forEach((child) => {
    const from = path.join(srcDir, child.name);
    const to = path.join(destDir, child.name);

    if (child.isDirectory()) {
      // Descend first; the directory itself is counted afterwards.
      syncDirectory(from, to, stats);
      stats.dirs += 1;
      return;
    }

    if (!child.isFile()) {
      return;
    }

    const handler = child.name.endsWith('.md') ? processMarkdownFile : copyFile;
    handler(from, to);
    stats.files += 1;
  });

  return stats;
}
| |
/**
 * Entry point: run the docs pass, then the blog pass, then print totals.
 *
 * Each pass logs its source and destination, empties the destination
 * (preserved files are kept) and mirrors the source into it.
 *
 * @returns {void}
 */
function main() {
  console.log('Starting documentation sync...\n');

  const passes = [
    {
      banner: [`Docs Source: ${SOURCE_DIR}`, `Docs Destination: ${DEST_DIR}\n`],
      cleaning: 'Cleaning docs destination...',
      syncing: '\nSyncing documentation from /docs...',
      from: SOURCE_DIR,
      to: DEST_DIR,
    },
    {
      banner: [`\nBlog Source: ${BLOG_SOURCE_DIR}`, `Blog Destination: ${BLOG_DEST_DIR}\n`],
      cleaning: 'Cleaning blog destination...',
      syncing: '\nSyncing blog posts from /docs/blog...',
      from: BLOG_SOURCE_DIR,
      to: BLOG_DEST_DIR,
    },
  ];

  // Passes run strictly in order: docs first, blog second.
  const [docsStats, blogStats] = passes.map((pass) => {
    pass.banner.forEach((line) => console.log(line));

    console.log(pass.cleaning);
    cleanDestination(pass.to);

    console.log(pass.syncing);
    return syncDirectory(pass.from, pass.to);
  });

  console.log(`\nSync complete!`);
  console.log(` Docs: ${docsStats.files} files, ${docsStats.dirs} directories`);
  console.log(` Blog: ${blogStats.files} files, ${blogStats.dirs} directories`);
}

// Run the sync
main();