| #!/usr/bin/env node |
| /** |
| * Replace specific English terms in Japanese docs with preferred Japanese terms, |
| * while skipping fenced code blocks and inline code. |
| * |
| * Usage: |
| * node scripts/i18n/replace-ja-terms.js <targetDir> |
| */ |
| const fs = require('fs'); |
| const path = require('path'); |
| |
| const targetDir = process.argv[2]; |
| if (!targetDir) { |
| console.error('Usage: node scripts/i18n/replace-ja-terms.js <targetDir>'); |
| process.exit(1); |
| } |
| |
| function escapeRegExp(input) { |
| return input.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); |
| } |
| |
| /** |
| * Glossary mapping sourced from the provided screenshot. |
| * Keep keys exact/case-sensitive; we apply word-boundary matching for single words. |
| */ |
| const replacements = [ |
| // Phrases |
| { from: 'Constraints and Limitations', to: '制約と制限', kind: 'phrase' }, |
| { from: 'Log Storage and Analysis', to: 'ログ分析', kind: 'phrase' }, |
| { from: 'Data Write-back', to: 'データ書き戻し', kind: 'phrase' }, |
| { from: 'Query Acceleration', to: 'クエリ加速', kind: 'phrase' }, |
| { from: 'Supported Operations in Hive', to: 'Hiveでサポートされている操作', kind: 'phrase' }, |
| { from: 'Supported Hive Versions', to: 'サポートされるHiveバージョン', kind: 'phrase' }, |
| { from: 'Concurrent Writing Mechanism', to: '同時書込みメカニズム', kind: 'phrase' }, |
| { from: 'Transactional Mechanism', to: 'トランザクション', kind: 'phrase' }, |
| { from: 'On This Page', to: 'このページで', kind: 'phrase' }, |
| { from: 'Branch and Tag', to: 'ブランチとタグ', kind: 'phrase' }, |
| { from: 'Write operations', to: '書き込み操作', kind: 'phrase' }, |
| { from: 'Schema changes', to: 'スキーマ変更', kind: 'phrase' }, |
| { from: 'Time zone', to: 'タイムゾーン', kind: 'phrase' }, |
| { from: 'Default value', to: 'デフォルト値', kind: 'phrase' }, |
| { from: 'Parameter Name', to: 'パラメータ名', kind: 'phrase' }, |
| { from: 'Parameter Type', to: 'パラメータ型', kind: 'phrase' }, |
| { from: 'Format Name', to: 'フォーマット名', kind: 'phrase' }, |
| { from: 'Filter Predicate Pushdown', to: 'フィルタ述語プッシュダウン', kind: 'phrase' }, |
| { from: 'Driver Package Security', to: 'ドライバーパッケージセキュリティ', kind: 'phrase' }, |
| { from: 'Permission Policies', to: '権限ポリシー', kind: 'phrase' }, |
| { from: 'Data Preparation', to: 'データ準備', kind: 'phrase' }, |
| { from: 'Data Insertion and Storage', to: 'データの挿入と保存', kind: 'phrase' }, |
| |
| // Single words |
| { from: 'Overview', to: '概要', kind: 'word' }, |
| { from: 'Troubleshooting', to: 'トラブルシューティング', kind: 'word' }, |
| { from: 'Configuration', to: '設定', kind: 'word' }, |
| { from: 'Kubernetes', to: 'Kubernetes', kind: 'word' }, |
| { from: 'Lakehouse', to: 'レイクハウス', kind: 'word' }, |
| { from: 'Catalog', to: 'カタログ', kind: 'word' }, |
| { from: 'Properties', to: 'プロパティ', kind: 'word' }, |
| { from: 'Appendix', to: '付録', kind: 'word' }, |
| { from: 'Operations', to: '操作', kind: 'word' }, |
| { from: 'Description', to: '詳細', kind: 'word' }, |
| { from: 'Scenario', to: 'シナリオ', kind: 'word' }, |
| { from: 'Type', to: 'タイプ', kind: 'word' }, |
| { from: 'Comment', to: 'コメント', kind: 'word' }, |
| { from: 'Update', to: 'アップデート', kind: 'word' }, |
| { from: 'Cluster', to: 'クラスター', kind: 'word' }, |
| { from: 'Integration', to: '統合', kind: 'word' }, |
| { from: 'Permissions', to: '許可', kind: 'word' }, |
| { from: 'Authentication', to: '認証', kind: 'word' }, |
| { from: 'Notes', to: '注', kind: 'word' }, |
| { from: 'Summary', to: 'まとめ', kind: 'word' }, |
| { from: 'Sample', to: 'サンプル', kind: 'word' }, |
| { from: 'Partition', to: 'パーティション', kind: 'word' }, |
| { from: 'Bucket', to: 'バケット', kind: 'word' }, |
| { from: 'Server', to: 'サーバー', kind: 'word' }, |
| { from: 'Agent', to: 'エージェント', kind: 'word' }, |
| { from: 'Compact', to: 'コンパクション', kind: 'word' }, |
| { from: 'Strategy', to: 'ストラテジー', kind: 'word' }, |
| { from: 'Table', to: 'table', kind: 'word' }, |
| ]; |
| |
| const compiled = replacements |
| .slice() |
| .sort((a, b) => b.from.length - a.from.length) |
| .map(({ from, to, kind }) => { |
| const pattern = |
| kind === 'word' |
| ? new RegExp(`\\b${escapeRegExp(from)}\\b`, 'g') |
| : new RegExp(escapeRegExp(from), 'g'); |
| return { from, to, pattern }; |
| }); |
| |
| function walk(dir) { |
| const entries = fs.readdirSync(dir, { withFileTypes: true }); |
| const files = []; |
| for (const entry of entries) { |
| const full = path.join(dir, entry.name); |
| if (entry.isDirectory()) files.push(...walk(full)); |
| else files.push(full); |
| } |
| return files; |
| } |
| |
| function isMarkdownFile(filePath) { |
| return filePath.endsWith('.md') || filePath.endsWith('.mdx'); |
| } |
| |
| function replaceOutsideInlineCode(line, replacer) { |
| let out = ''; |
| let i = 0; |
| let inCode = false; |
| let codeDelimiterLen = 0; |
| |
| while (i < line.length) { |
| if (line[i] === '`') { |
| let j = i; |
| while (j < line.length && line[j] === '`') j++; |
| const runLen = j - i; |
| |
| if (!inCode) { |
| inCode = true; |
| codeDelimiterLen = runLen; |
| } else if (runLen === codeDelimiterLen) { |
| inCode = false; |
| codeDelimiterLen = 0; |
| } |
| |
| out += line.slice(i, j); |
| i = j; |
| continue; |
| } |
| |
| let j = i; |
| while (j < line.length && line[j] !== '`') j++; |
| const chunk = line.slice(i, j); |
| out += inCode ? chunk : replacer(chunk); |
| i = j; |
| } |
| |
| return out; |
| } |
| |
| function replaceOutsideInlineCodeAndTags(line, replacer) { |
| let out = ''; |
| let i = 0; |
| let inCode = false; |
| let codeDelimiterLen = 0; |
| |
| while (i < line.length) { |
| const ch = line[i]; |
| |
| if (ch === '`') { |
| let j = i; |
| while (j < line.length && line[j] === '`') j++; |
| const runLen = j - i; |
| |
| if (!inCode) { |
| inCode = true; |
| codeDelimiterLen = runLen; |
| } else if (runLen === codeDelimiterLen) { |
| inCode = false; |
| codeDelimiterLen = 0; |
| } |
| |
| out += line.slice(i, j); |
| i = j; |
| continue; |
| } |
| |
| if (!inCode && ch === '<') { |
| const next = line[i + 1]; |
| // Treat MDX/HTML tag markup as code-like and skip replacements inside it. |
| if (next && /[A-Za-z/!]/.test(next)) { |
| let j = i + 1; |
| let quote = null; |
| while (j < line.length) { |
| const c = line[j]; |
| if (quote) { |
| if (c === quote) quote = null; |
| j++; |
| continue; |
| } |
| if (c === '"' || c === "'") { |
| quote = c; |
| j++; |
| continue; |
| } |
| if (c === '>') { |
| j++; |
| break; |
| } |
| j++; |
| } |
| out += line.slice(i, j); |
| i = j; |
| continue; |
| } |
| |
| // Not a tag (e.g. comparison operator). Emit as-is to avoid infinite loop. |
| out += ch; |
| i++; |
| continue; |
| } |
| |
| let j = i; |
| if (inCode) { |
| while (j < line.length && line[j] !== '`') j++; |
| } else { |
| while (j < line.length && line[j] !== '`' && line[j] !== '<') j++; |
| } |
| const chunk = line.slice(i, j); |
| out += inCode ? chunk : replacer(chunk); |
| i = j; |
| } |
| |
| return out; |
| } |
| |
| function applyReplacements(text) { |
| const lines = text.split('\n'); |
| let inFence = false; |
| let fenceChar = null; // ` or ~ |
| let fenceLen = 0; |
| let changed = false; |
| |
| const outLines = lines.map((line) => { |
| if (!inFence) { |
| // CommonMark: opening fence can be indented up to 3 spaces and may include an info string. |
| const m = line.match(/^( {0,3})(`{3,}|~{3,})(.*)$/); |
| if (m) { |
| inFence = true; |
| fenceChar = m[2][0]; |
| fenceLen = m[2].length; |
| return line; |
| } |
| } else { |
| // Closing fence: up to 3 spaces, same fence char, length >= opening, and no info string. |
| const m = line.match(/^( {0,3})(`{3,}|~{3,})\\s*$/); |
| if (m && m[2][0] === fenceChar && m[2].length >= fenceLen) { |
| inFence = false; |
| fenceChar = null; |
| fenceLen = 0; |
| } |
| return line; |
| } |
| |
| const replaced = replaceOutsideInlineCodeAndTags(line, (chunk) => { |
| let next = chunk; |
| for (const { pattern, to } of compiled) next = next.replace(pattern, to); |
| return next; |
| }); |
| |
| if (replaced !== line) changed = true; |
| return replaced; |
| }); |
| |
| return { text: outLines.join('\n'), changed }; |
| } |
| |
| function main() { |
| const absTarget = path.isAbsolute(targetDir) |
| ? targetDir |
| : path.join(process.cwd(), targetDir); |
| |
| // Walk iteratively to reduce memory footprint, and show progress for large trees. |
| const stack = [absTarget]; |
| let processed = 0; |
| let touched = 0; |
| let discoveredMarkdown = 0; |
| |
| while (stack.length) { |
| const current = stack.pop(); |
| const entries = fs.readdirSync(current, { withFileTypes: true }); |
| |
| for (const entry of entries) { |
| const full = path.join(current, entry.name); |
| if (entry.isDirectory()) { |
| stack.push(full); |
| continue; |
| } |
| |
| if (!isMarkdownFile(full)) continue; |
| discoveredMarkdown++; |
| |
| const raw = fs.readFileSync(full, 'utf8'); |
| const { text, changed } = applyReplacements(raw); |
| if (changed) { |
| fs.writeFileSync(full, text, 'utf8'); |
| touched++; |
| } |
| |
| processed++; |
| if (processed % 200 === 0) { |
| console.log(`Processed: ${processed} files (changed: ${touched})`); |
| } |
| } |
| } |
| |
| console.log(`Updated files: ${touched}/${discoveredMarkdown}`); |
| } |
| |
| main(); |