blob: d07227eb528f6b772d39612de575181055f0e8f8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Html2Markdown;
using JavaDocToMarkdownConverter.Formatters;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace JavaDocToMarkdownConverter
{
public class DocConverter
{
public DocConverter()
{
_defaultConverter = new Converter(new CustomMarkdownScheme());
}
private readonly Converter _defaultConverter;
/// <summary>
///
/// </summary>
/// <param name="inputDirectory">The /lucene directory in the Java source code.</param>
/// <param name="rootOutputDirectory">The root directory of the Lucene.Net repository.</param>
public void Convert(string inputDirectory, string rootOutputDirectory)
{
var dir = new DirectoryInfo(inputDirectory);
if (!dir.Exists)
{
Console.WriteLine("Directory Doesn't Exist: '" + dir.FullName + "'");
return;
}
foreach (var file in dir.EnumerateFiles("overview.html", SearchOption.AllDirectories))
{
ConvertDoc(file.FullName, rootOutputDirectory);
}
foreach (var file in dir.EnumerateFiles("package.html", SearchOption.AllDirectories))
{
ConvertDoc(file.FullName, rootOutputDirectory);
}
}
private void ConvertDoc(string inputDoc, string rootOutputDirectory)
{
var outputDir = GetOutputDirectory(inputDoc, rootOutputDirectory);
var outputFile = Path.Combine(outputDir, GetOuputFilename(inputDoc));
var inputFileInfo = new FileInfo(inputDoc);
if (!Directory.Exists(outputDir))
{
Console.WriteLine("Output Directory Doesn't Exist: '" + outputDir + "'");
return;
}
if (!inputFileInfo.Exists)
{
Console.WriteLine("Input File Doesn't Exist: '" + inputDoc + "'");
return;
}
var ns = ExtractNamespaceFromFile(outputFile);
// we might need to convert this namespace to an explicit value
if (PackageNamespaceToStandalone.TryGetValue(ns, out var standaloneNs))
ns = standaloneNs;
// get the MD converter for the namespace
var converter = _defaultConverter;
if (CustomConverters.TryGetValue(ns, out var customConverter))
converter = customConverter;
var markdown = converter.ConvertFile(inputDoc);
if (JavaDocFormatters.CustomReplacers.TryGetValue(ns, out var replacers))
{
foreach (var r in replacers)
markdown = r.Replace(markdown);
}
var doc = new ConvertedDocument(inputFileInfo, new FileInfo(outputFile), ns, markdown);
if (JavaDocFormatters.CustomProcessors.TryGetValue(ns, out var processor))
{
processor(doc);
}
markdown = doc.Markdown; // it may have changed
var fileContent = AppendYamlHeader(ns, markdown);
File.WriteAllText(outputFile, fileContent, Encoding.UTF8);
}
/// <summary>
/// Custom markdown converters for certain namespaces
/// </summary>
private static readonly Dictionary<string, Converter> CustomConverters = new Dictionary<string, Converter>(StringComparer.InvariantCultureIgnoreCase)
{
["Lucene.Net.Benchmarks"] = new Converter(new CustomMarkdownScheme(new ElementWhitespacePrefixReplacer("div"))),
["Lucene.Net.Replicator"] = new Converter(new CustomMarkdownScheme(new AllWhitespacePrefixReplacer()))
};
/// <summary>
/// Explicit mappings of namespaced package files to standalone files
/// </summary>
/// <remarks>
/// This is really edge case stuff
/// </remarks>
private static readonly Dictionary<string, string> PackageNamespaceToStandalone = new Dictionary<string, string>(StringComparer.InvariantCultureIgnoreCase)
{
["Lucene.Net.Search.Grouping"] = "Lucene.Net.Grouping",
["Lucene.Net.Memory"] = "Lucene.Net.Index.Memory",
["Lucene.Net.Queryparser"] = "Lucene.Net.QueryParser",
["Lucene.Net.Testframework"] = "Lucene.Net.TestFramework",
["Lucene.Net.Benchmark"] = "Lucene.Net.Benchmarks"
};
/// <summary>
/// These aren't real namespaces but they have overview.md files and in this case we need to prepend an H1 header
/// to the overview.md file.
/// </summary>
private static readonly List<string> StandaloneOverviews = new List<string>
{
"Lucene.Net",
"Lucene.Net.Analysis.Common",
"Lucene.Net.Analysis.Morfologik",
"Lucene.Net.Highlighter",
"Lucene.Net.Grouping",
"Lucene.Net.QueryParser",
"Lucene.Net.Sandbox",
"Lucene.Net.Suggest",
"Lucene.Net.TestFramework",
"Lucene.Net.Benchmarks",
};
/// <summary>
/// Appends the YAML front-matter header
/// </summary>
/// <param name="ns"></param>
/// <param name="fileContent"></param>
/// <returns></returns>
private string AppendYamlHeader(string ns, string fileContent)
{
var sb = new StringBuilder();
sb.AppendLine("---");
sb.Append("uid: ");
sb.AppendLine(ns);
// Add "title" yaml front-matter if a standalone file
if (StandaloneOverviews.Contains(ns))
{
sb.Append("title: ");
sb.AppendLine(ns);
}
sb.AppendLine("summary: *content");
sb.AppendLine("---");
sb.AppendLine();
return sb + fileContent;
}
private static Regex CSharpNamespaceMatch = new Regex(@"^\s*namespace\s*([\w\.]+)", RegexOptions.Multiline | RegexOptions.Compiled);
/// <summary>
/// Normally the files would be in the same folder name as their namespace but this isn't the case so we need to try to figure it out
/// </summary>
/// <param name="outputFile"></param>
/// <returns></returns>
private string ExtractNamespaceFromFile(string outputFile)
{
var folder = Path.GetDirectoryName(outputFile);
// First check if there are c# files beside this file
var csharpFiles = Directory.GetFiles(folder, "*.cs");
if (csharpFiles.Length > 0)
{
// extract the namespace from a file
var csharpFile = File.ReadAllText(csharpFiles[0]);
var nsMatches = CSharpNamespaceMatch.Matches(csharpFile);
if (nsMatches.Count > 0)
{
if (nsMatches[0].Groups.Count == 2)
return nsMatches[0].Groups[1].Value;
}
}
// Else we'll fall back to trying to determine namespace by folder
var folderParts = folder.Split(Path.DirectorySeparatorChar);
var index = folderParts.Length - 1;
for (int i = index; i >= 0; i--)
{
if (folderParts[i].StartsWith("Lucene.Net", StringComparison.InvariantCultureIgnoreCase))
{
index = i;
break;
}
}
var nsParts = new List<string>();
for (var i = index; i < folderParts.Length; i++)
{
var innerParts = folderParts[i].Split('.');
foreach (var innerPart in innerParts)
{
nsParts.Add(innerPart);
}
}
var textInfo = new CultureInfo("en-US", false).TextInfo;
return string.Join(".", nsParts.Select(x => textInfo.ToTitleCase(x)).ToArray());
}
private string GetOuputFilename(string inputDoc)
{
return Path.GetFileNameWithoutExtension(inputDoc) + ".md";
}
private string GetOutputDirectory(string inputDoc, string rootOutputDirectory)
{
string project = Path.Combine(rootOutputDirectory, @"src\Lucene.Net");
var file = new FileInfo(inputDoc);
var dir = file.Directory.FullName;
var segments = dir.Split(Path.DirectorySeparatorChar);
int i;
bool inLucene = false;
string lastSegment = string.Empty;
for (i = 0; i < segments.Length; i++)
{
var segment = segments[i];
if (segment.Equals("lucene"))
{
inLucene = true;
continue;
}
if (!inLucene)
continue;
if (segment.Equals("core"))
break;
project += "." + segment;
lastSegment = segment;
if (segment.Equals("analysis"))
continue;
break;
}
//if (project.EndsWith("analysis.icu", StringComparison.OrdinalIgnoreCase))
//{
// project = project.Replace("Lucene.Net.analysis.icu", @"dotnet\Lucene.Net.ICU");
//}
if (project.EndsWith("test-framework", StringComparison.OrdinalIgnoreCase))
{
project = project.Replace("test-framework", "TestFramework");
}
// Now we have the project directory and segment that it equates to.
// We need to walk up the tree and ignore the java-ish deep directories.
var ignore = new List<string>() { "src", "java", "org", "apache", "lucene" };
string path = project;
for (int j = i + 1; j < segments.Length; j++)
{
var segment = segments[j];
if (ignore.Contains(segment))
{
continue;
}
// Special Cases
switch (lastSegment.ToLower())
{
case "morfologik":
if (segment.Equals("analysis")) continue;
if (segment.Equals("morfologik")) continue;
break;
case "stempel":
if (segment.Equals("analysis")) continue;
if (segment.Equals("egothor")) segment = "Egothor.Stemmer";
if (segment.Equals("stemmer")) continue;
break;
case "kuromoji":
if (segment.Equals("analysis") || segment.Equals("ja")) continue;
break;
case "phonetic":
if (segment.Equals("analysis") || segment.Equals("phonetic")) continue;
break;
case "smartcn":
if (segment.Equals("analysis") || segment.Equals("cn") || segment.Equals("smart")) continue;
break;
case "benchmark":
if (segment.Equals("benchmark")) continue;
break;
case "classification":
if (segment.Equals("classification")) continue;
break;
case "codecs":
if (segment.Equals("codecs")) continue;
break;
case "demo":
if (segment.Equals("demo")) continue;
break;
case "expressions":
if (segment.Equals("expressions")) continue;
break;
case "facet":
if (segment.Equals("facet")) continue;
break;
case "grouping":
if (segment.Equals("search") || segment.Equals("grouping")) continue;
break;
case "highlighter":
if (segment.Equals("search")) continue;
break;
case "join":
if (segment.Equals("search") || segment.Equals("join")) continue;
break;
case "memory":
if (segment.Equals("index") || segment.Equals("memory")) continue;
break;
case "queries":
if (segment.Equals("queries")) continue;
if (segment.Equals("valuesource")) segment = "ValueSources";
break;
case "queryparser":
if (segment.Equals("queryparser")) continue;
break;
case "replicator":
if (segment.Equals("replicator")) continue;
break;
case "sandbox":
if (segment.Equals("sandbox")) continue;
break;
case "spatial":
if (segment.Equals("spatial")) continue;
break;
case "suggest":
if (segment.Equals("search")) continue;
break;
}
path = Path.Combine(path, segment);
}
return path;
}
}
}