// blob: 0570a170600e8b8242164f0abe3b13a7f1f48d71 [file] [log] [blame]
using Lucene.Net.Analysis.Ja.Dict;
using Lucene.Net.Diagnostics;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
namespace Lucene.Net.Analysis.Ja
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// TODO: would be nice to show the 2nd best path in a different
// color...
/// <summary>
/// Outputs the dot (graphviz) string for the viterbi lattice.
/// <para/>
/// The constructor writes the graph header and the invisible <c>init</c> node,
/// each <c>OnBacktrace</c> call appends the nodes and arcs of one backtraced
/// fragment, and <see cref="Finish()"/> returns the complete graph text.
/// All numbers are formatted with the invariant culture so the output does not
/// depend on the culture of the calling thread.
/// </summary>
public class GraphvizFormatter
{
    private const string BOS_LABEL = "BOS";
    private const string EOS_LABEL = "EOS";
    private const string FONT_NAME = "Helvetica";

    // Source of the connection cost printed on every arc label.
    private readonly ConnectionCosts costs;

    // Maps "from" node ID -> "to" node ID for the arcs on the best path of the
    // fragment currently being formatted; rebuilt on every backtrace.
    private readonly IDictionary<string, string> bestPathMap;

    // Accumulates the graph body across backtraces.
    private readonly StringBuilder sb = new StringBuilder();

    /// <summary>
    /// Creates a formatter and writes the graph header plus the invisible
    /// <c>init</c> node with its BOS arc to node <c>0.0</c>.
    /// </summary>
    /// <param name="costs">Connection costs used to label the arcs.</param>
    public GraphvizFormatter(ConnectionCosts costs)
    {
        this.costs = costs;
        this.bestPathMap = new Dictionary<string, string>();
        sb.Append(FormatHeader());
        sb.Append(" init [style=invis]\n");
        sb.Append(" init -> 0.0 [label=\"" + BOS_LABEL + "\"]\n");
    }

    /// <summary>
    /// Returns the graph accumulated so far, terminated by the closing brace.
    /// </summary>
    /// <returns>The dot (graphviz) text of the lattice.</returns>
    public virtual string Finish()
    {
        // Do not append the trailer to the shared buffer: doing so would make a
        // second call return a graph with duplicated closing braces.
        return sb.ToString() + FormatTrailer();
    }

    /// <summary>
    /// Backtraces another incremental fragment: records its best path and
    /// appends its nodes and arcs to the graph.
    /// </summary>
    /// <param name="tok">Tokenizer used to look up the dictionary of each arc.</param>
    /// <param name="positions">Lattice positions to read from.</param>
    /// <param name="lastBackTracePos">Exclusive lower bound; only positions greater than this are output.</param>
    /// <param name="endPosData">Last position of the fragment (inclusive).</param>
    /// <param name="fromIDX">Index within <paramref name="endPosData"/> at which the best path ends.</param>
    /// <param name="fragment">Characters of the fragment; index 0 corresponds to <paramref name="lastBackTracePos"/>.</param>
    /// <param name="isEnd">When <c>true</c>, also appends the invisible <c>fini</c> node and its EOS arc.</param>
    internal void OnBacktrace(JapaneseTokenizer tok, WrappedPositionArray positions, int lastBackTracePos, Position endPosData, int fromIDX, char[] fragment, bool isEnd)
    {
        SetBestPathMap(positions, lastBackTracePos, endPosData, fromIDX);
        sb.Append(FormatNodes(tok, positions, lastBackTracePos, endPosData, fragment));
        if (isEnd)
        {
            sb.Append(" fini [style=invis]\n");
            sb.Append(" ");
            sb.Append(GetNodeID(endPosData.pos, fromIDX));
            // Terminate the statement with a newline like every other statement,
            // so the closing brace ends up on its own line.
            sb.Append(" -> fini [label=\"" + EOS_LABEL + "\"]\n");
        }
    }

    // Records which arcs make up the best path, walking the back pointers from
    // (endPosData.pos, fromIDX) until startPos is reached.
    private void SetBestPathMap(WrappedPositionArray positions, int startPos, Position endPosData, int fromIDX)
    {
        bestPathMap.Clear();

        int pos = endPosData.pos;
        int bestIDX = fromIDX;
        while (pos > startPos)
        {
            Position posData = positions.Get(pos);

            int backPos = posData.backPos[bestIDX];
            int backIDX = posData.backIndex[bestIDX];

            string toNodeID = GetNodeID(pos, bestIDX);
            string fromNodeID = GetNodeID(backPos, backIDX);

            if (Debugging.AssertsEnabled)
            {
                // Each node may appear at most once as a source and once as a target.
                Debugging.Assert(!bestPathMap.ContainsKey(fromNodeID));
                Debugging.Assert(!bestPathMap.Values.Contains(toNodeID));
            }
            bestPathMap[fromNodeID] = toNodeID;

            pos = backPos;
            bestIDX = backIDX;
        }
    }

    // Formats the nodes and arcs for the positions in (startPos, endPosData.pos].
    // Arcs found in bestPathMap are highlighted.
    private string FormatNodes(JapaneseTokenizer tok, WrappedPositionArray positions, int startPos, Position endPosData, char[] fragment)
    {
        StringBuilder dot = new StringBuilder();

        // Output nodes
        for (int pos = startPos + 1; pos <= endPosData.pos; pos++)
        {
            Position posData = positions.Get(pos);
            for (int idx = 0; idx < posData.count; idx++)
            {
                dot.Append(" ");
                dot.Append(GetNodeID(pos, idx));
                dot.Append(" [label=\"");
                dot.Append(pos.ToString(CultureInfo.InvariantCulture));
                dot.Append(": ");
                dot.Append(posData.lastRightID[idx].ToString(CultureInfo.InvariantCulture));
                dot.Append("\"]\n");
            }
        }

        // Output arcs
        for (int pos = endPosData.pos; pos > startPos; pos--)
        {
            Position posData = positions.Get(pos);
            for (int idx = 0; idx < posData.count; idx++)
            {
                int backPos = posData.backPos[idx];
                int backIndex = posData.backIndex[idx];

                Position backPosData = positions.Get(backPos);
                string toNodeID = GetNodeID(pos, idx);
                string fromNodeID = GetNodeID(backPos, backIndex);

                dot.Append(" ");
                dot.Append(fromNodeID);
                dot.Append(" -> ");
                dot.Append(toNodeID);

                string bestTarget;
                bool onBestPath = bestPathMap.TryGetValue(fromNodeID, out bestTarget)
                    && toNodeID.Equals(bestTarget, StringComparison.Ordinal);

                string attrs;
                if (onBestPath)
                {
                    // This arc is on best path
                    attrs = " color=\"#40e050\" fontcolor=\"#40a050\" penwidth=3 fontsize=20";
                }
                else
                {
                    attrs = "";
                }

                IDictionary dict = tok.GetDict(posData.backType[idx]);
                int wordCost = dict.GetWordCost(posData.backID[idx]);
                int bgCost = costs.Get(backPosData.lastRightID[backIndex],
                    dict.GetLeftId(posData.backID[idx]));

                // fragment starts at startPos, hence the offset adjustment.
                string surfaceForm = new string(fragment,
                    backPos - startPos,
                    pos - backPos);

                dot.Append(" [label=\"");
                // The surface form is arbitrary input text; escape it so a quote
                // or backslash cannot terminate or distort the label.
                dot.Append(EscapeLabel(surfaceForm));
                dot.Append(' ');
                dot.Append(wordCost.ToString(CultureInfo.InvariantCulture));
                if (bgCost >= 0)
                {
                    // Negative values already carry their own sign.
                    dot.Append('+');
                }
                dot.Append(bgCost.ToString(CultureInfo.InvariantCulture));
                dot.Append("\"");
                dot.Append(attrs);
                dot.Append("]\n");
            }
        }
        return dot.ToString();
    }

    // Builds the "digraph" opening plus the default graph, edge and node attributes.
    private static string FormatHeader()
    {
        StringBuilder header = new StringBuilder();
        header.Append("digraph viterbi {\n");
        header.Append(" graph [ fontsize=30 labelloc=\"t\" label=\"\" splines=true overlap=false rankdir = \"LR\"];\n");
        //header.Append(" // A2 paper size\n");
        //header.Append(" size = \"34.4,16.5\";\n");
        //header.Append(" // try to fill paper\n");
        //header.Append(" ratio = fill;\n");
        header.Append(" edge [ fontname=\"" + FONT_NAME + "\" fontcolor=\"red\" color=\"#606060\" ]\n");
        header.Append(" node [ style=\"filled\" fillcolor=\"#e8e8f0\" shape=\"Mrecord\" fontname=\"" + FONT_NAME + "\" ]\n");
        return header.ToString();
    }

    // Closes the "digraph" block opened by FormatHeader.
    private static string FormatTrailer()
    {
        return "}";
    }

    // Node IDs have the form "<pos>.<idx>", formatted culture-independently.
    private static string GetNodeID(int pos, int idx)
    {
        return pos.ToString(CultureInfo.InvariantCulture) + "." + idx.ToString(CultureInfo.InvariantCulture);
    }

    // Escapes backslashes and double quotes for use inside a double-quoted dot label.
    private static string EscapeLabel(string text)
    {
        // Backslashes first, so the ones added for the quotes are not doubled.
        return text.Replace("\\", "\\\\").Replace("\"", "\\\"");
    }
}
}