| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.analysis.ja; |
| |
| |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| import org.apache.lucene.analysis.ja.JapaneseTokenizer.Position; |
| import org.apache.lucene.analysis.ja.JapaneseTokenizer.WrappedPositionArray; |
| import org.apache.lucene.analysis.ja.dict.ConnectionCosts; |
| import org.apache.lucene.analysis.ja.dict.Dictionary; |
| |
| |
// TODO: it would be nice to show the 2nd-best path in a different
// color...
| |
| /** |
| * Outputs the dot (graphviz) string for the viterbi lattice. |
| */ |
| public class GraphvizFormatter { |
| |
| private final static String BOS_LABEL = "BOS"; |
| |
| private final static String EOS_LABEL = "EOS"; |
| |
| private final static String FONT_NAME = "Helvetica"; |
| |
| private final ConnectionCosts costs; |
| |
| private final Map<String, String> bestPathMap; |
| |
| private final StringBuilder sb = new StringBuilder(); |
| |
| public GraphvizFormatter(ConnectionCosts costs) { |
| this.costs = costs; |
| this.bestPathMap = new HashMap<>(); |
| sb.append(formatHeader()); |
| sb.append(" init [style=invis]\n"); |
| sb.append(" init -> 0.0 [label=\"" + BOS_LABEL + "\"]\n"); |
| } |
| |
| public String finish() { |
| sb.append(formatTrailer()); |
| return sb.toString(); |
| } |
| |
| // Backtraces another incremental fragment: |
| void onBacktrace(JapaneseTokenizer tok, WrappedPositionArray positions, int lastBackTracePos, Position endPosData, int fromIDX, char[] fragment, boolean isEnd) { |
| setBestPathMap(positions, lastBackTracePos, endPosData, fromIDX); |
| sb.append(formatNodes(tok, positions, lastBackTracePos, endPosData, fragment)); |
| if (isEnd) { |
| sb.append(" fini [style=invis]\n"); |
| sb.append(" "); |
| sb.append(getNodeID(endPosData.pos, fromIDX)); |
| sb.append(" -> fini [label=\"" + EOS_LABEL + "\"]"); |
| } |
| } |
| |
| // Records which arcs make up the best bath: |
| private void setBestPathMap(WrappedPositionArray positions, int startPos, Position endPosData, int fromIDX) { |
| bestPathMap.clear(); |
| |
| int pos = endPosData.pos; |
| int bestIDX = fromIDX; |
| while (pos > startPos) { |
| final Position posData = positions.get(pos); |
| |
| final int backPos = posData.backPos[bestIDX]; |
| final int backIDX = posData.backIndex[bestIDX]; |
| |
| final String toNodeID = getNodeID(pos, bestIDX); |
| final String fromNodeID = getNodeID(backPos, backIDX); |
| |
| assert !bestPathMap.containsKey(fromNodeID); |
| assert !bestPathMap.containsValue(toNodeID); |
| bestPathMap.put(fromNodeID, toNodeID); |
| pos = backPos; |
| bestIDX = backIDX; |
| } |
| } |
| |
| private String formatNodes(JapaneseTokenizer tok, WrappedPositionArray positions, int startPos, Position endPosData, char[] fragment) { |
| |
| StringBuilder sb = new StringBuilder(); |
| // Output nodes |
| for (int pos = startPos+1; pos <= endPosData.pos; pos++) { |
| final Position posData = positions.get(pos); |
| for(int idx=0;idx<posData.count;idx++) { |
| sb.append(" "); |
| sb.append(getNodeID(pos, idx)); |
| sb.append(" [label=\""); |
| sb.append(pos); |
| sb.append(": "); |
| sb.append(posData.lastRightID[idx]); |
| sb.append("\"]\n"); |
| } |
| } |
| |
| // Output arcs |
| for (int pos = endPosData.pos; pos > startPos; pos--) { |
| final Position posData = positions.get(pos); |
| for(int idx=0;idx<posData.count;idx++) { |
| final Position backPosData = positions.get(posData.backPos[idx]); |
| final String toNodeID = getNodeID(pos, idx); |
| final String fromNodeID = getNodeID(posData.backPos[idx], posData.backIndex[idx]); |
| |
| sb.append(" "); |
| sb.append(fromNodeID); |
| sb.append(" -> "); |
| sb.append(toNodeID); |
| |
| final String attrs; |
| if (toNodeID.equals(bestPathMap.get(fromNodeID))) { |
| // This arc is on best path |
| attrs = " color=\"#40e050\" fontcolor=\"#40a050\" penwidth=3 fontsize=20"; |
| } else { |
| attrs = ""; |
| } |
| |
| final Dictionary dict = tok.getDict(posData.backType[idx]); |
| final int wordCost = dict.getWordCost(posData.backID[idx]); |
| final int bgCost = costs.get(backPosData.lastRightID[posData.backIndex[idx]], |
| dict.getLeftId(posData.backID[idx])); |
| |
| final String surfaceForm = new String(fragment, |
| posData.backPos[idx] - startPos, |
| pos - posData.backPos[idx]); |
| |
| sb.append(" [label=\""); |
| sb.append(surfaceForm); |
| sb.append(' '); |
| sb.append(wordCost); |
| if (bgCost >= 0) { |
| sb.append('+'); |
| } |
| sb.append(bgCost); |
| sb.append("\""); |
| sb.append(attrs); |
| sb.append("]\n"); |
| } |
| } |
| return sb.toString(); |
| } |
| |
| private String formatHeader() { |
| StringBuilder sb = new StringBuilder(); |
| sb.append("digraph viterbi {\n"); |
| sb.append(" graph [ fontsize=30 labelloc=\"t\" label=\"\" splines=true overlap=false rankdir = \"LR\"];\n"); |
| //sb.append(" // A2 paper size\n"); |
| //sb.append(" size = \"34.4,16.5\";\n"); |
| //sb.append(" // try to fill paper\n"); |
| //sb.append(" ratio = fill;\n"); |
| sb.append(" edge [ fontname=\"" + FONT_NAME + "\" fontcolor=\"red\" color=\"#606060\" ]\n"); |
| sb.append(" node [ style=\"filled\" fillcolor=\"#e8e8f0\" shape=\"Mrecord\" fontname=\"" + FONT_NAME + "\" ]\n"); |
| |
| return sb.toString(); |
| } |
| |
| private String formatTrailer() { |
| return "}"; |
| } |
| |
| private String getNodeID(int pos, int idx) { |
| return pos + "." + idx; |
| } |
| } |