| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* $Id$ */ |
| |
| package org.apache.fop.hyphenation; |
| |
| import java.io.BufferedReader; |
| import java.io.FileInputStream; |
| import java.io.FileOutputStream; |
| import java.io.FileReader; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.ObjectInputStream; |
| import java.io.ObjectOutputStream; |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.zip.ZipFile; |
| import java.util.zip.ZipEntry; |
| |
| /** |
| * This class provides some useful methods to print the structure of a HyphenationTree object |
| */ |
| public class HyphenationTreeAnalysis extends TernaryTreeAnalysis { |
| |
| /** |
| * The HyphenationTree object to analyse |
| */ |
| protected HyphenationTree ht; |
| |
| /** |
| * @param ht the HyphenationTree object |
| */ |
| public HyphenationTreeAnalysis(HyphenationTree ht) { |
| super(ht); |
| this.ht = ht; |
| } |
| |
| /** |
| * Class representing a node of the HyphenationTree object |
| */ |
| protected class Node extends TernaryTreeAnalysis.Node { |
| private String value = null; |
| |
| /** |
| * @param index the index of the node |
| */ |
| protected Node(int index) { |
| super(index); |
| if (isLeafNode) { |
| value = readValue().toString(); |
| } |
| } |
| |
| private StringBuffer readValue() { |
| StringBuffer s = new StringBuffer(); |
| int i = (int) ht.eq[index]; |
| byte v = ht.vspace.get(i); |
| for (; v != 0; v = ht.vspace.get(++i)) { |
| int c = (int) ((v >>> 4) - 1); |
| s.append(c); |
| c = (int) (v & 0x0f); |
| if (c == 0) { |
| break; |
| } |
| c = (c - 1); |
| s.append(c); |
| } |
| return s; |
| } |
| |
| /* (non-Javadoc) |
| * @see org.apache.fop.hyphenation.TernaryTreeAnalysis.Node#toNodeString() |
| */ |
| public String toNodeString() { |
| if (isLeafNode) { |
| StringBuffer s = new StringBuffer(); |
| s.append("-" + index); |
| if (isPacked) { |
| s.append(",=>'" + key + "'"); |
| } |
| s.append("," + value); |
| s.append(",leaf"); |
| return s.toString(); |
| } else { |
| return super.toNodeString(); |
| } |
| } |
| |
| /* (non-Javadoc) |
| * @see org.apache.fop.hyphenation.TernaryTreeAnalysis.Node#toCompactString() |
| */ |
| public String toCompactString() { |
| if (isLeafNode) { |
| StringBuffer s = new StringBuffer(); |
| s.append("-" + index); |
| if (isPacked) { |
| s.append(",=>'" + key + "'"); |
| } |
| s.append("," + value); |
| s.append(",leaf\n"); |
| return s.toString(); |
| } else { |
| return super.toCompactString(); |
| } |
| } |
| |
| /* (non-Javadoc) |
| * @see java.lang.Object#toString() |
| */ |
| public String toString() { |
| StringBuffer s = new StringBuffer(); |
| s.append(super.toString()); |
| if (isLeafNode) { |
| s.append("value: " + value + "\n"); |
| } |
| return s.toString(); |
| } |
| |
| } |
| |
| private void addNode(int nodeIndex, List strings, NodeString ns) { |
| int pos = ns.indent + ns.string.length() + 1; |
| Node n = new Node(nodeIndex); |
| ns.string.append(n.toNodeString()); |
| if (n.high != 0) { |
| ns.high.add(new Integer(pos)); |
| NodeString highNs = new NodeString(pos); |
| highNs.low.add(new Integer(pos)); |
| int index = strings.indexOf(ns); |
| strings.add(index, highNs); |
| addNode(n.high, strings, highNs); |
| } |
| if (n.low != 0) { |
| ns.low.add(new Integer(pos)); |
| NodeString lowNs = new NodeString(pos); |
| lowNs.high.add(new Integer(pos)); |
| int index = strings.indexOf(ns); |
| strings.add(index + 1, lowNs); |
| addNode(n.low, strings, lowNs); |
| } |
| if (!n.isLeafNode) { |
| addNode(n.equal, strings, ns); |
| } |
| |
| } |
| |
| /** |
| * Construct the tree representation of a list of node strings |
| * @param strings the list of node strings |
| * @return the string representing the tree |
| */ |
| public String toTree(List strings) { |
| StringBuffer indentString = new StringBuffer(); |
| for (int j = indentString.length(); j < ((NodeString) strings.get(0)).indent; ++j) { |
| indentString.append(' '); |
| } |
| StringBuffer tree = new StringBuffer(); |
| for (int i = 0; i < strings.size(); ++i) { |
| NodeString ns = (NodeString) strings.get(i); |
| if (indentString.length() > ns.indent) { |
| indentString.setLength(ns.indent); |
| } else { |
| // should not happen |
| for (int j = indentString.length(); j < ns.indent; ++j) { |
| indentString.append(' '); |
| } |
| } |
| tree.append(indentString); |
| tree.append(ns.string + "\n"); |
| |
| if (i + 1 == strings.size()) { |
| continue; |
| } |
| for (int j = 0; j < ns.low.size(); ++j) { |
| int pos = ((Integer) ns.low.get(j)).intValue(); |
| if (pos < indentString.length()) { |
| indentString.setCharAt(pos, '|'); |
| } else { |
| for (int k = indentString.length(); k < pos; ++k) { |
| indentString.append(' '); |
| } |
| indentString.append('|'); |
| } |
| } |
| tree.append(indentString + "\n"); |
| } |
| |
| return tree.toString(); |
| } |
| |
| /** |
| * Construct the tree representation of the HyphenationTree object |
| * @return the string representing the tree |
| */ |
| public String toTree() { |
| List strings = new ArrayList(); |
| NodeString ns = new NodeString(0); |
| strings.add(ns); |
| addNode(1, strings, ns); |
| return toTree(strings); |
| } |
| |
| /** |
| * Construct the compact node representation of the HyphenationTree object |
| * @return the string representing the tree |
| */ |
| public String toCompactNodes() { |
| StringBuffer s = new StringBuffer(); |
| for (int i = 1; i < ht.sc.length; ++i) { |
| if (i != 1) { |
| s.append("\n"); |
| } |
| s.append((new Node(i)).toCompactString()); |
| } |
| return s.toString(); |
| } |
| |
| /** |
| * Construct the node representation of the HyphenationTree object |
| * @return the string representing the tree |
| */ |
| public String toNodes() { |
| StringBuffer s = new StringBuffer(); |
| for (int i = 1; i < ht.sc.length; ++i) { |
| if (i != 1) { |
| s.append("\n"); |
| } |
| s.append((new Node(i)).toString()); |
| } |
| return s.toString(); |
| } |
| |
| /** |
| * Construct the printed representation of the HyphenationTree object |
| * @return the string representing the tree |
| */ |
| public String toString() { |
| StringBuffer s = new StringBuffer(); |
| |
| s.append("classes: \n"); |
| s.append((new TernaryTreeAnalysis(ht.classmap)).toString()); |
| |
| s.append("\npatterns: \n"); |
| s.append(super.toString()); |
| s.append("vspace: "); |
| for (int i = 0; i < ht.vspace.length(); ++i) { |
| byte v = ht.vspace.get(i); |
| if (v == 0) { |
| s.append("--"); |
| } else { |
| int c = (int) ((v >>> 4) - 1); |
| s.append(c); |
| c = (int) (v & 0x0f); |
| if (c == 0) { |
| s.append("-"); |
| } else { |
| c = (c - 1); |
| s.append(c); |
| } |
| } |
| } |
| s.append("\n"); |
| |
| return s.toString(); |
| } |
| |
| /** |
| * Provide interactive access to a HyphenationTree object and its representation methods |
| * @param args the arguments |
| */ |
| public static void main(String[] args) { |
| HyphenationTree ht = null; |
| HyphenationTreeAnalysis hta = null; |
| int minCharCount = 2; |
| BufferedReader in = new BufferedReader(new java.io.InputStreamReader(System.in)); |
| while (true) { |
| System.out.print("l:\tload patterns from XML\n" |
| + "L:\tload patterns from serialized object\n" |
| + "s:\tset minimun character count\n" |
| + "w:\twrite hyphenation tree to object file\n" |
| + "p:\tprint hyphenation tree to stdout\n" |
| + "n:\tprint hyphenation tree nodes to stdout\n" |
| + "c:\tprint compact hyphenation tree nodes to stdout\n" |
| + "t:\tprint tree representation of hyphenation tree to stdout\n" |
| + "h:\thyphenate\n" |
| + "f:\tfind pattern\n" |
| + "b:\tbenchmark\n" |
| + "q:\tquit\n\n" |
| + "Command:"); |
| try { |
| String token = in.readLine().trim(); |
| if (token.equals("f")) { |
| System.out.print("Pattern: "); |
| token = in.readLine().trim(); |
| System.out.println("Values: " + ht.findPattern(token)); |
| } else if (token.equals("s")) { |
| System.out.print("Minimum value: "); |
| token = in.readLine().trim(); |
| minCharCount = Integer.parseInt(token); |
| } else if (token.equals("l")) { |
| ht = new HyphenationTree(); |
| hta = new HyphenationTreeAnalysis(ht); |
| System.out.print("XML file name: "); |
| token = in.readLine().trim(); |
| try { |
| ht.loadPatterns(token); |
| } catch (HyphenationException e) { |
| e.printStackTrace(); |
| } |
| } else if (token.equals("L")) { |
| ObjectInputStream ois = null; |
| System.out.print("Object file name: "); |
| token = in.readLine().trim(); |
| try { |
| String[] parts = token.split(":"); |
| InputStream is = null; |
| if (parts.length == 1) { |
| is = new FileInputStream(token); |
| } else if (parts.length == 2) { |
| ZipFile jar = new ZipFile(parts[0]); |
| ZipEntry entry = new ZipEntry(jar.getEntry(parts[1])); |
| is = jar.getInputStream(entry); |
| } |
| ois = new ObjectInputStream(is); |
| ht = (HyphenationTree) ois.readObject(); |
| hta = new HyphenationTreeAnalysis(ht); |
| } catch (Exception e) { |
| e.printStackTrace(); |
| } finally { |
| if (ois != null) { |
| try { |
| ois.close(); |
| } catch (IOException e) { |
| //ignore |
| } |
| } |
| } |
| } else if (token.equals("w")) { |
| System.out.print("Object file name: "); |
| token = in.readLine().trim(); |
| ObjectOutputStream oos = null; |
| try { |
| oos = new ObjectOutputStream(new FileOutputStream(token)); |
| oos.writeObject(ht); |
| } catch (Exception e) { |
| e.printStackTrace(); |
| } finally { |
| if (oos != null) { |
| try { |
| oos.flush(); |
| } catch (IOException e) { |
| //ignore |
| } |
| try { |
| oos.close(); |
| } catch (IOException e) { |
| //ignore |
| } |
| } |
| } |
| } else if (token.equals("p")) { |
| System.out.print(hta); |
| } else if (token.equals("n")) { |
| System.out.print(hta.toNodes()); |
| } else if (token.equals("c")) { |
| System.out.print(hta.toCompactNodes()); |
| } else if (token.equals("t")) { |
| System.out.print(hta.toTree()); |
| } else if (token.equals("h")) { |
| System.out.print("Word: "); |
| token = in.readLine().trim(); |
| System.out.print("Hyphenation points: "); |
| System.out.println(ht.hyphenate(token, minCharCount, |
| minCharCount)); |
| } else if (token.equals("b")) { |
| if (ht == null) { |
| System.out.println("No patterns have been loaded."); |
| break; |
| } |
| System.out.print("Word list filename: "); |
| token = in.readLine().trim(); |
| long starttime = 0; |
| int counter = 0; |
| try { |
| BufferedReader reader = new BufferedReader(new FileReader(token)); |
| String line; |
| |
| starttime = System.currentTimeMillis(); |
| while ((line = reader.readLine()) != null) { |
| // System.out.print("\nline: "); |
| Hyphenation hyp = ht.hyphenate(line, minCharCount, |
| minCharCount); |
| if (hyp != null) { |
| String hword = hyp.toString(); |
| // System.out.println(line); |
| // System.out.println(hword); |
| } else { |
| // System.out.println("No hyphenation"); |
| } |
| counter++; |
| } |
| } catch (Exception ioe) { |
| System.out.println("Exception " + ioe); |
| ioe.printStackTrace(); |
| } |
| long endtime = System.currentTimeMillis(); |
| long result = endtime - starttime; |
| System.out.println(counter + " words in " + result |
| + " Milliseconds hyphenated"); |
| |
| } else if (token.equals("q")) { |
| break; |
| } |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| } |
| |
| } |
| |
| } |