| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.analysis.icu; |
| |
| |
| import java.io.BufferedReader; |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.FileOutputStream; |
| import java.io.FilenameFilter; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.nio.charset.StandardCharsets; |
| |
| import com.ibm.icu.text.RuleBasedBreakIterator; |
| |
| /** |
| * Command-line utility to converts RuleBasedBreakIterator (.rbbi) files into |
| * binary compiled form (.brk). |
| */ |
| public class RBBIRuleCompiler { |
| |
| static String getRules(File ruleFile) throws IOException { |
| StringBuilder rules = new StringBuilder(); |
| InputStream in = new FileInputStream(ruleFile); |
| BufferedReader cin = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); |
| String line = null; |
| while ((line = cin.readLine()) != null) { |
| if (!line.startsWith("#")) |
| rules.append(line); |
| rules.append('\n'); |
| } |
| cin.close(); |
| in.close(); |
| return rules.toString(); |
| } |
| |
| static void compile(File srcDir, File destDir) throws Exception { |
| File files[] = srcDir.listFiles(new FilenameFilter() { |
| public boolean accept(File dir, String name) { |
| return name.endsWith("rbbi"); |
| }}); |
| if (files == null) throw new IOException("Path does not exist: " + srcDir); |
| for (int i = 0; i < files.length; i++) { |
| File file = files[i]; |
| File outputFile = new File(destDir, |
| file.getName().replaceAll("rbbi$", "brk")); |
| String rules = getRules(file); |
| System.err.print("Compiling " + file.getName() + " to " |
| + outputFile.getName() + ": "); |
| /* |
| * if there is a syntax error, compileRules() may succeed. the way to |
| * check is to try to instantiate from the string. additionally if the |
| * rules are invalid, you can get a useful syntax error. |
| */ |
| try { |
| new RuleBasedBreakIterator(rules); |
| } catch (IllegalArgumentException e) { |
| /* |
| * do this intentionally, so you don't get a massive stack trace |
| * instead, get a useful syntax error! |
| */ |
| System.err.println(e.getMessage()); |
| System.exit(1); |
| } |
| FileOutputStream os = new FileOutputStream(outputFile); |
| RuleBasedBreakIterator.compileRules(rules, os); |
| os.close(); |
| System.err.println(outputFile.length() + " bytes."); |
| } |
| } |
| |
| public static void main(String args[]) throws Exception { |
| if (args.length < 2) { |
| System.err.println("Usage: RBBIRuleComputer <sourcedir> <destdir>"); |
| System.exit(1); |
| } |
| compile(new File(args[0]), new File(args[1])); |
| System.exit(0); |
| } |
| } |