| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.tika.langdetect; |
| |
import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.IOUtils;
| |
/**
 * Base class for language detector tests.
 * <p>
 * Provides helpers for discovering which languages have test data on the
 * classpath and for copying (optionally truncated) test text into a
 * {@link Writer}.
 */
public abstract class LanguageDetectorTest {

    /** Size, in chars, of the scratch buffer used by {@code copyAtMost}. */
    private static final int BUFFER_SIZE = 4096;

    /** Name of the resource listing the candidate language codes. */
    private static final String LANGUAGE_CODES_RESOURCE = "language-codes.txt";

    /**
     * Builds the absolute classpath location of the test text for a language.
     *
     * @param language language code
     * @return resource path of the form {@code /language-tests/<language>.test}
     */
    private static String testResourcePath(String language) {
        return "/language-tests/" + language + ".test";
    }

    /**
     * Returns the language codes listed in {@code language-codes.txt} for which
     * a test resource exists.
     * <p>
     * The listing is looked up relative to the runtime class of this instance.
     * Blank lines and lines starting with {@code #} are skipped; for every other
     * line the text before the first tab is taken as the language code.
     *
     * @return the language codes that have test data, in listing order
     * @throws IOException if the listing is missing or cannot be read
     */
    protected String[] getTestLanguages() throws IOException {
        List<String> result = new ArrayList<>();

        try (InputStream is = this.getClass().getResourceAsStream(LANGUAGE_CODES_RESOURCE)) {
            if (is == null) {
                // Fail with a descriptive error instead of a NullPointerException.
                throw new IOException("Resource not found: " + LANGUAGE_CODES_RESOURCE
                        + " (relative to " + this.getClass().getName() + ")");
            }

            BufferedReader reader = new BufferedReader(new InputStreamReader(is, UTF_8));
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty() || line.startsWith("#")) {
                    continue;
                }

                String language = line.split("\t")[0];
                if (hasTestLanguage(language)) {
                    result.add(language);
                }
            }
        }

        return result.toArray(new String[0]);
    }

    /**
     * Tells whether a test resource exists for the given language.
     *
     * @param language language code
     * @return {@code true} if {@code /language-tests/<language>.test} is on the classpath
     */
    protected boolean hasTestLanguage(String language) {
        // Only existence matters, so look the resource up without opening a stream.
        return LanguageDetectorTest.class.getResource(testResourcePath(language)) != null;
    }

    /**
     * Writes the complete test text for a language to the given writer.
     *
     * @param language language code
     * @param writer   destination for the text
     * @throws IOException if the test resource is missing or copying fails
     */
    protected void writeTo(String language, Writer writer) throws IOException {
        writeTo(language, writer, Integer.MAX_VALUE);
    }

    /**
     * Writes at most {@code limit} chars of the test text for a language to the
     * given writer. The resource is decoded as UTF-8.
     *
     * @param language language code
     * @param writer   destination for the text
     * @param limit    maximum number of chars to write
     * @throws IOException if the test resource is missing or copying fails
     */
    protected void writeTo(String language, Writer writer, int limit) throws IOException {
        String path = testResourcePath(language);
        try (InputStream stream = LanguageDetectorTest.class.getResourceAsStream(path)) {
            if (stream == null) {
                // Fail with a descriptive error instead of a NullPointerException.
                throw new IOException("Test resource not found: " + path);
            }
            copyAtMost(new InputStreamReader(stream, UTF_8), writer, limit);
        }
    }

    /**
     * Copies up to {@code limit} chars from {@code input} to {@code output}.
     * <p>
     * No more than {@code limit} chars are requested from {@code input}, so
     * whatever follows the copied prefix is left unread in the reader. A
     * {@code limit} of zero or less copies nothing and does not touch
     * {@code input}.
     *
     * @param input  source of chars
     * @param output destination for the chars
     * @param limit  maximum number of chars to copy
     * @return the number of chars actually copied
     * @throws IOException if reading or writing fails
     */
    protected int copyAtMost(Reader input, Writer output, int limit) throws IOException {
        char[] buffer = new char[BUFFER_SIZE];
        int count = 0;

        // Check the limit before reading so the reader is never consumed past it.
        while (count < limit) {
            int n = input.read(buffer, 0, Math.min(buffer.length, limit - count));
            if (n < 0) {
                break;
            }
            output.write(buffer, 0, n);
            count += n;
        }

        return count;
    }

}