ANY23-281 Build Policeman's Forbidden API Checker into Maven config (#147)
* ANY23-281 Build Policeman's Forbidden API Checker into Maven config
diff --git a/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java b/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java
index 170548e..ce346b5 100644
--- a/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java
+++ b/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java
@@ -140,6 +140,7 @@
}
throw new IllegalArgumentException(
String.format(
+ java.util.Locale.ROOT,
"Invalid value [%s] for flag property [%s]. Supported values are %s|%s",
value, propertyName, FLAG_PROPERTY_ON, FLAG_PROPERTY_OFF
)
@@ -163,6 +164,7 @@
if(logger.isDebugEnabled()) {
logger.debug(
String.format(
+ java.util.Locale.ROOT,
"Property '%s' is not declared in default configuration file [%s]",
propertyName,
DEFAULT_CONFIG_FILE
diff --git a/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java b/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java
index 055d39c..470a167 100644
--- a/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java
+++ b/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java
@@ -34,7 +34,7 @@
public synchronized String setProperty(String propertyName, String propertyValue) {
if( ! defineProperty(propertyName) )
throw new IllegalArgumentException(
- String.format("Property '%s' is not defined in configuration.", propertyName)
+ String.format(java.util.Locale.ROOT, "Property '%s' is not defined in configuration.", propertyName)
);
return (String) properties.setProperty(propertyName, propertyValue);
}
diff --git a/api/src/main/java/org/apache/any23/extractor/ExtractionException.java b/api/src/main/java/org/apache/any23/extractor/ExtractionException.java
index ffb86d7..f174582 100644
--- a/api/src/main/java/org/apache/any23/extractor/ExtractionException.java
+++ b/api/src/main/java/org/apache/any23/extractor/ExtractionException.java
@@ -17,8 +17,10 @@
package org.apache.any23.extractor;
+import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
+import java.nio.charset.StandardCharsets;
/**
* Defines a specific exception raised during the metadata extraction phase.
@@ -44,7 +46,7 @@
@Override
public void printStackTrace(PrintStream ps) {
- printExceptionResult( new PrintWriter(ps) );
+ printExceptionResult( new PrintWriter(new OutputStreamWriter(ps, StandardCharsets.UTF_8), true));
super.printStackTrace(ps);
}
diff --git a/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java b/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java
index 96a6218..9aeb6c7 100644
--- a/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java
+++ b/api/src/main/java/org/apache/any23/extractor/ExtractionParameters.java
@@ -243,13 +243,13 @@
private void checkPropertyExists(String propertyName) {
if(! configuration.defineProperty(propertyName) ) {
throw new IllegalArgumentException(
- String.format("Property '%s' is unknown and cannot be set.", propertyName)
+ String.format(java.util.Locale.ROOT, "Property '%s' is unknown and cannot be set.", propertyName)
);
}
}
private void validateValue(String desc, String value) {
if(value == null || value.trim().length() == 0)
- throw new IllegalArgumentException( String.format("Invalid %s: '%s'", desc, value) );
+ throw new IllegalArgumentException( String.format(java.util.Locale.ROOT, "Invalid %s: '%s'", desc, value) );
}
}
diff --git a/api/src/main/java/org/apache/any23/extractor/IssueReport.java b/api/src/main/java/org/apache/any23/extractor/IssueReport.java
index 04cb998..6b4a722 100644
--- a/api/src/main/java/org/apache/any23/extractor/IssueReport.java
+++ b/api/src/main/java/org/apache/any23/extractor/IssueReport.java
@@ -94,7 +94,7 @@
@Override
public String toString() {
- return String.format("%s: \t'%s' \t(%d,%d)", level, message, row, col);
+ return String.format(java.util.Locale.ROOT, "%s: \t'%s' \t(%d,%d)", level, message, row, col);
}
}
diff --git a/api/src/main/java/org/apache/any23/mime/MIMEType.java b/api/src/main/java/org/apache/any23/mime/MIMEType.java
index 76d2d9e..210cbbb 100644
--- a/api/src/main/java/org/apache/any23/mime/MIMEType.java
+++ b/api/src/main/java/org/apache/any23/mime/MIMEType.java
@@ -62,7 +62,7 @@
if (i2 == -1) {
continue;
}
- if (!"q".equals(param.substring(0, i2).trim().toLowerCase())){
+ if (!"q".equals(param.substring(0, i2).trim().toLowerCase(java.util.Locale.ROOT))){
continue;
}
String value = param.substring(i2 + 1);
@@ -83,8 +83,8 @@
if (i2 == -1) {
throw new IllegalArgumentException(MSG + mimeType);
}
- String p1 = type.substring(0, i2).trim().toLowerCase();
- String p2 = type.substring(i2 + 1).trim().toLowerCase();
+ String p1 = type.substring(0, i2).trim().toLowerCase(java.util.Locale.ROOT);
+ String p2 = type.substring(i2 + 1).trim().toLowerCase(java.util.Locale.ROOT);
if ("*".equals(p1)) {
if (!"*".equals(p2)) {
throw new IllegalArgumentException(MSG + mimeType);
diff --git a/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java b/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java
index 3cd0829..f286874 100644
--- a/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java
+++ b/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java
@@ -103,7 +103,7 @@
}
if (!jar.isFile() && !jar.exists()) {
throw new IllegalArgumentException(
- String.format("Invalid JAR [%s], must be an existing file.", jar.getAbsolutePath())
+ String.format(java.util.Locale.ROOT, "Invalid JAR [%s], must be an existing file.", jar.getAbsolutePath())
);
}
return dynamicClassLoader.addJAR(jar);
@@ -123,7 +123,7 @@
} catch (Throwable t) {
result.add(
new IllegalArgumentException(
- String.format("Error while loading jar [%s]", jar.getAbsolutePath()),
+ String.format(java.util.Locale.ROOT, "Error while loading jar [%s]", jar.getAbsolutePath()),
t
)
);
@@ -145,7 +145,7 @@
}
if (!classDir.isDirectory() && !classDir.exists()) {
throw new IllegalArgumentException(
- String.format("Invalid class dir [%s], must be an existing file.", classDir.getAbsolutePath())
+ String.format(java.util.Locale.ROOT, "Invalid class dir [%s], must be an existing file.", classDir.getAbsolutePath())
);
}
return dynamicClassLoader.addClassDir(classDir);
@@ -165,7 +165,7 @@
} catch (Throwable t) {
result.add(
new IllegalArgumentException(
- String.format("Error while loading class dir [%s]", classDir.getAbsolutePath()),
+ String.format(java.util.Locale.ROOT, "Error while loading class dir [%s]", classDir.getAbsolutePath()),
t
)
);
@@ -394,7 +394,7 @@
final File location = new File(locationStr);
if( ! location.exists()) {
throw new IllegalArgumentException(
- String.format("Plugin location '%s' cannot be found.", locationStr)
+ String.format(java.util.Locale.ROOT, "Plugin location '%s' cannot be found.", locationStr)
);
}
locations.add(location);
diff --git a/api/src/main/java/org/apache/any23/vocab/Vocabulary.java b/api/src/main/java/org/apache/any23/vocab/Vocabulary.java
index 718f514..7e4de65 100644
--- a/api/src/main/java/org/apache/any23/vocab/Vocabulary.java
+++ b/api/src/main/java/org/apache/any23/vocab/Vocabulary.java
@@ -69,7 +69,7 @@
private Map<IRI,String> resourceToCommentMap;
/**
- * Constructor.
+ * Overloaded Constructor.
*
* @param namespace the namespace IRI prefix.
*/
@@ -145,7 +145,7 @@
String camelCase = names[0];
for (int i = 1; i < names.length; i++) {
String tmp = names[i];
- camelCase += tmp.replaceFirst("(.)", tmp.substring(0, 1).toUpperCase());
+ camelCase += tmp.replaceFirst("(.)", tmp.substring(0, 1).toUpperCase(java.util.Locale.ROOT));
}
return getProperty(camelCase);
}
diff --git a/cli/pom.xml b/cli/pom.xml
index a2b01a6..127ddf7 100644
--- a/cli/pom.xml
+++ b/cli/pom.xml
@@ -373,6 +373,36 @@
</execution>
</executions>
</plugin>
+
+ <!-- Does NOT forbid calls to jdk-system-out -->
+ <plugin>
+ <groupId>de.thetaphi</groupId>
+ <artifactId>forbiddenapis</artifactId>
+ <version>${forbiddenapis.version}</version>
+ <configuration>
+ <failOnUnsupportedJava>false</failOnUnsupportedJava>
+ <failOnUnresolvableSignatures>false</failOnUnresolvableSignatures>
+ <bundledSignatures>
+ <!-- https://github.com/policeman-tools/forbidden-apis/wiki/BundledSignatures -->
+ <bundledSignature>jdk-unsafe-${javac.src.version}</bundledSignature>
+ <bundledSignature>jdk-deprecated-${javac.src.version}</bundledSignature>
+ <bundledSignature>jdk-non-portable</bundledSignature>
+ <!-- comment out until we upgrade past Java 8 -->
+ <!--bundledSignature>jdk-reflection</bundledSignature-->
+ <bundledSignature>jdk-internal-${javac.src.version}</bundledSignature>
+ <!--bundledSignature>jdk-system-out</bundledSignature-->
+ <bundledSignature>commons-io-unsafe-${commons-io.version}</bundledSignature>
+ </bundledSignatures>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>check</goal>
+ <!--goal>testCheck</goal-->
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
diff --git a/cli/src/main/java/org/apache/any23/cli/BaseTool.java b/cli/src/main/java/org/apache/any23/cli/BaseTool.java
index 6164158..5f9f5b1 100644
--- a/cli/src/main/java/org/apache/any23/cli/BaseTool.java
+++ b/cli/src/main/java/org/apache/any23/cli/BaseTool.java
@@ -20,6 +20,7 @@
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
/**
* This class reduces the verbosity of testing command-line
@@ -54,15 +55,19 @@
}
private static PrintStream concise(PrintStream out, boolean concise) {
- return (concise && (out == System.out || out == System.err)) ? new ConcisePrintStream(out)
- : (out instanceof ConcisePrintStream ? ((ConcisePrintStream) out).out : out);
+ try {
+ return (concise && (out == System.out || out == System.err)) ? new ConcisePrintStream(out)
+ : (out instanceof ConcisePrintStream ? ((ConcisePrintStream) out).out : out);
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException("Error supporting UTF-8 encodings in ConcisePrintStream", e);
+ }
}
private static final class ConcisePrintStream extends PrintStream {
private PrintStream out;
- private ConcisePrintStream(PrintStream out) {
+ private ConcisePrintStream(PrintStream out) throws UnsupportedEncodingException {
super(new OutputStream() {
StringBuilder sb = new StringBuilder();
int lineCount;
@@ -98,7 +103,7 @@
sb = null;
BaseTool.close(out);
}
- }, true);
+ }, true, "UTF-8");
this.out = out;
}
diff --git a/cli/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java b/cli/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
index d531c2d..b3fe637 100644
--- a/cli/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
+++ b/cli/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
@@ -31,6 +31,7 @@
import java.io.IOException;
import java.io.PrintStream;
+import java.util.Locale;
/**
* This class provides some command-line documentation
@@ -92,7 +93,7 @@
*/
public void printExtractorList(ExtractorRegistry registry) {
for (ExtractorFactory factory : registry.getExtractorGroup()) {
- out.println(String.format("%25s [%15s]", factory.getExtractorName(), factory.getExtractorLabel()));
+ out.println(String.format(Locale.ROOT, "%25s [%15s]", factory.getExtractorName(), factory.getExtractorLabel()));
}
}
diff --git a/cli/src/main/java/org/apache/any23/cli/PluginVerifier.java b/cli/src/main/java/org/apache/any23/cli/PluginVerifier.java
index ae0868b..f83ad14 100644
--- a/cli/src/main/java/org/apache/any23/cli/PluginVerifier.java
+++ b/cli/src/main/java/org/apache/any23/cli/PluginVerifier.java
@@ -30,6 +30,7 @@
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
+import java.util.Locale;
/**
* Commandline utility to verify the <b>Any23</b> plugins
@@ -90,9 +91,9 @@
private void printPluginData(ExtractorFactory<?> extractorFactory, PrintStream ps) {
final Author authorAnnotation = extractorFactory.getClass().getAnnotation(Author.class);
- ps.printf("Plugin author : %s%n", authorAnnotation == null ? "<unknown>" : authorAnnotation.name());
- ps.printf("Plugin factory : %s%n", extractorFactory.getClass());
- ps.printf("Plugin mime-types: %s%n", getMimeTypesStr(extractorFactory.getSupportedMIMETypes()));
+ ps.printf(Locale.ROOT, "Plugin author : %s%n", authorAnnotation == null ? "<unknown>" : authorAnnotation.name());
+ ps.printf(Locale.ROOT, "Plugin factory : %s%n", extractorFactory.getClass());
+ ps.printf(Locale.ROOT, "Plugin mime-types: %s%n", getMimeTypesStr(extractorFactory.getSupportedMIMETypes()));
}
}
diff --git a/cli/src/main/java/org/apache/any23/cli/Rover.java b/cli/src/main/java/org/apache/any23/cli/Rover.java
index 8556669..488d61d 100644
--- a/cli/src/main/java/org/apache/any23/cli/Rover.java
+++ b/cli/src/main/java/org/apache/any23/cli/Rover.java
@@ -46,15 +46,20 @@
import java.io.File;
import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
import java.io.OutputStream;
+import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
+import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
+import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
+import java.util.Locale;
import java.util.Objects;
import static java.lang.String.format;
@@ -190,9 +195,10 @@
if (logFile != null) {
try {
- tripleHandler = new LoggingTripleHandler(tripleHandler, new PrintWriter(logFile));
+ tripleHandler = new LoggingTripleHandler(tripleHandler,
+ new PrintWriter(new OutputStreamWriter(new FileOutputStream(logFile), StandardCharsets.UTF_8)));
} catch (FileNotFoundException fnfe) {
- throw new IllegalArgumentException( format("Can not write to log file [%s]", logFile), fnfe );
+ throw new IllegalArgumentException(format(Locale.ROOT, "Can not write to log file [%s]", logFile), fnfe );
}
}
@@ -202,9 +208,8 @@
}
if (noTrivial) {
- tripleHandler = new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(tripleHandler),
- true // suppress stylesheet triples.
- );
+ tripleHandler = new IgnoreAccidentalRDFa(
+ new IgnoreTitlesOfEmptyDocuments(tripleHandler),true); // suppress stylesheet triples.
}
reportingTripleHandler = new ReportingTripleHandler(tripleHandler);
@@ -237,7 +242,7 @@
protected void performExtraction(DocumentSource documentSource) throws Exception {
if (!any23.extract(extractionParameters, documentSource, reportingTripleHandler).hasMatchingExtractors()) {
- throw new IllegalStateException(format("No suitable extractors found for source %s", documentSource.getDocumentIRI()));
+ throw new IllegalStateException(format(Locale.ROOT, "No suitable extractors found for source %s", documentSource.getDocumentIRI()));
}
}
@@ -289,20 +294,20 @@
@Override
public String convert(String uri) {
uri = uri.trim();
- if (uri.toLowerCase().startsWith("http:") || uri.toLowerCase().startsWith("https:")) {
+ if (uri.toLowerCase(Locale.ROOT).startsWith("http:") || uri.toLowerCase(Locale.ROOT).startsWith("https:")) {
try {
return new URL(uri).toString();
} catch (MalformedURLException murle) {
- throw new ParameterException(format("Invalid IRI: '%s': %s", uri, murle.getMessage()));
+ throw new ParameterException(format(Locale.ROOT, "Invalid IRI: '%s': %s", uri, murle.getMessage()));
}
}
final File f = new File(uri);
if (!f.exists()) {
- throw new ParameterException(format("No such file: [%s]", f.getAbsolutePath()));
+ throw new ParameterException(format(Locale.ROOT, "No such file: [%s]", f.getAbsolutePath()));
}
if (f.isDirectory()) {
- throw new ParameterException(format("Found a directory: [%s]", f.getAbsolutePath()));
+ throw new ParameterException(format(Locale.ROOT, "Found a directory: [%s]", f.getAbsolutePath()));
}
return f.toURI().toString();
}
@@ -315,9 +320,11 @@
public PrintStream convert( String value ) {
final File file = new File(value);
try {
- return new PrintStream(file);
+ return new PrintStream(new FileOutputStream(file), true, "UTF-8");
} catch (FileNotFoundException fnfe) {
- throw new ParameterException(format("Cannot open file '%s': %s", file, fnfe.getMessage()));
+ throw new ParameterException(format(Locale.ROOT, "Cannot open file '%s': %s", file, fnfe.getMessage()));
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException("Error converting to PrintStream with UTF-8 encoding.", e);
}
}
diff --git a/cli/src/main/java/org/apache/any23/cli/ToolRunner.java b/cli/src/main/java/org/apache/any23/cli/ToolRunner.java
index 39e0c0a..d37ffd5 100644
--- a/cli/src/main/java/org/apache/any23/cli/ToolRunner.java
+++ b/cli/src/main/java/org/apache/any23/cli/ToolRunner.java
@@ -29,6 +29,7 @@
import java.io.PrintStream;
import java.util.Date;
import java.util.Iterator;
+import java.util.Locale;
import java.util.Map;
import java.util.Properties;
@@ -125,7 +126,7 @@
// execute the parsed command
infoStream.println();
infoStream.println( "------------------------------------------------------------------------" );
- infoStream.printf( "Apache Any23 :: %s%n", parsedCommand );
+ infoStream.printf(Locale.ROOT, "Apache Any23 :: %s%n", parsedCommand );
infoStream.println( "------------------------------------------------------------------------" );
infoStream.println();
@@ -142,7 +143,7 @@
} finally {
infoStream.println();
infoStream.println( "------------------------------------------------------------------------" );
- infoStream.printf( "Apache Any23 %s%n", ( exit != 0 ) ? "FAILURE" : "SUCCESS" );
+ infoStream.printf(Locale.ROOT, "Apache Any23 %s%n", ( exit != 0 ) ? "FAILURE" : "SUCCESS" );
if (exit != 0) {
infoStream.println();
@@ -151,18 +152,18 @@
System.err.println( "Execution terminated with errors:" );
error.printStackTrace(infoStream);
} else {
- infoStream.printf( "Execution terminated with errors: %s%n", error.getMessage() );
+ infoStream.printf(Locale.ROOT, "Execution terminated with errors: %s%n", error.getMessage() );
}
infoStream.println();
}
- infoStream.printf( "Total time: %ss%n", ( ( currentTimeMillis() - start ) / 1000 ) );
- infoStream.printf( "Finished at: %s%n", new Date() );
+ infoStream.printf(Locale.ROOT, "Total time: %ss%n", ( ( currentTimeMillis() - start ) / 1000 ) );
+ infoStream.printf(Locale.ROOT, "Finished at: %s%n", new Date() );
final Runtime runtime = Runtime.getRuntime();
final int megaUnit = 1024 * 1024;
- infoStream.printf( "Final Memory: %sM/%sM%n", ( runtime.totalMemory() - runtime.freeMemory() ) / megaUnit,
+ infoStream.printf(Locale.ROOT, "Final Memory: %sM/%sM%n", ( runtime.totalMemory() - runtime.freeMemory() ) / megaUnit,
runtime.totalMemory() / megaUnit );
infoStream.println( "------------------------------------------------------------------------" );
@@ -197,16 +198,16 @@
}
}
- infoStream.printf( "Apache Any23 %s%n", Any23.VERSION );
- infoStream.printf( "Java version: %s, vendor: %s%n",
+ infoStream.printf(Locale.ROOT, "Apache Any23 %s%n", Any23.VERSION );
+ infoStream.printf(Locale.ROOT, "Java version: %s, vendor: %s%n",
System.getProperty( "java.version" ),
System.getProperty( "java.vendor" ) );
- infoStream.printf( "Java home: %s%n", System.getProperty( "java.home" ) );
- infoStream.printf( "Default locale: %s_%s, platform encoding: %s%n",
+ infoStream.printf(Locale.ROOT, "Java home: %s%n", System.getProperty( "java.home" ) );
+ infoStream.printf(Locale.ROOT, "Default locale: %s_%s, platform encoding: %s%n",
System.getProperty( "user.language" ),
System.getProperty( "user.country" ),
System.getProperty( "sun.jnu.encoding" ) );
- infoStream.printf( "OS name: \"%s\", version: \"%s\", arch: \"%s\", family: \"%s\"%n",
+ infoStream.printf(Locale.ROOT, "OS name: \"%s\", version: \"%s\", arch: \"%s\", family: \"%s\"%n",
System.getProperty( "os.name" ),
System.getProperty( "os.version" ),
System.getProperty( "os.arch" ),
@@ -214,7 +215,7 @@
}
private static final String getOsFamily() {
- String osName = System.getProperty( "os.name" ).toLowerCase();
+ String osName = System.getProperty( "os.name" ).toLowerCase(Locale.ROOT);
String pathSep = System.getProperty( "path.separator" );
if (osName.contains("windows")) {
diff --git a/core/src/main/java/org/apache/any23/Any23.java b/core/src/main/java/org/apache/any23/Any23.java
index 80e4a00..30f46fc 100644
--- a/core/src/main/java/org/apache/any23/Any23.java
+++ b/core/src/main/java/org/apache/any23/Any23.java
@@ -51,6 +51,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Locale;
/**
@@ -173,7 +174,7 @@
userAgent = defaultUserAgent;
}
if (userAgent.trim().length() == 0) {
- throw new IllegalArgumentException(String.format("Invalid user agent: '%s'", userAgent));
+ throw new IllegalArgumentException(String.format(Locale.ROOT, "Invalid user agent: '%s'", userAgent));
}
this.userAgent = userAgent;
}
@@ -259,14 +260,14 @@
public DocumentSource createDocumentSource(String documentIRI) throws URISyntaxException, IOException {
if (documentIRI == null)
throw new NullPointerException("documentIRI cannot be null.");
- if (documentIRI.toLowerCase().startsWith("file:")) {
+ if (documentIRI.toLowerCase(Locale.ROOT).startsWith("file:")) {
return new FileDocumentSource(new File(new URI(documentIRI)));
}
- if (documentIRI.toLowerCase().startsWith("http:") || documentIRI.toLowerCase().startsWith("https:")) {
+ if (documentIRI.toLowerCase(Locale.ROOT).startsWith("http:") || documentIRI.toLowerCase(Locale.ROOT).startsWith("https:")) {
return new HTTPDocumentSource(getHTTPClient(), documentIRI);
}
throw new IllegalArgumentException(
- String.format("Unsupported protocol for document IRI: '%s' . "
+ String.format(Locale.ROOT, "Unsupported protocol for document IRI: '%s' . "
+ "Check that document IRI contains a protocol.", documentIRI)
);
}
diff --git a/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java b/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java
index 636b230..234c563 100644
--- a/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java
+++ b/core/src/main/java/org/apache/any23/extractor/ExtractionResultImpl.java
@@ -32,6 +32,7 @@
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
+import java.util.Locale;
import java.util.Set;
/**
@@ -129,7 +130,7 @@
@Override
public void printReport(PrintStream ps) {
- ps.print(String.format("Context: %s [errors: %d] {\n", context, getIssuesCount()));
+ ps.print(String.format(Locale.ROOT, "Context: %s [errors: %d] {\n", context, getIssuesCount()));
for (Issue issue : issues) {
ps.print(issue.toString());
ps.print("\n");
@@ -176,7 +177,7 @@
tripleHandler.receiveTriple(s, p, o, g, context);
} catch (TripleHandlerException e) {
throw new RuntimeException(
- String.format("Error while receiving triple %s %s %s", s, p, o ),
+ String.format(Locale.ROOT, "Error while receiving triple %s %s %s", s, p, o ),
e
);
}
@@ -198,7 +199,7 @@
tripleHandler.receiveNamespace(prefix, uri, context);
} catch (TripleHandlerException e) {
throw new RuntimeException(
- String.format("Error while writing namespace %s:%s", prefix, uri),
+ String.format(Locale.ROOT, "Error while writing namespace %s:%s", prefix, uri),
e
);
}
@@ -231,7 +232,7 @@
try {
tripleHandler.receiveNamespace(prefix, prefixes.getNamespaceIRIFor(prefix), context);
} catch (TripleHandlerException e) {
- throw new RuntimeException(String.format("Error while writing namespace %s", prefix),
+ throw new RuntimeException(String.format(Locale.ROOT, "Error while writing namespace %s", prefix),
e
);
}
diff --git a/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java b/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
index 66022fe..a8acbea 100644
--- a/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
+++ b/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
@@ -55,12 +55,14 @@
import java.io.InputStream;
import java.io.PrintStream;
import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.UUID;
import java.util.stream.Collectors;
@@ -244,8 +246,8 @@
try {
output.startDocument(documentIRI);
} catch (TripleHandlerException e) {
- log.error(String.format("Error starting document with IRI %s", documentIRI));
- throw new ExtractionException(String.format("Error starting document with IRI %s", documentIRI),
+ log.error(String.format(Locale.ROOT, "Error starting document with IRI %s", documentIRI));
+ throw new ExtractionException(String.format(Locale.ROOT, "Error starting document with IRI %s", documentIRI),
e
);
}
@@ -322,7 +324,7 @@
addExtractionTimeSizeMetaTriples(consolidationContext);
} catch (TripleHandlerException e) {
throw new ExtractionException(
- String.format(
+ String.format(Locale.ROOT,
"Error while adding extraction metadata triples document with IRI %s", documentIRI
),
e
@@ -333,8 +335,8 @@
try {
output.endDocument(documentIRI);
} catch (TripleHandlerException e) {
- log.error(String.format("Error ending document with IRI %s", documentIRI));
- throw new ExtractionException(String.format("Error ending document with IRI %s", documentIRI),
+ log.error(String.format(Locale.ROOT, "Error ending document with IRI %s", documentIRI));
+ throw new ExtractionException(String.format(Locale.ROOT, "Error ending document with IRI %s", documentIRI),
e
);
}
@@ -549,8 +551,8 @@
// Logging result error report.
if(log.isDebugEnabled() && extractionResult.hasIssues() ) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
- extractionResult.printReport(new PrintStream(baos));
- log.debug(baos.toString());
+ extractionResult.printReport(new PrintStream(baos, true, "UTF-8"));
+ log.debug(baos.toString("UTF-8"));
}
extractionResult.close();
@@ -771,7 +773,7 @@
output.openContext(context);
} catch (TripleHandlerException e) {
throw new ExtractionException(
- String.format("Error starting document with IRI %s", documentIRI),
+ String.format(Locale.ROOT, "Error starting document with IRI %s", documentIRI),
e
);
}
@@ -820,7 +822,7 @@
output.openContext(context);
} catch (TripleHandlerException e) {
throw new ExtractionException(
- String.format("Error starting document with IRI %s", documentIRI),
+ String.format(Locale.ROOT, "Error starting document with IRI %s", documentIRI),
e
);
}
diff --git a/core/src/main/java/org/apache/any23/extractor/TagSoupExtractionResult.java b/core/src/main/java/org/apache/any23/extractor/TagSoupExtractionResult.java
index c228dd9..974b004 100644
--- a/core/src/main/java/org/apache/any23/extractor/TagSoupExtractionResult.java
+++ b/core/src/main/java/org/apache/any23/extractor/TagSoupExtractionResult.java
@@ -23,6 +23,7 @@
import java.util.Arrays;
import java.util.List;
+import java.util.Locale;
/**
* This interface models a specific {@link ExtractionResult}
@@ -85,7 +86,7 @@
public ResourceRoot(String[] path, Resource root, Class<? extends MicroformatExtractor> extractor) {
if(path == null || path.length == 0) {
- throw new IllegalArgumentException( String.format("Invalid xpath: '%s'.", Arrays.toString(path) ) );
+ throw new IllegalArgumentException( String.format(Locale.ROOT, "Invalid xpath: '%s'.", Arrays.toString(path) ) );
}
if(root == null) {
throw new IllegalArgumentException("Invalid root, cannot be null.");
@@ -112,7 +113,7 @@
@Override
public String toString() {
- return String.format(
+ return String.format(Locale.ROOT,
"%s-%s-%s %s",
this.getClass().getCanonicalName(),
Arrays.toString(path),
@@ -175,7 +176,7 @@
@Override
public String toString() {
- return String.format(
+ return String.format(Locale.ROOT,
"%s %s - %s - %s -- %s -->",
this.getClass().getCanonicalName(),
Arrays.toString(path),
diff --git a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
index 298d930..6bcc8b9 100644
--- a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
@@ -40,6 +40,7 @@
import java.io.InputStream;
import java.util.StringTokenizer;
import java.util.Iterator;
+import java.util.Locale;
/**
* This extractor produces <i>RDF</i> from a <i>CSV file</i> .
@@ -204,7 +205,7 @@
}
private IRI normalize(String toBeNormalized, IRI documentIRI) {
- String newToBeNormalized = toBeNormalized.trim().toLowerCase().replace("?", "").replace("&", "");
+ String newToBeNormalized = toBeNormalized.trim().toLowerCase(Locale.ROOT).replace("?", "").replace("&", "");
StringBuilder result = new StringBuilder(documentIRI.toString());
diff --git a/core/src/main/java/org/apache/any23/extractor/html/DomUtils.java b/core/src/main/java/org/apache/any23/extractor/html/DomUtils.java
index 72d824f..4f57a94 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/DomUtils.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/DomUtils.java
@@ -47,6 +47,7 @@
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
import java.util.regex.Pattern;
/**
@@ -128,10 +129,10 @@
return EMPTY_STRING_ARRAY;
}
List<String> ancestors = new ArrayList<String>();
- ancestors.add( String.format("%s[%s]", n.getNodeName(), getIndexInParent(n) ) );
+ ancestors.add( String.format(Locale.ROOT, "%s[%s]", n.getNodeName(), getIndexInParent(n) ) );
Node parent = n.getParentNode();
while(parent != null) {
- ancestors.add(0, String.format("%s[%s]", parent.getNodeName(), getIndexInParent(parent) ) );
+ ancestors.add(0, String.format(Locale.ROOT, "%s[%s]", parent.getNodeName(), getIndexInParent(parent) ) );
parent = parent.getParentNode();
}
return ancestors.toArray( new String[ancestors.size()] );
@@ -201,7 +202,7 @@
* @return list of matching nodes or an empty list.
*/
public static List<Node> findAllByClassName(Node root, String className) {
- return findAllBy(root, null, "class", className.toLowerCase());
+ return findAllBy(root, null, "class", className.toLowerCase(Locale.ROOT));
}
/**
@@ -493,28 +494,28 @@
* @return an {@link java.io.InputStream}
*/
public static InputStream documentToInputStream(Document doc) {
- DOMSource source = new DOMSource(doc);
- StringWriter xmlAsWriter = new StringWriter();
- StreamResult result = new StreamResult(xmlAsWriter);
- try {
- TransformerFactory.newInstance().newTransformer().transform(source, result);
- } catch (TransformerConfigurationException e) {
- throw new RuntimeException("Error within Document to InputStream transformation configuration!");
- } catch (TransformerException e) {
- throw new RuntimeException("Error whilst transforming the Document to InputStream!");
- } catch (TransformerFactoryConfigurationError e) {
- throw new RuntimeException("Error within Document to InputStream transformation configuration!");
- }
-
- InputStream is = null;
- try {
- is = new ByteArrayInputStream(xmlAsWriter.toString().getBytes("UTF-8"));
- } catch (UnsupportedEncodingException e) {
- e.printStackTrace();
- }
- return is;
+ DOMSource source = new DOMSource(doc);
+ StringWriter xmlAsWriter = new StringWriter();
+ StreamResult result = new StreamResult(xmlAsWriter);
+ try {
+ TransformerFactory.newInstance().newTransformer().transform(source, result);
+ } catch (TransformerConfigurationException e) {
+ throw new RuntimeException("Error within Document to InputStream transformation configuration!");
+ } catch (TransformerException e) {
+ throw new RuntimeException("Error whilst transforming the Document to InputStream!");
+ } catch (TransformerFactoryConfigurationError e) {
+ throw new RuntimeException("Error within Document to InputStream transformation configuration factory!");
+ }
+
+ InputStream is = null;
+ try {
+ is = new ByteArrayInputStream(xmlAsWriter.toString().getBytes("UTF-8"));
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException("Error obtaining data with \"UTF-8\" encoding!", e);
+ }
+ return is;
}
-
+
/**
* Convert a w3c dom node to a InputStream
@@ -526,21 +527,19 @@
Result outputTarget = new StreamResult(outputStream);
Transformer t = null;
try {
- t = TransformerFactory.newInstance().newTransformer();
+ t = TransformerFactory.newInstance().newTransformer();
} catch (TransformerConfigurationException e) {
- e.printStackTrace();
+ throw new RuntimeException("Serious configuration error.", e);
} catch (TransformerFactoryConfigurationError e) {
- e.printStackTrace();
+ throw new RuntimeException("Serious configuration error.", e);
}
t.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
try {
- t.transform(new DOMSource(node), outputTarget);
+ t.transform(new DOMSource(node), outputTarget);
} catch (TransformerException e) {
- e.printStackTrace();
+ throw new RuntimeException("Error whilst transforming the Node to InputStream!", e);
}
return new ByteArrayInputStream(outputStream.toByteArray());
}
-
-
}
diff --git a/core/src/main/java/org/apache/any23/extractor/html/JsoupUtils.java b/core/src/main/java/org/apache/any23/extractor/html/JsoupUtils.java
index 3b50221..6894ccd 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/JsoupUtils.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/JsoupUtils.java
@@ -26,6 +26,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
/**
@@ -69,7 +70,7 @@
if (c == '>') {
if (length >= 20 && bytes[length - 2] == '?') {
- String decl = "<" + new String(bytes, 2, length - 4) + ">";
+ String decl = "<" + new String(bytes, 2, length - 4, StandardCharsets.UTF_8) + ">";
org.jsoup.nodes.Document doc = org.jsoup.Jsoup.parse(decl, documentIRI, Parser.xmlParser());
for (org.jsoup.nodes.Element el : doc.children()) {
if ("xml".equalsIgnoreCase(el.tagName())) {
diff --git a/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java
index 6f666a9..2746d76 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java
@@ -30,6 +30,7 @@
import org.w3c.dom.Node;
import java.io.IOException;
+import java.util.Locale;
/**
* Extractor for the <a href="http://microformats.org/wiki/rel-license">rel-license</a>
@@ -56,7 +57,7 @@
if ("".equals(link)) {
out.notifyIssue(
IssueReport.IssueLevel.WARNING,
- String.format(
+ String.format(Locale.ROOT,
"Invalid license link detected within document %s.",
documentIRI.toString()
),
diff --git a/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java
index ad6f901..01f2e6c 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java
@@ -36,6 +36,7 @@
import org.w3c.dom.Node;
import java.io.IOException;
+import java.util.Locale;
/**
* The abstract base class for any
@@ -167,7 +168,7 @@
if( containsScriptBlock(literalStr) ) {
out.notifyIssue(
IssueReport.IssueLevel.WARNING,
- String.format("Detected script in literal: [%s]", literalStr)
+ String.format(Locale.ROOT, "Detected script in literal: [%s]", literalStr)
, -1
, -1
);
@@ -237,7 +238,7 @@
}
private boolean containsScriptBlock(String in) {
- final String inLowerCase = in.toLowerCase();
+ final String inLowerCase = in.toLowerCase(Locale.ROOT);
final int beginBlock = inLowerCase.indexOf(BEGIN_SCRIPT);
if(beginBlock == -1) {
return false;
diff --git a/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java
index 11a6223..97f4686 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java
@@ -17,6 +17,8 @@
package org.apache.any23.extractor.html;
+import java.util.Locale;
+
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
@@ -140,7 +142,7 @@
private IRI resolvePropertyName(String clazz) {
return vWO.getProperty(
- String.format(
+ String.format(Locale.ROOT,
"%sName",
clazz
)
@@ -150,8 +152,8 @@
private IRI resolveClassName(String clazz) {
String upperCaseClass = clazz.substring(0, 1);
return vWO.getClass(
- String.format("%s%s",
- upperCaseClass.toUpperCase(),
+ String.format(Locale.ROOT, "%s%s",
+ upperCaseClass.toUpperCase(Locale.ROOT),
clazz.substring(1)
)
);
diff --git a/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java b/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java
index 4f54018..37a8f18 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java
@@ -30,6 +30,7 @@
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
+import java.util.Locale;
/**
* <p>Parses an {@link java.io.InputStream}
@@ -74,7 +75,7 @@
public TagSoupParser(InputStream input, String documentIRI, String encoding) {
if (encoding != null && !Charset.isSupported(encoding))
- throw new UnsupportedCharsetException(String.format("Charset %s is not supported", encoding));
+ throw new UnsupportedCharsetException(String.format(Locale.ROOT, "Charset %s is not supported", encoding));
this.input = input;
this.documentIRI = documentIRI;
diff --git a/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java
index 17b54e6..fc17b4f 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java
@@ -35,6 +35,7 @@
import java.io.StringReader;
import java.util.Arrays;
import java.util.List;
+import java.util.Locale;
/**
* Extractor for <i>Turtle/N3</i> format embedded within <i>HTML</i>
@@ -108,7 +109,7 @@
} catch (RDFParseException rdfpe) {
er.notifyIssue(
IssueReport.IssueLevel.ERROR,
- String.format(
+ String.format(Locale.ROOT,
"An error occurred while parsing turtle content within script node: %s",
Arrays.toString(DomUtils.getXPathListForNode(n))
),
diff --git a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
index af971fa..b12a553 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
@@ -34,6 +34,7 @@
import org.w3c.dom.Node;
import java.io.IOException;
+import java.util.Locale;
/**
* Extractor for the <a href="http://microformats.org/wiki/xfn">XFN</a>
@@ -113,7 +114,7 @@
private boolean containsRelMe(String[] rels) {
for (String rel : rels) {
- if ("me".equals(rel.toLowerCase())) {
+ if ("me".equals(rel.toLowerCase(Locale.ROOT))) {
return true;
}
}
@@ -122,7 +123,7 @@
private boolean containsXFNRelExceptMe(String[] rels) {
for (String rel : rels) {
- if (!"me".equals(rel.toLowerCase()) && vXFN.isXFNLocalName(rel)) {
+ if (!"me".equals(rel.toLowerCase(Locale.ROOT)) && vXFN.isXFNLocalName(rel)) {
return true;
}
}
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemProp.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemProp.java
index 6b8072f..a1d860d 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemProp.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemProp.java
@@ -17,6 +17,8 @@
package org.apache.any23.extractor.microdata;
+import java.util.Locale;
+
/**
* Describes a <b>Microdata item property</b>.
*
@@ -79,7 +81,7 @@
@Override
public String toJSON() {
- return String.format(
+ return String.format(Locale.ROOT,
"{ \"xpath\" : \"%s\", \"name\" : \"%s\", \"value\" : %s }",
getXpath(),
name,
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
index 8b5bffd..40fb42c 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
@@ -22,6 +22,7 @@
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
+import java.util.Locale;
import java.util.Objects;
import org.apache.any23.util.StringUtils;
@@ -82,7 +83,7 @@
private static SimpleDateFormat getSdf() {
SimpleDateFormat simpleDateFormat = sdf.get();
if (simpleDateFormat == null) {
- simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
+ simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
sdf.set(simpleDateFormat);
}
return simpleDateFormat;
@@ -258,7 +259,7 @@
contentStr = content.toString();
}
- return String.format( "{ \"content\" : %s, \"type\" : \"%s\" }", contentStr, type );
+ return String.format(Locale.ROOT, "{ \"content\" : %s, \"type\" : \"%s\" }", contentStr, type );
}
@Override
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java
index 599611c..4643c40 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java
@@ -30,6 +30,7 @@
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
@@ -206,7 +207,7 @@
}
j++;
}
- return String.format(
+ return String.format(Locale.ROOT,
"{ " +
"\"xpath\" : \"%s\", \"id\" : %s, \"refs\" : %s, \"type\" : %s, \"itemid\" : %s, \"properties\" : [ %s ]" +
" }",
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
index 4d4b0db..1bddeb5 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
@@ -336,7 +336,7 @@
return new ItemPropValue( getItemScope(node), ItemPropValue.Type.Nested);
}
- final String nodeName = node.getNodeName().toLowerCase();
+ final String nodeName = node.getNodeName().toLowerCase(Locale.ROOT);
//see http://w3c.github.io/microdata-rdf/#dfn-property-values
if ("data".equals(nodeName) || "meter".equals(nodeName)) {
@@ -611,7 +611,7 @@
for (String ref : refs) {
if (loopDetectorSet.contains(ref)) {
throw new MicrodataParserException(
- String.format(
+ String.format(Locale.ROOT,
"Loop detected with depth %d while dereferencing itemProp '%s' .",
dereferenceRecursionCounter - 1, ref
),
@@ -622,7 +622,7 @@
Element element = document.getElementById(ref);
if (element == null) {
manageError(
- new MicrodataParserException( String.format("Unknown itemProp id '%s'", ref ), null )
+ new MicrodataParserException( String.format(Locale.ROOT, "Unknown itemProp id '%s'", ref ), null )
);
continue;
}
@@ -676,7 +676,7 @@
if( itemProps.contains(deferredProperty) ) {
manageError(
new MicrodataParserException(
- String.format("Duplicated deferred itemProp '%s'.", deferredProperty.getName() ),
+ String.format(Locale.ROOT, "Duplicated deferred itemProp '%s'.", deferredProperty.getName() ),
node
)
);
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParserException.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParserException.java
index c4281a7..ac4df5a 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParserException.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParserException.java
@@ -17,6 +17,8 @@
package org.apache.any23.extractor.microdata;
+import java.util.Locale;
+
import org.apache.any23.extractor.html.DomUtils;
import org.w3c.dom.Node;
@@ -63,7 +65,7 @@
}
public String toJSON() {
- return String.format(
+ return String.format(Locale.ROOT,
"{ \"message\" : \"%s\", " +
"\"path\" : \"%s\", " +
"\"begin_row\" : %d, \"begin_col\" : %d, " +
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Parser.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Parser.java
index 8053e39..3eec42b 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Parser.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Parser.java
@@ -24,6 +24,7 @@
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.Stack;
import javax.xml.transform.TransformerException;
@@ -231,7 +232,7 @@
if( ! isXMLNSDeclared(document)) {
reportError(
document.getDocumentElement(),
- String.format(
+ String.format(Locale.ROOT,
"The default %s namespace is expected to be declared and equal to '%s' .",
XMLNS_ATTRIBUTE, XMLNS_DEFAULT
)
@@ -280,7 +281,7 @@
try {
pushVocabulary(currentNode, RDFUtils.iri(vocabularyStr));
} catch (Exception e) {
- reportError(currentNode, String.format("Invalid vocabulary [%s], must be a IRI.", vocabularyStr));
+ reportError(currentNode, String.format(Locale.ROOT, "Invalid vocabulary [%s], must be a IRI.", vocabularyStr));
}
}
@@ -357,7 +358,7 @@
if(curieOrIRI != null && curieOrIRI instanceof IRI) {
result.add((IRI) curieOrIRI);
} else {
- reportError(n, String.format("Invalid CURIE '%s' : expected IRI, found BNode.", curieORIRIListPart));
+ reportError(n, String.format(Locale.ROOT, "Invalid CURIE '%s' : expected IRI, found BNode.", curieORIRIListPart));
}
}
return result.toArray(new IRI[result.size()]);
@@ -482,7 +483,7 @@
* @param msg human readable message.
*/
private void reportError(Node n, String msg) {
- final String errorMsg = String.format(
+ final String errorMsg = String.format(Locale.ROOT,
"Error while processing node [%s] : '%s'",
DomUtils.getXPathForNode(n), msg
);
@@ -710,7 +711,7 @@
int splitPoint = prefixPart.indexOf(IRI_PREFIX_SEPARATOR);
final String prefix = prefixPart.substring(0, splitPoint);
if(prefix.length() == 0) {
- reportError(node, String.format("Invalid prefix length in prefix attribute '%s'", prefixAttribute));
+ reportError(node, String.format(Locale.ROOT, "Invalid prefix length in prefix attribute '%s'", prefixAttribute));
continue;
}
final IRI iri;
@@ -720,7 +721,7 @@
} catch (Exception e) {
reportError(
node,
- String.format(
+ String.format(Locale.ROOT,
"Resolution of prefix '%s' defines an invalid IRI: '%s'",
prefixAttribute, iriStr
)
@@ -956,7 +957,7 @@
if(prefixSeparatorIndex == -1) { // there is no prefix separator.
if(resolutionPolicy == ResolutionPolicy.NSRequired) {
throw new IllegalArgumentException(
- String.format("Invalid mapping string [%s], must declare a prefix.", mapping)
+ String.format(Locale.ROOT, "Invalid mapping string [%s], must declare a prefix.", mapping)
);
}
if (resolutionPolicy == ResolutionPolicy.TermAllowed) {
@@ -972,14 +973,14 @@
final String prefix = mapping.substring(0, prefixSeparatorIndex);
final IRI curieMapping = getMapping(prefix);
if(curieMapping == null) {
- throw new IllegalArgumentException( String.format("Cannot map prefix '%s'", prefix) );
+ throw new IllegalArgumentException( String.format(Locale.ROOT, "Cannot map prefix '%s'", prefix) );
}
final String candidateCURIEStr = curieMapping.toString() + mapping.substring(prefixSeparatorIndex + 1);
final java.net.URI candidateCURIE;
try {
candidateCURIE = new java.net.URI(candidateCURIEStr);
} catch (URISyntaxException IRIse) {
- throw new IllegalArgumentException(String.format("Invalid CURIE '%s'", candidateCURIEStr) );
+ throw new IllegalArgumentException(String.format(Locale.ROOT, "Invalid CURIE '%s'", candidateCURIEStr) );
}
return resolveIRI(
candidateCURIE.isAbsolute()
diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java b/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java
index dc0eaeb..ef0dbb7 100644
--- a/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java
+++ b/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java
@@ -22,6 +22,7 @@
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Value;
+import java.util.Locale;
import java.util.Map;
/**
@@ -129,7 +130,7 @@
@Override
public String toString() {
- return String.format("%s %s %s %s", subject, predicate, object, graph);
+ return String.format(Locale.ROOT, "%s %s %s %s", subject, predicate, object, graph);
}
}
diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java b/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java
index 95d7b0d..946f4a0 100644
--- a/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java
+++ b/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java
@@ -17,6 +17,8 @@
package org.apache.any23.extractor.xpath;
+import java.util.Locale;
+
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
@@ -76,7 +78,7 @@
return SimpleValueFactory.getInstance().createIRI(value);
} catch (IllegalArgumentException iae) {
throw new IllegalArgumentException(
- String.format("Expected a valid IRI for object template, found '%s'", value),
+ String.format(Locale.ROOT, "Expected a valid IRI for object template, found '%s'", value),
iae
);
}
diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/TemplatePredicate.java b/core/src/main/java/org/apache/any23/extractor/xpath/TemplatePredicate.java
index 28d93ac..5d57b93 100644
--- a/core/src/main/java/org/apache/any23/extractor/xpath/TemplatePredicate.java
+++ b/core/src/main/java/org/apache/any23/extractor/xpath/TemplatePredicate.java
@@ -17,6 +17,8 @@
package org.apache.any23.extractor.xpath;
+import java.util.Locale;
+
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
@@ -44,7 +46,7 @@
return SimpleValueFactory.getInstance().createIRI(value);
} catch (IllegalArgumentException iae) {
throw new IllegalArgumentException(
- String.format("Expected a valid IRI for predicate template, found '%s'", value),
+ String.format(Locale.ROOT, "Expected a valid IRI for predicate template, found '%s'", value),
iae
);
}
diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/TemplateXPathExtractionRuleImpl.java b/core/src/main/java/org/apache/any23/extractor/xpath/TemplateXPathExtractionRuleImpl.java
index c66d1fb..e3bf1ca 100644
--- a/core/src/main/java/org/apache/any23/extractor/xpath/TemplateXPathExtractionRuleImpl.java
+++ b/core/src/main/java/org/apache/any23/extractor/xpath/TemplateXPathExtractionRuleImpl.java
@@ -25,6 +25,7 @@
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
@@ -102,7 +103,7 @@
final Map<String,String> varValues = new HashMap<String, String>();
String value;
for(Variable variable : variables) {
- value = DomUtils.find(in, variable.getxPath().toUpperCase());
+ value = DomUtils.find(in, variable.getxPath().toUpperCase(Locale.ROOT));
varValues.put(variable.getName(), value);
}
@@ -123,7 +124,7 @@
private void checkVariableNameDeclared(String varName) {
if (!variableNameDeclared(varName)) {
throw new IllegalArgumentException(
- String.format("A variable with name '%s' was not declared.", varName)
+ String.format(Locale.ROOT, "A variable with name '%s' was not declared.", varName)
);
}
}
@@ -131,7 +132,7 @@
private void checkVariableNameNotDeclared(String varName) {
if (variableNameDeclared(varName)) {
throw new IllegalArgumentException(
- String.format("A variable with name '%s' is already declared.", varName)
+ String.format(Locale.ROOT, "A variable with name '%s' is already declared.", varName)
);
}
}
diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/Term.java b/core/src/main/java/org/apache/any23/extractor/xpath/Term.java
index c6e1382..4fcaa71 100644
--- a/core/src/main/java/org/apache/any23/extractor/xpath/Term.java
+++ b/core/src/main/java/org/apache/any23/extractor/xpath/Term.java
@@ -19,6 +19,7 @@
import org.eclipse.rdf4j.model.Value;
+import java.util.Locale;
import java.util.Map;
/**
@@ -79,7 +80,7 @@
value = varMapping.get(internalValue);
if(value == null) {
throw new IllegalStateException(
- String.format("Cannot find a valid value for variable '%s'", internalValue)
+ String.format(Locale.ROOT, "Cannot find a valid value for variable '%s'", internalValue)
);
}
} else {
diff --git a/core/src/main/java/org/apache/any23/filter/ExtractionContextBlocker.java b/core/src/main/java/org/apache/any23/filter/ExtractionContextBlocker.java
index 16d8378..42b16ba 100644
--- a/core/src/main/java/org/apache/any23/filter/ExtractionContextBlocker.java
+++ b/core/src/main/java/org/apache/any23/filter/ExtractionContextBlocker.java
@@ -27,6 +27,7 @@
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
/**
@@ -104,7 +105,7 @@
contextQueues.get(context.getUniqueID()).receiveTriple(s, p, o, g);
} catch (ValvedTriplePipeException e) {
throw new TripleHandlerException(
- String.format("Error while receiving triple %s %s %s", s, p, o),
+ String.format(Locale.ROOT, "Error while receiving triple %s %s %s", s, p, o),
e
);
}
@@ -116,7 +117,7 @@
contextQueues.get(context.getUniqueID()).receiveNamespace(prefix, uri);
} catch (ValvedTriplePipeException e) {
throw new TripleHandlerException(
- String.format("Error while receiving namespace %s:%s", prefix, uri),
+ String.format(Locale.ROOT, "Error while receiving namespace %s:%s", prefix, uri),
e
);
}
diff --git a/core/src/main/java/org/apache/any23/rdf/PopularPrefixes.java b/core/src/main/java/org/apache/any23/rdf/PopularPrefixes.java
index 5170bee..ec8a776 100644
--- a/core/src/main/java/org/apache/any23/rdf/PopularPrefixes.java
+++ b/core/src/main/java/org/apache/any23/rdf/PopularPrefixes.java
@@ -24,6 +24,7 @@
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
+import java.util.Locale;
import java.util.Map;
import java.util.Properties;
@@ -42,11 +43,11 @@
Prefixes prefixes = new Prefixes();
Properties properties = new Properties();
try {
- logger.trace(String.format("Loading prefixes from %s", RESOURCE_NAME));
+ logger.trace(String.format(Locale.ROOT, "Loading prefixes from %s", RESOURCE_NAME));
properties.load(getResourceAsStream());
} catch (IOException e) {
- logger.error(String.format("Error while loading prefixes from %s", RESOURCE_NAME), e);
- throw new RuntimeException(String.format("Error while loading prefixes from %s", RESOURCE_NAME));
+ logger.error(String.format(Locale.ROOT, "Error while loading prefixes from %s", RESOURCE_NAME), e);
+ throw new RuntimeException(String.format(Locale.ROOT, "Error while loading prefixes from %s", RESOURCE_NAME));
}
for (Map.Entry entry : properties.entrySet()) {
if (testIRICompliance((String) entry.getValue())) {
@@ -55,7 +56,7 @@
(String) entry.getValue()
);
} else {
- logger.warn(String.format("Prefixes entry '%s' is not a well-formad IRI. Skipped.", entry.getValue()));
+ logger.warn(String.format(Locale.ROOT, "Prefixes entry '%s' is not a well-formad IRI. Skipped.", entry.getValue()));
}
}
return prefixes;
diff --git a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
index 14aeab3..8b2308a 100644
--- a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
+++ b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
@@ -47,12 +47,15 @@
import java.io.OutputStream;
import java.io.Writer;
import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.GregorianCalendar;
+import java.util.Locale;
import java.util.Optional;
+import java.util.TimeZone;
/**
* Basic class providing a set of utility methods when dealing with <i>RDF</i>.
@@ -104,9 +107,9 @@
*/
public static String getXSDDate(String dateToBeParsed, String format)
throws ParseException, DatatypeConfigurationException {
- SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
+ SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format, Locale.ROOT);
Date date = simpleDateFormat.parse(dateToBeParsed);
- GregorianCalendar gc = new GregorianCalendar();
+ GregorianCalendar gc = new GregorianCalendar(TimeZone.getDefault(), Locale.ROOT);
gc.setTime(date);
XMLGregorianCalendar xml = DatatypeFactory.newInstance().newXMLGregorianCalendar(gc);
xml.setTimezone(0);
@@ -120,7 +123,7 @@
* @return the string representation of the input date.
*/
public static String toXSDDateTime(Date date) {
- SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
+ SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.ROOT);
String s = simpleDateFormat.format(date);
StringBuilder sb = new StringBuilder(s);
sb.insert(22, ':');
@@ -494,7 +497,7 @@
*/
public static Statement[] parseRDF(RDFFormat format, String in)
throws IOException {
- return parseRDF(format, new ByteArrayInputStream(in.getBytes()));
+ return parseRDF(format, new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8)));
}
/**
diff --git a/core/src/main/java/org/apache/any23/source/FileDocumentSource.java b/core/src/main/java/org/apache/any23/source/FileDocumentSource.java
index 5c57b56..34fbfa2 100644
--- a/core/src/main/java/org/apache/any23/source/FileDocumentSource.java
+++ b/core/src/main/java/org/apache/any23/source/FileDocumentSource.java
@@ -23,6 +23,7 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
/**
* File implementation of {@link DocumentSource}.
@@ -74,6 +75,6 @@
} finally {
is.close();
}
- return new String( baos.toByteArray() );
+ return new String(baos.toByteArray(), StandardCharsets.UTF_8 );
}
}
diff --git a/core/src/main/java/org/apache/any23/source/StringDocumentSource.java b/core/src/main/java/org/apache/any23/source/StringDocumentSource.java
index 2ac61dd..53bf1e5 100644
--- a/core/src/main/java/org/apache/any23/source/StringDocumentSource.java
+++ b/core/src/main/java/org/apache/any23/source/StringDocumentSource.java
@@ -20,6 +20,7 @@
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
/**
* String implementation of {@link DocumentSource}.
@@ -51,7 +52,7 @@
public InputStream openInputStream() throws IOException {
if (encoding == null) {
- return new ByteArrayInputStream(in.getBytes());
+ return new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));
}
return new ByteArrayInputStream(in.getBytes(encoding));
}
diff --git a/core/src/main/java/org/apache/any23/util/DiscoveryUtils.java b/core/src/main/java/org/apache/any23/util/DiscoveryUtils.java
index fa919e0..8ab78af 100644
--- a/core/src/main/java/org/apache/any23/util/DiscoveryUtils.java
+++ b/core/src/main/java/org/apache/any23/util/DiscoveryUtils.java
@@ -70,6 +70,7 @@
}
dirs.add( new File(fileNameDecoded) );
}
+ @SuppressWarnings("rawtypes")
final ArrayList<Class> classes = new ArrayList<Class>();
for (File directory : dirs) {
classes.addAll(findClasses(directory, packageName) );
@@ -86,11 +87,12 @@
* @param filter the interface/class filter.
* @return list of matching classes.
*/
- public static List<Class> getClassesInPackage(String packageName, Class filter) {
+ public static List<Class> getClassesInPackage(String packageName, Class<?> filter) {
final List<Class> classesInPackage = getClassesInPackage(packageName);
+ @SuppressWarnings("rawtypes")
final List<Class> result = new ArrayList<Class>();
- Class superClazz;
- for(Class clazz : classesInPackage) {
+ Class<?> superClazz;
+ for(Class<?> clazz : classesInPackage) {
if(clazz.equals(filter)) {
continue;
}
@@ -133,13 +135,15 @@
final String packagePath = sections[1].substring(1);
try {
+ @SuppressWarnings("resource")
final JarFile jarFile = new JarFile(jarLocation);
final Enumeration<JarEntry> entries = jarFile.entries();
+ @SuppressWarnings("rawtypes")
final List<Class> result = new ArrayList<Class>();
JarEntry current;
String entryName;
String clazzName;
- Class clazz;
+ Class<?> clazz;
while(entries.hasMoreElements()) {
current = entries.nextElement();
entryName = current.getName();
@@ -178,6 +182,7 @@
if (!directory.exists()) {
return Collections.emptyList();
}
+ @SuppressWarnings("rawtypes")
final List<Class> classes = new ArrayList<Class>();
File[] files = directory.listFiles();
for (File file : files) {
@@ -187,7 +192,7 @@
classes.addAll(findClassesInDir(file, packageName + "." + fileName));
} else if (fileName.endsWith(".class") && !fileName.contains("$")) {
try {
- Class clazz;
+ Class<?> clazz;
try {
clazz = Class.forName(packageName + '.' + fileName.substring(0, fileName.length() - 6));
} catch (ExceptionInInitializerError e) {
diff --git a/core/src/main/java/org/apache/any23/util/FileUtils.java b/core/src/main/java/org/apache/any23/util/FileUtils.java
index b31898f..aef36f5 100644
--- a/core/src/main/java/org/apache/any23/util/FileUtils.java
+++ b/core/src/main/java/org/apache/any23/util/FileUtils.java
@@ -24,13 +24,16 @@
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
-import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
+import java.io.OutputStreamWriter;
import java.io.PrintWriter;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
/**
* Utility class for handling files.
@@ -55,7 +58,7 @@
boolean success = target.renameTo(newFile);
if (!success) {
throw new IllegalStateException(
- String.format("Cannot move target file [%s] to destination [%s]", target, newFile)
+ String.format(Locale.ROOT, "Cannot move target file [%s] to destination [%s]", target, newFile)
);
}
return newFile;
@@ -120,9 +123,10 @@
* @throws IOException if there is an error dumping the content
*/
public static void dumpContent(File f, String content) throws IOException {
- FileWriter fw = new FileWriter(f);
+ Writer fw = null;
try {
- fw.write(content);
+ fw = new OutputStreamWriter(new FileOutputStream(f), StandardCharsets.UTF_8);
+ fw.write(content);
} finally {
StreamUtils.closeGracefully(fw);
}
@@ -137,10 +141,11 @@
*/
public static void dumpContent(File f, Throwable t) throws IOException {
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
- final PrintWriter pw = new PrintWriter(baos);
+ final PrintWriter pw = new PrintWriter(new OutputStreamWriter(
+ baos, StandardCharsets.UTF_8), true);
t.printStackTrace(pw);
pw.close();
- dumpContent(f, baos.toString());
+ dumpContent(f, baos.toString("UTF-8"));
}
/**
diff --git a/core/src/main/java/org/apache/any23/util/MathUtils.java b/core/src/main/java/org/apache/any23/util/MathUtils.java
index 7d4cdd9..ccb4ade 100644
--- a/core/src/main/java/org/apache/any23/util/MathUtils.java
+++ b/core/src/main/java/org/apache/any23/util/MathUtils.java
@@ -17,6 +17,7 @@
package org.apache.any23.util;
+import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
@@ -34,7 +35,7 @@
try {
MessageDigest md5 = MessageDigest.getInstance("MD5");
md5.reset();
- md5.update(s.getBytes());
+ md5.update(s.getBytes(StandardCharsets.UTF_8));
byte[] digest = md5.digest();
StringBuffer result = new StringBuffer();
for (byte b : digest) {
diff --git a/core/src/main/java/org/apache/any23/util/StreamUtils.java b/core/src/main/java/org/apache/any23/util/StreamUtils.java
index f2b227a..8210df5 100644
--- a/core/src/main/java/org/apache/any23/util/StreamUtils.java
+++ b/core/src/main/java/org/apache/any23/util/StreamUtils.java
@@ -32,6 +32,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@@ -66,7 +67,7 @@
* @throws IOException if an error occurs while consuming the <code>is</code> stream.
*/
public static String[] asLines(InputStream is) throws IOException {
- final BufferedReader br = new BufferedReader(new InputStreamReader(is));
+ final BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
final List<String> lines = new ArrayList<String>();
try {
String line;
@@ -91,7 +92,7 @@
if (is == null) {
throw new NullPointerException("input stream is null.");
}
- final BufferedReader br = new BufferedReader(new InputStreamReader(is));
+ final BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
try {
final StringBuilder content = new StringBuilder();
String line;
diff --git a/core/src/main/java/org/apache/any23/util/StringUtils.java b/core/src/main/java/org/apache/any23/util/StringUtils.java
index b050641..1fada1c 100644
--- a/core/src/main/java/org/apache/any23/util/StringUtils.java
+++ b/core/src/main/java/org/apache/any23/util/StringUtils.java
@@ -16,6 +16,8 @@
*/
package org.apache.any23.util;
+import java.util.Locale;
+
/**
* This class provides a set of string utility methods.
*
@@ -195,7 +197,7 @@
return org.apache.commons.lang3.StringUtils.uncapitalize(in);
}
- in = in.toLowerCase();
+ in = in.toLowerCase(Locale.ROOT);
String[] words = in.split("\\s+");
StringBuilder sb = new StringBuilder(in.length());
sb.append(words[0]);
diff --git a/core/src/main/java/org/apache/any23/validator/DefaultDOMDocument.java b/core/src/main/java/org/apache/any23/validator/DefaultDOMDocument.java
index 5fe9c04..9bac66e 100644
--- a/core/src/main/java/org/apache/any23/validator/DefaultDOMDocument.java
+++ b/core/src/main/java/org/apache/any23/validator/DefaultDOMDocument.java
@@ -25,6 +25,7 @@
import java.net.URI;
import java.util.List;
+import java.util.Locale;
/**
* This class wraps the <i>DOM</i> document.
@@ -65,12 +66,12 @@
List<Node> nodes = DomUtils.findAll(document, xPath);
if(nodes.size() == 0) {
throw new IllegalArgumentException(
- String.format("Cannot find node at XPath '%s'", xPath)
+ String.format(Locale.ROOT, "Cannot find node at XPath '%s'", xPath)
);
}
if(nodes.size() > 1) {
throw new IllegalArgumentException(
- String.format("The given XPath '%s' corresponds to more than one node.", xPath)
+ String.format(Locale.ROOT, "The given XPath '%s' corresponds to more than one node.", xPath)
);
}
return nodes.get(0);
diff --git a/core/src/main/java/org/apache/any23/validator/ValidationReport.java b/core/src/main/java/org/apache/any23/validator/ValidationReport.java
index 56b3f10..e9e2e8a 100644
--- a/core/src/main/java/org/apache/any23/validator/ValidationReport.java
+++ b/core/src/main/java/org/apache/any23/validator/ValidationReport.java
@@ -22,6 +22,7 @@
import java.io.Serializable;
import java.util.List;
+import java.util.Locale;
/**
* This class contains the report of a validation performed by
@@ -107,7 +108,7 @@
@Override
public String toString() {
- return String.format(
+ return String.format(Locale.ROOT,
"Issue %s '%s' %s",
level,
message,
@@ -177,7 +178,7 @@
@Override
public String toString() {
- return String.format("%s %s %s", this.getClass().getName(), cause, message);
+ return String.format(Locale.ROOT, "%s %s %s", this.getClass().getName(), cause, message);
}
}
@@ -206,7 +207,7 @@
@Override
public String toString() {
- return String.format("%s - %s", super.toString(), origin.getHRName());
+ return String.format(Locale.ROOT, "%s - %s", super.toString(), origin.getHRName());
}
}
@@ -232,7 +233,7 @@
@Override
public String toString() {
- return String.format("%s - %s", super.toString(), origin.getHRName());
+ return String.format(Locale.ROOT, "%s - %s", super.toString(), origin.getHRName());
}
}
diff --git a/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java b/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java
index 2e591d0..b973efc 100644
--- a/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java
+++ b/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java
@@ -21,6 +21,7 @@
import java.io.OutputStream;
import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
import java.lang.annotation.Documented;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
@@ -32,6 +33,7 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
+import java.util.Locale;
/**
* Default implementation of {@link ValidationReportSerializer}
@@ -43,7 +45,12 @@
@Override
public void serialize(ValidationReport vr, OutputStream os) throws SerializationException {
- final PrintStream ps = new PrintStream(os);
+ PrintStream ps;
+ try {
+ ps = new PrintStream(os, true, "UTF-8");
+ } catch (UnsupportedEncodingException e) { // UTF-8 is a charset every JVM must support
+ throw new RuntimeException("Error serializing the OutputStream as UTF-8 encoding.", e);
+ }
try {
serializeObject(vr, ps);
} finally {
@@ -57,7 +64,7 @@
}
final Class<? extends Object> oClass = o.getClass();
final String oClassName = getClassName(oClass);
- ps.printf("<%s>%n", oClassName);
+ ps.printf(Locale.ROOT, "<%s>%n", oClassName);
List<Method> getters = filterGetters(o.getClass());
if(getters.isEmpty()) {
ps.print( o.toString() );
@@ -66,7 +73,7 @@
for (Method getter : getters) {
serializeGetterValue(o, getter, ps);
}
- ps.printf("</%s>%n", oClassName);
+ ps.printf(Locale.ROOT, "</%s>%n", oClassName);
}
private String getClassName(Class<? extends Object> oClass) {
@@ -107,13 +114,13 @@
try {
value = m.invoke(o);
} catch (Exception e) {
- throw new SerializationException( String.format("Error while reading method '%s'", methodName), e );
+ throw new SerializationException( String.format(Locale.ROOT, "Error while reading method '%s'", methodName), e );
}
final String property = getPropertyFromMethodName(methodName);
if( isManaged(value) ) {
- ps.printf("<%s>%n", property);
+ ps.printf(Locale.ROOT, "<%s>%n", property);
printObject(value, ps);
- ps.printf("</%s>%n", property);
+ ps.printf(Locale.ROOT, "</%s>%n", property);
} else {
List<Method> getters = filterGetters(value.getClass());
for (Method getter : getters) {
diff --git a/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java b/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java
index c8b62c5..f77ec7f 100644
--- a/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java
+++ b/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java
@@ -30,6 +30,7 @@
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
import java.lang.reflect.Constructor;
import java.util.List;
import java.util.Map;
@@ -136,10 +137,19 @@
*/
public static String serializeVocabulary(Vocabulary vocabulary, RDFFormat format) {
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
- final PrintStream ps = new PrintStream(baos);
+ PrintStream ps;
+ try {
+ ps = new PrintStream(baos, true, "UTF-8");
+ } catch (UnsupportedEncodingException e1) { // UTF-8 is a charset every JVM must support
+ throw new RuntimeException("UTF-8 encoding error when serializing the vocabulary to NQuads.", e1);
+ }
serializeVocabulary(vocabulary, format, false, ps);
ps.close();
- return baos.toString();
+ try {
+ return baos.toString("UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException("Error writing ByteArrayOutputStream to String with \"UTF-8\" encoding!", e); // keep the cause, as the catch above does
+ }
}
/**
diff --git a/core/src/main/java/org/apache/any23/writer/CountingTripleHandler.java b/core/src/main/java/org/apache/any23/writer/CountingTripleHandler.java
index 9839759..409ccf8 100644
--- a/core/src/main/java/org/apache/any23/writer/CountingTripleHandler.java
+++ b/core/src/main/java/org/apache/any23/writer/CountingTripleHandler.java
@@ -17,6 +17,8 @@
package org.apache.any23.writer;
+import java.util.Locale;
+
import org.apache.any23.extractor.ExtractionContext;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.IRI;
@@ -70,7 +72,7 @@
public void receiveTriple(Resource s, IRI p, Value o, IRI g, ExtractionContext context)
throws TripleHandlerException {
count++;
- if(logTriples) logger.debug( String.format("%s %s %s %s %s\n", s, p, o, g, context) );
+ if(logTriples) logger.debug( String.format(Locale.ROOT, "%s %s %s %s %s\n", s, p, o, g, context) );
}
public void receiveNamespace(String prefix, String uri, ExtractionContext context)
diff --git a/core/src/main/java/org/apache/any23/writer/LoggingTripleHandler.java b/core/src/main/java/org/apache/any23/writer/LoggingTripleHandler.java
index 9676cfb..7bd9e69 100644
--- a/core/src/main/java/org/apache/any23/writer/LoggingTripleHandler.java
+++ b/core/src/main/java/org/apache/any23/writer/LoggingTripleHandler.java
@@ -24,6 +24,7 @@
import java.io.PrintWriter;
import java.util.HashMap;
+import java.util.Locale;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
@@ -100,7 +101,7 @@
if (e.getValue() > 0) {
success.set(true);
}
- return String.format("%s:%d", e.getKey(), e.getValue()); }
+ return String.format(Locale.ROOT, "%s:%d", e.getKey(), e.getValue()); }
).collect(Collectors.toList()).toArray(new String[] {});
sb.append(StringUtils.join(", ", parsers));
sb.append(" ]");
diff --git a/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java b/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java
index c237ff5..ba30589 100644
--- a/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java
+++ b/core/src/main/java/org/apache/any23/writer/RDFWriterTripleHandler.java
@@ -34,6 +34,7 @@
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
+import java.util.Locale;
import java.util.Optional;
/**
@@ -134,7 +135,7 @@
writer().handleStatement(RDFUtils.quad(s, p, o, g));
} catch (RDFHandlerException ex) {
throw new TripleHandlerException(
- String.format("Error while receiving triple: %s %s %s %s", s, p, o, g),
+ String.format(Locale.ROOT, "Error while receiving triple: %s %s %s %s", s, p, o, g),
ex
);
}
@@ -146,7 +147,7 @@
try {
writer().handleNamespace(prefix, uri);
} catch (RDFHandlerException ex) {
- throw new TripleHandlerException(String.format("Error while receiving namespace: %s:%s", prefix, uri),
+ throw new TripleHandlerException(String.format(Locale.ROOT, "Error while receiving namespace: %s:%s", prefix, uri),
ex
);
}
diff --git a/core/src/main/java/org/apache/any23/writer/ReportingTripleHandler.java b/core/src/main/java/org/apache/any23/writer/ReportingTripleHandler.java
index 081a251..f67d5af 100644
--- a/core/src/main/java/org/apache/any23/writer/ReportingTripleHandler.java
+++ b/core/src/main/java/org/apache/any23/writer/ReportingTripleHandler.java
@@ -24,6 +24,7 @@
import java.util.Collection;
import java.util.HashSet;
+import java.util.Locale;
import java.util.concurrent.atomic.AtomicInteger;
/**
@@ -64,7 +65,7 @@
* @return a human readable report.
*/
public String printReport() {
- return String.format("Total Documents: %d, Total Triples: %d", getTotalDocuments(), getTotalTriples());
+ return String.format(Locale.ROOT, "Total Documents: %d, Total Triples: %d", getTotalDocuments(), getTotalTriples());
}
public void startDocument(IRI documentIRI) throws TripleHandlerException {
diff --git a/core/src/main/java/org/apache/any23/writer/RepositoryWriter.java b/core/src/main/java/org/apache/any23/writer/RepositoryWriter.java
index 594cc37..299e069 100644
--- a/core/src/main/java/org/apache/any23/writer/RepositoryWriter.java
+++ b/core/src/main/java/org/apache/any23/writer/RepositoryWriter.java
@@ -17,6 +17,8 @@
package org.apache.any23.writer;
+import java.util.Locale;
+
import org.apache.any23.extractor.ExtractionContext;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.IRI;
@@ -67,7 +69,7 @@
getContextResource(context.getDocumentIRI())
);
} catch (RepositoryException ex) {
- throw new TripleHandlerException(String.format("Error while receiving triple: %s %s %s", s, p , o),
+ throw new TripleHandlerException(String.format(Locale.ROOT, "Error while receiving triple: %s %s %s", s, p , o),
ex
);
}
@@ -82,7 +84,7 @@
try {
conn.setNamespace(prefix, uri);
} catch (RepositoryException ex) {
- throw new TripleHandlerException(String.format("Error while receiving namespace: %s:%s", prefix, uri),
+ throw new TripleHandlerException(String.format(Locale.ROOT, "Error while receiving namespace: %s:%s", prefix, uri),
ex
);
}
diff --git a/core/src/test/java/org/apache/any23/Any23Test.java b/core/src/test/java/org/apache/any23/Any23Test.java
index c9b8814..d851986 100644
--- a/core/src/test/java/org/apache/any23/Any23Test.java
+++ b/core/src/test/java/org/apache/any23/Any23Test.java
@@ -648,7 +648,7 @@
any23 = new Any23();
Repository store = new SailRepository(new MemoryStore());
- store.initialize();
+ store.init();
try
{
conn = store.getConnection();
@@ -708,7 +708,7 @@
statement.getPredicate(), statement.getObject()));
}
- private boolean containsClass(List<?> list, Class clazz) {
+ private boolean containsClass(List<?> list, Class<?> clazz) {
for (Object o : list) {
if (o.getClass().equals(clazz)) {
return true;
diff --git a/core/src/test/java/org/apache/any23/extractor/SingleDocumentExtractionTest.java b/core/src/test/java/org/apache/any23/extractor/SingleDocumentExtractionTest.java
index 398cc7a..a22f7db 100644
--- a/core/src/test/java/org/apache/any23/extractor/SingleDocumentExtractionTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/SingleDocumentExtractionTest.java
@@ -89,7 +89,7 @@
super.setUp();
extractorGroup = ExtractorRegistryImpl.getInstance().getExtractorGroup();
store = new MemoryStore();
- store.initialize();
+ store.init();
conn = new SailRepository(store).getConnection();
}
@@ -97,7 +97,7 @@
public void tearDown() throws SailException, RepositoryException, TripleHandlerException {
rdfxmlWriter.close();
repositoryWriter.close();
- logger.debug( baos.toString() );
+ logger.debug(baos.toString());
singleDocumentExtraction = null;
extractorGroup = null;
diff --git a/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java b/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
index e351e74..9241041 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
@@ -110,7 +110,7 @@
super.setUp();
store = new MemoryStore();
repository = new SailRepository(store);
- repository.initialize();
+ repository.init();
conn = repository.getConnection();
}
diff --git a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
index a2cd8b6..64d8122 100644
--- a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
@@ -128,7 +128,7 @@
}
}
catch (Exception ex) {
- ex.printStackTrace();
+ logger.error(ex.getMessage(), ex); // pass the throwable so the stack trace printed before is not lost
foundFailure.set(true);
}
finally {
diff --git a/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
index 5db98ae..d689201 100644
--- a/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
@@ -73,6 +73,7 @@
for (int i = 0; i <= Character.MAX_CODE_POINT; i++) {
if (Character.isWhitespace(i) || Character.isSpaceChar(i)) {
byte[] bytes = new String(Character.toChars(i)).getBytes(StandardCharsets.UTF_8);
+ @SuppressWarnings("resource")
InputStream stream = new JsonCleaningInputStream(new ByteArrayInputStream(bytes));
if (i == '\r' || i == '\n') {
Assert.assertEquals(stream.read(), i);
@@ -105,7 +106,6 @@
final TripleHandler tHandler = new RDFXMLWriter(baos);
final ExtractionContext extractionContext = new ExtractionContext("rdf-jsonld", uri);
final ExtractionResult result = new ExtractionResultImpl(extractionContext, extractor, tHandler);
- extractor.setStopAtFirstError(false);
try {
extractor.run(
ExtractionParameters.newDefault(),
@@ -114,7 +114,7 @@
result
);
} finally {
- logger.debug(baos.toString());
+ logger.debug(baos.toString("UTF-8"));
tHandler.close();
result.close();
}
diff --git a/core/src/test/java/org/apache/any23/extractor/rdfa/AbstractRDFaExtractorTestCase.java b/core/src/test/java/org/apache/any23/extractor/rdfa/AbstractRDFaExtractorTestCase.java
index ff4120b..39ba542 100644
--- a/core/src/test/java/org/apache/any23/extractor/rdfa/AbstractRDFaExtractorTestCase.java
+++ b/core/src/test/java/org/apache/any23/extractor/rdfa/AbstractRDFaExtractorTestCase.java
@@ -46,7 +46,7 @@
@Test
public void testBasic() throws Exception {
assertExtract("/html/rdfa/basic.html");
- System.out.println(dumpModelToNQuads());
+ logger.info(dumpModelToNQuads());
assertContains(null, vDCTERMS.creator, RDFUtils.literal("Alice", "en"));
assertContains(null, vDCTERMS.title,
RDFUtils.literal("The trouble with Bob", "en"));
diff --git a/core/src/test/java/org/apache/any23/extractor/yaml/YAMLTikaParserTest.java b/core/src/test/java/org/apache/any23/extractor/yaml/YAMLTikaParserTest.java
index 6180a18..196dba4 100644
--- a/core/src/test/java/org/apache/any23/extractor/yaml/YAMLTikaParserTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/yaml/YAMLTikaParserTest.java
@@ -54,7 +54,6 @@
Assert.assertNotNull("Could not find test file: " + file1, is);
MIMEType type = detector.guessMIMEType(null, is, null);
- log.info("Type: {}", type.toString());
// Not currently doing stream detection for YAML, so it returns the default, octet-stream
Assert.assertEquals("application/octet-stream", type.toString());
}
@@ -66,7 +65,6 @@
log.debug("normatised file name: {}", fileName);
MIMEType type = detector.guessMIMEType(fileName, null, null);
- log.info("Type: {}", type.toString());
Assert.assertEquals("text/x-yaml", type.toString());
}
diff --git a/csvutils/src/main/java/org/apache/any23/extractor/csv/CSVReaderBuilder.java b/csvutils/src/main/java/org/apache/any23/extractor/csv/CSVReaderBuilder.java
index 9ecb6b9..7733a09 100644
--- a/csvutils/src/main/java/org/apache/any23/extractor/csv/CSVReaderBuilder.java
+++ b/csvutils/src/main/java/org/apache/any23/extractor/csv/CSVReaderBuilder.java
@@ -25,6 +25,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.util.Iterator;
/**
@@ -68,7 +69,7 @@
CSVFormat bestStrategy = getBestStrategy(is);
if (bestStrategy == null)
bestStrategy = getCSVStrategyFromConfiguration();
- return new CSVParser(new InputStreamReader(is), bestStrategy);
+ return new CSVParser(new InputStreamReader(is, StandardCharsets.UTF_8), bestStrategy);
}
/**
@@ -130,7 +131,8 @@
is.mark(Integer.MAX_VALUE);
try {
- final Iterator<CSVRecord> rows = new CSVParser(new InputStreamReader(is), strategy).iterator();
+ @SuppressWarnings("resource")
+ final Iterator<CSVRecord> rows = new CSVParser(new InputStreamReader(is, StandardCharsets.UTF_8), strategy).iterator();
int linesToCheck = 5;
int headerColumnCount = -1;
while (linesToCheck > 0 && rows.hasNext()) {
diff --git a/encoding/src/main/java/org/apache/any23/encoding/TikaEncodingDetector.java b/encoding/src/main/java/org/apache/any23/encoding/TikaEncodingDetector.java
index a7ccf9e..67a01af 100644
--- a/encoding/src/main/java/org/apache/any23/encoding/TikaEncodingDetector.java
+++ b/encoding/src/main/java/org/apache/any23/encoding/TikaEncodingDetector.java
@@ -224,7 +224,7 @@
//make sure json-ld data is included in text stats
//otherwise, ignore css & javascript
if ("script".equalsIgnoreCase(node.nodeName())) {
- if (node.attr("type").toLowerCase().contains("json")) {
+ if (node.attr("type").toLowerCase(java.util.Locale.ROOT).contains("json")) {
sb.append(data);
}
break;
diff --git a/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java b/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java
index 3347895..4c9b7d7 100644
--- a/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java
+++ b/mime/src/main/java/org/apache/any23/mime/TikaMIMETypeDetector.java
@@ -36,6 +36,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
/**
@@ -112,7 +113,7 @@
String sample = extractDataSample(is, '.');
RDFParser turtleParser = Rio.createParser(RDFFormat.TURTLE);
turtleParser.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
- ByteArrayInputStream bais = new ByteArrayInputStream(sample.getBytes());
+ ByteArrayInputStream bais = new ByteArrayInputStream(sample.getBytes(StandardCharsets.UTF_8));
try {
turtleParser.parse(bais, "");
return true;
@@ -163,7 +164,7 @@
* @throws IOException if an error occurs during sampling.
*/
private static String extractDataSample(InputStream is, char breakChar) throws IOException {
- BufferedReader br = new BufferedReader(new InputStreamReader(is));
+ BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
StringBuilder sb = new StringBuilder();
final int MAX_SIZE = 1024 * 2;
int c;
diff --git a/mime/src/test/java/org/apache/any23/mime/TikaMIMETypeDetectorTest.java b/mime/src/test/java/org/apache/any23/mime/TikaMIMETypeDetectorTest.java
index 1a207cd..a212699 100644
--- a/mime/src/test/java/org/apache/any23/mime/TikaMIMETypeDetectorTest.java
+++ b/mime/src/test/java/org/apache/any23/mime/TikaMIMETypeDetectorTest.java
@@ -27,6 +27,7 @@
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
@@ -398,22 +399,22 @@
/* END: by content and name. */
private void assertN3Detection(String n3Exp) throws IOException {
- ByteArrayInputStream bais = new ByteArrayInputStream(n3Exp.getBytes());
+ ByteArrayInputStream bais = new ByteArrayInputStream(n3Exp.getBytes(StandardCharsets.UTF_8));
Assert.assertTrue(TikaMIMETypeDetector.checkN3Format(bais));
}
private void assertN3DetectionFail(String n3Exp) throws IOException {
- ByteArrayInputStream bais = new ByteArrayInputStream(n3Exp.getBytes());
+ ByteArrayInputStream bais = new ByteArrayInputStream(n3Exp.getBytes(StandardCharsets.UTF_8));
Assert.assertFalse(TikaMIMETypeDetector.checkN3Format(bais));
}
private void assertNQuadsDetection(String n4Exp) throws IOException {
- ByteArrayInputStream bais = new ByteArrayInputStream(n4Exp.getBytes());
+ ByteArrayInputStream bais = new ByteArrayInputStream(n4Exp.getBytes(StandardCharsets.UTF_8));
Assert.assertTrue(TikaMIMETypeDetector.checkNQuadsFormat(bais));
}
private void assertNQuadsDetectionFail(String n4Exp) throws IOException {
- ByteArrayInputStream bais = new ByteArrayInputStream(n4Exp.getBytes());
+ ByteArrayInputStream bais = new ByteArrayInputStream(n4Exp.getBytes(StandardCharsets.UTF_8));
Assert.assertFalse(TikaMIMETypeDetector.checkNQuadsFormat(bais));
}
@@ -438,7 +439,8 @@
Assert.assertNotSame(expectedMimeType, detectedMimeType);
} else {
Assert.assertEquals(
- String.format("Error in mimetype detection for file %s", test),
+ String.format(java.util.Locale.ROOT,
+ "Error in mimetype detection for file %s", test),
expectedMimeType,
detectedMimeType
);
@@ -481,7 +483,8 @@
Assert.assertNotSame(expectedMimeType, detectedMimeType);
} else {
Assert.assertEquals(
- String.format("Error while detecting mimetype in file %s", test),
+ String.format(java.util.Locale.ROOT,
+ "Error while detecting mimetype in file %s", test),
expectedMimeType,
detectedMimeType
);
diff --git a/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java b/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java
index 66b167b..8acc3dd 100644
--- a/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java
+++ b/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java
@@ -28,9 +28,12 @@
import org.apache.any23.plugin.crawler.CrawlerListener;
import org.apache.any23.plugin.crawler.SiteCrawler;
import org.apache.any23.source.StringDocumentSource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.File;
import java.net.URL;
+import java.util.Locale;
import java.util.UUID;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
@@ -46,6 +49,8 @@
@Parameters(commandNames = "crawler", commandDescription = "Any23 Crawler Command Line Tool.")
public class Crawler extends Rover {
+ private static final Logger LOG = LoggerFactory.getLogger(Crawler.class);
+
private final Object roverLock = new Object();
@Parameter(
@@ -84,14 +89,14 @@
final URL seed = new URL(inputIRIs.get( 0 ));
if ( storageFolder.isFile() ) {
- throw new IllegalStateException( format( "Storage folder %s can not be a file, must be a directory",
+ throw new IllegalStateException( format(Locale.ROOT, "Storage folder %s can not be a file, must be a directory",
storageFolder ) );
}
if ( !storageFolder.exists() ) {
if ( !storageFolder.mkdirs() ) {
throw new IllegalStateException(
- format( "Storage folder %s can not be created, please verify you have enough permissions",
+ format(Locale.ROOT, "Storage folder %s can not be created, please verify you have enough permissions",
storageFolder ) );
}
}
@@ -106,7 +111,7 @@
@Override
public void visitedPage(Page page) {
final String pageURL = page.getWebURL().getURL();
- System.err.println( format("Processing page: [%s]", pageURL) );
+ LOG.info(format(Locale.ROOT, "Processing page: [%s]", pageURL) );
final ParseData parseData = page.getParseData();
if (parseData instanceof HtmlParseData) {
@@ -122,7 +127,7 @@
);
}
} catch (Exception e) {
- System.err.println(format("Error while processing page [%s], error: %s .",
+ LOG.error(format(Locale.ROOT, "Error while processing page [%s], error: %s .",
pageURL, e.getMessage())
);
}
@@ -134,10 +139,9 @@
@Override
public void run() {
try {
- System.err.println( Crawler.super.printReports() );
- // siteCrawler.stop(); // TODO: cause shutdown hanging.
+ LOG.error(Crawler.super.printReports());
} catch (Exception e) {
- e.printStackTrace(System.err);
+ LOG.error(e.getMessage(), e); // pass the throwable so the stack trace printed before is not lost
}
}
});
@@ -149,9 +153,9 @@
@Override
public Pattern convert( String value ) {
try {
- return Pattern.compile( value );
+ return Pattern.compile(value);
} catch (PatternSyntaxException pse) {
- throw new ParameterException( format("Invalid page filter, '%s' must be a regular expression.", value) );
+ throw new ParameterException(format(Locale.ROOT, "Invalid page filter, '%s' must be a regular expression.", value) );
}
}
diff --git a/plugins/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/DefaultWebCrawler.java b/plugins/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/DefaultWebCrawler.java
index 2e43445..835e77d 100644
--- a/plugins/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/DefaultWebCrawler.java
+++ b/plugins/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/DefaultWebCrawler.java
@@ -23,6 +23,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.Locale;
import java.util.regex.Pattern;
/**
@@ -54,7 +55,7 @@
return false;
if (url.getURL() == null)
return false;
- final String href = url.getURL().toLowerCase();
+ final String href = url.getURL().toLowerCase(Locale.ROOT);
if (!href.startsWith(sharedData.getSeed()))
return false;
return pattern == null || !pattern.matcher(href).matches();
diff --git a/plugins/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/SharedData.java b/plugins/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/SharedData.java
index d1d5ca2..ccb23fb 100644
--- a/plugins/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/SharedData.java
+++ b/plugins/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/SharedData.java
@@ -20,6 +20,7 @@
import edu.uci.ics.crawler4j.crawler.Page;
import java.util.List;
+import java.util.Locale;
import java.util.regex.Pattern;
/**
@@ -85,7 +86,7 @@
private SharedData(String seed, Pattern pattern, List<CrawlerListener> listeners) {
if(seed == null || seed.trim().length() == 0)
throw new IllegalArgumentException(
- String.format("Invalid seed '%s'", seed)
+ String.format(Locale.ROOT, "Invalid seed '%s'", seed)
);
this.seed = seed;
diff --git a/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java b/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
index 94a3210..0f6af9d 100644
--- a/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
+++ b/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
@@ -36,6 +36,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@@ -83,7 +84,7 @@
try {
final IRI documentIRI = extractionContext.getDocumentIRI();
for (ExtractionRule extractionRule : extractionRules) {
- final String content = extractionRule.boilerpipeExtractor.getText(new InputStreamReader(inputStream));
+ final String content = extractionRule.boilerpipeExtractor.getText(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
extractionResult.writeTriple(
documentIRI,
extractionRule.property,
diff --git a/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java b/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java
index b879641..5d12b6f 100644
--- a/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java
+++ b/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java
@@ -37,6 +37,7 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Locale;
/**
* Implementation of {@link org.apache.any23.extractor.Extractor.ContentExtractor} able to process
@@ -156,7 +157,7 @@
private IRI getCellIRI(IRI rowIRI, Cell cell) {
return RDFUtils.iri(rowIRI +
- String.format("/%d/", cell.getColumnIndex()));
+ String.format(Locale.ROOT, "/%d/", cell.getColumnIndex()));
}
private IRI cellTypeToType(CellType cellType) {
diff --git a/pom.xml b/pom.xml
index c8f5fb6..df41a12 100644
--- a/pom.xml
+++ b/pom.xml
@@ -281,6 +281,7 @@
<openie_2.11.version>4.2.6</openie_2.11.version>
<openregex.version>1.1.1</openregex.version>
<jackson.version>2.9.10</jackson.version>
+ <commons-io.version>2.6</commons-io.version>
<!-- Overridden in profiles to add JDK specific arguments to surefire -->
<surefire-extra-args />
@@ -317,6 +318,7 @@
<maven-war-plugin.version>3.2.3</maven-war-plugin.version>
<maven-invoker-plugin.version>3.2.1</maven-invoker-plugin.version>
<maven-checkstyle-plugin.version>3.1.12.2</maven-checkstyle-plugin.version>
+ <forbiddenapis.version>2.6</forbiddenapis.version>
<!--
| Any23 website has to be stored in SVN
@@ -554,7 +556,7 @@
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
- <version>2.6</version>
+ <version>${commons-io.version}</version>
</dependency>
<!-- END: Apache Commons -->
@@ -738,6 +740,34 @@
</pluginManagement>
<plugins>
+ <plugin>
+ <groupId>de.thetaphi</groupId>
+ <artifactId>forbiddenapis</artifactId>
+ <version>${forbiddenapis.version}</version>
+ <configuration>
+ <failOnUnsupportedJava>false</failOnUnsupportedJava>
+ <failOnUnresolvableSignatures>false</failOnUnresolvableSignatures>
+ <bundledSignatures>
+ <!-- https://github.com/policeman-tools/forbidden-apis/wiki/BundledSignatures -->
+ <bundledSignature>jdk-unsafe-${javac.src.version}</bundledSignature>
+ <bundledSignature>jdk-deprecated-${javac.src.version}</bundledSignature>
+ <bundledSignature>jdk-non-portable</bundledSignature>
+ <!-- comment out until we upgrade past Java 8 -->
+ <!--bundledSignature>jdk-reflection</bundledSignature-->
+ <bundledSignature>jdk-internal-${javac.src.version}</bundledSignature>
+ <bundledSignature>jdk-system-out</bundledSignature>
+ <bundledSignature>commons-io-unsafe-${commons-io.version}</bundledSignature>
+ </bundledSignatures>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>check</goal>
+ <!--goal>testCheck</goal-->
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
<!-- Javadoc plugin. -->
<plugin>
@@ -782,7 +812,6 @@
<configuration>
<doCheck>false</doCheck>
<doUpdate>false</doUpdate>
- <!-- Use committed revision so it does not change every time svn update is run -->
<useLastCommittedRevision>true</useLastCommittedRevision>
<!-- default revision number if unavailable -->
<revisionOnScmFailure>??????</revisionOnScmFailure>
diff --git a/service/src/main/java/org/apache/any23/servlet/Servlet.java b/service/src/main/java/org/apache/any23/servlet/Servlet.java
index ad7c1ed..0953ca4 100644
--- a/service/src/main/java/org/apache/any23/servlet/Servlet.java
+++ b/service/src/main/java/org/apache/any23/servlet/Servlet.java
@@ -42,6 +42,7 @@
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
+import java.util.Locale;
import java.util.regex.Pattern;
import static org.apache.any23.extractor.ExtractionParameters.ValidationMode;
@@ -308,7 +309,7 @@
if ("validate-fix".equalsIgnoreCase(validationMode))
return ValidationMode.VALIDATE_AND_FIX;
throw new IllegalArgumentException(
- String.format("Invalid value '%s' for '%s' parameter.", validationMode, parameter)
+ String.format(Locale.ROOT, "Invalid value '%s' for '%s' parameter.", validationMode, parameter)
);
}
diff --git a/service/src/main/java/org/apache/any23/servlet/WebResponder.java b/service/src/main/java/org/apache/any23/servlet/WebResponder.java
index 9640b17..21cc140 100644
--- a/service/src/main/java/org/apache/any23/servlet/WebResponder.java
+++ b/service/src/main/java/org/apache/any23/servlet/WebResponder.java
@@ -25,6 +25,7 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
+import java.util.Locale;
import java.util.stream.Collectors;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletResponse;
@@ -178,7 +179,7 @@
final ServletOutputStream sos = response.getOutputStream();
final byte[] data = byteOutStream.toByteArray();
if(report) {
- final PrintStream ps = new PrintStream(sos);
+ final PrintStream ps = new PrintStream(sos, true, "UTF-8");
try {
printHeader(ps);
printResponse(reporter, er, data, ps);
@@ -225,10 +226,10 @@
final Collection<IssueReport.Issue> extractorIssues = er.getExtractorIssues(name);
if(extractorIssues.isEmpty())
continue;
- ps.println( String.format("<extractorIssues extractor=\"%s\">", name));
+ ps.println( String.format(Locale.ROOT, "<extractorIssues extractor=\"%s\">", name));
for(IssueReport.Issue issue : er.getExtractorIssues(name)) {
ps.println(
- String.format(
+ String.format(Locale.ROOT,
"<issue level=\"%s\" row=\"%d\" col=\"%d\">%s</issue>",
issue.getLevel().toString(),
issue.getRow(),
@@ -249,7 +250,7 @@
// Human readable error message.
if(msg != null) {
- ps.printf("<message>%s</message>%n", msg);
+ ps.printf(Locale.ROOT, "<message>%s</message>%n", msg);
} else {
ps.print("<message/>\n");
}
@@ -296,7 +297,7 @@
response.setStatus(code);
response.setContentType("text/plain");
final ServletOutputStream sos = response.getOutputStream();
- final PrintStream ps = new PrintStream(sos);
+ final PrintStream ps = new PrintStream(sos, true, "UTF-8");
final byte[] data = byteOutStream.toByteArray();
if (report) {
try {
diff --git a/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java b/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java
index f4923fc..ea1e8da 100644
--- a/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java
+++ b/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java
@@ -21,6 +21,7 @@
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
+import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -102,8 +103,8 @@
if (!m.matches()) {
return null;
}
- String type = m.group(1).toLowerCase();
- String subtype = m.group(2).toLowerCase();
+ String type = m.group(1).toLowerCase(Locale.ROOT);
+ String subtype = m.group(2).toLowerCase(Locale.ROOT);
String unparsedParameters = m.group(3);
String qValue = m.group(7);
m = parameterPattern.matcher(unparsedParameters);
@@ -113,7 +114,7 @@
List<String> parameterNames = new ArrayList<>();
List<String> parameterValues = new ArrayList<>();
while (m.find()) {
- String name = m.group(1).toLowerCase();
+ String name = m.group(1).toLowerCase(Locale.ROOT);
String value = (m.group(3) == null) ? m.group(2) : unescape(m.group(3));
parameterNames.add(name);
parameterValues.add(value);