Merge pull request #240 from Claudenw/tika_based_document_analyzer
RAT-54: Tika based document analyzer
diff --git a/apache-rat-core/pom.xml b/apache-rat-core/pom.xml
index 1b31b09..a4ad439 100644
--- a/apache-rat-core/pom.xml
+++ b/apache-rat-core/pom.xml
@@ -113,10 +113,6 @@
<artifactId>commons-cli</artifactId>
</dependency>
<dependency>
- <groupId>commons-beanutils</groupId>
- <artifactId>commons-beanutils</artifactId>
- </dependency>
- <dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
@@ -126,5 +122,9 @@
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ </dependency>
</dependencies>
</project>
diff --git a/apache-rat-core/src/main/java/org/apache/rat/Defaults.java b/apache-rat-core/src/main/java/org/apache/rat/Defaults.java
index cb747f0..be0456b 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/Defaults.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/Defaults.java
@@ -19,6 +19,7 @@
package org.apache.rat;
import java.io.File;
+import java.io.FilenameFilter;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
@@ -28,6 +29,8 @@
import java.util.SortedSet;
import java.util.TreeSet;
+import org.apache.commons.io.filefilter.FalseFileFilter;
+import org.apache.commons.io.filefilter.IOFileFilter;
import org.apache.commons.io.function.IOSupplier;
import org.apache.rat.configuration.Format;
import org.apache.rat.configuration.LicenseReader;
@@ -37,11 +40,15 @@
import org.apache.rat.license.LicenseSetFactory;
import org.apache.rat.license.LicenseSetFactory.LicenseFilter;
import org.apache.rat.utils.Log;
+import org.apache.rat.walker.NameBasedHiddenFileFilter;
/**
- * A class that holds the list of licenses and approved licenses from one or more configuration files.
+ * A class that provides the standard system defaults for the ReportConfiguration.
+ *
+ * Properties in this class may be overridden or added to by configuration options in the various UIs.
+ * See the specific UI for details.
*/
-public class Defaults {
+public final class Defaults {
/**
* The default configuration file from the package.
@@ -57,7 +64,11 @@
public static final String UNAPPROVED_LICENSES_STYLESHEET = "org/apache/rat/unapproved-licenses.xsl";
private final LicenseSetFactory setFactory;
-
+
+ private static final FilenameFilter FILES_TO_IGNORE = FalseFileFilter.FALSE;
+
+ private static final IOFileFilter DIRECTORIES_TO_IGNORE = NameBasedHiddenFileFilter.HIDDEN;
+
/**
* Initialize the system configuration reader..
*/
@@ -71,7 +82,7 @@
/**
* Builder constructs instances.
*/
- private Defaults(Log log, Set<URL> urls) {
+ private Defaults(final Log log, final Set<URL> urls) {
this.setFactory = Defaults.readConfigFiles(log, urls);
}
@@ -87,7 +98,7 @@
* Reads the configuration files.
* @param urls the URLs to read.
*/
- private static LicenseSetFactory readConfigFiles(Log log, Collection<URL> urls) {
+ private static LicenseSetFactory readConfigFiles(final Log log, final Collection<URL> urls) {
SortedSet<ILicense> licenses = LicenseSetFactory.emptyLicenseSet();
@@ -133,16 +144,16 @@
* @param filter define which type of licenses to return.
* @return sorted set of licenses.
*/
- public SortedSet<ILicense> getLicenses(LicenseFilter filter) {
+ public SortedSet<ILicense> getLicenses(final LicenseFilter filter) {
return setFactory.getLicenses(filter);
}
-
+
/**
* Gets the sorted set of approved licenses for a given filter condition.
* @param filter define which type of licenses to return.
* @return sorted set of license families.
*/
- public SortedSet<ILicenseFamily> getLicenseFamilies(LicenseFilter filter) {
+ public SortedSet<ILicenseFamily> getLicenseFamilies(final LicenseFilter filter) {
return setFactory.getLicenseFamilies(filter);
}
@@ -152,14 +163,22 @@
* @param filter define which type of licenses to return.
* @return The sorted set of approved licenseIds.
*/
- public SortedSet<String> getLicenseIds(LicenseFilter filter) {
+ public SortedSet<String> getLicenseIds(final LicenseFilter filter) {
return setFactory.getLicenseFamilyIds(filter);
}
+
+ /**
+  * Gets the system default filter for files to ignore.
+  * The default is {@code FalseFileFilter.FALSE}.
+  * @return the default files-to-ignore filter.
+  */
+ public static FilenameFilter getFilesToIgnore() {
+ return FILES_TO_IGNORE;
+ }
+
+ /**
+  * Gets the system default filter for directories to ignore.
+  * The default is {@code NameBasedHiddenFileFilter.HIDDEN}.
+  * @return the default directories-to-ignore filter.
+  */
+ public static IOFileFilter getDirectoriesToIgnore() {
+ return DIRECTORIES_TO_IGNORE;
+ }
/**
* The Defaults builder.
*/
- public static class Builder {
+ public final static class Builder {
private final Set<URL> fileNames = new TreeSet<>(Comparator.comparing(URL::toString));
private Builder() {
@@ -172,7 +191,7 @@
* @param url the URL to add
* @return this Builder for chaining
*/
- public Builder add(URL url) {
+ public Builder add(final URL url) {
fileNames.add(url);
return this;
}
@@ -184,7 +203,7 @@
* @return this Builder for chaining
* @throws MalformedURLException in case the fileName cannot be found.
*/
- public Builder add(String fileName) throws MalformedURLException {
+ public Builder add(final String fileName) throws MalformedURLException {
return add(new File(fileName));
}
@@ -195,7 +214,7 @@
* @return this Builder for chaining
* @throws MalformedURLException in case the file cannot be found.
*/
- public Builder add(File file) throws MalformedURLException {
+ public Builder add(final File file) throws MalformedURLException {
return add(file.toURI().toURL());
}
@@ -205,7 +224,7 @@
* @param url the URL of the file to remove.
* @return this Builder for chaining
*/
- public Builder remove(URL url) {
+ public Builder remove(final URL url) {
fileNames.remove(url);
return this;
}
@@ -217,7 +236,7 @@
* @return this Builder for chaining
* @throws MalformedURLException in case the fileName cannot be found.
*/
- public Builder remove(String fileName) throws MalformedURLException {
+ public Builder remove(final String fileName) throws MalformedURLException {
return remove(new File(fileName));
}
@@ -228,7 +247,7 @@
* @return this Builder for chaining
* @throws MalformedURLException in case the file cannot be found.
*/
- public Builder remove(File file) throws MalformedURLException {
+ public Builder remove(final File file) throws MalformedURLException {
return remove(file.toURI().toURL());
}
@@ -246,7 +265,7 @@
* @param log the Log to use to report errors when building the defaults.
* @return the current defaults object.
*/
- public Defaults build(Log log) {
+ public Defaults build(final Log log) {
return new Defaults(log, fileNames);
}
}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/Report.java b/apache-rat-core/src/main/java/org/apache/rat/Report.java
index 6f49556..74ba5d1 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/Report.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/Report.java
@@ -238,7 +238,7 @@
}
if (cl.hasOption(SCAN_HIDDEN_DIRECTORIES)) {
- configuration.setDirectoryFilter(null);
+ configuration.setDirectoriesToIgnore(null);
}
if (cl.hasOption('a') || cl.hasOption('A')) {
@@ -250,14 +250,14 @@
String[] excludes = cl.getOptionValues(EXCLUDE_CLI);
if (excludes != null) {
final FilenameFilter filter = parseExclusions(Arrays.asList(excludes));
- configuration.setInputFileFilter(filter);
+ configuration.setFilesToIgnore(filter);
}
} else if (cl.hasOption(EXCLUDE_FILE_CLI)) {
String excludeFileName = cl.getOptionValue(EXCLUDE_FILE_CLI);
if (excludeFileName != null) {
final FilenameFilter filter = parseExclusions(
FileUtils.readLines(new File(excludeFileName), StandardCharsets.UTF_8));
- configuration.setInputFileFilter(filter);
+ configuration.setFilesToIgnore(filter);
}
}
@@ -452,11 +452,11 @@
}
if (base.isDirectory()) {
- return new DirectoryWalker(base, config.getInputFileFilter(), config.getDirectoryFilter());
+ return new DirectoryWalker(base, config.getFilesToIgnore(), config.getDirectoriesToIgnore());
}
try {
- return new ArchiveWalker(base, config.getInputFileFilter());
+ return new ArchiveWalker(base, config.getFilesToIgnore());
} catch (IOException ex) {
config.getLog().log(Level.ERROR, "file '"+baseDirectory+"' is not valid gzip data.");
return null;
diff --git a/apache-rat-core/src/main/java/org/apache/rat/ReportConfiguration.java b/apache-rat-core/src/main/java/org/apache/rat/ReportConfiguration.java
index d737989..dee2915 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/ReportConfiguration.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/ReportConfiguration.java
@@ -50,7 +50,6 @@
import org.apache.rat.report.IReportable;
import org.apache.rat.utils.Log;
import org.apache.rat.utils.ReportingSet;
-import org.apache.rat.walker.NameBasedHiddenFileFilter;
/**
* A configuration object is used by the front end to invoke the
@@ -69,9 +68,9 @@
private boolean styleReport;
private IOSupplier<InputStream> styleSheet;
private IReportable reportable;
- private FilenameFilter inputFileFilter;
- private IOFileFilter directoryFilter;
- private Log log;
+ private FilenameFilter filesToIgnore;
+ private IOFileFilter directoriesToIgnore;
+ private final Log log;
private LicenseFilter listFamilies;
private LicenseFilter listLicenses;
private boolean dryRun;
@@ -89,7 +88,6 @@
.setMsgFormat( s -> String.format( "Duplicate License %s (%s) of type %s", s.getName(), s.getId(), s.getLicenseFamily().getFamilyCategory()));
approvedLicenseCategories = new TreeSet<>();
removedLicenseCategories = new TreeSet<>();
- directoryFilter = NameBasedHiddenFileFilter.HIDDEN;
styleReport = true;
listFamilies = LicenseFilter.NONE;
listLicenses = LicenseFilter.NONE;
@@ -179,31 +177,31 @@
/**
* @return The filename filter for the potential input files.
*/
- public FilenameFilter getInputFileFilter() {
- return inputFileFilter;
+ public FilenameFilter getFilesToIgnore() {
+ return filesToIgnore;
}
/**
- * @param inputFileFilter the filename filter to filter the input files.
+ * @param filesToIgnore the filename filter to filter the input files.
*/
- public void setInputFileFilter(FilenameFilter inputFileFilter) {
- this.inputFileFilter = inputFileFilter;
+ public void setFilesToIgnore(FilenameFilter filesToIgnore) {
+ this.filesToIgnore = filesToIgnore;
}
+ /**
+  * @return the filter that selects the directories to ignore.
+  */
- public IOFileFilter getDirectoryFilter() {
- return directoryFilter;
+ public IOFileFilter getDirectoriesToIgnore() {
+ return directoriesToIgnore;
}
+ /**
+  * Sets the filter that selects the directories to ignore.
+  * @param directoriesToIgnore the filter to use; {@code null} is replaced
+  * by {@code FalseFileFilter.FALSE}.
+  */
- public void setDirectoryFilter(IOFileFilter directoryFilter) {
- if (directoryFilter == null) {
- this.directoryFilter = FalseFileFilter.FALSE;
+ public void setDirectoriesToIgnore(IOFileFilter directoriesToIgnore) {
+ if (directoriesToIgnore == null) {
+ this.directoriesToIgnore = FalseFileFilter.FALSE;
} else {
- this.directoryFilter = directoryFilter;
+ this.directoriesToIgnore = directoriesToIgnore;
}
}
- public void addDirectoryFilter(IOFileFilter directoryFilter) {
- this.directoryFilter = this.directoryFilter.and(directoryFilter);
+ /**
+  * Adds a filter for further directories to ignore. A directory is ignored
+  * when either the current filter or the added filter accepts it.
+  * @param directoryToIgnore the additional filter; {@code null} adds nothing.
+  */
+ public void addDirectoryToIgnore(IOFileFilter directoryToIgnore) {
+     if (directoryToIgnore == null) {
+         return;
+     }
+     // The field has no initial value, so it is null until a setter or setFrom() runs.
+     // A filter that accepts a directory marks it as ignored, so widening the set
+     // of ignored directories needs "or"; "and" could only ever narrow it.
+     this.directoriesToIgnore = this.directoriesToIgnore == null
+             ? directoryToIgnore
+             : this.directoriesToIgnore.or(directoryToIgnore);
}
/**
@@ -247,6 +245,8 @@
* @param defaults The defaults to set.
*/
public void setFrom(Defaults defaults) {
+ setFilesToIgnore(Defaults.getFilesToIgnore());
+ setDirectoriesToIgnore(Defaults.getDirectoriesToIgnore());
addLicensesIfNotPresent(defaults.getLicenses(LicenseFilter.ALL));
addApprovedLicenseCategories(defaults.getLicenseIds(LicenseFilter.APPROVED));
if (isStyleReport() && getStyleSheet() == null) {
diff --git a/apache-rat-core/src/main/java/org/apache/rat/analysis/DefaultAnalyserFactory.java b/apache-rat-core/src/main/java/org/apache/rat/analysis/DefaultAnalyserFactory.java
index 666cfc8..141de2a 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/analysis/DefaultAnalyserFactory.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/analysis/DefaultAnalyserFactory.java
@@ -24,9 +24,6 @@
import org.apache.rat.api.Document;
import org.apache.rat.document.IDocumentAnalyser;
import org.apache.rat.document.RatDocumentAnalysisException;
-import org.apache.rat.document.impl.guesser.ArchiveGuesser;
-import org.apache.rat.document.impl.guesser.BinaryGuesser;
-import org.apache.rat.document.impl.guesser.NoteGuesser;
import org.apache.rat.license.ILicense;
import org.apache.rat.utils.Log;
@@ -63,8 +60,8 @@
/**
* Constructs a DocumentAnalyser for the specified license.
- *
- * @param license The license to analyse
+ * @param log the Log to use
+ * @param licenses The licenses to analyse
*/
public DefaultAnalyser(final Log log, final Collection<ILicense> licenses) {
this.licenses = licenses;
@@ -73,16 +70,23 @@
@Override
public void analyse(Document document) throws RatDocumentAnalysisException {
- if (NoteGuesser.isNote(document)) {
- document.getMetaData().setDocumentType(Document.Type.NOTICE);
- } else if (ArchiveGuesser.isArchive(document)) {
- document.getMetaData().setDocumentType(Document.Type.ARCHIVE);
- } else if (BinaryGuesser.isBinary(document)) {
- document.getMetaData().setDocumentType(Document.Type.BINARY);
- } else {
- document.getMetaData().setDocumentType(Document.Type.STANDARD);
- new DocumentHeaderAnalyser(log, licenses).analyse(document);
+
+ // TikaProcessor.process records the detected document type in the metadata.
+ TikaProcessor.process(log, document);
+
+ // Only STANDARD documents are scanned for license headers; every other type
+ // needs no further work. A plain comparison avoids both the implicit
+ // fall-through of a switch without break and an NPE on a null type.
+ if (Document.Type.STANDARD == document.getMetaData().getDocumentType()) {
+     new DocumentHeaderAnalyser(log, licenses).analyse(document);
}
}
}
}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java b/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
new file mode 100644
index 0000000..113411f
--- /dev/null
+++ b/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+package org.apache.rat.analysis;
+
+import org.apache.rat.api.Document;
+import org.apache.rat.document.RatDocumentAnalysisException;
+import org.apache.rat.document.impl.guesser.NoteGuesser;
+import org.apache.rat.utils.Log;
+import org.apache.tika.Tika;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A wrapper around the Tika media type detector.
+ */
+public final class TikaProcessor {
+
+    /** The shared Tika facade used to detect media types. */
+    private static final Tika TIKA = new Tika();
+    /** Maps non-"text" media type names to document types. "text" types are
+     * always STANDARD; any other type that is not listed here is BINARY. */
+    private static final Map<String, Document.Type> DOCUMENT_TYPE_MAP = new HashMap<>();
+
+    static {
+        // org.apache.tika.parser.epub.EpubParser
+        DOCUMENT_TYPE_MAP.put("application/x-ibooks+zip", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/epub+zip", Document.Type.ARCHIVE);
+
+        DOCUMENT_TYPE_MAP.put("application/vnd.wap.xhtml+xml", Document.Type.STANDARD);
+        DOCUMENT_TYPE_MAP.put("application/x-asp", Document.Type.STANDARD);
+        DOCUMENT_TYPE_MAP.put("application/xhtml+xml", Document.Type.STANDARD);
+
+        // org.apache.tika.parser.pdf.PDFParser
+        // NOTE(review): the removed BinaryGuesser listed "PDF" among its binary image
+        // extensions, yet PDF is mapped to STANDARD here -- confirm this is intended.
+        DOCUMENT_TYPE_MAP.put("application/pdf", Document.Type.STANDARD);
+        // org.apache.tika.parser.pkg.CompressorParser
+        DOCUMENT_TYPE_MAP.put("application/zlib", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-gzip", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-bzip2", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-compress", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-java-pack200", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-lzma", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/deflate64", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-lz4", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-snappy", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-brotli", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/gzip", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-bzip", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-xz", Document.Type.ARCHIVE);
+        // org.apache.tika.parser.pkg.PackageParser
+        DOCUMENT_TYPE_MAP.put("application/x-tar", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/java-archive", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-arj", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-archive", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/zip", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-cpio", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-tika-unix-dump", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-7z-compressed", Document.Type.ARCHIVE);
+        // org.apache.tika.parser.pkg.RarParser
+        DOCUMENT_TYPE_MAP.put("application/x-rar-compressed", Document.Type.ARCHIVE);
+
+        // org.apache.tika.parser.xliff.XLIFF12Parser
+        DOCUMENT_TYPE_MAP.put("application/x-xliff+xml", Document.Type.STANDARD);
+        // org.apache.tika.parser.xliff.XLZParser
+        DOCUMENT_TYPE_MAP.put("application/x-xliff+zip", Document.Type.ARCHIVE);
+        // org.apache.tika.parser.xml.DcXMLParser
+        DOCUMENT_TYPE_MAP.put("application/xml", Document.Type.STANDARD);
+        DOCUMENT_TYPE_MAP.put("image/svg+xml", Document.Type.STANDARD);
+        // org.apache.tika.parser.xml.FictionBookParser
+        DOCUMENT_TYPE_MAP.put("application/x-fictionbook+xml", Document.Type.STANDARD);
+    }
+
+    /**
+     * Utility class with static members only; not meant to be instantiated.
+     */
+    private TikaProcessor() {
+        // do not instantiate
+    }
+
+    /**
+     * Creates a copy of the document type map.
+     * Exposed for testing.
+     * @return a copy of the document type map.
+     */
+    static Map<String, Document.Type> getDocumentTypeMap() {
+        return new HashMap<>(DOCUMENT_TYPE_MAP);
+    }
+
+    /**
+     * Detects the media type of the document and records it, together with the
+     * derived document type, in the document's metadata. A STANDARD document
+     * that {@code NoteGuesser.isNote} accepts is reclassified as NOTICE.
+     * @param log the log for messages.
+     * @param document the Document to process.
+     * @return the mimetype as a string.
+     * @throws RatDocumentAnalysisException if the document cannot be read.
+     */
+    public static String process(final Log log, final Document document) throws RatDocumentAnalysisException {
+        Metadata metadata = new Metadata();
+        try (InputStream stream = document.inputStream()) {
+            metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, document.getName());
+            String result = TIKA.detect(stream, metadata);
+            // MediaType.parse copes with parameters and yields null for a malformed name,
+            // where splitting on "/" by hand would throw ArrayIndexOutOfBoundsException.
+            MediaType mediaType = MediaType.parse(result);
+            if (mediaType == null) {
+                mediaType = MediaType.OCTET_STREAM;
+            }
+            document.getMetaData().setMediaType(mediaType);
+            document.getMetaData().setDocumentType(fromMediaType(mediaType, log));
+            if (Document.Type.STANDARD == document.getMetaData().getDocumentType()
+                    && NoteGuesser.isNote(document)) {
+                document.getMetaData().setDocumentType(Document.Type.NOTICE);
+            }
+            return result;
+        } catch (IOException e) {
+            throw new RatDocumentAnalysisException(e);
+        }
+    }
+
+    /**
+     * Maps a media type to a document type. Any "text" type is STANDARD,
+     * types listed in the document type map use their mapped value and
+     * everything else is BINARY. Media type parameters are ignored.
+     * @param mediaType the media type to classify.
+     * @param log the log for messages (currently unused).
+     * @return the document type, never null.
+     */
+    public static Document.Type fromMediaType(final MediaType mediaType, final Log log) {
+        if ("text".equals(mediaType.getType())) {
+            return Document.Type.STANDARD;
+        }
+        // look up by base type so that parameters such as "; charset=..." cannot defeat the match
+        return DOCUMENT_TYPE_MAP.getOrDefault(mediaType.getBaseType().toString(), Document.Type.BINARY);
+    }
+}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/AbstractHeaderMatcher.java b/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/AbstractHeaderMatcher.java
index c97f886..0473223 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/AbstractHeaderMatcher.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/AbstractHeaderMatcher.java
@@ -24,8 +24,6 @@
import org.apache.rat.analysis.IHeaderMatcher;
import org.apache.rat.config.parameters.ComponentType;
import org.apache.rat.config.parameters.ConfigComponent;
-import org.apache.rat.config.parameters.Description;
-import org.apache.rat.config.parameters.DescriptionBuilder;
/**
* An abstract class to simplify IHeaderMatcher creation. This class ensures
diff --git a/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/NotMatcher.java b/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/NotMatcher.java
index 2f00c9a..1f2ab6f 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/NotMatcher.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/NotMatcher.java
@@ -18,8 +18,6 @@
*/
package org.apache.rat.analysis.matchers;
-import java.util.Arrays;
-import java.util.List;
import java.util.Objects;
import org.apache.rat.analysis.IHeaderMatcher;
diff --git a/apache-rat-core/src/main/java/org/apache/rat/api/Document.java b/apache-rat-core/src/main/java/org/apache/rat/api/Document.java
index 71f8a47..5d4467d 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/api/Document.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/api/Document.java
@@ -15,7 +15,7 @@
* KIND, either express or implied. See the License for the *
* specific language governing permissions and limitations *
* under the License. *
- */
+ */
package org.apache.rat.api;
import java.io.IOException;
@@ -33,34 +33,37 @@
*/
enum Type {
/** A generated document. */
- GENERATED,
+ GENERATED,
/** An unknown document type. */
UNKNOWN,
/** An archive type document. */
- ARCHIVE,
+ ARCHIVE,
/** A notice document (e.g. LICENSE file) */
NOTICE,
/** A binary file */
BINARY,
/** A standard document */
- STANDARD}
+ STANDARD
+ }
/**
* @return the name of the current document.
*/
- String getName();
-
+ String getName();
+
/**
* Reads the contents of this document.
+ *
* @return <code>Reader</code> not null
* @throws IOException if this document cannot be read
- * @throws CompositeDocumentException if this document can only be read as
- * a composite archive
+ * @throws CompositeDocumentException if this document can only be read as a
+ * composite archive
*/
Reader reader() throws IOException;
-
+
/**
* Streams the document's contents.
+ *
* @return a non null input stream of the document.
* @throws IOException when stream could not be opened
*/
@@ -68,12 +71,14 @@
/**
* Gets data describing this resource.
+ *
* @return a non null MetaData object.
*/
MetaData getMetaData();
-
+
/**
* Tests if this a composite document.
+ *
* @return true if composite, false otherwise
*/
boolean isComposite();
diff --git a/apache-rat-core/src/main/java/org/apache/rat/api/MetaData.java b/apache-rat-core/src/main/java/org/apache/rat/api/MetaData.java
index 4acbbe3..bd92bc2 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/api/MetaData.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/api/MetaData.java
@@ -26,6 +26,7 @@
import org.apache.rat.license.ILicense;
import org.apache.rat.license.ILicenseFamily;
+import org.apache.tika.mime.MediaType;
/**
* Data about the document under test..
@@ -37,6 +38,7 @@
/** The list of License Family Categories that are approved */
private final Set<String> approvedLicenses;
+ private MediaType mediaType;
private Document.Type documentType;
private String sampleHeader;
@@ -49,6 +51,22 @@
}
/**
+ * Gets the defined media type.
+ * @return the media type.
+ */
+ public MediaType getMediaType() {
+ return mediaType;
+ }
+
+ /**
+ * Sets the defined media type.
+ * @param mediaType the media type.
+ */
+ public void setMediaType(MediaType mediaType) {
+ this.mediaType = mediaType;
+ }
+
+ /**
* Determines if a matching license has been detected.
* @return true if there is a matching license.
*/
diff --git a/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/ArchiveGuesser.java b/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/ArchiveGuesser.java
deleted file mode 100644
index f213d60..0000000
--- a/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/ArchiveGuesser.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one *
- * or more contributor license agreements. See the NOTICE file *
- * distributed with this work for additional information *
- * regarding copyright ownership. The ASF licenses this file *
- * to you under the Apache License, Version 2.0 (the *
- * "License"); you may not use this file except in compliance *
- * with the License. You may obtain a copy of the License at *
- * *
- * http://www.apache.org/licenses/LICENSE-2.0 *
- * *
- * Unless required by applicable law or agreed to in writing, *
- * software distributed under the License is distributed on an *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
- * KIND, either express or implied. See the License for the *
- * specific language governing permissions and limitations *
- * under the License. *
- */
-package org.apache.rat.document.impl.guesser;
-
-import java.util.Locale;
-
-import org.apache.rat.api.Document;
-
-public class ArchiveGuesser {
-
- private static final String DOT = ".";
-
- private static final String[] ARCHIVE_EXTENSIONS = {
- "jar", "gz",
- "zip", "tar",
- "bz", "bz2",
- "rar", "war",
- "ear", "mar",
- "par", "xar",
- "odb", "odf",
- "odg", "odp",
- "ods", "odt",
- "har", "sar",
- "wsr",
- };
-
- /**
- * @param document the current document.
- * @return whether the given document is an archive.
- */
- public static boolean isArchive(final Document document) {
- return isArchive(document.getName());
- }
-
- /**
- * @return Is a file by that name an archive?
- * @param name file name to check against.
- */
- public static boolean isArchive(final String name) {
- if (name == null) {return false;}
- String nameToLower = name.toLowerCase(Locale.US);
- for (int i = 0; i < ArchiveGuesser.ARCHIVE_EXTENSIONS.length; i++) {
- if (nameToLower.endsWith(DOT + ArchiveGuesser.ARCHIVE_EXTENSIONS[i])) {
- return true;
- }
- }
- return false;
- }
-
-}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java b/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
deleted file mode 100644
index 1a900f6..0000000
--- a/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
+++ /dev/null
@@ -1,372 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one *
- * or more contributor license agreements. See the NOTICE file *
- * distributed with this work for additional information *
- * regarding copyright ownership. The ASF licenses this file *
- * to you under the Apache License, Version 2.0 (the *
- * "License"); you may not use this file except in compliance *
- * with the License. You may obtain a copy of the License at *
- * *
- * http://www.apache.org/licenses/LICENSE-2.0 *
- * *
- * Unless required by applicable law or agreed to in writing, *
- * software distributed under the License is distributed on an *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
- * KIND, either express or implied. See the License for the *
- * specific language governing permissions and limitations *
- * under the License. *
- */
-package org.apache.rat.document.impl.guesser;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.rat.api.Document;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Reader;
-import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
-import java.nio.charset.*;
-import java.util.Locale;
-
-/**
- * TODO: factor into MIME guesser and MIME->binary guesser
- */
-public class BinaryGuesser {
-
- private static final String DOT = ".";
-
- static final String FILE_ENCODING = "file.encoding";
- private static final Charset CHARSET_FROM_FILE_ENCODING_OR_UTF8 = getFileEncodingOrUTF8AsFallback();
-
- private static boolean isBinaryDocument(Document document) {
- boolean result;
- InputStream stream = null;
- try {
- stream = document.inputStream();
- result = isBinary(stream);
- } catch (IOException e) {
- result = false;
- } finally {
- IOUtils.closeQuietly(stream);
- }
- return result;
- }
-
- private static boolean isBinary(CharSequence taste) {
- int highBytes = 0;
- final int length = taste.length();
- for (int i = 0; i < length; i++) {
- char c = taste.charAt(i);
- if (c > BinaryGuesser.NON_ASCII_THRESHOLD
- || c <= BinaryGuesser.ASCII_CHAR_THRESHOLD) {
- highBytes++;
- }
- }
- return highBytes * BinaryGuesser.HIGH_BYTES_RATIO
- > length * BinaryGuesser.TOTAL_READ_RATIO;
- }
-
- /**
- * @param in the file to check.
- * @return Do the first few bytes of the stream hint at a binary file?
- * <p>Any IOException is swallowed internally and the test returns
- * false.</p>
- * <p>This method may lead to false negatives if the reader throws
- * an exception because it can't read characters according to the
- * reader's encoding from the underlying stream.</p>
- */
- public static boolean isBinary(Reader in) {
- char[] taste = new char[100];
- try {
- int bytesRead = in.read(taste);
- if (bytesRead > 0) {
- return isBinary(new String(taste, 0, bytesRead));
- }
- } catch (IOException e) {
- // SWALLOW
- }
- return false;
- }
-
- /**
- * @param in the file to check.
- * @return Do the first few bytes of the stream hint at a binary file?
- * <p>Any IOException is swallowed internally and the test returns
- * false.</p>
- * <p>This method will try to read bytes from the stream and
- * translate them to characters according to the platform's
- * default encoding. If any bytes can not be translated to
- * characters it will assume the original data must be binary and
- * return true.</p>
- */
- public static boolean isBinary(InputStream in) {
- try {
- byte[] taste = new byte[200];
- int bytesRead = in.read(taste);
- if (bytesRead > 0) {
- ByteBuffer bytes = ByteBuffer.wrap(taste, 0, bytesRead);
- CharBuffer chars = CharBuffer.allocate(2 * bytesRead);
- CharsetDecoder cd = CHARSET_FROM_FILE_ENCODING_OR_UTF8.newDecoder()
- .onMalformedInput(CodingErrorAction.REPORT)
- .onUnmappableCharacter(CodingErrorAction.REPORT);
- while (bytes.remaining() > 0) {
- CoderResult res = cd.decode(bytes, chars, true);
- if (res.isMalformed() || res.isUnmappable()) {
- return true;
- } else if (res.isOverflow()) {
- chars.limit(chars.position());
- chars.rewind();
- int c = chars.capacity() * 2;
- CharBuffer on = CharBuffer.allocate(c);
- on.put(chars);
- chars = on;
- }
- }
- chars.limit(chars.position());
- chars.rewind();
- return isBinary(chars);
- }
- } catch (IOException e) {
- // SWALLOW
- }
- return false;
- }
-
- static Charset getFileEncodingOrUTF8AsFallback() {
- try {
- return Charset.forName(System.getProperty(FILE_ENCODING));
- } catch (UnsupportedCharsetException e) {
- return StandardCharsets.UTF_8;
- }
- }
-
- /**
- * @param name current file name.
- * @return whether given name is binary.
- */
- public static boolean isBinaryData(final String name) {
- return extensionMatches(name, DATA_EXTENSIONS);
- }
-
- /**
- * @param name current file name.
- * @return Is a file by that name a known non-binary file?
- */
- public static boolean isNonBinary(final String name) {
- return name != null && extensionMatches(name.toUpperCase(Locale.US), BinaryGuesser.NON_BINARY_EXTENSIONS);
- }
-
- /**
- * @param name current file name.
- * @return Is a file by that name an executable/binary file?
- */
- public static boolean isExecutable(final String name) {
- return name.equals(BinaryGuesser.JAVA) || extensionMatches(name, EXE_EXTENSIONS)
- || containsExtension(name, EXE_EXTENSIONS);
- }
-
- public static boolean containsExtension(final String name,
- final String[] exts) {
- for (String ext : exts) {
- if (name.contains(DOT + ext + DOT)) {
- return true;
- }
- }
- return false;
- }
-
- public static boolean extensionMatches(final String name,
- final String[] exts) {
- for (String ext : exts) {
- if (name.endsWith(DOT + ext)) {
- return true;
- }
- }
- return false;
- }
-
- public static boolean isBytecode(final String name) {
- return BinaryGuesser.extensionMatches(name, BYTECODE_EXTENSIONS);
- }
-
- public static boolean isImage(final String name) {
- return BinaryGuesser.extensionMatches(name, IMAGE_EXTENSIONS);
- }
-
- public static boolean isKeystore(final String name) {
- return BinaryGuesser.extensionMatches(name, KEYSTORE_EXTENSIONS);
- }
-
- public static boolean isAudio(final String name) {
- return BinaryGuesser.extensionMatches( name, AUDIO_EXTENSIONS );
- }
-
- /**
- * @param name file name.
- * @return Is a file by that name a known binary file?
- */
- public static boolean isBinary(final String name) {
- if (name == null) {
- return false;
- }
- String normalisedName = GuessUtils.normalise(name);
- return BinaryGuesser.JAR_MANIFEST.equalsIgnoreCase(name) || BinaryGuesser.isImage(normalisedName)
- || BinaryGuesser.isKeystore(normalisedName) || BinaryGuesser.isBytecode(normalisedName)
- || BinaryGuesser.isBinaryData(normalisedName) || BinaryGuesser.isExecutable(normalisedName)
- || BinaryGuesser.isAudio( normalisedName );
- }
-
- private static final String[] DATA_EXTENSIONS = {
- "DAT", "DOC",
- "NCB", "IDB",
- "SUO", "XCF",
- "RAJ", "CERT",
- "KS", "ODP", "SWF",
- // fonts
- "WOFF2", "WOFF", "TTF", "EOT",
- // JSON structure does not allow comments/license injections in the way RAT expects it
- "JSON"
- };
-
- private static final String[] EXE_EXTENSIONS = {
- "EXE", "DLL",
- "LIB", "SO",
- "A", "EXP",
- };
-
- private static final String[] KEYSTORE_EXTENSIONS = {
- "JKS", "KEYSTORE", "PEM", "CRL", "TRUSTSTORE"
- };
-
- private static final String[] IMAGE_EXTENSIONS = {
- "PNG", "PDF",
- "GIF", "GIFF",
- "TIF", "TIFF",
- "JPG", "JPEG",
- "ICO", "ICNS",
- "PSD",
- };
-
- private static final String[] BYTECODE_EXTENSIONS = {
- "CLASS", "PYD",
- "OBJ", "PYC",
- };
-
- private static final String[] AUDIO_EXTENSIONS = {
- "AIF", "IFF",
- "M3U", "M4A",
- "MID", "MP3",
- "MPA", "WAV",
- "WMA"
- };
-
- /**
- * Based on <a href="https://www.apache.org/dev/svn-eol-style.txt">https://www.apache.org/dev/svn-eol-style.txt</a>
- */
- private static final String[] NON_BINARY_EXTENSIONS = {
- "AART",
- "AC",
- "AM",
- "BAT",
- "C",
- "CAT",
- "CGI",
- "CLASSPATH",
- "CMD",
- "CONFIG",
- "CPP",
- "CSS",
- "CWIKI",
- "DATA",
- "DCL",
- "DTD",
- "EGRM",
- "ENT",
- "FT",
- "FN",
- "FV",
- "GRM",
- "G",
- "GO",
- "H",
- "HTACCESS",
- "HTML",
- "IHTML",
- "IN",
- "JAVA",
- "JMX",
- "JSP",
- "JS",
- "JSON",
- "JUNIT",
- "JX",
- "M4",
- "MANIFEST",
- "MD",
- "MF",
- "META",
- "MOD",
- "N3",
- "PEN",
- "PL",
- "PM",
- "POD",
- "POM",
- "PROJECT",
- "PROPERTIES",
- "PY",
- "RB",
- "RDF",
- "RNC",
- "RNG",
- "RNX",
- "ROLES",
- "RSS",
- "SH",
- "SQL",
- "SVG",
- "TLD",
- "TXT",
- "TYPES",
- "VM",
- "VSL",
- "WSDD",
- "WSDL",
- "XARGS",
- "XCAT",
- "XCONF",
- "XEGRM",
- "XGRM",
- "XLEX",
- "XLOG",
- "XMAP",
- "XML",
- "XROLES",
- "XSAMPLES",
- "XSD",
- "XSL",
- "XSLT",
- "XSP",
- "XUL",
- "XWEB",
- "XWELCOME",
- };
- public static final String JAR_MANIFEST = "MANIFEST.MF";
- public static final String JAVA = "JAVA";
- public static final int HIGH_BYTES_RATIO = 100;
- public static final int TOTAL_READ_RATIO = 30;
- public static final int NON_ASCII_THRESHOLD = 256;
- public static final int ASCII_CHAR_THRESHOLD = 8;
-
- public static boolean isBinary(final Document document) {
- // TODO: reimplement the binary test algorithm?
- // TODO: more efficient to move into standard analysis
- // TODO: then use binary as default
- return isBinary(document.getName())
- ||
- // try a taste
- isBinaryDocument(document);
- }
-
-}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/header/LineNumberReader.java b/apache-rat-core/src/main/java/org/apache/rat/header/LineNumberReader.java
index e7f046a..8465ec5 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/header/LineNumberReader.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/header/LineNumberReader.java
@@ -1,65 +1,65 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one *
- * or more contributor license agreements. See the NOTICE file *
- * distributed with this work for additional information *
- * regarding copyright ownership. The ASF licenses this file *
- * to you under the Apache License, Version 2.0 (the *
- * "License"); you may not use this file except in compliance *
- * with the License. You may obtain a copy of the License at *
- * *
- * http://www.apache.org/licenses/LICENSE-2.0 *
- * *
- * Unless required by applicable law or agreed to in writing, *
- * software distributed under the License is distributed on an *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
- * KIND, either express or implied. See the License for the *
- * specific language governing permissions and limitations *
- * under the License. *
- */
-package org.apache.rat.header;
-
-import java.io.IOException;
-import java.io.Reader;
-
-/** Replacement for {@link java.io.LineNumberReader}. This class
- * provides a workaround for an incompatibility in the
- * {@link java.io.LineNumberReader}: If the last line in a file
- * isn't terminated with LF, or CR, or CRLF, then that line
- * is counted in Java 16, and beyond, but wasn't counted before.
- * This implementation is compatible with the latter variant,
- * thus providing upwards compatibility for RAT.
- */
-@Deprecated // since 0.17
-public class LineNumberReader {
- private final Reader parent;
- private boolean previousCharWasCR = false;
- private int lineNumber = 0;
-
- public LineNumberReader(Reader pReader) {
- parent = pReader;
- }
-
- public int read() throws IOException {
- final int c = parent.read();
- switch(c) {
- case 13:
- previousCharWasCR = true;
- ++lineNumber;
- break;
- case 10:
- if (!previousCharWasCR) {
- ++lineNumber;
- }
- previousCharWasCR = false;
- break;
- default:
- previousCharWasCR = false;
- break;
- }
- return c;
- }
-
- public int getLineNumber() {
- return lineNumber;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+package org.apache.rat.header;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/** Replacement for {@link java.io.LineNumberReader}. This class
+ * provides a workaround for an incompatibility in the
+ * {@link java.io.LineNumberReader}: If the last line in a file
+ * isn't terminated with LF, or CR, or CRLF, then that line
+ * is counted in Java 16, and beyond, but wasn't counted before.
+ * This implementation is compatible with the latter variant,
+ * thus providing upwards compatibility for RAT.
+ */
+@Deprecated // since 0.17
+public class LineNumberReader {
+ private final Reader parent;
+ private boolean previousCharWasCR = false;
+ private int lineNumber = 0;
+
+ public LineNumberReader(Reader pReader) {
+ parent = pReader;
+ }
+
+ public int read() throws IOException {
+ final int c = parent.read();
+ switch(c) {
+ case 13:
+ previousCharWasCR = true;
+ ++lineNumber;
+ break;
+ case 10:
+ if (!previousCharWasCR) {
+ ++lineNumber;
+ }
+ previousCharWasCR = false;
+ break;
+ default:
+ previousCharWasCR = false;
+ break;
+ }
+ return c;
+ }
+
+ public int getLineNumber() {
+ return lineNumber;
+ }
+}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/report/claim/ClaimStatistic.java b/apache-rat-core/src/main/java/org/apache/rat/report/claim/ClaimStatistic.java
index 3f08d7f..12e070a 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/report/claim/ClaimStatistic.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/report/claim/ClaimStatistic.java
@@ -19,12 +19,12 @@
package org.apache.rat.report.claim;
-import java.util.HashMap;
-import java.util.Map;
+import java.util.Collections;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
import org.apache.rat.api.Document;
-
/**
* This class provides a numerical overview about
* the report.
@@ -39,63 +39,131 @@
/** count of generated files */
GENERATED,
/** count of unknown files */
- UNKNOWN };
+ UNKNOWN }
- private final Map<String, int[]> licenseFamilyNameMap = new HashMap<>();
- private final Map<String, int[]> licenseFamilyCodeMap = new HashMap<>();
- private final Map<Document.Type, int[]> documentCategoryMap = new HashMap<>();
- private final Map<ClaimStatistic.Counter, int[]> counterMap = new HashMap<>();
+ private final ConcurrentHashMap<String, IntCounter> licenseFamilyNameMap = new ConcurrentHashMap<>();
+ private final ConcurrentHashMap<String, IntCounter> licenseFamilyCategoryMap = new ConcurrentHashMap<>();
+ private final ConcurrentHashMap<Document.Type, IntCounter> documentCategoryMap = new ConcurrentHashMap<>();
+ private final ConcurrentHashMap<ClaimStatistic.Counter, IntCounter> counterMap = new ConcurrentHashMap<>();
-
+ /** converts null counter to 0.
+ *
+ * @param counter the Counter to retrieve the value from.
+ * @return 0 if counter is {@code null} or counter value otherwise.
+ */
+ private int getValue(IntCounter counter) {
+ return counter == null ? 0 : counter.value();
+ }
/**
* Returns the counts for the counter.
* @param counter the counter to get the value for.
- * @return Returns the number of files with approved licenses.
+ * @return Returns the number of times the Counter type was seen.
*/
public int getCounter(Counter counter) {
- int[] count = counterMap.get(counter);
- return count == null ? 0 : count[0];
+ return getValue(counterMap.get(counter));
}
/**
- * @return Returns a map with the file types. The map keys
- * are file type names and the map values
- * are integers with the number of resources matching
- * the file type.
+ * Increments the counts for the counter.
+ * @param counter the counter to increment.
+ * @param value the value to increment the counter by.
*/
- public Map<Counter, int[]> getCounterMap() {
- return counterMap;
- }
-
-
- /**
- * @return Returns a map with the file types. The map keys
- * are file type names and the map values
- * are integers with the number of resources matching
- * the file type.
- */
- public Map<Document.Type, int[]> getDocumentCategoryMap() {
- return documentCategoryMap;
+ public void incCounter(Counter counter, int value) {
+ counterMap.compute(counter, (k,v)-> v == null? new IntCounter().increment(value) : v.increment(value));
}
/**
- * @return Returns a map with the license family codes. The map
- * keys are license family category names,
- * the map values are integers with the number of resources
- * matching the license family code.
+ * Gets the counts for the Document.Type.
+ * @param documentType the Document.Type to get the counter for.
+ * @return Returns the number of times the Document.Type was seen.
*/
- public Map<String, int[]> getLicenseFamilyCodeMap() {
- return licenseFamilyCodeMap;
+ public int getCounter(Document.Type documentType) {
+ return getValue(documentCategoryMap.get(documentType));
}
/**
- * @return Returns a map with the license family codes. The map
- * keys are the names of the license families and
- * the map values are integers with the number of resources
- * matching the license family name.
+ * Increments the number of times the Document.Type was seen.
+ * @param documentType the Document.Type to increment.
+ * @param value the value to increment the counter by.
*/
- public Map<String, int[]> getLicenseFileNameMap() {
- return licenseFamilyNameMap;
+ public void incCounter(Document.Type documentType, int value) {
+ documentCategoryMap.compute(documentType, (k,v)-> v == null? new IntCounter().increment(value) : v.increment(value));
}
+ /**
+ * Gets the counts for the license category.
+ * @param licenseFamilyCategory the license family category to get the count for.
+ * @return the number of times the license family category was seen.
+ */
+ public int getLicenseCategoryCount(String licenseFamilyCategory) {
+ return getValue(licenseFamilyCategoryMap.get(licenseFamilyCategory));
+ }
+
+ /**
+ * Increments the number of times a license family category was seen.
+ * @param licenseFamilyCategory the license family category to increment.
+ * @param value the value to increment the count by.
+ */
+ public void incLicenseCategoryCount(String licenseFamilyCategory, int value) {
+ licenseFamilyCategoryMap.compute(licenseFamilyCategory, (k, v)-> v == null? new IntCounter().increment(value) : v.increment(value));
+ }
+
+ /**
+ * Gets the set of license family categories that were seen.
+ * @return A set of license family categories.
+ */
+ public Set<String> getLicenseFamilyCategories() {
+ return Collections.unmodifiableSet(licenseFamilyCategoryMap.keySet());
+ }
+
+ /**
+ * Gets the set of license family names that were seen.
+ * @return a Set of license family names that were seen.
+ */
+ public Set<String> getLicenseFamilyNames() {
+ return Collections.unmodifiableSet(licenseFamilyNameMap.keySet());
+ }
+
+ /**
+ * Retrieves the number of times a license family name was seen.
+ * @param licenseFilename the license family name to look for.
+ * @return the number of times the license family name was seen.
+ */
+ public int getLicenseFamilyNameCount(String licenseFilename) {
+ return getValue(licenseFamilyNameMap.get(licenseFilename));
+ }
+
+ /**
+ * Increments the license family name count.
+ * @param licenseFamilyName the license family name to increment.
+ * @param value the value to increment the count by.
+ */
+ public void incLicenseFamilyNameCount(String licenseFamilyName, int value) {
+ licenseFamilyNameMap.compute(licenseFamilyName, (k,v)-> v == null? new IntCounter().increment(value) : v.increment(value));
+ }
+
+ /**
+ * A class that wraps an int and allows easy increment and retrieval.
+ */
+ static class IntCounter {
+ int value = 0;
+
+ /**
+ * Increment the count.
+ * @param count the count to increment by (may be negative)
+ * @return this.
+ */
+ public IntCounter increment(int count) {
+ value += count;
+ return this;
+ }
+
+ /**
+ * Retrieves the count.
+ * @return the count contained by this counter.
+ */
+ public int value() {
+ return value;
+ }
+ }
}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/AbstractClaimReporter.java b/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/AbstractClaimReporter.java
index ad56a97..5004cb3 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/AbstractClaimReporter.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/AbstractClaimReporter.java
@@ -19,8 +19,6 @@
package org.apache.rat.report.claim.impl;
-import java.util.stream.Collectors;
-
import org.apache.rat.api.Document;
import org.apache.rat.api.MetaData;
import org.apache.rat.api.RatException;
@@ -53,28 +51,18 @@
}
/**
- * Increment the license family counter
- * The default implementation does nothing.
- * @param licenseFamilyName name of the license family
- */
- protected void handleLicenseFamilyNameClaim(String licenseFamilyName) {
- // Does Nothing
- }
-
- /**
- * Increment the license category count.
+ * Increment the counts associated with the license
* The default implementation does nothing.
* @param license the license to record the category for.
*/
- protected void handleHeaderCategoryClaim(ILicense license) {
+ protected void handleLicenseClaim(ILicense license) {
// Does nothing
}
@Override
public void report(Document subject) throws RatException {
final MetaData metaData = subject.getMetaData();
- metaData.licenses().forEach(this::handleHeaderCategoryClaim);
- metaData.licenses().map(lic -> lic.getLicenseFamily().getFamilyName()).collect(Collectors.toSet()).forEach(this::handleLicenseFamilyNameClaim);
+ metaData.licenses().forEach(this::handleLicenseClaim);
handleDocumentCategoryClaim(metaData.getDocumentType());
handleApprovedLicenseClaim(metaData);
}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/ClaimAggregator.java b/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/ClaimAggregator.java
index 598d4a1..8cc3fef 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/ClaimAggregator.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/ClaimAggregator.java
@@ -19,8 +19,6 @@
package org.apache.rat.report.claim.impl;
-import java.util.Map;
-
import org.apache.rat.api.Document;
import org.apache.rat.api.MetaData;
import org.apache.rat.api.RatException;
@@ -43,52 +41,27 @@
this.statistic = statistic;
}
- private <T> void incMapValue(Map<T, int[]> map, T key, int value) {
- final int[] num = map.get(key);
-
- if (num == null) {
- map.put(key, new int[] { value });
- } else {
- num[0] += value;
- }
- }
-
@Override
protected void handleDocumentCategoryClaim(Document.Type documentType) {
- incMapValue(statistic.getDocumentCategoryMap(), documentType, 1);
+ statistic.incCounter(documentType, 1);
}
@Override
protected void handleApprovedLicenseClaim(MetaData metadata) {
- incValueMap(statistic.getCounterMap(), ClaimStatistic.Counter.APPROVED, (int) metadata.approvedLicenses().count());
- incValueMap(statistic.getCounterMap(), ClaimStatistic.Counter.UNAPPROVED,
- (int) metadata.unapprovedLicenses().count());
- }
-
- private void incValueMap(Map<Counter, int[]> map, Counter key, int value) {
- final int[] num = map.get(key);
-
- if (num == null) {
- map.put(key, new int[] { value });
- } else {
- num[0] += value;
- }
+ statistic.incCounter(ClaimStatistic.Counter.APPROVED, (int) metadata.approvedLicenses().count());
+ statistic.incCounter(ClaimStatistic.Counter.UNAPPROVED, (int) metadata.unapprovedLicenses().count());
}
@Override
- protected void handleLicenseFamilyNameClaim(String licenseFamilyName) {
- incMapValue(statistic.getLicenseFileNameMap(), licenseFamilyName, 1);
- }
-
- @Override
- protected void handleHeaderCategoryClaim(ILicense license) {
+ protected void handleLicenseClaim(ILicense license) {
String category = license.getLicenseFamily().getFamilyCategory();
if (category.equals(ILicenseFamily.GENTERATED_CATEGORY)) {
- incValueMap(statistic.getCounterMap(), Counter.GENERATED, 1);
+ statistic.incCounter(Counter.GENERATED, 1);
} else if (category.equals(ILicenseFamily.UNKNOWN_CATEGORY)) {
- incValueMap(statistic.getCounterMap(), Counter.UNKNOWN, 1);
+ statistic.incCounter(Counter.UNKNOWN, 1);
}
- incMapValue(statistic.getLicenseFamilyCodeMap(), category, 1);
+ statistic.incLicenseCategoryCount(category, 1);
+ statistic.incLicenseFamilyNameCount(license.getFamilyName(), 1);
}
@Override
diff --git a/apache-rat-core/src/main/java/org/apache/rat/walker/ArchiveWalker.java b/apache-rat-core/src/main/java/org/apache/rat/walker/ArchiveWalker.java
index e2196f8..d61f735 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/walker/ArchiveWalker.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/walker/ArchiveWalker.java
@@ -34,31 +34,29 @@
import org.apache.rat.api.Document;
import org.apache.rat.api.RatException;
import org.apache.rat.document.impl.ArchiveEntryDocument;
-import org.apache.rat.report.IReportable;
import org.apache.rat.report.RatReport;
/**
* Walks various kinds of archives files
*/
-public class ArchiveWalker extends Walker implements IReportable {
+public class ArchiveWalker extends Walker {
/**
* Constructs a walker.
* @param file not null
- * @param filter filters input files (optional),
- * or null when no filtering should be performed
- * @throws FileNotFoundException in case of I/O errors.
+ * @param filter filters input files (optional), or null when no filtering should be performed
+ * @throws FileNotFoundException in case of I/O errors.
*/
- public ArchiveWalker(File file, final FilenameFilter filter) throws FileNotFoundException {
+ public ArchiveWalker(final File file, final FilenameFilter filter) throws FileNotFoundException {
super(file, filter);
}
-
+
/**
* Run a report over all files and directories in this GZIPWalker,
* ignoring any files/directories set to be ignored.
- *
+ *
* @param report the defined RatReport to run on this GZIP walker.
- *
+ *
*/
public void run(final RatReport report) throws RatException {
@@ -68,12 +66,12 @@
/* I am really sad that classes aren't first-class objects in
Java :'( */
try {
- input = new TarArchiveInputStream(new GzipCompressorInputStream(Files.newInputStream(file.toPath())));
+ input = new TarArchiveInputStream(new GzipCompressorInputStream(Files.newInputStream(getBaseFile().toPath())));
} catch (IOException e) {
try {
- input = new TarArchiveInputStream(new BZip2CompressorInputStream(Files.newInputStream(file.toPath())));
+ input = new TarArchiveInputStream(new BZip2CompressorInputStream(Files.newInputStream(getBaseFile().toPath())));
} catch (IOException e2) {
- input = new ZipArchiveInputStream(Files.newInputStream(file.toPath()));
+ input = new ZipArchiveInputStream(Files.newInputStream(getBaseFile().toPath()));
}
}
@@ -105,15 +103,13 @@
/**
* Report on the given file.
- *
+ *
* @param report the report to process the file with
* @param file the file to be reported on
* @throws RatException
*/
- private void report(final RatReport report, byte[] contents, File file) throws RatException {
-
+ private void report(final RatReport report, final byte[] contents, final File file) throws RatException {
Document document = new ArchiveEntryDocument(file, contents);
report.report(document);
-
}
}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/walker/DirectoryWalker.java b/apache-rat-core/src/main/java/org/apache/rat/walker/DirectoryWalker.java
index ab75cd3..e3cf3a7 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/walker/DirectoryWalker.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/walker/DirectoryWalker.java
@@ -22,57 +22,33 @@
import java.io.File;
import java.io.FilenameFilter;
import java.util.Arrays;
-import java.util.regex.Pattern;
+import org.apache.commons.io.filefilter.FalseFileFilter;
import org.apache.commons.io.filefilter.IOFileFilter;
-import org.apache.rat.api.Document;
import org.apache.rat.api.RatException;
import org.apache.rat.document.impl.FileDocument;
-import org.apache.rat.report.IReportable;
import org.apache.rat.report.RatReport;
/**
* Walks directories.
*/
-public class DirectoryWalker extends Walker implements IReportable {
+public class DirectoryWalker extends Walker {
private static final FileNameComparator COMPARATOR = new FileNameComparator();
- private final IOFileFilter directoryFilter;
-
- /**
- * Constructs a walker.
- *
- * @param file the directory to walk.
- * @param directoryFilter directory filter to eventually exclude some directories/files from the scan.
- */
- public DirectoryWalker(File file, IOFileFilter directoryFilter) {
- this(file, (FilenameFilter) null, directoryFilter);
- }
+ private final IOFileFilter directoriesToIgnore;
/**
* Constructs a walker.
*
* @param file the directory to walk (not null).
- * @param filter filters input files (optional),
+ * @param filesToIgnore filters input files (optional),
* or null when no filtering should be performed
- * @param directoryFilter filters directories (optional), or null when no filtering should be performed.
+ * @param directoriesToIgnore filters directories (optional), or null when no filtering should be performed.
*/
- public DirectoryWalker(File file, final FilenameFilter filter, IOFileFilter directoryFilter) {
- super(file.getPath(), file, filter);
- this.directoryFilter = directoryFilter;
- }
-
- /**
- * Constructs a walker.
- *
- * @param file the directory to walk (not null).
- * @param ignoreNameRegex ignore directories/files with name matching the regex.
- * @param directoryFilter filters directories (optional), or null when no filtering should be performed.
- */
- public DirectoryWalker(File file, final Pattern ignoreNameRegex, IOFileFilter directoryFilter) {
- super(file.getPath(), file, regexFilter(ignoreNameRegex));
- this.directoryFilter = directoryFilter;
+ public DirectoryWalker(final File file, final FilenameFilter filesToIgnore, final IOFileFilter directoriesToIgnore) {
+ super(file, filesToIgnore);
+ this.directoriesToIgnore = directoriesToIgnore == null ? FalseFileFilter.FALSE : directoriesToIgnore;
}
/**
@@ -80,14 +56,10 @@
*
* @param report The report to process the directory with
* @param file the directory to process
- * @throws RatException
+ * @throws RatException on error.
*/
- private void processDirectory(RatReport report, final File file) throws RatException {
- if (directoryFilter != null) {
- if (!directoryFilter.accept(file)) {
- process(report, file);
- }
- } else {
+ private void processDirectory(final RatReport report, final File file) throws RatException {
+ if (!directoriesToIgnore.accept(file)) {
process(report, file);
}
}
@@ -97,9 +69,10 @@
* ignoring any files/directories set to be ignored.
*
* @param report the defined RatReport to run on this Directory walker.
+ * @throws RatException on error
*/
public void run(final RatReport report) throws RatException {
- process(report, file);
+ process(report, getBaseFile());
}
/**
@@ -107,9 +80,9 @@
*
* @param report the report to use in processing
* @param file the run the report against
- * @throws RatException
+ * @throws RatException on error
*/
- private void process(final RatReport report, final File file) throws RatException {
+ protected void process(final RatReport report, final File file) throws RatException {
final File[] files = file.listFiles();
if (files != null) {
Arrays.sort(files, COMPARATOR);
@@ -119,16 +92,20 @@
}
}
+ private boolean isNotIgnoredDirectory(final File file) {
+ return !directoriesToIgnore.accept(file.getParentFile(), file.getName());
+ }
+
/**
* Process all directories in a set of file objects, ignoring any directories set to be ignored.
*
* @param report the report to use in processing
* @param files the files to process (only directories will be processed)
- * @throws RatException
+ * @throws RatException on error
*/
private void processDirectories(final RatReport report, final File[] files) throws RatException {
for (final File file : files) {
- if (isNotIgnored(file) && file.isDirectory()) {
+ if (file.isDirectory() && isNotIgnoredDirectory(file)) {
processDirectory(report, file);
}
}
@@ -139,28 +116,13 @@
*
* @param report the report to use in processing
* @param files the files to process (only files will be processed)
- * @throws RatException
+ * @throws RatException on error
*/
private void processNonDirectories(final RatReport report, final File[] files) throws RatException {
for (final File file : files) {
- if (isNotIgnored(file) && !file.isDirectory()) {
- report(report, file);
+ if (!file.isDirectory() && isNotIgnored(file)) {
+ report.report(new FileDocument(file));
}
}
-
- }
-
- /**
- * Report on the given file.
- *
- * @param report the report to process the file with
- * @param file the file to be reported on
- * @throws RatException
- */
- private void report(final RatReport report, File file) throws RatException {
-
- Document document = new FileDocument(file);
- report.report(document);
-
}
}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/walker/Walker.java b/apache-rat-core/src/main/java/org/apache/rat/walker/Walker.java
index daecbd1..abb8cae 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/walker/Walker.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/walker/Walker.java
@@ -19,52 +19,42 @@
package org.apache.rat.walker;
+import org.apache.commons.io.filefilter.FalseFileFilter;
import org.apache.rat.report.IReportable;
import java.io.File;
import java.io.FilenameFilter;
-import java.util.regex.Pattern;
/**
* Abstract walker.
*/
public abstract class Walker implements IReportable {
- protected final File file;
- protected final String name;
+ /** The file that this walker started at */
+ private final File baseFile;
- protected final FilenameFilter filter;
+ /** The file name filter that the walker is applying */
+ private final FilenameFilter filesToIgnore;
- protected static FilenameFilter regexFilter(final Pattern pattern) {
- return (dir, name) -> {
- final boolean result;
- if (pattern == null) {
- result = true;
- } else {
- result = !pattern.matcher(name).matches();
- }
- return result;
- };
+ public Walker(final File file, final FilenameFilter filesToIgnore) {
+ this.baseFile = file;
+ this.filesToIgnore = filesToIgnore == null ? FalseFileFilter.FALSE : filesToIgnore;
}
-
+
+ /**
+ * Retrieve the file from the constructor.
+ * @return the file from the constructor.
+ */
+ protected File getBaseFile() {
+ return baseFile;
+ }
+
+ /**
+ * Test if the specified file is not matched by the files-to-ignore filter.
+ * @param file the file to test.
+ * @return {@code true} if the file should not be ignored.
+ */
protected final boolean isNotIgnored(final File file) {
- boolean result = false;
- if (filter != null) {
- final String name = file.getName();
- final File dir = file.getParentFile();
- result = !filter.accept(dir, name);
- }
- return !result;
+ return !filesToIgnore.accept(file.getParentFile(), file.getName());
}
-
- public Walker(File file, final FilenameFilter filter) {
- this(file.getPath(), file, filter);
- }
-
- protected Walker(final String name, final File file, final FilenameFilter filter) {
- this.name = name;
- this.file = file;
- this.filter = filter;
- }
-
}
diff --git a/apache-rat-core/src/test/java/org/apache/rat/ReportConfigurationTest.java b/apache-rat-core/src/test/java/org/apache/rat/ReportConfigurationTest.java
index 9325602..48e962a 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/ReportConfigurationTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/ReportConfigurationTest.java
@@ -57,6 +57,7 @@
import org.apache.rat.license.LicenseSetFactory.LicenseFilter;
import org.apache.rat.report.IReportable;
import org.apache.rat.testhelpers.TestingLicense;
+import org.apache.rat.utils.DefaultLog;
import org.apache.rat.utils.Log;
import org.apache.rat.utils.Log.Level;
import org.apache.rat.utils.ReportingSet.Options;
@@ -186,24 +187,33 @@
}
@Test
- public void inputFileFilterTest() {
+ public void filesToIgnoreTest() {
+
+ assertThat(underTest.getFilesToIgnore()).isNull();
+
+ underTest.setFrom(Defaults.builder().build(DefaultLog.INSTANCE));
+ assertThat(underTest.getFilesToIgnore()).isNotNull();
+ assertThat(underTest.getFilesToIgnore()).isExactlyInstanceOf(FalseFileFilter.class);
+
FilenameFilter filter = mock(FilenameFilter.class);
- assertThat(underTest.getInputFileFilter()).isNull();
- underTest.setInputFileFilter(filter);
- assertThat(underTest.getInputFileFilter()).isEqualTo(filter);
+ underTest.setFilesToIgnore(filter);
+ assertThat(underTest.getFilesToIgnore()).isEqualTo(filter);
}
@Test
- public void directoryFilterTest() {
- assertThat(underTest.getDirectoryFilter()).isNotNull();
- assertThat(underTest.getDirectoryFilter()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
+ public void directoriesToIgnoreTest() {
+ assertThat(underTest.getDirectoriesToIgnore()).isNull();
- underTest.setDirectoryFilter(DirectoryFileFilter.DIRECTORY);
- underTest.addDirectoryFilter(NameBasedHiddenFileFilter.HIDDEN);
- assertThat(underTest.getDirectoryFilter()).isExactlyInstanceOf(AndFileFilter.class);
+ underTest.setFrom(Defaults.builder().build(DefaultLog.INSTANCE));
+ assertThat(underTest.getDirectoriesToIgnore()).isNotNull();
+ assertThat(underTest.getDirectoriesToIgnore()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
- underTest.setDirectoryFilter(null);
- assertThat(underTest.getDirectoryFilter()).isExactlyInstanceOf(FalseFileFilter.class);
+ underTest.setDirectoriesToIgnore(DirectoryFileFilter.DIRECTORY);
+ underTest.addDirectoryToIgnore(NameBasedHiddenFileFilter.HIDDEN);
+ assertThat(underTest.getDirectoriesToIgnore()).isExactlyInstanceOf(AndFileFilter.class);
+
+ underTest.setDirectoriesToIgnore(null);
+ assertThat(underTest.getDirectoriesToIgnore()).isExactlyInstanceOf(FalseFileFilter.class);
}
@Test
@@ -549,11 +559,11 @@
assertThat(config.isAddingLicenses()).isFalse();
assertThat(config.isAddingLicensesForced()).isFalse();
assertThat(config.getCopyrightMessage()).isNull();
- assertThat(config.getInputFileFilter()).isNull();
+ assertThat(config.getFilesToIgnore()).isExactlyInstanceOf(FalseFileFilter.class);
assertThat(config.isStyleReport()).isTrue();
assertThat(config.getStyleSheet()).isNotNull().withFailMessage("Stylesheet should not be null");
- assertThat(config.getDirectoryFilter()).isNotNull().withFailMessage("Directory filter should not be null");
- assertThat(config.getDirectoryFilter()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
+ assertThat(config.getDirectoriesToIgnore()).isNotNull().withFailMessage("Directory filter should not be null");
+ assertThat(config.getDirectoriesToIgnore()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
validateDefaultApprovedLicenses(config);
validateDefaultLicenseFamilies(config);
diff --git a/apache-rat-core/src/test/java/org/apache/rat/ReportTest.java b/apache-rat-core/src/test/java/org/apache/rat/ReportTest.java
index db9e8bd..185848c 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/ReportTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/ReportTest.java
@@ -91,13 +91,13 @@
}
assertTrue(output.exists());
String content = FileUtils.readFileToString(output, StandardCharsets.UTF_8);
- TextUtils.isMatching("Notes: 2$", content);
- TextUtils.isMatching("Binaries: 2$", content);
- TextUtils.isMatching("Archives: 1$", content);
- TextUtils.isMatching("Standards: 8$", content);
- TextUtils.isMatching("Apache Licensed: 5$", content);
- TextUtils.isMatching("Generated Documents 1$", content);
- TextUtils.isMatching("^2 Unknown licenses", content);
+ TextUtils.assertPatternInOutput("Notes: 2$", content);
+ TextUtils.assertPatternInOutput("Binaries: 2$", content);
+ TextUtils.assertPatternInOutput("Archives: 1$", content);
+ TextUtils.assertPatternInOutput("Standards: 8$", content);
+ TextUtils.assertPatternInOutput("Apache Licensed: 5$", content);
+ TextUtils.assertPatternInOutput("Generated Documents: 1$", content);
+ TextUtils.assertPatternInOutput("^2 Unknown Licenses", content);
assertTrue(content.contains(" S target/test-classes/elements/ILoggerFactory.java"));
assertTrue(content.contains(" B target/test-classes/elements/Image.png"));
assertTrue(content.contains(" N target/test-classes/elements/LICENSE"));
@@ -108,7 +108,6 @@
assertTrue(content.contains(" S target/test-classes/elements/Xml.xml"));
assertTrue(content.contains(" S target/test-classes/elements/buildr.rb"));
assertTrue(content.contains(" A target/test-classes/elements/dummy.jar"));
- assertTrue(content.contains(" B target/test-classes/elements/plain.json"));
assertTrue(content.contains("!S target/test-classes/elements/sub/Empty.txt"));
assertTrue(content.contains(" S target/test-classes/elements/tri.txt"));
assertTrue(content.contains(" G target/test-classes/elements/generated.txt"));
diff --git a/apache-rat-core/src/test/java/org/apache/rat/ReporterTest.java b/apache-rat-core/src/test/java/org/apache/rat/ReporterTest.java
index eb02d09..7ca9b7d 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/ReporterTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/ReporterTest.java
@@ -53,12 +53,12 @@
* @param doc The document to check/
* @param xpath the XPath instance to use.
* @param resource the xpath statement to locate the node.
- * @param id the expected family for the node (may be null)
- * @param approval the expected approval value (may be null)
+ * @param licenseInfo the license info for the node (may be null).
* @param type the type of resource located.
+ * @param hasSample true if a sample from the document should be present.
* @throws Exception on XPath error.
*/
- public static void checkNode(Document doc, XPath xpath, String resource, LicenseInfo licenseInfo, String type,
+ private static void checkNode(Document doc, XPath xpath, String resource, LicenseInfo licenseInfo, String type,
boolean hasSample) throws Exception {
XmlUtils.getNode(doc, xpath, String.format("/rat-report/resource[@name='%s'][@type='%s']", resource, type));
if (licenseInfo != null) {
@@ -67,7 +67,7 @@
resource, type, licenseInfo.id, licenseInfo.family));
XmlUtils.getNode(doc, xpath,
String.format("/rat-report/resource[@name='%s'][@type='%s']/license[@id='%s'][@approval='%s']",
- resource, type, licenseInfo.id, Boolean.toString(licenseInfo.approval)));
+ resource, type, licenseInfo.id, licenseInfo.approval));
if (licenseInfo.hasNotes) {
XmlUtils.getNode(doc, xpath,
String.format("/rat-report/resource[@name='%s'][@type='%s']/license[@id='%s']/notes", resource,
@@ -89,7 +89,7 @@
final ReportConfiguration configuration = new ReportConfiguration(DefaultLog.INSTANCE);
configuration.setStyleReport(false);
configuration.setFrom(defaults);
- configuration.setReportable(new DirectoryWalker(new File(elementsPath), HiddenFileFilter.HIDDEN));
+ configuration.setReportable(new DirectoryWalker(new File(elementsPath), configuration.getFilesToIgnore(), HiddenFileFilter.HIDDEN));
configuration.setOut(() -> out);
new Reporter(configuration).output();
Document doc = XmlUtils.toDom(new ByteArrayInputStream(out.toByteArray()));
@@ -111,7 +111,6 @@
checkNode(doc, xPath, "src/test/resources/elements/Xml.xml", apacheLic, "STANDARD", false);
checkNode(doc, xPath, "src/test/resources/elements/buildr.rb", apacheLic, "STANDARD", false);
checkNode(doc, xPath, "src/test/resources/elements/dummy.jar", null, "ARCHIVE", false);
- checkNode(doc, xPath, "src/test/resources/elements/plain.json", null, "BINARY", false);
checkNode(doc, xPath, "src/test/resources/elements/sub/Empty.txt", new LicenseInfo("?????", false, false),
"STANDARD", false);
checkNode(doc, xPath, "src/test/resources/elements/tri.txt", apacheLic, "STANDARD", false);
@@ -125,7 +124,7 @@
assertEquals(14, nodeList.getLength());
}
- private static final String NL = System.getProperty("line.separator");
+ private static final String NL = System.lineSeparator();
private static final String PARAGRAPH = "*****************************************************";
private static final String HEADER = NL + PARAGRAPH + NL + //
"Summary" + NL + //
@@ -133,7 +132,7 @@
"Generated at: ";
private String documentOut(boolean approved, Type type, String name) {
- return String.format("^\\Q%s%s %s\\E$", approved ? " " : "!", type.name().substring(0, 1), name);
+ return String.format("^\\Q%s%s %s\\E$", approved ? " " : "!", type.name().charAt(0), name);
}
private String licenseOut(String family, String name) {
@@ -152,7 +151,7 @@
final String elementsPath = Resources.getResourceDirectory("elements/Source.java");
final ReportConfiguration configuration = new ReportConfiguration(DefaultLog.INSTANCE);
configuration.setFrom(defaults);
- configuration.setReportable(new DirectoryWalker(new File(elementsPath), HiddenFileFilter.HIDDEN));
+ configuration.setReportable(new DirectoryWalker(new File(elementsPath), configuration.getFilesToIgnore(), HiddenFileFilter.HIDDEN));
configuration.setOut(() -> out);
new Reporter(configuration).output();
@@ -169,7 +168,8 @@
TextUtils.assertPatternInOutput("^Generated Documents: 1$", document);
TextUtils.assertPatternInOutput("^2 Unknown Licenses$", document);
TextUtils.assertPatternInOutput(
- "^Files with unapproved licenses:\\s+" + "\\Qsrc/test/resources/elements/Source.java\\E\\s+"
+ "^Files with unapproved licenses:\\s+" //
+ + "\\Qsrc/test/resources/elements/Source.java\\E\\s+" //
+ "\\Qsrc/test/resources/elements/sub/Empty.txt\\E\\s",
document);
TextUtils.assertPatternInOutput(documentOut(true, Type.ARCHIVE, "src/test/resources/elements/dummy.jar"),
@@ -193,8 +193,6 @@
+ licenseOut("AL", "Apache License Version 2.0"), document);
TextUtils.assertPatternInOutput(documentOut(true, Type.STANDARD, "src/test/resources/elements/TextHttps.txt")
+ licenseOut("AL", "Apache License Version 2.0"), document);
- TextUtils.assertPatternInOutput(documentOut(true, Type.BINARY, "src/test/resources/elements/plain.json"),
- document);
TextUtils.assertPatternInOutput(documentOut(true, Type.STANDARD, "src/test/resources/elements/tri.txt")
+ licenseOut("AL", "Apache License Version 2.0") + licenseOut("BSD-3", "BSD 3 clause")
+ licenseOut("BSD-3", "TMF", "The Telemanagement Forum License"), document);
@@ -210,7 +208,7 @@
final String elementsPath = Resources.getResourceDirectory("elements/Source.java");
final ReportConfiguration configuration = new ReportConfiguration(DefaultLog.INSTANCE);
configuration.setFrom(defaults);
- configuration.setReportable(new DirectoryWalker(new File(elementsPath), HiddenFileFilter.HIDDEN));
+ configuration.setReportable(new DirectoryWalker(new File(elementsPath), configuration.getFilesToIgnore(), HiddenFileFilter.HIDDEN));
configuration.setOut(() -> out);
configuration.setStyleSheet(this.getClass().getResource("/org/apache/rat/unapproved-licenses.xsl"));
new Reporter(configuration).output();
@@ -224,7 +222,7 @@
TextUtils.assertPatternInOutput("\\Qsrc/test/resources/elements/sub/Empty.txt\\E", document);
}
- private class LicenseInfo {
+ private static class LicenseInfo {
String id;
String family;
boolean approval;
diff --git a/apache-rat-core/src/test/java/org/apache/rat/analysis/AnalyserFactoryTest.java b/apache-rat-core/src/test/java/org/apache/rat/analysis/AnalyserFactoryTest.java
index ca8bd0e..9484b6f 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/analysis/AnalyserFactoryTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/analysis/AnalyserFactoryTest.java
@@ -18,7 +18,6 @@
*/
package org.apache.rat.analysis;
-import static org.junit.Assert.assertTrue;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -31,6 +30,7 @@
import org.apache.rat.report.claim.impl.xml.SimpleXmlClaimReporter;
import org.apache.rat.report.xml.writer.impl.base.XmlWriter;
import org.apache.rat.test.utils.Resources;
+import org.apache.rat.testhelpers.TextUtils;
import org.apache.rat.utils.DefaultLog;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -72,13 +72,13 @@
" * specific language governing permissions and limitations", //
" * under the License.", //
" ]]></sample></resource>" };
-
+
final MonolithicFileDocument document = new MonolithicFileDocument(
Resources.getResourceFile("/elements/Text.txt"));
analyser.analyse(document);
reporter.report(document);
String result = out.toString();
- for (String exp : expected ) {
+ for (String exp : expected) {
assertTrue(result.contains(exp), () -> exp);
}
}
@@ -122,4 +122,57 @@
assertEquals("<resource name='src/test/resources/elements/dummy.jar' type='ARCHIVE'/>", out.toString(),
"Open archive element");
}
+
+ @Test
+ public void RAT211_bmp_Test() throws Exception {
+ MonolithicFileDocument document = new MonolithicFileDocument(
+ Resources.getResourceFile("/jira/RAT211/side_left.bmp"));
+ analyser.analyse(document);
+ reporter.report(document);
+ assertEquals("<resource name='src/test/resources/jira/RAT211/side_left.bmp' type='BINARY'/>", out.toString(),
+ "Open archive element");
+ }
+
+ @Test
+ public void RAT211_dia_Test() throws Exception {
+ MonolithicFileDocument document = new MonolithicFileDocument(
+ Resources.getResourceFile("/jira/RAT211/leader-election-message-arrives.dia"));
+ analyser.analyse(document);
+ reporter.report(document);
+ assertEquals(
+ "<resource name='src/test/resources/jira/RAT211/leader-election-message-arrives.dia' type='ARCHIVE'/>",
+ out.toString(), "Open archive element");
+ }
+
+ @Test
+ public void RAT147_unix_Test() throws Exception {
+ MonolithicFileDocument document = new MonolithicFileDocument(
+ Resources.getResourceFile("/jira/RAT147/unix-newlines.txt.bin"));
+ analyser.analyse(document);
+ reporter.report(document);
+ String result = out.toString();
+ TextUtils.assertPatternInOutput(
+ "<resource name='src/test/resources/jira/RAT147/unix-newlines.txt.bin' type='STANDARD'",
+ result);
+ TextUtils.assertPatternInOutput("sentence 1.$", result);
+ TextUtils.assertPatternInOutput("^sentence 2.$", result);
+ TextUtils.assertPatternInOutput("^sentence 3.$", result);
+ TextUtils.assertPatternInOutput("^sentence 4.$", result);
+ }
+
+ @Test
+ public void RAT147_windows_Test() throws Exception {
+ MonolithicFileDocument document = new MonolithicFileDocument(
+ Resources.getResourceFile("/jira/RAT147/windows-newlines.txt.bin"));
+ analyser.analyse(document);
+ reporter.report(document);
+ String result = out.toString();
+ TextUtils.assertPatternInOutput(
+ "<resource name='src/test/resources/jira/RAT147/windows-newlines.txt.bin' type='STANDARD'",
+ result);
+ TextUtils.assertPatternInOutput("sentence 1.$", result);
+ TextUtils.assertPatternInOutput("^sentence 2.$", result);
+ TextUtils.assertPatternInOutput("^sentence 3.$", result);
+ TextUtils.assertPatternInOutput("^sentence 4.$", result);
+ }
}
diff --git a/apache-rat-core/src/test/java/org/apache/rat/analysis/TikaProcessorTest.java b/apache-rat-core/src/test/java/org/apache/rat/analysis/TikaProcessorTest.java
new file mode 100644
index 0000000..aef464d
--- /dev/null
+++ b/apache-rat-core/src/test/java/org/apache/rat/analysis/TikaProcessorTest.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+package org.apache.rat.analysis;
+
+import org.apache.rat.api.Document;
+import org.apache.rat.api.MetaData;
+import org.apache.rat.document.RatDocumentAnalysisException;
+import org.apache.rat.document.impl.FileDocument;
+import org.apache.rat.report.claim.ClaimStatistic;
+import org.apache.rat.test.utils.Resources;
+import org.apache.rat.utils.DefaultLog;
+import org.apache.tika.mime.MimeTypes;
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.MalformedInputException;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Objects;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+public class TikaProcessorTest {
+ /**
+ * Regression test for RAT-81: a MalformedInputException used to be swallowed
+ * (returning false) because the encoding of the stream was different from the
+ * platform's default encoding; it must now surface as a RatDocumentAnalysisException.
+ *
+ * @throws Exception
+ * @see "RAT-81"
+ */
+ @Test
+ public void RAT81() throws Exception {
+ // create a document that throws a MalformedInputException
+ Document doc = getDocument(new InputStream() {
+ @Override
+ public int read() throws IOException {
+ throw new MalformedInputException(0);
+ }
+ });
+ assertThrows(RatDocumentAnalysisException.class, () -> TikaProcessor.process(DefaultLog.INSTANCE, doc));
+ }
+
+ @Test
+ public void UTF16_input() throws Exception {
+ Document doc = getDocument(Resources.getResourceStream("/binaries/UTF16_with_signature.xml"));
+ TikaProcessor.process(DefaultLog.INSTANCE, doc);
+ assertEquals(Document.Type.STANDARD, doc.getMetaData().getDocumentType());
+ }
+
+ @Test
+ public void UTF8_input() throws Exception {
+ FileDocument doc = new FileDocument(Resources.getResourceFile("/binaries/UTF8_with_signature.xml"));
+ TikaProcessor.process(DefaultLog.INSTANCE, doc);
+ assertEquals(Document.Type.STANDARD, doc.getMetaData().getDocumentType());
+ }
+
+ @Test
+ public void missNamedBinaryTest() throws Exception {
+ FileDocument doc = new FileDocument(Resources.getResourceFile("/binaries/Image-png.not"));
+ TikaProcessor.process(DefaultLog.INSTANCE, doc);
+ assertEquals(Document.Type.BINARY, doc.getMetaData().getDocumentType());
+ }
+
+
+ @Test
+ public void plainTextTest() throws Exception {
+ FileDocument doc = new FileDocument(Resources.getResourceFile("/elements/Text.txt"));
+ TikaProcessor.process(DefaultLog.INSTANCE, doc);
+ assertEquals(Document.Type.STANDARD, doc.getMetaData().getDocumentType());
+ }
+
+ @Test
+ public void emptyFileTest() throws Exception {
+ FileDocument doc = new FileDocument(Resources.getResourceFile("/elements/sub/Empty.txt"));
+ TikaProcessor.process(DefaultLog.INSTANCE, doc);
+ assertEquals(Document.Type.STANDARD, doc.getMetaData().getDocumentType());
+ }
+
+ @Test
+ public void javaFileWithChineseCharacters_RAT301() throws Exception {
+ FileDocument doc = new FileDocument(Resources.getResourceFile("/tikaFiles/standard/ChineseCommentsJava.java"));
+ TikaProcessor.process(DefaultLog.INSTANCE, doc);
+ assertEquals(Document.Type.STANDARD, doc.getMetaData().getDocumentType());
+ }
+
+ @Test
+ public void testTikaFiles() throws RatDocumentAnalysisException, IOException {
+ File dir = new File("src/test/resources/tikaFiles");
+ Map<String, Document.Type> unseenMime = TikaProcessor.getDocumentTypeMap();
+ ClaimStatistic statistic = new ClaimStatistic();
+ for (Document.Type docType : Document.Type.values()) {
+ File typeDir = new File(dir, docType.name().toLowerCase(Locale.ROOT));
+ if (typeDir.isDirectory()) {
+ for (File file : Objects.requireNonNull(typeDir.listFiles())) {
+ Document doc = new FileDocument(file);
+ String mimeType = TikaProcessor.process(DefaultLog.INSTANCE, doc);
+ statistic.incCounter(doc.getMetaData().getDocumentType(), 1);
+ assertEquals( docType, doc.getMetaData().getDocumentType(), () -> "Wrong type for "+file.toString());
+ unseenMime.remove(mimeType);
+ }
+ }
+ }
+ System.out.println( "untested mime types");
+ unseenMime.keySet().forEach(System.out::println);
+ for (Document.Type type : Document.Type.values()) {
+ System.out.format("Tested %s %s files%n", statistic.getCounter(type), type );
+ }
+ }
+
+
+ /**
+ * Builds a document that is backed by the given input stream.
+ * @return a document whose {@code inputStream()} returns the given stream.
+ */
+ private static Document getDocument(final InputStream stream) {
+ MetaData metaData = new MetaData();
+
+ Document doc = new Document() {
+ @Override
+ public String getName() {
+ return "Testing Document";
+ }
+
+ @Override
+ public Reader reader() throws IOException {
+ return new InputStreamReader(inputStream());
+ }
+
+ @Override
+ public InputStream inputStream() throws IOException {
+ return stream;
+ }
+
+ @Override
+ public MetaData getMetaData() {
+ return metaData;
+ }
+
+ @Override
+ public boolean isComposite() {
+ return false;
+ }
+ };
+ return doc;
+ }
+}
diff --git a/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/ArchiveGuesserTest.java b/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/ArchiveGuesserTest.java
deleted file mode 100644
index 919a17a..0000000
--- a/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/ArchiveGuesserTest.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one *
- * or more contributor license agreements. See the NOTICE file *
- * distributed with this work for additional information *
- * regarding copyright ownership. The ASF licenses this file *
- * to you under the Apache License, Version 2.0 (the *
- * "License"); you may not use this file except in compliance *
- * with the License. You may obtain a copy of the License at *
- * *
- * http://www.apache.org/licenses/LICENSE-2.0 *
- * *
- * Unless required by applicable law or agreed to in writing, *
- * software distributed under the License is distributed on an *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
- * KIND, either express or implied. See the License for the *
- * specific language governing permissions and limitations *
- * under the License. *
- */
-package org.apache.rat.document.impl.guesser;
-
-import org.apache.rat.document.MockDocument;
-import org.junit.jupiter.api.Test;
-
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-public class ArchiveGuesserTest {
-
- @Test
- public void matches() {
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.jar")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.tar.gz")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.zip")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.tar")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.bz")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.bz2")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.odb")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.odf")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.odg")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.odp")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.ods")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.odt")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.sar")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.har")));
- assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.wsr")));
- }
-
- @Test
- public void isArchive() {
- assertTrue(ArchiveGuesser.isArchive("42.jar"));
- assertTrue(ArchiveGuesser.isArchive("42.tar.gz"));
- assertTrue(ArchiveGuesser.isArchive("42.zip"));
- assertTrue(ArchiveGuesser.isArchive("42.tar"));
- assertTrue(ArchiveGuesser.isArchive("42.bz"));
- assertTrue(ArchiveGuesser.isArchive("42.bz2"));
- assertTrue(ArchiveGuesser.isArchive("42.odb"));
- assertTrue(ArchiveGuesser.isArchive("42.odf"));
- assertTrue(ArchiveGuesser.isArchive("42.odg"));
- assertTrue(ArchiveGuesser.isArchive("42.odp"));
- assertTrue(ArchiveGuesser.isArchive("42.ods"));
- assertTrue(ArchiveGuesser.isArchive("42.odt"));
- assertTrue(ArchiveGuesser.isArchive("42.sar"));
- assertTrue(ArchiveGuesser.isArchive("42.har"));
- assertTrue(ArchiveGuesser.isArchive("42.wsr"));
- }
-
-}
diff --git a/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java b/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java
deleted file mode 100644
index 43843f6..0000000
--- a/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one *
- * or more contributor license agreements. See the NOTICE file *
- * distributed with this work for additional information *
- * regarding copyright ownership. The ASF licenses this file *
- * to you under the Apache License, Version 2.0 (the *
- * "License"); you may not use this file except in compliance *
- * with the License. You may obtain a copy of the License at *
- * *
- * http://www.apache.org/licenses/LICENSE-2.0 *
- * *
- * Unless required by applicable law or agreed to in writing, *
- * software distributed under the License is distributed on an *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
- * KIND, either express or implied. See the License for the *
- * specific language governing permissions and limitations *
- * under the License. *
- */
-package org.apache.rat.document.impl.guesser;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.rat.document.MockDocument;
-import org.apache.rat.document.impl.FileDocument;
-import org.apache.rat.test.utils.Resources;
-import org.junit.jupiter.api.Test;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.util.Arrays;
-import java.util.List;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-public class BinaryGuesserTest {
-
- private static final List<String> BINARY_FILES = Arrays.asList(//
- "image.png",//
- "image.pdf",//
- "image.psd",//
- "image.gif",//
- "image.giff",//
- "image.jpg",//
- "image.jpeg",//
- "image.exe",//
- "Whatever.class",//
- "data.dat",//
- "libicuda.so.34",//
- "my.truststore",//
- //"foo.Java", //
- //"manifest.Mf",//
- "deprecatedtechnology.swf",
- "xyz.aif",
- "abc.iff",
- // Audio Files
- "test.m3u", "test.m4a",
- "test-audio.mid", "test-audio.mp3",
- "test-audio.mpa", "test-audio.wav",
- "test-audio.wma"
- );
-
- @Test
- public void testMatches() {
- for (String name : BINARY_FILES) {
- assertTrue(BinaryGuesser.isBinary(new MockDocument(name)), ()->"'" + name + "' should be detected as a binary");
- }
-
- }
-
- @Test
- public void testIsBinary() {
- for (String name : BINARY_FILES) {
- assertTrue(BinaryGuesser.isBinary(name), ()->"'" + name + "' should be detected as a binary");
- }
- }
-
- /**
- * Used to swallow a MalformedInputException and return false
- * because the encoding of the stream was different from the
- * platform's default encoding.
- *
- * @throws Exception
- * @see "RAT-81"
- */
- @Test
- public void binaryWithMalformedInputRAT81() throws Exception {
- FileDocument doc = new FileDocument(Resources.getResourceFile("/binaries/UTF16_with_signature.xml"));
- Reader r = doc.reader(); // this will fail test if file is not readable
- try {
- char[] dummy = new char[100];
- r.read(dummy);
- // if we get here, the UTF-16 encoded file didn't throw
- // any exception, try the UTF-8 encoded one
- r.close();
- r = null; // ensure we detect failure to read second file
- doc = new FileDocument(Resources.getResourceFile("/binaries/UTF8_with_signature.xml"));
- r = doc.reader();
- r.read(dummy);
- // still here? can't test on this platform
- System.err.println("Skipping testBinaryWithMalformedInput");
- } catch (IOException e) {
- if (r != null) {
- IOUtils.closeQuietly(r);
- } else {
- throw e; // could not open the second file
- }
- r = null;
- assertTrue(BinaryGuesser.isBinary(doc), "Expected binary for " + doc.getName());
- } finally {
- IOUtils.closeQuietly(r);
- }
- }
-
- @Test
- public void realBinaryContent() throws IOException {
- // This test is not accurate on all platforms
- final String encoding = System.getProperty("file.encoding");
- final boolean isBinary = BinaryGuesser.isBinary(new FileDocument(Resources.getResourceFile("/binaries/Image-png.not")));
- if (encoding.startsWith("ANSI")) {
- assertTrue(isBinary);
- } else {
- if (isBinary) {
- System.out.println("BinaryGuesserTest.realBinaryContent() succeeded when using encoding " + encoding);
- } else {
- System.err.println("BinaryGuesserTest.realBinaryContent() failed when using encoding " + encoding);
- }
- }
- }
-
- @Test
- public void textualContent() throws IOException {
- assertFalse(BinaryGuesser.isBinary(new FileDocument(Resources.getResourceFile("/elements/Text.txt"))));
- }
-
- @Test
- public void emptyFile() throws IOException {
- assertFalse(BinaryGuesser.isBinary(new FileDocument(Resources.getResourceFile("/elements/sub/Empty.txt"))));
- }
-
- @Test
- public void testFileEncodingCanBeSetAndHasFallbackInCaseOfErrors() {
- System.setProperty(BinaryGuesser.FILE_ENCODING, "shouldThrowAnExceptionBecauseNotFound");
- assertEquals("UTF-8", BinaryGuesser.getFileEncodingOrUTF8AsFallback().displayName());
-
- final String usAscii = "US-ASCII";
- System.setProperty(BinaryGuesser.FILE_ENCODING, usAscii);
- assertEquals(usAscii, BinaryGuesser.getFileEncodingOrUTF8AsFallback().displayName());
- }
-}
diff --git a/apache-rat-core/src/test/java/org/apache/rat/report/ConfigurationReportTest.java b/apache-rat-core/src/test/java/org/apache/rat/report/ConfigurationReportTest.java
index af7a81b..9759c4f 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/report/ConfigurationReportTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/report/ConfigurationReportTest.java
@@ -18,24 +18,19 @@
*/
package org.apache.rat.report;
-import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.StringWriter;
import java.util.List;
-import java.util.Optional;
import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.apache.rat.Defaults;
import org.apache.rat.ReportConfiguration;
-import org.apache.rat.config.parameters.Description;
import org.apache.rat.configuration.MatcherBuilderTracker;
-import org.apache.rat.license.ILicense;
import org.apache.rat.license.LicenseSetFactory.LicenseFilter;
import org.apache.rat.report.xml.writer.IXmlWriter;
import org.apache.rat.report.xml.writer.impl.base.XmlWriter;
diff --git a/apache-rat-core/src/test/java/org/apache/rat/report/xml/XmlReportFactoryTest.java b/apache-rat-core/src/test/java/org/apache/rat/report/xml/XmlReportFactoryTest.java
index c9d67a2..e386f93 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/report/xml/XmlReportFactoryTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/report/xml/XmlReportFactoryTest.java
@@ -31,6 +31,7 @@
import org.apache.commons.io.filefilter.HiddenFileFilter;
import org.apache.rat.ConfigurationException;
+import org.apache.rat.Defaults;
import org.apache.rat.ReportConfiguration;
import org.apache.rat.api.Document;
import org.apache.rat.license.ILicense;
@@ -50,8 +51,7 @@
public class XmlReportFactoryTest {
- private static final Pattern IGNORE_EMPTY = Pattern.compile(".svn|Empty.txt");
- private ILicenseFamily family = ILicenseFamily.builder().setLicenseFamilyCategory("TEST")
+ private final ILicenseFamily family = ILicenseFamily.builder().setLicenseFamilyCategory("TEST")
.setLicenseFamilyName("Testing family").build();
private StringWriter out;
@@ -71,12 +71,13 @@
@Test
public void standardReport() throws Exception {
final String elementsPath = Resources.getResourceDirectory("elements/Source.java");
-
- final TestingLicense testingLicense = new TestingLicense(new TestingMatcher(true), family);
-
- DirectoryWalker directory = new DirectoryWalker(new File(elementsPath), IGNORE_EMPTY, HiddenFileFilter.HIDDEN);
- final ClaimStatistic statistic = new ClaimStatistic();
final ReportConfiguration configuration = new ReportConfiguration(DefaultLog.INSTANCE);
+ final TestingLicense testingLicense = new TestingLicense(new TestingMatcher(true), family);
+ configuration.setFrom(Defaults.builder().build(DefaultLog.INSTANCE));
+
+ DirectoryWalker directory = new DirectoryWalker(new File(elementsPath), configuration.getFilesToIgnore(), HiddenFileFilter.HIDDEN);
+ final ClaimStatistic statistic = new ClaimStatistic();
+
configuration.addLicense(testingLicense);
RatReport report = XmlReportFactory.createStandardReport(writer, statistic, configuration);
report.startReport();
@@ -88,10 +89,10 @@
"Preamble and document element are OK");
assertTrue(XmlUtils.isWellFormedXml(output), "Is well formed");
- assertEquals(2, statistic.getDocumentCategoryMap().get(Document.Type.BINARY)[0], "Binary files");
- assertEquals(2, statistic.getDocumentCategoryMap().get(Document.Type.NOTICE)[0], "Notice files");
- assertEquals(8, statistic.getDocumentCategoryMap().get(Document.Type.STANDARD)[0], "Standard files");
- assertEquals(1, statistic.getDocumentCategoryMap().get(Document.Type.ARCHIVE)[0], "Archives");
+ assertEquals(2, statistic.getCounter(Document.Type.BINARY), "Binary files");
+ assertEquals(2, statistic.getCounter(Document.Type.NOTICE), "Notice files");
+ assertEquals(8, statistic.getCounter(Document.Type.STANDARD), "Standard files");
+ assertEquals(1, statistic.getCounter(Document.Type.ARCHIVE), "Archives");
}
@Test
diff --git a/apache-rat-core/src/test/java/org/apache/rat/walker/DirectoryWalkerTest.java b/apache-rat-core/src/test/java/org/apache/rat/walker/DirectoryWalkerTest.java
index daef24b..0fe276a 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/walker/DirectoryWalkerTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/walker/DirectoryWalkerTest.java
@@ -19,6 +19,7 @@
package org.apache.rat.walker;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.File;
import java.io.FileWriter;
@@ -26,48 +27,100 @@
import java.util.ArrayList;
import java.util.List;
-import org.apache.commons.io.filefilter.FalseFileFilter;
import org.apache.rat.api.Document;
import org.apache.rat.api.RatException;
+import org.apache.rat.document.impl.DocumentImplUtils;
import org.apache.rat.report.RatReport;
+import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
public class DirectoryWalkerTest {
-
- @TempDir
- private File toWalk;
-
- @Test
- public void walk() throws IOException, RatException {
- File regular = new File(toWalk, "regular");
- regular.mkdir();
- File regularFile = new File(regular, "test");
- try (FileWriter writer = new FileWriter(regularFile)) {
- writer.write("test");
+
+ private static File toWalk;
+
+ private static void fileWriter(File dir, String name, String contents) throws IOException {
+ try (FileWriter writer = new FileWriter(new File(dir, name))) {
+ writer.write(contents);
writer.flush();
}
+ }
+ @BeforeAll
+ public static void setUp(@TempDir File dir) throws Exception {
+ toWalk = dir;
+ /*
+ Create a directory structure like this:
+
+ regular
+ regularFile
+ .hiddenFile
+ .hidden
+ regularFile
+ .hiddenFile
+ */
+ File regular = new File(toWalk, "regular");
+ regular.mkdir();
+ fileWriter(regular, "regularFile", "regular file");
+ fileWriter(regular, ".hiddenFile", "hidden file");
File hidden = new File(toWalk, ".hidden");
hidden.mkdir();
- File hiddenFile = new File(hidden, "test");
+ fileWriter(hidden, "regularFile", "regular file");
+ fileWriter(hidden, ".hiddenFile", "hidden file");
+ }
- try (FileWriter writer = new FileWriter(hiddenFile)) {
- writer.write("test");
- writer.flush();
- }
+ private String expectedName(String name) {
+ return DocumentImplUtils.toName(toWalk)+name;
+ }
- DirectoryWalker walker = new DirectoryWalker(toWalk, NameBasedHiddenFileFilter.HIDDEN);
+
+
+ @Test
+ public void noFiltersTest() throws IOException, RatException {
+ DirectoryWalker walker = new DirectoryWalker(toWalk, null,null);
List<String> scanned = new ArrayList<>();
walker.run(new TestRatReport(scanned));
+ String[] expected = {"/regular/regularFile", "/regular/.hiddenFile", "/.hidden/regularFile", "/.hidden/.hiddenFile"};
+ assertEquals(4, scanned.size());
+ for (String ex : expected) {
+ assertTrue(scanned.contains(expectedName(ex)), ()-> String.format("Missing %s", expectedName(ex)));
+ }
+ }
- assertEquals(1, scanned.size());
-
- walker = new DirectoryWalker(toWalk, FalseFileFilter.FALSE);
- scanned = new ArrayList<>();
+ @Test
+ public void noHiddenFileFiltersTest() throws IOException, RatException {
+ DirectoryWalker walker = new DirectoryWalker(toWalk, NameBasedHiddenFileFilter.HIDDEN,null);
+ List<String> scanned = new ArrayList<>();
walker.run(new TestRatReport(scanned));
-
+ String[] expected = {"/regular/regularFile", "/.hidden/regularFile"};
assertEquals(2, scanned.size());
+ for (String ex : expected) {
+ assertTrue(scanned.contains(expectedName(ex)), ()-> String.format("Missing %s", expectedName(ex)));
+ }
+ }
+
+ @Test
+ public void noHiddenDirectoryFiltersTest() throws IOException, RatException {
+ DirectoryWalker walker = new DirectoryWalker(toWalk, null, NameBasedHiddenFileFilter.HIDDEN);
+ List<String> scanned = new ArrayList<>();
+ walker.run(new TestRatReport(scanned));
+ String[] expected = {"/regular/regularFile", "/regular/.hiddenFile"};
+ assertEquals(2, scanned.size());
+ for (String ex : expected) {
+ assertTrue(scanned.contains(expectedName(ex)), ()-> String.format("Missing %s", expectedName(ex)));
+ }
+ }
+
+ @Test
+ public void noHiddenDirectoryAndNoHiddenFileFiltersTest() throws IOException, RatException {
+ DirectoryWalker walker = new DirectoryWalker(toWalk, NameBasedHiddenFileFilter.HIDDEN, NameBasedHiddenFileFilter.HIDDEN);
+ List<String> scanned = new ArrayList<>();
+ walker.run(new TestRatReport(scanned));
+ String[] expected = {"/regular/regularFile"};
+ assertEquals(1, scanned.size());
+ for (String ex : expected) {
+ assertTrue(scanned.contains(expectedName(ex)), ()-> String.format("Missing %s", expectedName(ex)));
+ }
}
class TestRatReport implements RatReport {
@@ -92,7 +145,5 @@
public void endReport() {
// no-op
}
-
}
-
}
diff --git a/apache-rat-core/src/test/resources/jira/RAT147/unix-newlines.txt.bin b/apache-rat-core/src/test/resources/jira/RAT147/unix-newlines.txt.bin
new file mode 100644
index 0000000..2c498da
--- /dev/null
+++ b/apache-rat-core/src/test/resources/jira/RAT147/unix-newlines.txt.bin
@@ -0,0 +1,8 @@
+sentence 1.
+sentence 2.
+
+
+sentence 3.
+
+sentence 4.
+
diff --git a/apache-rat-core/src/test/resources/jira/RAT147/windows-newlines.txt.bin b/apache-rat-core/src/test/resources/jira/RAT147/windows-newlines.txt.bin
new file mode 100644
index 0000000..a0adb98
--- /dev/null
+++ b/apache-rat-core/src/test/resources/jira/RAT147/windows-newlines.txt.bin
@@ -0,0 +1,9 @@
+sentence 1.
+sentence 2.
+
+
+sentence 3.
+
+sentence 4.
+
+
diff --git a/apache-rat-core/src/test/resources/jira/RAT211/leader-election-message-arrives.dia b/apache-rat-core/src/test/resources/jira/RAT211/leader-election-message-arrives.dia
new file mode 100644
index 0000000..41fa530
--- /dev/null
+++ b/apache-rat-core/src/test/resources/jira/RAT211/leader-election-message-arrives.dia
Binary files differ
diff --git a/apache-rat-core/src/test/resources/jira/RAT211/side_left.bmp b/apache-rat-core/src/test/resources/jira/RAT211/side_left.bmp
new file mode 100644
index 0000000..c1d3d69
--- /dev/null
+++ b/apache-rat-core/src/test/resources/jira/RAT211/side_left.bmp
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/README.md b/apache-rat-core/src/test/resources/tikaFiles/README.md
new file mode 100644
index 0000000..55e546c
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/README.md
@@ -0,0 +1,11 @@
+Files in this directory are processed by Tika, and the tests verify that the proper document type is returned for each file.
+
+The `standard` subdirectory contains files that are returned as STANDARD document types.
+
+the `binary` subdirectory contains files that are returned as BINARY types.
+
+The `notice` subdirectory contains files that are returned as NOTICE types.
+
+the `archive` subdirectory contains files that are ARCHIVE types.
+
+The `TikaProcessorTest.testTikaFiles()` test automatically runs against the files in these directories. To add a new file to the test, just place it in the proper directory.
diff --git a/apache-rat-core/src/test/resources/tikaFiles/archive/dummy.jar b/apache-rat-core/src/test/resources/tikaFiles/archive/dummy.jar
new file mode 100644
index 0000000..ccbf9f3
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/archive/dummy.jar
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Defaults.class b/apache-rat-core/src/test/resources/tikaFiles/binary/Defaults.class
new file mode 100644
index 0000000..2a6fd7c
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Defaults.class
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Image.gif b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.gif
new file mode 100644
index 0000000..202787b
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.gif
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Image.jpeg b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.jpeg
new file mode 100644
index 0000000..68a0505
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.jpeg
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Image.jpg b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.jpg
new file mode 100644
index 0000000..7ad7df6
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.jpg
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Image.png b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.png
new file mode 100644
index 0000000..ed8f507
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.png
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Image.psd b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.psd
new file mode 100644
index 0000000..58cff9a
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.psd
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Image.xcf b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.xcf
new file mode 100644
index 0000000..eaf4d03
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.xcf
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/KeyStore.jks b/apache-rat-core/src/test/resources/tikaFiles/binary/KeyStore.jks
new file mode 100644
index 0000000..9867401
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/KeyStore.jks
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/file.json b/apache-rat-core/src/test/resources/tikaFiles/binary/file.json
new file mode 100644
index 0000000..b676b6f
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/file.json
@@ -0,0 +1,7 @@
+{
+ "Just": "a",
+ "plain": [
+ "json",
+ "file"
+ ]
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/maven_libjansi.so b/apache-rat-core/src/test/resources/tikaFiles/binary/maven_libjansi.so
new file mode 100755
index 0000000..c56b19d
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/maven_libjansi.so
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.aif b/apache-rat-core/src/test/resources/tikaFiles/binary/test.aif
new file mode 100644
index 0000000..97eac1d
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.aif
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.au b/apache-rat-core/src/test/resources/tikaFiles/binary/test.au
new file mode 100644
index 0000000..20d1bd2
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.au
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.exe b/apache-rat-core/src/test/resources/tikaFiles/binary/test.exe
new file mode 100644
index 0000000..a45435f
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.exe
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.flv b/apache-rat-core/src/test/resources/tikaFiles/binary/test.flv
new file mode 100644
index 0000000..d35e9bb
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.flv
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.iff b/apache-rat-core/src/test/resources/tikaFiles/binary/test.iff
new file mode 100644
index 0000000..b3a6377
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.iff
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.keystore b/apache-rat-core/src/test/resources/tikaFiles/binary/test.keystore
new file mode 100644
index 0000000..9867401
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.keystore
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.mid b/apache-rat-core/src/test/resources/tikaFiles/binary/test.mid
new file mode 100644
index 0000000..883ef37
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.mid
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/test.mp3
new file mode 100644
index 0000000..698cbaf
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.ogg b/apache-rat-core/src/test/resources/tikaFiles/binary/test.ogg
new file mode 100644
index 0000000..8180299
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.ogg
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.swf b/apache-rat-core/src/test/resources/tikaFiles/binary/test.swf
new file mode 100644
index 0000000..57d7fe1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.swf
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.truststore b/apache-rat-core/src/test/resources/tikaFiles/binary/test.truststore
new file mode 100644
index 0000000..b98bc62
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.truststore
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.wav b/apache-rat-core/src/test/resources/tikaFiles/binary/test.wav
new file mode 100644
index 0000000..59a063e
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.wav
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.wma b/apache-rat-core/src/test/resources/tikaFiles/binary/test.wma
new file mode 100644
index 0000000..ec2e9bd
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.wma
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3i18n.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3i18n.mp3
new file mode 100644
index 0000000..0f25370
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3i18n.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3i18n_truncated.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3i18n_truncated.mp3
new file mode 100644
index 0000000..c2cd30d
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3i18n_truncated.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v1.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v1.mp3
new file mode 100644
index 0000000..3d4ef17
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v1.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v1_v2.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v1_v2.mp3
new file mode 100644
index 0000000..b78a1a3
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v1_v2.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v2.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v2.mp3
new file mode 100644
index 0000000..ac96bec
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v2.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v24.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v24.mp3
new file mode 100644
index 0000000..704921b
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v24.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3lyrics.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3lyrics.mp3
new file mode 100644
index 0000000..cdec511
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3lyrics.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3noid3.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3noid3.mp3
new file mode 100644
index 0000000..f087903
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3noid3.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3truncated.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3truncated.mp3
new file mode 100644
index 0000000..d8ab515
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3truncated.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP4.m4a b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP4.m4a
new file mode 100644
index 0000000..a9bc731
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP4.m4a
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP4_truncated.m4a b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP4_truncated.m4a
new file mode 100644
index 0000000..31fdef4
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP4_truncated.m4a
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testNakedUTF16BOM.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testNakedUTF16BOM.mp3
new file mode 100644
index 0000000..414fbbb
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testNakedUTF16BOM.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/truststore.jks b/apache-rat-core/src/test/resources/tikaFiles/binary/truststore.jks
new file mode 100644
index 0000000..b98bc62
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/truststore.jks
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/ca-cert b/apache-rat-core/src/test/resources/tikaFiles/ca-cert
new file mode 100644
index 0000000..5f298fb
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/ca-cert
@@ -0,0 +1,22 @@
+-----BEGIN CERTIFICATE-----
+MIIDqTCCApGgAwIBAgIUAgvTSWaYPi8BobX1hhbOk39GW0swDQYJKoZIhvcNAQEL
+BQAwZDELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxGzAZBgNVBAoM
+EkFwYWNoZSBDcmVhZHVyIFJhdDEjMCEGA1UECwwaQXBhY2hlIFNvZnR3YXJlIEZv
+dW5kYXRpb24wHhcNMjQwNTA0MTEzODQ0WhcNMjQwNjAzMTEzODQ0WjBkMQswCQYD
+VQQGEwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEbMBkGA1UECgwSQXBhY2hlIENy
+ZWFkdXIgUmF0MSMwIQYDVQQLDBpBcGFjaGUgU29mdHdhcmUgRm91bmRhdGlvbjCC
+ASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMPm0tTlla3NVpkMbzB3GsZh
+i8cZOp5Xfnk37Bfy6z9PHwldchpCosOJXfe+/Q61ZoRSc4OOE5JR46Z2ObPWIgRX
+Jl077233kW4vWIFg1livA4jF2eXW74R9Na9OEdV25qJzbF8BZopiiqYyUMqS2E7z
+7u5f4sO9t4Aj9GlXPIkn3XYCLeJ7NvxFpXiFVkyXcLipJmFquLADhHHxO/i+WZqo
++kUhtot48M5DVtJ4dFw0iJuyFGoHb0Jc/msHqde30gipeA7FPjChgrUt/gtSik//
+MCgDMSjtkCVFDJu/GRNekwJ3qmhZvZmVsTfyv7v9+auxdggxK37LGve/Cmgd9KkC
+AwEAAaNTMFEwHQYDVR0OBBYEFKLAoxUfM62sqAOLyl5lw+fvFgaiMB8GA1UdIwQY
+MBaAFKLAoxUfM62sqAOLyl5lw+fvFgaiMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZI
+hvcNAQELBQADggEBAEuUDV60i/20GB0gHYVRxejMI0OmtXy/J+jg1uutwirByhR5
+vUQr8Wdwxh4pm9UR4JaC2QRxedMkj3f29lqu1UteESyH7xcsQxEToreeYGU3veJY
+CiP4m1qaC8zVUu8Fr2VXvAsFnHKSMYiNFnWI/zGGGJGVQ8vUm2Vf664WpbUOh093
+Cf71I8sgrn1IE3C3E3meRkS+ws4HoID2nwpD+usS7batU/awOw2qASOuAjqRGG+/
+IHbk17HXPBhyXEb7r+/VAJCpzoOe1KwjgOm6FkitM1c3nARWtz6OZN6/spGdGbKa
+rLuOJKaXYxtDhNuP1/xB9VKEeXQQNKu0+BQve3Y=
+-----END CERTIFICATE-----
diff --git a/apache-rat-core/src/test/resources/tikaFiles/ca-key b/apache-rat-core/src/test/resources/tikaFiles/ca-key
new file mode 100644
index 0000000..5561cab
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/ca-key
@@ -0,0 +1,30 @@
+-----BEGIN ENCRYPTED PRIVATE KEY-----
+MIIFHDBOBgkqhkiG9w0BBQ0wQTApBgkqhkiG9w0BBQwwHAQIf0t1BVqonX8CAggA
+MAwGCCqGSIb3DQIJBQAwFAYIKoZIhvcNAwcECDcQRWLeuuoXBIIEyF1B0D0OW5+P
+Lk09ebYnI8Cp7mHKqAAL1NBLTscbXTJEddnS7zbNMxHgbfArnwOu17NhblK1lSxK
+hTkmH7RQ5/kZTBv3eBDFQZlAuwMKis8hv1dBWWraDxlEPy4gkVXJRqe8sJI/Nf++
+ADnNrZjLPUrmb8mcoU+R0cOV9S8Vo3iKWZ84Mh6rohc6DqpRkFukjwNV8/O6pBV1
+Zb9+NvIfDSej/VSLTnOvM0XwXwKwvYNyMaXnWh9IcDuwhyuQ693aqOL+hy0AmC7b
+P55fx0/yWkrINcbp8R8QiHOBrZKB/nc0OgMJDlTYWtnnpIXIogsGNgrXXOUEGtYW
+Em6bpE4np/DoshYXZqkRql0ZIQg9TfLr7k0FpqFBjT6ZnR1ezpulu1t9JCekV1K/
+nX4Dj58Xj3NB9Zx6ygHX6oZKZK4p0+meDHTl52Tyzc+Dv45Cry/Pf64fkFRjBDBJ
+plD+YkFiuys5/y+TF3QchsRYqkzdkptSgl3kh8oVN63r6XY0Rpq4eLFuK1VJB155
+oi5GSUr8wHLfYWDp2e3u2Tb1jDrvWKHs8DPnA04YtWp3pSqxI/dNJnKZ0GYThfk/
+LGni+U2neatcJt6pwXXcRAMN1MSerMMXeZ5fPBzNUI9/Iygyr+Fxiik5whhPQUmj
+NnwMI+e5aVCjJAknNH8Z24SSpe3afq0i8IO7R2t1Nw40Ign+zvI/tPMG/ajTpWbF
+NQ5R7r2mjSuDCYJ9enMllh5nGWAv930FRBvifW4MQ/vD/b8ADQF9uer4Y5wNRQWF
+ryF5wpEhR12PlUFJSMD+gBxiNsCRo0kjniVvwa+atDFR8eVaPQ5y8A5j6o4mVsjX
+2ul7EwCDCHcuoU/Cej/gpdUFwYMm5gD/4SZigJu0B8THXe58aeuhuCH773/paCCP
+LU3ZJ68GaByYCfja8vdoT25T5A0vLz7E/+lJq4tf8RTLJABAVXJCEjTWVWspWeQs
+J7z/4HV263CdqHZbn/0Gqa3PPApivgTGm0Nba6rZizkYgDxvCeroATKx5ixKnjdZ
+n3GWp69c2nwmI0xtxcfxglPykO/XKcRMXrnJka7NC6mc98Ijg+oUD+K3e5OUDPzv
+ilomOq1wRKukWQV0EYtvDzJhNCZP3qQne7+Cw+XDdrrlfMrVPB84s8hU9bOG+lDg
+eoHBN5RQiYiE4WfLF8rPKSGI4uhWQSh3uoY0xHnJXmXExGWhFTH+bapChe37KMur
+LqU0mjd5rSgAsea4xLkLRtEIz8bhGIJX+eByZZJBl2o7Hcb9hloDVrf+tUer/cgw
+cQBFL48tIgr/BmweTyIQlyPiBj1FPuRafonS/jg8q3ANl3AJdt0raQ0bxtZMfkYN
+BmTl5guJSzGL5RjdHUHLwNAa1PSPqjA3Ey5LpB1TpDkqlQWL9zI2RvuHq6CctExq
+dNtYEgfkMDLe/2+waxyCwhD5L3HH/Edo4D6eoD2RvH3hSDtsOePnUUA66T+PB5vW
+vLnRfWs4KaSr/R1qK1IdU6ymd7XcOvGmi3+A92kYx+HZGHJAcULFr2zSLWhecsey
+/ylT9qFdG9BdYDr2Sofz6yuizzwZS7tKlTY+LFRGPMswOgy4tx/mXLh+0p878jYT
+2w8E7xAgwNm5CcH48Bc53A==
+-----END ENCRYPTED PRIVATE KEY-----
diff --git a/apache-rat-core/src/test/resources/tikaFiles/cert-file b/apache-rat-core/src/test/resources/tikaFiles/cert-file
new file mode 100644
index 0000000..a2ba7e9
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/cert-file
@@ -0,0 +1,19 @@
+-----BEGIN NEW CERTIFICATE REQUEST-----
+MIIDADCCAegCAQAwgYoxEDAOBgNVBAYTB1Vua25vd24xEDAOBgNVBAgTB1Vua25v
+d24xEDAOBgNVBAcTB1Vua25vd24xIzAhBgNVBAoTGmFwYWNoZSBzb2Z0d2FyZSBm
+b3VuZGF0aW9uMRswGQYDVQQLExJhcGFjaGUgY3JlYWR1ciByYXQxEDAOBgNVBAMT
+B1Vua25vd24wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDIfjrZSi8J
+X3byEMM0wRUSUZ3F+syr75YnqsY0DuiHlIqA4DYL1ffsyPBanixRP4PbQ26RPbLU
+/i3QfhbVJpkntVvJJvKjXWWUJ0LQN4VIWdDXw+9feELKTSkoIvXwoNXApE1BfKmS
+cZP4l292E+UzGog5faHt3aoIP4Gp1+fe8ybTFctAWVSLbECKE8fetBS8eP7of4pC
+lYtQDZ9WGTXKIN2TB0V3VS1w4mdd/y/n3UYh7LbAOaqoR5Qp+2aOghNElZHkXMPw
+hngBYfEx7g5hVOgiyEw0vHrMuqI01jtHxkkx/t1F2CPYeXfXXhluoO0vEpfnUhBf
+Jgb0PLVuMzJdAgMBAAGgMDAuBgkqhkiG9w0BCQ4xITAfMB0GA1UdDgQWBBQxKtdV
+jawugSKCnB3djVazFcqR1DANBgkqhkiG9w0BAQsFAAOCAQEAFM8iYzG6enq71btN
+/3AhrlEwiKMk9B1zIoKPOkhLb7g0tU95tHRBWmkbeLkmSQgtkirYyH6ItV/L6dVp
+MnT+nUmYXlcTv5gIB75mHHpl9dxxcZlaggf4cml6mmZ03Jf+B7ShqEZ0QVpTBxxf
+YIdePcHlN6WZWJFa523a2kQF2SQ2Ts84WhhmFNFImzJ1NJwEtaPAPi/u/6WTTyof
+/jS/dIgavtGY8Xew06A7x0nmF3YDDP+ietbtTMQldfqIkAiPb425gQBdMrjnwftd
+lMJNK9OAxa++nZ/+SoyLSZXm3Hv1CA70XCqVYYJjkm3A588PB/kmWby/T7o9iOJ9
+93q1bA==
+-----END NEW CERTIFICATE REQUEST-----
diff --git a/apache-rat-core/src/test/resources/tikaFiles/cert-signed b/apache-rat-core/src/test/resources/tikaFiles/cert-signed
new file mode 100644
index 0000000..d136065
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/cert-signed
@@ -0,0 +1,21 @@
+-----BEGIN CERTIFICATE-----
+MIIDdjCCAl4CFCoYTVKkLZGguwEbhwnHe52DRrSmMA0GCSqGSIb3DQEBCwUAMGQx
+CzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMRswGQYDVQQKDBJBcGFj
+aGUgQ3JlYWR1ciBSYXQxIzAhBgNVBAsMGkFwYWNoZSBTb2Z0d2FyZSBGb3VuZGF0
+aW9uMB4XDTI0MDUwNDExNDAyMFoXDTI1MDUwNDExNDAyMFowgYoxEDAOBgNVBAYT
+B1Vua25vd24xEDAOBgNVBAgTB1Vua25vd24xEDAOBgNVBAcTB1Vua25vd24xIzAh
+BgNVBAoTGmFwYWNoZSBzb2Z0d2FyZSBmb3VuZGF0aW9uMRswGQYDVQQLExJhcGFj
+aGUgY3JlYWR1ciByYXQxEDAOBgNVBAMTB1Vua25vd24wggEiMA0GCSqGSIb3DQEB
+AQUAA4IBDwAwggEKAoIBAQDIfjrZSi8JX3byEMM0wRUSUZ3F+syr75YnqsY0DuiH
+lIqA4DYL1ffsyPBanixRP4PbQ26RPbLU/i3QfhbVJpkntVvJJvKjXWWUJ0LQN4VI
+WdDXw+9feELKTSkoIvXwoNXApE1BfKmScZP4l292E+UzGog5faHt3aoIP4Gp1+fe
+8ybTFctAWVSLbECKE8fetBS8eP7of4pClYtQDZ9WGTXKIN2TB0V3VS1w4mdd/y/n
+3UYh7LbAOaqoR5Qp+2aOghNElZHkXMPwhngBYfEx7g5hVOgiyEw0vHrMuqI01jtH
+xkkx/t1F2CPYeXfXXhluoO0vEpfnUhBfJgb0PLVuMzJdAgMBAAEwDQYJKoZIhvcN
+AQELBQADggEBACJM4Nb1hIelkOo0S9Yqx4hQuiKJo7DaVmHubRc71fLSJQsAPdnw
+E4FaNIS8trPKsOCAMNK9jzQHNrdgdeYyFG4wUS5nV9yMqN78HdCnghHR4NivcxDG
+LIJsbwaFTa79/cW8oe4+jwJ4ks+JYf3sA612RaWMDFxyJtIc0wv8dv7kRjZpC953
+Gj5ic6Gf+7DgRWdTAZgYVXHrnyrSfCbEX64Lcga33oSnvDJoxPnmy9JGbqnoIqOB
+e2PjnxZ6MktG17Z6fTpkUxWVsgqdx+zCynGsQnXfV0UKAwlTU2n5beD2aLfa5ysd
+B9VgFAxCwSpjlozIUGzjzRpnS+7uZv07Wik=
+-----END CERTIFICATE-----
diff --git a/apache-rat-core/src/test/resources/tikaFiles/notice/LICENSE b/apache-rat-core/src/test/resources/tikaFiles/notice/LICENSE
new file mode 100644
index 0000000..7a4a3ea
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/notice/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
\ No newline at end of file
diff --git a/apache-rat-core/src/test/resources/tikaFiles/notice/NOTICE b/apache-rat-core/src/test/resources/tikaFiles/notice/NOTICE
new file mode 100644
index 0000000..967464b
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/notice/NOTICE
@@ -0,0 +1,7 @@
+=========================================================================
+== NOTICE file corresponding to section 4(d) of the Apache License, ==
+== Version 2.0. ==
+=========================================================================
+
+This product includes software developed by
+The Apache Software Foundation (http://www.apache.org/).
\ No newline at end of file
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/ChineseCommentsJava.java b/apache-rat-core/src/test/resources/tikaFiles/standard/ChineseCommentsJava.java
new file mode 100644
index 0000000..29475ee
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/ChineseCommentsJava.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.linkis.udf.entity;
+
+import java.util.Date;
+
+// taken from https://github.com/apache/linkis/blob/master/linkis-public-enhancements/linkis-pes-common/src/main/java/org/apache/linkis/udf/entity/UDFVersion.java
+public class ChineseCommentsJava {
+ private Long id;
+ private Long udfId;
+ private String path; // 仅存储用户上一次上传的路径 作提示用
+ private String bmlResourceId;
+ private String bmlResourceVersion;
+ private Boolean isPublished; // 共享udf被使用的是已发布的最新版本
+ private String registerFormat;
+ private String useFormat;
+ private String description;
+ private Date createTime;
+
+ /** Constructors and method taken away to only parse above comments but no meaningful Java class :) */
+ private String md5;
+
+ public void setCreateTime(Date createTime) {
+ this.createTime = createTime;
+ }
+}
+
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/HelloWorld.groovy b/apache-rat-core/src/test/resources/tikaFiles/standard/HelloWorld.groovy
new file mode 100644
index 0000000..7dfa58c
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/HelloWorld.groovy
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+package tikaFiles.standard;
+public class HelloWorld {
+ public static void main(String[] args) {
+ System.out.println("Hello World");
+ }
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/HelloWorld.java b/apache-rat-core/src/test/resources/tikaFiles/standard/HelloWorld.java
new file mode 100644
index 0000000..d99bba4
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/HelloWorld.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+package tikaFiles;
+public class HelloWorld {
+ public static void main(String[] args) {
+ System.out.println("Hello World");
+ }
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/Image.pdf b/apache-rat-core/src/test/resources/tikaFiles/standard/Image.pdf
new file mode 100644
index 0000000..110b45e
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/Image.pdf
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/UTF16_with_signature.xml b/apache-rat-core/src/test/resources/tikaFiles/standard/UTF16_with_signature.xml
new file mode 100644
index 0000000..9e9104e
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/UTF16_with_signature.xml
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/UTF8_with_signature.xml b/apache-rat-core/src/test/resources/tikaFiles/standard/UTF8_with_signature.xml
new file mode 100644
index 0000000..b82c0ee
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/UTF8_with_signature.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+ <!--
+ ***************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ***************************************************************
+ -->
+
+
+<xmlRoot>
+<descrition>
+The file encoding is UTF-8 with signature. (Special chars: äöü)
+</descrition>
+</xmlRoot>
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.C b/apache-rat-core/src/test/resources/tikaFiles/standard/file.C
new file mode 100644
index 0000000..b12b3c8
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.C
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+#include <stdio.h>
+int main() {
+ printf("Hello world");
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.CPP b/apache-rat-core/src/test/resources/tikaFiles/standard/file.CPP
new file mode 100644
index 0000000..7a1eef1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.CPP
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+#include <iostream>
+
+int main() {
+ std::cout << "Hello World!";
+ return 0;
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.c b/apache-rat-core/src/test/resources/tikaFiles/standard/file.c
new file mode 100644
index 0000000..b12b3c8
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.c
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+#include <stdio.h>
+int main() {
+ printf("Hello world");
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.c++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.c++
new file mode 100644
index 0000000..7a1eef1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.c++
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+#include <iostream>
+
+int main() {
+ std::cout << "Hello World!";
+ return 0;
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.cc b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cc
new file mode 100644
index 0000000..7a1eef1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cc
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+#include <iostream>
+
+int main() {
+ std::cout << "Hello World!";
+ return 0;
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.cp b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cp
new file mode 100644
index 0000000..7a1eef1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+#include <iostream>
+
+int main() {
+ std::cout << "Hello World!";
+ return 0;
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.cpp b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cpp
new file mode 100644
index 0000000..7a1eef1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+#include <iostream>
+
+int main() {
+ std::cout << "Hello World!";
+ return 0;
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.csv b/apache-rat-core/src/test/resources/tikaFiles/standard/file.csv
new file mode 100644
index 0000000..1aa2e35
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.csv
@@ -0,0 +1 @@
+Just, a, plain, csv, file
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.cxx b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cxx
new file mode 100644
index 0000000..7a1eef1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cxx
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+#include <iostream>
+
+int main() {
+ std::cout << "Hello World!";
+ return 0;
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.plain b/apache-rat-core/src/test/resources/tikaFiles/standard/file.plain
new file mode 100644
index 0000000..1adcb85
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.plain
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+
+Just a plain text file.
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.tsv b/apache-rat-core/src/test/resources/tikaFiles/standard/file.tsv
new file mode 100644
index 0000000..8b4b188
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.tsv
@@ -0,0 +1 @@
+Just a plain tsv file
diff --git a/apache-rat-plugin/src/main/java/org/apache/rat/mp/AbstractRatMojo.java b/apache-rat-plugin/src/main/java/org/apache/rat/mp/AbstractRatMojo.java
index 1a39c0c..2386ed3 100644
--- a/apache-rat-plugin/src/main/java/org/apache/rat/mp/AbstractRatMojo.java
+++ b/apache-rat-plugin/src/main/java/org/apache/rat/mp/AbstractRatMojo.java
@@ -341,11 +341,15 @@
protected ReportConfiguration getConfiguration() throws MojoExecutionException {
ReportConfiguration config = new ReportConfiguration(makeLog());
reportDeprecatedProcessing();
+ Defaults defaults = getDefaultsBuilder().build(config.getLog());
if (addDefaultLicenses) {
- config.setFrom(getDefaultsBuilder().build(config.getLog()));
+ config.setFrom(defaults);
} else {
config.setStyleSheet(Defaults.getPlainStyleSheet());
+ config.setDirectoriesToIgnore(Defaults.getDirectoriesToIgnore());
+ config.setFilesToIgnore(Defaults.getFilesToIgnore());
}
+
if (additionalLicenseFiles != null) {
for (String licenseFile : additionalLicenseFiles) {
try {
diff --git a/apache-rat-plugin/src/main/java/org/apache/rat/mp/RatCheckMojo.java b/apache-rat-plugin/src/main/java/org/apache/rat/mp/RatCheckMojo.java
index bb842ed..fc8dc09 100644
--- a/apache-rat-plugin/src/main/java/org/apache/rat/mp/RatCheckMojo.java
+++ b/apache-rat-plugin/src/main/java/org/apache/rat/mp/RatCheckMojo.java
@@ -177,7 +177,7 @@
configuration.setCopyrightMessage(copyrightMessage);
}
if (scanHiddenDirectories) {
- configuration.setDirectoryFilter(null);
+ configuration.setDirectoriesToIgnore(null);
}
if (reportFile != null) {
if (!reportFile.exists()) {
diff --git a/apache-rat-plugin/src/test/java/org/apache/rat/mp/RatCheckMojoTest.java b/apache-rat-plugin/src/test/java/org/apache/rat/mp/RatCheckMojoTest.java
index 2cdc942..3da865e 100644
--- a/apache-rat-plugin/src/test/java/org/apache/rat/mp/RatCheckMojoTest.java
+++ b/apache-rat-plugin/src/test/java/org/apache/rat/mp/RatCheckMojoTest.java
@@ -29,6 +29,7 @@
import java.io.FileWriter;
import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.filefilter.FalseFileFilter;
import org.apache.rat.ReportConfiguration;
import org.apache.rat.ReportConfigurationTest;
import org.apache.rat.api.Document;
@@ -200,7 +201,10 @@
ReportConfigurationTest.validateDefaultLicenses(config, "MyLicense", "CpyrT", "RegxT", "SpdxT", "TextT",
"Not", "All", "Any");
assertNotNull(LicenseSetFactory.search("MyLicense", config.getLicenses(LicenseFilter.ALL)));
- assertNull("Should not have inputFileFilter", config.getInputFileFilter());
+ assertNotNull("Should have filesToIgnore", config.getFilesToIgnore());
+ assertThat(config.getFilesToIgnore()).isExactlyInstanceOf(FalseFileFilter.class);
+ assertNotNull("Should have directoriesToIgnore", config.getDirectoriesToIgnore());
+ assertThat(config.getDirectoriesToIgnore()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
mojo.execute();
ensureRatReportIsCorrect(ratTxtFile, expected, TextUtils.EMPTY);
@@ -228,11 +232,12 @@
assertThat(config.isAddingLicenses()).isFalse();
assertThat(config.isAddingLicensesForced()).isFalse();
assertThat(config.getCopyrightMessage()).isNull();
- assertThat(config.getInputFileFilter()).isNull();
assertThat(config.isStyleReport()).isTrue();
- assertThat(config.getStyleSheet()).isNotNull().withFailMessage("Stylesheet should not be null");
- assertThat(config.getDirectoryFilter()).isNotNull().withFailMessage("Directory filter should not be null");
- assertThat(config.getDirectoryFilter()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
+ assertThat(config.getStyleSheet()).withFailMessage("Stylesheet should not be null").isNotNull();
+ assertThat(config.getDirectoriesToIgnore()).withFailMessage("directoriesToIgnore filter should not be null").isNotNull();
+ assertThat(config.getDirectoriesToIgnore()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
+ assertThat(config.getFilesToIgnore()).withFailMessage("filesToIgnore filter should not be null").isNotNull();
+ assertThat(config.getFilesToIgnore()).isExactlyInstanceOf(FalseFileFilter.class);
ReportConfigurationTest.validateDefaultApprovedLicenses(config, 1);
ReportConfigurationTest.validateDefaultLicenseFamilies(config, "BSD", "CC BY");
diff --git a/apache-rat-tasks/src/main/java/org/apache/rat/anttasks/Report.java b/apache-rat-tasks/src/main/java/org/apache/rat/anttasks/Report.java
index 289b04c..e7108dd 100644
--- a/apache-rat-tasks/src/main/java/org/apache/rat/anttasks/Report.java
+++ b/apache-rat-tasks/src/main/java/org/apache/rat/anttasks/Report.java
@@ -95,7 +95,7 @@
}
public void setInputFileFilter(FilenameFilter inputFileFilter) {
- configuration.setInputFileFilter(inputFileFilter);
+ configuration.setFilesToIgnore(inputFileFilter);
}
public void setReportFile(File reportFile) {
diff --git a/checkstyle-suppressions.xml b/checkstyle-suppressions.xml
new file mode 100644
index 0000000..f2d551f
--- /dev/null
+++ b/checkstyle-suppressions.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+<!DOCTYPE suppressions PUBLIC "-//Checkstyle//DTD SuppressionFilter Configuration 1.0//EN" "https://checkstyle.org/dtds/suppressions_1_0.dtd">
+<suppressions>
+ <suppress checks="JavadocMethod" files=".*[/\\]test[/\\].*"/>
+ <suppress checks="JavadocPackage" files=".*[/\\]test[/\\].*"/>
+ <suppress checks="LineLength" files=".*" />
+ <suppress checks="javadoc" files=".*" />
+ <!-- Due to failures in Checkstyle on Windows and in GH Actions -->
+ <suppress checks="NewlineAtEndOfFile" files="target[/\\].*[/\\]pom.properties"/>
+</suppressions>
diff --git a/pom.xml b/pom.xml
index f295021..e0e7857 100644
--- a/pom.xml
+++ b/pom.xml
@@ -95,11 +95,6 @@
<version>4.4</version>
</dependency>
<dependency>
- <groupId>commons-beanutils</groupId>
- <artifactId>commons-beanutils</artifactId>
- <version>1.9.4</version>
- </dependency>
- <dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.16.1</version>
@@ -162,6 +157,11 @@
<version>3.25.3</version>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>2.9.2</version>
+ </dependency>
</dependencies>
</dependencyManagement>
<reporting>
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index e0b7710..d9e8f1c 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -72,6 +72,25 @@
</release>
-->
<release version="0.17-SNAPSHOT" date="xxxx-yy-zz" description="Current SNAPSHOT - release to be done">
+ <action issue="RAT-301" type="fix" dev="pottlinger" due-to="claudenw">
+ Files with Chinese characters in comments are no longer classified as binary (due to Tika integration).
+ </action>
+ <action issue="RAT-54" type="fix" dev="claudenw">
+ MIME Detection Using Tika
+ </action>
+ <action issue="RAT-20" type="fix" dev="claudenw">
+ Changed to detecting binary files by content, not by name.
+ </action>
+ <action issue="RAT-147" type="fix" dev="claudenw">
+ Changed to detect non-UTF-8 text files as text, not binary.
+ </action>
+ <action issue="RAT-150" type="fix" dev="claudenw">
+ Switch to Tika to detect file types.
+ </action>
+ <action issue="RAT-211" type="fix" dev="claudenw">
+ Generated rat-output.xml is now well-formed, even if BinaryGuesser fails or there is XML content
+ in the sample element.
+ </action>
<action issue="RAT-368" type="update" dev="claudenw">
Removed ReportFailedRuntimeException, ReportTransformer, RatReportAnalysisResultException, MimeTyper, ToNameTransformer,
UnsuitableDocumentException, ReportTransformerTest, and ToNameTransformerTest as they are no longer used in the codebase.