Merge pull request #240 from Claudenw/tika_based_document_analyzer

RAT-54: Tika based document analyzer
diff --git a/apache-rat-core/pom.xml b/apache-rat-core/pom.xml
index 1b31b09..a4ad439 100644
--- a/apache-rat-core/pom.xml
+++ b/apache-rat-core/pom.xml
@@ -113,10 +113,6 @@
       <artifactId>commons-cli</artifactId>
     </dependency>
     <dependency>
-      <groupId>commons-beanutils</groupId>
-      <artifactId>commons-beanutils</artifactId>
-    </dependency>
-    <dependency>
       <groupId>org.mockito</groupId>
       <artifactId>mockito-core</artifactId>
       <scope>test</scope>
@@ -126,5 +122,9 @@
       <artifactId>assertj-core</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+    </dependency>
   </dependencies>
 </project>
diff --git a/apache-rat-core/src/main/java/org/apache/rat/Defaults.java b/apache-rat-core/src/main/java/org/apache/rat/Defaults.java
index cb747f0..be0456b 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/Defaults.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/Defaults.java
@@ -19,6 +19,7 @@
 package org.apache.rat;
 
 import java.io.File;
+import java.io.FilenameFilter;
 import java.io.InputStream;
 import java.net.MalformedURLException;
 import java.net.URL;
@@ -28,6 +29,8 @@
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import org.apache.commons.io.filefilter.FalseFileFilter;
+import org.apache.commons.io.filefilter.IOFileFilter;
 import org.apache.commons.io.function.IOSupplier;
 import org.apache.rat.configuration.Format;
 import org.apache.rat.configuration.LicenseReader;
@@ -37,11 +40,15 @@
 import org.apache.rat.license.LicenseSetFactory;
 import org.apache.rat.license.LicenseSetFactory.LicenseFilter;
 import org.apache.rat.utils.Log;
+import org.apache.rat.walker.NameBasedHiddenFileFilter;
 
 /**
- * A class that holds the list of licenses and approved licenses from one or more configuration files.
+ * A class that provides the standard system defaults for the ReportConfiguration.
+ *
+ * Properties in this class may be overridden or added to by configuration options in the various UIs.
+ * See the specific UI for details.
  */
-public class Defaults {
+public final class Defaults {
 
     /**
      * The default configuration file from the package.
@@ -57,7 +64,11 @@
     public static final String UNAPPROVED_LICENSES_STYLESHEET = "org/apache/rat/unapproved-licenses.xsl";
 
     private final LicenseSetFactory setFactory;
-    
+
+    private static final FilenameFilter FILES_TO_IGNORE = FalseFileFilter.FALSE;
+
+    private static final IOFileFilter DIRECTORIES_TO_IGNORE = NameBasedHiddenFileFilter.HIDDEN;
+
     /**
      * Initialize the system configuration reader..
      */
@@ -71,7 +82,7 @@
     /**
      * Builder constructs instances.
      */
-    private Defaults(Log log, Set<URL> urls) {
+    private Defaults(final Log log, final Set<URL> urls) {
         this.setFactory = Defaults.readConfigFiles(log, urls);
     }
 
@@ -87,7 +98,7 @@
      * Reads the configuration files.
      * @param urls the URLs to read.
      */
-    private static LicenseSetFactory readConfigFiles(Log log, Collection<URL> urls) {
+    private static LicenseSetFactory readConfigFiles(final Log log, final Collection<URL> urls) {
 
         SortedSet<ILicense> licenses = LicenseSetFactory.emptyLicenseSet();
 
@@ -133,16 +144,16 @@
      * @param filter define which type of licenses to return.
      * @return sorted set of licenses.
      */
-    public SortedSet<ILicense> getLicenses(LicenseFilter filter) {
+    public SortedSet<ILicense> getLicenses(final LicenseFilter filter) {
         return setFactory.getLicenses(filter);
     }
-    
+
     /**
      * Gets the sorted set of approved licenses for a given filter condition.
      * @param filter define which type of licenses to return.
      * @return sorted set of license families.
      */
-    public SortedSet<ILicenseFamily> getLicenseFamilies(LicenseFilter filter) {
+    public SortedSet<ILicenseFamily> getLicenseFamilies(final LicenseFilter filter) {
         return setFactory.getLicenseFamilies(filter);
     }
 
@@ -152,14 +163,22 @@
      * @param filter define which type of licenses to return.
      * @return The sorted set of approved licenseIds.
      */
-    public SortedSet<String> getLicenseIds(LicenseFilter filter) {
+    public SortedSet<String> getLicenseIds(final LicenseFilter filter) {
         return setFactory.getLicenseFamilyIds(filter);
     }
+
+    public static FilenameFilter getFilesToIgnore() {
+        return FILES_TO_IGNORE;
+    }
+
+    public static IOFileFilter getDirectoriesToIgnore() {
+        return DIRECTORIES_TO_IGNORE;
+    }
     
     /**
      * The Defaults builder.
      */
-    public static class Builder {
+    public final static class Builder {
         private final Set<URL> fileNames = new TreeSet<>(Comparator.comparing(URL::toString));
 
         private Builder() {
@@ -172,7 +191,7 @@
          * @param url the URL to add
          * @return this Builder for chaining
          */
-        public Builder add(URL url) {
+        public Builder add(final URL url) {
             fileNames.add(url);
             return this;
         }
@@ -184,7 +203,7 @@
          * @return this Builder for chaining
          * @throws MalformedURLException in case the fileName cannot be found.
          */
-        public Builder add(String fileName) throws MalformedURLException {
+        public Builder add(final String fileName) throws MalformedURLException {
             return add(new File(fileName));
         }
 
@@ -195,7 +214,7 @@
          * @return this Builder for chaining
          * @throws MalformedURLException in case the file cannot be found.
          */
-        public Builder add(File file) throws MalformedURLException {
+        public Builder add(final File file) throws MalformedURLException {
             return add(file.toURI().toURL());
         }
 
@@ -205,7 +224,7 @@
          * @param url the URL of the file to remove.
          * @return this Builder for chaining
          */
-        public Builder remove(URL url) {
+        public Builder remove(final URL url) {
             fileNames.remove(url);
             return this;
         }
@@ -217,7 +236,7 @@
          * @return this Builder for chaining
          * @throws MalformedURLException in case the fileName cannot be found.
          */
-        public Builder remove(String fileName) throws MalformedURLException {
+        public Builder remove(final String fileName) throws MalformedURLException {
             return remove(new File(fileName));
         }
 
@@ -228,7 +247,7 @@
          * @return this Builder for chaining
          * @throws MalformedURLException in case the file cannot be found.
          */
-        public Builder remove(File file) throws MalformedURLException {
+        public Builder remove(final File file) throws MalformedURLException {
             return remove(file.toURI().toURL());
         }
 
@@ -246,7 +265,7 @@
          * @param log the Log to use to report errors when building the defaults.
          * @return the current defaults object.
          */
-        public Defaults build(Log log) {
+        public Defaults build(final Log log) {
             return new Defaults(log, fileNames);
         }
     }
diff --git a/apache-rat-core/src/main/java/org/apache/rat/Report.java b/apache-rat-core/src/main/java/org/apache/rat/Report.java
index 6f49556..74ba5d1 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/Report.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/Report.java
@@ -238,7 +238,7 @@
         }
 
         if (cl.hasOption(SCAN_HIDDEN_DIRECTORIES)) {
-            configuration.setDirectoryFilter(null);
+            configuration.setDirectoriesToIgnore(null);
         }
 
         if (cl.hasOption('a') || cl.hasOption('A')) {
@@ -250,14 +250,14 @@
             String[] excludes = cl.getOptionValues(EXCLUDE_CLI);
             if (excludes != null) {
                 final FilenameFilter filter = parseExclusions(Arrays.asList(excludes));
-                configuration.setInputFileFilter(filter);
+                configuration.setFilesToIgnore(filter);
             }
         } else if (cl.hasOption(EXCLUDE_FILE_CLI)) {
             String excludeFileName = cl.getOptionValue(EXCLUDE_FILE_CLI);
             if (excludeFileName != null) {
                 final FilenameFilter filter = parseExclusions(
                         FileUtils.readLines(new File(excludeFileName), StandardCharsets.UTF_8));
-                configuration.setInputFileFilter(filter);
+                configuration.setFilesToIgnore(filter);
             }
         }
 
@@ -452,11 +452,11 @@
             }
 
             if (base.isDirectory()) {
-                return new DirectoryWalker(base, config.getInputFileFilter(), config.getDirectoryFilter());
+                return new DirectoryWalker(base, config.getFilesToIgnore(), config.getDirectoriesToIgnore());
             }
 
             try {
-                return new ArchiveWalker(base, config.getInputFileFilter());
+                return new ArchiveWalker(base, config.getFilesToIgnore());
             } catch (IOException ex) {
                 config.getLog().log(Level.ERROR, "file '"+baseDirectory+"' is not valid gzip data.");
                 return null;
diff --git a/apache-rat-core/src/main/java/org/apache/rat/ReportConfiguration.java b/apache-rat-core/src/main/java/org/apache/rat/ReportConfiguration.java
index d737989..dee2915 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/ReportConfiguration.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/ReportConfiguration.java
@@ -50,7 +50,6 @@
 import org.apache.rat.report.IReportable;
 import org.apache.rat.utils.Log;
 import org.apache.rat.utils.ReportingSet;
-import org.apache.rat.walker.NameBasedHiddenFileFilter;
 
 /**
  * A configuration object is used by the front end to invoke the
@@ -69,9 +68,9 @@
     private boolean styleReport;
     private IOSupplier<InputStream> styleSheet;
     private IReportable reportable;
-    private FilenameFilter inputFileFilter;
-    private IOFileFilter directoryFilter;
-    private Log log;
+    private FilenameFilter filesToIgnore;
+    private IOFileFilter directoriesToIgnore;
+    private final Log log;
     private LicenseFilter listFamilies;
     private LicenseFilter listLicenses;
     private boolean dryRun;
@@ -89,7 +88,6 @@
                 .setMsgFormat( s -> String.format( "Duplicate License %s (%s) of type %s", s.getName(), s.getId(), s.getLicenseFamily().getFamilyCategory()));
         approvedLicenseCategories = new TreeSet<>();
         removedLicenseCategories = new TreeSet<>();
-        directoryFilter = NameBasedHiddenFileFilter.HIDDEN;
         styleReport = true;
         listFamilies = LicenseFilter.NONE;
         listLicenses = LicenseFilter.NONE;
@@ -179,31 +177,31 @@
     /**
      * @return The filename filter for the potential input files.
      */
-    public FilenameFilter getInputFileFilter() {
-        return inputFileFilter;
+    public FilenameFilter getFilesToIgnore() {
+        return filesToIgnore;
     }
 
     /**
-     * @param inputFileFilter the filename filter to filter the input files.
+     * @param filesToIgnore the filename filter to filter the input files.
      */
-    public void setInputFileFilter(FilenameFilter inputFileFilter) {
-        this.inputFileFilter = inputFileFilter;
+    public void setFilesToIgnore(FilenameFilter filesToIgnore) {
+        this.filesToIgnore = filesToIgnore;
     }
 
-    public IOFileFilter getDirectoryFilter() {
-        return directoryFilter;
+    public IOFileFilter getDirectoriesToIgnore() {
+        return directoriesToIgnore;
     }
 
-    public void setDirectoryFilter(IOFileFilter directoryFilter) {
-        if (directoryFilter == null) {
-            this.directoryFilter = FalseFileFilter.FALSE;
+    public void setDirectoriesToIgnore(IOFileFilter directoriesToIgnore) {
+        if (directoriesToIgnore == null) {
+            this.directoriesToIgnore = FalseFileFilter.FALSE;
         } else {
-            this.directoryFilter = directoryFilter;
+            this.directoriesToIgnore = directoriesToIgnore;
         }
     }
 
-    public void addDirectoryFilter(IOFileFilter directoryFilter) {
-        this.directoryFilter = this.directoryFilter.and(directoryFilter);
+    public void addDirectoryToIgnore(IOFileFilter directoryToIgnore) {
+        this.directoriesToIgnore = this.directoriesToIgnore.and(directoryToIgnore);
     }
 
     /**
@@ -247,6 +245,8 @@
      * @param defaults The defaults to set.
      */
     public void setFrom(Defaults defaults) {
+        setFilesToIgnore(Defaults.getFilesToIgnore());
+        setDirectoriesToIgnore(Defaults.getDirectoriesToIgnore());
         addLicensesIfNotPresent(defaults.getLicenses(LicenseFilter.ALL));
         addApprovedLicenseCategories(defaults.getLicenseIds(LicenseFilter.APPROVED));
         if (isStyleReport() && getStyleSheet() == null) {
diff --git a/apache-rat-core/src/main/java/org/apache/rat/analysis/DefaultAnalyserFactory.java b/apache-rat-core/src/main/java/org/apache/rat/analysis/DefaultAnalyserFactory.java
index 666cfc8..141de2a 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/analysis/DefaultAnalyserFactory.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/analysis/DefaultAnalyserFactory.java
@@ -24,9 +24,6 @@
 import org.apache.rat.api.Document;
 import org.apache.rat.document.IDocumentAnalyser;
 import org.apache.rat.document.RatDocumentAnalysisException;
-import org.apache.rat.document.impl.guesser.ArchiveGuesser;
-import org.apache.rat.document.impl.guesser.BinaryGuesser;
-import org.apache.rat.document.impl.guesser.NoteGuesser;
 import org.apache.rat.license.ILicense;
 import org.apache.rat.utils.Log;
 
@@ -63,8 +60,8 @@
 
         /**
          * Constructs a DocumentAnalyser for the specified license.
-         * 
-         * @param license The license to analyse
+         * @param log the Log to use
+         * @param licenses The licenses to analyse
          */
         public DefaultAnalyser(final Log log, final Collection<ILicense> licenses) {
             this.licenses = licenses;
@@ -73,16 +70,23 @@
 
         @Override
         public void analyse(Document document) throws RatDocumentAnalysisException {
-            if (NoteGuesser.isNote(document)) {
-                document.getMetaData().setDocumentType(Document.Type.NOTICE);
-            } else if (ArchiveGuesser.isArchive(document)) {
-                document.getMetaData().setDocumentType(Document.Type.ARCHIVE);
-            } else if (BinaryGuesser.isBinary(document)) {
-                document.getMetaData().setDocumentType(Document.Type.BINARY);
-            } else {
-                document.getMetaData().setDocumentType(Document.Type.STANDARD);
-                new DocumentHeaderAnalyser(log, licenses).analyse(document);
+            // Detects the media type and records the resulting document type in the metadata.
+            TikaProcessor.process(log, document);
+
+            switch (document.getMetaData().getDocumentType()) {
+            case STANDARD:
+                // Only STANDARD documents are handed to the header analyser.
+                new DocumentHeaderAnalyser(log, licenses).analyse(document);
+                break;
+            case NOTICE:
+            case ARCHIVE:
+            case BINARY:
+            case GENERATED:
+            case UNKNOWN:
+            default:
+                // Nothing further to analyse for these document types.
+                break;
             }
         }
     }
 }
diff --git a/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java b/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
new file mode 100644
index 0000000..113411f
--- /dev/null
+++ b/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */
+package org.apache.rat.analysis;
+
+import org.apache.rat.api.Document;
+import org.apache.rat.document.RatDocumentAnalysisException;
+import org.apache.rat.document.impl.guesser.NoteGuesser;
+import org.apache.rat.utils.Log;
+import org.apache.tika.Tika;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A wrapping around the tika processor.
+ */
+public final class TikaProcessor {
+
+    /** The Tika instance used to detect media types. */
+    private static final Tika TIKA = new Tika();
+    /** Maps mime type strings to non BINARY types; "text" types are handled first, unlisted types are BINARY. */
+    private static final Map<String, Document.Type> DOCUMENT_TYPE_MAP = new HashMap<>();
+
+    static {
+        // org.apache.tika.parser.epub.EpubParser
+        DOCUMENT_TYPE_MAP.put("application/x-ibooks+zip", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/epub+zip", Document.Type.ARCHIVE);
+
+        DOCUMENT_TYPE_MAP.put("application/vnd.wap.xhtml+xml", Document.Type.STANDARD);
+        DOCUMENT_TYPE_MAP.put("application/x-asp", Document.Type.STANDARD);
+        DOCUMENT_TYPE_MAP.put("application/xhtml+xml", Document.Type.STANDARD);
+        // org.apache.tika.parser.pdf.PDFParser -- NOTE(review): a stale note here said Type.BINARY; confirm STANDARD.
+        DOCUMENT_TYPE_MAP.put("application/pdf", Document.Type.STANDARD);
+        // org.apache.tika.parser.pkg.CompressorParser
+        DOCUMENT_TYPE_MAP.put("application/zlib", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-gzip", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-bzip2", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-compress", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-java-pack200", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-lzma", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/deflate64", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-lz4", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-snappy", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-brotli", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/gzip", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-bzip", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-xz", Document.Type.ARCHIVE);
+        // org.apache.tika.parser.pkg.PackageParser
+        DOCUMENT_TYPE_MAP.put("application/x-tar", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/java-archive", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-arj", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-archive", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/zip", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-cpio", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-tika-unix-dump", Document.Type.ARCHIVE);
+        DOCUMENT_TYPE_MAP.put("application/x-7z-compressed", Document.Type.ARCHIVE);
+        // org.apache.tika.parser.pkg.RarParser
+        DOCUMENT_TYPE_MAP.put("application/x-rar-compressed", Document.Type.ARCHIVE);
+        // org.apache.tika.parser.xliff.XLIFF12Parser
+        DOCUMENT_TYPE_MAP.put("application/x-xliff+xml", Document.Type.STANDARD);
+        // org.apache.tika.parser.xliff.XLZParser
+        DOCUMENT_TYPE_MAP.put("application/x-xliff+zip", Document.Type.ARCHIVE);
+        // org.apache.tika.parser.xml.DcXMLParser
+        DOCUMENT_TYPE_MAP.put("application/xml", Document.Type.STANDARD);
+        DOCUMENT_TYPE_MAP.put("image/svg+xml", Document.Type.STANDARD);
+        // org.apache.tika.parser.xml.FictionBookParser
+        DOCUMENT_TYPE_MAP.put("application/x-fictionbook+xml", Document.Type.STANDARD);
+    }
+
+    /** Utility class with static methods only; do not instantiate. */
+    private TikaProcessor() { }
+
+    /**
+     * Creates a copy of the document type map.
+     * Exposed for testing.
+     * @return a copy of the document type map.
+     */
+    static Map<String, Document.Type> getDocumentTypeMap() {
+        return new HashMap<>(DOCUMENT_TYPE_MAP);
+    }
+
+    /**
+     * Detects the document's media type and records it, with the derived document type, in its metadata.
+     * @param log the log for messages.
+     * @param document the Document to process.
+     * @return the mimetype as a string.
+     * @throws RatDocumentAnalysisException on error.
+     */
+    public static String process(final Log log, final Document document) throws RatDocumentAnalysisException {
+        Metadata metadata = new Metadata();
+        try (InputStream stream = document.inputStream()) {
+            metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, document.getName());
+            String result = TIKA.detect(stream, metadata);
+            // parse() accepts parameters and yields null for a malformed string; fall back to octet-stream.
+            MediaType parsed = MediaType.parse(result);
+            MediaType mediaType = parsed == null ? MediaType.OCTET_STREAM : parsed;
+            document.getMetaData().setMediaType(mediaType);
+            document.getMetaData().setDocumentType(fromMediaType(mediaType, log));
+            if (Document.Type.STANDARD == document.getMetaData().getDocumentType() && NoteGuesser.isNote(document)) {
+                document.getMetaData().setDocumentType(Document.Type.NOTICE);
+            }
+            return result;
+        } catch (IOException e) {
+            throw new RatDocumentAnalysisException(e);
+        }
+    }
+
+    /**
+     * Maps a media type to a document type, ignoring any media type parameters.
+     * @param mediaType the media type to map.
+     * @param log the log for messages (not used by the current implementation).
+     * @return STANDARD for "text" types, the mapped type if the base type is listed, otherwise BINARY.
+     */
+    public static Document.Type fromMediaType(final MediaType mediaType, final Log log) {
+        if ("text".equals(mediaType.getType())) {
+            return Document.Type.STANDARD;
+        }
+        Document.Type result = DOCUMENT_TYPE_MAP.get(mediaType.getBaseType().toString());
+        return result == null ? Document.Type.BINARY : result;
+    }
+}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/AbstractHeaderMatcher.java b/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/AbstractHeaderMatcher.java
index c97f886..0473223 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/AbstractHeaderMatcher.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/AbstractHeaderMatcher.java
@@ -24,8 +24,6 @@
 import org.apache.rat.analysis.IHeaderMatcher;
 import org.apache.rat.config.parameters.ComponentType;
 import org.apache.rat.config.parameters.ConfigComponent;
-import org.apache.rat.config.parameters.Description;
-import org.apache.rat.config.parameters.DescriptionBuilder;
 
 /**
  * An abstract class to simplify IHeaderMatcher creation. This class ensures
diff --git a/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/NotMatcher.java b/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/NotMatcher.java
index 2f00c9a..1f2ab6f 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/NotMatcher.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/analysis/matchers/NotMatcher.java
@@ -18,8 +18,6 @@
  */
 package org.apache.rat.analysis.matchers;
 
-import java.util.Arrays;
-import java.util.List;
 import java.util.Objects;
 
 import org.apache.rat.analysis.IHeaderMatcher;
diff --git a/apache-rat-core/src/main/java/org/apache/rat/api/Document.java b/apache-rat-core/src/main/java/org/apache/rat/api/Document.java
index 71f8a47..5d4467d 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/api/Document.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/api/Document.java
@@ -15,7 +15,7 @@
  * KIND, either express or implied.  See the License for the    *
  * specific language governing permissions and limitations      *
  * under the License.                                           *
- */ 
+ */
 package org.apache.rat.api;
 
 import java.io.IOException;
@@ -33,34 +33,37 @@
      */
     enum Type {
         /** A generated document. */
-        GENERATED, 
+        GENERATED,
         /** An unknown document type. */
         UNKNOWN,
         /** An archive type document. */
-        ARCHIVE, 
+        ARCHIVE,
         /** A notice document (e.g. LICENSE file) */
         NOTICE,
         /** A binary file */
         BINARY,
         /** A standard document */
-        STANDARD}
+        STANDARD;
+    }
 
     /**
      * @return the name of the current document.
      */
-	String getName();
-    
+    String getName();
+
     /**
      * Reads the contents of this document.
+     * 
      * @return <code>Reader</code> not null
      * @throws IOException if this document cannot be read
-     * @throws CompositeDocumentException if this document can only be read as
-     * a composite archive
+     * @throws CompositeDocumentException if this document can only be read as a
+     * composite archive
      */
     Reader reader() throws IOException;
-    
+
     /**
      * Streams the document's contents.
+     * 
      * @return a non null input stream of the document.
      * @throws IOException when stream could not be opened
      */
@@ -68,12 +71,14 @@
 
     /**
      * Gets data describing this resource.
+     * 
      * @return a non null MetaData object.
      */
     MetaData getMetaData();
-    
+
     /**
      * Tests if this a composite document.
+     * 
      * @return true if composite, false otherwise
      */
     boolean isComposite();
diff --git a/apache-rat-core/src/main/java/org/apache/rat/api/MetaData.java b/apache-rat-core/src/main/java/org/apache/rat/api/MetaData.java
index 4acbbe3..bd92bc2 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/api/MetaData.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/api/MetaData.java
@@ -26,6 +26,7 @@
 
 import org.apache.rat.license.ILicense;
 import org.apache.rat.license.ILicenseFamily;
+import org.apache.tika.mime.MediaType;
 
 /**
  * Data about the document under test..
@@ -37,6 +38,7 @@
     /** The list of License Family Categories that are approved */
     private final Set<String> approvedLicenses;
 
+    private MediaType mediaType;
     private Document.Type documentType;
     private String sampleHeader;
 
@@ -49,6 +51,22 @@
     }
 
     /**
+     * Gets the media type recorded for the document.
+     * @return the media type; NOTE(review): presumably null until setMediaType is called -- confirm.
+     */
+    public MediaType getMediaType() {
+        return mediaType;
+    }
+
+    /**
+     * Sets the media type recorded for the document; the value is stored as given.
+     * @param mediaType the media type.
+     */
+    public void setMediaType(MediaType mediaType) {
+        this.mediaType = mediaType;
+    }
+
+    /**
      * Determines if a matching license has been detected.
      * @return true if there is a matching license.
      */
diff --git a/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/ArchiveGuesser.java b/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/ArchiveGuesser.java
deleted file mode 100644
index f213d60..0000000
--- a/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/ArchiveGuesser.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one   *
- * or more contributor license agreements.  See the NOTICE file *
- * distributed with this work for additional information        *
- * regarding copyright ownership.  The ASF licenses this file   *
- * to you under the Apache License, Version 2.0 (the            *
- * "License"); you may not use this file except in compliance   *
- * with the License.  You may obtain a copy of the License at   *
- *                                                              *
- *   http://www.apache.org/licenses/LICENSE-2.0                 *
- *                                                              *
- * Unless required by applicable law or agreed to in writing,   *
- * software distributed under the License is distributed on an  *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
- * KIND, either express or implied.  See the License for the    *
- * specific language governing permissions and limitations      *
- * under the License.                                           *
- */
-package org.apache.rat.document.impl.guesser;
-
-import java.util.Locale;
-
-import org.apache.rat.api.Document;
-
-public class ArchiveGuesser {
-
-    private static final String DOT = ".";
-
-    private static final String[] ARCHIVE_EXTENSIONS = {
-        "jar", "gz",
-        "zip", "tar",
-        "bz", "bz2",
-        "rar", "war",
-        "ear", "mar",
-        "par", "xar",
-        "odb", "odf",
-        "odg", "odp",
-        "ods", "odt",
-        "har", "sar",
-        "wsr",
-    };
-
-    /**
-     * @param document the current document.
-     * @return whether the given document is an archive.
-     */
-    public static boolean isArchive(final Document document) {
-        return isArchive(document.getName());
-    }
-
-    /**
-     * @return Is a file by that name an archive?
-     * @param name file name to check against.
-     */
-    public static boolean isArchive(final String name) {
-        if (name == null) {return false;}
-        String nameToLower = name.toLowerCase(Locale.US);
-        for (int i = 0; i < ArchiveGuesser.ARCHIVE_EXTENSIONS.length; i++) {
-            if (nameToLower.endsWith(DOT + ArchiveGuesser.ARCHIVE_EXTENSIONS[i])) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java b/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
deleted file mode 100644
index 1a900f6..0000000
--- a/apache-rat-core/src/main/java/org/apache/rat/document/impl/guesser/BinaryGuesser.java
+++ /dev/null
@@ -1,372 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one   *
- * or more contributor license agreements.  See the NOTICE file *
- * distributed with this work for additional information        *
- * regarding copyright ownership.  The ASF licenses this file   *
- * to you under the Apache License, Version 2.0 (the            *
- * "License"); you may not use this file except in compliance   *
- * with the License.  You may obtain a copy of the License at   *
- *                                                              *
- *   http://www.apache.org/licenses/LICENSE-2.0                 *
- *                                                              *
- * Unless required by applicable law or agreed to in writing,   *
- * software distributed under the License is distributed on an  *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
- * KIND, either express or implied.  See the License for the    *
- * specific language governing permissions and limitations      *
- * under the License.                                           *
- */
-package org.apache.rat.document.impl.guesser;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.rat.api.Document;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Reader;
-import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
-import java.nio.charset.*;
-import java.util.Locale;
-
-/**
- * TODO: factor into MIME guesser and MIME-&gt;binary guesser
- */
-public class BinaryGuesser {
-
-    private static final String DOT = ".";
-
-    static final String FILE_ENCODING = "file.encoding";
-    private static final Charset CHARSET_FROM_FILE_ENCODING_OR_UTF8 = getFileEncodingOrUTF8AsFallback();
-
-    private static boolean isBinaryDocument(Document document) {
-        boolean result;
-        InputStream stream = null;
-        try {
-            stream = document.inputStream();
-            result = isBinary(stream);
-        } catch (IOException e) {
-            result = false;
-        } finally {
-            IOUtils.closeQuietly(stream);
-        }
-        return result;
-    }
-
-    private static boolean isBinary(CharSequence taste) {
-        int highBytes = 0;
-        final int length = taste.length();
-        for (int i = 0; i < length; i++) {
-            char c = taste.charAt(i);
-            if (c > BinaryGuesser.NON_ASCII_THRESHOLD
-                    || c <= BinaryGuesser.ASCII_CHAR_THRESHOLD) {
-                highBytes++;
-            }
-        }
-        return highBytes * BinaryGuesser.HIGH_BYTES_RATIO
-                > length * BinaryGuesser.TOTAL_READ_RATIO;
-    }
-
-    /**
-     * @param in the file to check.
-     * @return Do the first few bytes of the stream hint at a binary file?
-     * <p>Any IOException is swallowed internally and the test returns
-     * false.</p>
-     * <p>This method may lead to false negatives if the reader throws
-     * an exception because it can't read characters according to the
-     * reader's encoding from the underlying stream.</p>
-     */
-    public static boolean isBinary(Reader in) {
-        char[] taste = new char[100];
-        try {
-            int bytesRead = in.read(taste);
-            if (bytesRead > 0) {
-                return isBinary(new String(taste, 0, bytesRead));
-            }
-        } catch (IOException e) {
-            // SWALLOW 
-        }
-        return false;
-    }
-
-    /**
-     * @param in the file to check.
-     * @return Do the first few bytes of the stream hint at a binary file?
-     * <p>Any IOException is swallowed internally and the test returns
-     * false.</p>
-     * <p>This method will try to read bytes from the stream and
-     * translate them to characters according to the platform's
-     * default encoding.  If any bytes can not be translated to
-     * characters it will assume the original data must be binary and
-     * return true.</p>
-     */
-    public static boolean isBinary(InputStream in) {
-        try {
-            byte[] taste = new byte[200];
-            int bytesRead = in.read(taste);
-            if (bytesRead > 0) {
-                ByteBuffer bytes = ByteBuffer.wrap(taste, 0, bytesRead);
-                CharBuffer chars = CharBuffer.allocate(2 * bytesRead);
-                CharsetDecoder cd = CHARSET_FROM_FILE_ENCODING_OR_UTF8.newDecoder()
-                        .onMalformedInput(CodingErrorAction.REPORT)
-                        .onUnmappableCharacter(CodingErrorAction.REPORT);
-                while (bytes.remaining() > 0) {
-                    CoderResult res = cd.decode(bytes, chars, true);
-                    if (res.isMalformed() || res.isUnmappable()) {
-                        return true;
-                    } else if (res.isOverflow()) {
-                        chars.limit(chars.position());
-                        chars.rewind();
-                        int c = chars.capacity() * 2;
-                        CharBuffer on = CharBuffer.allocate(c);
-                        on.put(chars);
-                        chars = on;
-                    }
-                }
-                chars.limit(chars.position());
-                chars.rewind();
-                return isBinary(chars);
-            }
-        } catch (IOException e) {
-            // SWALLOW 
-        }
-        return false;
-    }
-
-    static Charset getFileEncodingOrUTF8AsFallback() {
-        try {
-            return Charset.forName(System.getProperty(FILE_ENCODING));
-        } catch (UnsupportedCharsetException e) {
-            return StandardCharsets.UTF_8;
-        }
-    }
-
-    /**
-     * @param name current file name.
-     * @return whether given name is binary.
-     */
-    public static boolean isBinaryData(final String name) {
-        return extensionMatches(name, DATA_EXTENSIONS);
-    }
-
-    /**
-     * @param name current file name.
-     * @return Is a file by that name a known non-binary file?
-     */
-    public static boolean isNonBinary(final String name) {
-        return name != null && extensionMatches(name.toUpperCase(Locale.US), BinaryGuesser.NON_BINARY_EXTENSIONS);
-    }
-
-    /**
-     * @param name current file name.
-     * @return Is a file by that name an executable/binary file?
-     */
-    public static boolean isExecutable(final String name) {
-        return name.equals(BinaryGuesser.JAVA) || extensionMatches(name, EXE_EXTENSIONS)
-                || containsExtension(name, EXE_EXTENSIONS);
-    }
-
-    public static boolean containsExtension(final String name,
-                                            final String[] exts) {
-        for (String ext : exts) {
-            if (name.contains(DOT + ext + DOT)) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    public static boolean extensionMatches(final String name,
-                                           final String[] exts) {
-        for (String ext : exts) {
-            if (name.endsWith(DOT + ext)) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    public static boolean isBytecode(final String name) {
-        return BinaryGuesser.extensionMatches(name, BYTECODE_EXTENSIONS);
-    }
-
-    public static boolean isImage(final String name) {
-        return BinaryGuesser.extensionMatches(name, IMAGE_EXTENSIONS);
-    }
-
-    public static boolean isKeystore(final String name) {
-        return BinaryGuesser.extensionMatches(name, KEYSTORE_EXTENSIONS);
-    }
-
-    public static boolean isAudio(final String name) {
-        return BinaryGuesser.extensionMatches( name, AUDIO_EXTENSIONS );
-    }
-
-    /**
-     * @param name file name.
-     * @return Is a file by that name a known binary file?
-     */
-    public static boolean isBinary(final String name) {
-        if (name == null) {
-            return false;
-        }
-        String normalisedName = GuessUtils.normalise(name);
-        return BinaryGuesser.JAR_MANIFEST.equalsIgnoreCase(name) || BinaryGuesser.isImage(normalisedName)
-                || BinaryGuesser.isKeystore(normalisedName) || BinaryGuesser.isBytecode(normalisedName)
-                || BinaryGuesser.isBinaryData(normalisedName) || BinaryGuesser.isExecutable(normalisedName)
-                || BinaryGuesser.isAudio( normalisedName );
-    }
-
-    private static final String[] DATA_EXTENSIONS = {
-            "DAT", "DOC",
-            "NCB", "IDB",
-            "SUO", "XCF",
-            "RAJ", "CERT",
-            "KS", "ODP", "SWF",
-            // fonts
-            "WOFF2", "WOFF", "TTF", "EOT",
-            // JSON structure does not allow comments/license injections in the way RAT expects it
-            "JSON"
-    };
-
-    private static final String[] EXE_EXTENSIONS = {
-            "EXE", "DLL",
-            "LIB", "SO",
-            "A", "EXP",
-    };
-
-    private static final String[] KEYSTORE_EXTENSIONS = {
-            "JKS", "KEYSTORE", "PEM", "CRL", "TRUSTSTORE"
-    };
-
-    private static final String[] IMAGE_EXTENSIONS = {
-            "PNG", "PDF",
-            "GIF", "GIFF",
-            "TIF", "TIFF",
-            "JPG", "JPEG",
-            "ICO", "ICNS",
-            "PSD",
-    };
-
-    private static final String[] BYTECODE_EXTENSIONS = {
-            "CLASS", "PYD",
-            "OBJ", "PYC",
-    };
-
-    private static final String[] AUDIO_EXTENSIONS = {
-            "AIF", "IFF",
-            "M3U", "M4A",
-            "MID", "MP3",
-            "MPA", "WAV",
-            "WMA"
-    };
-    
-    /**
-     * Based on <a href="https://www.apache.org/dev/svn-eol-style.txt">https://www.apache.org/dev/svn-eol-style.txt</a>
-     */
-    private static final String[] NON_BINARY_EXTENSIONS = {
-            "AART",
-            "AC",
-            "AM",
-            "BAT",
-            "C",
-            "CAT",
-            "CGI",
-            "CLASSPATH",
-            "CMD",
-            "CONFIG",
-            "CPP",
-            "CSS",
-            "CWIKI",
-            "DATA",
-            "DCL",
-            "DTD",
-            "EGRM",
-            "ENT",
-            "FT",
-            "FN",
-            "FV",
-            "GRM",
-            "G",
-            "GO",
-            "H",
-            "HTACCESS",
-            "HTML",
-            "IHTML",
-            "IN",
-            "JAVA",
-            "JMX",
-            "JSP",
-            "JS",
-            "JSON",
-            "JUNIT",
-            "JX",
-            "M4",            
-            "MANIFEST",
-            "MD",
-            "MF",
-            "META",
-            "MOD",
-            "N3",
-            "PEN",
-            "PL",
-            "PM",
-            "POD",
-            "POM",
-            "PROJECT",
-            "PROPERTIES",
-            "PY",
-            "RB",
-            "RDF",
-            "RNC",
-            "RNG",
-            "RNX",
-            "ROLES",
-            "RSS",
-            "SH",
-            "SQL",
-            "SVG",
-            "TLD",
-            "TXT",
-            "TYPES",
-            "VM",
-            "VSL",
-            "WSDD",
-            "WSDL",
-            "XARGS",
-            "XCAT",
-            "XCONF",
-            "XEGRM",
-            "XGRM",
-            "XLEX",
-            "XLOG",
-            "XMAP",
-            "XML",
-            "XROLES",
-            "XSAMPLES",
-            "XSD",
-            "XSL",
-            "XSLT",
-            "XSP",
-            "XUL",
-            "XWEB",
-            "XWELCOME",
-    };
-    public static final String JAR_MANIFEST = "MANIFEST.MF";
-    public static final String JAVA = "JAVA";
-    public static final int HIGH_BYTES_RATIO = 100;
-    public static final int TOTAL_READ_RATIO = 30;
-    public static final int NON_ASCII_THRESHOLD = 256;
-    public static final int ASCII_CHAR_THRESHOLD = 8;
-
-    public static boolean isBinary(final Document document) {
-        // TODO: reimplement the binary test algorithm?
-        // TODO: more efficient to move into standard analysis
-        // TODO: then use binary as default
-        return isBinary(document.getName())
-                ||
-                // try a taste
-                isBinaryDocument(document);
-    }
-
-}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/header/LineNumberReader.java b/apache-rat-core/src/main/java/org/apache/rat/header/LineNumberReader.java
index e7f046a..8465ec5 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/header/LineNumberReader.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/header/LineNumberReader.java
@@ -1,65 +1,65 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one   *

- * or more contributor license agreements.  See the NOTICE file *

- * distributed with this work for additional information        *

- * regarding copyright ownership.  The ASF licenses this file   *

- * to you under the Apache License, Version 2.0 (the            *

- * "License"); you may not use this file except in compliance   *

- * with the License.  You may obtain a copy of the License at   *

- *                                                              *

- *   http://www.apache.org/licenses/LICENSE-2.0                 *

- *                                                              *

- * Unless required by applicable law or agreed to in writing,   *

- * software distributed under the License is distributed on an  *

- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *

- * KIND, either express or implied.  See the License for the    *

- * specific language governing permissions and limitations      *

- * under the License.                                           *

- */ 

-package org.apache.rat.header;

-

-import java.io.IOException;

-import java.io.Reader;

-

-/** Replacement for {@link java.io.LineNumberReader}. This class

- * provides a workaround for an incompatibility in the

- * {@link java.io.LineNumberReader}: If the last line in a file

- * isn't terminated with LF, or CR, or CRLF, then that line

- * is counted in Java 16, and beyond, but wasn't counted before.

- * This implementation is compatible with the latter variant,

- * thus providing upwards compatibility for RAT.

- */

-@Deprecated // since 0.17

-public class LineNumberReader {

-	private final Reader parent;

-	private boolean previousCharWasCR = false;

-	private int lineNumber = 0;

-

-	public LineNumberReader(Reader pReader) {

-		parent = pReader;

-	}

-

-	public int read() throws IOException {

-		final int c = parent.read();

-		switch(c) {

-		case 13:

-			previousCharWasCR = true;

-			++lineNumber;

-			break;

-		case 10:

-			if (!previousCharWasCR) {

-				++lineNumber;

-			}

-			previousCharWasCR = false;

-			break;

-		default:

-			previousCharWasCR = false;

-			break;

-		}

-		return c;

-	}

-

-	public int getLineNumber() {

-		return lineNumber;

-	}

-}

+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */ 
+package org.apache.rat.header;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/** Replacement for {@link java.io.LineNumberReader}. This class
+ * provides a workaround for an incompatibility in the
+ * {@link java.io.LineNumberReader}: If the last line in a file
+ * isn't terminated with LF, or CR, or CRLF, then that line
+ * is counted in Java 16, and beyond, but wasn't counted before.
+ * This implementation is compatible with the latter variant,
+ * thus providing upwards compatibility for RAT.
+ */
+@Deprecated // since 0.17
+public class LineNumberReader {
+	private final Reader parent;
+	private boolean previousCharWasCR = false;
+	private int lineNumber = 0;
+
+	public LineNumberReader(Reader pReader) {
+		parent = pReader;
+	}
+
+	public int read() throws IOException {
+		final int c = parent.read();
+		switch(c) {
+		case 13:
+			previousCharWasCR = true;
+			++lineNumber;
+			break;
+		case 10:
+			if (!previousCharWasCR) {
+				++lineNumber;
+			}
+			previousCharWasCR = false;
+			break;
+		default:
+			previousCharWasCR = false;
+			break;
+		}
+		return c;
+	}
+
+	public int getLineNumber() {
+		return lineNumber;
+	}
+}
diff --git a/apache-rat-core/src/main/java/org/apache/rat/report/claim/ClaimStatistic.java b/apache-rat-core/src/main/java/org/apache/rat/report/claim/ClaimStatistic.java
index 3f08d7f..12e070a 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/report/claim/ClaimStatistic.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/report/claim/ClaimStatistic.java
@@ -19,12 +19,12 @@
 
 package org.apache.rat.report.claim;
 
-import java.util.HashMap;
-import java.util.Map;
+import java.util.Collections;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.rat.api.Document;
 
-
 /**
  * This class provides a numerical overview about
  * the report.
@@ -39,63 +39,131 @@
         /** count of generated files */
         GENERATED, 
         /** count of unknown files */
-        UNKNOWN };
+        UNKNOWN }
     
-    private final Map<String, int[]> licenseFamilyNameMap = new HashMap<>();
-    private final Map<String, int[]> licenseFamilyCodeMap = new HashMap<>();
-    private final Map<Document.Type, int[]> documentCategoryMap = new HashMap<>();
-    private final Map<ClaimStatistic.Counter, int[]> counterMap = new HashMap<>();
+    private final ConcurrentHashMap<String, IntCounter> licenseFamilyNameMap = new ConcurrentHashMap<>();
+    private final ConcurrentHashMap<String, IntCounter> licenseFamilyCategoryMap = new ConcurrentHashMap<>();
+    private final ConcurrentHashMap<Document.Type, IntCounter> documentCategoryMap = new ConcurrentHashMap<>();
+    private final ConcurrentHashMap<ClaimStatistic.Counter, IntCounter> counterMap = new ConcurrentHashMap<>();
 
-
+    /** converts null counter to 0.
+     *
+     * @param counter the Counter to retrieve the value from.
+     * @return 0 if counter is {@code null} or counter value otherwise.
+     */
+    private int getValue(IntCounter counter) {
+        return counter == null ? 0 : counter.value();
+    }
     /**
      * Returns the counts for the counter.
      * @param counter the counter to get the value for.
-     * @return Returns the number of files with approved licenses.
+     * @return Returns the number of times the Counter type was seen.
      */
     public int getCounter(Counter counter) {
-        int[] count = counterMap.get(counter);
-        return count == null ? 0 : count[0];
+        return getValue(counterMap.get(counter));
     }
 
     /**
-     * @return Returns a map with the file types. The map keys
-     * are file type names and the map values
-     * are integers with the number of resources matching
-     * the file type.
+     * Increments the counts for the counter.
+     * @param counter the counter to increment.
+     * @param value the value to increment the counter by.
      */
-    public Map<Counter, int[]> getCounterMap() {
-        return counterMap;
-    }
-
-    
-    /**
-     * @return Returns a map with the file types. The map keys
-     * are file type names and the map values
-     * are integers with the number of resources matching
-     * the file type.
-     */
-    public Map<Document.Type, int[]> getDocumentCategoryMap() {
-        return documentCategoryMap;
+    public void incCounter(Counter counter, int value) {
+        counterMap.compute(counter, (k,v)-> v == null? new IntCounter().increment(value) : v.increment(value));
     }
 
     /**
-     * @return Returns a map with the license family codes. The map
-     * keys are license family category names,
-     * the map values are integers with the number of resources
-     * matching the license family code.
+     * Gets the counts for the Document.Type.
+     * @param documentType the Document.Type to get the counter for.
+     * @return Returns the number of times the Document.Type was seen.
      */
-    public Map<String, int[]> getLicenseFamilyCodeMap() {
-        return licenseFamilyCodeMap;
+    public int getCounter(Document.Type documentType) {
+        return getValue(documentCategoryMap.get(documentType));
     }
 
     /**
-     * @return Returns a map with the license family codes. The map
-     * keys are the names of the license families and
-     * the map values are integers with the number of resources
-     * matching the license family name.
+     * Increments the number of times the Document.Type was seen.
+     * @param documentType the Document.Type to increment.
+     * @param value the value to increment the counter by.
      */
-    public Map<String, int[]> getLicenseFileNameMap() {
-        return licenseFamilyNameMap;
+    public void incCounter(Document.Type documentType, int value) {
+        documentCategoryMap.compute(documentType, (k,v)-> v == null? new IntCounter().increment(value) : v.increment(value));
     }
 
+    /**
+     * Gets the counts for the license category.
+     * @param licenseFamilyCategory the license family category to get the count for.
+     * @return the number of times the license family category was seen.
+     */
+    public int getLicenseCategoryCount(String licenseFamilyCategory) {
+        return getValue(licenseFamilyCategoryMap.get(licenseFamilyCategory));
+    }
+
+    /**
+     * Increments the number of times a license family category was seen.
+     * @param licenseFamilyCategory the License family category to increment.
+     * @param value the value to increment the count by.
+     */
+    public void incLicenseCategoryCount(String licenseFamilyCategory, int value) {
+        licenseFamilyCategoryMap.compute(licenseFamilyCategory, (k, v)-> v == null? new IntCounter().increment(value) : v.increment(value));
+    }
+
+    /**
+     * Gets the set of license family categories that were seen.
+     * @return A set of license family categories.
+     */
+    public Set<String> getLicenseFamilyCategories() {
+        return Collections.unmodifiableSet(licenseFamilyCategoryMap.keySet());
+    }
+
+    /**
+     * Gets the set of license family names that were seen.
+     * @return a Set of license family names that were seen.
+     */
+    public Set<String> getLicenseFamilyNames() {
+        return Collections.unmodifiableSet(licenseFamilyNameMap.keySet());
+    }
+
+    /**
+     * Retrieves the number of times a license family name was seen.
+     * @param licenseFilename the license family name to look for.
+     * @return the number of times the license family name was seen.
+     */
+    public int getLicenseFamilyNameCount(String licenseFilename) {
+        return getValue(licenseFamilyNameMap.get(licenseFilename));
+    }
+
+    /**
+     * Increments the license family name count.
+     * @param licenseFamilyName the license family name to increment.
+     * @param value the value to increment the count by.
+     */
+    public void incLicenseFamilyNameCount(String licenseFamilyName, int value) {
+        licenseFamilyNameMap.compute(licenseFamilyName, (k,v)-> v == null? new IntCounter().increment(value) : v.increment(value));
+    }
+
+    /**
+     * A class that wraps an int and allows easy increment and retrieval.
+     */
+    static class IntCounter {
+        int value = 0;
+
+        /**
+         * Increment the count.
+         * @param count the count to increment by (may be negative)
+         * @return this.
+         */
+        public IntCounter increment(int count) {
+            value += count;
+            return this;
+        }
+
+        /**
+         * Retrieves the count.
+         * @return the count contained by this counter.
+         */
+        public int value() {
+            return value;
+        }
+    }
 }
diff --git a/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/AbstractClaimReporter.java b/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/AbstractClaimReporter.java
index ad56a97..5004cb3 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/AbstractClaimReporter.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/AbstractClaimReporter.java
@@ -19,8 +19,6 @@
 
 package org.apache.rat.report.claim.impl;
 
-import java.util.stream.Collectors;
-
 import org.apache.rat.api.Document;
 import org.apache.rat.api.MetaData;
 import org.apache.rat.api.RatException;
@@ -53,28 +51,18 @@
     }
 
     /**
-     * Increment the license family counter
-     * The default implementation does nothing.
-     * @param licenseFamilyName name of the license family
-     */
-    protected void handleLicenseFamilyNameClaim(String licenseFamilyName) {
-        // Does Nothing
-    }
-
-    /**
-     * Increment the license category count.
+     * Increment the counts associated with the license
      * The default implementation does nothing.
      * @param license the license to record the category for.
      */
-    protected void handleHeaderCategoryClaim(ILicense license) {
+    protected void handleLicenseClaim(ILicense license) {
         // Does nothing
     }
 
     @Override
     public void report(Document subject) throws RatException {
         final MetaData metaData = subject.getMetaData();
-        metaData.licenses().forEach(this::handleHeaderCategoryClaim);
-        metaData.licenses().map(lic -> lic.getLicenseFamily().getFamilyName()).collect(Collectors.toSet()).forEach(this::handleLicenseFamilyNameClaim);
+        metaData.licenses().forEach(this::handleLicenseClaim);
         handleDocumentCategoryClaim(metaData.getDocumentType());
         handleApprovedLicenseClaim(metaData);
     }
diff --git a/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/ClaimAggregator.java b/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/ClaimAggregator.java
index 598d4a1..8cc3fef 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/ClaimAggregator.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/report/claim/impl/ClaimAggregator.java
@@ -19,8 +19,6 @@
 
 package org.apache.rat.report.claim.impl;
 
-import java.util.Map;
-
 import org.apache.rat.api.Document;
 import org.apache.rat.api.MetaData;
 import org.apache.rat.api.RatException;
@@ -43,52 +41,27 @@
         this.statistic = statistic;
     }
 
-    private <T> void incMapValue(Map<T, int[]> map, T key, int value) {
-        final int[] num = map.get(key);
-
-        if (num == null) {
-            map.put(key, new int[] { value });
-        } else {
-            num[0] += value;
-        }
-    }
-
     @Override
     protected void handleDocumentCategoryClaim(Document.Type documentType) {
-        incMapValue(statistic.getDocumentCategoryMap(), documentType, 1);
+        statistic.incCounter(documentType, 1);
     }
 
     @Override
     protected void handleApprovedLicenseClaim(MetaData metadata) {
-        incValueMap(statistic.getCounterMap(), ClaimStatistic.Counter.APPROVED, (int) metadata.approvedLicenses().count());
-        incValueMap(statistic.getCounterMap(), ClaimStatistic.Counter.UNAPPROVED,
-                (int) metadata.unapprovedLicenses().count());
-    }
-
-    private void incValueMap(Map<Counter, int[]> map, Counter key, int value) {
-        final int[] num = map.get(key);
-
-        if (num == null) {
-            map.put(key, new int[] { value });
-        } else {
-            num[0] += value;
-        }
+        statistic.incCounter(ClaimStatistic.Counter.APPROVED, (int) metadata.approvedLicenses().count());
+        statistic.incCounter(ClaimStatistic.Counter.UNAPPROVED,  (int) metadata.unapprovedLicenses().count());
     }
 
     @Override
-    protected void handleLicenseFamilyNameClaim(String licenseFamilyName) {
-        incMapValue(statistic.getLicenseFileNameMap(), licenseFamilyName, 1);
-    }
-
-    @Override
-    protected void handleHeaderCategoryClaim(ILicense license) {
+    protected void handleLicenseClaim(ILicense license) {
         String category = license.getLicenseFamily().getFamilyCategory();
         if (category.equals(ILicenseFamily.GENTERATED_CATEGORY)) {
-            incValueMap(statistic.getCounterMap(), Counter.GENERATED, 1);
+            statistic.incCounter(Counter.GENERATED, 1);
         } else if (category.equals(ILicenseFamily.UNKNOWN_CATEGORY)) {
-            incValueMap(statistic.getCounterMap(), Counter.UNKNOWN, 1);
+            statistic.incCounter(Counter.UNKNOWN, 1);
         }
-        incMapValue(statistic.getLicenseFamilyCodeMap(), category, 1);
+        statistic.incLicenseCategoryCount(category, 1);
+        statistic.incLicenseFamilyNameCount(license.getFamilyName(), 1);
     }
 
     @Override
diff --git a/apache-rat-core/src/main/java/org/apache/rat/walker/ArchiveWalker.java b/apache-rat-core/src/main/java/org/apache/rat/walker/ArchiveWalker.java
index e2196f8..d61f735 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/walker/ArchiveWalker.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/walker/ArchiveWalker.java
@@ -34,31 +34,29 @@
 import org.apache.rat.api.Document;
 import org.apache.rat.api.RatException;
 import org.apache.rat.document.impl.ArchiveEntryDocument;
-import org.apache.rat.report.IReportable;
 import org.apache.rat.report.RatReport;
 
 /**
  * Walks various kinds of archives files
  */
-public class ArchiveWalker extends Walker implements IReportable {
+public class ArchiveWalker extends Walker {
 
     /**
      * Constructs a walker.
      * @param file not null
-     * @param filter filters input files (optional), 
-     * or null when no filtering should be performed
-     * @throws FileNotFoundException in case of I/O errors. 
+     * @param filter filters input files (optional), or null when no filtering should be performed
+     * @throws FileNotFoundException in case of I/O errors.
      */
-    public ArchiveWalker(File file, final FilenameFilter filter) throws FileNotFoundException {
+    public ArchiveWalker(final File file, final FilenameFilter filter) throws FileNotFoundException {
         super(file, filter);
     }
-    
+
     /**
      * Run a report over all files and directories in this GZIPWalker,
      * ignoring any files/directories set to be ignored.
-     * 
+     *
      * @param report the defined RatReport to run on this GZIP walker.
-     * 
+     *
      */
     public void run(final RatReport report) throws RatException {
 
@@ -68,12 +66,12 @@
             /* I am really sad that classes aren't first-class objects in
                Java :'( */
             try {
-                input = new TarArchiveInputStream(new GzipCompressorInputStream(Files.newInputStream(file.toPath())));
+                input = new TarArchiveInputStream(new GzipCompressorInputStream(Files.newInputStream(getBaseFile().toPath())));
             } catch (IOException e) {
                 try {
-                    input = new TarArchiveInputStream(new BZip2CompressorInputStream(Files.newInputStream(file.toPath())));
+                    input = new TarArchiveInputStream(new BZip2CompressorInputStream(Files.newInputStream(getBaseFile().toPath())));
                 } catch (IOException e2) {
-                    input = new ZipArchiveInputStream(Files.newInputStream(file.toPath()));
+                    input = new ZipArchiveInputStream(Files.newInputStream(getBaseFile().toPath()));
                 }
             }
 
@@ -105,15 +103,13 @@
 
     /**
      * Report on the given file.
-     * 
+     *
      * @param report the report to process the file with
      * @param file the file to be reported on
      * @throws RatException
      */
-    private void report(final RatReport report, byte[] contents, File file) throws RatException {
-
+    private void report(final RatReport report, final byte[] contents, final File file) throws RatException {
         Document document = new ArchiveEntryDocument(file, contents);
         report.report(document);
-
     }
 }
diff --git a/apache-rat-core/src/main/java/org/apache/rat/walker/DirectoryWalker.java b/apache-rat-core/src/main/java/org/apache/rat/walker/DirectoryWalker.java
index ab75cd3..e3cf3a7 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/walker/DirectoryWalker.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/walker/DirectoryWalker.java
@@ -22,57 +22,33 @@
 import java.io.File;
 import java.io.FilenameFilter;
 import java.util.Arrays;
-import java.util.regex.Pattern;
 
+import org.apache.commons.io.filefilter.FalseFileFilter;
 import org.apache.commons.io.filefilter.IOFileFilter;
-import org.apache.rat.api.Document;
 import org.apache.rat.api.RatException;
 import org.apache.rat.document.impl.FileDocument;
-import org.apache.rat.report.IReportable;
 import org.apache.rat.report.RatReport;
 
 /**
  * Walks directories.
  */
-public class DirectoryWalker extends Walker implements IReportable {
+public class DirectoryWalker extends Walker {
 
     private static final FileNameComparator COMPARATOR = new FileNameComparator();
 
-    private final IOFileFilter directoryFilter;
-
-    /**
-     * Constructs a walker.
-     *
-     * @param file the directory to walk.
-     * @param directoryFilter directory filter to eventually exclude some directories/files from the scan.
-     */
-    public DirectoryWalker(File file, IOFileFilter directoryFilter) {
-        this(file, (FilenameFilter) null, directoryFilter);
-    }
+    private final IOFileFilter directoriesToIgnore;
 
     /**
      * Constructs a walker.
      *
      * @param file the directory to walk (not null).
-     * @param filter filters input files (optional),
+     * @param filesToIgnore filters input files (optional),
      *               or null when no filtering should be performed
-     * @param directoryFilter filters directories (optional), or null when no filtering should be performed.
+     * @param directoriesToIgnore filters directories (optional), or null when no filtering should be performed.
      */
-    public DirectoryWalker(File file, final FilenameFilter filter, IOFileFilter directoryFilter) {
-        super(file.getPath(), file, filter);
-        this.directoryFilter = directoryFilter;
-    }
-
-    /**
-     * Constructs a walker.
-     *
-     * @param file the directory to walk (not null).
-     * @param ignoreNameRegex ignore directories/files with name matching the regex.
-     * @param directoryFilter filters directories (optional), or null when no filtering should be performed.
-     */
-    public DirectoryWalker(File file, final Pattern ignoreNameRegex, IOFileFilter directoryFilter) {
-        super(file.getPath(), file, regexFilter(ignoreNameRegex));
-        this.directoryFilter = directoryFilter;
+    public DirectoryWalker(final File file, final FilenameFilter filesToIgnore, final IOFileFilter directoriesToIgnore) {
+        super(file, filesToIgnore);
+        this.directoriesToIgnore = directoriesToIgnore == null ? FalseFileFilter.FALSE : directoriesToIgnore;
     }
 
     /**
@@ -80,14 +56,10 @@
      *
      * @param report The report to process the directory with
      * @param file   the directory to process
-     * @throws RatException
+     * @throws RatException on error.
      */
-    private void processDirectory(RatReport report, final File file) throws RatException {
-        if (directoryFilter != null) {
-            if (!directoryFilter.accept(file)) {
-                process(report, file);
-            }
-        } else {
+    private void processDirectory(final RatReport report, final File file) throws RatException {
+        if (!directoriesToIgnore.accept(file)) {
             process(report, file);
         }
     }
@@ -97,9 +69,10 @@
      * ignoring any files/directories set to be ignored.
      *
      * @param report the defined RatReport to run on this Directory walker.
+     * @throws RatException on error
      */
     public void run(final RatReport report) throws RatException {
-        process(report, file);
+        process(report, getBaseFile());
     }
 
     /**
@@ -107,9 +80,9 @@
      *
      * @param report the report to use in processing
      * @param file   the run the report against
-     * @throws RatException
+     * @throws RatException on error
      */
-    private void process(final RatReport report, final File file) throws RatException {
+    protected void process(final RatReport report, final File file) throws RatException {
         final File[] files = file.listFiles();
         if (files != null) {
             Arrays.sort(files, COMPARATOR);
@@ -119,16 +92,20 @@
         }
     }
 
+    private boolean isNotIgnoredDirectory(final File file) {
+        return !directoriesToIgnore.accept(file.getParentFile(), file.getName());
+    }
+
     /**
      * Process all directories in a set of file objects, ignoring any directories set to be ignored.
      *
      * @param report the report to use in processing
      * @param files  the files to process (only directories will be processed)
-     * @throws RatException
+     * @throws RatException on error
      */
     private void processDirectories(final RatReport report, final File[] files) throws RatException {
         for (final File file : files) {
-            if (isNotIgnored(file) && file.isDirectory()) {
+            if (file.isDirectory() && isNotIgnoredDirectory(file)) {
                 processDirectory(report, file);
             }
         }
@@ -139,28 +116,13 @@
      *
      * @param report the report to use in processing
      * @param files  the files to process (only files will be processed)
-     * @throws RatException
+     * @throws RatException on error
      */
     private void processNonDirectories(final RatReport report, final File[] files) throws RatException {
         for (final File file : files) {
-            if (isNotIgnored(file) && !file.isDirectory()) {
-                report(report, file);
+            if (!file.isDirectory() && isNotIgnored(file)) {
+                report.report(new FileDocument(file));
             }
         }
-
-    }
-
-    /**
-     * Report on the given file.
-     *
-     * @param report the report to process the file with
-     * @param file   the file to be reported on
-     * @throws RatException
-     */
-    private void report(final RatReport report, File file) throws RatException {
-
-        Document document = new FileDocument(file);
-        report.report(document);
-
     }
 }
diff --git a/apache-rat-core/src/main/java/org/apache/rat/walker/Walker.java b/apache-rat-core/src/main/java/org/apache/rat/walker/Walker.java
index daecbd1..abb8cae 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/walker/Walker.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/walker/Walker.java
@@ -19,52 +19,42 @@
 
 package org.apache.rat.walker;
 
+import org.apache.commons.io.filefilter.FalseFileFilter;
 import org.apache.rat.report.IReportable;
 
 import java.io.File;
 import java.io.FilenameFilter;
-import java.util.regex.Pattern;
 
 /**
  * Abstract walker.
  */
 public abstract class Walker implements IReportable {
 
-    protected final File file;
-    protected final String name;
+    /** The file that this walker started at */
+    private final File baseFile;
 
-    protected final FilenameFilter filter;
+    /** The file name filter that the walker is applying */
+    private final FilenameFilter filesToIgnore;
 
-    protected static FilenameFilter regexFilter(final Pattern pattern) {
-        return (dir, name) -> {
-            final boolean result;
-            if (pattern == null) {
-                result = true;
-            } else {
-                result = !pattern.matcher(name).matches();
-            }
-            return result;
-        };
+    public Walker(final File file, final FilenameFilter filesToIgnore) {
+        this.baseFile = file;
+        this.filesToIgnore = filesToIgnore == null ? FalseFileFilter.FALSE : filesToIgnore;
     }
- 
+
+    /**
+     * Retrieve the file from the constructor.
+     * @return the file from the constructor.
+     */
+    protected File getBaseFile() {
+        return baseFile;
+    }
+
+    /**
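+     * Test if the specified file should be processed, i.e. it is not matched by the files-to-ignore filter.
+     * @param file the file to test.
+     * @return {@code true} if the file is not ignored, {@code false} if the filter marks it as ignored.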
+     */
     protected final boolean isNotIgnored(final File file) {
-        boolean result = false;
-        if (filter != null) {
-            final String name = file.getName();
-            final File dir = file.getParentFile();
-            result = !filter.accept(dir, name);
-        }
-        return !result;
+        return !filesToIgnore.accept(file.getParentFile(), file.getName());
     }
-
-    public Walker(File file, final FilenameFilter filter) {
-        this(file.getPath(), file, filter);
-    }
-
-    protected Walker(final String name, final File file, final FilenameFilter filter) {
-        this.name = name;
-        this.file = file;
-        this.filter = filter;
-    }
-
 }
diff --git a/apache-rat-core/src/test/java/org/apache/rat/ReportConfigurationTest.java b/apache-rat-core/src/test/java/org/apache/rat/ReportConfigurationTest.java
index 9325602..48e962a 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/ReportConfigurationTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/ReportConfigurationTest.java
@@ -57,6 +57,7 @@
 import org.apache.rat.license.LicenseSetFactory.LicenseFilter;
 import org.apache.rat.report.IReportable;
 import org.apache.rat.testhelpers.TestingLicense;
+import org.apache.rat.utils.DefaultLog;
 import org.apache.rat.utils.Log;
 import org.apache.rat.utils.Log.Level;
 import org.apache.rat.utils.ReportingSet.Options;
@@ -186,24 +187,33 @@
     }
 
     @Test
-    public void inputFileFilterTest() {
+    public void filesToIgnoreTest() {
+
+        assertThat(underTest.getFilesToIgnore()).isNull();
+
+        underTest.setFrom(Defaults.builder().build(DefaultLog.INSTANCE));
+        assertThat(underTest.getFilesToIgnore()).isNotNull();
+        assertThat(underTest.getFilesToIgnore()).isExactlyInstanceOf(FalseFileFilter.class);
+
         FilenameFilter filter = mock(FilenameFilter.class);
-        assertThat(underTest.getInputFileFilter()).isNull();
-        underTest.setInputFileFilter(filter);
-        assertThat(underTest.getInputFileFilter()).isEqualTo(filter);
+        underTest.setFilesToIgnore(filter);
+        assertThat(underTest.getFilesToIgnore()).isEqualTo(filter);
     }
 
     @Test
-    public void directoryFilterTest() {
-        assertThat(underTest.getDirectoryFilter()).isNotNull();
-        assertThat(underTest.getDirectoryFilter()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
+    public void directoriesToIgnoreTest() {
+        assertThat(underTest.getDirectoriesToIgnore()).isNull();
 
-        underTest.setDirectoryFilter(DirectoryFileFilter.DIRECTORY);
-        underTest.addDirectoryFilter(NameBasedHiddenFileFilter.HIDDEN);
-        assertThat(underTest.getDirectoryFilter()).isExactlyInstanceOf(AndFileFilter.class);
+        underTest.setFrom(Defaults.builder().build(DefaultLog.INSTANCE));
+        assertThat(underTest.getDirectoriesToIgnore()).isNotNull();
+        assertThat(underTest.getDirectoriesToIgnore()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
 
-        underTest.setDirectoryFilter(null);
-        assertThat(underTest.getDirectoryFilter()).isExactlyInstanceOf(FalseFileFilter.class);
+        underTest.setDirectoriesToIgnore(DirectoryFileFilter.DIRECTORY);
+        underTest.addDirectoryToIgnore(NameBasedHiddenFileFilter.HIDDEN);
+        assertThat(underTest.getDirectoriesToIgnore()).isExactlyInstanceOf(AndFileFilter.class);
+
+        underTest.setDirectoriesToIgnore(null);
+        assertThat(underTest.getDirectoriesToIgnore()).isExactlyInstanceOf(FalseFileFilter.class);
     }
 
     @Test
@@ -549,11 +559,11 @@
         assertThat(config.isAddingLicenses()).isFalse();
         assertThat(config.isAddingLicensesForced()).isFalse();
         assertThat(config.getCopyrightMessage()).isNull();
-        assertThat(config.getInputFileFilter()).isNull();
+        assertThat(config.getFilesToIgnore()).isExactlyInstanceOf(FalseFileFilter.class);
         assertThat(config.isStyleReport()).isTrue();
         assertThat(config.getStyleSheet()).isNotNull().withFailMessage("Stylesheet should not be null");
-        assertThat(config.getDirectoryFilter()).isNotNull().withFailMessage("Directory filter should not be null");
-        assertThat(config.getDirectoryFilter()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
+        assertThat(config.getDirectoriesToIgnore()).isNotNull().withFailMessage("Directory filter should not be null");
+        assertThat(config.getDirectoriesToIgnore()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
         
         validateDefaultApprovedLicenses(config);
         validateDefaultLicenseFamilies(config);
diff --git a/apache-rat-core/src/test/java/org/apache/rat/ReportTest.java b/apache-rat-core/src/test/java/org/apache/rat/ReportTest.java
index db9e8bd..185848c 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/ReportTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/ReportTest.java
@@ -91,13 +91,13 @@
         }
         assertTrue(output.exists());
         String content = FileUtils.readFileToString(output, StandardCharsets.UTF_8);
-        TextUtils.isMatching("Notes: 2$", content);
-        TextUtils.isMatching("Binaries: 2$", content);
-        TextUtils.isMatching("Archives: 1$", content);
-        TextUtils.isMatching("Standards: 8$", content);
-        TextUtils.isMatching("Apache Licensed: 5$", content);
-        TextUtils.isMatching("Generated Documents 1$", content);
-        TextUtils.isMatching("^2 Unknown licenses", content);
+        TextUtils.assertPatternInOutput("Notes: 2$", content);
+        TextUtils.assertPatternInOutput("Binaries: 2$", content);
+        TextUtils.assertPatternInOutput("Archives: 1$", content);
+        TextUtils.assertPatternInOutput("Standards: 8$", content);
+        TextUtils.assertPatternInOutput("Apache Licensed: 5$", content);
+        TextUtils.assertPatternInOutput("Generated Documents: 1$", content);
+        TextUtils.assertPatternInOutput("^2 Unknown Licenses", content);
         assertTrue(content.contains(" S target/test-classes/elements/ILoggerFactory.java"));
         assertTrue(content.contains(" B target/test-classes/elements/Image.png"));
         assertTrue(content.contains(" N target/test-classes/elements/LICENSE"));
@@ -108,7 +108,6 @@
         assertTrue(content.contains(" S target/test-classes/elements/Xml.xml"));
         assertTrue(content.contains(" S target/test-classes/elements/buildr.rb"));
         assertTrue(content.contains(" A target/test-classes/elements/dummy.jar"));
-        assertTrue(content.contains(" B target/test-classes/elements/plain.json"));
         assertTrue(content.contains("!S target/test-classes/elements/sub/Empty.txt"));
         assertTrue(content.contains(" S target/test-classes/elements/tri.txt"));
         assertTrue(content.contains(" G target/test-classes/elements/generated.txt"));
diff --git a/apache-rat-core/src/test/java/org/apache/rat/ReporterTest.java b/apache-rat-core/src/test/java/org/apache/rat/ReporterTest.java
index eb02d09..7ca9b7d 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/ReporterTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/ReporterTest.java
@@ -53,12 +53,12 @@
      * @param doc The document to check/
      * @param xpath the XPath instance to use.
      * @param resource the xpath statement to locate the node.
-     * @param id the expected family for the node (may be null)
-     * @param approval the expected approval value (may be null)
+     * @param licenseInfo the license info for the node (may be null).
      * @param type the type of resource located.
+     * @param hasSample true if a sample from the document should be present.
      * @throws Exception on XPath error.
      */
-    public static void checkNode(Document doc, XPath xpath, String resource, LicenseInfo licenseInfo, String type,
+    private static void checkNode(Document doc, XPath xpath, String resource, LicenseInfo licenseInfo, String type,
             boolean hasSample) throws Exception {
         XmlUtils.getNode(doc, xpath, String.format("/rat-report/resource[@name='%s'][@type='%s']", resource, type));
         if (licenseInfo != null) {
@@ -67,7 +67,7 @@
                             resource, type, licenseInfo.id, licenseInfo.family));
             XmlUtils.getNode(doc, xpath,
                     String.format("/rat-report/resource[@name='%s'][@type='%s']/license[@id='%s'][@approval='%s']",
-                            resource, type, licenseInfo.id, Boolean.toString(licenseInfo.approval)));
+                            resource, type, licenseInfo.id, licenseInfo.approval));
             if (licenseInfo.hasNotes) {
                 XmlUtils.getNode(doc, xpath,
                         String.format("/rat-report/resource[@name='%s'][@type='%s']/license[@id='%s']/notes", resource,
@@ -89,7 +89,7 @@
         final ReportConfiguration configuration = new ReportConfiguration(DefaultLog.INSTANCE);
         configuration.setStyleReport(false);
         configuration.setFrom(defaults);
-        configuration.setReportable(new DirectoryWalker(new File(elementsPath), HiddenFileFilter.HIDDEN));
+        configuration.setReportable(new DirectoryWalker(new File(elementsPath), configuration.getFilesToIgnore(), HiddenFileFilter.HIDDEN));
         configuration.setOut(() -> out);
         new Reporter(configuration).output();
         Document doc = XmlUtils.toDom(new ByteArrayInputStream(out.toByteArray()));
@@ -111,7 +111,6 @@
         checkNode(doc, xPath, "src/test/resources/elements/Xml.xml", apacheLic, "STANDARD", false);
         checkNode(doc, xPath, "src/test/resources/elements/buildr.rb", apacheLic, "STANDARD", false);
         checkNode(doc, xPath, "src/test/resources/elements/dummy.jar", null, "ARCHIVE", false);
-        checkNode(doc, xPath, "src/test/resources/elements/plain.json", null, "BINARY", false);
         checkNode(doc, xPath, "src/test/resources/elements/sub/Empty.txt", new LicenseInfo("?????", false, false),
                 "STANDARD", false);
         checkNode(doc, xPath, "src/test/resources/elements/tri.txt", apacheLic, "STANDARD", false);
@@ -125,7 +124,7 @@
         assertEquals(14, nodeList.getLength());
     }
 
-    private static final String NL = System.getProperty("line.separator");
+    private static final String NL = System.lineSeparator();
     private static final String PARAGRAPH = "*****************************************************";
     private static final String HEADER = NL + PARAGRAPH + NL + //
             "Summary" + NL + //
@@ -133,7 +132,7 @@
             "Generated at: ";
 
     private String documentOut(boolean approved, Type type, String name) {
-        return String.format("^\\Q%s%s %s\\E$", approved ? " " : "!", type.name().substring(0, 1), name);
+        return String.format("^\\Q%s%s %s\\E$", approved ? " " : "!", type.name().charAt(0), name);
     }
 
     private String licenseOut(String family, String name) {
@@ -152,7 +151,7 @@
         final String elementsPath = Resources.getResourceDirectory("elements/Source.java");
         final ReportConfiguration configuration = new ReportConfiguration(DefaultLog.INSTANCE);
         configuration.setFrom(defaults);
-        configuration.setReportable(new DirectoryWalker(new File(elementsPath), HiddenFileFilter.HIDDEN));
+        configuration.setReportable(new DirectoryWalker(new File(elementsPath), configuration.getFilesToIgnore(), HiddenFileFilter.HIDDEN));
         configuration.setOut(() -> out);
         new Reporter(configuration).output();
 
@@ -169,7 +168,8 @@
         TextUtils.assertPatternInOutput("^Generated Documents: 1$", document);
         TextUtils.assertPatternInOutput("^2 Unknown Licenses$", document);
         TextUtils.assertPatternInOutput(
-                "^Files with unapproved licenses:\\s+" + "\\Qsrc/test/resources/elements/Source.java\\E\\s+"
+                "^Files with unapproved licenses:\\s+" //
+                        + "\\Qsrc/test/resources/elements/Source.java\\E\\s+" //
                         + "\\Qsrc/test/resources/elements/sub/Empty.txt\\E\\s",
                 document);
         TextUtils.assertPatternInOutput(documentOut(true, Type.ARCHIVE, "src/test/resources/elements/dummy.jar"),
@@ -193,8 +193,6 @@
                 + licenseOut("AL", "Apache License Version 2.0"), document);
         TextUtils.assertPatternInOutput(documentOut(true, Type.STANDARD, "src/test/resources/elements/TextHttps.txt")
                 + licenseOut("AL", "Apache License Version 2.0"), document);
-        TextUtils.assertPatternInOutput(documentOut(true, Type.BINARY, "src/test/resources/elements/plain.json"),
-                document);
         TextUtils.assertPatternInOutput(documentOut(true, Type.STANDARD, "src/test/resources/elements/tri.txt")
                 + licenseOut("AL", "Apache License Version 2.0") + licenseOut("BSD-3", "BSD 3 clause")
                 + licenseOut("BSD-3", "TMF", "The Telemanagement Forum License"), document);
@@ -210,7 +208,7 @@
         final String elementsPath = Resources.getResourceDirectory("elements/Source.java");
         final ReportConfiguration configuration = new ReportConfiguration(DefaultLog.INSTANCE);
         configuration.setFrom(defaults);
-        configuration.setReportable(new DirectoryWalker(new File(elementsPath), HiddenFileFilter.HIDDEN));
+        configuration.setReportable(new DirectoryWalker(new File(elementsPath), configuration.getFilesToIgnore(), HiddenFileFilter.HIDDEN));
         configuration.setOut(() -> out);
         configuration.setStyleSheet(this.getClass().getResource("/org/apache/rat/unapproved-licenses.xsl"));
         new Reporter(configuration).output();
@@ -224,7 +222,7 @@
         TextUtils.assertPatternInOutput("\\Qsrc/test/resources/elements/sub/Empty.txt\\E", document);
     }
 
-    private class LicenseInfo {
+    private static class LicenseInfo {
         String id;
         String family;
         boolean approval;
diff --git a/apache-rat-core/src/test/java/org/apache/rat/analysis/AnalyserFactoryTest.java b/apache-rat-core/src/test/java/org/apache/rat/analysis/AnalyserFactoryTest.java
index ca8bd0e..9484b6f 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/analysis/AnalyserFactoryTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/analysis/AnalyserFactoryTest.java
@@ -18,7 +18,6 @@
  */
 package org.apache.rat.analysis;
 
-import static org.junit.Assert.assertTrue;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
@@ -31,6 +30,7 @@
 import org.apache.rat.report.claim.impl.xml.SimpleXmlClaimReporter;
 import org.apache.rat.report.xml.writer.impl.base.XmlWriter;
 import org.apache.rat.test.utils.Resources;
+import org.apache.rat.testhelpers.TextUtils;
 import org.apache.rat.utils.DefaultLog;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -72,13 +72,13 @@
                 " * specific language governing permissions and limitations", //
                 " * under the License.", //
                 " ]]></sample></resource>" };
-        
+
         final MonolithicFileDocument document = new MonolithicFileDocument(
                 Resources.getResourceFile("/elements/Text.txt"));
         analyser.analyse(document);
         reporter.report(document);
         String result = out.toString();
-        for (String exp : expected ) { 
+        for (String exp : expected) {
             assertTrue(result.contains(exp), () -> exp);
         }
     }
@@ -122,4 +122,57 @@
         assertEquals("<resource name='src/test/resources/elements/dummy.jar' type='ARCHIVE'/>", out.toString(),
                 "Open archive element");
     }
+
+    @Test
+    public void RAT211_bmp_Test() throws Exception {
+        MonolithicFileDocument document = new MonolithicFileDocument(
+                Resources.getResourceFile("/jira/RAT211/side_left.bmp"));
+        analyser.analyse(document);
+        reporter.report(document);
+        assertEquals("<resource name='src/test/resources/jira/RAT211/side_left.bmp' type='BINARY'/>", out.toString(),
+                "Open archive element");
+    }
+
+    @Test
+    public void RAT211_dia_Test() throws Exception {
+        MonolithicFileDocument document = new MonolithicFileDocument(
+                Resources.getResourceFile("/jira/RAT211/leader-election-message-arrives.dia"));
+        analyser.analyse(document);
+        reporter.report(document);
+        assertEquals(
+                "<resource name='src/test/resources/jira/RAT211/leader-election-message-arrives.dia' type='ARCHIVE'/>",
+                out.toString(), "Open archive element");
+    }
+
+    @Test
+    public void RAT147_unix_Test() throws Exception {
+        MonolithicFileDocument document = new MonolithicFileDocument(
+                Resources.getResourceFile("/jira/RAT147/unix-newlines.txt.bin"));
+        analyser.analyse(document);
+        reporter.report(document);
+        String result = out.toString();
+        TextUtils.assertPatternInOutput(
+                "<resource name='src/test/resources/jira/RAT147/unix-newlines.txt.bin' type='STANDARD'",
+                result);
+        TextUtils.assertPatternInOutput("sentence 1.$", result);
+        TextUtils.assertPatternInOutput("^sentence 2.$", result);
+        TextUtils.assertPatternInOutput("^sentence 3.$", result);
+        TextUtils.assertPatternInOutput("^sentence 4.$", result);
+    }
+
+    @Test
+    public void RAT147_windows_Test() throws Exception {
+        MonolithicFileDocument document = new MonolithicFileDocument(
+                Resources.getResourceFile("/jira/RAT147/windows-newlines.txt.bin"));
+        analyser.analyse(document);
+        reporter.report(document);
+        String result = out.toString();
+        TextUtils.assertPatternInOutput(
+                "<resource name='src/test/resources/jira/RAT147/windows-newlines.txt.bin' type='STANDARD'",
+                result);
+        TextUtils.assertPatternInOutput("sentence 1.$", result);
+        TextUtils.assertPatternInOutput("^sentence 2.$", result);
+        TextUtils.assertPatternInOutput("^sentence 3.$", result);
+        TextUtils.assertPatternInOutput("^sentence 4.$", result);
+    }
 }
diff --git a/apache-rat-core/src/test/java/org/apache/rat/analysis/TikaProcessorTest.java b/apache-rat-core/src/test/java/org/apache/rat/analysis/TikaProcessorTest.java
new file mode 100644
index 0000000..aef464d
--- /dev/null
+++ b/apache-rat-core/src/test/java/org/apache/rat/analysis/TikaProcessorTest.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */
+package org.apache.rat.analysis;
+
+import org.apache.rat.api.Document;
+import org.apache.rat.api.MetaData;
+import org.apache.rat.document.RatDocumentAnalysisException;
+import org.apache.rat.document.impl.FileDocument;
+import org.apache.rat.report.claim.ClaimStatistic;
+import org.apache.rat.test.utils.Resources;
+import org.apache.rat.utils.DefaultLog;
+import org.apache.tika.mime.MimeTypes;
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.MalformedInputException;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Objects;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+public class TikaProcessorTest {
+    /**
+     * Earlier code swallowed a MalformedInputException and returned false
+     * because the encoding of the stream was different from the platform's
+     * default encoding. This test expects a RatDocumentAnalysisException instead.
+     *
+     * @throws Exception on unexpected failure.
+     * @see "RAT-81"
+     */
+    @Test
+    public void RAT81() throws Exception {
+        // create a document that throws a MalformedInputException
+        Document doc = getDocument(new InputStream() {
+            @Override
+            public int read() throws IOException {
+                throw new MalformedInputException(0);
+            }
+        });
+        assertThrows(RatDocumentAnalysisException.class, () -> TikaProcessor.process(DefaultLog.INSTANCE, doc));
+    }
+
+    @Test
+    public void UTF16_input() throws Exception {
+        Document doc = getDocument(Resources.getResourceStream("/binaries/UTF16_with_signature.xml"));
+        TikaProcessor.process(DefaultLog.INSTANCE, doc);
+        assertEquals(Document.Type.STANDARD, doc.getMetaData().getDocumentType());
+    }
+
+    @Test
+    public void UTF8_input() throws Exception {
+        FileDocument doc = new FileDocument(Resources.getResourceFile("/binaries/UTF8_with_signature.xml"));
+        TikaProcessor.process(DefaultLog.INSTANCE, doc);
+        assertEquals(Document.Type.STANDARD, doc.getMetaData().getDocumentType());
+    }
+
+    @Test
+    public void missNamedBinaryTest() throws Exception {
+        FileDocument doc = new FileDocument(Resources.getResourceFile("/binaries/Image-png.not"));
+        TikaProcessor.process(DefaultLog.INSTANCE, doc);
+        assertEquals(Document.Type.BINARY, doc.getMetaData().getDocumentType());
+    }
+
+
+    @Test
+    public void plainTextTest() throws Exception {
+        FileDocument doc = new FileDocument(Resources.getResourceFile("/elements/Text.txt"));
+        TikaProcessor.process(DefaultLog.INSTANCE, doc);
+        assertEquals(Document.Type.STANDARD, doc.getMetaData().getDocumentType());
+    }
+
+    @Test
+    public void emptyFileTest() throws Exception {
+        FileDocument doc = new FileDocument(Resources.getResourceFile("/elements/sub/Empty.txt"));
+        TikaProcessor.process(DefaultLog.INSTANCE, doc);
+        assertEquals(Document.Type.STANDARD, doc.getMetaData().getDocumentType());
+    }
+
+    @Test
+    public void javaFileWithChineseCharacters_RAT301() throws Exception {
+        FileDocument doc = new FileDocument(Resources.getResourceFile("/tikaFiles/standard/ChineseCommentsJava.java"));
+        TikaProcessor.process(DefaultLog.INSTANCE, doc);
+        assertEquals(Document.Type.STANDARD, doc.getMetaData().getDocumentType());
+    }
+
+    @Test
+    public void testTikaFiles() throws RatDocumentAnalysisException, IOException {
+        File dir = new File("src/test/resources/tikaFiles");
+        Map<String, Document.Type> unseenMime = TikaProcessor.getDocumentTypeMap();
+        ClaimStatistic statistic = new ClaimStatistic();
+        for (Document.Type docType : Document.Type.values()) {
+            File typeDir = new File(dir, docType.name().toLowerCase(Locale.ROOT));
+            if (typeDir.isDirectory()) {
+                for (File file : Objects.requireNonNull(typeDir.listFiles())) {
+                    Document doc = new FileDocument(file);
+                    String mimeType = TikaProcessor.process(DefaultLog.INSTANCE, doc);
+                    statistic.incCounter(doc.getMetaData().getDocumentType(), 1);
+                    assertEquals( docType, doc.getMetaData().getDocumentType(), () -> "Wrong type for "+file.toString());
+                    unseenMime.remove(mimeType);
+                }
+            }
+        }
+        System.out.println( "untested mime types");
+        unseenMime.keySet().forEach(System.out::println);
+        for (Document.Type type : Document.Type.values()) {
+            System.out.format("Tested %s %s files%n", statistic.getCounter(type), type );
+        }
+    }
+
+
+    /**
+     * Build a document backed by the specified input stream.
+     * @return a Document whose {@code inputStream()} returns the given stream.
+     */
+    private static Document getDocument(final InputStream stream) {
+        MetaData metaData = new MetaData();
+
+        Document doc = new Document() {
+            @Override
+            public String getName() {
+                return "Testing Document";
+            }
+
+            @Override
+            public Reader reader() throws IOException {
+                return new InputStreamReader(inputStream());
+            }
+
+            @Override
+            public InputStream inputStream() throws IOException {
+                return stream;
+            }
+
+            @Override
+            public MetaData getMetaData() {
+                return metaData;
+            }
+
+            @Override
+            public boolean isComposite() {
+                return false;
+            }
+        };
+        return doc;
+    }
+}
diff --git a/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/ArchiveGuesserTest.java b/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/ArchiveGuesserTest.java
deleted file mode 100644
index 919a17a..0000000
--- a/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/ArchiveGuesserTest.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one   *
- * or more contributor license agreements.  See the NOTICE file *
- * distributed with this work for additional information        *
- * regarding copyright ownership.  The ASF licenses this file   *
- * to you under the Apache License, Version 2.0 (the            *
- * "License"); you may not use this file except in compliance   *
- * with the License.  You may obtain a copy of the License at   *
- *                                                              *
- *   http://www.apache.org/licenses/LICENSE-2.0                 *
- *                                                              *
- * Unless required by applicable law or agreed to in writing,   *
- * software distributed under the License is distributed on an  *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
- * KIND, either express or implied.  See the License for the    *
- * specific language governing permissions and limitations      *
- * under the License.                                           *
- */ 
-package org.apache.rat.document.impl.guesser;
-
-import org.apache.rat.document.MockDocument;
-import org.junit.jupiter.api.Test;
-
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-public class ArchiveGuesserTest {
-
-    @Test
-    public void matches() {
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.jar")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.tar.gz")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.zip")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.tar")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.bz")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.bz2")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.odb")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.odf")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.odg")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.odp")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.ods")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.odt")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.sar")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.har")));
-        assertTrue(ArchiveGuesser.isArchive(new MockDocument("42.wsr")));
-    }
-    
-    @Test
-    public void isArchive() {
-        assertTrue(ArchiveGuesser.isArchive("42.jar"));
-        assertTrue(ArchiveGuesser.isArchive("42.tar.gz"));
-        assertTrue(ArchiveGuesser.isArchive("42.zip"));
-        assertTrue(ArchiveGuesser.isArchive("42.tar"));
-        assertTrue(ArchiveGuesser.isArchive("42.bz"));
-        assertTrue(ArchiveGuesser.isArchive("42.bz2"));
-        assertTrue(ArchiveGuesser.isArchive("42.odb"));
-        assertTrue(ArchiveGuesser.isArchive("42.odf"));
-        assertTrue(ArchiveGuesser.isArchive("42.odg"));
-        assertTrue(ArchiveGuesser.isArchive("42.odp"));
-        assertTrue(ArchiveGuesser.isArchive("42.ods"));
-        assertTrue(ArchiveGuesser.isArchive("42.odt"));
-        assertTrue(ArchiveGuesser.isArchive("42.sar"));
-        assertTrue(ArchiveGuesser.isArchive("42.har"));
-        assertTrue(ArchiveGuesser.isArchive("42.wsr"));
-    }
-
-}
diff --git a/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java b/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java
deleted file mode 100644
index 43843f6..0000000
--- a/apache-rat-core/src/test/java/org/apache/rat/document/impl/guesser/BinaryGuesserTest.java
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one   *
- * or more contributor license agreements.  See the NOTICE file *
- * distributed with this work for additional information        *
- * regarding copyright ownership.  The ASF licenses this file   *
- * to you under the Apache License, Version 2.0 (the            *
- * "License"); you may not use this file except in compliance   *
- * with the License.  You may obtain a copy of the License at   *
- *                                                              *
- *   http://www.apache.org/licenses/LICENSE-2.0                 *
- *                                                              *
- * Unless required by applicable law or agreed to in writing,   *
- * software distributed under the License is distributed on an  *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
- * KIND, either express or implied.  See the License for the    *
- * specific language governing permissions and limitations      *
- * under the License.                                           *
- */
-package org.apache.rat.document.impl.guesser;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.rat.document.MockDocument;
-import org.apache.rat.document.impl.FileDocument;
-import org.apache.rat.test.utils.Resources;
-import org.junit.jupiter.api.Test;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.util.Arrays;
-import java.util.List;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-public class BinaryGuesserTest {
-
-    private static final List<String> BINARY_FILES = Arrays.asList(//
-            "image.png",//
-            "image.pdf",//
-            "image.psd",//
-            "image.gif",//
-            "image.giff",//
-            "image.jpg",//
-            "image.jpeg",//
-            "image.exe",//
-            "Whatever.class",//
-            "data.dat",//
-            "libicuda.so.34",//
-            "my.truststore",//
-            //"foo.Java", //
-            //"manifest.Mf",//
-            "deprecatedtechnology.swf",
-            "xyz.aif",
-            "abc.iff",
-            // Audio Files
-            "test.m3u", "test.m4a",
-            "test-audio.mid", "test-audio.mp3",
-            "test-audio.mpa", "test-audio.wav",
-            "test-audio.wma"
-    );
-
-    @Test
-    public void testMatches() {
-        for (String name : BINARY_FILES) {
-            assertTrue(BinaryGuesser.isBinary(new MockDocument(name)), ()->"'" + name + "' should be detected as a binary");
-        }
-
-    }
-
-    @Test
-    public void testIsBinary() {
-        for (String name : BINARY_FILES) {
-            assertTrue(BinaryGuesser.isBinary(name), ()->"'" + name + "' should be detected as a binary");
-        }
-    }
-
-    /**
-     * Used to swallow a MalformedInputException and return false
-     * because the encoding of the stream was different from the
-     * platform's default encoding.
-     *
-     * @throws Exception
-     * @see "RAT-81"
-     */
-    @Test
-    public void binaryWithMalformedInputRAT81() throws Exception {
-        FileDocument doc = new FileDocument(Resources.getResourceFile("/binaries/UTF16_with_signature.xml"));
-        Reader r = doc.reader(); // this will fail test if file is not readable
-        try {
-            char[] dummy = new char[100];
-            r.read(dummy);
-            // if we get here, the UTF-16 encoded file didn't throw
-            // any exception, try the UTF-8 encoded one
-            r.close();
-            r = null; // ensure we detect failure to read second file
-            doc = new FileDocument(Resources.getResourceFile("/binaries/UTF8_with_signature.xml"));
-            r = doc.reader();
-            r.read(dummy);
-            // still here?  can't test on this platform
-            System.err.println("Skipping testBinaryWithMalformedInput");
-        } catch (IOException e) {
-            if (r != null) {
-                IOUtils.closeQuietly(r);
-            } else {
-                throw e; // could not open the second file
-            }
-            r = null;
-            assertTrue(BinaryGuesser.isBinary(doc), "Expected binary for " + doc.getName());
-        } finally {
-            IOUtils.closeQuietly(r);
-        }
-    }
-
-    @Test
-    public void realBinaryContent() throws IOException {
-        // This test is not accurate on all platforms
-        final String encoding = System.getProperty("file.encoding");
-        final boolean isBinary = BinaryGuesser.isBinary(new FileDocument(Resources.getResourceFile("/binaries/Image-png.not")));
-        if (encoding.startsWith("ANSI")) {
-            assertTrue(isBinary);
-        } else {
-            if (isBinary) {
-                System.out.println("BinaryGuesserTest.realBinaryContent() succeeded when using encoding " + encoding);
-            } else {
-                System.err.println("BinaryGuesserTest.realBinaryContent() failed when using encoding " + encoding);
-            }
-        }
-    }
-
-    @Test
-    public void textualContent() throws IOException {
-        assertFalse(BinaryGuesser.isBinary(new FileDocument(Resources.getResourceFile("/elements/Text.txt"))));
-    }
-
-    @Test
-    public void emptyFile() throws IOException {
-        assertFalse(BinaryGuesser.isBinary(new FileDocument(Resources.getResourceFile("/elements/sub/Empty.txt"))));
-    }
-
-    @Test
-    public void testFileEncodingCanBeSetAndHasFallbackInCaseOfErrors() {
-        System.setProperty(BinaryGuesser.FILE_ENCODING, "shouldThrowAnExceptionBecauseNotFound");
-        assertEquals("UTF-8", BinaryGuesser.getFileEncodingOrUTF8AsFallback().displayName());
-
-        final String usAscii = "US-ASCII";
-        System.setProperty(BinaryGuesser.FILE_ENCODING, usAscii);
-        assertEquals(usAscii, BinaryGuesser.getFileEncodingOrUTF8AsFallback().displayName());
-    }
-}
diff --git a/apache-rat-core/src/test/java/org/apache/rat/report/ConfigurationReportTest.java b/apache-rat-core/src/test/java/org/apache/rat/report/ConfigurationReportTest.java
index af7a81b..9759c4f 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/report/ConfigurationReportTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/report/ConfigurationReportTest.java
@@ -18,24 +18,19 @@
  */
 package org.apache.rat.report;
 
-import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.io.StringWriter;
 import java.util.List;
-import java.util.Optional;
 
 import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathFactory;
 
 import org.apache.rat.Defaults;
 import org.apache.rat.ReportConfiguration;
-import org.apache.rat.config.parameters.Description;
 import org.apache.rat.configuration.MatcherBuilderTracker;
-import org.apache.rat.license.ILicense;
 import org.apache.rat.license.LicenseSetFactory.LicenseFilter;
 import org.apache.rat.report.xml.writer.IXmlWriter;
 import org.apache.rat.report.xml.writer.impl.base.XmlWriter;
diff --git a/apache-rat-core/src/test/java/org/apache/rat/report/xml/XmlReportFactoryTest.java b/apache-rat-core/src/test/java/org/apache/rat/report/xml/XmlReportFactoryTest.java
index c9d67a2..e386f93 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/report/xml/XmlReportFactoryTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/report/xml/XmlReportFactoryTest.java
@@ -31,6 +31,7 @@
 
 import org.apache.commons.io.filefilter.HiddenFileFilter;
 import org.apache.rat.ConfigurationException;
+import org.apache.rat.Defaults;
 import org.apache.rat.ReportConfiguration;
 import org.apache.rat.api.Document;
 import org.apache.rat.license.ILicense;
@@ -50,8 +51,7 @@
 
 public class XmlReportFactoryTest {
 
-    private static final Pattern IGNORE_EMPTY = Pattern.compile(".svn|Empty.txt");
-    private ILicenseFamily family = ILicenseFamily.builder().setLicenseFamilyCategory("TEST")
+    private final ILicenseFamily family = ILicenseFamily.builder().setLicenseFamilyCategory("TEST")
             .setLicenseFamilyName("Testing family").build();
 
     private StringWriter out;
@@ -71,12 +71,13 @@
     @Test
     public void standardReport() throws Exception {
         final String elementsPath = Resources.getResourceDirectory("elements/Source.java");
-
-        final TestingLicense testingLicense = new TestingLicense(new TestingMatcher(true), family);
-
-        DirectoryWalker directory = new DirectoryWalker(new File(elementsPath), IGNORE_EMPTY, HiddenFileFilter.HIDDEN);
-        final ClaimStatistic statistic = new ClaimStatistic();
         final ReportConfiguration configuration = new ReportConfiguration(DefaultLog.INSTANCE);
+        final TestingLicense testingLicense = new TestingLicense(new TestingMatcher(true), family);
+        configuration.setFrom(Defaults.builder().build(DefaultLog.INSTANCE));
+
+        DirectoryWalker directory = new DirectoryWalker(new File(elementsPath), configuration.getFilesToIgnore(), HiddenFileFilter.HIDDEN);
+        final ClaimStatistic statistic = new ClaimStatistic();
+
         configuration.addLicense(testingLicense);
         RatReport report = XmlReportFactory.createStandardReport(writer, statistic, configuration);
         report.startReport();
@@ -88,10 +89,10 @@
                 "Preamble and document element are OK");
 
         assertTrue(XmlUtils.isWellFormedXml(output), "Is well formed");
-        assertEquals(2, statistic.getDocumentCategoryMap().get(Document.Type.BINARY)[0], "Binary files");
-        assertEquals(2, statistic.getDocumentCategoryMap().get(Document.Type.NOTICE)[0], "Notice files");
-        assertEquals(8, statistic.getDocumentCategoryMap().get(Document.Type.STANDARD)[0], "Standard files");
-        assertEquals(1, statistic.getDocumentCategoryMap().get(Document.Type.ARCHIVE)[0], "Archives");
+        assertEquals(2, statistic.getCounter(Document.Type.BINARY), "Binary files");
+        assertEquals(2, statistic.getCounter(Document.Type.NOTICE), "Notice files");
+        assertEquals(8, statistic.getCounter(Document.Type.STANDARD), "Standard files");
+        assertEquals(1, statistic.getCounter(Document.Type.ARCHIVE), "Archives");
     }
 
     @Test
diff --git a/apache-rat-core/src/test/java/org/apache/rat/walker/DirectoryWalkerTest.java b/apache-rat-core/src/test/java/org/apache/rat/walker/DirectoryWalkerTest.java
index daef24b..0fe276a 100644
--- a/apache-rat-core/src/test/java/org/apache/rat/walker/DirectoryWalkerTest.java
+++ b/apache-rat-core/src/test/java/org/apache/rat/walker/DirectoryWalkerTest.java
@@ -19,6 +19,7 @@
 package org.apache.rat.walker;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.File;
 import java.io.FileWriter;
@@ -26,48 +27,100 @@
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.commons.io.filefilter.FalseFileFilter;
 import org.apache.rat.api.Document;
 import org.apache.rat.api.RatException;
+import org.apache.rat.document.impl.DocumentImplUtils;
 import org.apache.rat.report.RatReport;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
 
 public class DirectoryWalkerTest {
-    
-    @TempDir
-	private File toWalk;
-    
-    @Test
-    public void walk() throws IOException, RatException {
-        File regular = new File(toWalk, "regular");
-        regular.mkdir();
-        File regularFile = new File(regular, "test");
-        try (FileWriter writer = new FileWriter(regularFile)) {
-            writer.write("test");
+
+	private static File toWalk;
+
+    private static void fileWriter(File dir, String name, String contents) throws IOException {
+        try (FileWriter writer = new FileWriter(new File(dir, name))) {
+            writer.write(contents);
             writer.flush();
         }
+    }
+    @BeforeAll
+    public static void setUp(@TempDir File dir) throws Exception {
+        toWalk = dir;
+        /*
+        Create a directory structure like this:
+
+            regular
+                regularFile
+                .hiddenFile
+            .hidden
+                regularFile
+                .hiddenFile
+         */
+        File regular = new File(toWalk, "regular");
+        regular.mkdir();
+        fileWriter(regular, "regularFile", "regular file");
+        fileWriter(regular, ".hiddenFile", "hidden file");
 
         File hidden = new File(toWalk, ".hidden");
         hidden.mkdir();
-        File hiddenFile = new File(hidden, "test");
+        fileWriter(hidden, "regularFile", "regular file");
+        fileWriter(hidden, ".hiddenFile", "hidden file");
+    }
 
-        try (FileWriter writer = new FileWriter(hiddenFile)) {
-            writer.write("test");
-            writer.flush();
-        }
+    private String expectedName(String name) {
+        return DocumentImplUtils.toName(toWalk)+name;
+    }
 
-        DirectoryWalker walker = new DirectoryWalker(toWalk, NameBasedHiddenFileFilter.HIDDEN);
+
+    
+    @Test
+    public void noFiltersTest() throws IOException, RatException {
+        DirectoryWalker walker = new DirectoryWalker(toWalk, null,null);
         List<String> scanned = new ArrayList<>();
         walker.run(new TestRatReport(scanned));
+        String[] expected = {"/regular/regularFile", "/regular/.hiddenFile", "/.hidden/regularFile", "/.hidden/.hiddenFile"};
+        assertEquals(4, scanned.size());
+        for (String ex : expected) {
+            assertTrue(scanned.contains(expectedName(ex)), ()-> String.format("Missing %s", expectedName(ex)));
+        }
+    }
 
-        assertEquals(1, scanned.size());
-
-        walker = new DirectoryWalker(toWalk, FalseFileFilter.FALSE);
-        scanned = new ArrayList<>();
+    @Test
+    public void noHiddenFileFiltersTest() throws IOException, RatException {
+        DirectoryWalker walker = new DirectoryWalker(toWalk, NameBasedHiddenFileFilter.HIDDEN,null);
+        List<String> scanned = new ArrayList<>();
         walker.run(new TestRatReport(scanned));
-
+        String[] expected = {"/regular/regularFile", "/.hidden/regularFile"};
         assertEquals(2, scanned.size());
+        for (String ex : expected) {
+            assertTrue(scanned.contains(expectedName(ex)), ()-> String.format("Missing %s", expectedName(ex)));
+        }
+    }
+
+    @Test
+    public void noHiddenDirectoryFiltersTest() throws IOException, RatException {
+        DirectoryWalker walker = new DirectoryWalker(toWalk, null, NameBasedHiddenFileFilter.HIDDEN);
+        List<String> scanned = new ArrayList<>();
+        walker.run(new TestRatReport(scanned));
+        String[] expected = {"/regular/regularFile", "/regular/.hiddenFile"};
+        assertEquals(2, scanned.size());
+        for (String ex : expected) {
+            assertTrue(scanned.contains(expectedName(ex)), ()-> String.format("Missing %s", expectedName(ex)));
+        }
+    }
+
+    @Test
+    public void noHiddenDirectoryAndNoHiddenFileFiltersTest() throws IOException, RatException {
+        DirectoryWalker walker = new DirectoryWalker(toWalk, NameBasedHiddenFileFilter.HIDDEN, NameBasedHiddenFileFilter.HIDDEN);
+        List<String> scanned = new ArrayList<>();
+        walker.run(new TestRatReport(scanned));
+        String[] expected = {"/regular/regularFile"};
+        assertEquals(1, scanned.size());
+        for (String ex : expected) {
+            assertTrue(scanned.contains(expectedName(ex)), ()-> String.format("Missing %s", expectedName(ex)));
+        }
     }
 
     class TestRatReport implements RatReport {
@@ -92,7 +145,5 @@
         public void endReport() {
             // no-op
         }
-
     }
-
 }
diff --git a/apache-rat-core/src/test/resources/jira/RAT147/unix-newlines.txt.bin b/apache-rat-core/src/test/resources/jira/RAT147/unix-newlines.txt.bin
new file mode 100644
index 0000000..2c498da
--- /dev/null
+++ b/apache-rat-core/src/test/resources/jira/RAT147/unix-newlines.txt.bin
@@ -0,0 +1,8 @@
+sentence 1.
+sentence 2.
+
+
+sentence 3.
+
+sentence 4.
+
diff --git a/apache-rat-core/src/test/resources/jira/RAT147/windows-newlines.txt.bin b/apache-rat-core/src/test/resources/jira/RAT147/windows-newlines.txt.bin
new file mode 100644
index 0000000..a0adb98
--- /dev/null
+++ b/apache-rat-core/src/test/resources/jira/RAT147/windows-newlines.txt.bin
@@ -0,0 +1,9 @@
+sentence 1.

+sentence 2.

+

+

+sentence 3.

+

+sentence 4.

+

+

diff --git a/apache-rat-core/src/test/resources/jira/RAT211/leader-election-message-arrives.dia b/apache-rat-core/src/test/resources/jira/RAT211/leader-election-message-arrives.dia
new file mode 100644
index 0000000..41fa530
--- /dev/null
+++ b/apache-rat-core/src/test/resources/jira/RAT211/leader-election-message-arrives.dia
Binary files differ
diff --git a/apache-rat-core/src/test/resources/jira/RAT211/side_left.bmp b/apache-rat-core/src/test/resources/jira/RAT211/side_left.bmp
new file mode 100644
index 0000000..c1d3d69
--- /dev/null
+++ b/apache-rat-core/src/test/resources/jira/RAT211/side_left.bmp
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/README.md b/apache-rat-core/src/test/resources/tikaFiles/README.md
new file mode 100644
index 0000000..55e546c
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/README.md
@@ -0,0 +1,11 @@
+Files in this directory are parsed by the Tika processor, and the test verifies that the proper document type is returned.
+
+The `standard` subdirectory contains files that are returned as STANDARD document types.
+
+The `binary` subdirectory contains files that are returned as BINARY types.
+
+The `notice` subdirectory contains files that are returned as NOTICE types.
+
+The `archive` subdirectory contains files that are returned as ARCHIVE types.
+
+The `TikaProcessorTest.testTikaFiles()` test automatically runs against the files in these directories.  To add a new file to the test, just place it in the proper directory.
diff --git a/apache-rat-core/src/test/resources/tikaFiles/archive/dummy.jar b/apache-rat-core/src/test/resources/tikaFiles/archive/dummy.jar
new file mode 100644
index 0000000..ccbf9f3
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/archive/dummy.jar
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Defaults.class b/apache-rat-core/src/test/resources/tikaFiles/binary/Defaults.class
new file mode 100644
index 0000000..2a6fd7c
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Defaults.class
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Image.gif b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.gif
new file mode 100644
index 0000000..202787b
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.gif
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Image.jpeg b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.jpeg
new file mode 100644
index 0000000..68a0505
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.jpeg
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Image.jpg b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.jpg
new file mode 100644
index 0000000..7ad7df6
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.jpg
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Image.png b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.png
new file mode 100644
index 0000000..ed8f507
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.png
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Image.psd b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.psd
new file mode 100644
index 0000000..58cff9a
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.psd
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/Image.xcf b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.xcf
new file mode 100644
index 0000000..eaf4d03
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.xcf
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/KeyStore.jks b/apache-rat-core/src/test/resources/tikaFiles/binary/KeyStore.jks
new file mode 100644
index 0000000..9867401
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/KeyStore.jks
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/file.json b/apache-rat-core/src/test/resources/tikaFiles/binary/file.json
new file mode 100644
index 0000000..b676b6f
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/file.json
@@ -0,0 +1,7 @@
+{
+  "Just": "a",
+  "plain": [
+    "json",
+    "file"
+  ]
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/maven_libjansi.so b/apache-rat-core/src/test/resources/tikaFiles/binary/maven_libjansi.so
new file mode 100755
index 0000000..c56b19d
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/maven_libjansi.so
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.aif b/apache-rat-core/src/test/resources/tikaFiles/binary/test.aif
new file mode 100644
index 0000000..97eac1d
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.aif
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.au b/apache-rat-core/src/test/resources/tikaFiles/binary/test.au
new file mode 100644
index 0000000..20d1bd2
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.au
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.exe b/apache-rat-core/src/test/resources/tikaFiles/binary/test.exe
new file mode 100644
index 0000000..a45435f
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.exe
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.flv b/apache-rat-core/src/test/resources/tikaFiles/binary/test.flv
new file mode 100644
index 0000000..d35e9bb
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.flv
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.iff b/apache-rat-core/src/test/resources/tikaFiles/binary/test.iff
new file mode 100644
index 0000000..b3a6377
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.iff
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.keystore b/apache-rat-core/src/test/resources/tikaFiles/binary/test.keystore
new file mode 100644
index 0000000..9867401
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.keystore
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.mid b/apache-rat-core/src/test/resources/tikaFiles/binary/test.mid
new file mode 100644
index 0000000..883ef37
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.mid
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/test.mp3
new file mode 100644
index 0000000..698cbaf
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.ogg b/apache-rat-core/src/test/resources/tikaFiles/binary/test.ogg
new file mode 100644
index 0000000..8180299
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.ogg
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.swf b/apache-rat-core/src/test/resources/tikaFiles/binary/test.swf
new file mode 100644
index 0000000..57d7fe1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.swf
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.truststore b/apache-rat-core/src/test/resources/tikaFiles/binary/test.truststore
new file mode 100644
index 0000000..b98bc62
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.truststore
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.wav b/apache-rat-core/src/test/resources/tikaFiles/binary/test.wav
new file mode 100644
index 0000000..59a063e
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.wav
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/test.wma b/apache-rat-core/src/test/resources/tikaFiles/binary/test.wma
new file mode 100644
index 0000000..ec2e9bd
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/test.wma
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3i18n.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3i18n.mp3
new file mode 100644
index 0000000..0f25370
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3i18n.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3i18n_truncated.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3i18n_truncated.mp3
new file mode 100644
index 0000000..c2cd30d
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3i18n_truncated.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v1.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v1.mp3
new file mode 100644
index 0000000..3d4ef17
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v1.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v1_v2.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v1_v2.mp3
new file mode 100644
index 0000000..b78a1a3
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v1_v2.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v2.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v2.mp3
new file mode 100644
index 0000000..ac96bec
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v2.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v24.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v24.mp3
new file mode 100644
index 0000000..704921b
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3id3v24.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3lyrics.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3lyrics.mp3
new file mode 100644
index 0000000..cdec511
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3lyrics.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3noid3.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3noid3.mp3
new file mode 100644
index 0000000..f087903
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3noid3.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3truncated.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3truncated.mp3
new file mode 100644
index 0000000..d8ab515
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP3truncated.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP4.m4a b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP4.m4a
new file mode 100644
index 0000000..a9bc731
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP4.m4a
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testMP4_truncated.m4a b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP4_truncated.m4a
new file mode 100644
index 0000000..31fdef4
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testMP4_truncated.m4a
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/testNakedUTF16BOM.mp3 b/apache-rat-core/src/test/resources/tikaFiles/binary/testNakedUTF16BOM.mp3
new file mode 100644
index 0000000..414fbbb
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/testNakedUTF16BOM.mp3
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/binary/truststore.jks b/apache-rat-core/src/test/resources/tikaFiles/binary/truststore.jks
new file mode 100644
index 0000000..b98bc62
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/binary/truststore.jks
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/ca-cert b/apache-rat-core/src/test/resources/tikaFiles/ca-cert
new file mode 100644
index 0000000..5f298fb
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/ca-cert
@@ -0,0 +1,22 @@
+-----BEGIN CERTIFICATE-----
+MIIDqTCCApGgAwIBAgIUAgvTSWaYPi8BobX1hhbOk39GW0swDQYJKoZIhvcNAQEL
+BQAwZDELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxGzAZBgNVBAoM
+EkFwYWNoZSBDcmVhZHVyIFJhdDEjMCEGA1UECwwaQXBhY2hlIFNvZnR3YXJlIEZv
+dW5kYXRpb24wHhcNMjQwNTA0MTEzODQ0WhcNMjQwNjAzMTEzODQ0WjBkMQswCQYD
+VQQGEwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEbMBkGA1UECgwSQXBhY2hlIENy
+ZWFkdXIgUmF0MSMwIQYDVQQLDBpBcGFjaGUgU29mdHdhcmUgRm91bmRhdGlvbjCC
+ASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMPm0tTlla3NVpkMbzB3GsZh
+i8cZOp5Xfnk37Bfy6z9PHwldchpCosOJXfe+/Q61ZoRSc4OOE5JR46Z2ObPWIgRX
+Jl077233kW4vWIFg1livA4jF2eXW74R9Na9OEdV25qJzbF8BZopiiqYyUMqS2E7z
+7u5f4sO9t4Aj9GlXPIkn3XYCLeJ7NvxFpXiFVkyXcLipJmFquLADhHHxO/i+WZqo
++kUhtot48M5DVtJ4dFw0iJuyFGoHb0Jc/msHqde30gipeA7FPjChgrUt/gtSik//
+MCgDMSjtkCVFDJu/GRNekwJ3qmhZvZmVsTfyv7v9+auxdggxK37LGve/Cmgd9KkC
+AwEAAaNTMFEwHQYDVR0OBBYEFKLAoxUfM62sqAOLyl5lw+fvFgaiMB8GA1UdIwQY
+MBaAFKLAoxUfM62sqAOLyl5lw+fvFgaiMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZI
+hvcNAQELBQADggEBAEuUDV60i/20GB0gHYVRxejMI0OmtXy/J+jg1uutwirByhR5
+vUQr8Wdwxh4pm9UR4JaC2QRxedMkj3f29lqu1UteESyH7xcsQxEToreeYGU3veJY
+CiP4m1qaC8zVUu8Fr2VXvAsFnHKSMYiNFnWI/zGGGJGVQ8vUm2Vf664WpbUOh093
+Cf71I8sgrn1IE3C3E3meRkS+ws4HoID2nwpD+usS7batU/awOw2qASOuAjqRGG+/
+IHbk17HXPBhyXEb7r+/VAJCpzoOe1KwjgOm6FkitM1c3nARWtz6OZN6/spGdGbKa
+rLuOJKaXYxtDhNuP1/xB9VKEeXQQNKu0+BQve3Y=
+-----END CERTIFICATE-----
diff --git a/apache-rat-core/src/test/resources/tikaFiles/ca-key b/apache-rat-core/src/test/resources/tikaFiles/ca-key
new file mode 100644
index 0000000..5561cab
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/ca-key
@@ -0,0 +1,30 @@
+-----BEGIN ENCRYPTED PRIVATE KEY-----
+MIIFHDBOBgkqhkiG9w0BBQ0wQTApBgkqhkiG9w0BBQwwHAQIf0t1BVqonX8CAggA
+MAwGCCqGSIb3DQIJBQAwFAYIKoZIhvcNAwcECDcQRWLeuuoXBIIEyF1B0D0OW5+P
+Lk09ebYnI8Cp7mHKqAAL1NBLTscbXTJEddnS7zbNMxHgbfArnwOu17NhblK1lSxK
+hTkmH7RQ5/kZTBv3eBDFQZlAuwMKis8hv1dBWWraDxlEPy4gkVXJRqe8sJI/Nf++
+ADnNrZjLPUrmb8mcoU+R0cOV9S8Vo3iKWZ84Mh6rohc6DqpRkFukjwNV8/O6pBV1
+Zb9+NvIfDSej/VSLTnOvM0XwXwKwvYNyMaXnWh9IcDuwhyuQ693aqOL+hy0AmC7b
+P55fx0/yWkrINcbp8R8QiHOBrZKB/nc0OgMJDlTYWtnnpIXIogsGNgrXXOUEGtYW
+Em6bpE4np/DoshYXZqkRql0ZIQg9TfLr7k0FpqFBjT6ZnR1ezpulu1t9JCekV1K/
+nX4Dj58Xj3NB9Zx6ygHX6oZKZK4p0+meDHTl52Tyzc+Dv45Cry/Pf64fkFRjBDBJ
+plD+YkFiuys5/y+TF3QchsRYqkzdkptSgl3kh8oVN63r6XY0Rpq4eLFuK1VJB155
+oi5GSUr8wHLfYWDp2e3u2Tb1jDrvWKHs8DPnA04YtWp3pSqxI/dNJnKZ0GYThfk/
+LGni+U2neatcJt6pwXXcRAMN1MSerMMXeZ5fPBzNUI9/Iygyr+Fxiik5whhPQUmj
+NnwMI+e5aVCjJAknNH8Z24SSpe3afq0i8IO7R2t1Nw40Ign+zvI/tPMG/ajTpWbF
+NQ5R7r2mjSuDCYJ9enMllh5nGWAv930FRBvifW4MQ/vD/b8ADQF9uer4Y5wNRQWF
+ryF5wpEhR12PlUFJSMD+gBxiNsCRo0kjniVvwa+atDFR8eVaPQ5y8A5j6o4mVsjX
+2ul7EwCDCHcuoU/Cej/gpdUFwYMm5gD/4SZigJu0B8THXe58aeuhuCH773/paCCP
+LU3ZJ68GaByYCfja8vdoT25T5A0vLz7E/+lJq4tf8RTLJABAVXJCEjTWVWspWeQs
+J7z/4HV263CdqHZbn/0Gqa3PPApivgTGm0Nba6rZizkYgDxvCeroATKx5ixKnjdZ
+n3GWp69c2nwmI0xtxcfxglPykO/XKcRMXrnJka7NC6mc98Ijg+oUD+K3e5OUDPzv
+ilomOq1wRKukWQV0EYtvDzJhNCZP3qQne7+Cw+XDdrrlfMrVPB84s8hU9bOG+lDg
+eoHBN5RQiYiE4WfLF8rPKSGI4uhWQSh3uoY0xHnJXmXExGWhFTH+bapChe37KMur
+LqU0mjd5rSgAsea4xLkLRtEIz8bhGIJX+eByZZJBl2o7Hcb9hloDVrf+tUer/cgw
+cQBFL48tIgr/BmweTyIQlyPiBj1FPuRafonS/jg8q3ANl3AJdt0raQ0bxtZMfkYN
+BmTl5guJSzGL5RjdHUHLwNAa1PSPqjA3Ey5LpB1TpDkqlQWL9zI2RvuHq6CctExq
+dNtYEgfkMDLe/2+waxyCwhD5L3HH/Edo4D6eoD2RvH3hSDtsOePnUUA66T+PB5vW
+vLnRfWs4KaSr/R1qK1IdU6ymd7XcOvGmi3+A92kYx+HZGHJAcULFr2zSLWhecsey
+/ylT9qFdG9BdYDr2Sofz6yuizzwZS7tKlTY+LFRGPMswOgy4tx/mXLh+0p878jYT
+2w8E7xAgwNm5CcH48Bc53A==
+-----END ENCRYPTED PRIVATE KEY-----
diff --git a/apache-rat-core/src/test/resources/tikaFiles/cert-file b/apache-rat-core/src/test/resources/tikaFiles/cert-file
new file mode 100644
index 0000000..a2ba7e9
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/cert-file
@@ -0,0 +1,19 @@
+-----BEGIN NEW CERTIFICATE REQUEST-----
+MIIDADCCAegCAQAwgYoxEDAOBgNVBAYTB1Vua25vd24xEDAOBgNVBAgTB1Vua25v

+d24xEDAOBgNVBAcTB1Vua25vd24xIzAhBgNVBAoTGmFwYWNoZSBzb2Z0d2FyZSBm

+b3VuZGF0aW9uMRswGQYDVQQLExJhcGFjaGUgY3JlYWR1ciByYXQxEDAOBgNVBAMT

+B1Vua25vd24wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDIfjrZSi8J

+X3byEMM0wRUSUZ3F+syr75YnqsY0DuiHlIqA4DYL1ffsyPBanixRP4PbQ26RPbLU

+/i3QfhbVJpkntVvJJvKjXWWUJ0LQN4VIWdDXw+9feELKTSkoIvXwoNXApE1BfKmS

+cZP4l292E+UzGog5faHt3aoIP4Gp1+fe8ybTFctAWVSLbECKE8fetBS8eP7of4pC

+lYtQDZ9WGTXKIN2TB0V3VS1w4mdd/y/n3UYh7LbAOaqoR5Qp+2aOghNElZHkXMPw

+hngBYfEx7g5hVOgiyEw0vHrMuqI01jtHxkkx/t1F2CPYeXfXXhluoO0vEpfnUhBf

+Jgb0PLVuMzJdAgMBAAGgMDAuBgkqhkiG9w0BCQ4xITAfMB0GA1UdDgQWBBQxKtdV

+jawugSKCnB3djVazFcqR1DANBgkqhkiG9w0BAQsFAAOCAQEAFM8iYzG6enq71btN

+/3AhrlEwiKMk9B1zIoKPOkhLb7g0tU95tHRBWmkbeLkmSQgtkirYyH6ItV/L6dVp

+MnT+nUmYXlcTv5gIB75mHHpl9dxxcZlaggf4cml6mmZ03Jf+B7ShqEZ0QVpTBxxf

+YIdePcHlN6WZWJFa523a2kQF2SQ2Ts84WhhmFNFImzJ1NJwEtaPAPi/u/6WTTyof

+/jS/dIgavtGY8Xew06A7x0nmF3YDDP+ietbtTMQldfqIkAiPb425gQBdMrjnwftd

+lMJNK9OAxa++nZ/+SoyLSZXm3Hv1CA70XCqVYYJjkm3A588PB/kmWby/T7o9iOJ9

+93q1bA==
+-----END NEW CERTIFICATE REQUEST-----
diff --git a/apache-rat-core/src/test/resources/tikaFiles/cert-signed b/apache-rat-core/src/test/resources/tikaFiles/cert-signed
new file mode 100644
index 0000000..d136065
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/cert-signed
@@ -0,0 +1,21 @@
+-----BEGIN CERTIFICATE-----
+MIIDdjCCAl4CFCoYTVKkLZGguwEbhwnHe52DRrSmMA0GCSqGSIb3DQEBCwUAMGQx
+CzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMRswGQYDVQQKDBJBcGFj
+aGUgQ3JlYWR1ciBSYXQxIzAhBgNVBAsMGkFwYWNoZSBTb2Z0d2FyZSBGb3VuZGF0
+aW9uMB4XDTI0MDUwNDExNDAyMFoXDTI1MDUwNDExNDAyMFowgYoxEDAOBgNVBAYT
+B1Vua25vd24xEDAOBgNVBAgTB1Vua25vd24xEDAOBgNVBAcTB1Vua25vd24xIzAh
+BgNVBAoTGmFwYWNoZSBzb2Z0d2FyZSBmb3VuZGF0aW9uMRswGQYDVQQLExJhcGFj
+aGUgY3JlYWR1ciByYXQxEDAOBgNVBAMTB1Vua25vd24wggEiMA0GCSqGSIb3DQEB
+AQUAA4IBDwAwggEKAoIBAQDIfjrZSi8JX3byEMM0wRUSUZ3F+syr75YnqsY0DuiH
+lIqA4DYL1ffsyPBanixRP4PbQ26RPbLU/i3QfhbVJpkntVvJJvKjXWWUJ0LQN4VI
+WdDXw+9feELKTSkoIvXwoNXApE1BfKmScZP4l292E+UzGog5faHt3aoIP4Gp1+fe
+8ybTFctAWVSLbECKE8fetBS8eP7of4pClYtQDZ9WGTXKIN2TB0V3VS1w4mdd/y/n
+3UYh7LbAOaqoR5Qp+2aOghNElZHkXMPwhngBYfEx7g5hVOgiyEw0vHrMuqI01jtH
+xkkx/t1F2CPYeXfXXhluoO0vEpfnUhBfJgb0PLVuMzJdAgMBAAEwDQYJKoZIhvcN
+AQELBQADggEBACJM4Nb1hIelkOo0S9Yqx4hQuiKJo7DaVmHubRc71fLSJQsAPdnw
+E4FaNIS8trPKsOCAMNK9jzQHNrdgdeYyFG4wUS5nV9yMqN78HdCnghHR4NivcxDG
+LIJsbwaFTa79/cW8oe4+jwJ4ks+JYf3sA612RaWMDFxyJtIc0wv8dv7kRjZpC953
+Gj5ic6Gf+7DgRWdTAZgYVXHrnyrSfCbEX64Lcga33oSnvDJoxPnmy9JGbqnoIqOB
+e2PjnxZ6MktG17Z6fTpkUxWVsgqdx+zCynGsQnXfV0UKAwlTU2n5beD2aLfa5ysd
+B9VgFAxCwSpjlozIUGzjzRpnS+7uZv07Wik=
+-----END CERTIFICATE-----
diff --git a/apache-rat-core/src/test/resources/tikaFiles/notice/LICENSE b/apache-rat-core/src/test/resources/tikaFiles/notice/LICENSE
new file mode 100644
index 0000000..7a4a3ea
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/notice/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
\ No newline at end of file
diff --git a/apache-rat-core/src/test/resources/tikaFiles/notice/NOTICE b/apache-rat-core/src/test/resources/tikaFiles/notice/NOTICE
new file mode 100644
index 0000000..967464b
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/notice/NOTICE
@@ -0,0 +1,7 @@
+=========================================================================
+==  NOTICE file corresponding to section 4(d) of the Apache License,   ==
+==  Version 2.0.                                                       ==
+=========================================================================
+
+This product includes software developed by
+The Apache Software Foundation (http://www.apache.org/).
\ No newline at end of file
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/ChineseCommentsJava.java b/apache-rat-core/src/test/resources/tikaFiles/standard/ChineseCommentsJava.java
new file mode 100644
index 0000000..29475ee
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/ChineseCommentsJava.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.linkis.udf.entity;
+
+import java.util.Date;
+
+// taken from https://github.com/apache/linkis/blob/master/linkis-public-enhancements/linkis-pes-common/src/main/java/org/apache/linkis/udf/entity/UDFVersion.java
+public class ChineseCommentsJava {
+  private Long id;
+  private Long udfId;
+  private String path; // 仅存储用户上一次上传的路径 作提示用
+  private String bmlResourceId;
+  private String bmlResourceVersion;
+  private Boolean isPublished; // 共享udf被使用的是已发布的最新版本
+  private String registerFormat;
+  private String useFormat;
+  private String description;
+  private Date createTime;
+
+  /** Constructors and method taken away to only parse above comments but no meaningful Java class :) */
+  private String md5;
+
+  public void setCreateTime(Date createTime) {
+    this.createTime = createTime;
+  }
+}
+
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/HelloWorld.groovy b/apache-rat-core/src/test/resources/tikaFiles/standard/HelloWorld.groovy
new file mode 100644
index 0000000..7dfa58c
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/HelloWorld.groovy
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */
+package tikaFiles.standard;
+public class HelloWorld {
+    public static void main(String[] args) {
+        System.out.println("Hello World");
+    }
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/HelloWorld.java b/apache-rat-core/src/test/resources/tikaFiles/standard/HelloWorld.java
new file mode 100644
index 0000000..d99bba4
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/HelloWorld.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */
+package tikaFiles;
+public class HelloWorld {
+    public static void main(String[] args) {
+        System.out.println("Hello World");
+    }
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/Image.pdf b/apache-rat-core/src/test/resources/tikaFiles/standard/Image.pdf
new file mode 100644
index 0000000..110b45e
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/Image.pdf
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/UTF16_with_signature.xml b/apache-rat-core/src/test/resources/tikaFiles/standard/UTF16_with_signature.xml
new file mode 100644
index 0000000..9e9104e
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/UTF16_with_signature.xml
Binary files differ
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/UTF8_with_signature.xml b/apache-rat-core/src/test/resources/tikaFiles/standard/UTF8_with_signature.xml
new file mode 100644
index 0000000..b82c0ee
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/UTF8_with_signature.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+    
+        <!--
+     ***************************************************************
+     * Licensed to the Apache Software Foundation (ASF) under one
+     * or more contributor license agreements.  See the NOTICE file
+     * distributed with this work for additional information
+     * regarding copyright ownership.  The ASF licenses this file
+     * to you under the Apache License, Version 2.0 (the
+     * "License"); you may not use this file except in compliance
+     * with the License.  You may obtain a copy of the License at
+         *
+     *   http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing,
+     * software distributed under the License is distributed on an
+     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+     * KIND, either express or implied.  See the License for the
+     * specific language governing permissions and limitations
+     * under the License.
+     ***************************************************************
+   -->
+
+
+<xmlRoot>
+<descrition>
+The file encoding is UTF-8 with signature. (Special chars: äöü)
+</descrition>
+</xmlRoot>
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.C b/apache-rat-core/src/test/resources/tikaFiles/standard/file.C
new file mode 100644
index 0000000..b12b3c8
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.C
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */
+#include <stdio.h>
+int main() {
+    printf("Hello world");
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.CPP b/apache-rat-core/src/test/resources/tikaFiles/standard/file.CPP
new file mode 100644
index 0000000..7a1eef1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.CPP
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */
+#include <iostream>
+
+int main() {
+    std::cout << "Hello World!";
+    return 0;
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.c b/apache-rat-core/src/test/resources/tikaFiles/standard/file.c
new file mode 100644
index 0000000..b12b3c8
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.c
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */
+#include <stdio.h>
+int main() {
+    printf("Hello world");
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.c++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.c++
new file mode 100644
index 0000000..7a1eef1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.c++
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */
+#include <iostream>
+
+int main() {
+    std::cout << "Hello World!";
+    return 0;
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.cc b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cc
new file mode 100644
index 0000000..7a1eef1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cc
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */
+#include <iostream>
+
+int main() {
+    std::cout << "Hello World!";
+    return 0;
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.cp b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cp
new file mode 100644
index 0000000..7a1eef1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */
+#include <iostream>
+
+int main() {
+    std::cout << "Hello World!";
+    return 0;
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.cpp b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cpp
new file mode 100644
index 0000000..7a1eef1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */
+#include <iostream>
+
+int main() {
+    std::cout << "Hello World!";
+    return 0;
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.csv b/apache-rat-core/src/test/resources/tikaFiles/standard/file.csv
new file mode 100644
index 0000000..1aa2e35
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.csv
@@ -0,0 +1 @@
+Just, a, plain, csv, file
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.cxx b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cxx
new file mode 100644
index 0000000..7a1eef1
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.cxx
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */
+#include <iostream>
+
+int main() {
+    std::cout << "Hello World!";
+    return 0;
+}
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.plain b/apache-rat-core/src/test/resources/tikaFiles/standard/file.plain
new file mode 100644
index 0000000..1adcb85
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.plain
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ */
+
+Just a plain text file.
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/file.tsv b/apache-rat-core/src/test/resources/tikaFiles/standard/file.tsv
new file mode 100644
index 0000000..8b4b188
--- /dev/null
+++ b/apache-rat-core/src/test/resources/tikaFiles/standard/file.tsv
@@ -0,0 +1 @@
+Just    a   plain   tsv file
diff --git a/apache-rat-plugin/src/main/java/org/apache/rat/mp/AbstractRatMojo.java b/apache-rat-plugin/src/main/java/org/apache/rat/mp/AbstractRatMojo.java
index 1a39c0c..2386ed3 100644
--- a/apache-rat-plugin/src/main/java/org/apache/rat/mp/AbstractRatMojo.java
+++ b/apache-rat-plugin/src/main/java/org/apache/rat/mp/AbstractRatMojo.java
@@ -341,11 +341,15 @@
     protected ReportConfiguration getConfiguration() throws MojoExecutionException {
         ReportConfiguration config = new ReportConfiguration(makeLog());
         reportDeprecatedProcessing();
+        Defaults defaults = getDefaultsBuilder().build(config.getLog());
         if (addDefaultLicenses) {
-            config.setFrom(getDefaultsBuilder().build(config.getLog()));
+            config.setFrom(defaults);
         } else {
             config.setStyleSheet(Defaults.getPlainStyleSheet());
+            config.setDirectoriesToIgnore(Defaults.getDirectoriesToIgnore());
+            config.setFilesToIgnore(Defaults.getFilesToIgnore());
         }
+
         if (additionalLicenseFiles != null) {
             for (String licenseFile : additionalLicenseFiles) {
                 try {
diff --git a/apache-rat-plugin/src/main/java/org/apache/rat/mp/RatCheckMojo.java b/apache-rat-plugin/src/main/java/org/apache/rat/mp/RatCheckMojo.java
index bb842ed..fc8dc09 100644
--- a/apache-rat-plugin/src/main/java/org/apache/rat/mp/RatCheckMojo.java
+++ b/apache-rat-plugin/src/main/java/org/apache/rat/mp/RatCheckMojo.java
@@ -177,7 +177,7 @@
             configuration.setCopyrightMessage(copyrightMessage);
         }
         if (scanHiddenDirectories) {
-            configuration.setDirectoryFilter(null);
+            configuration.setDirectoriesToIgnore(null);
         }
         if (reportFile != null) {
             if (!reportFile.exists()) {
diff --git a/apache-rat-plugin/src/test/java/org/apache/rat/mp/RatCheckMojoTest.java b/apache-rat-plugin/src/test/java/org/apache/rat/mp/RatCheckMojoTest.java
index 2cdc942..3da865e 100644
--- a/apache-rat-plugin/src/test/java/org/apache/rat/mp/RatCheckMojoTest.java
+++ b/apache-rat-plugin/src/test/java/org/apache/rat/mp/RatCheckMojoTest.java
@@ -29,6 +29,7 @@
 import java.io.FileWriter;
 
 import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.filefilter.FalseFileFilter;
 import org.apache.rat.ReportConfiguration;
 import org.apache.rat.ReportConfigurationTest;
 import org.apache.rat.api.Document;
@@ -200,7 +201,10 @@
         ReportConfigurationTest.validateDefaultLicenses(config, "MyLicense", "CpyrT", "RegxT", "SpdxT", "TextT", 
                 "Not", "All", "Any");
         assertNotNull(LicenseSetFactory.search("MyLicense", config.getLicenses(LicenseFilter.ALL)));
-        assertNull("Should not have inputFileFilter", config.getInputFileFilter());
+        assertNotNull("Should have filesToIgnore", config.getFilesToIgnore());
+        assertThat(config.getFilesToIgnore()).isExactlyInstanceOf(FalseFileFilter.class);
+        assertNotNull("Should have directoriesToIgnore", config.getDirectoriesToIgnore());
+        assertThat(config.getDirectoriesToIgnore()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
         mojo.execute();
 
         ensureRatReportIsCorrect(ratTxtFile, expected, TextUtils.EMPTY);
@@ -228,11 +232,12 @@
         assertThat(config.isAddingLicenses()).isFalse();
         assertThat(config.isAddingLicensesForced()).isFalse();
         assertThat(config.getCopyrightMessage()).isNull();
-        assertThat(config.getInputFileFilter()).isNull();
         assertThat(config.isStyleReport()).isTrue();
-        assertThat(config.getStyleSheet()).isNotNull().withFailMessage("Stylesheet should not be null");
-        assertThat(config.getDirectoryFilter()).isNotNull().withFailMessage("Directory filter should not be null");
-        assertThat(config.getDirectoryFilter()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
+        assertThat(config.getStyleSheet()).withFailMessage("Stylesheet should not be null").isNotNull();
+        assertThat(config.getDirectoriesToIgnore()).withFailMessage("directoriesToIgnore filter should not be null").isNotNull();
+        assertThat(config.getDirectoriesToIgnore()).isExactlyInstanceOf(NameBasedHiddenFileFilter.class);
+        assertThat(config.getFilesToIgnore()).withFailMessage("filesToIgnore filter should not be null").isNotNull();
+        assertThat(config.getFilesToIgnore()).isExactlyInstanceOf(FalseFileFilter.class);
         
         ReportConfigurationTest.validateDefaultApprovedLicenses(config, 1);
         ReportConfigurationTest.validateDefaultLicenseFamilies(config, "BSD", "CC BY");
diff --git a/apache-rat-tasks/src/main/java/org/apache/rat/anttasks/Report.java b/apache-rat-tasks/src/main/java/org/apache/rat/anttasks/Report.java
index 289b04c..e7108dd 100644
--- a/apache-rat-tasks/src/main/java/org/apache/rat/anttasks/Report.java
+++ b/apache-rat-tasks/src/main/java/org/apache/rat/anttasks/Report.java
@@ -95,7 +95,7 @@
     }
 
     public void setInputFileFilter(FilenameFilter inputFileFilter) {
-        configuration.setInputFileFilter(inputFileFilter);
+        configuration.setFilesToIgnore(inputFileFilter);
     }
 
     public void setReportFile(File reportFile) {
diff --git a/checkstyle-suppressions.xml b/checkstyle-suppressions.xml
new file mode 100644
index 0000000..f2d551f
--- /dev/null
+++ b/checkstyle-suppressions.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+<!DOCTYPE suppressions PUBLIC "-//Checkstyle//DTD SuppressionFilter Configuration 1.0//EN" "https://checkstyle.org/dtds/suppressions_1_0.dtd">
+<suppressions>
+    <suppress checks="JavadocMethod" files=".*[/\\]test[/\\].*"/>
+    <suppress checks="JavadocPackage" files=".*[/\\]test[/\\].*"/>
+    <suppress checks="LineLength" files=".*" />
+    <suppress checks="javadoc" files=".*" />
+    <!-- Due to failures in Checkstyle on Windows and in GH Actions -->
+    <suppress checks="NewlineAtEndOfFile" files="target[/\\].*[/\\]pom.properties"/>
+</suppressions>
diff --git a/pom.xml b/pom.xml
index f295021..e0e7857 100644
--- a/pom.xml
+++ b/pom.xml
@@ -95,11 +95,6 @@
         <version>4.4</version>
       </dependency>
       <dependency>
-        <groupId>commons-beanutils</groupId>
-        <artifactId>commons-beanutils</artifactId>
-        <version>1.9.4</version>
-      </dependency>
-      <dependency>
         <groupId>commons-io</groupId>
         <artifactId>commons-io</artifactId>
         <version>2.16.1</version>
@@ -162,6 +157,11 @@
         <version>3.25.3</version>
         <scope>test</scope>
       </dependency>
+      <dependency>
+        <groupId>org.apache.tika</groupId>
+        <artifactId>tika-core</artifactId>
+        <version>2.9.2</version>
+      </dependency>
     </dependencies>
   </dependencyManagement>
   <reporting>
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index e0b7710..d9e8f1c 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -72,6 +72,25 @@
     </release>
     -->
     <release version="0.17-SNAPSHOT" date="xxxx-yy-zz" description="Current SNAPSHOT - release to be done">
+      <action issue="RAT-301" type="fix" dev="pottlinger" due-to="claudenw">
+        Files with Chinese characters in comments are no longer classified as binary (due to Tika integration).
+      </action>
+      <action issue="RAT-54" type="fix" dev="claudenw">
+        MIME Detection Using Tika
+      </action>
+      <action issue="RAT-20" type="fix" dev="claudenw">
+        Binary files are now detected by content, not by name.
+      </action>
+      <action issue="RAT-147" type="fix" dev="claudenw">
+        Non-UTF-8 text files are now detected as text, not as binary.
+      </action>
+      <action issue="RAT-150" type="fix" dev="claudenw">
+        Switch to Tika to detect file types.
+      </action>
+      <action issue="RAT-211" type="fix" dev="claudenw">
+        Generated rat-output.xml is now well-formed, even if BinaryGuesser fails or there is XML content
+        in the sample element.
+      </action>
       <action issue="RAT-368" type="update" dev="claudenw">
         Removed ReportFailedRuntimeException, ReportTransformer, RatReportAnalysisResultException, MimeTyper, ToNameTransformer,
         UnsuitableDocumentException, ReportTransformerTest, and ToNameTransformerTest as they are no longer used in the codebase.