Make ASF license header exclusion listings as granular as possible. (#38)

* Make ASF license header exclusion listings as granular as possible.

* Include many more file types for scans; refine exclusions.

* Refinements to ASF config.

* Refinements to ASF config.

* Adjust ASF Release config. to reflect latest exclusions.

* Adjust ASF Release config. to reflect latest exclusions.

* refine changes to cfg files to reflect current documented exclusions.
diff --git a/.travis.yml b/.travis.yml
index cf08943..22f59ef 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,3 +1,6 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more contributor
+# license agreements; and to You under the Apache License, Version 2.0.
+
 # https://docs.travis-ci.com/user/languages/python/
 language: python
 python:
diff --git a/scancode/ASF-Release-v2.cfg b/scancode/ASF-Release-v2.cfg
index be5f995..7661712 100644
--- a/scancode/ASF-Release-v2.cfg
+++ b/scancode/ASF-Release-v2.cfg
@@ -1,3 +1,6 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more contributor
+# license agreements; and to You under the Apache License, Version 2.0.
+
 # scanCode.py configuration file
 
 # List of filenames containing the text of valid license (headers)
@@ -10,6 +13,7 @@
 ASFLicenseHeaderLua.txt
 ASFMinifiedLicenseHashHeader.txt
 ASFMinifiedLicenseHeader.txt
+ASFMinifiedLicenseHeaderREM.txt
 
 # Filters (path/filename) with wildcards and associated scan checks
 # that are to be run against them.  The checks are actual valid
@@ -26,14 +30,13 @@
 *.py=no_tabs, no_trailing_spaces, eol_at_eof
 *.scala=has_block_license, no_tabs, no_trailing_spaces, eol_at_eof
 *.sh=has_block_license, no_trailing_spaces, eol_at_eof
-build.xml=no_tabs, no_trailing_spaces, eol_at_eof
-deploy.xml=no_tabs, no_trailing_spaces, eol_at_eof
 
-# List of paths (inclusive of subdirectories) to exlude from code scanning
+# List of paths (inclusive of subdirectories) to exclude from code scanning
 [Excludes]
 # General tooling exclusions
 .tox
 .git
+.github
 .bin
 
 # Go gradle build tool exclusions for created 'vendor' and cache directories
diff --git a/scancode/ASF-Release.cfg b/scancode/ASF-Release.cfg
index be5f995..f6ce749 100644
--- a/scancode/ASF-Release.cfg
+++ b/scancode/ASF-Release.cfg
@@ -1,3 +1,6 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more contributor
+# license agreements; and to You under the Apache License, Version 2.0.
+
 # scanCode.py configuration file
 
 # List of filenames containing the text of valid license (headers)
@@ -10,50 +13,63 @@
 ASFLicenseHeaderLua.txt
 ASFMinifiedLicenseHashHeader.txt
 ASFMinifiedLicenseHeader.txt
+ASFMinifiedLicenseHeaderREM.txt
 
 # Filters (path/filename) with wildcards and associated scan checks
 # that are to be run against them.  The checks are actual valid
 # function names found in scanCode.py.
 [Includes]
-*=is_not_symlink
-*.conf=has_block_license, no_trailing_spaces, eol_at_eof
-*.go=has_block_license, no_trailing_spaces, eol_at_eof
-*.gradle=no_tabs, no_trailing_spaces, eol_at_eof
-*.java=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof
-*.js=no_tabs, no_trailing_spaces, eol_at_eof
-*.lua=has_block_license, no_trailing_spaces, eol_at_eof
-*.md=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof
-*.py=no_tabs, no_trailing_spaces, eol_at_eof
-*.scala=has_block_license, no_tabs, no_trailing_spaces, eol_at_eof
-*.sh=has_block_license, no_trailing_spaces, eol_at_eof
-build.xml=no_tabs, no_trailing_spaces, eol_at_eof
-deploy.xml=no_tabs, no_trailing_spaces, eol_at_eof
+*=is_not_symlink, regex_check
+*.conf=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.go=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.gradle=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.groovy=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.html=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.java=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.js=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.lua=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.md=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.properties=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.py=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.scala=has_block_license, no_tabs, no_trailing_spaces, eol_at_eof, regex_check
+*.sh=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.swift=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.yaml=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.yml=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
 
-# List of paths (inclusive of subdirectories) to exlude from code scanning
+# Sanity check files not required to have ASF headers because either they
+# are excluded or are not packaged with the Apache source release.
+*.cfg=regex_check
+*.ini=regex_check
+*.j2=regex_check
+*.json=regex_check
+*.txt=regex_check
+*.xml=regex_check
+
+# List of paths (inclusive of subdirectories) to exclude from code scanning
 [Excludes]
-# General tooling exclusions
-.tox
-.git
+
+# General tooling & binary file exclusions
 .bin
+.dockerignore
+.git
+.gitattributes
+.github
+.gitignore
+.jshintrc
+.pydevproject
+.rat-excludes
 
-# Go gradle build tool exclusions for created 'vendor' and cache directories
-.gogradle
-vendor
-
-# incubator-openwhisk binary artifact exclusions
-bin
-
-# incubator-openwhisk performance test exclusions
-performance
-
-# Jenkins/test generated reports
-tests/build/reports
-
-# incubator-openwhisk-apigateway exclusions
-lua_install
-tests/lua_modules
+# Exclude performance tests, test action data, doc images and binary artifacts (no significant content)
+incubator-openwhisk/performance/wrk_tests
+incubator-openwhisk/tests/dat/actions
+incubator-openwhisk/docs/images
+incubator-openwhisk/bin
 
 # incubator-openwhisk-client-go, incubator-openwhisk-wskdeploy autogenerated Go files
+# NOTE: we SHOULD make this more granular; there are many .go files that can have
+# ASF license headers.
+incubator-openwhisk-wskdeploy/specification/images
 wski18n
 
 # Exclude scancode tests that test for bad license headers
@@ -62,7 +78,7 @@
 [Options]
 # Not all code files allow licenses to appear starting at the first character
 # of the file. This option tells the scan to allow licenses to appear starting
-# within first 'x' characters of each code file (as provided by this option's
+# within the first 'x' characters of each code file (as provided by this option's
 # value).
 LICENSE_SLACK_LENGTH=500
 
diff --git a/scancode/ASFMinifiedLicenseHeaderREM.txt b/scancode/ASFMinifiedLicenseHeaderREM.txt
new file mode 100644
index 0000000..3a9a48d
--- /dev/null
+++ b/scancode/ASFMinifiedLicenseHeaderREM.txt
@@ -0,0 +1,2 @@
+@REM Licensed to the Apache Software Foundation (ASF) under one or more contributor
+@REM license agreements; and to You under the Apache License, Version 2.0.
\ No newline at end of file
diff --git a/scancode/NoExclusions.cfg b/scancode/NoExclusions.cfg
new file mode 100644
index 0000000..7907d68
--- /dev/null
+++ b/scancode/NoExclusions.cfg
@@ -0,0 +1,86 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more contributor
+# license agreements; and to You under the Apache License, Version 2.0.
+
+# scanCode.py configuration file
+
+# List of filenames containing the text of valid license (headers)
+# These files SHOULD be in the same directory path where scanCode.py
+# resides.
+[Licenses]
+ASFLicenseHeader.txt
+ASFLicenseHeaderBash.txt
+ASFLicenseHeaderHash.txt
+ASFLicenseHeaderLua.txt
+ASFMinifiedLicenseHashHeader.txt
+ASFMinifiedLicenseHeader.txt
+ASFMinifiedLicenseHeaderREM.txt
+
+# Filters (path/filename) with wildcards and associated scan checks
+# that are to be run against them.  The checks are actual valid
+# function names found in scanCode.py.
+[Includes]
+*=is_not_symlink, regex_check, has_block_license
+*.conf=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.go=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.gradle=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.groovy=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.html=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.java=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.js=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.lua=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.md=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.properties=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.py=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.scala=has_block_license, no_tabs, no_trailing_spaces, eol_at_eof, regex_check
+*.sh=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.swift=no_tabs, has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.yaml=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+*.yml=has_block_license, no_trailing_spaces, eol_at_eof, regex_check
+
+# Sanity check files not required to have ASF headers because either they
+# are excluded or are not packaged with the Apache source release.
+*.cfg=regex_check
+*.ini=regex_check
+*.j2=regex_check
+*.json=regex_check
+*.txt=regex_check
+*.xml=regex_check
+
+# List of paths (inclusive of subdirectories) to exclude from code scanning
+[Excludes]
+
+# General tooling & binary file exclusions
+.bin
+.dockerignore
+.git
+.gitattributes
+.github
+.gitignore
+.jshintrc
+.pydevproject
+.rat-excludes
+
+# Exclude performance tests, test action data, doc images and binary artifacts (no significant content)
+incubator-openwhisk/performance/wrk_tests
+incubator-openwhisk/tests/dat/actions
+incubator-openwhisk/docs/images
+incubator-openwhisk/bin
+
+# incubator-openwhisk-client-go, incubator-openwhisk-wskdeploy autogenerated Go files
+# NOTE: we SHOULD make this more granular; there are many .go files that can have
+# ASF license headers.
+incubator-openwhisk-wskdeploy/specification/images
+wski18n
+
+# Exclude scancode tests that test for bad license headers
+incubator-openwhisk-utilities/scancode/tests/exclude
+
+[Options]
+# Not all code files allow licenses to appear starting at the first character
+# of the file. This option tells the scan to allow licenses to appear starting
+# within the first 'x' characters of each code file (as provided by this option's
+# value).
+LICENSE_SLACK_LENGTH=500
+
+# List of regular expressions for forbidden strings, e.g. \w+@company.com
+[Regex]
diff --git a/scancode/scanCode.cfg b/scancode/scanCode.cfg
index 4ccff7e..5666f8d 100644
--- a/scancode/scanCode.cfg
+++ b/scancode/scanCode.cfg
@@ -25,8 +25,6 @@
 *.md=no_tabs, eol_at_eof
 *.py=no_tabs, no_trailing_spaces, eol_at_eof
 *.scala=has_block_license, no_tabs, no_trailing_spaces, eol_at_eof
-build.xml=no_tabs, no_trailing_spaces, eol_at_eof
-deploy.xml=no_tabs, no_trailing_spaces, eol_at_eof
 
 # List of paths (inclusive of subdirectories) to exlude from code scanning
 [Excludes]
@@ -37,6 +35,7 @@
 
 # OpenWhisk binary artifact exclusion
 bin
+images
 
 # 'vendor' and cache directory create by gograble build tool
 .gogradle
diff --git a/scancode/scanCode.py b/scancode/scanCode.py
index b534a02..723fc98 100755
--- a/scancode/scanCode.py
+++ b/scancode/scanCode.py
@@ -50,7 +50,7 @@
 YELLOW = '\033[33m'
 
 # Translatable messages (error and general)
-ERR_REGEX = "file contains a forbidden string. string=[%s], regex=[%s]"
+ERR_REGEX = "line contains forbidden pattern [%s]. line=[%s]"
 ERR_GENERAL = "an unspecified error was detected."
 ERR_INVALID_CONFIG_FILE = "Invalid configuration file [%s]: %s.\n"
 ERR_INVALID_SCAN_FUNCTION = "Config. file filter [%s] lists invalid " \
@@ -62,7 +62,7 @@
 ERR_REQUIRED_SECTION = "Configuration file missing required section: [%s]"
 ERR_SYMBOLIC_LINK = "file is a symbolic link."
 ERR_TABS = "line contains tabs."
-ERR_TRAILING_WHITESPACE = "line has trailing whitespaces."
+ERR_TRAILING_WHITESPACE = "line has trailing whitespace."
 
 HELP_CONFIG_FILE = "provide custom configuration file"
 HELP_DISPLAY_EXCLUSIONS = "display path exclusion information"
@@ -333,7 +333,7 @@
     # vprint("regex pattern: " + str(regex_patterns))
     for pattern in regex_patterns:
         if re.search(pattern, line):
-            return ERR_REGEX
+            return ERR_REGEX % (pattern, line)
         else:
             return None
 
@@ -536,14 +536,15 @@
             errors += run_line_checks(path, chks2)
             all_errors += map(lambda p: (path, p[0], p[1]), errors)
 
-    # Display path and file exclusion details
-    if args.display_exclusions or VERBOSE:
+    # Display directory (path) exclusion details
+    if VERBOSE:
         print_warning(WARN_SCAN_EXCLUDED_PATH_SUMMARY % len(exclusion_paths))
         # Display all paths that were excluded (by configuration)
         for excluded_path in exclusion_paths:
             print_warning(WARN_SCAN_EXCLUDED_PATH % excluded_path)
 
-        # Inform caller which files where excluded from these paths
+    # Display which files were excluded from these paths
+    if args.display_exclusions:
         print_warning(WARN_SCAN_EXCLUDED_FILE_SUMMARY %
                       len(exclusion_files_set))
         for excluded_file in exclusion_files_set:
diff --git a/scancode/travis.cfg b/scancode/travis.cfg
index 75fda88..6b3ddb1 100644
--- a/scancode/travis.cfg
+++ b/scancode/travis.cfg
@@ -26,8 +26,6 @@
 *.py=no_tabs, no_trailing_spaces, eol_at_eof
 *.scala=has_block_license, no_tabs, no_trailing_spaces, eol_at_eof
 *.sh=has_block_license, no_trailing_spaces, eol_at_eof
-build.xml=no_tabs, no_trailing_spaces, eol_at_eof
-deploy.xml=no_tabs, no_trailing_spaces, eol_at_eof
 
 # List of paths (inclusive of subdirectories) to exlude from code scanning
 [Excludes]