TIKA-4252: add http request headers at fetcher config level
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
index 7053418..bf8e614 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/main/java/org/apache/tika/pipes/fetcher/http/HttpFetcher.java
@@ -130,6 +130,9 @@
     //httpHeaders to capture in the metadata
     private Set<String> httpHeaders = new HashSet<>();
 
+    //httpRequestHeaders ("name: value" strings) to add to all outgoing http requests
+    private Set<String> httpRequestHeaders = new HashSet<>();
+
     //When making the request, what User-Agent is sent.
     //By default httpclient adds e.g. "Apache-HttpClient/4.5.13 (Java/x.y.z)"
     private String userAgent = null;
@@ -151,20 +154,31 @@
         if (!StringUtils.isBlank(userAgent)) {
             get.setHeader(USER_AGENT, userAgent);
         }
-        // additional http request headers can be sent in here.
+        // Add the headers from the Fetcher configuration.
+        if (httpRequestHeaders != null) {
+            for (String httpRequestHeader : httpRequestHeaders) {
+                parseHeaderAndPutOnRequest(get, httpRequestHeader);
+            }
+        }
+        // Additionally, headers can be specified per-fetch via the metadata.
         String[] httpRequestHeaders = metadata.getValues("httpRequestHeaders");
         if (httpRequestHeaders != null) {
             for (String httpRequestHeader : httpRequestHeaders) {
-                String[] parts = httpRequestHeader.trim().split(":", 2);
-                if (parts.length >= 2) {
-                    String key = parts[0].trim();
-                    String value = parts[1].trim();
-                    get.setHeader(key, value);
-                }
+                parseHeaderAndPutOnRequest(get, httpRequestHeader);
             }
         }
     }
 
+    // Puts one "name: value" header on the request; skips entries lacking a colon or a name.
+    private static void parseHeaderAndPutOnRequest(HttpGet get, String httpRequestHeader) {
+        String header = httpRequestHeader.trim();
+        int colon = header.indexOf(':'); // first colon only, so the value may itself contain ':'
+        String key = colon < 0 ? "" : header.substring(0, colon).trim();
+        if (!key.isEmpty()) {
+            get.setHeader(key, header.substring(colon + 1).trim());
+        }
+    }
+
     @Override
     public InputStream fetch(String fetchKey, long startRange, long endRange, Metadata metadata)
             throws IOException {
@@ -427,6 +441,17 @@
     }
 
     /**
+     * Sets the http request headers, each in "name: value" form, to send on the http requests.
+     *
+     * @param httpRequestHeaders replaces any headers set before; {@code null} means none
+     */
+    @Field
+    public void setHttpRequestHeaders(List<String> httpRequestHeaders) {
+        this.httpRequestHeaders =
+                httpRequestHeaders == null ? new HashSet<>() : new HashSet<>(httpRequestHeaders);
+    }
+
+    /**
      * This sets an overall timeout on the request.  If a server is super slow
      * or the file is very long, the other timeouts might not be triggered.
      *
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
index 64eae5c..b189e7b 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/java/org/apache/tika/pipes/fetcher/http/HttpFetcherTest.java
@@ -140,6 +140,8 @@
         HttpGet httpGet = httpGetArgumentCaptor.getValue();
         Assertions.assertEquals("val1", httpGet.getHeaders("nick1")[0].getValue());
         Assertions.assertEquals("val2", httpGet.getHeaders("nick2")[0].getValue());
+        // also make sure the headers from the fetcher config level are specified - see src/test/resources/tika-config-http.xml
+        Assertions.assertEquals("headerValueFromFetcherConfig", httpGet.getHeaders("headerNameFromFetcherConfig")[0].getValue());
     }
 
     @Test
diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml
index bd77de4..5def8f5 100644
--- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml
+++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml
@@ -24,6 +24,9 @@
         <header>Expires</header>
         <header>Content-Length</header>
       </httpHeaders>
+      <httpRequestHeaders>
+        <header>headerNameFromFetcherConfig: headerValueFromFetcherConfig</header>
+      </httpRequestHeaders>
     </fetcher>
   </fetchers>
-</properties>
\ No newline at end of file
+</properties>