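TIKA-3508: use '-' instead of '_' before the UUID in embedded document ids in the OpenSearch and Solr emitters, build Solr child ids from the parent id field's value, and update CHANGES.txt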
diff --git a/CHANGES.txt b/CHANGES.txt
index fe0ea2a..6898dee 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,12 @@
Release 2.0.1 - ???
+ * Fix serialization of embedded docs in OpenSearch emitter
+ and fix embedded documents not being indexed in some use
+ cases in the Solr emitter (TIKA-3490).
+
+ * Add pipesClientId system property to PipesServer so that each
+ forked process can log to its own logger (TIKA-3480).
+
* Add DateNormalizingMetadataFilter let users ensure that all dates
emitted to Solr/OpenSearch are in UTC. Users can configure which
timezone they'd like to use in cases where the file format does
@@ -9,8 +16,6 @@
the SKIP or CONCATENATE attachment strategy, modify the
parseMode in the pipesiterators or in the FetchEmitTuple (TIKA-3494).
- * Fix serialization of embedded docs in OpenSearch emitter (TIKA-3490).
-
Release 2.0.0 - 07/07/2021
* Cleanup of fetcher integration with tika-server.
diff --git a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
index 7037f5d..986ce7a 100644
--- a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
+++ b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/TikaPipesOpenSearchTest.java
@@ -148,7 +148,7 @@
JsonNode source = results.getJson().get("hits").get("hits").get(0).get("_source");
Matcher m = Pattern.compile("\\Atest_recursive_embedded" +
- ".docx_[0-9a-f]{8}-[0-9a-f]{4}-" +
+ ".docx-[0-9a-f]{8}-[0-9a-f]{4}-" +
"[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\\Z").matcher(
results.getJson().get("hits").get("hits").get(0).get("_id").asText()
);
@@ -211,11 +211,11 @@
JsonNode source = results.getJson().get("hits").get("hits").get(0).get("_source");
Matcher m = Pattern.compile("\\Atest_recursive_embedded" +
- ".docx_[0-9a-f]{8}-[0-9a-f]{4}-" +
+ ".docx-[0-9a-f]{8}-[0-9a-f]{4}-" +
"[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\\Z").matcher(
results.getJson().get("hits").get("hits").get(0).get("_id").asText()
);
- assertTrue("test_recursive_embedded.docx_$guid", m.find());
+ assertTrue("test_recursive_embedded.docx-$guid", m.find());
assertNull("test_recursive_embedded.docx",
results.getJson().get("hits").get("hits").get(0).get("_routing"));
diff --git a/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java b/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
index c3ec807..b30a648 100644
--- a/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
+++ b/tika-pipes/tika-emitters/tika-emitter-opensearch/src/main/java/org/apache/tika/pipes/emitter/opensearch/OpenSearchClient.java
@@ -70,7 +70,7 @@
for (Metadata metadata : metadataList) {
StringBuilder id = new StringBuilder(emitKey);
if (i > 0) {
- id.append("_").append(UUID.randomUUID());
+ id.append("-").append(UUID.randomUUID());
}
String indexJson = getBulkIndexJson(id.toString(), routing);
sb.append(indexJson).append("\n");
diff --git a/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml b/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
index d34112e..e51d37d 100644
--- a/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
+++ b/tika-pipes/tika-emitters/tika-emitter-s3/pom.xml
@@ -85,7 +85,6 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</dependency>
-
</dependencies>
<build>
diff --git a/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml b/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml
index d613e57..142cd94 100644
--- a/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml
+++ b/tika-pipes/tika-emitters/tika-emitter-solr/pom.xml
@@ -48,6 +48,7 @@
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
+ <scope>provided</scope>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
diff --git a/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java b/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
index 12ec5a5..8e1615b 100644
--- a/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-solr/src/main/java/org/apache/tika/pipes/emitter/solr/SolrEmitter.java
@@ -101,7 +101,7 @@
SolrInputDocument childSolrInputDocument = new SolrInputDocument();
Metadata m = metadataList.get(i);
childSolrInputDocument
- .setField(idField, emitKey + "_" + UUID.randomUUID().toString());
+ .setField(idField, emitKey + "-" + UUID.randomUUID().toString());
addMetadataToSolrInputDocument(m, childSolrInputDocument, updateStrategy);
solrInputDocument.addChildDocument(childSolrInputDocument);
}
@@ -113,7 +113,7 @@
SolrInputDocument childSolrInputDocument = new SolrInputDocument();
Metadata m = metadataList.get(i);
childSolrInputDocument.setField(idField,
- solrInputDocument.get(idField) + "-" + UUID.randomUUID().toString());
+ solrInputDocument.get(idField).getValue() + "-" + UUID.randomUUID().toString());
addMetadataToSolrInputDocument(m, childSolrInputDocument, updateStrategy);
docsToUpdate.add(childSolrInputDocument);
}
diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml
index 8c40259..65917fb 100644
--- a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/pom.xml
@@ -80,6 +80,7 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>${log4j2.version}</version>
+ <scope>provided</scope>
</dependency>
</dependencies>
<build>