// blob: b30a648e1b1c1672f66016a0ba58adefc581b199 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.pipes.emitter.opensearch;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringWriter;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.UUID;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.tika.client.TikaClientException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.utils.StringUtils;
/**
 * Small HTTP client that serializes Tika {@link Metadata} objects to JSON and
 * posts them to an OpenSearch index through the {@code _bulk} endpoint.
 *
 * <p>The first metadata object of a list is written as the "container" document
 * and uses the emit key as its id; every following object is written as an
 * "embedded" document whose id is the emit key plus a random UUID. How the
 * embedded documents refer to their container depends on the configured
 * {@link OpenSearchEmitter.AttachmentStrategy}.
 */
public class OpenSearchClient {

    // NOTE(review): this logger is registered under OpenSearchEmitter's name, not this
    // class's. Left unchanged so existing logging configuration keeps working -- confirm
    // whether that is intended.
    private static final Logger LOG = LoggerFactory.getLogger(OpenSearchEmitter.class);

    // Jackson factories/mappers are reusable and thread-safe once configured, so share
    // one instance instead of allocating a new one for every document and response.
    private static final JsonFactory JSON_FACTORY = new JsonFactory();
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    //this includes the full url and the index, should not end in /
    //e.g. https://localhost:9200/my-index
    protected final String openSearchUrl;
    protected final HttpClient httpClient;
    private final OpenSearchEmitter.AttachmentStrategy attachmentStrategy;

    /**
     * @param openSearchUrl      full url including the index, without a trailing slash
     * @param httpClient         client used to execute the bulk requests
     * @param attachmentStrategy how embedded documents are related to their container
     */
    protected OpenSearchClient(String openSearchUrl, HttpClient httpClient,
                               OpenSearchEmitter.AttachmentStrategy attachmentStrategy) {
        this.openSearchUrl = openSearchUrl;
        this.httpClient = httpClient;
        this.attachmentStrategy = attachmentStrategy;
    }

    /**
     * Sends all metadata objects of one emitted file to the index in a single
     * {@code _bulk} request.
     *
     * @param emitKey      id of the container document; also used as the routing value and
     *                     as the parent reference of embedded documents
     * @param metadataList container metadata first, followed by the embedded documents
     * @throws IOException         if the request cannot be executed or the JSON cannot be
     *                             written or read
     * @throws TikaClientException if the server answers with a status other than 200, if
     *                             the response reports errors, or if the response does not
     *                             carry an {@code errors} field
     */
    public void addDocument(String emitKey, List<Metadata> metadataList)
            throws IOException, TikaClientException {
        boolean parentChild =
                attachmentStrategy == OpenSearchEmitter.AttachmentStrategy.PARENT_CHILD;
        String routing = parentChild ? emitKey : null;

        StringBuilder bulkBody = new StringBuilder();
        int i = 0;
        for (Metadata metadata : metadataList) {
            StringBuilder id = new StringBuilder(emitKey);
            if (i > 0) {
                //embedded documents get a unique id derived from the container's id
                id.append("-").append(UUID.randomUUID());
            }
            //bulk format: one action line followed by one source line, each newline-terminated
            bulkBody.append(getBulkIndexJson(id.toString(), routing)).append("\n");
            if (i == 0) {
                bulkBody.append(metadataToJsonContainer(metadata, attachmentStrategy));
            } else {
                bulkBody.append(metadataToJsonEmbedded(metadata, attachmentStrategy, emitKey));
            }
            bulkBody.append("\n");
            i++;
        }

        String requestUrl = openSearchUrl + "/_bulk";
        if (parentChild) {
            requestUrl += "?routing=" + URLEncoder.encode(emitKey, StandardCharsets.UTF_8.name());
        }

        JsonResponse response = postJson(requestUrl, bulkBody.toString());
        if (response.getStatus() != 200) {
            throw new TikaClientException(response.getMsg());
        }
        //if there's a single error, throw the full json.
        //this has not been thoroughly tested with versions of es < 7
        JsonNode json = response.getJson();
        JsonNode errorNode = (json == null) ? null : json.get("errors");
        if (errorNode == null) {
            //fail with the declared exception type instead of a NullPointerException
            throw new TikaClientException(
                    "bulk response did not contain an 'errors' field: " + json);
        }
        if (errorNode.asBoolean()) {
            throw new TikaClientException(json.toString());
        }
    }

    /**
     * Serializes the metadata of an embedded document to a JSON object.
     *
     * <p>With {@code PARENT_CHILD} a {@code relation_type} object with
     * {@code name=embedded} and {@code parent=emitKey} is added; with
     * {@code SEPARATE_DOCUMENTS} a plain {@code parent} field is added; otherwise
     * only the metadata is written.
     *
     * @param metadata           metadata of the embedded document
     * @param attachmentStrategy strategy that decides which relation fields are added
     * @param emitKey            id of the container document
     * @return the JSON object as a string
     * @throws IOException if the JSON cannot be written
     */
    protected static String metadataToJsonEmbedded(Metadata metadata,
            OpenSearchEmitter.AttachmentStrategy attachmentStrategy,
            String emitKey) throws IOException {
        StringWriter writer = new StringWriter();
        try (JsonGenerator jsonGenerator = JSON_FACTORY.createGenerator(writer)) {
            jsonGenerator.writeStartObject();
            writeMetadata(metadata, jsonGenerator);
            if (attachmentStrategy == OpenSearchEmitter.AttachmentStrategy.PARENT_CHILD) {
                jsonGenerator.writeObjectFieldStart("relation_type");
                jsonGenerator.writeStringField("name", "embedded");
                jsonGenerator.writeStringField("parent", emitKey);
                //end the relation type object
                jsonGenerator.writeEndObject();
            } else if (attachmentStrategy
                    == OpenSearchEmitter.AttachmentStrategy.SEPARATE_DOCUMENTS) {
                jsonGenerator.writeStringField("parent", emitKey);
            }
            //end the metadata object
            jsonGenerator.writeEndObject();
        }
        return writer.toString();
    }

    /**
     * Serializes the metadata of a container document to a JSON object. With
     * {@code PARENT_CHILD} the field {@code relation_type=container} is added.
     *
     * @param metadata           metadata of the container document
     * @param attachmentStrategy strategy that decides whether the relation field is added
     * @return the JSON object as a string
     * @throws IOException if the JSON cannot be written
     */
    protected static String metadataToJsonContainer(Metadata metadata,
            OpenSearchEmitter.AttachmentStrategy attachmentStrategy)
            throws IOException {
        StringWriter writer = new StringWriter();
        try (JsonGenerator jsonGenerator = JSON_FACTORY.createGenerator(writer)) {
            jsonGenerator.writeStartObject();
            writeMetadata(metadata, jsonGenerator);
            if (attachmentStrategy == OpenSearchEmitter.AttachmentStrategy.PARENT_CHILD) {
                jsonGenerator.writeStringField("relation_type", "container");
            }
            jsonGenerator.writeEndObject();
        }
        return writer.toString();
    }

    /**
     * Writes every metadata entry as a field of the currently open JSON object:
     * a string field when the key has exactly one value, an array of strings otherwise.
     */
    private static void writeMetadata(Metadata metadata, JsonGenerator jsonGenerator)
            throws IOException {
        //writes the metadata without the start { or the end }
        //to allow for other fields to be added
        for (String n : metadata.names()) {
            String[] vals = metadata.getValues(n);
            if (vals.length == 1) {
                jsonGenerator.writeStringField(n, vals[0]);
            } else {
                jsonGenerator.writeArrayFieldStart(n);
                for (String v : vals) {
                    jsonGenerator.writeString(v);
                }
                jsonGenerator.writeEndArray();
            }
        }
    }

    /**
     * Builds the bulk action line {@code {"index":{"_id":id[,"routing":routing]}}}.
     * The routing field is omitted when {@code routing} is empty.
     */
    private String getBulkIndexJson(String id, String routing) throws IOException {
        StringWriter writer = new StringWriter();
        try (JsonGenerator jsonGenerator = JSON_FACTORY.createGenerator(writer)) {
            jsonGenerator.writeStartObject();
            jsonGenerator.writeObjectFieldStart("index");
            jsonGenerator.writeStringField("_id", id);
            if (!StringUtils.isEmpty(routing)) {
                jsonGenerator.writeStringField("routing", routing);
            }
            //end the index object
            jsonGenerator.writeEndObject();
            //end the action object
            jsonGenerator.writeEndObject();
        }
        return writer.toString();
    }

    /**
     * Posts a UTF-8 encoded JSON payload to the given url.
     *
     * @param url  target url
     * @param json request body
     * @return for status 200 a response holding the parsed JSON body; for any other
     *         status a response holding the status and the body as a string (empty
     *         when the server sent no body)
     * @throws IOException if the request fails, the body cannot be read, or a 200
     *                     response arrives without a body
     */
    public JsonResponse postJson(String url, String json) throws IOException {
        HttpPost httpRequest = new HttpPost(url);
        httpRequest.setEntity(new ByteArrayEntity(json.getBytes(StandardCharsets.UTF_8)));
        httpRequest.setHeader("Accept", "application/json");
        httpRequest.setHeader("Content-type", "application/json; charset=utf-8");
        //At one point, a "Connection: close" header was required because of
        //connection already bound exceptions on windows :(

        HttpResponse response = null;
        try {
            response = httpClient.execute(httpRequest);
            int status = response.getStatusLine().getStatusCode();
            if (status != 200) {
                //EntityUtils rejects a null entity, so report an empty message instead
                //of masking the HTTP status with an IllegalArgumentException
                String msg = (response.getEntity() == null) ? "" :
                        new String(EntityUtils.toByteArray(response.getEntity()),
                                StandardCharsets.UTF_8);
                return new JsonResponse(status, msg);
            }
            if (response.getEntity() == null) {
                throw new IOException("status 200 but no response body from: " + url);
            }
            try (Reader reader = new BufferedReader(
                    new InputStreamReader(response.getEntity().getContent(),
                            StandardCharsets.UTF_8))) {
                JsonNode node = OBJECT_MAPPER.readTree(reader);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("node: {}", node);
                }
                return new JsonResponse(200, node);
            }
        } finally {
            //instanceof is false for null, so no separate null check is needed
            if (response instanceof CloseableHttpResponse) {
                ((CloseableHttpResponse) response).close();
            }
            httpRequest.releaseConnection();
        }
    }
}