Revert CONNECTORS-674 changes.
git-svn-id: https://svn.apache.org/repos/asf/manifoldcf/branches/release-1.2-branch@1479949 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index 2c38235..95d74df 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,10 +6,6 @@
======================= Release 1.2 =====================
-CONNECTORS-674: Use Solrj 4.3.0, and get rid of our own hacked
-class, since it now should no longer be needed.
-(Ryan McKinley, Karl Wright)
-
CONNECTORS-682: Fix expect-continue issues with Solr when there is
a Solr delay of more than 3 seconds.
(Oleg Kalnichevski, Erlend Garåsen, Karl Wright)
diff --git a/build.xml b/build.xml
index d1b59bb..b6ec08a 100644
--- a/build.xml
+++ b/build.xml
@@ -3635,7 +3635,7 @@
<antcall target="download-via-maven">
<param name="target" value="lib"/>
<param name="project-path" value="org/apache/solr"/>
- <param name="artifact-version" value="4.3.0"/>
+ <param name="artifact-version" value="4.1.0"/>
<param name="artifact-name" value="solr-solrj"/>
<param name="artifact-type" value="jar"/>
</antcall>
diff --git a/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java b/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
index f464ee9..a69e766 100644
--- a/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
+++ b/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
@@ -266,8 +266,7 @@
}
String httpSolrServerUrl = protocol + "://" + server + ":" + port + location;
- HttpSolrServer httpSolrServer = new HttpSolrServer(httpSolrServerUrl, localClient, new XMLResponseParser());
- httpSolrServer.setUseMultiPartPost(true);
+ HttpSolrServer httpSolrServer = new ModifiedHttpSolrServer(httpSolrServerUrl, localClient, new XMLResponseParser());
// Set the solrj instance we want to use
solrServer = httpSolrServer;
}
diff --git a/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/ModifiedHttpSolrServer.java b/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/ModifiedHttpSolrServer.java
new file mode 100644
index 0000000..d3b6ac7
--- /dev/null
+++ b/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/ModifiedHttpSolrServer.java
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.output.solr;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.ConnectException;
+import java.net.SocketTimeoutException;
+import java.nio.charset.Charset;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.http.Header;
+import org.apache.http.HttpResponse;
+import org.apache.http.HttpStatus;
+import org.apache.http.NameValuePair;
+import org.apache.http.NoHttpResponseException;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.entity.UrlEncodedFormEntity;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.client.methods.HttpRequestBase;
+import org.apache.http.client.params.ClientPNames;
+import org.apache.http.conn.ClientConnectionManager;
+import org.apache.http.entity.InputStreamEntity;
+import org.apache.http.entity.mime.FormBodyPart;
+import org.apache.http.entity.mime.HttpMultipartMode;
+import org.apache.http.entity.mime.MultipartEntity;
+import org.apache.http.entity.mime.content.InputStreamBody;
+import org.apache.http.entity.mime.content.StringBody;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.message.BasicHeader;
+import org.apache.http.message.BasicNameValuePair;
+import org.apache.http.util.EntityUtils;
+import org.apache.solr.client.solrj.impl.HttpSolrServer;
+import org.apache.solr.client.solrj.ResponseParser;
+import org.apache.solr.client.solrj.SolrRequest;
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.request.RequestWriter;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.apache.solr.client.solrj.util.ClientUtils;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.ContentStream;
+import org.apache.solr.common.util.NamedList;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/** This class overrides and somewhat changes the behavior of the
+* SolrJ HttpSolrServer class. The point of all this is simply to get
+* the right information to Tika. When SolrJ uses GET or POST but not
+* multipart-post, it does not include multipart headers that Tika uses -
+* specifically, the name of the document and the length of the document.
+* Patches have been submitted to the SOLR ticket queue to address this
+* problem in a method-insensitive way, but so far there has been no sign that
+* the Solr team is interested in committing them.
+*/
+public class ModifiedHttpSolrServer extends HttpSolrServer
+{
+ // Here we duplicate all the private fields we need
+
+ private static final String UTF_8 = "UTF-8";
+ private static final String DEFAULT_PATH = "/select";
+
+ private final HttpClient httpClient;
+ private boolean followRedirects = false;
+ private int maxRetries = 0;
+ private boolean useMultiPartPost = true;
+
+ public ModifiedHttpSolrServer(String baseURL, HttpClient client, ResponseParser parser) {
+ super(baseURL, client, parser);
+ httpClient = client;
+ }
+
+ @Override
+ public NamedList<Object> request(final SolrRequest request,
+ final ResponseParser processor) throws SolrServerException, IOException {
+ HttpRequestBase method = null;
+ InputStream is = null;
+ SolrParams params = request.getParams();
+ Collection<ContentStream> streams = requestWriter.getContentStreams(request);
+ String path = requestWriter.getPath(request);
+ if (path == null || !path.startsWith("/")) {
+ path = DEFAULT_PATH;
+ }
+
+ ResponseParser parser = request.getResponseParser();
+ if (parser == null) {
+ parser = this.parser;
+ }
+
+ // The parser 'wt=' and 'version=' params are used instead of the original
+ // params
+ ModifiableSolrParams wparams = new ModifiableSolrParams(params);
+ if (parser != null) {
+ wparams.set(CommonParams.WT, parser.getWriterType());
+ wparams.set(CommonParams.VERSION, parser.getVersion());
+ }
+ if (invariantParams != null) {
+ wparams.add(invariantParams);
+ }
+ params = wparams;
+
+ int tries = maxRetries + 1;
+ try {
+ while( tries-- > 0 ) {
+ // Note: since we aren't do intermittent time keeping
+ // ourselves, the potential non-timeout latency could be as
+ // much as tries-times (plus scheduling effects) the given
+ // timeAllowed.
+ try {
+ if( SolrRequest.METHOD.GET == request.getMethod() ) {
+ if( streams != null ) {
+ throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "GET can't send streams!" );
+ }
+ method = new HttpGet( baseUrl + path + ClientUtils.toQueryString( params, false ) );
+ }
+ else if( SolrRequest.METHOD.POST == request.getMethod() ) {
+
+ String url = baseUrl + path;
+ boolean hasNullStreamName = false;
+ if (streams != null) {
+ for (ContentStream cs : streams) {
+ if (cs.getName() == null) {
+ hasNullStreamName = true;
+ break;
+ }
+ }
+ }
+ boolean isMultipart = (this.useMultiPartPost || ( streams != null && streams.size() > 1 )) && !hasNullStreamName;
+
+ LinkedList<NameValuePair> postParams = new LinkedList<NameValuePair>();
+ if (streams == null || isMultipart) {
+ HttpPost post = new HttpPost(url);
+ post.setHeader("Content-Charset", "UTF-8");
+ if (!isMultipart) {
+ post.addHeader("Content-Type",
+ "application/x-www-form-urlencoded; charset=UTF-8");
+ }
+
+ List<FormBodyPart> parts = new LinkedList<FormBodyPart>();
+ Iterator<String> iter = params.getParameterNamesIterator();
+ while (iter.hasNext()) {
+ String p = iter.next();
+ String[] vals = params.getParams(p);
+ if (vals != null) {
+ for (String v : vals) {
+ if (isMultipart) {
+ parts.add(new FormBodyPart(p, new StringBody(v, Charset.forName("UTF-8"))));
+ } else {
+ postParams.add(new BasicNameValuePair(p, v));
+ }
+ }
+ }
+ }
+
+ if (isMultipart && streams != null) {
+ for (ContentStream content : streams) {
+ String contentType = content.getContentType();
+ if(contentType==null) {
+ contentType = "application/octet-stream"; // default
+ }
+ String contentName = content.getName();
+ parts.add(new FormBodyPart(contentName,
+ new InputStreamBody(
+ content.getStream(),
+ contentType,
+ content.getName())));
+ }
+ }
+
+ if (parts.size() > 0) {
+ MultipartEntity entity = new MultipartEntity(HttpMultipartMode.STRICT);
+ for(FormBodyPart p: parts) {
+ entity.addPart(p);
+ }
+ post.setEntity(entity);
+ } else {
+ //not using multipart
+ post.setEntity(new UrlEncodedFormEntity(postParams, "UTF-8"));
+ }
+
+ method = post;
+ }
+ // It is has one stream, it is the post body, put the params in the URL
+ else {
+ String pstr = ClientUtils.toQueryString(params, false);
+ HttpPost post = new HttpPost(url + pstr);
+
+ // Single stream as body
+ // Using a loop just to get the first one
+ final ContentStream[] contentStream = new ContentStream[1];
+ for (ContentStream content : streams) {
+ contentStream[0] = content;
+ break;
+ }
+ if (contentStream[0] instanceof RequestWriter.LazyContentStream) {
+ post.setEntity(new InputStreamEntity(contentStream[0].getStream(), -1) {
+ @Override
+ public Header getContentType() {
+ return new BasicHeader("Content-Type", contentStream[0].getContentType());
+ }
+
+ @Override
+ public boolean isRepeatable() {
+ return false;
+ }
+
+ });
+ } else {
+ post.setEntity(new InputStreamEntity(contentStream[0].getStream(), -1) {
+ @Override
+ public Header getContentType() {
+ return new BasicHeader("Content-Type", contentStream[0].getContentType());
+ }
+
+ @Override
+ public boolean isRepeatable() {
+ return false;
+ }
+ });
+ }
+ method = post;
+ }
+ }
+ else {
+ throw new SolrServerException("Unsupported method: "+request.getMethod() );
+ }
+ }
+ catch( NoHttpResponseException r ) {
+ method = null;
+ if(is != null) {
+ is.close();
+ }
+ // If out of tries then just rethrow (as normal error).
+ if (tries < 1) {
+ throw r;
+ }
+ }
+ }
+ } catch (IOException ex) {
+ throw new SolrServerException("error reading streams", ex);
+ }
+
+ // XXX client already has this set, is this needed?
+ method.getParams().setParameter(ClientPNames.HANDLE_REDIRECTS,
+ followRedirects);
+ method.addHeader("User-Agent", AGENT);
+
+ InputStream respBody = null;
+ boolean shouldClose = true;
+
+ try {
+ // Execute the method.
+ final HttpResponse response = httpClient.execute(method);
+ int httpStatus = response.getStatusLine().getStatusCode();
+
+ // Read the contents
+ respBody = response.getEntity().getContent();
+
+ // handle some http level checks before trying to parse the response
+ switch (httpStatus) {
+ case HttpStatus.SC_OK:
+ case HttpStatus.SC_BAD_REQUEST:
+ case HttpStatus.SC_CONFLICT: // 409
+ break;
+ case HttpStatus.SC_MOVED_PERMANENTLY:
+ case HttpStatus.SC_MOVED_TEMPORARILY:
+ if (!followRedirects) {
+ throw new SolrServerException("Server at " + getBaseURL()
+ + " sent back a redirect (" + httpStatus + ").");
+ }
+ break;
+ default:
+ throw new SolrException(SolrException.ErrorCode.getErrorCode(httpStatus), "Server at " + getBaseURL()
+ + " returned non ok status:" + httpStatus + ", message:"
+ + response.getStatusLine().getReasonPhrase());
+
+ }
+ if (processor == null) {
+ // no processor specified, return raw stream
+ NamedList<Object> rsp = new NamedList<Object>();
+ rsp.add("stream", respBody);
+ // Only case where stream should not be closed
+ shouldClose = false;
+ return rsp;
+ }
+ String charset = EntityUtils.getContentCharSet(response.getEntity());
+ NamedList<Object> rsp = processor.processResponse(respBody, charset);
+ if (httpStatus != HttpStatus.SC_OK) {
+ String reason = null;
+ try {
+ NamedList err = (NamedList) rsp.get("error");
+ if (err != null) {
+ reason = (String) err.get("msg");
+ // TODO? get the trace?
+ }
+ } catch (Exception ex) {}
+ if (reason == null) {
+ StringBuilder msg = new StringBuilder();
+ msg.append(response.getStatusLine().getReasonPhrase());
+ msg.append("\n\n");
+ msg.append("request: " + method.getURI());
+ reason = java.net.URLDecoder.decode(msg.toString(), UTF_8);
+ }
+ throw new SolrException(
+ SolrException.ErrorCode.getErrorCode(httpStatus), reason);
+ }
+ return rsp;
+ } catch (ConnectException e) {
+ throw new SolrServerException("Server refused connection at: "
+ + getBaseURL(), e);
+ } catch (SocketTimeoutException e) {
+ throw new SolrServerException(
+ "Timeout occured while waiting response from server at: "
+ + getBaseURL(), e);
+ } catch (IOException e) {
+ throw new SolrServerException(
+ "IOException occured when talking to server at: " + getBaseURL(), e);
+ } finally {
+ if (respBody != null && shouldClose) {
+ try {
+ respBody.close();
+ } catch (Throwable t) {} // ignore
+ }
+ }
+ }
+
+ @Override
+ public void setFollowRedirects(boolean followRedirects) {
+ super.setFollowRedirects(followRedirects);
+ this.followRedirects = followRedirects;
+ }
+
+}
diff --git a/pom.xml b/pom.xml
index cb1cae9..0141cd0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -49,7 +49,7 @@
<commons-collections.version>3.2.1</commons-collections.version>
<commons-fileupload.version>1.2.2</commons-fileupload.version>
<httpcomponent.version>4.2.5</httpcomponent.version>
- <solr.version>4.3.0</solr.version>
+ <solr.version>4.1.0</solr.version>
<commons-el.version>1.0</commons-el.version>
<commons-lang.version>2.6</commons-lang.version>
<xalan.version>2.7.1</xalan.version>