// blob: 2da15e3d4113966fd635d51a3efeb5e7336c3840 [file] [log] [blame]
package org.apache.manifoldcf.crawler.connectors.confluence.client;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.config.SocketConfig;
import org.apache.http.conn.HttpClientConnectionManager;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.protocol.HttpRequestExecutor;
import org.apache.http.util.EntityUtils;
import org.apache.manifoldcf.connectorcommon.common.InterruptibleSocketFactory;
import org.apache.manifoldcf.connectorcommon.interfaces.KeystoreManagerFactory;
import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
import org.apache.manifoldcf.crawler.connectors.confluence.exception.ConfluenceException;
import org.apache.manifoldcf.crawler.connectors.confluence.model.Attachment;
import org.apache.manifoldcf.crawler.connectors.confluence.model.ConfluenceResource;
import org.apache.manifoldcf.crawler.connectors.confluence.model.ConfluenceResponse;
import org.apache.manifoldcf.crawler.connectors.confluence.model.ConfluenceUser;
import org.apache.manifoldcf.crawler.connectors.confluence.model.Label;
import org.apache.manifoldcf.crawler.connectors.confluence.model.MutableAttachment;
import org.apache.manifoldcf.crawler.connectors.confluence.model.MutablePage;
import org.apache.manifoldcf.crawler.connectors.confluence.model.Page;
import org.apache.manifoldcf.crawler.connectors.confluence.model.Space;
import org.apache.manifoldcf.crawler.connectors.confluence.model.Spaces;
import org.apache.manifoldcf.crawler.connectors.confluence.model.builder.ConfluenceResourceBuilder;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Optional;
import com.google.common.collect.Lists;
/**
* <p>
* ConfluenceClient class
* </p>
* <p>
* This class is intended to be used to interact with Confluence REST API
* </p>
* <p>
* There are some methods that make use of the Confluence JSON-RPC 2.0 API, but
* until all the methods are ported to the new REST API, we will have to use
* them to leverage all the features provided by Confluence
* </p>
*
* @author Antonio David Perez Morales <adperezmorales@gmail.com>
*
*/
public class ConfluenceClient {
/** Permission a user must hold on a space for that space to be reported as one of the user's authorities. */
private static final String VIEW_PERMISSION = "view";
/** REST path of the content resource, appended to the configured Confluence path. */
private static final String CONTENT_PATH = "/rest/api/content";
/** Path of the JSON-RPC service used by the space and permission calls. */
private static final String AUTHORITY_PATH = "/rpc/json-rpc/confluenceservice-v2/";
/** Query-string fragment sent when a single page or attachment is fetched. */
private static final String EXPANDABLE_PARAMETERS = "expand=body.view,metadata.labels,space,history,version";
/** Sub-path appended to a content id to list its attachments. */
private static final String CHILD_ATTACHMENTS_PATH = "/child/attachment/";
/** Sub-path appended to a content id to list its labels. */
private static final String LABEL_PATH = "/label";
/** Class-wide logger; one shared, immutable instance instead of one mutable field per client. */
private static final Logger logger = LoggerFactory.getLogger(ConfluenceClient.class);
// Connection settings; assigned once in the constructor and never changed afterwards.
private final String protocol;
private final Integer port;
private final String host;
private final String path;
private final String username;
private final String password;
/** Underlying HTTP client, created by connect(). */
private CloseableHttpClient httpClient;
/** Context handed to request execution; this class never assigns it, so it stays null. */
private HttpClientContext httpContext;
/**
 * <p>Builds a client for one Confluence instance and immediately initializes
 * the underlying HTTP client.</p>
 *
 * @param protocol the protocol used to build request urls
 * @param host the host
 * @param port the port
 * @param path the path to the Confluence instance
 * @param username the username used to make the requests. Null or empty to use anonymous user
 * @param password the password
 * @throws ManifoldCFException declared by the client initialization
 */
public ConfluenceClient(String protocol, String host, Integer port,
    String path, String username, String password) throws ManifoldCFException {
  // Credentials
  this.username = username;
  this.password = password;
  // Endpoint coordinates
  this.protocol = protocol;
  this.host = host;
  this.port = port;
  this.path = path;
  // Everything is in place: create the HTTP client
  connect();
}
/**
 * <p>Initializes the underlying HTTP client.</p>
 * <p>
 * The SSL socket factory and the socket configuration are registered on the
 * connection manager itself. When a custom connection manager is handed to
 * the client builder, the builder-level SSL factory, socket config and
 * connection limits are not applied, so setting them only on the builder
 * would silently leave https connections on the default socket factory.
 * </p>
 * <p>
 * Authentication is not configured here: credentials are sent explicitly as
 * an {@code Authorization} header by the request factory methods.
 * </p>
 *
 * @throws ManifoldCFException
 */
private void connect() throws ManifoldCFException {
  int socketTimeout = 900000;
  int connectionTimeout = 60000;

  javax.net.ssl.SSLSocketFactory httpsSocketFactory = KeystoreManagerFactory.getTrustingSecureSocketFactory();
  SSLConnectionSocketFactory myFactory = new SSLConnectionSocketFactory(
      new InterruptibleSocketFactory(httpsSocketFactory, connectionTimeout),
      SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);

  // Register the socket factories on the manager so that "https" really uses myFactory.
  PoolingHttpClientConnectionManager poolingConnectionManager = new PoolingHttpClientConnectionManager(
      org.apache.http.config.RegistryBuilder.<org.apache.http.conn.socket.ConnectionSocketFactory>create()
          .register("http", org.apache.http.conn.socket.PlainConnectionSocketFactory.getSocketFactory())
          .register("https", myFactory)
          .build());
  poolingConnectionManager.setDefaultSocketConfig(SocketConfig.custom()
      .setTcpNoDelay(true)
      .setSoTimeout(socketTimeout)
      .build());
  // Pool size limits are intentionally left at the manager defaults, which is
  // what was effectively in force before (a builder-level limit is ignored
  // once a custom connection manager is supplied).
  HttpClientConnectionManager connectionManager = poolingConnectionManager;

  RequestConfig.Builder requestBuilder = RequestConfig.custom()
      .setCircularRedirectsAllowed(true)
      .setSocketTimeout(socketTimeout)
      .setStaleConnectionCheckEnabled(true)
      .setExpectContinueEnabled(true)
      .setConnectTimeout(connectionTimeout)
      .setConnectionRequestTimeout(socketTimeout);

  httpClient = HttpClients.custom()
      .setConnectionManager(connectionManager)
      .disableAutomaticRetries()
      .setDefaultRequestConfig(requestBuilder.build())
      .setRequestExecutor(new HttpRequestExecutor(socketTimeout))
      .setRedirectStrategy(new DefaultRedirectStrategy())
      .build();
}
/**
 * <p>Close the client. No further requests can be done</p>
 * <p>A failure while closing is logged and otherwise ignored.</p>
 */
public void close() {
  if (httpClient == null) {
    return;
  }
  try {
    httpClient.close();
  } catch (IOException e) {
    // Best effort: report through the logger (with stack trace) instead of stderr.
    logger.debug("Error closing http connection. Reason: {}",
        e.getMessage(), e);
  }
}
/**
 * <p>Check method used to test if Confluence instance is up and running</p>
 * <p>Requests a single content item and expects a 200 status.</p>
 *
 * @return {@code true} when the request is answered with status 200
 *
 * @throws Exception if the status is not 200 or the request fails with an I/O error
 */
public boolean check() throws Exception {
  HttpResponse response = null;
  try {
    if (httpClient == null) {
      connect();
    }
    String url = String.format("%s://%s:%s/%s/%s?limit=1", protocol, host,
        port, path, CONTENT_PATH);
    logger.debug(
        "[Processing] Hitting url: {} for confluence status check",
        sanitizeUrl(url));
    HttpGet httpGet = createGetRequest(url);
    response = httpClient.execute(httpGet);
    int statusCode = response.getStatusLine().getStatusCode();
    if (statusCode != 200) {
      throw new Exception(
          "[Checking connection] Confluence server appears to be down");
    }
    return true;
  } catch (IOException e) {
    logger.warn(
        "[Checking connection] Confluence server appears to be down",
        e);
    throw new Exception("Confluence appears to be down", e);
  } finally {
    // Always drain the body so the pooled connection is released.
    if (response != null) {
      EntityUtils.consumeQuietly(response.getEntity());
    }
  }
}
/**
 * <p>Connectivity check for the Authority connector (JSON-RPC API): succeeds when the
 * list of spaces can be fetched.</p>
 * <p>This method will be deleted when all JSON-RPC methods are available through the REST API</p>
 *
 * @return {@code true} when the spaces request completes without error
 *
 * @throws Exception whatever the spaces request threw, after logging it
 */
public boolean checkAuth() throws Exception {
  try {
    if (httpClient == null) {
      connect();
    }
    getSpaces();
  } catch (Exception failure) {
    logger.warn(
        "[Checking connection] Confluence server appears to be down",
        failure);
    throw failure;
  }
  return true;
}
/**
 * <p>
 * Create a get request for the given url
 * </p>
 * <p>
 * When credentials are configured, the {@code os_authType=basic} parameter is
 * added to the url and a Basic {@code Authorization} header is set. The url
 * is sanitized before the request is created.
 * </p>
 *
 * @param url
 *            the url
 * @return the created {@code HttpGet} instance
 */
private HttpGet createGetRequest(String url) {
  boolean basicAuth = useBasicAuthentication();
  String finalUrl = url;
  if (basicAuth) {
    // Start the query string when the url has none; otherwise extend it.
    String separator = url.indexOf('?') >= 0 ? "&" : "?";
    finalUrl = url + separator + "os_authType=basic";
  }
  String sanitizedUrl = sanitizeUrl(finalUrl);
  HttpGet httpGet = new HttpGet(sanitizedUrl);
  httpGet.addHeader("Accept", "application/json");
  if (basicAuth) {
    httpGet.addHeader(
        "Authorization",
        "Basic "
            + Base64.encodeBase64String(String.format("%s:%s",
                this.username, this.password).getBytes(
                Charset.forName("UTF-8"))));
  }
  return httpGet;
}
/**
 * <p>
 * Fetches the first batch of Confluence pages: start 0, limit 50, no space filter.
 * </p>
 *
 * @return a {@code ConfluenceResponse} containing the result pages and
 *         some pagination values
 * @throws Exception
 */
public ConfluenceResponse<Page> getPages() throws Exception {
  final int firstIndex = 0;
  final int batchSize = 50;
  Optional<String> noSpaceFilter = Optional.absent();
  return getPages(firstIndex, batchSize, noSpaceFilter);
}
/**
 * <p>
 * Fetches one batch of Confluence pages, optionally restricted to a space.
 * </p>
 *
 * @param start The start value to get pages from
 * @param limit The number of pages to get from start
 * @param space when present, its value is sent as the {@code spaceKey} parameter
 * @return a {@code ConfluenceResponse} containing the result pages and
 *         some pagination values
 * @throws Exception
 */
@SuppressWarnings("unchecked")
public ConfluenceResponse<Page> getPages(int start, int limit,
    Optional<String> space) throws Exception {
  String baseUrl = String.format("%s://%s:%s/%s/%s?limit=%s&start=%s", protocol,
      host, port, path, CONTENT_PATH, limit, start);
  String url = space.isPresent()
      ? baseUrl + "&spaceKey=" + space.get()
      : baseUrl;
  return (ConfluenceResponse<Page>) getConfluenceResources(url, Page.builder());
}
/**
 * <p>Issues a GET against the given url and converts the JSON body into a
 * {@code ConfluenceResponse} using the supplied builder.</p>
 * @param url The url identifying the REST resource to get the documents
 * @param builder The builder used to build the resources contained in the response
 * @return a {@code ConfluenceResponse} containing the page results
 * @throws Exception on request or parsing failure; I/O errors are logged and wrapped
 */
private ConfluenceResponse<? extends ConfluenceResource> getConfluenceResources(String url, ConfluenceResourceBuilder<? extends ConfluenceResource> builder) throws Exception {
  logger.debug("[Processing] Hitting url for get confluence resources: {}", sanitizeUrl(url));
  try {
    HttpResponse httpResponse = executeRequest(createGetRequest(url));
    HttpEntity body = httpResponse.getEntity();
    ConfluenceResponse<? extends ConfluenceResource> parsed = responseFromHttpEntity(body, builder);
    // Release the connection before handing the result back
    EntityUtils.consume(body);
    return parsed;
  } catch (IOException ioFailure) {
    logger.error("[Processing] Failed to get page(s)", ioFailure);
    throw new Exception("Confluence appears to be down", ioFailure);
  }
}
/**
 * <p>Creates a ConfluenceResponse from the entity returned in the HttpResponse</p>
 * @param entity the {@code HttpEntity} to extract the response from
 * @param builder the builder used to create each resource found in the response
 * @return a {@code ConfluenceResponse} with the requested information
 * @throws Exception if the body cannot be read or is not valid JSON; a JSON
 *         failure is wrapped with the original {@code JSONException} as cause
 */
private <T extends ConfluenceResource> ConfluenceResponse<T> responseFromHttpEntity(HttpEntity entity, ConfluenceResourceBuilder<T> builder)
    throws Exception {
  // UTF-8 is only the fallback used when the response declares no charset.
  String stringEntity = EntityUtils.toString(entity, Charset.forName("UTF-8"));
  try {
    JSONObject responseObject = new JSONObject(stringEntity);
    ConfluenceResponse<T> response = ConfluenceResponse
        .fromJson(responseObject, builder);
    if (response.getResults().size() == 0) {
      logger.debug("[Processing] No {} found in the Confluence response", builder.getType().getSimpleName());
    }
    return response;
  } catch (JSONException e) {
    logger.debug("Error parsing JSON response");
    // Keep a message and the cause so the failure can be diagnosed upstream.
    throw new Exception("Error parsing JSON response", e);
  }
}
/**
 * <p>Fetches the first batch of attachments of the given page: start 0, limit 50.</p>
 * @param pageId the page id
 * @return a {@code ConfluenceResponse} instance containing the attachment results and some pagination values
 * @throws Exception
 */
public ConfluenceResponse<Attachment> getPageAttachments(String pageId)
    throws Exception {
  final int firstIndex = 0;
  final int batchSize = 50;
  return getPageAttachments(pageId, firstIndex, batchSize);
}
/**
 * <p>Fetches one batch of attachments of the given page.</p>
 * @param pageId the page id
 * @param start The start value to get attachments from
 * @param limit The number of attachments to get from start
 * @return a {@code ConfluenceResponse} instance containing the attachment results and some pagination values
 * @throws Exception
 */
public ConfluenceResponse<Attachment> getPageAttachments(String pageId, int start,
    int limit) throws Exception {
  // <protocol>://<host>:<port>/<path>/<content path>/<pageId><attachments path>?limit=..&start=..
  String attachmentsUrl = protocol + "://" + host + ":" + port + "/" + path + "/"
      + CONTENT_PATH + "/" + pageId + CHILD_ATTACHMENTS_PATH
      + "?limit=" + limit + "&start=" + start;
  @SuppressWarnings("unchecked")
  ConfluenceResponse<Attachment> attachments =
      (ConfluenceResponse<Attachment>) getConfluenceResources(attachmentsUrl, Attachment.builder());
  return attachments;
}
/**
 * <p>
 * Gets a specific attachment, including its downloaded content
 * </p>
 * <p>
 * This is a best-effort call: any failure is logged and an empty
 * {@code Attachment} is returned instead of propagating the error.
 * </p>
 *
 * @param attachmentId the id of the attachment to fetch
 * @return the {@code Attachment} instance, or a new empty one on failure
 */
public Attachment getAttachment(String attachmentId) {
  String url = String
      .format("%s://%s:%s/%s/%s/%s?%s",
          protocol, host, port, path, CONTENT_PATH, attachmentId, EXPANDABLE_PARAMETERS);
  logger.debug(
      "[Processing] Hitting url for getting document content : {}",
      sanitizeUrl(url));
  try {
    HttpGet httpGet = createGetRequest(url);
    HttpResponse response = executeRequest(httpGet);
    HttpEntity entity = response.getEntity();
    MutableAttachment attachment = attachmentFromHttpEntity(entity);
    EntityUtils.consume(entity);
    retrieveAndSetAttachmentContent(attachment);
    return attachment;
  } catch (Exception e) {
    // Trailing throwable argument makes the logger record the stack trace too.
    logger.error("[Processing] Failed to get attachment {}. Error: {}",
        url, e.getMessage(), e);
  }
  return new Attachment();
}
/**
 * <p>
 * Downloads and retrieves the attachment content, setting it in the given
 * {@code Attachment} instance
 * </p>
 * <p>
 * The whole content is buffered in memory. The attachment length is set to
 * the number of bytes actually read, because the length announced by the
 * response is negative when it is not known in advance.
 * </p>
 *
 * @param attachment
 *            the {@code Attachment} instance to download and set the
 *            content
 * @throws Exception if the download fails; the failure is logged and rethrown
 */
private void retrieveAndSetAttachmentContent(MutableAttachment attachment)
    throws Exception {
  StringBuilder sb = new StringBuilder();
  sb.append(attachment.getBaseUrl()).append(attachment.getUrlContext())
      .append(attachment.getDownloadUrl());
  String url = sanitizeUrl(sb.toString());
  logger.debug(
      "[Processing] Hitting url for getting attachment content : {}",
      url);
  try {
    HttpGet httpGet = createGetRequest(url);
    HttpResponse response = executeRequest(httpGet);
    HttpEntity entity = response.getEntity();
    try {
      byte[] byteContent = IOUtils.toByteArray(entity.getContent());
      attachment.setLength((long) byteContent.length);
      attachment.setContentStream(new ByteArrayInputStream(byteContent));
    } finally {
      // Release the connection even when reading the body fails.
      EntityUtils.consumeQuietly(entity);
    }
  } catch (Exception e) {
    logger.error(
        "[Processing] Failed to get attachment content from {}. Error: {}",
        url, e.getMessage());
    throw e;
  }
}
/**
 * <p>Get a Confluence page identified by its id, together with its labels</p>
 * <p>
 * This is a best-effort call: any failure is logged and an empty
 * {@code Page} is returned instead of propagating the error.
 * </p>
 * @param pageId the page id
 * @return the Confluence page, or a new empty one on failure
 */
public Page getPage(String pageId) {
  String url = String
      .format("%s://%s:%s/%s/%s/%s?%s",
          protocol, host, port, path, CONTENT_PATH, pageId, EXPANDABLE_PARAMETERS);
  url = sanitizeUrl(url);
  logger.debug(
      "[Processing] Hitting url for getting document content : {}",
      url);
  try {
    HttpGet httpGet = createGetRequest(url);
    HttpResponse response = executeRequest(httpGet);
    HttpEntity entity = response.getEntity();
    MutablePage page = pageFromHttpEntity(entity);
    EntityUtils.consume(entity);
    List<Label> labels = getLabels(pageId);
    page.setLabels(labels);
    return page;
  } catch (Exception e) {
    // SLF4J placeholders are "{}"; indexed "{0}" forms are never substituted.
    logger.error("[Processing] Failed to get page {}. Error: {}",
        url, e.getMessage(), e);
  }
  return new Page();
}
/**
 * <p>Get the labels of a specific page</p>
 * <p>
 * Labels are fetched in batches of 50 until the server reports the last
 * batch. Paging stops early, returning the labels collected so far, when a
 * request fails or when a batch comes back empty; otherwise the same
 * request would be repeated forever.
 * </p>
 * @param pageId The pageId to get the labels
 * @return a {@code List<Label>} of labels; possibly partial or empty on error
 */
public List<Label> getLabels(String pageId) {
  List<Label> labels = Lists.newArrayList();
  int lastStart = 0;
  int limit = 50;
  boolean isLast = false;
  do {
    String url = String
        .format("%s://%s:%s/%s/%s/%s/%s?start=%s&limit=%s",
            protocol, host, port, path, CONTENT_PATH, pageId, LABEL_PATH, lastStart, limit);
    url = sanitizeUrl(url);
    logger.debug(
        "[Processing] Hitting url for getting page labels : {}",
        url);
    try {
      @SuppressWarnings("unchecked")
      ConfluenceResponse<Label> response = (ConfluenceResponse<Label>) getConfluenceResources(url, Label.builder());
      int fetched = response.getResults().size();
      labels.addAll(response.getResults());
      lastStart += fetched;
      // An empty batch cannot advance the cursor, so treat it as the end.
      isLast = response.isLast() || fetched == 0;
    } catch (Exception e) {
      logger.debug("Error getting labels for page {}. Reason: {}", pageId, e.getMessage());
      // Retrying the identical request would loop forever; give up here.
      isLast = true;
    }
  }
  while(!isLast);
  return labels;
}
/**
 * <p>Computes the authorities of a user: the keys of all spaces on which the
 * user holds the view permission.</p>
 *
 * @param username the user whose permissions are checked on every space
 * @return a {@code ConfluenceUser} carrying the username and the collected space keys
 * @throws Exception if the spaces or the permissions cannot be fetched
 */
public ConfluenceUser getUserAuthorities(String username) throws Exception {
  List<String> viewableSpaceKeys = Lists.<String>newArrayList();
  for (Space candidate : getSpaces()) {
    List<String> granted = getSpacePermissionsForUser(candidate, username);
    if (!granted.contains(VIEW_PERMISSION)) {
      continue;
    }
    viewableSpaceKeys.add(candidate.getKey());
  }
  return new ConfluenceUser(username, viewableSpaceKeys);
}
/**
 * <p>Creates a JSON post request for the given url, adding a Basic
 * {@code Authorization} header when credentials are configured.</p>
 *
 * @param url the url, used as given
 * @return the created {@code HttpPost} instance
 */
private HttpPost createPostRequest(String url) {
  HttpPost post = new HttpPost(url);
  post.addHeader("Accept", "application/json");
  post.addHeader("Content-Type", "application/json");
  if (!useBasicAuthentication()) {
    return post;
  }
  String credentials = String.format("%s:%s", this.username, this.password);
  byte[] credentialBytes = credentials.getBytes(Charset.forName("UTF-8"));
  post.addHeader("Authorization", "Basic " + Base64.encodeBase64String(credentialBytes));
  return post;
}
/**
 * <p>Execute the given {@code HttpUriRequest} using the configured client</p>
 * <p>
 * Only a 200 status is accepted. For any other status the response body is
 * discarded, so that the connection is released, and an exception carrying
 * the status code and reason phrase is thrown.
 * </p>
 * @param request the {@code HttpUriRequest} to be executed
 * @return the {@code HttpResponse} object returned from the server
 * @throws Exception if the request fails or the status is not 200; logged and rethrown
 */
private HttpResponse executeRequest(HttpUriRequest request)
    throws Exception {
  String url = request.getURI().toString();
  logger.debug(
      "[Processing] Hitting url for getting document content : {}",
      url);
  try {
    HttpResponse response = httpClient.execute(request, httpContext);
    int statusCode = response.getStatusLine().getStatusCode();
    if (statusCode != 200) {
      String reason = response.getStatusLine().getReasonPhrase();
      // The caller never sees this response, so drain it here.
      EntityUtils.consumeQuietly(response.getEntity());
      throw new Exception("Confluence error. "
          + statusCode + " "
          + reason);
    }
    return response;
  } catch (Exception e) {
    logger.error("[Processing] Failed to get page {}. Error: {}",
        url, e.getMessage());
    throw e;
  }
}
/**
 * <p>Creates a Confluence page object from the given entity returned by the server</p>
 * @param entity the {@code HttpEntity} to create the {@code MutablePage} from
 * @return the Confluence page instance
 * @throws Exception if the body cannot be read or is not valid JSON; a JSON
 *         failure is wrapped with the original {@code JSONException} as cause
 */
private MutablePage pageFromHttpEntity(HttpEntity entity) throws Exception {
  // UTF-8 is only the fallback used when the response declares no charset.
  String stringEntity = EntityUtils.toString(entity, Charset.forName("UTF-8"));
  try {
    JSONObject responseObject = new JSONObject(stringEntity);
    @SuppressWarnings("unchecked")
    MutablePage response = ((ConfluenceResourceBuilder<MutablePage>)MutablePage.builder()).fromJson(responseObject, new MutablePage());
    return response;
  } catch (JSONException e) {
    logger.debug("Error parsing JSON page response data");
    throw new Exception("Error parsing JSON page response data", e);
  }
}
/**
 * <p>Creates a {@code MutableAttachment} object from the given entity returned by the server</p>
 * @param entity the {@code HttpEntity} to create the {@code MutableAttachment} from
 * @return the Confluence MutableAttachment instance
 * @throws Exception if the body cannot be read or is not valid JSON; a JSON
 *         failure is wrapped with the original {@code JSONException} as cause
 */
private MutableAttachment attachmentFromHttpEntity(HttpEntity entity)
    throws Exception {
  // UTF-8 is only the fallback used when the response declares no charset.
  String stringEntity = EntityUtils.toString(entity, Charset.forName("UTF-8"));
  try {
    JSONObject responseObject = new JSONObject(stringEntity);
    MutableAttachment response = (MutableAttachment) Attachment
        .builder()
        .fromJson(responseObject, new MutableAttachment());
    return response;
  } catch (JSONException e) {
    // Name the resource that actually failed (attachment, not page).
    logger.debug("Error parsing JSON attachment response data");
    throw new Exception("Error parsing JSON attachment response data", e);
  }
}
/**
 * <p>Tells whether requests must carry basic authentication: a non-empty
 * username and a non-null password are both required.</p>
 * @return {@code true} when basic authentication must be used
 */
private boolean useBasicAuthentication() {
  if (this.username == null || "".equals(username)) {
    return false;
  }
  return this.password != null;
}
/**
 * <p>
 * Sanitize the given url replacing the appearance of more than one slash by
 * only one slash
 * </p>
 * <p>
 * The {@code "://"} that follows the scheme is preserved; every other run of
 * slashes (anywhere after it, including the query string) is collapsed. A
 * url without a scheme separator is collapsed as a whole, instead of
 * failing or having a scheme prefix duplicated into it.
 * </p>
 *
 * @param url
 *            The url to sanitize
 * @return the sanitized url
 */
private String sanitizeUrl(String url) {
  String schemeSeparator = "://";
  int separatorIndex = url.indexOf(schemeSeparator);
  if (separatorIndex < 0) {
    // No scheme to protect: just collapse the slashes.
    return url.replaceAll("/+", "/");
  }
  int restStart = separatorIndex + schemeSeparator.length();
  String schemePart = url.substring(0, restStart);
  String rest = url.substring(restStart).replaceAll("/+", "/");
  return schemePart + rest;
}
/**
 * <p>Gets the Confluence spaces through the JSON-RPC {@code getSpaces} call</p>
 * <p>The response body is always released, also when the status is not 200
 * or the body cannot be parsed.</p>
 *
 * @return the {@code Spaces} parsed from the response
 * @throws Exception a {@code ConfluenceException} when the status is not 200;
 *         otherwise whatever the request or the parsing throws
 */
private Spaces getSpaces() throws Exception {
  String url = String.format("%s://%s:%s%s%sgetSpaces", protocol, host,
      port, path, AUTHORITY_PATH);
  logger.debug(
      "[Processing] Hitting url for getting Confluence spaces : {}",
      url);
  HttpPost httpPost = createPostRequest(url);
  httpPost.setEntity(new StringEntity("[]"));
  HttpResponse response = httpClient.execute(httpPost);
  try {
    if (response.getStatusLine().getStatusCode() != 200) {
      throw new ConfluenceException("Confluence error. "
          + response.getStatusLine().getStatusCode() + " "
          + response.getStatusLine().getReasonPhrase());
    }
    return spacesFromHttpEntity(response.getEntity());
  } finally {
    // Drain the body on every path so the connection goes back to the pool.
    EntityUtils.consumeQuietly(response.getEntity());
  }
}
/**
 * <p>Gets the permissions a user holds on a space through the JSON-RPC
 * {@code getPermissionsForUser} call</p>
 * <p>The request body is the JSON array {@code [spaceKey, username]}, sent
 * as UTF-8 so that names outside Latin-1 are not mangled. The response body
 * is always released, also on error.</p>
 *
 * @param space the space whose key is sent
 * @param username the user to get the permissions for
 * @return the permission names returned by the server
 * @throws Exception a {@code ConfluenceException} when the status is not 200;
 *         otherwise whatever the request or the parsing throws
 */
private List<String> getSpacePermissionsForUser(Space space, String username) throws Exception {
  String url = String.format("%s://%s:%s%s%sgetPermissionsForUser", protocol, host,
      port, path, AUTHORITY_PATH);
  logger.debug(
      "[Processing] Hitting url {} for getting Confluence permissions for user {} in space {}",
      url, username, space.getKey());
  HttpPost httpPost = createPostRequest(url);
  JSONArray jsonArray = new JSONArray();
  jsonArray.put(space.getKey());
  jsonArray.put(username);
  StringEntity stringEntity = new StringEntity(jsonArray.toString(), Charset.forName("UTF-8"));
  httpPost.setEntity(stringEntity);
  HttpResponse response = httpClient.execute(httpPost);
  try {
    if (response.getStatusLine().getStatusCode() != 200) {
      throw new ConfluenceException("Confluence error. "
          + response.getStatusLine().getStatusCode() + " "
          + response.getStatusLine().getReasonPhrase());
    }
    return permissionsFromHttpEntity(response.getEntity());
  } finally {
    // Drain the body on every path so the connection goes back to the pool.
    EntityUtils.consumeQuietly(response.getEntity());
  }
}
/**
 * <p>Creates a {@code Spaces} object from the JSON array returned by the server</p>
 * @param entity the {@code HttpEntity} holding the JSON array
 * @return the parsed {@code Spaces}
 * @throws Exception if the body cannot be read or is not valid JSON; a JSON
 *         failure is wrapped with the original {@code JSONException} as cause
 */
private Spaces spacesFromHttpEntity(HttpEntity entity) throws Exception {
  // UTF-8 is only the fallback used when the response declares no charset.
  String stringEntity = EntityUtils.toString(entity, Charset.forName("UTF-8"));
  try {
    JSONArray responseObject = new JSONArray(stringEntity);
    return Spaces.fromJson(responseObject);
  } catch (JSONException e) {
    logger.debug("Error parsing JSON spaces response data");
    throw new Exception("Error parsing JSON spaces response data", e);
  }
}
/**
 * <p>Reads a list of permission names from the JSON array returned by the server</p>
 * @param entity the {@code HttpEntity} holding a JSON array of strings
 * @return the array elements, in order
 * @throws Exception if the body cannot be read or is not valid JSON; a JSON
 *         failure is wrapped with the original {@code JSONException} as cause
 */
private List<String> permissionsFromHttpEntity(HttpEntity entity) throws Exception {
  // UTF-8 is only the fallback used when the response declares no charset.
  String stringEntity = EntityUtils.toString(entity, Charset.forName("UTF-8"));
  List<String> permissions = Lists.newArrayList();
  try {
    JSONArray responseObject = new JSONArray(stringEntity);
    for (int i = 0, len = responseObject.length(); i < len; i++) {
      permissions.add(responseObject.getString(i));
    }
    return permissions;
  } catch (JSONException e) {
    logger.debug("Error parsing JSON space permissions response data");
    throw new Exception("Error parsing JSON space permissions response data", e);
  }
}
}