| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.manifoldcf.crawler.connectors.confluence.v6; |
| |
| import java.io.IOException; |
| import java.io.InterruptedIOException; |
| import java.io.StringReader; |
| import java.text.DateFormat; |
| import java.text.MessageFormat; |
| import java.util.ArrayList; |
| import java.util.Date; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Locale; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| |
| import org.apache.commons.lang.StringUtils; |
| import org.apache.manifoldcf.agents.interfaces.RepositoryDocument; |
| import org.apache.manifoldcf.agents.interfaces.ServiceInterruption; |
| import org.apache.manifoldcf.core.interfaces.ConfigParams; |
| import org.apache.manifoldcf.core.interfaces.IHTTPOutput; |
| import org.apache.manifoldcf.core.interfaces.IPasswordMapperActivity; |
| import org.apache.manifoldcf.core.interfaces.IPostParameters; |
| import org.apache.manifoldcf.core.interfaces.IThreadContext; |
| import org.apache.manifoldcf.core.interfaces.ManifoldCFException; |
| import org.apache.manifoldcf.core.interfaces.Specification; |
| import org.apache.manifoldcf.core.interfaces.SpecificationNode; |
| import org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector; |
| import org.apache.manifoldcf.crawler.connectors.confluence.v6.client.ConfluenceClient; |
| import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Attachment; |
| import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.ConfluenceResponse; |
| import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.ConfluenceRestrictionsResponse; |
| import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page; |
| import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.PageType; |
| import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Restrictions; |
| import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Restrictions.ReadRestrictions; |
| import org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Space; |
| import org.apache.manifoldcf.crawler.connectors.confluence.v6.util.ConfluenceUtil; |
| import org.apache.manifoldcf.crawler.interfaces.IExistingVersions; |
| import org.apache.manifoldcf.crawler.interfaces.IProcessActivity; |
| import org.apache.manifoldcf.crawler.interfaces.ISeedingActivity; |
| import org.apache.manifoldcf.crawler.system.Logging; |
| import org.json.simple.JSONArray; |
| import org.json.simple.JSONObject; |
| import org.json.simple.parser.JSONParser; |
| import org.json.simple.parser.ParseException; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import com.google.common.base.Optional; |
| import com.google.common.collect.Lists; |
| import com.google.common.collect.Maps; |
| |
| /** |
| * <p> |
| * Confluence Repository Connector class |
| * </p> |
| * <p> |
| * ManifoldCF Repository connector to deal with Confluence documents |
| * </p> |
| * |
| * @author Julien Massiera & Antonio David Perez Morales |
| * |
| */ |
| public class ConfluenceRepositoryConnector extends BaseRepositoryConnector { |
| |
| protected final static String ACTIVITY_READ = "read document"; |
| |
| /** Deny access token for default authority */ |
| private final static String defaultAuthorityDenyToken = GLOBAL_DENY_TOKEN; |
| |
| private static final String CHILD_PREFIX = "child+"; |
| |
| /* |
| * Prefix for Confluence configuration and specification parameters |
| */ |
| private static final String PARAMETER_PREFIX = "confluence_"; |
| |
| /* Configuration tabs */ |
| private static final String CONF_SERVER_TAB_PROPERTY = "ConfluenceRepositoryConnector.Server"; |
| |
| /* Specification tabs */ |
| private static final String CONF_SECURITY_TAB_PROPERTY = "ConfluenceRepositoryConnector.Security"; |
| private static final String CONF_SPACES_TAB_PROPERTY = "ConfluenceRepositoryConnector.Spaces"; |
| private static final String CONF_PAGES_TAB_PROPERTY = "ConfluenceRepositoryConnector.Pages"; |
| |
| // pages & js |
| // Template names for Confluence configuration |
| /** |
| * Forward to the javascript to check the configuration parameters |
| */ |
| private static final String EDIT_CONFIG_HEADER_FORWARD = "editConfiguration_conf.js"; |
| /** |
| * Server tab template |
| */ |
| private static final String EDIT_CONFIG_FORWARD_SERVER = "editConfiguration_conf_server.html"; |
| |
| /** |
| * Forward to the HTML template to view the configuration parameters |
| */ |
| private static final String VIEW_CONFIG_FORWARD = "viewConfiguration_conf.html"; |
| |
| // Template names for Confluence job specification |
| /** |
| * Forward to the javascript to check the specification parameters for the job |
| */ |
| private static final String EDIT_SPEC_HEADER_FORWARD = "editSpecification_conf.js"; |
| /** |
| * Forward to the template to edit the security for the job |
| */ |
| private static final String EDIT_SPEC_FORWARD_SECURITY = "editSpecification_confSecurity.html"; |
| /** |
| * Forward to the template to edit the spaces for the job |
| */ |
| private static final String EDIT_SPEC_FORWARD_SPACES = "editSpecification_confSpaces.html"; |
| |
| /** |
| * Forward to the template to edit the pages configuration for the job |
| */ |
| private static final String EDIT_SPEC_FORWARD_CONF_PAGES = "editSpecification_confPages.html"; |
| |
| /** |
| * Forward to the template to view the specification parameters for the job |
| */ |
| private static final String VIEW_SPEC_FORWARD = "viewSpecification_conf.html"; |
| |
| protected long lastSessionFetch = -1L; |
| protected static final long timeToRelease = 300000L; |
| |
| protected final static long interruptionRetryTime = 5L * 60L * 1000L; |
| |
| private final Logger logger = LoggerFactory.getLogger(ConfluenceRepositoryConnector.class); |
| |
| /* Confluence instance parameters */ |
| protected String protocol = null; |
| protected String host = null; |
| protected String port = null; |
| protected String path = null; |
| protected String username = null; |
| protected String password = null; |
| protected String socketTimeout = null; |
| protected String connectionTimeout = null; |
| |
| protected ConfluenceClient confluenceClient = null; |
| |
| /** |
| * <p> |
| * Default constructor |
| * </p> |
| */ |
| public ConfluenceRepositoryConnector() { |
| super(); |
| } |
| |
| /** |
| * Set Confluence Client (Mainly for Testing) |
| * |
| * @param confluenceClient |
| */ |
| public void setConfluenceClient(final ConfluenceClient confluenceClient) { |
| this.confluenceClient = confluenceClient; |
| } |
| |
| @Override |
| public String[] getActivitiesList() { |
| return new String[] { ACTIVITY_READ }; |
| } |
| |
| @Override |
| public String[] getBinNames(final String documentIdentifier) { |
| return new String[] { host }; |
| } |
| |
| /** |
| * Close the connection. Call this before discarding the connection. |
| */ |
| @Override |
| public void disconnect() throws ManifoldCFException { |
| if (confluenceClient != null) { |
| confluenceClient = null; |
| } |
| |
| protocol = null; |
| host = null; |
| port = null; |
| path = null; |
| username = null; |
| password = null; |
| socketTimeout = null; |
| connectionTimeout = null; |
| |
| } |
| |
| /** |
| * Makes connection to server |
| * |
| * |
| */ |
| @Override |
| public void connect(final ConfigParams configParams) { |
| super.connect(configParams); |
| |
| protocol = params.getParameter(ConfluenceConfiguration.Server.PROTOCOL); |
| host = params.getParameter(ConfluenceConfiguration.Server.HOST); |
| port = params.getParameter(ConfluenceConfiguration.Server.PORT); |
| path = params.getParameter(ConfluenceConfiguration.Server.PATH); |
| username = params.getParameter(ConfluenceConfiguration.Server.USERNAME); |
| password = params.getObfuscatedParameter(ConfluenceConfiguration.Server.PASSWORD); |
| socketTimeout = params.getParameter(ConfluenceConfiguration.Server.SOCKET_TIMEOUT); |
| connectionTimeout = params.getParameter(ConfluenceConfiguration.Server.CONNECTION_TIMEOUT); |
| |
| try { |
| initConfluenceClient(); |
| } catch (final ManifoldCFException e) { |
| logger.debug("Not possible to initialize Confluence client. Reason: {}", e.getMessage()); |
| e.printStackTrace(); |
| } |
| } |
| |
| /** |
| * Checks if connection is available |
| */ |
| @Override |
| public String check() throws ManifoldCFException { |
| try { |
| if (!isConnected()) { |
| initConfluenceClient(); |
| } |
| final Boolean result = confluenceClient.check(); |
| if (result) { |
| return super.check(); |
| } else { |
| throw new ManifoldCFException("Confluence instance could not be reached"); |
| } |
| } catch (final ServiceInterruption e) { |
| return "Connection temporarily failed: " + e.getMessage(); |
| } catch (final ManifoldCFException e) { |
| return "Connection failed: " + e.getMessage(); |
| } catch (final Exception e) { |
| return "Connection failed: " + e.getMessage(); |
| } |
| } |
| |
| /** |
| * <p> |
| * Initialize Confluence client using the configured parameters |
| * |
| * @throws ManifoldCFException |
| */ |
| protected void initConfluenceClient() throws ManifoldCFException { |
| if (confluenceClient == null) { |
| |
| if (StringUtils.isEmpty(protocol)) { |
| throw new ManifoldCFException("Parameter " + ConfluenceConfiguration.Server.PROTOCOL + " required but not set"); |
| } |
| |
| if (Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug("Confluence protocol = '" + protocol + "'"); |
| } |
| |
| if (StringUtils.isEmpty(host)) { |
| throw new ManifoldCFException("Parameter " + ConfluenceConfiguration.Server.HOST + " required but not set"); |
| } |
| |
| if (Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug("Confluence host = '" + host + "'"); |
| } |
| |
| if (Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug("Confluence port = '" + port + "'"); |
| } |
| |
| // if (StringUtils.isEmpty(path)) { |
| // throw new ManifoldCFException("Parameter " |
| // + ConfluenceConfiguration.Server.PATH |
| // + " required but not set"); |
| // } |
| |
| if (Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug("Confluence path = '" + path + "'"); |
| } |
| |
| if (Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug("Confluence username = '" + username + "'"); |
| } |
| |
| if (Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug("Confluence password '" + password != null ? "set" : "not set" + "'"); |
| } |
| |
| if (Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug("Confluence socket timeout = '" + socketTimeout + "'"); |
| } |
| |
| if (Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug("Confluence connection timeout = '" + connectionTimeout + "'"); |
| } |
| |
| int portInt; |
| if (port != null && port.length() > 0) { |
| try { |
| portInt = Integer.parseInt(port); |
| } catch (final NumberFormatException e) { |
| throw new ManifoldCFException("Bad number: " + e.getMessage(), e); |
| } |
| } else { |
| if (protocol.toLowerCase(Locale.ROOT).equals("http")) { |
| portInt = 80; |
| } else { |
| portInt = 443; |
| } |
| } |
| |
| int socketTimeoutInt; |
| if (socketTimeout != null && socketTimeout.length() > 0) { |
| try { |
| socketTimeoutInt = Integer.parseInt(socketTimeout); |
| } catch (final NumberFormatException e) { |
| throw new ManifoldCFException("Bad number: " + e.getMessage(), e); |
| } |
| } else { |
| socketTimeoutInt = 900000; |
| } |
| |
| int connectionTimeoutInt; |
| if (connectionTimeout != null && connectionTimeout.length() > 0) { |
| try { |
| connectionTimeoutInt = Integer.parseInt(connectionTimeout); |
| } catch (final NumberFormatException e) { |
| throw new ManifoldCFException("Bad number: " + e.getMessage(), e); |
| } |
| } else { |
| connectionTimeoutInt = 60000; |
| } |
| |
| /* Generating a client to perform Confluence requests */ |
| confluenceClient = new ConfluenceClient(protocol, host, portInt, path, username, password, socketTimeoutInt, connectionTimeoutInt); |
| lastSessionFetch = System.currentTimeMillis(); |
| } |
| |
| } |
| |
| /** |
| * This method is called to assess whether to count this connector instance should actually be counted as being connected. |
| * |
| * @return true if the connector instance is actually connected. |
| */ |
| @Override |
| public boolean isConnected() { |
| return confluenceClient != null; |
| } |
| |
| @Override |
| public void poll() throws ManifoldCFException { |
| if (lastSessionFetch == -1L) { |
| return; |
| } |
| |
| final long currentTime = System.currentTimeMillis(); |
| if (currentTime >= lastSessionFetch + timeToRelease) { |
| confluenceClient.close(); |
| confluenceClient = null; |
| lastSessionFetch = -1L; |
| } |
| } |
| |
| @Override |
| public int getMaxDocumentRequest() { |
| return super.getMaxDocumentRequest(); |
| } |
| |
| /** |
| * Return the list of relationship types that this connector recognizes. |
| * |
| * @return the list. |
| */ |
| @Override |
| public String[] getRelationshipTypes() { |
| return new String[] {}; |
| } |
| |
| private void fillInServerConfigurationMap(final Map<String, String> serverMap, final IPasswordMapperActivity mapper, final ConfigParams parameters) { |
| String confluenceProtocol = parameters.getParameter(ConfluenceConfiguration.Server.PROTOCOL); |
| String confluenceHost = parameters.getParameter(ConfluenceConfiguration.Server.HOST); |
| String confluencePort = parameters.getParameter(ConfluenceConfiguration.Server.PORT); |
| String confluencePath = parameters.getParameter(ConfluenceConfiguration.Server.PATH); |
| String confluenceUsername = parameters.getParameter(ConfluenceConfiguration.Server.USERNAME); |
| String confluencePassword = parameters.getObfuscatedParameter(ConfluenceConfiguration.Server.PASSWORD); |
| String confluenceSocketTimeout = parameters.getParameter(ConfluenceConfiguration.Server.SOCKET_TIMEOUT); |
| String confluenceConnectionTimeout = parameters.getParameter(ConfluenceConfiguration.Server.CONNECTION_TIMEOUT); |
| |
| if (confluenceProtocol == null) { |
| confluenceProtocol = ConfluenceConfiguration.Server.PROTOCOL_DEFAULT_VALUE; |
| } |
| if (confluenceHost == null) { |
| confluenceHost = ConfluenceConfiguration.Server.HOST_DEFAULT_VALUE; |
| } |
| if (confluencePort == null) { |
| confluencePort = ConfluenceConfiguration.Server.PORT_DEFAULT_VALUE; |
| } |
| if (confluencePath == null) { |
| confluencePath = ConfluenceConfiguration.Server.PATH_DEFAULT_VALUE; |
| } |
| |
| if (confluenceUsername == null) { |
| confluenceUsername = ConfluenceConfiguration.Server.USERNAME_DEFAULT_VALUE; |
| } |
| if (confluencePassword == null) { |
| confluencePassword = ConfluenceConfiguration.Server.PASSWORD_DEFAULT_VALUE; |
| } else { |
| confluencePassword = mapper.mapPasswordToKey(confluencePassword); |
| } |
| |
| if (confluenceSocketTimeout == null) { |
| confluenceSocketTimeout = ConfluenceConfiguration.Server.SOCKET_TIMEOUT_DEFAULT_VALUE; |
| } |
| if (confluenceConnectionTimeout == null) { |
| confluenceConnectionTimeout = ConfluenceConfiguration.Server.CONNECTION_TIMEOUT_DEFAULT_VALUE; |
| } |
| |
| serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PROTOCOL, confluenceProtocol); |
| serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.HOST, confluenceHost); |
| serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PORT, confluencePort); |
| serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PATH, confluencePath); |
| serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.USERNAME, confluenceUsername); |
| serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PASSWORD, confluencePassword); |
| serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.SOCKET_TIMEOUT, confluenceSocketTimeout); |
| serverMap.put(PARAMETER_PREFIX + ConfluenceConfiguration.Server.CONNECTION_TIMEOUT, confluenceConnectionTimeout); |
| } |
| |
| @Override |
| public void viewConfiguration(final IThreadContext threadContext, final IHTTPOutput out, final Locale locale, final ConfigParams parameters) throws ManifoldCFException, IOException { |
| final Map<String, String> paramMap = new HashMap<String, String>(); |
| |
| /* Fill server configuration parameters */ |
| fillInServerConfigurationMap(paramMap, out, parameters); |
| |
| Messages.outputResourceWithVelocity(out, locale, VIEW_CONFIG_FORWARD, paramMap, true); |
| } |
| |
| @Override |
| public void outputConfigurationHeader(final IThreadContext threadContext, final IHTTPOutput out, final Locale locale, final ConfigParams parameters, final List<String> tabsArray) |
| throws ManifoldCFException, IOException { |
| // Add the Server tab |
| tabsArray.add(Messages.getString(locale, CONF_SERVER_TAB_PROPERTY)); |
| // Map the parameters |
| final Map<String, String> paramMap = new HashMap<String, String>(); |
| |
| /* Fill server configuration parameters */ |
| fillInServerConfigurationMap(paramMap, out, parameters); |
| |
| // Output the Javascript - only one Velocity template for all tabs |
| Messages.outputResourceWithVelocity(out, locale, EDIT_CONFIG_HEADER_FORWARD, paramMap, true); |
| } |
| |
| @Override |
| public void outputConfigurationBody(final IThreadContext threadContext, final IHTTPOutput out, final Locale locale, final ConfigParams parameters, final String tabName) |
| throws ManifoldCFException, IOException { |
| |
| // Call the Velocity templates for each tab |
| final Map<String, String> paramMap = new HashMap<String, String>(); |
| // Set the tab name |
| paramMap.put("TabName", tabName); |
| |
| // Fill in the parameters |
| fillInServerConfigurationMap(paramMap, out, parameters); |
| |
| // Server tab |
| Messages.outputResourceWithVelocity(out, locale, EDIT_CONFIG_FORWARD_SERVER, paramMap, true); |
| |
| } |
| |
| /* |
| * Repository specification post handle, (server and proxy & client secret etc) |
| * |
| * @see org.apache.manifoldcf.core.connector.BaseConnector#processConfigurationPost (org.apache.manifoldcf.core.interfaces.IThreadContext, |
| * org.apache.manifoldcf.core.interfaces.IPostParameters, org.apache.manifoldcf.core.interfaces.ConfigParams) |
| */ |
| @Override |
| public String processConfigurationPost(final IThreadContext threadContext, final IPostParameters variableContext, final ConfigParams parameters) throws ManifoldCFException { |
| |
| final String confluenceProtocol = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PROTOCOL); |
| if (confluenceProtocol != null) { |
| parameters.setParameter(ConfluenceConfiguration.Server.PROTOCOL, confluenceProtocol); |
| } |
| |
| final String confluenceHost = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.HOST); |
| if (confluenceHost != null) { |
| parameters.setParameter(ConfluenceConfiguration.Server.HOST, confluenceHost); |
| } |
| |
| final String confluencePort = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PORT); |
| if (confluencePort != null) { |
| parameters.setParameter(ConfluenceConfiguration.Server.PORT, confluencePort); |
| } |
| |
| final String confluencePath = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PATH); |
| if (confluencePath != null) { |
| parameters.setParameter(ConfluenceConfiguration.Server.PATH, confluencePath); |
| } |
| |
| final String confluenceUsername = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.USERNAME); |
| if (confluenceUsername != null) { |
| parameters.setParameter(ConfluenceConfiguration.Server.USERNAME, confluenceUsername); |
| } |
| |
| final String confluencePassword = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.PASSWORD); |
| if (confluencePassword != null) { |
| parameters.setObfuscatedParameter(ConfluenceConfiguration.Server.PASSWORD, variableContext.mapKeyToPassword(confluencePassword)); |
| } |
| |
| final String confluenceSocketTimeout = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.SOCKET_TIMEOUT); |
| if (confluenceSocketTimeout != null) { |
| parameters.setParameter(ConfluenceConfiguration.Server.SOCKET_TIMEOUT, confluenceSocketTimeout); |
| } |
| |
| final String confluenceConnectionTimeout = variableContext.getParameter(PARAMETER_PREFIX + ConfluenceConfiguration.Server.CONNECTION_TIMEOUT); |
| if (confluenceConnectionTimeout != null) { |
| parameters.setParameter(ConfluenceConfiguration.Server.CONNECTION_TIMEOUT, confluenceConnectionTimeout); |
| } |
| |
| /* null means process configuration has been successful */ |
| return null; |
| } |
| |
| /** |
| * <p> |
| * Fill the configured spaces into the map |
| * </p> |
| * |
| * @param newMap |
| * @param cs |
| */ |
| private void fillInConfSpacesSpecificationMap(final Map<String, Object> newMap, final ConfluenceSpecification cs) { |
| |
| newMap.put(ConfluenceConfiguration.Specification.SPACES.toUpperCase(Locale.ROOT), cs.getSpaces()); |
| } |
| |
| private void fillInConfSecuritySpecificationMap(final Map<String, Object> newMap, final ConfluenceSpecification cs) { |
| |
| newMap.put(ConfluenceConfiguration.Specification.ACTIVATE_SECURITY_ATTRIBUTE_KEY.toUpperCase(Locale.ROOT), cs.isSecurityActive().toString()); |
| return; |
| |
| } |
| |
| /** |
| * <p> |
| * Fill the pages configuration into the map |
| * </p> |
| * |
| * @param newMap |
| * @param cs |
| */ |
| private void fillInConfPagesSpecificationMap(final Map<String, Object> newMap, final ConfluenceSpecification cs) { |
| |
| newMap.put(ConfluenceConfiguration.Specification.PROCESS_ATTACHMENTS_ATTRIBUTE_KEY.toUpperCase(Locale.ROOT), cs.isProcessAttachments().toString()); |
| newMap.put(ConfluenceConfiguration.Specification.PAGETYPE.toUpperCase(Locale.ROOT), cs.getPageType()); |
| return; |
| |
| } |
| |
| @Override |
| public void viewSpecification(final IHTTPOutput out, final Locale locale, final Specification ds, final int connectionSequenceNumber) throws ManifoldCFException, IOException { |
| |
| final Map<String, Object> paramMap = new HashMap<String, Object>(); |
| paramMap.put("SeqNum", Integer.toString(connectionSequenceNumber)); |
| |
| final ConfluenceSpecification cs = ConfluenceSpecification.from(ds); |
| |
| fillInConfSecuritySpecificationMap(paramMap, cs); |
| fillInConfSpacesSpecificationMap(paramMap, cs); |
| fillInConfPagesSpecificationMap(paramMap, cs); |
| |
| Messages.outputResourceWithVelocity(out, locale, VIEW_SPEC_FORWARD, paramMap); |
| } |
| |
| /* |
| * Handle job specification post |
| * |
| * @see org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector# processSpecificationPost |
| * (org.apache.manifoldcf.core.interfaces.IPostParameters, org.apache.manifoldcf.crawler.interfaces.DocumentSpecification) |
| */ |
| |
| @Override |
| public String processSpecificationPost(final IPostParameters variableContext, final Locale locale, final Specification ds, final int connectionSequenceNumber) throws ManifoldCFException { |
| |
| final String seqPrefix = "s" + connectionSequenceNumber + "_"; |
| |
| String xc = variableContext.getParameter(seqPrefix + "spacescount"); |
| if (xc != null) { |
| // Delete all preconfigured spaces |
| int i = 0; |
| while (i < ds.getChildCount()) { |
| final SpecificationNode sn = ds.getChild(i); |
| if (sn.getType().equals(ConfluenceConfiguration.Specification.SPACES)) { |
| ds.removeChild(i); |
| } else { |
| i++; |
| } |
| } |
| |
| final SpecificationNode spaces = new SpecificationNode(ConfluenceConfiguration.Specification.SPACES); |
| ds.addChild(ds.getChildCount(), spaces); |
| final int spacesCount = Integer.parseInt(xc); |
| i = 0; |
| while (i < spacesCount) { |
| final String spaceDescription = "_" + Integer.toString(i); |
| final String spaceOpName = seqPrefix + "spaceop" + spaceDescription; |
| xc = variableContext.getParameter(spaceOpName); |
| if (xc != null && xc.equals("Delete")) { |
| // Next row |
| i++; |
| continue; |
| } |
| // Get the stuff we need |
| final String spaceKey = variableContext.getParameter(seqPrefix + "space" + spaceDescription); |
| final SpecificationNode node = new SpecificationNode(ConfluenceConfiguration.Specification.SPACE); |
| node.setAttribute(ConfluenceConfiguration.Specification.SPACE_KEY_ATTRIBUTE, spaceKey); |
| spaces.addChild(spaces.getChildCount(), node); |
| i++; |
| } |
| |
| final String op = variableContext.getParameter(seqPrefix + "spaceop"); |
| if (op != null && op.equals("Add")) { |
| final String spaceSpec = variableContext.getParameter(seqPrefix + "space"); |
| final SpecificationNode node = new SpecificationNode(ConfluenceConfiguration.Specification.SPACE); |
| node.setAttribute(ConfluenceConfiguration.Specification.SPACE_KEY_ATTRIBUTE, spaceSpec); |
| spaces.addChild(spaces.getChildCount(), node); |
| } |
| } |
| |
| /* Delete security configuration */ |
| int i = 0; |
| while (i < ds.getChildCount()) { |
| final SpecificationNode sn = ds.getChild(i); |
| if (sn.getType().equals(ConfluenceConfiguration.Specification.SECURITY)) { |
| ds.removeChild(i); |
| } else { |
| i++; |
| } |
| } |
| |
| final SpecificationNode security = new SpecificationNode(ConfluenceConfiguration.Specification.SECURITY); |
| ds.addChild(ds.getChildCount(), security); |
| |
| final String activateSecurity = variableContext.getParameter(seqPrefix + ConfluenceConfiguration.Specification.ACTIVATE_SECURITY_ATTRIBUTE_KEY); |
| if (activateSecurity != null && !activateSecurity.isEmpty()) { |
| security.setAttribute(ConfluenceConfiguration.Specification.ACTIVATE_SECURITY_ATTRIBUTE_KEY, String.valueOf(activateSecurity)); |
| } |
| |
| /* Delete pages configuration */ |
| i = 0; |
| while (i < ds.getChildCount()) { |
| final SpecificationNode sn = ds.getChild(i); |
| if (sn.getType().equals(ConfluenceConfiguration.Specification.PAGES)) { |
| ds.removeChild(i); |
| } else { |
| i++; |
| } |
| } |
| |
| final SpecificationNode pages = new SpecificationNode(ConfluenceConfiguration.Specification.PAGES); |
| ds.addChild(ds.getChildCount(), pages); |
| |
| final String procAttachments = variableContext.getParameter(seqPrefix + ConfluenceConfiguration.Specification.PROCESS_ATTACHMENTS_ATTRIBUTE_KEY); |
| if (procAttachments != null && !procAttachments.isEmpty()) { |
| pages.setAttribute(ConfluenceConfiguration.Specification.PROCESS_ATTACHMENTS_ATTRIBUTE_KEY, String.valueOf(procAttachments)); |
| } |
| |
| final String pageType = variableContext.getParameter(seqPrefix + ConfluenceConfiguration.Specification.PAGETYPE); |
| if (pageType != null && !pageType.isEmpty()) { |
| pages.setAttribute(ConfluenceConfiguration.Specification.PAGETYPE, pageType); |
| } |
| |
| return null; |
| } |
| |
| /* |
| * (non-Javadoc) |
| * |
| * @see org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector# outputSpecificationBody |
| * (org.apache.manifoldcf.core.interfaces.IHTTPOutput, java.util.Locale, org.apache.manifoldcf.crawler.interfaces.DocumentSpecification, |
| * java.lang.String) |
| */ |
| @Override |
| public void outputSpecificationBody(final IHTTPOutput out, final Locale locale, final Specification ds, final int connectionSequenceNumber, final int actualSequenceNumber, final String tabName) |
| throws ManifoldCFException, IOException { |
| |
| // Output JIRAQuery tab |
| final Map<String, Object> paramMap = new HashMap<String, Object>(); |
| paramMap.put("TabName", tabName); |
| paramMap.put("SeqNum", Integer.toString(connectionSequenceNumber)); |
| paramMap.put("SelectedNum", Integer.toString(actualSequenceNumber)); |
| |
| final ConfluenceSpecification cs = ConfluenceSpecification.from(ds); |
| |
| fillInConfSecuritySpecificationMap(paramMap, cs); |
| fillInConfSpacesSpecificationMap(paramMap, cs); |
| fillInConfPagesSpecificationMap(paramMap, cs); |
| |
| Messages.outputResourceWithVelocity(out, locale, EDIT_SPEC_FORWARD_SECURITY, paramMap); |
| |
| Messages.outputResourceWithVelocity(out, locale, EDIT_SPEC_FORWARD_SPACES, paramMap); |
| |
| Messages.outputResourceWithVelocity(out, locale, EDIT_SPEC_FORWARD_CONF_PAGES, paramMap); |
| } |
| |
| /* |
| * Header for the specification |
| * |
| * @see org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector# outputSpecificationHeader |
| * (org.apache.manifoldcf.core.interfaces.IHTTPOutput, java.util.Locale, org.apache.manifoldcf.crawler.interfaces.DocumentSpecification, |
| * java.util.List) |
| */ |
| @Override |
| public void outputSpecificationHeader(final IHTTPOutput out, final Locale locale, final Specification ds, final int connectionSequenceNumber, final List<String> tabsArray) |
| throws ManifoldCFException, IOException { |
| |
| tabsArray.add(Messages.getString(locale, CONF_SECURITY_TAB_PROPERTY)); |
| tabsArray.add(Messages.getString(locale, CONF_SPACES_TAB_PROPERTY)); |
| tabsArray.add(Messages.getString(locale, CONF_PAGES_TAB_PROPERTY)); |
| |
| final Map<String, Object> paramMap = new HashMap<String, Object>(); |
| paramMap.put("SeqNum", Integer.toString(connectionSequenceNumber)); |
| |
| Messages.outputResourceWithVelocity(out, locale, EDIT_SPEC_HEADER_FORWARD, paramMap); |
| } |
| |
| /* |
| * Adding seed documents |
| * |
| * @see org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector# addSeedDocuments |
| * (org.apache.manifoldcf.crawler.interfaces.ISeedingActivity, org.apache.manifoldcf.crawler.interfaces.DocumentSpecification, long, long, |
| * int) |
| */ |
| @Override |
| public String addSeedDocuments(final ISeedingActivity activities, final Specification spec, final String lastSeedVersion, final long seedTime, final int jobMode) |
| throws ManifoldCFException, ServiceInterruption { |
| |
| if (!isConnected()) { |
| initConfluenceClient(); |
| } |
| |
| try { |
| |
| /* |
| * Not uses delta seeding because Confluence can't be queried using dates or in a ordered way, only start and limit which can cause problems |
| * if an already indexed document is deleted, because we will miss some to-be indexed docs due to the last start parameter stored in the |
| * last execution |
| */ |
| // if(lastSeedVersion != null && !lastSeedVersion.isEmpty()) { |
| // StringTokenizer tokenizer = new |
| // StringTokenizer(lastSeedVersion,"|"); |
| // |
| // lastStart = new Long(lastSeedVersion); |
| // } |
| |
| final ConfluenceSpecification confluenceSpecification = ConfluenceSpecification.from(spec); |
| List<String> spaceKeys = confluenceSpecification.getSpaces(); |
| final String pageType = confluenceSpecification.getPageType(); |
| |
| if (spaceKeys.isEmpty()) { |
| logger.info("No spaces configured. Processing all spaces"); |
| spaceKeys = getAllSpaceKeys(); |
| } |
| |
| for (final String space : spaceKeys) { |
| logger.info("Processing configured space {}", space); |
| addSeedDocumentsForSpace(space, Optional.<String>of(pageType), activities, confluenceSpecification, lastSeedVersion, seedTime, jobMode); |
| } |
| |
| return ""; |
| } catch (final Exception e) { |
| handleConfluenceDownException(e, "seeding"); |
| return null; |
| } |
| } |
| |
| private List<Page> getPageChilds(final String pageId) throws ManifoldCFException, ServiceInterruption { |
| long lastStart = 0; |
| final long defaultSize = 25; |
| final List<Page> pageChilds = new ArrayList<>(); |
| |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug(new MessageFormat("Starting from {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, "getPageChilds" })); |
| } |
| |
| try { |
| Boolean isLast = true; |
| do { |
| final ConfluenceResponse<Page> response = confluenceClient.getPageChilds((int) lastStart, (int) defaultSize, pageId); |
| |
| int count = 0; |
| for (final Page page : response.getResults()) { |
| pageChilds.add(page); |
| count++; |
| } |
| |
| lastStart += count; |
| isLast = response.isLast(); |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug(new MessageFormat("New start {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, "getPageChilds" })); |
| } |
| } while (!isLast); |
| |
| } catch (final Exception e) { |
| handleConfluenceDownException(e, "seeding"); |
| } |
| return pageChilds; |
| |
| } |
| |
| private List<Restrictions> getPageReadRestrictions(final String pageId) throws ManifoldCFException, ServiceInterruption { |
| long lastStart = 0; |
| final long defaultSize = 200; |
| final List<Restrictions> restrictionsList = new ArrayList<Restrictions>(); |
| |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug(new MessageFormat("Starting from {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, "getAllSpaceKeys" })); |
| } |
| |
| try { |
| Boolean isLast = true; |
| do { |
| final ConfluenceRestrictionsResponse<Restrictions> response = confluenceClient.getPageReadRestrictions((int) lastStart, (int) defaultSize, pageId); |
| |
| if (response.getResult() != null) { |
| restrictionsList.add(response.getResult()); |
| } |
| |
| isLast = response.isLast(); |
| if (!isLast) { |
| lastStart += defaultSize; |
| } |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug(new MessageFormat("New start {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, "getAllSpaceKeys" })); |
| } |
| } while (!isLast); |
| |
| } catch (final Exception e) { |
| handleConfluenceDownException(e, "seeding"); |
| } |
| return restrictionsList; |
| } |
| |
| private List<String> getAllSpaceKeys() throws ManifoldCFException, ServiceInterruption { |
| final List<String> spaceKeys = new ArrayList<String>(); |
| long lastStart = 0; |
| final long defaultSize = 25; |
| |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug(new MessageFormat("Starting from {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, "getAllSpaceKeys" })); |
| } |
| |
| try { |
| Boolean isLast = true; |
| do { |
| final ConfluenceResponse<Space> response = confluenceClient.getSpaces((int) lastStart, (int) defaultSize, Optional.<String>absent(), Optional.<String>absent()); |
| |
| int count = 0; |
| for (final Space space : response.getResults()) { |
| spaceKeys.add(space.getKey()); |
| count++; |
| } |
| |
| lastStart += count; |
| isLast = response.isLast(); |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug(new MessageFormat("New start {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, "getAllSpaceKeys" })); |
| } |
| } while (!isLast); |
| } catch (final Exception e) { |
| handleConfluenceDownException(e, "seeding"); |
| } |
| return spaceKeys; |
| } |
| |
| /** |
| * <p> |
| * Add seed documents for a given optional space |
| * </p> |
| * |
| * @throws ServiceInterruption |
| * @throws ManifoldCFException |
| */ |
| private void addSeedDocumentsForSpace(final String space, final Optional<String> pageType, final ISeedingActivity activities, final ConfluenceSpecification confluenceSpec, |
| final String lastSeedVersion, final long seedTime, final int jobMode) throws ManifoldCFException, ServiceInterruption { |
| |
| long lastStart = 0; |
| final long defaultSize = 50; |
| |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| final String spaceDesc = "space with key " + space; |
| Logging.connectors.debug(new MessageFormat("Starting from {0} and size {1} for {2}", Locale.ROOT).format(new Object[] { lastStart, defaultSize, spaceDesc })); |
| } |
| |
| try { |
| Boolean isLast = true; |
| do { |
| final ConfluenceResponse<Page> response = confluenceClient.getSpaceRootPages((int) lastStart, (int) defaultSize, space, pageType); |
| // final ConfluenceResponse<Page> response = confluenceClient.getPages( |
| // (int) lastStart, (int) defaultSize, space, pageType); |
| |
| int count = 0; |
| for (final Page page : response.getResults()) { |
| |
| activities.addSeedDocument(page.getId()); |
| if (confluenceSpec.isProcessAttachments()) { |
| processSeedAttachments(page, activities); |
| } |
| count++; |
| } |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug(new MessageFormat("Fetched and added {0} seed documents", Locale.ROOT).format(new Object[] { new Integer(count) })); |
| } |
| |
| lastStart += count; |
| isLast = response.isLast(); |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug(new MessageFormat("New start {0} and size {1}", Locale.ROOT).format(new Object[] { lastStart, defaultSize })); |
| } |
| } while (!isLast); |
| |
| } catch (final Exception e) { |
| handleConfluenceDownException(e, "seeding"); |
| } |
| |
| } |
| |
| /** |
| * <p> |
| * Process seed attachments for the given page |
| * </p> |
| * |
| * @param page |
| * @param activities |
| */ |
| private void processSeedAttachments(final Page page, final ISeedingActivity activities) throws ManifoldCFException, ServiceInterruption { |
| long lastStart = 0; |
| final long defaultSize = 50; |
| |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug(new MessageFormat("Processing page {} attachments starting from {} and size {}", Locale.ROOT).format(new Object[] { page.getId(), lastStart, defaultSize })); |
| } |
| |
| try { |
| Boolean isLast = true; |
| do { |
| final ConfluenceResponse<Attachment> response = confluenceClient.getPageAttachments(page.getId(), (int) lastStart, (int) defaultSize); |
| |
| int count = 0; |
| for (final Page resultPage : response.getResults()) { |
| activities.addSeedDocument(ConfluenceUtil.generateRepositoryDocumentIdentifier(resultPage.getId(), page.getId())); |
| count++; |
| } |
| |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug(new MessageFormat("Fetched and added {} seed document attachments for page {}", Locale.ROOT).format(new Object[] { new Integer(count), page.getId() })); |
| } |
| |
| lastStart += count; |
| isLast = response.isLast(); |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug(new MessageFormat("New start {0} and size {1}", Locale.ROOT).format(new Object[] { lastStart, defaultSize })); |
| } |
| } while (!isLast); |
| |
| } catch (final Exception e) { |
| handleConfluenceDownException(e, "seeding"); |
| } |
| } |
| |
| protected static void handleConfluenceDownException(final Exception e, final String context) throws ManifoldCFException, ServiceInterruption { |
| final long currentTime = System.currentTimeMillis(); |
| |
| // Server doesn't appear to by up. Try for a brief time then give up. |
| final String message = "Server appears down during " + context + ": " + e.getMessage(); |
| Logging.connectors.warn(message, e); |
| throw new ServiceInterruption(message, e, currentTime + interruptionRetryTime, -1L, 3, true); |
| } |
| |
| /** |
| * Handle page exception : retry 3rd times with a 5 minutes interval without aborting job in case of failure |
| * |
| * @param e |
| * @param context |
| * The error context (ex: 'page processing') |
| * @throws ManifoldCFException |
| * @throws ServiceInterruption |
| */ |
| protected static void handlePageException(final Exception e, final String context) throws ManifoldCFException, ServiceInterruption { |
| final long currentTime = System.currentTimeMillis(); |
| |
| // Server doesn't appear to by up. Try for a brief time then give up. |
| final String message = "Server appears down during " + context + ": " + e.getMessage(); |
| Logging.connectors.warn(message, e); |
| throw new ServiceInterruption(message, e, currentTime + interruptionRetryTime, -1L, 3, false); |
| } |
| |
| /* |
| * Process documents |
| * |
| * @see org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector# processDocuments(java.lang.String[], java.lang.String[], |
| * org.apache.manifoldcf.crawler.interfaces.IProcessActivity, org.apache.manifoldcf.crawler.interfaces.DocumentSpecification, boolean[]) |
| */ |
| @Override |
| public void processDocuments(final String[] documentIdentifiers, final IExistingVersions statuses, final Specification spec, final IProcessActivity activities, final int jobMode, |
| final boolean usesDefaultAuthority) throws ManifoldCFException, ServiceInterruption { |
| |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug("Process Confluence documents: Inside processDocuments"); |
| } |
| |
| final ConfluenceSpecification confluenceSpecification = ConfluenceSpecification.from(spec); |
| |
| final boolean activeSecurity = confluenceSpecification.isSecurityActive(); |
| |
| for (int i = 0; i < documentIdentifiers.length; i++) { |
| final String documentIdentifier = documentIdentifiers[i]; |
| String pageId = documentIdentifier; |
| final String version = statuses.getIndexedVersionString(documentIdentifier); |
| final List<String> parentRestrictions = new ArrayList<>(); |
| if (pageId.startsWith(CHILD_PREFIX)) { |
| final JSONParser parser = new JSONParser(); |
| try { |
| final JSONObject child = (JSONObject) parser.parse(new StringReader(pageId.substring(CHILD_PREFIX.length()))); |
| pageId = child.get("id").toString(); |
| final JSONArray arrParentRestrictions = (JSONArray) child.get("parentRestricions"); |
| arrParentRestrictions.forEach(pr -> parentRestrictions.add(pr.toString())); |
| parentRestrictions.sort(String::compareToIgnoreCase); |
| } catch (IOException | ParseException e) { |
| handleException(e); |
| } |
| } |
| |
| final long startTime = System.currentTimeMillis(); |
| long fileSize = 0L; |
| final String errorCode = "OK"; |
| final String errorDesc = StringUtils.EMPTY; |
| ProcessResult pResult = null; |
| final boolean doLog = true; |
| |
| try { |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug("Confluence: Processing document identifier '" + pageId + "'"); |
| } |
| |
| /* Ensure Confluence client is connected */ |
| if (!isConnected()) { |
| initConfluenceClient(); |
| } |
| |
| if (ConfluenceUtil.isAttachment(pageId)) { |
| pResult = processPageAsAttachment(activeSecurity, documentIdentifier, parentRestrictions, pageId, version, activities, doLog); |
| } else { |
| pResult = processPage(activeSecurity, documentIdentifier, parentRestrictions, pageId, version, activities, doLog, Maps.<String, String>newHashMap()); |
| } |
| } catch (final IOException ioe) { |
| handleIOException(ioe); |
| } catch (final Exception e) { |
| handleException(e); |
| } |
| |
| finally { |
| if (doLog) { |
| if (pResult != null && pResult.errorCode != null && !pResult.errorCode.isEmpty()) { |
| activities.recordActivity(new Long(startTime), ACTIVITY_READ, pResult.fileSize, pageId, pResult.errorCode, pResult.errorDescription, null); |
| } else { |
| if (pResult != null) { |
| fileSize = pResult.fileSize; |
| } |
| activities.recordActivity(new Long(startTime), ACTIVITY_READ, fileSize, pageId, errorCode, errorDesc, null); |
| } |
| } |
| } |
| |
| } |
| } |
| |
| /** |
| * <p> |
| * Process the specific page |
| * </p> |
| * |
| * @param activeSecurity |
| * Security enabled/disabled |
| * @param documentIdentifier |
| * The original documentIdentifier |
| * @param parentRestrictions |
| * The list of parent restrictions |
| * @param pageId |
| * The pageId being an attachment |
| * @param version |
| * The version of the page |
| * @param activities |
| * @param doLog |
| * |
| * @throws ManifoldCFException |
| * @throws IOException |
| * @throws ServiceInterruption |
| */ |
| private ProcessResult processPage(final boolean activeSecurity, final String documentIdentifier, final List<String> parentRestrictions, final String pageId, final String version, |
| final IProcessActivity activities, final boolean doLog, final Map<String, String> extraProperties) throws ManifoldCFException, ServiceInterruption, IOException { |
| Page page = new Page(); |
| try { |
| page = confluenceClient.getPage(pageId); |
| } catch (final Exception e) { |
| handlePageException(e, "page processing"); |
| } |
| return processPageInternal(activeSecurity, parentRestrictions, page, documentIdentifier, version, activities, doLog, extraProperties); |
| |
| } |
| |
| /** |
| * <p> |
| * Process the specific attachment |
| * </p> |
| * |
| * @param activeSecurity |
| * Security enabled/disabled |
| * @param documentIdentifier |
| * The original documentIdentifier |
| * @param parentRestrictions |
| * The list of parent restrictions |
| * @param pageId |
| * The pageId being an attachment |
| * @param version |
| * The version of the page |
| * @param activities |
| * @param doLog |
| * @throws IOException |
| * @throws ServiceInterruption |
| */ |
| private ProcessResult processPageAsAttachment(final boolean activeSecurity, final String documentIdentifier, final List<String> parentRestrictions, final String pageId, final String version, |
| final IProcessActivity activities, final boolean doLog) throws ManifoldCFException, ServiceInterruption, IOException { |
| |
| final String[] ids = ConfluenceUtil.getAttachmentAndPageId(pageId); |
| Attachment attachment = new Attachment(); |
| try { |
| attachment = confluenceClient.getAttachment(ids[0]); |
| } catch (final Exception e) { |
| handlePageException(e, "attachment processing"); |
| } |
| final Map<String, String> extraProperties = Maps.newHashMap(); |
| extraProperties.put("attachedBy", ids[1]); |
| return processPageInternal(activeSecurity, parentRestrictions, attachment, documentIdentifier, version, activities, doLog, extraProperties); |
| |
| } |
| |
| /** |
| * <p> |
| * Process the specific page |
| * </p> |
| * |
| * @param activeSecurity |
| * Security enabled/disabled |
| * @param parentRestrictions |
| * The list of parent restrictions |
| * @param page |
| * The page to process |
| * @param manifoldDocumentIdentifier |
| * @param version |
| * The version of the page |
| * @param activities |
| * @param doLog |
| * |
| * @throws ManifoldCFException |
| * @throws IOException |
| * @throws ServiceInterruption |
| */ |
| private ProcessResult processPageInternal(final boolean activeSecurity, final List<String> parentRestrictions, final Page page, final String manifoldDocumentIdentifier, final String version, |
| final IProcessActivity activities, final boolean doLog, final Map<String, String> extraProperties) throws ManifoldCFException, ServiceInterruption, IOException { |
| |
| /* Remove page if it has no content */ |
| /* |
| * Page does not have content if there was an error trying to get the page |
| */ |
| if (!page.hasContent()) { |
| activities.deleteDocument(manifoldDocumentIdentifier); |
| return new ProcessResult(page.getLength(), "DELETED", ""); |
| } |
| if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) { |
| Logging.connectors.debug("Confluence: This content exists: " + page.getId()); |
| } |
| |
| final RepositoryDocument rd = new RepositoryDocument(); |
| final Date createdDate = page.getCreatedDate(); |
| final Date lastModified = page.getLastModifiedDate(); |
| final DateFormat df = DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.MEDIUM, Locale.ROOT); |
| |
| /* |
| * Retain page in Manifold because it has not changed from last time This is needed to keep the identifier in Manifold data, because by |
| * default if a document is not retained nor ingested, it will be deleted by the framework |
| */ |
| final StringBuilder versionBuilder = new StringBuilder(); |
| versionBuilder.append(df.format(lastModified)); |
| final List<String> pageRestrictions = new ArrayList<String>(); |
| if (activeSecurity) { |
| final List<Restrictions> restrictions = getPageReadRestrictions(page.getId()); |
| for (final Restrictions res : restrictions) { |
| final ReadRestrictions rr = res.getReadRestrictions(); |
| rr.getUsers().forEach(user -> { |
| pageRestrictions.add("user-" + user.getUserKey()); |
| }); |
| rr.getGroups().forEach(group -> { |
| pageRestrictions.add("group-" + group.getName()); |
| }); |
| } |
| } |
| // Order the page restrictions alphabetically so the version will be always the same in case the same restrictions between two crawls are |
| // not retrieved in the same order |
| pageRestrictions.sort(String::compareToIgnoreCase); |
| versionBuilder.append("+"); |
| packList(versionBuilder, pageRestrictions, '+'); |
| versionBuilder.append("+"); |
| packList(versionBuilder, parentRestrictions, '+'); |
| final String lastVersion = versionBuilder.toString(); |
| |
| // Get and reference page direct childs if any |
| if (page.getType() == PageType.PAGE) { |
| final List<Page> pageChilds = getPageChilds(page.getId()); |
| for (final Page childPage : pageChilds) { |
| final JSONObject child = new JSONObject(); |
| child.put("id", childPage.getId()); |
| final List<String> childParentRestrictions = new ArrayList<>(); |
| // MCF only manage one level of parent ACLs, so if the current page has restrictions they must replace the current parent restrictions for |
| // its child pages |
| if (activeSecurity) { |
| if (pageRestrictions.isEmpty()) { |
| childParentRestrictions.addAll(parentRestrictions); |
| } else { |
| childParentRestrictions.addAll(pageRestrictions); |
| } |
| } |
| childParentRestrictions.sort(String::compareToIgnoreCase); |
| child.put("parentRestricions", childParentRestrictions); |
| activities.addDocumentReference(CHILD_PREFIX + child.toJSONString()); |
| } |
| } |
| |
| if (!activities.checkDocumentNeedsReindexing(manifoldDocumentIdentifier, lastVersion)) { |
| return new ProcessResult(page.getLength(), "RETAINED", ""); |
| } |
| |
| if (!activities.checkLengthIndexable(page.getLength())) { |
| activities.noDocument(manifoldDocumentIdentifier, lastVersion); |
| final String errorCode = IProcessActivity.EXCLUDED_LENGTH; |
| final String errorDesc = "Excluding document because of length (" + page.getLength() + ")"; |
| return new ProcessResult(page.getLength(), errorCode, errorDesc); |
| } |
| |
| if (!activities.checkMimeTypeIndexable(page.getMediaType())) { |
| activities.noDocument(manifoldDocumentIdentifier, lastVersion); |
| final String errorCode = IProcessActivity.EXCLUDED_MIMETYPE; |
| final String errorDesc = "Excluding document because of mime type (" + page.getMediaType() + ")"; |
| return new ProcessResult(page.getLength(), errorCode, errorDesc); |
| } |
| |
| if (!activities.checkDateIndexable(lastModified)) { |
| activities.noDocument(manifoldDocumentIdentifier, lastVersion); |
| final String errorCode = IProcessActivity.EXCLUDED_DATE; |
| final String errorDesc = "Excluding document because of date (" + lastModified + ")"; |
| return new ProcessResult(page.getLength(), errorCode, errorDesc); |
| } |
| |
| if (!activities.checkURLIndexable(page.getWebUrl())) { |
| activities.noDocument(manifoldDocumentIdentifier, lastVersion); |
| final String errorCode = IProcessActivity.EXCLUDED_URL; |
| final String errorDesc = "Excluding document because of URL ('" + page.getWebUrl() + "')"; |
| return new ProcessResult(page.getLength(), errorCode, errorDesc); |
| } |
| |
| /* Add repository document information */ |
| rd.setMimeType(page.getMediaType()); |
| if (createdDate != null) { |
| rd.setCreatedDate(createdDate); |
| } |
| if (lastModified != null) { |
| rd.setModifiedDate(lastModified); |
| } |
| rd.setIndexingDate(new Date()); |
| |
| /* Adding Page Metadata */ |
| final Map<String, Object> pageMetadata = page.getMetadataAsMap(); |
| for (final Entry<String, Object> entry : pageMetadata.entrySet()) { |
| if (entry.getValue() instanceof List) { |
| final List<?> list = (List<?>) entry.getValue(); |
| rd.addField(entry.getKey(), list.toArray(new String[list.size()])); |
| } else if (entry.getValue() != null) { |
| final String key = entry.getKey(); |
| final String value = entry.getValue().toString(); |
| rd.addField(key, value); |
| if (key.toLowerCase().contentEquals("title")) { |
| rd.addField("stream_name", value); |
| } |
| } |
| } |
| rd.addField("source", "confluence"); |
| |
| /* Adding extra properties */ |
| for (final Entry<String, String> entry : extraProperties.entrySet()) { |
| rd.addField(entry.getKey(), entry.getValue()); |
| } |
| |
| final String documentURI = page.getWebUrl(); |
| |
| /* Set repository document ACLs */ |
| if (activeSecurity) { |
| |
| rd.setSecurity(RepositoryDocument.SECURITY_TYPE_SHARE, new String[] { "space-" + page.getSpace() }, new String[] { defaultAuthorityDenyToken }); |
| |
| if (parentRestrictions.size() > 0) { |
| rd.setSecurity(RepositoryDocument.SECURITY_TYPE_PARENT, parentRestrictions.toArray(new String[0]), new String[] { defaultAuthorityDenyToken }); |
| } |
| |
| if (pageRestrictions.size() > 0) { |
| rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, pageRestrictions.toArray(new String[0]), new String[] { defaultAuthorityDenyToken }); |
| } |
| |
| } |
| |
| rd.setBinary(page.getContentStream(), page.getLength()); |
| rd.addField("size", String.valueOf(page.getLength())); |
| |
| /* Ingest document */ |
| activities.ingestDocumentWithException(manifoldDocumentIdentifier, lastVersion, documentURI, rd); |
| |
| return new ProcessResult(page.getLength(), null, null); |
| } |
| |
| /** |
| * <p> |
| * Handles IO Exception to manage whether the exception is an interruption so that the process needs to be executed again later on |
| * </p> |
| * |
| * @param e |
| * The Exception |
| * @throws ManifoldCFException |
| * @throws ServiceInterruption |
| */ |
| private static void handleIOException(final IOException e) throws ManifoldCFException, ServiceInterruption { |
| if (!(e instanceof java.net.SocketTimeoutException) && e instanceof InterruptedIOException) { |
| throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); |
| } |
| Logging.connectors.warn("IO exception: " + e.getMessage(), e); |
| final long currentTime = System.currentTimeMillis(); |
| throw new ServiceInterruption("IO exception: " + e.getMessage(), e, currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false); |
| } |
| |
| /** |
| * <p> |
| * Handles general exceptions |
| * </p> |
| * |
| * @param e |
| * The Exception |
| * @throws ServiceInterruption |
| * @throws ManifoldCFException |
| */ |
| private static void handleException(final Exception e) throws ServiceInterruption, ManifoldCFException { |
| if (!(e instanceof ServiceInterruption)) { |
| Logging.connectors.warn("Exception: " + e.getMessage(), e); |
| throw new ManifoldCFException("Exception: " + e.getMessage(), e, ManifoldCFException.REPOSITORY_CONNECTION_ERROR); |
| } else { |
| throw (ServiceInterruption) e; |
| } |
| } |
| |
| private class ProcessResult { |
| private final long fileSize; |
| private final String errorCode; |
| private final String errorDescription; |
| |
| private ProcessResult(final long fileSize, final String errorCode, final String errorDescription) { |
| this.fileSize = fileSize; |
| this.errorCode = errorCode; |
| this.errorDescription = errorDescription; |
| } |
| } |
| |
| /** |
| * <p> |
| * Internal private class used to parse and keep the specification configuration in object format |
| * </p> |
| * |
| * @author Antonio David Perez Morales <adperezmorales@gmail.com> |
| * |
| */ |
| private static class ConfluenceSpecification { |
| private List<String> spaces; |
| private Boolean activateSecurity = true; |
| private Boolean processAttachments = false; |
| private String pageType = null; |
| |
| public Boolean isSecurityActive() { |
| return this.activateSecurity; |
| } |
| |
| /** |
| * <p> |
| * Returns if attachments should be processed |
| * </p> |
| * |
| * @return a {@code Boolean} indicating if the attachments should be processed or not |
| */ |
| public Boolean isProcessAttachments() { |
| return this.processAttachments; |
| } |
| |
| /** |
| * <p> |
| * Returns the list of configured spaces or an empty list meaning that all spaces should be processed |
| * </p> |
| * |
| * @return a {@code List<String>} of configured spaces |
| */ |
| public List<String> getSpaces() { |
| return this.spaces; |
| } |
| |
| /** |
| * <p> |
| * Returns configured page type |
| * </p> |
| * |
| * @return a {@code String} of configured page type |
| */ |
| public String getPageType() { |
| if (this.pageType == null || this.pageType.isEmpty()) { |
| return "page"; |
| } |
| |
| return this.pageType; |
| } |
| |
| public static ConfluenceSpecification from(final Specification spec) { |
| final ConfluenceSpecification cs = new ConfluenceSpecification(); |
| cs.spaces = Lists.newArrayList(); |
| for (int i = 0, len = spec.getChildCount(); i < len; i++) { |
| final SpecificationNode sn = spec.getChild(i); |
| if (sn.getType().equals(ConfluenceConfiguration.Specification.SPACES)) { |
| for (int j = 0, sLen = sn.getChildCount(); j < sLen; j++) { |
| final SpecificationNode specNode = sn.getChild(j); |
| if (specNode.getType().equals(ConfluenceConfiguration.Specification.SPACE)) { |
| cs.spaces.add(specNode.getAttributeValue(ConfluenceConfiguration.Specification.SPACE_KEY_ATTRIBUTE)); |
| |
| } |
| } |
| |
| } else if (sn.getType().equals(ConfluenceConfiguration.Specification.PAGES)) { |
| final String s = sn.getAttributeValue(ConfluenceConfiguration.Specification.PROCESS_ATTACHMENTS_ATTRIBUTE_KEY); |
| cs.processAttachments = Boolean.valueOf(s); |
| cs.pageType = sn.getAttributeValue(ConfluenceConfiguration.Specification.PAGETYPE); |
| } else if (sn.getType().equals(ConfluenceConfiguration.Specification.SECURITY)) { |
| final String s = sn.getAttributeValue(ConfluenceConfiguration.Specification.ACTIVATE_SECURITY_ATTRIBUTE_KEY); |
| cs.activateSecurity = Boolean.valueOf(s); |
| } |
| } |
| |
| return cs; |
| |
| } |
| } |
| |
| } |