blob: a154b4d7f24f902d4832f79d13ef603d6a1bb717 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.entityhub.indexing.core.config;
import static org.apache.stanbol.entityhub.indexing.core.config.IndexingConstants.KEY_DESCRIPTION;
import static org.apache.stanbol.entityhub.indexing.core.config.IndexingConstants.KEY_ENTITY_DATA_ITERABLE;
import static org.apache.stanbol.entityhub.indexing.core.config.IndexingConstants.KEY_ENTITY_DATA_PROVIDER;
import static org.apache.stanbol.entityhub.indexing.core.config.IndexingConstants.KEY_ENTITY_ID_ITERATOR;
import static org.apache.stanbol.entityhub.indexing.core.config.IndexingConstants.KEY_ENTITY_POST_PROCESSOR;
import static org.apache.stanbol.entityhub.indexing.core.config.IndexingConstants.KEY_ENTITY_PROCESSOR;
import static org.apache.stanbol.entityhub.indexing.core.config.IndexingConstants.KEY_ENTITY_SCORE_PROVIDER;
import static org.apache.stanbol.entityhub.indexing.core.config.IndexingConstants.KEY_INDEXING_DESTINATION;
import static org.apache.stanbol.entityhub.indexing.core.config.IndexingConstants.KEY_INDEX_FIELD_CONFIG;
import static org.apache.stanbol.entityhub.indexing.core.config.IndexingConstants.KEY_NAME;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
import org.apache.stanbol.commons.namespaceprefix.service.StanbolNamespacePrefixService;
import org.apache.stanbol.entityhub.core.mapping.FieldMappingUtils;
import org.apache.stanbol.entityhub.indexing.core.EntityDataIterable;
import org.apache.stanbol.entityhub.indexing.core.EntityDataProvider;
import org.apache.stanbol.entityhub.indexing.core.EntityIterator;
import org.apache.stanbol.entityhub.indexing.core.EntityProcessor;
import org.apache.stanbol.entityhub.indexing.core.EntityScoreProvider;
import org.apache.stanbol.entityhub.indexing.core.IndexingDestination;
import org.apache.stanbol.entityhub.indexing.core.normaliser.DefaultNormaliser;
import org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser;
import org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapper;
import org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping;
import org.apache.stanbol.entityhub.servicesapi.site.SiteConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class IndexingConfig {
private static final String DEFAULT_ROOT_PATH = "indexing";
private static final String CONFIG_FOLDER = "config";
private static final String CONFIG_PATH = DEFAULT_ROOT_PATH+File.separatorChar+CONFIG_FOLDER;
private static final String SOURCE_FOLDER = "resources";
private static final String SOURCE_PATH = DEFAULT_ROOT_PATH+File.separatorChar+SOURCE_FOLDER;
private static final String DESTINATION_FOLDER = "destination";
private static final String DISTRIBUTION_FOLDER = "dist";
private static final String INDEXING_PROPERTIES = "indexing.properties";
private static final String CONFIG_PARAM = "config";
public static final String KEY_INDEXING_CONFIG = "indexingConfig";
/**
* Internally used to explain the syntax in the configuration file to parse parameters
*/
private static final String SYNTAX_ERROR_MESSAGE = "{key}={value1},{param1}:{value1},{param2}:{value2};{value2},{param1}:{value1} ...";
private static final Logger log = LoggerFactory.getLogger(IndexingConfig.class);
private static final String DEFAULT_INDEX_FIELD_CONFIG_FILE_NAME = "indexFieldConfig.txt";
public static final String DEFAULT_INDEXED_ENTITIES_ID_FILE_NAME = "indexed-entities-ids.zip";
/**
* This stores the context within the classpath to initialise missing
* configurations and source based on the defaults in the classpath.
* This might be a directory or an jar file.
* @see {@link #loadViaClasspath(String)}
* @see #getConfigClasspathRootFolder()
*/
private final File classPathRootDir;
/**
* The root directory for the indexing (defaults to {@link #DEFAULT_ROOT_PATH})
*/
private final File rootDir;
// /**
// * The root directory for the configuration
// */
// private final File configDir;
// /**
// * The root directory for the resources (indexing source files)
// */
// private final File sourceDir;
// /**
// * The root directory for the files created during the indexing process
// */
// private final File destinationDir;
// /**
// * The root directory for the distribution files created in the finalisation
// * phase of the indexing (e.g. The archive with the index,
// * OSGI configuration, ...)
// */
// private final File distributionDir;
//
// /**
// * Map between the relative paths stored in {@link #rootDir}, {@link #configDir},
// * {@link #sourceDir}, {@link #destinationDir} and {@link #distributionDir}
// * to the {@link File#getCanonicalFile()} counterparts as returned by the
// * {@link #getRootFolder()} ... methods.
// */
// private final Map<File,File> canonicalDirs = new HashMap<File,File>();
/**
* The main indexing configuration as parsed form {@link #INDEXING_PROPERTIES}
* file within the {@link #configDir}.
*/
private final Map<String,Object> configuration;
/**
* The value of the {@link IndexingConstants#KEY_NAME} property
*/
private String name;
/**
* The {@link EntityDataIterable} instance initialised based on the value
* of the {@link IndexingConstants#KEY_ENTITY_DATA_ITERABLE} key or
* <code>null</code> if not configured.
* This variable uses lazy initialisation
* @see #getDataIterable()
*/
private EntityDataIterable entityDataIterable = null;
/**
* The {@link EntityDataProvider} instance initialised based on the value
* of the {@link IndexingConstants#KEY_ENTITY_DATA_PROVIDER} key or
* <code>null</code> if not configured.
* This variable uses lazy initialisation
* @see #getEntityDataProvider()
*/
private EntityDataProvider entityDataProvider = null;
/**
* The {@link EntityIterator} instance initialised based on the value
* of the {@link IndexingConstants#KEY_ENTITY_ID_ITERATOR} key or
* <code>null</code> if not configured.
* This variable uses lazy initialisation
* @see #getEntityIdIterator()
*/
private EntityIterator entityIdIterator = null;
/**
* The {@link EntityScoreProvider} instance initialised based on the value
* of the {@link IndexingConstants#KEY_ENTITY_SCORE_PROVIDER} key or
* <code>null</code> if not configured.
* This variable uses lazy initialisation
* @see #getEntityScoreProvider()
*/
private EntityScoreProvider entityScoreProvider = null;
/**
* The {@link ScoreNormaliser} instance initialised based on the value
* of the {@link IndexingConstants#KEY_SCORE_NORMALIZER} key or
* <code>null</code> if not configured.
* This variable uses lazy initialisation
* @see #getNormaliser()
*/
private ScoreNormaliser scoreNormaliser = null;
/**
* The {@link EntityProcessor}s initialised based on the value
* of the {@link IndexingConstants#KEY_ENTITY_PROCESSOR} key or
* <code>null</code> if not configured.
* This variable uses lazy initialisation
* @see #getEntityProcessor()
*/
private List<EntityProcessor> entityProcessor = null;
/**
* The {@link EntityProcessor}s initialised based on the value
* of the {@link IndexingConstants#KEY_ENTITY_POST_PROCESSOR} key or
* <code>null</code> if not configured.
* This variable uses lazy initialisation
* @see #getEntityProcessor()
*/
private List<EntityProcessor> entityPostProcessor = null;
/**
* The {@link IndexingDestination} instance initialised based on the value
* of the {@link IndexingConstants#KEY_INDEXING_DESTINATION} key or
* <code>null</code> if not configured.
* This variable uses lazy initialisation
* @see #getIndexingDestination()
*/
private IndexingDestination indexingDestination = null;
/**
* The configuration of the fields/languages included/excluded in the index
* as parsed based on the value of the
* {@link IndexingConstants#KEY_INDEX_FIELD_CONFIG} key.
*/
private Collection<FieldMapping> fieldMappings;
/**
* offset to load resources via the classpath (only used for unit testing)
*/
private String classpathResourceOffset;
private NamespacePrefixService namespacePrefixService;
/**
* Creates an instance using {@link #DEFAULT_ROOT_PATH} (relative to the
* working directory) as {@link #getIndexingFolder()} for the indexing
*/
public IndexingConfig(){
this(null);
}
/**
* Creates an isntace using the parsed offset plus {@link #DEFAULT_ROOT_PATH}
* as {@link #getIndexingFolder()} for the indexing
* @param rootPath
*/
public IndexingConfig(String rootPath){
this(rootPath,null);
}
/**
* Internally used for unit testing. Allows to parse an offset for loading
* the indexer configuration from the classpath. Currently a protected
* feature, but might be moved to the public API at a later point of time.
* (would allow to include multiple default configurations via the
* classpath).
* @param rootPath
* @param classpathOffset
*/
protected IndexingConfig(String rootPath,String classpathOffset){
this.classpathResourceOffset = classpathOffset;
//first get the root
File root;// = new File(System.getProperty("user.dir"));
if(rootPath != null){
root = new File(rootPath);
} else {
root = new File(".");
}
try {
root = root.getCanonicalFile();
} catch (IOException e) {
throw new IllegalStateException("Unable to get canonical file for "
+root,e);
}
log.info("Indexing Working Directory: {}",root.getAbsoluteFile());
this.rootDir = root;
File configDir = getConfigFolder();
if(!configDir.getAbsoluteFile().isDirectory()){
log.info(" > config directory {} does not exist",configDir);
if(!configDir.getAbsoluteFile().mkdirs()){
throw new IllegalStateException(
"Unable to create configuration folder '"+
configDir.getAbsolutePath()+"'!");
} else {
log.info(" - created");
}
}
File sourceDir = getSourceFolder();
if(!sourceDir.getAbsoluteFile().exists()){
log.info(" > resource folder '{} does not exist ",sourceDir);
if(!sourceDir.getAbsoluteFile().mkdirs()){
throw new IllegalStateException(
"Unable to create resource folder '"+
sourceDir.getAbsolutePath()+"'!");
} else {
log.info(" - created");
}
}
File destinationDir = getDestinationFolder();
if(!destinationDir.getAbsoluteFile().exists()){
log.debug(" > destination folder '{} does not exist ",destinationDir);
if(!destinationDir.getAbsoluteFile().mkdirs()){
throw new IllegalStateException(
"Unable to create target folder '"+
destinationDir.getAbsolutePath()+"'!");
} else {
log.debug(" - created");
}
}
File distributionDir = getDistributionFolder();
if(!distributionDir.getAbsoluteFile().exists()){
log.debug(" > distribution folder '{} does not exist ",distributionDir);
if(!distributionDir.getAbsoluteFile().mkdirs()){
throw new IllegalStateException(
"Unable to create distribution '"+
destinationDir.getAbsolutePath()+"'!");
} else {
log.debug(" - created");
}
}
//set up the root folder for the classpath
this.classPathRootDir = getConfigClasspathRootFolder();
log.info("Classpath Indexing Root {}",classPathRootDir);
//read the prefixnamespace mappings
try {
initNamespacePrefixMapper();
} catch (IOException e) {
throw new IllegalStateException("Unable to get create NamespacePrefixMapper",e);
}
//check the main configuration
this.configuration = loadConfig(INDEXING_PROPERTIES,true);
Object value = configuration.get(KEY_NAME);
if(value == null){
throw new IllegalArgumentException("Indexing Configuration '"+
INDEXING_PROPERTIES+"' is missing the required key "+KEY_NAME+"!");
}
this.name = value.toString();
if(name.isEmpty()){
throw new IllegalArgumentException("Invalid Indexing Configuration '"+
INDEXING_PROPERTIES+"': The value for the parameter"+KEY_NAME+" MUST NOT be empty!");
}
value = configuration.get(KEY_INDEX_FIELD_CONFIG);
if(value == null || value.toString().isEmpty()){
value = DEFAULT_INDEX_FIELD_CONFIG_FILE_NAME;
}
final File indexFieldConfig = getConfigFile(value.toString());
if(indexFieldConfig.isFile()){
try {
this.fieldMappings = FieldMappingUtils.parseFieldMappings(new Iterator<String>() {
LineIterator it = IOUtils.lineIterator(new FileInputStream(indexFieldConfig), "UTF-8");
@Override
public boolean hasNext() {
return it.hasNext();
}
@Override
public String next() {
return it.nextLine();
}
@Override
public void remove() {
it.remove();
}
},getNamespacePrefixService());
} catch (IOException e) {
throw new IllegalStateException("Unable to read Index Field Configuration form '"
+indexFieldConfig+"'!",e);
}
} else {
throw new IllegalArgumentException("Invalid Indexing Configuration: " +
"IndexFieldConfiguration '"+indexFieldConfig+"' not found. " +
"Provide the missing file or use the '"+KEY_INDEX_FIELD_CONFIG+
"' in the '"+INDEXING_PROPERTIES+"' to configure a different one!");
}
}
public NamespacePrefixService getNamespacePrefixService() {
return namespacePrefixService;
}
/**
* @param configDir
* @throws IOException
*/
private void initNamespacePrefixMapper() throws IOException {
File nsPrefixMappings = getConfigFile("namespaceprefix.mappings");
if(!nsPrefixMappings.isFile()){
FileUtils.writeLines(nsPrefixMappings,"UTF-8",Arrays.asList(
"# Syntax: '{prefix}\\t{namespace}\\n",
"# where:",
"# {prefix} ... [0..9A..Za..z-_]",
"# {namespace} ... must end with '#' or '/' for URLs and ':' for URNs",
"# one mapping per line, multiple prefixes for the same namespace allowed"));
}
namespacePrefixService = new StanbolNamespacePrefixService(nsPrefixMappings);
}
/**
* Searches for a configuration file. If the configuration is not found
* within the {@link #getConfigFolder()} than it searches the Classpath for
* the configuration. If the configuration is found within the Classpath it
* is copied the the configuration folder and than opened.<p>
* The intension behind that is that the default values are provided within
* the indexer archive but that the user can modify the configuration after
* the first call.
* @param configFile the name of the configuration file
* @return
* @throws IOException
*/
public InputStream openConfig(String configFileName) throws IOException {
return openResource(CONFIG_PATH,configFileName);
}
public InputStream openSource(String sourceFileName) throws IOException {
return openResource(SOURCE_PATH,sourceFileName);
}
/**
* Getter for the config file with the given name. If the file/directory is
* not present within the {@link #getConfigFolder()} it is searched via the
* classpath and created (if found).
* @param configName
* @return
*/
public File getConfigFile(String configName) {
return getResource(CONFIG_PATH, configName);
}
/**
* Getter for the source file with the given name. If the file/directory is
* not present within the {@link #getSourceFolder()} it is searched via the
* classpath and created (if found).
* @param configName
* @return
*/
public File getSourceFile(String configName) {
return getResource(SOURCE_PATH, configName);
}
private InputStream openResource(String path,String fileName) throws IOException {
File resource = getResource(path, fileName);
InputStream in = null;
if(resource.isFile()){
in = new FileInputStream(resource);
} //else not found -> return null
return in;
}
/**
* Searches for a resource with the parsed name in the parsed directory.
* If it can not be found it tries to initialise it via the classpath.
* @param root the (relative path) to the directory containing the file.
* typically on of {@link #configDir} or {@link #sourceDir}.
* @param fileName the name of the file (file or directory)
* @return the absolute File or <code>null</code> if not found.
*/
private File getResource(String path, String fileName) {
File resourceDir = new File(getWorkingDirectory(),path);
File resource = new File(resourceDir,fileName);
log.info("request for RDFTerm {} (folder: {})",fileName,resourceDir);
if(resource.getAbsoluteFile().exists()){
log.info(" > rquested RDFTerm present");
} else if(copyFromClasspath(new File(path,fileName))){
log.info(" > rquested RDFTerm copied from Classpath ");
} else {
log.info(" > rquested RDFTerm not found");
}
return resource.getAbsoluteFile();
}
/**
* This method copies Resources from the Classpath over to the target
* resource. It supports both files and directories. In case of directories
* all sub-directories and there files are copied.<p>
* One can not use {@link ClassLoader#getResource(String)} because it does
* only support files and no directories.
* @param resource the target resource (relative path also found in the jar)
* @return <code>true</code> if the resource was found and copied.
*/
private boolean copyFromClasspath(File resource){
String resourcePath;
if(classpathResourceOffset != null){
String rs = resource.getPath();
resourcePath = FilenameUtils.concat(classpathResourceOffset, rs);
} else {
resourcePath = resource.getPath();
}
if(classPathRootDir == null){ //not available
return false;
} else if(classPathRootDir.isDirectory()){ // loaded from directory
File classpathResource = new File(classPathRootDir,resourcePath);
try {
if(classpathResource.isFile()){
FileUtils.copyFile(classpathResource, new File(getWorkingDirectory(),resource.getPath()));
return true;
} else if(classpathResource.isDirectory()){
FileUtils.copyDirectory(classpathResource, new File(getWorkingDirectory(),resource.getPath()));
return true;
} else {
return false;
}
} catch(IOException e){
throw new IllegalStateException(
String.format("Unable to copy Configuration form classpath " +
"resource %s to target file %s!",
classpathResource, resource.getAbsolutePath()),e);
}
} else { //loaded form a jar file
boolean found = false;
JarFile jar = null;
//when loading from a jar file we need Unix style separators
String unixResourcePath = FilenameUtils.separatorsToUnix(resourcePath);
try {
jar = new JarFile(classPathRootDir);
//String resourceName = resource.getPath();
Enumeration<JarEntry> entries = jar.entries();
boolean completed = false;
//we need to iterate over the entries because the resource might
//refer to an file but missing the tailing '/'
while(entries.hasMoreElements() && !completed){
JarEntry entry = entries.nextElement();
String entryName = entry.getName();
if(entryName.startsWith(resourcePath) || entryName.startsWith(unixResourcePath)){
log.info("found entry : {}[dir={}]",entryName,entry.isDirectory());
if((entryName.equals(resourcePath) || entryName.equals(unixResourcePath))
&& !entry.isDirectory()){
//found the resource and it is an file -> copy and return
completed = true;
}
if(!entry.isDirectory()){ //copy a file
//still check if the target folder exist
//TODO: this depends on user.dir is root dir
File targetFolder = new File(getWorkingDirectory(),
FilenameUtils.getPathNoEndSeparator(entryName));
if(targetFolder.exists() || targetFolder.mkdirs()){
File outFile = new File(targetFolder,
FilenameUtils.getName(entry.getName()));
InputStream is = jar.getInputStream(entry);
OutputStream os = new FileOutputStream(outFile);
IOUtils.copyLarge(is,os);
IOUtils.closeQuietly(is);
IOUtils.closeQuietly(os);
//found one resource
found = true;
log.info(" > created File {}",outFile);
} else {
throw new IllegalStateException("Unable to create" +
"folder "+targetFolder);
}
} else { //directory
//TODO: this depends on user.dir is root dir
File targetFolder = new File(entryName);
if(!targetFolder.exists() && !targetFolder.mkdirs()){
throw new IllegalStateException("Unable to create" +
"folder "+targetFolder);
} else { //created a directory
log.info(" > created Directory {}",targetFolder);
found = true;
}
}
} // else entry does not start with the parsed resource
} //end while entries
} catch (IOException e) {
throw new IllegalStateException("Unable to copy resources from" +
"jar file "+classPathRootDir+"!",e);
} finally {
if(jar != null){
try {
jar.close();
} catch (IOException e) {
//ignore
}
}
}
return found;
}
}
/**
* First uses the {@link Thread#currentThread() current threads} class loader
* to load the parsed resource. If not found the class loader of this class
* is used.
* @param resource the resource to load
* @return the URL for the resource or <code>null</code> if not found
*/
private static URL loadViaClasspath(String resource) {
String unixResource = FilenameUtils.separatorsToUnix(resource);
URL resourceUrl = Thread.currentThread().getContextClassLoader().getResource(
unixResource);
if(resourceUrl == null){
resourceUrl = IndexingConfig.class.getClassLoader().getResource(
unixResource);
}
return resourceUrl;
}
/**
* Uses the Classpath to search for the File (maybe within a jar archive)
* that is the root to load the config. This is needed in cases directories
* are requested by the {@link #getResource(File, String)} methods because
* the normal {@link ClassLoader#getResource(String)} method does not work
* for directories.
* @param clazz the class used as context to find the jar file
* @return the archive the parsed class was loaded from
* @throws IOException In case the jar file can not be accessed.
*/
private File getConfigClasspathRootFolder() {
//use the indexing.properties file as context
//STANBOL-
String contextResource;
if(classpathResourceOffset != null){
contextResource = FilenameUtils.concat(classpathResourceOffset,
CONFIG_PATH+File.separatorChar+INDEXING_PROPERTIES);
} else {
contextResource = CONFIG_PATH+File.separatorChar+INDEXING_PROPERTIES;
}
URL contextUrl = loadViaClasspath(contextResource);
if(contextUrl == null){// if indexing.properties is not found via classpath
log.info("No '{}' found via classpath. Loading RDFTerm via" +
"the classpath is deactivated.",
contextResource);
return null;
}
String resourcePath;
try {
resourcePath = new File(contextUrl.toURI()).getAbsolutePath();
} catch (Exception e) {
//if we can not convert it to an URI, try directly with the URL
//URLs with jar:file:/{jarPath}!{classPath} can cause problems
//so try to parse manually by using the substring from the first
//'/' to (including '!')
String urlString;
try {
urlString = URLDecoder.decode(contextUrl.toString(),"UTF-8");
} catch (UnsupportedEncodingException e1) {
throw new IllegalStateException("Encoding 'UTF-8' is not supported",e);
}
int slashIndex = urlString.indexOf('/');
int exclamationIndex = urlString.indexOf('!');
if(slashIndex >=0 && exclamationIndex > 0){
resourcePath = urlString.substring(slashIndex, exclamationIndex+1);
log.info("manually parsed plassPath: {} from {}",resourcePath,contextUrl);
} else {
//looks like there is an other reason than an URL as described above
//so better to throw an exception than to guess ...
throw new IllegalStateException("Unable to Access Source at location "+contextUrl,e);
}
}
//now get the file for the root folder in the archive containing the config
File classpathRoot;
if(resourcePath.indexOf('!')>0){
classpathRoot = new File(resourcePath.substring(0,resourcePath.indexOf('!')));
} else {
classpathRoot = new File(resourcePath.substring(0,resourcePath.length()-contextResource.length()));
}
return classpathRoot;
}
/**
* Loads an {@link Properties} configuration from the parsed file and
* returns it as Map
* @param configFile the file
* @param required if <code>true</code> an {@link IllegalArgumentException}
* will be thrown if the config was not present otherwise an empty map will
* be returned
* @return The configuration as Map
*/
private Map<String,Object> loadConfig(String configFile, boolean required) {
//Uses an own implementation to parse key=value configuration
//The problem with the java properties is that keys do not support
//UTF-8, but some configurations might want to use URLs as keys!
Map<String,Object> configMap = new HashMap<String,Object>();
try {
InputStream in = openConfig(configFile);
if(in != null){
LineIterator lines = IOUtils.lineIterator(in, "UTF-8");
while(lines.hasNext()){
String line = lines.next();
if(!line.isEmpty()){
int indexOfEquals = line.indexOf('=');
String key = indexOfEquals > 0 ?
line.substring(0,indexOfEquals).trim():
line.trim();
if(key.charAt(0) != '#' && key.charAt(0) != '!'){ //no comment
String value;
if(indexOfEquals > 0 && indexOfEquals < line.length()-1){
value = line.substring(indexOfEquals+1,line.length());
} else {
value = null;
}
configMap.put(key,value);
} // else ignore comments
} //else ignore empty lines
}
} else if(required){
throw new IllegalArgumentException(
"Unable to find configuration file '"+
configFile+"'!");
} else {//-> optional and not found -> return empty map
log.info("Unable to find optional configuration {}",configFile);
}
} catch (IOException e) {
if(required){
throw new IllegalStateException(
"Unable to read configuration file '"+
configFile+"'!",e);
} else {
log.warn("Unable to read configuration file '"+configFile+"'!",e);
}
}
// Old code that used java.util.Properties to load configurations!
// Properties config = new Properties();
// try {
// config.load(new FileInputStream(configFile));
// } catch (FileNotFoundException e) {
// if(required){
// throw new IllegalArgumentException(
// "Unable to find configuration file '"+
// configFile.getAbsolutePath()+"'!");
// }
// } catch (IOException e) {
// if(required){
// throw new IllegalStateException(
// "Unable to read configuration file '"+
// configFile.getAbsolutePath()+"'!",e);
// }
// }
// if(config != null){
// for(Enumeration<String> keys = (Enumeration<String>)config.propertyNames();keys.hasMoreElements();){
// String key = keys.nextElement();
// configMap.put(key, config.getProperty(key));
// }
// }
return configMap;
}
/**
* Getter for the working direcotry of the Indexing tool. (the directory
* containing the /indexing folder). By defualt htis
* @return
*/
public final File getWorkingDirectory(){
return rootDir;
}
/**
* Getter for the root folder used for the Indexing (root/indexing)
* @return the root folder (containing the config, resources, target and dist folders)
*/
public final File getIndexingFolder() {
return new File(getWorkingDirectory(),DEFAULT_ROOT_PATH);
}
/**
* The root folder for the configuration. Guaranteed to exist.
* @return the root folder for the configuration
*/
public final File getConfigFolder() {
return new File(getIndexingFolder(),CONFIG_FOLDER);
}
/**
* The root folder containing the resources used as input for the
* indexing process. Might not exist if no resources are available
* @return the root folder for the resources
*/
public final File getSourceFolder() {
return new File(getIndexingFolder(),SOURCE_FOLDER);
}
/**
* The root folder containing the files created by the indexing process.
* Guaranteed to exist.
* @return the target folder
*/
public final File getDestinationFolder() {
return new File(getIndexingFolder(),DESTINATION_FOLDER);
}
/**
* The root folder for the distribution. Guaranteed to exist.
* @return the distribution folder
*/
public final File getDistributionFolder() {
return new File(getIndexingFolder(),DISTRIBUTION_FOLDER);
}
/**
* Getter for the name as configured by the {@link IndexingConstants#KEY_NAME}
* by the main indexing configuration.
* @return the name of this data source to index
*/
public String getName() {
return name;
}
/**
* Getter for the description as configured by the {@link IndexingConstants#KEY_DESCRIPTION}
* by the main indexing configuration.
* @return the description of the data source to index or <code>null</code>
* if not defined
*/
public String getDescription(){
Object value = configuration.get(KEY_DESCRIPTION);
return value != null?value.toString():null;
}
/**
* Getter for the failOnError as configured by the {@link IndexingConstants#KEY_FAIL_ON_ERROR_LOADING_RESOURCE}
* by the main indexing configuration.
* @return the boolean value of the failOnError parameter
*/
public boolean isFailOnError(){
//by default failOnError is false to continue execution of the indexing tool
boolean failOnError = false;
Object value = configuration.get(IndexingConstants.KEY_FAIL_ON_ERROR_LOADING_RESOURCE);
if(value != null && !value.toString().isEmpty()){
failOnError = Boolean.parseBoolean(value.toString());
}
return failOnError;
}
/**
* The {@link ScoreNormaliser} as configured by the {@link IndexingConstants#KEY_SCORE_NORMALIZER}
* by the main indexing configuration.
* @return the configured {@link ScoreNormaliser} or a {@link DefaultNormaliser} if
* this configuration is missing.
*/
public ScoreNormaliser getNormaliser(){
if(scoreNormaliser == null){
initNormaliser();
}
return scoreNormaliser;
}
/**
* The {@link EntityDataIterable} as configured by the {@link IndexingConstants#KEY_ENTITY_DATA_ITERABLE}
* by the main indexing configuration.
* @return the configured {@link EntityDataIterable} or a <code>null</code> if
* this configuration is not present.
*/
public EntityDataIterable getDataIterable(){
if(entityDataIterable != null){
return entityDataIterable;
} else if(configuration.containsKey(KEY_ENTITY_DATA_ITERABLE)){
ConfigEntry config = parseConfigEntry(configuration.get(KEY_ENTITY_DATA_ITERABLE).toString());
try {
entityDataIterable = (EntityDataIterable)Class.forName(config.getClassName()).newInstance();
} catch (Exception e) {
throw new IllegalArgumentException("Invalid EntityDataIterable configuration '"+config.getConfigString()+"'!",e);
}
//add the configuration
Map<String,Object> configMap = getComponentConfig(config, entityDataIterable.getClass().getSimpleName(), false);
//add also the directly provided parameters
configMap.putAll(config.getParams());
entityDataIterable.setConfiguration(configMap);
return entityDataIterable;
} else {
return null;
}
}
public EntityIterator getEntityIdIterator() {
if(entityIdIterator != null){
return entityIdIterator;
} else if(configuration.containsKey(KEY_ENTITY_ID_ITERATOR)){
ConfigEntry config = parseConfigEntry(configuration.get(KEY_ENTITY_ID_ITERATOR).toString());
try {
entityIdIterator = (EntityIterator)Class.forName(config.getClassName()).newInstance();
} catch (Exception e) {
throw new IllegalArgumentException("Invalid EntityIterator configuration '"+config.getConfigString()+"'!",e);
}
//add the configuration
Map<String,Object> configMap = getComponentConfig(config, entityIdIterator.getClass().getSimpleName(), false);
//add also the directly provided parameters
configMap.putAll(config.getParams());
entityIdIterator.setConfiguration(configMap);
return entityIdIterator;
} else {
return null;
}
}
public EntityDataProvider getEntityDataProvider() {
if(entityDataProvider != null){
return entityDataProvider;
} else if (configuration.containsKey(KEY_ENTITY_DATA_PROVIDER)){
ConfigEntry config = parseConfigEntry(configuration.get(KEY_ENTITY_DATA_PROVIDER).toString());
try {
entityDataProvider = (EntityDataProvider)Class.forName(config.getClassName()).newInstance();
} catch (Exception e) {
throw new IllegalArgumentException("Invalid EntityDataProvider configuration '"+config.getConfigString()+"'!",e);
}
//add the configuration
Map<String,Object> configMap = getComponentConfig(config, entityDataProvider.getClass().getSimpleName(), false);
//add also the directly provided parameters
configMap.putAll(config.getParams());
entityDataProvider.setConfiguration(configMap);
return entityDataProvider;
} else {
return null;
}
}
public EntityScoreProvider getEntityScoreProvider() {
if(entityScoreProvider != null){
return entityScoreProvider;
} else if (configuration.containsKey(KEY_ENTITY_SCORE_PROVIDER)){
ConfigEntry config = parseConfigEntry(configuration.get(KEY_ENTITY_SCORE_PROVIDER).toString());
try {
entityScoreProvider = (EntityScoreProvider)Class.forName(config.getClassName()).newInstance();
} catch (Exception e) {
throw new IllegalArgumentException("Invalid EntityScoreProvider configuration '"+config.getConfigString()+"'!",e);
}
//add the configuration
Map<String,Object> configMap = getComponentConfig(config, entityScoreProvider.getClass().getSimpleName(), false);
//add also the directly provided parameters
configMap.putAll(config.getParams());
entityScoreProvider.setConfiguration(configMap);
return entityScoreProvider;
} else {
return null;
}
}
/**
* The fields and languages included/excluded in the created index.<p>
* NOTE: Currently this uses the {@link FieldMapping} class was initially
* defined to be used as configuration for the {@link FieldMapper}. In
* future this might change to an Interface that is more tailored to
* defining the fields and languages included/excluded in the index and does
* not allow to define mappings and data type conversions as the current one
* @return
*/
public Collection<FieldMapping> getIndexFieldConfiguration(){
return fieldMappings;
}
/**
* Getter for the list of {@link EntityProcessor}s or <code>null</code> if
* none are configured.
* @return
*/
public List<EntityProcessor> getEntityProcessors() {
if(entityProcessor != null){
return entityProcessor;
} else if (configuration.containsKey(KEY_ENTITY_PROCESSOR)){
List<ConfigEntry> configs = parseConfigEntries(configuration.get(KEY_ENTITY_PROCESSOR).toString());
List<EntityProcessor> processorList = new ArrayList<EntityProcessor>(configs.size());
for(ConfigEntry config : configs){
EntityProcessor processor;
try {
processor = (EntityProcessor)Class.forName(config.getClassName()).newInstance();
} catch (Exception e) {
throw new IllegalArgumentException("Invalid EntityProcessor configuration '"+config.getConfigString()+"'!",e);
}
//add the configuration
Map<String,Object> configMap = getComponentConfig(config, processor.getClass().getSimpleName(), false);
//add also the directly provided parameters
configMap.putAll(config.getParams());
processor.setConfiguration(configMap);
processorList.add(processor);
}
if(!processorList.isEmpty()){ //do not set empty lists
entityProcessor = Collections.unmodifiableList(processorList);
}
return entityProcessor;
} else {
return null;
}
}
/**
* Getter for the {@link EntityProcessor}s configured to be used for
* post-processing or <code>null</code> if none.
* @return
*/
public List<EntityProcessor> getEntityPostProcessors(){
if(entityPostProcessor != null){
return entityPostProcessor;
} else if(configuration.containsKey(KEY_ENTITY_POST_PROCESSOR)){
List<ConfigEntry> configs = parseConfigEntries(configuration.get(KEY_ENTITY_POST_PROCESSOR).toString());
List<EntityProcessor> postProcessorList = new ArrayList<EntityProcessor>(configs.size());
for(ConfigEntry config : configs){
EntityProcessor postProcessor;
try {
postProcessor = (EntityProcessor)Class.forName(config.getClassName()).newInstance();
} catch (Exception e) {
throw new IllegalArgumentException("Invalid EntityProcessor configuration '"+config.getConfigString()+"' for post-processing!",e);
}
//add the configuration
Map<String,Object> configMap = getComponentConfig(config, postProcessor.getClass().getSimpleName(), false);
//add also the directly provided parameters
configMap.putAll(config.getParams());
postProcessor.setConfiguration(configMap);
postProcessorList.add(postProcessor);
}
if(!postProcessorList.isEmpty()){ //do not set empty lists
entityPostProcessor = Collections.unmodifiableList(postProcessorList);
}
return entityPostProcessor;
} else {
return null;
}
}
public IndexingDestination getIndexingDestination() {
if(indexingDestination != null){
return indexingDestination;
} else if (configuration.containsKey(KEY_INDEXING_DESTINATION)){
ConfigEntry config = parseConfigEntry(configuration.get(KEY_INDEXING_DESTINATION).toString());
try {
indexingDestination = (IndexingDestination)Class.forName(config.getClassName()).newInstance();
} catch (Exception e) {
throw new IllegalArgumentException("Invalid IndexingDestination configuration '"+config.getConfigString()+"'!",e);
}
//add the configuration
Map<String,Object> configMap = getComponentConfig(config, indexingDestination.getClass().getSimpleName(), false);
//add also the directly provided parameters
configMap.putAll(config.getParams());
indexingDestination.setConfiguration(configMap);
return indexingDestination;
} else {
return null;
}
}
public File getIndexedEntitiesIdsFile(){
Object value = configuration.get(IndexingConstants.KEX_INDEXED_ENTITIES_FILE);
if(value == null){
return new File(getDestinationFolder(),DEFAULT_INDEXED_ENTITIES_ID_FILE_NAME);
} else if (value.toString().isEmpty()){
return null; //deactivate this feature;
} else {
return new File(getDestinationFolder(),value.toString());
}
}
private void initNormaliser() {
Object value = configuration.get(IndexingConstants.KEY_SCORE_NORMALIZER);
if(value == null){
this.scoreNormaliser = new DefaultNormaliser();
} else {
ScoreNormaliser normaliser = null;
ScoreNormaliser last = null;
List<ConfigEntry> configs = parseConfigEntries(value.toString());
for(int i=configs.size()-1;i>=0;i--){
last = normaliser;
normaliser = null;
ConfigEntry config = configs.get(i);
try {
normaliser = (ScoreNormaliser)Class.forName(config.getClassName()).newInstance();
} catch (Exception e) {
throw new IllegalArgumentException("Invalid Normaliser configuration '"+config.getConfigString()+"'!",e);
}
Map<String,Object> normaliserConfig = getComponentConfig(config,normaliser.getClass().getSimpleName(),config.getParams().containsKey(CONFIG_PARAM));
//add also the directly provided parameters
normaliserConfig.putAll(config.getParams());
if(last != null){
normaliserConfig.put(ScoreNormaliser.CHAINED_SCORE_NORMALISER, last);
}
normaliser.setConfiguration(normaliserConfig);
}
//set the normaliser!
this.scoreNormaliser = normaliser;
}
}
/**
* Loads a configuration based on the value of the {@link #CONFIG_PARAM}
* parameter of the parsed {@link ConfigEntry}.
* @param configEntry
* @param defaultName
* @return
*/
private Map<String,Object> getComponentConfig(ConfigEntry configEntry,String defaultName, boolean required) {
//Removed support for parsing the relative path to the config file
//because it was not used! (String relConfigPath was the first param)
// File configDir;
// if(relConfigPath == null || relConfigPath.isEmpty()){
// configDir = this.configDir;
// } else {
// configDir = new File(this.configDir,relConfigPath);
// }
// //test also if relConfigPath = null, because also the root might not exist!
// if(!configDir.isDirectory()){
// if(required){
// throw new IllegalArgumentException("The Configuration Directory '"+
// configDir+"' does not exist (or ist not a directory)!");
// } else {
// return new HashMap<String,Object>();
// }
// }
//if the CONFIG_PARAM is present in the config we assume that a config is required
String name = configEntry.getParams().get(CONFIG_PARAM);
Map<String,Object> config = loadConfigFile(name == null ? defaultName : name, required);
//we need to also add the key used to get (this) indexing config
config.put(KEY_INDEXING_CONFIG, this);
return config;
}
/**
* Loads the config with the given name from the parsed directory and throwing
* an {@link IllegalArgumentException} if the configuration is required but
* not found
* @param name the name (".properties" is appended if missing)
* @param configDir the directory to look for the config
* @param required if this config is required or optional
* @return the key value mappings as map
*/
private Map<String,Object> loadConfigFile(String name, boolean required) {
Map<String,Object> loadedConfig;
name = name.endsWith(".properties") ? name : name+".properties";
loadedConfig = loadConfig(name,required);
return loadedConfig;
}
private ConfigEntry parseConfigEntry(String config){
return new ConfigEntry(config);
}
private List<ConfigEntry> parseConfigEntries(String config){
List<ConfigEntry> configs = new ArrayList<ConfigEntry>();
for(String configPart : config.split(";")){
configs.add(parseConfigEntry(configPart));
}
return configs;
}
private final class ConfigEntry {
private String configString;
private String className;
private Map<String,String> params;
private ConfigEntry(String config){
configString = config;
String[] parts = config.split(",");
className = parts[0];
params = new HashMap<String,String>();
if(parts.length>1){
for(int i=1;i<parts.length;i++){
String[] param = parts[i].split(":"); //TODO: maybe use also "=" there
String value = null;
if(param.length>1){
value = parts[i].substring(parts[i].indexOf(':')+1);
}
params.put(param[0], value);
}
}
}
public final String getConfigString() {
return configString;
}
public final String getClassName() {
return className;
}
public final Map<String,String> getParams() {
return params;
}
}
/**
* Can be used to look for a config within the configuration directory
* of the {@link IndexingConfig}.
* @param string the name of the configuration (".properties" is appended if
* missing)
* @param required if this is an required or optional configuration.
* @return the key value mappings as map
* @throws IllegalArgumentException if the configuration was not found and
* <code>true</code> was parsed for required
*/
public Map<String,Object> getConfig(String name,boolean required) throws IllegalArgumentException {
return loadConfigFile(name, required);
}
/**
* Getter for configured properties directly by the key. Typically used
* to get Properties as defined by the {@link SiteConfiguration} interface
* @param key the key of the property
* @return the value or <code>null</code> if not present. Might also return
* <code>null</code> in case the value <code>null</code> is set for the
* requested property.
*/
public Object getProperty(String key){
return configuration.get(key);
}
}