blob: 3a1adab2e65784a5ceabaa46831f18784f6f7dc8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.datamap.lucene;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import org.apache.carbondata.common.annotations.InterfaceAudience;
import org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandException;
import org.apache.carbondata.common.logging.LogService;
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.datamap.DataMapDistributable;
import org.apache.carbondata.core.datamap.DataMapMeta;
import org.apache.carbondata.core.datamap.dev.DataMap;
import org.apache.carbondata.core.datamap.dev.DataMapFactory;
import org.apache.carbondata.core.datamap.dev.DataMapWriter;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.CarbonMetadata;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.core.metadata.schema.table.DataMapSchema;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
import org.apache.carbondata.core.scan.filter.intf.ExpressionType;
import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.events.Event;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
/**
 * Base implementation for CG and FG lucene DataMapFactory.
 *
 * <p>Holds the state shared by both factories (table identifier, datamap name, datamap
 * metadata and the lucene analyzer) and validates the {@link #TEXT_COLUMNS} property of the
 * datamap schema during {@link #init(AbsoluteTableIdentifier, DataMapSchema)}.
 */
@InterfaceAudience.Internal
abstract class LuceneDataMapFactoryBase<T extends DataMap> implements DataMapFactory<T> {

  /**
   * Name of the datamap property that holds the comma separated list of columns to index.
   */
  static final String TEXT_COLUMNS = "text_columns";

  /**
   * Logger, created per instance and named after the runtime class of this factory.
   */
  final LogService LOGGER = LogServiceFactory.getLogService(this.getClass().getName());

  /**
   * Indexed columns and optimized operations of this datamap; set by {@code init}.
   */
  DataMapMeta dataMapMeta = null;

  /**
   * Analyzer for lucene; set by {@code init}.
   */
  Analyzer analyzer = null;

  /**
   * Index (datamap) name; set by {@code init}.
   */
  String dataMapName = null;

  /**
   * Identifier of the table this datamap belongs to; set by {@code init}.
   */
  AbsoluteTableIdentifier tableIdentifier = null;

  /**
   * Initializes this factory for the given table and datamap schema.
   *
   * <p>Looks up the carbon table through {@link CarbonMetadata}, validates the
   * {@link #TEXT_COLUMNS} property against it and builds the {@link DataMapMeta} with
   * {@link ExpressionType#TEXT_MATCH} as the only optimized operation.
   *
   * @param identifier    identifier of the table, must not be null
   * @param dataMapSchema schema of the datamap, must not be null
   * @throws IOException if the carbon table cannot be found in the carbon metadata
   * @throws MalformedDataMapCommandException if the TEXT_COLUMNS property is invalid
   * @throws NullPointerException if {@code identifier} or {@code dataMapSchema} is null
   */
  @Override
  public void init(AbsoluteTableIdentifier identifier, DataMapSchema dataMapSchema)
      throws IOException, MalformedDataMapCommandException {
    Objects.requireNonNull(identifier, "identifier");
    Objects.requireNonNull(dataMapSchema, "dataMapSchema");

    this.tableIdentifier = identifier;
    this.dataMapName = dataMapSchema.getDataMapName();

    // resolve the carbon table from the carbon metadata instance
    CarbonMetadata carbonMetadata = CarbonMetadata.getInstance();
    String tableUniqueName = identifier.getCarbonTableIdentifier().getTableUniqueName();
    CarbonTable carbonTable = carbonMetadata.getCarbonTable(tableUniqueName);
    if (carbonTable == null) {
      String errorMessage =
          String.format("failed to get carbon table with name %s", tableUniqueName);
      LOGGER.error(errorMessage);
      throw new IOException(errorMessage);
    }

    // validate DataMapSchema and get index columns
    List<String> indexedColumns = validateAndGetIndexedColumns(dataMapSchema, carbonTable);

    // Only TEXT_MATCH is registered as an optimized operation; other expression types
    // (EQUALS, range comparisons, NOT) are currently not registered.
    List<ExpressionType> optimizedOperations = new ArrayList<>();
    optimizedOperations.add(ExpressionType.TEXT_MATCH);
    this.dataMapMeta = new DataMapMeta(indexedColumns, optimizedOperations);

    // get analyzer
    // TODO: how to get analyzer ?
    analyzer = new StandardAnalyzer();
  }

  /**
   * Validates the Lucene DataMap schema and returns the columns to index.
   * <ol>
   *   <li>the TEXT_COLUMNS property is required</li>
   *   <li>TEXT_COLUMNS must not contain illegal entries (empty, blank)</li>
   *   <li>TEXT_COLUMNS must not contain the same column more than once</li>
   *   <li>every column in TEXT_COLUMNS must exist in the table</li>
   *   <li>only String DataType columns are supported</li>
   * </ol>
   *
   * @param dataMapSchema schema whose properties carry TEXT_COLUMNS
   * @param carbonTable   table the columns are resolved against
   * @return the column names as reported by the resolved table columns, in declaration order
   * @throws MalformedDataMapCommandException if any of the rules above is violated
   */
  private List<String> validateAndGetIndexedColumns(DataMapSchema dataMapSchema,
      CarbonTable carbonTable) throws MalformedDataMapCommandException {
    String textColumnsStr = dataMapSchema.getProperties().get(TEXT_COLUMNS);
    // StringUtils.isBlank is null-safe, so no separate null check is needed
    if (StringUtils.isBlank(textColumnsStr)) {
      throw new MalformedDataMapCommandException(
          "Lucene DataMap require proper TEXT_COLUMNS property.");
    }

    // limit -1 keeps trailing empty tokens so that input such as "a,b," is rejected below
    String[] textColumns = textColumnsStr.split(",", -1);
    for (int i = 0; i < textColumns.length; i++) {
      // Locale.ROOT keeps the normalization independent of the JVM default locale; with
      // e.g. a Turkish default locale "ID" would otherwise become "\u0131d" instead of "id".
      textColumns[i] = textColumns[i].trim().toLowerCase(Locale.ROOT);
    }

    for (int i = 0; i < textColumns.length; i++) {
      if (textColumns[i].isEmpty()) {
        throw new MalformedDataMapCommandException("TEXT_COLUMNS contains illegal argument.");
      }
      // compare with every later entry; the list is expected to be short
      for (int j = i + 1; j < textColumns.length; j++) {
        if (textColumns[i].equals(textColumns[j])) {
          throw new MalformedDataMapCommandException(
              "TEXT_COLUMNS has duplicate columns :" + textColumns[i]);
        }
      }
    }

    List<String> textColumnList = new ArrayList<>(textColumns.length);
    for (String textColumn : textColumns) {
      CarbonColumn column = carbonTable.getColumnByName(carbonTable.getTableName(), textColumn);
      if (null == column) {
        throw new MalformedDataMapCommandException("TEXT_COLUMNS: " + textColumn
            + " does not exist in table. Please check create DataMap statement.");
      } else if (column.getDataType() != DataTypes.STRING) {
        throw new MalformedDataMapCommandException(
            "TEXT_COLUMNS only supports String column. Unsupported column: " + textColumn
                + ", DataType: " + column.getDataType());
      }
      textColumnList.add(column.getColName());
    }
    return textColumnList;
  }

  /**
   * Returns a new writer for this datamap.
   *
   * @param segmentId          id of the segment being written
   * @param writeDirectoryPath directory the lucene data is written to
   * @return a new {@link LuceneDataMapWriter}
   */
  public DataMapWriter createWriter(String segmentId, String writeDirectoryPath) {
    LOGGER.info("lucene data write to " + writeDirectoryPath);
    // NOTE(review): the meaning of the trailing 'true' flag is defined by
    // LuceneDataMapWriter and is not visible from this class - confirm there.
    return new LuceneDataMapWriter(
        tableIdentifier, dataMapName, segmentId, writeDirectoryPath, true);
  }

  /**
   * Gets all distributable objects of a segment id.
   *
   * @param segmentId id of the segment
   * @return a list holding a single distributable that points at the segment path
   */
  public List<DataMapDistributable> toDistributable(String segmentId) {
    List<DataMapDistributable> lstDataMapDistribute = new ArrayList<>();
    DataMapDistributable luceneDataMapDistributable = new LuceneDataMapDistributable(
        CarbonTablePath.getSegmentPath(tableIdentifier.getTablePath(), segmentId));
    lstDataMapDistribute.add(luceneDataMapDistributable);
    return lstDataMapDistribute;
  }

  /**
   * Event hook; this base implementation does nothing.
   */
  public void fireEvent(Event event) {
    // no-op
  }

  /**
   * Clears datamap of the segment; this base implementation does nothing.
   */
  public void clear(String segmentId) {
    // no-op
  }

  /**
   * Clears all datamaps from memory; this base implementation does nothing.
   */
  public void clear() {
    // no-op
  }

  /**
   * Returns the metadata of this datamap, or null if {@code init} has not completed.
   */
  public DataMapMeta getMeta() {
    return dataMapMeta;
  }
}