eagle-core/eagle-query/eagle-entity-base/src/main/java/org/apache/eagle/log/entity/filter/HBaseFilterBuilder.java - eagle - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.eagle.log.entity.filter;

 import org.apache.eagle.common.config.EagleConfigFactory;
 import org.apache.eagle.log.entity.EntityQualifierUtils;
 import org.apache.eagle.log.entity.meta.EntityDefinition;
 import org.apache.eagle.log.entity.meta.Qualifier;
 import org.apache.eagle.common.ByteUtil;
 import org.apache.eagle.query.parser.*;
 import org.apache.hadoop.hbase.filter.*;
 import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
 import org.apache.hadoop.hbase.filter.FilterList.Operator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import java.nio.charset.Charset;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 /**
  * Builds HBase filters from a parsed eagle query. The steps are:
  * <ol>
  * <li>receive an ORExpression from eagle-antlr and create a top-level filter list with the MUST_PASS_ONE
  * option;</li>
  * <li>iterate all ANDExpressions in the ORExpression and create, for each of them, a new filter list with
  * the MUST_PASS_ALL option;</li>
  * <li>iterate all AtomicExpressions in the ANDExpression and split them into 2 groups by looking up
  * metadata: one is for tag filters, the other is for column (qualifier) filters;</li>
  * <li>put the filters built from these 2 groups into the ANDExpression's MUST_PASS_ALL list, and add that
  * list to the top-level MUST_PASS_ONE list.</li>
  * </ol>
  */
 public class HBaseFilterBuilder {
     /** Logger shared by every instance of this builder. */
     private static final Logger LOG = LoggerFactory.getLogger(HBaseFilterBuilder.class);

     /** Pattern used to pull the attribute name out of a field key; the key syntax is {@code @<fieldname>}. */
     private static final Pattern _fnPattern = TokenConstant.ID_PATTERN;

     /** Charset the regex comparators use until {@link #setCharset(String)} replaces it. */
     private static final Charset _defaultCharset = Charset.forName("ISO-8859-1");

     /** Query being translated; its AND expressions are read via {@code getANDExprList()}. */
     private final ORExpression orExpr;

     /** Entity metadata consulted for tags, partitions, qualifiers and the column family. */
     private final EntityDefinition ed;

     /** Value handed to {@code setFilterIfMissing} on every {@code SingleColumnValueFilter} created. */
     private final boolean filterIfMissing;

     /** Charset currently applied to the regex comparators. */
     private Charset charset = _defaultCharset;

     /**
      * Returns the field names recorded for expression-based filters.
      * <p>
      * TODO: Verify performance impact
      *
      * @return the current value of {@code filterFields}; {@code null} when no expression-based filter has
      *         been built by this instance
      */
     public Set<String> getFilterFields() {
         return this.filterFields;
     }

     /**
      * Fields needed by expression-based filters. Only {@code buildExpressionBasedFilter} writes to this
      * field, so it stays {@code null} until such a filter has been built.
      */
     private Set<String> filterFields;

     /**
      * Creates a builder whose {@code filterIfMissing} flag is {@code false}.
      *
      * @param ed entity definition the filters are built against
      * @param orExpr parsed query expression to translate
      */
     public HBaseFilterBuilder(EntityDefinition ed, ORExpression orExpr) {
         this(ed, orExpr, false);
     }

     /**
      * Creates a builder for the given entity definition and query.
      *
      * @param ed entity definition the filters are built against
      * @param orExpr parsed query expression to translate
      * @param filterIfMissing value passed to {@code setFilterIfMissing} on each column value filter built
      */
     public HBaseFilterBuilder(EntityDefinition ed, ORExpression orExpr, boolean filterIfMissing) {
         this.filterIfMissing = filterIfMissing;
         this.orExpr = orExpr;
         this.ed = ed;
     }

     /**
      * Replaces the charset applied to the regex comparators.
      *
      * @param charsetName name resolved through {@link Charset#forName(String)}
      */
     public void setCharset(String charsetName) {
         this.charset = Charset.forName(charsetName);
     }

     /**
      * Returns the charset applied to the regex comparators.
      *
      * @return the charset currently in use
      */
     public Charset getCharset() {
         return this.charset;
     }

     /**
      * Asks the entity definition whether a field is a tag.
      * <p>
      * Because there is no metadata for tags, a non-qualifier field is regarded as a tag. A field may
      * therefore not be a real tag even though this method returns true; this happens when a user enters a
      * wrong field name which is neither a tag nor a qualifier.
      *
      * @param field field name to look up
      * @return the result of {@code ed.isTag(field)}
      */
     private boolean isTag(String field) {
         return this.ed.isTag(field);
     }

     /**
      * Extracts the entity attribute name from a field key.
      *
      * @param fieldName key as written in the query
      * @return capture group 1 of the ID pattern when the pattern is found in the key, otherwise
      *         {@code null}
      */
     private String parseEntityAttribute(String fieldName) {
         final Matcher idMatcher = _fnPattern.matcher(fieldName);
         return idMatcher.find() ? idMatcher.group(1) : null;
     }

     /**
      * Return the partition values for each AND expression of the query. The size of the returned list
      * equals the number of AND expressions, which is also the number of filters that
      * {@link #buildFilters()} adds to its top-level list. Each array follows the order of
      * {@code ed.getPartitions()} and holds the value bound to that partition field with the EQUAL operator,
      * or {@code null} when the AND expression does not constrain the field that way.
      * <p>
      * TODO: For now we don't support one query to query multiple partitions. In future if partition is
      * defined, for the entity, internally We need to spawn multiple queries and send one query for each
      * partition.
      *
      * @return the partition values for each AND expression, or {@code null} if the entity doesn't support
      *         partition
      */
     public List<String[]> getPartitionValues() {
         final String[] partitions = ed.getPartitions();
         if (partitions == null || partitions.length == 0) {
             return null;
         }
         final List<String[]> result = new ArrayList<String[]>();
         final Map<String, String> partitionKeyValueMap = new HashMap<String, String>();
         for (ANDExpression andExpr : orExpr.getANDExprList()) {
             // the map is reused across AND expressions, so start each one from a clean state
             partitionKeyValueMap.clear();
             for (AtomicExpression ae : andExpr.getAtomicExprList()) {
                 // TODO temporarily ignore those fields which are not for attributes
                 if (ae.getKeyType() != TokenType.ID) {
                     continue;
                 }
                 final String fieldName = parseEntityAttribute(ae.getKey());
                 if (fieldName == null) {
                     // report the key as written in the query; the parsed name is null on this path
                     LOG.warn("{} field does not have format @<FieldName>, ignored", ae.getKey());
                     continue;
                 }
                 if (ed.isPartitionTag(fieldName) && ComparisonOperator.EQUAL.equals(ae.getOp())) {
                     partitionKeyValueMap.put(fieldName, ae.getValue());
                 }
             }
             // unconstrained partition fields stay null in the array
             final String[] values = new String[partitions.length];
             for (int i = 0; i < partitions.length; ++i) {
                 values[i] = partitionKeyValueMap.get(partitions[i]);
             }
             result.add(values);
         }
         return result;
     }

     /**
      * Translates the query into an HBase filter list.
      * <p>
      * The returned list uses MUST_PASS_ONE and contains one MUST_PASS_ALL list per AND expression. Each of
      * those lists holds the row filter produced by {@link #buildTagFilter(Map)} and, when it is not empty,
      * the list produced by {@link #buildQualifierFilter(List)}. Atomic expressions on tag fields using
      * EQUAL/IS with a non-NULL value, or IN, go to the row filter; everything else goes to the qualifier
      * filters. Expressions whose ID key does not match the attribute pattern are logged and ignored.
      *
      * @see org.apache.eagle.query.parser.TokenType
      * @return the filter list representing the whole query
      * @throws IllegalArgumentException if a tag field is combined with an operator that is not supported
      */
     public FilterList buildFilters() {
         // TODO: Optimize to select between row filter or column filter for better performance
         // Use row key filter priority by default
         boolean rowFilterPriority = true;

         FilterList fltList = new FilterList(Operator.MUST_PASS_ONE);
         for (ANDExpression andExpr : orExpr.getANDExprList()) {

             FilterList list = new FilterList(Operator.MUST_PASS_ALL);
             Map<String, List<String>> tagFilters = new HashMap<String, List<String>>();
             List<QualifierFilterEntity> qualifierFilters = new ArrayList<QualifierFilterEntity>();

             // TODO refactor not to use too much if/else
             for (AtomicExpression ae : andExpr.getAtomicExprList()) {
                 // TODO temporarily ignore those fields which are not for attributes

                 String fieldName = ae.getKey();
                 if (ae.getKeyType() == TokenType.ID) {
                     final String attributeName = parseEntityAttribute(fieldName);
                     if (attributeName == null) {
                         // fieldName still holds the key as written in the query, so the warning is useful
                         LOG.warn("{} field does not have format @<FieldName>, ignored", fieldName);
                         continue;
                     }
                     fieldName = attributeName;
                 }

                 String value = ae.getValue();
                 ComparisonOperator op = ae.getOp();
                 TokenType keyType = ae.getKeyType();
                 TokenType valueType = ae.getValueType();
                 QualifierFilterEntity entry = new QualifierFilterEntity(fieldName, value, op, keyType,
                                                                         valueType);

                 // TODO Exact match, need to add escape for those special characters here, including:
                 // "-", "[", "]", "/", "{", "}", "(", ")", "*", "+", "?", ".", "\\", "^", "$", "|"

                 if (keyType == TokenType.ID && isTag(fieldName)) {
                     final boolean exactMatch = (ComparisonOperator.EQUAL.equals(op)
                                                 || ComparisonOperator.IS.equals(op))
                                                && !TokenType.NULL.equals(valueType);
                     if (exactMatch || (rowFilterPriority && ComparisonOperator.IN.equals(op))) {
                         // Use RowFilter for equal TAG, and for IN by default
                         List<String> tagValues = tagFilters.get(fieldName);
                         if (tagValues == null) {
                             tagValues = new ArrayList<String>();
                             tagFilters.put(fieldName, tagValues);
                         }
                         if (exactMatch) {
                             tagValues.add(value);
                         } else {
                             tagValues.addAll(EntityQualifierUtils.parseList(value));
                         }
                     } else if (ComparisonOperator.LIKE.equals(op) || ComparisonOperator.NOT_LIKE.equals(op)
                                || ComparisonOperator.CONTAINS.equals(op)
                                || ComparisonOperator.NOT_CONTAINS.equals(op)
                                || ComparisonOperator.IN.equals(op) || ComparisonOperator.IS.equals(op)
                                || ComparisonOperator.IS_NOT.equals(op)
                                || ComparisonOperator.NOT_EQUAL.equals(op)
                                || ComparisonOperator.EQUAL.equals(op)
                                || ComparisonOperator.NOT_IN.equals(op)) {
                         qualifierFilters.add(entry);
                     } else {
                         LOG.warn("Don't support operation: \"" + op + "\" on tag field: " + fieldName
                                  + " yet, going to ignore");
                         throw new IllegalArgumentException("Don't support operation: " + op
                                                            + " on tag field: " + fieldName
                                                            + ", avaliable options: =, =!, =~, !=~, in, not in, contains, not contains");
                     }
                 } else {
                     qualifierFilters.add(entry);
                 }
             }

             // Build RowFilter for equal tags
             list.addFilter(buildTagFilter(tagFilters));

             // Build SingleColumnValueFilter
             FilterList qualifierFilterList = buildQualifierFilter(qualifierFilters);
             if (qualifierFilterList != null && qualifierFilterList.getFilters().size() > 0) {
                 list.addFilter(qualifierFilterList);
             } else if (LOG.isDebugEnabled()) {
                 LOG.debug("Ignore empty qualifier filter from " + qualifierFilters.toString());
             }
             fltList.addFilter(list);
         }
         LOG.info("Query: {} => Filter: {}", orExpr, fltList);
         return fltList;
     }

     /**
      * Builds the row filter for the tag conditions of one AND expression.
      * <p>
      * The regex from {@link #buildTagFilterRegex(Map)} is wrapped in a RegexStringComparator that uses this
      * builder's charset. The charset is used to decode the byte array; in hbase server,
      * RegexStringComparator uses the same charset to decode the stored bytes. For the tag filter regex it
      * is always ISO-8859-1, as the bytes only come from String's hashcode (Integer). Note: regex
      * comparison compares Strings.
      *
      * @param tagFilters tag name to accepted values
      * @return a RowFilter with CompareOp.EQUAL over the tag regex
      */
     protected Filter buildTagFilter(Map<String, List<String>> tagFilters) {
         final String rowKeyRegex = buildTagFilterRegex(tagFilters);
         final RegexStringComparator rowKeyComparator = new RegexStringComparator(rowKeyRegex);
         rowKeyComparator.setCharset(charset);
         return new RowFilter(CompareOp.EQUAL, rowKeyComparator);
     }

     /**
      * Builds the column filters of one AND expression; all qualifiers' conditions must be satisfied.
      * <H1>Use RegexStringComparator for:</H1> LIKE NOT_LIKE
      * <H1>Use SubstringComparator for:</H1> CONTAINS NOT_CONTAINS
      * <H1>Use NullComparator for:</H1> EQUAL NOT_EQUAL IS IS_NOT against NULL
      * <H1>Use BinaryComparator or TypedByteArrayComparator for:</H1> the remaining operators
      * <H2>TODO: Compare performance of RegexStringComparator ,SubstringComparator ,EntityQualifierHelper</H2>
      * Entries involving an expression (or a non-ID key) are delegated to the expression-based filter, and
      * IN / NOT_IN entries to the list filter.
      *
      * @param qualifierFilters conditions to translate
      * @return a MUST_PASS_ALL filter list with one filter per translated condition
      * @throws IllegalArgumentException if an operator is not supported for the given value
      */
     protected FilterList buildQualifierFilter(List<QualifierFilterEntity> qualifierFilters) {
         final FilterList allConditions = new FilterList(Operator.MUST_PASS_ALL);
         for (QualifierFilterEntity entry : qualifierFilters) {
             // expression based conditions cannot be expressed with SingleColumnValueFilter
             final boolean expressionBased = entry.getKeyType() == TokenType.EXP
                 || entry.getValueType() == TokenType.EXP
                 || entry.getKeyType() != TokenType.ID;
             if (expressionBased) {
                 if (!EagleConfigFactory.load().isCoprocessorEnabled()) {
                     LOG.warn("Expression in filter may not support, because custom filter and coprocessor is disabled: "
                              + entry.toString());
                 }
                 allConditions.addFilter(buildExpressionBasedFilter(entry));
                 continue;
             }

             // tags are addressed by their own name, qualifiers by the name stored in the definition
             final String key = entry.getKey();
             final String qualifierName = isTag(key)
                 ? key
                 : ed.getDisplayNameMap().get(key).getQualifierName();

             final ComparisonOperator op = entry.getOp();
             if (ComparisonOperator.IN.equals(op) || ComparisonOperator.NOT_IN.equals(op)) {
                 final Filter setFilter = buildListQualifierFilter(entry);
                 if (setFilter != null) {
                     allConditions.addFilter(setFilter);
                 }
                 continue;
             }

             final ByteArrayComparable comparable = selectComparable(entry);
             final SingleColumnValueFilter columnFilter = new SingleColumnValueFilter(
                 ed.getColumnFamily().getBytes(), qualifierName.getBytes(),
                 convertToHBaseCompareOp(entry.getOp()), comparable);
             columnFilter.setFilterIfMissing(filterIfMissing);
             allConditions.addFilter(columnFilter);
         }
         return allConditions;
     }

     /**
      * Picks the HBase comparator for a single-valued (not IN / NOT_IN) qualifier condition.
      *
      * @param entry condition to translate
      * @return the comparator matching the entry's value type and operator
      * @throws IllegalArgumentException if NULL is combined with an operator other than =, !=, is, is not
      */
     private ByteArrayComparable selectComparable(QualifierFilterEntity entry) {
         final ComparisonOperator op = entry.getOp();

         // If [=,!=,is,is not] NULL, use NullComparator else throw exception
         if (TokenType.NULL.equals(entry.getValueType())) {
             final boolean nullCheck = ComparisonOperator.EQUAL.equals(op)
                 || ComparisonOperator.NOT_EQUAL.equals(op)
                 || ComparisonOperator.IS.equals(op)
                 || ComparisonOperator.IS_NOT.equals(op);
             if (!nullCheck) {
                 throw new IllegalArgumentException("Operation: " + entry.getOp()
                                                    + " with NULL is not supported yet: "
                                                    + entry.toString()
                                                    + ", avaliable options: [=, !=, is, is not] null|NULL");
             }
             return new NullComparator();
         }

         // If [contains, not contains], use SubstringComparator
         if (ComparisonOperator.CONTAINS.equals(op) || ComparisonOperator.NOT_CONTAINS.equals(op)) {
             return new SubstringComparator(entry.getValue());
         }

         // If [like, not like], use RegexStringComparator
         if (ComparisonOperator.LIKE.equals(op) || ComparisonOperator.NOT_LIKE.equals(op)) {
             final RegexStringComparator regexComparator =
                 new RegexStringComparator(buildQualifierRegex(entry.getValue()));
             regexComparator.setCharset(charset);
             return regexComparator;
         }

         // if type is null (is Tag or not found) or not defined for TypedByteArrayComparator,
         // or the coprocessor is disabled, fall back to a plain binary comparison
         final Class type = EntityQualifierUtils.getType(ed, entry.getKey());
         final boolean plainBinary = !EagleConfigFactory.load().isCoprocessorEnabled() || type == null
             || TypedByteArrayComparator.get(type) == null;
         final byte[] encodedValue = EntityQualifierUtils.toBytes(ed, entry.getKey(), entry.getValue());
         if (plainBinary) {
             return new BinaryComparator(encodedValue);
         }
         return new TypedByteArrayComparator(encodedValue, type);
     }

     /**
      * Builds a RowValueFilter that evaluates an expression-based condition.
      * <p>
      * The fields required by the expression are added to {@code filterFields}. They are accumulated rather
      * than assigned, so that a query containing several expression-based conditions reports the required
      * fields of all of them and not only those of the condition built last. The set is created on first
      * use, which keeps {@link #getFilterFields()} returning {@code null} when no expression is involved.
      * NOTE(review): fields also accumulate across repeated builds on the same instance — confirm that
      * callers are fine with a superset.
      *
      * @param entry expression-based condition
      * @return the filter wrapping the expression comparator
      */
     private Filter buildExpressionBasedFilter(QualifierFilterEntity entry) {
         BooleanExpressionComparator expressionComparator = new BooleanExpressionComparator(entry, ed);
         Set<String> requiredFields = expressionComparator.getRequiredFields();
         if (requiredFields != null) {
             if (filterFields == null) {
                 filterFields = new HashSet<String>();
             }
             // copy into our own set so the comparator's collection is never shared or mutated here
             filterFields.addAll(requiredFields);
         }
         return new RowValueFilter(expressionComparator);
     }

     /**
      * Builds the filter for an IN / NOT IN condition: one SingleColumnValueFilter with a BinaryComparator
      * per listed value, combined with MUST_PASS_ONE for IN and MUST_PASS_ALL for NOT IN.
      * <h2>TODO:</h2> Possibility to tune performance by using a single RegexStringComparator (an
      * anchored alternation of all values, for String or untyped qualifiers) instead of
      * OR[BinaryComparator,...]? A single SingleColumnValueFilter is much faster than multiple
      * SingleColumnValueFilters in an OR list. <br/>
      * <br/>
      * ! Check op must be IN or NOTIN in caller
      *
      * @param entry condition whose value is a list
      * @return the combined filter list
      * @throws IllegalArgumentException if the operator is neither IN nor NOT_IN
      */
     private Filter buildListQualifierFilter(QualifierFilterEntity entry) {
         final List<String> candidates = EntityQualifierUtils.parseList(entry.getValue());
         final Iterator<String> cursor = candidates.iterator();

         // tags are addressed by their own name, qualifiers by the name stored in the definition
         final String fieldName = entry.getKey();
         final String qualifierName = ed.isTag(fieldName)
             ? fieldName
             : ed.getDisplayNameMap().get(fieldName).getQualifierName();

         final Operator combination;
         if (ComparisonOperator.IN.equals(entry.getOp())) {
             combination = Operator.MUST_PASS_ONE;
         } else if (ComparisonOperator.NOT_IN.equals(entry.getOp())) {
             combination = Operator.MUST_PASS_ALL;
         } else {
             throw new IllegalArgumentException(String
                 .format("Don't support operation: %s on LIST type of value yet: %s, valid options: IN/NOT IN [LIST]",
                         entry.getOp(), entry.toString()));
         }
         final FilterList setFilterList = new FilterList(combination);

         while (cursor.hasNext()) {
             final String candidate = cursor.next();
             final BinaryComparator valueComparator =
                 new BinaryComparator(EntityQualifierUtils.toBytes(ed, fieldName, candidate));
             final SingleColumnValueFilter valueFilter = new SingleColumnValueFilter(
                 ed.getColumnFamily().getBytes(), qualifierName.getBytes(),
                 convertToHBaseCompareOp(entry.getOp()), valueComparator);
             valueFilter.setFilterIfMissing(filterIfMissing);
             setFilterList.addFilter(valueFilter);
         }

         return setFilterList;
     }

     /**
      * Anchors a qualifier value so that it has to match the whole column value. Just used for LIKE and
      * NOT_LIKE. The value is inserted as-is and no {@code (?s)} flag is prepended.
      *
      * @param qualifierValue regex supplied in the query
      * @return the value wrapped in {@code ^} and {@code $}
      */
     protected String buildQualifierRegex(String qualifierValue) {
         return "^" + qualifierValue + "$";
     }

     /**
      * Appends the given ID to the given buffer as a quoted regex literal: "\\Q", the bytes of the ID (each
      * byte becomes the char with the same unsigned value), then "\\E". [steal it from opentsdb, thanks
      * opentsdb :) https://github.com/OpenTSDB/opentsdb/blob/master/src/core/TsdbQuery.java]
      * <p>
      * A backslash followed by 'E' inside the ID would end the quoted section early, so it is followed by
      * a literal {@code \E} and a fresh "\\Q". The backslash flag is recomputed for every byte; keeping it
      * set after such an escape would make an 'E' directly following "\E" trigger a second, bogus escape.
      *
      * @param buf buffer holding the regex under construction
      * @param id raw bytes to match literally
      */
     private static void addId(final StringBuilder buf, final byte[] id) {
         buf.append("\\Q");
         boolean backslash = false;
         for (final byte b : id) {
             buf.append((char)(b & 0xFF));
             if (b == 'E' && backslash) { // If we saw a `\' and now we have a `E'.
                 // So we just terminated the quoted section because we just added \E
                 // to `buf'. So let's put a literal \E now and start quoting again.
                 buf.append("\\\\E\\Q");
             }
             // only the byte just appended decides whether the next 'E' needs escaping
             backslash = b == '\\';
         }
         buf.append("\\E");
     }

     /**
      * Appends the given ID to the given buffer as a quoted regex literal: "\\Q", every character of the
      * ID, then "\\E".
      * <p>
      * A backslash followed by 'E' inside the ID would end the quoted section early, so it is followed by
      * a literal {@code \E} and a fresh "\\Q". All characters are processed, including the last one, and
      * the backslash flag is recomputed for every character so that an 'E' directly following "\E" is not
      * escaped a second time.
      *
      * @param buf buffer holding the regex under construction
      * @param id text to match literally
      */
     @SuppressWarnings("unused")
     private static void addId(final StringBuilder buf, final String id) {
         buf.append("\\Q");
         final int len = id.length();
         boolean backslash = false;
         for (int i = 0; i < len; i++) {
             final char c = id.charAt(i);
             buf.append(c);
             if (c == 'E' && backslash) { // If we saw a `\' and now we have a `E'.
                 // So we just terminated the quoted section because we just added \E
                 // to `buf'. So let's put a literal \E now and start quoting again.
                 buf.append("\\\\E\\Q");
             }
             // only the character just appended decides whether the next 'E' needs escaping
             backslash = c == '\\';
         }
         buf.append("\\E");
     }

     /**
      * Builds the row key regex for the tag conditions of one AND expression. One search tag may have
      * multiple values which have OR relationship, and the relationship between different search tags is
      * AND; the query is like "(TAG1=value11 OR TAG1=value12) AND TAG2=value2".
      * <p>
      * Tags that are partition fields are skipped. The remaining tag names and values are replaced by
      * their {@code hashCode()} and emitted in ascending order of the tag name hash.
      *
      * @param tags tag name to accepted values
      * @return the regex to apply to the row key
      */
     protected String buildTagFilterRegex(Map<String, List<String>> tags) {
         // TODO need consider that \E could be part of tag, refer to
         // https://github.com/OpenTSDB/opentsdb/blob/master/src/core/TsdbQuery.java
         final String[] partitionFields = ed.getPartitions();
         final int partitionCount = (partitionFields == null) ? 0 : partitionFields.length;

         final SortedMap<Integer, List<Integer>> valueHashesByTagHash = new TreeMap<Integer, List<Integer>>();
         for (Map.Entry<String, List<String>> tag : tags.entrySet()) {
             final String tagName = tag.getKey();
             // Ignore tag if the tag is one of partition fields
             if (ed.isPartitionTag(tagName)) {
                 continue;
             }
             final List<String> rawValues = tag.getValue();
             final List<Integer> valueHashes = new ArrayList<Integer>(rawValues.size());
             for (String rawValue : rawValues) {
                 valueHashes.add(rawValue.hashCode());
             }
             valueHashesByTagHash.put(tagName.hashCode(), valueHashes);
         }

         // header = prefix(4 bytes) + partition_hashes(4*N bytes) + timestamp (8 bytes)
         final int headerLength = 4 + partitionCount * 4 + 8;
         // <tag1:4><value1:4> ... <tagn:4><valuen:4>; this piece skips any number of such pairs
         final String anyTagPairs = "(?:.{8})*";

         final StringBuilder regex = new StringBuilder();
         regex.append("(?s)").append("^(?:.{").append(headerLength).append("})").append(anyTagPairs);
         for (Map.Entry<Integer, List<Integer>> tag : valueHashesByTagHash.entrySet()) {
             try {
                 addId(regex, ByteUtil.intToBytes(tag.getKey()));
                 final List<Integer> valueHashes = tag.getValue();
                 regex.append("(?:");
                 for (int i = 0; i < valueHashes.size(); i++) {
                     if (i > 0) {
                         regex.append('|');
                     }
                     addId(regex, ByteUtil.intToBytes(valueHashes.get(i)));
                 }
                 regex.append(")").append(anyTagPairs);
             } catch (Exception ex) {
                 LOG.error("constructing regex error", ex);
             }
         }
         regex.append("$");

         final String pattern = regex.toString();
         if (LOG.isDebugEnabled()) {
             LOG.debug("Tag filter pattern is " + pattern);
         }
         return pattern;
     }

     /**
      * Convert ComparisonOperator to native HBase CompareOp. EQUAL, LIKE, CONTAINS, IN and IS map to
      * CompareOp.EQUAL; their negations map to CompareOp.NOT_EQUAL; the four ordering operators map to the
      * CompareOp of the same name.
      *
      * @param comp operator from the query
      * @return the matching HBase compare operator
      * @throws IllegalArgumentException if the operator has no mapping
      */
     protected static CompareOp convertToHBaseCompareOp(ComparisonOperator comp) {
         final boolean positiveMatch = comp == ComparisonOperator.EQUAL
             || comp == ComparisonOperator.LIKE
             || comp == ComparisonOperator.CONTAINS
             || comp == ComparisonOperator.IN
             || comp == ComparisonOperator.IS;
         if (positiveMatch) {
             return CompareOp.EQUAL;
         }
         if (comp == ComparisonOperator.LESS) {
             return CompareOp.LESS;
         }
         if (comp == ComparisonOperator.LESS_OR_EQUAL) {
             return CompareOp.LESS_OR_EQUAL;
         }
         if (comp == ComparisonOperator.GREATER) {
             return CompareOp.GREATER;
         }
         if (comp == ComparisonOperator.GREATER_OR_EQUAL) {
             return CompareOp.GREATER_OR_EQUAL;
         }
         final boolean negativeMatch = comp == ComparisonOperator.NOT_EQUAL
             || comp == ComparisonOperator.NOT_LIKE
             || comp == ComparisonOperator.NOT_CONTAINS
             || comp == ComparisonOperator.IS_NOT
             || comp == ComparisonOperator.NOT_IN;
         if (negativeMatch) {
             return CompareOp.NOT_EQUAL;
         }
         LOG.error("{} operation is not supported now\n", comp);
         throw new IllegalArgumentException("Illegal operation: " + comp + ", avaliable options: "
                                            + Arrays.toString(ComparisonOperator.values()));
     }

     /**
      * Resolves an operator given as text and converts it to the HBase compare operator.
      *
      * @param comp textual operator, resolved with {@code ComparisonOperator.locateOperator}
      * @return the matching HBase compare operator
      */
     protected static CompareOp getHBaseCompareOp(String comp) {
         final ComparisonOperator located = ComparisonOperator.locateOperator(comp);
         return convertToHBaseCompareOp(located);
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.eagle.log.entity.filter;

	import org.apache.eagle.common.config.EagleConfigFactory;
	import org.apache.eagle.log.entity.EntityQualifierUtils;
	import org.apache.eagle.log.entity.meta.EntityDefinition;
	import org.apache.eagle.log.entity.meta.Qualifier;
	import org.apache.eagle.common.ByteUtil;
	import org.apache.eagle.query.parser.*;
	import org.apache.hadoop.hbase.filter.*;
	import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
	import org.apache.hadoop.hbase.filter.FilterList.Operator;
	import org.slf4j.Logger;
	import org.slf4j.LoggerFactory;

	import java.nio.charset.Charset;
	import java.util.*;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;

	/**
	* the steps of building hbase filters 1. receive ORExpression from eagle-antlr 2. iterate all ANDExpression
	* in ORExpression 2.1 put each ANDExpression to a new filter list with MUST_PASS_ONE option 2.2 iterate all
	* AtomicExpression in ANDExpression 2.2.1 group AtomicExpression into 2 groups by looking up metadata, one is
	* for tag filters, the other is for column filters 2.2.2 put the above 2 filters to a filter list with
	* MUST_PASS_ALL option
	*/
	public class HBaseFilterBuilder {
	/** Logger shared by every instance of this builder. */
	private static final Logger LOG = LoggerFactory.getLogger(HBaseFilterBuilder.class);

	/** Pattern used to pull the attribute name out of a field key; the key syntax is {@code @<fieldname>}. */
	private static final Pattern _fnPattern = TokenConstant.ID_PATTERN;

	/** Charset the regex comparators use until {@link #setCharset(String)} replaces it. */
	private static final Charset _defaultCharset = Charset.forName("ISO-8859-1");

	/** Query being translated; its AND expressions are read via {@code getANDExprList()}. */
	private final ORExpression orExpr;

	/** Entity metadata consulted for tags and partitions. */
	private final EntityDefinition ed;

	/** Flag received through the constructor. */
	private final boolean filterIfMissing;

	/** Charset currently selected for this builder. */
	private Charset charset = _defaultCharset;

	/**
	 * Returns the field names recorded for expression filters.
	 * <p>
	 * TODO: Verify performance impact
	 *
	 * @return the current value of {@code filterFields}
	 */
	public Set<String> getFilterFields() {
		return this.filterFields;
	}

	/**
	 * Filter fields recorded for expression filters; exposed through {@code getFilterFields()}.
	 */
	private Set<String> filterFields;

	/**
	 * Creates a builder whose {@code filterIfMissing} flag is {@code false}.
	 *
	 * @param ed entity definition the filters are built against
	 * @param orExpr parsed query expression to translate
	 */
	public HBaseFilterBuilder(EntityDefinition ed, ORExpression orExpr) {
	this(ed, orExpr, false);
	}

	/**
	 * Creates a builder for the given entity definition and query.
	 *
	 * @param ed entity definition the filters are built against
	 * @param orExpr parsed query expression to translate
	 * @param filterIfMissing flag stored in the field of the same name
	 */
	public HBaseFilterBuilder(EntityDefinition ed, ORExpression orExpr, boolean filterIfMissing) {
		this.filterIfMissing = filterIfMissing;
		this.orExpr = orExpr;
		this.ed = ed;
	}

	/**
	 * Replaces the charset of this builder.
	 *
	 * @param charsetName name resolved through {@link Charset#forName(String)}
	 */
	public void setCharset(String charsetName) {
		this.charset = Charset.forName(charsetName);
	}

	/**
	 * Returns the charset of this builder.
	 *
	 * @return the charset currently in use
	 */
	public Charset getCharset() {
		return this.charset;
	}

	/**
	 * Asks the entity definition whether a field is a tag.
	 * <p>
	 * Because there is no metadata for tags, a non-qualifier field is regarded as a tag. A field may
	 * therefore not be a real tag even though this method returns true; this happens when a user enters a
	 * wrong field name which is neither a tag nor a qualifier.
	 *
	 * @param field field name to look up
	 * @return the result of {@code ed.isTag(field)}
	 */
	private boolean isTag(String field) {
		return this.ed.isTag(field);
	}

	/**
	 * Extracts the entity attribute name from a field key.
	 *
	 * @param fieldName key as written in the query
	 * @return capture group 1 of the ID pattern when the pattern is found in the key, otherwise
	 *         {@code null}
	 */
	private String parseEntityAttribute(String fieldName) {
		final Matcher idMatcher = _fnPattern.matcher(fieldName);
		return idMatcher.find() ? idMatcher.group(1) : null;
	}

	/**
	* Return the partition values for each or expression. The size of the returned list should be equal to
	* the size of FilterList that {@link #buildFilters()} returns. TODO: For now we don't support one query
	* to query multiple partitions. In future if partition is defined, for the entity, internally We need to
	* spawn multiple queries and send one query for each partition.
	*
	* @return Return the partition values for each or expression. Return null if the entity doesn't support
	* partition
	*/
	public List<String[]> getPartitionValues() {
	final String[] partitions = ed.getPartitions();
	if (partitions == null \|\| partitions.length == 0) {
	return null;
	}
	final List<String[]> result = new ArrayList<String[]>();
	final Map<String, String> partitionKeyValueMap = new HashMap<String, String>();
	for (ANDExpression andExpr : orExpr.getANDExprList()) {
	partitionKeyValueMap.clear();
	for (AtomicExpression ae : andExpr.getAtomicExprList()) {
	// TODO temporarily ignore those fields which are not for attributes
	if (ae.getKeyType() == TokenType.ID) {
	final String fieldName = parseEntityAttribute(ae.getKey());
	if (fieldName == null) {
	LOG.warn(fieldName + " field does not have format @<FieldName>, ignored");
	continue;
	}
	if (ed.isPartitionTag(fieldName) && ComparisonOperator.EQUAL.equals(ae.getOp())) {
	final String value = ae.getValue();
	partitionKeyValueMap.put(fieldName, value);
	}
	}
	}
	final String[] values = new String[partitions.length];
	result.add(values);
	for (int i = 0; i < partitions.length; ++i) {
	final String partition = partitions[i];
	final String value = partitionKeyValueMap.get(partition);
	values[i] = value;
	}
	}
	return result;
	}

	/**
	* @see org.apache.eagle.query.parser.TokenType
	* @return
	*/
	public FilterList buildFilters() {
	// TODO: Optimize to select between row filter or column filter for better performance
	// Use row key filter priority by default
	boolean rowFilterPriority = true;

	FilterList fltList = new FilterList(Operator.MUST_PASS_ONE);
	for (ANDExpression andExpr : orExpr.getANDExprList()) {

	FilterList list = new FilterList(Operator.MUST_PASS_ALL);
	Map<String, List<String>> tagFilters = new HashMap<String, List<String>>();
	List<QualifierFilterEntity> qualifierFilters = new ArrayList<QualifierFilterEntity>();
	// List<QualifierFilterEntry> tagLikeQualifierFilters = new ArrayList<QualifierFilterEntry>();

	// TODO refactor not to use too much if/else
	for (AtomicExpression ae : andExpr.getAtomicExprList()) {
	// TODO temporarily ignore those fields which are not for attributes

	String fieldName = ae.getKey();
	if (ae.getKeyType() == TokenType.ID) {
	fieldName = parseEntityAttribute(fieldName);
	if (fieldName == null) {
	LOG.warn(fieldName + " field does not have format @<FieldName>, ignored");
	continue;
	}
	}

	String value = ae.getValue();
	ComparisonOperator op = ae.getOp();
	TokenType keyType = ae.getKeyType();
	TokenType valueType = ae.getValueType();
	QualifierFilterEntity entry = new QualifierFilterEntity(fieldName, value, op, keyType,
	valueType);

	// TODO Exact match, need to add escape for those special characters here, including:
	// "-", "[", "]", "/", "{", "}", "(", ")", "*", "+", "?", ".", "\\", "^", "$", "\|"

	if (keyType == TokenType.ID && isTag(fieldName)) {
	if ((ComparisonOperator.EQUAL.equals(op) \|\| ComparisonOperator.IS.equals(op))
	&& !TokenType.NULL.equals(valueType)) {
	// Use RowFilter for equal TAG
	if (tagFilters.get(fieldName) == null) {
	tagFilters.put(fieldName, new ArrayList<String>());
	}
	tagFilters.get(fieldName).add(value);
	} else if (rowFilterPriority && ComparisonOperator.IN.equals(op)) {
	// Use RowFilter here by default
	if (tagFilters.get(fieldName) == null) {
	tagFilters.put(fieldName, new ArrayList<String>());
	}
	tagFilters.get(fieldName).addAll(EntityQualifierUtils.parseList(value));
	} else if (ComparisonOperator.LIKE.equals(op) \|\| ComparisonOperator.NOT_LIKE.equals(op)
	\|\| ComparisonOperator.CONTAINS.equals(op)
	\|\| ComparisonOperator.NOT_CONTAINS.equals(op)
	\|\| ComparisonOperator.IN.equals(op) \|\| ComparisonOperator.IS.equals(op)
	\|\| ComparisonOperator.IS_NOT.equals(op)
	\|\| ComparisonOperator.NOT_EQUAL.equals(op)
	\|\| ComparisonOperator.EQUAL.equals(op)
	\|\| ComparisonOperator.NOT_IN.equals(op)) {
	qualifierFilters.add(entry);
	} else {
	LOG.warn("Don't support operation: \"" + op + "\" on tag field: " + fieldName
	+ " yet, going to ignore");
	throw new IllegalArgumentException("Don't support operation: " + op
	+ " on tag field: " + fieldName
	+ ", avaliable options: =, =!, =~, !=~, in, not in, contains, not contains");
	}
	} else {
	qualifierFilters.add(entry);
	}
	}

	// Build RowFilter for equal tags
	list.addFilter(buildTagFilter(tagFilters));

	// Build SingleColumnValueFilter
	FilterList qualifierFilterList = buildQualifierFilter(qualifierFilters);
	if (qualifierFilterList != null && qualifierFilterList.getFilters().size() > 0) {
	list.addFilter(qualifierFilterList);
	} else {
	if (LOG.isDebugEnabled()) {
	LOG.debug("Ignore empty qualifier filter from " + qualifierFilters.toString());
	}
	}
	fltList.addFilter(list);
	}
	LOG.info("Query: " + orExpr.toString() + " => Filter: " + fltList.toString());
	return fltList;
	}

	/**
	 * Builds the row-key filter for the tag conditions of one AND-expression.
	 * <p>
	 * The regex from {@link #buildTagFilterRegex(Map)} is wrapped in a {@code RegexStringComparator}
	 * that is given this builder's charset, and compared for equality against the row key. Regex
	 * comparison works on Strings, so the same charset is needed to decode the stored bytes.
	 * NOTE(review): the previous comment states that ISO-8859-1 is always expected here because the
	 * matched bytes come from String hash codes (integers) - confirm against the charset default.
	 *
	 * @param tagFilters tag name mapped to the values accepted for that tag
	 * @return a {@code RowFilter} using {@code CompareOp.EQUAL} and the regex comparator
	 */
	protected Filter buildTagFilter(Map<String, List<String>> tagFilters) {
	    final String rowKeyRegex = buildTagFilterRegex(tagFilters);
	    final RegexStringComparator rowKeyComparator = new RegexStringComparator(rowKeyRegex);
	    rowKeyComparator.setCharset(charset);
	    return new RowFilter(CompareOp.EQUAL, rowKeyComparator);
	}

	/**
	* all qualifiers' condition must be satisfied.
	* <H1>Use RegexStringComparator for:</H1> IN LIKE NOT_LIKE
	* <H1>Use SubstringComparator for:</H1> CONTAINS
	* <H1>Use EntityQualifierHelper for:</H1> EQUALS NOT_EUQALS LESS LESS_OR_EQUAL GREATER GREATER_OR_EQUAL
	* <H2>TODO: Compare performance of RegexStringComparator ,SubstringComparator ,EntityQualifierHelper</H2>
	*
	* @param qualifierFilters
	* @return
	*/
	protected FilterList buildQualifierFilter(List<QualifierFilterEntity> qualifierFilters) {
	FilterList list = new FilterList(Operator.MUST_PASS_ALL);
	// iterate all the qualifiers
	for (QualifierFilterEntity entry : qualifierFilters) {
	// if contains expression based filter
	if (entry.getKeyType() == TokenType.EXP \|\| entry.getValueType() == TokenType.EXP
	\|\| entry.getKeyType() != TokenType.ID) {
	if (!EagleConfigFactory.load().isCoprocessorEnabled()) {
	LOG.warn("Expression in filter may not support, because custom filter and coprocessor is disabled: "
	+ entry.toString());
	}
	list.addFilter(buildExpressionBasedFilter(entry));
	continue;
	}

	// else using SingleColumnValueFilter
	String qualifierName = entry.getKey();
	if (!isTag(entry.getKey())) {
	Qualifier qualifier = ed.getDisplayNameMap().get(entry.getKey());
	qualifierName = qualifier.getQualifierName();
	}

	// Comparator to be used for building HBase Filter
	// WritableByteArrayComparable comparator;
	ByteArrayComparable comparable;
	if (ComparisonOperator.IN.equals(entry.getOp())
	\|\| ComparisonOperator.NOT_IN.equals(entry.getOp())) {
	Filter setFilter = buildListQualifierFilter(entry);
	if (setFilter != null) {
	list.addFilter(setFilter);
	}
	} else {
	// If [=,!=,is,is not] NULL, use NullComparator else throw exception
	if (TokenType.NULL.equals(entry.getValueType())) {
	if (ComparisonOperator.EQUAL.equals(entry.getOp())
	\|\| ComparisonOperator.NOT_EQUAL.equals(entry.getOp())
	\|\| ComparisonOperator.IS.equals(entry.getOp())
	\|\| ComparisonOperator.IS_NOT.equals(entry.getOp())) {
	comparable = new NullComparator();
	} else {
	throw new IllegalArgumentException("Operation: " + entry.getOp()
	+ " with NULL is not supported yet: "
	+ entry.toString()
	+ ", avaliable options: [=, !=, is, is not] null\|NULL");
	}
	} else if (ComparisonOperator.CONTAINS.equals(entry.getOp())
	\|\| ComparisonOperator.NOT_CONTAINS.equals(entry.getOp())) {
	// If [contains, not contains],use SubstringComparator
	comparable = new SubstringComparator(entry.getValue());
	} else if (ComparisonOperator.LIKE.equals(entry.getOp())
	\|\| ComparisonOperator.NOT_LIKE.equals(entry.getOp())) {
	// If [like, not like], use RegexStringComparator
	// Use RegexStringComparator for LIKE / NOT_LIKE
	RegexStringComparator _comparator = new RegexStringComparator(buildQualifierRegex(entry
	.getValue()));
	_comparator.setCharset(charset);
	comparable = _comparator;
	} else {
	Class type = EntityQualifierUtils.getType(ed, entry.getKey());
	// if type is null (is Tag or not found) or not defined for TypedByteArrayComparator
	if (!EagleConfigFactory.load().isCoprocessorEnabled() \|\| type == null
	\|\| TypedByteArrayComparator.get(type) == null) {
	comparable = new BinaryComparator(EntityQualifierUtils.toBytes(ed, entry.getKey(),
	entry.getValue()));
	} else {
	comparable = new TypedByteArrayComparator(EntityQualifierUtils
	.toBytes(ed, entry.getKey(), entry.getValue()), type);
	}
	}

	SingleColumnValueFilter filter = new SingleColumnValueFilter(ed.getColumnFamily()
	.getBytes(), qualifierName.getBytes(), convertToHBaseCompareOp(entry.getOp()),
	comparable);
	filter.setFilterIfMissing(filterIfMissing);
	list.addFilter(filter);
	}
	}

	return list;
	}

	/**
	 * Wraps an expression condition in a {@code RowValueFilter} backed by a
	 * {@code BooleanExpressionComparator}.
	 * <p>
	 * The fields the expression needs are added to {@code filterFields}. They are accumulated rather
	 * than overwritten, so that a query containing several expression conditions keeps the required
	 * fields of all of them instead of only the last one built.
	 *
	 * @param entry the expression condition
	 * @return the row value filter evaluating the expression
	 */
	private Filter buildExpressionBasedFilter(QualifierFilterEntity entry) {
	    final BooleanExpressionComparator expressionComparator = new BooleanExpressionComparator(entry, ed);
	    final Set<String> requiredFields = expressionComparator.getRequiredFields();
	    if (requiredFields != null) {
	        if (filterFields == null) {
	            filterFields = new HashSet<String>();
	        }
	        filterFields.addAll(requiredFields);
	    }
	    return new RowValueFilter(expressionComparator);
	}

	/**
	 * Builds the filter for an IN / NOT IN condition: one {@code SingleColumnValueFilter} with a
	 * {@code BinaryComparator} per listed value, combined with {@code MUST_PASS_ONE} for IN and
	 * {@code MUST_PASS_ALL} for NOT IN.
	 * <p>
	 * TODO: a single regex-based SingleColumnValueFilter may be faster than several filters in an OR
	 * list for String (or untyped) qualifiers; not implemented yet.
	 *
	 * @param entry the list condition; its operator must be IN or NOT_IN
	 * @return the filter list holding one column filter per value
	 * @throws IllegalArgumentException if the operator is neither IN nor NOT_IN
	 */
	private Filter buildListQualifierFilter(QualifierFilterEntity entry) {
	    final List<String> values = EntityQualifierUtils.parseList(entry.getValue());
	    final Iterator<String> cursor = values.iterator();
	    final String fieldName = entry.getKey();
	    // Tags keep their own name; qualifiers are addressed by the mapped qualifier name.
	    final String qualifierName = ed.isTag(fieldName)
	        ? fieldName
	        : ed.getDisplayNameMap().get(fieldName).getQualifierName();

	    final Operator combiner;
	    if (ComparisonOperator.IN.equals(entry.getOp())) {
	        combiner = Operator.MUST_PASS_ONE;
	    } else if (ComparisonOperator.NOT_IN.equals(entry.getOp())) {
	        combiner = Operator.MUST_PASS_ALL;
	    } else {
	        throw new IllegalArgumentException(String
	            .format("Don't support operation: %s on LIST type of value yet: %s, valid options: IN/NOT IN [LIST]",
	                entry.getOp(), entry.toString()));
	    }
	    final FilterList setFilterList = new FilterList(combiner);

	    while (cursor.hasNext()) {
	        final String candidate = cursor.next();
	        final byte[] candidateBytes = EntityQualifierUtils.toBytes(ed, fieldName, candidate);
	        final SingleColumnValueFilter columnFilter = new SingleColumnValueFilter(
	            ed.getColumnFamily().getBytes(),
	            qualifierName.getBytes(),
	            convertToHBaseCompareOp(entry.getOp()),
	            new BinaryComparator(candidateBytes));
	        columnFilter.setFilterIfMissing(filterIfMissing);
	        setFilterList.addFilter(columnFilter);
	    }

	    return setFilterList;
	}

	/**
	 * Anchors a LIKE / NOT_LIKE pattern so that it has to match the whole qualifier value.
	 * The DOTALL flag {@code (?s)} is deliberately not added.
	 *
	 * @param qualifierValue regex supplied in the query
	 * @return {@code qualifierValue} wrapped as {@code ^...$}
	 */
	protected String buildQualifierRegex(String qualifierValue) {
	    return "^" + qualifierValue + "$";
	}

	/**
	 * Appends the given ID to the buffer as a regex literal, i.e. enclosed in {@code \Q ... \E}.
	 * Each byte is appended as the char with the same unsigned value (0-255).
	 * <p>
	 * Quoting is delegated to {@link Pattern#quote(String)}, which escapes every {@code \E} inside
	 * the ID. The earlier hand-written loop (borrowed from OpenTSDB's TsdbQuery) did not reset its
	 * "saw a backslash" flag after escaping a {@code \E}, so an ID containing the bytes
	 * {@code \}, {@code E}, {@code E} produced a pattern that no longer matched the ID.
	 *
	 * @param buf buffer the quoted ID is appended to
	 * @param id raw ID bytes
	 */
	private static void addId(final StringBuilder buf, final byte[] id) {
	    final StringBuilder raw = new StringBuilder(id.length);
	    for (final byte b : id) {
	        // Mask so that bytes >= 0x80 become chars 128-255 instead of sign-extended values.
	        raw.append((char) (b & 0xFF));
	    }
	    buf.append(Pattern.quote(raw.toString()));
	}

	/**
	 * Appends the given ID to the buffer as a regex literal, i.e. enclosed in {@code \Q ... \E}.
	 * <p>
	 * Quoting is delegated to {@link Pattern#quote(String)}. The earlier hand-written loop stopped
	 * at {@code id.length() - 1} and therefore silently dropped the last character of the ID.
	 *
	 * @param buf buffer the quoted ID is appended to
	 * @param id ID text to quote
	 */
	@SuppressWarnings("unused")
	private static void addId(final StringBuilder buf, final String id) {
	    buf.append(Pattern.quote(id));
	}

	/**
	* one search tag may have multiple values which have OR relationship, and relationship between different
	* search tags is AND the query is like "(TAG1=value11 OR TAG1=value12) AND TAG2=value2"
	*
	* @param tags
	* @return
	*/
	protected String buildTagFilterRegex(Map<String, List<String>> tags) {
	// TODO need consider that \E could be part of tag, refer to
	// https://github.com/OpenTSDB/opentsdb/blob/master/src/core/TsdbQuery.java
	final SortedMap<Integer, List<Integer>> tagHash = new TreeMap<Integer, List<Integer>>();
	final int numOfPartitionFields = (ed.getPartitions() == null) ? 0 : ed.getPartitions().length;
	for (Map.Entry<String, List<String>> entry : tags.entrySet()) {
	String tagName = entry.getKey();
	// Ignore tag if the tag is one of partition fields
	if (ed.isPartitionTag(tagName)) {
	continue;
	}
	List<String> stringValues = entry.getValue();
	List<Integer> hashValues = new ArrayList<Integer>(stringValues.size());
	for (String value : stringValues) {
	hashValues.add(value.hashCode());
	}
	tagHash.put(tagName.hashCode(), hashValues);
	}

	// header = prefix(4 bytes) + partition_hashes(4*N bytes) + timestamp (8 bytes)
	final int headerLength = 4 + numOfPartitionFields * 4 + 8;

	// <tag1:4><value1:4> ... <tagn:4><valuen:4>
	StringBuilder sb = new StringBuilder();
	sb.append("(?s)");
	sb.append("^(?:.{").append(headerLength).append("})");
	sb.append("(?:.{").append(8).append("})*"); // for any number of tags
	for (Map.Entry<Integer, List<Integer>> entry : tagHash.entrySet()) {
	try {
	addId(sb, ByteUtil.intToBytes(entry.getKey()));
	List<Integer> hashValues = entry.getValue();
	sb.append("(?:");
	boolean first = true;
	for (Integer value : hashValues) {
	if (!first) {
	sb.append('\|');
	}
	addId(sb, ByteUtil.intToBytes(value));
	first = false;
	}
	sb.append(")");
	sb.append("(?:.{").append(8).append("})*"); // for any number of tags
	} catch (Exception ex) {
	LOG.error("constructing regex error", ex);
	}
	}
	sb.append("$");
	if (LOG.isDebugEnabled()) {
	LOG.debug("Tag filter pattern is " + sb.toString());
	}
	return sb.toString();
	}

	/**
	* Convert ComparisonOperator to native HBase CompareOp Support: =, =~,CONTAINS,<,<=,>,>=,!=,!=~
	*
	* @param comp
	* @return
	*/
	protected static CompareOp convertToHBaseCompareOp(ComparisonOperator comp) {
	if (comp == ComparisonOperator.EQUAL \|\| comp == ComparisonOperator.LIKE
	\|\| comp == ComparisonOperator.CONTAINS \|\| comp == ComparisonOperator.IN
	\|\| comp == ComparisonOperator.IS) {
	return CompareOp.EQUAL;
	} else if (comp == ComparisonOperator.LESS) {
	return CompareOp.LESS;
	} else if (comp == ComparisonOperator.LESS_OR_EQUAL) {
	return CompareOp.LESS_OR_EQUAL;
	} else if (comp == ComparisonOperator.GREATER) {
	return CompareOp.GREATER;
	} else if (comp == ComparisonOperator.GREATER_OR_EQUAL) {
	return CompareOp.GREATER_OR_EQUAL;
	} else if (comp == ComparisonOperator.NOT_EQUAL \|\| comp == ComparisonOperator.NOT_LIKE
	\|\| comp == ComparisonOperator.NOT_CONTAINS \|\| comp == ComparisonOperator.IS_NOT
	\|\| comp == ComparisonOperator.NOT_IN) {
	return CompareOp.NOT_EQUAL;
	} else {
	LOG.error("{} operation is not supported now\n", comp);
	throw new IllegalArgumentException("Illegal operation: " + comp + ", avaliable options: "
	+ Arrays.toString(ComparisonOperator.values()));
	}
	}

	/**
	 * Resolves the textual operator with {@code ComparisonOperator.locateOperator} and converts the
	 * result with {@link #convertToHBaseCompareOp(ComparisonOperator)}.
	 *
	 * @param comp operator as written in the query
	 * @return the matching HBase compare operator
	 */
	protected static CompareOp getHBaseCompareOp(String comp) {
	    final ComparisonOperator operator = ComparisonOperator.locateOperator(comp);
	    return convertToHBaseCompareOp(operator);
	}
	}