lens-cube/src/main/java/org/apache/lens/cube/parse/AbridgedTimeRangeWriter.java - lens - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.lens.cube.parse;

 import static java.util.stream.Collectors.toMap;

 import java.util.*;

 import org.apache.lens.cube.metadata.FactPartition;
 import org.apache.lens.server.api.error.LensException;

 import org.apache.commons.lang.StringUtils;

 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;

 /**
  * Collapses the time range filters using IN operators
  */
 public class AbridgedTimeRangeWriter implements TimeRangeWriter {

   /**
    * Return IN clause for the partitions selected in the cube query
    *
    * @param cubeQueryContext cube query context
    * @param tableName        table name
    * @param parts            partitions
    * @return
    * @throws LensException
    */
   @Override
   public String getTimeRangeWhereClause(CubeQueryContext cubeQueryContext,
     String tableName,
     Set<FactPartition> parts) throws LensException {
     if (parts == null || parts.isEmpty()) {
       return "";
     }
     // Collect partition specs by column in a map
     // All filters which contain only a single column will be combined in an IN operator clause
     // This clause will be ORed with filters which contain multiple columns.
     List<String> subFilters = new ArrayList<String>();
     for (Map.Entry<Set<FactPartition>, Set<FactPartition>> entry : groupPartitions(parts).entrySet()) {
       List<String> clauses = new ArrayList<String>();
       String clause;
       clause = getClause(cubeQueryContext, tableName, entry.getKey());
       if (clause != null && !clause.isEmpty()) {
         clauses.add(clause);
       }
       clause = getClause(cubeQueryContext, tableName, entry.getValue());
       if (clause != null && !clause.isEmpty()) {
         clauses.add(clause);
       }
       subFilters.add("(" + StringUtils.join(clauses, " AND ") + ")");
     }
     return StringUtils.join(subFilters, " OR ");
   }

   private String getClause(CubeQueryContext cubeQueryContext,
     String tableName,
     Set<FactPartition> parts) throws LensException {
     Map<String, List<String>> partFilterMap = new HashMap<String, List<String>>();
     List<String> allTimeRangeFilters = new ArrayList<String>();

     for (FactPartition factPartition : parts) {
       String filter = TimeRangeUtils.getTimeRangePartitionFilter(factPartition, cubeQueryContext, tableName);
       if (filter.contains("AND")) {
         allTimeRangeFilters.add("(" + filter + ")");
       } else {
         extractColumnAndCondition(filter, partFilterMap);
       }
     }

     List<String> inClauses = new ArrayList<String>(partFilterMap.size());
     for (String column : partFilterMap.keySet()) {
       String clause =
         "(" + StringUtils.join(partFilterMap.get(column), ",") + ")";
       inClauses.add(column + " IN " + clause);
     }

     allTimeRangeFilters.add(StringUtils.join(inClauses, " AND "));
     return StringUtils.join(allTimeRangeFilters, " OR ");
   }

   /**
    * parts is a collection of FactPartition objects. And FactPartition can be viewed as two boolean conditions, one
    * specified by it's containingPart object, and another specified by itself in the form (partCol = partSpec)
    * <p/>
    * Collection of FactPartition objects can be viewed as an OR clause on all the FactPartition objects -- which by
    * itself is a binary AND clause.
    * <p/>
    * So Collection&lt;FactPartition&gt; is nothing but (a AND b) OR (c AND d) OR (e AND f) ...
    * <p/>
    * This function tries to reduce such a big clause by using Boolean arithmetic. The big thing it aims to reduce is the
    * following class of clauses:
    * <p/>
    * (a AND c) OR (a AND d) OR (b AND c) OR (b AND d) => ((a OR b) AND (c OR d))
    * <p/>
    * Equivalent return value for such a reduction would be an entry in the returned map from set(a,b) to set(c,d).
    * Assuming the argument was set(a(containing=c), a(containing=d), b(containing=c), b(containing=d))
    *
    * @param parts
    * @return
    */
   private Map<Set<FactPartition>, Set<FactPartition>> groupPartitions(Collection<FactPartition> parts) {
     Map<FactPartition, Set<FactPartition>> partitionSetMap = new HashMap<FactPartition, Set<FactPartition>>();
     for (FactPartition part : parts) {
       partitionSetMap.computeIfAbsent(part.getContainingPart(), k -> Sets.newTreeSet()).add(part.withoutContaining());
     }
     Map<Set<FactPartition>, Set<FactPartition>> setSetOppositeMap = Maps.newHashMap();
     for (Map.Entry<FactPartition, Set<FactPartition>> entry : partitionSetMap.entrySet()) {
       setSetOppositeMap.computeIfAbsent(entry.getValue(), k -> Sets.newTreeSet());
       if (entry.getKey() != null) {
         setSetOppositeMap.get(entry.getValue()).add(entry.getKey());
       }
     }
     // inverse again
     return setSetOppositeMap.entrySet().stream().collect(toMap(Map.Entry::getValue, Map.Entry::getKey));
   }

   // This takes the output of filter generated by TimeRangeUtils.getTimeRangePartitionFilter
   // splits the filters by column names and filters are collected by column name in the
   // map passed as argument
   private void extractColumnAndCondition(String token, Map<String, List<String>> partFilterMap) {
     token = token.trim();

     String[] subTokens = StringUtils.split(token, '=');

     String column = subTokens[0].trim();
     String filterValue = subTokens[1].trim();

     partFilterMap.computeIfAbsent(column, k -> new ArrayList<>()).add(filterValue);
   }
 }
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.apache.lens.cube.parse;

	import static java.util.stream.Collectors.toMap;

	import java.util.*;

	import org.apache.lens.cube.metadata.FactPartition;
	import org.apache.lens.server.api.error.LensException;

	import org.apache.commons.lang.StringUtils;

	import com.google.common.collect.Maps;
	import com.google.common.collect.Sets;

	/**
	* Collapses the time range filters using IN operators
	*/
	public class AbridgedTimeRangeWriter implements TimeRangeWriter {

	/**
	* Return IN clause for the partitions selected in the cube query
	*
	* @param cubeQueryContext cube query context
	* @param tableName table name
	* @param parts partitions
	* @return
	* @throws LensException
	*/
	@Override
	public String getTimeRangeWhereClause(CubeQueryContext cubeQueryContext,
	String tableName,
	Set<FactPartition> parts) throws LensException {
	if (parts == null \|\| parts.isEmpty()) {
	return "";
	}
	// Collect partition specs by column in a map
	// All filters which contain only a single column will be combined in an IN operator clause
	// This clause will be ORed with filters which contain multiple columns.
	List<String> subFilters = new ArrayList<String>();
	for (Map.Entry<Set<FactPartition>, Set<FactPartition>> entry : groupPartitions(parts).entrySet()) {
	List<String> clauses = new ArrayList<String>();
	String clause;
	clause = getClause(cubeQueryContext, tableName, entry.getKey());
	if (clause != null && !clause.isEmpty()) {
	clauses.add(clause);
	}
	clause = getClause(cubeQueryContext, tableName, entry.getValue());
	if (clause != null && !clause.isEmpty()) {
	clauses.add(clause);
	}
	subFilters.add("(" + StringUtils.join(clauses, " AND ") + ")");
	}
	return StringUtils.join(subFilters, " OR ");
	}

	private String getClause(CubeQueryContext cubeQueryContext,
	String tableName,
	Set<FactPartition> parts) throws LensException {
	Map<String, List<String>> partFilterMap = new HashMap<String, List<String>>();
	List<String> allTimeRangeFilters = new ArrayList<String>();

	for (FactPartition factPartition : parts) {
	String filter = TimeRangeUtils.getTimeRangePartitionFilter(factPartition, cubeQueryContext, tableName);
	if (filter.contains("AND")) {
	allTimeRangeFilters.add("(" + filter + ")");
	} else {
	extractColumnAndCondition(filter, partFilterMap);
	}
	}

	List<String> inClauses = new ArrayList<String>(partFilterMap.size());
	for (String column : partFilterMap.keySet()) {
	String clause =
	"(" + StringUtils.join(partFilterMap.get(column), ",") + ")";
	inClauses.add(column + " IN " + clause);
	}

	allTimeRangeFilters.add(StringUtils.join(inClauses, " AND "));
	return StringUtils.join(allTimeRangeFilters, " OR ");
	}

	/**
	* parts is a collection of FactPartition objects. And FactPartition can be viewed as two boolean conditions, one
	* specified by it's containingPart object, and another specified by itself in the form (partCol = partSpec)
	* <p/>
	* Collection of FactPartition objects can be viewed as an OR clause on all the FactPartition objects -- which by
	* itself is a binary AND clause.
	* <p/>
	* So Collection<FactPartition> is nothing but (a AND b) OR (c AND d) OR (e AND f) ...
	* <p/>
	* This function tries to reduce such a big clause by using Boolean arithmetic. The big thing it aims to reduce is the
	* following class of clauses:
	* <p/>
	* (a AND c) OR (a AND d) OR (b AND c) OR (b AND d) => ((a OR b) AND (c OR d))
	* <p/>
	* Equivalent return value for such a reduction would be an entry in the returned map from set(a,b) to set(c,d).
	* Assuming the argument was set(a(containing=c), a(containing=d), b(containing=c), b(containing=d))
	*
	* @param parts
	* @return
	*/
	private Map<Set<FactPartition>, Set<FactPartition>> groupPartitions(Collection<FactPartition> parts) {
	Map<FactPartition, Set<FactPartition>> partitionSetMap = new HashMap<FactPartition, Set<FactPartition>>();
	for (FactPartition part : parts) {
	partitionSetMap.computeIfAbsent(part.getContainingPart(), k -> Sets.newTreeSet()).add(part.withoutContaining());
	}
	Map<Set<FactPartition>, Set<FactPartition>> setSetOppositeMap = Maps.newHashMap();
	for (Map.Entry<FactPartition, Set<FactPartition>> entry : partitionSetMap.entrySet()) {
	setSetOppositeMap.computeIfAbsent(entry.getValue(), k -> Sets.newTreeSet());
	if (entry.getKey() != null) {
	setSetOppositeMap.get(entry.getValue()).add(entry.getKey());
	}
	}
	// inverse again
	return setSetOppositeMap.entrySet().stream().collect(toMap(Map.Entry::getValue, Map.Entry::getKey));
	}

	// This takes the output of filter generated by TimeRangeUtils.getTimeRangePartitionFilter
	// splits the filters by column names and filters are collected by column name in the
	// map passed as argument
	private void extractColumnAndCondition(String token, Map<String, List<String>> partFilterMap) {
	token = token.trim();

	String[] subTokens = StringUtils.split(token, '=');

	String column = subTokens[0].trim();
	String filterValue = subTokens[1].trim();

	partFilterMap.computeIfAbsent(column, k -> new ArrayList<>()).add(filterValue);
	}
	}