| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.lens.cube.parse; |
| |
| import static java.util.stream.Collectors.toMap; |
| |
| import java.util.*; |
| |
| import org.apache.lens.cube.metadata.FactPartition; |
| import org.apache.lens.server.api.error.LensException; |
| |
| import org.apache.commons.lang.StringUtils; |
| |
| import com.google.common.collect.Maps; |
| import com.google.common.collect.Sets; |
| |
| /** |
| * Collapses the time range filters using IN operators |
| */ |
| public class AbridgedTimeRangeWriter implements TimeRangeWriter { |
| |
| /** |
| * Return IN clause for the partitions selected in the cube query |
| * |
| * @param cubeQueryContext cube query context |
| * @param tableName table name |
| * @param parts partitions |
| * @return |
| * @throws LensException |
| */ |
| @Override |
| public String getTimeRangeWhereClause(CubeQueryContext cubeQueryContext, |
| String tableName, |
| Set<FactPartition> parts) throws LensException { |
| if (parts == null || parts.isEmpty()) { |
| return ""; |
| } |
| // Collect partition specs by column in a map |
| // All filters which contain only a single column will be combined in an IN operator clause |
| // This clause will be ORed with filters which contain multiple columns. |
| List<String> subFilters = new ArrayList<String>(); |
| for (Map.Entry<Set<FactPartition>, Set<FactPartition>> entry : groupPartitions(parts).entrySet()) { |
| List<String> clauses = new ArrayList<String>(); |
| String clause; |
| clause = getClause(cubeQueryContext, tableName, entry.getKey()); |
| if (clause != null && !clause.isEmpty()) { |
| clauses.add(clause); |
| } |
| clause = getClause(cubeQueryContext, tableName, entry.getValue()); |
| if (clause != null && !clause.isEmpty()) { |
| clauses.add(clause); |
| } |
| subFilters.add("(" + StringUtils.join(clauses, " AND ") + ")"); |
| } |
| return StringUtils.join(subFilters, " OR "); |
| } |
| |
| private String getClause(CubeQueryContext cubeQueryContext, |
| String tableName, |
| Set<FactPartition> parts) throws LensException { |
| Map<String, List<String>> partFilterMap = new HashMap<String, List<String>>(); |
| List<String> allTimeRangeFilters = new ArrayList<String>(); |
| |
| for (FactPartition factPartition : parts) { |
| String filter = TimeRangeUtils.getTimeRangePartitionFilter(factPartition, cubeQueryContext, tableName); |
| if (filter.contains("AND")) { |
| allTimeRangeFilters.add("(" + filter + ")"); |
| } else { |
| extractColumnAndCondition(filter, partFilterMap); |
| } |
| } |
| |
| List<String> inClauses = new ArrayList<String>(partFilterMap.size()); |
| for (String column : partFilterMap.keySet()) { |
| String clause = |
| "(" + StringUtils.join(partFilterMap.get(column), ",") + ")"; |
| inClauses.add(column + " IN " + clause); |
| } |
| |
| allTimeRangeFilters.add(StringUtils.join(inClauses, " AND ")); |
| return StringUtils.join(allTimeRangeFilters, " OR "); |
| } |
| |
| /** |
| * parts is a collection of FactPartition objects. And FactPartition can be viewed as two boolean conditions, one |
| * specified by it's containingPart object, and another specified by itself in the form (partCol = partSpec) |
| * <p/> |
| * Collection of FactPartition objects can be viewed as an OR clause on all the FactPartition objects -- which by |
| * itself is a binary AND clause. |
| * <p/> |
| * So Collection<FactPartition> is nothing but (a AND b) OR (c AND d) OR (e AND f) ... |
| * <p/> |
| * This function tries to reduce such a big clause by using Boolean arithmetic. The big thing it aims to reduce is the |
| * following class of clauses: |
| * <p/> |
| * (a AND c) OR (a AND d) OR (b AND c) OR (b AND d) => ((a OR b) AND (c OR d)) |
| * <p/> |
| * Equivalent return value for such a reduction would be an entry in the returned map from set(a,b) to set(c,d). |
| * Assuming the argument was set(a(containing=c), a(containing=d), b(containing=c), b(containing=d)) |
| * |
| * @param parts |
| * @return |
| */ |
| private Map<Set<FactPartition>, Set<FactPartition>> groupPartitions(Collection<FactPartition> parts) { |
| Map<FactPartition, Set<FactPartition>> partitionSetMap = new HashMap<FactPartition, Set<FactPartition>>(); |
| for (FactPartition part : parts) { |
| partitionSetMap.computeIfAbsent(part.getContainingPart(), k -> Sets.newTreeSet()).add(part.withoutContaining()); |
| } |
| Map<Set<FactPartition>, Set<FactPartition>> setSetOppositeMap = Maps.newHashMap(); |
| for (Map.Entry<FactPartition, Set<FactPartition>> entry : partitionSetMap.entrySet()) { |
| setSetOppositeMap.computeIfAbsent(entry.getValue(), k -> Sets.newTreeSet()); |
| if (entry.getKey() != null) { |
| setSetOppositeMap.get(entry.getValue()).add(entry.getKey()); |
| } |
| } |
| // inverse again |
| return setSetOppositeMap.entrySet().stream().collect(toMap(Map.Entry::getValue, Map.Entry::getKey)); |
| } |
| |
| // This takes the output of filter generated by TimeRangeUtils.getTimeRangePartitionFilter |
| // splits the filters by column names and filters are collected by column name in the |
| // map passed as argument |
| private void extractColumnAndCondition(String token, Map<String, List<String>> partFilterMap) { |
| token = token.trim(); |
| |
| String[] subTokens = StringUtils.split(token, '='); |
| |
| String column = subTokens[0].trim(); |
| String filterValue = subTokens[1].trim(); |
| |
| partFilterMap.computeIfAbsent(column, k -> new ArrayList<>()).add(filterValue); |
| } |
| } |