blob: 8111ba2acfee6ff58d644e1e36544e3be8ba3549 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.resolver;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.apache.drill.common.types.TypeProtos.MinorType;
import com.google.common.graph.ImmutableValueGraph;
import com.google.common.graph.ValueGraphBuilder;
public class ResolverTypePrecedence {
// A weighted directed graph that represents the cost of casting between
// pairs of data types. The edge weights represent casting preferences and
// it is important to note that only some of these preferences can be
// understood in terms of factors like computational cost or loss of
// precision. The others are derived from the expected behaviour of the query
// engine in the face of various data types and queries as expressed by the
// test suite.
//
// Bear in mind that this class only establishes which casts will be tried
// automatically and how they're ranked. See
// {@link org.apache.drill.exec.resolver.TypeCastRules} for listings of which
// casts are possible at all.
public static final ImmutableValueGraph<MinorType, Float> CAST_GRAPH = ValueGraphBuilder
.directed()
.<MinorType, Float>immutable()
// null type source vertex (null is castable to any type)
// prefer to cast NULL to a non-NULL over any non-NULL to another non-NULL
.putEdgeValue(MinorType.NULL, MinorType.VARCHAR, 1f)
.putEdgeValue(MinorType.NULL, MinorType.BIT, 1.1f)
.putEdgeValue(MinorType.NULL, MinorType.INT, 1.2f)
.putEdgeValue(MinorType.NULL, MinorType.FLOAT4, 1.3f)
.putEdgeValue(MinorType.NULL, MinorType.DECIMAL9, 1.4f)
.putEdgeValue(MinorType.NULL, MinorType.DATE, 1.5f)
.putEdgeValue(MinorType.NULL, MinorType.INTERVALDAY, 1.6f)
.putEdgeValue(MinorType.NULL, MinorType.MONEY, 1.7f)
.putEdgeValue(MinorType.NULL, MinorType.LIST, 1.8f)
.putEdgeValue(MinorType.NULL, MinorType.DICT, 1.9f)
// Apart from NULL, casts between differing types have a starting cost
// of 10f so that when they're summed, a smaller number of casts gets
// preferred over a larger number.
// bit conversions
// prefer to cast VARCHAR to BIT than BIT to numerics
.putEdgeValue(MinorType.BIT, MinorType.TINYINT, 100f)
.putEdgeValue(MinorType.BIT, MinorType.UINT1, 100f)
// unsigned int widening
.putEdgeValue(MinorType.UINT1, MinorType.UINT2, 10f)
.putEdgeValue(MinorType.UINT2, MinorType.UINT4, 10f)
.putEdgeValue(MinorType.UINT4, MinorType.UINT8, 10f)
.putEdgeValue(MinorType.UINT8, MinorType.VARDECIMAL, 10f)
// unsigned int conversions
// prefer to cast UINTs to BIGINT over FLOAT4
.putEdgeValue(MinorType.UINT4, MinorType.BIGINT, 10f)
.putEdgeValue(MinorType.UINT4, MinorType.FLOAT4, 11f)
.putEdgeValue(MinorType.UINT8, MinorType.FLOAT4, 12f)
// int widening
.putEdgeValue(MinorType.TINYINT, MinorType.SMALLINT, 10f)
.putEdgeValue(MinorType.SMALLINT, MinorType.INT, 10f)
.putEdgeValue(MinorType.INT, MinorType.BIGINT, 10f)
// int conversions
.putEdgeValue(MinorType.BIGINT, MinorType.FLOAT4, 10f)
// prefer to cast INTs to FLOATs over VARDECIMALs
.putEdgeValue(MinorType.BIGINT, MinorType.VARDECIMAL, 100f)
// float widening
.putEdgeValue(MinorType.FLOAT4, MinorType.FLOAT8, 10f)
// float conversion
// FLOATs are not currently castable to VARDECIMAL (see TypeCastRules)
// but it is not possible to avoid some path between them here since
// FLOATs must ultimately be implicitly castable to VARCHAR, and VARCHAR
// to VARDECIMAL.
// prefer the cast in the opposite direction
.putEdgeValue(MinorType.FLOAT8, MinorType.VARDECIMAL, 10_000f)
// decimal widening
.putEdgeValue(MinorType.DECIMAL9, MinorType.DECIMAL18, 10f)
.putEdgeValue(MinorType.DECIMAL18, MinorType.DECIMAL28DENSE, 10f)
.putEdgeValue(MinorType.DECIMAL28DENSE, MinorType.DECIMAL28SPARSE, 10f)
.putEdgeValue(MinorType.DECIMAL28SPARSE, MinorType.DECIMAL38DENSE, 10f)
.putEdgeValue(MinorType.DECIMAL38DENSE, MinorType.DECIMAL38SPARSE, 10f)
.putEdgeValue(MinorType.DECIMAL38SPARSE, MinorType.VARDECIMAL, 10f)
.putEdgeValue(MinorType.MONEY, MinorType.VARDECIMAL, 10f)
// decimal conversions
// prefer to cast INTs to VARDECIMALs over VARDECIMALs to FLOATs
.putEdgeValue(MinorType.VARDECIMAL, MinorType.FLOAT8, 1_000f)
.putEdgeValue(MinorType.VARDECIMAL, MinorType.FLOAT4, 1_001f)
// prefer the casts in the opposite directions
.putEdgeValue(MinorType.VARDECIMAL, MinorType.INT, 1_002f)
.putEdgeValue(MinorType.VARDECIMAL, MinorType.VARCHAR, 1_003f)
// interval widening
.putEdgeValue(MinorType.INTERVALDAY, MinorType.INTERVALYEAR, 10f)
.putEdgeValue(MinorType.INTERVALYEAR, MinorType.INTERVAL, 10f)
// interval conversions
// prefer the cast in the opposite direction
.putEdgeValue(MinorType.INTERVAL, MinorType.VARCHAR, 1000f)
// dict widening
.putEdgeValue(MinorType.DICT, MinorType.MAP, 10f)
// timestamp widening
.putEdgeValue(MinorType.DATE, MinorType.TIMESTAMP, 10f)
.putEdgeValue(MinorType.TIMESTAMP, MinorType.TIMESTAMPTZ, 10f)
.putEdgeValue(MinorType.TIME, MinorType.TIMETZ, 10f)
// timestamp conversions
// prefer the casts in the opposite directions
.putEdgeValue(MinorType.TIMESTAMP, MinorType.DATE, 100f)
.putEdgeValue(MinorType.TIMESTAMP, MinorType.TIME, 101f)
.putEdgeValue(MinorType.TIMESTAMPTZ, MinorType.VARCHAR, 1_000f)
.putEdgeValue(MinorType.TIMETZ, MinorType.VARCHAR, 1_000f)
// char and binary widening
.putEdgeValue(MinorType.FIXEDCHAR, MinorType.VARCHAR, 10f)
.putEdgeValue(MinorType.FIXEDBINARY, MinorType.VARBINARY, 10f)
// char and binary conversions
.putEdgeValue(MinorType.VARCHAR, MinorType.BIGINT, 100f)
.putEdgeValue(MinorType.VARCHAR, MinorType.INT, 101f)
.putEdgeValue(MinorType.VARCHAR, MinorType.FLOAT8, 102f)
.putEdgeValue(MinorType.VARCHAR, MinorType.FLOAT4, 103f)
.putEdgeValue(MinorType.VARCHAR, MinorType.VARDECIMAL, 104f)
.putEdgeValue(MinorType.VARCHAR, MinorType.TIMESTAMP, 105f)
.putEdgeValue(MinorType.VARCHAR, MinorType.INTERVALDAY, 106f)
.putEdgeValue(MinorType.VARCHAR, MinorType.BIT, 107f)
.putEdgeValue(MinorType.VARCHAR, MinorType.VARBINARY, 108f)
.putEdgeValue(MinorType.VARBINARY, MinorType.VARCHAR, 109f)
// union type sink vertex
.putEdgeValue(MinorType.LIST, MinorType.UNION, 10f)
.putEdgeValue(MinorType.MAP, MinorType.UNION, 10f)
.putEdgeValue(MinorType.VARBINARY, MinorType.UNION, 10f)
.putEdgeValue(MinorType.UNION, MinorType.LATE, 10f)
.build();
/**
* Searches the implicit casting graph for the path of least total cost using
* Dijkstra's algorithm.
* @param fromType type to cast from
* @param toType type to cast to
* @return a positive float path cost or +∞ if no path exists
*/
public static float computeCost(MinorType fromType, MinorType toType) {
TreeSet<VertexDatum> remaining = new TreeSet<>();
Map<MinorType, VertexDatum> vertexData = new HashMap<>();
Set<MinorType> shortestPath = new HashSet<>();
VertexDatum sourceDatum = new VertexDatum(fromType, 0, null);
remaining.add(sourceDatum);
vertexData.put(fromType, sourceDatum);
while (!remaining.isEmpty()) {
// Poll the datum with lowest totalDistance in remaining
VertexDatum vertexDatum = remaining.pollFirst();
MinorType vertex = vertexDatum.vertex;
shortestPath.add(vertex);
if (vertex.equals(toType)) {
// Goal found. We only wanted to calculate the path distance so we
// don't need to go on to backtrace it.
return vertexDatum.totalDistance;
}
Set<MinorType> successors = CAST_GRAPH.successors(vertex);
for (MinorType successor : successors) {
if (shortestPath.contains(successor)) {
continue;
}
float distance = CAST_GRAPH.edgeValue(vertex, successor).orElseThrow(IllegalStateException::new);
float totalDistance = vertexDatum.totalDistance + distance;
VertexDatum successorDatum = vertexData.get(successor);
if (successorDatum == null) {
successorDatum = new VertexDatum(successor, totalDistance, vertexDatum);
vertexData.put(successor, successorDatum);
remaining.add(successorDatum);
} else if (totalDistance < successorDatum.totalDistance) {
successorDatum.totalDistance = totalDistance;
successorDatum.predecessor = vertexDatum;
}
}
}
return Float.POSITIVE_INFINITY;
}
/**
* A struct to hold working data used by Dijkstra's algorithm and allowing
* comparison based on total distance from the source vertex to this vertex.
*/
static class VertexDatum implements Comparable<VertexDatum> {
final MinorType vertex;
float totalDistance;
VertexDatum predecessor;
public VertexDatum(MinorType vertex, float totalDistance, VertexDatum predecessor) {
this.vertex = vertex;
this.totalDistance = totalDistance;
this.predecessor = predecessor;
}
@Override
public int compareTo(VertexDatum other) {
int distComparison = Float.compare(this.totalDistance, other.totalDistance);
// TreeSet uses this method to determine member equality, not equals(), so we
// need to differentiate between vertices with the same totalDistance.
return distComparison != 0 ? distComparison : this.vertex.compareTo(other.vertex);
}
@Override
public String toString() {
return String.format("vertex: %s, totalDistance: %f, predecessor: %s", vertex, totalDistance, predecessor);
}
}
}