blob: 1f4d8cb4fb935102c80e4dd32dd1184dc97382c8 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.schedule;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
import com.carrotsearch.hppc.cursors.ObjectLongCursor;
import com.google.common.collect.Iterators;
import com.google.common.collect.Maps;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Lists;
/**
* The AssignmentCreator is responsible for assigning a set of work units to the available slices.
*/
public class AssignmentCreator<T extends CompleteWork> {
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AssignmentCreator.class);
/**
* Comparator used to sort in order of decreasing affinity
*/
private static Comparator<Entry<DrillbitEndpoint,Long>> comparator = new Comparator<Entry<DrillbitEndpoint,Long>>() {
@Override
public int compare(Entry<DrillbitEndpoint, Long> o1, Entry<DrillbitEndpoint,Long> o2) {
return (int) (o1.getValue() - o2.getValue());
}
};
/**
* the maximum number of work units to assign to any minor fragment
*/
private int maxWork;
/**
* The units of work to be assigned
*/
private List<T> units;
/**
* Mappings
*/
private ArrayListMultimap<Integer, T> mappings = ArrayListMultimap.create();
/**
* A list of DrillbitEndpoints, where the index in the list corresponds to the minor fragment id
*/
private List<DrillbitEndpoint> incomingEndpoints;
private AssignmentCreator(List<DrillbitEndpoint> incomingEndpoints, List<T> units) {
this.incomingEndpoints = incomingEndpoints;
this.units = units;
}
/**
* Assign each unit of work to a minor fragment, given that a list of DrillbitEndpoints, whose index in the list correspond
* to the minor fragment id for each fragment. A given DrillbitEndpoint can appear multiple times in this list. This method
* will try to assign work based on the affinity of each work unit, but will also evenly distribute the work units among
* all of the minor fragments
*
* @param incomingEndpoints The list of incomingEndpoints, indexed by minor fragment id
* @param units the list of work units to be assigned
* @return A multimap that maps each minor fragment id to a list of work units
*/
public static <T extends CompleteWork> ListMultimap<Integer,T> getMappings(List<DrillbitEndpoint> incomingEndpoints, List<T> units) {
AssignmentCreator<T> creator = new AssignmentCreator(incomingEndpoints, units);
return creator.getMappings();
}
/**
* Does the work of creating the mappings for this AssignmentCreator
* @return the minor fragment id to work units mapping
*/
private ListMultimap<Integer, T> getMappings() {
Stopwatch watch = new Stopwatch();
watch.start();
maxWork = (int) Math.ceil(units.size() / ((float) incomingEndpoints.size()));
LinkedList<WorkEndpointListPair<T>> workList = getWorkList();
LinkedList<WorkEndpointListPair<T>> unassignedWorkList;
Map<DrillbitEndpoint,FragIteratorWrapper> endpointIterators = getEndpointIterators();
unassignedWorkList = assign(workList, endpointIterators, true);
assignLeftovers(unassignedWorkList, endpointIterators, true);
assignLeftovers(unassignedWorkList, endpointIterators, false);
if (unassignedWorkList.size() != 0) {
throw new DrillRuntimeException("There are still unassigned work units");
}
logger.debug("Took {} ms to assign {} work units to {} fragments", watch.elapsed(TimeUnit.MILLISECONDS), units.size(), incomingEndpoints.size());
return mappings;
}
/**
*
* @param workList the list of work units to assign
* @param endpointIterators the endpointIterators to assign to
* @param assignMinimum whether to assign only up to the minimum required
* @return a list of unassigned work units
*/
private LinkedList<WorkEndpointListPair<T>> assign(List<WorkEndpointListPair<T>> workList, Map<DrillbitEndpoint,FragIteratorWrapper> endpointIterators, boolean assignMinimum) {
LinkedList<WorkEndpointListPair<T>> currentUnassignedList = Lists.newLinkedList();
outer: for (WorkEndpointListPair<T> workPair : workList) {
List<DrillbitEndpoint> endpoints = workPair.sortedEndpoints;
for (DrillbitEndpoint endpoint : endpoints) {
FragIteratorWrapper iteratorWrapper = endpointIterators.get(endpoint);
if (iteratorWrapper == null) {
continue;
}
if (iteratorWrapper.count < (assignMinimum ? iteratorWrapper.minCount : iteratorWrapper.maxCount)) {
Integer assignment = iteratorWrapper.iter.next();
iteratorWrapper.count++;
mappings.put(assignment, workPair.work);
continue outer;
}
}
currentUnassignedList.add(workPair);
}
return currentUnassignedList;
}
/**
*
* @param unassignedWorkList the work units to assign
* @param endpointIterators the endpointIterators to assign to
* @param assignMinimum wheterh to assign the minimum amount
*/
private void assignLeftovers(LinkedList<WorkEndpointListPair<T>> unassignedWorkList, Map<DrillbitEndpoint,FragIteratorWrapper> endpointIterators, boolean assignMinimum) {
outer: for (FragIteratorWrapper iteratorWrapper : endpointIterators.values()) {
while (iteratorWrapper.count < (assignMinimum ? iteratorWrapper.minCount : iteratorWrapper.maxCount)) {
WorkEndpointListPair<T> workPair = unassignedWorkList.poll();
if (workPair == null) {
break outer;
}
Integer assignment = iteratorWrapper.iter.next();
iteratorWrapper.count++;
mappings.put(assignment, workPair.work);
}
}
}
/**
* Builds the list of WorkEndpointListPairs, which pair a work unit with a list of endpoints sorted by affinity
* @return the list of WorkEndpointListPairs
*/
private LinkedList<WorkEndpointListPair<T>> getWorkList() {
Stopwatch watch = new Stopwatch();
watch.start();
LinkedList<WorkEndpointListPair<T>> workList = Lists.newLinkedList();
for (T work : units) {
List<Map.Entry<DrillbitEndpoint,Long>> entries = Lists.newArrayList();
for (ObjectLongCursor<DrillbitEndpoint> cursor : work.getByteMap()) {
final DrillbitEndpoint ep = cursor.key;
final Long val = cursor.value;
Map.Entry<DrillbitEndpoint,Long> entry = new Entry() {
@Override
public Object getKey() {
return ep;
}
@Override
public Object getValue() {
return val;
}
@Override
public Object setValue(Object value) {
throw new UnsupportedOperationException();
}
};
entries.add(entry);
}
Collections.sort(entries, comparator);
List<DrillbitEndpoint> sortedEndpoints = Lists.newArrayList();
for (Entry<DrillbitEndpoint,Long> entry : entries) {
sortedEndpoints.add(entry.getKey());
}
workList.add(new WorkEndpointListPair<T>(work, sortedEndpoints));
}
return workList;
}
/**
* A wrapper class around a work unit and its associated sort list of Endpoints (sorted by affinity in decreasing order)
*/
private static class WorkEndpointListPair<T> {
T work;
List<DrillbitEndpoint> sortedEndpoints;
WorkEndpointListPair(T work, List<DrillbitEndpoint> sortedEndpoints) {
this.work = work;
this.sortedEndpoints = sortedEndpoints;
}
}
/**
* Groups minor fragments together by corresponding endpoint, and creates an iterator that can be used to evenly
* distribute work assigned to a given endpoint to all corresponding minor fragments evenly
*
* @return
*/
private Map<DrillbitEndpoint,FragIteratorWrapper> getEndpointIterators() {
Stopwatch watch = new Stopwatch();
watch.start();
Map<DrillbitEndpoint,FragIteratorWrapper> map = Maps.newLinkedHashMap();
Map<DrillbitEndpoint,List<Integer>> mmap = Maps.newLinkedHashMap();
for (int i = 0; i < incomingEndpoints.size(); i++) {
DrillbitEndpoint endpoint = incomingEndpoints.get(i);
List<Integer> intList = mmap.get(incomingEndpoints.get(i));
if (intList == null) {
intList = Lists.newArrayList();
}
intList.add(Integer.valueOf(i));
mmap.put(endpoint, intList);
}
for (DrillbitEndpoint endpoint : mmap.keySet()) {
FragIteratorWrapper wrapper = new FragIteratorWrapper();
wrapper.iter = Iterators.cycle(mmap.get(endpoint));
wrapper.maxCount = maxWork * mmap.get(endpoint).size();
wrapper.minCount = Math.max(maxWork - 1, 1) * mmap.get(endpoint).size();
map.put(endpoint, wrapper);
}
return map;
}
/**
* A struct that holds a fragment iterator and keeps track of how many units have been assigned, as well as the maximum
* number of assignment it will accept
*/
private static class FragIteratorWrapper {
int count = 0;
int maxCount;
int minCount;
Iterator<Integer> iter;
}
}