| /* |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.hadoop.hbase.util; |
| |
| import java.util.Arrays; |
| import java.util.Deque; |
| import java.util.LinkedList; |
| |
| import org.apache.hadoop.hbase.classification.InterfaceAudience; |
| |
| /** |
| * Computes the optimal (minimal cost) assignment of jobs to workers (or other |
| * analogous) concepts given a cost matrix of each pair of job and worker, using |
| * the algorithm by James Munkres in "Algorithms for the Assignment and |
| * Transportation Problems", with additional optimizations as described by Jin |
| * Kue Wong in "A New Implementation of an Algorithm for the Optimal Assignment |
| * Problem: An Improved Version of Munkres' Algorithm". The algorithm runs in |
| * O(n^3) time and need O(n^2) auxiliary space where n is the number of jobs or |
| * workers, whichever is greater. |
| */ |
| @InterfaceAudience.Private |
| public class MunkresAssignment { |
| |
| // The original algorithm by Munkres uses the terms STAR and PRIME to denote |
| // different states of zero values in the cost matrix. These values are |
| // represented as byte constants instead of enums to save space in the mask |
| // matrix by a factor of 4n^2 where n is the size of the problem. |
| private static final byte NONE = 0; |
| private static final byte STAR = 1; |
| private static final byte PRIME = 2; |
| |
| // The algorithm requires that the number of column is at least as great as |
| // the number of rows. If that is not the case, then the cost matrix should |
| // be transposed before computation, and the solution matrix transposed before |
| // returning to the caller. |
| private final boolean transposed; |
| |
| // The number of rows of internal matrices. |
| private final int rows; |
| |
| // The number of columns of internal matrices. |
| private final int cols; |
| |
| // The cost matrix, the cost of assigning each row index to column index. |
| private float[][] cost; |
| |
| // Mask of zero cost assignment states. |
| private byte[][] mask; |
| |
| // Covering some rows of the cost matrix. |
| private boolean[] rowsCovered; |
| |
| // Covering some columns of the cost matrix. |
| private boolean[] colsCovered; |
| |
| // The alternating path between starred zeroes and primed zeroes |
| private Deque<Pair<Integer, Integer>> path; |
| |
| // The solution, marking which rows should be assigned to which columns. The |
| // positions of elements in this array correspond to the rows of the cost |
| // matrix, and the value of each element correspond to the columns of the cost |
| // matrix, i.e. assignments[i] = j indicates that row i should be assigned to |
| // column j. |
| private int[] assignments; |
| |
| // Improvements described by Jin Kue Wong cache the least value in each row, |
| // as well as the column index of the least value in each row, and the pending |
| // adjustments to each row and each column. |
| private float[] leastInRow; |
| private int[] leastInRowIndex; |
| private float[] rowAdjust; |
| private float[] colAdjust; |
| |
| /** |
| * Construct a new problem instance with the specified cost matrix. The cost |
| * matrix must be rectangular, though not necessarily square. If one dimension |
| * is greater than the other, some elements in the greater dimension will not |
| * be assigned. The input cost matrix will not be modified. |
| * @param costMatrix |
| */ |
| public MunkresAssignment(float[][] costMatrix) { |
| // The algorithm assumes that the number of columns is at least as great as |
| // the number of rows. If this is not the case of the input matrix, then |
| // all internal structures must be transposed relative to the input. |
| this.transposed = costMatrix.length > costMatrix[0].length; |
| if (this.transposed) { |
| this.rows = costMatrix[0].length; |
| this.cols = costMatrix.length; |
| } else { |
| this.rows = costMatrix.length; |
| this.cols = costMatrix[0].length; |
| } |
| |
| cost = new float[rows][cols]; |
| mask = new byte[rows][cols]; |
| rowsCovered = new boolean[rows]; |
| colsCovered = new boolean[cols]; |
| path = new LinkedList<Pair<Integer, Integer>>(); |
| |
| leastInRow = new float[rows]; |
| leastInRowIndex = new int[rows]; |
| rowAdjust = new float[rows]; |
| colAdjust = new float[cols]; |
| |
| assignments = null; |
| |
| // Copy cost matrix. |
| if (transposed) { |
| for (int r = 0; r < rows; r++) { |
| for (int c = 0; c < cols; c++) { |
| cost[r][c] = costMatrix[c][r]; |
| } |
| } |
| } else { |
| for (int r = 0; r < rows; r++) { |
| System.arraycopy(costMatrix[r], 0, cost[r], 0, cols); |
| } |
| } |
| |
| // Costs must be finite otherwise the matrix can get into a bad state where |
| // no progress can be made. If your use case depends on a distinction |
| // between costs of MAX_VALUE and POSITIVE_INFINITY, you're doing it wrong. |
| for (int r = 0; r < rows; r++) { |
| for (int c = 0; c < cols; c++) { |
| if (cost[r][c] == Float.POSITIVE_INFINITY) { |
| cost[r][c] = Float.MAX_VALUE; |
| } |
| } |
| } |
| } |
| |
| /** |
| * Get the optimal assignments. The returned array will have the same number |
| * of elements as the number of elements as the number of rows in the input |
| * cost matrix. Each element will indicate which column should be assigned to |
| * that row or -1 if no column should be assigned, i.e. if result[i] = j then |
| * row i should be assigned to column j. Subsequent invocations of this method |
| * will simply return the same object without additional computation. |
| * @return an array with the optimal assignments |
| */ |
| public int[] solve() { |
| // If this assignment problem has already been solved, return the known |
| // solution |
| if (assignments != null) { |
| return assignments; |
| } |
| |
| preliminaries(); |
| |
| // Find the optimal assignments. |
| while (!testIsDone()) { |
| while (!stepOne()) { |
| stepThree(); |
| } |
| stepTwo(); |
| } |
| |
| // Extract the assignments from the mask matrix. |
| if (transposed) { |
| assignments = new int[cols]; |
| outer: |
| for (int c = 0; c < cols; c++) { |
| for (int r = 0; r < rows; r++) { |
| if (mask[r][c] == STAR) { |
| assignments[c] = r; |
| continue outer; |
| } |
| } |
| // There is no assignment for this row of the input/output. |
| assignments[c] = -1; |
| } |
| } else { |
| assignments = new int[rows]; |
| outer: |
| for (int r = 0; r < rows; r++) { |
| for (int c = 0; c < cols; c++) { |
| if (mask[r][c] == STAR) { |
| assignments[r] = c; |
| continue outer; |
| } |
| } |
| } |
| } |
| |
| // Once the solution has been computed, there is no need to keep any of the |
| // other internal structures. Clear all unnecessary internal references so |
| // the garbage collector may reclaim that memory. |
| cost = null; |
| mask = null; |
| rowsCovered = null; |
| colsCovered = null; |
| path = null; |
| leastInRow = null; |
| leastInRowIndex = null; |
| rowAdjust = null; |
| colAdjust = null; |
| |
| return assignments; |
| } |
| |
| /** |
| * Corresponds to the "preliminaries" step of the original algorithm. |
| * Guarantees that the matrix is an equivalent non-negative matrix with at |
| * least one zero in each row. |
| */ |
| private void preliminaries() { |
| for (int r = 0; r < rows; r++) { |
| // Find the minimum cost of each row. |
| float min = Float.POSITIVE_INFINITY; |
| for (int c = 0; c < cols; c++) { |
| min = Math.min(min, cost[r][c]); |
| } |
| |
| // Subtract that minimum cost from each element in the row. |
| for (int c = 0; c < cols; c++) { |
| cost[r][c] -= min; |
| |
| // If the element is now zero and there are no zeroes in the same row |
| // or column which are already starred, then star this one. There |
| // must be at least one zero because of subtracting the min cost. |
| if (cost[r][c] == 0 && !rowsCovered[r] && !colsCovered[c]) { |
| mask[r][c] = STAR; |
| // Cover this row and column so that no other zeroes in them can be |
| // starred. |
| rowsCovered[r] = true; |
| colsCovered[c] = true; |
| } |
| } |
| } |
| |
| // Clear the covered rows and columns. |
| Arrays.fill(rowsCovered, false); |
| Arrays.fill(colsCovered, false); |
| } |
| |
| /** |
| * Test whether the algorithm is done, i.e. we have the optimal assignment. |
| * This occurs when there is exactly one starred zero in each row. |
| * @return true if the algorithm is done |
| */ |
| private boolean testIsDone() { |
| // Cover all columns containing a starred zero. There can be at most one |
| // starred zero per column. Therefore, a covered column has an optimal |
| // assignment. |
| for (int r = 0; r < rows; r++) { |
| for (int c = 0; c < cols; c++) { |
| if (mask[r][c] == STAR) { |
| colsCovered[c] = true; |
| } |
| } |
| } |
| |
| // Count the total number of covered columns. |
| int coveredCols = 0; |
| for (int c = 0; c < cols; c++) { |
| coveredCols += colsCovered[c] ? 1 : 0; |
| } |
| |
| // Apply an row and column adjustments that are pending. |
| for (int r = 0; r < rows; r++) { |
| for (int c = 0; c < cols; c++) { |
| cost[r][c] += rowAdjust[r]; |
| cost[r][c] += colAdjust[c]; |
| } |
| } |
| |
| // Clear the pending row and column adjustments. |
| Arrays.fill(rowAdjust, 0); |
| Arrays.fill(colAdjust, 0); |
| |
| // The covers on columns and rows may have been reset, recompute the least |
| // value for each row. |
| for (int r = 0; r < rows; r++) { |
| leastInRow[r] = Float.POSITIVE_INFINITY; |
| for (int c = 0; c < cols; c++) { |
| if (!rowsCovered[r] && !colsCovered[c] && cost[r][c] < leastInRow[r]) { |
| leastInRow[r] = cost[r][c]; |
| leastInRowIndex[r] = c; |
| } |
| } |
| } |
| |
| // If all columns are covered, then we are done. Since there may be more |
| // columns than rows, we are also done if the number of covered columns is |
| // at least as great as the number of rows. |
| return (coveredCols == cols || coveredCols >= rows); |
| } |
| |
| /** |
| * Corresponds to step 1 of the original algorithm. |
| * @return false if all zeroes are covered |
| */ |
| private boolean stepOne() { |
| while (true) { |
| Pair<Integer, Integer> zero = findUncoveredZero(); |
| if (zero == null) { |
| // No uncovered zeroes, need to manipulate the cost matrix in step |
| // three. |
| return false; |
| } else { |
| // Prime the uncovered zero and find a starred zero in the same row. |
| mask[zero.getFirst()][zero.getSecond()] = PRIME; |
| Pair<Integer, Integer> star = starInRow(zero.getFirst()); |
| if (star != null) { |
| // Cover the row with both the newly primed zero and the starred zero. |
| // Since this is the only place where zeroes are primed, and we cover |
| // it here, and rows are only uncovered when primes are erased, then |
| // there can be at most one primed uncovered zero. |
| rowsCovered[star.getFirst()] = true; |
| colsCovered[star.getSecond()] = false; |
| updateMin(star.getFirst(), star.getSecond()); |
| } else { |
| // Will go to step two after, where a path will be constructed, |
| // starting from the uncovered primed zero (there is only one). Since |
| // we have already found it, save it as the first node in the path. |
| path.clear(); |
| path.offerLast(new Pair<Integer, Integer>(zero.getFirst(), |
| zero.getSecond())); |
| return true; |
| } |
| } |
| } |
| } |
| |
| /** |
| * Corresponds to step 2 of the original algorithm. |
| */ |
| private void stepTwo() { |
| // Construct a path of alternating starred zeroes and primed zeroes, where |
| // each starred zero is in the same column as the previous primed zero, and |
| // each primed zero is in the same row as the previous starred zero. The |
| // path will always end in a primed zero. |
| while (true) { |
| Pair<Integer, Integer> star = starInCol(path.getLast().getSecond()); |
| if (star != null) { |
| path.offerLast(star); |
| } else { |
| break; |
| } |
| Pair<Integer, Integer> prime = primeInRow(path.getLast().getFirst()); |
| path.offerLast(prime); |
| } |
| |
| // Augment path - unmask all starred zeroes and star all primed zeroes. All |
| // nodes in the path will be either starred or primed zeroes. The set of |
| // starred zeroes is independent and now one larger than before. |
| for (Pair<Integer, Integer> p : path) { |
| if (mask[p.getFirst()][p.getSecond()] == STAR) { |
| mask[p.getFirst()][p.getSecond()] = NONE; |
| } else { |
| mask[p.getFirst()][p.getSecond()] = STAR; |
| } |
| } |
| |
| // Clear all covers from rows and columns. |
| Arrays.fill(rowsCovered, false); |
| Arrays.fill(colsCovered, false); |
| |
| // Remove the prime mask from all primed zeroes. |
| for (int r = 0; r < rows; r++) { |
| for (int c = 0; c < cols; c++) { |
| if (mask[r][c] == PRIME) { |
| mask[r][c] = NONE; |
| } |
| } |
| } |
| } |
| |
| /** |
| * Corresponds to step 3 of the original algorithm. |
| */ |
| private void stepThree() { |
| // Find the minimum uncovered cost. |
| float min = leastInRow[0]; |
| for (int r = 1; r < rows; r++) { |
| if (leastInRow[r] < min) { |
| min = leastInRow[r]; |
| } |
| } |
| |
| // Add the minimum cost to each of the costs in a covered row, or subtract |
| // the minimum cost from each of the costs in an uncovered column. As an |
| // optimization, do not actually modify the cost matrix yet, but track the |
| // adjustments that need to be made to each row and column. |
| for (int r = 0; r < rows; r++) { |
| if (rowsCovered[r]) { |
| rowAdjust[r] += min; |
| } |
| } |
| for (int c = 0; c < cols; c++) { |
| if (!colsCovered[c]) { |
| colAdjust[c] -= min; |
| } |
| } |
| |
| // Since the cost matrix is not being updated yet, the minimum uncovered |
| // cost per row must be updated. |
| for (int r = 0; r < rows; r++) { |
| if (!colsCovered[leastInRowIndex[r]]) { |
| // The least value in this row was in an uncovered column, meaning that |
| // it would have had the minimum value subtracted from it, and therefore |
| // will still be the minimum value in that row. |
| leastInRow[r] -= min; |
| } else { |
| // The least value in this row was in a covered column and would not |
| // have had the minimum value subtracted from it, so the minimum value |
| // could be some in another column. |
| for (int c = 0; c < cols; c++) { |
| if (cost[r][c] + colAdjust[c] + rowAdjust[r] < leastInRow[r]) { |
| leastInRow[r] = cost[r][c] + colAdjust[c] + rowAdjust[r]; |
| leastInRowIndex[r] = c; |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * Find a zero cost assignment which is not covered. If there are no zero cost |
| * assignments which are uncovered, then null will be returned. |
| * @return pair of row and column indices of an uncovered zero or null |
| */ |
| private Pair<Integer, Integer> findUncoveredZero() { |
| for (int r = 0; r < rows; r++) { |
| if (leastInRow[r] == 0) { |
| return new Pair<Integer, Integer>(r, leastInRowIndex[r]); |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * A specified row has become covered, and a specified column has become |
| * uncovered. The least value per row may need to be updated. |
| * @param row the index of the row which was just covered |
| * @param col the index of the column which was just uncovered |
| */ |
| private void updateMin(int row, int col) { |
| // If the row is covered we want to ignore it as far as least values go. |
| leastInRow[row] = Float.POSITIVE_INFINITY; |
| |
| for (int r = 0; r < rows; r++) { |
| // Since the column has only just been uncovered, it could not have any |
| // pending adjustments. Only covered rows can have pending adjustments |
| // and covered costs do not count toward row minimums. Therefore, we do |
| // not need to consider rowAdjust[r] or colAdjust[col]. |
| if (!rowsCovered[r] && cost[r][col] < leastInRow[r]) { |
| leastInRow[r] = cost[r][col]; |
| leastInRowIndex[r] = col; |
| } |
| } |
| } |
| |
| /** |
| * Find a starred zero in a specified row. If there are no starred zeroes in |
| * the specified row, then null will be returned. |
| * @param r the index of the row to be searched |
| * @return pair of row and column indices of starred zero or null |
| */ |
| private Pair<Integer, Integer> starInRow(int r) { |
| for (int c = 0; c < cols; c++) { |
| if (mask[r][c] == STAR) { |
| return new Pair<Integer, Integer>(r, c); |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Find a starred zero in the specified column. If there are no starred zeroes |
| * in the specified row, then null will be returned. |
| * @param c the index of the column to be searched |
| * @return pair of row and column indices of starred zero or null |
| */ |
| private Pair<Integer, Integer> starInCol(int c) { |
| for (int r = 0; r < rows; r++) { |
| if (mask[r][c] == STAR) { |
| return new Pair<Integer, Integer>(r, c); |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Find a primed zero in the specified row. If there are no primed zeroes in |
| * the specified row, then null will be returned. |
| * @param r the index of the row to be searched |
| * @return pair of row and column indices of primed zero or null |
| */ |
| private Pair<Integer, Integer> primeInRow(int r) { |
| for (int c = 0; c < cols; c++) { |
| if (mask[r][c] == PRIME) { |
| return new Pair<Integer, Integer>(r, c); |
| } |
| } |
| return null; |
| } |
| } |