fe/src/main/java/org/apache/impala/catalog/HdfsPartitionLocationCompressor.java - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 package org.apache.impala.catalog;

 import java.util.List;

 import org.apache.impala.common.Pair;
 import org.apache.impala.thrift.THdfsPartitionLocation;
 import org.apache.impala.util.ListMap;
 import com.google.common.base.Preconditions;

 /**
  * Utility class for storing HdfsPartition locations in a comrpessed format.  Each
  * instance of this class is owned by a single HdfsTable instance.
  *
  * This class is not thread-safe by itself since it is only modified when the lock on an
  * HdfsTable object is held.
  *
  * TODO: Generalize this to compress other sets of Strings that are likely to share common
  * prefixes, like table locations.
  *
  */
 class HdfsPartitionLocationCompressor {
   int numClusteringColumns_;

   // A bi-directional map between partition location prefixes and their compressed
   // representation, an int.
   final private ListMap<String> prefixMap_ = new ListMap<String>();

   public HdfsPartitionLocationCompressor(int numClusteringColumns) {
     numClusteringColumns_ = numClusteringColumns;
   }

   // Construct an HdfsPartitionLocationCompressor with a pre-filled bidirectional map
   // (indexToPrefix_, prefixToIndex_).
   public HdfsPartitionLocationCompressor(
       int numClusteringColumns, List<String> prefixes) {
     numClusteringColumns_ = numClusteringColumns;
     prefixMap_.populate(prefixes);
   }

   public void setClusteringColumns(int numClusteringColumns) {
     numClusteringColumns_ = numClusteringColumns;
   }

   public List<String> getPrefixes() {
     return prefixMap_.getList();
   }

   // One direction of the map: returns the prefix associated with an index, or "" is the
   // index is -1. Indexes less than -1 or greater than indexToPrefix_.size()-1 are invalid
   // and casue and IllegalArgumentException to be thrown.
   private String indexToPrefix(int i) {
     // Uncompressed location are represented by -1:
     if (i == -1) return "";
     Preconditions.checkElementIndex(i, prefixMap_.size());
     return prefixMap_.getEntry(i);
   }

   // Compress a location prefix, adding it to the bidirectional map (indexToPrefix_,
   // prefixToIndex_) if it is not already present.
   private int prefixToIndex(String s) {
     return prefixMap_.getIndex(s);
   }

   // A surrogate for THdfsPartitionLocation, which represents a partition's location
   // relative to its parent table's list of partition prefixes.
   public class Location {
     // 'prefix_index_' represents the portion of the partition's location that comes before
     // the last N directories, where N is the number of partitioning columns.
     // 'prefix_index_' is an index into
     // HdfsPartitionLocationCompressor.this.indexToPrefix_. 'suffix_' is the rest of the
     // partition location.
     //
     // TODO: Since each partition stores the literal values for the partitioning columns,
     // we could also elide the column names and values from suffix_ when a partition is in
     // the canonical location "/partitioning_column_name_1=value_1/..."
     private final int prefix_index_;
     private final String suffix_;

     public Location(String location) {
       Preconditions.checkNotNull(location);
       Pair<String,String> locationParts = decompose(location);
       prefix_index_ =
           HdfsPartitionLocationCompressor.this.prefixToIndex(locationParts.first);
       suffix_ = locationParts.second;
     }

     public Location(THdfsPartitionLocation thrift) {
       Preconditions.checkNotNull(thrift);
       prefix_index_ = thrift.prefix_index;
       suffix_ = thrift.getSuffix();
     }

     public THdfsPartitionLocation toThrift() {
       return new THdfsPartitionLocation(prefix_index_, suffix_);
     }

     @Override
     public String toString() {
       return HdfsPartitionLocationCompressor.this.indexToPrefix(prefix_index_) + suffix_;
     }

     @Override
     public int hashCode() { return toString().hashCode(); }

     @Override
     public boolean equals(Object obj) {
       return (obj instanceof Location) && (toString() == obj.toString());
     }

     // Decompose a location string by removing its last N directories, where N is the
     // number of clustering columns. The result is a Pair<String,String> where the first
     // String is the prefix and the second is the suffix. (In orther words, their
     // concatenation equals the input.) If the input does not have at least N '/'
     // characters, the prefix is empty and the suffix is the entire input.
     private Pair<String,String> decompose(String s) {
       Preconditions.checkNotNull(s);
       int numClusteringColumns =
           HdfsPartitionLocationCompressor.this.numClusteringColumns_;
       if (numClusteringColumns == 0) return new Pair<String,String>(s, "");
       // Iterate backwards over the input until we have passed 'numClusteringColumns'
       // directories. What is left is the prefix.
       int i = s.length() - 1;
       // If the string ends in '/', iterating past it does not pass a clustering column.
       if (i >= 0 && s.charAt(i) == '/') --i;
       for (; numClusteringColumns > 0 && i >= 0; --i) {
         if (s.charAt(i) == '/') --numClusteringColumns;
       }
       // If we successfully removed all the partition directories, s.charAt(i+1) is '/'
       // and we can include it in the prefix.
       if (0 == numClusteringColumns) ++i;
       return new Pair<String,String>(s.substring(0, i + 1), s.substring(i + 1));
     }
   }
 }
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	package org.apache.impala.catalog;

	import java.util.List;

	import org.apache.impala.common.Pair;
	import org.apache.impala.thrift.THdfsPartitionLocation;
	import org.apache.impala.util.ListMap;
	import com.google.common.base.Preconditions;

	/**
	* Utility class for storing HdfsPartition locations in a comrpessed format. Each
	* instance of this class is owned by a single HdfsTable instance.
	*
	* This class is not thread-safe by itself since it is only modified when the lock on an
	* HdfsTable object is held.
	*
	* TODO: Generalize this to compress other sets of Strings that are likely to share common
	* prefixes, like table locations.
	*
	*/
	class HdfsPartitionLocationCompressor {
	int numClusteringColumns_;

	// A bi-directional map between partition location prefixes and their compressed
	// representation, an int.
	final private ListMap<String> prefixMap_ = new ListMap<String>();

	public HdfsPartitionLocationCompressor(int numClusteringColumns) {
	numClusteringColumns_ = numClusteringColumns;
	}

	// Construct an HdfsPartitionLocationCompressor with a pre-filled bidirectional map
	// (indexToPrefix_, prefixToIndex_).
	public HdfsPartitionLocationCompressor(
	int numClusteringColumns, List<String> prefixes) {
	numClusteringColumns_ = numClusteringColumns;
	prefixMap_.populate(prefixes);
	}

	public void setClusteringColumns(int numClusteringColumns) {
	numClusteringColumns_ = numClusteringColumns;
	}

	public List<String> getPrefixes() {
	return prefixMap_.getList();
	}

	// One direction of the map: returns the prefix associated with an index, or "" is the
	// index is -1. Indexes less than -1 or greater than indexToPrefix_.size()-1 are invalid
	// and casue and IllegalArgumentException to be thrown.
	private String indexToPrefix(int i) {
	// Uncompressed location are represented by -1:
	if (i == -1) return "";
	Preconditions.checkElementIndex(i, prefixMap_.size());
	return prefixMap_.getEntry(i);
	}

	// Compress a location prefix, adding it to the bidirectional map (indexToPrefix_,
	// prefixToIndex_) if it is not already present.
	private int prefixToIndex(String s) {
	return prefixMap_.getIndex(s);
	}

	// A surrogate for THdfsPartitionLocation, which represents a partition's location
	// relative to its parent table's list of partition prefixes.
	public class Location {
	// 'prefix_index_' represents the portion of the partition's location that comes before
	// the last N directories, where N is the number of partitioning columns.
	// 'prefix_index_' is an index into
	// HdfsPartitionLocationCompressor.this.indexToPrefix_. 'suffix_' is the rest of the
	// partition location.
	//
	// TODO: Since each partition stores the literal values for the partitioning columns,
	// we could also elide the column names and values from suffix_ when a partition is in
	// the canonical location "/partitioning_column_name_1=value_1/..."
	private final int prefix_index_;
	private final String suffix_;

	public Location(String location) {
	Preconditions.checkNotNull(location);
	Pair<String,String> locationParts = decompose(location);
	prefix_index_ =
	HdfsPartitionLocationCompressor.this.prefixToIndex(locationParts.first);
	suffix_ = locationParts.second;
	}

	public Location(THdfsPartitionLocation thrift) {
	Preconditions.checkNotNull(thrift);
	prefix_index_ = thrift.prefix_index;
	suffix_ = thrift.getSuffix();
	}

	public THdfsPartitionLocation toThrift() {
	return new THdfsPartitionLocation(prefix_index_, suffix_);
	}

	@Override
	public String toString() {
	return HdfsPartitionLocationCompressor.this.indexToPrefix(prefix_index_) + suffix_;
	}

	@Override
	public int hashCode() { return toString().hashCode(); }

	@Override
	public boolean equals(Object obj) {
	return (obj instanceof Location) && (toString() == obj.toString());
	}

	// Decompose a location string by removing its last N directories, where N is the
	// number of clustering columns. The result is a Pair<String,String> where the first
	// String is the prefix and the second is the suffix. (In orther words, their
	// concatenation equals the input.) If the input does not have at least N '/'
	// characters, the prefix is empty and the suffix is the entire input.
	private Pair<String,String> decompose(String s) {
	Preconditions.checkNotNull(s);
	int numClusteringColumns =
	HdfsPartitionLocationCompressor.this.numClusteringColumns_;
	if (numClusteringColumns == 0) return new Pair<String,String>(s, "");
	// Iterate backwards over the input until we have passed 'numClusteringColumns'
	// directories. What is left is the prefix.
	int i = s.length() - 1;
	// If the string ends in '/', iterating past it does not pass a clustering column.
	if (i >= 0 && s.charAt(i) == '/') --i;
	for (; numClusteringColumns > 0 && i >= 0; --i) {
	if (s.charAt(i) == '/') --numClusteringColumns;
	}
	// If we successfully removed all the partition directories, s.charAt(i+1) is '/'
	// and we can include it in the prefix.
	if (0 == numClusteringColumns) ++i;
	return new Pair<String,String>(s.substring(0, i + 1), s.substring(i + 1));
	}
	}
	}