| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.partitioners; |
| |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.HashMap; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| |
| import org.apache.hadoop.conf.Configurable; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.io.RawComparator; |
| import org.apache.hadoop.io.Writable; |
| import org.apache.hadoop.mapreduce.Partitioner; |
| import org.apache.pig.backend.executionengine.ExecException; |
| import org.apache.pig.backend.hadoop.HDataType; |
| import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce; |
| import org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil; |
| import org.apache.pig.data.DataBag; |
| import org.apache.pig.data.InternalMap; |
| import org.apache.pig.data.Tuple; |
| import org.apache.pig.impl.PigImplConstants; |
| import org.apache.pig.impl.builtin.FindQuantiles; |
| import org.apache.pig.impl.io.NullableBigDecimalWritable; |
| import org.apache.pig.impl.io.NullableBigIntegerWritable; |
| import org.apache.pig.impl.io.NullableBooleanWritable; |
| import org.apache.pig.impl.io.NullableBytesWritable; |
| import org.apache.pig.impl.io.NullableDateTimeWritable; |
| import org.apache.pig.impl.io.NullableDoubleWritable; |
| import org.apache.pig.impl.io.NullableFloatWritable; |
| import org.apache.pig.impl.io.NullableIntWritable; |
| import org.apache.pig.impl.io.NullableLongWritable; |
| import org.apache.pig.impl.io.NullableText; |
| import org.apache.pig.impl.io.NullableTuple; |
| import org.apache.pig.impl.io.PigNullableWritable; |
| import org.apache.pig.impl.io.ReadToEndLoader; |
| import org.apache.pig.impl.util.Utils; |
| |
| public class WeightedRangePartitioner extends Partitioner<PigNullableWritable, Writable> |
| implements Configurable { |
| |
| protected Map<PigNullableWritable, DiscreteProbabilitySampleGenerator> weightedParts = |
| new HashMap<PigNullableWritable, DiscreteProbabilitySampleGenerator>(); |
| protected PigNullableWritable[] quantiles; |
| protected RawComparator<PigNullableWritable> comparator; |
| protected Configuration job; |
| |
| protected boolean inited = false; |
| |
| @SuppressWarnings("unchecked") |
| @Override |
| public int getPartition(PigNullableWritable key, Writable value, |
| int numPartitions){ |
| if (!inited) { |
| init(); |
| } |
| if (comparator == null) { |
| comparator = (RawComparator<PigNullableWritable>)PigMapReduce.sJobContext.getSortComparator(); |
| } |
| |
| if(!weightedParts.containsKey(key)){ |
| int index = Arrays.binarySearch(quantiles, key, comparator); |
| if (index < 0) |
| index = -index-1; |
| else |
| index = index + 1; |
| return Math.min(index, numPartitions - 1); |
| } |
| DiscreteProbabilitySampleGenerator gen = weightedParts.get(key); |
| return gen.getNext(); |
| } |
| |
| @SuppressWarnings("unchecked") |
| public void init() { |
| weightedParts = new HashMap<PigNullableWritable, DiscreteProbabilitySampleGenerator>(); |
| |
| String quantilesFile = job.get("pig.quantilesFile", ""); |
| if (quantilesFile.length() == 0) { |
| throw new RuntimeException(this.getClass().getSimpleName() |
| + " used but no quantiles found"); |
| } |
| |
| try{ |
| // use local file system to get the quantilesFile |
| Map<String, Object> quantileMap = null; |
| Configuration conf; |
| if (job.getBoolean(PigImplConstants.PIG_EXECTYPE_MODE_LOCAL, false)) { |
| conf = new Configuration(false); |
| } else { |
| conf = new Configuration(job); |
| } |
| if (job.get("fs.file.impl") != null) { |
| conf.set("fs.file.impl", job.get("fs.file.impl")); |
| } |
| if (job.get("fs.hdfs.impl") != null) { |
| conf.set("fs.hdfs.impl", job.get("fs.hdfs.impl")); |
| } |
| |
| MapRedUtil.copyTmpFileConfigurationValues(job, conf); |
| conf.set(MapRedUtil.FILE_SYSTEM_NAME, "file:///"); |
| |
| ReadToEndLoader loader = new ReadToEndLoader(Utils.getTmpFileStorageObject(conf), |
| conf, quantilesFile, 0); |
| Tuple t = loader.getNext(); |
| if (t != null) { |
| // the Quantiles file has a tuple as under: |
| // (numQuantiles, bag of samples) |
| // numQuantiles here is the reduce parallelism |
| quantileMap = (Map<String, Object>) t.get(0); |
| } |
| |
| if (quantileMap!=null) { |
| DataBag quantilesList = (DataBag) quantileMap.get(FindQuantiles.QUANTILES_LIST); |
| InternalMap weightedPartsData = (InternalMap) quantileMap.get(FindQuantiles.WEIGHTED_PARTS); |
| convertToArray(quantilesList); |
| for (Entry<Object, Object> ent : weightedPartsData.entrySet()) { |
| Tuple key = (Tuple)ent.getKey(); // sample item which repeats |
| float[] probVec = getProbVec((Tuple)ent.getValue()); |
| weightedParts.put(getPigNullableWritable(key), |
| new DiscreteProbabilitySampleGenerator(probVec)); |
| } |
| } |
| // else - the quantiles file is empty - unless we have a bug, the |
| // input must also be empty in which case we don't need to put |
| // anything in weightedParts since getPartition() should never get |
| // called. If the quantiles file is empty due to either a bug or |
| // a transient failure situation on the dfs, then weightedParts will |
| // not be populated and the job will fail in getPartition() |
| } catch (Exception e) { |
| throw new RuntimeException(e); |
| } |
| inited = true; |
| } |
| |
| @Override |
| public void setConf(Configuration configuration) { |
| job = configuration; |
| } |
| |
| /** |
| * @param value |
| * @return |
| * @throws ExecException |
| */ |
| protected float[] getProbVec(Tuple values) throws ExecException { |
| float[] probVec = new float[values.size()]; |
| for(int i = 0; i < values.size(); i++) { |
| probVec[i] = (Float)values.get(i); |
| } |
| return probVec; |
| } |
| |
| protected PigNullableWritable getPigNullableWritable(Tuple t) { |
| try { |
| // user comparators work with tuples - so if user comparator |
| // is being used OR if there are more than 1 sort cols, use |
| // NullableTuple |
| if ("true".equals(job.get("pig.usercomparator")) || t.size() > 1) { |
| return new NullableTuple(t); |
| } else { |
| Object o = t.get(0); |
| String kts = job.get("pig.reduce.key.type"); |
| if (kts == null) { |
| throw new RuntimeException("Didn't get reduce key type " |
| + "from config file."); |
| } |
| return HDataType.getWritableComparableTypes(o, |
| Byte.valueOf(kts)); |
| } |
| } catch (Exception e) { |
| throw new RuntimeException(e); |
| } |
| } |
| |
| protected void convertToArray(DataBag quantilesListAsBag) { |
| ArrayList<PigNullableWritable> quantilesList = getList(quantilesListAsBag); |
| |
| if ("true".equals(job.get("pig.usercomparator")) || |
| quantilesList.get(0).getClass().equals(NullableTuple.class)) { |
| quantiles = quantilesList.toArray(new NullableTuple[0]); |
| } else if (quantilesList.get(0).getClass().equals(NullableBytesWritable.class)) { |
| quantiles = quantilesList.toArray(new NullableBytesWritable[0]); |
| } else if (quantilesList.get(0).getClass().equals(NullableDoubleWritable.class)) { |
| quantiles = quantilesList.toArray(new NullableDoubleWritable[0]); |
| } else if (quantilesList.get(0).getClass().equals(NullableBigIntegerWritable.class)) { |
| quantiles = quantilesList.toArray(new NullableBigIntegerWritable[0]); |
| } else if (quantilesList.get(0).getClass().equals(NullableBigDecimalWritable.class)) { |
| quantiles = quantilesList.toArray(new NullableBigDecimalWritable[0]); |
| } else if (quantilesList.get(0).getClass().equals(NullableFloatWritable.class)) { |
| quantiles = quantilesList.toArray(new NullableFloatWritable[0]); |
| } else if (quantilesList.get(0).getClass().equals(NullableBooleanWritable.class)) { |
| quantiles = quantilesList.toArray(new NullableBooleanWritable[0]); |
| } else if (quantilesList.get(0).getClass().equals(NullableIntWritable.class)) { |
| quantiles = quantilesList.toArray(new NullableIntWritable[0]); |
| } else if (quantilesList.get(0).getClass().equals(NullableLongWritable.class)) { |
| quantiles = quantilesList.toArray(new NullableLongWritable[0]); |
| } else if (quantilesList.get(0).getClass().equals(NullableDateTimeWritable.class)) { |
| quantiles = quantilesList.toArray(new NullableDateTimeWritable[0]); |
| } else if (quantilesList.get(0).getClass().equals(NullableText.class)) { |
| quantiles = quantilesList.toArray(new NullableText[0]); |
| } else { |
| throw new RuntimeException("Unexpected class in " + this.getClass().getSimpleName()); |
| } |
| } |
| |
| /** |
| * @param quantilesListAsBag |
| * @return |
| */ |
| private ArrayList<PigNullableWritable> getList(DataBag quantilesListAsBag) { |
| ArrayList<PigNullableWritable> list = new ArrayList<PigNullableWritable>(); |
| for (Tuple tuple : quantilesListAsBag) { |
| list.add(getPigNullableWritable(tuple)); |
| } |
| return list; |
| } |
| |
| @Override |
| public Configuration getConf() { |
| return job; |
| } |
| |
| } |