| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer; |
| |
| import java.io.IOException; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.hadoop.conf.Configurable; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.io.WritableComparator; |
| import org.apache.pig.backend.executionengine.ExecException; |
| import org.apache.pig.data.DataType; |
| import org.apache.pig.data.Tuple; |
| import org.apache.pig.data.TupleFactory; |
| import org.apache.pig.data.TupleRawComparator; |
| import org.apache.pig.impl.io.NullableTuple; |
| import org.apache.pig.impl.util.ObjectSerializer; |
| |
| public class PigTupleSortComparator extends WritableComparator implements Configurable { |
| |
| private final Log mLog = LogFactory.getLog(getClass()); |
| private boolean[] mAsc; |
| private boolean mWholeTuple; |
| private TupleRawComparator mComparator=null; |
| |
| public PigTupleSortComparator() { |
| super(TupleFactory.getInstance().tupleClass()); |
| } |
| |
| @Override |
| public void setConf(Configuration conf) { |
| try { |
| mAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.sortOrder")); |
| } catch (IOException ioe) { |
| mLog.error("Unable to deserialize pig.sortOrder " + ioe.getMessage()); |
| throw new RuntimeException(ioe); |
| } |
| if (mAsc == null) { |
| mAsc = new boolean[1]; |
| mAsc[0] = true; |
| } |
| // If there's only one entry in mAsc, it means it's for the whole |
| // tuple. So we can't be looking for each column. |
| mWholeTuple = (mAsc.length == 1); |
| try { |
| Class<? extends TupleRawComparator> mComparatorClass = TupleFactory.getInstance().tupleRawComparatorClass(); |
| if (mComparatorClass!=null) |
| mComparator = mComparatorClass.newInstance(); |
| } catch (InstantiationException e) { |
| throw new RuntimeException(e); |
| } catch (IllegalAccessException e) { |
| throw new RuntimeException(e); |
| } catch (AbstractMethodError e) { |
| // Tuple does not implement method getRawComparatorClass, or backward compatibility, we assign mComparator to null. |
| // We will use conventional comparator in this case |
| } |
| if (mComparator==null) { |
| try { |
| mComparator = PigTupleDefaultRawComparator.class.newInstance(); |
| } catch (InstantiationException e) { |
| throw new RuntimeException(e); |
| } catch (IllegalAccessException e) { |
| throw new RuntimeException(e); |
| } |
| } |
| ((Configurable)mComparator).setConf(conf); |
| } |
| |
| @Override |
| public Configuration getConf() { |
| return null; |
| } |
| |
| /** |
| * Compare two NullableTuples as raw bytes. Tuples are compared field-wise. |
| * If both are null, then the indices are compared. |
| * Otherwise the null one is defined to be less. |
| */ |
| @Override |
| public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { |
| int rc = 0; |
| if (b1[s1] == 0 && b2[s2] == 0) { |
| // skip mNull and mIndex |
| rc = mComparator.compare(b1, s1 + 1, l1 - 2, b2, s2 + 1, l2 - 2); |
| // handle PIG-927. If tuples are equal but any field inside tuple is null, |
| // then we do not merge keys if indices are not same |
| if (rc == 0 && mComparator.hasComparedTupleNull()) { |
| rc = b1[s1 + 1] - b2[s2 + 1]; |
| } |
| } else { |
| // Two nulls are equal if indices are same |
| if (b1[s1] != 0 && b2[s2] != 0) { |
| rc = b1[s1 + 1] - b2[s2 + 1]; |
| } |
| else if (b1[s1] != 0) |
| rc = -1; |
| else |
| rc = 1; |
| if (mWholeTuple && !mAsc[0]) |
| rc *= -1; |
| } |
| return rc; |
| } |
| |
| @Override |
| @SuppressWarnings("unchecked") |
| public int compare(Object o1, Object o2) { |
| NullableTuple nt1 = (NullableTuple) o1; |
| NullableTuple nt2 = (NullableTuple) o2; |
| int rc = 0; |
| |
| // If either are null, handle differently. |
| if (!nt1.isNull() && !nt2.isNull()) { |
| rc = mComparator.compare((Tuple) nt1.getValueAsPigType(), (Tuple) nt2.getValueAsPigType()); |
| // handle PIG-927. If tuples are equal but any field inside tuple is null, |
| // then we do not merge keys if indices are not same |
| if (rc == 0 && mComparator.hasComparedTupleNull()) { |
| rc = nt1.getIndex() - nt2.getIndex(); |
| } |
| } else { |
| // Two nulls are equal if indices are same |
| if (nt1.isNull() && nt2.isNull()) { |
| rc = nt1.getIndex() - nt2.getIndex(); |
| } |
| else if (nt1.isNull()) |
| rc = -1; |
| else |
| rc = 1; |
| if (mWholeTuple && !mAsc[0]) |
| rc *= -1; |
| } |
| return rc; |
| } |
| |
| private int compareTuple(Tuple t1, Tuple t2) { |
| int sz1 = t1.size(); |
| int sz2 = t2.size(); |
| if (sz2 < sz1) { |
| return 1; |
| } else if (sz2 > sz1) { |
| return -1; |
| } else { |
| for (int i = 0; i < sz1; i++) { |
| try { |
| int c = DataType.compare(t1.get(i), t2.get(i)); |
| if (c != 0) { |
| if (!mWholeTuple && !mAsc[i]) |
| c *= -1; |
| else if (mWholeTuple && !mAsc[0]) |
| c *= -1; |
| return c; |
| } |
| } catch (ExecException e) { |
| throw new RuntimeException("Unable to compare tuples", e); |
| } |
| } |
| return 0; |
| } |
| } |
| } |