blob: b7f125791645ea3ef227631ab137bf758620f294 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.pig.theta;
import java.io.IOException;
import org.apache.datasketches.ResizeFactor;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.theta.CompactSketch;
import org.apache.datasketches.theta.Sketch;
import org.apache.datasketches.theta.UpdateSketch;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
/**
* Common methods for the pig classes.
*/
class PigUtil {
static final ResizeFactor RF = ResizeFactor.X8;
/**
* Serialize an ordered CompactSketch to SketchTuple
*
* @param sketch The compact ordered sketch to serialize
* @return Sketch Tuple.
*/
static final Tuple compactOrderedSketchToTuple(final CompactSketch sketch) {
final Tuple outputTuple = TupleFactory.getInstance().newTuple(1);
final byte[] bytes = sketch.toByteArray();
final DataByteArray dba = new DataByteArray(bytes);
if (!sketch.isOrdered()) {
throw new IllegalArgumentException("Given sketch must be ordered.");
}
try {
outputTuple.set(0, dba);
}
catch (final IOException e) {
throw new IllegalArgumentException("IOException thrown: " + e);
}
return outputTuple;
}
/**
* Deserialize a sketch from a tuple and return it.
*
* @param tuple The tuple containing the sketch. The tuple should have a single entry that is a
* DataByteArray.
* @param seed to check against other sketches.
* @return A sketch
*/
static final Sketch tupleToSketch(final Tuple tuple, final long seed) {
DataByteArray sketchDBA = null;
sketchDBA = (DataByteArray) extractFieldAtIndex(tuple, 0);
final Memory srcMem = Memory.wrap(sketchDBA.get());
final Sketch sketch = Sketch.wrap(srcMem, seed);
return sketch;
}
/**
* Extract a non-empty DataBag from field 0 of the given tuple. Caught exceptions include
* IOException, NullPointerException and IndexOutOfBoundsException. The tuple cannot be null
* and have at least one field, which cannot be null and contain a DataBag, which must
* have a size greater than zero.
* If an exception is caught a null is returned to the caller as a signal.
* @param tuple the given tuple.
* @return a DataBag or null.
*/
static final DataBag extractBag(final Tuple tuple) {
DataBag bag = null;
try {
bag = (DataBag) tuple.get(0);
if (bag.size() == 0) { return null; }
}
catch (final IOException | NullPointerException | IndexOutOfBoundsException e ) {
return null; //as a signal
}
return bag;
}
/**
* Extract a non-null Object from field at index <i>index</i> (0-based) of the given tuple.
* Caught exceptions include IOException, NullPointerException and IndexOutOfBoundsException.
* The tuple cannot be null and must have at least <i>index+1</i> fields.
* The field at <i>index</i> cannot be null.
* If an exception is caught a null is returned to the caller as a signal.
* @param tuple the given tuple.
* @param index the 0-based index of the desired field
* @return a non-null Object or null.
*/
static final Object extractFieldAtIndex(final Tuple tuple, final int index) {
Object fi = null;
try {
fi = tuple.get(index);
fi.hashCode(); //cannot be null
}
catch (final IOException | NullPointerException | IndexOutOfBoundsException e) {
return null; //as a signal
}
return fi;
}
/**
* Extract a non-null Byte from getType(index) of the given tuple. Caught exceptions include
* IOException, NullPointerException and IndexOutOfBoundsException.
* If an exception is caught a null is returned to the caller as a signal.
* @param tuple the given tuple.
* @param index the 0-based index of the desired field
* @return a Byte of Pig DataType or null.
*/
static final Byte extractTypeAtIndex(final Tuple tuple, final int index) {
Byte type = null;
try {
type = tuple.getType(index);
}
catch (final IOException | NullPointerException | IndexOutOfBoundsException e) {
return null;
}
return type;
}
/**
* Return an empty Compact Ordered Sketch Tuple. Empty sketch is only 8 bytes.
* @param seed the given seed
* @return an empty compact ordered sketch tuple
*/
static final Tuple emptySketchTuple(final long seed) {
final UpdateSketch sketch = UpdateSketch.builder().setSeed(seed).setResizeFactor(RF)
.setNominalEntries(16).build();
final CompactSketch compOrdSketch = sketch.compact(true, null);
return compactOrderedSketchToTuple(compOrdSketch);
}
}