Fix issues not initially caught by IntelliJ; let ArrayOfTuplesSerDe avoid copying data every time it deserializes
diff --git a/src/main/java/com/yahoo/sketches/pig/sampling/ArrayOfTuplesSerDe.java b/src/main/java/com/yahoo/sketches/pig/sampling/ArrayOfTuplesSerDe.java
index 949b2ad..d4ccb2f 100644
--- a/src/main/java/com/yahoo/sketches/pig/sampling/ArrayOfTuplesSerDe.java
+++ b/src/main/java/com/yahoo/sketches/pig/sampling/ArrayOfTuplesSerDe.java
@@ -15,6 +15,7 @@
import org.apache.pig.data.WritableByteArray;
import com.yahoo.memory.Memory;
+import com.yahoo.memory.WritableMemory;
import com.yahoo.sketches.ArrayOfItemsSerDe;
/**
@@ -27,10 +28,9 @@
@Override
public byte[] serializeToByteArray(final Tuple[] items) {
final WritableByteArray wba = new WritableByteArray();
- final DataOutputStream os = new DataOutputStream(wba);
- try {
+ try (final DataOutputStream os = new DataOutputStream(wba)) {
for (Tuple t : items) {
- // BinInterSedes is more efficient, but only suitable for intermediate data within a job
+ // BinInterSedes is more efficient, but only suitable for intermediate data within a job.
DataReaderWriter.writeDatum(os, t);
}
} catch (final IOException e) {
@@ -42,20 +42,17 @@
@Override
public Tuple[] deserializeFromMemory(final Memory mem, final int numItems) {
- // if we could get the correct offset into the region, the following avoids a copy:
- //final byte[] bytes = (byte[]) ((WritableMemory) mem).getArray();
- final int size = (int) mem.getCapacity();
- final byte[] bytes = new byte[size];
- mem.getByteArray(0, bytes, 0, size);
-
- final DataInputStream is = new DataInputStream(new ByteArrayInputStream(bytes));
+ final byte[] bytes = (byte[]) ((WritableMemory) mem).getArray();
+ final int offset = (int) ((WritableMemory) mem).getRegionOffset(0L);
+ final int length = (int) mem.getCapacity();
final Tuple[] result = new Tuple[numItems];
- try {
+ try (final ByteArrayInputStream bais = new ByteArrayInputStream(bytes, offset, length);
+ final DataInputStream dis = new DataInputStream(bais)) {
for (int i = 0; i < numItems; ++i) {
- // BinInterSedes is more efficient, but only suitable for intermediate data within a job
- // we know we're getting Tuples back in this case
- result[i] = (Tuple) DataReaderWriter.readDatum(is);
+ // BinInterSedes is more efficient, but only suitable for intermediate data within a job.
+ // We know we're getting Tuples back in this case, so the cast is safe.
+ result[i] = (Tuple) DataReaderWriter.readDatum(dis);
}
} catch (final IOException e) {
throw new RuntimeException("Error deserializing tuple: " + e.getMessage());
diff --git a/src/main/java/com/yahoo/sketches/pig/sampling/VarOptCommonImpl.java b/src/main/java/com/yahoo/sketches/pig/sampling/VarOptCommonImpl.java
index 985ed39..a056c76 100644
--- a/src/main/java/com/yahoo/sketches/pig/sampling/VarOptCommonImpl.java
+++ b/src/main/java/com/yahoo/sketches/pig/sampling/VarOptCommonImpl.java
@@ -88,7 +88,7 @@
try {
// create (weight, item) tuples to add to output bag
- for (final VarOptItemsSamples.WeightedSample ws : samples) {
+ for (final VarOptItemsSamples<Tuple>.WeightedSample ws : samples) {
final Tuple weightedSample = TUPLE_FACTORY.newTuple(2);
weightedSample.set(0, ws.getWeight());
weightedSample.set(1, ws.getItem());
diff --git a/src/test/java/com/yahoo/sketches/pig/sampling/DataToVarOptSketchTest.java b/src/test/java/com/yahoo/sketches/pig/sampling/DataToVarOptSketchTest.java
index efba41f..673854b 100644
--- a/src/test/java/com/yahoo/sketches/pig/sampling/DataToVarOptSketchTest.java
+++ b/src/test/java/com/yahoo/sketches/pig/sampling/DataToVarOptSketchTest.java
@@ -26,6 +26,7 @@
public class DataToVarOptSketchTest {
@Test
+ @SuppressWarnings("unused")
public void checkConstructors() {
// these three should work
DataToVarOptSketch udf = new DataToVarOptSketch();
diff --git a/src/test/java/com/yahoo/sketches/pig/sampling/GetVarOptSamplesTest.java b/src/test/java/com/yahoo/sketches/pig/sampling/GetVarOptSamplesTest.java
index bac5243..9951390 100644
--- a/src/test/java/com/yahoo/sketches/pig/sampling/GetVarOptSamplesTest.java
+++ b/src/test/java/com/yahoo/sketches/pig/sampling/GetVarOptSamplesTest.java
@@ -73,33 +73,37 @@
}
assertEquals(cumResultWt, cumWt, EPS);
} catch (final IOException e) {
- fail("Unexpected IOException");
+ fail("Unexpected IOException: " + e.getMessage());
}
}
@Test
- public void validOutputSchemaTest() throws IOException {
+ public void validOutputSchemaTest() {
final GetVarOptSamples udf = new GetVarOptSamples();
- final Schema serializedSketch = new Schema();
- serializedSketch.add(new Schema.FieldSchema("field1", DataType.BYTEARRAY));
+ try {
+ final Schema serializedSketch = new Schema();
+ serializedSketch.add(new Schema.FieldSchema("field1", DataType.BYTEARRAY));
- final Schema output = udf.outputSchema(serializedSketch);
- assertEquals(output.size(), 1);
- assertEquals(output.getField(0).type, DataType.BAG);
+ final Schema output = udf.outputSchema(serializedSketch);
+ assertEquals(output.size(), 1);
+ assertEquals(output.getField(0).type, DataType.BAG);
- final List<Schema.FieldSchema> outputFields = output.getField(0).schema.getFields();
- assertEquals(outputFields.size(), 2);
+ final List<Schema.FieldSchema> outputFields = output.getField(0).schema.getFields();
+ assertEquals(outputFields.size(), 2);
- // check high-level structure
- assertEquals(outputFields.get(0).alias, WEIGHT_ALIAS);
- assertEquals(outputFields.get(0).type, DataType.DOUBLE);
- assertEquals(outputFields.get(1).alias, RECORD_ALIAS);
- assertEquals(outputFields.get(1).type, DataType.TUPLE);
+ // check high-level structure
+ assertEquals(outputFields.get(0).alias, WEIGHT_ALIAS);
+ assertEquals(outputFields.get(0).type, DataType.DOUBLE);
+ assertEquals(outputFields.get(1).alias, RECORD_ALIAS);
+ assertEquals(outputFields.get(1).type, DataType.TUPLE);
+ } catch (final IOException e) {
+ fail("Unexpected IOException: " + e.getMessage());
+ }
}
@Test
- public void badOutputSchemaTest() throws IOException {
+ public void badOutputSchemaTest() {
final GetVarOptSamples udf = new GetVarOptSamples();
try {
diff --git a/src/test/java/com/yahoo/sketches/pig/sampling/VarOptCommonAlgebraicTest.java b/src/test/java/com/yahoo/sketches/pig/sampling/VarOptCommonAlgebraicTest.java
index 0235e73..002ab87 100644
--- a/src/test/java/com/yahoo/sketches/pig/sampling/VarOptCommonAlgebraicTest.java
+++ b/src/test/java/com/yahoo/sketches/pig/sampling/VarOptCommonAlgebraicTest.java
@@ -31,6 +31,7 @@
private static final ArrayOfTuplesSerDe serDe_ = new ArrayOfTuplesSerDe();
// constructors: just make sure result not null with valid args, throw exceptions if invalid
+ @SuppressWarnings("unused")
@Test
public void rawTuplesToSketchConstructors() {
VarOptCommonImpl.RawTuplesToSketchTuple udf;
@@ -66,6 +67,7 @@
}
}
+ @SuppressWarnings("unused")
@Test
public void unionSketchesAsSketchConstructors() {
VarOptCommonImpl.UnionSketchesAsTuple udf;
@@ -101,6 +103,7 @@
}
}
+ @SuppressWarnings("unused")
@Test
public void unionSketchesAsByteArrayConstructors() {
VarOptCommonImpl.UnionSketchesAsByteArray udf;
diff --git a/src/test/java/com/yahoo/sketches/pig/sampling/VarOptSamplingTest.java b/src/test/java/com/yahoo/sketches/pig/sampling/VarOptSamplingTest.java
index a1cffae..a7eca40 100644
--- a/src/test/java/com/yahoo/sketches/pig/sampling/VarOptSamplingTest.java
+++ b/src/test/java/com/yahoo/sketches/pig/sampling/VarOptSamplingTest.java
@@ -31,6 +31,7 @@
static final double EPS = 1e-10;
private static final ArrayOfTuplesSerDe serDe_ = new ArrayOfTuplesSerDe();
+ @SuppressWarnings("unused")
@Test
public void baseConstructors() {
// these three should work
diff --git a/src/test/java/com/yahoo/sketches/pig/sampling/VarOptUnionTest.java b/src/test/java/com/yahoo/sketches/pig/sampling/VarOptUnionTest.java
index d4dc9d8..c149b28 100644
--- a/src/test/java/com/yahoo/sketches/pig/sampling/VarOptUnionTest.java
+++ b/src/test/java/com/yahoo/sketches/pig/sampling/VarOptUnionTest.java
@@ -26,6 +26,7 @@
import com.yahoo.sketches.sampling.VarOptItemsUnion;
public class VarOptUnionTest {
+ @SuppressWarnings("unused")
@Test
public void checkConstructors() {
// these three should work