/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.Test;

public class TestOrderBy { | |
private static final int DATALEN = 1024; | |
private String[][] DATA = new String[2][DATALEN]; | |
static MiniCluster cluster = MiniCluster.buildCluster(); | |
private PigServer pig; | |
private File tmpFile; | |
public TestOrderBy() throws Throwable { | |
DecimalFormat myFormatter = new DecimalFormat("0000000"); | |
for (int i = 0; i < DATALEN; i++) { | |
DATA[0][i] = myFormatter.format(i); | |
DATA[1][i] = myFormatter.format(DATALEN - i - 1); | |
} | |
pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); | |
} | |
@Before | |
public void setUp() throws Exception { | |
tmpFile = File.createTempFile("test", "txt"); | |
PrintStream ps = new PrintStream(new FileOutputStream(tmpFile)); | |
for(int i = 0; i < DATALEN; i++) { | |
ps.println("1\t" + DATA[1][i] + "\t" + DATA[0][i]); | |
} | |
ps.close(); | |
} | |
@After | |
public void tearDown() throws Exception { | |
tmpFile.delete(); | |
} | |
@AfterClass | |
public static void oneTimeTearDown() throws Exception { | |
cluster.shutDown(); | |
} | |
private void verify(String query, boolean descending) throws Exception { | |
pig.registerQuery(query); | |
Iterator<Tuple> it = pig.openIterator("myid"); | |
int col = (descending ? 1 : 0); | |
for(int i = 0; i < DATALEN; i++) { | |
Tuple t = (Tuple)it.next(); | |
int value = DataType.toInteger(t.get(1)); | |
assertEquals(Integer.parseInt(DATA[col][i]), value); | |
} | |
assertFalse(it.hasNext()); | |
} | |
@Test | |
public void testTopLevelOrderBy_Star_NoUsing() throws Exception { | |
verify("myid = order (load 'file:" + tmpFile + "') BY *;", false); | |
} | |
@Test | |
public void testTopLevelOrderBy_Col1_NoUsing() throws Exception { | |
verify("myid = order (load 'file:" + tmpFile + "') BY $1;", false); | |
} | |
@Test | |
public void testTopLevelOrderBy_Col2_NoUsing() throws Exception { | |
verify("myid = order (load 'file:" + tmpFile + "') BY $2;", true); | |
} | |
@Test | |
public void testTopLevelOrderBy_Col21_NoUsing() throws Exception { | |
verify("myid = order (load 'file:" + tmpFile + "') BY $2, $1;", true); | |
} | |
@Test | |
public void testTopLevelOrderBy_Star_Using() throws Exception { | |
verify("myid = order (load 'file:" + tmpFile + | |
"') BY * USING org.apache.pig.test.OrdAsc;", false); | |
verify("myid = order (load 'file:" + tmpFile + | |
"') BY * USING org.apache.pig.test.OrdDesc;", true); | |
verify("myid = order (load 'file:" + tmpFile + | |
"') BY * USING org.apache.pig.test.OrdDescNumeric;", true); | |
} | |
@Test | |
public void testTopLevelOrderBy_Col1_Using() throws Exception { | |
verify("myid = order (load 'file:" + tmpFile + | |
"') BY $1 USING org.apache.pig.test.OrdAsc;", false); | |
verify("myid = order (load 'file:" + tmpFile + | |
"') BY $1 USING org.apache.pig.test.OrdDesc;", true); | |
verify("myid = order (load 'file:" + tmpFile + | |
"') BY $1 USING org.apache.pig.test.OrdDescNumeric;", true); | |
} | |
@Test | |
public void testTopLevelOrderBy_Col2_Using() throws Exception { | |
verify("myid = order (load 'file:" + tmpFile + | |
"') BY $2 USING org.apache.pig.test.OrdAsc;", true); | |
verify("myid = order (load 'file:" + tmpFile + | |
"') BY $2 USING org.apache.pig.test.OrdDesc;", false); | |
verify("myid = order (load 'file:" + tmpFile + | |
"') BY $2 USING org.apache.pig.test.OrdDescNumeric;", false); | |
} | |
@Test | |
public void testTopLevelOrderBy_Col21_Using() throws Exception { | |
// col2/col1 ascending - | |
verify("myid = order (load 'file:" + tmpFile + | |
"') BY $2, $1 USING org.apache.pig.test.OrdAsc;", true); | |
verify("myid = order (load 'file:" + tmpFile + | |
"') BY $2, $1 USING org.apache.pig.test.OrdDesc;", false); | |
verify("myid = order (load 'file:" + tmpFile + | |
"') BY $2, $1 USING org.apache.pig.test.OrdDescNumeric;", false); | |
} | |
@Test | |
public void testNestedOrderBy_Star_NoUsing() throws Exception { | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY *; generate flatten(D); };", false); | |
} | |
@Test | |
public void testNestedOrderBy_Col1_NoUsing() throws Exception { | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY $1; generate flatten(D); };", false); | |
} | |
@Test | |
public void testNestedOrderBy_Col2_NoUsing() throws Exception { | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY $2; generate flatten(D); };", true); | |
} | |
@Test | |
public void testNestedOrderBy_Col21_NoUsing() throws Exception { | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY $2, $1; generate flatten(D); };", true); | |
} | |
@Test | |
public void testNestedOrderBy_Star_Using() throws Exception { | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY * USING " + | |
"org.apache.pig.test.OrdAsc; generate flatten(D); };", false); | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY * USING " + | |
"org.apache.pig.test.OrdDesc; generate flatten(D); };", true); | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY * USING " + | |
"org.apache.pig.test.OrdDescNumeric; generate flatten(D); };", true); | |
} | |
@Test | |
public void testNestedOrderBy_Col1_Using() throws Exception { | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY $1 USING " + | |
"org.apache.pig.test.OrdAsc; generate flatten(D); };", false); | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY $1 USING " + | |
"org.apache.pig.test.OrdDesc; generate flatten(D); };", true); | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY $1 USING " + | |
"org.apache.pig.test.OrdDescNumeric; generate flatten(D); };", | |
true); | |
} | |
@Test | |
public void testNestedOrderBy_Col2_Using() throws Exception { | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY $2 USING " + | |
"org.apache.pig.test.OrdAsc; generate flatten(D); };", true); | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY $2 USING " + | |
"org.apache.pig.test.OrdDesc; generate flatten(D); };", false); | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY $2 USING " + | |
"org.apache.pig.test.OrdDescNumeric; generate flatten(D); };", | |
false); | |
} | |
@Test | |
public void testNestedOrderBy_Col21_Using() throws Exception { | |
// col2/col1 ascending - | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY $2, $1 USING " + | |
"org.apache.pig.test.OrdAsc; generate flatten(D); };", true); | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY $2, $1 USING " + | |
"org.apache.pig.test.OrdDesc; generate flatten(D); };", false); | |
verify("myid = foreach (group (load 'file:" + tmpFile + | |
"') by $0) { D = ORDER $1 BY $2, $1 USING " + | |
"org.apache.pig.test.OrdDescNumeric; generate flatten(D); };", | |
false); | |
} | |
// this test case is for JIRA_1034 | |
@Test | |
public void testOrderByGroup() throws Exception{ | |
tmpFile = File.createTempFile("test", "txt"); | |
PrintStream ps = new PrintStream(new FileOutputStream(tmpFile)); | |
for(int i = 0; i < 100; i++) { | |
ps.println(i); | |
} | |
ps.close(); | |
pig.registerQuery("a = load 'file:" + tmpFile +"' as (f1:int);"); | |
pig.registerQuery("b = group a by $0;"); | |
pig.registerQuery("c = order b by group;"); | |
Iterator<Tuple> iter = pig.openIterator("c"); | |
int count = 0; | |
while(iter.hasNext()){ | |
Tuple tuple=iter.next(); | |
assertEquals(count, tuple.get(0)); | |
count++; | |
} | |
assertEquals(count, 100); | |
} | |
@Test | |
public void testOrderByBooleanColumn() throws Exception { | |
File tmpFile = genDataSetFileForOrderByBooleanColumn(); | |
List<Tuple> expectedResults = new ArrayList<Tuple>(); | |
expectedResults.add(Util.buildTuple("value3", null)); | |
expectedResults.add(Util.buildTuple("value4", null)); | |
expectedResults.add(Util.buildTuple("value10", null)); | |
expectedResults.add(Util.buildTuple("value2", Boolean.FALSE)); | |
expectedResults.add(Util.buildTuple("value6", Boolean.FALSE)); | |
expectedResults.add(Util.buildTuple("value7", Boolean.FALSE)); | |
expectedResults.add(Util.buildTuple("value1", Boolean.TRUE)); | |
expectedResults.add(Util.buildTuple("value5", Boolean.TRUE)); | |
expectedResults.add(Util.buildTuple("value8", Boolean.TRUE)); | |
expectedResults.add(Util.buildTuple("value9", Boolean.TRUE)); | |
pig.registerQuery("blah = load '" | |
+ Util.generateURI(tmpFile.toString(), pig.getPigContext()) | |
+ "' as (data:chararray, test:boolean);"); | |
pig.registerQuery("ordered = order blah by test;"); | |
Iterator<Tuple> expectedItr = expectedResults.iterator(); | |
Iterator<Tuple> actualItr = pig.openIterator("ordered"); | |
while (expectedItr.hasNext() && actualItr.hasNext()) { | |
Tuple expectedTuple = expectedItr.next(); | |
Tuple actualTuple = actualItr.next(); | |
assertEquals(expectedTuple, actualTuple); | |
} | |
assertEquals(expectedItr.hasNext(), actualItr.hasNext()); | |
} | |
private File genDataSetFileForOrderByBooleanColumn() throws IOException { | |
File fp1 = File.createTempFile("order_by_boolean", "txt"); | |
PrintStream ps = new PrintStream(new FileOutputStream(fp1)); | |
ps.println("value1\ttrue"); | |
ps.println("value2\tfalse"); | |
ps.println("value3\t"); | |
ps.println("value4\t"); | |
ps.println("value5\ttrue"); | |
ps.println("value6\tfalse"); | |
ps.println("value7\tfalse"); | |
ps.println("value8\ttrue"); | |
ps.println("value9\ttrue"); | |
ps.println("value10\t"); | |
ps.close(); | |
return fp1; | |
} | |
@Test | |
public void testOrderByDateTimeColumn() throws Exception { | |
File tmpFile = genDataSetFileForOrderByDateTimeColumn(); | |
List<Tuple> expectedResults = new ArrayList<Tuple>(); | |
expectedResults.add(Util.buildTuple("value3", null)); | |
expectedResults.add(Util.buildTuple("value4", null)); | |
expectedResults.add(Util.buildTuple("value10", null)); | |
expectedResults.add(Util.buildTuple("value2", new DateTime("1970-01-01T00:00:00.000Z"))); | |
expectedResults.add(Util.buildTuple("value6", new DateTime("1970-01-01T00:00:01.000Z"))); | |
expectedResults.add(Util.buildTuple("value7", new DateTime("1970-01-01T00:00:01.000Z"))); | |
expectedResults.add(Util.buildTuple("value1", new DateTime("1970-01-01T00:01:00.000Z"))); | |
expectedResults.add(Util.buildTuple("value5", new DateTime("1970-01-01T01:00:00.000Z"))); | |
expectedResults.add(Util.buildTuple("value8", new DateTime("1970-01-02T00:00:00.000Z"))); | |
expectedResults.add(Util.buildTuple("value9", new DateTime("1970-02-01T00:00:00.000Z"))); | |
pig.registerQuery("blah = load '" | |
+ Util.generateURI(tmpFile.toString(), pig.getPigContext()) | |
+ "' as (data:chararray, test:datetime);"); | |
pig.registerQuery("ordered = order blah by test;"); | |
Iterator<Tuple> expectedItr = expectedResults.iterator(); | |
Iterator<Tuple> actualItr = pig.openIterator("ordered"); | |
while (expectedItr.hasNext() && actualItr.hasNext()) { | |
Tuple expectedTuple = expectedItr.next(); | |
Tuple actualTuple = actualItr.next(); | |
assertEquals(expectedTuple, actualTuple); | |
} | |
assertEquals(expectedItr.hasNext(), actualItr.hasNext()); | |
} | |
private File genDataSetFileForOrderByDateTimeColumn() throws IOException { | |
File fp1 = File.createTempFile("order_by_datetime", "txt"); | |
PrintStream ps = new PrintStream(new FileOutputStream(fp1)); | |
ps.println("value1\t1970-01-01T00:01:00.000Z"); | |
ps.println("value2\t1970-01-01T00:00:00.000Z"); | |
ps.println("value3\t"); | |
ps.println("value4\t"); | |
ps.println("value5\t1970-01-01T01:00:00.000Z"); | |
ps.println("value6\t1970-01-01T00:00:01.000Z"); | |
ps.println("value7\t1970-01-01T00:00:01.000Z"); | |
ps.println("value8\t1970-01-02T00:00:00.000Z"); | |
ps.println("value9\t1970-02-01T00:00:00.000Z"); | |
ps.println("value10\t"); | |
ps.close(); | |
return fp1; | |
} | |
} |