| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.pig.test; |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertFalse; |
| import static org.junit.Assert.assertTrue; |
| |
| import java.io.File; |
| import java.io.FileOutputStream; |
| import java.util.Iterator; |
| import java.util.Properties; |
| import java.util.Random; |
| import java.util.zip.GZIPOutputStream; |
| |
| import org.apache.pig.PigServer; |
| import org.apache.pig.builtin.DIFF; |
| import org.apache.pig.data.BagFactory; |
| import org.apache.pig.data.DataBag; |
| import org.apache.pig.data.Tuple; |
| import org.apache.pig.test.utils.TestHelper; |
| import org.junit.After; |
| import org.junit.AfterClass; |
| import org.junit.Before; |
| import org.junit.BeforeClass; |
| import org.junit.Test; |
| |
| public class TestCompressedFiles { |
| private static PigServer pig; |
| private static Properties properties; |
| private static MiniGenericCluster cluster; |
| |
| private File datFile; |
| private File gzFile; |
| |
| @Before |
| public void setUp() throws Exception { |
| datFile = File.createTempFile("compTest", ".dat"); |
| gzFile = File.createTempFile("compTest", ".gz"); |
| FileOutputStream dat = new FileOutputStream(datFile); |
| GZIPOutputStream gz = new GZIPOutputStream(new FileOutputStream(gzFile)); |
| Random rand = new Random(); |
| for(int i = 0; i < 1024; i++) { |
| StringBuffer sb = new StringBuffer(); |
| int x = rand.nextInt(); |
| int y = rand.nextInt(); |
| sb.append(x); |
| sb.append('\t'); |
| sb.append(y); |
| sb.append('\n'); |
| byte bytes[] = sb.toString().getBytes(); |
| dat.write(bytes); |
| gz.write(bytes); |
| } |
| dat.close(); |
| gz.close(); |
| } |
| |
| @After |
| public void tearDown() throws Exception { |
| datFile.delete(); |
| gzFile.delete(); |
| } |
| |
| @BeforeClass |
| public static void oneTimeSetUp() throws Exception { |
| cluster = MiniGenericCluster.buildCluster(); |
| properties = cluster.getProperties(); |
| } |
| |
| @AfterClass |
| public static void oneTimeTearDown() throws Exception { |
| cluster.shutDown(); |
| } |
| |
| @Test |
| public void testCompressed1() throws Throwable { |
| pig = new PigServer(cluster.getExecType(), properties); |
| pig.registerQuery("A = foreach (cogroup (load '" |
| + Util.generateURI(gzFile.toString(), pig.getPigContext()) |
| + "') by $1, (load '" |
| + Util.generateURI(datFile.toString(), pig.getPigContext()) |
| + "') by $1) generate flatten( " + DIFF.class.getName() |
| + "($1.$1,$2.$1)) ;"); |
| Iterator<Tuple> it = pig.openIterator("A"); |
| assertFalse(it.hasNext()); |
| } |
| |
| @Test |
| public void testCompressed2() throws Throwable { |
| pig = new PigServer(cluster.getExecType(), properties); |
| pig.registerQuery("A = load '" |
| + Util.generateURI(gzFile.toString(), pig.getPigContext()) |
| + "';"); |
| |
| DataBag dbGz = BagFactory.getInstance().newDefaultBag(), dbDt = BagFactory.getInstance().newDefaultBag(); |
| { |
| Iterator<Tuple> iter = pig.openIterator("A"); |
| |
| while(iter.hasNext()) { |
| dbGz.add(iter.next()); |
| } |
| } |
| pig.registerQuery("B = load '" |
| + Util.generateURI(datFile.toString(), pig.getPigContext()) |
| + "';"); |
| Iterator<Tuple> iter = pig.openIterator("B"); |
| |
| while(iter.hasNext()) { |
| dbDt.add(iter.next()); |
| } |
| |
| assertTrue(dbGz.size() > 0); |
| assertTrue(dbDt.size() > 0); |
| assertEquals(dbGz.size(), dbDt.size()); |
| assertTrue(TestHelper.compareBags(dbGz, dbDt)); |
| } |
| } |