| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with this |
| * work for additional information regarding copyright ownership. The ASF |
| * licenses this file to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| * License for the specific language governing permissions and limitations under |
| * the License. |
| */ |
| |
| package org.apache.pig.piggybank.test.storage; |
| |
| import static org.junit.Assert.assertEquals; |
| |
| import java.io.IOException; |
| import java.util.Iterator; |
| import java.util.Properties; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.pig.ExecType; |
| import org.apache.pig.PigServer; |
| import org.apache.pig.backend.executionengine.ExecException; |
| import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration; |
| import org.apache.pig.data.DataByteArray; |
| import org.apache.pig.data.Tuple; |
| import org.apache.pig.test.MiniGenericCluster; |
| import org.apache.pig.test.Util; |
| import org.junit.Assert; |
| import org.junit.Test; |
| |
| public class TestCSVStorage { |
| protected static final Log LOG = LogFactory.getLog(TestCSVStorage.class); |
| |
| private PigServer pigServer; |
| private MiniGenericCluster cluster; |
| |
| public TestCSVStorage() throws ExecException, IOException { |
| cluster = MiniGenericCluster.buildCluster(); |
| pigServer = new PigServer(ExecType.LOCAL, new Properties()); |
| pigServer.getPigContext().getProperties() |
| .setProperty(MRConfiguration.MAP_MAX_ATTEMPTS, "1"); |
| pigServer.getPigContext().getProperties() |
| .setProperty(MRConfiguration.REDUCE_MAX_ATTEMPTS, "1"); |
| } |
| |
| @Test |
| public void testSimpleCsv() throws IOException { |
| String inputFileName = "TestCSVLoader-simple.txt"; |
| Util.createLocalInputFile(inputFileName, new String[] {"foo,bar,baz", "fee,foe,fum"}); |
| String script = "a = load '" + inputFileName + "' using org.apache.pig.piggybank.storage.CSVLoader() " + |
| " as (a:chararray, b:chararray, c:chararray); "; |
| Util.registerMultiLineQuery(pigServer, script); |
| Iterator<Tuple> it = pigServer.openIterator("a"); |
| assertEquals(Util.createTuple(new String[] {"foo", "bar", "baz"}), it.next()); |
| } |
| |
| @Test |
| public void testQuotedCommas() throws IOException { |
| String inputFileName = "TestCSVLoader-quotedcommas.txt"; |
| Util.createLocalInputFile(inputFileName, new String[] {"\"foo,bar,baz\"", "fee,foe,fum"}); |
| String script = "a = load '" + inputFileName + "' using org.apache.pig.piggybank.storage.CSVLoader() " + |
| " as (a:chararray, b:chararray, c:chararray); "; |
| Util.registerMultiLineQuery(pigServer, script); |
| Iterator<Tuple> it = pigServer.openIterator("a"); |
| assertEquals(Util.createTuple(new String[] {"foo,bar,baz", null, null}), it.next()); |
| assertEquals(Util.createTuple(new String[] {"fee", "foe", "fum"}), it.next()); |
| } |
| |
| @Test |
| public void testQuotedQuotes() throws IOException { |
| String inputFileName = "TestCSVLoader-quotedquotes.txt"; |
| Util.createLocalInputFile(inputFileName, |
| new String[] {"\"foo,\"\"bar\"\",baz\"", "\"\"\"\"\"\"\"\""}); |
| String script = "a = load '" + inputFileName + "' using org.apache.pig.piggybank.storage.CSVLoader() " + |
| " as (a:chararray); "; |
| Util.registerMultiLineQuery(pigServer, script); |
| Iterator<Tuple> it = pigServer.openIterator("a"); |
| assertEquals(Util.createTuple(new String[] {"foo,\"bar\",baz"}), it.next()); |
| assertEquals(Util.createTuple(new String[] {"\"\"\""}), it.next()); |
| } |
| |
| @Test |
| public void testNullPadding() throws IOException { |
| String inputFileName = "TestCSVLoader-nullpadding.txt"; |
| Util.createLocalInputFile(inputFileName, new String[] { "a", "b,", "c,d", ",e"}); |
| String script = "a = load '" + inputFileName + "' using org.apache.pig.piggybank.storage.CSVLoader() " + |
| " as (field1, field2); dump a;"; |
| Util.registerMultiLineQuery(pigServer, script); |
| Iterator<Tuple> it = pigServer.openIterator("a"); |
| assertEquals(Util.createTuple(new DataByteArray[] {new DataByteArray("a"), null}), it.next()); |
| assertEquals(Util.createTuple(new DataByteArray[] {new DataByteArray("b"), null}), it.next()); |
| assertEquals(Util.createTuple(new DataByteArray[] {new DataByteArray("c"), new DataByteArray("d")}), it.next()); |
| assertEquals(Util.createTuple(new DataByteArray[] {new DataByteArray(""), new DataByteArray("e")}), it.next()); |
| Assert.assertFalse(it.hasNext()); |
| } |
| |
| |
| } |