| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.accumulo.core.client.mapreduce; |
| |
| import static org.junit.Assert.assertEquals; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.DataOutputStream; |
| import java.io.IOException; |
| import java.util.Base64; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Set; |
| |
| import org.apache.accumulo.core.client.IteratorSetting; |
| import org.apache.accumulo.core.iterators.user.RegExFilter; |
| import org.apache.accumulo.core.iterators.user.WholeRowIterator; |
| import org.apache.accumulo.core.util.Pair; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.io.Text; |
| import org.apache.hadoop.mapreduce.Job; |
| import org.junit.Test; |
| |
| public class AccumuloInputFormatTest { |
| |
| /** |
| * Check that the iterator configuration is getting stored in the Job conf correctly. |
| */ |
| @Test |
| public void testSetIterator() throws IOException { |
| Job job = Job.getInstance(); |
| |
| IteratorSetting is = new IteratorSetting(1, "WholeRow", |
| "org.apache.accumulo.core.iterators.WholeRowIterator"); |
| AccumuloInputFormat.addIterator(job, is); |
| Configuration conf = job.getConfiguration(); |
| ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
| is.write(new DataOutputStream(baos)); |
| String iterators = conf.get("AccumuloInputFormat.ScanOpts.Iterators"); |
| assertEquals(Base64.getEncoder().encodeToString(baos.toByteArray()), iterators); |
| } |
| |
| @Test |
| public void testAddIterator() throws IOException { |
| Job job = Job.getInstance(); |
| |
| AccumuloInputFormat.addIterator(job, |
| new IteratorSetting(1, "WholeRow", WholeRowIterator.class)); |
| AccumuloInputFormat.addIterator(job, new IteratorSetting(2, "Versions", |
| "org.apache.accumulo.core.iterators.VersioningIterator")); |
| IteratorSetting iter = new IteratorSetting(3, "Count", |
| "org.apache.accumulo.core.iterators.CountingIterator"); |
| iter.addOption("v1", "1"); |
| iter.addOption("junk", "\0omg:!\\xyzzy"); |
| AccumuloInputFormat.addIterator(job, iter); |
| |
| List<IteratorSetting> list = AccumuloInputFormat.getIterators(job); |
| |
| // Check the list size |
| assertEquals(3, list.size()); |
| |
| // Walk the list and make sure our settings are correct |
| IteratorSetting setting = list.get(0); |
| assertEquals(1, setting.getPriority()); |
| assertEquals("org.apache.accumulo.core.iterators.user.WholeRowIterator", |
| setting.getIteratorClass()); |
| assertEquals("WholeRow", setting.getName()); |
| assertEquals(0, setting.getOptions().size()); |
| |
| setting = list.get(1); |
| assertEquals(2, setting.getPriority()); |
| assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", |
| setting.getIteratorClass()); |
| assertEquals("Versions", setting.getName()); |
| assertEquals(0, setting.getOptions().size()); |
| |
| setting = list.get(2); |
| assertEquals(3, setting.getPriority()); |
| assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass()); |
| assertEquals("Count", setting.getName()); |
| assertEquals(2, setting.getOptions().size()); |
| assertEquals("1", setting.getOptions().get("v1")); |
| assertEquals("\0omg:!\\xyzzy", setting.getOptions().get("junk")); |
| } |
| |
| /** |
| * Test adding iterator options where the keys and values contain both the FIELD_SEPARATOR |
| * character (':') and ITERATOR_SEPARATOR (',') characters. There should be no exceptions thrown |
| * when trying to parse these types of option entries. |
| * |
| * This test makes sure that the expected raw values, as appears in the Job, are equal to what's |
| * expected. |
| */ |
| @Test |
| public void testIteratorOptionEncoding() throws Throwable { |
| String key = "colon:delimited:key"; |
| String value = "comma,delimited,value"; |
| IteratorSetting someSetting = new IteratorSetting(1, "iterator", "Iterator.class"); |
| someSetting.addOption(key, value); |
| Job job = Job.getInstance(); |
| AccumuloInputFormat.addIterator(job, someSetting); |
| |
| List<IteratorSetting> list = AccumuloInputFormat.getIterators(job); |
| assertEquals(1, list.size()); |
| assertEquals(1, list.get(0).getOptions().size()); |
| assertEquals(list.get(0).getOptions().get(key), value); |
| |
| someSetting.addOption(key + "2", value); |
| someSetting.setPriority(2); |
| someSetting.setName("it2"); |
| AccumuloInputFormat.addIterator(job, someSetting); |
| list = AccumuloInputFormat.getIterators(job); |
| assertEquals(2, list.size()); |
| assertEquals(1, list.get(0).getOptions().size()); |
| assertEquals(list.get(0).getOptions().get(key), value); |
| assertEquals(2, list.get(1).getOptions().size()); |
| assertEquals(list.get(1).getOptions().get(key), value); |
| assertEquals(list.get(1).getOptions().get(key + "2"), value); |
| } |
| |
| /** |
| * Test getting iterator settings for multiple iterators set |
| */ |
| @Test |
| public void testGetIteratorSettings() throws IOException { |
| Job job = Job.getInstance(); |
| |
| AccumuloInputFormat.addIterator(job, |
| new IteratorSetting(1, "WholeRow", "org.apache.accumulo.core.iterators.WholeRowIterator")); |
| AccumuloInputFormat.addIterator(job, new IteratorSetting(2, "Versions", |
| "org.apache.accumulo.core.iterators.VersioningIterator")); |
| AccumuloInputFormat.addIterator(job, |
| new IteratorSetting(3, "Count", "org.apache.accumulo.core.iterators.CountingIterator")); |
| |
| List<IteratorSetting> list = AccumuloInputFormat.getIterators(job); |
| |
| // Check the list size |
| assertEquals(3, list.size()); |
| |
| // Walk the list and make sure our settings are correct |
| IteratorSetting setting = list.get(0); |
| assertEquals(1, setting.getPriority()); |
| assertEquals("org.apache.accumulo.core.iterators.WholeRowIterator", setting.getIteratorClass()); |
| assertEquals("WholeRow", setting.getName()); |
| |
| setting = list.get(1); |
| assertEquals(2, setting.getPriority()); |
| assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", |
| setting.getIteratorClass()); |
| assertEquals("Versions", setting.getName()); |
| |
| setting = list.get(2); |
| assertEquals(3, setting.getPriority()); |
| assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass()); |
| assertEquals("Count", setting.getName()); |
| |
| } |
| |
| @Test |
| public void testSetRegex() throws IOException { |
| Job job = Job.getInstance(); |
| |
| String regex = ">\"*%<>\'\\"; |
| |
| IteratorSetting is = new IteratorSetting(50, regex, RegExFilter.class); |
| RegExFilter.setRegexs(is, regex, null, null, null, false); |
| AccumuloInputFormat.addIterator(job, is); |
| |
| assertEquals(regex, AccumuloInputFormat.getIterators(job).get(0).getName()); |
| } |
| |
| @Test |
| public void testEmptyColumnFamily() throws IOException { |
| Job job = Job.getInstance(); |
| Set<Pair<Text,Text>> cols = new HashSet<>(); |
| cols.add(new Pair<>(new Text(""), null)); |
| cols.add(new Pair<>(new Text("foo"), new Text("bar"))); |
| cols.add(new Pair<>(new Text(""), new Text("bar"))); |
| cols.add(new Pair<>(new Text(""), new Text(""))); |
| cols.add(new Pair<>(new Text("foo"), new Text(""))); |
| AccumuloInputFormat.fetchColumns(job, cols); |
| Set<Pair<Text,Text>> setCols = AccumuloInputFormat.getFetchedColumns(job); |
| assertEquals(cols, setCols); |
| } |
| } |