| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.pig.builtin; |
| |
| import java.io.IOException; |
| import java.util.List; |
| |
| import org.apache.pig.EvalFunc; |
| import org.apache.pig.data.DataType; |
| import org.apache.pig.data.Tuple; |
| import org.apache.pig.data.TupleFactory; |
| import org.apache.pig.impl.logicalLayer.FrontendException; |
| import org.apache.pig.impl.logicalLayer.schema.Schema; |
| |
| import com.google.common.collect.Lists; |
| |
| /** |
| * This is a UDF which allows the user to specify a string prefix, and then |
| * filter for the columns in a relation that begin with that prefix. |
| * |
| * Example: |
| * a = load 'a' as (x, y); |
| * b = load 'b' as (x, y); |
| * c = join a by x, b by x; |
| * DEFINE pluck PluckTuple('a::'); |
| * d = foreach c generate FLATTEN(pluck(*)); |
| * describe c; |
| * c: {a::x: bytearray,a::y: bytearray,b::x: bytearray,b::y: bytearray} |
| * describe d; |
| * d: {plucked::a::x: bytearray,plucked::a::y: bytearray} |
| */ |
| public class PluckTuple extends EvalFunc<Tuple> { |
| private static final TupleFactory mTupleFactory = TupleFactory.getInstance(); |
| |
| private boolean isInitialized = false; |
| private int[] indicesToInclude; |
| private String prefix; |
| |
| public PluckTuple(String prefix) { |
| this.prefix = prefix; |
| } |
| |
| @Override |
| public Tuple exec(Tuple input) throws IOException { |
| if (!isInitialized) { |
| List<Integer> indicesToInclude = Lists.newArrayList(); |
| Schema inputSchema = getInputSchema(); |
| for (int i = 0; i < inputSchema.size(); i++) { |
| String alias = inputSchema.getField(i).alias; |
| if (alias.startsWith(prefix)) { |
| indicesToInclude.add(i); |
| } |
| } |
| this.indicesToInclude = new int[indicesToInclude.size()]; |
| int idx = 0; |
| for (int val : indicesToInclude) { |
| this.indicesToInclude[idx++] = val; |
| } |
| isInitialized = true; |
| } |
| Tuple result = mTupleFactory.newTuple(indicesToInclude.length); |
| int idx = 0; |
| for (int val : indicesToInclude) { |
| result.set(idx++, input.get(val)); |
| } |
| return result; |
| } |
| |
| public Schema outputSchema(Schema inputSchema) { |
| if (!isInitialized) { |
| List<Integer> indicesToInclude = Lists.newArrayList(); |
| for (int i = 0; i < inputSchema.size(); i++) { |
| String alias; |
| try { |
| alias = inputSchema.getField(i).alias; |
| } catch (FrontendException e) { |
| throw new RuntimeException(e); // Should never happen |
| } |
| if (alias.startsWith(prefix)) { |
| indicesToInclude.add(i); |
| } |
| } |
| this.indicesToInclude = new int[indicesToInclude.size()]; |
| int idx = 0; |
| for (int val : indicesToInclude) { |
| this.indicesToInclude[idx++] = val; |
| } |
| isInitialized = true; |
| } |
| Schema outputSchema = new Schema(); |
| for (int val : indicesToInclude) { |
| try { |
| outputSchema.add(inputSchema.getField(val)); |
| } catch (FrontendException e) { |
| throw new RuntimeException(e); |
| } |
| } |
| try { |
| return new Schema(new Schema.FieldSchema("plucked", outputSchema, DataType.TUPLE)); |
| } catch (FrontendException e) { |
| throw new RuntimeException(e); // Should never happen |
| } |
| } |
| } |