/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.pig.tutorial; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.List; |
| |
| import org.apache.pig.EvalFunc; |
| import org.apache.pig.FilterFunc; |
| import org.apache.pig.data.DataBag; |
| import org.apache.pig.data.DefaultBagFactory; |
| import org.apache.pig.data.Tuple; |
| import org.apache.pig.data.TupleFactory; |
| |
| public class TutorialTest { |
| |
| private static Tuple[] getTuples(String[] queries) { |
| Tuple[] tuples = new Tuple[queries.length]; |
| for (int i = 0; i < tuples.length; i++) { |
| tuples[i] = TupleFactory.getInstance().newTuple(1); |
| try{tuples[i].set(0, queries[i]);}catch(Exception e){} |
| } |
| return tuples; |
| } |
| |
| public static String[] testDataAtomEvals(EvalFunc<String> eval, Tuple[] tuples) { |
| List<String> res = new ArrayList<String>(); |
| try { |
| for (Tuple t : tuples) { |
| String output = eval.exec(t); |
| System.out.println("Converted: " + t + " to (" + output + ")"); |
| res.add(output); |
| } |
| } catch (IOException e) { |
| e.printStackTrace(); |
| System.exit(1); |
| } |
| |
| System.out.println("==="); |
| return res.toArray(new String[res.size()]); |
| } |
| |
| public static DataBag[] testDataBagEvals(EvalFunc<DataBag> eval, Tuple[] tuples) { |
| List<DataBag> res = new ArrayList<DataBag>(); |
| try { |
| for (Tuple t : tuples) { |
| DataBag output = eval.exec(t); |
| System.out.println("Converted: " + t + " to (" + output + ")"); |
| res.add(output); |
| } |
| } catch (IOException e) { |
| e.printStackTrace(); |
| System.exit(1); |
| } |
| |
| System.out.println("==="); |
| return res.toArray(new DataBag[res.size()]); |
| } |
| |
| public static String[] testFilters (FilterFunc filter, Tuple[] tuples) { |
| List<String> res = new ArrayList<String>(); |
| try { |
| for (Tuple t : tuples) { |
| if (filter.exec(t)) { |
| System.out.println("accepted: " + t); |
| res.add((String)t.get(0)); |
| } else { |
| System.out.println("rejected: " + t); |
| } |
| } |
| } catch (Exception e) { |
| e.printStackTrace(); |
| System.exit(1); |
| } |
| |
| System.out.println("==="); |
| return res.toArray(new String[res.size()]); |
| } |
| |
| public static void main(String[] args) { |
| String[] queries = { |
| "http://www.yahoo.com/", |
| "\"http://www.yahoo.com/\"", |
| " http;//www.yahoo.com/ ", |
| "https://www.yahoo.com/", |
| "www.yahoo.com/", |
| "\"www.yahoo.com/\"", |
| "a real nice query ", |
| "an UPPER CASE query", |
| " ", |
| " nude picture", |
| " +XXX", |
| "\" +porno \"", |
| }; |
| |
| NonURLDetector filter1 = new NonURLDetector(); |
| String[] q1 = testFilters(filter1, getTuples(queries)); |
| |
| ToLower eval1 = new ToLower(); |
| String[] q2 = testDataAtomEvals(eval1, getTuples(q1)); |
| |
| String[] timestamps = { |
| "970916072134", |
| "970916072311", |
| "970916123431", |
| }; |
| |
| ExtractHour eval2 = new ExtractHour(); |
| testDataAtomEvals(eval2, getTuples(timestamps)); |
| |
| DataBag bag = DefaultBagFactory.getInstance().newDefaultBag(); |
| |
| Tuple t1 = TupleFactory.getInstance().newTuple(3); |
| try{ |
| t1.set(0, "word"); |
| t1.set(1, "02"); |
| t1.set(2, 2); |
| }catch(Exception e){} |
| bag.add(t1); |
| |
| Tuple t2 = TupleFactory.getInstance().newTuple(3); |
| try{ |
| t2.set(0, "word"); |
| t2.set(1, "05"); |
| t2.set(2, 2); |
| }catch(Exception e){} |
| bag.add(t2); |
| |
| Tuple t3 = TupleFactory.getInstance().newTuple(3); |
| try{ |
| t3.set(0, "word"); |
| t3.set(1, "04"); |
| t3.set(2, 3); |
| }catch(Exception e){} |
| bag.add(t3); |
| |
| Tuple t4 = TupleFactory.getInstance().newTuple(3); |
| try{ |
| t4.set(0, "word"); |
| t4.set(1, "06"); |
| t4.set(2, 4); |
| }catch(Exception e){} |
| bag.add(t4); |
| |
| Tuple[] t = new Tuple[1]; |
| t[0] = TupleFactory.getInstance().newTuple(1); |
| try{ |
| t[0].set(0, bag); |
| }catch(Exception e){} |
| |
| ScoreGenerator eval4 = new ScoreGenerator(); |
| testDataBagEvals(eval4, t); |
| } |
| } |