blob: a6df11a861ea6dce6c4f98352724e423b8c5afa9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A test UDF that checks that users can pass an escaped dot
* as part of a regex in the UDF's argument.
* Example: in Perl the regex would be "www\.abc\.com" -
* the user's intent is to supply this as a regex pattern
* in which the dot (.) is escaped. As a Java
* string this would be "www\\.abc\\.com" - the parser should
* eventually hand this Java string to the UDF. In a Pig script, too,
* the user would write this as 'www\\.abc\\.com'.
*/
package org.apache.pig.test;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
/**
 * Test UDF that counts how many times a regular expression is found in the
 * first field of the input tuple.
 *
 * <p>The pattern is given as the constructor argument and compiled once with
 * {@link Pattern#DOTALL} and {@link Pattern#CASE_INSENSITIVE}. Instead of
 * returning null, {@link #exec(Tuple)} reports special situations through
 * sentinel values: 8888 for a null or empty tuple and 7777 for a null first field.
 */
public class RegexGroupCount extends EvalFunc<Integer> {
    /** Returned when the input tuple is null or has no fields. */
    private static final int EMPTY_INPUT_SENTINEL = 8888;

    /** Returned when the first field of the tuple is null. */
    private static final int NULL_FIELD_SENTINEL = 7777;

    private final Pattern pattern_;

    /**
     * Compiles the supplied regex and prints it, together with the result of
     * comparing it to a fixed reference pattern, to standard output.
     *
     * @param patternStr the regular expression to search for; must not be null
     */
    public RegexGroupCount(String patternStr) {
        System.out.println("My pattern supplied is "+patternStr);
        System.out.println("Equality test "+patternStr.equals("www\\.xyz\\.com/sports"));
        pattern_ = Pattern.compile(patternStr, Pattern.DOTALL|Pattern.CASE_INSENSITIVE);
    }

    /**
     * Counts the occurrences of the configured pattern in the first field of
     * {@code input}, which is cast to {@code String}.
     *
     * @param input the tuple whose first field is searched
     * @return the number of matches; 8888 if {@code input} is null or empty;
     *         7777 if the first field is null
     * @throws IOException if reading the field fails or an unexpected runtime
     *         error occurs while matching
     */
    //@Override
    public Integer exec(Tuple input) throws IOException {
        if (input == null || input.size() == 0) {
            return EMPTY_INPUT_SENTINEL;
        }

        String istr = (String) input.get(0);
        System.out.println("My input is: "+istr);

        // Explicit check instead of letting the matcher throw and catching
        // NullPointerException; the reported value is unchanged.
        if (istr == null) {
            return NULL_FIELD_SENTINEL;
        }

        try {
            int count = 0;
            Matcher matcher = pattern_.matcher(istr);
            while (matcher.find()) {
                count++;
            }
            return count;
        } catch (RuntimeException e) {
            // Keep the declared contract: unexpected failures surface as IOException with the cause attached.
            throw new IOException("Caught exception processing RegexGroupCount", e);
        }
    }
}