| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.pig.test; |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertFalse; |
| import static org.junit.Assert.assertTrue; |
| |
| import java.lang.reflect.Method; |
| import java.util.Random; |
| import java.util.regex.Pattern; |
| |
| import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.PORegexp; |
| import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.regex.CompiledAutomaton; |
| import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.regex.CompiledRegex; |
| import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.regex.NonConstantRegex; |
| import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.regex.RegexInit; |
| import org.apache.pig.impl.plan.OperatorKey; |
| import org.junit.Test; |
| |
| public class TestPORegexp { |
| static Random r = new Random(42L); |
| |
| @Test |
| public void testOrdering() { |
| CompiledAutomaton auto2 = new CompiledAutomaton("[a-z]{3}"); |
| assertFalse(auto2.match("1234", "abc") ); |
| assertTrue(auto2.match("abc", "1234") ); |
| |
| auto2 = new CompiledAutomaton(".*#c"); |
| assertTrue(auto2.match("ab#c", "dummy")); |
| |
| auto2 = new CompiledAutomaton(".*@.*"); |
| assertTrue(auto2.match("ab@c", "dummy")); |
| |
| auto2 = new CompiledAutomaton("abc&def"); |
| assertTrue(auto2.match("abc&def", "dummy")); |
| |
| auto2 = new CompiledAutomaton("abc~[ab]"); |
| assertTrue(auto2.match("abc~a", "dummy")); |
| |
| CompiledRegex regex2 = new CompiledRegex(Pattern.compile("[a-z]{3}")); |
| assertFalse(regex2.match("1234", "abc") ); |
| assertTrue(regex2.match("abc", "1234") ); |
| |
| NonConstantRegex ncr = new NonConstantRegex(); |
| assertFalse(ncr.match("1234", "abc")); |
| assertFalse(ncr.match("abc", "1234")); |
| assertTrue(ncr.match("1234", "\\d\\d\\d\\d")); |
| assertTrue(ncr.match("abc", "[a-z]{3}")); |
| } |
| |
| @Test |
| public void testRegexDetermination() throws Exception { |
| Method m = RegexInit.class.getDeclaredMethod("determineBestRegexMethod", String.class); |
| m.setAccessible(true); |
| |
| RegexInit regex = new RegexInit(new PORegexp(new OperatorKey())); |
| |
| assertEquals(1, m.invoke(regex, "abc") ); |
| |
| assertEquals(1, m.invoke(regex, "\\\\abc") ); |
| |
| assertEquals(1, m.invoke(regex, "abc.*")); |
| |
| assertEquals(1, m.invoke(regex, ".*abc")); |
| |
| assertEquals(1, m.invoke(regex, ".*abc.*")); |
| |
| assertEquals(1, m.invoke(regex, ".*abc\\.*")); |
| |
| assertEquals(1, m.invoke(regex, ".*abc\\\\")); |
| |
| assertEquals(0, m.invoke(regex, ".*abc\\d")); |
| |
| assertEquals(0, m.invoke(regex, ".*abc\\s")); |
| |
| assertEquals(0, m.invoke(regex, ".*abc\\Sw")); |
| |
| assertEquals(0, m.invoke(regex, "abc\\Sw")); |
| |
| assertEquals(0, m.invoke(regex, "a\\Q")); |
| |
| assertEquals(0, m.invoke(regex, "\\QThis is something")); |
| |
| assertEquals(0, m.invoke(regex, "(\\w)*\\s\\1")); |
| |
| assertEquals(0, m.invoke(regex, "[^a]bc")); |
| |
| assertEquals(0, m.invoke(regex, "\\p{Alpha}hi")); |
| |
| assertEquals(0, m.invoke(regex, "\\d{1,2}hi")); |
| |
| assertEquals(0, m.invoke(regex, "^abc.*")); |
| |
| assertEquals(1, m.invoke(regex, ".*[A-F]{2,3}.*")); |
| |
| assertEquals(0, m.invoke(regex, "\\d+")); |
| |
| assertEquals(0, m.invoke(regex, "\\d{2,3}")); |
| |
| assertEquals(0, m.invoke(regex, "\\\\\\d{2,3}")); |
| |
| assertEquals(0, m.invoke(regex, ".*\\d{2,3}.*")); |
| |
| assertEquals(0, m.invoke(regex, "\\d\\.0\\d")); |
| |
| assertEquals(0, m.invoke(regex, "[^f]ed.*")); |
| |
| assertEquals(0, m.invoke(regex, "[a-m[n-z]]")); |
| |
| assertEquals(0, m.invoke(regex, "[a-z&&[def]]")); |
| |
| assertEquals(0, m.invoke(regex, "[a-z&&[^abc]]")); |
| |
| assertEquals(1, m.invoke(regex, "[a-m\\[n-z\\]")); |
| |
| assertEquals(1, m.invoke(regex, "[a-m\\\\\\[n-z\\\\\\]]")); |
| |
| assertEquals(0, m.invoke(regex, "[a-m\\\\\\[n-z\\\\\\][0-9]]")); |
| |
| assertEquals(0, m.invoke(regex, "[a-m\\\\[n-z]]")); |
| |
| assertEquals(0, m.invoke(regex, "\\\\\\[[a-m\\\\\\[n-z\\\\\\][0-9]]")); |
| |
| assertEquals(0, m.invoke(regex, "[a-z]??" )); |
| |
| assertEquals(0, m.invoke(regex, "[a-z]*?" )); |
| |
| assertEquals(0, m.invoke(regex, "[a-z]+?" )); |
| |
| assertEquals(0, m.invoke(regex, "[a-z]{4}?" )); |
| |
| assertEquals(0, m.invoke(regex, "[a-z]{2,4}?" )); |
| |
| assertEquals(1, m.invoke(regex, "[a-z]\\??" )); |
| |
| assertEquals(1, m.invoke(regex, "[a-z]\\*?" )); |
| |
| assertEquals(1, m.invoke(regex, "[a-z]\\+?" )); |
| |
| assertEquals(1, m.invoke(regex, "[a-z]{4\\}?" )); |
| |
| assertEquals(1, m.invoke(regex, "[a-z]{2,4\\}?" )); |
| |
| assertEquals(0, m.invoke(regex, "[a-z]?+" )); |
| |
| assertEquals(0, m.invoke(regex, "[a-z]*+" )); |
| |
| assertEquals(0, m.invoke(regex, "[a-z]++" )); |
| |
| assertEquals(0, m.invoke(regex, "[a-z]{4}+" )); |
| |
| assertEquals(0, m.invoke(regex, "[a-z]{2,4}+" )); |
| |
| assertEquals(1, m.invoke(regex, "[a-z]\\?+" )); |
| |
| assertEquals(1, m.invoke(regex, "[a-z]\\*+" )); |
| |
| assertEquals(1, m.invoke(regex, "[a-z]\\++" )); |
| |
| assertEquals(1, m.invoke(regex, "[a-z]{4\\}+" )); |
| |
| assertEquals(1, m.invoke(regex, "[a-z]{2,4\\}+" )); |
| |
| assertEquals(1, m.invoke(regex, "[a-m\\[n-z\\]]" )); |
| |
| assertEquals(0, m.invoke(regex, "\\0101" )); |
| |
| assertEquals(0, m.invoke(regex, "\\x0A" )); |
| |
| assertEquals(0, m.invoke(regex, "\\u000A" )); |
| |
| assertEquals(0, m.invoke(regex, "&&" )); |
| |
| assertEquals(1, m.invoke(regex, "\\&&asdkfjalsdf" )); |
| |
| assertEquals(0, m.invoke(regex, "&&asdf\\&&" )); |
| |
| assertEquals(0, m.invoke(regex, "&&asdf\\&&asdfasdf" )); |
| |
| assertEquals(0, m.invoke(regex, "&&asdfas\\&&asdfasdfa\\&&" )); |
| |
| assertEquals(0, m.invoke(regex, "&&asdflj&&" )); |
| |
| assertEquals(0, m.invoke(regex, "\\\\&&asdfasdf" )); |
| |
| assertEquals(1, m.invoke(regex, "\\\\\\&&asdfasdf" )); |
| |
| assertEquals(0, m.invoke(regex, "\\\\&&asdfasdf&&" )); |
| |
| assertEquals(0, m.invoke(regex, "\\&&asdfasdf\\\\&&" )); |
| |
| assertEquals(0, m.invoke(regex, "\\&&asd&&fasdf\\\\\\&&" )); |
| |
| assertEquals(0, m.invoke(regex, "\\dasdfasdf" )); |
| |
| assertEquals(1, m.invoke(regex, "\\\\dasdfasdf" )); |
| |
| assertEquals(0, m.invoke(regex, "\\\\dasdfasdf\\d" )); |
| |
| assertEquals(0, m.invoke(regex, "\\\\dasdf\\dasdf\\\\d" )); |
| |
| assertEquals(0, m.invoke(regex, "\\\\dasd\\\\dfasdf\\d" )); |
| |
| assertEquals(1, m.invoke(regex, "\\\\dasdfasdf\\" )); |
| |
| assertEquals(0, m.invoke(regex, "\\dasase\\\\dfasdf\\" )); |
| |
| assertEquals(1, m.invoke(regex, "\\\\dasdfasdf\\\\" )); |
| |
| assertEquals(1, m.invoke(regex, "xyz#abc") ); |
| } |
| } |