| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.drill.exec.expr.fn.impl; |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertTrue; |
| |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.charset.CharacterCodingException; |
| import java.nio.charset.CharsetEncoder; |
| import java.nio.charset.StandardCharsets; |
| import java.util.ArrayList; |
| import java.util.List; |
| |
| import org.apache.drill.common.exceptions.DrillRuntimeException; |
| import org.apache.drill.exec.memory.BufferAllocator; |
| import org.apache.drill.exec.memory.RootAllocatorFactory; |
| import org.apache.drill.test.BaseTest; |
| import org.junit.After; |
| import org.junit.Before; |
| import org.junit.Test; |
| |
| import io.netty.buffer.DrillBuf; |
| |
| public class TestSqlPatterns extends BaseTest { |
| BufferAllocator allocator; |
| DrillBuf drillBuf; |
| CharsetEncoder charsetEncoder; |
| CharBuffer charBuffer; |
| ByteBuffer byteBuffer; |
| |
| String wideString = "b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4" + |
| "ac6Bn1cxblsXFnkp8g8hiQkUMJPyl6l0jTdsIzQ4PkVCURGGyF0aduGqCXUaKp91gqkRMvL" + |
| "g1Lh6u0NrGCBoJajPxnwZCyh58cN5aFiNscBFKIqqLPTS1vnbR39nmzU88FM8qDepJRhvein" + |
| "hHhmrHdEb22QN20dXEHSygR7vrb2zZhhfWeJbXRsesuYDqdGig801IAS6VWRIdQtJ6gaRhCdNz" + |
| " DWnQWRXlMhcrR4MKJXeBgDtjzbHd0ZS53K8u8ORl6FKxtvdKmwUuHiuMJrQQm6Rgx6WJrAtvTf" + |
| "UE8a5I3nYXdRppnm3MbRsLu4IxXIblh8kmAIG6n2yHwGhpWYkRI7cwl4dOB3bsxxtdaaTlZMMx6T" + |
| "XPaUK10UzfZCAkWG9Du3QhJxxJBZaP3HPebXmw1l5swPohmG3L6zOcEWp7f" + |
| "saldC7TOrFa3ReYFHooclSGTgZ9sWjJ5SYJ0vEkI1RMWoeGcdJq5v4lrcB6YjrMqQJIaxAdRnIaNG" + |
| "V6oR9SkI4diiXspIvRWj6PMkpqI02ovI3va49bHauTrqTyM9eIhS" + |
| "0Mc3SHzknQwHJAFkqmhV9Lm2VLULou2iJDvc5sWW8W48IODGqGytqLogA01Cuo3gURmH2057nCld9" + |
| "PDHQEieFMddi4gKPOv4es1YX2aBo4RfYiTlUyXd6gGujVPgU2j" + |
| "AAhcz6JqVC08O73gM9zOAM2l4PwN2TN3lBufkQUGyOzHtoTDjSdQ2DPXIks9A6ehIpn92n1UtdrJeMz" + |
| "4oMN4kwP95YjQk1ko2e3DVAiPVlCiaWqnzXKa41kLVs3KiBhfAff5" + |
| "hoTnBGn9CaXed6g6kLs2YBTQYM9yLW9Wb5qNhLeCM4GGJM8dUWqqEsWYPrcPAkCMa6LXfgEcsCwQ6ij" + |
| "JhhjcxwoafBRyyEvQ6Pfhg8IqJ0afBpAZHhR2y4I11zbaJZqs3WG3H3aQHT" + |
| "wcPHdBHnk65GdL3Njuoo0K4mcmN6lk7pWptHwTjkw59zTw834PZ8TWm5XiUnsi9JKy41MPqHcbO0nN" + |
| "SYl9Q6kEjv4nt8p9unhUYqgrGvLl42nvqGb1F47f6PvxkewuouxMFAszYhaMjZzIf5" + |
| "AgmvaXbSP9MKYu6EkkvM9CIhYGZuq7PJUk6wmoG6IxIfOokUcnrGzuU9INFUuXf4LptQ987GU3hw0d" + |
| "yMNf6nncwABOOoC5EnqYBNoq29Mf54H5k2Xi8y1fh8ldtKcW9T4WsaXun9fKofegfhwY8wgfoG" + |
| "eW2YNW3fdalIsggRzMEAXVDxj7oieReUGiT53uV2kcmcQRQLdUDUcOC1JEiSRpgZl38c1DDVRlz8Rbhi" + |
| "KUxMqNCPx6PABXCPocpfXJa0yBT0l3ssgMlDfKsxAHX6aEC86zk0CDmTqZPmBjLAoYaHA3" + |
| "uGqoARbQ6rhIBHOdkb7PoRImjmF4sQ60TBIWdao9dqLMjslhOQrGQlPIniW5I1V9nisc5lV0jEqeaC3y" + |
| "lSnjhieVJ7H0FYjcsihjQryhyRwUZBGxWFuh0hI9rOv8h5jHKb549hOHPcIjSdLa6M048G" + |
| "9drX0LNEixfp7WUqq2DyRfBioybmoHVzFWzhXrMJXzwHakzLwb4T2BHcLK6VpC4b2GodYlZe43ggxTNUErif" + |
| "NEfEfxZhDj6HBMYobKvn4ofOsyKPGn6NXnCqIbCCvqOyBikxAYukgCmWHRJRGX4RjNbL" + |
| "BVjY5eoXJB7xisnrqOieXuEnZ9n7rnK8qM4RuOSA8EaDd5n58JU9SUUNRqpZZgK2nPy9Pv90ORiGr1Y30rZS" + |
| "bKT7SucjEZJ00WBF9FlJp6v8OcVvMBjRriaYYjVlOiLvVDQQ2NvYfbv5bLbEhkrJi5Nlg" + |
| "3Tq5jsgSTEBqSKTD5UIukFP194LvVMQIOQ9YM7m9iZHMpCCoIL99FJLsNmzRDVETCjyFoXxSputp6ufupS1n" + |
| "1SHRVlXm7Bx3bjJ79O3bGqjzxT1EZV39isegIyKx2H0zEUpnlXzzbusS0tusECmG3C3eGDOTs" + |
| "FZbYTp5ZxtXCrudDSX3kaeLtCstfqAHGsjHkPd87aSNaJJjPaSaMmGo7zTJGUIX1VCA2KJP37USIAa5NGHtM" + |
| "ChmtfO8kmrO9PZl6Ld18Yi7OlBsEUkMQE0yKwtSpkTK76XS5CG8S7S2S07vtYaBJJ9Bvuzr0F" + |
| "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW" + |
| "zdSmpjrFnnB8edB5AOekeHua16I9qcNHuCcOgeYZIc6GzG0O1XAcQu6cEi1ZivUPoYf2sKr4uPvcD" + |
| "gnaIN1KmhwSmxPgkErJVroPAUO18E2apxRlmZkhS6CInyzcLkvycSDCGtFaAZBO3QDO5nmvPFgVxfSbwG8BhhY" + |
| "cWXqwnsbEEejtlXH3Zr5BtxTzd3Bo08s8HxjIXF6Z0CPXcvQzDoemL8M2A1AIrnBkT7vIHgvMuH475M" + |
| "TXIR4K0njrS4X4KrBQFxvuZey8tnUnm8oiJWdUFzdM4N0KioJsG8UzxRODxKh4e3GqxmZxsSwwL0nNnV1syiCCC" + |
| "zTgrtT6fcxpAfcFeTct7FNd4BjzbNCgBrSspzhxnEFMZXuqBGaOS9d9qcuUulwF0lAWGBauWI57qyjXfQnQ" + |
| "i6Sy6nXOcUIOZWJ9BVJf4A27Pa4Pi7ZFznFnIdiQOrxCbb2ZCVkCftWsmcEMnXWXUkGOuA5JXo9YvGyPGq2wgO1wj" + |
| "qAKyqxhBVOL48L2D0PYU16Ursxe0ckoBYXJheQi2d1eIa0pTD78325f8jCHclqINvuvj0GZfJENlc1e" + |
| "ULPRd358aPnsx2DOmN1UojjBI1hacijCtFCE8zGCa9M0L7aZbRUHe8lmlaqhx0Su6nPnPgfbJr6idfxTJHqCT4t8" + |
| "4BfZeqRZ5rgIS15Z7HFYSCPZixMPf683GQoQEFWIM0EqNTJmoHW3K7jDHOUpVutyyWt5VO5ray6rBrq1nAF" + |
| "QEN59RqxM04eXxAOBWnPB17TdvDmyXuXDpjnjXReJLNqJVgB2VFPxsqhQWQupAtjBGvffU7exZMM92fiYdBArV" + |
| "4SE1mBFewTNRz4PmwFVmUoxWj74rzZQuDMhAlx3jBXcaX8eD7PlaADdiMT1mF3faVyScA6bHbV2jU79XvppOfoD" + |
| "YtBFj3a5LtAhTy5BnN2v1XlTQtk6MZ0Ej6g7sW96w9n2XV8wqdWGgjeKHaqH7Pn1XFw7IHvpVYK4wFvIGubp4bpms" + |
| "C3ARq1Gqq8zvDQtoLZSZYOvXCZOIElGZLscqjbRckX5aRhTJX6CxjVcT7S3TScnCbqNdfqMpEsNl2GY3fprQF" + |
| "CTtiZv12uCj0WILSesMc5ct2tQcIvwnOHAuE6fw7lD8EgQ0emU4zxUIDowhTvJ46k27rXTctIX7HlBEZXInV9r49" + |
| "VbJdA3des3ZqGPbBYXTwQcns1jJTmnIf1S0jLWN0Wgk9bH5gkdhl53l2yc1AlZCyJdm9vktH5sctTDdMZrDPPHNUG2" + |
| "pTBg4DDR9Zc6YvkrO4f5O3mfOl441bJkmOSNwoOc3krHTQlN6SBGLEptT4m7MFwqVyrbsEXHegwa53aN4W0J7qwV0" + |
| "EMN2VHLtoHQDfXVOVDXnE1rK3cDJRMhCIvIRmywkA5T9GchtDVfek2qZq1H5wfe92RoXBseAuMoWtTCJiXOJraCxmj" + |
| "cluokF3eK0NpycncoQcObLiS1rield0fdx8UJhsV9QnNtok5a0f4L1MKtjnYJmvItSqn3Lo2VkWagxGSEJzKnK2gO3pH" + |
| "Whlarr6bRQeIwCXckALEVdGZBTPiqjYPBfk5H5wYXqkieh04tjSmnWytNebBNmGjTNgrqNVO7ftCbhh7wICOn" + |
| "lpSMt6BoFvjHYW1IpEyTlVlvNl5NzPPAn2119ttZTfXpifXfQtBGzlCNYTD6m1FvpmOydzqEq8YadgybW76HDtnBdU" + |
| "M1djhNcHfR12NkPc7UIvVJDiTTJ440pU1tqYISyEVr5QZBrhOP2y6RsZnlJy7Mqh56Jw0fJkbI2yQaoc7Jh2Wsh7" + |
| "R58SXBXsalwNM9TmTeBMrc8Hghx9hDpai8agUclHTCoyK2hkEpKLlEJiXUKOE8JPugYE8yFVYF49UAjJUbsj6we3Ocii" + |
| "FXs6oXGymttSxcRksGdfUaIonkrqniea31SgiGmhCjKi0x5ZDNFS26CqSEU0FKiLJyhui8HOJCddX64Ers0VTMHppS" + |
| "ydpQX7PndzDuhT7k8Wj2kGJvKCqzVxTGCssDHoedKmMULEjUqU2EcjT5VOaCFeHKUXyP1B7qfYPtKLcgXHH5bmSgRs8gY" + |
| "2JkPOST2Vr35mNKoulUMqFeo0s1y5hcVY39a3mBMytwZn7HgPhEJScwZdWJd6E5tZ13evEmcn1A5YPBYbm91CdJFXhj" + |
| "iuqmJS71Xq4j56K35TmCJCb4jAAbcGTGEHzcCP1HKVFfsNnLqwflvHwMYQMA3EumrMn1nXnETZFdZJRHlnO8dwgnT" + |
| "ehbB2XtrpErgaFbEWfWEinoiMd4Vs7kgHzs8UiuagYyyCxmg5gEvza3CXzjUnG2lfjI6ox6EYPgXvRySHmL" + |
| "atXzj4x3CgF6j1gn10aUJknF7KQLJ84DIA5fy33YaLLbeOoGJHsdr9rQZCjaIqZKH870sslgm0tnGw5yOddnj" + |
| "FDI2KwL6UVGr3YExI1p5sGaY0Su4G30PMJsOX9ZWvRF72Lk0pVMnjVugkzsnQrbyGezZ8WN8y8kOvrysQuhTt5" + |
| "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ"; |
| |
| @Before |
| public void setup() { |
| allocator = RootAllocatorFactory.newRoot(16384); |
| drillBuf = allocator.buffer(8192); |
| charsetEncoder = StandardCharsets.UTF_8.newEncoder(); |
| } |
| |
| @Test |
| public void testSqlRegexLike() { |
| // Given SQL like pattern, verify patternType is correct. |
| // Java pattern should have % replaced with .*, _ replaced with . |
| // Simple pattern should have meta (% and _) and escape characters removed. |
| |
| // A%B is complex |
| RegexpUtil.SqlPatternInfo patternInfo = RegexpUtil.sqlToRegexLike("A%B"); |
| assertEquals("A.*B", patternInfo.getJavaPatternString()); |
| assertEquals(RegexpUtil.SqlPatternType.COMPLEX, patternInfo.getPatternType()); |
| |
| // A_B is complex |
| patternInfo = RegexpUtil.sqlToRegexLike("A_B"); |
| assertEquals("A.B", patternInfo.getJavaPatternString()); |
| assertEquals(RegexpUtil.SqlPatternType.COMPLEX, patternInfo.getPatternType()); |
| |
| // A%B%D is complex |
| patternInfo = RegexpUtil.sqlToRegexLike("A%B%D"); |
| assertEquals("A.*B.*D", patternInfo.getJavaPatternString()); |
| assertEquals(RegexpUtil.SqlPatternType.COMPLEX, patternInfo.getPatternType()); |
| |
| // %AB% is contains |
| patternInfo = RegexpUtil.sqlToRegexLike("%AB%"); |
| assertEquals(".*AB.*", patternInfo.getJavaPatternString()); |
| assertEquals("AB", patternInfo.getSimplePatternString()); |
| assertEquals(RegexpUtil.SqlPatternType.CONTAINS, patternInfo.getPatternType()); |
| |
| // %AB is ends with |
| patternInfo = RegexpUtil.sqlToRegexLike("%AB"); |
| assertEquals(".*AB", patternInfo.getJavaPatternString()); |
| assertEquals("AB", patternInfo.getSimplePatternString()); |
| assertEquals(RegexpUtil.SqlPatternType.ENDS_WITH, patternInfo.getPatternType()); |
| |
| // AB% is starts with |
| patternInfo = RegexpUtil.sqlToRegexLike("AB%"); |
| assertEquals("AB.*", patternInfo.getJavaPatternString()); |
| assertEquals("AB", patternInfo.getSimplePatternString()); |
| assertEquals(RegexpUtil.SqlPatternType.STARTS_WITH, patternInfo.getPatternType()); |
| |
| // AB is constant. |
| patternInfo = RegexpUtil.sqlToRegexLike("AB"); |
| assertEquals("AB", patternInfo.getJavaPatternString()); |
| assertEquals("AB", patternInfo.getSimplePatternString()); |
| assertEquals(RegexpUtil.SqlPatternType.CONSTANT, patternInfo.getPatternType()); |
| |
| // A.B is constant. DRILL-8278 |
| patternInfo = RegexpUtil.sqlToRegexLike("A.B"); |
| // The . should be escaped with a \ so that it represents a literal . |
| assertEquals("A\\.B", patternInfo.getJavaPatternString()); |
| assertEquals("A.B", patternInfo.getSimplePatternString()); |
| assertEquals(RegexpUtil.SqlPatternType.CONSTANT, patternInfo.getPatternType()); |
| |
| // Test with escape characters. |
| |
| // A%#B is invalid escape sequence |
| try { |
| patternInfo = RegexpUtil.sqlToRegexLike("A%#B", '#'); |
| } catch (Exception ex) { |
| assertTrue(ex.getMessage().contains("Invalid escape sequence")); |
| } |
| |
| // A#%B with # as escape character is constant A%B |
| patternInfo = RegexpUtil.sqlToRegexLike("A#%B", '#'); |
| assertEquals("A%B", patternInfo.getJavaPatternString()); |
| assertEquals("A%B", patternInfo.getSimplePatternString()); |
| assertEquals(RegexpUtil.SqlPatternType.CONSTANT, patternInfo.getPatternType()); |
| |
| // %A#%B% is contains A%B |
| patternInfo = RegexpUtil.sqlToRegexLike("%A#%B%", '#'); |
| assertEquals(".*A%B.*", patternInfo.getJavaPatternString()); |
| assertEquals("A%B", patternInfo.getSimplePatternString()); |
| assertEquals(RegexpUtil.SqlPatternType.CONTAINS, patternInfo.getPatternType()); |
| |
| // #%AB% is starts with %AB |
| patternInfo = RegexpUtil.sqlToRegexLike("#%AB%", '#'); |
| assertEquals("%AB.*", patternInfo.getJavaPatternString()); |
| assertEquals("%AB", patternInfo.getSimplePatternString()); |
| assertEquals(RegexpUtil.SqlPatternType.STARTS_WITH, patternInfo.getPatternType()); |
| |
| // %#%AB#% is ends with %AB% |
| patternInfo = RegexpUtil.sqlToRegexLike("%#%AB#%", '#'); |
| assertEquals(".*%AB%", patternInfo.getJavaPatternString()); |
| assertEquals("%AB%", patternInfo.getSimplePatternString()); |
| assertEquals(RegexpUtil.SqlPatternType.ENDS_WITH, patternInfo.getPatternType()); |
| |
| // #_A#%B%C is complex |
| patternInfo = RegexpUtil.sqlToRegexLike("#_A#%B%C", '#'); |
| assertEquals("_A%B.*C", patternInfo.getJavaPatternString()); |
| assertEquals(RegexpUtil.SqlPatternType.COMPLEX, patternInfo.getPatternType()); |
| |
| } |
| |
| private void setDrillBuf(String input) { |
| drillBuf.clear(); |
| charBuffer = CharBuffer.wrap(input); |
| try { |
| byteBuffer = charsetEncoder.encode(charBuffer); |
| } catch (CharacterCodingException e) { |
| throw new DrillRuntimeException("Error while encoding the pattern string ", e); |
| } |
| drillBuf.setBytes(0, byteBuffer, byteBuffer.position(), byteBuffer.remaining()); |
| } |
| |
| @Test |
| public void testSqlPatternStartsWith() { |
| RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH, "", "ABC"); |
| SqlPatternMatcher sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| |
| setDrillBuf("ABCD"); |
| assertEquals(1, sqlPatternStartsWith.match(0, byteBuffer.limit(), drillBuf)); // ABCD should match StartsWith ABC |
| |
| setDrillBuf("BCD"); |
| assertEquals(0, sqlPatternStartsWith.match(0, byteBuffer.limit(), drillBuf)); // BCD should not match StartsWith ABC |
| |
| setDrillBuf("XYZABC"); |
| assertEquals(0, sqlPatternStartsWith.match(0, byteBuffer.limit(), drillBuf)); // XYZABC should not match StartsWith ABC |
| |
| // null text |
| setDrillBuf(""); |
| assertEquals(0, sqlPatternStartsWith.match(0, byteBuffer.limit(), drillBuf)); // null String should not match StartsWith ABC |
| |
| // pattern length > txt length |
| setDrillBuf("AB"); |
| assertEquals(0, sqlPatternStartsWith.match(0, byteBuffer.limit(), drillBuf)); // AB should not match StartsWith ABC |
| |
| // startsWith null pattern should match anything |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH, "", ""); |
| sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| |
| setDrillBuf("AB"); |
| assertEquals(1, sqlPatternStartsWith.match(0, byteBuffer.limit(), drillBuf)); // AB should match StartsWith null pattern |
| |
| // null pattern and null text |
| setDrillBuf(""); |
| assertEquals(1, sqlPatternStartsWith.match(0, byteBuffer.limit(), drillBuf)); // null text should match null pattern |
| |
| // wide character string. |
| setDrillBuf(wideString); |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH, "", "b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4"); |
| sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| assertEquals(1, sqlPatternStartsWith.match(0, byteBuffer.limit(), drillBuf)); // should match |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH, "", "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ"); |
| sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| assertEquals(0, sqlPatternStartsWith.match(0, byteBuffer.limit(), drillBuf)); // should not match |
| |
| // non ascii |
| setDrillBuf("¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~"); |
| assertEquals(0, sqlPatternStartsWith.match(0, byteBuffer.limit(), drillBuf)); // should not match |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH, "", "¤EÀsÆW"); |
| sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| setDrillBuf("¤EÀsÆW"); |
| assertEquals(1, sqlPatternStartsWith.match(0, byteBuffer.limit(), drillBuf)); // should match |
| } |
| |
| @Test |
| public void testSqlPatternEndsWith() { |
| RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH, "", "BCD"); |
| SqlPatternMatcher sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| |
| setDrillBuf("ABCD"); |
| assertEquals(1, sqlPatternEndsWith.match(0, byteBuffer.limit(), drillBuf)); // ABCD should match EndsWith BCD |
| |
| setDrillBuf("ABC"); |
| assertEquals(0, sqlPatternEndsWith.match(0, byteBuffer.limit(), drillBuf)); // ABC should not match EndsWith BCD |
| |
| setDrillBuf(""); |
| assertEquals(0, sqlPatternEndsWith.match(0, byteBuffer.limit(), drillBuf)); // null string should not match EndsWith BCD |
| |
| setDrillBuf("A"); |
| assertEquals(0, sqlPatternEndsWith.match(0, byteBuffer.limit(), drillBuf)); // ABCD should not match EndsWith A |
| |
| setDrillBuf("XYZBCD"); |
| assertEquals(1, sqlPatternEndsWith.match(0, byteBuffer.limit(), drillBuf)); // XYZBCD should match EndsWith BCD |
| |
| // EndsWith null pattern should match anything |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH, "", ""); |
| sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| assertEquals(1, sqlPatternEndsWith.match(0, byteBuffer.limit(), drillBuf)); // AB should match StartsWith null pattern |
| |
| // null pattern and null text |
| setDrillBuf(""); |
| assertEquals(1, sqlPatternEndsWith.match(0, byteBuffer.limit(), drillBuf)); // null text should match null pattern |
| |
| // wide character string. |
| setDrillBuf(wideString); |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH, "", "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ"); |
| sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| assertEquals(1, sqlPatternEndsWith.match(0, byteBuffer.limit(), drillBuf)); // should match |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH, "", ""); |
| sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| |
| setDrillBuf(""); |
| assertEquals(1, sqlPatternEndsWith.match(0, byteBuffer.limit(), drillBuf)); // null text should match null pattern |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH, "", "atXzj4x3CgF6j1gn10aUJknF7KQLJ84D"); |
| sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| assertEquals(0, sqlPatternEndsWith.match(0, byteBuffer.limit(), drillBuf)); // should not match |
| |
| // non ascii |
| setDrillBuf("¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~"); |
| assertEquals(0, sqlPatternEndsWith.match(0, byteBuffer.limit(), drillBuf)); // should not match |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH, "", "TÆU2~~"); |
| sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| assertEquals(1, sqlPatternEndsWith.match(0, byteBuffer.limit(), drillBuf)); // should match |
| } |
| |
| @Test |
| public void testSqlPatternContains() { |
| RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS, ".*ABC.*", "ABCD"); |
| SqlPatternMatcher sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| |
| setDrillBuf("ABCD"); |
| assertEquals(1, sqlPatternContains.match(0, byteBuffer.limit(), drillBuf)); // ABCD should contain ABCD |
| |
| setDrillBuf("BC"); |
| assertEquals(0, sqlPatternContains.match(0, byteBuffer.limit(), drillBuf)); // BC cannot contain ABCD |
| |
| setDrillBuf(""); |
| assertEquals(0, sqlPatternContains.match(0, byteBuffer.limit(), drillBuf)); // null string should not match contains ABCD |
| |
| setDrillBuf("DE"); |
| assertEquals(0, sqlPatternContains.match(0, byteBuffer.limit(), drillBuf)); // ABCD should not contain DE |
| |
| setDrillBuf("xyzABCDqrs"); |
| assertEquals(1, sqlPatternContains.match(0, byteBuffer.limit(), drillBuf)); // xyzABCDqrs should contain ABCD |
| |
| // contains null pattern should match anything |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS, "", ""); |
| sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| |
| setDrillBuf("xyzABCDqrs"); |
| assertEquals(1, sqlPatternContains.match(0, byteBuffer.limit(), drillBuf)); // should match |
| |
| // null pattern and null text |
| setDrillBuf(""); |
| assertEquals(1, sqlPatternContains.match(0, byteBuffer.limit(), drillBuf)); // null text should match null pattern |
| |
| // wide character string. |
| setDrillBuf(wideString); |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS, "", "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW"); |
| sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| |
| assertEquals(1, sqlPatternContains.match(0, byteBuffer.limit(), drillBuf)); |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS, "", "ABCDEF"); |
| sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| assertEquals(0, sqlPatternContains.match(0, byteBuffer.limit(), drillBuf)); |
| |
| // non ascii |
| setDrillBuf("¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~"); |
| assertEquals(0, sqlPatternContains.match(0, byteBuffer.limit(), drillBuf)); // should not match |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS, "", "¶T¤¤¤ß"); |
| sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| assertEquals(1, sqlPatternContains.match(0, byteBuffer.limit(), drillBuf)); // should match |
| } |
| |
| @Test |
| public void testSqlPatternConstant() { |
| RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONSTANT, "ABC.*", "ABC"); |
| SqlPatternMatcher sqlPatternConstant = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| |
| setDrillBuf("ABC"); |
| assertEquals(1, sqlPatternConstant.match(0, byteBuffer.limit(), drillBuf)); // ABC should match ABC |
| |
| setDrillBuf("BC"); |
| assertEquals(0, sqlPatternConstant.match(0, byteBuffer.limit(), drillBuf)); // ABC not same as BC |
| |
| setDrillBuf(""); |
| assertEquals(0, sqlPatternConstant.match(0, byteBuffer.limit(), drillBuf)); // null string not same as ABC |
| |
| setDrillBuf("DE"); |
| assertEquals(0, sqlPatternConstant.match(0, byteBuffer.limit(), drillBuf)); // ABC not same as DE |
| |
| // null pattern should match null string |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONSTANT, "", ""); |
| sqlPatternConstant = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| |
| setDrillBuf(""); |
| assertEquals(1, sqlPatternConstant.match(0, byteBuffer.limit(), drillBuf)); // null text should match null pattern |
| |
| // wide character string. |
| setDrillBuf(wideString); |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS, "", wideString); |
| sqlPatternConstant = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| assertEquals(1, sqlPatternConstant.match(0, byteBuffer.limit(), drillBuf)); |
| |
| // non ascii |
| setDrillBuf("¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~"); |
| assertEquals(0, sqlPatternConstant.match(0, byteBuffer.limit(), drillBuf)); // should not match |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONSTANT, "", "¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~"); |
| sqlPatternConstant = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| assertEquals(1, sqlPatternConstant.match(0, byteBuffer.limit(), drillBuf)); // should match |
| } |
| |
| @Test |
| public void testSqlPatternComplex() { |
| RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.COMPLEX, "A.*BC.*", ""); |
| SqlPatternMatcher sqlPatternComplex = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| |
| setDrillBuf("ABCDEF"); |
| assertEquals(1, sqlPatternComplex.match(0, byteBuffer.limit(), drillBuf)); // ADEBCDF should match A.*BC.* |
| |
| setDrillBuf("BC"); |
| assertEquals(0, sqlPatternComplex.match(0, byteBuffer.limit(), drillBuf)); // BC should not match A.*BC.* |
| |
| setDrillBuf(""); |
| assertEquals(sqlPatternComplex.match(0, byteBuffer.limit(), drillBuf), 0); // null string should not match |
| |
| setDrillBuf("DEFGHIJ"); |
| assertEquals(sqlPatternComplex.match(0, byteBuffer.limit(), drillBuf), 0); // DEFGHIJ should not match A.*BC.* |
| |
| java.util.regex.Matcher matcher; |
| matcher = java.util.regex.Pattern.compile("b00dUrA0.*").matcher(""); |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.COMPLEX, "b00dUrA0.*42.*9a8BZ", ""); |
| |
| // wide character string. |
| setDrillBuf(wideString); |
| |
| sqlPatternComplex = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| assertEquals(1, sqlPatternComplex.match(0, byteBuffer.limit(), drillBuf)); |
| |
| // non ascii |
| setDrillBuf("¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~"); |
| assertEquals(0, sqlPatternComplex.match(0, byteBuffer.limit(), drillBuf)); // DEFGHIJ should not match A.*BC.* |
| |
| patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.COMPLEX, ".*»Ú®i¶T¤¤¤.*¼Ó®i.*ÆU2~~", ""); |
| sqlPatternComplex = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| assertEquals(1, sqlPatternComplex.match(0, byteBuffer.limit(), drillBuf)); // should match |
| } |
| |
| @Test |
| public void testSqlPatternContainsMultipleMatchers() { |
| |
| final String longASCIIString = "Drill supports a variety of NoSQL databases and file systems, including HBase, MongoDB, MapR-DB, HDFS, MapR-FS, Amazon S3, Azure Blob Storage, Google Cloud Storage, Swift, " |
| + "NAS and local files. A single query can join data from multiple datastores. For example, you can join a user profile collection in MongoDB with a directory of event logs in Hadoop."; |
| final String emptyString = ""; |
| final String unicodeString = "¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~"; |
| |
| final List<SQLPatternTestParams> tests = new ArrayList<SQLPatternTestParams>(); |
| |
| // Tests for Matcher ZERO |
| tests.add(new SQLPatternTestParams(longASCIIString, "", true)); |
| tests.add(new SQLPatternTestParams(emptyString, "", true)); |
| tests.add(new SQLPatternTestParams(unicodeString, "", true)); |
| |
| // Tests for Matcher ONE |
| tests.add(new SQLPatternTestParams(longASCIIString, "N", true)); |
| tests.add(new SQLPatternTestParams(longASCIIString, "&", false)); |
| tests.add(new SQLPatternTestParams(emptyString, "N", false)); |
| |
| // Tests for Matcher TWO |
| tests.add(new SQLPatternTestParams(longASCIIString, "SQ", true)); |
| tests.add(new SQLPatternTestParams(longASCIIString, "eT", false)); |
| tests.add(new SQLPatternTestParams("A", "SQ", false)); |
| tests.add(new SQLPatternTestParams(emptyString, "SQ", false)); |
| tests.add(new SQLPatternTestParams(unicodeString, "¶", true)); |
| tests.add(new SQLPatternTestParams(unicodeString, "AT", false)); |
| |
| // Tests for Matcher THREE |
| tests.add(new SQLPatternTestParams(longASCIIString, "SQL", true)); |
| tests.add(new SQLPatternTestParams(longASCIIString, "cas", false)); |
| tests.add(new SQLPatternTestParams("S", "SQL", false)); |
| tests.add(new SQLPatternTestParams(emptyString, "SQL", false)); |
| tests.add(new SQLPatternTestParams(unicodeString, "¶T", true)); |
| tests.add(new SQLPatternTestParams(unicodeString, "¶A", false)); |
| |
| // Tests for Matcher for patterns of length: 3 < length < 10 |
| tests.add(new SQLPatternTestParams(longASCIIString, "MongoDB", true)); |
| tests.add(new SQLPatternTestParams(longASCIIString, "MongoDz", false)); |
| tests.add(new SQLPatternTestParams("Mon", "MongoDB", false)); |
| tests.add(new SQLPatternTestParams(emptyString, "MongoDB", false)); |
| tests.add(new SQLPatternTestParams(unicodeString, "®i¶", true)); |
| tests.add(new SQLPatternTestParams(unicodeString, "®x¶", false)); |
| |
| // Tests for Matcher for patterns of length >= 10 |
| tests.add(new SQLPatternTestParams(longASCIIString, "multiple datastores", true)); |
| tests.add(new SQLPatternTestParams(longASCIIString, "multiple datastorb", false)); |
| tests.add(new SQLPatternTestParams("multiple", "multiple datastores", false)); |
| tests.add(new SQLPatternTestParams(emptyString, "multiple datastores", false)); |
| tests.add(new SQLPatternTestParams(unicodeString, "¶T¤¤¤ß3¼", true)); |
| tests.add(new SQLPatternTestParams(unicodeString, "¶T¤¤¤ßz¼", false)); |
| |
| for (SQLPatternTestParams test : tests) { |
| setDrillBuf(test.inputString); |
| |
| RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS, "", test.patternString); |
| SqlPatternMatcher sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo); |
| int eval = sqlPatternContains.match(0, byteBuffer.limit(), drillBuf); |
| int expectedEval = test.shouldMatch ? 1 : 0; |
| |
| if (eval != expectedEval) { |
| System.err.format("test failed; params=%s%n", test); |
| } |
| |
| assertEquals(expectedEval, eval); |
| } |
| } |
| |
| |
| @After |
| public void cleanup() { |
| drillBuf.close(); |
| allocator.close(); |
| } |
| |
| // ------------- |
| // Inner Classes |
| // ------------- |
| |
| /** Container class to hold SQL pattern test data */ |
| private static class SQLPatternTestParams { |
| private final String inputString; |
| private final String patternString; |
| private final boolean shouldMatch; |
| |
| private SQLPatternTestParams(String inputString, String patternString, boolean shouldMatch) { |
| this.inputString = inputString; |
| this.patternString = patternString; |
| this.shouldMatch = shouldMatch; |
| } |
| |
| public String toString() { |
| return "input=["+inputString+"], pattern=["+patternString+"], should-match=["+shouldMatch+"].."; |
| } |
| } |
| } |
| |