blob: 654f744bc3da0b59fa64b8e7cfd7c9648dd7ef20 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.pattern;
import java.io.StringReader;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
/**
 * Tests for {@link PatternCaptureGroupTokenFilter}.
 *
 * <p>Every fixed-input case goes through {@link #testPatterns}, which splits the input with a
 * whitespace {@link MockTokenizer}, wraps it in the filter, and compares the emitted terms,
 * start/end offsets and position values against the expected arrays. Most scenarios are run
 * four times: single-token and multi-token input, each with {@code preserveOriginal} set to
 * {@code false} and {@code true}.
 */
public class TestPatternCaptureGroupTokenFilter extends BaseTokenStreamTestCase {

  /** With no patterns at all, the expected output is the unchanged input tokens. */
  public void testNoPattern() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        true
    );
    testPatterns(
        "foo bar baz",
        new String[] {},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );
    testPatterns(
        "foo bar baz",
        new String[] {},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        true
    );
  }

  /** A pattern that never matches is expected to leave the tokens unchanged. */
  public void testNoMatch() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"xx"},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"xx"},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        true
    );
    testPatterns(
        "foo bar baz",
        new String[] {"xx"},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );
    testPatterns(
        "foo bar baz",
        new String[] {"xx"},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        true
    );
  }

  /** A pattern that matches but has no capture group is expected to leave the tokens unchanged. */
  public void testNoCapture() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {".."},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {".."},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        true
    );
    testPatterns(
        "foo bar baz",
        new String[] {".."},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );
    testPatterns(
        "foo bar baz",
        new String[] {".."},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        true
    );
  }

  /** A capture group that only ever captures the empty string is expected to add no tokens. */
  public void testEmptyCapture() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {".(y*)"},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {".(y*)"},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        true
    );
    testPatterns(
        "foo bar baz",
        new String[] {".(y*)"},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );
    testPatterns(
        "foo bar baz",
        new String[] {".(y*)"},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        true
    );
  }

  /**
   * A group capturing the entire token is expected to yield a single token per input token,
   * even when the original is preserved (no duplicate of the original is expected).
   */
  public void testCaptureAll() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"(.+)"},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"(.+)"},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        true
    );
    testPatterns(
        "foo bar baz",
        new String[] {"(.+)"},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );
    testPatterns(
        "foo bar baz",
        new String[] {"(.+)"},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        true
    );
  }

  /** Captures the first character of each token. */
  public void testCaptureStart() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"^(.)"},
        new String[] {"f"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"^(.)"},
        new String[] {"foobarbaz","f"},
        new int[] {0,0},
        new int[] {9,9},
        new int[] {1,0},
        true
    );
    testPatterns(
        "foo bar baz",
        new String[] {"^(.)"},
        new String[] {"f","b","b"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );
    testPatterns(
        "foo bar baz",
        new String[] {"^(.)"},
        new String[] {"foo","f","bar","b","baz","b"},
        new int[] {0,0,4,4,8,8},
        new int[] {3,3,7,7,11,11},
        new int[] {1,0,1,0,1,0},
        true
    );
  }

  /** Captures the second character of each token. */
  public void testCaptureMiddle() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"^.(.)."},
        new String[] {"o"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"^.(.)."},
        new String[] {"foobarbaz","o"},
        new int[] {0,0},
        new int[] {9,9},
        new int[] {1,0},
        true
    );
    testPatterns(
        "foo bar baz",
        new String[] {"^.(.)."},
        new String[] {"o","a","a"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );
    testPatterns(
        "foo bar baz",
        new String[] {"^.(.)."},
        new String[] {"foo","o","bar","a","baz","a"},
        new int[] {0,0,4,4,8,8},
        new int[] {3,3,7,7,11,11},
        new int[] {1,0,1,0,1,0},
        true
    );
  }

  /** Captures the last character of each token. */
  public void testCaptureEnd() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"(.)$"},
        new String[] {"z"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"(.)$"},
        new String[] {"foobarbaz","z"},
        new int[] {0,0},
        new int[] {9,9},
        new int[] {1,0},
        true
    );
    testPatterns(
        "foo bar baz",
        new String[] {"(.)$"},
        new String[] {"o","r","z"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );
    testPatterns(
        "foo bar baz",
        new String[] {"(.)$"},
        new String[] {"foo","o","bar","r","baz","z"},
        new int[] {0,0,4,4,8,8},
        new int[] {3,3,7,7,11,11},
        new int[] {1,0,1,0,1,0},
        true
    );
  }

  /** Two adjacent groups capturing the first and second character of each token. */
  public void testCaptureStartMiddle() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"^(.)(.)"},
        new String[] {"f","o"},
        new int[] {0,0},
        new int[] {9,9},
        new int[] {1,0},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"^(.)(.)"},
        new String[] {"foobarbaz","f","o"},
        new int[] {0,0,0},
        new int[] {9,9,9},
        new int[] {1,0,0},
        true
    );
    testPatterns(
        "foo bar baz",
        new String[] {"^(.)(.)"},
        new String[] {"f","o","b","a","b","a"},
        new int[] {0,0,4,4,8,8},
        new int[] {3,3,7,7,11,11},
        new int[] {1,0,1,0,1,0},
        false
    );
    testPatterns(
        "foo bar baz",
        new String[] {"^(.)(.)"},
        new String[] {"foo","f","o","bar","b","a","baz","b","a"},
        new int[] {0,0,0,4,4,4,8,8,8},
        new int[] {3,3,3,7,7,7,11,11,11},
        new int[] {1,0,0,1,0,0,1,0,0},
        true
    );
  }

  /** Two groups capturing the first and last character of each token. */
  public void testCaptureStartEnd() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"^(.).+(.)$"},
        new String[] {"f","z"},
        new int[] {0,0},
        new int[] {9,9},
        new int[] {1,0},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"^(.).+(.)$"},
        new String[] {"foobarbaz","f","z"},
        new int[] {0,0,0},
        new int[] {9,9,9},
        new int[] {1,0,0},
        true
    );
    testPatterns(
        "foo bar baz",
        new String[] {"^(.).+(.)$"},
        new String[] {"f","o","b","r","b","z"},
        new int[] {0,0,4,4,8,8},
        new int[] {3,3,7,7,11,11},
        new int[] {1,0,1,0,1,0},
        false
    );
    testPatterns(
        "foo bar baz",
        new String[] {"^(.).+(.)$"},
        new String[] {"foo","f","o","bar","b","r","baz","b","z"},
        new int[] {0,0,0,4,4,4,8,8,8},
        new int[] {3,3,3,7,7,7,11,11,11},
        new int[] {1,0,0,1,0,0,1,0,0},
        true
    );
  }

  /** Two adjacent groups capturing the last two characters of each token. */
  public void testCaptureMiddleEnd() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"(.)(.)$"},
        new String[] {"a","z"},
        new int[] {0,0},
        new int[] {9,9},
        new int[] {1,0},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"(.)(.)$"},
        new String[] {"foobarbaz","a","z"},
        new int[] {0,0,0},
        new int[] {9,9,9},
        new int[] {1,0,0},
        true
    );
    testPatterns(
        "foo bar baz",
        new String[] {"(.)(.)$"},
        new String[] {"o","o","a","r","a","z"},
        new int[] {0,0,4,4,8,8},
        new int[] {3,3,7,7,11,11},
        new int[] {1,0,1,0,1,0},
        false
    );
    testPatterns(
        "foo bar baz",
        new String[] {"(.)(.)$"},
        new String[] {"foo","o","o","bar","a","r","baz","a","z"},
        new int[] {0,0,0,4,4,4,8,8,8},
        new int[] {3,3,3,7,7,7,11,11,11},
        new int[] {1,0,0,1,0,0,1,0,0},
        true
    );
  }

  /**
   * Nested groups that overlap, applied repeatedly across the token. For the three-letter
   * tokens the outermost capture equals the token itself, and the expected output with
   * {@code preserveOriginal} is identical to the output without it.
   */
  public void testMultiCaptureOverlap() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"(.(.(.)))"},
        new String[] {"foo","oo","o","bar","ar","r","baz","az","z"},
        new int[] {0,0,0,0,0,0,0,0,0},
        new int[] {9,9,9,9,9,9,9,9,9},
        new int[] {1,0,0,0,0,0,0,0,0},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"(.(.(.)))"},
        new String[] {"foobarbaz","foo","oo","o","bar","ar","r","baz","az","z"},
        new int[] {0,0,0,0,0,0,0,0,0,0},
        new int[] {9,9,9,9,9,9,9,9,9,9},
        new int[] {1,0,0,0,0,0,0,0,0,0},
        true
    );
    testPatterns(
        "foo bar baz",
        new String[] {"(.(.(.)))"},
        new String[] {"foo","oo","o","bar","ar","r","baz","az","z"},
        new int[] {0,0,0,4,4,4,8,8,8},
        new int[] {3,3,3,7,7,7,11,11,11},
        new int[] {1,0,0,1,0,0,1,0,0},
        false
    );
    testPatterns(
        "foo bar baz",
        new String[] {"(.(.(.)))"},
        new String[] {"foo","oo","o","bar","ar","r","baz","az","z"},
        new int[] {0,0,0,4,4,4,8,8,8},
        new int[] {3,3,3,7,7,7,11,11,11},
        new int[] {1,0,0,1,0,0,1,0,0},
        true
    );
  }

  /** Several independent patterns, one of which never matches. */
  public void testMultiPattern() throws Exception {
    testPatterns(
        "aaabbbaaa",
        new String[] {"(aaa)","(bbb)","(ccc)"},
        new String[] {"aaa","bbb","aaa"},
        new int[] {0,0,0},
        new int[] {9,9,9},
        new int[] {1,0,0},
        false
    );
    testPatterns(
        "aaabbbaaa",
        new String[] {"(aaa)","(bbb)","(ccc)"},
        new String[] {"aaabbbaaa","aaa","bbb","aaa"},
        new int[] {0,0,0,0},
        new int[] {9,9,9,9},
        new int[] {1,0,0,0},
        true
    );
    testPatterns(
        "aaa bbb aaa",
        new String[] {"(aaa)","(bbb)","(ccc)"},
        new String[] {"aaa","bbb","aaa"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );
    testPatterns(
        "aaa bbb aaa",
        new String[] {"(aaa)","(bbb)","(ccc)"},
        new String[] {"aaa","bbb","aaa"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        true
    );
  }

  /**
   * Splits a camelCase / mixed identifier into its parts using four cooperating patterns
   * (upper-case runs, capitalised words, lower-case words, digit runs).
   */
  public void testCamelCase() throws Exception {
    testPatterns(
        "letsPartyLIKEits1999_dude",
        new String[] {
            "([A-Z]{2,})",
            "(?<![A-Z])([A-Z][a-z]+)",
            "(?:^|\\b|(?<=[0-9_])|(?<=[A-Z]{2}))([a-z]+)",
            "([0-9]+)"
        },
        new String[] {"lets","Party","LIKE","its","1999","dude"},
        new int[] {0,0,0,0,0,0},
        new int[] {25,25,25,25,25,25},
        // One entry per expected token (6), matching the other expectation arrays.
        new int[] {1,0,0,0,0,0},
        false
    );
    testPatterns(
        "letsPartyLIKEits1999_dude",
        new String[] {
            "([A-Z]{2,})",
            "(?<![A-Z])([A-Z][a-z]+)",
            "(?:^|\\b|(?<=[0-9_])|(?<=[A-Z]{2}))([a-z]+)",
            "([0-9]+)"
        },
        new String[] {"letsPartyLIKEits1999_dude","lets","Party","LIKE","its","1999","dude"},
        new int[] {0,0,0,0,0,0,0},
        new int[] {25,25,25,25,25,25,25},
        // One entry per expected token (7), matching the other expectation arrays.
        new int[] {1,0,0,0,0,0,0},
        true
    );
  }

  /**
   * Runs the shared random-data check against an analyzer built from a whitespace
   * {@link MockTokenizer} and the filter with nested capture groups.
   */
  public void testRandomString() throws Exception {
    // try-with-resources so the analyzer is closed even when checkRandomData fails.
    try (Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer,
            new PatternCaptureGroupTokenFilter(tokenizer, false,
                Pattern.compile("((..)(..))")));
      }
    }) {
      checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
    }
  }

  /**
   * Tokenizes {@code input} on whitespace, applies the capture-group filter built from
   * {@code regexes}, and asserts the resulting stream against the expected arrays.
   *
   * @param input text handed to the tokenizer
   * @param regexes regular expressions, each compiled with {@link Pattern#compile(String)}
   * @param tokens expected terms, in emission order
   * @param startOffsets expected start offset of each term
   * @param endOffsets expected end offset of each term
   * @param positions expected position value of each term, forwarded as the last argument of
   *     {@code assertTokenStreamContents}; the data in this class uses 1 for a token at a new
   *     position and 0 for a stacked capture, i.e. it reads as position increments (confirm
   *     against {@code BaseTokenStreamTestCase})
   * @param preserveOriginal forwarded to the filter constructor
   */
  private void testPatterns(String input, String[] regexes, String[] tokens,
      int[] startOffsets, int[] endOffsets, int[] positions,
      boolean preserveOriginal) throws Exception {
    Pattern[] patterns = new Pattern[regexes.length];
    for (int i = 0; i < regexes.length; i++) {
      patterns[i] = Pattern.compile(regexes[i]);
    }
    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    tokenizer.setReader(new StringReader(input));
    TokenStream ts = new PatternCaptureGroupTokenFilter(tokenizer, preserveOriginal, patterns);
    assertTokenStreamContents(ts, tokens, startOffsets, endOffsets, positions);
  }
}