blob: 69f79287595f7dada1c0202434be079298f488b8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.plugins.index.solr.configuration;
import java.io.Reader;
import java.io.StringReader;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter;
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.pattern.PatternCaptureGroupTokenFilter;
import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.apache.lucene.util.Version;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertAnalyzesTo;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
/**
* Testcase for checking default analyzers configurations behave as expected with regards to path related restrictions
*
* Note that default Solr analyzers for Oak should be equivalent to the ones programmatically defined here.
*/
public class DefaultAnalyzersConfigurationTest {
private Analyzer parentPathIndexingAnalyzer;
private Analyzer parentPathSearchingAnalyzer;
private Analyzer exactPathAnalyzer;
private Analyzer directChildrenPathIndexingAnalyzer;
private Analyzer directChildrenPathSearchingAnalyzer;
private Analyzer allChildrenPathIndexingAnalyzer;
private Analyzer allChildrenPathSearchingAnalyzer;
@Before
public void setUp() throws Exception {
this.exactPathAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer source = new KeywordTokenizer(reader);
return new TokenStreamComponents(source);
}
};
this.parentPathIndexingAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer source = new KeywordTokenizer(reader);
return new TokenStreamComponents(source);
}
};
this.parentPathSearchingAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer source = new KeywordTokenizer(reader);
TokenStream filter = new ReverseStringFilter(Version.LUCENE_47, source);
filter = new PatternReplaceFilter(filter, Pattern.compile("[^\\/]+\\/"), "", false);
filter = new ReverseStringFilter(Version.LUCENE_47, filter);
return new TokenStreamComponents(source, filter);
}
};
this.directChildrenPathIndexingAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer source = new KeywordTokenizer(reader);
TokenStream filter = new ReverseStringFilter(Version.LUCENE_47, source);
filter = new LengthFilter(Version.LUCENE_47, filter, 2, Integer.MAX_VALUE);
filter = new PatternReplaceFilter(filter, Pattern.compile("([^\\/]+)(\\/)"), "$2", false);
filter = new PatternReplaceFilter(filter, Pattern.compile("(\\/)(.+)"), "$2", false);
filter = new ReverseStringFilter(Version.LUCENE_47, filter);
return new TokenStreamComponents(source, filter);
}
};
this.directChildrenPathSearchingAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer source = new KeywordTokenizer(reader);
return new TokenStreamComponents(source);
}
};
this.allChildrenPathIndexingAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer source = new PathHierarchyTokenizer(reader);
TokenStream filter = new PatternCaptureGroupTokenFilter(source, false, Pattern.compile("((\\/).*)"));
filter = new RemoveDuplicatesTokenFilter(filter);
return new TokenStreamComponents(source, filter);
}
};
this.allChildrenPathSearchingAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer source = new KeywordTokenizer(reader);
return new TokenStreamComponents(source);
}
};
}
@Test
public void testAllChildrenIndexingTokenization() throws Exception {
try {
TokenStream ts = allChildrenPathIndexingAnalyzer.tokenStream("text", new StringReader("/jcr:a/jcr:b/c/jcr:d"));
assertTokenStreamContents(ts, new String[]{"/jcr:a", "/", "/jcr:a/jcr:b", "/jcr:a/jcr:b/c", "/jcr:a/jcr:b/c/jcr:d"});
} finally {
allChildrenPathIndexingAnalyzer.close();
}
}
@Test
public void testAllChildrenSearchingTokenization() throws Exception {
try {
TokenStream ts = allChildrenPathSearchingAnalyzer.tokenStream("text", new StringReader("/jcr:a/jcr:b/jcr:c"));
assertTokenStreamContents(ts, new String[]{"/jcr:a/jcr:b/jcr:c"});
} finally {
allChildrenPathSearchingAnalyzer.close();
}
}
@Test
public void testDirectChildrenPathIndexingTokenization() throws Exception {
try {
TokenStream ts = directChildrenPathIndexingAnalyzer.tokenStream("text", new StringReader("/jcr:a/b/jcr:c"));
assertTokenStreamContents(ts, new String[]{"/jcr:a/b"});
ts = directChildrenPathIndexingAnalyzer.tokenStream("text", new StringReader("/jcr:a"));
assertTokenStreamContents(ts, new String[]{"/"});
ts = directChildrenPathIndexingAnalyzer.tokenStream("text", new StringReader("/"));
assertTokenStreamContents(ts, new String[]{});
} finally {
directChildrenPathIndexingAnalyzer.close();
}
}
@Test
public void testDirectChildrenPathSearchingTokenization() throws Exception {
try {
TokenStream ts = directChildrenPathSearchingAnalyzer.tokenStream("text", new StringReader("/jcr:a/jcr:b"));
assertTokenStreamContents(ts, new String[]{"/jcr:a/jcr:b"});
} finally {
directChildrenPathSearchingAnalyzer.close();
}
}
@Test
public void testExactPathIndexingTokenizationAndSearch() throws Exception {
try {
TokenStream ts = exactPathAnalyzer.tokenStream("text", new StringReader("/jcr:a/jcr:b/c"));
assertTokenStreamContents(ts, new String[]{"/jcr:a/jcr:b/c"});
} finally {
exactPathAnalyzer.close();
}
}
@Test
public void testParentPathSearchingTokenization() throws Exception {
try {
TokenStream ts = parentPathSearchingAnalyzer.tokenStream("text", new StringReader("/jcr:a/b/jcr:c"));
assertTokenStreamContents(ts, new String[]{"/jcr:a/b"});
} finally {
parentPathSearchingAnalyzer.close();
}
}
@Test
public void testParentPathIndexingTokenization() throws Exception {
try {
TokenStream ts = parentPathIndexingAnalyzer.tokenStream("text", new StringReader("/a/b"));
assertTokenStreamContents(ts, new String[]{"/a/b"});
} finally {
parentPathIndexingAnalyzer.close();
}
}
@Test
@Ignore("OAK-9261")
public void testAllChildrenPathMatching() throws Exception {
String nodePath = "/jcr:a/jcr:b/c";
String descendantPath = nodePath + "/d/jcr:e";
assertAnalyzesTo(allChildrenPathIndexingAnalyzer, descendantPath, new String[]{"/jcr:a", "/", "/jcr:a/jcr:b", "/jcr:a/jcr:b/c", "/jcr:a/jcr:b/c/d", "/jcr:a/jcr:b/c/d/jcr:e"});
assertAnalyzesTo(allChildrenPathSearchingAnalyzer, nodePath, new String[]{nodePath});
assertAnalyzesTo(allChildrenPathSearchingAnalyzer, "/jcr:a", new String[]{"/jcr:a"});
assertAnalyzesTo(allChildrenPathSearchingAnalyzer, "/jcr:a/b", new String[]{"/jcr:a/b"});
assertAnalyzesTo(allChildrenPathSearchingAnalyzer, "/a/b/c", new String[]{"/a/b/c"});
assertAnalyzesTo(allChildrenPathSearchingAnalyzer, "/a/b/c/d", new String[]{"/a/b/c/d"});
assertAnalyzesTo(allChildrenPathSearchingAnalyzer, "/a/b/c/d/jcr:e", new String[]{"/a/b/c/d/jcr:e"});
assertAnalyzesTo(allChildrenPathSearchingAnalyzer, "/", new String[]{"/"});
}
@Test
@Ignore("OAK-9261")
public void testAllChildrenPathMatchingOnRootNode() throws Exception {
String nodePath = "/";
String descendantPath = nodePath + "jcr:a/jcr:b";
assertAnalyzesTo(allChildrenPathIndexingAnalyzer, descendantPath, new String[]{"/jcr:a", "/", "/jcr:a/jcr:b"});
}
@Test
@Ignore("OAK-9261")
public void testDirectChildrenPathMatching() throws Exception {
String nodePath = "/a/b/c";
String childPath = nodePath + "/d";
assertAnalyzesTo(directChildrenPathIndexingAnalyzer, childPath, new String[]{nodePath});
assertAnalyzesTo(directChildrenPathSearchingAnalyzer, nodePath, new String[]{nodePath});
nodePath = "/";
childPath = nodePath + "/jcr:a";
assertAnalyzesTo(directChildrenPathIndexingAnalyzer, childPath, new String[]{nodePath});
assertAnalyzesTo(directChildrenPathSearchingAnalyzer, nodePath, new String[]{nodePath});
String childPath1 = "/test/jcr:resource";
String childPath2 = "/test/resource";
nodePath = "/test";
assertAnalyzesTo(directChildrenPathIndexingAnalyzer, childPath1, new String[]{nodePath});
assertAnalyzesTo(directChildrenPathIndexingAnalyzer, childPath2, new String[]{nodePath});
assertAnalyzesTo(directChildrenPathSearchingAnalyzer, nodePath, new String[]{nodePath});
}
@Test
@Ignore("OAK-9261")
public void testParentPathMatching() throws Exception {
String parentPath = "/a/b";
String nodePath = parentPath + "/jcr:c";
assertAnalyzesTo(parentPathIndexingAnalyzer, parentPath, new String[]{parentPath});
assertAnalyzesTo(parentPathSearchingAnalyzer, nodePath, new String[]{parentPath});
}
}