// blob: 66b3a5c774388c8493da838ed22d1a4127d63aee
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.common.ResourceLoader;
/**
* @deprecated Remove this test in Lucene 5.0
*/
@Deprecated
public class TestSynonymMap extends LuceneTestCase {
public void testInvalidMappingRules() throws Exception {
SlowSynonymMap synMap = new SlowSynonymMap( true );
List<String> rules = new ArrayList<String>( 1 );
rules.add( "a=>b=>c" );
try{
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
fail( "RuntimeException must be thrown." );
}
catch( RuntimeException expected ){}
}
public void testReadMappingRules() throws Exception {
SlowSynonymMap synMap;
// (a)->[b]
List<String> rules = new ArrayList<String>();
rules.add( "a=>b" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 1, synMap.submap.size() );
assertTokIncludes( synMap, "a", "b" );
// (a)->[c]
// (b)->[c]
rules.clear();
rules.add( "a,b=>c" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "c" );
assertTokIncludes( synMap, "b", "c" );
// (a)->[b][c]
rules.clear();
rules.add( "a=>b,c" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 1, synMap.submap.size() );
assertTokIncludes( synMap, "a", "b" );
assertTokIncludes( synMap, "a", "c" );
// (a)->(b)->[a2]
// [a1]
rules.clear();
rules.add( "a=>a1" );
rules.add( "a b=>a2" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 1, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a1" );
assertEquals( 1, getSubSynonymMap( synMap, "a" ).submap.size() );
assertTokIncludes( getSubSynonymMap( synMap, "a" ), "b", "a2" );
// (a)->(b)->[a2]
// (c)->[a3]
// [a1]
rules.clear();
rules.add( "a=>a1" );
rules.add( "a b=>a2" );
rules.add( "a c=>a3" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 1, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a1" );
assertEquals( 2, getSubSynonymMap( synMap, "a" ).submap.size() );
assertTokIncludes( getSubSynonymMap( synMap, "a" ), "b", "a2" );
assertTokIncludes( getSubSynonymMap( synMap, "a" ), "c", "a3" );
// (a)->(b)->[a2]
// [a1]
// (b)->(c)->[b2]
// [b1]
rules.clear();
rules.add( "a=>a1" );
rules.add( "a b=>a2" );
rules.add( "b=>b1" );
rules.add( "b c=>b2" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a1" );
assertEquals( 1, getSubSynonymMap( synMap, "a" ).submap.size() );
assertTokIncludes( getSubSynonymMap( synMap, "a" ), "b", "a2" );
assertTokIncludes( synMap, "b", "b1" );
assertEquals( 1, getSubSynonymMap( synMap, "b" ).submap.size() );
assertTokIncludes( getSubSynonymMap( synMap, "b" ), "c", "b2" );
}
public void testRead1waySynonymRules() throws Exception {
SlowSynonymMap synMap;
// (a)->[a]
// (b)->[a]
List<String> rules = new ArrayList<String>();
rules.add( "a,b" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", false, null);
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertTokIncludes( synMap, "b", "a" );
// (a)->[a]
// (b)->[a]
// (c)->[a]
rules.clear();
rules.add( "a,b,c" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", false, null);
assertEquals( 3, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertTokIncludes( synMap, "b", "a" );
assertTokIncludes( synMap, "c", "a" );
// (a)->[a]
// (b1)->(b2)->[a]
rules.clear();
rules.add( "a,b1 b2" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", false, null);
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertEquals( 1, getSubSynonymMap( synMap, "b1" ).submap.size() );
assertTokIncludes( getSubSynonymMap( synMap, "b1" ), "b2", "a" );
// (a1)->(a2)->[a1][a2]
// (b)->[a1][a2]
rules.clear();
rules.add( "a1 a2,b" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", false, null);
assertEquals( 2, synMap.submap.size() );
assertEquals( 1, getSubSynonymMap( synMap, "a1" ).submap.size() );
assertTokIncludes( getSubSynonymMap( synMap, "a1" ), "a2", "a1" );
assertTokIncludes( getSubSynonymMap( synMap, "a1" ), "a2", "a2" );
assertTokIncludes( synMap, "b", "a1" );
assertTokIncludes( synMap, "b", "a2" );
}
public void testRead2waySynonymRules() throws Exception {
SlowSynonymMap synMap;
// (a)->[a][b]
// (b)->[a][b]
List<String> rules = new ArrayList<String>();
rules.add( "a,b" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertTokIncludes( synMap, "a", "b" );
assertTokIncludes( synMap, "b", "a" );
assertTokIncludes( synMap, "b", "b" );
// (a)->[a][b][c]
// (b)->[a][b][c]
// (c)->[a][b][c]
rules.clear();
rules.add( "a,b,c" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 3, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertTokIncludes( synMap, "a", "b" );
assertTokIncludes( synMap, "a", "c" );
assertTokIncludes( synMap, "b", "a" );
assertTokIncludes( synMap, "b", "b" );
assertTokIncludes( synMap, "b", "c" );
assertTokIncludes( synMap, "c", "a" );
assertTokIncludes( synMap, "c", "b" );
assertTokIncludes( synMap, "c", "c" );
// (a)->[a]
// [b1][b2]
// (b1)->(b2)->[a]
// [b1][b2]
rules.clear();
rules.add( "a,b1 b2" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertTokIncludes( synMap, "a", "b1" );
assertTokIncludes( synMap, "a", "b2" );
assertEquals( 1, getSubSynonymMap( synMap, "b1" ).submap.size() );
assertTokIncludes( getSubSynonymMap( synMap, "b1" ), "b2", "a" );
assertTokIncludes( getSubSynonymMap( synMap, "b1" ), "b2", "b1" );
assertTokIncludes( getSubSynonymMap( synMap, "b1" ), "b2", "b2" );
// (a1)->(a2)->[a1][a2]
// [b]
// (b)->[a1][a2]
// [b]
rules.clear();
rules.add( "a1 a2,b" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 2, synMap.submap.size() );
assertEquals( 1, getSubSynonymMap( synMap, "a1" ).submap.size() );
assertTokIncludes( getSubSynonymMap( synMap, "a1" ), "a2", "a1" );
assertTokIncludes( getSubSynonymMap( synMap, "a1" ), "a2", "a2" );
assertTokIncludes( getSubSynonymMap( synMap, "a1" ), "a2", "b" );
assertTokIncludes( synMap, "b", "a1" );
assertTokIncludes( synMap, "b", "a2" );
assertTokIncludes( synMap, "b", "b" );
}
public void testBigramTokenizer() throws Exception {
SlowSynonymMap synMap;
// prepare bi-gram tokenizer factory
BaseTokenizerFactory tf = new NGramTokenizerFactory();
Map<String, String> args = new HashMap<String, String>();
args.put("minGramSize","2");
args.put("maxGramSize","2");
tf.init( args );
// (ab)->(bc)->(cd)->[ef][fg][gh]
List<String> rules = new ArrayList<String>();
rules.add( "abcd=>efgh" );
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, tf);
assertEquals( 1, synMap.submap.size() );
assertEquals( 1, getSubSynonymMap( synMap, "ab" ).submap.size() );
assertEquals( 1, getSubSynonymMap( getSubSynonymMap( synMap, "ab" ), "bc" ).submap.size() );
assertTokIncludes( getSubSynonymMap( getSubSynonymMap( synMap, "ab" ), "bc" ), "cd", "ef" );
assertTokIncludes( getSubSynonymMap( getSubSynonymMap( synMap, "ab" ), "bc" ), "cd", "fg" );
assertTokIncludes( getSubSynonymMap( getSubSynonymMap( synMap, "ab" ), "bc" ), "cd", "gh" );
}
public void testLoadRules() throws Exception {
Map<String, String> args = new HashMap<String, String>();
args.put( "synonyms", "something.txt" );
SlowSynonymFilterFactory ff = new SlowSynonymFilterFactory();
ff.init(args);
ff.inform( new ResourceLoader() {
@Override
public List<String> getLines(String resource) throws IOException {
if( !"something.txt".equals(resource) ) {
throw new RuntimeException( "should not get a differnt resource" );
}
List<String> rules = new ArrayList<String>();
rules.add( "a,b" );
return rules;
}
@Override
public Object newInstance(String cname, String... subpackages) {
throw new RuntimeException("stub");
}
@Override
public InputStream openResource(String resource) throws IOException {
throw new RuntimeException("stub");
}
});
SlowSynonymMap synMap = ff.getSynonymMap();
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertTokIncludes( synMap, "a", "b" );
assertTokIncludes( synMap, "b", "a" );
assertTokIncludes( synMap, "b", "b" );
}
private void assertTokIncludes( SlowSynonymMap map, String src, String exp ) throws Exception {
Token[] tokens = map.submap.get( src ).synonyms;
boolean inc = false;
for( Token token : tokens ){
if( exp.equals( new String(token.buffer(), 0, token.length()) ) )
inc = true;
}
assertTrue( inc );
}
private SlowSynonymMap getSubSynonymMap( SlowSynonymMap map, String src ){
return map.submap.get( src );
}
}