blob: 71d5af5dffafa7f8dac819e42065f9db4637ea1b [file] [log] [blame]
/*
* This software was produced for the U. S. Government
* under Contract No. W15P7T-11-C-F600, and is
* subject to the Rights in Noncommercial Computer Software
* and Noncommercial Computer Software Documentation
* Clause 252.227-7014 (JUN 1995)
*
* Copyright 2013 The MITRE Corporation. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.tagger;
import java.util.Arrays;
import java.util.stream.Collectors;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import org.junit.Ignore;
/**
* The original test for {@link TaggerRequestHandler}.
*/
public class TaggerTest extends TaggerTestCase {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-tagger.xml", "schema-tagger.xml");
}
private void indexAndBuild() throws Exception {
N[] names = N.values();
String[] namesStrs = new String[names.length];
for (int i = 0; i < names.length; i++) {
namesStrs[i] = names[i].getName();
}
buildNames(namesStrs);
}
/** Name corpus */
enum N {
//keep order to retain ord()
London, London_Business_School, Boston, City_of_London,
of, the//filtered out of the corpus by a custom query
;
String getName() { return name().replace('_',' '); }
static N lookupByName(String name) { return N.valueOf(name.replace(' ', '_')); }
int getId() { return ordinal(); }
}
public void testFormat() throws Exception {
baseParams.set("overlaps", "NO_SUB");
indexAndBuild();
String rspStr = _testFormatRequest(false);
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
"<response>\n" +
"\n" +
"<int name=\"tagsCount\">1</int>\n" +
"<arr name=\"tags\">\n" +
" <lst>\n" +
" <int name=\"startOffset\">0</int>\n" +
" <int name=\"endOffset\">22</int>\n" +
" <arr name=\"ids\">\n" +
" <str>1</str>\n" +
" </arr>\n" +
" </lst>\n" +
"</arr>\n" +
"<result name=\"response\" numFound=\"1\" start=\"0\" numFoundExact=\"true\">\n" +
" <doc>\n" +
" <str name=\"id\">1</str>\n" +
" <str name=\"name\">London Business School</str>\n" +
" <str name=\"_root_\">1</str></doc>\n" +
"</result>\n" +
"</response>\n";
assertEquals(expected, rspStr);
}
public void testFormatMatchText() throws Exception {
baseParams.set("overlaps", "NO_SUB");
indexAndBuild();
String rspStr = _testFormatRequest(true);
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
"<response>\n" +
"\n" +
"<int name=\"tagsCount\">1</int>\n" +
"<arr name=\"tags\">\n" +
" <lst>\n" +
" <int name=\"startOffset\">0</int>\n" +
" <int name=\"endOffset\">22</int>\n" +
" <str name=\"matchText\">london business school</str>\n" +
" <arr name=\"ids\">\n" +
" <str>1</str>\n" +
" </arr>\n" +
" </lst>\n" +
"</arr>\n" +
"<result name=\"response\" numFound=\"1\" start=\"0\" numFoundExact=\"true\">\n" +
" <doc>\n" +
" <str name=\"id\">1</str>\n" +
" <str name=\"name\">London Business School</str>\n" +
" <str name=\"_root_\">1</str></doc>\n" +
"</result>\n" +
"</response>\n";
assertEquals(expected, rspStr);
}
private String _testFormatRequest(boolean matchText) throws Exception {
String doc = "london business school";//just one tag
SolrQueryRequest req = reqDoc(doc, "indent", "on", "omitHeader", "on", "matchText", ""+matchText);
String rspStr = h.query(req);
req.close();
return rspStr;
}
/** Partial matching, no sub-tags */
@Ignore //TODO ConcatenateGraphFilter uses a special separator char that we can't put into XML (invalid char)
public void testPartialMatching() throws Exception {
baseParams.set("field", "name_tagPartial");
baseParams.set("overlaps", "NO_SUB");
baseParams.set("fq", "NOT name:(of the)");//test filtering
indexAndBuild();
//these match nothing
assertTags(reqDoc("") );
assertTags(reqDoc(" ") );
assertTags(reqDoc("the") );
String doc;
//just London Business School via "school" substring
doc = "school";
assertTags(reqDoc(doc), tt(doc,"school", 0, N.London_Business_School));
doc = "a school";
assertTags(reqDoc(doc), tt(doc,"school", 0, N.London_Business_School));
doc = "school a";
assertTags(reqDoc(doc), tt(doc,"school", 0, N.London_Business_School));
//More interesting
doc = "school City";
assertTags(reqDoc(doc),
tt(doc, "school", 0, N.London_Business_School),
tt(doc, "City", 0, N.City_of_London) );
doc = "City of London Business School";
assertTags(reqDoc(doc), //no plain London (sub-tag)
tt(doc, "City of London", 0, N.City_of_London),
tt(doc, "London Business School", 0, N.London_Business_School));
}
/** whole matching, no sub-tags */
public void testWholeMatching() throws Exception {
baseParams.set("overlaps", "NO_SUB");
baseParams.set("fq", "NOT name:(of the)");//test filtering
indexAndBuild();
//these match nothing
assertTags(reqDoc(""));
assertTags(reqDoc(" ") );
assertTags(reqDoc("the") );
//partial on N.London_Business_School matches nothing
assertTags(reqDoc("school") );
assertTags(reqDoc("a school") );
assertTags(reqDoc("school a") );
assertTags(reqDoc("school City") );
String doc;
doc = "school business london";//backwards
assertTags(reqDoc(doc), tt(doc,"london", 0, N.London));
doc = "of London Business School";
assertTags(reqDoc(doc), //no plain London (sub-tag)
tt(doc, "London Business School", 0, N.London_Business_School));
//More interesting
doc = "City of London Business School";
assertTags(reqDoc(doc), //no plain London (sub-tag)
tt(doc, "City of London", 0, N.City_of_London),
tt(doc, "London Business School", 0, N.London_Business_School));
doc = "City of London Business";
assertTags(reqDoc(doc), //no plain London (sub-tag) no Business (partial-match)
tt(doc, "City of London", 0, N.City_of_London));
doc = "London Business magazine";
assertTags(reqDoc(doc), //Just London; L.B.S. fails
tt(doc, "London", 0, N.London));
}
/** whole matching, with sub-tags */
public void testSubTags() throws Exception {
baseParams.set("overlaps", "ALL");
baseParams.set("fq", "NOT name:(of the)");//test filtering
indexAndBuild();
//these match nothing
assertTags(reqDoc(""));
assertTags(reqDoc(" ") );
assertTags(reqDoc("the") );
//partial on N.London_Business_School matches nothing
assertTags(reqDoc("school") );
assertTags(reqDoc("a school") );
assertTags(reqDoc("school a") );
assertTags(reqDoc("school City") );
String doc;
doc = "school business london";//backwards
assertTags(reqDoc(doc), tt(doc,"london", 0, N.London));
//More interesting
doc = "City of London Business School";
assertTags(reqDoc(doc),
tt(doc, "City of London", 0, N.City_of_London),
tt(doc, "London", 0, N.London),
tt(doc, "London Business School", 0, N.London_Business_School));
doc = "City of London Business";
assertTags(reqDoc(doc),
tt(doc, "City of London", 0, N.City_of_London),
tt(doc, "London", 0, N.London));
}
public void testMultipleFilterQueries() throws Exception {
baseParams.set("overlaps", "ALL");
// build up the corpus with some additional fields for filtering purposes
deleteByQueryAndGetVersion("*:*", null);
int i = 0;
assertU(adoc("id", ""+i++, "name", N.London.getName(), "type", "city", "country", "UK"));
assertU(adoc("id", ""+i++, "name", N.London_Business_School.getName(), "type", "school", "country", "UK"));
assertU(adoc("id", ""+i++, "name", N.Boston.getName(), "type", "city", "country", "US"));
assertU(adoc("id", ""+i++, "name", N.City_of_London.getName(), "type", "org", "country", "UK"));
assertU(commit());
// not calling buildNames so that we can bring along extra attributes for filtering
NAMES = Arrays.stream(N.values()).map(N::getName).collect(Collectors.toList());
// phrase that matches everything
String doc = "City of London Business School in Boston";
// first do no filtering
ModifiableSolrParams p = new ModifiableSolrParams();
p.add(CommonParams.Q, "*:*");
assertTags(reqDoc(doc, p),
tt(doc, "City of London", 0, N.City_of_London),
tt(doc, "London", 0, N.London),
tt(doc, "London Business School", 0, N.London_Business_School),
tt(doc, "Boston", 0, N.Boston));
// add a single fq
p.add(CommonParams.FQ, "type:city");
assertTags(reqDoc(doc, p),
tt(doc, "London", 0, N.London),
tt(doc, "Boston", 0, N.Boston));
// add another fq
p.add(CommonParams.FQ, "country:US");
assertTags(reqDoc(doc, p),
tt(doc, "Boston", 0, N.Boston));
}
private TestTag tt(String doc, String substring, int substringIndex, N name) {
assert substringIndex == 0;
//little bit of copy-paste code from super.tt()
int startOffset = -1, endOffset;
int substringIndex1 = 0;
for(int i = 0; i <= substringIndex1; i++) {
startOffset = doc.indexOf(substring, ++startOffset);
assert startOffset >= 0 : "The test itself is broken";
}
endOffset = startOffset+ substring.length();//1 greater (exclusive)
return new TestTag(startOffset, endOffset, substring, lookupByName(name.getName()));
}
public void testEmptyCollection() throws Exception {
//SOLR-14396: Ensure tagger handler doesn't fail on empty collections
SolrQueryRequest req = reqDoc("anything", "indent", "on", "omitHeader", "on", "matchText", "false");
String rspStr = h.query(req);
req.close();
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
"<response>\n" +
"\n" +
"<int name=\"tagsCount\">0</int>\n" +
"<arr name=\"tags\"/>\n" +
"<result name=\"response\" numFound=\"0\" start=\"0\" numFoundExact=\"true\">\n" +
"</result>\n" +
"</response>\n";
assertEquals(expected, rspStr);
}
}