| /** |
| * Derby - Class org.apache.derbyTesting.functionTests.tests.lang.LuceneSupportTest |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.derbyTesting.functionTests.tests.lang; |
| |
| import java.io.IOException; |
| import java.io.Reader; |
| import java.sql.CallableStatement; |
| import java.sql.PreparedStatement; |
| import java.sql.SQLException; |
| import java.sql.Statement; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Locale; |
| import javax.xml.parsers.DocumentBuilderFactory; |
| import junit.framework.Test; |
| import org.apache.derby.optional.api.LuceneIndexDescriptor; |
| import org.apache.derby.optional.api.LuceneUtils; |
| import org.apache.derbyTesting.junit.BaseJDBCTestCase; |
| import org.apache.derbyTesting.junit.BaseTestSuite; |
| import org.apache.derbyTesting.junit.JDBC; |
| import org.apache.derbyTesting.junit.LocaleTestSetup; |
| import org.apache.derbyTesting.junit.SecurityManagerSetup; |
| import org.apache.derbyTesting.junit.TestConfiguration; |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.Tokenizer; |
| import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; |
| import org.apache.lucene.queryparser.classic.QueryParser; |
| import org.apache.lucene.util.Version; |
| import org.w3c.dom.Document; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.NodeList; |
| import org.xml.sax.InputSource; |
| |
| /** |
| * <p> |
| * Basic test of the optional tool which provides Lucene indexing of |
| * columns in Derby tables. |
| * </p> |
| */ |
| public class LuceneSupportTest extends BaseJDBCTestCase { |
| |
/** SQLState asserted when {@code dropIndex} is handed a name such as {@code '../'}. */
private static final String ILLEGAL_CHARACTER = "42XBD";

/**
 * Creates a test case, passing the fixture name through to the base class.
 *
 * @param name name handed to the {@code BaseJDBCTestCase} constructor
 */
public LuceneSupportTest(String name)
{
    super(name);
}
| |
| public static Test suite() { |
| BaseTestSuite suite = new BaseTestSuite("LuceneSupportTest"); |
| |
| Test baseTest = TestConfiguration.embeddedSuite(LuceneSupportTest.class); |
| Test singleUseTest = TestConfiguration.singleUseDatabaseDecorator( baseTest ); |
| Test localizedTest = new LocaleTestSetup( singleUseTest, new Locale( "en", "US" ) ); |
| |
| suite.addTest(SecurityManagerSetup.noSecurityManager(localizedTest)); |
| |
| return suite; |
| } |
| |
| public void testCreateAndQueryIndex() throws Exception { |
| CallableStatement cSt; |
| Statement s = createStatement(); |
| |
| // verify that we are in an en Locale |
| getConnection().prepareStatement |
| ( |
| "create function getDatabaseLocale() returns varchar( 20 )\n" + |
| "language java parameter style java reads sql data\n" + |
| "external name 'org.apache.derbyTesting.functionTests.tests.lang.LuceneSupportPermsTest.getDatabaseLocale()'\n" |
| ).executeUpdate(); |
| JDBC.assertFullResultSet |
| ( |
| s.executeQuery |
| ( |
| "values ( substr( getDatabaseLocale(), 1, 2 ) )" |
| ), |
| new String[][] |
| { |
| { "en" } |
| } |
| ); |
| getConnection().prepareStatement( "drop function getDatabaseLocale" ).executeUpdate(); |
| |
| cSt = prepareCall |
| ( "call LuceneSupport.createIndex('lucenetest','titles','title', null )" ); |
| assertUpdateCount(cSt, 0); |
| |
| String[][] expectedRows = new String[][] |
| { |
| { "1","0","0.8048013" }, |
| { "3","2","0.643841" } |
| }; |
| JDBC.assertFullResultSet |
| ( |
| s.executeQuery |
| ( |
| "select * from table ( lucenetest.titles__title( 'grapes', 1000, null ) ) luceneResults" |
| ), |
| expectedRows |
| ); |
| |
| expectedRows = new String[][] |
| { |
| { "3","2","0.643841" } |
| }; |
| JDBC.assertFullResultSet |
| ( |
| s.executeQuery |
| ( |
| "select * from table ( lucenetest.titles__title( 'grapes', 1000, .75 ) ) luceneResults" |
| ), |
| expectedRows |
| ); |
| |
| JDBC.assertEmpty |
| ( |
| s.executeQuery |
| ( |
| "select * from table ( lucenetest.titles__title( 'grapes', 1000, 0.5) ) luceneResults" |
| ) |
| ); |
| |
| expectedRows = new String[][] |
| { |
| { "The Grapes Of Wrath", "John Steinbeck", "The Viking Press", "0"}, |
| {"Vines, Grapes, and Wines", "Jancis Robinson", "Alfred A. Knopf", "2"} |
| }; |
| JDBC.assertFullResultSet |
| ( |
| s.executeQuery |
| ( |
| "select title, author, publisher, documentID\n" + |
| "from lucenetest.titles t, table ( lucenetest.titles__title( 'grapes', 1000, null ) ) l\n" + |
| "where t.id = l.id\n" |
| ), |
| expectedRows |
| ); |
| |
| cSt = prepareCall |
| ( "call LuceneSupport.dropIndex('lucenetest','titles','title')" ); |
| assertUpdateCount(cSt, 0); |
| |
| } |
| |
| public void testUpdateIndex() throws Exception { |
| CallableStatement cSt; |
| Statement s = createStatement(); |
| |
| cSt = prepareCall |
| ( "call LuceneSupport.createIndex('lucenetest','titles','title', null)" ); |
| assertUpdateCount(cSt, 0); |
| |
| JDBC.assertEmpty |
| ( |
| s.executeQuery |
| ( |
| "select *\n" + |
| "from table ( lucenetest.titles__title( 'mice', 1000, null ) ) luceneResults\n" |
| ) |
| ); |
| |
| cSt = prepareCall( "update TITLES SET TITLE='Of Mice and Men' WHERE ID=1" ); |
| assertUpdateCount(cSt, 1); |
| |
| JDBC.assertEmpty |
| ( |
| s.executeQuery |
| ( |
| "select *\n" + |
| "from table ( lucenetest.titles__title( 'mice', 1000, null ) ) luceneResults\n" |
| ) |
| ); |
| |
| cSt = prepareCall |
| ( "call LuceneSupport.updateIndex('lucenetest','titles','title', null)" ); |
| assertUpdateCount(cSt, 0); |
| |
| String[][] expectedRows = new String[][] |
| { |
| { "1","0","1.058217" } |
| }; |
| JDBC.assertFullResultSet |
| ( |
| s.executeQuery |
| ( |
| "select *\n" + |
| "from table ( lucenetest.titles__title( 'mice', 1000, null ) ) luceneResults\n" |
| ), |
| expectedRows |
| ); |
| |
| cSt = prepareCall |
| ( "call LuceneSupport.dropIndex('lucenetest','titles','title')" ); |
| assertUpdateCount(cSt, 0); |
| |
| } |
| |
| public void testListIndex() throws Exception { |
| CallableStatement cSt; |
| Statement s = createStatement(); |
| |
| cSt = prepareCall |
| ( "call LuceneSupport.createIndex('lucenetest','titles','title', null)" ); |
| assertUpdateCount(cSt, 0); |
| |
| cSt = prepareCall |
| ( "call LuceneSupport.createIndex('lucenetest','titles','author', null)" ); |
| assertUpdateCount(cSt, 0); |
| |
| // leave out lastmodified as the date will change |
| String[][] expectedRows = new String[][] |
| { |
| { "LUCENETEST", "TITLES", "AUTHOR" }, |
| { "LUCENETEST", "TITLES", "TITLE" } |
| }; |
| JDBC.assertFullResultSet |
| ( |
| s.executeQuery |
| ( |
| "select schemaname, tablename, columnname from table ( LuceneSupport.listIndexes() ) listindexes order by schemaname, tablename, columnname" |
| ), |
| expectedRows |
| ); |
| |
| cSt = prepareCall |
| ( "call LuceneSupport.dropIndex('lucenetest','titles','title')" ); |
| assertUpdateCount(cSt, 0); |
| |
| expectedRows = new String[][] |
| { |
| { "LUCENETEST", "TITLES", "AUTHOR" }, |
| }; |
| JDBC.assertFullResultSet |
| ( |
| s.executeQuery |
| ( |
| "select schemaname, tablename, columnname from table ( LuceneSupport.listIndexes() ) listindexes order by schemaname, tablename, columnname" |
| ), |
| expectedRows |
| ); |
| |
| cSt = prepareCall |
| ( "call LuceneSupport.dropIndex('lucenetest','titles','author')" ); |
| assertUpdateCount(cSt, 0); |
| |
| JDBC.assertEmpty |
| ( |
| s.executeQuery |
| ( |
| "select schemaname, tablename, columnname from table ( LuceneSupport.listIndexes() ) listindexes" |
| ) |
| ); |
| |
| } |
| |
| public void testDropIndexBadCharacters() throws Exception { |
| CallableStatement st; |
| |
| assertCallError( ILLEGAL_CHARACTER, "call LuceneSupport.dropIndex('../','','')"); |
| assertCallError( ILLEGAL_CHARACTER, "call LuceneSupport.dropIndex('','../','')"); |
| assertCallError( ILLEGAL_CHARACTER, "call LuceneSupport.dropIndex('','','../')"); |
| |
| } |
| |
| ////////////////////////////////////////////////////////////// |
| // |
| // BEGIN TEST FOR MULTIPLE FIELDS |
| // |
| ////////////////////////////////////////////////////////////// |
| |
| public void testMultipleFields() throws SQLException |
| { |
| println( "Running multi-field test." ); |
| |
| Statement s = createStatement(); |
| |
| s.execute("create table multifield(id int primary key, c clob)"); |
| s.execute("insert into multifield values " |
| + "(1, '<document><secret/>No one must know!</document>'), " |
| + "(2, '<document>No secret here!</document>')"); |
| |
| s.execute("call lucenesupport.createindex('lucenetest', 'multifield', " |
| + "'c', '" + getClass().getName() + ".makeMultiFieldIndexDescriptor')"); |
| |
| PreparedStatement ps = prepareStatement( |
| "select id from table(multifield__c(?, 100, null)) t"); |
| |
| String[][] bothRows = { {"1"}, {"2"} }; |
| |
| ps.setString(1, "text:secret"); |
| JDBC.assertSingleValueResultSet(ps.executeQuery(), "2"); |
| ps.setString(1, "tags:secret"); |
| JDBC.assertSingleValueResultSet(ps.executeQuery(), "1"); |
| ps.setString(1, "secret"); |
| JDBC.assertUnorderedResultSet(ps.executeQuery(), bothRows); |
| } |
| |
| /** Create the custom index descriptor for the multi-field test */ |
| public static LuceneIndexDescriptor makeMultiFieldIndexDescriptor() |
| { |
| return new MultiFieldIndexDescriptor(); |
| } |
| /** |
| * Create a simple query parser for multiple fields, which uses |
| * StandardAnalyzer instead of the XMLAnalyzer that was used to create |
| * the index. |
| */ |
| public static QueryParser createXMLQueryParser( |
| Version version, String[] fields, Analyzer analyzer) { |
| return new MultiFieldQueryParser( |
| version, fields, new StandardAnalyzer(version)); |
| } |
| |
| /** |
| * Custom analyzer for XML files. It indexes the tags and the text |
| * separately. |
| */ |
| public static class XMLAnalyzer extends Analyzer { |
| |
| public XMLAnalyzer() { |
| // We want different tokenizers for different fields. Set reuse |
| // policy to per-field to achieve that. |
| super(PER_FIELD_REUSE_STRATEGY); |
| } |
| |
| @Override |
| protected TokenStreamComponents createComponents( |
| String fieldName, Reader reader) { |
| |
| if (fieldName.equals("text")) { |
| return new TokenStreamComponents(new XMLTextTokenizer(reader)); |
| } |
| |
| if (fieldName.equals("tags")) { |
| return new TokenStreamComponents(new XMLTagsTokenizer(reader)); |
| } |
| |
| fail("unknown field name: " + fieldName); |
| return null; |
| } |
| } |
| |
| /** Common logic for XMLTextTokenizer and XMLTagsTokenizer. */ |
| private abstract static class AbstractTokenizer extends Tokenizer { |
| Iterator<String> tokens; |
| final CharTermAttribute charTermAttr = |
| addAttribute(CharTermAttribute.class); |
| final PositionIncrementAttribute posIncrAttr |
| = addAttribute(PositionIncrementAttribute.class); |
| |
| AbstractTokenizer(Reader reader) { |
| super(reader); |
| } |
| |
| @Override |
| public boolean incrementToken() throws IOException { |
| if (tokens == null) { |
| tokens = getTokens().iterator(); |
| } |
| |
| if (tokens.hasNext()) { |
| charTermAttr.setEmpty(); |
| charTermAttr.append(tokens.next()); |
| posIncrAttr.setPositionIncrement(1); |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| @Override |
| public void reset() throws IOException { |
| tokens = null; |
| super.reset(); |
| } |
| |
| abstract Iterable<String> getTokens(); |
| } |
| |
| private static class XMLTextTokenizer extends AbstractTokenizer { |
| |
| XMLTextTokenizer(Reader in) { |
| super(in); |
| } |
| |
| @Override |
| Iterable<String> getTokens() { |
| StringBuilder text = new StringBuilder(); |
| getAllText(parseXMLDocument(input), text); |
| return Arrays.asList(text.toString().split("[ \r\n\t]")); |
| } |
| |
| } |
| |
| private static class XMLTagsTokenizer extends AbstractTokenizer { |
| |
| XMLTagsTokenizer(Reader in) { |
| super(in); |
| } |
| |
| @Override |
| Iterable<String> getTokens() { |
| return getAllXMLTags(parseXMLDocument(input)); |
| } |
| |
| } |
| |
| /** Parse an XML document from a Reader. */ |
| private static Document parseXMLDocument(Reader reader) { |
| Document doc = null; |
| |
| try { |
| doc = DocumentBuilderFactory.newInstance() |
| .newDocumentBuilder().parse(new InputSource(reader)); |
| reader.close(); |
| } catch (Exception e) { |
| fail("Failed to parse XML document", e); |
| } |
| |
| return doc; |
| } |
| |
| /** Get a list of all the XML tags in a node. */ |
| private static List<String> getAllXMLTags(Node node) { |
| ArrayList<String> list = new ArrayList<String>(); |
| NodeList nl = node.getChildNodes(); |
| for (int i = 0; i < nl.getLength(); i++) { |
| Node n = nl.item(i); |
| if (n.getNodeType() == Node.ELEMENT_NODE) { |
| list.add(n.getNodeName()); |
| list.addAll(getAllXMLTags(n)); |
| } |
| } |
| return list; |
| } |
| |
| /** Strip out all tags from an XML node, so that only the text is left. */ |
| private static void getAllText(Node node, StringBuilder sb) { |
| if (node.getNodeType() == Node.TEXT_NODE) { |
| sb.append(node.getNodeValue()); |
| } else { |
| NodeList nl = node.getChildNodes(); |
| for (int i = 0; i < nl.getLength(); i++) { |
| getAllText(nl.item(i), sb); |
| } |
| } |
| } |
| |
| public static class MultiFieldIndexDescriptor implements LuceneIndexDescriptor |
| { |
| public String[] getFieldNames() { return new String[] { "tags", "text" }; } |
| |
| public Analyzer getAnalyzer() { return new XMLAnalyzer(); } |
| |
| public QueryParser getQueryParser() |
| { |
| Version version = LuceneUtils.currentVersion(); |
| |
| return new MultiFieldQueryParser |
| ( |
| version, |
| getFieldNames(), |
| new StandardAnalyzer( version ) |
| ); |
| } |
| |
| } |
| |
| ////////////////////////////////////////////////////////////// |
| // |
| // END TEST FOR MULTIPLE FIELDS |
| // |
| ////////////////////////////////////////////////////////////// |
| |
| protected void setUp() throws SQLException { |
| CallableStatement cSt; |
| Statement st = createStatement(); |
| |
| try { |
| st.executeUpdate("create schema lucenetest"); |
| } catch (Exception e) { |
| } |
| st.executeUpdate("set schema lucenetest"); |
| st.executeUpdate("create table titles (ID int generated always as identity primary key, ISBN varchar(16), PRINTISBN varchar(16), title varchar(1024), subtitle varchar(1024), author varchar(1024), series varchar(1024), publisher varchar(1024), collections varchar(128), collections2 varchar(128))"); |
| st.executeUpdate("insert into titles (ISBN, PRINTISBN, TITLE, SUBTITLE, AUTHOR, SERIES, PUBLISHER, COLLECTIONS, COLLECTIONS2) values ('9765087650324','9765087650324','The Grapes Of Wrath','The Great Depression in Oklahoma','John Steinbeck','Noble Winners','The Viking Press','National Book Award','Pulitzer Prize')"); |
| st.executeUpdate("insert into titles (ISBN, PRINTISBN, TITLE, SUBTITLE, AUTHOR, SERIES, PUBLISHER, COLLECTIONS, COLLECTIONS2) values ('6754278542987','6754278542987','Identical: Portraits of Twins','Best Photo Book 2012 by American Photo Magazine','Martin Schoeller','Portraits','teNeues','Photography','')"); |
| st.executeUpdate("insert into titles (ISBN, PRINTISBN, TITLE, SUBTITLE, AUTHOR, SERIES, PUBLISHER, COLLECTIONS, COLLECTIONS2) values ('2747583475882','2747583475882','Vines, Grapes, and Wines','The wine drinker''s guide to grape varieties','Jancis Robinson','Reference','Alfred A. Knopf','Wine','')"); |
| st.executeUpdate("insert into titles (ISBN, PRINTISBN, TITLE, SUBTITLE, AUTHOR, SERIES, PUBLISHER, COLLECTIONS, COLLECTIONS2) values ('4356123483483','4356123483483','A Tale of Two Cities','A fictional account of events leading up to the French revolution','Charles Dickens','Classics','Chapman & Hall','Fiction','Social Criticism')"); |
| |
| cSt = prepareCall |
| ( "call syscs_util.syscs_register_tool('luceneSupport',true)" ); |
| assertUpdateCount(cSt, 0); |
| |
| } |
| |
| protected void tearDown() throws Exception { |
| CallableStatement cSt; |
| Statement st = createStatement(); |
| |
| st.executeUpdate("drop table titles"); |
| |
| cSt = prepareCall |
| ( "call syscs_util.syscs_register_tool('luceneSupport',false)" ); |
| assertUpdateCount(cSt, 0); |
| super.tearDown(); |
| } |
| } |