| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.solr.handler.dataimport; |
| |
| import java.io.File; |
| import java.io.Reader; |
| import java.io.StringReader; |
| import java.nio.charset.StandardCharsets; |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Properties; |
| import java.util.concurrent.TimeUnit; |
| |
| import org.junit.Test; |
| |
| /** |
| * <p> |
| * Test for XPathEntityProcessor |
| * </p> |
| * |
| * |
| * @since solr 1.3 |
| */ |
| public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase { |
| boolean simulateSlowReader; |
| boolean simulateSlowResultProcessor; |
| int rowsToRead = -1; |
| |
| @Test |
| @SuppressWarnings({"unchecked"}) |
| public void withFieldsAndXpath() throws Exception { |
| File tmpdir = createTempDir().toFile(); |
| |
| createFile(tmpdir, "x.xsl", xsl.getBytes(StandardCharsets.UTF_8), false); |
| @SuppressWarnings({"rawtypes"}) |
| Map entityAttrs = createMap("name", "e", "url", "cd.xml", |
| XPathEntityProcessor.FOR_EACH, "/catalog/cd"); |
| @SuppressWarnings({"rawtypes"}) |
| List fields = new ArrayList(); |
| fields.add(createMap("column", "title", "xpath", "/catalog/cd/title")); |
| fields.add(createMap("column", "artist", "xpath", "/catalog/cd/artist")); |
| fields.add(createMap("column", "year", "xpath", "/catalog/cd/year")); |
| Context c = getContext(null, |
| new VariableResolver(), getDataSource(cdData), Context.FULL_DUMP, fields, entityAttrs); |
| XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); |
| xPathEntityProcessor.init(c); |
| List<Map<String, Object>> result = new ArrayList<>(); |
| while (true) { |
| Map<String, Object> row = xPathEntityProcessor.nextRow(); |
| if (row == null) |
| break; |
| result.add(row); |
| } |
| assertEquals(3, result.size()); |
| assertEquals("Empire Burlesque", result.get(0).get("title")); |
| assertEquals("Bonnie Tyler", result.get(1).get("artist")); |
| assertEquals("1982", result.get(2).get("year")); |
| } |
| |
| @Test |
| @SuppressWarnings({"unchecked"}) |
| public void testMultiValued() throws Exception { |
| @SuppressWarnings({"rawtypes"}) |
| Map entityAttrs = createMap("name", "e", "url", "testdata.xml", |
| XPathEntityProcessor.FOR_EACH, "/root"); |
| @SuppressWarnings({"rawtypes"}) |
| List fields = new ArrayList(); |
| fields.add(createMap("column", "a", "xpath", "/root/a", DataImporter.MULTI_VALUED, "true")); |
| Context c = getContext(null, |
| new VariableResolver(), getDataSource(testXml), Context.FULL_DUMP, fields, entityAttrs); |
| XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); |
| xPathEntityProcessor.init(c); |
| List<Map<String, Object>> result = new ArrayList<>(); |
| while (true) { |
| Map<String, Object> row = xPathEntityProcessor.nextRow(); |
| if (row == null) |
| break; |
| result.add(row); |
| } |
| @SuppressWarnings({"rawtypes"}) |
| List l = (List)result.get(0).get("a"); |
| assertEquals(3, l.size()); |
| assertEquals("1", l.get(0)); |
| assertEquals("2", l.get(1)); |
| assertEquals("ΓΌ", l.get(2)); |
| } |
| |
| @SuppressWarnings({"rawtypes", "unchecked"}) |
| @Test |
| public void testMultiValuedWithMultipleDocuments() throws Exception { |
| Map entityAttrs = createMap("name", "e", "url", "testdata.xml", XPathEntityProcessor.FOR_EACH, "/documents/doc"); |
| List fields = new ArrayList(); |
| fields.add(createMap("column", "id", "xpath", "/documents/doc/id", DataImporter.MULTI_VALUED, "false")); |
| fields.add(createMap("column", "a", "xpath", "/documents/doc/a", DataImporter.MULTI_VALUED, "true")); |
| fields.add(createMap("column", "s1dataA", "xpath", "/documents/doc/sec1/s1dataA", DataImporter.MULTI_VALUED, "true")); |
| fields.add(createMap("column", "s1dataB", "xpath", "/documents/doc/sec1/s1dataB", DataImporter.MULTI_VALUED, "true")); |
| fields.add(createMap("column", "s1dataC", "xpath", "/documents/doc/sec1/s1dataC", DataImporter.MULTI_VALUED, "true")); |
| |
| Context c = getContext(null, |
| new VariableResolver(), getDataSource(textMultipleDocuments), Context.FULL_DUMP, fields, entityAttrs); |
| XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); |
| xPathEntityProcessor.init(c); |
| List<Map<String, Object>> result = new ArrayList<>(); |
| while (true) { |
| Map<String, Object> row = xPathEntityProcessor.nextRow(); |
| if (row == null) |
| break; |
| result.add(row); |
| } |
| { |
| assertEquals("1", result.get(0).get("id")); |
| List a = (List)result.get(0).get("a"); |
| List s1dataA = (List)result.get(0).get("s1dataA"); |
| List s1dataB = (List)result.get(0).get("s1dataB"); |
| List s1dataC = (List)result.get(0).get("s1dataC"); |
| assertEquals(2, a.size()); |
| assertEquals("id1-a1", a.get(0)); |
| assertEquals("id1-a2", a.get(1)); |
| assertEquals(3, s1dataA.size()); |
| assertEquals("id1-s1dataA-1", s1dataA.get(0)); |
| assertNull(s1dataA.get(1)); |
| assertEquals("id1-s1dataA-3", s1dataA.get(2)); |
| assertEquals(3, s1dataB.size()); |
| assertEquals("id1-s1dataB-1", s1dataB.get(0)); |
| assertEquals("id1-s1dataB-2", s1dataB.get(1)); |
| assertEquals("id1-s1dataB-3", s1dataB.get(2)); |
| assertEquals(3, s1dataC.size()); |
| assertNull(s1dataC.get(0)); |
| assertNull(s1dataC.get(1)); |
| assertNull(s1dataC.get(2)); |
| } |
| { |
| assertEquals("2", result.get(1).get("id")); |
| List a = (List)result.get(1).get("a"); |
| List s1dataA = (List)result.get(1).get("s1dataA"); |
| List s1dataB = (List)result.get(1).get("s1dataB"); |
| List s1dataC = (List)result.get(1).get("s1dataC"); |
| assertTrue(a==null || a.size()==0); |
| assertEquals(1, s1dataA.size()); |
| assertNull(s1dataA.get(0)); |
| assertEquals(1, s1dataB.size()); |
| assertEquals("id2-s1dataB-1", s1dataB.get(0)); |
| assertEquals(1, s1dataC.size()); |
| assertNull(s1dataC.get(0)); |
| } |
| { |
| assertEquals("3", result.get(2).get("id")); |
| List a = (List)result.get(2).get("a"); |
| List s1dataA = (List)result.get(2).get("s1dataA"); |
| List s1dataB = (List)result.get(2).get("s1dataB"); |
| List s1dataC = (List)result.get(2).get("s1dataC"); |
| assertTrue(a==null || a.size()==0); |
| assertEquals(1, s1dataA.size()); |
| assertEquals("id3-s1dataA-1", s1dataA.get(0)); |
| assertEquals(1, s1dataB.size()); |
| assertNull(s1dataB.get(0)); |
| assertEquals(1, s1dataC.size()); |
| assertNull(s1dataC.get(0)); |
| } |
| { |
| assertEquals("4", result.get(3).get("id")); |
| List a = (List)result.get(3).get("a"); |
| List s1dataA = (List)result.get(3).get("s1dataA"); |
| List s1dataB = (List)result.get(3).get("s1dataB"); |
| List s1dataC = (List)result.get(3).get("s1dataC"); |
| assertTrue(a==null || a.size()==0); |
| assertEquals(1, s1dataA.size()); |
| assertEquals("id4-s1dataA-1", s1dataA.get(0)); |
| assertEquals(1, s1dataB.size()); |
| assertEquals("id4-s1dataB-1", s1dataB.get(0)); |
| assertEquals(1, s1dataC.size()); |
| assertEquals("id4-s1dataC-1", s1dataC.get(0)); |
| } |
| { |
| assertEquals("5", result.get(4).get("id")); |
| List a = (List)result.get(4).get("a"); |
| List s1dataA = (List)result.get(4).get("s1dataA"); |
| List s1dataB = (List)result.get(4).get("s1dataB"); |
| List s1dataC = (List)result.get(4).get("s1dataC"); |
| assertTrue(a==null || a.size()==0); |
| assertEquals(1, s1dataA.size()); |
| assertNull(s1dataA.get(0)); |
| assertEquals(1, s1dataB.size()); |
| assertNull(s1dataB.get(0)); |
| assertEquals(1, s1dataC.size()); |
| assertEquals("id5-s1dataC-1", s1dataC.get(0)); |
| } |
| { |
| assertEquals("6", result.get(5).get("id")); |
| List a = (List)result.get(5).get("a"); |
| List s1dataA = (List)result.get(5).get("s1dataA"); |
| List s1dataB = (List)result.get(5).get("s1dataB"); |
| List s1dataC = (List)result.get(5).get("s1dataC"); |
| assertTrue(a==null || a.size()==0); |
| assertEquals(3, s1dataA.size()); |
| assertEquals("id6-s1dataA-1", s1dataA.get(0)); |
| assertEquals("id6-s1dataA-2", s1dataA.get(1)); |
| assertNull(s1dataA.get(2)); |
| assertEquals(3, s1dataB.size()); |
| assertEquals("id6-s1dataB-1", s1dataB.get(0)); |
| assertEquals("id6-s1dataB-2", s1dataB.get(1)); |
| assertEquals("id6-s1dataB-3", s1dataB.get(2)); |
| assertEquals(3, s1dataC.size()); |
| assertEquals("id6-s1dataC-1", s1dataC.get(0)); |
| assertNull(s1dataC.get(1)); |
| assertEquals("id6-s1dataC-3", s1dataC.get(2)); |
| } |
| } |
| |
| @Test |
| @SuppressWarnings({"unchecked"}) |
| public void testMultiValuedFlatten() throws Exception { |
| @SuppressWarnings({"rawtypes"}) |
| Map entityAttrs = createMap("name", "e", "url", "testdata.xml", |
| XPathEntityProcessor.FOR_EACH, "/root"); |
| @SuppressWarnings({"rawtypes"}) |
| List fields = new ArrayList(); |
| fields.add(createMap("column", "a", "xpath", "/root/a" ,"flatten","true")); |
| Context c = getContext(null, |
| new VariableResolver(), getDataSource(testXmlFlatten), Context.FULL_DUMP, fields, entityAttrs); |
| XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); |
| xPathEntityProcessor.init(c); |
| Map<String, Object> result = null; |
| while (true) { |
| Map<String, Object> row = xPathEntityProcessor.nextRow(); |
| if (row == null) |
| break; |
| result = row; |
| } |
| assertEquals("1B2", result.get("a")); |
| } |
| |
| @Test |
| @SuppressWarnings({"unchecked"}) |
| public void withFieldsAndXpathStream() throws Exception { |
| final Object monitor = new Object(); |
| final boolean[] done = new boolean[1]; |
| |
| @SuppressWarnings({"rawtypes"}) |
| Map entityAttrs = createMap("name", "e", "url", "cd.xml", |
| XPathEntityProcessor.FOR_EACH, "/catalog/cd", "stream", "true", "batchSize","1"); |
| @SuppressWarnings({"rawtypes"}) |
| List fields = new ArrayList(); |
| fields.add(createMap("column", "title", "xpath", "/catalog/cd/title")); |
| fields.add(createMap("column", "artist", "xpath", "/catalog/cd/artist")); |
| fields.add(createMap("column", "year", "xpath", "/catalog/cd/year")); |
| Context c = getContext(null, |
| new VariableResolver(), getDataSource(cdData), Context.FULL_DUMP, fields, entityAttrs); |
| XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor() { |
| private int count; |
| |
| @Override |
| protected Map<String, Object> readRow(Map<String, Object> record, |
| String xpath) { |
| synchronized (monitor) { |
| if (simulateSlowReader && !done[0]) { |
| try { |
| monitor.wait(100); |
| } catch (InterruptedException e) { |
| throw new RuntimeException(e); |
| } |
| } |
| } |
| |
| return super.readRow(record, xpath); |
| } |
| }; |
| |
| if (simulateSlowResultProcessor) { |
| xPathEntityProcessor.blockingQueueSize = 1; |
| } |
| xPathEntityProcessor.blockingQueueTimeOut = 1; |
| xPathEntityProcessor.blockingQueueTimeOutUnits = TimeUnit.MICROSECONDS; |
| |
| xPathEntityProcessor.init(c); |
| List<Map<String, Object>> result = new ArrayList<>(); |
| while (true) { |
| if (rowsToRead >= 0 && result.size() >= rowsToRead) { |
| Thread.currentThread().interrupt(); |
| } |
| Map<String, Object> row = xPathEntityProcessor.nextRow(); |
| if (row == null) |
| break; |
| result.add(row); |
| if (simulateSlowResultProcessor) { |
| synchronized (xPathEntityProcessor.publisherThread) { |
| if (xPathEntityProcessor.publisherThread.isAlive()) { |
| xPathEntityProcessor.publisherThread.wait(1000); |
| } |
| } |
| } |
| } |
| |
| synchronized (monitor) { |
| done[0] = true; |
| monitor.notify(); |
| } |
| |
| // confirm that publisher thread stops. |
| xPathEntityProcessor.publisherThread.join(1000); |
| assertEquals("Expected thread to stop", false, xPathEntityProcessor.publisherThread.isAlive()); |
| |
| assertEquals(rowsToRead < 0 ? 3 : rowsToRead, result.size()); |
| |
| if (rowsToRead < 0) { |
| assertEquals("Empire Burlesque", result.get(0).get("title")); |
| assertEquals("Bonnie Tyler", result.get(1).get("artist")); |
| assertEquals("1982", result.get(2).get("year")); |
| } |
| } |
| |
| @Test |
| public void withFieldsAndXpathStreamContinuesOnTimeout() throws Exception { |
| simulateSlowReader = true; |
| withFieldsAndXpathStream(); |
| } |
| |
| @Test |
| public void streamWritesMessageAfterBlockedAttempt() throws Exception { |
| simulateSlowResultProcessor = true; |
| withFieldsAndXpathStream(); |
| } |
| |
| @Test |
| public void streamStopsAfterInterrupt() throws Exception { |
| simulateSlowResultProcessor = true; |
| rowsToRead = 1; |
| withFieldsAndXpathStream(); |
| } |
| |
| @Test |
| @SuppressWarnings({"unchecked"}) |
| public void withDefaultSolrAndXsl() throws Exception { |
| File tmpdir = createTempDir().toFile(); |
| AbstractDataImportHandlerTestCase.createFile(tmpdir, "x.xsl", xsl.getBytes(StandardCharsets.UTF_8), |
| false); |
| |
| @SuppressWarnings({"rawtypes"}) |
| Map entityAttrs = createMap("name", "e", |
| XPathEntityProcessor.USE_SOLR_ADD_SCHEMA, "true", "xsl", "" |
| + new File(tmpdir, "x.xsl").toURI(), "url", "cd.xml"); |
| Context c = getContext(null, |
| new VariableResolver(), getDataSource(cdData), Context.FULL_DUMP, null, entityAttrs); |
| XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor(); |
| xPathEntityProcessor.init(c); |
| List<Map<String, Object>> result = new ArrayList<>(); |
| while (true) { |
| Map<String, Object> row = xPathEntityProcessor.nextRow(); |
| if (row == null) |
| break; |
| result.add(row); |
| } |
| assertEquals(3, result.size()); |
| assertEquals("Empire Burlesque", result.get(0).get("title")); |
| assertEquals("Bonnie Tyler", result.get(1).get("artist")); |
| assertEquals("1982", result.get(2).get("year")); |
| } |
| |
| private DataSource<Reader> getDataSource(final String xml) { |
| return new DataSource<Reader>() { |
| |
| @Override |
| public void init(Context context, Properties initProps) { |
| } |
| |
| @Override |
| public void close() { |
| } |
| |
| @Override |
| public Reader getData(String query) { |
| return new StringReader(xml); |
| } |
| }; |
| } |
| |
| private static final String xsl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" |
| + "<xsl:stylesheet version=\"1.0\"\n" |
| + "xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">\n" |
| + "<xsl:output version='1.0' method='xml' encoding='UTF-8' indent='yes'/>\n" |
| + "\n" |
| + "<xsl:template match=\"/\">\n" |
| + " <add> \n" |
| + " <xsl:for-each select=\"catalog/cd\">\n" |
| + " <doc>\n" |
| + " <field name=\"title\"><xsl:value-of select=\"title\"/></field>\n" |
| + " <field name=\"artist\"><xsl:value-of select=\"artist\"/></field>\n" |
| + " <field name=\"country\"><xsl:value-of select=\"country\"/></field>\n" |
| + " <field name=\"company\"><xsl:value-of select=\"company\"/></field> \n" |
| + " <field name=\"price\"><xsl:value-of select=\"price\"/></field>\n" |
| + " <field name=\"year\"><xsl:value-of select=\"year\"/></field> \n" |
| + " </doc>\n" |
| + " </xsl:for-each>\n" |
| + " </add> \n" |
| + "</xsl:template>\n" + "</xsl:stylesheet>"; |
| |
| private static final String cdData = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" |
| + "<?xml-stylesheet type=\"text/xsl\" href=\"solr.xsl\"?>\n" |
| + "<catalog>\n" |
| + "\t<cd>\n" |
| + "\t\t<title>Empire Burlesque</title>\n" |
| + "\t\t<artist>Bob Dylan</artist>\n" |
| + "\t\t<country>USA</country>\n" |
| + "\t\t<company>Columbia</company>\n" |
| + "\t\t<price>10.90</price>\n" |
| + "\t\t<year>1985</year>\n" |
| + "\t</cd>\n" |
| + "\t<cd>\n" |
| + "\t\t<title>Hide your heart</title>\n" |
| + "\t\t<artist>Bonnie Tyler</artist>\n" |
| + "\t\t<country>UK</country>\n" |
| + "\t\t<company>CBS Records</company>\n" |
| + "\t\t<price>9.90</price>\n" |
| + "\t\t<year>1988</year>\n" |
| + "\t</cd>\n" |
| + "\t<cd>\n" |
| + "\t\t<title>Greatest Hits</title>\n" |
| + "\t\t<artist>Dolly Parton</artist>\n" |
| + "\t\t<country>USA</country>\n" |
| + "\t\t<company>RCA</company>\n" |
| + "\t\t<price>9.90</price>\n" |
| + "\t\t<year>1982</year>\n" + "\t</cd>\n" + "</catalog>\t"; |
| |
| private static final String testXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE root [\n<!ENTITY uuml \"ü\" >\n]>\n<root><a>1</a><a>2</a><a>ü</a></root>"; |
| |
| private static final String testXmlFlatten = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><root><a>1<b>B</b>2</a></root>"; |
| |
| private static final String textMultipleDocuments = |
| "<?xml version=\"1.0\" ?>" + |
| "<documents>" + |
| " <doc>" + |
| " <id>1</id>" + |
| " <a>id1-a1</a>" + |
| " <a>id1-a2</a>" + |
| " <sec1>" + |
| " <s1dataA>id1-s1dataA-1</s1dataA>" + |
| " <s1dataB>id1-s1dataB-1</s1dataB>" + |
| " </sec1>" + |
| " <sec1>" + |
| " <s1dataB>id1-s1dataB-2</s1dataB>" + |
| " </sec1>" + |
| " <sec1>" + |
| " <s1dataA>id1-s1dataA-3</s1dataA>" + |
| " <s1dataB>id1-s1dataB-3</s1dataB>" + |
| " </sec1>" + |
| " </doc>" + |
| " <doc>" + |
| " <id>2</id>" + |
| " <sec1>" + |
| " <s1dataB>id2-s1dataB-1</s1dataB>" + |
| " </sec1>" + |
| " </doc>" + |
| " <doc>" + |
| " <id>3</id>" + |
| " <sec1>" + |
| " <s1dataA>id3-s1dataA-1</s1dataA>" + |
| " </sec1>" + |
| " </doc>" + |
| " <doc>" + |
| " <id>4</id>" + |
| " <sec1>" + |
| " <s1dataA>id4-s1dataA-1</s1dataA>" + |
| " <s1dataB>id4-s1dataB-1</s1dataB>" + |
| " <s1dataC>id4-s1dataC-1</s1dataC>" + |
| " </sec1>" + |
| " </doc>" + |
| " <doc>" + |
| " <id>5</id>" + |
| " <sec1>" + |
| " <s1dataC>id5-s1dataC-1</s1dataC>" + |
| " </sec1>" + |
| " </doc>" + |
| " <doc>" + |
| " <id>6</id>" + |
| " <sec1>" + |
| " <s1dataA>id6-s1dataA-1</s1dataA>" + |
| " <s1dataB>id6-s1dataB-1</s1dataB>" + |
| " <s1dataC>id6-s1dataC-1</s1dataC>" + |
| " </sec1>" + |
| " <sec1>" + |
| " <s1dataA>id6-s1dataA-2</s1dataA>" + |
| " <s1dataB>id6-s1dataB-2</s1dataB>" + |
| " </sec1>" + |
| " <sec1>" + |
| " <s1dataB>id6-s1dataB-3</s1dataB>" + |
| " <s1dataC>id6-s1dataC-3</s1dataC>" + |
| " </sec1>" + |
| " </doc>" + |
| "</documents>" |
| ; |
| } |