// blob: c2464b573dbdf2ffd06e946f861813730e0d773d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.benchmark.byTask.feeds;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Properties;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/**
 * Tests {@link EnwikiContentSource} against small in-memory {@code <mediawiki>} documents instead
 * of dump files on disk.
 *
 * <p>Every test closes the content source in a {@code finally} block so that the source is
 * released even when an assertion fails part-way through.
 */
public class TestEnwikiContentSource extends LuceneTestCase {

  /** An EnwikiContentSource which works on a String and not files. */
  private static class StringableEnwikiSource extends EnwikiContentSource {
    /** The complete XML content served by {@link #openInputStream()}. */
    private final String docs;

    public StringableEnwikiSource(String docs) {
      this.docs = docs;
    }

    /** Returns a fresh stream over the UTF-8 bytes of the configured String on each call. */
    @Override
    protected InputStream openInputStream() throws IOException {
      return new ByteArrayInputStream(docs.getBytes(StandardCharsets.UTF_8));
    }
  }

  /**
   * Asserts that {@code dd} is non-null and that its name, title, body and date equal the expected
   * values.
   *
   * @param dd the document data to check
   * @param expName expected value of {@code dd.getName()}
   * @param expTitle expected value of {@code dd.getTitle()}
   * @param expBody expected value of {@code dd.getBody()}
   * @param expDate expected value of {@code dd.getDate()}
   */
  private void assertDocData(
      DocData dd, String expName, String expTitle, String expBody, String expDate)
      throws ParseException {
    assertNotNull(dd);
    assertEquals(expName, dd.getName());
    assertEquals(expTitle, dd.getTitle());
    assertEquals(expBody, dd.getBody());
    assertEquals(expDate, dd.getDate());
  }

  /**
   * Asserts that asking {@code stdm} for another document throws {@link NoMoreDataException}.
   *
   * @param stdm the content source expected to be exhausted
   */
  private void assertNoMoreDataException(EnwikiContentSource stdm) throws Exception {
    expectThrows(
        NoMoreDataException.class,
        () -> {
          // A null DocData is passed on purpose; the call is expected to throw
          // NoMoreDataException rather than fill in a document.
          stdm.getNextDocData(null);
        });
  }

  /** First sample {@code <page>} element (id 1, "Title1"), with CRLF line endings. */
  private static final String PAGE1 =
      " <page>\r\n"
          + " <title>Title1</title>\r\n"
          + " <ns>0</ns>\r\n"
          + " <id>1</id>\r\n"
          + " <revision>\r\n"
          + " <id>11</id>\r\n"
          + " <parentid>111</parentid>\r\n"
          + " <timestamp>2011-09-14T11:35:09Z</timestamp>\r\n"
          + " <contributor>\r\n"
          + " <username>Mister1111</username>\r\n"
          + " <id>1111</id>\r\n"
          + " </contributor>\r\n"
          + " <minor />\r\n"
          + " <comment>/* Never mind */</comment>\r\n"
          + " <text>Some text 1 here</text>\r\n"
          + " </revision>\r\n"
          + " </page>\r\n";

  /** Second sample {@code <page>} element (id 2, "Title2"), with CRLF line endings. */
  private static final String PAGE2 =
      " <page>\r\n"
          + " <title>Title2</title>\r\n"
          + " <ns>0</ns>\r\n"
          + " <id>2</id>\r\n"
          + " <revision>\r\n"
          + " <id>22</id>\r\n"
          + " <parentid>222</parentid>\r\n"
          + " <timestamp>2022-09-14T22:35:09Z</timestamp>\r\n"
          + " <contributor>\r\n"
          + " <username>Mister2222</username>\r\n"
          + " <id>2222</id>\r\n"
          + " </contributor>\r\n"
          + " <minor />\r\n"
          + " <comment>/* Never mind */</comment>\r\n"
          + " <text>Some text 2 here</text>\r\n"
          + " </revision>\r\n"
          + " </page>\r\n";

  /** A single page yields exactly one document, after which the source reports no more data. */
  @Test
  public void testOneDocument() throws Exception {
    String docs = "<mediawiki>\r\n" + PAGE1 + "</mediawiki>";
    EnwikiContentSource source = createContentSource(docs, false);
    try {
      DocData dd = source.getNextDocData(new DocData());
      assertDocData(dd, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");

      assertNoMoreDataException(source);
    } finally {
      // Release the source on both the success and the failure path.
      source.close();
    }
  }

  /**
   * Builds a {@link StringableEnwikiSource} over {@code docs}, configures it, and resets its inputs
   * through a {@link DocMaker} so that it is ready to be read.
   *
   * @param docs the XML content the source should serve
   * @param forever value stored under the {@code content.source.forever} property
   * @return the initialized content source; the caller is responsible for closing it
   */
  private EnwikiContentSource createContentSource(String docs, boolean forever) throws IOException {
    Properties props = new Properties();
    props.setProperty("print.props", "false");
    props.setProperty("content.source.forever", Boolean.toString(forever));
    Config config = new Config(props);

    EnwikiContentSource source = new StringableEnwikiSource(docs);
    source.setConfig(config);

    // doc-maker just for initiating content source inputs
    DocMaker docMaker = new DocMaker();
    docMaker.setConfig(config, source);
    docMaker.resetInputs();
    return source;
  }

  /** Two pages yield two documents in order, after which the source reports no more data. */
  @Test
  public void testTwoDocuments() throws Exception {
    String docs = "<mediawiki>\r\n" + PAGE1 + PAGE2 + "</mediawiki>";
    EnwikiContentSource source = createContentSource(docs, false);
    try {
      DocData dd1 = source.getNextDocData(new DocData());
      assertDocData(dd1, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");

      DocData dd2 = source.getNextDocData(new DocData());
      assertDocData(dd2, "2", "Title2", "Some text 2 here", "14-SEP-2022 22:35:09.000");

      assertNoMoreDataException(source);
    } finally {
      // Release the source on both the success and the failure path.
      source.close();
    }
  }

  /** With the forever flag on, the same two documents are served repeatedly. */
  @Test
  public void testForever() throws Exception {
    String docs = "<mediawiki>\r\n" + PAGE1 + PAGE2 + "</mediawiki>";
    EnwikiContentSource source = createContentSource(docs, true);
    try {
      // same documents several times
      for (int i = 0; i < 3; i++) {
        DocData dd1 = source.getNextDocData(new DocData());
        assertDocData(dd1, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");

        DocData dd2 = source.getNextDocData(new DocData());
        assertDocData(dd2, "2", "Title2", "Some text 2 here", "14-SEP-2022 22:35:09.000");
        // Don't test that NoMoreDataException is thrown, since the forever flag is turned on.
      }
    } finally {
      // Close even if an assertion above failed, so a forever-mode source is never left open.
      source.close();
    }
  }
}