/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MergeInfo;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
/** JUnit adaptation of an older test case, DocTest. */
public class TestDoc extends LuceneTestCase {
private Path workDir;
private Path indexDir;
private LinkedList<Path> files;
  /** Sets up the test case. This test needs
   * a few text files created in its temporary working directory.
   */
@Override
public void setUp() throws Exception {
super.setUp();
if (VERBOSE) {
System.out.println("TEST: setUp");
}
workDir = createTempDir("TestDoc");
indexDir = createTempDir("testIndex");
Directory directory = newFSDirectory(indexDir);
directory.close();
files = new LinkedList<>();
files.add(createOutput("test.txt",
"This is the first test file"
));
files.add(createOutput("test2.txt",
"This is the second test file"
));
}
  private Path createOutput(String name, String text) throws IOException {
    Path path = workDir.resolve(name);
    Files.deleteIfExists(path);
    try (Writer fw = new OutputStreamWriter(Files.newOutputStream(path), StandardCharsets.UTF_8);
        PrintWriter pw = new PrintWriter(fw)) {
      pw.println(text);
    }
    return path;
  }
  /** This test executes a number of merges and compares the contents of
   * the segments created when using a compound file and when not using one.
   *
   * TODO: the original test used to print the segment contents to System.out
   * for visual validation. To have the same effect, a new method
   * checkSegment(String name, ...) should be created that would
   * assert various things about the segment.
   */
public void testIndexAndMerge() throws Exception {
StringWriter sw = new StringWriter();
PrintWriter out = new PrintWriter(sw, true);
Directory directory = newFSDirectory(indexDir);
if (directory instanceof MockDirectoryWrapper) {
// We create unreferenced files (we don't even write
// a segments file):
((MockDirectoryWrapper) directory).setAssertNoUnrefencedFilesOnClose(false);
}
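    // CREATE mode plus disabled doc-count flushing: each commit in
    // indexDoc() below produces exactly one new single-document segment.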
IndexWriter writer = new IndexWriter(
directory,
newIndexWriterConfig(new MockAnalyzer(random())).
setOpenMode(OpenMode.CREATE).
setMaxBufferedDocs(-1).
setMergePolicy(newLogMergePolicy(10))
);
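    // Index each text file as its own one-document segment and dump it.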
SegmentCommitInfo si1 = indexDoc(writer, "test.txt");
printSegment(out, si1);
SegmentCommitInfo si2 = indexDoc(writer, "test2.txt");
printSegment(out, si2);
writer.close();
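    // Merge the two single-document segments twice, then merge the two
    // results; each merged segment is dumped into the captured output.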
SegmentCommitInfo siMerge = merge(directory, si1, si2, "_merge", false);
printSegment(out, siMerge);
SegmentCommitInfo siMerge2 = merge(directory, si1, si2, "_merge2", false);
printSegment(out, siMerge2);
SegmentCommitInfo siMerge3 = merge(directory, siMerge, siMerge2, "_merge3", false);
printSegment(out, siMerge3);
directory.close();
out.close();
sw.close();
String multiFileOutput = sw.toString();
    if (VERBOSE) {
      System.out.println("TEST: multiFileOutput=" + multiFileOutput);
    }
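    // Second pass: rebuild the same index and redo the same merges, this
    // time packing each merged segment into a compound file.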
sw = new StringWriter();
out = new PrintWriter(sw, true);
directory = newFSDirectory(indexDir);
if (directory instanceof MockDirectoryWrapper) {
// We create unreferenced files (we don't even write
// a segments file):
((MockDirectoryWrapper) directory).setAssertNoUnrefencedFilesOnClose(false);
}
writer = new IndexWriter(
directory,
newIndexWriterConfig(new MockAnalyzer(random())).
setOpenMode(OpenMode.CREATE).
setMaxBufferedDocs(-1).
setMergePolicy(newLogMergePolicy(10))
);
si1 = indexDoc(writer, "test.txt");
printSegment(out, si1);
si2 = indexDoc(writer, "test2.txt");
printSegment(out, si2);
writer.close();
siMerge = merge(directory, si1, si2, "_merge", true);
printSegment(out, siMerge);
siMerge2 = merge(directory, si1, si2, "_merge2", true);
printSegment(out, siMerge2);
siMerge3 = merge(directory, siMerge, siMerge2, "_merge3", true);
printSegment(out, siMerge3);
directory.close();
out.close();
sw.close();
String singleFileOutput = sw.toString();
assertEquals(multiFileOutput, singleFileOutput);
}
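  // A possible shape for the checkSegment helper suggested in the TODO
  // above. This is only a hedged sketch: the signature and the specific
  // assertions are assumptions, not part of the original test, and
  // testIndexAndMerge() does not call it.
  private void checkSegment(SegmentCommitInfo si, int expectedDocCount) throws IOException {
    SegmentReader reader = new SegmentReader(si, Version.LATEST.major, newIOContext(random()));
    try {
      // Every segment in this test should retain all of its documents.
      assertEquals(expectedDocCount, reader.numDocs());
      // The "contents" field is indexed, so its terms must be present.
      assertNotNull(reader.terms("contents"));
    } finally {
      reader.close();
    }
  }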
  private SegmentCommitInfo indexDoc(IndexWriter writer, String fileName) throws Exception {
    Path path = workDir.resolve(fileName);
    try (InputStreamReader is =
        new InputStreamReader(Files.newInputStream(path), StandardCharsets.UTF_8)) {
      Document doc = new Document();
      doc.add(new TextField("contents", is));
      writer.addDocument(doc);
      writer.commit();
    }
    return writer.newestSegment();
  }
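  /**
   * Merges the two given segments into a new segment named {@code merged},
   * driving SegmentMerger directly rather than going through IndexWriter.
   * When {@code useCompoundFile} is true, the merged files are rewritten
   * into a compound file and the originals are deleted.
   */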
private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile)
throws Exception {
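    // Use a merge-flavored IOContext (with unknown size estimates) so the
    // directory treats these reads and writes as merge I/O.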
IOContext context = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1)));
SegmentReader r1 = new SegmentReader(si1, Version.LATEST.major, context);
SegmentReader r2 = new SegmentReader(si2, Version.LATEST.major, context);
final Codec codec = Codec.getDefault();
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
    final SegmentInfo si =
        new SegmentInfo(si1.info.dir, Version.LATEST, null, merged, -1, false, codec,
            Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2),
si, InfoStream.getDefault(), trackingDir,
new FieldInfos.FieldNumbers(null), context);
MergeState mergeState = merger.merge();
r1.close();
    r2.close();
si.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));
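    // Optionally rewrite the merged files into a single compound file and
    // delete the now-redundant loose files.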
if (useCompoundFile) {
Collection<String> filesToDelete = si.files();
codec.compoundFormat().write(dir, si, context);
si.setUseCompoundFile(true);
      for (String name : filesToDelete) {
si1.info.dir.deleteFile(name);
}
}
return new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L, StringHelper.randomId());
}
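  /**
   * Prints each stored document in the segment, then for every indexed
   * field each term with its document frequency and, per live document,
   * the term frequency and positions.
   */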
private void printSegment(PrintWriter out, SegmentCommitInfo si)
throws Exception {
SegmentReader reader = new SegmentReader(si, Version.LATEST.major, newIOContext(random()));
    for (int i = 0; i < reader.numDocs(); i++) {
      out.println(reader.document(i));
    }
for (FieldInfo fieldInfo : reader.getFieldInfos()) {
if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
continue;
}
Terms terms = reader.terms(fieldInfo.name);
assertNotNull(terms);
TermsEnum tis = terms.iterator();
      while (tis.next() != null) {
out.print(" term=" + fieldInfo.name + ":" + tis.term());
out.println(" DF=" + tis.docFreq());
PostingsEnum positions = tis.postings(null, PostingsEnum.POSITIONS);
final Bits liveDocs = reader.getLiveDocs();
while (positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
if (liveDocs != null && liveDocs.get(positions.docID()) == false) {
continue;
}
out.print(" doc=" + positions.docID());
out.print(" TF=" + positions.freq());
out.print(" pos=");
out.print(positions.nextPosition());
          for (int j = 1; j < positions.freq(); j++) {
            out.print("," + positions.nextPosition());
          }
          out.println();
}
}
}
reader.close();
}
}