| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| |
| import java.io.IOException; |
| import java.io.InputStreamReader; |
| import java.io.OutputStreamWriter; |
| import java.io.PrintWriter; |
| import java.io.StringWriter; |
| import java.io.Writer; |
| import java.nio.charset.StandardCharsets; |
| import java.nio.file.Files; |
| import java.nio.file.Path; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.LinkedList; |
| |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.TextField; |
| import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.MergeInfo; |
| import org.apache.lucene.store.MockDirectoryWrapper; |
| import org.apache.lucene.store.TrackingDirectoryWrapper; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.InfoStream; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.StringHelper; |
| import org.apache.lucene.util.Version; |
| |
| /** JUnit adaptation of an older test case DocTest. */ |
| public class TestDoc extends LuceneTestCase { |
| |
| private Path workDir; |
| private Path indexDir; |
| private LinkedList<Path> files; |
| |
| /** Set the test case. This test case needs |
| * a few text files created in the current working directory. |
| */ |
| @Override |
| public void setUp() throws Exception { |
| super.setUp(); |
| if (VERBOSE) { |
| System.out.println("TEST: setUp"); |
| } |
| workDir = createTempDir("TestDoc"); |
| indexDir = createTempDir("testIndex"); |
| |
| Directory directory = newFSDirectory(indexDir); |
| directory.close(); |
| |
| files = new LinkedList<>(); |
| files.add(createOutput("test.txt", |
| "This is the first test file" |
| )); |
| |
| files.add(createOutput("test2.txt", |
| "This is the second test file" |
| )); |
| } |
| |
| private Path createOutput(String name, String text) throws IOException { |
| Writer fw = null; |
| PrintWriter pw = null; |
| |
| try { |
| Path path = workDir.resolve(name); |
| Files.deleteIfExists(path); |
| |
| fw = new OutputStreamWriter(Files.newOutputStream(path), StandardCharsets.UTF_8); |
| pw = new PrintWriter(fw); |
| pw.println(text); |
| return path; |
| |
| } finally { |
| if (pw != null) pw.close(); |
| if (fw != null) fw.close(); |
| } |
| } |
| |
| |
| /** This test executes a number of merges and compares the contents of |
| * the segments created when using compound file or not using one. |
| * |
| * TODO: the original test used to print the segment contents to System.out |
| * for visual validation. To have the same effect, a new method |
| * checkSegment(String name, ...) should be created that would |
| * assert various things about the segment. |
| */ |
| public void testIndexAndMerge() throws Exception { |
| StringWriter sw = new StringWriter(); |
| PrintWriter out = new PrintWriter(sw, true); |
| |
| Directory directory = newFSDirectory(indexDir); |
| |
| if (directory instanceof MockDirectoryWrapper) { |
| // We create unreferenced files (we don't even write |
| // a segments file): |
| ((MockDirectoryWrapper) directory).setAssertNoUnrefencedFilesOnClose(false); |
| } |
| |
| IndexWriter writer = new IndexWriter( |
| directory, |
| newIndexWriterConfig(new MockAnalyzer(random())). |
| setOpenMode(OpenMode.CREATE). |
| setMaxBufferedDocs(-1). |
| setMergePolicy(newLogMergePolicy(10)) |
| ); |
| |
| SegmentCommitInfo si1 = indexDoc(writer, "test.txt"); |
| printSegment(out, si1); |
| |
| SegmentCommitInfo si2 = indexDoc(writer, "test2.txt"); |
| printSegment(out, si2); |
| writer.close(); |
| |
| SegmentCommitInfo siMerge = merge(directory, si1, si2, "_merge", false); |
| printSegment(out, siMerge); |
| |
| SegmentCommitInfo siMerge2 = merge(directory, si1, si2, "_merge2", false); |
| printSegment(out, siMerge2); |
| |
| SegmentCommitInfo siMerge3 = merge(directory, siMerge, siMerge2, "_merge3", false); |
| printSegment(out, siMerge3); |
| |
| directory.close(); |
| out.close(); |
| sw.close(); |
| |
| String multiFileOutput = sw.toString(); |
| //System.out.println(multiFileOutput); |
| |
| sw = new StringWriter(); |
| out = new PrintWriter(sw, true); |
| |
| directory = newFSDirectory(indexDir); |
| |
| if (directory instanceof MockDirectoryWrapper) { |
| // We create unreferenced files (we don't even write |
| // a segments file): |
| ((MockDirectoryWrapper) directory).setAssertNoUnrefencedFilesOnClose(false); |
| } |
| |
| writer = new IndexWriter( |
| directory, |
| newIndexWriterConfig(new MockAnalyzer(random())). |
| setOpenMode(OpenMode.CREATE). |
| setMaxBufferedDocs(-1). |
| setMergePolicy(newLogMergePolicy(10)) |
| ); |
| |
| si1 = indexDoc(writer, "test.txt"); |
| printSegment(out, si1); |
| |
| si2 = indexDoc(writer, "test2.txt"); |
| printSegment(out, si2); |
| writer.close(); |
| |
| siMerge = merge(directory, si1, si2, "_merge", true); |
| printSegment(out, siMerge); |
| |
| siMerge2 = merge(directory, si1, si2, "_merge2", true); |
| printSegment(out, siMerge2); |
| |
| siMerge3 = merge(directory, siMerge, siMerge2, "_merge3", true); |
| printSegment(out, siMerge3); |
| |
| directory.close(); |
| out.close(); |
| sw.close(); |
| String singleFileOutput = sw.toString(); |
| |
| assertEquals(multiFileOutput, singleFileOutput); |
| } |
| |
| private SegmentCommitInfo indexDoc(IndexWriter writer, String fileName) |
| throws Exception |
| { |
| Path path = workDir.resolve(fileName); |
| Document doc = new Document(); |
| InputStreamReader is = new InputStreamReader(Files.newInputStream(path), StandardCharsets.UTF_8); |
| doc.add(new TextField("contents", is)); |
| writer.addDocument(doc); |
| writer.commit(); |
| is.close(); |
| return writer.newestSegment(); |
| } |
| |
| |
| private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile) |
| throws Exception { |
| IOContext context = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1))); |
| SegmentReader r1 = new SegmentReader(si1, Version.LATEST.major, context); |
| SegmentReader r2 = new SegmentReader(si2, Version.LATEST.major, context); |
| |
| final Codec codec = Codec.getDefault(); |
| TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir); |
| final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, null, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); |
| |
| SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2), |
| si, InfoStream.getDefault(), trackingDir, |
| new FieldInfos.FieldNumbers(null), context); |
| |
| MergeState mergeState = merger.merge(); |
| r1.close(); |
| r2.close();; |
| si.setFiles(new HashSet<>(trackingDir.getCreatedFiles())); |
| |
| if (useCompoundFile) { |
| Collection<String> filesToDelete = si.files(); |
| codec.compoundFormat().write(dir, si, context); |
| si.setUseCompoundFile(true); |
| for(String name : filesToDelete) { |
| si1.info.dir.deleteFile(name); |
| } |
| } |
| |
| return new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L, StringHelper.randomId()); |
| } |
| |
| |
| private void printSegment(PrintWriter out, SegmentCommitInfo si) |
| throws Exception { |
| SegmentReader reader = new SegmentReader(si, Version.LATEST.major, newIOContext(random())); |
| |
| for (int i = 0; i < reader.numDocs(); i++) |
| out.println(reader.document(i)); |
| |
| for (FieldInfo fieldInfo : reader.getFieldInfos()) { |
| if (fieldInfo.getIndexOptions() == IndexOptions.NONE) { |
| continue; |
| } |
| Terms terms = reader.terms(fieldInfo.name); |
| assertNotNull(terms); |
| TermsEnum tis = terms.iterator(); |
| while(tis.next() != null) { |
| |
| out.print(" term=" + fieldInfo.name + ":" + tis.term()); |
| out.println(" DF=" + tis.docFreq()); |
| |
| PostingsEnum positions = tis.postings(null, PostingsEnum.POSITIONS); |
| |
| final Bits liveDocs = reader.getLiveDocs(); |
| while (positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { |
| if (liveDocs != null && liveDocs.get(positions.docID()) == false) { |
| continue; |
| } |
| out.print(" doc=" + positions.docID()); |
| out.print(" TF=" + positions.freq()); |
| out.print(" pos="); |
| out.print(positions.nextPosition()); |
| for (int j = 1; j < positions.freq(); j++) |
| out.print("," + positions.nextPosition()); |
| out.println(""); |
| } |
| } |
| } |
| reader.close(); |
| } |
| } |