package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.File;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.store._TestHelper;
import org.apache.lucene.util._TestUtil;
public class TestCompoundFile extends LuceneTestCase
{
private Directory dir;
@Override
public void setUp() throws Exception {
super.setUp();
File file = _TestUtil.getTempDir("testIndex");
// use a simple FSDir here, to be sure to have SimpleFSInputs
dir = new SimpleFSDirectory(file, null);
}
@Override
public void tearDown() throws Exception {
dir.close();
super.tearDown();
}
/** Creates a file of the specified size with random data. */
private void createRandomFile(Directory dir, String name, int size)
throws IOException
{
IndexOutput os = dir.createOutput(name);
for (int i=0; i<size; i++) {
byte b = (byte) (Math.random() * 256);
os.writeByte(b);
}
os.close();
}
/** Creates a file of the specified size with sequential data. The first
* byte is written as the start byte provided. All subsequent bytes are
* computed as start + offset where offset is the number of the byte.
*/
private void createSequenceFile(Directory dir,
String name,
byte start,
int size)
throws IOException
{
IndexOutput os = dir.createOutput(name);
for (int i=0; i < size; i++) {
os.writeByte(start);
start++;
}
os.close();
}
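/** Asserts that the two streams have the same length, the same current
 * file pointer, and identical remaining content, comparing the bytes in
 * 512-byte chunks.
 */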
private void assertSameStreams(String msg,
IndexInput expected,
IndexInput test)
throws IOException
{
assertNotNull(msg + " null expected", expected);
assertNotNull(msg + " null test", test);
assertEquals(msg + " length", expected.length(), test.length());
assertEquals(msg + " position", expected.getFilePointer(),
test.getFilePointer());
byte expectedBuffer[] = new byte[512];
byte testBuffer[] = new byte[expectedBuffer.length];
long remainder = expected.length() - expected.getFilePointer();
while(remainder > 0) {
int readLen = (int) Math.min(remainder, expectedBuffer.length);
expected.readBytes(expectedBuffer, 0, readLen);
test.readBytes(testBuffer, 0, readLen);
assertEqualArrays(msg + ", remainder " + remainder, expectedBuffer,
testBuffer, 0, readLen);
remainder -= readLen;
}
}
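/** Seeks both streams to the given offset (when it lies within the stream)
 * and asserts that the content from that point on is identical.
 */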
private void assertSameStreams(String msg,
IndexInput expected,
IndexInput actual,
long seekTo)
throws IOException
{
if(seekTo >= 0 && seekTo < expected.length())
{
expected.seek(seekTo);
actual.seek(seekTo);
assertSameStreams(msg + ", seek(mid)", expected, actual);
}
}
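/** Verifies that both streams behave identically when seeking to the start,
 * the middle, end-2, end-1, the end, and one byte past the end.
 */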
private void assertSameSeekBehavior(String msg,
IndexInput expected,
IndexInput actual)
throws IOException
{
// seek to 0
long point = 0;
assertSameStreams(msg + ", seek(0)", expected, actual, point);
// seek to middle
point = expected.length() / 2L;
assertSameStreams(msg + ", seek(mid)", expected, actual, point);
// seek to end - 2
point = expected.length() - 2;
assertSameStreams(msg + ", seek(end-2)", expected, actual, point);
// seek to end - 1
point = expected.length() - 1;
assertSameStreams(msg + ", seek(end-1)", expected, actual, point);
// seek to the end
point = expected.length();
assertSameStreams(msg + ", seek(end)", expected, actual, point);
// seek past end
point = expected.length() + 1;
assertSameStreams(msg + ", seek(end+1)", expected, actual, point);
}
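/** Asserts that the two byte arrays hold the same values over the
 * compared range.
 */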
private void assertEqualArrays(String msg,
byte[] expected,
byte[] test,
int start,
int len)
{
assertNotNull(msg + " null expected", expected);
assertNotNull(msg + " null test", test);
for (int i=start; i<start+len; i++) {
assertEquals(msg + " " + i, expected[i], test[i]);
}
}
// ===========================================================
// Tests of the basic CompoundFile functionality
// ===========================================================
/** This test creates a compound file based on a single file.
 * Files of different sizes are tested: 0, 1, 10, 100 bytes.
 */
public void testSingleFile() throws IOException {
int data[] = new int[] { 0, 1, 10, 100 };
for (int i=0; i<data.length; i++) {
String name = "t" + data[i];
createSequenceFile(dir, name, (byte) 0, data[i]);
CompoundFileWriter csw = new CompoundFileWriter(dir, name + ".cfs");
csw.addFile(name);
csw.close();
CompoundFileReader csr = new CompoundFileReader(dir, name + ".cfs");
IndexInput expected = dir.openInput(name);
IndexInput actual = csr.openInput(name);
assertSameStreams(name, expected, actual);
assertSameSeekBehavior(name, expected, actual);
expected.close();
actual.close();
csr.close();
}
}
/** This test creates a compound file based on two files. */
public void testTwoFiles() throws IOException {
createSequenceFile(dir, "d1", (byte) 0, 15);
createSequenceFile(dir, "d2", (byte) 0, 114);
CompoundFileWriter csw = new CompoundFileWriter(dir, "d.csf");
csw.addFile("d1");
csw.addFile("d2");
csw.close();
CompoundFileReader csr = new CompoundFileReader(dir, "d.csf");
IndexInput expected = dir.openInput("d1");
IndexInput actual = csr.openInput("d1");
assertSameStreams("d1", expected, actual);
assertSameSeekBehavior("d1", expected, actual);
expected.close();
actual.close();
expected = dir.openInput("d2");
actual = csr.openInput("d2");
assertSameStreams("d2", expected, actual);
assertSameSeekBehavior("d2", expected, actual);
expected.close();
actual.close();
csr.close();
}
/** This test creates a compound file based on a large number of files of
 * various lengths. The file content is generated randomly. The sizes range
 * from 0 to about 1 MB. Some of the sizes are selected to exercise the
 * buffering logic in the file reading code; for this, the chunk variable is
 * set to the length of the buffer used internally by the compound file logic.
 */
public void testRandomFiles() throws IOException {
// Setup the test segment
String segment = "test";
int chunk = 1024; // internal buffer size used by the stream
createRandomFile(dir, segment + ".zero", 0);
createRandomFile(dir, segment + ".one", 1);
createRandomFile(dir, segment + ".ten", 10);
createRandomFile(dir, segment + ".hundred", 100);
createRandomFile(dir, segment + ".big1", chunk);
createRandomFile(dir, segment + ".big2", chunk - 1);
createRandomFile(dir, segment + ".big3", chunk + 1);
createRandomFile(dir, segment + ".big4", 3 * chunk);
createRandomFile(dir, segment + ".big5", 3 * chunk - 1);
createRandomFile(dir, segment + ".big6", 3 * chunk + 1);
createRandomFile(dir, segment + ".big7", 1000 * chunk);
// Setup extraneous files
createRandomFile(dir, "onetwothree", 100);
createRandomFile(dir, segment + ".notIn", 50);
createRandomFile(dir, segment + ".notIn2", 51);
// Now test
CompoundFileWriter csw = new CompoundFileWriter(dir, "test.cfs");
final String data[] = new String[] {
".zero", ".one", ".ten", ".hundred", ".big1", ".big2", ".big3",
".big4", ".big5", ".big6", ".big7"
};
for (int i=0; i<data.length; i++) {
csw.addFile(segment + data[i]);
}
csw.close();
CompoundFileReader csr = new CompoundFileReader(dir, "test.cfs");
for (int i=0; i<data.length; i++) {
IndexInput check = dir.openInput(segment + data[i]);
IndexInput test = csr.openInput(segment + data[i]);
assertSameStreams(data[i], check, test);
assertSameSeekBehavior(data[i], check, test);
test.close();
check.close();
}
csr.close();
}
/** Sets up a larger compound file with a number of components, each of
 * which is a sequential file (so that we can easily tell that we are
 * reading the right byte). The method sets up 20 files, f0 to f19; each
 * file is 2000 bytes long.
 */
private void setUp_2() throws IOException {
CompoundFileWriter cw = new CompoundFileWriter(dir, "f.comp");
for (int i=0; i<20; i++) {
createSequenceFile(dir, "f" + i, (byte) 0, 2000);
cw.addFile("f" + i);
}
cw.close();
}
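/** Demonstrates that an FSIndexInput can still serve reads from its internal
 * buffer after close() has been called; see demo_FSIndexInputBug below.
 */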
public void testReadAfterClose() throws IOException {
demo_FSIndexInputBug(dir, "test");
}
private void demo_FSIndexInputBug(Directory fsdir, String file)
throws IOException
{
// Setup the test file - we need more than 1024 bytes
IndexOutput os = fsdir.createOutput(file);
for(int i=0; i<2000; i++) {
os.writeByte((byte) i);
}
os.close();
IndexInput in = fsdir.openInput(file);
// This read primes the buffer in IndexInput
in.readByte();
// Close the file
in.close();
// ERROR: this call should fail, but succeeds because the buffer
// is still filled
in.readByte();
// ERROR: this call should also fail, but succeeds because seek() only
// repositions the buffer and performs no actual I/O
in.seek(1099);
try {
// OK: this call correctly fails. We are now past the 1024 internal
// buffer, so an actual IO is attempted, which fails
in.readByte();
fail("expected readByte() to throw exception");
} catch (IOException e) {
// expected exception
}
}
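/** Returns true if the given input is a CSIndexInput obtained from a
 * CompoundFileReader.
 */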
static boolean isCSIndexInput(IndexInput is) {
return is instanceof CompoundFileReader.CSIndexInput;
}
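/** Returns true if the given input is a CSIndexInput whose underlying
 * SimpleFSIndexInput is still open; returns false for any other input type.
 */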
static boolean isCSIndexInputOpen(IndexInput is) throws IOException {
if (isCSIndexInput(is)) {
CompoundFileReader.CSIndexInput cis =
(CompoundFileReader.CSIndexInput) is;
return _TestHelper.isSimpleFSIndexInputOpen(cis.base);
} else {
return false;
}
}
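/** Verifies that clones of a compound-file input remain readable after the
 * original is closed, and that all of them are closed once the
 * CompoundFileReader itself is closed.
 */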
public void testClonedStreamsClosing() throws IOException {
setUp_2();
CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
// basic clone
IndexInput expected = dir.openInput("f11");
// this test only works for FSIndexInput
assertTrue(_TestHelper.isSimpleFSIndexInput(expected));
assertTrue(_TestHelper.isSimpleFSIndexInputOpen(expected));
IndexInput one = cr.openInput("f11");
assertTrue(isCSIndexInputOpen(one));
IndexInput two = (IndexInput) one.clone();
assertTrue(isCSIndexInputOpen(two));
assertSameStreams("basic clone one", expected, one);
expected.seek(0);
assertSameStreams("basic clone two", expected, two);
// Now close the first stream
one.close();
assertTrue("Only close when cr is closed", isCSIndexInputOpen(one));
// The following should really fail, since we shouldn't be able to
// access a file once close() has been called on it (regardless of
// buffering and/or clone magic)
expected.seek(0);
two.seek(0);
assertSameStreams("basic clone two/2", expected, two);
// Now close the compound reader
cr.close();
assertFalse("Now closed one", isCSIndexInputOpen(one));
assertFalse("Now closed two", isCSIndexInputOpen(two));
// The following may also fail since the compound stream is closed
expected.seek(0);
two.seek(0);
//assertSameStreams("basic clone two/3", expected, two);
// Now close the second clone
two.close();
expected.seek(0);
two.seek(0);
//assertSameStreams("basic clone two/4", expected, two);
expected.close();
}
/** This test opens two files from a compound stream and verifies that
* their file positions are independent of each other.
*/
public void testRandomAccess() throws IOException {
setUp_2();
CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
// Open two files
IndexInput e1 = dir.openInput("f11");
IndexInput e2 = dir.openInput("f3");
IndexInput a1 = cr.openInput("f11");
IndexInput a2 = dir.openInput("f3");
// Seek the first pair
e1.seek(100);
a1.seek(100);
assertEquals(100, e1.getFilePointer());
assertEquals(100, a1.getFilePointer());
byte be1 = e1.readByte();
byte ba1 = a1.readByte();
assertEquals(be1, ba1);
// Now seek the second pair
e2.seek(1027);
a2.seek(1027);
assertEquals(1027, e2.getFilePointer());
assertEquals(1027, a2.getFilePointer());
byte be2 = e2.readByte();
byte ba2 = a2.readByte();
assertEquals(be2, ba2);
// Now make sure the first one didn't move
assertEquals(101, e1.getFilePointer());
assertEquals(101, a1.getFilePointer());
be1 = e1.readByte();
ba1 = a1.readByte();
assertEquals(be1, ba1);
// Now move the first one again, past the buffer length
e1.seek(1910);
a1.seek(1910);
assertEquals(1910, e1.getFilePointer());
assertEquals(1910, a1.getFilePointer());
be1 = e1.readByte();
ba1 = a1.readByte();
assertEquals(be1, ba1);
// Now make sure the second set didn't move
assertEquals(1028, e2.getFilePointer());
assertEquals(1028, a2.getFilePointer());
be2 = e2.readByte();
ba2 = a2.readByte();
assertEquals(be2, ba2);
// Move the second set back, again cross the buffer size
e2.seek(17);
a2.seek(17);
assertEquals(17, e2.getFilePointer());
assertEquals(17, a2.getFilePointer());
be2 = e2.readByte();
ba2 = a2.readByte();
assertEquals(be2, ba2);
// Finally, make sure the first set didn't move
assertEquals(1911, e1.getFilePointer());
assertEquals(1911, a1.getFilePointer());
be1 = e1.readByte();
ba1 = a1.readByte();
assertEquals(be1, ba1);
e1.close();
e2.close();
a1.close();
a2.close();
cr.close();
}
/** This test clones two files from a compound stream and verifies that
 * the clones' file positions are independent of each other.
 */
public void testRandomAccessClones() throws IOException {
setUp_2();
CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
// Open two files
IndexInput e1 = cr.openInput("f11");
IndexInput e2 = cr.openInput("f3");
IndexInput a1 = (IndexInput) e1.clone();
IndexInput a2 = (IndexInput) e2.clone();
// Seek the first pair
e1.seek(100);
a1.seek(100);
assertEquals(100, e1.getFilePointer());
assertEquals(100, a1.getFilePointer());
byte be1 = e1.readByte();
byte ba1 = a1.readByte();
assertEquals(be1, ba1);
// Now seek the second pair
e2.seek(1027);
a2.seek(1027);
assertEquals(1027, e2.getFilePointer());
assertEquals(1027, a2.getFilePointer());
byte be2 = e2.readByte();
byte ba2 = a2.readByte();
assertEquals(be2, ba2);
// Now make sure the first one didn't move
assertEquals(101, e1.getFilePointer());
assertEquals(101, a1.getFilePointer());
be1 = e1.readByte();
ba1 = a1.readByte();
assertEquals(be1, ba1);
// Now move the first one again, past the buffer length
e1.seek(1910);
a1.seek(1910);
assertEquals(1910, e1.getFilePointer());
assertEquals(1910, a1.getFilePointer());
be1 = e1.readByte();
ba1 = a1.readByte();
assertEquals(be1, ba1);
// Now make sure the second set didn't move
assertEquals(1028, e2.getFilePointer());
assertEquals(1028, a2.getFilePointer());
be2 = e2.readByte();
ba2 = a2.readByte();
assertEquals(be2, ba2);
// Move the second set back, again cross the buffer size
e2.seek(17);
a2.seek(17);
assertEquals(17, e2.getFilePointer());
assertEquals(17, a2.getFilePointer());
be2 = e2.readByte();
ba2 = a2.readByte();
assertEquals(be2, ba2);
// Finally, make sure the first set didn't move
assertEquals(1911, e1.getFilePointer());
assertEquals(1911, a1.getFilePointer());
be1 = e1.readByte();
ba1 = a1.readByte();
assertEquals(be1, ba1);
e1.close();
e2.close();
a1.close();
a2.close();
cr.close();
}
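/** Verifies that opening a file that is not part of the compound file
 * throws an IOException.
 */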
public void testFileNotFound() throws IOException {
setUp_2();
CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
// Try to open a file that is not part of the compound file
try {
cr.openInput("bogus");
fail("Opening a missing file should throw an IOException");
} catch (IOException e) {
/* success */
//System.out.println("SUCCESS: File Not Found: " + e);
}
cr.close();
}
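/** Verifies that both single-byte and block reads past the end of a
 * compound sub-file throw an IOException.
 */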
public void testReadPastEOF() throws IOException {
setUp_2();
CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
IndexInput is = cr.openInput("f2");
is.seek(is.length() - 10);
byte b[] = new byte[100];
is.readBytes(b, 0, 10);
try {
is.readByte();
fail("Single byte read past end of file");
} catch (IOException e) {
/* success */
//System.out.println("SUCCESS: single byte read past end of file: " + e);
}
is.seek(is.length() - 10);
try {
is.readBytes(b, 0, 50);
fail("Block read past end of file");
} catch (IOException e) {
/* success */
//System.out.println("SUCCESS: block read past end of file: " + e);
}
is.close();
cr.close();
}
/** This test verifies that a write larger than the output buffer size
 * correctly increments the file pointer.
 */
public void testLargeWrites() throws IOException {
IndexOutput os = dir.createOutput("testBufferStart.txt");
byte[] largeBuf = new byte[2048];
for (int i=0; i<largeBuf.length; i++) {
largeBuf[i] = (byte) (Math.random() * 256);
}
long currentPos = os.getFilePointer();
os.writeBytes(largeBuf, largeBuf.length);
try {
assertEquals(currentPos + largeBuf.length, os.getFilePointer());
} finally {
os.close();
}
}
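/** Verifies that a file can be added to a compound file from a different
 * Directory than the one the compound file is written to.
 */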
public void testAddExternalFile() throws IOException {
createSequenceFile(dir, "d1", (byte) 0, 15);
Directory newDir = newDirectory();
CompoundFileWriter csw = new CompoundFileWriter(newDir, "d.csf");
csw.addFile("d1", dir);
csw.close();
CompoundFileReader csr = new CompoundFileReader(newDir, "d.csf");
IndexInput expected = dir.openInput("d1");
IndexInput actual = csr.openInput("d1");
assertSameStreams("d1", expected, actual);
assertSameSeekBehavior("d1", expected, actual);
expected.close();
actual.close();
csr.close();
newDir.close();
}
// Make sure we don't somehow use more than 1 descriptor
// when reading a CFS with many subs:
public void testManySubFiles() throws IOException {
final Directory d = newFSDirectory(_TestUtil.getTempDir("CFSManySubFiles"));
final int FILE_COUNT = 10000;
for(int fileIdx=0;fileIdx<FILE_COUNT;fileIdx++) {
IndexOutput out = d.createOutput("file." + fileIdx);
out.writeByte((byte) fileIdx);
out.close();
}
final CompoundFileWriter cfw = new CompoundFileWriter(d, "c.cfs");
for(int fileIdx=0;fileIdx<FILE_COUNT;fileIdx++) {
cfw.addFile("file." + fileIdx);
}
cfw.close();
final IndexInput[] ins = new IndexInput[FILE_COUNT];
final CompoundFileReader cfr = new CompoundFileReader(d, "c.cfs");
for(int fileIdx=0;fileIdx<FILE_COUNT;fileIdx++) {
ins[fileIdx] = cfr.openInput("file." + fileIdx);
}
for(int fileIdx=0;fileIdx<FILE_COUNT;fileIdx++) {
assertEquals((byte) fileIdx, ins[fileIdx].readByte());
}
for(int fileIdx=0;fileIdx<FILE_COUNT;fileIdx++) {
ins[fileIdx].close();
}
cfr.close();
d.close();
}
}