/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.compressing;

import java.io.IOException;
import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;

import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
public class TestCompressingStoredFieldsFormat extends BaseStoredFieldsFormatTestCase {

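  // Time units, in milliseconds, used by testTLong to exercise the long encoding
  // on timestamp-like values.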
  static final long SECOND = 1000L;
  static final long HOUR = 60 * 60 * SECOND;
  static final long DAY = 24 * HOUR;

  @Override
  protected Codec getCodec() {
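    // nightly runs exercise a fully random compression configuration; regular
    // runs stick to a "reasonable" one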
    if (TEST_NIGHTLY) {
      return CompressingCodec.randomInstance(random());
    } else {
      return CompressingCodec.reasonableInstance(random());
    }
  }

  public void testDeletePartiallyWrittenFilesIfAbort() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
    iwConf.setCodec(getCodec());
    // disable CFS because this test checks file names
    iwConf.setMergePolicy(newLogMergePolicy(false));
    iwConf.setUseCompoundFile(false);

    // Cannot use RandomIndexWriter because this test wants CFS to stay off:
    IndexWriter iw = new IndexWriter(dir, iwConf);

    final Document validDoc = new Document();
    validDoc.add(new IntPoint("id", 0));
    validDoc.add(new StoredField("id", 0));
    iw.addDocument(validDoc);
    iw.commit();

    // add a document whose stored field returns a null value, so that #writeField
    // fails and triggers an abort
    final Document invalidDoc = new Document();
    FieldType fieldType = new FieldType();
    fieldType.setStored(true);
    invalidDoc.add(new Field("invalid", fieldType) {

      @Override
      public String stringValue() {
        // TODO: really bad & scary that this causes IW to
        // abort the segment!! We should fix this.
        return null;
      }

    });

    // the failed write must surface as an IllegalArgumentException and must be
    // recorded as the writer's tragic exception
    IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> {
      iw.addDocument(invalidDoc);
      iw.commit();
    });
    assertEquals(iae, iw.getTragicException());

    // Writer should be closed by tragedy
    assertFalse(iw.isOpen());
    dir.close();
  }

  public void testZFloat() throws Exception {
    byte[] buffer = new byte[5]; // we never need more than 5 bytes
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);

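    // ZFloat is a variable-length float encoding: integral values in [-1, 123]
    // fit in a single byte, and the worst case is 4 bytes for non-negative floats
    // and 5 for negative ones (the bounds asserted below).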
    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
      float f = (float) i;
      CompressingStoredFieldsWriter.writeZFloat(out, f);
      in.reset(buffer, 0, out.getPosition());
      float g = CompressingStoredFieldsReader.readZFloat(in);
      assertTrue(in.eof());
      assertEquals(Float.floatToIntBits(f), Float.floatToIntBits(g));

      // check that compression actually works
      if (i >= -1 && i <= 123) {
        assertEquals(1, out.getPosition()); // single byte compression
      }
      out.reset(buffer);
    }

    // round-trip special values
    float[] special = {
      -0.0f,
      +0.0f,
      Float.NEGATIVE_INFINITY,
      Float.POSITIVE_INFINITY,
      Float.MIN_VALUE,
      Float.MAX_VALUE,
      Float.NaN,
    };

    for (float f : special) {
      CompressingStoredFieldsWriter.writeZFloat(out, f);
      in.reset(buffer, 0, out.getPosition());
      float g = CompressingStoredFieldsReader.readZFloat(in);
      assertTrue(in.eof());
      assertEquals(Float.floatToIntBits(f), Float.floatToIntBits(g));
      out.reset(buffer);
    }

    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
      float f = r.nextFloat() * (r.nextInt(100) - 50);
      CompressingStoredFieldsWriter.writeZFloat(out, f);
      assertTrue("length=" + out.getPosition() + ", f=" + f, out.getPosition() <= ((Float.floatToIntBits(f) >>> 31) == 1 ? 5 : 4));
      in.reset(buffer, 0, out.getPosition());
      float g = CompressingStoredFieldsReader.readZFloat(in);
      assertTrue(in.eof());
      assertEquals(Float.floatToIntBits(f), Float.floatToIntBits(g));
      out.reset(buffer);
    }
  }

  public void testZDouble() throws Exception {
    byte[] buffer = new byte[9]; // we never need more than 9 bytes
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);

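    // ZDouble is the double variant: integral values in [-1, 124] fit in a single
    // byte, doubles representable as floats take at most 5 bytes, and the worst
    // case is 8 bytes for positive values and 9 for negative ones.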
    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
      double x = (double) i;
      CompressingStoredFieldsWriter.writeZDouble(out, x);
      in.reset(buffer, 0, out.getPosition());
      double y = CompressingStoredFieldsReader.readZDouble(in);
      assertTrue(in.eof());
      assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));

      // check that compression actually works
      if (i >= -1 && i <= 124) {
        assertEquals(1, out.getPosition()); // single byte compression
      }
      out.reset(buffer);
    }

    // round-trip special values
    double[] special = {
      -0.0d,
      +0.0d,
      Double.NEGATIVE_INFINITY,
      Double.POSITIVE_INFINITY,
      Double.MIN_VALUE,
      Double.MAX_VALUE,
      Double.NaN
    };

    for (double x : special) {
      CompressingStoredFieldsWriter.writeZDouble(out, x);
      in.reset(buffer, 0, out.getPosition());
      double y = CompressingStoredFieldsReader.readZDouble(in);
      assertTrue(in.eof());
      assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
      out.reset(buffer);
    }

    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
      double x = r.nextDouble() * (r.nextInt(100) - 50);
      CompressingStoredFieldsWriter.writeZDouble(out, x);
      assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= (x < 0 ? 9 : 8));
      in.reset(buffer, 0, out.getPosition());
      double y = CompressingStoredFieldsReader.readZDouble(in);
      assertTrue(in.eof());
      assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
      out.reset(buffer);
    }

    // same, with doubles that are exactly representable as floats
    for (int i = 0; i < 100000; i++) {
      double x = (double) (r.nextFloat() * (r.nextInt(100) - 50));
      CompressingStoredFieldsWriter.writeZDouble(out, x);
      assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= 5);
      in.reset(buffer, 0, out.getPosition());
      double y = CompressingStoredFieldsReader.readZDouble(in);
      assertTrue(in.eof());
      assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
      out.reset(buffer);
    }
  }

  public void testTLong() throws Exception {
    byte[] buffer = new byte[10]; // we never need more than 10 bytes
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);

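    // TLong targets timestamp-like longs: exact multiples of a second, hour or
    // day compress well, down to a single byte for small multiples, with a worst
    // case of 10 bytes.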
    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
      for (long mul : new long[] {SECOND, HOUR, DAY}) {
        long l1 = (long) i * mul;
        CompressingStoredFieldsWriter.writeTLong(out, l1);
        in.reset(buffer, 0, out.getPosition());
        long l2 = CompressingStoredFieldsReader.readTLong(in);
        assertTrue(in.eof());
        assertEquals(l1, l2);

        // check that compression actually works
        if (i >= -16 && i <= 15) {
          assertEquals(1, out.getPosition()); // single byte compression
        }
        out.reset(buffer);
      }
    }

    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
      final int numBits = r.nextInt(65);
      // mask down to numBits random bits; (1L << 64) wraps around in Java, so the
      // full-width case needs special handling
      long l1 = numBits == 64 ? r.nextLong() : (r.nextLong() & ((1L << numBits) - 1));
      switch (r.nextInt(4)) {
        case 0:
          l1 *= SECOND;
          break;
        case 1:
          l1 *= HOUR;
          break;
        case 2:
          l1 *= DAY;
          break;
        default:
          break;
      }
      CompressingStoredFieldsWriter.writeTLong(out, l1);
      in.reset(buffer, 0, out.getPosition());
      long l2 = CompressingStoredFieldsReader.readTLong(in);
      assertTrue(in.eof());
      assertEquals(l1, l2);
      out.reset(buffer);
    }
  }

  /**
   * Writes some tiny segments whose last compressed blocks are flushed before
   * they are full ("dirty" chunks), and ensures that merging recompresses them.
   */
  public void testChunkCleanup() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
    iwConf.setMergePolicy(NoMergePolicy.INSTANCE);

    // we have to pin down settings such as maxDocsPerChunk, otherwise this test
    // would not reliably create dirty chunks
    iwConf.setCodec(CompressingCodec.randomInstance(random(), 4 * 1024, 100, false, 8));
    IndexWriter iw = new IndexWriter(dir, iwConf);
    DirectoryReader ir = DirectoryReader.open(iw);
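    // ir is a near-real-time reader on the writer, so openIfChanged below forces
    // a flush of the buffered document on every iteration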
    for (int i = 0; i < 5; i++) {
      Document doc = new Document();
      doc.add(new StoredField("text", "not very long at all"));
      iw.addDocument(doc);
      // force flush
      DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);
      assertNotNull(ir2);
      ir.close();
      ir = ir2;
      // examine dirty counts:
      for (LeafReaderContext leaf : ir2.leaves()) {
        CodecReader sr = (CodecReader) leaf.reader();
        CompressingStoredFieldsReader reader = (CompressingStoredFieldsReader) sr.getFieldsReader();
        assertTrue(reader.getNumDirtyDocs() > 0);
        assertTrue(reader.getNumDirtyDocs() < 100); // cannot be >= the number of docs per chunk
        assertEquals(1, reader.getNumDirtyChunks());
      }
    }
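    // re-enable merging and collapse the tiny segments into a single one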
    iw.getConfig().setMergePolicy(newLogMergePolicy());
    iw.forceMerge(1);
    // add a single doc and merge again
    Document doc = new Document();
    doc.add(new StoredField("text", "not very long at all"));
    iw.addDocument(doc);
    iw.forceMerge(1);
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);
    assertNotNull(ir2);
    ir.close();
    ir = ir2;
    CodecReader sr = (CodecReader) getOnlyLeafReader(ir);
    CompressingStoredFieldsReader reader = (CompressingStoredFieldsReader) sr.getFieldsReader();
    // at most 2: the 5 dirty chunks from the 5 single-doc segments have been
    // collapsed into a single chunk by the first merge
    assertTrue(reader.getNumDirtyChunks() <= 2);
    ir.close();
    iw.close();
    dir.close();
  }
}