| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.avro; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.File; |
| import java.io.FileOutputStream; |
| import java.io.IOException; |
| import org.apache.avro.Schema.Type; |
| import org.apache.avro.file.DataFileConstants; |
| import org.apache.avro.file.DataFileReader; |
| import org.apache.avro.file.DataFileWriter; |
| import org.apache.avro.generic.GenericDatumReader; |
| import org.apache.avro.generic.GenericDatumWriter; |
| import org.apache.avro.util.Utf8; |
| import org.junit.Test; |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertFalse; |
| import static org.junit.Assert.fail; |
| |
| public class TestDataFileCorruption { |
| |
| private static final File DIR |
| = new File(System.getProperty("test.dir", "/tmp")); |
| |
| private File makeFile(String name) { |
| return new File(DIR, "test-" + name + ".avro"); |
| } |
| |
| @Test |
| public void testCorruptedFile() throws IOException { |
| Schema schema = Schema.create(Type.STRING); |
| |
| // Write a data file |
| DataFileWriter<Utf8> w = new DataFileWriter<Utf8>(new GenericDatumWriter<Utf8>(schema)); |
| ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
| w.create(schema, baos); |
| w.append(new Utf8("apple")); |
| w.append(new Utf8("banana")); |
| w.sync(); |
| w.append(new Utf8("celery")); |
| w.append(new Utf8("date")); |
| long pos = w.sync(); |
| w.append(new Utf8("endive")); |
| w.append(new Utf8("fig")); |
| w.close(); |
| |
| // Corrupt the input by inserting some zero bytes before the sync marker for the |
| // penultimate block |
| byte[] original = baos.toByteArray(); |
| int corruptPosition = (int) pos - DataFileConstants.SYNC_SIZE; |
| int corruptedBytes = 3; |
| byte[] corrupted = new byte[original.length + corruptedBytes]; |
| System.arraycopy(original, 0, corrupted, 0, corruptPosition); |
| System.arraycopy(original, corruptPosition, |
| corrupted, corruptPosition + corruptedBytes, original.length - corruptPosition); |
| |
| File file = makeFile("corrupt"); |
| file.deleteOnExit(); |
| FileOutputStream out = new FileOutputStream(file); |
| out.write(corrupted); |
| out.close(); |
| |
| // Read the data file |
| DataFileReader r = new DataFileReader<Utf8>(file, |
| new GenericDatumReader<Utf8>(schema)); |
| assertEquals("apple", r.next().toString()); |
| assertEquals("banana", r.next().toString()); |
| long prevSync = r.previousSync(); |
| try { |
| r.next(); |
| fail("Corrupt block should throw exception"); |
| } catch (AvroRuntimeException e) { |
| assertEquals("Invalid sync!", e.getCause().getMessage()); |
| } |
| r.sync(prevSync); // go to sync point after previous successful one |
| assertEquals("endive", r.next().toString()); |
| assertEquals("fig", r.next().toString()); |
| assertFalse(r.hasNext()); |
| } |
| } |