blob: e76251c6a390b28c2adf918bf893a5b4f861f368 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.avro.Schema.Type;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.util.Utf8;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.fail;
/**
 * Checks that a {@link DataFileReader} reports a corrupted block and can
 * resume reading at a later sync point.
 */
public class TestDataFileCorruption {

  /** Directory for temporary test files; overridable with the "test.dir" system property. */
  private static final File DIR
    = new File(System.getProperty("test.dir", "/tmp"));

  /** Returns the test file for the given name, located in {@link #DIR}. */
  private File makeFile(String name) {
    return new File(DIR, "test-" + name + ".avro");
  }

  /**
   * Returns a copy of {@code original} with {@code count} zero bytes inserted
   * at index {@code position}; the bytes from {@code position} onwards are
   * shifted towards the end.
   */
  private static byte[] insertZeroBytes(byte[] original, int position, int count) {
    // A freshly allocated array is zero-filled, so only the two surrounding
    // ranges need to be copied.
    byte[] result = new byte[original.length + count];
    System.arraycopy(original, 0, result, 0, position);
    System.arraycopy(original, position,
                     result, position + count, original.length - position);
    return result;
  }

  /**
   * Writes three blocks of two strings each, corrupts the file by inserting
   * zero bytes in front of a sync marker, and verifies that the reader fails
   * with "Invalid sync!" on the damaged block and then continues with the
   * last block after an explicit {@code sync}.
   */
  @Test
  public void testCorruptedFile() throws IOException {
    Schema schema = Schema.create(Type.STRING);

    // Write a data file
    DataFileWriter<Utf8> w = new DataFileWriter<Utf8>(new GenericDatumWriter<Utf8>(schema));
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    w.create(schema, baos);
    w.append(new Utf8("apple"));
    w.append(new Utf8("banana"));
    w.sync();
    w.append(new Utf8("celery"));
    w.append(new Utf8("date"));
    long pos = w.sync();
    w.append(new Utf8("endive"));
    w.append(new Utf8("fig"));
    w.close();

    // Corrupt the input by inserting some zero bytes before the sync marker for the
    // penultimate block
    int corruptPosition = (int) pos - DataFileConstants.SYNC_SIZE;
    int corruptedBytes = 3;
    byte[] corrupted = insertZeroBytes(baos.toByteArray(), corruptPosition, corruptedBytes);

    File file = makeFile("corrupt");
    file.deleteOnExit();
    FileOutputStream out = new FileOutputStream(file);
    try {
      out.write(corrupted);
    } finally {
      out.close(); // release the handle even when the write fails
    }

    // Read the data file
    DataFileReader<Utf8> r = new DataFileReader<Utf8>(file,
        new GenericDatumReader<Utf8>(schema));
    try {
      assertEquals("apple", r.next().toString());
      assertEquals("banana", r.next().toString());
      long prevSync = r.previousSync();
      try {
        r.next();
        fail("Corrupt block should throw exception");
      } catch (AvroRuntimeException e) {
        assertEquals("Invalid sync!", e.getCause().getMessage());
      }
      r.sync(prevSync); // go to sync point after previous successful one
      assertEquals("endive", r.next().toString());
      assertEquals("fig", r.next().toString());
      assertFalse(r.hasNext());
    } finally {
      r.close(); // close the reader even when an assertion fails
    }
  }
}