// blob: 2de056fd063c6dd2bc873a1379e4c3579d7d5f97
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.apache.avro.mapreduce;
import static org.easymock.EasyMock.createMock;
import static org.easymock.EasyMock.replay;
import static org.easymock.EasyMock.verify;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.hadoop.io.AvroDatumConverter;
import org.apache.avro.hadoop.io.AvroDatumConverterFactory;
import org.apache.avro.hadoop.io.AvroKeyValue;
import org.apache.avro.io.DatumReader;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapred.FsInput;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.junit.Test;
/**
 * Unit tests for {@link AvroKeyValueRecordWriter}.
 *
 * <p>Each test builds a writer from an {@link AvroDatumConverterFactory}, writes
 * a few key/value pairs into an Avro container (in memory or on local disk),
 * then reads the container back and asserts the records round-trip intact.</p>
 */
public class TestAvroKeyValueRecordWriter {
  /**
   * Writes two generic key/value records to an in-memory Avro container file
   * and verifies both records read back in write order.
   */
  @Test
  public void testWriteRecords() throws IOException {
    Job job = new Job();
    AvroJob.setOutputValueSchema(job, TextStats.SCHEMA$);
    // The writer only uses the context in close(); a no-expectation mock suffices.
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    replay(context);

    AvroDatumConverterFactory factory = new AvroDatumConverterFactory(job.getConfiguration());
    AvroDatumConverter<Text, ?> keyConverter = factory.create(Text.class);
    // A dummy AvroValue instance exists only to obtain the runtime class for the factory.
    AvroValue<TextStats> avroValue = new AvroValue<TextStats>(null);
    @SuppressWarnings("unchecked")
    AvroDatumConverter<AvroValue<TextStats>, ?> valueConverter
        = factory.create((Class<AvroValue<TextStats>>) avroValue.getClass());
    CodecFactory compressionCodec = CodecFactory.nullCodec();
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

    // Use a writer to generate an Avro container file in memory.
    // Write two records: <'apple', TextStats('apple')> and <'banana', TextStats('banana')>.
    AvroKeyValueRecordWriter<Text, AvroValue<TextStats>> writer
        = new AvroKeyValueRecordWriter<Text, AvroValue<TextStats>>(keyConverter, valueConverter,
            new ReflectData(), compressionCodec, outputStream);
    TextStats appleStats = new TextStats();
    appleStats.name = "apple";
    writer.write(new Text("apple"), new AvroValue<TextStats>(appleStats));
    TextStats bananaStats = new TextStats();
    bananaStats.name = "banana";
    writer.write(new Text("banana"), new AvroValue<TextStats>(bananaStats));
    writer.close(context);
    verify(context);

    // Read the container back with the generic key/value wrapper schema.
    ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray());
    Schema readerSchema = AvroKeyValue.getSchema(
        Schema.create(Schema.Type.STRING), TextStats.SCHEMA$);
    DatumReader<GenericRecord> datumReader
        = new SpecificDatumReader<GenericRecord>(readerSchema);
    DataFileStream<GenericRecord> avroFileReader
        = new DataFileStream<GenericRecord>(inputStream, datumReader);

    // Verify that the first record was written.
    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, TextStats> firstRecord
        = new AvroKeyValue<CharSequence, TextStats>(avroFileReader.next());
    assertNotNull(firstRecord.get());
    assertEquals("apple", firstRecord.getKey().toString());
    assertEquals("apple", firstRecord.getValue().name.toString());

    // Verify that the second record was written.
    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, TextStats> secondRecord
        = new AvroKeyValue<CharSequence, TextStats>(avroFileReader.next());
    assertNotNull(secondRecord.get());
    assertEquals("banana", secondRecord.getKey().toString());
    assertEquals("banana", secondRecord.getValue().name.toString());

    // That's all, folks.
    assertFalse(avroFileReader.hasNext());
    avroFileReader.close();
  }

  /** Simple reflect-serializable record used by {@link #testUsingReflection()}. */
  public static class R1 {
    String attribute;
  }

  /**
   * Writes a reflect-based record and verifies it reads back with a
   * {@link ReflectDatumReader} holding the same field data.
   */
  @Test public void testUsingReflection() throws Exception {
    Job job = new Job();
    Schema schema = ReflectData.get().getSchema(R1.class);
    AvroJob.setOutputValueSchema(job, schema);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    replay(context);

    R1 record = new R1();
    record.attribute = "test";
    AvroValue<R1> avroValue = new AvroValue<R1>(record);
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    AvroDatumConverterFactory factory =
        new AvroDatumConverterFactory(job.getConfiguration());
    AvroDatumConverter<Text, ?> keyConverter = factory.create(Text.class);
    @SuppressWarnings("unchecked")
    AvroDatumConverter<AvroValue<R1>, R1> valueConverter =
        factory.create((Class<AvroValue<R1>>) avroValue.getClass());
    AvroKeyValueRecordWriter<Text, AvroValue<R1>> writer =
        new AvroKeyValueRecordWriter<Text, AvroValue<R1>>(
            keyConverter, valueConverter, new ReflectData(),
            CodecFactory.nullCodec(), outputStream);
    writer.write(new Text("reflectionData"), avroValue);
    writer.close(context);
    verify(context);

    ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray());
    Schema readerSchema = AvroKeyValue.getSchema(
        Schema.create(Schema.Type.STRING), schema);
    DatumReader<GenericRecord> datumReader =
        new ReflectDatumReader<GenericRecord>(readerSchema);
    DataFileStream<GenericRecord> avroFileReader =
        new DataFileStream<GenericRecord>(inputStream, datumReader);

    // Verify that the first record was written.
    assertTrue(avroFileReader.hasNext());
    // Verify that the record holds the same data that we've written.
    AvroKeyValue<CharSequence, R1> firstRecord =
        new AvroKeyValue<CharSequence, R1>(avroFileReader.next());
    assertNotNull(firstRecord.get());
    assertEquals("reflectionData", firstRecord.getKey().toString());
    assertEquals(record.attribute, firstRecord.getValue().attribute);
    // Fix: close the stream; the original test leaked this reader.
    avroFileReader.close();
  }

  /**
   * Writes records interleaved with {@link AvroKeyValueRecordWriter#sync()}
   * markers to a local file, then seeks back to each marker and verifies the
   * record following it — exercising random access via {@link DataFileReader}.
   */
  @Test
  public void testSyncableWriteRecords() throws IOException {
    Job job = new Job();
    AvroJob.setOutputValueSchema(job, TextStats.SCHEMA$);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    replay(context);

    AvroDatumConverterFactory factory = new AvroDatumConverterFactory(job.getConfiguration());
    AvroDatumConverter<Text, ?> keyConverter = factory.create(Text.class);
    AvroValue<TextStats> avroValue = new AvroValue<TextStats>(null);
    @SuppressWarnings("unchecked")
    AvroDatumConverter<AvroValue<TextStats>, ?> valueConverter
        = factory.create((Class<AvroValue<TextStats>>) avroValue.getClass());
    CodecFactory compressionCodec = CodecFactory.nullCodec();
    // Seeking requires a real file (DataFileReader needs a seekable input).
    File file = new File("target/temp.avro");
    FileOutputStream outputStream = new FileOutputStream(file);

    // Write a sync marker followed by each record:
    // <'apple', TextStats('apple')> and <'banana', TextStats('banana')>.
    AvroKeyValueRecordWriter<Text, AvroValue<TextStats>> writer
        = new AvroKeyValueRecordWriter<Text, AvroValue<TextStats>>(keyConverter, valueConverter,
            new ReflectData(), compressionCodec, outputStream);
    TextStats appleStats = new TextStats();
    appleStats.name = "apple";
    long pointOne = writer.sync();
    writer.write(new Text("apple"), new AvroValue<TextStats>(appleStats));
    TextStats bananaStats = new TextStats();
    bananaStats.name = "banana";
    long pointTwo = writer.sync();
    writer.write(new Text("banana"), new AvroValue<TextStats>(bananaStats));
    writer.close(context);
    verify(context);

    Configuration conf = new Configuration();
    conf.set("fs.default.name", "file:///");
    Path avroFile = new Path("target/temp.avro");
    DataFileReader<GenericData.Record> avroFileReader = new DataFileReader<GenericData.Record>(new FsInput(avroFile,
        conf), new SpecificDatumReader<GenericData.Record>());

    // Seek to the second sync point first to prove random access works.
    avroFileReader.seek(pointTwo);
    // Verify that the second record was written.
    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, TextStats> secondRecord
        = new AvroKeyValue<CharSequence, TextStats>(avroFileReader.next());
    assertNotNull(secondRecord.get());
    assertEquals("banana", secondRecord.getKey().toString());
    assertEquals("banana", secondRecord.getValue().name.toString());

    // Then seek back to the first sync point.
    avroFileReader.seek(pointOne);
    // Verify that the first record was written.
    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, TextStats> firstRecord
        = new AvroKeyValue<CharSequence, TextStats>(avroFileReader.next());
    assertNotNull(firstRecord.get());
    assertEquals("apple", firstRecord.getKey().toString());
    assertEquals("apple", firstRecord.getValue().name.toString());

    // That's all, folks.
    avroFileReader.close();
    // Fix: clean up the temp file; the original test left it behind.
    file.delete();
  }
}