package org.apache.avro.util;
import java.io.File;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
/** Generates schema data as Java objects with random values. */
public class RandomData implements Iterable<Object> {
/**
 * Name of the field property that, when present (non-null) on a record field,
 * makes generation use that field's default value instead of a random one.
 */
public static final String USE_DEFAULT = "use-default";
/** Schema passed to the generator for every datum produced. */
private final Schema root;
/** Seed handed to the {@link Random} that each new iterator creates. */
private final long seed;
/** Bound the iterator's counter is compared against in {@code hasNext()}. */
private final int count;
/** When true, random strings are returned as {@code Utf8}; otherwise as {@link String}. */
private final boolean utf8ForString;
/**
 * Creates a generator seeded from the current wall-clock time that produces
 * strings as {@link String} instances.
 *
 * @param schema schema the generated data conforms to
 * @param count  number of datums the iterator reports as available
 */
public RandomData(Schema schema, int count) {
  // Same values the (schema, count, boolean) overload would forward.
  this(schema, count, System.currentTimeMillis(), false);
}
/**
 * Creates a generator with an explicit seed that produces strings as
 * {@link String} instances.
 *
 * @param schema schema the generated data conforms to
 * @param count  number of datums the iterator reports as available
 * @param seed   seed for the random number generator of each iterator
 */
public RandomData(Schema schema, int count, long seed) {
  this(schema, count, seed, false);
}
/**
 * Creates a generator seeded from the current wall-clock time.
 *
 * @param schema        schema the generated data conforms to
 * @param count         number of datums the iterator reports as available
 * @param utf8ForString whether random strings are returned as {@code Utf8}
 *                      instead of {@link String}
 */
public RandomData(Schema schema, int count, boolean utf8ForString) {
  this(schema, count, System.currentTimeMillis(), utf8ForString);
}
/**
 * Creates a fully specified generator; every other constructor delegates here.
 *
 * @param schema        schema the generated data conforms to
 * @param count         number of datums the iterator reports as available
 * @param seed          seed for the random number generator of each iterator
 * @param utf8ForString whether random strings are returned as {@code Utf8}
 *                      instead of {@link String}
 */
public RandomData(Schema schema, int count, long seed, boolean utf8ForString) {
  this.utf8ForString = utf8ForString;
  this.count = count;
  this.seed = seed;
  this.root = schema;
}
public Iterator<Object> iterator() {
return new Iterator<Object>() {
private int n;
private Random random = new Random(seed);
public boolean hasNext() {
return n < count;
public Object next() {
return generate(root, random, 0);
public void remove() {
throw new UnsupportedOperationException();
@SuppressWarnings(value = "unchecked")
private Object generate(Schema schema, Random random, int d) {
switch (schema.getType()) {
case RECORD:
GenericRecord record = new GenericData.Record(schema);
for (Schema.Field field : schema.getFields()) {
Object value = (field.getObjectProp(USE_DEFAULT) == null) ? generate(field.schema(), random, d + 1)
: GenericData.get().getDefaultValue(field);
record.put(, value);
return record;
case ENUM:
List<String> symbols = schema.getEnumSymbols();
return new GenericData.EnumSymbol(schema, symbols.get(random.nextInt(symbols.size())));
case ARRAY:
int length = (random.nextInt(5) + 2) - d;
GenericArray<Object> array = new GenericData.Array(length <= 0 ? 0 : length, schema);
for (int i = 0; i < length; i++)
array.add(generate(schema.getElementType(), random, d + 1));
return array;
case MAP:
length = (random.nextInt(5) + 2) - d;
Map<Object, Object> map = new HashMap<>(length <= 0 ? 0 : length);
for (int i = 0; i < length; i++) {
map.put(randomString(random, 40), generate(schema.getValueType(), random, d + 1));
return map;
case UNION:
List<Schema> types = schema.getTypes();
return generate(types.get(random.nextInt(types.size())), random, d);
case FIXED:
byte[] bytes = new byte[schema.getFixedSize()];
return new GenericData.Fixed(schema, bytes);
case STRING:
return randomString(random, 40);
case BYTES:
return randomBytes(random, 40);
case INT:
return random.nextInt();
case LONG:
return random.nextLong();
case FLOAT:
return random.nextFloat();
case DOUBLE:
return random.nextDouble();
return random.nextBoolean();
case NULL:
return null;
throw new RuntimeException("Unknown type: " + schema);
private static final Charset UTF8 = StandardCharsets.UTF_8;
private Object randomString(Random random, int maxLength) {
int length = random.nextInt(maxLength);
byte[] bytes = new byte[length];
for (int i = 0; i < length; i++) {
bytes[i] = (byte) ('a' + random.nextInt('z' - 'a'));
return utf8ForString ? new Utf8(bytes) : new String(bytes, UTF8);
private static ByteBuffer randomBytes(Random rand, int maxLength) {
ByteBuffer bytes = ByteBuffer.allocate(rand.nextInt(maxLength));
((Buffer) bytes).limit(bytes.capacity());
return bytes;
public static void main(String[] args) throws Exception {
if (args.length < 3 || args.length > 4) {
System.out.println("Usage: RandomData <schemafile> <outputfile> <count> [codec]");
Schema sch = new Schema.Parser().parse(new File(args[0]));
try (DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>())) {
writer.setCodec(CodecFactory.fromString(args.length >= 4 ? args[3] : "null"));
writer.setMeta("user_metadata", "someByteArray".getBytes(StandardCharsets.UTF_8));
writer.create(sch, new File(args[1]));
for (Object datum : new RandomData(sch, Integer.parseInt(args[2]))) {