blob: 1b6ab68599414eb87f78a5af819b9dda5473f7d9 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.mapred;
import java.io.IOException;
import java.io.File;
import java.io.InputStream;
import java.io.FileInputStream;
import java.io.BufferedInputStream;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.Reporter;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumWriter;
import org.apache.avro.reflect.ReflectDatumReader;
import org.junit.Test;
import static org.junit.Assert.*;
public class TestReflectJob {

  /** The input class: a simple reflect-serializable wrapper around a String. */
  public static class Text {
    private String text = "";
    public Text() {}
    public Text(String text) { this.text = text; }
    public String toString() { return text; }
  }

  /** The intermediate data class: a single occurrence count. */
  public static class Count {
    private long count;
    public Count() {}
    public Count(long count) { this.count = count; }
  }

  /** The output class: a word paired with its total count. */
  public static class WordCount {
    private String word;
    private long count;
    public WordCount() {}
    public WordCount(String word, long count) {
      this.word = word;
      this.count = count;
    }
  }

  /** Mapper: tokenizes each input line on whitespace and emits (word, 1). */
  public static class MapImpl extends AvroMapper<Text, Pair<Text,Count>> {
    @Override
    public void map(Text text, AvroCollector<Pair<Text,Count>> collector,
                    Reporter reporter) throws IOException {
      StringTokenizer tokens = new StringTokenizer(text.toString());
      while (tokens.hasMoreTokens())
        collector.collect(new Pair<Text,Count>(new Text(tokens.nextToken()),
                                               new Count(1L)));
    }
  }

  /** Reducer: sums the counts for each word and emits a WordCount. */
  public static class ReduceImpl
    extends AvroReducer<Text, Count, WordCount> {
    @Override
    public void reduce(Text word, Iterable<Count> counts,
                       AvroCollector<WordCount> collector,
                       Reporter reporter) throws IOException {
      long sum = 0;
      for (Count count : counts)
        sum += count.count;
      collector.collect(new WordCount(word.text, sum));
    }
  }

  /**
   * Runs a reflection-based word-count MapReduce job end to end:
   * writes an Avro input file, configures and runs the job, then
   * validates the counts found in the job's output file.
   */
  @Test
  @SuppressWarnings("deprecation")
  public void testJob() throws Exception {
    JobConf job = new JobConf();
    // BUG FIX: the original concatenated "target/..." without a path
    // separator, producing e.g. ".target/testReflectJob" instead of
    // "./target/testReflectJob".
    String dir = System.getProperty("test.dir", ".") + "/target/testReflectJob";
    Path inputPath = new Path(dir + "/in");
    Path outputPath = new Path(dir + "/out");
    // Remove any leftovers from a previous run so the job starts clean.
    outputPath.getFileSystem(job).delete(outputPath);
    inputPath.getFileSystem(job).delete(inputPath);
    writeLinesFile(new File(dir + "/in"));

    job.setJobName("reflect");

    AvroJob.setInputSchema(job, ReflectData.get().getSchema(Text.class));
    // Parameterized Pair (the original used the raw type).
    AvroJob.setMapOutputSchema
      (job, new Pair<Text,Count>(new Text(""), new Count(0L)).getSchema());
    AvroJob.setOutputSchema(job, ReflectData.get().getSchema(WordCount.class));

    AvroJob.setMapperClass(job, MapImpl.class);
    //AvroJob.setCombinerClass(job, ReduceImpl.class);
    AvroJob.setReducerClass(job, ReduceImpl.class);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    AvroJob.setReflect(job);                          // use reflection

    JobClient.runJob(job);

    validateCountsFile(new File(new File(dir, "out"), "part-00000.avro"));
  }

  /**
   * Writes the shared test lines ({@link WordCountUtil#LINES}) into an
   * Avro data file named {@code lines.avro} under {@code dir}.
   */
  private void writeLinesFile(File dir) throws IOException {
    DatumWriter<Text> writer = new ReflectDatumWriter<Text>();
    DataFileWriter<Text> out = new DataFileWriter<Text>(writer);
    File linesFile = new File(dir, "lines.avro");
    dir.mkdirs();
    out.create(ReflectData.get().getSchema(Text.class), linesFile);
    try {
      for (String line : WordCountUtil.LINES)
        out.append(new Text(line));
    } finally {
      // Close even if an append fails, so the file handle is released.
      out.close();
    }
  }

  /**
   * Reads the job's output file and asserts that every word's count
   * matches {@link WordCountUtil#COUNTS}, and that the number of
   * distinct words is exactly as expected.
   */
  private void validateCountsFile(File file) throws Exception {
    DatumReader<WordCount> reader = new ReflectDatumReader<WordCount>();
    InputStream in = new BufferedInputStream(new FileInputStream(file));
    DataFileStream<WordCount> counts =
      new DataFileStream<WordCount>(in, reader);
    try {
      int numWords = 0;
      for (WordCount wc : counts) {
        assertEquals(wc.word,
                     WordCountUtil.COUNTS.get(wc.word),
                     (Long)wc.count);
        numWords++;
      }
      assertEquals(WordCountUtil.COUNTS.size(), numWords);
    } finally {
      // Close both even if an assertion fails, so the file is released.
      counts.close();
      in.close();
    }
  }
}