/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mnemonic.mapreduce;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.zip.CRC32;
import java.util.zip.Checksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.mnemonic.DurableChunk;
import org.apache.mnemonic.DurableType;
import org.apache.mnemonic.Utils;
import org.apache.mnemonic.hadoop.MneConfigHelper;
import org.apache.mnemonic.hadoop.MneDurableInputSession;
import org.apache.mnemonic.hadoop.MneDurableInputValue;
import org.apache.mnemonic.hadoop.MneDurableOutputSession;
import org.apache.mnemonic.hadoop.MneDurableOutputValue;
import org.apache.mnemonic.hadoop.mapreduce.MneInputFormat;
import org.apache.mnemonic.hadoop.mapreduce.MneOutputFormat;
import org.apache.mnemonic.sessions.SessionIterator;
import org.testng.Assert;
import org.testng.AssertJUnit;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
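
/**
 * End-to-end tests for Mnemonic durable chunks in the Hadoop MapReduce integration:
 * testWriteChunkData() streams randomly filled chunks out through MneOutputFormat,
 * and the two read tests replay them via MneInputFormat and MneDurableInputSession
 * respectively, comparing record count, total size and a CRC32 checksum against
 * the write side.
 */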
@SuppressWarnings("restriction")
public class MneMapreduceChunkDataTest {
  private static final String DEFAULT_BASE_WORK_DIR = "target" + File.separator + "test" + File.separator + "tmp";
  private static final String DEFAULT_WORK_DIR = DEFAULT_BASE_WORK_DIR + File.separator + "chunk-data";
  private static final String SERVICE_NAME = "pmalloc";
  private static final long SLOT_KEY_ID = 5L;

  private Path m_workdir;
  private JobConf m_conf;
  private FileSystem m_fs;
  private Random m_rand;
  private TaskAttemptID m_taid;
  private TaskAttemptContext m_tacontext;
  private long m_reccnt = 5000L;
  private volatile long m_checksum;
  private volatile long m_totalsize = 0L;
  @SuppressWarnings({"restriction", "UseOfSunClasses"})
  private sun.misc.Unsafe unsafe;
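
  /**
   * Creates a fresh local working directory and configures the job for the
   * "pmalloc" memory service with DurableType.CHUNK on both the input and
   * output sides; the output side also gets a 4 GB memory pool.
   */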
  @BeforeClass
  public void setUp() throws Exception {
    m_workdir = new Path(
        System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
    m_conf = new JobConf();
    m_rand = Utils.createRandom();
    unsafe = Utils.getUnsafe();

    // Start from a clean local working directory for the generated pool files.
    try {
      m_fs = FileSystem.getLocal(m_conf).getRaw();
      m_fs.delete(m_workdir, true);
      m_fs.mkdirs(m_workdir);
    } catch (IOException e) {
      throw new IllegalStateException("bad fs init", e);
    }

    m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

    MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
    MneConfigHelper.setBaseOutputName(m_conf, null, "chunk-data");

    // Input-side configuration used by the read tests.
    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setDurableTypes(m_conf,
        MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[] {DurableType.CHUNK});
    MneConfigHelper.setEntityFactoryProxies(m_conf,
        MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[] {});

    // Output-side configuration: CHUNK records written into a 4 GB memory pool.
    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setMemPoolSize(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 4);
    MneConfigHelper.setDurableTypes(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[] {DurableType.CHUNK});
    MneConfigHelper.setEntityFactoryProxies(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[] {});
  }
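
  /**
   * No explicit cleanup; the pool files remain under the working directory.
   */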
  @AfterClass
  public void tearDown() {
  }
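
  /**
   * Allocates a durable chunk of 1 MB to 2 MB from the given output session,
   * fills it with random bytes via Unsafe, folds every byte into the supplied
   * checksum and accumulates the chunk size into m_totalsize.
   */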
  protected DurableChunk<?> genupdDurableChunk(
      MneDurableOutputSession<DurableChunk<?>> s, Checksum cs) {
    DurableChunk<?> ret = null;
    int sz = m_rand.nextInt(1024 * 1024) + 1024 * 1024;
    ret = s.newDurableObjectRecord(sz);
    byte b;
    if (null != ret) {
      for (int i = 0; i < ret.getSize(); ++i) {
        b = (byte) m_rand.nextInt(255);
        unsafe.putByte(ret.get() + i, b);
        cs.update(b);
      }
      m_totalsize += sz;
    }
    return ret;
  }
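
  /**
   * Writes m_reccnt randomly generated chunks through MneOutputFormat and
   * records the resulting CRC32 checksum for the read tests to verify.
   */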
  @Test(enabled = true)
  public void testWriteChunkData() throws Exception {
    NullWritable nada = NullWritable.get();
    MneDurableOutputSession<DurableChunk<?>> sess =
        new MneDurableOutputSession<DurableChunk<?>>(m_tacontext, null,
            MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
    MneDurableOutputValue<DurableChunk<?>> mdvalue =
        new MneDurableOutputValue<DurableChunk<?>>(sess);
    OutputFormat<NullWritable, MneDurableOutputValue<DurableChunk<?>>> outputFormat =
        new MneOutputFormat<MneDurableOutputValue<DurableChunk<?>>>();
    RecordWriter<NullWritable, MneDurableOutputValue<DurableChunk<?>>> writer =
        outputFormat.getRecordWriter(m_tacontext);
    DurableChunk<?> dchunk = null;
    Checksum cs = new CRC32();
    cs.reset();
    for (int i = 0; i < m_reccnt; ++i) {
      dchunk = genupdDurableChunk(sess, cs);
      Assert.assertNotNull(dchunk);
      writer.write(nada, mdvalue.of(dchunk));
    }
    // Remember the write-side checksum so the read tests can verify it.
    m_checksum = cs.getValue();
    writer.close(m_tacontext);
    sess.close();
  }
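
  /**
   * Reads every part file back one by one through MneInputFormat and checks
   * that record count, total size and checksum match what was written.
   */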
  @Test(enabled = true, dependsOnMethods = { "testWriteChunkData" })
  public void testReadChunkData() throws Exception {
    List<String> partfns = new ArrayList<String>();
    long reccnt = 0L;
    long tsize = 0L;
    Checksum cs = new CRC32();
    cs.reset();

    // Collect the part files produced by testWriteChunkData().
    File folder = new File(m_workdir.toString());
    File[] listfiles = folder.listFiles();
    for (int idx = 0; idx < listfiles.length; ++idx) {
      if (listfiles[idx].isFile()
          && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null))
          && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) {
        partfns.add(listfiles[idx].getName());
      }
    }
    Collections.sort(partfns); // keep the order for checksum

    for (int idx = 0; idx < partfns.size(); ++idx) {
      System.out.println(String.format("Verifying : %s", partfns.get(idx)));
      FileSplit split = new FileSplit(
          new Path(m_workdir, partfns.get(idx)), 0, 0L, new String[0]);
      InputFormat<NullWritable, MneDurableInputValue<DurableChunk<?>>> inputFormat =
          new MneInputFormat<MneDurableInputValue<DurableChunk<?>>, DurableChunk<?>>();
      RecordReader<NullWritable, MneDurableInputValue<DurableChunk<?>>> reader =
          inputFormat.createRecordReader(split, m_tacontext);
      MneDurableInputValue<DurableChunk<?>> dchkval = null;
      while (reader.nextKeyValue()) {
        dchkval = reader.getCurrentValue();
        byte b;
        for (int j = 0; j < dchkval.getValue().getSize(); ++j) {
          b = unsafe.getByte(dchkval.getValue().get() + j);
          cs.update(b);
        }
        tsize += dchkval.getValue().getSize();
        ++reccnt;
      }
      reader.close();
    }

    AssertJUnit.assertEquals(m_reccnt, reccnt);
    AssertJUnit.assertEquals(m_totalsize, tsize);
    AssertJUnit.assertEquals(m_checksum, cs.getValue());
    System.out.println(String.format("The checksum of chunk is %d", m_checksum));
  }
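
  /**
   * Reads all part files in a single pass using MneDurableInputSession and
   * verifies the same record count, total size and checksum as the write side.
   */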
  @Test(enabled = true, dependsOnMethods = { "testWriteChunkData" })
  public void testBatchReadChunkDataUsingInputSession() throws Exception {
    List<String> partfns = new ArrayList<String>();
    long reccnt = 0L;
    long tsize = 0L;
    Checksum cs = new CRC32();
    cs.reset();

    // Collect the part files produced by testWriteChunkData().
    File folder = new File(m_workdir.toString());
    File[] listfiles = folder.listFiles();
    for (int idx = 0; idx < listfiles.length; ++idx) {
      if (listfiles[idx].isFile()
          && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null))
          && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) {
        partfns.add(listfiles[idx].getName());
      }
    }
    Collections.sort(partfns); // keep the order for checksum

    List<Path> paths = new ArrayList<Path>();
    for (String fns : partfns) {
      paths.add(new Path(m_workdir, fns));
      System.out.println(String.format("[Batch Mode] Added : %s", fns));
    }

    // Feed all part files to a single input session and iterate over every chunk.
    MneDurableInputSession<DurableChunk<?>> session =
        new MneDurableInputSession<DurableChunk<?>>(m_tacontext, null,
            paths.toArray(new Path[0]), MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX);
    SessionIterator<DurableChunk<?>, ?, Void, Void> iter = session.iterator();
    DurableChunk<?> val = null;
    while (iter.hasNext()) {
      val = iter.next();
      byte b;
      for (int j = 0; j < val.getSize(); ++j) {
        b = unsafe.getByte(val.get() + j);
        cs.update(b);
      }
      tsize += val.getSize();
      ++reccnt;
    }

    AssertJUnit.assertEquals(m_reccnt, reccnt);
    AssertJUnit.assertEquals(m_totalsize, tsize);
    AssertJUnit.assertEquals(m_checksum, cs.getValue());
    System.out.println(String.format("The checksum of chunk is %d [Batch Mode]", m_checksum));
  }
}