blob: ed3a5c19b138a77af306dd030c75bbe664f6199d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.runtime.io;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.JobConf;
import org.apache.sysds.conf.ConfigurationManager;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.matrix.data.FrameBlock;
import org.apache.sysds.runtime.util.HDFSTool;
/**
 * Single-threaded frame binary block writer.
 *
 * Frames are stored as a Hadoop sequence file of
 * (LongWritable, FrameBlock) pairs, where the key is the 1-based index
 * of the first row contained in the block.
 */
public class FrameWriterBinaryBlock extends FrameWriter
{
	/**
	 * Writes the given frame block to the given file, replacing any
	 * existing file of that name.
	 *
	 * @param src frame block to write
	 * @param fname target file name
	 * @param rlen number of rows of the overall frame
	 * @param clen number of columns of the overall frame
	 * @throws IOException if the frame block exceeds the overall frame
	 *         dimensions or if the write fails
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
	@Override
	public final void writeFrameToHDFS( FrameBlock src, String fname, long rlen, long clen )
		throws IOException, DMLRuntimeException
	{
		//bound check for src block (done before touching the target file so
		//that an invalid request does not remove previously written output)
		if( src.getNumRows() > rlen || src.getNumColumns() > clen ) {
			throw new IOException("Frame block [1:"+src.getNumRows()+",1:"+src.getNumColumns()+"] " +
				"out of overall frame range [1:"+rlen+",1:"+clen+"].");
		}

		//prepare file access
		JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
		Path path = new Path( fname );

		//if the file already exists on HDFS, remove it.
		HDFSTool.deleteFileIfExistOnHDFS( fname );

		//write binary block to hdfs (sequential/parallel)
		writeBinaryBlockFrameToHDFS( path, job, src, rlen, clen );
	}

	/**
	 * Writes the entire frame block sequentially into a single sequence file
	 * and subsequently removes crc files via
	 * {@code IOUtilFunctions.deleteCrcFilesFromLocalFileSystem}.
	 *
	 * @param path file path
	 * @param job job configuration
	 * @param src frame block
	 * @param rlen number of rows of the overall frame
	 * @param clen number of columns of the overall frame (not used here)
	 * @throws IOException if IOException occurs
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
	protected void writeBinaryBlockFrameToHDFS( Path path, JobConf job, FrameBlock src, long rlen, long clen )
		throws IOException, DMLRuntimeException
	{
		FileSystem fs = IOUtilFunctions.getFileSystem(path);
		int blen = ConfigurationManager.getBlocksize();

		//only rows that exist in src can be written; clamping also avoids the
		//silent overflow of narrowing a large rlen directly to int
		int ru = (int) Math.min(rlen, src.getNumRows());

		//sequential write to single file
		writeBinaryBlockFrameToSequenceFile(path, job, fs, src, blen, 0, ru);
		IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path);
	}

	/**
	 * Internal primitive to write a block-aligned row range of a frame to a single sequence file,
	 * which is used for both single- and multi-threaded writers (for consistency).
	 *
	 * If the frame has at most {@code blen} rows, the entire frame is written
	 * as a single block with key 1, independent of {@code rl} and {@code ru}.
	 * Otherwise, blocks of up to {@code blen} rows are written starting at
	 * row {@code rl}; the column metadata of the frame is attached to the
	 * block starting at row 0 only.
	 *
	 * @param path file path
	 * @param job job configuration
	 * @param fs file system
	 * @param src frame block
	 * @param blen block length
	 * @param rl lower row (0-based, inclusive)
	 * @param ru upper row (0-based, exclusive); values beyond the number of
	 *        rows of src are treated as the number of rows
	 * @throws IOException if IOException occurs
	 */
	@SuppressWarnings("deprecation")
	protected static void writeBinaryBlockFrameToSequenceFile( Path path, JobConf job, FileSystem fs, FrameBlock src, int blen, int rl, int ru )
		throws IOException
	{
		//1) create sequence file writer
		SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, LongWritable.class, FrameBlock.class);

		try
		{
			//2) reblock and write
			LongWritable index = new LongWritable();
			final int nrow = src.getNumRows();

			if( nrow <= blen ) //opt for single block
			{
				//directly write single block
				index.set(1);
				writer.append(index, src);
			}
			else //general case
			{
				//initialize blocks for reuse (at most 4 different blocks required)
				FrameBlock[] blocks = createFrameBlocksForReuse(src.getSchema(), src.getColumnNames(), nrow);

				//never iterate beyond the rows available in src, otherwise
				//the block length below would become non-positive
				final int end = Math.min(ru, nrow);

				//create and write subblocks of frame
				for(int bi = rl; bi < end; bi += blen) {
					int len = Math.min(blen, nrow-bi);

					//get reuse frame block and copy subpart to block (incl meta on first)
					FrameBlock block = getFrameBlockForReuse(blocks);
					src.slice( bi, bi+len-1, 0, src.getNumColumns()-1, block );
					if( bi==0 ) //first block
						block.setColumnMetadata(src.getColumnMetadata());

					//append block to sequence file (key is 1-based start row)
					index.set(bi+1);
					writer.append(index, block);
				}
			}
		}
		finally {
			IOUtilFunctions.closeSilently(writer);
		}
	}
}