blob: f747b6515e90be5c65f1c54466039939c5beb84b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.runtime.io;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.sysds.conf.ConfigurationManager;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.util.HDFSTool;
/**
 * Matrix writer that serializes a {@link MatrixBlock} as a LIBSVM-style text file.
 * <p>
 * Every matrix row becomes one text line. The value of the last matrix column
 * is written first (as the label), followed by one {@code <index>:<value>} pair
 * per written feature of the remaining columns, where the index is the 1-based
 * column position. Pairs are separated by {@code IOUtilFunctions.LIBSVM_DELIM}
 * and index/value by {@code IOUtilFunctions.LIBSVM_INDEX_DELIM}.
 */
public class WriterTextLIBSVM extends MatrixWriter
{
	public WriterTextLIBSVM() {
	}

	/**
	 * Validates the matrix dimensions against the given metadata, removes any
	 * existing file at {@code fname}, and writes the matrix in LIBSVM text format.
	 *
	 * @param src   matrix block to write
	 * @param fname target file name
	 * @param rlen  expected number of rows (must match {@code src})
	 * @param clen  expected number of columns (must match {@code src})
	 * @param blen  block size (not used by this writer)
	 * @param nnz   number of non-zeros (not used by this writer)
	 * @param diag  diagonal flag (not used by this writer)
	 * @throws IOException if the dimensions mismatch, a dimension is zero,
	 *                     or the write fails
	 */
	@Override
	public final void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen, int blen, long nnz, boolean diag)
		throws IOException, DMLRuntimeException
	{
		//validity check matrix dimensions
		if( src.getNumRows() != rlen || src.getNumColumns() != clen )
			throw new IOException("Matrix dimensions mismatch with metadata: "+src.getNumRows()+"x"+src.getNumColumns()+" vs "+rlen+"x"+clen+".");
		if( rlen == 0 || clen == 0 )
			throw new IOException("Write of matrices with zero rows or columns not supported ("+rlen+"x"+clen+").");

		//prepare file access
		JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
		Path path = new Path( fname );
		FileSystem fs = IOUtilFunctions.getFileSystem(path, job);

		//if the file already exists on HDFS, remove it.
		HDFSTool.deleteFileIfExistOnHDFS( fname );

		//core write (sequential/parallel)
		writeLIBSVMMatrixToHDFS(path, job, fs, src);

		IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path);
	}

	/**
	 * Currently a no-op: no file is created for an empty matrix.
	 * NOTE(review): confirm that writing nothing is the intended behavior.
	 */
	@Override
	public final void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int blen)
		throws IOException, DMLRuntimeException
	{
	}

	/**
	 * Writes all rows of {@code src} sequentially into a single file at {@code path}.
	 * Non-final so that subclasses can substitute a different write strategy.
	 *
	 * @param path target path
	 * @param job  job configuration
	 * @param fs   file system used to create the file
	 * @param src  matrix block to write
	 * @throws IOException if the write fails
	 */
	protected void writeLIBSVMMatrixToHDFS(Path path, JobConf job, FileSystem fs, MatrixBlock src)
		throws IOException
	{
		//sequential write libsvm file
		writeLIBSVMMatrixToFile(path, job, fs, src, 0, src.getNumRows());
	}

	/**
	 * Writes the rows {@code [rl, rlen)} of {@code src} to a newly created
	 * (overwriting) file at {@code path}.
	 *
	 * @param path target path
	 * @param job  job configuration (not used by this method)
	 * @param fs   file system used to create the file
	 * @param src  matrix block to write
	 * @param rl   first row to write (inclusive)
	 * @param rlen despite its name, the exclusive upper row bound
	 * @throws IOException if creating, writing, flushing or closing the file fails
	 */
	protected static void writeLIBSVMMatrixToFile( Path path, JobConf job, FileSystem fs, MatrixBlock src, int rl, int rlen )
		throws IOException
	{
		boolean sparse = src.isInSparseFormat();
		int clen = src.getNumColumns();

		//create buffered writer; try-with-resources closes it on all paths and,
		//unlike a silent close, propagates failures of the final flush/close so
		//that an incompletely written file is not reported as success
		try( BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path,true))) )
		{
			StringBuilder sb = new StringBuilder();

			// Write data lines
			if( sparse ) //SPARSE
			{
				SparseBlock sblock = src.getSparseBlock();
				for(int i=rl; i < rlen; i++) {
					// a row has stored entries only if the sparse block exists,
					// covers row i, and the row is non-empty; use the same guard
					// for the label lookup and for the row iteration
					boolean hasRow = sblock!=null && i<sblock.numRows() && !sblock.isEmpty(i);

					// append the class label as the 1st column
					double label = hasRow ? sblock.get(i, clen-1) : 0;
					sb.append(label);

					if( hasRow ) {
						int pos = sblock.pos(i);
						int alen = sblock.size(i);
						int[] aix = sblock.indexes(i);
						double[] avals = sblock.values(i);
						// append sparse row (skip the label column)
						for( int k=pos; k<pos+alen; k++ ) {
							if( aix[k]!=clen-1 ) {
								sb.append(IOUtilFunctions.LIBSVM_DELIM);
								appendIndexValLibsvm(sb, aix[k], avals[k]);
							}
						}
					}

					// write the string row
					sb.append('\n');
					br.write( sb.toString() );
					sb.setLength(0);
				}
			}
			else //DENSE
			{
				for( int i=rl; i<rlen; i++ ) {
					// append the class label as the 1st column
					double label = src.getValueDenseUnsafe(i, clen-1);
					sb.append(label);

					// append dense row (non-zero values only, excluding the label column)
					for( int j=0; j<clen-1; j++ ) {
						double val = src.getValueDenseUnsafe(i, j);
						if( val != 0 ) {
							sb.append(IOUtilFunctions.LIBSVM_DELIM);
							appendIndexValLibsvm(sb, j, val);
						}
					}

					// write the string row
					sb.append('\n');
					br.write( sb.toString() );
					sb.setLength(0);
				}
			}
		}
	}

	/**
	 * Appends one feature in libsvm format ({@code <index#>:<value#>}) to the
	 * given string builder.
	 *
	 * @param sb    string builder to append to
	 * @param index 0-based matrix column index
	 * @param value cell value
	 */
	protected static void appendIndexValLibsvm(StringBuilder sb, int index, double value) {
		sb.append(index+1); // convert 0 based matrix index to 1 base libsvm index
		sb.append(IOUtilFunctions.LIBSVM_INDEX_DELIM);
		sb.append(value);
	}
}