/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.mrql;
import java.util.List;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
/** The domain of the MRQL physical algebra is a set of DataSources */
public class DataSet implements Serializable {
public ArrayList<DataSource> source; // multiple sources
public long counter; // a Hadoop user-defined counter used by the `repeat' operator
public long records; // total number of dataset records
/** Construct a DataSet that contains one DataSource
* @param s the given DataSource
* @param counter a Hadoop user-defined counter used by the `repeat' operator
* @param records total number of dataset records
DataSet ( DataSource s, long counter, long records ) {
source = new ArrayList<DataSource>();
this.counter = counter;
this.records = records;
/** Construct a set of DataSources
* @param counter a Hadoop user-defined counter used by the `repeat' operator
* @param records total number of dataset records
DataSet ( long counter, long records ) {
source = new ArrayList<DataSource>();
this.counter = counter;
this.records = records;
/** add a DataSource to this DataSet */
public void add ( DataSource s ) {
/** merge this DataSet with the given DataSet */
public void merge ( DataSet ds ) {
counter += ds.counter;
records += ds.records;
/** dataset size in bytes */
public long size ( Configuration conf ) {
long n = 0;
for (DataSource s: source)
n += s.size(conf);
return n;
/** return a single DataSource path by merging all the DataSource paths in this DataSet */
public String merge () {
Object[] ds = source.toArray();
String path = ((DataSource)ds[0]).path.toString();
for ( int i = 1; i < ds.length; i++ )
path += ","+((DataSource)ds[i]).path;
return path;
/** return the first num values */
public List<MRData> take ( int num ) {
int count = num;
ArrayList<MRData> res = new ArrayList<MRData>();
for ( DataSource s: source ) {
if (res.size() < count)
count = count-res.size();
else return res;
return res;
/** accumulate all dataset values */
public MRData reduce ( MRData zero, Function acc ) {
MRData res = zero;
for ( DataSource s: source )
res = s.reduce(res,acc);
return res;
public String toString () {
String p = "<"+counter;
for (DataSource s: source)
p += ","+s;
return p+">";