| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.sysds.runtime.compress.colgroup.mapping; |
| |
| import java.io.DataOutput; |
| import java.io.IOException; |
| import java.io.Serializable; |
| import java.util.BitSet; |
| |
| import org.apache.commons.lang.NotImplementedException; |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.sysds.runtime.compress.colgroup.IMapToDataGroup; |
| import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; |
| import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; |
| import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; |
| import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; |
| import org.apache.sysds.runtime.compress.colgroup.offset.AOffsetIterator; |
| import org.apache.sysds.runtime.data.DenseBlock; |
| import org.apache.sysds.runtime.data.SparseBlock; |
| import org.apache.sysds.runtime.matrix.data.MatrixBlock; |
| |
| /** |
| * This Class's job is to link into the dictionary entries for column groups. |
| * |
| * Column groups |
| * |
| * - DDC use this to map to map directly to the dictionary |
| * |
| * - SDC use this in collaboration with the offsets to only point to dictionary entries for non default values. |
| */ |
| public abstract class AMapToData implements Serializable { |
| private static final long serialVersionUID = 1208906071822976041L; |
| protected static final Log LOG = LogFactory.getLog(AMapToData.class.getName()); |
| |
| /** Number of unique values inside this map. */ |
| private int nUnique; |
| |
| /** |
| * Main constructor for AMapToData. |
| * |
| * NOTE! The value should be representable inside the map. This requirement is not checked. |
| * |
| * @param nUnique number of unique values. |
| */ |
| protected AMapToData(int nUnique) { |
| this.nUnique = nUnique; |
| } |
| |
| /** |
| * Get the number of unique values inside this map. |
| * |
| * @return the unique count. |
| */ |
| public final int getUnique() { |
| return nUnique; |
| } |
| |
| /** |
| * Set number of unique values. |
| * |
| * NOTE! The value should be representable inside the map. This requirement is not checked. |
| * |
| * @param nUnique the value to set. |
| */ |
| public final void setUnique(int nUnique) { |
| this.nUnique = nUnique; |
| } |
| |
| /** |
| * Get the given index back as a integer |
| * |
| * @param n the index to get |
| * @return the value represented in that cell as integer |
| */ |
| public abstract int getIndex(int n); |
| |
| /** |
| * Set the index to the value. |
| * |
| * NOTE! The value should be representable inside the map. This requirement is not checked. |
| * |
| * @param n index to set. |
| * @param v the value to set it to. |
| */ |
| public abstract void set(int n, int v); |
| |
| /** |
| * Set the index to the value and get the contained value after. |
| * |
| * @param n index to set. |
| * @param v the value to set it to. |
| * @return v as encoded, note this value can be different that the one put in if the map is not able to represent the |
| * value |
| */ |
| public abstract int setAndGet(int n, int v); |
| |
| /** |
| * Fill the map with a given value. |
| * |
| * NOTE! The value should be representable inside the map. This requirement is not checked. |
| * |
| * @param v the value to fill |
| */ |
| public abstract void fill(int v); |
| |
| /** |
| * Get the maximum value that is possible to allocate inside this map. |
| * |
| * @return The maximum value. |
| */ |
| public abstract int getUpperBoundValue(); |
| |
| /** |
| * Get the in memory size of this Mapping object. |
| * |
| * @return The size in Bytes. |
| */ |
| public abstract long getInMemorySize(); |
| |
| /** |
| * Get the size of this Mapping object on disk. |
| * |
| * @return The on disk size in Bytes. |
| */ |
| public abstract long getExactSizeOnDisk(); |
| |
| /** |
| * The size of the Mapping object, signaling how many value cells are stored in this mapping object. |
| * |
| * @return The length of the mapping object. |
| */ |
| public abstract int size(); |
| |
| /** |
| * Serialize this object to the DataOutput given. |
| * |
| * @param out The object to serialize this object into. |
| * @throws IOException An IO exception if the Serialization fails. |
| */ |
| public abstract void write(DataOutput out) throws IOException; |
| |
| /** |
| * Replace v with r for all entries, |
| * |
| * NOTE! It is assumed that you call this correctly: |
| * |
| * - with two distinct values that is representable inside the given AMapToData. |
| * |
| * @param v The value to replace |
| * @param r The value to put instead |
| */ |
| public abstract void replace(int v, int r); |
| |
| public abstract MAP_TYPE getType(); |
| |
| /** |
| * Pre aggregate a dense matrix m into pre, subject to only including a row segment and column segment. |
| * |
| * @param m The dense matrix values to preaggregate |
| * @param preAV The preAggregate double array populate with the summed values of m |
| * @param rl The row start in m |
| * @param ru The row end in m |
| * @param cl The column start in m |
| * @param cu The column end in m |
| */ |
| public final void preAggregateDense(MatrixBlock m, double[] preAV, int rl, int ru, int cl, int cu) { |
| final DenseBlock db = m.getDenseBlock(); |
| if(rl == ru - 1) |
| preAggregateDenseSingleRow(db.values(rl), db.pos(rl), preAV, cl, cu); |
| else |
| preAggregateDenseMultiRow(m, preAV, rl, ru, cl, cu); |
| } |
| |
| /** |
| * PreAggregate Dense on a single row. |
| * |
| * @param mV The DenseMatrix Values from the input matrix block for the specific row given |
| * @param off The offset into the mV that the row values start from |
| * @param preAV The PreAggregate value target to preAggregate into |
| * @param cl The column index to start at |
| * @param cu The column index to stop at (not inclusive) |
| */ |
| protected void preAggregateDenseSingleRow(double[] mV, int off, double[] preAV, int cl, int cu) { |
| if(cu - cl > 64) |
| preAggregateDenseToRowBy8(mV, preAV, cl, cu, off); |
| else { |
| off += cl; |
| for(int rc = cl; rc < cu; rc++, off++) |
| preAV[getIndex(rc)] += mV[off]; |
| } |
| } |
| |
| protected void preAggregateDenseToRowBy8(double[] mV, double[] preAV, int cl, int cu, int off) { |
| final int h = (cu - cl) % 8; |
| off += cl; |
| for(int rc = cl; rc < cl + h; rc++, off++) |
| preAV[getIndex(rc)] += mV[off]; |
| for(int rc = cl + h; rc < cu; rc += 8, off += 8) { |
| preAV[getIndex(rc)] += mV[off]; |
| preAV[getIndex(rc + 1)] += mV[off + 1]; |
| preAV[getIndex(rc + 2)] += mV[off + 2]; |
| preAV[getIndex(rc + 3)] += mV[off + 3]; |
| preAV[getIndex(rc + 4)] += mV[off + 4]; |
| preAV[getIndex(rc + 5)] += mV[off + 5]; |
| preAV[getIndex(rc + 6)] += mV[off + 6]; |
| preAV[getIndex(rc + 7)] += mV[off + 7]; |
| } |
| } |
| |
| /** |
| * PreAggregate from Dense Matrix, and handle multiple rows, |
| * |
| * @param m The Matrix to preAggregate. |
| * @param preAV The target dense array to preAggregate into |
| * @param rl The row to start at |
| * @param ru The row to end at (not inclusive) |
| * @param cl The column to start at |
| * @param cu The column to end at (not inclusive) |
| */ |
| protected void preAggregateDenseMultiRow(MatrixBlock m, double[] preAV, int rl, int ru, int cl, int cu) { |
| final int nVal = getUnique(); |
| final DenseBlock db = m.getDenseBlock(); |
| if(db.isContiguous()) { |
| final double[] mV = m.getDenseBlockValues(); |
| final int nCol = m.getNumColumns(); |
| preAggregateDenseMultiRowContiguous(mV, nCol, nVal, preAV, rl, ru, cl, cu); |
| } |
| else |
| throw new NotImplementedException(); |
| } |
| |
| protected void preAggregateDenseMultiRowContiguous(double[] mV, int nCol, int nVal, double[] preAV, int rl, int ru, |
| int cl, int cu) { |
| if(cu - cl > 64) |
| preAggregateDenseMultiRowContiguousBy8(mV, nCol, nVal, preAV, rl, ru, cl, cu); |
| else |
| preAggregateDenseMultiRowContiguousBy1(mV, nCol, nVal, preAV, rl, ru, cl, cu); |
| } |
| |
| protected void preAggregateDenseMultiRowContiguousBy8(double[] mV, int nCol, int nVal, double[] preAV, int rl, |
| int ru, int cl, int cu) { |
| final int h = (cu - cl) % 8; |
| preAggregateDenseMultiRowContiguousBy1(mV, nCol, nVal, preAV, rl, ru, cl, cl + h); |
| final int offR = nCol * rl; |
| final int offE = nCol * ru; |
| for(int c = cl + h; c < cu; c += 8) { |
| final int id1 = getIndex(c), id2 = getIndex(c + 1), id3 = getIndex(c + 2), id4 = getIndex(c + 3), |
| id5 = getIndex(c + 4), id6 = getIndex(c + 5), id7 = getIndex(c + 6), id8 = getIndex(c + 7); |
| |
| final int start = c + offR; |
| final int end = c + offE; |
| int nValOff = 0; |
| for(int off = start; off < end; off += nCol) { |
| preAV[id1 + nValOff] += mV[off]; |
| preAV[id2 + nValOff] += mV[off + 1]; |
| preAV[id3 + nValOff] += mV[off + 2]; |
| preAV[id4 + nValOff] += mV[off + 3]; |
| preAV[id5 + nValOff] += mV[off + 4]; |
| preAV[id6 + nValOff] += mV[off + 5]; |
| preAV[id7 + nValOff] += mV[off + 6]; |
| preAV[id8 + nValOff] += mV[off + 7]; |
| nValOff += nVal; |
| } |
| } |
| } |
| |
| protected void preAggregateDenseMultiRowContiguousBy1(double[] mV, int nCol, int nVal, double[] preAV, int rl, |
| int ru, int cl, int cu) { |
| final int offR = nCol * rl; |
| final int offE = nCol * ru; |
| for(int c = cl; c < cu; c++) { |
| final int idx = getIndex(c); |
| final int start = c + offR; |
| final int end = c + offE; |
| for(int offOut = idx, off = start; off < end; offOut += nVal, off += nCol) { |
| preAV[offOut] += mV[off]; |
| } |
| } |
| } |
| |
| /** |
| * PreAggregate a Dense Matrix at index offsets. |
| * |
| * @param m The DenseBlock to preAggregate |
| * @param preAV The target double array to put the preAggregate into |
| * @param rl The row to start at |
| * @param ru The row to end at (not inclusive) |
| * @param cl The column in m to start from |
| * @param cu The column in m to end at (not inclusive) |
| * @param indexes The Offset Indexes to iterate through |
| */ |
| public final void preAggregateDense(MatrixBlock m, double[] preAV, int rl, int ru, int cl, int cu, AOffset indexes) { |
| indexes.preAggregateDenseMap(m, preAV, rl, ru, cl, cu, getUnique(), this); |
| } |
| |
| /** |
| * PreAggregate the SparseBlock in the range of rows given. |
| * |
| * @param sb The SparseBlock to preAggregate |
| * @param preAV The target double array to put the preAggregate into |
| * @param rl The row to start at |
| * @param ru The row to end at (not inclusive) |
| * @param indexes The Offset Indexes to iterate through |
| */ |
| public final void preAggregateSparse(SparseBlock sb, double[] preAV, int rl, int ru, AOffset indexes) { |
| indexes.preAggregateSparseMap(sb, preAV, rl, ru, getUnique(), this); |
| } |
| |
| /** |
| * PreAggregate the sparseblock in the range of rows given. |
| * |
| * @param sb Sparse block to preAggregate from |
| * @param preAV Pre aggregate target |
| * @param rl row index in sb |
| * @param ru upper row index in sp (not inclusive) |
| */ |
| public final void preAggregateSparse(SparseBlock sb, double[] preAV, int rl, int ru) { |
| if(rl == ru - 1) |
| preAggregateSparseSingleRow(sb, preAV, rl); |
| else |
| preAggregateSparseMultiRow(sb, preAV, rl, ru); |
| } |
| |
| private final void preAggregateSparseSingleRow(final SparseBlock sb, final double[] preAV, final int r) { |
| if(sb.isEmpty(r)) |
| return; |
| final int apos = sb.pos(r); |
| final int alen = sb.size(r) + apos; |
| final int[] aix = sb.indexes(r); |
| final double[] avals = sb.values(r); |
| for(int j = apos; j < alen; j++) |
| preAV[getIndex(aix[j])] += avals[j]; |
| } |
| |
| private final void preAggregateSparseMultiRow(final SparseBlock sb, final double[] preAV, final int rl, |
| final int ru) { |
| final int unique = getUnique(); |
| for(int r = rl; r < ru; r++) { |
| if(sb.isEmpty(r)) |
| continue; |
| final int apos = sb.pos(r); |
| final int alen = sb.size(r) + apos; |
| final int[] aix = sb.indexes(r); |
| final double[] avals = sb.values(r); |
| final int off = unique * (r - rl); |
| for(int j = apos; j < alen; j++) |
| preAV[off + getIndex(aix[j])] += avals[j]; |
| } |
| } |
| |
| /** |
| * Get the number of counts of each unique value contained in this map. Note that in the case the mapping is shorter |
| * than number of rows the counts sum to the number of mapped values not the number of rows. |
| * |
| * @return The counts |
| */ |
| public final int[] getCounts() { |
| return getCounts(new int[getUnique()]); |
| } |
| |
| /** |
| * Get the number of counts of each unique value contained in this map. Note that in the case the mapping is shorter |
| * than number of rows the counts sum to the number of mapped values not the number of rows. |
| * |
| * @param counts The object to return. |
| * @return The counts |
| */ |
| public abstract int[] getCounts(int[] counts); |
| |
| /** |
| * PreAggregate into dictionary with two sides of DDC. |
| * |
| * @param tm Map of other side |
| * @param td Dictionary to take values from (other side dictionary) |
| * @param ret The output dictionary to aggregate into |
| * @param nCol The number of columns |
| */ |
| public final void preAggregateDDC_DDC(AMapToData tm, ADictionary td, Dictionary ret, int nCol) { |
| if(nCol == 1) |
| preAggregateDDC_DDCSingleCol(tm, td.getValues(), ret.getValues()); |
| else |
| preAggregateDDC_DDCMultiCol(tm, td, ret.getValues(), nCol); |
| } |
| |
| /** |
| * PreAggregate into dictionary with two sides of DDC guaranteed to only have one column tuples. |
| * |
| * @param tm Map of other side |
| * @param td Dictionary to take values from (other side dictionary) |
| * @param ret The output dictionary to aggregate into |
| */ |
| protected void preAggregateDDC_DDCSingleCol(AMapToData tm, double[] td, double[] v) { |
| final int sz = size(); |
| for(int r = 0; r < sz; r++) |
| v[getIndex(r)] += td[tm.getIndex(r)]; |
| } |
| |
| /** |
| * PreAggregate into dictionary with two sides of DDC guaranteed to multiple column tuples. |
| * |
| * @param tm Map of other side |
| * @param td Dictionary to take values from (other side dictionary) |
| * @param ret The output dictionary to aggregate into |
| * @param nCol The number of columns |
| */ |
| protected void preAggregateDDC_DDCMultiCol(AMapToData tm, ADictionary td, double[] v, int nCol) { |
| final int sz = size(); |
| final int h = sz % 8; |
| for(int r = 0; r < h; r++) |
| td.addToEntry(v, tm.getIndex(r), getIndex(r), nCol); |
| |
| for(int r = h; r < sz; r += 8) { |
| int r2 = r + 1, r3 = r + 2, r4 = r + 3, r5 = r + 4, r6 = r + 5, r7 = r + 6, r8 = r + 7; |
| td.addToEntryVectorized(v, tm.getIndex(r), tm.getIndex(r2), tm.getIndex(r3), tm.getIndex(r4), tm.getIndex(r5), |
| tm.getIndex(r6), tm.getIndex(r7), tm.getIndex(r8), getIndex(r), getIndex(r2), getIndex(r3), getIndex(r4), |
| getIndex(r5), getIndex(r6), getIndex(r7), getIndex(r8), nCol); |
| } |
| } |
| |
| /** |
| * PreAggregate into SDCZero dictionary from DDC dictionary. |
| * |
| * @param tm Map of other side |
| * @param td Dictionary to take values from (other side dictionary) |
| * @param tof The offset index structure of the SDC side |
| * @param ret The output dictionary to aggregate into |
| * @param nCol The number of columns in output and td dictionary |
| */ |
| public final void preAggregateDDC_SDCZ(AMapToData tm, ADictionary td, AOffset tof, Dictionary ret, int nCol) { |
| if(nCol == 1) |
| preAggregateDDC_SDCZSingleCol(tm, td.getValues(), tof, ret.getValues()); |
| else |
| preAggregateDDC_SDCZMultiCol(tm, td, tof, ret.getValues(), nCol); |
| } |
| |
| public void preAggregateDDC_SDCZSingleCol(AMapToData tm, double[] td, AOffset tof, double[] v) { |
| final AOffsetIterator itThat = tof.getOffsetIterator(); |
| final int size = tm.size() - 1; |
| for(int i = 0; i < size; i++) { |
| final int to = getIndex(itThat.value()); |
| final int fr = tm.getIndex(i); |
| v[to] += td[fr]; |
| itThat.next(); |
| } |
| final int to = getIndex(itThat.value()); |
| final int fr = tm.getIndex(size); |
| v[to] += td[fr]; |
| } |
| |
| public void preAggregateDDC_SDCZMultiCol(AMapToData tm, ADictionary td, AOffset tof, double[] v, int nCol) { |
| final AOffsetIterator it = tof.getOffsetIterator(); |
| final int size = tm.size() - 1; |
| int i = (size > 8) ? preAggregateDDC_SDCZMultiCol_vect(tm, td, v, nCol, it, size) : 0; |
| |
| for(; i < size; i++) { |
| final int to = getIndex(it.value()); |
| final int fr = tm.getIndex(i); |
| td.addToEntry(v, fr, to, nCol); |
| it.next(); |
| } |
| |
| final int to = getIndex(it.value()); |
| final int fr = tm.getIndex(size); |
| td.addToEntry(v, fr, to, nCol); |
| } |
| |
| private int preAggregateDDC_SDCZMultiCol_vect(AMapToData tm, ADictionary td, double[] v, int nCol, |
| AOffsetIterator it, int size) { |
| final int h = size % 8; |
| int i = 0; |
| while(i < size - h) { |
| int t1 = it.value(), t2 = it.next(), t3 = it.next(), t4 = it.next(), t5 = it.next(), t6 = it.next(), |
| t7 = it.next(), t8 = it.next(); |
| |
| t1 = getIndex(t1); |
| t2 = getIndex(t2); |
| t3 = getIndex(t3); |
| t4 = getIndex(t4); |
| t5 = getIndex(t5); |
| t6 = getIndex(t6); |
| t7 = getIndex(t7); |
| t8 = getIndex(t8); |
| |
| int f1 = tm.getIndex(i), f2 = tm.getIndex(i + 1), f3 = tm.getIndex(i + 2), f4 = tm.getIndex(i + 3), |
| f5 = tm.getIndex(i + 4), f6 = tm.getIndex(i + 5), f7 = tm.getIndex(i + 6), f8 = tm.getIndex(i + 7); |
| |
| i += 8; |
| it.next(); |
| td.addToEntryVectorized(v, f1, f2, f3, f4, f5, f6, f7, f8, t1, t2, t3, t4, t5, t6, t7, t8, nCol); |
| } |
| return i; |
| } |
| |
| /** |
| * PreAggregate into DDC dictionary from SDCZero dictionary. |
| * |
| * @param tm Map of other side |
| * @param td Dictionary to take values from (other side dictionary) |
| * @param of Offsets of the SDC to look into DDC |
| * @param ret The output dictionary to aggregate into |
| * @param nCol The number of columns in output and td dictionary |
| */ |
| public final void preAggregateSDCZ_DDC(AMapToData tm, ADictionary td, AOffset of, Dictionary ret, int nCol) { |
| if(nCol == 1) |
| preAggregateSDCZ_DDCSingleCol(tm, td.getValues(), of, ret.getValues()); |
| else |
| preAggregateSDCZ_DDCMultiCol(tm, td, of, ret.getValues(), nCol); |
| } |
| |
| protected void preAggregateSDCZ_DDCSingleCol(AMapToData tm, double[] td, AOffset of, double[] v) { |
| final AOffsetIterator itThis = of.getOffsetIterator(); |
| final int size = size() - 1; |
| int tv = itThis.value(); |
| for(int i = 0; i < size; i++) { |
| v[getIndex(i)] += td[tm.getIndex(tv)]; |
| tv = itThis.next(); |
| } |
| v[getIndex(size)] += td[tm.getIndex(tv)]; |
| } |
| |
| protected void preAggregateSDCZ_DDCMultiCol(AMapToData tm, ADictionary td, AOffset of, double[] v, int nCol) { |
| final AOffsetIterator itThis = of.getOffsetIterator(); |
| final int size = size() - 1; |
| int i = (size > 8) ? preAggregateSDCZ_DDCMultiCol_vect(tm, td, v, nCol, itThis, size) : 0; |
| |
| int tv = itThis.value(); |
| for(; i < size; i++) { |
| td.addToEntry(v, tm.getIndex(tv), getIndex(i), nCol); |
| tv = itThis.next(); |
| } |
| td.addToEntry(v, tm.getIndex(tv), getIndex(size), nCol); |
| } |
| |
| private int preAggregateSDCZ_DDCMultiCol_vect(AMapToData tm, ADictionary td, double[] v, int nCol, |
| AOffsetIterator it, int size) { |
| final int h = size % 8; |
| int i = 0; |
| while(i < size - h) { |
| int t1 = getIndex(i), t2 = getIndex(i + 1), t3 = getIndex(i + 2), t4 = getIndex(i + 3), t5 = getIndex(i + 4), |
| t6 = getIndex(i + 5), t7 = getIndex(i + 6), t8 = getIndex(i + 7); |
| |
| int f1 = it.value(), f2 = it.next(), f3 = it.next(), f4 = it.next(), f5 = it.next(), f6 = it.next(), |
| f7 = it.next(), f8 = it.next(); |
| |
| f1 = tm.getIndex(f1); |
| f2 = tm.getIndex(f2); |
| f3 = tm.getIndex(f3); |
| f4 = tm.getIndex(f4); |
| f5 = tm.getIndex(f5); |
| f6 = tm.getIndex(f6); |
| f7 = tm.getIndex(f7); |
| f8 = tm.getIndex(f8); |
| |
| i += 8; |
| it.next(); |
| td.addToEntryVectorized(v, f1, f2, f3, f4, f5, f6, f7, f8, t1, t2, t3, t4, t5, t6, t7, t8, nCol); |
| } |
| return i; |
| } |
| |
| public final void preAggregateSDCZ_SDCZ(AMapToData tm, ADictionary td, AOffset tof, AOffset of, Dictionary ret, |
| int nCol) { |
| if(nCol == 1) |
| preAggregateSDCZ_SDCZSingleCol(tm, td.getValues(), tof, of, ret.getValues()); |
| else |
| preAggregateSDCZ_SDCZMultiCol(tm, td, tof, of, ret.getValues(), nCol); |
| } |
| |
| private final void preAggregateSDCZ_SDCZSingleCol(AMapToData tm, double[] td, AOffset tof, AOffset of, double[] dv) { |
| final AOffsetIterator itThat = tof.getOffsetIterator(); |
| final AOffsetIterator itThis = of.getOffsetIterator(); |
| final int tSize = tm.size() - 1, size = size() - 1; |
| preAggregateSDCZ_SDCZSingleCol(tm, td, dv, itThat, itThis, tSize, size); |
| } |
| |
| protected void preAggregateSDCZ_SDCZSingleCol(AMapToData tm, double[] td, double[] dv, AOffsetIterator itThat, |
| AOffsetIterator itThis, int tSize, int size) { |
| |
| int i = 0, j = 0, tv = itThat.value(), v = itThis.value(); |
| |
| // main preAggregate process |
| while(i < tSize && j < size) { |
| if(tv == v) { |
| dv[getIndex(j)] += td[tm.getIndex(i)]; |
| tv = itThat.next(); |
| v = itThis.next(); |
| i++; |
| j++; |
| } |
| else if(tv < v) { |
| tv = itThat.next(); |
| i++; |
| } |
| else { |
| v = itThis.next(); |
| j++; |
| } |
| } |
| |
| // Remaining part (very small so not really main performance bottleneck) |
| preAggregateSDCZ_SDCZMultiCol_tail(tm, this, Dictionary.create(td), dv, 1, itThat, itThis, tSize, size, i, j); |
| } |
| |
| protected void preAggregateSDCZ_SDCZMultiCol(AMapToData tm, ADictionary td, AOffset tof, AOffset of, double[] dv, |
| int nCol) { |
| final AOffsetIterator itThat = tof.getOffsetIterator(); |
| final AOffsetIterator itThis = of.getOffsetIterator(); |
| final int tSize = tm.size() - 1, size = size() - 1; |
| int i = 0, j = 0; |
| |
| // main preAggregate process |
| while(i < tSize && j < size) { |
| final int tv = itThat.value(); |
| final int v = itThis.value(); |
| if(tv == v) { |
| final int fr = tm.getIndex(i); |
| final int to = getIndex(j); |
| td.addToEntry(dv, fr, to, nCol); |
| itThat.next(); |
| itThis.next(); |
| i++; |
| j++; |
| } |
| else if(tv < v) { |
| itThat.next(); |
| i++; |
| } |
| else { |
| itThis.next(); |
| j++; |
| } |
| } |
| |
| // Remaining part (very small so not really main performance bottleneck) |
| preAggregateSDCZ_SDCZMultiCol_tail(tm, this, td, dv, nCol, itThat, itThis, tSize, size, i, j); |
| } |
| |
| protected static void preAggregateSDCZ_SDCZMultiCol_tail(AMapToData tm, AMapToData m, ADictionary td, double[] dv, |
| int nCol, AOffsetIterator itThat, AOffsetIterator itThis, int tSize, int size, int i, int j) { |
| int tv = itThat.value(); |
| int v = itThis.value(); |
| if(tv == v) { |
| final int fr = tm.getIndex(i); |
| final int to = m.getIndex(j); |
| td.addToEntry(dv, fr, to, nCol); |
| return; |
| } |
| |
| while(i < tSize && tv < v) { // this is at final |
| itThat.next(); |
| i++; |
| tv = itThat.value(); |
| if(tv == v) { |
| final int fr = tm.getIndex(i); |
| final int to = m.getIndex(j); |
| td.addToEntry(dv, fr, to, nCol); |
| return; |
| } |
| } |
| |
| while(j < size && v < tv) { // that is at final |
| itThis.next(); |
| j++; |
| v = itThis.value(); |
| if(tv == v) { |
| final int fr = tm.getIndex(i); |
| final int to = m.getIndex(j); |
| td.addToEntry(dv, fr, to, nCol); |
| return; |
| } |
| } |
| } |
| |
| public void preAggregateRLE_DDC(int[] ptr, char[] data, ADictionary td, Dictionary ret, int nCol) { |
| if(nCol == 1) |
| preAggregateRLE_DDCSingleCol(ptr, data, td.getValues(), ret.getValues()); |
| else |
| preAggregateRLE_DDCMultiCol(ptr, data, td, ret.getValues(), nCol); |
| } |
| |
| protected void preAggregateRLE_DDCSingleCol(int[] ptr, char[] data, double[] td, double[] ret) { |
| // find each index in RLE, and aggregate into those. |
| for(int k = 0; k < ret.length; k++) { // for each run in RLE |
| final int blen = ptr[k + 1]; |
| for(int apos = ptr[k], rs = 0, re = 0; apos < blen; apos += 2) { |
| rs = re + data[apos]; |
| re = rs + data[apos + 1]; |
| for(int rix = rs; rix < re; rix++) |
| ret[k] += td[getIndex(rix)]; |
| } |
| } |
| } |
| |
| protected void preAggregateRLE_DDCMultiCol(int[] ptr, char[] data, ADictionary td, double[] ret, int nCol) { |
| // find each index in RLE, and aggregate into those. |
| for(int k = 0; k < ret.length / nCol; k++) { // for each run in RLE |
| final int blen = ptr[k + 1]; |
| for(int apos = ptr[k], rs = 0, re = 0; apos < blen; apos += 2) { |
| rs = re + data[apos]; |
| re = rs + data[apos + 1]; |
| for(int rix = rs; rix < re; rix++) |
| td.addToEntry(ret, getIndex(rix), k, nCol); |
| } |
| } |
| } |
| |
| public void preAggregateDDC_RLE(int[] ptr, char[] data, ADictionary td, Dictionary ret, int nCol) { |
| // find each index in RLE, and aggregate into those. |
| double[] v = ret.getValues(); |
| for(int k = 0; k < ptr.length - 1; k++) { // for each run in RLE |
| final int blen = ptr[k + 1]; |
| for(int apos = ptr[k], rs = 0, re = 0; apos < blen; apos += 2) { |
| rs = re + data[apos]; |
| re = rs + data[apos + 1]; |
| for(int rix = rs; rix < re; rix++) |
| td.addToEntry(v, k, getIndex(rix), nCol); |
| } |
| } |
| } |
| |
| /** |
| * Copy the values in this map into another mapping object. |
| * |
| * NOTE! All contained vales should be representable inside the map given. This requirement is not checked. |
| * |
| * @param d Map to copy all values into. |
| */ |
| public void copy(AMapToData d) { |
| if(d.nUnique == 1) |
| return; |
| else if(d instanceof MapToBit) |
| copyBit((MapToBit) d); |
| else if(d instanceof MapToInt) |
| copyInt((MapToInt) d); |
| else { |
| final int sz = size(); |
| for(int i = 0; i < sz; i++) |
| set(i, d.getIndex(i)); |
| } |
| } |
| |
| protected void copyInt(MapToInt d) { |
| copyInt(d.getData()); |
| } |
| |
| protected void copyBit(MapToBit d) { |
| copyBit(d.getData()); |
| } |
| |
| public abstract void copyInt(int[] d); |
| |
| public abstract void copyBit(BitSet d); |
| |
| public int getMax() { |
| int m = -1; |
| for(int i = 0; i < size(); i++) { |
| int v = getIndex(i); |
| m = v > m ? v : m; |
| } |
| return m; |
| } |
| |
| public abstract AMapToData resize(int unique); |
| |
| /** |
| * Count the number of runs inside the map. |
| * |
| * @return The number of runs |
| */ |
| public abstract int countRuns(); |
| |
| /** |
| * Count the number of runs inside the map, but sparse with offsets. |
| * |
| * @param off The sparse offsets to consider counting the runs from. |
| * @return count of runs. |
| */ |
| public int countRuns(AOffset off) { |
| int c = 1; |
| final int size = size(); |
| final AOffsetIterator of = off.getOffsetIterator(); |
| for(int i = 1; i < size; i++) { |
| int id = of.value(); |
| if(id + 1 == of.next()) |
| c += getIndex(i - 1) == getIndex(i) ? 0 : 1; |
| else |
| c++; |
| } |
| return c; |
| } |
| |
| /** |
| * Slice out the range from lower to upper from this map toData. |
| * |
| * @param l Low value to slice from |
| * @param u high value to slice to (not inclusive) |
| * @return A new map containing only the values from the range. |
| */ |
| public abstract AMapToData slice(int l, int u); |
| |
| public abstract AMapToData append(AMapToData t); |
| |
| public abstract AMapToData appendN(IMapToDataGroup[] d); |
| |
| @Override |
| public String toString() { |
| final int sz = size(); |
| StringBuilder sb = new StringBuilder(); |
| sb.append(this.getClass().getSimpleName()); |
| sb.append("["); |
| for(int i = 0; i < sz - 1; i++) |
| sb.append(getIndex(i) + ", "); |
| sb.append(getIndex(sz - 1)); |
| sb.append("]"); |
| return sb.toString(); |
| } |
| } |