blob: 71a2c11f8168210aae814281113628f34f584581 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.math
import org.apache.mahout.math.solver.EigenDecomposition
import collection._
import scala.util.Random
/**
* Mahout matrices and vectors' scala syntactic sugar
*/
package object scalabindings {
// Reserved "ALL" range
final val `::`: Range = null
// values for stochastic sparsityAnalysis
final val z95 = 1.959964
final val z80 = 1.281552
final val maxSamples = 500
final val minSamples = 15
// Some enums
object AutoBooleanEnum extends Enumeration {
type T = Value
val TRUE, FALSE, AUTO = Value
}
implicit def seq2Vector(s: TraversableOnce[AnyVal]) =
new DenseVector(s.map(_.asInstanceOf[Number].doubleValue()).toArray)
implicit def tuple2TravOnce2svec[V <: AnyVal](sdata: TraversableOnce[(Int, V)]) = svec(sdata)
implicit def t1vec(s: Tuple1[AnyVal]): Vector = prod2Vec(s)
implicit def t2vec(s: Tuple2[AnyVal, AnyVal]): Vector = prod2Vec(s)
implicit def t3vec(s: Tuple3[AnyVal, AnyVal, AnyVal]): Vector = prod2Vec(s)
implicit def t4vec(s: Tuple4[AnyVal, AnyVal, AnyVal, AnyVal]): Vector = prod2Vec(s)
implicit def t5vec(s: Tuple5[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]): Vector = prod2Vec(s)
implicit def t6vec(s: Tuple6[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]): Vector = prod2Vec(s)
implicit def t7vec(s: Tuple7[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]): Vector = prod2Vec(s)
implicit def t8vec(s: Tuple8[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]): Vector = prod2Vec(s)
implicit def t9vec(s: Tuple9[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal]): Vector =
prod2Vec(s)
implicit def t10vec(s: Tuple10[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal])
: Vector = prod2Vec(s)
implicit def t11vec(s: Tuple11[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal
, AnyVal])
: Vector = prod2Vec(s)
implicit def t12vec(s: Tuple12[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal
, AnyVal, AnyVal])
: Vector = prod2Vec(s)
implicit def t13vec(s: Tuple13[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal
, AnyVal, AnyVal, AnyVal])
: Vector = prod2Vec(s)
implicit def t14vec(s: Tuple14[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal
, AnyVal, AnyVal, AnyVal, AnyVal])
: Vector = prod2Vec(s)
implicit def t15vec(s: Tuple15[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal
, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal])
: Vector = prod2Vec(s)
implicit def t16vec(s: Tuple16[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal
, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal])
: Vector = prod2Vec(s)
implicit def t17vec(s: Tuple17[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal
, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal])
: Vector = prod2Vec(s)
implicit def t18vec(s: Tuple18[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal
, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal])
: Vector = prod2Vec(s)
implicit def t19vec(s: Tuple19[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal
, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal])
: Vector = prod2Vec(s)
implicit def t20vec(s: Tuple20[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal
, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal])
: Vector = prod2Vec(s)
implicit def t21vec(s: Tuple21[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal
, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal])
: Vector = prod2Vec(s)
implicit def t22vec(s: Tuple22[AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal
, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal, AnyVal])
: Vector = prod2Vec(s)
def prod2Vec(s: Product) = new DenseVector(s.productIterator.
map(_.asInstanceOf[Number].doubleValue()).toArray)
def diagv(v: Vector): DiagonalMatrix = new DiagonalMatrix(v)
def diag(v: Double, size: Int): DiagonalMatrix =
new DiagonalMatrix(new DenseVector(Array.fill(size)(v)))
def eye(size: Int) = new DiagonalMatrix(1.0, size)
/**
* Create dense matrix out of inline arguments -- rows -- which can be tuples,
* iterables of Double, or just single Number (for columnar vectors)
* @param rows
* @tparam R
* @return
*/
def dense[R](rows: R*): DenseMatrix = {
import RLikeOps._
val data = for (r ← rows) yield {
r match {
case n: NumberArray(n.doubleValue())
case t: VectorArray.tabulate(t.length)(t(_))
case t: Array[Double] ⇒ t
case t: Iterable[_]
t.head match {
case ss: Double ⇒ t.asInstanceOf[Iterable[Double]].toArray
case vv: Vector
val m = new DenseMatrix(t.size, t.head.asInstanceOf[Vector].length)
t.asInstanceOf[Iterable[Vector]].view.zipWithIndex.foreach {
case (v, idx) ⇒ m(idx, ::) := v
}
return m
}
case t: Product ⇒ t.productIterator.map(_.asInstanceOf[Number].doubleValue()).toArray
case t: Array[Array[Double]]if (rows.size == 1)
return new DenseMatrix(t)
else
throw new IllegalArgumentException(
"double[][] data parameter can be the only argument for dense()")
case t: Array[Vector]
val m = new DenseMatrix(t.size, t.head.length)
t.view.zipWithIndex.foreach {
case (v, idx) ⇒ m(idx, ::) := v
}
return m
case _ ⇒ throw new IllegalArgumentException("unsupported type in the inline Matrix initializer")
}
}
new DenseMatrix(data.toArray)
}
/**
* Default initializes are always row-wise.
* create a sparse,
* e.g. {{{
*
* m = sparse(
* (0,5)::(9,3)::Nil,
* (2,3.5)::(7,8)::Nil
* )
*
* }}}
*
* @param rows
* @return
*/
def sparse(rows: Vector*): SparseRowMatrix = {
import RLikeOps._
val nrow = rows.size
val ncol = rows.map(_.size()).max
val m = new SparseRowMatrix(nrow, ncol)
m := rows.map { row ⇒
if (row.length < ncol) {
val newRow = row.like(ncol)
newRow(0 until row.length) := row
newRow
}
else row
}
m
}
/**
* create a sparse vector out of list of tuple2's
* @param sdata cardinality
* @return
*/
def svec(sdata: TraversableOnce[(Int, AnyVal)], cardinality: Int = -1) = {
val required = if (sdata.nonEmpty) sdata.map(_._1).max + 1 else 0
var tmp = -1
if (cardinality < 0) {
tmp = required
} else if (cardinality < required) {
throw new IllegalArgumentException(s"Required cardinality %required but got %cardinality")
} else {
tmp = cardinality
}
val initialCapacity = sdata.size
val sv = new RandomAccessSparseVector(tmp, initialCapacity)
sdata.foreach(t ⇒ sv.setQuick(t._1, t._2.asInstanceOf[Number].doubleValue()))
sv
}
def dvec(fromV: Vector) = new DenseVector(fromV)
def dvec(ddata: TraversableOnce[Double]) = new DenseVector(ddata.toArray)
def dvec(numbers: Number*) = new DenseVector(numbers.map(_.doubleValue()).toArray)
def chol(m: Matrix, pivoting: Boolean = false) = new CholeskyDecomposition(m, pivoting)
/**
* computes SVD
* @param m svd input
* @return (U,V, singular-values-vector)
*/
def svd(m: Matrix) = {
val svdObj = new SingularValueDecomposition(m)
(svdObj.getU, svdObj.getV, new DenseVector(svdObj.getSingularValues))
}
/**
* Computes Eigendecomposition of a symmetric matrix
* @param m symmetric input matrix
* @return (V, eigen-values-vector)
*/
def eigen(m: Matrix) = {
val ed = new EigenDecomposition(m, true)
(ed.getV, ed.getRealEigenvalues)
}
/**
* More general version of eigen decomposition
* @param m
* @param symmetric
* @return (V, eigenvalues-real-vector, eigenvalues-imaginary-vector)
*/
def eigenFull(m: Matrix, symmetric: Boolean = true) {
val ed = new EigenDecomposition(m, symmetric)
(ed.getV, ed.getRealEigenvalues, ed.getImagEigenvalues)
}
/**
* QR.
*
* Right now Mahout's QR seems to be using argument for in-place transformations,
* so the matrix context gets messed after this. Hence we force cloning of the
* argument before passing it to Mahout's QR so to keep expected semantics.
* @param m
* @return (Q,R)
*/
def qr(m: Matrix) = {
import MatrixOps._
val qrdec = new QRDecomposition(m cloned)
(qrdec.getQ, qrdec.getR)
}
/**
* Solution <tt>X</tt> of <tt>A*X = B</tt> using QR-Decomposition, where <tt>A</tt> is a square, non-singular matrix.
*
* @param a
* @param b
* @return (X)
*/
def solve(a: Matrix, b: Matrix): Matrix = {
import MatrixOps._
if (a.nrow != a.ncol) {
throw new IllegalArgumentException("supplied matrix A is not square")
}
val qr = new QRDecomposition(a cloned)
if (!qr.hasFullRank) {
throw new IllegalArgumentException("supplied matrix A is singular")
}
qr.solve(b)
}
/**
* Solution <tt>A^{-1}</tt> of <tt>A*A^{-1} = I</tt> using QR-Decomposition, where <tt>A</tt> is a square,
* non-singular matrix. Here only for compatibility with R semantics.
*
* @param a
* @return (A^{-1})
*/
def solve(a: Matrix): Matrix = {
import MatrixOps._
solve(a, eye(a.nrow))
}
/**
* Solution <tt>x</tt> of <tt>A*x = b</tt> using QR-Decomposition, where <tt>A</tt> is a square, non-singular matrix.
*
* @param a
* @param b
* @return (x)
*/
def solve(a: Matrix, b: Vector): Vector = {
import RLikeOps._
val x = solve(a, b.toColMatrix)
x(::, 0)
}
///////////////////////////////////////////////////////////
// Elementwise unary functions. Actually this requires creating clones to avoid side effects. For
// efficiency reasons one may want to actually do in-place exression assignments instead, e.g.
//
// m := exp _
import RLikeOps._
import scala.math._
def mexp(m: Matrix): Matrix = m.cloned := exp _
def vexp(v: Vector): Vector = v.cloned := exp _
def mlog(m: Matrix): Matrix = m.cloned := log _
def vlog(v: Vector): Vector = v.cloned := log _
def mabs(m: Matrix): Matrix = m.cloned ::= (abs(_: Double))
def vabs(v: Vector): Vector = v.cloned ::= (abs(_: Double))
def msqrt(m: Matrix): Matrix = m.cloned ::= sqrt _
def vsqrt(v: Vector): Vector = v.cloned ::= sqrt _
def msignum(m: Matrix): Matrix = m.cloned ::= (signum(_: Double))
def vsignum(v: Vector): Vector = v.cloned ::= (signum(_: Double))
//////////////////////////////////////////////////////////
// operation funcs
/** Matrix-matrix unary func */
type MMUnaryFunc = (Matrix, Option[Matrix])Matrix
/** Binary matrix-matrix operations which may save result in-place, optionally */
type MMBinaryFunc = (Matrix, Matrix, Option[Matrix])Matrix
type MVBinaryFunc = (Matrix, Vector, Option[Matrix])Matrix
type VMBinaryFunc = (Vector, Matrix, Option[Matrix])Matrix
type MDBinaryFunc = (Matrix, Double, Option[Matrix])Matrix
/////////////////////////////////////
// Miscellaneous in-core utilities
/**
* Compute column-wise means and variances.
*
* @return colMeans → colVariances
*/
def colMeanVars(mxA:Matrix): (Vector, Vector) = {
val mu = mxA.colMeans()
val variance = (mxA * mxA colMeans) -= mu ^ 2
mu → variance
}
/**
* Compute column-wise means and stdevs.
* @param mxA input
* @return colMeans → colStdevs
*/
def colMeanStdevs(mxA:Matrix) = {
val (mu, variance) = colMeanVars(mxA)
mu → (variance ::= math.sqrt _)
}
/** Compute square distance matrix. We assume data points are row-wise, similar to R's dist(). */
def sqDist(mxX: Matrix): Matrix = {
val s = mxX ^ 2 rowSums
(mxX %*% mxX.t) := { (r, c, x) ⇒ s(r) + s(c) - 2 * x}
}
/**
* Pairwise squared distance computation.
* @param mxX X, m x d
* @param mxY Y, n x d
* @return pairwise squaired distances of row-wise data points in X and Y (m x n)
*/
def sqDist(mxX: Matrix, mxY: Matrix): Matrix = {
val s = mxX ^ 2 rowSums
val t = mxY ^ 2 rowSums
// D = s*1' + 1*t' - 2XY'
(mxX %*% mxY.t) := { (r, c, d) ⇒ s(r) + t(c) - 2.0 * d}
}
def dist(mxX: Matrix): Matrix = sqDist(mxX) := sqrt _
def dist(mxX: Matrix, mxY: Matrix): Matrix = sqDist(mxX, mxY) := sqrt _
/**
* Check the density of an in-core matrix based on supplied criteria.
* Returns true if we think mx is densier than threshold with at least 80% confidence.
*
* @param mx The matrix to check density of.
* @param threshold the threshold of non-zero elements above which we consider a Matrix Dense
*/
def sparsityAnalysis(mx: Matrix, threshold: Double = 0.25): Boolean = {
require(threshold >= 0.0 && threshold <= 1.0)
var n = minSamples
var mean = 0.0
val rnd = new Random()
val dimm = mx.nrow
val dimn = mx.ncol
val pq = threshold * (1 - threshold)
for (s ← 0 until minSamples) {
if (mx(rnd.nextInt(dimm), rnd.nextInt(dimn)) != 0.0) mean += 1
}
mean /= minSamples
val iv = z80 * math.sqrt(pq / n)
if (mean < threshold - iv) return false // sparse
else if (mean > threshold + iv) return true // dense
while (n < maxSamples) {
// Determine upper bound we may need for n to likely relinquish the uncertainty. Here, we use
// confidence interval formula but solved for n.
val ivNeeded = math.abs(threshold - mean) max 1e-11
val stderr = ivNeeded / z80
val nNeeded = (math.ceil(pq / (stderr * stderr)).toInt max n min maxSamples) - n
var meanNext = 0.0
for (s ← 0 until nNeeded) {
if (mx(rnd.nextInt(dimm), rnd.nextInt(dimn)) != 0.0) meanNext += 1
}
mean = (n * mean + meanNext) / (n + nNeeded)
n += nNeeded
// Are we good now?
val iv = z80 * math.sqrt(pq / n)
if (mean < threshold - iv) return false // sparse
else if (mean > threshold + iv) return true // dense
}
return mean <= threshold
}
}