/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * Copyright (c) 2017 by Contributors
 * \file krprod.h
 * \brief Core functions for the Khatri-Rao product
 * \author Jencir Lee, Chris Swierczewski
 */
#ifndef MXNET_OPERATOR_CONTRIB_KRPROD_H_
#define MXNET_OPERATOR_CONTRIB_KRPROD_H_
#include <algorithm>
#include <utility>
#include <vector>
#include "mshadow/tensor.h"
#include "../operator_common.h"
#include "../c_lapack_api.h"


namespace mxnet {
namespace op {

using namespace mshadow;
using namespace mshadow::expr;
/*!
 * \brief Computes the row-wise Kronecker product
 *
 * Given input matrices that all have the same number of rows, this
 * function computes the Kronecker product of the corresponding rows.
 * For example, if the input matrices are of shape (3, 2), (3, 4),
 * (3, 5), the result matrix will be of shape (3, 2 * 4 * 5), which is
 * (3, 40).
 *
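 * A minimal usage sketch (the shapes and fill values here are
 * arbitrary examples; the caller owns all tensor allocation):
 *
 * \code
 * Tensor<cpu, 2, float> a(Shape2(3, 2)), b(Shape2(3, 4));
 * Tensor<cpu, 2, float> out(Shape2(3, 8));  // (3, 2 * 4)
 * AllocSpace(&a);
 * AllocSpace(&b);
 * AllocSpace(&out);
 * a = 1.0f;  // fill with sample values
 * b = 2.0f;
 * row_wise_kronecker<float>(out, {a, b});
 * // each out[i] is now kron(a[i], b[i]), a row of length 2 * 4 = 8
 * FreeSpace(&a);
 * FreeSpace(&b);
 * FreeSpace(&out);
 * \endcode
 *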
 * \param out result matrix
 * \param ts_arr vector of input matrices
 */
template <typename DType>
inline void row_wise_kronecker
  (Tensor<cpu, 2, DType> out,
   const std::vector<Tensor<cpu, 2, DType> > &ts_arr) {
  CHECK_GE(ts_arr.size(), 1) << "The input matrix array must be non-empty.";

  // Check that all input and output matrices have the same number of
  // rows and that the output matrix has the right number of columns
  int nrows = static_cast<int>(out.size(0));
  int ncols = 1;
  for (auto &ts : ts_arr) {
    CHECK_EQ(nrows, static_cast<int>(ts.size(0)))
      << "All input and output matrices must have the same number of rows.";
    ncols *= ts.size(1);
  }
  CHECK_EQ(ncols, static_cast<int>(out.size(1)));

  // Create an intermediate workspace of the same shape as out
  //
  // Suppose storage holds the result of step i-1; we then compute
  // and store the result of step i into out, compute the result of
  // step i+1 back into storage, and so on, alternating between
  // storage and out as the input and the output of each step
  Tensor<cpu, 2, DType> storage(out.shape_);
  AllocSpace(&storage);

  // Pointers to the input ("given") and output ("result") of the
  // current step; we swap what they point to after every step
  Tensor<cpu, 2, DType> *given = &storage,
    *result = &out, *tmp;

  // Compute each intermediate row-wise Kronecker product
  storage = 1;
  ncols = 1;
  for (auto &ts : ts_arr) {
    expr::BLASEngine<cpu, DType>::SetStream
      (result->stream_);

    // Compute the current row-wise Kronecker product
    *result = 0;
    for (int i = 0; i < nrows; ++i) {
      // BLAS signature
      //
      // dger(
      //   m     : ts.size(1), length of each row of the current matrix
      //   n     : ncols, length of each row of the previous result
      //   alpha : 1, scaling of the outer product of x and y
      //   x     : ts[i].dptr_, current row of the current matrix
      //   incx  : 1, as each element in the row is contiguous
      //   y     : (*given)[i].dptr_, current row of the input buffer
      //   incy  : 1, as each element in the row is contiguous
      //   a     : (*result)[i].dptr_, current row of the result buffer
      //   lda   : ts.size(1), as the outer product is stored as one row
      //           which occupies contiguous memory, and as BLASEngine::ger()
      //           assumes a column-major matrix, lda has to be precisely
      //           the length of x, i.e. ts.size(1)
      // )
      expr::BLASEngine<cpu, DType>::ger
        (result->stream_,
         ts.size(1), ncols, 1,
         ts[i].dptr_, 1,
         (*given)[i].dptr_, 1,
         (*result)[i].dptr_, ts.size(1));
    }
    ncols *= ts.size(1);

    // Swap the input and output buffers for the next step
    tmp = given;
    given = result;
    result = tmp;
  }

  // After the final swap, given points at the buffer holding the
  // final result; if that buffer is storage rather than out, copy
  // the result into out
  if (given != &out)
    Copy(out, storage);

  FreeSpace(&storage);
}

/*!
 * \brief Khatri-Rao product
 *
 * Given input matrices A_1, ..., A_n, of shape (l_1, k), ..., (l_n, k)
 * respectively, this computes their Khatri-Rao product, i.e. the
 * column-wise Kronecker product, which is of shape (l_1 * ... * l_n, k).
 *
 * \param out result matrix
 * \param ts_arr vector of input matrices
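 *
 * A minimal usage sketch (arbitrary example shapes; allocation is the
 * caller's responsibility):
 *
 * \code
 * Tensor<cpu, 2, float> a(Shape2(3, 2)), b(Shape2(4, 2));
 * Tensor<cpu, 2, float> out(Shape2(12, 2));  // (3 * 4, 2)
 * AllocSpace(&a);
 * AllocSpace(&b);
 * AllocSpace(&out);
 * a = 1.0f;  // fill with sample values
 * b = 2.0f;
 * khatri_rao<float>(out, {a, b});
 * // column j of out is now kron(column j of a, column j of b)
 * FreeSpace(&a);
 * FreeSpace(&b);
 * FreeSpace(&out);
 * \endcode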
 */
template <typename DType>
inline void khatri_rao
  (Tensor<cpu, 2, DType> out,
   const std::vector<Tensor<cpu, 2, DType> > &ts_arr) {
  CHECK_GE(ts_arr.size(), 1) << "The input matrix array must be non-empty.";

  // Check that all input and output matrices have the same number of
  // columns and that the output matrix has the right number of rows
  int ncols = static_cast<int>(out.size(1));
  int nrows = 1;
  for (auto &ts : ts_arr) {
    CHECK_EQ(ncols, static_cast<int>(ts.size(1)))
      << "All input and output matrices must have the same number of columns.";
    nrows *= ts.size(0);
  }
  CHECK_EQ(nrows, static_cast<int>(out.size(0)));

  // Change the layout of the matrices to column-major
  Tensor<cpu, 2, DType> out_t(Shape2(out.size(1), out.size(0)));
  AllocSpace(&out_t);
  flip<cpu, DType>(out.size(0), out.size(1), out_t.dptr_, out_t.stride_,
    out.dptr_, out.stride_);

  std::vector<Tensor<cpu, 2, DType> > ts_t_arr;
  for (int i = 0; i < static_cast<int>(ts_arr.size()); ++i) {
    ts_t_arr.emplace_back(Shape2(ts_arr[i].size(1), ts_arr[i].size(0)));
    AllocSpace(&ts_t_arr[i]);
    flip<cpu, DType>(ts_arr[i].size(0), ts_arr[i].size(1), ts_t_arr[i].dptr_,
      ts_t_arr[i].stride_, ts_arr[i].dptr_, ts_arr[i].stride_);
  }

  // Perform the row-wise Kronecker product on the transposed matrices
  row_wise_kronecker(out_t, ts_t_arr);

  // Change the layout of the result matrix back to row-major
  flip<cpu, DType>(out.size(1), out.size(0), out.dptr_, out.stride_,
    out_t.dptr_, out_t.stride_);

  FreeSpace(&out_t);
  for (auto &ts_t : ts_t_arr)
    FreeSpace(&ts_t);
}

/*!
 * \brief Moore-Penrose pseudoinverse of the Khatri-Rao product
 *
 * Given input matrices A_1, ..., A_n, of shape (l_1, k), ..., (l_n, k)
 * respectively, the pseudoinverse of their Khatri-Rao product is
 *
 *   pinv(A_1 khatri-rao A_2 khatri-rao ... khatri-rao A_n) =
 *     ((A_1^T A_1) hadamard-dot ... hadamard-dot (A_n^T A_n))^(-1)
 *     (A_1 khatri-rao ... khatri-rao A_n)^T
 *
 * As the first term on the r.h.s. is a k-by-k square matrix, the
 * result always has the same shape as the transpose of the Khatri-Rao
 * product of the input matrices. The input argument ts_arr may
 * contain either the original input matrices or their transposes.
 *
 * \param out result matrix
 * \param ts_arr vector of input matrices
 * \param input_transposed whether every input matrix is transposed
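 *
 * A minimal usage sketch (arbitrary example shapes; allocation is the
 * caller's responsibility, and the inputs must have full column rank
 * for the underlying Cholesky-based solver to succeed):
 *
 * \code
 * Tensor<cpu, 2, float> a(Shape2(3, 2)), b(Shape2(4, 2));
 * Tensor<cpu, 2, float> pinv(Shape2(2, 12));  // transpose of the (12, 2) product
 * AllocSpace(&a);
 * AllocSpace(&b);
 * AllocSpace(&pinv);
 * // ... fill a and b with full-column-rank data ...
 * inv_khatri_rao<float>(pinv, {a, b});  // input_transposed defaults to false
 * FreeSpace(&a);
 * FreeSpace(&b);
 * FreeSpace(&pinv);
 * \endcode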
 */
template <typename DType>
inline void inv_khatri_rao
  (Tensor<cpu, 2, DType> out,
   const std::vector<Tensor<cpu, 2, DType> > &ts_arr,
   bool input_transposed = false) {
  CHECK_GE(ts_arr.size(), 1) << "The input matrix array must be non-empty.";

  // Initialise the Hadamard product accumulator to the all-ones matrix,
  // the identity for element-wise multiplication; k is the common
  // number of columns ("factors") of the inputs
  int k = out.size(0);
  Tensor<cpu, 2, DType> hadamard_prod(Shape2(k, k));
  AllocSpace(&hadamard_prod);
  hadamard_prod = 1;

  // Note that out has the same shape as the transpose of the
  // Khatri-Rao product
  //
  // When the input is transposed, we can first put the transpose of
  // the Khatri-Rao product in out, then call the linear solver, which
  // updates out's content to the final result
  //
  // If the input is not transposed, we need to create an intermediate
  // tensor to store the Khatri-Rao product, call the linear solver with
  // MXNET_LAPACK_COL_MAJOR as the matrix layout, and transpose the
  // final result into out

  int info;
  if (input_transposed) {
    row_wise_kronecker(out, ts_arr);
    for (auto &ts : ts_arr)
      hadamard_prod *= implicit_dot(ts, ts.T());

    info = MXNET_LAPACK_posv<DType>(MXNET_LAPACK_ROW_MAJOR, 'U',
      k, out.size(1), hadamard_prod.dptr_, hadamard_prod.stride_,
      out.dptr_, out.stride_);
  } else {
    Tensor<cpu, 2, DType> kr(Shape2(out.size(1), out.size(0)));
    AllocSpace(&kr);
    khatri_rao(kr, ts_arr);

    for (auto &ts : ts_arr)
      hadamard_prod *= implicit_dot(ts.T(), ts);

    info = MXNET_LAPACK_posv<DType>(MXNET_LAPACK_COL_MAJOR, 'U',
      k, out.size(1), hadamard_prod.dptr_, hadamard_prod.stride_,
      kr.dptr_, kr.stride_);

    flip<cpu, DType>(out.size(1), out.size(0), out.dptr_, out.stride_,
      kr.dptr_, kr.stride_);
    FreeSpace(&kr);
  }

  FreeSpace(&hadamard_prod);
  if (info != 0)
    LOG(FATAL) << "The linear solver in inv_khatri_rao() returned " << info;
}


template <typename xpu, typename DType>
inline void KhatriRaoCompute_(const nnvm::NodeAttrs &attrs,
                              const OpContext &ctx,
                              const std::vector<TBlob> &in_data,
                              const std::vector<OpReqType> &req,
                              const std::vector<TBlob> &out_data) {
  using namespace mxnet_op;
  if (req[0] == kNullOp) return;

  Stream<xpu> *stream = ctx.get_stream<xpu>();
  Tensor<xpu, 2, DType> out = out_data[0].get<xpu, 2, DType>(stream);
  std::vector<Tensor<xpu, 2, DType> > ts_arr(in_data.size());
  std::transform(in_data.begin(), in_data.end(), ts_arr.begin(),
                 [&stream](const TBlob &blob) -> Tensor<xpu, 2, DType> {
                   return blob.get<xpu, 2, DType>(stream);
                 });
  khatri_rao(out, ts_arr);
}


template <typename xpu>
inline void KhatriRaoCompute(const nnvm::NodeAttrs &attrs,
                             const OpContext &ctx,
                             const std::vector<TBlob> &inputs,
                             const std::vector<OpReqType> &req,
                             const std::vector<TBlob> &outputs) {
  using namespace mxnet_op;
  CHECK_EQ(outputs.size(), 1U);
  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
    KhatriRaoCompute_<xpu, DType>(attrs, ctx, inputs, req, outputs);
  });
}
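
// A sketch of how KhatriRaoCompute might be wired up as the CPU
// FCompute kernel in the corresponding .cc file; the operator name and
// the exact attribute set here are assumptions, not a verbatim copy of
// the actual registration:
//
//   NNVM_REGISTER_OP(khatri_rao)
//   .set_num_outputs(1)
//   .set_attr<FCompute>("FCompute<cpu>", KhatriRaoCompute<cpu>);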

}  // namespace op
}  // namespace mxnet

#endif  // MXNET_OPERATOR_CONTRIB_KRPROD_H_