% blob: 696ed4e94916503403a7c6498ddd6cc0cbdf41df [file] [log] [blame]
% When using TeXShop on the Mac, let it know the root document. The following must be one of the first 20 lines.
% !TEX root = ../design.tex
\chapter{Matrix Operations}
\begin{moduleinfo}
\item[Author] \href{mailto:Florian.Schoppmann@emc.com}{Florian Schoppmann}
\item[History]
\begin{modulehistory}
\item[v0.5] Initial revision
\end{modulehistory}
\end{moduleinfo}
% Abstract. What is the problem we want to solve?
While dense and sparse matrices are native objects for MADlib, they are not part of the SQL standard. It is therefore essential to provide bridges between SQL types and MADlib types, as well as to provide a basic set of primitive functions that can be used in SQL.
\section{Constructing Matrices}
\subsection{Construct a matrix from columns stored as tuples} \label{sec:matrix:matrixAgg}
Let $X = ( x_1, \dots, x_n ) \subset \R^m$. \symlabel{matrix\_agg}{sym:matrixAgg}$(X)$ returns the matrix $(x_1 \dots x_n) \in \R^{m \times n}$.
\subsubsection{Implementation as User-Defined Aggregate}
\begin{center}
\begin{tabular}{rlll}
\toprule%
& \textbf{Name} & \textbf{Description} & \textbf{Type}
\\\otoprule
In &
\texttt{x} &
Vector $x_i \in \R^m$ &
floating-point vector
\\\midrule
Out & &
Matrix $M = (x_1 \dots x_n) \in \R^{m \times n}$ &
floating-point matrix
\\\bottomrule
\end{tabular}
\end{center}
\section{Norms and Distances}
\subsection{Column in a matrix that is closest to a given vector} \label{sec:matrix:closestColumn}
Let $M \in \R^{m \times n}$, $x \in \R^m$, and let $\dist$ be a metric. \symlabel{closest\_column}{sym:closestColumn}$(M, x, \dist)$ returns a tuple $(i,d)$ such that $d = \dist(x, M_i) = \min_{j \in \Nupto n} \dist(x, M_j)$, where $M_j$ denotes the $j$-th column of $M$.
\subsubsection{Implementation as User-Defined Function}
\begin{center}
\begin{tabular}{rlll}
\toprule%
& \textbf{Name} & \textbf{Description} & \textbf{Type}
\\\otoprule
In &
\texttt{M} &
Matrix $M \in \R^{m \times n}$ &
floating-point matrix
\\\midrule
In &
\texttt{x} &
Vector $x \in \R^m$ &
floating-point vector
\\\midrule
In &
\texttt{dist} &
Metric to use &
function
\\\midrule
Out &
\texttt{column\_id} &
Index $i$ of the column of $M$ that is closest to $x$ &
integer
\\\midrule
Out &
\texttt{distance} &
$\dist(x, M_i)$ &
floating-point
\\\bottomrule
\end{tabular}
\end{center}