[SYSTEMDS-2673] Python Read Write from disk
This PR include Python API read and write to disk.
Restructure the position of operands such that they make more sense, etc.
diff --git a/src/main/python/docs/source/api/matrix/data_gen.rst b/src/main/python/docs/source/api/matrix/data_gen.rst
deleted file mode 100644
index c5a6a45..0000000
--- a/src/main/python/docs/source/api/matrix/data_gen.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-.. -------------------------------------------------------------
-..
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-..
-.. http://www.apache.org/licenses/LICENSE-2.0
-..
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied. See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-..
-.. ------------------------------------------------------------
-
-Data Generators
-===============
-
-.. automodule:: systemds.matrix.data_gen
- :members:
-
diff --git a/src/main/python/docs/source/getting_started/simple_examples.rst b/src/main/python/docs/source/getting_started/simple_examples.rst
index 406fddc..cd86cf8 100644
--- a/src/main/python/docs/source/getting_started/simple_examples.rst
+++ b/src/main/python/docs/source/getting_started/simple_examples.rst
@@ -33,13 +33,12 @@
# Import SystemDSContext
from systemds.context import SystemDSContext
- from systemds.matrix.data_gen import full
# Create a context and if necessary (no SystemDS py4j instance running)
# it starts a subprocess which does the execution in SystemDS
with SystemDSContext() as sds:
# Full generates a matrix completely filled with one number.
# Generate a 5x10 matrix filled with 4.2
- m = full(sds,(5, 10), 4.20)
+ m = sds.full((5, 10), 4.20)
# multiply with scalar. Nothing is executed yet!
m_res = m * 3.1
# Do the calculation in SystemDS by calling compute().
diff --git a/src/main/python/docs/source/index.rst b/src/main/python/docs/source/index.rst
index 6a232a0..c8134ff 100644
--- a/src/main/python/docs/source/index.rst
+++ b/src/main/python/docs/source/index.rst
@@ -69,7 +69,6 @@
api/operator/algorithms.rst
api/context/systemds_context.rst
- api/matrix/data_gen.rst
api/matrix/matrix.rst
api/matrix/federated.rst
api/operator/operation_node.rst
diff --git a/src/main/python/systemds/context/systemds_context.py b/src/main/python/systemds/context/systemds_context.py
index 23fe3fa..017d56c 100644
--- a/src/main/python/systemds/context/systemds_context.py
+++ b/src/main/python/systemds/context/systemds_context.py
@@ -36,6 +36,7 @@
from py4j.protocol import Py4JNetworkError
from systemds.utils.consts import VALID_INPUT_TYPES
from systemds.utils.helpers import get_module_dir
+from systemds.operator import OperationNode
class SystemDSContext(object):
@@ -200,3 +201,78 @@
port = s.getsockname()[1]
s.close()
return port
+
+ def full(self, shape: Tuple[int, int], value: Union[float, int]) -> 'OperationNode':
+ """Generates a matrix completely filled with a value
+
+ :param sds_context: SystemDS context
+ :param shape: shape (rows and cols) of the matrix TODO tensor
+ :param value: the value to fill all cells with
+ :return: the OperationNode representing this operation
+ """
+ unnamed_input_nodes = [value]
+ named_input_nodes = {'rows': shape[0], 'cols': shape[1]}
+ return OperationNode(self, 'matrix', unnamed_input_nodes, named_input_nodes)
+
+ def seq(self, start: Union[float, int], stop: Union[float, int] = None,
+ step: Union[float, int] = 1) -> 'OperationNode':
+ """Create a single column vector with values from `start` to `stop` and an increment of `step`.
+ If no stop is defined and only one parameter is given, then start will be 0 and the parameter will be interpreted as
+ stop.
+
+ :param sds_context: SystemDS context
+ :param start: the starting value
+ :param stop: the maximum value
+ :param step: the step size
+ :return: the OperationNode representing this operation
+ """
+ if stop is None:
+ stop = start
+ start = 0
+ unnamed_input_nodes = [start, stop, step]
+ return OperationNode(self, 'seq', unnamed_input_nodes)
+
+ def rand(self, rows: int, cols: int,
+ min: Union[float, int] = None, max: Union[float, int] = None, pdf: str = "uniform",
+ sparsity: Union[float, int] = None, seed: Union[float, int] = None,
+ lambd: Union[float, int] = 1) -> 'OperationNode':
+ """Generates a matrix filled with random values
+
+ :param sds_context: SystemDS context
+ :param rows: number of rows
+ :param cols: number of cols
+ :param min: min value for cells
+ :param max: max value for cells
+ :param pdf: "uniform"/"normal"/"poison" distribution
+ :param sparsity: fraction of non-zero cells
+ :param seed: random seed
+ :param lambd: lamda value for "poison" distribution
+ :return:
+ """
+ available_pdfs = ["uniform", "normal", "poisson"]
+ if rows < 0:
+ raise ValueError("In rand statement, can only assign rows a long (integer) value >= 0 "
+ "-- attempted to assign value: {r}".format(r=rows))
+ if cols < 0:
+ raise ValueError("In rand statement, can only assign cols a long (integer) value >= 0 "
+ "-- attempted to assign value: {c}".format(c=cols))
+ if pdf not in available_pdfs:
+ raise ValueError("The pdf passed is invalid! given: {g}, expected: {e}".format(
+ g=pdf, e=available_pdfs))
+
+ pdf = '\"' + pdf + '\"'
+ named_input_nodes = {
+ 'rows': rows, 'cols': cols, 'pdf': pdf, 'lambda': lambd}
+ if min is not None:
+ named_input_nodes['min'] = min
+ if max is not None:
+ named_input_nodes['max'] = max
+ if sparsity is not None:
+ named_input_nodes['sparsity'] = sparsity
+ if seed is not None:
+ named_input_nodes['seed'] = seed
+
+ return OperationNode(self, 'rand', [], named_input_nodes=named_input_nodes)
+
+ def read(self, path: os.PathLike, **kwargs: Dict[str, VALID_INPUT_TYPES]):
+ return OperationNode(self, 'read', [f'"{path}"'], named_input_nodes=kwargs)
diff --git a/src/main/python/systemds/matrix/__init__.py b/src/main/python/systemds/matrix/__init__.py
index 0ffe295..1d8fcb9 100644
--- a/src/main/python/systemds/matrix/__init__.py
+++ b/src/main/python/systemds/matrix/__init__.py
@@ -21,6 +21,5 @@
from systemds.matrix.matrix import Matrix
from systemds.matrix.federated import Federated
-from systemds.matrix import data_gen
-__all__ = [Matrix, Federated, data_gen]
+__all__ = [Matrix, Federated]
diff --git a/src/main/python/systemds/matrix/data_gen.py b/src/main/python/systemds/matrix/data_gen.py
deleted file mode 100644
index f450e5e..0000000
--- a/src/main/python/systemds/matrix/data_gen.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# -------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# -------------------------------------------------------------
-
-__all__ = ['full', 'seq', 'rand']
-
-'''
-Contains a number of different data generators
-'''
-
-from typing import Union, Tuple
-
-from systemds.operator import OperationNode
-from systemds.context import SystemDSContext
-
-def full(sds_context: SystemDSContext, shape: Tuple[int, int], value: Union[float, int]) -> OperationNode:
- """Generates a matrix completely filled with a value
-
- :param sds_context: SystemDS context
- :param shape: shape (rows and cols) of the matrix TODO tensor
- :param value: the value to fill all cells with
- :return: the OperationNode representing this operation
- """
- unnamed_input_nodes = [value]
- named_input_nodes = {'rows': shape[0], 'cols': shape[1]}
- return OperationNode(sds_context, 'matrix', unnamed_input_nodes, named_input_nodes)
-
-
-def seq(sds_context: SystemDSContext, start: Union[float, int], stop: Union[float, int] = None,
- step: Union[float, int] = 1) -> OperationNode:
- """Create a single column vector with values from `start` to `stop` and an increment of `step`.
- If no stop is defined and only one parameter is given, then start will be 0 and the parameter will be interpreted as
- stop.
-
- :param sds_context: SystemDS context
- :param start: the starting value
- :param stop: the maximum value
- :param step: the step size
- :return: the OperationNode representing this operation
- """
- if stop is None:
- stop = start
- start = 0
- unnamed_input_nodes = [start, stop, step]
- return OperationNode(sds_context, 'seq', unnamed_input_nodes)
-
-
-def rand(sds_context: SystemDSContext, rows: int, cols: int,
- min: Union[float, int] = None, max: Union[float, int] = None, pdf: str = "uniform",
- sparsity: Union[float, int] = None, seed: Union[float, int] = None,
- lambd: Union[float, int] = 1) -> OperationNode:
- """Generates a matrix filled with random values
-
- :param sds_context: SystemDS context
- :param rows: number of rows
- :param cols: number of cols
- :param min: min value for cells
- :param max: max value for cells
- :param pdf: "uniform"/"normal"/"poison" distribution
- :param sparsity: fraction of non-zero cells
- :param seed: random seed
- :param lambd: lamda value for "poison" distribution
- :return:
- """
- available_pdfs = ["uniform", "normal", "poisson"]
- if rows < 0:
- raise ValueError("In rand statement, can only assign rows a long (integer) value >= 0 "
- "-- attempted to assign value: {r}".format(r=rows))
- if cols < 0:
- raise ValueError("In rand statement, can only assign cols a long (integer) value >= 0 "
- "-- attempted to assign value: {c}".format(c=cols))
- if pdf not in available_pdfs:
- raise ValueError("The pdf passed is invalid! given: {g}, expected: {e}".format(
- g=pdf, e=available_pdfs))
-
- pdf = '\"' + pdf + '\"'
- named_input_nodes = {
- 'rows': rows, 'cols': cols, 'pdf': pdf, 'lambda': lambd}
- if min is not None:
- named_input_nodes['min'] = min
- if max is not None:
- named_input_nodes['max'] = max
- if sparsity is not None:
- named_input_nodes['sparsity'] = sparsity
- if seed is not None:
- named_input_nodes['seed'] = seed
-
- return OperationNode(sds_context, 'rand', [], named_input_nodes=named_input_nodes)
diff --git a/src/main/python/systemds/matrix/matrix.py b/src/main/python/systemds/matrix/matrix.py
index 577bbaf..634af55 100644
--- a/src/main/python/systemds/matrix/matrix.py
+++ b/src/main/python/systemds/matrix/matrix.py
@@ -36,40 +36,32 @@
class Matrix(OperationNode):
_np_array: Optional[np.array]
- def __init__(self, sds_context: 'SystemDSContext', mat: Union[np.array, os.PathLike],
+ def __init__(self, sds_context: 'SystemDSContext', mat: np.array,
*args: Sequence[VALID_INPUT_TYPES],
**kwargs: Dict[str, VALID_INPUT_TYPES]) -> None:
- """Generate DAGNode representing matrix with data either given by a numpy array, which will be sent to SystemDS
- on need, or a path pointing to a matrix.
+ """Generate DAGNode representing matrix with data given by a numpy array, which will be sent to SystemDS
+ on need.
- :param mat: the numpy array or path to matrix file
+ :param mat: the numpy array
:param args: unnamed parameters
:param kwargs: named parameters
"""
- if isinstance(mat, str):
- unnamed_params = [f'\'{mat}\'']
- named_params = {}
- self._np_array = None
- if type(mat) is np.ndarray:
- self._np_array = mat
- unnamed_params = ['\'./tmp/{file_name}\'']
- self.shape = mat.shape
- if len(self.shape) == 2:
- named_params = {'rows': mat.shape[0], 'cols': mat.shape[1]}
- elif len(self.shape) == 1:
- named_params = {'rows': mat.shape[0], 'cols': 1}
- else:
- # TODO Support tensors.
- raise ValueError("Only two dimensional arrays supported")
+
+ self._np_array = mat
+ unnamed_params = ['\'./tmp/{file_name}\'']
+ self.shape = mat.shape
+ if len(self.shape) == 2:
+ named_params = {'rows': mat.shape[0], 'cols': mat.shape[1]}
+ elif len(self.shape) == 1:
+ named_params = {'rows': mat.shape[0], 'cols': 1}
else:
- # TODO better alternative than format string?
- unnamed_params = ['\'./tmp/{file_name}\'']
- named_params = {'rows': -1, 'cols': -1}
- self._np_array = mat
+ # TODO Support tensors.
+ raise ValueError("Only two dimensional arrays supported")
+
unnamed_params.extend(args)
named_params.update(kwargs)
super().__init__(sds_context, 'read', unnamed_params,
- named_params, is_python_local_data=self._is_numpy(), shape=mat.shape)
+ named_params, is_python_local_data=self._is_numpy(), shape=self.shape)
def pass_python_data_to_prepared_script(self, sds, var_name: str, prepared_script: JavaObject) -> None:
assert self.is_python_local_data, 'Can only pass data to prepared script if it is python local!'
@@ -94,84 +86,3 @@
def _is_numpy(self) -> bool:
return self._np_array is not None
-
- def rev(self) -> OperationNode:
- """ Reverses the rows in a matrix
-
- :return: the OperationNode representing this operation
- """
-
- self._is_numpy()
- return OperationNode(self.sds_context, 'rev', [self])
-
- def order(self, by: int = 1, decreasing: bool = False,
- index_return: bool = False) -> OperationNode:
- """ Sort by a column of the matrix X in increasing/decreasing order and returns either the index or data
-
- :param by: sort matrix by this column number
- :param decreasing: If true the matrix will be sorted in decreasing order
- :param index_return: If true, the index numbers will be returned
- :return: the OperationNode representing this operation
- """
-
- self._is_numpy()
-
- cols = self._np_array.shape[1]
- if by > cols:
- raise IndexError(
- "Index {i} is out of bounds for axis 1 with size {c}".format(i=by, c=cols))
-
- named_input_nodes = {'target': self, 'by': by, 'decreasing': str(decreasing).upper(),
- 'index.return': str(index_return).upper()}
-
- return OperationNode(self.sds_context, 'order', [], named_input_nodes=named_input_nodes)
-
- def t(self) -> OperationNode:
- """ Transposes the input matrix
-
- :return: the OperationNode representing this operation
- """
-
- self._is_numpy()
- return OperationNode(self.sds_context, 't', [self])
-
- def cholesky(self, safe: bool = False) -> OperationNode:
- """ Computes the Cholesky decomposition of a symmetric, positive definite matrix
-
- :param safe: default value is False, if flag is True additional checks to ensure
- that the matrix is symmetric positive definite are applied, if False, checks will be skipped
- :return: the OperationNode representing this operation
- """
-
- # check square dimension
- if self.shape[0] != self.shape[1]:
- raise ValueError("Last 2 dimensions of the array must be square")
-
- if safe:
- # check if mat is positive definite
- if not np.all(np.linalg.eigvals(self._np_array) > 0):
- raise ValueError("Matrix is not positive definite")
-
- # check if mat is symmetric
- if not np.allclose(self._np_array, self._np_array.transpose()):
- raise ValueError("Matrix is not symmetric")
-
- return OperationNode(self.sds_context, 'cholesky', [self])
-
- def to_one_hot(self, num_classes: int) -> OperationNode:
- """ OneHot encode the matrix.
-
- It is assumed that there is only one column to encode, and all values are whole numbers > 0
-
- :param num_classes: The number of classes to encode into. max value contained in the matrix must be <= num_classes
- :return: The OperationNode containing the oneHotEncoded values
- """
- if len(self.shape) != 1:
- raise ValueError(
- "Only Matrixes with a single column or row is valid in One Hot, " + str(self.shape) + " is invalid")
-
- if num_classes < 2:
- raise ValueError("Number of classes should be larger than 1")
-
- named_input_nodes = {"X": self, "numClasses": num_classes}
- return OperationNode(self.sds_context, 'toOneHot', named_input_nodes=named_input_nodes, shape=(self.shape[0], num_classes))
diff --git a/src/main/python/systemds/operator/operation_node.py b/src/main/python/systemds/operator/operation_node.py
index d5da256..aba4c63 100644
--- a/src/main/python/systemds/operator/operation_node.py
+++ b/src/main/python/systemds/operator/operation_node.py
@@ -76,7 +76,7 @@
self.operation = operation
self._unnamed_input_nodes = unnamed_input_nodes
self._named_input_nodes = named_input_nodes
- self.output_type = output_type
+ self._output_type = output_type
self._is_python_local_data = is_python_local_data
self._result_var = None
self._lineage_trace = None
@@ -159,6 +159,8 @@
output += f'{var_name}_{idx},'
output = output[:-1] + "]"
return f'{output}={self.operation}({inputs_comma_sep});'
+ elif self.output_type == OutputType.NONE:
+ return f'{self.operation}({inputs_comma_sep});'
else:
return f'{var_name}={self.operation}({inputs_comma_sep});'
@@ -335,3 +337,94 @@
unnamed_inputs.append(weights)
unnamed_inputs.append(moment)
return OperationNode(self.sds_context, 'moment', unnamed_inputs, output_type=OutputType.DOUBLE)
+
+ def write(self, destination: str, format:str = "binary", **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'OperationNode':
+
+ unnamed_inputs = [self, f'"{destination}"']
+ named_parameters = {"format":f'"{format}"'}
+ named_parameters.update(kwargs)
+ return OperationNode(self.sds_context, 'write', unnamed_inputs, named_parameters, output_type= OutputType.NONE)
+
+ def rev(self) -> 'OperationNode':
+ """ Reverses the rows in a matrix
+
+ :return: the OperationNode representing this operation
+ """
+
+ self._check_matrix_op()
+ return OperationNode(self.sds_context, 'rev', [self])
+
+ def order(self, by: int = 1, decreasing: bool = False,
+ index_return: bool = False) -> 'OperationNode':
+ """ Sort by a column of the matrix X in increasing/decreasing order and returns either the index or data
+
+ :param by: sort matrix by this column number
+ :param decreasing: If true the matrix will be sorted in decreasing order
+ :param index_return: If true, the index numbers will be returned
+ :return: the OperationNode representing this operation
+ """
+
+ self._check_matrix_op()
+
+ cols = self._np_array.shape[1]
+ if by > cols:
+ raise IndexError(
+ "Index {i} is out of bounds for axis 1 with size {c}".format(i=by, c=cols))
+
+ named_input_nodes = {'target': self, 'by': by, 'decreasing': str(decreasing).upper(),
+ 'index.return': str(index_return).upper()}
+
+ return OperationNode(self.sds_context, 'order', [], named_input_nodes=named_input_nodes)
+
+ def t(self) -> 'OperationNode':
+ """ Transposes the input matrix
+
+ :return: the OperationNode representing this operation
+ """
+
+ self._check_matrix_op()
+ return OperationNode(self.sds_context, 't', [self])
+
+ def cholesky(self, safe: bool = False) -> 'OperationNode':
+ """ Computes the Cholesky decomposition of a symmetric, positive definite matrix
+
+ :param safe: default value is False, if flag is True additional checks to ensure
+ that the matrix is symmetric positive definite are applied, if False, checks will be skipped
+ :return: the OperationNode representing this operation
+ """
+
+ self._check_matrix_op()
+ # check square dimension
+ if self.shape[0] != self.shape[1]:
+ raise ValueError("Last 2 dimensions of the array must be square")
+
+ if safe:
+ # check if mat is positive definite
+ if not np.all(np.linalg.eigvals(self._np_array) > 0):
+ raise ValueError("Matrix is not positive definite")
+
+ # check if mat is symmetric
+ if not np.allclose(self._np_array, self._np_array.transpose()):
+ raise ValueError("Matrix is not symmetric")
+
+ return OperationNode(self.sds_context, 'cholesky', [self])
+
+ def to_one_hot(self, num_classes: int) -> 'OperationNode':
+ """ OneHot encode the matrix.
+
+ It is assumed that there is only one column to encode, and all values are whole numbers > 0
+
+ :param num_classes: The number of classes to encode into. max value contained in the matrix must be <= num_classes
+ :return: The OperationNode containing the oneHotEncoded values
+ """
+
+ self._check_matrix_op()
+ if len(self.shape) != 1:
+ raise ValueError(
+ "Only Matrixes with a single column or row is valid in One Hot, " + str(self.shape) + " is invalid")
+
+ if num_classes < 2:
+ raise ValueError("Number of classes should be larger than 1")
+
+ named_input_nodes = {"X": self, "numClasses": num_classes}
+ return OperationNode(self.sds_context, 'toOneHot', named_input_nodes=named_input_nodes, shape=(self.shape[0], num_classes))
\ No newline at end of file
diff --git a/src/main/python/systemds/script_building/dag.py b/src/main/python/systemds/script_building/dag.py
index fa6bab9..69988f4 100644
--- a/src/main/python/systemds/script_building/dag.py
+++ b/src/main/python/systemds/script_building/dag.py
@@ -34,6 +34,7 @@
MATRIX = auto()
DOUBLE = auto()
LIST = auto()
+ NONE = auto()
class DAGNode(ABC):
@@ -92,7 +93,11 @@
@property
def is_python_local_data(self):
return self._is_python_local_data
-
+
@property
def number_of_outputs(self):
return self._number_of_outputs
+
+ @property
+ def output_type(self):
+ return self._output_type
diff --git a/src/main/python/systemds/script_building/script.py b/src/main/python/systemds/script_building/script.py
index 2fba22c..1f9d5de 100644
--- a/src/main/python/systemds/script_building/script.py
+++ b/src/main/python/systemds/script_building/script.py
@@ -24,7 +24,7 @@
from py4j.java_collections import JavaArray
from py4j.java_gateway import JavaObject, JavaGateway
-from systemds.script_building.dag import DAGNode
+from systemds.script_building.dag import DAGNode, OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
if TYPE_CHECKING:
@@ -140,15 +140,17 @@
:param dag_root: the topmost operation of our DAG, result of operation will be output
"""
baseOutVarString = self._dfs_dag_nodes(dag_root)
- if(dag_root.number_of_outputs > 1):
- self.out_var_name = []
- for idx in range(dag_root.number_of_outputs):
- self.add_code(
- f'write({baseOutVarString}_{idx}, \'./tmp_{idx}\');')
- self.out_var_name.append(f'{baseOutVarString}_{idx}')
- else:
- self.out_var_name.append(baseOutVarString)
- self.add_code(f'write({baseOutVarString}, \'./tmp\');')
+ if(dag_root.output_type != OutputType.NONE):
+ if(dag_root.number_of_outputs > 1):
+ self.out_var_name = []
+ for idx in range(dag_root.number_of_outputs):
+ self.add_code(
+ f'write({baseOutVarString}_{idx}, \'./tmp_{idx}\');')
+ self.out_var_name.append(f'{baseOutVarString}_{idx}')
+ else:
+ self.out_var_name.append(baseOutVarString)
+ self.add_code(f'write({baseOutVarString}, \'./tmp\');')
+
def _dfs_dag_nodes(self, dag_node: VALID_INPUT_TYPES) -> str:
"""Uses Depth-First-Search to create code from DAG
diff --git a/src/main/python/systemds/utils/converters.py b/src/main/python/systemds/utils/converters.py
index 101aceb..3079a55 100644
--- a/src/main/python/systemds/utils/converters.py
+++ b/src/main/python/systemds/utils/converters.py
@@ -22,10 +22,7 @@
import numpy as np
from py4j.java_gateway import JavaClass, JavaObject, JVMView
-from systemds.context import SystemDSContext
-
-
-def numpy_to_matrix_block(sds: SystemDSContext, np_arr: np.array):
+def numpy_to_matrix_block(sds: 'SystemDSContext', np_arr: np.array):
"""Converts a given numpy array, to internal matrix block representation.
:param sds: The current systemds context.
diff --git a/src/main/python/tests/lineage/test_lineagetrace.py b/src/main/python/tests/lineage/test_lineagetrace.py
index 84cefdf..0d35c4c 100644
--- a/src/main/python/tests/lineage/test_lineagetrace.py
+++ b/src/main/python/tests/lineage/test_lineagetrace.py
@@ -25,7 +25,6 @@
import unittest
from systemds.context import SystemDSContext
-from systemds.matrix.data_gen import full
from systemds.utils.helpers import get_module_dir
os.environ['SYSDS_QUIET'] = "1"
@@ -51,7 +50,7 @@
def test_compare_trace1(self): # test getLineageTrace() on an intermediate
if "SYSTEMDS_ROOT" in os.environ:
- m = full(self.sds, (10, 10), 1)
+ m = self.sds.full((10, 10), 1)
m_res = m + m
python_trace = [x.strip().split("°")
@@ -90,7 +89,6 @@
command = "systemds " + script_file_name + \
" > " + result_file_name + " 2> /dev/null"
- print(command)
os.system(command)
return parse_trace(result_file_name)
diff --git a/src/main/python/tests/manual_tests/multi_log_reg_mnist.py b/src/main/python/tests/manual_tests/multi_log_reg_mnist.py
index 9e1705cf..0ee4a1c 100644
--- a/src/main/python/tests/manual_tests/multi_log_reg_mnist.py
+++ b/src/main/python/tests/manual_tests/multi_log_reg_mnist.py
@@ -34,6 +34,6 @@
# Test data
Xt = Matrix(sds, d.get_test_data().reshape((10000, 28*28)))
Yt = Matrix(sds, d.get_test_labels()) + 1.0
- [m, y_pred, acc] = multiLogRegPredict(Xt, bias, Yt).compute()
+ [_, _, acc] = multiLogRegPredict(Xt, bias, Yt).compute()
print(acc)
diff --git a/src/main/python/tests/matrix/test_aggregations.py b/src/main/python/tests/matrix/test_aggregations.py
index f26da1a..8990bcc 100644
--- a/src/main/python/tests/matrix/test_aggregations.py
+++ b/src/main/python/tests/matrix/test_aggregations.py
@@ -23,7 +23,6 @@
import numpy as np
from systemds.context import SystemDSContext
-from systemds.matrix.data_gen import full, seq
from systemds.matrix import Matrix
dim = 5
@@ -72,11 +71,11 @@
def test_full(self):
self.assertTrue(np.allclose(
- full(self.sds, (2, 3), 10.1).compute(), np.full((2, 3), 10.1)))
+ self.sds.full( (2, 3), 10.1).compute(), np.full((2, 3), 10.1)))
def test_seq(self):
self.assertTrue(np.allclose(
- seq(self.sds, 3).compute(), np.arange(4).reshape(4, 1)))
+ self.sds.seq(3).compute(), np.arange(4).reshape(4, 1)))
def test_var1(self):
self.assertTrue(np.allclose(
diff --git a/src/main/python/tests/matrix/test_rand.py b/src/main/python/tests/matrix/test_rand.py
index 104ce7d..b5cc3cd 100644
--- a/src/main/python/tests/matrix/test_rand.py
+++ b/src/main/python/tests/matrix/test_rand.py
@@ -28,7 +28,6 @@
import numpy as np
import scipy.stats as st
from systemds.context import SystemDSContext
-from systemds.matrix.data_gen import rand
np.random.seed(7)
# TODO Remove the randomness of the test, such that
@@ -54,19 +53,19 @@
cls.sds.close()
def test_rand_shape(self):
- m = rand(self.sds, rows=shape[0], cols=shape[1]).compute()
+ m = self.sds.rand(rows=shape[0], cols=shape[1]).compute()
self.assertTrue(m.shape == shape)
def test_rand_min_max(self):
m = (
- rand(self.sds, rows=shape[0], cols=shape[1],
- min=min_max[0], max=min_max[1])
+ self.sds.rand(rows=shape[0], cols=shape[1],
+ min=min_max[0], max=min_max[1])
.compute())
self.assertTrue((m.min() >= min_max[0]) and (m.max() <= min_max[1]))
def test_rand_sparsity(self):
- m = rand(self.sds, rows=shape[0], cols=shape[1],
- sparsity=sparsity, seed=0).compute()
+ m = self.sds.rand(rows=shape[0], cols=shape[1],
+ sparsity=sparsity, seed=0).compute()
non_zero_value_percent = np.count_nonzero(m) * 100 / np.prod(m.shape)
self.assertTrue(math.isclose(
@@ -74,13 +73,13 @@
def test_rand_uniform_distribution(self):
m = (
- rand(self.sds,
- rows=dist_shape[0],
- cols=dist_shape[1],
- pdf="uniform",
- min=min_max[0],
- max=min_max[1],
- seed=0)
+ self.sds.rand(
+ rows=dist_shape[0],
+ cols=dist_shape[1],
+ pdf="uniform",
+ min=min_max[0],
+ max=min_max[1],
+ seed=0)
.compute())
dist = find_best_fit_distribution(m.flatten("F"), distributions)
@@ -88,29 +87,29 @@
def test_rand_normal_distribution(self):
m = (
- rand(self.sds,
- rows=dist_shape[0],
- cols=dist_shape[1],
- pdf="normal",
- min=min_max[0],
- max=min_max[1],
- seed=0)
+ self.sds.rand(
+ rows=dist_shape[0],
+ cols=dist_shape[1],
+ pdf="normal",
+ min=min_max[0],
+ max=min_max[1],
+ seed=0)
.compute())
dist = find_best_fit_distribution(m.flatten("F"), distributions)
self.assertTrue(dist == "norm")
def test_rand_zero_shape(self):
- m = rand(self.sds, rows=0, cols=0).compute()
+ m = self.sds.rand(rows=0, cols=0).compute()
self.assertTrue(np.allclose(m, np.array([[]])))
def test_rand_invalid_shape(self):
with self.assertRaises(ValueError) as context:
- rand(self.sds, rows=1, cols=-10).compute()
+ self.sds.rand(rows=1, cols=-10).compute()
def test_rand_invalid_pdf(self):
with self.assertRaises(ValueError) as context:
- rand(self.sds, rows=1, cols=10, pdf="norm").compute()
+ self.sds.rand(rows=1, cols=10, pdf="norm").compute()
def find_best_fit_distribution(data, distribution_lst):
diff --git a/src/main/python/tests/matrix/test_to_one_hot.py b/src/main/python/tests/matrix/test_to_one_hot.py
index b43d4b1..e660905 100644
--- a/src/main/python/tests/matrix/test_to_one_hot.py
+++ b/src/main/python/tests/matrix/test_to_one_hot.py
@@ -23,7 +23,6 @@
import numpy as np
from systemds.context import SystemDSContext
-from systemds.matrix.data_gen import full, seq
from systemds.matrix import Matrix
diff --git a/src/main/python/tests/matrix/test_write.py b/src/main/python/tests/matrix/test_write.py
new file mode 100644
index 0000000..14b0b8f
--- /dev/null
+++ b/src/main/python/tests/matrix/test_write.py
@@ -0,0 +1,61 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+import math
+import os
+import random
+import shutil
+import sys
+import unittest
+
+import numpy as np
+import scipy.stats as st
+from systemds.context import SystemDSContext
+from systemds.matrix import Matrix
+
+
+class TestWrite(unittest.TestCase):
+
+ sds: SystemDSContext = None
+ temp_dir: str = "tests/matrix/temp_write/"
+
+ @classmethod
+ def setUpClass(cls):
+ cls.sds = SystemDSContext()
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.sds.close()
+
+ def tearDown(self):
+ shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+ def test_write_01(self):
+ original = np.ones([10, 10])
+ X = Matrix(self.sds, original)
+ X.write(self.temp_dir + "01").compute()
+ NX = self.sds.read(self.temp_dir + "01")
+ res = NX.compute()
+ self.assertTrue(np.allclose(original, res))
+
+
+if __name__ == "__main__":
+ unittest.main(exit=False)