src/ports/postgres/modules/deep_learning/madlib_keras_serializer.py_in - madlib - Git at Google

 # coding=utf-8
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 import numpy as np
 from utilities.utilities import _assert

 # TODO
 # 1. Current serializing logic
     # serialized string -> byte string
     # np.array(np.array(image_count).concatenate(weights_np_array)).tostring()
     # Proposed logic
     # image_count can be a separate value
     # weights -> np.array(weights).tostring()
     # combine these 2 into one string by a random splitter
     # serialized string -> imagecount_splitter_weights
 # 2. combine the serialize_state_with_nd_weights and serialize_state_with_1d_weights
     # into one function called serialize_state. This function can infer the shape
     # of the model weights and then flatten if they are nd weights.
 # 3. Same as 2 for deserialize


 """
 workflow
 1. Set initial weights in madlib keras fit function.
 2. Serialize these initial model weights as a byte string and pass it to keras step
 3. Deserialize the state passed from the previous step into a list of nd weights
 that will be passed on to model.set_weights()
 4. At the end of each buffer in fit transition, serialize the image count and
 the model weights into a bytestring that will be passed on to the fit merge function.
 5. In fit merge, deserialize each state into an image count and a 1d np array of
 weights. Do some averaging operations and serialize them again into a state which
 contains the image count and the 1d weights. The same applies to fit final.
 6. Return the final state from fit final to fit which will then be deserialized
 as 1d weights to be passed on to the evaluate function
 """
 def get_image_count_from_state(state):
     """
     Read the image count stored at the front of a serialized model state.

     :param state: bytestring serialized model state containing image count
         and weights; may be empty or None
     :return: image count as float, or 0 when the state is empty or None
     """
     if state:
         # Only the count is needed; the weights half of the pair is dropped.
         count, _unused_weights = deserialize_as_image_1d_weights(state)
         return count
     return 0

 def get_serialized_1d_weights_from_state(state):
     """
     Drop the image count from a serialized model state and return only the
     weights, serialized again.

     Output of this function is used to deserialize the output of each iteration
     of madlib keras step UDA.

     :param state: bytestring serialized model state containing image count
         and weights; may be empty or None
     :return: model weights serialized as bytestring, or None when the state
         is empty or None
     """
     if not state:
         # deserialize_as_image_1d_weights returns None for an empty state,
         # which cannot be unpacked into (count, weights).
         return None
     _, weights = deserialize_as_image_1d_weights(state)
     # tobytes() replaces the deprecated tostring(); the bytes are the same.
     return weights.tobytes()

 def serialize_state_with_nd_weights(image_count, model_weights):
     """
     Serialize an image count and a list of nd weight arrays into one state.

     This function is called when the output of keras.get_weights() (list of nd
     np arrays) has to be converted into a serialized model state.

     The state is a single float32 vector: the image count first, followed by
     every weight array flattened in list order. Because the count is stored
     as float32 too, it shares that type's precision.

     :param image_count: numeric value stored as the first element
     :param model_weights: a list of numpy arrays, what you get from
         keras.get_weights()
     :return: Image count and model weights serialized into a bytestring
         format, or None when model_weights is None
     """
     if model_weights is None:
         return None
     pieces = [np.array([image_count])]
     # ravel() avoids the extra per-layer copy that flatten() always makes;
     # concatenate copies the data into the result anyway.
     pieces.extend(np.asarray(w).ravel() for w in model_weights)
     state = np.concatenate(pieces).astype(np.float32)
     # tobytes() replaces the deprecated tostring(); the bytes are the same.
     return state.tobytes()


 def serialize_state_with_1d_weights(image_count, model_weights):
     """
     Serialize an image count and already-flattened weights into one state.

     This function is called when the weights are to be passed to the keras fit
     merge and final functions.

     The state is a single float32 vector: the image count first, followed by
     the weights.

     :param image_count: numeric value stored as the first element
     :param model_weights: a single flattened numpy array containing all of the
         weights
     :return: Image count and model weights serialized into a bytestring
         format, or None when model_weights is None
     """
     if model_weights is None:
         return None
     merge_state = np.concatenate((np.array([image_count]), model_weights))
     # tobytes() replaces the deprecated tostring(); the bytes are the same.
     return merge_state.astype(np.float32).tobytes()


 def deserialize_as_image_1d_weights(state):
     """
     Split a serialized model state into its image count and 1d weights.

     This function is called when the model state needs to be deserialized in
     the keras fit merge and final functions.

     :param state: the stringified (serialized) state containing image_count and
             model_weights, read as a float32 vector
     :return: None when the state is empty or None, otherwise a tuple of
         image_count: first element of the state, as a Python float
         model_weights: a single flattened float32 numpy array containing
         all of the weights (every element after the first)
     """
     if not state:
         return None
     # frombuffer replaces the deprecated binary mode of fromstring. It yields
     # a read-only view of the input, so copy to keep the result writable,
     # as fromstring's result was.
     values = np.frombuffer(state, dtype=np.float32).copy()
     return float(values[0]), values[1:]


 def serialize_nd_weights(model_weights):
     """
     Serialize a list of nd weight arrays into one float32 bytestring.

     This function is called for passing the initial model weights from the keras
     fit function to the keras fit transition function.

     :param model_weights: a list of numpy arrays, what you get from
         keras.get_weights()
     :return: Model weights serialized into a bytestring format (each array
         flattened, in list order), or None when model_weights is None
     """
     if model_weights is None:
         return None
     # ravel() avoids the extra per-layer copy that flatten() always makes;
     # concatenate copies the data into the result anyway.
     flat = np.concatenate([np.asarray(w).ravel() for w in model_weights])
     # tobytes() replaces the deprecated tostring(); the bytes are the same.
     return flat.astype(np.float32).tobytes()


 def deserialize_as_nd_weights(model_weights_serialized, model_shapes):
     """
     Rebuild a list of nd weight arrays from a flat float32 bytestring.

     The output of this function is used to set keras model weights using the
     function model.set_weights()

     :param model_weights_serialized: bytestring containing model weights
     :param model_shapes: list containing the shapes of each layer.
     :return: list of nd numpy arrays containing all of the weights, one per
         entry of model_shapes and in the same order; None when either
         argument is empty or None
     """
     if not model_weights_serialized or not model_shapes:
         return None

     # frombuffer replaces the deprecated binary mode of fromstring.
     flat_weights = np.frombuffer(model_weights_serialized, dtype=np.float32)

     # Element count of each layer, computed once. np.prod does not depend on
     # the Python 2 builtin reduce, and it yields 1 for a scalar shape ()
     # where reduce without an initial value raises.
     layer_sizes = [int(np.prod(shape, dtype=np.int64))
                    for shape in model_shapes]

     total_model_shape = sum(layer_sizes)
     total_weights_shape = flat_weights.size
     _assert(total_model_shape == total_weights_shape,
             "Number of elements in model weights({0}) doesn't match model({1})."\
                 .format(total_weights_shape, total_model_shape))

     model_weights = []
     start = 0
     for shape, size in zip(model_shapes, layer_sizes):
         stop = start + size
         # np.array copies the slice, so every layer is an independent,
         # writable array rather than a view of the input buffer.
         model_weights.append(np.array(flat_weights[start:stop]).reshape(shape))
         start = stop
     return model_weights
	# coding=utf-8
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	import numpy as np
	from utilities.utilities import _assert

	# TODO
	# 1. Current serializing logic
	# serialized string -> byte string
	# np.array(np.array(image_count).concatenate(weights_np_array)).tostring()
	# Proposed logic
	# image_count can be a separate value
	# weights -> np.array(weights).tostring()
	# combine these 2 into one string by a random splitter
	# serialized string -> imagecount_splitter_weights
	# 2. combine the serialize_state_with_nd_weights and serialize_state_with_1d_weights
	# into one function called serialize_state. This function can infer the shape
	# of the model weights and then flatten if they are nd weights.
	# 3. Same as 2 for deserialize


	"""
	workflow
	1. Set initial weights in madlib keras fit function.
	2. Serialize these initial model weights as a byte string and pass it to keras step
	3. Deserialize the state passed from the previous step into a list of nd weights
	that will be passed on to model.set_weights()
	4. At the end of each buffer in fit transition, serialize the image count and
	the model weights into a bytestring that will be passed on to the fit merge function.
	5. In fit merge, deserialize each state into an image count and a 1d np array of
	weights. Do some averaging operations and serialize them again into a state which
	contains the image count and the 1d weights. The same applies to fit final.
	6. Return the final state from fit final to fit which will then be deserialized
	as 1d weights to be passed on to the evaluate function
	"""
	def get_image_count_from_state(state):
	    """
	    Read the image count stored at the front of a serialized model state.

	    :param state: bytestring serialized model state containing image count
	        and weights; may be empty or None
	    :return: image count as float, or 0 when the state is empty or None
	    """
	    if not state:
	        return 0
	    # Only the count is needed; the weights half of the pair is dropped.
	    image_count, _ = deserialize_as_image_1d_weights(state)
	    return image_count

	def get_serialized_1d_weights_from_state(state):
	    """
	    Drop the image count from a serialized model state and return only the
	    weights, serialized again.

	    Output of this function is used to deserialize the output of each iteration
	    of madlib keras step UDA.

	    :param state: bytestring serialized model state containing image count
	        and weights; may be empty or None
	    :return: model weights serialized as bytestring, or None when the state
	        is empty or None
	    """
	    if not state:
	        # deserialize_as_image_1d_weights returns None for an empty state,
	        # which cannot be unpacked into (count, weights).
	        return None
	    _, weights = deserialize_as_image_1d_weights(state)
	    # tobytes() replaces the deprecated tostring(); the bytes are the same.
	    return weights.tobytes()

	def serialize_state_with_nd_weights(image_count, model_weights):
	    """
	    Serialize an image count and a list of nd weight arrays into one state.

	    This function is called when the output of keras.get_weights() (list of nd
	    np arrays) has to be converted into a serialized model state.

	    The state is a single float32 vector: the image count first, followed by
	    every weight array flattened in list order. Because the count is stored
	    as float32 too, it shares that type's precision.

	    :param image_count: numeric value stored as the first element
	    :param model_weights: a list of numpy arrays, what you get from
	        keras.get_weights()
	    :return: Image count and model weights serialized into a bytestring
	        format, or None when model_weights is None
	    """
	    if model_weights is None:
	        return None
	    pieces = [np.array([image_count])]
	    # ravel() avoids the extra per-layer copy that flatten() always makes;
	    # concatenate copies the data into the result anyway.
	    pieces.extend(np.asarray(w).ravel() for w in model_weights)
	    state = np.concatenate(pieces).astype(np.float32)
	    # tobytes() replaces the deprecated tostring(); the bytes are the same.
	    return state.tobytes()


	def serialize_state_with_1d_weights(image_count, model_weights):
	    """
	    Serialize an image count and already-flattened weights into one state.

	    This function is called when the weights are to be passed to the keras fit
	    merge and final functions.

	    The state is a single float32 vector: the image count first, followed by
	    the weights.

	    :param image_count: numeric value stored as the first element
	    :param model_weights: a single flattened numpy array containing all of the
	        weights
	    :return: Image count and model weights serialized into a bytestring
	        format, or None when model_weights is None
	    """
	    if model_weights is None:
	        return None
	    merge_state = np.concatenate((np.array([image_count]), model_weights))
	    # tobytes() replaces the deprecated tostring(); the bytes are the same.
	    return merge_state.astype(np.float32).tobytes()


	def deserialize_as_image_1d_weights(state):
	    """
	    Split a serialized model state into its image count and 1d weights.

	    This function is called when the model state needs to be deserialized in
	    the keras fit merge and final functions.

	    :param state: the stringified (serialized) state containing image_count and
	        model_weights, read as a float32 vector
	    :return: None when the state is empty or None, otherwise a tuple of
	        image_count: first element of the state, as a Python float
	        model_weights: a single flattened float32 numpy array containing
	        all of the weights (every element after the first)
	    """
	    if not state:
	        return None
	    # frombuffer replaces the deprecated binary mode of fromstring. It yields
	    # a read-only view of the input, so copy to keep the result writable,
	    # as fromstring's result was.
	    values = np.frombuffer(state, dtype=np.float32).copy()
	    return float(values[0]), values[1:]


	def serialize_nd_weights(model_weights):
	    """
	    Serialize a list of nd weight arrays into one float32 bytestring.

	    This function is called for passing the initial model weights from the keras
	    fit function to the keras fit transition function.

	    :param model_weights: a list of numpy arrays, what you get from
	        keras.get_weights()
	    :return: Model weights serialized into a bytestring format (each array
	        flattened, in list order), or None when model_weights is None
	    """
	    if model_weights is None:
	        return None
	    # ravel() avoids the extra per-layer copy that flatten() always makes;
	    # concatenate copies the data into the result anyway.
	    flat = np.concatenate([np.asarray(w).ravel() for w in model_weights])
	    # tobytes() replaces the deprecated tostring(); the bytes are the same.
	    return flat.astype(np.float32).tobytes()


	def deserialize_as_nd_weights(model_weights_serialized, model_shapes):
	    """
	    Rebuild a list of nd weight arrays from a flat float32 bytestring.

	    The output of this function is used to set keras model weights using the
	    function model.set_weights()

	    :param model_weights_serialized: bytestring containing model weights
	    :param model_shapes: list containing the shapes of each layer.
	    :return: list of nd numpy arrays containing all of the weights, one per
	        entry of model_shapes and in the same order; None when either
	        argument is empty or None
	    """
	    if not model_weights_serialized or not model_shapes:
	        return None

	    # frombuffer replaces the deprecated binary mode of fromstring.
	    flat_weights = np.frombuffer(model_weights_serialized, dtype=np.float32)

	    # Element count of each layer, computed once. np.prod does not depend on
	    # the Python 2 builtin reduce, and it yields 1 for a scalar shape ()
	    # where reduce without an initial value raises.
	    layer_sizes = [int(np.prod(shape, dtype=np.int64))
	                   for shape in model_shapes]

	    total_model_shape = sum(layer_sizes)
	    total_weights_shape = flat_weights.size
	    _assert(total_model_shape == total_weights_shape,
	            "Number of elements in model weights({0}) doesn't match model({1})."\
	                .format(total_weights_shape, total_model_shape))

	    model_weights = []
	    start = 0
	    for shape, size in zip(model_shapes, layer_sizes):
	        stop = start + size
	        # np.array copies the slice, so every layer is an independent,
	        # writable array rather than a view of the input buffer.
	        model_weights.append(np.array(flat_weights[start:stop]).reshape(shape))
	        start = stop
	    return model_weights