# scripts/builtin/sherlockPredict.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------
 source("scripts/builtin/sherlockNet.dml") as sherlockNet

 # Implements prediction and evaluation phase of Sherlock:
 # A Deep Learning Approach to Semantic Data Type Detection
 #
 # [Hulsebos, Madelon, et al. "Sherlock: A deep learning approach to semantic data type detection."
 # Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining.
 # 2019.]

 # Split feature matrix into four different feature categories and predicting the class probability
 # on the respective features. Then combine all predictions for final predicted probabilities.
 # ---------------------------------------------------------------------------------------------
 # NAME         TYPE      DEFAULT  MEANING
 # ---------------------------------------------------------------------------------------------
 # X            Matrix    ---      matrix of values which are to be classified
 # cW           Matrix    ---      weights (parameters) matrices for character distributions
 # cb           Matrix    ---      biases vectors for character distributions
 # wW           Matrix    ---      weights (parameters) matrices for word embeddings
 # wb           Matrix    ---      biases vectors for word embeddings
 # pW           Matrix    ---      weights (parameters) matrices for paragraph vectors
 # pb           Matrix    ---      biases vectors for paragraph vectors
 # sW           Matrix    ---      weights (parameters) matrices for global statistics
 # sb           Matrix    ---      biases vectors for global statistics
 # fW           Matrix    ---      weights (parameters) matrices for combining all trained features (final)
 # fb           Matrix    ---      biases vectors for combining all trained features (final)
 # ---------------------------------------------------------------------------------------------
 # probs        Matrix             class probabilities of shape (N, K)

 m_sherlockPredict = function(matrix[double] X,
             matrix[double] cW1, matrix[double] cb1,
             matrix[double] cW2, matrix[double] cb2,
             matrix[double] cW3, matrix[double] cb3,
             matrix[double] wW1, matrix[double] wb1,
             matrix[double] wW2, matrix[double] wb2,
             matrix[double] wW3, matrix[double] wb3,
             matrix[double] pW1, matrix[double] pb1,
             matrix[double] pW2, matrix[double] pb2,
             matrix[double] pW3, matrix[double] pb3,
             matrix[double] sW1, matrix[double] sb1,
             matrix[double] sW2, matrix[double] sb2,
             matrix[double] sW3, matrix[double] sb3,
             matrix[double] fW1, matrix[double] fb1,
             matrix[double] fW2, matrix[double] fb2,
             matrix[double] fW3, matrix[double] fb3)
       return (matrix[double] probs) {

   # Every slice below spans all rows of X.
   n = nrow(X)

   # Carve X into the four column groups, one per feature category.
   # The column ranges are fixed by the layout of the input feature matrix.
   char_feats = X[1:n, 224:1183]
   word_feats = cbind(X[1:n, 13:212], X[1:n, 1188])
   par_feats  = X[1:n, 1189:1588]
   stat_feats = cbind(X[1:n, 1:12], X[1:n, 213:223], X[1:n, 1184:1187])

   # Run each category through its own three-layer sub-network
   # (c* = character, w* = word, p* = paragraph, s* = statistics parameters).
   char_out = sherlockNet::predict(char_feats, cW1, cb1, cW2, cb2, cW3, cb3)
   word_out = sherlockNet::predict(word_feats, wW1, wb1, wW2, wb2, wW3, wb3)
   par_out  = sherlockNet::predict(par_feats, pW1, pb1, pW2, pb2, pW3, pb3)
   stat_out = sherlockNet::predict(stat_feats, sW1, sb1, sW2, sb2, sW3, sb3)

   # Concatenate the four sub-network outputs column-wise and feed them to
   # the final network (f* parameters) to obtain the returned probabilities.
   stacked = cbind(char_out, word_out, par_out, stat_out)
   probs = sherlockNet::predict(stacked, fW1, fb1, fW2, fb2, fW3, fb3)
 }

 # Evaluates the performance of the network.
 # ---------------------------------------------------------------------------------------------
 # NAME         TYPE      DEFAULT  MEANING
 # ---------------------------------------------------------------------------------------------
 # probs        Matrix             class probabilities of shape (N, K) (one-hot encoded)
 # Y            Matrix             target matrix of shape (N, K)
 # ---------------------------------------------------------------------------------------------
 # loss         double             scalar loss, of shape (1)
 # accuracy     double             scalar accuracy, of shape (1)
 # f1 score     double             scalar f1 score, of shape (1)
 # precision    double             scalar precision, of shape (1)
 # recall       double             scalar recall, of shape (1)

 # Thin wrapper: forwards probs and Y to sherlockNet::eval and hands back its
 # five outputs (loss, accuracy, f1, precision, recall) unchanged.
 eval = function(matrix[double] probs, matrix[double] Y)
   return (double loss, double accuracy, double f1, double precision, double recall) {

   # All five metrics are computed by sherlockNet::eval; nothing is done here.
   [loss, accuracy, f1, precision, recall] = sherlockNet::eval(probs, Y)
 }
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------
	source("scripts/builtin/sherlockNet.dml") as sherlockNet

	# Implements prediction and evaluation phase of Sherlock:
	# A Deep Learning Approach to Semantic Data Type Detection
	#
	# [Hulsebos, Madelon, et al. "Sherlock: A deep learning approach to semantic data type detection."
	# Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining.
	# 2019.]

	# Split feature matrix into four different feature categories and predicting the class probability
	# on the respective features. Then combine all predictions for final predicted probabilities.
	# ---------------------------------------------------------------------------------------------
	# NAME TYPE DEFAULT MEANING
	# ---------------------------------------------------------------------------------------------
	# X Matrix --- matrix of values which are to be classified
	# cW Matrix --- weights (parameters) matrices for character distributions
	# cb Matrix --- biases vectors for character distributions
	# wW Matrix --- weights (parameters) matrices for word embeddings
	# wb Matrix --- biases vectors for word embeddings
	# pW Matrix --- weights (parameters) matrices for paragraph vectors
	# pb Matrix --- biases vectors for paragraph vectors
	# sW Matrix --- weights (parameters) matrices for global statistics
	# sb Matrix --- biases vectors for global statistics
	# fW Matrix --- weights (parameters) matrices for combining all trained features (final)
	# fb Matrix --- biases vectors for combining all trained features (final)
	# ---------------------------------------------------------------------------------------------
	# probs Matrix class probabilities of shape (N, K)

	m_sherlockPredict = function(matrix[double] X,
	    matrix[double] cW1, matrix[double] cb1,
	    matrix[double] cW2, matrix[double] cb2,
	    matrix[double] cW3, matrix[double] cb3,
	    matrix[double] wW1, matrix[double] wb1,
	    matrix[double] wW2, matrix[double] wb2,
	    matrix[double] wW3, matrix[double] wb3,
	    matrix[double] pW1, matrix[double] pb1,
	    matrix[double] pW2, matrix[double] pb2,
	    matrix[double] pW3, matrix[double] pb3,
	    matrix[double] sW1, matrix[double] sb1,
	    matrix[double] sW2, matrix[double] sb2,
	    matrix[double] sW3, matrix[double] sb3,
	    matrix[double] fW1, matrix[double] fb1,
	    matrix[double] fW2, matrix[double] fb2,
	    matrix[double] fW3, matrix[double] fb3)
	  return (matrix[double] probs) {

	  # Every slice below spans all rows of X.
	  n = nrow(X)

	  # Carve X into the four column groups, one per feature category.
	  # The column ranges are fixed by the layout of the input feature matrix.
	  char_feats = X[1:n, 224:1183]
	  word_feats = cbind(X[1:n, 13:212], X[1:n, 1188])
	  par_feats  = X[1:n, 1189:1588]
	  stat_feats = cbind(X[1:n, 1:12], X[1:n, 213:223], X[1:n, 1184:1187])

	  # Run each category through its own three-layer sub-network
	  # (c* = character, w* = word, p* = paragraph, s* = statistics parameters).
	  char_out = sherlockNet::predict(char_feats, cW1, cb1, cW2, cb2, cW3, cb3)
	  word_out = sherlockNet::predict(word_feats, wW1, wb1, wW2, wb2, wW3, wb3)
	  par_out  = sherlockNet::predict(par_feats, pW1, pb1, pW2, pb2, pW3, pb3)
	  stat_out = sherlockNet::predict(stat_feats, sW1, sb1, sW2, sb2, sW3, sb3)

	  # Concatenate the four sub-network outputs column-wise and feed them to
	  # the final network (f* parameters) to obtain the returned probabilities.
	  stacked = cbind(char_out, word_out, par_out, stat_out)
	  probs = sherlockNet::predict(stacked, fW1, fb1, fW2, fb2, fW3, fb3)
	}

	# Evaluates the performance of the network.
	# ---------------------------------------------------------------------------------------------
	# NAME TYPE DEFAULT MEANING
	# ---------------------------------------------------------------------------------------------
	# probs Matrix class probabilities of shape (N, K) (one-hot encoded)
	# Y Matrix target matrix of shape (N, K)
	# ---------------------------------------------------------------------------------------------
	# loss double scalar loss, of shape (1)
	# accuracy double scalar accuracy, of shape (1)
	# f1 score double scalar f1 score, of shape (1)
	# precision double scalar precision, of shape (1)
	# recall double scalar recall, of shape (1)

	# Thin wrapper: forwards probs and Y to sherlockNet::eval and hands back its
	# five outputs (loss, accuracy, f1, precision, recall) unchanged.
	eval = function(matrix[double] probs, matrix[double] Y)
	return (double loss, double accuracy, double f1, double precision, double recall) {

	# All five metrics are computed by sherlockNet::eval; nothing is done here.
	[loss, accuracy, f1, precision, recall] = sherlockNet::eval(probs, Y)
	}