scripts/builtin/matrixProfile.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------

 # ----------------------------------------------------------------------------
 # References:
 # Yan Zhu et al.. 2018.
 #   Matrix Profile XI: SCRIMP++: Time Series Motif Discovery at Interactive Speeds.
 #   2018 IEEE International Conference on Data Mining (ICDM), 2018, pp. 837-846.
 #   DOI: 10.1109/ICDM.2018.00099.
 #   https://www.cs.ucr.edu/~eamonn/SCRIMP_ICDM_camera_ready_updated.pdf
 # ----------------------------------------------------------------------------

 # Builtin function that computes the MatrixProfile of a time series efficiently
 # using the SCRIMP++ algorithm.
 #
 # INPUT PARAMETERS:
 # ----------------------------------------------------------------------------
 # NAME                              TYPE      DEFAULT  MEANING
 # ----------------------------------------------------------------------------
 # ts                                Matrix    ---      Time series to profile
 # window_size                       Integer   4        Sliding window size
 # sample_percent                    Double    1.0      Degree of approximation
 #                                                      between zero and one (1
 #                                                      computes the exact solution)
 # is_verbose                        Boolean   False    Print debug information
 #
 #
 # RETURN VALUES
 # ----------------------------------------------------------------------------
 # NAME                              TYPE      DEFAULT  MEANING
 # profile                           Matrix    ---      The computed matrix profile
 # profile_index                     Matrix    ---      Indices of least distances


 m_matrixProfile = function(Matrix[Double] ts, Integer window_size=4, Double sample_percent=1.0, Boolean is_verbose=FALSE)
   return(Matrix[Double] profile, Matrix[Double] profile_index)
 {
   if (is_verbose)
     print ("##############################\n# MATRIXPROFILE SCRIPT ENTRY #\n##############################");

   # TODO: preSCRIMP
   # requires a similarity search algorithm e.g.: MASS (Mueen's Algorithm for Similarity Search)

   n = length(ts);
   [mu,sig] = moving_avg(ts, n, window_size);
   if (is_verbose) {
     print_ts(ts);
     print_ts(mu);
     print_ts(sig);
   }

   # initialize
   profile_len = n-window_size+1;
   profile = matrix(Inf, cols=1, rows=profile_len);
   profile_index = matrix(1, cols=1, rows=profile_len);

   # random permutation
   exclusion_zone = as.integer(ceil(window_size/4)) + 1;
   sample_size = profile_len-exclusion_zone;
   if (sample_percent < 1.0 & sample_percent >= 0.0) {
     sample_size = ceil(sample_size*sample_percent);
   }
   s = sample(sample_size, sample_size, FALSE);
   s = s + exclusion_zone;

   if (is_verbose) {
     print("n: " + n);
     print("window_size: " + window_size);
     print("profile_len: " + profile_len);
     print("exclusion_zone: " + exclusion_zone);
     print("sample_size: " + sample_size);
   }
   k_idx = 1;
   while (k_idx <= sample_size) {
     k = as.scalar(s[k_idx]);
     k_idx += 1;
     q = 0;
     for (i in 1:n-window_size+2-k) {
       if (i==1)
         q = as.scalar(t(ts[1:window_size]) %*% ts[k:k+window_size-1]);
       else
         q = as.scalar(q - ts[i-1]%*%ts[i+k-2] + ts[i+window_size-1]%*%ts[i+k+window_size-2]);
       d = sqrt(2*window_size*(1-(q - window_size*as.scalar(mu[i]*mu[i+k-1])) / (window_size*as.scalar(sig[i]*sig[i+k-1]))));

       if (d < as.scalar(profile[i])) {
         profile[i] = d;
         profile_index[i] = as.matrix(i+k-1);
       }
       if (d < as.scalar(profile[i+k-1])) {
         profile[i+k-1] = d;
         profile_index[i+k-1] = i;
       }
     }
   }

   print_ts(profile);
   print_ts(profile_index);
 }

 moving_avg = function(Matrix[Double] array, Integer n, Integer window_size)
   return(Matrix[Double] mu, Matrix[Double] sig)
 {
   profile_len = n - window_size + 1;
   cum_sum = matrix(0, cols=1, rows=n);
   sq_cum_sum = matrix(0, cols=1, rows=n);
   sums = matrix(0, cols=1, rows=profile_len);
   sq_sums = matrix(0, cols=1, rows=profile_len);
   mu = matrix(0, cols=1, rows=profile_len);
   sig_sq = matrix(0, cols=1, rows=profile_len);
   sig = matrix(0, cols=1, rows=profile_len);

   cum_sum = cumsum(array);
   sq_cum_sum = cumsum(array*array);

   sums[1] = cum_sum[window_size];
   sq_sums[1] = sq_cum_sum[window_size];
   for (i in 1:n-window_size) {
     sums[i+1]    = cum_sum[window_size + i] - cum_sum[i];
     sq_sums[i+1] = sq_cum_sum[window_size + i] - sq_cum_sum[i];
   }

   for (i in 1:profile_len) {
     mu[i] = sums[i] / window_size;
     sig_sq[i] = sq_sums[i] / window_size - mu[i] * mu[i];
     sig[i] = max(sqrt(sig_sq[i]), 0);
   }
 }

 print_ts = function(Matrix[Double] ts) {
   print(toString(t(ts)));
 }
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------

	# ----------------------------------------------------------------------------
	# References:
	# Yan Zhu et al.. 2018.
	# Matrix Profile XI: SCRIMP++: Time Series Motif Discovery at Interactive Speeds.
	# 2018 IEEE International Conference on Data Mining (ICDM), 2018, pp. 837-846.
	# DOI: 10.1109/ICDM.2018.00099.
	# https://www.cs.ucr.edu/~eamonn/SCRIMP_ICDM_camera_ready_updated.pdf
	# ----------------------------------------------------------------------------

	# Builtin function that computes the MatrixProfile of a time series efficiently
	# using the SCRIMP++ algorithm.
	#
	# INPUT PARAMETERS:
	# ----------------------------------------------------------------------------
	# NAME TYPE DEFAULT MEANING
	# ----------------------------------------------------------------------------
	# ts Matrix --- Time series to profile
	# window_size Integer 4 Sliding window size
	# sample_percent Double 1.0 Degree of approximation
	# between zero and one (1
	# computes the exact solution)
	# is_verbose Boolean False Print debug information
	#
	#
	# RETURN VALUES
	# ----------------------------------------------------------------------------
	# NAME TYPE DEFAULT MEANING
	# profile Matrix --- The computed matrix profile
	# profile_index Matrix --- Indices of least distances


	m_matrixProfile = function(Matrix[Double] ts, Integer window_size=4, Double sample_percent=1.0, Boolean is_verbose=FALSE)
	return(Matrix[Double] profile, Matrix[Double] profile_index)
	{
	if (is_verbose)
	print ("##############################\n# MATRIXPROFILE SCRIPT ENTRY #\n##############################");

	# TODO: preSCRIMP
	# requires a similarity search algorithm e.g.: MASS (Mueen's Algorithm for Similarity Search)

	n = length(ts);
	[mu,sig] = moving_avg(ts, n, window_size);
	if (is_verbose) {
	print_ts(ts);
	print_ts(mu);
	print_ts(sig);
	}

	# initialize
	profile_len = n-window_size+1;
	profile = matrix(Inf, cols=1, rows=profile_len);
	profile_index = matrix(1, cols=1, rows=profile_len);

	# random permutation
	exclusion_zone = as.integer(ceil(window_size/4)) + 1;
	sample_size = profile_len-exclusion_zone;
	if (sample_percent < 1.0 & sample_percent >= 0.0) {
	sample_size = ceil(sample_size*sample_percent);
	}
	s = sample(sample_size, sample_size, FALSE);
	s = s + exclusion_zone;

	if (is_verbose) {
	print("n: " + n);
	print("window_size: " + window_size);
	print("profile_len: " + profile_len);
	print("exclusion_zone: " + exclusion_zone);
	print("sample_size: " + sample_size);
	}
	k_idx = 1;
	while (k_idx <= sample_size) {
	k = as.scalar(s[k_idx]);
	k_idx += 1;
	q = 0;
	for (i in 1:n-window_size+2-k) {
	if (i==1)
	q = as.scalar(t(ts[1:window_size]) %*% ts[k:k+window_size-1]);
	else
	q = as.scalar(q - ts[i-1]%%ts[i+k-2] + ts[i+window_size-1]%%ts[i+k+window_size-2]);
	d = sqrt(2window_size(1-(q - window_sizeas.scalar(mu[i]mu[i+k-1])) / (window_sizeas.scalar(sig[i]sig[i+k-1]))));

	if (d < as.scalar(profile[i])) {
	profile[i] = d;
	profile_index[i] = as.matrix(i+k-1);
	}
	if (d < as.scalar(profile[i+k-1])) {
	profile[i+k-1] = d;
	profile_index[i+k-1] = i;
	}
	}
	}

	print_ts(profile);
	print_ts(profile_index);
	}

	moving_avg = function(Matrix[Double] array, Integer n, Integer window_size)
	return(Matrix[Double] mu, Matrix[Double] sig)
	{
	profile_len = n - window_size + 1;
	cum_sum = matrix(0, cols=1, rows=n);
	sq_cum_sum = matrix(0, cols=1, rows=n);
	sums = matrix(0, cols=1, rows=profile_len);
	sq_sums = matrix(0, cols=1, rows=profile_len);
	mu = matrix(0, cols=1, rows=profile_len);
	sig_sq = matrix(0, cols=1, rows=profile_len);
	sig = matrix(0, cols=1, rows=profile_len);

	cum_sum = cumsum(array);
	sq_cum_sum = cumsum(array*array);

	sums[1] = cum_sum[window_size];
	sq_sums[1] = sq_cum_sum[window_size];
	for (i in 1:n-window_size) {
	sums[i+1] = cum_sum[window_size + i] - cum_sum[i];
	sq_sums[i+1] = sq_cum_sum[window_size + i] - sq_cum_sum[i];
	}

	for (i in 1:profile_len) {
	mu[i] = sums[i] / window_size;
	sig_sq[i] = sq_sums[i] / window_size - mu[i] * mu[i];
	sig[i] = max(sqrt(sig_sq[i]), 0);
	}
	}

	print_ts = function(Matrix[Double] ts) {
	print(toString(t(ts)));
	}