| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # ---------------------------------------------------------------------------- |
| # References: |
| # Yan Zhu et al.. 2018. |
| # Matrix Profile XI: SCRIMP++: Time Series Motif Discovery at Interactive Speeds. |
| # 2018 IEEE International Conference on Data Mining (ICDM), 2018, pp. 837-846. |
| # DOI: 10.1109/ICDM.2018.00099. |
| # https://www.cs.ucr.edu/~eamonn/SCRIMP_ICDM_camera_ready_updated.pdf |
| # ---------------------------------------------------------------------------- |
| |
| # Builtin function that computes the MatrixProfile of a time series efficiently |
| # using the SCRIMP++ algorithm. |
| # |
| # INPUT PARAMETERS: |
| # ---------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # ---------------------------------------------------------------------------- |
| # ts Matrix --- Time series to profile |
| # window_size Integer 4 Sliding window size |
| # sample_percent Double 1.0 Degree of approximation |
| # between zero and one (1 |
| # computes the exact solution) |
| # is_verbose Boolean False Print debug information |
| # |
| # |
| # RETURN VALUES |
| # ---------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # profile Matrix --- The computed matrix profile |
| # profile_index Matrix --- Indices of least distances |
| |
| |
| m_matrixProfile = function(Matrix[Double] ts, Integer window_size=4, Double sample_percent=1.0, Boolean is_verbose=FALSE) |
| return(Matrix[Double] profile, Matrix[Double] profile_index) |
| { |
| if (is_verbose) |
| print ("##############################\n# MATRIXPROFILE SCRIPT ENTRY #\n##############################"); |
| |
| # TODO: preSCRIMP |
| # requires a similarity search algorithm e.g.: MASS (Mueen's Algorithm for Similarity Search) |
| |
| n = length(ts); |
| [mu,sig] = moving_avg(ts, n, window_size); |
| if (is_verbose) { |
| print_ts(ts); |
| print_ts(mu); |
| print_ts(sig); |
| } |
| |
| # initialize |
| profile_len = n-window_size+1; |
| profile = matrix(Inf, cols=1, rows=profile_len); |
| profile_index = matrix(1, cols=1, rows=profile_len); |
| |
| # random permutation |
| exclusion_zone = as.integer(ceil(window_size/4)) + 1; |
| sample_size = profile_len-exclusion_zone; |
| if (sample_percent < 1.0 & sample_percent >= 0.0) { |
| sample_size = ceil(sample_size*sample_percent); |
| } |
| s = sample(sample_size, sample_size, FALSE); |
| s = s + exclusion_zone; |
| |
| if (is_verbose) { |
| print("n: " + n); |
| print("window_size: " + window_size); |
| print("profile_len: " + profile_len); |
| print("exclusion_zone: " + exclusion_zone); |
| print("sample_size: " + sample_size); |
| } |
| k_idx = 1; |
| while (k_idx <= sample_size) { |
| k = as.scalar(s[k_idx]); |
| k_idx += 1; |
| q = 0; |
| for (i in 1:n-window_size+2-k) { |
| if (i==1) |
| q = as.scalar(t(ts[1:window_size]) %*% ts[k:k+window_size-1]); |
| else |
| q = as.scalar(q - ts[i-1]%*%ts[i+k-2] + ts[i+window_size-1]%*%ts[i+k+window_size-2]); |
| d = sqrt(2*window_size*(1-(q - window_size*as.scalar(mu[i]*mu[i+k-1])) / (window_size*as.scalar(sig[i]*sig[i+k-1])))); |
| |
| if (d < as.scalar(profile[i])) { |
| profile[i] = d; |
| profile_index[i] = as.matrix(i+k-1); |
| } |
| if (d < as.scalar(profile[i+k-1])) { |
| profile[i+k-1] = d; |
| profile_index[i+k-1] = i; |
| } |
| } |
| } |
| |
| print_ts(profile); |
| print_ts(profile_index); |
| } |
| |
| moving_avg = function(Matrix[Double] array, Integer n, Integer window_size) |
| return(Matrix[Double] mu, Matrix[Double] sig) |
| { |
| profile_len = n - window_size + 1; |
| cum_sum = matrix(0, cols=1, rows=n); |
| sq_cum_sum = matrix(0, cols=1, rows=n); |
| sums = matrix(0, cols=1, rows=profile_len); |
| sq_sums = matrix(0, cols=1, rows=profile_len); |
| mu = matrix(0, cols=1, rows=profile_len); |
| sig_sq = matrix(0, cols=1, rows=profile_len); |
| sig = matrix(0, cols=1, rows=profile_len); |
| |
| cum_sum = cumsum(array); |
| sq_cum_sum = cumsum(array*array); |
| |
| sums[1] = cum_sum[window_size]; |
| sq_sums[1] = sq_cum_sum[window_size]; |
| for (i in 1:n-window_size) { |
| sums[i+1] = cum_sum[window_size + i] - cum_sum[i]; |
| sq_sums[i+1] = sq_cum_sum[window_size + i] - sq_cum_sum[i]; |
| } |
| |
| for (i in 1:profile_len) { |
| mu[i] = sums[i] / window_size; |
| sig_sq[i] = sq_sums[i] / window_size - mu[i] * mu[i]; |
| sig[i] = max(sqrt(sig_sq[i]), 0); |
| } |
| } |
| |
| print_ts = function(Matrix[Double] ts) { |
| print(toString(t(ts))); |
| } |