blob: e6fcbff1c8d361b05c3457f91c23b1d9e2a78d5f [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# ----------------------------------------------------------------------------
# References:
# Yan Zhu et al.. 2018.
# Matrix Profile XI: SCRIMP++: Time Series Motif Discovery at Interactive Speeds.
# 2018 IEEE International Conference on Data Mining (ICDM), 2018, pp. 837-846.
# DOI: 10.1109/ICDM.2018.00099.
# https://www.cs.ucr.edu/~eamonn/SCRIMP_ICDM_camera_ready_updated.pdf
# ----------------------------------------------------------------------------
# Builtin function that computes the MatrixProfile of a time series efficiently
# using the SCRIMP++ algorithm.
#
# INPUT PARAMETERS:
# ----------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# ----------------------------------------------------------------------------
# ts Matrix --- Time series to profile
# window_size Integer 4 Sliding window size
# sample_percent Double 1.0 Degree of approximation
# between zero and one (1
# computes the exact solution)
# is_verbose Boolean False Print debug information
#
#
# RETURN VALUES
# ----------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# profile Matrix --- The computed matrix profile
# profile_index Matrix --- Indices of least distances
m_matrixProfile = function(Matrix[Double] ts, Integer window_size=4, Double sample_percent=1.0, Boolean is_verbose=FALSE)
return(Matrix[Double] profile, Matrix[Double] profile_index)
{
if (is_verbose)
print ("##############################\n# MATRIXPROFILE SCRIPT ENTRY #\n##############################");
# TODO: preSCRIMP
# requires a similarity search algorithm e.g.: MASS (Mueen's Algorithm for Similarity Search)
n = length(ts);
[mu,sig] = moving_avg(ts, n, window_size);
if (is_verbose) {
print_ts(ts);
print_ts(mu);
print_ts(sig);
}
# initialize
profile_len = n-window_size+1;
profile = matrix(Inf, cols=1, rows=profile_len);
profile_index = matrix(1, cols=1, rows=profile_len);
# random permutation
exclusion_zone = as.integer(ceil(window_size/4)) + 1;
sample_size = profile_len-exclusion_zone;
if (sample_percent < 1.0 & sample_percent >= 0.0) {
sample_size = ceil(sample_size*sample_percent);
}
s = sample(sample_size, sample_size, FALSE);
s = s + exclusion_zone;
if (is_verbose) {
print("n: " + n);
print("window_size: " + window_size);
print("profile_len: " + profile_len);
print("exclusion_zone: " + exclusion_zone);
print("sample_size: " + sample_size);
}
k_idx = 1;
while (k_idx <= sample_size) {
k = as.scalar(s[k_idx]);
k_idx += 1;
q = 0;
for (i in 1:n-window_size+2-k) {
if (i==1)
q = as.scalar(t(ts[1:window_size]) %*% ts[k:k+window_size-1]);
else
q = as.scalar(q - ts[i-1]%*%ts[i+k-2] + ts[i+window_size-1]%*%ts[i+k+window_size-2]);
d = sqrt(2*window_size*(1-(q - window_size*as.scalar(mu[i]*mu[i+k-1])) / (window_size*as.scalar(sig[i]*sig[i+k-1]))));
if (d < as.scalar(profile[i])) {
profile[i] = d;
profile_index[i] = as.matrix(i+k-1);
}
if (d < as.scalar(profile[i+k-1])) {
profile[i+k-1] = d;
profile_index[i+k-1] = i;
}
}
}
print_ts(profile);
print_ts(profile_index);
}
moving_avg = function(Matrix[Double] array, Integer n, Integer window_size)
return(Matrix[Double] mu, Matrix[Double] sig)
{
profile_len = n - window_size + 1;
cum_sum = matrix(0, cols=1, rows=n);
sq_cum_sum = matrix(0, cols=1, rows=n);
sums = matrix(0, cols=1, rows=profile_len);
sq_sums = matrix(0, cols=1, rows=profile_len);
mu = matrix(0, cols=1, rows=profile_len);
sig_sq = matrix(0, cols=1, rows=profile_len);
sig = matrix(0, cols=1, rows=profile_len);
cum_sum = cumsum(array);
sq_cum_sum = cumsum(array*array);
sums[1] = cum_sum[window_size];
sq_sums[1] = sq_cum_sum[window_size];
for (i in 1:n-window_size) {
sums[i+1] = cum_sum[window_size + i] - cum_sum[i];
sq_sums[i+1] = sq_cum_sum[window_size + i] - sq_cum_sum[i];
}
for (i in 1:profile_len) {
mu[i] = sums[i] / window_size;
sig_sq[i] = sq_sums[i] / window_size - mu[i] * mu[i];
sig[i] = max(sqrt(sig_sq[i]), 0);
}
}
print_ts = function(Matrix[Double] ts) {
print(toString(t(ts)));
}