| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| # |
| # This function considers some constraints indicating statements that can NOT happen in the data (denial constraints). |
| # |
| # |
| # INPUT PARAMETERS: |
| # --------------------------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # --------------------------------------------------------------------------------------------- |
| # dataFrame Frame --- frame which columns represent the variables of the data and the rows correspond to different tuples or instances. |
| # Recommended to have a column indexing the instances from 1 to N (N=number of instances). |
| # constraintsFrame Frame --- frame with fixed columns and each row representing one constraint. |
| # 1. idx: (double) index of the constraint, from 1 to M (number of constraints) |
| # 2. constraint.type: (string) The constraints can be of 3 different kinds: |
| # - variableCompare: for each instance, it will compare the values of two variables (with a relation <, > or =). |
| # - valueCompare: for each instance, it will compare a fixed value and a variable value (with a relation <, > or =). |
| # - instanceCompare: for every pair of instances, it will compare the relation between two variables, |
| # i.e. if the value of variable 1 in instance 1 is lower/higher than the value of variable 1 in instance 2, |
| # then the value of variable 2 in instance 1 can't be lower/higher than the value of variable 2 in instance 2. |
| # 3. group.by: (boolean) if TRUE only one group of data (defined by a variable option) will be considered for the constraint. |
| # 4. group.variable: (string, only if group.by TRUE) name of the variable (column in dataFrame) that will divide our data in groups. |
| # 5. group.option: (only if group.by TRUE) option of the group.variable that defines the group to consider. |
| # 6. variable1: (string) first variable to compare (name of column in dataFrame). |
| # 7. relation: (string) can be <, > or = in the case of variableCompare and valueCompare, and <>, <<, >< or >> |
| # (two characters, written without spaces) in the case of instanceCompare |
| # 8. variable2: (string) second variable to compare (name of column in dataFrame) or fixed value for the case of valueCompare. |
| # |
| |
| # ----------------------- |
| # EXAMPLE: |
| # dataFrame: |
| # |
| # rank discipline yrs.since.phd yrs.service sex salary |
| # 1 Prof B 19 18 Male 139750 |
| # 2 Prof B 20 16 Male 173200 |
| # 3 AsstProf B 3 3 Male 79750.56 |
| # 4 Prof B 45 39 Male 115000 |
| # 5 Prof B 40 40 Male 141500 |
| # 6 AssocProf B 6 6 Male 97000 |
| # 7 Prof B 30 23 Male 175000 |
| # 8 Prof B 45 45 Male 147765 |
| # 9 Prof B 21 20 Male 119250 |
| # 10 Prof B 18 18 Female 129000 |
| # 11 AssocProf B 12 8 Male 119800 |
| # 12 AsstProf B 7 2 Male 79800 |
| # 13 AsstProf B 1 1 Male 77700 |
| # |
| # constraintsFrame: |
| # |
| # idx constraint.type group.by group.variable group.option variable1 relation variable2 |
| # 1 variableCompare FALSE yrs.since.phd < yrs.service |
| # 2 instanceCompare TRUE rank Prof yrs.service >< salary |
| # 3 valueCompare FALSE salary = 78182 |
| # 4 variableCompare TRUE discipline B yrs.service > yrs.since.phd |
| # |
| # |
| # Example: explanation of constraint 2 --> it can't happen that one professor of rank Prof has more years of service than other, but lower salary. |
| # |
| #---------------------------------- |
| # OUTPUT PARAMETERS: |
| #---------------------------------- |
| # --------------------------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # -------------------------------------------------------------------------------- |
| # WrongInstances Matrix Double Matrix of 2 columns. |
| # - First column shows the indexes of dataFrame that are wrong. |
| # - Second column shows the index of the denial constraint that is fulfilled |
| # If there are no wrong instances to show (0 constraints fulfilled) --> WrongInstances=matrix(0,1,2) |
| # |
| |
| |
# Checks every denial constraint of constraintsFrame against dataFrame and
# returns one row (instance index, constraint index) per detected violation.
#
# Row 1 of both frames is expected to hold the column names; data rows and
# constraints therefore start at row 2 and are reported with index (row - 1).
# All reported instance indexes are row positions, for every constraint type.
# Compared values are parsed as doubles, so fractional values are not truncated.
# For instanceCompare only adjacent groups (runs of equal variable-1 values in
# sorted order) are compared, and an instance may be reported more than once
# for the same constraint (once per neighbouring group it conflicts with).
s_denialConstraints = function(Frame[Unknown] dataFrame, Frame[Unknown] constraintsFrame)
return(Matrix[double] WrongInstances)
{
  print("DENIAL CONSTRAINTS");

  N = nrow(dataFrame);        # rows in data frame (including the header row)
  M = nrow(constraintsFrame); # rows in constraints frame (including the header row)

  # instanceCompare can report a row twice per constraint (as member of the
  # first and of the second group), hence two slots per (row, constraint).
  WrongInstances = matrix(0, rows=2*N*M, cols=2)
  flag = 0                    # number of violations recorded so far
  colName = dataFrame[1,]

  # Guard the 2:M and 2:N ranges: with only a header row they would not be empty.
  if (N >= 2 & M >= 2) {
    for (iConstraint in 2:M) { # loop starts in 2 because 1 is the name of the columns, not a constraint
      cType = as.scalar(constraintsFrame[iConstraint,2]) # kind of constraint
      var1 = as.scalar(constraintsFrame[iConstraint,6])  # variable 1 of the constraint
      rel = as.scalar(constraintsFrame[iConstraint,7])   # relation of the constraint

      # find the column of dataFrame corresponding to var1
      isCol1 = map(colName, "x->x.equals(\""+var1+"\")")
      colIdx1 = 0
      for (iLog in 1:ncol(colName)) {
        if (as.scalar(isCol1[1,iLog]) == "true") {
          colIdx1 = iLog
        }
      }

      if (colIdx1 == 0) {
        print('Variable 1 for constraint ' + toString(iConstraint-1) + " not found in dataFrame")
      } else {

        # DEFINE IF THE CONSTRAINT IS RESTRICTED TO A GROUP OF DATA:
        # inGroup[h,1] == 1 marks the rows of dataFrame the constraint applies to.
        inGroup = matrix(1, rows=N, cols=1)
        groupOk = TRUE
        if (as.scalar(constraintsFrame[iConstraint,3]) == "TRUE") {
          varToGroup = as.scalar(constraintsFrame[iConstraint,4]) # variable that divides the data in groups
          isColToGroup = map(colName, "x->x.equals(\""+varToGroup+"\")")
          colIdxToGroup = 0
          for (iLog in 1:ncol(colName)) {
            if (as.scalar(isColToGroup[1,iLog]) == "true") {
              colIdxToGroup = iLog
            }
          }
          if (colIdxToGroup == 0) {
            groupOk = FALSE
            print('Group variable for constraint ' + toString(iConstraint-1) + " not found in dataFrame")
          } else {
            groupOption = as.scalar(constraintsFrame[iConstraint,5]) # value of the group variable that selects the group
            isGroupInstance = map(dataFrame[,colIdxToGroup], "x->x.equals(\""+groupOption+"\")")
            inGroup = matrix(0, rows=N, cols=1)
            for (h in 1:N) {
              if (as.scalar(isGroupInstance[h,1]) == "true") {
                inGroup[h,1] = 1
              }
            }
          }
        }

        # variableCompare and instanceCompare need a second column of dataFrame
        colIdx2 = 0
        if (groupOk & (cType == "variableCompare" | cType == "instanceCompare")) {
          var2 = as.scalar(constraintsFrame[iConstraint,8]) # variable 2 of the constraint
          isCol2 = map(colName, "x->x.equals(\""+var2+"\")")
          for (iLog in 1:ncol(colName)) {
            if (as.scalar(isCol2[1,iLog]) == "true") {
              colIdx2 = iLog
            }
          }
          if (colIdx2 == 0) {
            print('Variable 2 for constraint ' + toString(iConstraint-1) + " not found in dataFrame")
          }
        }

        # CONSTRAINT TO COMPARE VARIABLES OF THE SAME INSTANCE:
        if (groupOk & cType == "variableCompare" & colIdx2 != 0) {
          for (iInstance in 2:N) { # loop starts in 2 because 1 is the name of the columns, not an instance
            if (as.scalar(inGroup[iInstance,1]) == 1) {
              value1 = as.double(as.scalar(dataFrame[iInstance,colIdx1]))
              value2 = as.double(as.scalar(dataFrame[iInstance,colIdx2]))
              if ((rel == "<" & value1 < value2) | (rel == ">" & value1 > value2) | (rel == "=" & value1 == value2)) {
                flag = flag + 1
                WrongInstances[flag,1] = iInstance - 1
                WrongInstances[flag,2] = iConstraint - 1
              }
            }
          }

        # CONSTRAINT TO COMPARE A VALUE AND A VARIABLE FOR EACH INSTANCE
        } else if (groupOk & cType == "valueCompare") {
          value2 = as.double(as.scalar(constraintsFrame[iConstraint,8])) # fixed value of the constraint
          for (iInstance in 2:N) {
            if (as.scalar(inGroup[iInstance,1]) == 1) {
              value1 = as.double(as.scalar(dataFrame[iInstance,colIdx1]))
              if ((rel == "<" & value1 < value2) | (rel == ">" & value1 > value2) | (rel == "=" & value1 == value2)) {
                flag = flag + 1
                WrongInstances[flag,1] = iInstance - 1
                WrongInstances[flag,2] = iConstraint - 1
              }
            }
          }

        # CONSTRAINT TO COMPARE THE RELATION BETWEEN VARIABLES FOR DIFFERENT INSTANCES
        } else if (groupOk & cType == "instanceCompare" & colIdx2 != 0) {

          # One row per selected instance with the columns:
          # (1) index of the instance, (2) value of variable 1, (3) value of variable 2
          DataMatrix = matrix(0, rows=N-1, cols=3)
          nSel = 0
          for (iInstance in 2:N) {
            if (as.scalar(inGroup[iInstance,1]) == 1) {
              nSel = nSel + 1
              DataMatrix[nSel,1] = iInstance - 1
              DataMatrix[nSel,2] = as.matrix(dataFrame[iInstance,colIdx1])
              DataMatrix[nSel,3] = as.matrix(dataFrame[iInstance,colIdx2])
            }
          }

          ascending = (rel == "<>" | rel == "<<")
          descending = (rel == ">>" | rel == "><")

          # At least two instances and a known relation are needed to compare anything.
          if (nSel >= 2 & (ascending | descending)) {
            DataMatrix = DataMatrix[1:nSel,]

            # order by variable 1, increasing or decreasing depending on the first character of the relation
            if (ascending) {
              DataMatrixOrdered = order(target=DataMatrix, by=2, decreasing=FALSE, index.return=FALSE)
            } else {
              DataMatrixOrdered = order(target=DataMatrix, by=2, decreasing=TRUE, index.return=FALSE)
            }

            # groupEnd[k+1,1] = last row of the k-th run of equal variable-1 values;
            # groupEnd[1,1] = 0 is a sentinel so that group k spans
            # rows groupEnd[k,1]+1 .. groupEnd[k+1,1].
            groupEnd = matrix(0, rows=nSel+1, cols=1)
            nBound = 1
            for (iRow in 2:nSel) {
              if (as.scalar(DataMatrixOrdered[iRow,2]) != as.scalar(DataMatrixOrdered[iRow-1,2])) { # change of group
                nBound = nBound + 1
                groupEnd[nBound,1] = iRow - 1
              }
            }
            nBound = nBound + 1
            groupEnd[nBound,1] = nSel # close the last group
            nGroups = nBound - 1

            # compare every group with the next one
            if (nGroups >= 2) {
              for (iGroup in 1:(nGroups-1)) {
                start1 = as.scalar(groupEnd[iGroup,1]) + 1
                end1 = as.scalar(groupEnd[iGroup+1,1])
                start2 = end1 + 1
                end2 = as.scalar(groupEnd[iGroup+2,1])
                G1 = DataMatrixOrdered[start1:end1,] # first group
                G2 = DataMatrixOrdered[start2:end2,] # second group

                if (rel == "<<" | rel == "><") {
                  # forbidden: variable 2 of the first group lower than in the second group
                  M1 = min(G1[,3])
                  M2 = max(G2[,3])
                  if (M1 < M2) {
                    for (iNumber in 1:nrow(G1)) {
                      if (as.scalar(G1[iNumber,3]) < M2) {
                        flag = flag + 1
                        WrongInstances[flag,1] = as.scalar(G1[iNumber,1])
                        WrongInstances[flag,2] = iConstraint - 1
                      }
                    }
                    for (iNumber in 1:nrow(G2)) {
                      if (as.scalar(G2[iNumber,3]) > M1) {
                        flag = flag + 1
                        WrongInstances[flag,1] = as.scalar(G2[iNumber,1])
                        WrongInstances[flag,2] = iConstraint - 1
                      }
                    }
                  }
                } else {
                  # rel is "<>" or ">>"
                  # forbidden: variable 2 of the first group higher than in the second group
                  M1 = max(G1[,3])
                  M2 = min(G2[,3])
                  if (M1 > M2) {
                    for (iNumber in 1:nrow(G1)) {
                      if (as.scalar(G1[iNumber,3]) > M2) {
                        flag = flag + 1
                        WrongInstances[flag,1] = as.scalar(G1[iNumber,1])
                        WrongInstances[flag,2] = iConstraint - 1
                      }
                    }
                    for (iNumber in 1:nrow(G2)) {
                      if (as.scalar(G2[iNumber,3]) < M1) {
                        flag = flag + 1
                        WrongInstances[flag,1] = as.scalar(G2[iNumber,1])
                        WrongInstances[flag,2] = iConstraint - 1
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }

  if (flag == 0) {
    flag = 1 # keep a single all-zero row as the "nothing found" result
    print("0 constraints are fulfilled")
  }

  # Define the final output: used rows only, sorted by instance index
  WrongInstances = WrongInstances[1:flag,]
  WrongInstances = order(target=WrongInstances, by=1, decreasing=FALSE, index.return=FALSE)
}
| |