| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # |
| # THIS SCRIPT EVALUATES THE PREDICTIONS OF THE ENTITY RESOLUTION AGAINST THE GROUND TRUTH |
| # |
| # INPUT PARAMETERS: |
| # --------------------------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # --------------------------------------------------------------------------------------------- |
| # FX String --- Location to read the frame of the predictions |
| # Each line contains comma separated ids of one matched pair |
| # Remaining columns (>2) are ignored |
| # FY String --- Location to read the frame of the ground truth |
| # Each line contains comma separated ids of one matched pair |
| # Remaining columns (>2) are ignored |
| # --------------------------------------------------------------------------------------------- |
| # OUTPUT: prints different evaluation metrics (accuracy, F1) |
| # --------------------------------------------------------------------------------------------- |
| |
| source("./scripts/staging/entity-resolution/primitives/evaluation.dml") as eval; |
| |
| # Command line arguments: FX (predicted pairs) and FY (ground truth pairs) are both required. |
| fileFX = $FX; |
| fileFY = $FY; |
| |
| # Read both pair lists; only their first two columns (the ids) are used below. |
| FX = read(fileFX); |
| FY = read(fileFY); |
| numPred = nrow(FX); |
| numTruth = nrow(FY); |
| |
| # Recode the ids of both inputs in a single pass (predictions stacked on top of the |
| # ground truth) so that, within each column, an id is mapped to the same numeric code |
| # in both inputs. |
| [XY, MX] = transformencode(target=rbind(FX[,1:2],FY[,1:2]), spec="{recode:[C1,C2]}"); |
| |
| # Split the encoded rows back apart: the first numPred rows are the predictions, |
| # the following numTruth rows are the ground truth. The range bounds are parenthesized |
| # so the intended slice is unambiguous to the reader. |
| X = XY[1:numPred,]; |
| Y = XY[(numPred+1):(numPred+numTruth),]; |
| |
| # Build contingency tables over the encoded id pairs. Both tables are forced to the |
| # same dimensions (nrow(MX) x nrow(MX)) regardless of which codes occur in each input. |
| numIds = nrow(MX); |
| PRED = table(X[,1], X[,2], numIds, numIds); |
| GT = table(Y[,1], Y[,2], numIds, numIds); |
| |
| # Perform the evaluation |
| eval::print_eval_stats(PRED, GT); |