blob: 89d12e4085717d11472de59a29aa685a7e9fd848 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.jsmlearning;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
/**
 * Distance, intersection and coverage computations over rows of string-valued
 * attributes that are addressed by header column name.
 *
 * <p>A <em>case</em> is a {@code String[]} row laid out in header order. A
 * <em>rule</em> is a map from attribute name to a required value. A value is
 * either a scalar string or a set written as {@code {a, b, c}}; set members are
 * compared after surrounding whitespace has been trimmed.
 *
 * <p>{@link #initParamMap(String[], String[])} must be called before any other
 * method is used.
 */
public class FeatureSpaceCoverageProcessor {

    private static final String FIRST_LEVEL_CATEGORY = "First Level Category";
    private static final String SECOND_LEVEL_CATEGORY = "Second Level Category";
    private static final String LATITUDE = "Latitude";
    private static final String LONGITUDE = "Longitude";

    /** Column name to column index, filled by {@link #initParamMap(String[], String[])}. */
    public Map<String, Integer> paramMap = new HashMap<String, Integer>();

    /** Column names in positional order, as passed to {@link #initParamMap(String[], String[])}. */
    public String[] header;

    /** Names of the attributes that the intersection and coverage methods iterate over. */
    String[] attributes;

    public FeatureSpaceCoverageProcessor() {
    }

    /**
     * Stores the attribute list and header and rebuilds the name-to-index map.
     *
     * <p>Entries from a previous call are discarded so that a changed header cannot
     * leave stale indices behind. If a name occurs more than once in {@code header},
     * the last position wins.
     *
     * @param attributes attribute names used by the intersection / coverage methods
     * @param header     column names in positional order
     */
    public void initParamMap(String[] attributes, String[] header) {
        this.header = header;
        this.attributes = attributes;
        paramMap.clear();
        for (int m = 0; m < header.length; m++) {
            paramMap.put(header[m], m);
        }
    }

    /**
     * Distance between a single seed row and a candidate row.
     *
     * <p>Returns {@code 100000} when the first-level categories differ. Otherwise the
     * score starts at {@code 0.0000001} (second-level categories equal) or
     * {@code 0.01} (they differ), and {@code latDiff/100 + longDiff/100} is added.
     * Returns {@code 1000000} when either coordinate differs by more than 1 or when
     * the coordinates cannot be read or parsed.
     *
     * @param seed      seed row in header order
     * @param candidate candidate row in header order
     * @return the distance score
     * @throws Exception declared for compatibility; an {@link IllegalStateException}
     *                   is thrown when {@link #paramMap} is empty
     */
    public Float calcDistance(String[] seed, String[] candidate) throws Exception {
        requireInitialized();
        int p1 = paramMap.get(FIRST_LEVEL_CATEGORY);
        int p2 = paramMap.get(SECOND_LEVEL_CATEGORY);

        if (!seed[p1].equals(candidate[p1])) {
            return 100000f;
        }
        float score = seed[p2].equals(candidate[p2]) ? 0.0000001f : 0.01f;

        try {
            int p3 = paramMap.get(LATITUDE);
            int p4 = paramMap.get(LONGITUDE);
            double latDiff = Math.abs(Double.parseDouble(seed[p3]) - Double.parseDouble(candidate[p3]));
            double longDiff = Math.abs(Double.parseDouble(seed[p4]) - Double.parseDouble(candidate[p4]));
            if (latDiff > 1 || longDiff > 1) {
                return 1000000f;
            }
            score += (float) latDiff / 100.0f + (float) longDiff / 100.0f;
        } catch (RuntimeException e) {
            // Missing column, missing cell or unparsable number: treat as "far away".
            return 1000000f;
        }
        return score;
    }

    /**
     * Distance between a set of seed rows and a candidate row; the closest seed wins
     * separately for the category part and for the coordinate part.
     *
     * <p>{@code seed} is indexed as {@code seed[column][sample]}; the number of
     * samples is taken from {@code seed[0].length}.
     *
     * <p>The category part is {@code 0.0000001} if some sample matches both category
     * levels, {@code 0.01} if some sample matches only the first level, and
     * {@code 10000} otherwise. The coordinate part is the smallest
     * {@code latDiff/100 + longDiff/100} over the samples whose coordinates are
     * non-empty, parsable and within 1 of the candidate in both coordinates; if there
     * is no such sample, {@code 10000} is returned as the whole result.
     *
     * @param seed      seed samples, indexed {@code [column][sample]}
     * @param candidate candidate row in header order
     * @return the distance score
     * @throws Exception declared for compatibility; an {@link IllegalStateException}
     *                   is thrown when {@link #paramMap} is empty
     */
    public Float calcDistance(String[][] seed, String[] candidate) throws Exception {
        requireInitialized();
        int p1 = paramMap.get(FIRST_LEVEL_CATEGORY);
        int p2 = paramMap.get(SECOND_LEVEL_CATEGORY);
        int sampleCount = seed[0].length;

        float catScore = 10000f;
        for (int v = 0; v < sampleCount; v++) {
            if (seed[p1][v].equals(candidate[p1])) {
                float current = seed[p2][v].equals(candidate[p2]) ? 0.0000001f : 0.01f;
                if (catScore > current) { // found a closer sample
                    catScore = current;
                }
            }
        }
        // NOTE(review): a complete category mismatch leaves catScore at 10000 and is
        // not short-circuited here (the single-row overload returns early in that
        // case) - confirm whether that asymmetry is intended.

        Integer latCol = paramMap.get(LATITUDE);
        Integer longCol = paramMap.get(LONGITUDE);
        float latLongScore = 100000f;
        if (latCol != null && longCol != null) {
            int latIdx = latCol.intValue();
            int longIdx = longCol.intValue();
            for (int v = 0; v < sampleCount; v++) {
                try {
                    String seedLat = seed[latIdx][v];
                    String seedLong = seed[longIdx][v];
                    if (seedLat.length() == 0 || seedLong.length() == 0
                            || candidate[latIdx].length() == 0 || candidate[longIdx].length() == 0) {
                        continue;
                    }
                    double latDiff = Math.abs(Double.parseDouble(seedLat) - Double.parseDouble(candidate[latIdx]));
                    double longDiff = Math.abs(Double.parseDouble(seedLong) - Double.parseDouble(candidate[longIdx]));
                    if (!(latDiff > 1 || longDiff > 1)) {
                        float current = (float) latDiff / 100.0f + (float) longDiff / 100.0f;
                        if (latLongScore > current) {
                            latLongScore = current;
                        }
                    }
                } catch (RuntimeException ignored) {
                    // Best effort: a sample with missing or malformed coordinates is
                    // skipped so that the remaining samples can still be scored.
                }
            }
        }
        if (latLongScore > 10000) {
            return 10000f;
        }
        return catScore + latLongScore;
    }

    /**
     * Looks up the column index of an attribute name.
     *
     * @param key attribute / column name
     * @return the index, or {@code null} (after printing a diagnostic to standard
     *         output) when the name is not present in {@link #paramMap}
     */
    public Integer getIdForAttributeName(String key) {
        Integer res = paramMap.get(key);
        if (res == null) {
            System.out.println("wrong key" + key);
        }
        return res;
    }

    /**
     * Returns the header name stored at the given column index.
     *
     * @param id column index into {@link #header}
     * @return the column name at that index
     */
    public String getAttribNameForId(Integer id) {
        return header[id];
    }

    /**
     * Builds the rule that two cases have in common.
     *
     * <p>For every attribute both cell values are lower-cased, stripped of double
     * quotes and of spaces following a comma. If both are sets, the members of the
     * first that also occur in the second are kept (in the first one's order) and
     * written as {@code {a, b}}; otherwise the attribute is kept only when the two
     * normalized values are equal. Attributes with a {@code null} cell or without
     * common content are left out.
     *
     * @param line1 first case, in header order
     * @param line2 second case, in header order
     * @return attribute name to common value; never {@code null}
     */
    public Map<String, String> computeIntersection(String[] line1,
            String[] line2) {
        Map<String, String> attrValue = new HashMap<String, String>();
        for (String attr : attributes) {
            int attrIndex = getIdForAttributeName(attr);
            String raw1 = line1[attrIndex];
            String raw2 = line2[attrIndex];
            if (raw1 == null || raw2 == null) {
                continue; // nothing to intersect
            }
            String common = intersectValues(normalizeValue(raw1), normalizeValue(raw2));
            if (common != null) {
                attrValue.put(attr, common);
            }
        }
        return attrValue;
    }

    /**
     * Tells whether a rule covers a case, i.e. whether every constraint of the rule is
     * satisfied by the case.
     *
     * <p>Rule and case values are normalized as in
     * {@link #computeIntersection(String[], String[])}. For set values every rule
     * member must be present (exact match) among the case members; for scalar values
     * the normalized strings must be equal. Attributes the rule does not mention are
     * unconstrained. A {@code null} case cell never satisfies a constraint.
     *
     * @param attr_value the rule: attribute name to required value
     * @param line       the case, in header order
     * @return {@code true} if the rule covers the case
     */
    public boolean ruleCoversCase(Map<String, String> attr_value, String[] line) {
        for (String attr : attributes) {
            int attrIndex = getIdForAttributeName(attr);
            String rawRule = attr_value.get(attr);
            if (rawRule == null) {
                continue; // no constraint
            }
            String rawCase = line[attrIndex];
            if (rawCase == null) {
                return false; // rule for this attribute exists but case has no value
            }
            String rule = normalizeValue(rawRule);
            String vCase = normalizeValue(rawCase);
            List<String> caseMembers = parseSetMembers(vCase);
            List<String> ruleMembers = parseSetMembers(rule);
            if (caseMembers == null || ruleMembers == null) {
                // at least one side is a single value, not a set
                if (!vCase.equals(rule)) {
                    return false;
                }
            } else if (!caseMembers.containsAll(ruleMembers)) {
                return false; // rule members must be a subset of the case members
            }
        }
        return true;
    }

    /**
     * Tells whether one rule covers another rule.
     *
     * <p>For every attribute constrained by {@code attr_value} the other rule must
     * also have a value. Scalar values must be equal as written; for set values every
     * member of the covering rule must match some member of the covered rule, where
     * two members match when either one is a prefix of the other.
     *
     * @param attr_value the covering rule
     * @param line       the rule being covered
     * @return {@code true} if {@code attr_value} covers {@code line}
     */
    public boolean ruleCoversRule(Map<String, String> attr_value, Map<String, String> line) {
        for (String attr : attributes) {
            String rule = attr_value.get(attr);
            if (rule == null) {
                continue; // no constraint
            }
            String vRuleBeingCovered = line.get(attr);
            if (vRuleBeingCovered == null) {
                return false; // constraint exists but the covered rule has no value
            }
            List<String> coveredMembers = parseSetMembers(vRuleBeingCovered);
            List<String> ruleMembers = parseSetMembers(rule);
            if (coveredMembers == null || ruleMembers == null) {
                // at least one side is a single value, not a set
                if (!vRuleBeingCovered.equals(rule)) {
                    return false;
                }
            } else {
                for (String r : ruleMembers) {
                    if (!strListContainsMember(coveredMembers, r)) {
                        return false;
                    }
                }
            }
        }
        return true;
    }

    /**
     * Builds the rule that two rules have in common.
     *
     * <p>Values are used as written (no lower-casing). If both values of an attribute
     * are sets, the members of the first that also occur in the second are kept and
     * written as {@code {a, b}}; otherwise the attribute is kept only when both values
     * are equal. Attributes missing from either rule are left out.
     *
     * @param rule1 first rule
     * @param rule2 second rule
     * @return attribute name to common value; never {@code null}
     */
    public Map<String, String> computeIntersection(
            Map<String, String> rule1, Map<String, String> rule2) {
        Map<String, String> attrValue = new HashMap<String, String>();
        for (String attr : attributes) {
            String v1 = rule1.get(attr);
            String v2 = rule2.get(attr);
            if (v1 == null || v2 == null) {
                continue;
            }
            String common = intersectValues(v1, v2);
            if (common != null) {
                attrValue.put(attr, common);
            }
        }
        return attrValue;
    }

    /** Fails fast when {@link #initParamMap(String[], String[])} has not filled the map. */
    private void requireInitialized() {
        if (paramMap.isEmpty()) {
            throw new IllegalStateException("paramMap.isEmpty()");
        }
    }

    /** Lower-cases a cell value, drops double quotes and removes spaces that follow a comma. */
    private static String normalizeValue(String raw) {
        // Fixed locale so that the result does not depend on the JVM default locale.
        return raw.toLowerCase(java.util.Locale.ENGLISH).replace("\"", "").replaceAll(", +", ",");
    }

    /**
     * Returns the trimmed members of the first <code>{...}</code> group in {@code value} as a
     * fresh mutable list, or {@code null} when the value contains no such group.
     */
    private static List<String> parseSetMembers(String value) {
        if (value == null) {
            return null;
        }
        int open = value.indexOf('{');
        if (open < 0) {
            return null;
        }
        int close = value.indexOf('}', open + 1);
        if (close < 0) {
            return null;
        }
        String body = value.substring(open + 1, close).trim();
        // The separator pattern swallows whitespace around commas, so members come out trimmed.
        return new ArrayList<String>(Arrays.asList(body.split("\\s*,\\s*")));
    }

    /**
     * Intersects two values: set intersection when both are sets, equality otherwise.
     * Returns the common value, or {@code null} when there is none.
     */
    private static String intersectValues(String v1, String v2) {
        List<String> members1 = parseSetMembers(v1);
        List<String> members2 = parseSetMembers(v2);
        if (members1 == null || members2 == null) {
            return v1.equals(v2) ? v1 : null;
        }
        members1.retainAll(members2);
        if (members1.isEmpty()) {
            return null;
        }
        StringBuilder joined = new StringBuilder("{");
        for (int i = 0; i < members1.size(); i++) {
            if (i > 0) {
                joined.append(", ");
            }
            joined.append(members1.get(i));
        }
        return joined.append('}').toString();
    }

    /** True when {@code r} and some list member match, i.e. one is a prefix of the other. */
    private boolean strListContainsMember(List<String> valListCase, String r) {
        for (String m : valListCase) {
            if (m.startsWith(r) || r.startsWith(m)) {
                return true;
            }
        }
        return false;
    }
}