/* | |
* Licensed to the Apache Software Foundation (ASF) under one or more | |
* contributor license agreements. See the NOTICE file distributed with | |
* this work for additional information regarding copyright ownership. | |
* The ASF licenses this file to You under the Apache License, Version 2.0 | |
* (the "License"); you may not use this file except in compliance with | |
* the License. You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
package opennlp.tools.jsmlearning; | |
import java.util.ArrayList; | |
import java.util.Arrays; | |
import java.util.HashMap; | |
import java.util.List; | |
import java.util.Map; | |
import org.apache.commons.lang.StringUtils; | |
public class FeatureSpaceCoverageProcessor { | |
public Map<String, Integer> paramMap = new HashMap<String, Integer>(); | |
public String[] header; | |
String[] attributes; | |
public FeatureSpaceCoverageProcessor (){ | |
} | |
public void initParamMap(String[] attributes, String[] header){ | |
this.header = header; | |
this.attributes = attributes; | |
for(int m=0; m<header.length; m++){ | |
paramMap.put(header[m], m); | |
} | |
} | |
// distance between array and array | |
public Float calcDistance(String[] seed, String[] candidate) throws Exception { | |
if (paramMap.isEmpty()) | |
throw new Exception("paramMap.isEmpty()"); | |
Float score = 0f; | |
int p1 = paramMap.get("First Level Category"); | |
int p2 = paramMap.get("Second Level Category"); | |
if (seed[p1].equals(candidate[p1])) { | |
if (seed[p2].equals(candidate[p2])) | |
score = score+0.0000001f; | |
else | |
score = score+0.01f; | |
} else return 100000f; | |
try { | |
int p3 = paramMap.get("Latitude"); | |
int p4 = paramMap.get("Longitude"); | |
Double latDiff = Math.abs(Double.parseDouble(seed[p3]) - Double.parseDouble(candidate[p3])); | |
Double longDiff = Math.abs(Double.parseDouble(seed[p4]) - Double.parseDouble(candidate[p4])); | |
if (latDiff>1 || longDiff>1) | |
return 1000000f; | |
else | |
score+= latDiff.floatValue()/100.0f + longDiff.floatValue()/100.0f; | |
} catch (Exception e) { | |
return 1000000f; | |
} | |
return score; | |
} | |
// distance between matrix and array | |
public Float calcDistance(String[][] seed, String[] candidate) throws Exception { | |
if (paramMap.isEmpty()) | |
throw new Exception("paramMap.isEmpty()"); | |
Float score = 0f, catScore = 10000f, currCatScore=10000000f; | |
int p1 = paramMap.get("First Level Category"); | |
int p2 = paramMap.get("Second Level Category"); | |
for(int v=0; v<seed[0].length; v++){ | |
if (seed[p1][v].equals(candidate[p1])) { | |
if (seed[p2][v].equals(candidate[p2])) | |
currCatScore = 0.0000001f; | |
else | |
currCatScore = 0.01f; | |
} | |
if ( catScore > currCatScore) // if found closer, update | |
catScore = currCatScore; | |
} | |
score = catScore; | |
if (score > 1000000f) | |
return 10000000f; | |
Float latLongScore = 100000f, currLatLongScore = 10000000f; | |
for(int v=0; v<seed[0].length; v++){ | |
try { | |
int p3 = paramMap.get("Latitude"); | |
int p4 = paramMap.get("Longitude"); | |
if (seed[p3][v].equals("") || seed[p4][v].equals("") | |
|| candidate[p3].equals("") || candidate[p4].equals("")) | |
continue; | |
Double latDiff = Math.abs(Double.parseDouble(seed[p3][v]) - Double.parseDouble(candidate[p3])); | |
Double longDiff = Math.abs(Double.parseDouble(seed[p4][v]) - Double.parseDouble(candidate[p4])); | |
if (!(latDiff>1 || longDiff>1)) | |
currLatLongScore = latDiff.floatValue()/100.0f + longDiff.floatValue()/100.0f; | |
} catch (Exception e) { | |
//return 1000000f; | |
} | |
if (latLongScore > currLatLongScore) | |
latLongScore = currLatLongScore; | |
} | |
if (latLongScore> 10000) | |
return 10000f; | |
score+=latLongScore; | |
return score; | |
} | |
public Integer getIdForAttributeName(String key){ | |
Integer res = paramMap.get(key); | |
try { | |
res.toString(); | |
} catch (Exception e) { | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
System.out.println("wrong key"+key); | |
} | |
return res; | |
} | |
public String getAttribNameForId(Integer id){ | |
return header[id]; | |
} | |
public Map<String, String> computeIntersection(String[] line1, | |
String[] line2) { | |
Map<String, String> attr_value = new HashMap<String, String>(); | |
for(String attr: attributes){ | |
int attrIndex = getIdForAttributeName(attr); | |
String v1 = line1[attrIndex].toLowerCase().replace("\"", "").replace(", ", ", ").replace(", ", ",");; | |
String v2 = line2[attrIndex].toLowerCase().replace("\"", "").replace(", ", ", ").replace(", ", ",");; | |
String valArr1Str = StringUtils.substringBetween(v1, "{", "}"); | |
String valArr2Str = StringUtils.substringBetween(v2, "{", "}"); | |
if (valArr1Str==null || valArr2Str==null) { // we assume single value, not an array of values | |
if (v1.equals(v2)){ | |
attr_value.put(attr, v1); | |
} | |
} | |
else { | |
valArr1Str = valArr1Str.replaceAll(", ", ","); | |
valArr2Str = valArr2Str.replaceAll(", ", ","); | |
String[] valArr1 = valArr1Str.split(","); | |
String[] valArr2 = valArr2Str.split(","); | |
List<String> valList1 = new ArrayList<String>(Arrays.asList(valArr1)); | |
List<String> valList2 = new ArrayList<String>(Arrays.asList(valArr2)); | |
valList1.retainAll(valList2); | |
/* verification of coverage | |
valList1.retainAll(valList2); | |
List<String> vl1 = new ArrayList<String>(Arrays.asList(valArr1)); | |
valList1.retainAll(vl1); */ | |
if (!valList1.isEmpty()){ | |
v1 = "{"+valList1.toString().replace("["," ").replace("]", " ").trim()+"}"; | |
attr_value.put(attr, v1); | |
} | |
} | |
} | |
return attr_value; | |
} | |
public boolean ruleCoversCase(Map<String, String> attr_value, String[] line){ | |
boolean soFarCovers = true; | |
for(String attr: attributes){ | |
int attrIndex = getIdForAttributeName(attr); | |
String rule = attr_value.get(attr); | |
if (rule == null) | |
continue; // no constraint | |
rule = rule.toLowerCase().replace("\"", "").replace(", ", ",").replace(", ", ","); | |
String vCase = line[attrIndex].toLowerCase().replace("\"", "").replace(", ", ",").replace(", ", ","); | |
if (vCase==null){// rule for this attribute exists but case has no value | |
soFarCovers = false; | |
return false; | |
} | |
String valArrCaseStr = StringUtils.substringBetween(vCase, "{", "}"); | |
String valArrRuleStr = StringUtils.substringBetween(rule, "{", "}"); | |
if (valArrCaseStr==null || valArrRuleStr==null) { // we assume single value, not an array of values | |
if (!vCase.equals(rule)){ | |
soFarCovers = false; | |
return false; | |
} | |
} | |
else { | |
String[] valArrCase = valArrCaseStr.split(","); | |
String[] valArrRule = valArrRuleStr.split(","); | |
List<String> valListCase = new ArrayList<String>(Arrays.asList(valArrCase)); | |
List<String> valListRule = new ArrayList<String>(Arrays.asList(valArrRule)); | |
int ruleSize = valListRule.size(); | |
//System.out.println(valListRule); | |
//System.out.println(valListCase); | |
// rule members are subset of case | |
valListRule.retainAll(valListCase); | |
//System.out.println(valListRule); | |
if (ruleSize != valListRule.size()){ | |
soFarCovers = false; | |
return false; | |
} | |
} | |
} | |
return soFarCovers; | |
} | |
public boolean ruleCoversRule(Map<String, String> attr_value, Map<String, String> line){ | |
boolean soFarCovers = true; | |
for(String attr: attributes){ | |
int attrIndex = getIdForAttributeName(attr); | |
String rule = attr_value.get(attr); | |
if (rule == null) | |
continue; // no constraint | |
String vRuleBeingCovered = line.get(attr); | |
if (vRuleBeingCovered==null){// rule for this attribute exists but RuleBeingCovered has no value | |
soFarCovers = false; | |
return false; | |
} | |
String valArrRuleBeingCoveredStr = StringUtils.substringBetween(vRuleBeingCovered, "{", "}"); | |
String valArrRuleStr = StringUtils.substringBetween(rule, "{", "}"); | |
if (valArrRuleBeingCoveredStr==null || valArrRuleStr==null) { // we assume single value, not an array of values | |
if (!vRuleBeingCovered.equals(rule)){ | |
soFarCovers = false; | |
return false; | |
} | |
} | |
else { | |
String[] valArrRuleBeingCovered = valArrRuleBeingCoveredStr.split(","); | |
String[] valArrRule = valArrRuleStr.split(","); | |
List<String> valListRuleBeingCovered = new ArrayList<String>(Arrays.asList(valArrRuleBeingCovered)); | |
List<String> valListRule = new ArrayList<String>(Arrays.asList(valArrRule)); | |
for(String r: valListRule){ | |
if (!strListContainsMember(valListRuleBeingCovered, r)){ | |
soFarCovers = false; | |
return false; | |
} | |
} | |
} | |
} | |
return soFarCovers; | |
} | |
public Map<String, String> computeIntersection( | |
Map<String, String> rule1, Map<String, String> rule2) { | |
Map<String, String> attr_value = new HashMap<String, String>(); | |
for(String attr: attributes){ | |
int attrIndex = getIdForAttributeName(attr); | |
String v1 = rule1.get(attr); | |
String v2 = rule2.get(attr); | |
if (v1==null || v2==null) | |
continue; | |
String valArr1Str = StringUtils.substringBetween(v1, "{", "}"); | |
String valArr2Str = StringUtils.substringBetween(v2, "{", "}"); | |
if (valArr1Str==null || valArr2Str==null) { // we assume single value, not an array of values | |
if (v1.equals(v2)){ | |
attr_value.put(attr, v1); | |
} | |
} | |
else { | |
valArr1Str = valArr1Str.replaceAll(", ", ","); | |
valArr2Str = valArr2Str.replaceAll(", ", ","); | |
String[] valArr1 = valArr1Str.split(","); | |
String[] valArr2 = valArr2Str.split(","); | |
List<String> valList1 = new ArrayList<String>(Arrays.asList(valArr1)); | |
List<String> valList2 = new ArrayList<String>(Arrays.asList(valArr2)); | |
valList1.retainAll(valList2); | |
if (!valList1.isEmpty()){ | |
v1 = "{"+valList1.toString().replace("["," ").replace("]", " ").trim()+"}"; | |
attr_value.put(attr, v1); | |
} | |
} | |
} | |
return attr_value; | |
} | |
private boolean strListContainsMember(List<String> valListCase, String r) { | |
boolean bContains = false; | |
for(String m: valListCase){ | |
if (m.startsWith(r) || r.startsWith(m)) | |
return true; | |
} | |
return false; | |
} | |
} |