implementation of countryMatch and addition of workflow parameters

This commit is contained in:
Michele De Bonis 2024-06-28 16:46:52 +02:00 committed by Claudio Atzori
parent ee7deb3f60
commit ea1841fbd2
3 changed files with 67 additions and 1 deletions

View File

@ -0,0 +1,47 @@
package eu.dnetlib.pace.tree;
import java.util.Map;
import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.tree.support.AbstractStringComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass;
@ComparatorClass("countryMatch")
public class CountryMatch extends AbstractStringComparator {
public CountryMatch(Map<String, String> params) {
super(params, new com.wcohen.ss.JaroWinkler());
}
public CountryMatch(final double weight) {
super(weight, new com.wcohen.ss.JaroWinkler());
}
protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
super(weight, ssalgo);
}
@Override
public double distance(final String a, final String b, final Config conf) {
if (a.isEmpty() || b.isEmpty()) {
return -1.0; // return -1 if a field is missing
}
if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
return -1.0; // return -1 if a country is UNKNOWN
}
return a.equals(b) ? 1.0 : 0;
}
@Override
public double getWeight() {
return super.weight;
}
@Override
protected double normalize(final double d) {
return d;
}
}

View File

@ -336,4 +336,23 @@ public class ComparatorTest extends AbstractPaceTest {
System.out.println("compare = " + compare);
}
@Test
public void countryMatch() {
CountryMatch countryMatch = new CountryMatch(params);
double result = countryMatch.distance("UNKNOWN", "UNKNOWN", conf);
assertEquals(-1.0, result);
result = countryMatch.distance("CHILE", "UNKNOWN", conf);
assertEquals(-1.0, result);
result = countryMatch.distance("CHILE", "ITALY", conf);
assertEquals(0.0, result);
result = countryMatch.distance("CHILE", "CHILE", conf);
assertEquals(1.0, result);
}
}

View File

@ -49,7 +49,7 @@
},
{
"field": "country",
"comparator": "exactMatch",
"comparator": "countryMatch",
"weight": 1,
"countIfUndefined": "true",
"params": {}