forked from D-Net/dnet-hadoop
implementation of countryMatch and addition of workflow parameters
This commit is contained in:
parent
ee7deb3f60
commit
ea1841fbd2
|
@ -0,0 +1,47 @@
|
||||||
|
package eu.dnetlib.pace.tree;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import com.wcohen.ss.AbstractStringDistance;
|
||||||
|
|
||||||
|
import eu.dnetlib.pace.config.Config;
|
||||||
|
import eu.dnetlib.pace.tree.support.AbstractStringComparator;
|
||||||
|
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
||||||
|
|
||||||
|
@ComparatorClass("countryMatch")
|
||||||
|
public class CountryMatch extends AbstractStringComparator {
|
||||||
|
|
||||||
|
public CountryMatch(Map<String, String> params) {
|
||||||
|
super(params, new com.wcohen.ss.JaroWinkler());
|
||||||
|
}
|
||||||
|
|
||||||
|
public CountryMatch(final double weight) {
|
||||||
|
super(weight, new com.wcohen.ss.JaroWinkler());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
|
||||||
|
super(weight, ssalgo);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double distance(final String a, final String b, final Config conf) {
|
||||||
|
if (a.isEmpty() || b.isEmpty()) {
|
||||||
|
return -1.0; // return -1 if a field is missing
|
||||||
|
}
|
||||||
|
if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
|
||||||
|
return -1.0; // return -1 if a country is UNKNOWN
|
||||||
|
}
|
||||||
|
|
||||||
|
return a.equals(b) ? 1.0 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double getWeight() {
|
||||||
|
return super.weight;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected double normalize(final double d) {
|
||||||
|
return d;
|
||||||
|
}
|
||||||
|
}
|
|
@ -336,4 +336,23 @@ public class ComparatorTest extends AbstractPaceTest {
|
||||||
System.out.println("compare = " + compare);
|
System.out.println("compare = " + compare);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void countryMatch() {
|
||||||
|
|
||||||
|
CountryMatch countryMatch = new CountryMatch(params);
|
||||||
|
|
||||||
|
double result = countryMatch.distance("UNKNOWN", "UNKNOWN", conf);
|
||||||
|
assertEquals(-1.0, result);
|
||||||
|
|
||||||
|
result = countryMatch.distance("CHILE", "UNKNOWN", conf);
|
||||||
|
assertEquals(-1.0, result);
|
||||||
|
|
||||||
|
result = countryMatch.distance("CHILE", "ITALY", conf);
|
||||||
|
assertEquals(0.0, result);
|
||||||
|
|
||||||
|
result = countryMatch.distance("CHILE", "CHILE", conf);
|
||||||
|
assertEquals(1.0, result);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,7 +49,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "country",
|
"field": "country",
|
||||||
"comparator": "exactMatch",
|
"comparator": "countryMatch",
|
||||||
"weight": 1,
|
"weight": 1,
|
||||||
"countIfUndefined": "true",
|
"countIfUndefined": "true",
|
||||||
"params": {}
|
"params": {}
|
||||||
|
|
Loading…
Reference in New Issue