Exact-match conditions now return undefined (0) when a field is missing. The ignoreMissing semantics also changed: when ignoreMissing=true the comparison is always performed; when ignoreMissing=false a missing field yields -1.

This commit is contained in:
miconis 2019-06-18 14:05:31 +02:00
parent e8db8f2abb
commit 54e4d0af04
6 changed files with 48 additions and 8 deletions

View File

@ -35,11 +35,32 @@ public abstract class AbstractCondition extends AbstractPaceFunctions implements
final Field va = a.values(fd.getName()); final Field va = a.values(fd.getName());
final Field vb = b.values(fd.getName()); final Field vb = b.values(fd.getName());
if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) { if (fd.isIgnoreMissing()) {
res.put(fd.getName(), new ConditionEval(cond, va, vb, 0));
} else {
res.put(fd.getName(), verify(fd, va, vb)); res.put(fd.getName(), verify(fd, va, vb));
} }
else {
if (va.isEmpty() || vb.isEmpty()) {
res.put(fd.getName(), new ConditionEval(cond, va, vb, -1));
}
else {
res.put(fd.getName(), verify(fd, va, vb));
}
}
// // if ignoreMissing=true always return undefined (0) in case of missing
// if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) {
// res.put(fd.getName(), new ConditionEval(cond, va, vb, 0));
// } else {
// if (va.isEmpty()&&vb.isEmpty()) {
// res.put(fd.getName(), new ConditionEval(cond, va, vb, -1));
// }
// else {
// res.put(fd.getName(), verify(fd, va, vb));
// }
// }
} }
return res; return res;
} }

View File

@ -21,6 +21,8 @@ public class DomainExactMatch extends ExactMatchIgnoreCase {
private URL asUrl(final String value) { private URL asUrl(final String value) {
try { try {
if (value.isEmpty())
return new URL("http://");
return new URL(value); return new URL(value);
} catch (MalformedURLException e) { } catch (MalformedURLException e) {
// should not happen as checked by pace typing // should not happen as checked by pace typing

View File

@ -27,7 +27,14 @@ public class ExactMatch extends AbstractCondition {
int res; int res;
if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) { // if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) {
// res = 0;
// } else {
// res = fa.equals(fb) ? 1 : -1;
// }
//if there is a blank, undefined result
if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) {
res = 0; res = 0;
} else { } else {
res = fa.equals(fb) ? 1 : -1; res = fa.equals(fb) ? 1 : -1;

View File

@ -5,6 +5,7 @@ import java.util.List;
import eu.dnetlib.pace.distance.eval.ConditionEval; import eu.dnetlib.pace.distance.eval.ConditionEval;
import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldDef; import eu.dnetlib.pace.model.FieldDef;
import org.apache.commons.lang.StringUtils;
/** /**
* The Class ExactMatch. * The Class ExactMatch.
@ -24,7 +25,15 @@ public class ExactMatchIgnoreCase extends AbstractCondition {
final String fa = getValue(a); final String fa = getValue(a);
final String fb = getValue(b); final String fb = getValue(b);
return new ConditionEval(cond, a, b, fa.equalsIgnoreCase(fb) ? 1 : -1); int res;
if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) {
res = 0;
} else {
res = fa.equalsIgnoreCase(fb) ? 1 : -1;
}
return new ConditionEval(cond, a, b, res);
} }
protected String getValue(final Field f) { protected String getValue(final Field f) {

View File

@ -1,4 +1,4 @@
key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;Uniwersytet;университет;universiteit;πανεπιστήμιο key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;universitaet;Uniwersytet;университет;universiteit;πανεπιστήμιο
key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές
key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα
key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο

1 key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;Uniwersytet;университет;universiteit;πανεπιστήμιο key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;universitaet;Uniwersytet;университет;universiteit;πανεπιστήμιο
2 key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές
3 key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα
4 key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο

View File

@ -48,9 +48,10 @@ public class DistanceAlgoTest extends AbstractPaceFunctions {
@Test @Test
public void testJaroWinklerNormalizedName() { public void testJaroWinklerNormalizedName() {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params); final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
double result = jaroWinklerNormalizedName.distance("Universita di Pisa", "Universita di Parma"); double result = jaroWinklerNormalizedName.distance("Free University of Bozen-Bolzano", "University of the Free State");
assertEquals(result, 0.0); System.out.println("result = " + result);
assertEquals(1.0, result);
} }
@Test @Test