forked from D-Net/dnet-hadoop
Exact-match conditions now return undefined (0) when a field is missing. The ignoreMissing semantics have changed: when ignoreMissing=true the comparison is always performed; when ignoreMissing=false a missing field yields -1.
This commit is contained in:
parent
e8db8f2abb
commit
54e4d0af04
|
@ -35,11 +35,32 @@ public abstract class AbstractCondition extends AbstractPaceFunctions implements
|
|||
final Field va = a.values(fd.getName());
|
||||
final Field vb = b.values(fd.getName());
|
||||
|
||||
if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) {
|
||||
res.put(fd.getName(), new ConditionEval(cond, va, vb, 0));
|
||||
} else {
|
||||
if (fd.isIgnoreMissing()) {
|
||||
res.put(fd.getName(), verify(fd, va, vb));
|
||||
}
|
||||
else {
|
||||
if (va.isEmpty() || vb.isEmpty()) {
|
||||
res.put(fd.getName(), new ConditionEval(cond, va, vb, -1));
|
||||
}
|
||||
else {
|
||||
res.put(fd.getName(), verify(fd, va, vb));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// // if ignoreMissing=true always return undefined (0) in case of missing
|
||||
// if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) {
|
||||
// res.put(fd.getName(), new ConditionEval(cond, va, vb, 0));
|
||||
// } else {
|
||||
// if (va.isEmpty()&&vb.isEmpty()) {
|
||||
// res.put(fd.getName(), new ConditionEval(cond, va, vb, -1));
|
||||
// }
|
||||
// else {
|
||||
// res.put(fd.getName(), verify(fd, va, vb));
|
||||
// }
|
||||
// }
|
||||
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
|
|
@ -21,6 +21,8 @@ public class DomainExactMatch extends ExactMatchIgnoreCase {
|
|||
|
||||
private URL asUrl(final String value) {
|
||||
try {
|
||||
if (value.isEmpty())
|
||||
return new URL("http://");
|
||||
return new URL(value);
|
||||
} catch (MalformedURLException e) {
|
||||
// should not happen as checked by pace typing
|
||||
|
|
|
@ -27,7 +27,14 @@ public class ExactMatch extends AbstractCondition {
|
|||
|
||||
int res;
|
||||
|
||||
if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) {
|
||||
// if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) {
|
||||
// res = 0;
|
||||
// } else {
|
||||
// res = fa.equals(fb) ? 1 : -1;
|
||||
// }
|
||||
|
||||
// if either value is blank, the result is undefined (0)
|
||||
if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) {
|
||||
res = 0;
|
||||
} else {
|
||||
res = fa.equals(fb) ? 1 : -1;
|
||||
|
|
|
@ -5,6 +5,7 @@ import java.util.List;
|
|||
import eu.dnetlib.pace.distance.eval.ConditionEval;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.FieldDef;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
/**
|
||||
* The Class ExactMatchIgnoreCase.
|
||||
|
@ -24,7 +25,15 @@ public class ExactMatchIgnoreCase extends AbstractCondition {
|
|||
final String fa = getValue(a);
|
||||
final String fb = getValue(b);
|
||||
|
||||
return new ConditionEval(cond, a, b, fa.equalsIgnoreCase(fb) ? 1 : -1);
|
||||
int res;
|
||||
|
||||
if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) {
|
||||
res = 0;
|
||||
} else {
|
||||
res = fa.equalsIgnoreCase(fb) ? 1 : -1;
|
||||
}
|
||||
|
||||
return new ConditionEval(cond, a, b, res);
|
||||
}
|
||||
|
||||
protected String getValue(final Field f) {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;Uniwersytet;университет;universiteit;πανεπιστήμιο
|
||||
key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;universitaet;Uniwersytet;университет;universiteit;πανεπιστήμιο
|
||||
key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές
|
||||
key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα
|
||||
key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο
|
||||
|
|
|
|
@ -48,9 +48,10 @@ public class DistanceAlgoTest extends AbstractPaceFunctions {
|
|||
@Test
|
||||
public void testJaroWinklerNormalizedName() {
|
||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
double result = jaroWinklerNormalizedName.distance("Universita di Pisa", "Universita di Parma");
|
||||
double result = jaroWinklerNormalizedName.distance("Free University of Bozen-Bolzano", "University of the Free State");
|
||||
|
||||
assertEquals(result, 0.0);
|
||||
System.out.println("result = " + result);
|
||||
assertEquals(1.0, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue