forked from D-Net/dnet-hadoop
Exact-match conditions now return undefined (0) if a field is missing. ignoreMissing semantics changed: if true, the comparison is always performed; if false, the result is -1 when a field is missing.
This commit is contained in:
parent
e8db8f2abb
commit
54e4d0af04
|
@ -35,11 +35,32 @@ public abstract class AbstractCondition extends AbstractPaceFunctions implements
|
||||||
final Field va = a.values(fd.getName());
|
final Field va = a.values(fd.getName());
|
||||||
final Field vb = b.values(fd.getName());
|
final Field vb = b.values(fd.getName());
|
||||||
|
|
||||||
if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) {
|
if (fd.isIgnoreMissing()) {
|
||||||
res.put(fd.getName(), new ConditionEval(cond, va, vb, 0));
|
|
||||||
} else {
|
|
||||||
res.put(fd.getName(), verify(fd, va, vb));
|
res.put(fd.getName(), verify(fd, va, vb));
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
if (va.isEmpty() || vb.isEmpty()) {
|
||||||
|
res.put(fd.getName(), new ConditionEval(cond, va, vb, -1));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
res.put(fd.getName(), verify(fd, va, vb));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// // if ignoreMissing=true always return undefined (0) in case of missing
|
||||||
|
// if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) {
|
||||||
|
// res.put(fd.getName(), new ConditionEval(cond, va, vb, 0));
|
||||||
|
// } else {
|
||||||
|
// if (va.isEmpty()&&vb.isEmpty()) {
|
||||||
|
// res.put(fd.getName(), new ConditionEval(cond, va, vb, -1));
|
||||||
|
// }
|
||||||
|
// else {
|
||||||
|
// res.put(fd.getName(), verify(fd, va, vb));
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,8 @@ public class DomainExactMatch extends ExactMatchIgnoreCase {
|
||||||
|
|
||||||
private URL asUrl(final String value) {
|
private URL asUrl(final String value) {
|
||||||
try {
|
try {
|
||||||
|
if (value.isEmpty())
|
||||||
|
return new URL("http://");
|
||||||
return new URL(value);
|
return new URL(value);
|
||||||
} catch (MalformedURLException e) {
|
} catch (MalformedURLException e) {
|
||||||
// should not happen as checked by pace typing
|
// should not happen as checked by pace typing
|
||||||
|
|
|
@ -27,7 +27,14 @@ public class ExactMatch extends AbstractCondition {
|
||||||
|
|
||||||
int res;
|
int res;
|
||||||
|
|
||||||
if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) {
|
// if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) {
|
||||||
|
// res = 0;
|
||||||
|
// } else {
|
||||||
|
// res = fa.equals(fb) ? 1 : -1;
|
||||||
|
// }
|
||||||
|
|
||||||
|
//if there is a blank, undefined result
|
||||||
|
if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) {
|
||||||
res = 0;
|
res = 0;
|
||||||
} else {
|
} else {
|
||||||
res = fa.equals(fb) ? 1 : -1;
|
res = fa.equals(fb) ? 1 : -1;
|
||||||
|
|
|
@ -5,6 +5,7 @@ import java.util.List;
|
||||||
import eu.dnetlib.pace.distance.eval.ConditionEval;
|
import eu.dnetlib.pace.distance.eval.ConditionEval;
|
||||||
import eu.dnetlib.pace.model.Field;
|
import eu.dnetlib.pace.model.Field;
|
||||||
import eu.dnetlib.pace.model.FieldDef;
|
import eu.dnetlib.pace.model.FieldDef;
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Class ExactMatch.
|
* The Class ExactMatch.
|
||||||
|
@ -24,7 +25,15 @@ public class ExactMatchIgnoreCase extends AbstractCondition {
|
||||||
final String fa = getValue(a);
|
final String fa = getValue(a);
|
||||||
final String fb = getValue(b);
|
final String fb = getValue(b);
|
||||||
|
|
||||||
return new ConditionEval(cond, a, b, fa.equalsIgnoreCase(fb) ? 1 : -1);
|
int res;
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) {
|
||||||
|
res = 0;
|
||||||
|
} else {
|
||||||
|
res = fa.equalsIgnoreCase(fb) ? 1 : -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new ConditionEval(cond, a, b, res);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String getValue(final Field f) {
|
protected String getValue(final Field f) {
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;Uniwersytet;университет;universiteit;πανεπιστήμιο
|
key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;universitaet;Uniwersytet;университет;universiteit;πανεπιστήμιο
|
||||||
key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές
|
key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές
|
||||||
key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα
|
key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα
|
||||||
key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο
|
key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο
|
||||||
|
|
|
|
@ -48,9 +48,10 @@ public class DistanceAlgoTest extends AbstractPaceFunctions {
|
||||||
@Test
|
@Test
|
||||||
public void testJaroWinklerNormalizedName() {
|
public void testJaroWinklerNormalizedName() {
|
||||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||||
double result = jaroWinklerNormalizedName.distance("Universita di Pisa", "Universita di Parma");
|
double result = jaroWinklerNormalizedName.distance("Free University of Bozen-Bolzano", "University of the Free State");
|
||||||
|
|
||||||
assertEquals(result, 0.0);
|
System.out.println("result = " + result);
|
||||||
|
assertEquals(1.0, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
Loading…
Reference in New Issue