From 54e4d0af041211479632a91f271c5279eae87942 Mon Sep 17 00:00:00 2001 From: miconis Date: Tue, 18 Jun 2019 14:05:31 +0200 Subject: [PATCH] exact match conditions give undefined (0) if a field is blank; ignoreMissing semantics changed: if true the comparison is always performed, if false a missing field gives -1 --- .../pace/condition/AbstractCondition.java | 27 ++++++++++++++++--- .../pace/condition/DomainExactMatch.java | 2 ++ .../eu/dnetlib/pace/condition/ExactMatch.java | 9 ++++++- .../pace/condition/ExactMatchIgnoreCase.java | 11 +++++++- .../dnetlib/pace/config/translation_map.csv | 2 +- .../pace/distance/DistanceAlgoTest.java | 5 ++-- 6 files changed, 48 insertions(+), 8 deletions(-) diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java index cf68e740f3..c78163d01c 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java @@ -35,11 +35,32 @@ public abstract class AbstractCondition extends AbstractPaceFunctions implements final Field va = a.values(fd.getName()); final Field vb = b.values(fd.getName()); - if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) { - res.put(fd.getName(), new ConditionEval(cond, va, vb, 0)); - } else { + if (fd.isIgnoreMissing()) { res.put(fd.getName(), verify(fd, va, vb)); } + else { + if (va.isEmpty() || vb.isEmpty()) { + res.put(fd.getName(), new ConditionEval(cond, va, vb, -1)); + } + else { + res.put(fd.getName(), verify(fd, va, vb)); + } + } + + + +// // if ignoreMissing=true always return undefined (0) in case of missing +// if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) { +// res.put(fd.getName(), new ConditionEval(cond, va, vb, 0)); +// } else { +// if (va.isEmpty()&&vb.isEmpty()) { +// res.put(fd.getName(), new ConditionEval(cond, va, vb, -1)); +// } +// else { +// 
res.put(fd.getName(), verify(fd, va, vb)); +// } +// } + } return res; } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/DomainExactMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/DomainExactMatch.java index e415f15656..dffe2cacc8 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/DomainExactMatch.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/DomainExactMatch.java @@ -21,6 +21,8 @@ public class DomainExactMatch extends ExactMatchIgnoreCase { private URL asUrl(final String value) { try { + if (value.isEmpty()) + return new URL("http://"); return new URL(value); } catch (MalformedURLException e) { // should not happen as checked by pace typing diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java index 2776576c4a..a4cd847922 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java @@ -27,7 +27,14 @@ public class ExactMatch extends AbstractCondition { int res; - if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) { +// if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) { +// res = 0; +// } else { +// res = fa.equals(fb) ? 1 : -1; +// } + + //if there is a blank, undefined result + if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) { res = 0; } else { res = fa.equals(fb) ? 
1 : -1; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatchIgnoreCase.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatchIgnoreCase.java index 7741f38587..e9925ec6d5 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatchIgnoreCase.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatchIgnoreCase.java @@ -5,6 +5,7 @@ import java.util.List; import eu.dnetlib.pace.distance.eval.ConditionEval; import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.FieldDef; +import org.apache.commons.lang.StringUtils; /** * The Class ExactMatch. @@ -24,7 +25,15 @@ public class ExactMatchIgnoreCase extends AbstractCondition { final String fa = getValue(a); final String fb = getValue(b); - return new ConditionEval(cond, a, b, fa.equalsIgnoreCase(fb) ? 1 : -1); + int res; + + if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) { + res = 0; + } else { + res = fa.equalsIgnoreCase(fb) ? 1 : -1; + } + + return new ConditionEval(cond, a, b, res); } protected String getValue(final Field f) { diff --git a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv index 55d114c799..ef49c2f9e6 100644 --- a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv +++ b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv @@ -1,4 +1,4 @@ -key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;Uniwersytet;университет;universiteit;πανεπιστήμιο +key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;universitaet;Uniwersytet;университет;universiteit;πανεπιστήμιο key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές 
key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java index c92c6fed3f..3943e4f8e9 100644 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java @@ -48,9 +48,10 @@ public class DistanceAlgoTest extends AbstractPaceFunctions { @Test public void testJaroWinklerNormalizedName() { final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params); - double result = jaroWinklerNormalizedName.distance("Universita di Pisa", "Universita di Parma"); + double result = jaroWinklerNormalizedName.distance("Free University of Bozen-Bolzano", "University of the Free State"); - assertEquals(result, 0.0); + System.out.println("result = " + result); + assertEquals(1.0, result); } @Test