From 0735f3a8229227b0cc968bf55ecdadca7e074596 Mon Sep 17 00:00:00 2001 From: Michele De Bonis Date: Fri, 8 Feb 2019 12:56:47 +0100 Subject: [PATCH] implementation of the test classes and minor changes --- /* Review notes (commentary placed in the notes area after the "---" separator, not part of the commit message). What this patch does: removes one line from dnet-pace-core/pom.xml (its content is not visible in this view); makes JaroWinklerNormalizedName pass params "windowSize" (default 4) to normalizeCities2 instead of the literal 4; removes two System.out.println calls from UndefinedNode; adds AggType.getEnum(String); changes BlockProcessor.navigateTree from private to public; adds the createFieldList helper to AbstractPaceTest; adds the new test class ComparatorTest. NOTE(review): in UndefinedNode the locals sa and sb remain declared after the printlns are removed and nothing in the visible hunk reads them - confirm whether they are still needed. NOTE(review): the reason for making navigateTree public is not stated in this patch - presumably so tests can call it; confirm. */ dnet-pace-core/pom.xml | 1 - .../algo/JaroWinklerNormalizedName.java | 8 +- .../eu/dnetlib/pace/tree/UndefinedNode.java | 3 - .../eu/dnetlib/pace/tree/support/AggType.java | 14 +- .../eu/dnetlib/pace/util/BlockProcessor.java | 2 +- .../eu/dnetlib/pace/AbstractPaceTest.java | 23 ++- .../eu/dnetlib/pace/tree/ComparatorTest.java | 144 ++++++++++++++++++ 7 files changed, 180 insertions(+), 15 deletions(-) create mode 100644 dnet-pace-core/src/test/java/eu/dnetlib/pace/tree/ComparatorTest.java diff --git a/dnet-pace-core/pom.xml b/dnet-pace-core/pom.xml index 7a5666830..d88dea4e0 100644 --- a/dnet-pace-core/pom.xml +++ b/dnet-pace-core/pom.xml @@ -74,7 +74,6 @@ commons-math3 - diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerNormalizedName.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerNormalizedName.java index 2f79493d9..ebaa0eab4 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerNormalizedName.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerNormalizedName.java @@ -23,8 +23,11 @@ public class JaroWinklerNormalizedName extends SecondStringDistanceAlgo { private static Map cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv"); + private Map params; + public JaroWinklerNormalizedName(Map params){ super(params, new com.wcohen.ss.JaroWinkler()); + this.params = params; /* kept so the normalizeCities2 calls further down can read "windowSize"; NOTE(review): this is the only assignment to params in the lines shown - confirm the other constructors do not leave it null */ } public JaroWinklerNormalizedName(double weight) { @@ -52,9 +55,8 @@ public class JaroWinklerNormalizedName extends SecondStringDistanceAlgo { // ca = norm.split("\\|\\|\\|")[0].trim(); // cb = norm.split("\\|\\|\\|")[1].trim(); - ca = normalizeCities2(ca, cityMap, 4); - cb = normalizeCities2(cb, cityMap, 4); - + ca = normalizeCities2(ca, 
cityMap, params.getOrDefault("windowSize", 4).intValue()); + cb = normalizeCities2(cb, cityMap, params.getOrDefault("windowSize", 4).intValue()); /* both calls now take the third argument from params "windowSize", falling back to 4, the value that was hard-coded before */ if (sameCity(ca,cb)){ if (sameKeywords(ca,cb)){ diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/UndefinedNode.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/UndefinedNode.java index cf90847de..de43543ec 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/UndefinedNode.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/UndefinedNode.java @@ -17,9 +17,6 @@ public class UndefinedNode implements Comparator { final List sa = ((FieldList) a).stringList(); final List sb = ((FieldList) b).stringList(); - System.out.println("sa = " + sa.size()); - System.out.println("sb = " + sb.size()); - return 0; } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AggType.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AggType.java index 71e3ad0d4..bd7bd9fb8 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AggType.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AggType.java @@ -1,9 +1,21 @@ package eu.dnetlib.pace.tree.support; +import eu.dnetlib.pace.util.PaceException; + public enum AggType { AVG, SUM, MAX, - MIN + MIN; + + public static AggType getEnum(String value) { /* Returns the constant whose name equals value (AggType.valueOf); an IllegalArgumentException from valueOf is rethrown as PaceException("Undefined aggregation type") with the original exception passed along. NOTE(review): a null value is not covered by this catch, since Enum.valueOf throws NullPointerException for a null name - confirm callers never pass null. */ + + try { + return AggType.valueOf(value); + } + catch (IllegalArgumentException e) { + throw new PaceException("Undefined aggregation type", e); + } + } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java index 19105aefc..1cd0eb3af 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java @@ -76,7 +76,7 @@ public class BlockProcessor { } } - private MatchType navigateTree(final MapDocument doc1, final MapDocument doc2){ + public MatchType 
navigateTree(final MapDocument doc1, final MapDocument doc2){ final Map decisionTree = dedupConf.getPace().getDecisionTree(); diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/AbstractPaceTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/AbstractPaceTest.java index 8a0c08d85..3da6f0a7e 100644 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/AbstractPaceTest.java +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/AbstractPaceTest.java @@ -1,14 +1,15 @@ package eu.dnetlib.pace; -import java.io.IOException; -import java.io.StringWriter; - -import org.apache.commons.io.IOUtils; - import eu.dnetlib.pace.config.Type; import eu.dnetlib.pace.model.Field; +import eu.dnetlib.pace.model.FieldListImpl; import eu.dnetlib.pace.model.FieldValueImpl; -import org.junit.Test; +import org.apache.commons.io.IOUtils; + +import java.io.IOException; +import java.io.StringWriter; +import java.util.List; +import java.util.stream.Collectors; public abstract class AbstractPaceTest { @@ -34,4 +35,14 @@ public abstract class AbstractPaceTest { return new FieldValueImpl(Type.URL, "url", s); } + protected Field createFieldList(List strings, String fieldName){ /* Test helper: wraps each string in a FieldValueImpl of Type.String named fieldName, adds them all to a new FieldListImpl and returns that list. */ + + List fieldValueStream = strings.stream().map(s -> new FieldValueImpl(Type.String, fieldName, s)).collect(Collectors.toList()); + + FieldListImpl a = new FieldListImpl(); + a.addAll(fieldValueStream); + + return a; + + } } diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/tree/ComparatorTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/tree/ComparatorTest.java new file mode 100644 index 000000000..240a5d6bd --- /dev/null +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/tree/ComparatorTest.java @@ -0,0 +1,144 @@ +package eu.dnetlib.pace.tree; + +import eu.dnetlib.pace.AbstractPaceTest; +import eu.dnetlib.pace.config.Type; +import eu.dnetlib.pace.model.Field; +import eu.dnetlib.pace.model.FieldListImpl; +import eu.dnetlib.pace.model.FieldValueImpl; +import org.junit.Before; +import org.junit.Test; + 
+import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.assertTrue; + +//test class for comparators (to be used into the tree nodes) +public class ComparatorTest extends AbstractPaceTest { /* The tests build comparators (most of them from the shared params map filled in setup), compare hand-built fields and assert the returned scores; several also print the inputs and scores. NOTE(review): the assertEquals calls pass the computed result first, while JUnit declares the parameters as (expected, actual); the values are doubles compared without a delta - confirm exact equality is intended. */ + + private Map params; + + @Before + public void setup() { + params = new HashMap<>(); + //to put all the needed parameters + params.put("minCoauthors", 5); + params.put("maxCoauthors", 200); + + } + + @Test + public void testCoauthorsMatch() { + + final CoauthorsMatch coauthorsMatch = new CoauthorsMatch(params); + + Field a = createFieldList(Arrays.asList("la bruzzo, sandro", "atzori, claudio", "artini, michele", "de bonis, michele", "bardi, alessia", "dell'amico, andrea", "baglioni, miriam"), "coauthors"); + Field b = createFieldList(Arrays.asList("la bruzzo, sandro"), "coauthors"); + + double result1 = coauthorsMatch.compare(a, b); + double result2 = coauthorsMatch.compare(a, a); + + System.out.println("a = " + a); + System.out.println("b = " + b); + + System.out.println("a vs b = " + result1); + System.out.println("a vs a = " + result2); + + assertEquals(result1, -1.0); + assertEquals(result2, 7.0); + } + + @Test + public void testExactMatch() { + + final ExactMatch exactMatch = new ExactMatch(params); + + Field a = new FieldValueImpl(Type.String, "doi", "10.1000/0000000000"); + Field b = new FieldValueImpl(Type.String, "doi", "10.1033/0000000000"); + Field c = new FieldValueImpl(Type.String, "doi", ""); + + double result1 = exactMatch.compare(a,a); + double result2 = exactMatch.compare(a,b); + double result3 = exactMatch.compare(a,c); + + System.out.println("a = " + a); + System.out.println("b = " + b); + System.out.println("c = " + c); + + System.out.println("a vs a = " + result1); + System.out.println("a vs b = " + result2); + System.out.println("a vs c = " + result3); + + assertEquals(result1, 1.0); + assertEquals(result2, 0.0); + assertEquals(result3, -1.0); + 
+ } + + @Test + public void testSimilarMatch() { + + final SimilarMatch similarMatch = new SimilarMatch(params); + + Field a = new FieldValueImpl(Type.String, "firstname", "sandro"); + Field b = new FieldValueImpl(Type.String, "firstname", "s."); + Field c = new FieldValueImpl(Type.String, "firstname", "stefano"); + + double result1 = similarMatch.compare(a,b); + double result2 = similarMatch.compare(a,c); + double result3 = similarMatch.compare(b,c); + + System.out.println("a = " + a); + System.out.println("b = " + b); + System.out.println("c = " + c); + + System.out.println("a vs b = " + result1); + System.out.println("a vs c = " + result2); + System.out.println("b vs c = " + result3); + + assertEquals(result1, 1.0); + assertEquals(result3, 1.0); + assertTrue(result2<0.7); + + } + + @Test + public void testTopicsMatch() { + + final TopicsMatch topicsMatch = new TopicsMatch(params); + + Field a = createFieldList(Arrays.asList("0.0", "1.0", "0.0"), "topics"); + Field b = createFieldList(Arrays.asList("0.0", "0.0", "1.0"), "topics"); + Field c = createFieldList(Arrays.asList("0.5", "0.5", "0.0"), "topics"); + + double result1 = topicsMatch.compare(a,a); + double result2 = topicsMatch.compare(a,c); + double result3 = topicsMatch.compare(b,c); + + System.out.println("a = " + a); + System.out.println("b = " + b); + System.out.println("c = " + c); + + System.out.println("a vs a = " + result1); + System.out.println("a vs c = " + result2); + System.out.println("b vs c = " + result3); + + assertEquals(result1, 1.0); + assertEquals(result2, 0.5); + assertEquals(result3, 0.0); + + } + + @Test + public void testUndefinedNode() { + + final UndefinedNode undefinedNode = new UndefinedNode(); + double result = undefinedNode.compare(new FieldListImpl(),new FieldListImpl()); + + assertEquals(result, 0.0); + } + + +}