From d059bf68b8ef298d6a6927732f52684b7d2acb42 Mon Sep 17 00:00:00 2001 From: Michele De Bonis Date: Thu, 25 Oct 2018 15:15:40 +0200 Subject: [PATCH] modification in the initialization of clustering functions, distance algos and conditions. --- dnet-pace-core/pom.xml | 6 ++-- .../AbstractClusteringFunction.java | 6 ---- .../eu/dnetlib/pace/clustering/Acronyms.java | 4 --- .../pace/clustering/ClusteringFunction.java | 1 - .../pace/clustering/ClusteringResolver.java | 4 +-- .../pace/clustering/ImmutableFieldValue.java | 4 --- .../pace/clustering/LowercaseClustering.java | 4 --- .../dnetlib/pace/clustering/NgramPairs.java | 5 +-- .../eu/dnetlib/pace/clustering/Ngrams.java | 9 +---- .../pace/clustering/PersonClustering.java | 4 --- .../dnetlib/pace/clustering/PersonHash.java | 4 --- .../clustering/RandomClusteringFunction.java | 4 --- .../pace/clustering/SortedNgramPairs.java | 9 +---- .../clustering/SpaceTrimmingFieldValue.java | 4 --- .../dnetlib/pace/clustering/SuffixPrefix.java | 4 --- .../pace/clustering/UrlClustering.java | 8 ----- .../pace/condition/AbstractCondition.java | 10 ------ .../pace/condition/AlwaysTrueCondition.java | 3 -- .../dnetlib/pace/condition/ConditionAlgo.java | 3 -- .../pace/condition/ConditionResolver.java | 7 ++-- .../eu/dnetlib/pace/condition/ExactMatch.java | 4 --- .../eu/dnetlib/pace/condition/YearMatch.java | 2 -- .../dnetlib/pace/distance/DistanceAlgo.java | 4 --- .../pace/distance/DistanceResolver.java | 4 +-- .../distance/SecondStringDistanceAlgo.java | 19 ++-------- .../pace/distance/algo/AlwaysMatch.java | 6 +--- .../pace/distance/algo/ExactMatch.java | 6 +--- .../pace/distance/algo/JaroWinkler.java | 7 ++-- .../pace/distance/algo/JaroWinklerTitle.java | 6 +--- .../pace/distance/algo/Level2JaroWinkler.java | 6 ++++ .../distance/algo/Level2JaroWinklerTitle.java | 6 ++++ .../pace/distance/algo/Level2Levenstein.java | 6 ++++ .../pace/distance/algo/Levenstein.java | 6 ++-- .../pace/distance/algo/LevensteinTitle.java | 6 ++-- 
.../pace/distance/algo/MustBeDifferent.java | 6 ++++ .../pace/distance/algo/NullDistanceAlgo.java | 15 ++------ .../pace/distance/algo/SortedJaroWinkler.java | 6 ++++ .../algo/SortedLevel2JaroWinkler.java | 6 ++++ .../algo/SortedSecondStringDistanceAlgo.java | 5 +++ .../distance/algo/SubStringLevenstein.java | 14 +++----- .../pace/distance/algo/UrlMatcher.java | 5 +-- .../pace/distance/eval/ScoreResult.java | 14 ++++---- .../eu/dnetlib/pace/model/ClusteringDef.java | 6 +--- .../java/eu/dnetlib/pace/model/CondDef.java | 8 ++--- .../java/eu/dnetlib/pace/model/FieldDef.java | 10 +++--- .../clustering/ClusteringResolverTest.java | 30 ++++++++++++++++ .../pace/condition/ConditionResolverTest.java | 35 +++++++++++++++++++ pom.xml | 5 +++ 48 files changed, 169 insertions(+), 187 deletions(-) create mode 100644 dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringResolverTest.java create mode 100644 dnet-pace-core/src/test/java/eu/dnetlib/pace/condition/ConditionResolverTest.java diff --git a/dnet-pace-core/pom.xml b/dnet-pace-core/pom.xml index 1a41a74..51958c5 100644 --- a/dnet-pace-core/pom.xml +++ b/dnet-pace-core/pom.xml @@ -58,9 +58,11 @@ org.reflections reflections - 0.9.10 - + + org.apache.spark + spark-core_2.11 + diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java index 2885994..f9192ad 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java @@ -19,12 +19,6 @@ public abstract class AbstractClusteringFunction extends AbstractPaceFunctions i this.params = params; } - public AbstractClusteringFunction(){} - - public void setParams(Map params){ - this.params = params; - } - protected abstract Collection doApply(String s); @Override diff --git 
a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java index 09d2ce0..ee5efc9 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java @@ -14,10 +14,6 @@ public class Acronyms extends AbstractClusteringFunction { super(params); } - public Acronyms(){ - super(); - } - @Override protected Collection doApply(String s) { return extractAcronyms(s, param("max"), param("minLen"), param("maxLen")); diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java index 040b928..4fe1b59 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java @@ -12,5 +12,4 @@ public interface ClusteringFunction { public Map getParams(); - public void setParams(Map params); } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringResolver.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringResolver.java index 06a364c..feec3e2 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringResolver.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringResolver.java @@ -17,8 +17,8 @@ public class ClusteringResolver implements Serializable { .collect(Collectors.toMap(cl -> cl.getAnnotation(ClusteringClass.class).value(), cl -> (Class)cl)); } - public ClusteringFunction resolve(String clusteringFunction) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException { + public ClusteringFunction resolve(String clusteringFunction, Map params) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException { - return 
functionMap.get(clusteringFunction).newInstance(); + return functionMap.get(clusteringFunction).getDeclaredConstructor(Map.class).newInstance(params); } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java index 2d5b67a..fab8e98 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java @@ -13,10 +13,6 @@ public class ImmutableFieldValue extends AbstractClusteringFunction { super(params); } - public ImmutableFieldValue() { - super(); - } - @Override protected Collection doApply(final String s) { final List res = Lists.newArrayList(); diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java index 50d73cf..5ec8590 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java @@ -16,10 +16,6 @@ public class LowercaseClustering extends AbstractClusteringFunction { super(params); } - public LowercaseClustering(){ - super(); - } - @Override public Collection apply(List fields) { Collection c = Sets.newLinkedHashSet(); diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java index 6c96ca2..06885be 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java @@ -1,6 +1,7 @@ package eu.dnetlib.pace.clustering; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -9,10 +10,6 @@ import com.google.common.collect.Lists; @ClusteringClass("ngrampairs") 
public class NgramPairs extends Ngrams { - public NgramPairs() { - super(); - } - public NgramPairs(Map params) { super(params); } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java index 49ce404..8549468 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java @@ -1,9 +1,6 @@ package eu.dnetlib.pace.clustering; -import java.util.Collection; -import java.util.LinkedHashSet; -import java.util.Map; -import java.util.StringTokenizer; +import java.util.*; @ClusteringClass("ngrams") public class Ngrams extends AbstractClusteringFunction { @@ -12,10 +9,6 @@ public class Ngrams extends AbstractClusteringFunction { super(params); } - public Ngrams() { - super(); - } - @Override protected Collection doApply(String s) { return getNgrams(s, param("ngramLen"), param("max"), param("maxPerToken"), param("minNgramLen")); diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java index 4230079..67b7dcd 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java @@ -30,10 +30,6 @@ public class PersonClustering extends AbstractPaceFunctions implements Clusterin this.params = params; } - public void setParams(Map params){ - this.params = params; - } - @Override public Collection apply(final List fields) { final Set hashes = Sets.newHashSet(); diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java index b0e57e9..fcb01b9 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java +++ 
b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java @@ -17,10 +17,6 @@ public class PersonHash extends AbstractClusteringFunction { super(params); } - public PersonHash(){ - super(); - } - @Override protected Collection doApply(final String s) { final List res = Lists.newArrayList(); diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java index 893abe8..f012aac 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java @@ -9,10 +9,6 @@ public class RandomClusteringFunction extends AbstractClusteringFunction { super(params); } - public RandomClusteringFunction(){ - super(); - } - @Override protected Collection doApply(String s) { // TODO Auto-generated method stub diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java index 9ce12fc..2f475fe 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java @@ -1,9 +1,6 @@ package eu.dnetlib.pace.clustering; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; +import java.util.*; import com.google.common.base.Joiner; import com.google.common.base.Splitter; @@ -16,10 +13,6 @@ public class SortedNgramPairs extends NgramPairs { super(params); } - public SortedNgramPairs(){ - super(); - } - @Override protected Collection doApply(String s) { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java index 8e1fdf3..22dc490 100644 --- 
a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java @@ -16,10 +16,6 @@ public class SpaceTrimmingFieldValue extends AbstractClusteringFunction { super(params); } - public SpaceTrimmingFieldValue(){ - super(); - } - @Override protected Collection doApply(final String s) { final List res = Lists.newArrayList(); diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java index 25520d9..3960331 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java @@ -13,10 +13,6 @@ public class SuffixPrefix extends AbstractClusteringFunction { super(params); } - public SuffixPrefix(){ - super(); - } - @Override protected Collection doApply(String s) { return suffixPrefix(s, param("len"), param("max")); diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java index 4c0c33f..3c02613 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java @@ -20,14 +20,6 @@ public class UrlClustering extends AbstractPaceFunctions implements ClusteringFu this.params = params; } - public UrlClustering() { - super(); - } - - public void setParams(Map params){ - this.params = params; - } - @Override public Collection apply(List fields) { return fields.stream() diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java index adc6825..cf68e74 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java +++ 
b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java @@ -25,16 +25,6 @@ public abstract class AbstractCondition extends AbstractPaceFunctions implements this.fields = fields; } - public AbstractCondition(){} - - public void setCond(String cond){ - this.cond = cond; - } - - public void setFields(List fields){ - this.fields = fields; - } - protected abstract ConditionEval verify(FieldDef fd, Field a, Field b); @Override diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AlwaysTrueCondition.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AlwaysTrueCondition.java index a67567e..2274da5 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AlwaysTrueCondition.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AlwaysTrueCondition.java @@ -17,9 +17,6 @@ public class AlwaysTrueCondition extends AbstractCondition { super(cond, fields); } - public AlwaysTrueCondition(){ - super(); - } @Override protected ConditionEval verify(final FieldDef fd, final Field a, final Field b) { return new ConditionEval(cond, a, b, 1); diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionAlgo.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionAlgo.java index 1293c7d..787ad9a 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionAlgo.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionAlgo.java @@ -24,7 +24,4 @@ public interface ConditionAlgo { */ public abstract ConditionEvalMap verify(Document a, Document b); - public void setFields(List fields); - public void setCond(String name); - } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionResolver.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionResolver.java index 58a30dd..577bcdb 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionResolver.java +++ 
b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionResolver.java @@ -1,9 +1,12 @@ package eu.dnetlib.pace.condition; import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; +import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import eu.dnetlib.pace.model.FieldDef; import org.reflections.Reflections; public class ConditionResolver implements Serializable { @@ -16,7 +19,7 @@ public class ConditionResolver implements Serializable { .collect(Collectors.toMap(cl -> cl.getAnnotation(ConditionClass.class).value(), cl -> (Class)cl)); } - public ConditionAlgo resolve(String name) throws IllegalAccessException, InstantiationException { - return functionMap.get(name).newInstance(); + public ConditionAlgo resolve(String name, List fields) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException { + return functionMap.get(name).getDeclaredConstructor(String.class, List.class).newInstance(name, fields); } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java index f4ba8de..2776576 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java @@ -19,10 +19,6 @@ public class ExactMatch extends AbstractCondition { super(cond, fields); } - public ExactMatch(){ - super(); - } - @Override protected ConditionEval verify(final FieldDef fd, final Field a, final Field b) { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/YearMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/YearMatch.java index 54d0ba8..71bb6cf 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/YearMatch.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/YearMatch.java @@ -23,8 +23,6 @@ public class YearMatch extends AbstractCondition { 
super(cond, fields); } - public YearMatch(){} - // @Override // public boolean verify(final Document a, final Document b) { // boolean res = true; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceAlgo.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceAlgo.java index c2749c5..5e4f69f 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceAlgo.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceAlgo.java @@ -13,9 +13,5 @@ public interface DistanceAlgo { public abstract double distance(Field a, Field b); public double getWeight(); - public Map getParams(); - - public void setWeight(double w); - public void setParams(Map params); } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceResolver.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceResolver.java index 0937760..d219ac4 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceResolver.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceResolver.java @@ -17,8 +17,8 @@ public class DistanceResolver implements Serializable { .collect(Collectors.toMap(cl -> cl.getAnnotation(DistanceClass.class).value(), cl -> (Class)cl)); } - public DistanceAlgo resolve(String algo) throws IllegalAccessException, InstantiationException { + public DistanceAlgo resolve(String algo, Map params) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException { - return functionMap.get(algo).newInstance(); + return functionMap.get(algo).getDeclaredConstructor(Map.class).newInstance(params); } } \ No newline at end of file diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/SecondStringDistanceAlgo.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/SecondStringDistanceAlgo.java index 785c00b..9cc3529 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/SecondStringDistanceAlgo.java +++ 
b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/SecondStringDistanceAlgo.java @@ -28,23 +28,10 @@ public abstract class SecondStringDistanceAlgo extends AbstractPaceFunctions imp private Map params; - protected SecondStringDistanceAlgo(){ - } - - protected SecondStringDistanceAlgo(Map params){ - this.params = params; - } - - public void setWeight(double w){ - this.weight = w; - } - - public Map getParams(){ - return this.params; - } - - public void setParams(Map params){ + protected SecondStringDistanceAlgo(Map params, final AbstractStringDistance ssalgo){ this.params = params; + this.weight = params.get("weight").doubleValue(); + this.ssalgo = ssalgo; } /** diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/AlwaysMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/AlwaysMatch.java index 7039f05..503235c 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/AlwaysMatch.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/AlwaysMatch.java @@ -9,12 +9,8 @@ import java.util.Map; @DistanceClass("AlwaysMatch") public class AlwaysMatch extends SecondStringDistanceAlgo { - public AlwaysMatch(){ - super(); - } - public AlwaysMatch(final Map params){ - super(params); + super(params, new com.wcohen.ss.JaroWinkler()); } public AlwaysMatch(final double weight) { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/ExactMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/ExactMatch.java index 2e714c4..44d881e 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/ExactMatch.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/ExactMatch.java @@ -9,12 +9,8 @@ import java.util.Map; @DistanceClass("ExactMatch") public class ExactMatch extends SecondStringDistanceAlgo { - public ExactMatch(){ - super(); - } - public ExactMatch(Map params){ - super(params); + super(params, new com.wcohen.ss.JaroWinkler()); } public 
ExactMatch(final double weight) { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinkler.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinkler.java index ea1e079..20c0912 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinkler.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinkler.java @@ -4,18 +4,15 @@ import com.wcohen.ss.AbstractStringDistance; import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.SecondStringDistanceAlgo; +import java.io.Serializable; import java.util.Map; //case class JaroWinkler(w: Double) extends SecondStringDistanceAlgo(w, new com.wcohen.ss.JaroWinkler()) @DistanceClass("JaroWinkler") public class JaroWinkler extends SecondStringDistanceAlgo { - public JaroWinkler(){ - super(); - } - public JaroWinkler(Map params){ - super(params); + super(params, new com.wcohen.ss.JaroWinkler()); } public JaroWinkler(double weight) { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerTitle.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerTitle.java index b37c88d..ff4d6de 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerTitle.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerTitle.java @@ -10,12 +10,8 @@ import java.util.Map; @DistanceClass("JaroWinklerTitle") public class JaroWinklerTitle extends SecondStringDistanceAlgo { - public JaroWinklerTitle(){ - super(); - } - public JaroWinklerTitle(Map params){ - super(params); + super(params, new com.wcohen.ss.JaroWinkler()); } public JaroWinklerTitle(double weight) { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinkler.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinkler.java index a2afc38..135fc53 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinkler.java 
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinkler.java @@ -4,9 +4,15 @@ import com.wcohen.ss.AbstractStringDistance; import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.SecondStringDistanceAlgo; +import java.util.Map; + @DistanceClass("Level2JaroWinkler") public class Level2JaroWinkler extends SecondStringDistanceAlgo { + public Level2JaroWinkler(Map params){ + super(params, new com.wcohen.ss.Level2JaroWinkler()); + } + public Level2JaroWinkler(double w) { super(w, new com.wcohen.ss.Level2JaroWinkler()); } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinklerTitle.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinklerTitle.java index 272e530..2d05a00 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinklerTitle.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinklerTitle.java @@ -4,9 +4,15 @@ import com.wcohen.ss.AbstractStringDistance; import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.SecondStringDistanceAlgo; +import java.util.Map; + @DistanceClass("Level2JaroWinklerTitle") public class Level2JaroWinklerTitle extends SecondStringDistanceAlgo { + public Level2JaroWinklerTitle(Map params){ + super(params, new com.wcohen.ss.Level2JaroWinkler()); + } + public Level2JaroWinklerTitle(final double w) { super(w, new com.wcohen.ss.Level2JaroWinkler()); } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2Levenstein.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2Levenstein.java index 1e955bd..767c597 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2Levenstein.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2Levenstein.java @@ -4,9 +4,15 @@ import com.wcohen.ss.AbstractStringDistance; import eu.dnetlib.pace.distance.DistanceClass; import 
eu.dnetlib.pace.distance.SecondStringDistanceAlgo; +import java.util.Map; + @DistanceClass("Level2Levenstein") public class Level2Levenstein extends SecondStringDistanceAlgo { + public Level2Levenstein(Map params){ + super(params, new com.wcohen.ss.Level2Levenstein()); + } + public Level2Levenstein(double w) { super(w, new com.wcohen.ss.Level2Levenstein()); } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Levenstein.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Levenstein.java index 2e014b6..d9ba5f7 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Levenstein.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Levenstein.java @@ -4,11 +4,13 @@ import com.wcohen.ss.AbstractStringDistance; import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.SecondStringDistanceAlgo; +import java.util.Map; + @DistanceClass("Levenstein") public class Levenstein extends SecondStringDistanceAlgo { - public Levenstein(){ - super(new com.wcohen.ss.Levenstein()); + public Levenstein(Map params){ + super(params, new com.wcohen.ss.Levenstein()); } public Levenstein(double w) { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/LevensteinTitle.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/LevensteinTitle.java index c66f972..10de859 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/LevensteinTitle.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/LevensteinTitle.java @@ -4,11 +4,13 @@ import com.wcohen.ss.AbstractStringDistance; import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.SecondStringDistanceAlgo; +import java.util.Map; + @DistanceClass("LevensteinTitle") public class LevensteinTitle extends SecondStringDistanceAlgo { - public LevensteinTitle(){ - super(new com.wcohen.ss.Levenstein()); + public LevensteinTitle(Map params){ + super(params, new 
com.wcohen.ss.Levenstein()); } public LevensteinTitle(final double w) { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/MustBeDifferent.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/MustBeDifferent.java index 0acb82c..e794f02 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/MustBeDifferent.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/MustBeDifferent.java @@ -4,9 +4,15 @@ import com.wcohen.ss.AbstractStringDistance; import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.SecondStringDistanceAlgo; +import java.util.Map; + @DistanceClass("MustBeDifferent") public class MustBeDifferent extends SecondStringDistanceAlgo { + public MustBeDifferent(Map params){ + super(params, new com.wcohen.ss.Levenstein()); + } + public MustBeDifferent(final double weight) { super(weight, new com.wcohen.ss.JaroWinkler()); } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/NullDistanceAlgo.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/NullDistanceAlgo.java index ef798cb..8afc45f 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/NullDistanceAlgo.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/NullDistanceAlgo.java @@ -13,6 +13,9 @@ import java.util.Map; @DistanceClass("Null") public class NullDistanceAlgo implements DistanceAlgo { + public NullDistanceAlgo(Map params){ + } + @Override public double distance(Field a, Field b) { return 0.0; @@ -23,16 +26,4 @@ public class NullDistanceAlgo implements DistanceAlgo { return 0.0; } - @Override - public void setWeight(double w){ - } - - @Override - public Map getParams() { - return null; - } - - @Override - public void setParams(Map params) { - } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedJaroWinkler.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedJaroWinkler.java index 5f71600..e3175a1 
100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedJaroWinkler.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedJaroWinkler.java @@ -3,12 +3,18 @@ package eu.dnetlib.pace.distance.algo; import com.wcohen.ss.AbstractStringDistance; import eu.dnetlib.pace.distance.DistanceClass; +import java.util.Map; + /** * The Class SortedJaroWinkler. */ @DistanceClass("SortedJaroWinkler") public class SortedJaroWinkler extends SortedSecondStringDistanceAlgo { + public SortedJaroWinkler(Map params){ + super(params, new com.wcohen.ss.Levenstein()); + } + /** * Instantiates a new sorted jaro winkler. * diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedLevel2JaroWinkler.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedLevel2JaroWinkler.java index 493bbef..e53df09 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedLevel2JaroWinkler.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedLevel2JaroWinkler.java @@ -3,6 +3,8 @@ package eu.dnetlib.pace.distance.algo; import com.wcohen.ss.AbstractStringDistance; import eu.dnetlib.pace.distance.DistanceClass; +import java.util.Map; + /** * The Class SortedJaroWinkler. */ @@ -19,6 +21,10 @@ public class SortedLevel2JaroWinkler extends SortedSecondStringDistanceAlgo { super(weight, new com.wcohen.ss.Level2JaroWinkler()); } + public SortedLevel2JaroWinkler(final Map params){ + super(params, new com.wcohen.ss.Level2JaroWinkler()); + } + /** * Instantiates a new sorted jaro winkler. 
* diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedSecondStringDistanceAlgo.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedSecondStringDistanceAlgo.java index d47fbba..8a9c514 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedSecondStringDistanceAlgo.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedSecondStringDistanceAlgo.java @@ -2,6 +2,7 @@ package eu.dnetlib.pace.distance.algo; import java.util.Collections; import java.util.List; +import java.util.Map; import com.google.common.collect.Lists; import com.wcohen.ss.AbstractStringDistance; @@ -27,6 +28,10 @@ public abstract class SortedSecondStringDistanceAlgo extends SecondStringDistanc super(weight, ssalgo); } + protected SortedSecondStringDistanceAlgo(final Map params, final AbstractStringDistance ssalgo){ + super(params.get("weight").doubleValue(), ssalgo); + } + /* * (non-Javadoc) * diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SubStringLevenstein.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SubStringLevenstein.java index 9fee7df..8f0c024 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SubStringLevenstein.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SubStringLevenstein.java @@ -20,10 +20,6 @@ public class SubStringLevenstein extends SecondStringDistanceAlgo { /** The limit. */ protected int limit; - public SubStringLevenstein() { - super(new com.wcohen.ss.Levenstein()); - } - /** * Instantiates a new sub string levenstein. * @@ -34,6 +30,11 @@ public class SubStringLevenstein extends SecondStringDistanceAlgo { super(w, new com.wcohen.ss.Levenstein()); } + public SubStringLevenstein(Map params){ + super(params, new com.wcohen.ss.Levenstein()); + this.limit = params.get("limit").intValue(); + } + /** * Instantiates a new sub string levenstein. 
* @@ -95,9 +96,4 @@ public class SubStringLevenstein extends SecondStringDistanceAlgo { return 1 / Math.pow(Math.abs(d) + 1, 0.1); } - public void setParams(Map params){ - this.limit = params.get("limit").intValue(); //necessary because this class needs also the limit - super.setParams(params); - } - } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/UrlMatcher.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/UrlMatcher.java index 2aa7ca1..eacfdc0 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/UrlMatcher.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/UrlMatcher.java @@ -13,8 +13,9 @@ public class UrlMatcher extends Levenstein { private Map params; - public UrlMatcher(){ - super(); + public UrlMatcher(Map params){ + super(params); + this.params = params; } public UrlMatcher(double weight, Map params) { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/eval/ScoreResult.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/eval/ScoreResult.java index 61d5c93..4e394b2 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/eval/ScoreResult.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/eval/ScoreResult.java @@ -1,7 +1,9 @@ package eu.dnetlib.pace.distance.eval; import com.google.gson.GsonBuilder; +import org.codehaus.jackson.map.ObjectMapper; +import java.io.IOException; import java.io.Serializable; /** @@ -51,12 +53,10 @@ public class ScoreResult implements Serializable { @Override public String toString() { - //TODO cannot print: why? 
-// final GsonBuilder b = new GsonBuilder() -// .serializeSpecialFloatingPointValues() -// .serializeNulls(); -// -// return b.setPrettyPrinting().create().toJson(this); - return "{}"; + try { + return new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this); + } catch (IOException e) { + return "ScoreResult serialization failed: " + e; /* toString() on the stack-trace array would only print its type and identity hash */ + } } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java index 7e09d44..7d6cdcb 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java @@ -31,15 +31,11 @@ public class ClusteringDef implements Serializable { public ClusteringFunction getClusteringFunction() { try { - ClusteringFunction clusteringFunction = clusteringResolver.resolve(getName()); - clusteringFunction.setParams(params); - return clusteringFunction; - + return clusteringResolver.resolve(getName(), params); } catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) { e.printStackTrace(); return new RandomClusteringFunction(getParams()); } - } public List getFields() { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java index 14de69a..fda8653 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java @@ -1,6 +1,7 @@ package eu.dnetlib.pace.model; import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; import java.util.List; import com.google.gson.Gson; @@ -19,11 +20,8 @@ public class CondDef implements Serializable { public ConditionAlgo getConditionAlgo(final List fields) { try { - ConditionAlgo conditionAlgo = conditionResolver.resolve(getName()); - conditionAlgo.setFields(fields); -
conditionAlgo.setCond(getName()); - return conditionAlgo; - } catch (IllegalAccessException | InstantiationException e) { + return conditionResolver.resolve(getName(), fields); + } catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) { e.printStackTrace(); return new AlwaysTrueCondition(getName(), fields); } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java index 3f4619d..8b72501 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java @@ -1,6 +1,7 @@ package eu.dnetlib.pace.model; import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -75,13 +76,10 @@ public class FieldDef implements Serializable { } params.put("limit", getLimit()); params.put("weight", getWeight()); - DistanceAlgo distanceAlgo = distanceResolver.resolve(getAlgo()); - distanceAlgo.setParams(params); - distanceAlgo.setWeight(getWeight()); - return distanceAlgo; - } catch (IllegalAccessException | InstantiationException e) { + return distanceResolver.resolve(getAlgo(), params); + } catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) { e.printStackTrace(); - return new NullDistanceAlgo(); + return new NullDistanceAlgo(params); } } diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringResolverTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringResolverTest.java new file mode 100644 index 0000000..6d01176 --- /dev/null +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringResolverTest.java @@ -0,0 +1,30 @@ +package eu.dnetlib.pace.clustering; + +import org.junit.Before; +import org.junit.Test; + +import 
java.lang.reflect.InvocationTargetException; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +public class ClusteringResolverTest { + + private ClusteringResolver clusteringResolver; + private Map params = new HashMap(); + + @Before + public void setUp(){ + clusteringResolver = new ClusteringResolver(); + } + + @Test + public void testResolve() throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException { + + ClusteringFunction ngrams = clusteringResolver.resolve("ngrams", params); + + assertEquals(ngrams.getClass(), Ngrams.class); + } + +} diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/condition/ConditionResolverTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/condition/ConditionResolverTest.java new file mode 100644 index 0000000..87d1c37 --- /dev/null +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/condition/ConditionResolverTest.java @@ -0,0 +1,35 @@ +package eu.dnetlib.pace.condition; + +import eu.dnetlib.pace.clustering.ClusteringFunction; +import eu.dnetlib.pace.clustering.ClusteringResolver; +import eu.dnetlib.pace.clustering.Ngrams; +import eu.dnetlib.pace.model.FieldDef; +import org.junit.Before; +import org.junit.Test; + +import java.lang.reflect.InvocationTargetException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +public class ConditionResolverTest { + + private ConditionResolver conditionResolver; + private List fields; + private String name; + + @Before + public void setUp(){ + conditionResolver = new ConditionResolver(); + } + + @Test + public void testResolve() throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException { + + ConditionAlgo sizeMatch = conditionResolver.resolve("sizeMatch", fields); + + assertEquals(sizeMatch.getClass(), SizeMatch.class); + } +} diff --git a/pom.xml b/pom.xml index 
e445041..c79573b 100644 --- a/pom.xml +++ b/pom.xml @@ -162,6 +162,11 @@ ${junit.version} test + + org.reflections + reflections + 0.9.10 +