diff --git a/dnet-pace-core/pom.xml b/dnet-pace-core/pom.xml
index 1a41a7416..51958c50c 100644
--- a/dnet-pace-core/pom.xml
+++ b/dnet-pace-core/pom.xml
@@ -58,9 +58,11 @@
org.reflections
reflections
- 0.9.10
-
+
+ org.apache.spark
+ spark-core_2.11
+
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java
index 2885994d9..f9192ad51 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java
@@ -19,12 +19,6 @@ public abstract class AbstractClusteringFunction extends AbstractPaceFunctions i
this.params = params;
}
- public AbstractClusteringFunction(){}
-
- public void setParams(Map params){
- this.params = params;
- }
-
protected abstract Collection doApply(String s);
@Override
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java
index 09d2ce0e6..ee5efc967 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java
@@ -14,10 +14,6 @@ public class Acronyms extends AbstractClusteringFunction {
super(params);
}
- public Acronyms(){
- super();
- }
-
@Override
protected Collection doApply(String s) {
return extractAcronyms(s, param("max"), param("minLen"), param("maxLen"));
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java
index 040b92824..4fe1b596e 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java
@@ -12,5 +12,4 @@ public interface ClusteringFunction {
public Map getParams();
- public void setParams(Map params);
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringResolver.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringResolver.java
index 06a364c22..feec3e213 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringResolver.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringResolver.java
@@ -17,8 +17,8 @@ public class ClusteringResolver implements Serializable {
.collect(Collectors.toMap(cl -> cl.getAnnotation(ClusteringClass.class).value(), cl -> (Class)cl));
}
- public ClusteringFunction resolve(String clusteringFunction) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException {
+ /**
+ * Instantiates the clustering function registered under the given name through its (Map) constructor.
+ *
+ * @param clusteringFunction the name the implementation is registered under
+ * @param params the configuration map handed to the implementation's constructor
+ * @return a new instance of the registered implementation
+ * @throws IllegalArgumentException if no implementation is registered under that name
+ */
+ public ClusteringFunction resolve(String clusteringFunction, Map params) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException {
- return functionMap.get(clusteringFunction).newInstance();
+ // An unknown name would otherwise surface as a NullPointerException with no hint of the offending name.
+ if (!functionMap.containsKey(clusteringFunction)) {
+ throw new IllegalArgumentException("unknown clustering function: " + clusteringFunction);
+ }
+ return functionMap.get(clusteringFunction).getDeclaredConstructor(Map.class).newInstance(params);
}
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java
index 2d5b67ab5..fab8e989d 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java
@@ -13,10 +13,6 @@ public class ImmutableFieldValue extends AbstractClusteringFunction {
super(params);
}
- public ImmutableFieldValue() {
- super();
- }
-
@Override
protected Collection doApply(final String s) {
final List res = Lists.newArrayList();
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java
index 50d73cff9..5ec8590aa 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java
@@ -16,10 +16,6 @@ public class LowercaseClustering extends AbstractClusteringFunction {
super(params);
}
- public LowercaseClustering(){
- super();
- }
-
@Override
public Collection apply(List fields) {
Collection c = Sets.newLinkedHashSet();
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java
index 6c96ca214..06885be9f 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java
@@ -1,6 +1,7 @@
package eu.dnetlib.pace.clustering;
import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -9,10 +10,6 @@ import com.google.common.collect.Lists;
@ClusteringClass("ngrampairs")
public class NgramPairs extends Ngrams {
- public NgramPairs() {
- super();
- }
-
public NgramPairs(Map params) {
super(params);
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java
index 49ce40495..8549468db 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java
@@ -1,9 +1,6 @@
package eu.dnetlib.pace.clustering;
-import java.util.Collection;
-import java.util.LinkedHashSet;
-import java.util.Map;
-import java.util.StringTokenizer;
+import java.util.*;
@ClusteringClass("ngrams")
public class Ngrams extends AbstractClusteringFunction {
@@ -12,10 +9,6 @@ public class Ngrams extends AbstractClusteringFunction {
super(params);
}
- public Ngrams() {
- super();
- }
-
@Override
protected Collection doApply(String s) {
return getNgrams(s, param("ngramLen"), param("max"), param("maxPerToken"), param("minNgramLen"));
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java
index 42300797e..67b7dcd58 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java
@@ -30,10 +30,6 @@ public class PersonClustering extends AbstractPaceFunctions implements Clusterin
this.params = params;
}
- public void setParams(Map params){
- this.params = params;
- }
-
@Override
public Collection apply(final List fields) {
final Set hashes = Sets.newHashSet();
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java
index b0e57e905..fcb01b994 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java
@@ -17,10 +17,6 @@ public class PersonHash extends AbstractClusteringFunction {
super(params);
}
- public PersonHash(){
- super();
- }
-
@Override
protected Collection doApply(final String s) {
final List res = Lists.newArrayList();
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java
index 893abe8e0..f012aacab 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java
@@ -9,10 +9,6 @@ public class RandomClusteringFunction extends AbstractClusteringFunction {
super(params);
}
- public RandomClusteringFunction(){
- super();
- }
-
@Override
protected Collection doApply(String s) {
// TODO Auto-generated method stub
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java
index 9ce12fc30..2f475fe71 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java
@@ -1,9 +1,6 @@
package eu.dnetlib.pace.clustering;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
@@ -16,10 +13,6 @@ public class SortedNgramPairs extends NgramPairs {
super(params);
}
- public SortedNgramPairs(){
- super();
- }
-
@Override
protected Collection doApply(String s) {
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java
index 8e1fdf3e7..22dc4906b 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java
@@ -16,10 +16,6 @@ public class SpaceTrimmingFieldValue extends AbstractClusteringFunction {
super(params);
}
- public SpaceTrimmingFieldValue(){
- super();
- }
-
@Override
protected Collection doApply(final String s) {
final List res = Lists.newArrayList();
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java
index 25520d97c..3960331c9 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java
@@ -13,10 +13,6 @@ public class SuffixPrefix extends AbstractClusteringFunction {
super(params);
}
- public SuffixPrefix(){
- super();
- }
-
@Override
protected Collection doApply(String s) {
return suffixPrefix(s, param("len"), param("max"));
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java
index 4c0c33fd1..3c0261376 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java
@@ -20,14 +20,6 @@ public class UrlClustering extends AbstractPaceFunctions implements ClusteringFu
this.params = params;
}
- public UrlClustering() {
- super();
- }
-
- public void setParams(Map params){
- this.params = params;
- }
-
@Override
public Collection apply(List fields) {
return fields.stream()
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java
index adc68254e..cf68e740f 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AbstractCondition.java
@@ -25,16 +25,6 @@ public abstract class AbstractCondition extends AbstractPaceFunctions implements
this.fields = fields;
}
- public AbstractCondition(){}
-
- public void setCond(String cond){
- this.cond = cond;
- }
-
- public void setFields(List fields){
- this.fields = fields;
- }
-
protected abstract ConditionEval verify(FieldDef fd, Field a, Field b);
@Override
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AlwaysTrueCondition.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AlwaysTrueCondition.java
index a67567eeb..2274da5d5 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AlwaysTrueCondition.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/AlwaysTrueCondition.java
@@ -17,9 +17,6 @@ public class AlwaysTrueCondition extends AbstractCondition {
super(cond, fields);
}
- public AlwaysTrueCondition(){
- super();
- }
@Override
protected ConditionEval verify(final FieldDef fd, final Field a, final Field b) {
return new ConditionEval(cond, a, b, 1);
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionAlgo.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionAlgo.java
index 1293c7d95..787ad9af1 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionAlgo.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionAlgo.java
@@ -24,7 +24,4 @@ public interface ConditionAlgo {
*/
public abstract ConditionEvalMap verify(Document a, Document b);
- public void setFields(List fields);
- public void setCond(String name);
-
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionResolver.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionResolver.java
index 58a30ddda..577bcdb6e 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionResolver.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionResolver.java
@@ -1,9 +1,12 @@
package eu.dnetlib.pace.condition;
import java.io.Serializable;
+import java.lang.reflect.InvocationTargetException;
+import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
+import eu.dnetlib.pace.model.FieldDef;
import org.reflections.Reflections;
public class ConditionResolver implements Serializable {
@@ -16,7 +19,7 @@ public class ConditionResolver implements Serializable {
.collect(Collectors.toMap(cl -> cl.getAnnotation(ConditionClass.class).value(), cl -> (Class)cl));
}
- public ConditionAlgo resolve(String name) throws IllegalAccessException, InstantiationException {
- return functionMap.get(name).newInstance();
+ /**
+ * Instantiates the condition registered under the given name through its (String, List) constructor.
+ *
+ * @param name the name the condition is registered under; also passed to the constructor
+ * @param fields the field definitions handed to the condition's constructor
+ * @return a new instance of the registered condition
+ * @throws IllegalArgumentException if no condition is registered under that name
+ */
+ public ConditionAlgo resolve(String name, List fields) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException {
+ // An unknown name would otherwise surface as a NullPointerException with no hint of the offending name.
+ if (!functionMap.containsKey(name)) {
+ throw new IllegalArgumentException("unknown condition: " + name);
+ }
+ return functionMap.get(name).getDeclaredConstructor(String.class, List.class).newInstance(name, fields);
}
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java
index f4ba8de42..2776576c4 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ExactMatch.java
@@ -19,10 +19,6 @@ public class ExactMatch extends AbstractCondition {
super(cond, fields);
}
- public ExactMatch(){
- super();
- }
-
@Override
protected ConditionEval verify(final FieldDef fd, final Field a, final Field b) {
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/YearMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/YearMatch.java
index 54d0ba89f..71bb6cfd6 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/YearMatch.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/YearMatch.java
@@ -23,8 +23,6 @@ public class YearMatch extends AbstractCondition {
super(cond, fields);
}
- public YearMatch(){}
-
// @Override
// public boolean verify(final Document a, final Document b) {
// boolean res = true;
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceAlgo.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceAlgo.java
index c2749c503..5e4f69f51 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceAlgo.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceAlgo.java
@@ -13,9 +13,5 @@ public interface DistanceAlgo {
public abstract double distance(Field a, Field b);
public double getWeight();
- public Map getParams();
-
- public void setWeight(double w);
- public void setParams(Map params);
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceResolver.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceResolver.java
index 09377605e..d219ac440 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceResolver.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceResolver.java
@@ -17,8 +17,8 @@ public class DistanceResolver implements Serializable {
.collect(Collectors.toMap(cl -> cl.getAnnotation(DistanceClass.class).value(), cl -> (Class)cl));
}
- public DistanceAlgo resolve(String algo) throws IllegalAccessException, InstantiationException {
+ /**
+ * Instantiates the distance algorithm registered under the given name through its (Map) constructor.
+ *
+ * @param algo the name the algorithm is registered under
+ * @param params the configuration map handed to the algorithm's constructor
+ * @return a new instance of the registered algorithm
+ * @throws IllegalArgumentException if no algorithm is registered under that name
+ */
+ public DistanceAlgo resolve(String algo, Map params) throws IllegalAccessException, InstantiationException, NoSuchMethodException, java.lang.reflect.InvocationTargetException {
- return functionMap.get(algo).newInstance();
+ // An unknown name would otherwise surface as a NullPointerException with no hint of the offending name.
+ if (!functionMap.containsKey(algo)) {
+ throw new IllegalArgumentException("unknown distance algorithm: " + algo);
+ }
+ return functionMap.get(algo).getDeclaredConstructor(Map.class).newInstance(params);
}
}
\ No newline at end of file
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/SecondStringDistanceAlgo.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/SecondStringDistanceAlgo.java
index 785c00bc3..9cc35298f 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/SecondStringDistanceAlgo.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/SecondStringDistanceAlgo.java
@@ -28,23 +28,10 @@ public abstract class SecondStringDistanceAlgo extends AbstractPaceFunctions imp
private Map params;
- protected SecondStringDistanceAlgo(){
- }
-
- protected SecondStringDistanceAlgo(Map params){
- this.params = params;
- }
-
- public void setWeight(double w){
- this.weight = w;
- }
-
- public Map getParams(){
- return this.params;
- }
-
- public void setParams(Map params){
+ /**
+ * Builds the algorithm from its configuration map and the wrapped string distance.
+ *
+ * @param params configuration map; must contain a numeric "weight" entry
+ * @param ssalgo the underlying string distance implementation
+ * @throws IllegalArgumentException if params is null or has no "weight" entry
+ */
+ protected SecondStringDistanceAlgo(Map params, final AbstractStringDistance ssalgo){
+ // Fail with a message naming the missing key instead of a bare NullPointerException.
+ if (params == null || params.get("weight") == null) {
+ throw new IllegalArgumentException("missing mandatory distance parameter: weight");
+ }
this.params = params;
+ this.weight = params.get("weight").doubleValue();
+ this.ssalgo = ssalgo;
}
/**
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/AlwaysMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/AlwaysMatch.java
index 7039f05a6..503235c13 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/AlwaysMatch.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/AlwaysMatch.java
@@ -9,12 +9,8 @@ import java.util.Map;
@DistanceClass("AlwaysMatch")
public class AlwaysMatch extends SecondStringDistanceAlgo {
- public AlwaysMatch(){
- super();
- }
-
public AlwaysMatch(final Map params){
- super(params);
+ super(params, new com.wcohen.ss.JaroWinkler());
}
public AlwaysMatch(final double weight) {
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/ExactMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/ExactMatch.java
index 2e714c4af..44d881e55 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/ExactMatch.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/ExactMatch.java
@@ -9,12 +9,8 @@ import java.util.Map;
@DistanceClass("ExactMatch")
public class ExactMatch extends SecondStringDistanceAlgo {
- public ExactMatch(){
- super();
- }
-
public ExactMatch(Map params){
- super(params);
+ super(params, new com.wcohen.ss.JaroWinkler());
}
public ExactMatch(final double weight) {
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinkler.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinkler.java
index ea1e0798e..20c09121d 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinkler.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinkler.java
@@ -4,18 +4,15 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
+import java.io.Serializable;
import java.util.Map;
//case class JaroWinkler(w: Double) extends SecondStringDistanceAlgo(w, new com.wcohen.ss.JaroWinkler())
@DistanceClass("JaroWinkler")
public class JaroWinkler extends SecondStringDistanceAlgo {
- public JaroWinkler(){
- super();
- }
-
public JaroWinkler(Map params){
- super(params);
+ super(params, new com.wcohen.ss.JaroWinkler());
}
public JaroWinkler(double weight) {
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerTitle.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerTitle.java
index b37c88d63..ff4d6de1f 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerTitle.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/JaroWinklerTitle.java
@@ -10,12 +10,8 @@ import java.util.Map;
@DistanceClass("JaroWinklerTitle")
public class JaroWinklerTitle extends SecondStringDistanceAlgo {
- public JaroWinklerTitle(){
- super();
- }
-
public JaroWinklerTitle(Map params){
- super(params);
+ super(params, new com.wcohen.ss.JaroWinkler());
}
public JaroWinklerTitle(double weight) {
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinkler.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinkler.java
index a2afc3872..135fc5379 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinkler.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinkler.java
@@ -4,9 +4,15 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
+import java.util.Map;
+
@DistanceClass("Level2JaroWinkler")
public class Level2JaroWinkler extends SecondStringDistanceAlgo {
+ public Level2JaroWinkler(Map params){
+ super(params, new com.wcohen.ss.Level2JaroWinkler());
+ }
+
public Level2JaroWinkler(double w) {
super(w, new com.wcohen.ss.Level2JaroWinkler());
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinklerTitle.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinklerTitle.java
index 272e53035..2d05a0084 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinklerTitle.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2JaroWinklerTitle.java
@@ -4,9 +4,15 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
+import java.util.Map;
+
@DistanceClass("Level2JaroWinklerTitle")
public class Level2JaroWinklerTitle extends SecondStringDistanceAlgo {
+ public Level2JaroWinklerTitle(Map params){
+ super(params, new com.wcohen.ss.Level2JaroWinkler());
+ }
+
public Level2JaroWinklerTitle(final double w) {
super(w, new com.wcohen.ss.Level2JaroWinkler());
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2Levenstein.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2Levenstein.java
index 1e955bd4a..767c5976b 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2Levenstein.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Level2Levenstein.java
@@ -4,9 +4,15 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
+import java.util.Map;
+
@DistanceClass("Level2Levenstein")
public class Level2Levenstein extends SecondStringDistanceAlgo {
+ public Level2Levenstein(Map params){
+ super(params, new com.wcohen.ss.Level2Levenstein());
+ }
+
public Level2Levenstein(double w) {
super(w, new com.wcohen.ss.Level2Levenstein());
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Levenstein.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Levenstein.java
index 2e014b67e..d9ba5f73e 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Levenstein.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/Levenstein.java
@@ -4,11 +4,13 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
+import java.util.Map;
+
@DistanceClass("Levenstein")
public class Levenstein extends SecondStringDistanceAlgo {
- public Levenstein(){
- super(new com.wcohen.ss.Levenstein());
+ public Levenstein(Map params){
+ super(params, new com.wcohen.ss.Levenstein());
}
public Levenstein(double w) {
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/LevensteinTitle.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/LevensteinTitle.java
index c66f972c3..10de8597c 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/LevensteinTitle.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/LevensteinTitle.java
@@ -4,11 +4,13 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
+import java.util.Map;
+
@DistanceClass("LevensteinTitle")
public class LevensteinTitle extends SecondStringDistanceAlgo {
- public LevensteinTitle(){
- super(new com.wcohen.ss.Levenstein());
+ public LevensteinTitle(Map params){
+ super(params, new com.wcohen.ss.Levenstein());
}
public LevensteinTitle(final double w) {
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/MustBeDifferent.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/MustBeDifferent.java
index 0acb82ca4..e794f025f 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/MustBeDifferent.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/MustBeDifferent.java
@@ -4,9 +4,15 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
+import java.util.Map;
+
@DistanceClass("MustBeDifferent")
public class MustBeDifferent extends SecondStringDistanceAlgo {
+ /**
+ * Builds the algorithm from its configuration map.
+ * Wraps JaroWinkler, the same string distance the weight-based constructor of this class uses.
+ *
+ * @param params configuration map passed on to the superclass constructor
+ */
+ public MustBeDifferent(Map params){
+ super(params, new com.wcohen.ss.JaroWinkler());
+ }
+
public MustBeDifferent(final double weight) {
super(weight, new com.wcohen.ss.JaroWinkler());
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/NullDistanceAlgo.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/NullDistanceAlgo.java
index ef798cbad..8afc45fd6 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/NullDistanceAlgo.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/NullDistanceAlgo.java
@@ -13,6 +13,9 @@ import java.util.Map;
@DistanceClass("Null")
public class NullDistanceAlgo implements DistanceAlgo {
+ public NullDistanceAlgo(Map params){
+ }
+
@Override
public double distance(Field a, Field b) {
return 0.0;
@@ -23,16 +26,4 @@ public class NullDistanceAlgo implements DistanceAlgo {
return 0.0;
}
- @Override
- public void setWeight(double w){
- }
-
- @Override
- public Map getParams() {
- return null;
- }
-
- @Override
- public void setParams(Map params) {
- }
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedJaroWinkler.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedJaroWinkler.java
index 5f716001d..e3175a13e 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedJaroWinkler.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedJaroWinkler.java
@@ -3,12 +3,18 @@ package eu.dnetlib.pace.distance.algo;
import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass;
+import java.util.Map;
+
/**
* The Class SortedJaroWinkler.
*/
@DistanceClass("SortedJaroWinkler")
public class SortedJaroWinkler extends SortedSecondStringDistanceAlgo {
+ /**
+ * Instantiates a new sorted jaro winkler from its configuration map.
+ * Wraps JaroWinkler, the string distance this class is named after.
+ *
+ * @param params configuration map passed on to the superclass constructor
+ */
+ public SortedJaroWinkler(Map params){
+ super(params, new com.wcohen.ss.JaroWinkler());
+ }
+
/**
* Instantiates a new sorted jaro winkler.
*
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedLevel2JaroWinkler.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedLevel2JaroWinkler.java
index 493bbef7c..e53df09c8 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedLevel2JaroWinkler.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedLevel2JaroWinkler.java
@@ -3,6 +3,8 @@ package eu.dnetlib.pace.distance.algo;
import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass;
+import java.util.Map;
+
/**
* The Class SortedJaroWinkler.
*/
@@ -19,6 +21,10 @@ public class SortedLevel2JaroWinkler extends SortedSecondStringDistanceAlgo {
super(weight, new com.wcohen.ss.Level2JaroWinkler());
}
+ public SortedLevel2JaroWinkler(final Map params){
+ super(params, new com.wcohen.ss.Level2JaroWinkler());
+ }
+
/**
* Instantiates a new sorted jaro winkler.
*
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedSecondStringDistanceAlgo.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedSecondStringDistanceAlgo.java
index d47fbbacd..8a9c51402 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedSecondStringDistanceAlgo.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SortedSecondStringDistanceAlgo.java
@@ -2,6 +2,7 @@ package eu.dnetlib.pace.distance.algo;
import java.util.Collections;
import java.util.List;
+import java.util.Map;
import com.google.common.collect.Lists;
import com.wcohen.ss.AbstractStringDistance;
@@ -27,6 +28,10 @@ public abstract class SortedSecondStringDistanceAlgo extends SecondStringDistanc
super(weight, ssalgo);
}
+ /**
+ * Builds the algorithm from its configuration map and the wrapped string distance.
+ * Delegates to the map-based superclass constructor so the weight is extracted in one place only.
+ *
+ * @param params configuration map passed on to the superclass constructor
+ * @param ssalgo the underlying string distance implementation
+ */
+ protected SortedSecondStringDistanceAlgo(final Map params, final AbstractStringDistance ssalgo){
+ super(params, ssalgo);
+ }
+
/*
* (non-Javadoc)
*
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SubStringLevenstein.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SubStringLevenstein.java
index 9fee7df5d..8f0c024c7 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SubStringLevenstein.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/SubStringLevenstein.java
@@ -20,10 +20,6 @@ public class SubStringLevenstein extends SecondStringDistanceAlgo {
/** The limit. */
protected int limit;
- public SubStringLevenstein() {
- super(new com.wcohen.ss.Levenstein());
- }
-
/**
* Instantiates a new sub string levenstein.
*
@@ -34,6 +30,11 @@ public class SubStringLevenstein extends SecondStringDistanceAlgo {
super(w, new com.wcohen.ss.Levenstein());
}
+ /**
+ * Instantiates a new sub string levenstein from its configuration map.
+ *
+ * @param params configuration map; must contain a numeric "limit" entry in addition to what the superclass reads
+ * @throws IllegalArgumentException if params has no "limit" entry
+ */
+ public SubStringLevenstein(Map params){
+ super(params, new com.wcohen.ss.Levenstein());
+ // Fail with a message naming the missing key instead of a bare NullPointerException.
+ if (params.get("limit") == null) {
+ throw new IllegalArgumentException("missing mandatory distance parameter: limit");
+ }
+ this.limit = params.get("limit").intValue();
+ }
+
/**
* Instantiates a new sub string levenstein.
*
@@ -95,9 +96,4 @@ public class SubStringLevenstein extends SecondStringDistanceAlgo {
return 1 / Math.pow(Math.abs(d) + 1, 0.1);
}
- public void setParams(Map params){
- this.limit = params.get("limit").intValue(); //necessary because this class needs also the limit
- super.setParams(params);
- }
-
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/UrlMatcher.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/UrlMatcher.java
index 2aa7ca1ce..eacfdc08f 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/UrlMatcher.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/algo/UrlMatcher.java
@@ -13,8 +13,9 @@ public class UrlMatcher extends Levenstein {
private Map params;
- public UrlMatcher(){
- super();
+ public UrlMatcher(Map params){
+ super(params);
+ this.params = params;
}
public UrlMatcher(double weight, Map params) {
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/eval/ScoreResult.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/eval/ScoreResult.java
index 61d5c9327..4e394b261 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/eval/ScoreResult.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/eval/ScoreResult.java
@@ -1,7 +1,9 @@
package eu.dnetlib.pace.distance.eval;
import com.google.gson.GsonBuilder;
+import org.codehaus.jackson.map.ObjectMapper;
+import java.io.IOException;
import java.io.Serializable;
/**
@@ -51,12 +53,10 @@ public class ScoreResult implements Serializable {
+ /**
+ * Renders this result as pretty-printed JSON.
+ *
+ * @return the JSON form of this object, or a description of the failure if it cannot be serialized
+ */
@Override
public String toString() {
- //TODO cannot print: why?
-// final GsonBuilder b = new GsonBuilder()
-// .serializeSpecialFloatingPointValues()
-// .serializeNulls();
-//
-// return b.setPrettyPrinting().create().toJson(this);
- return "{}";
+ try {
+ return new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
+ } catch (IOException e) {
+ // Calling toString() on the stack-trace array only yields its identity string; report the exception itself.
+ return "ScoreResult serialization failed: " + e;
+ }
}
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java
index 7e09d446e..7d6cdcba9 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java
@@ -31,15 +31,11 @@ public class ClusteringDef implements Serializable {
public ClusteringFunction getClusteringFunction() {
try {
- ClusteringFunction clusteringFunction = clusteringResolver.resolve(getName());
- clusteringFunction.setParams(params);
- return clusteringFunction;
-
+ return clusteringResolver.resolve(getName(), params); // resolves by name and hands params to the resolver; the exceptions caught below fall back to RandomClusteringFunction
} catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) {
e.printStackTrace();
return new RandomClusteringFunction(getParams());
}
-
}
public List getFields() {
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java
index 14de69a37..fda8653c9 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java
@@ -1,6 +1,7 @@
package eu.dnetlib.pace.model;
import java.io.Serializable;
+import java.lang.reflect.InvocationTargetException;
import java.util.List;
import com.google.gson.Gson;
@@ -19,11 +20,8 @@ public class CondDef implements Serializable {
public ConditionAlgo getConditionAlgo(final List fields) {
try {
- ConditionAlgo conditionAlgo = conditionResolver.resolve(getName());
- conditionAlgo.setFields(fields);
- conditionAlgo.setCond(getName());
- return conditionAlgo;
- } catch (IllegalAccessException | InstantiationException e) {
+ return conditionResolver.resolve(getName(), fields);
+ } catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) {
e.printStackTrace();
return new AlwaysTrueCondition(getName(), fields);
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java
index 3f4619dcf..8b7250171 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java
@@ -1,6 +1,7 @@
package eu.dnetlib.pace.model;
import java.io.Serializable;
+import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -75,13 +76,10 @@ public class FieldDef implements Serializable {
}
params.put("limit", getLimit());
params.put("weight", getWeight());
- DistanceAlgo distanceAlgo = distanceResolver.resolve(getAlgo());
- distanceAlgo.setParams(params);
- distanceAlgo.setWeight(getWeight());
- return distanceAlgo;
- } catch (IllegalAccessException | InstantiationException e) {
+ return distanceResolver.resolve(getAlgo(), params);
+ } catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) {
e.printStackTrace();
- return new NullDistanceAlgo();
+ return new NullDistanceAlgo(params);
}
}
diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringResolverTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringResolverTest.java
new file mode 100644
index 000000000..6d01176f9
--- /dev/null
+++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringResolverTest.java
@@ -0,0 +1,30 @@
+package eu.dnetlib.pace.clustering;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import java.lang.reflect.InvocationTargetException;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+/** Checks that {@link ClusteringResolver} resolves the name "ngrams" to the {@link Ngrams} class. */
+public class ClusteringResolverTest {
+
+ private ClusteringResolver clusteringResolver;
+ private Map params = new HashMap();
+
+ @Before
+ public void setUp(){
+ clusteringResolver = new ClusteringResolver();
+ }
+
+ /** Resolves "ngrams" with an empty parameter map and checks the concrete type returned. */
+ @Test
+ public void testResolve() throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException {
+ ClusteringFunction ngrams = clusteringResolver.resolve("ngrams", params);
+ // JUnit's signature is assertEquals(expected, actual): the expected class goes first
+ assertEquals(Ngrams.class, ngrams.getClass());
+ }
+}
diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/condition/ConditionResolverTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/condition/ConditionResolverTest.java
new file mode 100644
index 000000000..87d1c375b
--- /dev/null
+++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/condition/ConditionResolverTest.java
@@ -0,0 +1,35 @@
+package eu.dnetlib.pace.condition;
+
+import eu.dnetlib.pace.clustering.ClusteringFunction;
+import eu.dnetlib.pace.clustering.ClusteringResolver;
+import eu.dnetlib.pace.clustering.Ngrams;
+import eu.dnetlib.pace.model.FieldDef;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.lang.reflect.InvocationTargetException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+/** Checks that {@link ConditionResolver} resolves the name "sizeMatch" to the {@link SizeMatch} class. */
+public class ConditionResolverTest {
+
+ private ConditionResolver conditionResolver;
+ private List fields;
+
+ @Before
+ public void setUp(){
+ conditionResolver = new ConditionResolver();
+ fields = java.util.Collections.emptyList(); // give the resolver an empty field list instead of an unassigned (null) one
+ }
+
+ @Test
+ public void testResolve() throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException {
+ ConditionAlgo sizeMatch = conditionResolver.resolve("sizeMatch", fields);
+ // JUnit's signature is assertEquals(expected, actual): the expected class goes first
+ assertEquals(SizeMatch.class, sizeMatch.getClass());
+ }
+}