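Change how clustering functions, distance algorithms and conditions are initialized: the resolvers now build them through parameterized constructors instead of a no-arg constructor followed by setter calls.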

This commit is contained in:
Michele De Bonis 2018-10-25 15:15:40 +02:00
parent 1cbbc3f15a
commit 3cf3dc1934
47 changed files with 164 additions and 187 deletions

View File

@ -58,9 +58,11 @@
<dependency> <dependency>
<groupId>org.reflections</groupId> <groupId>org.reflections</groupId>
<artifactId>reflections</artifactId> <artifactId>reflections</artifactId>
<version>0.9.10</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
</dependency>
</dependencies> </dependencies>
</project> </project>

View File

@ -19,12 +19,6 @@ public abstract class AbstractClusteringFunction extends AbstractPaceFunctions i
this.params = params; this.params = params;
} }
public AbstractClusteringFunction(){}
public void setParams(Map<String, Integer> params){
this.params = params;
}
protected abstract Collection<String> doApply(String s); protected abstract Collection<String> doApply(String s);
@Override @Override

View File

@ -14,10 +14,6 @@ public class Acronyms extends AbstractClusteringFunction {
super(params); super(params);
} }
public Acronyms(){
super();
}
@Override @Override
protected Collection<String> doApply(String s) { protected Collection<String> doApply(String s) {
return extractAcronyms(s, param("max"), param("minLen"), param("maxLen")); return extractAcronyms(s, param("max"), param("minLen"), param("maxLen"));

View File

@ -12,5 +12,4 @@ public interface ClusteringFunction {
public Map<String, Integer> getParams(); public Map<String, Integer> getParams();
public void setParams(Map<String, Integer> params);
} }

View File

@ -17,8 +17,8 @@ public class ClusteringResolver implements Serializable {
.collect(Collectors.toMap(cl -> cl.getAnnotation(ClusteringClass.class).value(), cl -> (Class<ClusteringFunction>)cl)); .collect(Collectors.toMap(cl -> cl.getAnnotation(ClusteringClass.class).value(), cl -> (Class<ClusteringFunction>)cl));
} }
public ClusteringFunction resolve(String clusteringFunction) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException { public ClusteringFunction resolve(String clusteringFunction, Map<String, Integer> params) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException {
return functionMap.get(clusteringFunction).newInstance(); return functionMap.get(clusteringFunction).getDeclaredConstructor(Map.class).newInstance(params);
} }
} }

View File

@ -13,10 +13,6 @@ public class ImmutableFieldValue extends AbstractClusteringFunction {
super(params); super(params);
} }
public ImmutableFieldValue() {
super();
}
@Override @Override
protected Collection<String> doApply(final String s) { protected Collection<String> doApply(final String s) {
final List<String> res = Lists.newArrayList(); final List<String> res = Lists.newArrayList();

View File

@ -16,10 +16,6 @@ public class LowercaseClustering extends AbstractClusteringFunction {
super(params); super(params);
} }
public LowercaseClustering(){
super();
}
@Override @Override
public Collection<String> apply(List<Field> fields) { public Collection<String> apply(List<Field> fields) {
Collection<String> c = Sets.newLinkedHashSet(); Collection<String> c = Sets.newLinkedHashSet();

View File

@ -1,6 +1,7 @@
package eu.dnetlib.pace.clustering; package eu.dnetlib.pace.clustering;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -9,10 +10,6 @@ import com.google.common.collect.Lists;
@ClusteringClass("ngrampairs") @ClusteringClass("ngrampairs")
public class NgramPairs extends Ngrams { public class NgramPairs extends Ngrams {
public NgramPairs() {
super();
}
public NgramPairs(Map<String, Integer> params) { public NgramPairs(Map<String, Integer> params) {
super(params); super(params);
} }

View File

@ -1,9 +1,6 @@
package eu.dnetlib.pace.clustering; package eu.dnetlib.pace.clustering;
import java.util.Collection; import java.util.*;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.StringTokenizer;
@ClusteringClass("ngrams") @ClusteringClass("ngrams")
public class Ngrams extends AbstractClusteringFunction { public class Ngrams extends AbstractClusteringFunction {
@ -12,10 +9,6 @@ public class Ngrams extends AbstractClusteringFunction {
super(params); super(params);
} }
public Ngrams() {
super();
}
@Override @Override
protected Collection<String> doApply(String s) { protected Collection<String> doApply(String s) {
return getNgrams(s, param("ngramLen"), param("max"), param("maxPerToken"), param("minNgramLen")); return getNgrams(s, param("ngramLen"), param("max"), param("maxPerToken"), param("minNgramLen"));

View File

@ -30,10 +30,6 @@ public class PersonClustering extends AbstractPaceFunctions implements Clusterin
this.params = params; this.params = params;
} }
public void setParams(Map<String, Integer> params){
this.params = params;
}
@Override @Override
public Collection<String> apply(final List<Field> fields) { public Collection<String> apply(final List<Field> fields) {
final Set<String> hashes = Sets.newHashSet(); final Set<String> hashes = Sets.newHashSet();

View File

@ -17,10 +17,6 @@ public class PersonHash extends AbstractClusteringFunction {
super(params); super(params);
} }
public PersonHash(){
super();
}
@Override @Override
protected Collection<String> doApply(final String s) { protected Collection<String> doApply(final String s) {
final List<String> res = Lists.newArrayList(); final List<String> res = Lists.newArrayList();

View File

@ -9,10 +9,6 @@ public class RandomClusteringFunction extends AbstractClusteringFunction {
super(params); super(params);
} }
public RandomClusteringFunction(){
super();
}
@Override @Override
protected Collection<String> doApply(String s) { protected Collection<String> doApply(String s) {
// TODO Auto-generated method stub // TODO Auto-generated method stub

View File

@ -1,9 +1,6 @@
package eu.dnetlib.pace.clustering; package eu.dnetlib.pace.clustering;
import java.util.Collection; import java.util.*;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import com.google.common.base.Joiner; import com.google.common.base.Joiner;
import com.google.common.base.Splitter; import com.google.common.base.Splitter;
@ -16,10 +13,6 @@ public class SortedNgramPairs extends NgramPairs {
super(params); super(params);
} }
public SortedNgramPairs(){
super();
}
@Override @Override
protected Collection<String> doApply(String s) { protected Collection<String> doApply(String s) {

View File

@ -16,10 +16,6 @@ public class SpaceTrimmingFieldValue extends AbstractClusteringFunction {
super(params); super(params);
} }
public SpaceTrimmingFieldValue(){
super();
}
@Override @Override
protected Collection<String> doApply(final String s) { protected Collection<String> doApply(final String s) {
final List<String> res = Lists.newArrayList(); final List<String> res = Lists.newArrayList();

View File

@ -13,10 +13,6 @@ public class SuffixPrefix extends AbstractClusteringFunction {
super(params); super(params);
} }
public SuffixPrefix(){
super();
}
@Override @Override
protected Collection<String> doApply(String s) { protected Collection<String> doApply(String s) {
return suffixPrefix(s, param("len"), param("max")); return suffixPrefix(s, param("len"), param("max"));

View File

@ -20,14 +20,6 @@ public class UrlClustering extends AbstractPaceFunctions implements ClusteringFu
this.params = params; this.params = params;
} }
public UrlClustering() {
super();
}
public void setParams(Map<String, Integer> params){
this.params = params;
}
@Override @Override
public Collection<String> apply(List<Field> fields) { public Collection<String> apply(List<Field> fields) {
return fields.stream() return fields.stream()

View File

@ -25,16 +25,6 @@ public abstract class AbstractCondition extends AbstractPaceFunctions implements
this.fields = fields; this.fields = fields;
} }
public AbstractCondition(){}
public void setCond(String cond){
this.cond = cond;
}
public void setFields(List<FieldDef> fields){
this.fields = fields;
}
protected abstract ConditionEval verify(FieldDef fd, Field a, Field b); protected abstract ConditionEval verify(FieldDef fd, Field a, Field b);
@Override @Override

View File

@ -17,9 +17,6 @@ public class AlwaysTrueCondition extends AbstractCondition {
super(cond, fields); super(cond, fields);
} }
public AlwaysTrueCondition(){
super();
}
@Override @Override
protected ConditionEval verify(final FieldDef fd, final Field a, final Field b) { protected ConditionEval verify(final FieldDef fd, final Field a, final Field b) {
return new ConditionEval(cond, a, b, 1); return new ConditionEval(cond, a, b, 1);

View File

@ -24,7 +24,4 @@ public interface ConditionAlgo {
*/ */
public abstract ConditionEvalMap verify(Document a, Document b); public abstract ConditionEvalMap verify(Document a, Document b);
public void setFields(List<FieldDef> fields);
public void setCond(String name);
} }

View File

@ -1,9 +1,12 @@
package eu.dnetlib.pace.condition; package eu.dnetlib.pace.condition;
import java.io.Serializable; import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.pace.model.FieldDef;
import org.reflections.Reflections; import org.reflections.Reflections;
public class ConditionResolver implements Serializable { public class ConditionResolver implements Serializable {
@ -16,7 +19,7 @@ public class ConditionResolver implements Serializable {
.collect(Collectors.toMap(cl -> cl.getAnnotation(ConditionClass.class).value(), cl -> (Class<ConditionAlgo>)cl)); .collect(Collectors.toMap(cl -> cl.getAnnotation(ConditionClass.class).value(), cl -> (Class<ConditionAlgo>)cl));
} }
public ConditionAlgo resolve(String name) throws IllegalAccessException, InstantiationException { public ConditionAlgo resolve(String name, List<FieldDef> fields) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException {
return functionMap.get(name).newInstance(); return functionMap.get(name).getDeclaredConstructor(String.class, List.class).newInstance(name, fields);
} }
} }

View File

@ -19,10 +19,6 @@ public class ExactMatch extends AbstractCondition {
super(cond, fields); super(cond, fields);
} }
public ExactMatch(){
super();
}
@Override @Override
protected ConditionEval verify(final FieldDef fd, final Field a, final Field b) { protected ConditionEval verify(final FieldDef fd, final Field a, final Field b) {

View File

@ -23,8 +23,6 @@ public class YearMatch extends AbstractCondition {
super(cond, fields); super(cond, fields);
} }
public YearMatch(){}
// @Override // @Override
// public boolean verify(final Document a, final Document b) { // public boolean verify(final Document a, final Document b) {
// boolean res = true; // boolean res = true;

View File

@ -13,9 +13,5 @@ public interface DistanceAlgo {
public abstract double distance(Field a, Field b); public abstract double distance(Field a, Field b);
public double getWeight(); public double getWeight();
public Map<String, Number> getParams();
public void setWeight(double w);
public void setParams(Map<String, Number> params);
} }

View File

@ -17,8 +17,8 @@ public class DistanceResolver implements Serializable {
.collect(Collectors.toMap(cl -> cl.getAnnotation(DistanceClass.class).value(), cl -> (Class<DistanceAlgo>)cl)); .collect(Collectors.toMap(cl -> cl.getAnnotation(DistanceClass.class).value(), cl -> (Class<DistanceAlgo>)cl));
} }
public DistanceAlgo resolve(String algo) throws IllegalAccessException, InstantiationException { public DistanceAlgo resolve(String algo, Map<String, Number> params) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException {
return functionMap.get(algo).newInstance(); return functionMap.get(algo).getDeclaredConstructor(Map.class).newInstance(params);
} }
} }

View File

@ -28,23 +28,10 @@ public abstract class SecondStringDistanceAlgo extends AbstractPaceFunctions imp
private Map<String, Number> params; private Map<String, Number> params;
protected SecondStringDistanceAlgo(){ protected SecondStringDistanceAlgo(Map<String, Number> params, final AbstractStringDistance ssalgo){
}
protected SecondStringDistanceAlgo(Map<String, Number> params){
this.params = params;
}
public void setWeight(double w){
this.weight = w;
}
public Map<String, Number> getParams(){
return this.params;
}
public void setParams(Map<String, Number> params){
this.params = params; this.params = params;
this.weight = params.get("weight").doubleValue();
this.ssalgo = ssalgo;
} }
/** /**

View File

@ -9,12 +9,8 @@ import java.util.Map;
@DistanceClass("AlwaysMatch") @DistanceClass("AlwaysMatch")
public class AlwaysMatch extends SecondStringDistanceAlgo { public class AlwaysMatch extends SecondStringDistanceAlgo {
public AlwaysMatch(){
super();
}
public AlwaysMatch(final Map<String, Number> params){ public AlwaysMatch(final Map<String, Number> params){
super(params); super(params, new com.wcohen.ss.JaroWinkler());
} }
public AlwaysMatch(final double weight) { public AlwaysMatch(final double weight) {

View File

@ -9,12 +9,8 @@ import java.util.Map;
@DistanceClass("ExactMatch") @DistanceClass("ExactMatch")
public class ExactMatch extends SecondStringDistanceAlgo { public class ExactMatch extends SecondStringDistanceAlgo {
public ExactMatch(){
super();
}
public ExactMatch(Map<String, Number> params){ public ExactMatch(Map<String, Number> params){
super(params); super(params, new com.wcohen.ss.JaroWinkler());
} }
public ExactMatch(final double weight) { public ExactMatch(final double weight) {

View File

@ -4,18 +4,15 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo; import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
import java.io.Serializable;
import java.util.Map; import java.util.Map;
//case class JaroWinkler(w: Double) extends SecondStringDistanceAlgo(w, new com.wcohen.ss.JaroWinkler()) //case class JaroWinkler(w: Double) extends SecondStringDistanceAlgo(w, new com.wcohen.ss.JaroWinkler())
@DistanceClass("JaroWinkler") @DistanceClass("JaroWinkler")
public class JaroWinkler extends SecondStringDistanceAlgo { public class JaroWinkler extends SecondStringDistanceAlgo {
public JaroWinkler(){
super();
}
public JaroWinkler(Map<String, Number> params){ public JaroWinkler(Map<String, Number> params){
super(params); super(params, new com.wcohen.ss.JaroWinkler());
} }
public JaroWinkler(double weight) { public JaroWinkler(double weight) {

View File

@ -10,12 +10,8 @@ import java.util.Map;
@DistanceClass("JaroWinklerTitle") @DistanceClass("JaroWinklerTitle")
public class JaroWinklerTitle extends SecondStringDistanceAlgo { public class JaroWinklerTitle extends SecondStringDistanceAlgo {
public JaroWinklerTitle(){
super();
}
public JaroWinklerTitle(Map<String, Number> params){ public JaroWinklerTitle(Map<String, Number> params){
super(params); super(params, new com.wcohen.ss.JaroWinkler());
} }
public JaroWinklerTitle(double weight) { public JaroWinklerTitle(double weight) {

View File

@ -4,9 +4,15 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo; import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
import java.util.Map;
@DistanceClass("Level2JaroWinkler") @DistanceClass("Level2JaroWinkler")
public class Level2JaroWinkler extends SecondStringDistanceAlgo { public class Level2JaroWinkler extends SecondStringDistanceAlgo {
public Level2JaroWinkler(Map<String, Number> params){
super(params, new com.wcohen.ss.Level2JaroWinkler());
}
public Level2JaroWinkler(double w) { public Level2JaroWinkler(double w) {
super(w, new com.wcohen.ss.Level2JaroWinkler()); super(w, new com.wcohen.ss.Level2JaroWinkler());
} }

View File

@ -4,9 +4,15 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo; import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
import java.util.Map;
@DistanceClass("Level2JaroWinklerTitle") @DistanceClass("Level2JaroWinklerTitle")
public class Level2JaroWinklerTitle extends SecondStringDistanceAlgo { public class Level2JaroWinklerTitle extends SecondStringDistanceAlgo {
public Level2JaroWinklerTitle(Map<String,Number> params){
super(params, new com.wcohen.ss.Level2JaroWinkler());
}
public Level2JaroWinklerTitle(final double w) { public Level2JaroWinklerTitle(final double w) {
super(w, new com.wcohen.ss.Level2JaroWinkler()); super(w, new com.wcohen.ss.Level2JaroWinkler());
} }

View File

@ -4,9 +4,15 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo; import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
import java.util.Map;
@DistanceClass("Level2Levenstein") @DistanceClass("Level2Levenstein")
public class Level2Levenstein extends SecondStringDistanceAlgo { public class Level2Levenstein extends SecondStringDistanceAlgo {
public Level2Levenstein(Map<String,Number> params){
super(params, new com.wcohen.ss.Level2Levenstein());
}
public Level2Levenstein(double w) { public Level2Levenstein(double w) {
super(w, new com.wcohen.ss.Level2Levenstein()); super(w, new com.wcohen.ss.Level2Levenstein());
} }

View File

@ -4,11 +4,13 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo; import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
import java.util.Map;
@DistanceClass("Levenstein") @DistanceClass("Levenstein")
public class Levenstein extends SecondStringDistanceAlgo { public class Levenstein extends SecondStringDistanceAlgo {
public Levenstein(){ public Levenstein(Map<String,Number> params){
super(new com.wcohen.ss.Levenstein()); super(params, new com.wcohen.ss.Levenstein());
} }
public Levenstein(double w) { public Levenstein(double w) {

View File

@ -4,11 +4,13 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo; import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
import java.util.Map;
@DistanceClass("LevensteinTitle") @DistanceClass("LevensteinTitle")
public class LevensteinTitle extends SecondStringDistanceAlgo { public class LevensteinTitle extends SecondStringDistanceAlgo {
public LevensteinTitle(){ public LevensteinTitle(Map<String,Number> params){
super(new com.wcohen.ss.Levenstein()); super(params, new com.wcohen.ss.Levenstein());
} }
public LevensteinTitle(final double w) { public LevensteinTitle(final double w) {

View File

@ -4,9 +4,15 @@ import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo; import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
import java.util.Map;
@DistanceClass("MustBeDifferent") @DistanceClass("MustBeDifferent")
public class MustBeDifferent extends SecondStringDistanceAlgo { public class MustBeDifferent extends SecondStringDistanceAlgo {
/**
 * Instantiates the algorithm from a parameter map.
 *
 * @param params parameter map forwarded to the superclass constructor
 */
public MustBeDifferent(Map<String,Number> params){
    // Wire the same underlying string distance (JaroWinkler) as the weight-based constructor of this class.
    super(params, new com.wcohen.ss.JaroWinkler());
}
public MustBeDifferent(final double weight) { public MustBeDifferent(final double weight) {
super(weight, new com.wcohen.ss.JaroWinkler()); super(weight, new com.wcohen.ss.JaroWinkler());
} }

View File

@ -13,6 +13,9 @@ import java.util.Map;
@DistanceClass("Null") @DistanceClass("Null")
public class NullDistanceAlgo implements DistanceAlgo { public class NullDistanceAlgo implements DistanceAlgo {
/**
 * Creates the null distance algorithm.
 *
 * @param params ignored by this constructor; the parameter gives the class a constructor
 *               taking a Map, which is the signature DistanceResolver looks up reflectively
 */
public NullDistanceAlgo(Map<String, Number> params){
}
@Override @Override
public double distance(Field a, Field b) { public double distance(Field a, Field b) {
return 0.0; return 0.0;
@ -23,16 +26,4 @@ public class NullDistanceAlgo implements DistanceAlgo {
return 0.0; return 0.0;
} }
@Override
public void setWeight(double w){
}
@Override
public Map<String, Number> getParams() {
return null;
}
@Override
public void setParams(Map<String, Number> params) {
}
} }

View File

@ -3,12 +3,18 @@ package eu.dnetlib.pace.distance.algo;
import com.wcohen.ss.AbstractStringDistance; import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.DistanceClass;
import java.util.Map;
/** /**
* The Class SortedJaroWinkler. * The Class SortedJaroWinkler.
*/ */
@DistanceClass("SortedJaroWinkler") @DistanceClass("SortedJaroWinkler")
public class SortedJaroWinkler extends SortedSecondStringDistanceAlgo { public class SortedJaroWinkler extends SortedSecondStringDistanceAlgo {
/**
 * Instantiates a new sorted jaro winkler from a parameter map.
 *
 * @param params parameter map forwarded to the superclass constructor
 */
public SortedJaroWinkler(Map<String,Number> params){
    // Use the JaroWinkler distance this class is named and registered after, not Levenstein.
    super(params, new com.wcohen.ss.JaroWinkler());
}
/** /**
* Instantiates a new sorted jaro winkler. * Instantiates a new sorted jaro winkler.
* *

View File

@ -3,6 +3,8 @@ package eu.dnetlib.pace.distance.algo;
import com.wcohen.ss.AbstractStringDistance; import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.distance.DistanceClass; import eu.dnetlib.pace.distance.DistanceClass;
import java.util.Map;
/** /**
* The Class SortedJaroWinkler. * The Class SortedJaroWinkler.
*/ */
@ -19,6 +21,10 @@ public class SortedLevel2JaroWinkler extends SortedSecondStringDistanceAlgo {
super(weight, new com.wcohen.ss.Level2JaroWinkler()); super(weight, new com.wcohen.ss.Level2JaroWinkler());
} }
public SortedLevel2JaroWinkler(final Map<String, Number> params){
super(params, new com.wcohen.ss.Level2JaroWinkler());
}
/** /**
* Instantiates a new sorted jaro winkler. * Instantiates a new sorted jaro winkler.
* *

View File

@ -2,6 +2,7 @@ package eu.dnetlib.pace.distance.algo;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.wcohen.ss.AbstractStringDistance; import com.wcohen.ss.AbstractStringDistance;
@ -27,6 +28,10 @@ public abstract class SortedSecondStringDistanceAlgo extends SecondStringDistanc
super(weight, ssalgo); super(weight, ssalgo);
} }
/**
 * Instantiates the algorithm from a parameter map.
 * Only the "weight" entry of the map is read here; it is converted to a double and passed,
 * together with the string distance, to the superclass constructor.
 * A map without a "weight" entry makes this constructor throw a NullPointerException.
 *
 * @param params parameter map; must contain a "weight" entry
 * @param ssalgo the string distance implementation handed to the superclass
 */
protected SortedSecondStringDistanceAlgo(final Map<String, Number> params, final AbstractStringDistance ssalgo){
super(params.get("weight").doubleValue(), ssalgo);
}
/* /*
* (non-Javadoc) * (non-Javadoc)
* *

View File

@ -20,10 +20,6 @@ public class SubStringLevenstein extends SecondStringDistanceAlgo {
/** The limit. */ /** The limit. */
protected int limit; protected int limit;
public SubStringLevenstein() {
super(new com.wcohen.ss.Levenstein());
}
/** /**
* Instantiates a new sub string levenstein. * Instantiates a new sub string levenstein.
* *
@ -34,6 +30,11 @@ public class SubStringLevenstein extends SecondStringDistanceAlgo {
super(w, new com.wcohen.ss.Levenstein()); super(w, new com.wcohen.ss.Levenstein());
} }
/**
 * Instantiates a new sub string levenstein from a parameter map.
 * The map is forwarded to the superclass constructor and its "limit" entry is stored,
 * as an int, in {@link #limit}. A map without a "limit" entry makes this constructor
 * throw a NullPointerException.
 *
 * @param params parameter map; must contain a "limit" entry
 */
public SubStringLevenstein(Map<String, Number> params){
super(params, new com.wcohen.ss.Levenstein());
this.limit = params.get("limit").intValue();
}
/** /**
* Instantiates a new sub string levenstein. * Instantiates a new sub string levenstein.
* *
@ -95,9 +96,4 @@ public class SubStringLevenstein extends SecondStringDistanceAlgo {
return 1 / Math.pow(Math.abs(d) + 1, 0.1); return 1 / Math.pow(Math.abs(d) + 1, 0.1);
} }
public void setParams(Map<String, Number> params){
this.limit = params.get("limit").intValue(); //necessary because this class needs also the limit
super.setParams(params);
}
} }

View File

@ -13,8 +13,9 @@ public class UrlMatcher extends Levenstein {
private Map<String, Number> params; private Map<String, Number> params;
public UrlMatcher(){ public UrlMatcher(Map<String, Number> params){
super(); super(params);
this.params = params;
} }
public UrlMatcher(double weight, Map<String, Number> params) { public UrlMatcher(double weight, Map<String, Number> params) {

View File

@ -1,7 +1,9 @@
package eu.dnetlib.pace.distance.eval; package eu.dnetlib.pace.distance.eval;
import com.google.gson.GsonBuilder; import com.google.gson.GsonBuilder;
import org.codehaus.jackson.map.ObjectMapper;
import java.io.IOException;
import java.io.Serializable; import java.io.Serializable;
/** /**
@ -51,12 +53,10 @@ public class ScoreResult implements Serializable {
@Override @Override
public String toString() { public String toString() {
//TODO cannot print: why? try {
// final GsonBuilder b = new GsonBuilder() return new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
// .serializeSpecialFloatingPointValues() } catch (IOException e) {
// .serializeNulls(); return e.getStackTrace().toString();
// }
// return b.setPrettyPrinting().create().toJson(this);
return "{}";
} }
} }

View File

@ -31,15 +31,11 @@ public class ClusteringDef implements Serializable {
public ClusteringFunction getClusteringFunction() { public ClusteringFunction getClusteringFunction() {
try { try {
ClusteringFunction clusteringFunction = clusteringResolver.resolve(getName()); return clusteringResolver.resolve(getName(), params);
clusteringFunction.setParams(params);
return clusteringFunction;
} catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) { } catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) {
e.printStackTrace(); e.printStackTrace();
return new RandomClusteringFunction(getParams()); return new RandomClusteringFunction(getParams());
} }
} }
public List<String> getFields() { public List<String> getFields() {

View File

@ -1,6 +1,7 @@
package eu.dnetlib.pace.model; package eu.dnetlib.pace.model;
import java.io.Serializable; import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.util.List; import java.util.List;
import com.google.gson.Gson; import com.google.gson.Gson;
@ -19,11 +20,8 @@ public class CondDef implements Serializable {
public ConditionAlgo getConditionAlgo(final List<FieldDef> fields) { public ConditionAlgo getConditionAlgo(final List<FieldDef> fields) {
try { try {
ConditionAlgo conditionAlgo = conditionResolver.resolve(getName()); return conditionResolver.resolve(getName(), fields);
conditionAlgo.setFields(fields); } catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) {
conditionAlgo.setCond(getName());
return conditionAlgo;
} catch (IllegalAccessException | InstantiationException e) {
e.printStackTrace(); e.printStackTrace();
return new AlwaysTrueCondition(getName(), fields); return new AlwaysTrueCondition(getName(), fields);
} }

View File

@ -1,6 +1,7 @@
package eu.dnetlib.pace.model; package eu.dnetlib.pace.model;
import java.io.Serializable; import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -75,13 +76,10 @@ public class FieldDef implements Serializable {
} }
params.put("limit", getLimit()); params.put("limit", getLimit());
params.put("weight", getWeight()); params.put("weight", getWeight());
DistanceAlgo distanceAlgo = distanceResolver.resolve(getAlgo()); return distanceResolver.resolve(getAlgo(), params);
distanceAlgo.setParams(params); } catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) {
distanceAlgo.setWeight(getWeight());
return distanceAlgo;
} catch (IllegalAccessException | InstantiationException e) {
e.printStackTrace(); e.printStackTrace();
return new NullDistanceAlgo(); return new NullDistanceAlgo(params);
} }
} }

View File

@ -0,0 +1,30 @@
package eu.dnetlib.pace.clustering;
import org.junit.Before;
import org.junit.Test;
import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.Map;
import static org.junit.Assert.assertEquals;
/**
 * Unit test for {@link ClusteringResolver}: checks that a registered clustering function name
 * is resolved to an instance of the matching class.
 */
public class ClusteringResolverTest {

    /** Resolver under test, recreated before every test method. */
    private ClusteringResolver clusteringResolver;

    /** Empty parameter map handed to the resolved function's constructor. */
    private final Map<String,Integer> params = new HashMap<String, Integer>();

    @Before
    public void setUp(){
        clusteringResolver = new ClusteringResolver();
    }

    /**
     * The name "ngrams" must resolve to an {@link Ngrams} instance.
     */
    @Test
    public void testResolve() throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException {
        ClusteringFunction ngrams = clusteringResolver.resolve("ngrams", params);
        // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly.
        assertEquals(Ngrams.class, ngrams.getClass());
    }
}

View File

@ -0,0 +1,35 @@
package eu.dnetlib.pace.condition;
import eu.dnetlib.pace.clustering.ClusteringFunction;
import eu.dnetlib.pace.clustering.ClusteringResolver;
import eu.dnetlib.pace.clustering.Ngrams;
import eu.dnetlib.pace.model.FieldDef;
import org.junit.Before;
import org.junit.Test;
import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.junit.Assert.assertEquals;
/**
 * Unit test for {@link ConditionResolver}: checks that a registered condition name
 * is resolved to an instance of the matching class.
 */
public class ConditionResolverTest {

    /** Resolver under test, recreated before every test method. */
    private ConditionResolver conditionResolver;

    /**
     * Field definitions handed to the resolved condition's constructor.
     * Never assigned in this test, so the resolver receives {@code null}.
     */
    private List<FieldDef> fields;

    @Before
    public void setUp(){
        conditionResolver = new ConditionResolver();
    }

    /**
     * The name "sizeMatch" must resolve to a {@link SizeMatch} instance.
     */
    @Test
    public void testResolve() throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException {
        ConditionAlgo sizeMatch = conditionResolver.resolve("sizeMatch", fields);
        // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly.
        assertEquals(SizeMatch.class, sizeMatch.getClass());
    }
}