Master branch updates from beta September 2023 #337
|
@ -1,24 +0,0 @@
|
||||||
package eu.dnetlib.pace.clustering;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.reflections.Reflections;
|
|
||||||
|
|
||||||
public class ClusteringResolver implements Serializable {
|
|
||||||
private final Map<String, Class<ClusteringFunction>> functionMap;
|
|
||||||
|
|
||||||
public ClusteringResolver() {
|
|
||||||
|
|
||||||
this.functionMap = new Reflections("eu.dnetlib").getTypesAnnotatedWith(ClusteringClass.class).stream()
|
|
||||||
.filter(ClusteringFunction.class::isAssignableFrom)
|
|
||||||
.collect(Collectors.toMap(cl -> cl.getAnnotation(ClusteringClass.class).value(), cl -> (Class<ClusteringFunction>)cl));
|
|
||||||
}
|
|
||||||
|
|
||||||
public ClusteringFunction resolve(String clusteringFunction, Map<String, Integer> params) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException {
|
|
||||||
|
|
||||||
return functionMap.get(clusteringFunction).getDeclaredConstructor(Map.class).newInstance(params);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,25 +0,0 @@
|
||||||
package eu.dnetlib.pace.condition;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import eu.dnetlib.pace.model.FieldDef;
|
|
||||||
import org.reflections.Reflections;
|
|
||||||
|
|
||||||
public class ConditionResolver implements Serializable {
|
|
||||||
private final Map<String, Class<ConditionAlgo>> functionMap;
|
|
||||||
|
|
||||||
public ConditionResolver() {
|
|
||||||
|
|
||||||
this.functionMap = new Reflections("eu.dnetlib").getTypesAnnotatedWith(ConditionClass.class).stream()
|
|
||||||
.filter(ConditionAlgo.class::isAssignableFrom)
|
|
||||||
.collect(Collectors.toMap(cl -> cl.getAnnotation(ConditionClass.class).value(), cl -> (Class<ConditionAlgo>)cl));
|
|
||||||
}
|
|
||||||
|
|
||||||
public ConditionAlgo resolve(String name, List<FieldDef> fields) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException {
|
|
||||||
return functionMap.get(name).getDeclaredConstructor(String.class, List.class).newInstance(name, fields);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -7,12 +7,11 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
|
import eu.dnetlib.pace.util.PaceException;
|
||||||
import org.antlr.stringtemplate.StringTemplate;
|
import org.antlr.stringtemplate.StringTemplate;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
import com.google.gson.Gson;
|
|
||||||
import com.google.gson.GsonBuilder;
|
|
||||||
|
|
||||||
import eu.dnetlib.pace.condition.ConditionAlgo;
|
import eu.dnetlib.pace.condition.ConditionAlgo;
|
||||||
import eu.dnetlib.pace.model.ClusteringDef;
|
import eu.dnetlib.pace.model.ClusteringDef;
|
||||||
|
@ -35,7 +34,7 @@ public class DedupConfig implements Config, Serializable {
|
||||||
|
|
||||||
static {
|
static {
|
||||||
defaults.put("threshold", "0");
|
defaults.put("threshold", "0");
|
||||||
defaults.put("run", "001");
|
defaults.put("dedupRun", "001");
|
||||||
defaults.put("entityType", "result");
|
defaults.put("entityType", "result");
|
||||||
defaults.put("orderField", "title");
|
defaults.put("orderField", "title");
|
||||||
defaults.put("queueMaxSize", "2000");
|
defaults.put("queueMaxSize", "2000");
|
||||||
|
@ -49,11 +48,15 @@ public class DedupConfig implements Config, Serializable {
|
||||||
|
|
||||||
public static DedupConfig load(final String json) {
|
public static DedupConfig load(final String json) {
|
||||||
|
|
||||||
final DedupConfig config = new Gson().fromJson(json, DedupConfig.class);
|
final DedupConfig config;
|
||||||
|
try {
|
||||||
|
config = new ObjectMapper().readValue(json, DedupConfig.class);
|
||||||
config.getPace().initModel();
|
config.getPace().initModel();
|
||||||
|
|
||||||
return config;
|
return config;
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new PaceException("Error in parsing configuration json", e);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static DedupConfig loadDefault() throws IOException {
|
public static DedupConfig loadDefault() throws IOException {
|
||||||
|
|
|
@ -11,6 +11,7 @@ import eu.dnetlib.pace.condition.ConditionAlgo;
|
||||||
import eu.dnetlib.pace.model.ClusteringDef;
|
import eu.dnetlib.pace.model.ClusteringDef;
|
||||||
import eu.dnetlib.pace.model.CondDef;
|
import eu.dnetlib.pace.model.CondDef;
|
||||||
import eu.dnetlib.pace.model.FieldDef;
|
import eu.dnetlib.pace.model.FieldDef;
|
||||||
|
import eu.dnetlib.pace.util.PaceResolver;
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
|
|
||||||
public class PaceConfig implements Serializable {
|
public class PaceConfig implements Serializable {
|
||||||
|
@ -23,6 +24,8 @@ public class PaceConfig implements Serializable {
|
||||||
|
|
||||||
private Map<String, FieldDef> modelMap;
|
private Map<String, FieldDef> modelMap;
|
||||||
|
|
||||||
|
public static PaceResolver paceResolver;
|
||||||
|
|
||||||
public PaceConfig() {}
|
public PaceConfig() {}
|
||||||
|
|
||||||
public void initModel() {
|
public void initModel() {
|
||||||
|
@ -30,6 +33,8 @@ public class PaceConfig implements Serializable {
|
||||||
for(FieldDef fd : getModel()) {
|
for(FieldDef fd : getModel()) {
|
||||||
modelMap.put(fd.getName(), fd);
|
modelMap.put(fd.getName(), fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
paceResolver = new PaceResolver();
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<FieldDef> getModel() {
|
public List<FieldDef> getModel() {
|
||||||
|
|
|
@ -1,24 +0,0 @@
|
||||||
package eu.dnetlib.pace.distance;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.reflections.Reflections;
|
|
||||||
|
|
||||||
public class DistanceResolver implements Serializable {
|
|
||||||
private final Map<String, Class<DistanceAlgo>> functionMap;
|
|
||||||
|
|
||||||
public DistanceResolver() {
|
|
||||||
|
|
||||||
this.functionMap = new Reflections("eu.dnetlib").getTypesAnnotatedWith(DistanceClass.class).stream()
|
|
||||||
.filter(DistanceAlgo.class::isAssignableFrom)
|
|
||||||
.collect(Collectors.toMap(cl -> cl.getAnnotation(DistanceClass.class).value(), cl -> (Class<DistanceAlgo>)cl));
|
|
||||||
}
|
|
||||||
|
|
||||||
public DistanceAlgo resolve(String algo, Map<String, Number> params) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException {
|
|
||||||
|
|
||||||
return functionMap.get(algo).getDeclaredConstructor(Map.class).newInstance(params);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,12 +1,14 @@
|
||||||
package eu.dnetlib.pace.model;
|
package eu.dnetlib.pace.model;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
import eu.dnetlib.pace.clustering.*;
|
import eu.dnetlib.pace.clustering.*;
|
||||||
|
import eu.dnetlib.pace.config.PaceConfig;
|
||||||
|
import eu.dnetlib.pace.util.PaceException;
|
||||||
|
import org.codehaus.jackson.map.ObjectMapper;
|
||||||
|
|
||||||
public class ClusteringDef implements Serializable {
|
public class ClusteringDef implements Serializable {
|
||||||
|
|
||||||
|
@ -16,8 +18,6 @@ public class ClusteringDef implements Serializable {
|
||||||
|
|
||||||
private Map<String, Integer> params;
|
private Map<String, Integer> params;
|
||||||
|
|
||||||
private ClusteringResolver clusteringResolver = new ClusteringResolver();
|
|
||||||
|
|
||||||
public ClusteringDef() {}
|
public ClusteringDef() {}
|
||||||
|
|
||||||
public String getName() {
|
public String getName() {
|
||||||
|
@ -29,12 +29,11 @@ public class ClusteringDef implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public ClusteringFunction getClusteringFunction() {
|
public ClusteringFunction getClusteringFunction() {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return clusteringResolver.resolve(getName(), params);
|
return PaceConfig.paceResolver.getClusteringFunction(getName(), params);
|
||||||
} catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) {
|
} catch (PaceException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
return new RandomClusteringFunction(getParams());
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,7 +55,11 @@ public class ClusteringDef implements Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return new Gson().toJson(this);
|
try {
|
||||||
|
return new ObjectMapper().writeValueAsString(this);
|
||||||
|
} catch (IOException e) {
|
||||||
|
return e.getStackTrace().toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
package eu.dnetlib.pace.model;
|
package eu.dnetlib.pace.model;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
import eu.dnetlib.pace.condition.*;
|
import eu.dnetlib.pace.condition.*;
|
||||||
|
import eu.dnetlib.pace.config.PaceConfig;
|
||||||
|
import org.codehaus.jackson.map.ObjectMapper;
|
||||||
|
|
||||||
public class CondDef implements Serializable {
|
public class CondDef implements Serializable {
|
||||||
|
|
||||||
|
@ -13,19 +14,10 @@ public class CondDef implements Serializable {
|
||||||
|
|
||||||
private List<String> fields;
|
private List<String> fields;
|
||||||
|
|
||||||
private ConditionResolver conditionResolver = new ConditionResolver();
|
|
||||||
|
|
||||||
public CondDef() {}
|
public CondDef() {}
|
||||||
|
|
||||||
public ConditionAlgo getConditionAlgo(final List<FieldDef> fields) {
|
public ConditionAlgo getConditionAlgo(final List<FieldDef> fields){
|
||||||
|
return PaceConfig.paceResolver.getConditionAlgo(getName(), fields);
|
||||||
try {
|
|
||||||
return conditionResolver.resolve(getName(), fields);
|
|
||||||
} catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
return new AlwaysTrueCondition(getName(), fields);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getName() {
|
public String getName() {
|
||||||
|
@ -46,7 +38,11 @@ public class CondDef implements Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return new Gson().toJson(this);
|
try {
|
||||||
|
return new ObjectMapper().writeValueAsString(this);
|
||||||
|
} catch (IOException e) {
|
||||||
|
return e.getStackTrace().toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,9 +9,11 @@ import java.util.Map;
|
||||||
import com.google.common.base.Splitter;
|
import com.google.common.base.Splitter;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
import eu.dnetlib.pace.config.PaceConfig;
|
||||||
import eu.dnetlib.pace.config.Type;
|
import eu.dnetlib.pace.config.Type;
|
||||||
import eu.dnetlib.pace.distance.*;
|
import eu.dnetlib.pace.distance.*;
|
||||||
import eu.dnetlib.pace.distance.algo.*;
|
import eu.dnetlib.pace.distance.algo.*;
|
||||||
|
import eu.dnetlib.pace.util.PaceException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The schema is composed by field definitions (FieldDef). Each field has a type, a name, and an associated distance algorithm.
|
* The schema is composed by field definitions (FieldDef). Each field has a type, a name, and an associated distance algorithm.
|
||||||
|
@ -38,8 +40,6 @@ public class FieldDef implements Serializable {
|
||||||
|
|
||||||
private Map<String, Number> params;
|
private Map<String, Number> params;
|
||||||
|
|
||||||
private DistanceResolver distanceResolver = new DistanceResolver();
|
|
||||||
|
|
||||||
public FieldDef() {}
|
public FieldDef() {}
|
||||||
|
|
||||||
// def apply(s: String): Field[A]
|
// def apply(s: String): Field[A]
|
||||||
|
@ -70,18 +70,12 @@ public class FieldDef implements Serializable {
|
||||||
|
|
||||||
public DistanceAlgo getDistanceAlgo() {
|
public DistanceAlgo getDistanceAlgo() {
|
||||||
|
|
||||||
try {
|
|
||||||
if (params == null) {
|
if (params == null) {
|
||||||
params = new HashMap<>();
|
params = new HashMap<>();
|
||||||
}
|
}
|
||||||
params.put("limit", getLimit());
|
params.put("limit", getLimit());
|
||||||
params.put("weight", getWeight());
|
params.put("weight", getWeight());
|
||||||
return distanceResolver.resolve(getAlgo(), params);
|
return PaceConfig.paceResolver.getDistanceAlgo(getAlgo(), params);
|
||||||
} catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
return new NullDistanceAlgo(params);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isIgnoreMissing() {
|
public boolean isIgnoreMissing() {
|
||||||
|
|
|
@ -13,7 +13,6 @@ import com.google.common.collect.Maps;
|
||||||
import com.google.common.collect.Ordering;
|
import com.google.common.collect.Ordering;
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
import com.google.gson.GsonBuilder;
|
import com.google.gson.GsonBuilder;
|
||||||
import eu.dnetlib.pace.model.adaptor.PidOafSerialiser;
|
|
||||||
|
|
||||||
public class GTAuthor implements Comparable<GTAuthor> {
|
public class GTAuthor implements Comparable<GTAuthor> {
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
package eu.dnetlib.pace.model.gt;
|
package eu.dnetlib.pace.model.gt;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import org.codehaus.jackson.map.ObjectMapper;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
public class ScoredResult extends Result {
|
public class ScoredResult extends Result {
|
||||||
|
|
||||||
|
@ -20,7 +22,11 @@ public class ScoredResult extends Result {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return new Gson().toJson(this);
|
try {
|
||||||
|
return new ObjectMapper().writeValueAsString(this);
|
||||||
|
} catch (IOException e) {
|
||||||
|
return e.getStackTrace().toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
package eu.dnetlib.pace.util;
|
||||||
|
|
||||||
|
/**
 * Unchecked exception that wraps a lower-level failure together with a
 * descriptive message, so callers are not forced to declare checked exceptions.
 */
public class PaceException extends RuntimeException {

    // Throwable is Serializable; pin the stream version instead of relying on the compiler-derived default.
    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception with a message and the underlying cause.
     *
     * @param message description of what failed
     * @param cause the original failure, preserved for the stack trace (may be {@code null})
     */
    public PaceException(String message, Throwable cause) {
        super(message, cause);
    }

}
|
|
@ -0,0 +1,63 @@
|
||||||
|
package eu.dnetlib.pace.util;
|
||||||
|
|
||||||
|
import eu.dnetlib.pace.clustering.ClusteringClass;
|
||||||
|
import eu.dnetlib.pace.clustering.ClusteringFunction;
|
||||||
|
import eu.dnetlib.pace.condition.ConditionAlgo;
|
||||||
|
import eu.dnetlib.pace.condition.ConditionClass;
|
||||||
|
import eu.dnetlib.pace.distance.DistanceAlgo;
|
||||||
|
import eu.dnetlib.pace.distance.DistanceClass;
|
||||||
|
import eu.dnetlib.pace.model.FieldDef;
|
||||||
|
import org.reflections.Reflections;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
public class PaceResolver implements Serializable {
|
||||||
|
|
||||||
|
private final Map<String, Class<ClusteringFunction>> clusteringFunctions;
|
||||||
|
private final Map<String, Class<ConditionAlgo>> conditionAlgos;
|
||||||
|
private final Map<String, Class<DistanceAlgo>> distanceAlgos;
|
||||||
|
|
||||||
|
public PaceResolver() {
|
||||||
|
|
||||||
|
this.clusteringFunctions = new Reflections("eu.dnetlib").getTypesAnnotatedWith(ClusteringClass.class).stream()
|
||||||
|
.filter(ClusteringFunction.class::isAssignableFrom)
|
||||||
|
.collect(Collectors.toMap(cl -> cl.getAnnotation(ClusteringClass.class).value(), cl -> (Class<ClusteringFunction>)cl));
|
||||||
|
|
||||||
|
this.conditionAlgos = new Reflections("eu.dnetlib").getTypesAnnotatedWith(ConditionClass.class).stream()
|
||||||
|
.filter(ConditionAlgo.class::isAssignableFrom)
|
||||||
|
.collect(Collectors.toMap(cl -> cl.getAnnotation(ConditionClass.class).value(), cl -> (Class<ConditionAlgo>)cl));
|
||||||
|
|
||||||
|
this.distanceAlgos = new Reflections("eu.dnetlib").getTypesAnnotatedWith(DistanceClass.class).stream()
|
||||||
|
.filter(DistanceAlgo.class::isAssignableFrom)
|
||||||
|
.collect(Collectors.toMap(cl -> cl.getAnnotation(DistanceClass.class).value(), cl -> (Class<DistanceAlgo>)cl));
|
||||||
|
}
|
||||||
|
|
||||||
|
public ClusteringFunction getClusteringFunction(String name, Map<String, Integer> params) throws PaceException {
|
||||||
|
try {
|
||||||
|
return clusteringFunctions.get(name).getDeclaredConstructor(Map.class).newInstance(params);
|
||||||
|
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
|
||||||
|
throw new PaceException(name + "not found", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public DistanceAlgo getDistanceAlgo(String name, Map<String, Number> params) throws PaceException {
|
||||||
|
try {
|
||||||
|
return distanceAlgos.get(name).getDeclaredConstructor(Map.class).newInstance(params);
|
||||||
|
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
|
||||||
|
throw new PaceException(name + "not found", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public ConditionAlgo getConditionAlgo(String name, List<FieldDef> fields) throws PaceException {
|
||||||
|
try {
|
||||||
|
return conditionAlgos.get(name).getDeclaredConstructor(String.class, List.class).newInstance(name, fields);
|
||||||
|
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
|
||||||
|
throw new PaceException(name + "not found", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,30 +0,0 @@
|
||||||
package eu.dnetlib.pace.clustering;
|
|
||||||
|
|
||||||
import org.junit.Before;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
|
|
||||||
public class ClusteringResolverTest {
|
|
||||||
|
|
||||||
private ClusteringResolver clusteringResolver;
|
|
||||||
private Map<String,Integer> params = new HashMap<String, Integer>();
|
|
||||||
|
|
||||||
@Before
|
|
||||||
public void setUp(){
|
|
||||||
clusteringResolver = new ClusteringResolver();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testResolve() throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException {
|
|
||||||
|
|
||||||
ClusteringFunction ngrams = clusteringResolver.resolve("ngrams", params);
|
|
||||||
|
|
||||||
assertEquals(ngrams.getClass(), Ngrams.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,35 +0,0 @@
|
||||||
package eu.dnetlib.pace.condition;
|
|
||||||
|
|
||||||
import eu.dnetlib.pace.clustering.ClusteringFunction;
|
|
||||||
import eu.dnetlib.pace.clustering.ClusteringResolver;
|
|
||||||
import eu.dnetlib.pace.clustering.Ngrams;
|
|
||||||
import eu.dnetlib.pace.model.FieldDef;
|
|
||||||
import org.junit.Before;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
|
|
||||||
public class ConditionResolverTest {
|
|
||||||
|
|
||||||
private ConditionResolver conditionResolver;
|
|
||||||
private List<FieldDef> fields;
|
|
||||||
private String name;
|
|
||||||
|
|
||||||
@Before
|
|
||||||
public void setUp(){
|
|
||||||
conditionResolver = new ConditionResolver();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testResolve() throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException {
|
|
||||||
|
|
||||||
ConditionAlgo sizeMatch = conditionResolver.resolve("sizeMatch", fields);
|
|
||||||
|
|
||||||
assertEquals(sizeMatch.getClass(), SizeMatch.class);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -76,8 +76,16 @@ public class ConfigTest extends AbstractPaceTest {
|
||||||
final DedupConfig cfgFromSerialization = DedupConfig.load(cfgFromClasspath.toString());
|
final DedupConfig cfgFromSerialization = DedupConfig.load(cfgFromClasspath.toString());
|
||||||
String params = "\"params\":{\"limit\":-1,\"weight\":0.0}";
|
String params = "\"params\":{\"limit\":-1,\"weight\":0.0}";
|
||||||
//verify if the serialization produces the same result of the input json
|
//verify if the serialization produces the same result of the input json
|
||||||
// assertEquals(cfgFromSerialization.toString().replaceAll("[\n\t\r ]", "").replaceAll("\"params\":null", params), cfgFromClasspath.toString().replaceAll("[\n\t\r ]", ""));
|
assertEquals(cfgFromSerialization.toString().replaceAll("[\n\t\r ]", "").replaceAll("\"params\":null", params), cfgFromClasspath.toString().replaceAll("[\n\t\r ]", ""));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void dedupConfigTest() {
|
||||||
|
|
||||||
|
DedupConfig load = DedupConfig.load(readFromClasspath("result.pace.conf.json"));
|
||||||
|
|
||||||
|
System.out.println(load.toString());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
{
|
{
|
||||||
"wf" : {
|
"wf" : {
|
||||||
"threshold" : "0.99",
|
"threshold" : "0.99",
|
||||||
"run" : "001",
|
"dedupRun" : "001",
|
||||||
"entityType" : "result",
|
"entityType" : "result",
|
||||||
"orderField" : "title",
|
"orderField" : "title",
|
||||||
"queueMaxSize" : "2000",
|
"queueMaxSize" : "2000",
|
||||||
|
|
Loading…
Reference in New Issue