From ea36007d1fd1eb95644ceae0b478106b75e0f0a0 Mon Sep 17 00:00:00 2001 From: Michele De Bonis Date: Mon, 29 Oct 2018 11:13:55 +0100 Subject: [PATCH] DedupConf parsed using Jackson library --- .../pace/clustering/ClusteringResolver.java | 24 ------- .../pace/condition/ConditionResolver.java | 25 -------- .../eu/dnetlib/pace/config/DedupConfig.java | 17 ++--- .../eu/dnetlib/pace/config/PaceConfig.java | 5 ++ .../pace/distance/DistanceResolver.java | 24 ------- .../eu/dnetlib/pace/model/ClusteringDef.java | 21 ++++--- .../java/eu/dnetlib/pace/model/CondDef.java | 24 +++---- .../java/eu/dnetlib/pace/model/FieldDef.java | 20 +++--- .../eu/dnetlib/pace/model/gt/GTAuthor.java | 1 - .../dnetlib/pace/model/gt/ScoredResult.java | 10 ++- .../eu/dnetlib/pace/util/PaceException.java | 9 +++ .../eu/dnetlib/pace/util/PaceResolver.java | 63 +++++++++++++++++++ .../clustering/ClusteringResolverTest.java | 30 --------- .../pace/condition/ConditionResolverTest.java | 35 ----------- .../eu/dnetlib/pace/config/ConfigTest.java | 10 ++- .../dnetlib/pace/config/result.pace.conf.json | 2 +- 16 files changed, 134 insertions(+), 186 deletions(-) delete mode 100644 dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringResolver.java delete mode 100644 dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionResolver.java delete mode 100644 dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceResolver.java create mode 100644 dnet-pace-core/src/main/java/eu/dnetlib/pace/util/PaceException.java create mode 100644 dnet-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java delete mode 100644 dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringResolverTest.java delete mode 100644 dnet-pace-core/src/test/java/eu/dnetlib/pace/condition/ConditionResolverTest.java diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringResolver.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringResolver.java deleted file mode 
100644 index feec3e213..000000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringResolver.java +++ /dev/null @@ -1,24 +0,0 @@ -package eu.dnetlib.pace.clustering; - -import java.io.Serializable; -import java.lang.reflect.InvocationTargetException; -import java.util.Map; -import java.util.stream.Collectors; - -import org.reflections.Reflections; - -public class ClusteringResolver implements Serializable { - private final Map> functionMap; - - public ClusteringResolver() { - - this.functionMap = new Reflections("eu.dnetlib").getTypesAnnotatedWith(ClusteringClass.class).stream() - .filter(ClusteringFunction.class::isAssignableFrom) - .collect(Collectors.toMap(cl -> cl.getAnnotation(ClusteringClass.class).value(), cl -> (Class)cl)); - } - - public ClusteringFunction resolve(String clusteringFunction, Map params) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException { - - return functionMap.get(clusteringFunction).getDeclaredConstructor(Map.class).newInstance(params); - } -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionResolver.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionResolver.java deleted file mode 100644 index 577bcdb6e..000000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/condition/ConditionResolver.java +++ /dev/null @@ -1,25 +0,0 @@ -package eu.dnetlib.pace.condition; - -import java.io.Serializable; -import java.lang.reflect.InvocationTargetException; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -import eu.dnetlib.pace.model.FieldDef; -import org.reflections.Reflections; - -public class ConditionResolver implements Serializable { - private final Map> functionMap; - - public ConditionResolver() { - - this.functionMap = new Reflections("eu.dnetlib").getTypesAnnotatedWith(ConditionClass.class).stream() - .filter(ConditionAlgo.class::isAssignableFrom) - .collect(Collectors.toMap(cl -> 
cl.getAnnotation(ConditionClass.class).value(), cl -> (Class)cl)); - } - - public ConditionAlgo resolve(String name, List fields) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException { - return functionMap.get(name).getDeclaredConstructor(String.class, List.class).newInstance(name, fields); - } -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/DedupConfig.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/DedupConfig.java index a0fcc2c6c..2f6b5e9e0 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/DedupConfig.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/DedupConfig.java @@ -7,12 +7,11 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import eu.dnetlib.pace.util.PaceException; import org.antlr.stringtemplate.StringTemplate; import org.apache.commons.io.IOUtils; import com.google.common.collect.Maps; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; import eu.dnetlib.pace.condition.ConditionAlgo; import eu.dnetlib.pace.model.ClusteringDef; @@ -35,7 +34,7 @@ public class DedupConfig implements Config, Serializable { static { defaults.put("threshold", "0"); - defaults.put("run", "001"); + defaults.put("dedupRun", "001"); defaults.put("entityType", "result"); defaults.put("orderField", "title"); defaults.put("queueMaxSize", "2000"); @@ -49,11 +48,15 @@ public class DedupConfig implements Config, Serializable { public static DedupConfig load(final String json) { - final DedupConfig config = new Gson().fromJson(json, DedupConfig.class); + final DedupConfig config; + try { + config = new ObjectMapper().readValue(json, DedupConfig.class); + config.getPace().initModel(); + return config; + } catch (IOException e) { + throw new PaceException("Error in parsing configuration json", e); + } - config.getPace().initModel(); - - return config; } public static DedupConfig loadDefault() throws IOException { diff --git 
a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java index 37fea00cd..c26ccaf72 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java @@ -11,6 +11,7 @@ import eu.dnetlib.pace.condition.ConditionAlgo; import eu.dnetlib.pace.model.ClusteringDef; import eu.dnetlib.pace.model.CondDef; import eu.dnetlib.pace.model.FieldDef; +import eu.dnetlib.pace.util.PaceResolver; import org.apache.commons.collections.CollectionUtils; public class PaceConfig implements Serializable { @@ -23,6 +24,8 @@ public class PaceConfig implements Serializable { private Map modelMap; + public static PaceResolver paceResolver; + public PaceConfig() {} public void initModel() { @@ -30,6 +33,8 @@ public class PaceConfig implements Serializable { for(FieldDef fd : getModel()) { modelMap.put(fd.getName(), fd); } + + paceResolver = new PaceResolver(); } public List getModel() { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceResolver.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceResolver.java deleted file mode 100644 index d219ac440..000000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceResolver.java +++ /dev/null @@ -1,24 +0,0 @@ -package eu.dnetlib.pace.distance; - -import java.io.Serializable; -import java.lang.reflect.InvocationTargetException; -import java.util.Map; -import java.util.stream.Collectors; - -import org.reflections.Reflections; - -public class DistanceResolver implements Serializable { - private final Map> functionMap; - - public DistanceResolver() { - - this.functionMap = new Reflections("eu.dnetlib").getTypesAnnotatedWith(DistanceClass.class).stream() - .filter(DistanceAlgo.class::isAssignableFrom) - .collect(Collectors.toMap(cl -> cl.getAnnotation(DistanceClass.class).value(), cl -> (Class)cl)); - } - - public DistanceAlgo 
resolve(String algo, Map params) throws IllegalAccessException, InstantiationException, NoSuchMethodException, InvocationTargetException { - - return functionMap.get(algo).getDeclaredConstructor(Map.class).newInstance(params); - } -} \ No newline at end of file diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java index 7d6cdcba9..a5eb51aca 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java @@ -1,12 +1,14 @@ package eu.dnetlib.pace.model; +import java.io.IOException; import java.io.Serializable; -import java.lang.reflect.InvocationTargetException; import java.util.List; import java.util.Map; -import com.google.gson.Gson; import eu.dnetlib.pace.clustering.*; +import eu.dnetlib.pace.config.PaceConfig; +import eu.dnetlib.pace.util.PaceException; +import org.codehaus.jackson.map.ObjectMapper; public class ClusteringDef implements Serializable { @@ -16,8 +18,6 @@ public class ClusteringDef implements Serializable { private Map params; - private ClusteringResolver clusteringResolver = new ClusteringResolver(); - public ClusteringDef() {} public String getName() { @@ -29,12 +29,11 @@ public class ClusteringDef implements Serializable { } public ClusteringFunction getClusteringFunction() { - try { - return clusteringResolver.resolve(getName(), params); - } catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) { + return PaceConfig.paceResolver.getClusteringFunction(getName(), params); + } catch (PaceException e) { e.printStackTrace(); - return new RandomClusteringFunction(getParams()); + return null; } } @@ -56,7 +55,11 @@ public class ClusteringDef implements Serializable { @Override public String toString() { - return new Gson().toJson(this); + try { + return new ObjectMapper().writeValueAsString(this); + } 
catch (IOException e) { + return java.util.Arrays.toString(e.getStackTrace()); /* array.toString() would only print the array identity */ + } } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java index fda8653c9..488ea6387 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java @@ -1,11 +1,12 @@ package eu.dnetlib.pace.model; +import java.io.IOException; import java.io.Serializable; -import java.lang.reflect.InvocationTargetException; import java.util.List; -import com.google.gson.Gson; import eu.dnetlib.pace.condition.*; +import eu.dnetlib.pace.config.PaceConfig; +import org.codehaus.jackson.map.ObjectMapper; public class CondDef implements Serializable { @@ -13,19 +14,10 @@ public class CondDef implements Serializable { private List fields; - private ConditionResolver conditionResolver = new ConditionResolver(); - public CondDef() {} - public ConditionAlgo getConditionAlgo(final List fields) { - - try { - return conditionResolver.resolve(getName(), fields); - } catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) { - e.printStackTrace(); - return new AlwaysTrueCondition(getName(), fields); - } - + public ConditionAlgo getConditionAlgo(final List fields){ + return PaceConfig.paceResolver.getConditionAlgo(getName(), fields); } public String getName() { @@ -46,7 +38,11 @@ public class CondDef implements Serializable { @Override public String toString() { - return new Gson().toJson(this); + try { + return new ObjectMapper().writeValueAsString(this); + } catch (IOException e) { + return java.util.Arrays.toString(e.getStackTrace()); /* array.toString() would only print the array identity */ + } } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java index 8b7250171..825b913da 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java +++ 
b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java @@ -9,9 +9,11 @@ import java.util.Map; import com.google.common.base.Splitter; import com.google.common.collect.Lists; import com.google.gson.Gson; +import eu.dnetlib.pace.config.PaceConfig; import eu.dnetlib.pace.config.Type; import eu.dnetlib.pace.distance.*; import eu.dnetlib.pace.distance.algo.*; +import eu.dnetlib.pace.util.PaceException; /** * The schema is composed by field definitions (FieldDef). Each field has a type, a name, and an associated distance algorithm. @@ -38,8 +40,6 @@ public class FieldDef implements Serializable { private Map params; - private DistanceResolver distanceResolver = new DistanceResolver(); - public FieldDef() {} // def apply(s: String): Field[A] @@ -70,18 +70,12 @@ public class FieldDef implements Serializable { public DistanceAlgo getDistanceAlgo() { - try { - if (params == null) { - params = new HashMap<>(); - } - params.put("limit", getLimit()); - params.put("weight", getWeight()); - return distanceResolver.resolve(getAlgo(), params); - } catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) { - e.printStackTrace(); - return new NullDistanceAlgo(params); + if (params == null) { + params = new HashMap<>(); } - + params.put("limit", getLimit()); + params.put("weight", getWeight()); + return PaceConfig.paceResolver.getDistanceAlgo(getAlgo(), params); } public boolean isIgnoreMissing() { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthor.java index e91edccc1..c9d4797e3 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthor.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthor.java @@ -13,7 +13,6 @@ import com.google.common.collect.Maps; import com.google.common.collect.Ordering; import com.google.gson.Gson; import com.google.gson.GsonBuilder; -import 
eu.dnetlib.pace.model.adaptor.PidOafSerialiser; public class GTAuthor implements Comparable { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/ScoredResult.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/ScoredResult.java index 5d4526c4b..9caecee87 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/ScoredResult.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/ScoredResult.java @@ -1,6 +1,8 @@ package eu.dnetlib.pace.model.gt; -import com.google.gson.Gson; +import org.codehaus.jackson.map.ObjectMapper; + +import java.io.IOException; public class ScoredResult extends Result { @@ -20,7 +22,11 @@ public class ScoredResult extends Result { @Override public String toString() { - return new Gson().toJson(this); + try { + return new ObjectMapper().writeValueAsString(this); + } catch (IOException e) { + return java.util.Arrays.toString(e.getStackTrace()); /* array.toString() would only print the array identity */ + } } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/PaceException.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/PaceException.java new file mode 100644 index 000000000..34fd8ba20 --- /dev/null +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/PaceException.java @@ -0,0 +1,9 @@ +package eu.dnetlib.pace.util; + +public class PaceException extends RuntimeException { + + public PaceException(String s, Throwable e){ + super(s, e); + } + +} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java new file mode 100644 index 000000000..d14e81b50 --- /dev/null +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java @@ -0,0 +1,63 @@ +package eu.dnetlib.pace.util; + +import eu.dnetlib.pace.clustering.ClusteringClass; +import eu.dnetlib.pace.clustering.ClusteringFunction; +import eu.dnetlib.pace.condition.ConditionAlgo; +import eu.dnetlib.pace.condition.ConditionClass; +import eu.dnetlib.pace.distance.DistanceAlgo; +import 
eu.dnetlib.pace.distance.DistanceClass; +import eu.dnetlib.pace.model.FieldDef; +import org.reflections.Reflections; + +import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class PaceResolver implements Serializable { + + private final Map<String, Class<ClusteringFunction>> clusteringFunctions; + private final Map<String, Class<ConditionAlgo>> conditionAlgos; + private final Map<String, Class<DistanceAlgo>> distanceAlgos; + + public PaceResolver() { + + this.clusteringFunctions = new Reflections("eu.dnetlib").getTypesAnnotatedWith(ClusteringClass.class).stream() + .filter(ClusteringFunction.class::isAssignableFrom) + .collect(Collectors.toMap(cl -> cl.getAnnotation(ClusteringClass.class).value(), cl -> (Class<ClusteringFunction>)cl)); + + this.conditionAlgos = new Reflections("eu.dnetlib").getTypesAnnotatedWith(ConditionClass.class).stream() + .filter(ConditionAlgo.class::isAssignableFrom) + .collect(Collectors.toMap(cl -> cl.getAnnotation(ConditionClass.class).value(), cl -> (Class<ConditionAlgo>)cl)); + + this.distanceAlgos = new Reflections("eu.dnetlib").getTypesAnnotatedWith(DistanceClass.class).stream() + .filter(DistanceAlgo.class::isAssignableFrom) + .collect(Collectors.toMap(cl -> cl.getAnnotation(DistanceClass.class).value(), cl -> (Class<DistanceAlgo>)cl)); + } + + public ClusteringFunction getClusteringFunction(String name, Map params) throws PaceException { + try { + return clusteringFunctions.get(name).getDeclaredConstructor(Map.class).newInstance(params); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { + throw new PaceException(name + " not found", e); + } + } + + public DistanceAlgo getDistanceAlgo(String name, Map params) throws PaceException { + try { + return distanceAlgos.get(name).getDeclaredConstructor(Map.class).newInstance(params); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { + throw new PaceException(name + " not found", 
e); + } + } + + public ConditionAlgo getConditionAlgo(String name, List<FieldDef> fields) throws PaceException { + try { + return conditionAlgos.get(name).getDeclaredConstructor(String.class, List.class).newInstance(name, fields); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { + throw new PaceException(name + " not found", e); + } + } + +} diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringResolverTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringResolverTest.java deleted file mode 100644 index 6d01176f9..000000000 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringResolverTest.java +++ /dev/null @@ -1,30 +0,0 @@ -package eu.dnetlib.pace.clustering; - -import org.junit.Before; -import org.junit.Test; - -import java.lang.reflect.InvocationTargetException; -import java.util.HashMap; -import java.util.Map; - -import static org.junit.Assert.assertEquals; - -public class ClusteringResolverTest { - - private ClusteringResolver clusteringResolver; - private Map params = new HashMap(); - - @Before - public void setUp(){ - clusteringResolver = new ClusteringResolver(); - } - - @Test - public void testResolve() throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException { - - ClusteringFunction ngrams = clusteringResolver.resolve("ngrams", params); - - assertEquals(ngrams.getClass(), Ngrams.class); - } - -} diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/condition/ConditionResolverTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/condition/ConditionResolverTest.java deleted file mode 100644 index 87d1c375b..000000000 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/condition/ConditionResolverTest.java +++ /dev/null @@ -1,35 +0,0 @@ -package eu.dnetlib.pace.condition; - -import eu.dnetlib.pace.clustering.ClusteringFunction; -import eu.dnetlib.pace.clustering.ClusteringResolver; 
-import eu.dnetlib.pace.clustering.Ngrams; -import eu.dnetlib.pace.model.FieldDef; -import org.junit.Before; -import org.junit.Test; - -import java.lang.reflect.InvocationTargetException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.junit.Assert.assertEquals; - -public class ConditionResolverTest { - - private ConditionResolver conditionResolver; - private List fields; - private String name; - - @Before - public void setUp(){ - conditionResolver = new ConditionResolver(); - } - - @Test - public void testResolve() throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException { - - ConditionAlgo sizeMatch = conditionResolver.resolve("sizeMatch", fields); - - assertEquals(sizeMatch.getClass(), SizeMatch.class); - } -} diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java index 2b7a5baf3..cd2361bee 100644 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java @@ -76,8 +76,16 @@ public class ConfigTest extends AbstractPaceTest { final DedupConfig cfgFromSerialization = DedupConfig.load(cfgFromClasspath.toString()); String params = "\"params\":{\"limit\":-1,\"weight\":0.0}"; //verify if the serialization produces the same result of the input json -// assertEquals(cfgFromSerialization.toString().replaceAll("[\n\t\r ]", "").replaceAll("\"params\":null", params), cfgFromClasspath.toString().replaceAll("[\n\t\r ]", "")); + assertEquals(cfgFromSerialization.toString().replaceAll("[\n\t\r ]", "").replaceAll("\"params\":null", params), cfgFromClasspath.toString().replaceAll("[\n\t\r ]", "")); } + @Test + public void dedupConfigTest() { + + DedupConfig load = DedupConfig.load(readFromClasspath("result.pace.conf.json")); + + System.out.println(load.toString()); + } + } diff --git 
a/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/result.pace.conf.json b/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/result.pace.conf.json index 7d8fe244b..786424a34 100644 --- a/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/result.pace.conf.json +++ b/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/result.pace.conf.json @@ -1,7 +1,7 @@ { "wf" : { "threshold" : "0.99", - "run" : "001", + "dedupRun" : "001", "entityType" : "result", "orderField" : "title", "queueMaxSize" : "2000",