diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/.DS_Store b/dnet-pace-core/src/main/java/eu/dnetlib/pace/.DS_Store new file mode 100644 index 0000000000..e20f7b2a25 Binary files /dev/null and b/dnet-pace-core/src/main/java/eu/dnetlib/pace/.DS_Store differ diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringCombiner.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringCombiner.java index d6887050a8..a4b58aa81f 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringCombiner.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringCombiner.java @@ -21,7 +21,7 @@ public class ClusteringCombiner { for (final ClusteringDef cd : defs) { for (final String fieldName : cd.getFields()) { final Field values = a.values(fieldName); - res.addAll(cd.getClusteringFunction().apply((List) values)); + res.addAll(cd.clusteringFunction().apply((List) values)); } } return res; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java index c26ccaf728..4666db7ab7 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java @@ -13,6 +13,7 @@ import eu.dnetlib.pace.model.CondDef; import eu.dnetlib.pace.model.FieldDef; import eu.dnetlib.pace.util.PaceResolver; import org.apache.commons.collections.CollectionUtils; +import org.codehaus.jackson.annotate.JsonIgnore; public class PaceConfig implements Serializable { @@ -57,10 +58,12 @@ public class PaceConfig implements Serializable { return conditions; } + @JsonIgnore public List getConditionAlgos() { return asConditionAlgos(getConditions()); } + @JsonIgnore public List getStrictConditionAlgos() { return asConditionAlgos(getStrictConditions()); } @@ -102,7 +105,7 @@ public class PaceConfig implements Serializable { final List fields = getModel().stream() .filter(fd -> cd.getFields().contains(fd.getName())) .collect(Collectors.toList()); - algos.add(cd.getConditionAlgo(fields)); + algos.add(cd.conditionAlgo(fields)); } return algos; } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceScorer.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceScorer.java index 467a19c862..115fd1a1ee 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceScorer.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceScorer.java @@ -77,7 +77,7 @@ public class DistanceScorer { } } else { if (va.getType().equals(vb.getType())) { - de.setDistance(w * fd.getDistanceAlgo().distance(va, vb)); + de.setDistance(w * fd.distanceAlgo().distance(va, vb)); } else { throw new IllegalArgumentException(String.format("Types are differents type: %s:%s - %s:%s", va, va.getType(), vb, vb.getType())); } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java index a5eb51aca5..57239263a6 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java @@ -28,7 +28,7 @@ public class ClusteringDef implements Serializable { this.name = name; } - public ClusteringFunction getClusteringFunction() { + public ClusteringFunction clusteringFunction() { try { return PaceConfig.paceResolver.getClusteringFunction(getName(), params); } catch (PaceException e) { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java index 488ea6387e..aefd44d950 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java @@ -16,7 +16,7 @@ public class CondDef implements Serializable { public CondDef() {} - public ConditionAlgo getConditionAlgo(final List fields){ + public ConditionAlgo conditionAlgo(final List fields){ return PaceConfig.paceResolver.getConditionAlgo(getName(), fields); } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java index 825b913da6..b954df7d6a 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java @@ -68,7 +68,7 @@ public class FieldDef implements Serializable { return Lists.newArrayList(Splitter.on(PATH_SEPARATOR).split(getPath())); } - public DistanceAlgo getDistanceAlgo() { + public DistanceAlgo distanceAlgo() { if (params == null) { params = new HashMap<>(); diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java index cd2361bee1..5ae030674f 100644 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java @@ -14,69 +14,23 @@ import static org.junit.Assert.assertNotNull; public class ConfigTest extends AbstractPaceTest { @Test - public void dedupConfigSerializationTest() throws IOException { + public void dedupConfigSerializationTest() { final DedupConfig cfgFromClasspath = DedupConfig.load(readFromClasspath("result.pace.conf.json")); + final String conf = cfgFromClasspath.toString(); + +// System.out.println("*****SERIALIZED*****"); +// System.out.println(conf); +// System.out.println("*****FROM CLASSPATH*****"); +// System.out.println(readFromClasspath("result.pace.conf.json")); + + final DedupConfig cfgFromSerialization = DedupConfig.load(conf); + + assertEquals(cfgFromClasspath.toString(), cfgFromSerialization.toString()); + assertNotNull(cfgFromClasspath); + assertNotNull(cfgFromSerialization); - String conf = "{ \n" + - "wf\" : { " + - " \"threshold\" : \"0.99\", " + - " \"run\" : \"001\", " + - " \"entityType\" : \"result\", " + - " \"orderField\" : \"title\", " + - " \"queueMaxSize\" : \"2000\"," + - " \"groupMaxSize\" : \"10\"," + - " \"slidingWindowSize\" : \"200\"," + - " \"rootBuilder\" : [ \"result\" ]," + - " \"includeChildren\" : \"true\" " + - " }," + - "\t\"pace\" : {\t\t\n" + - "\t\t\"clustering\" : [\n" + - "\t\t\t{ \"name\" : \"acronyms\", \"fields\" : [ \"title\" ], \"params\" : { \"max\" : \"1\", \"minLen\" : \"2\", \"maxLen\" : \"4\"} },\n" + - "\t\t\t{ \"name\" : \"ngrampairs\", \"fields\" : [ \"title\" ], \"params\" : { \"max\" : \"1\", \"ngramLen\" : \"3\"} },\n" + - "\t\t\t{ \"name\" : \"suffixprefix\", \"fields\" : [ \"title\" ], \"params\" : { \"max\" : \"1\", \"len\" : \"3\" } } \n" + - "\t\t],\t\t\n" + - "\t\t\"strictConditions\" : [\n" + - " \t\t\t{ \"name\" : \"exactMatch\", \"fields\" : [ \"pid\" ] }\n" + - " \t\t], \n" + - " \t\t\"conditions\" : [ \n" + - " \t\t\t{ \"name\" : \"yearMatch\", \"fields\" : [ \"dateofacceptance\" ] },\n" + - " \t\t\t{ \"name\" : \"titleVersionMatch\", \"fields\" : [ \"title\" ] },\n" + - " \t\t\t{ \"name\" : \"sizeMatch\", \"fields\" : [ \"authors\" ] } \n" + - " \t\t],\t\t\n" + - "\t\t\"model\" : [\n" + - "\t\t\t{ \"name\" : \"pid\", \"algo\" : \"Null\", \"type\" : \"String\", \"weight\" : \"0.0\", \"ignoreMissing\" : \"true\", \"path\" : \"pid[qualifier#classid = {doi}]/value\", \"overrideMatch\" : \"true\" }, \t\n" + - "\t\t\t{ \"name\" : \"title\", \"algo\" : \"JaroWinkler\", \"type\" : \"String\", \"weight\" : \"1.0\", \"ignoreMissing\" : \"false\", \"path\" : \"result/metadata/title[qualifier#classid = {main title}]/value\" },\n" + - "\t\t\t{ \"name\" : \"dateofacceptance\", \"algo\" : \"Null\", \"type\" : \"String\", \"weight\" : \"0.0\", \"ignoreMissing\" : \"true\", \"path\" : \"result/metadata/dateofacceptance/value\" } ,\n" + - "\t\t\t{ \"name\" : \"authors\", \"algo\" : \"Null\", \"type\" : \"List\", \"weight\" : \"0.0\", \"ignoreMissing\" : \"true\", \"path\" : \"result/author/metadata/fullname/value\" }\n" + - "\t\t],\n" + - "\t\t\"blacklists\" : {\n" + - "\t\t\t\"title\" : [\n" + - "\t\t\t\t\"^(Corpus Oral Dialectal \\\\(COD\\\\)\\\\.).*$\",\n" + - "\t\t\t\t\"^(Kiri Karl Morgensternile).*$\",\n" + - "\t\t\t\t\"^(\\\\[Eksliibris Aleksandr).*\\\\]$\",\n" + - "\t\t\t\t\"^(\\\\[Eksliibris Aleksandr).*$\",\n" + - "\t\t\t\t\"^(Eksliibris Aleksandr).*$\",\n" + - "\t\t\t\t\"^(Kiri A\\\\. de Vignolles).*$\",\n" + - "\t\t\t\t\"^(2 kirja Karl Morgensternile).*$\",\n" + - "\t\t\t\t\"^(Pirita kloostri idaosa arheoloogilised).*$\",\n" + - "\t\t\t\t\"^(Kiri tundmatule).*$\",\n" + - "\t\t\t\t\"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$\",\n" + - "\t\t\t\t\"^(Eksliibris Nikolai Birukovile).*$\",\n" + - "\t\t\t\t\"^(Eksliibris Nikolai Issakovile).*$\",\n" + - "\t\t\t\t\"^(WHP Cruise Summary Information of section).*$\",\n" + - "\t\t\t\t\"^(Measurement of the top quark\\\\-pair production cross section with ATLAS in pp collisions at).*$\",\n" + - "\t\t\t\t\"^(Measurement of the spin\\\\-dependent structure function).*\"\n" + - "\t\t\t] } \t\t\n" + - "\t}\n" + - "\n" + - "}"; - - final DedupConfig cfgFromSerialization = DedupConfig.load(cfgFromClasspath.toString()); - String params = "\"params\":{\"limit\":-1,\"weight\":0.0}"; - //verify if the serialization produces the same result of the input json - assertEquals(cfgFromSerialization.toString().replaceAll("[\n\t\r ]", "").replaceAll("\"params\":null", params), cfgFromClasspath.toString().replaceAll("[\n\t\r ]", "")); }