From dc41b7664351a8995b2d30d2792f1fa8c350b6c3 Mon Sep 17 00:00:00 2001 From: Michele De Bonis Date: Mon, 29 Oct 2018 16:16:11 +0100 Subject: [PATCH] serialization test added. useless getter methods ignored by json serialization --- .../src/main/java/eu/dnetlib/pace/.DS_Store | Bin 0 -> 6148 bytes .../pace/clustering/ClusteringCombiner.java | 2 +- .../eu/dnetlib/pace/config/PaceConfig.java | 5 +- .../dnetlib/pace/distance/DistanceScorer.java | 2 +- .../eu/dnetlib/pace/model/ClusteringDef.java | 2 +- .../java/eu/dnetlib/pace/model/CondDef.java | 2 +- .../java/eu/dnetlib/pace/model/FieldDef.java | 2 +- .../eu/dnetlib/pace/config/ConfigTest.java | 72 ++++-------------- 8 files changed, 22 insertions(+), 65 deletions(-) create mode 100644 dnet-pace-core/src/main/java/eu/dnetlib/pace/.DS_Store diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/.DS_Store b/dnet-pace-core/src/main/java/eu/dnetlib/pace/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..e20f7b2a25b5b9a98d09de517d0e0e498fed3b86 GIT binary patch literal 6148 zcmeHK!A{#i5Zw)-wXsyWQV*@#!%7^uq>`hwp+_qPPL+xt&4b&LD0w<}$`CP{f)I%>#n{ zs54SC9z?EcWYMsH6lF4+i6zIs$pEh1BKyH4i&?_%uis5{l%@UB^R9Vqp}zR+xn(ao zw)4WV4}zW?24z?d`kk=oy6=KCPNJ~bP2;k+o#g9Ta-8H}i!WiB6nhxFEoBA)+76?g zG#Pf?SD$5GhH0J@s-CW~znkW<9CqX&kNYa#6j%0USoWZfwd_ZSicj0JyYAc3=zX*O z>0?7QTV8YA5Tn&quOU9P+T*cpS#MT0w!RI1{yI55`~Bzq;!-aJhc8yiOM?@*0^=Sd z$3c;2GXDb?He*d3Nq9) values)); + res.addAll(cd.clusteringFunction().apply((List) values)); } } return res; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java index c26ccaf72..4666db7ab 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java @@ -13,6 +13,7 @@ import eu.dnetlib.pace.model.CondDef; import eu.dnetlib.pace.model.FieldDef; import eu.dnetlib.pace.util.PaceResolver; import org.apache.commons.collections.CollectionUtils; +import org.codehaus.jackson.annotate.JsonIgnore; public class PaceConfig implements Serializable { @@ -57,10 +58,12 @@ public class PaceConfig implements Serializable { return conditions; } + @JsonIgnore public List getConditionAlgos() { return asConditionAlgos(getConditions()); } + @JsonIgnore public List getStrictConditionAlgos() { return asConditionAlgos(getStrictConditions()); } @@ -102,7 +105,7 @@ public class PaceConfig implements Serializable { final List fields = getModel().stream() .filter(fd -> cd.getFields().contains(fd.getName())) .collect(Collectors.toList()); - algos.add(cd.getConditionAlgo(fields)); + algos.add(cd.conditionAlgo(fields)); } return algos; } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceScorer.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceScorer.java index 467a19c86..115fd1a1e 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceScorer.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/distance/DistanceScorer.java @@ -77,7 +77,7 @@ public class DistanceScorer { } } else { if (va.getType().equals(vb.getType())) { - de.setDistance(w * fd.getDistanceAlgo().distance(va, vb)); + de.setDistance(w * fd.distanceAlgo().distance(va, vb)); } else { throw new IllegalArgumentException(String.format("Types are differents type: %s:%s - %s:%s", va, va.getType(), vb, vb.getType())); } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java index a5eb51aca..57239263a 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java @@ -28,7 +28,7 @@ public class ClusteringDef implements Serializable { this.name = name; } - public ClusteringFunction getClusteringFunction() { + public ClusteringFunction clusteringFunction() { try { return PaceConfig.paceResolver.getClusteringFunction(getName(), params); } catch (PaceException e) { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java index 488ea6387..aefd44d95 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/CondDef.java @@ -16,7 +16,7 @@ public class CondDef implements Serializable { public CondDef() {} - public ConditionAlgo getConditionAlgo(final List fields){ + public ConditionAlgo conditionAlgo(final List fields){ return PaceConfig.paceResolver.getConditionAlgo(getName(), fields); } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java index 825b913da..b954df7d6 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java @@ -68,7 +68,7 @@ public class FieldDef implements Serializable { return Lists.newArrayList(Splitter.on(PATH_SEPARATOR).split(getPath())); } - public DistanceAlgo getDistanceAlgo() { + public DistanceAlgo distanceAlgo() { if (params == null) { params = new HashMap<>(); diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java index cd2361bee..5ae030674 100644 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java @@ -14,69 +14,23 @@ import static org.junit.Assert.assertNotNull; public class ConfigTest extends AbstractPaceTest { @Test - public void dedupConfigSerializationTest() throws IOException { + public void dedupConfigSerializationTest() { final DedupConfig cfgFromClasspath = DedupConfig.load(readFromClasspath("result.pace.conf.json")); + final String conf = cfgFromClasspath.toString(); + +// System.out.println("*****SERIALIZED*****"); +// System.out.println(conf); +// System.out.println("*****FROM CLASSPATH*****"); +// System.out.println(readFromClasspath("result.pace.conf.json")); + + final DedupConfig cfgFromSerialization = DedupConfig.load(conf); + + assertEquals(cfgFromClasspath.toString(), cfgFromSerialization.toString()); + assertNotNull(cfgFromClasspath); + assertNotNull(cfgFromSerialization); - String conf = "{ \n" + - "wf\" : { " + - " \"threshold\" : \"0.99\", " + - " \"run\" : \"001\", " + - " \"entityType\" : \"result\", " + - " \"orderField\" : \"title\", " + - " \"queueMaxSize\" : \"2000\"," + - " \"groupMaxSize\" : \"10\"," + - " \"slidingWindowSize\" : \"200\"," + - " \"rootBuilder\" : [ \"result\" ]," + - " \"includeChildren\" : \"true\" " + - " }," + - "\t\"pace\" : {\t\t\n" + - "\t\t\"clustering\" : [\n" + - "\t\t\t{ \"name\" : \"acronyms\", \"fields\" : [ \"title\" ], \"params\" : { \"max\" : \"1\", \"minLen\" : \"2\", \"maxLen\" : \"4\"} },\n" + - "\t\t\t{ \"name\" : \"ngrampairs\", \"fields\" : [ \"title\" ], \"params\" : { \"max\" : \"1\", \"ngramLen\" : \"3\"} },\n" + - "\t\t\t{ \"name\" : \"suffixprefix\", \"fields\" : [ \"title\" ], \"params\" : { \"max\" : \"1\", \"len\" : \"3\" } } \n" + - "\t\t],\t\t\n" + - "\t\t\"strictConditions\" : [\n" + - " \t\t\t{ \"name\" : \"exactMatch\", \"fields\" : [ \"pid\" ] }\n" + - " \t\t], \n" + - " \t\t\"conditions\" : [ \n" + - " \t\t\t{ \"name\" : \"yearMatch\", \"fields\" : [ \"dateofacceptance\" ] },\n" + - " \t\t\t{ \"name\" : \"titleVersionMatch\", \"fields\" : [ \"title\" ] },\n" + - " \t\t\t{ \"name\" : \"sizeMatch\", \"fields\" : [ \"authors\" ] } \n" + - " \t\t],\t\t\n" + - "\t\t\"model\" : [\n" + - "\t\t\t{ \"name\" : \"pid\", \"algo\" : \"Null\", \"type\" : \"String\", \"weight\" : \"0.0\", \"ignoreMissing\" : \"true\", \"path\" : \"pid[qualifier#classid = {doi}]/value\", \"overrideMatch\" : \"true\" }, \t\n" + - "\t\t\t{ \"name\" : \"title\", \"algo\" : \"JaroWinkler\", \"type\" : \"String\", \"weight\" : \"1.0\", \"ignoreMissing\" : \"false\", \"path\" : \"result/metadata/title[qualifier#classid = {main title}]/value\" },\n" + - "\t\t\t{ \"name\" : \"dateofacceptance\", \"algo\" : \"Null\", \"type\" : \"String\", \"weight\" : \"0.0\", \"ignoreMissing\" : \"true\", \"path\" : \"result/metadata/dateofacceptance/value\" } ,\n" + - "\t\t\t{ \"name\" : \"authors\", \"algo\" : \"Null\", \"type\" : \"List\", \"weight\" : \"0.0\", \"ignoreMissing\" : \"true\", \"path\" : \"result/author/metadata/fullname/value\" }\n" + - "\t\t],\n" + - "\t\t\"blacklists\" : {\n" + - "\t\t\t\"title\" : [\n" + - "\t\t\t\t\"^(Corpus Oral Dialectal \\\\(COD\\\\)\\\\.).*$\",\n" + - "\t\t\t\t\"^(Kiri Karl Morgensternile).*$\",\n" + - "\t\t\t\t\"^(\\\\[Eksliibris Aleksandr).*\\\\]$\",\n" + - "\t\t\t\t\"^(\\\\[Eksliibris Aleksandr).*$\",\n" + - "\t\t\t\t\"^(Eksliibris Aleksandr).*$\",\n" + - "\t\t\t\t\"^(Kiri A\\\\. de Vignolles).*$\",\n" + - "\t\t\t\t\"^(2 kirja Karl Morgensternile).*$\",\n" + - "\t\t\t\t\"^(Pirita kloostri idaosa arheoloogilised).*$\",\n" + - "\t\t\t\t\"^(Kiri tundmatule).*$\",\n" + - "\t\t\t\t\"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$\",\n" + - "\t\t\t\t\"^(Eksliibris Nikolai Birukovile).*$\",\n" + - "\t\t\t\t\"^(Eksliibris Nikolai Issakovile).*$\",\n" + - "\t\t\t\t\"^(WHP Cruise Summary Information of section).*$\",\n" + - "\t\t\t\t\"^(Measurement of the top quark\\\\-pair production cross section with ATLAS in pp collisions at).*$\",\n" + - "\t\t\t\t\"^(Measurement of the spin\\\\-dependent structure function).*\"\n" + - "\t\t\t] } \t\t\n" + - "\t}\n" + - "\n" + - "}"; - - final DedupConfig cfgFromSerialization = DedupConfig.load(cfgFromClasspath.toString()); - String params = "\"params\":{\"limit\":-1,\"weight\":0.0}"; - //verify if the serialization produces the same result of the input json - assertEquals(cfgFromSerialization.toString().replaceAll("[\n\t\r ]", "").replaceAll("\"params\":null", params), cfgFromClasspath.toString().replaceAll("[\n\t\r ]", "")); }