diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldListImpl.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldListImpl.java index ee593b533..d4a11c050 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldListImpl.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldListImpl.java @@ -136,13 +136,7 @@ public class FieldListImpl extends AbstractField implements FieldList { */ @Override public boolean isEmpty() { - return Iterables.all(fields, new Predicate() { - - @Override - public boolean apply(final Field f) { - return f.isEmpty(); - } - }); + return Iterables.all(fields, f -> f.isEmpty()); } /* diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java index ea31ec36e..bf861276e 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java @@ -47,13 +47,17 @@ public class FieldValueImpl extends AbstractField implements FieldValue { if (value == null) return false; switch (type) { - case String: - case JSON: + case String: + case JSON: return value.toString().isEmpty(); - case List: - List list = (List) value; - return list.isEmpty() || ((FieldValueImpl) list.get(0)).isEmpty(); - case URL: + case List: + try { + List list = (List) value; + return list.isEmpty() || ((FieldValueImpl) list.get(0)).isEmpty(); + } catch (Exception e) { + throw new RuntimeException(value.toString()); + } + case URL: String str = value.toString(); return StringUtils.isBlank(str) || !isValidURL(str); default: diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java index c55e13d36..54da950af 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java @@ -45,7 +45,7 @@ public class MapDocumentUtil { FieldListImpl fi = new FieldListImpl(fdef.getName(), fdef.getType()); getJPathList(fdef.getPath(), json, fdef.getType()) .stream() - .map(item -> new FieldValueImpl(fdef.getType(), fdef.getName(), item)) + .map(item -> new FieldValueImpl(Type.String, fdef.getName(), item)) .forEach(fi::add); stringField.put(fdef.getName(), fi); break; diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java index 09d7c0b9b..5d6643461 100644 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java @@ -12,6 +12,7 @@ import org.junit.Test; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; @@ -68,11 +69,32 @@ public class ConfigTest extends AbstractPaceTest { DedupConfig dedupConf = DedupConfig.load(readFromClasspath("publication.current.conf.json")); - final String json = readFromClasspath("publication.json"); + final String json = readFromClasspath("pub2.json"); final MapDocument mapDocument = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, json); System.out.println("mapDocument = " + mapDocument.getFieldMap()); + + System.out.println(mapDocument.getFieldMap().values().stream().map(Field::isEmpty).count()); + + } + + + + + + @Test + public void testJPath() { + final String json = readFromClasspath("pub2.json"); + + final String jpath ="$.pid"; + + + final List jPathList = MapDocumentUtil.getJPathList(jpath, json, Type.JSON); + + System.out.println("jPathList = " + jPathList); + + } } diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java index 36aca3346..b85be57d1 100644 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java @@ -20,4 +20,5 @@ public class UtilTest { PaceResolver paceResolver = new PaceResolver(); paceResolver.getComparator("keywordMatch", params); } + } diff --git a/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/pub2.json b/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/pub2.json new file mode 100644 index 000000000..d3e5bf69e --- /dev/null +++ b/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/pub2.json @@ -0,0 +1,269 @@ +{ + "journal": { + "name": "", + "issnPrinted": "", + "issnOnline": "", + "issnLinking": "", + "ep": "", + "iss": "", + "sp": "", + "vol": "", + "edition": "", + "conferenceplace": "", + "conferencedate": "", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "inferenceprovenance": "", + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + } + } + }, + "author": [ + { + "fullname": "Clingerman Daniel J.", + "name": "", + "surname": "", + "rank": 1, + "pid": [], + "affiliation": [] + }, + { + "fullname": "Morris William", + "name": "", + "surname": "", + "rank": 2, + "pid": [], + "affiliation": [] + }, + { + "fullname": "Sarjeant Amy A.", + "name": "", + "surname": "", + "rank": 3, + "pid": [], + "affiliation": [] + }, + { + "fullname": "Farha Omar K.", + "name": "", + "surname": "", + "rank": 4, + "pid": [], + "affiliation": [] + }, + { + "fullname": "Mondloch Joseph E.", + "name": "", + "surname": "", + "rank": 5, + "pid": [], + "affiliation": [] + }, + { + "fullname": "Hupp Joseph T.", + "name": "", + "surname": "", + "rank": 6, + "pid": [], + "affiliation": [] + }, + { + "fullname": "Stern Charlotte", + "name": "", + "surname": "", + "rank": 7, + "pid": [], + "affiliation": [] + }, + { + "fullname": "Kennedy Robert D.", + "name": "", + "surname": "", + "rank": 8, + "pid": [], + "affiliation": [] + }, + { + "fullname": "Mirkin Chad A.", + "name": "", + "surname": "", + "rank": 9, + "pid": [], + "affiliation": [] + } + ], + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemeid": "dnet:result_typologies", + "schemename": "dnet:result_typologies" + }, + "language": { + "classid": "und", + "classname": "Undetermined", + "schemeid": "dent:languages", + "schemename": "dent:languages" + }, + "country": [], + "subject": [], + "title": [ + { + "value": "Stabilization of a highly porous metal\u2013organic framework utilizing a carborane-based linker", + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "inferenceprovenance": "", + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + } + } + } + ], + "relevantdate": [], + "description": [], + "dateofacceptance": { + "value": "", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "inferenceprovenance": "", + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + } + } + }, + "publisher": { + "value": "Royal Society of Chemistry (RSC)", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "inferenceprovenance": "", + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + } + } + }, + "embargoenddate": { + "value": "", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "inferenceprovenance": "", + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + } + } + }, + "source": [], + "fulltext": [], + "format": [], + "contributor": [], + "resourcetype": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "coverage": [], + "refereed": { + "value": "", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "inferenceprovenance": "", + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + } + } + }, + "context": [], + "id": "50|scholexplore::cf30b31310d816cccd28b514a12ea4a0", + "originalId": [], + "collectedfrom": [ + { + "key": "10|openaire____::e034d6a11054f5ade9221ebac484e864", + "value": "scholExplorer", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "inferenceprovenance": "", + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + } + } + } + ], + "pid": [ + { + "value": "10.1039/c4cc09212k", + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "inferenceprovenance": "", + "provenanceaction": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + } + } + } + ], + "extraInfo": [], + "dataInfo": { + "invisible": false, + "inferred": true, + "deletedbyinference": true, + "inferenceprovenance": "dedup-similarity-result-levenstein", + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + }, + "lastupdatetimestamp": 0 +}