From 1506f49052655964bfbf8390c10361f92953b245 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 14 Dec 2020 11:14:03 +0100 Subject: [PATCH] Xml record serialization for author PIDs: 1) only one value per PID type is allowed; 2) orcid prevails over orcid_pending --- .../utils/AuthorPidTypeComparator.java | 52 +++++++++++++++++++ .../oa/provision/utils/XmlRecordFactory.java | 12 +++++ .../oa/provision/XmlRecordFactoryTest.java | 14 +++-- .../dnetlib/dhp/oa/provision/publication.json | 49 ++++++++--------- 4 files changed, 96 insertions(+), 31 deletions(-) create mode 100644 dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java new file mode 100644 index 000000000..7391569ed --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.oa.provision.utils; + +import java.util.Comparator; +import java.util.Optional; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + +public class AuthorPidTypeComparator implements Comparator { + + @Override + public int compare(StructuredProperty left, StructuredProperty right) { + + String lClass = Optional + .ofNullable(left) + .map(StructuredProperty::getQualifier) + .map(Qualifier::getClassid) + .orElse(null); + + String rClass = Optional + .ofNullable(right) + .map(StructuredProperty::getQualifier) + .map(Qualifier::getClassid) + .orElse(null); + + if (lClass == null && rClass == null) + return 0; + if (lClass == null) + return 1; + if (rClass == null) + return -1; + + if (lClass.equals(rClass)) + return 0; + + if (lClass.equals(ModelConstants.ORCID)) + return -1; + if (rClass.equals(ModelConstants.ORCID)) + return 1; + + if (lClass.equals(ModelConstants.ORCID_PENDING)) + return -1; + if (rClass.equals(ModelConstants.ORCID_PENDING)) + return 1; + + return 0; + } + +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index eba736228..9f16e99d8 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -254,6 +254,18 @@ public class XmlRecordFactory implements Serializable { p -> p, (p1, p2) -> p1)) .values() + .stream() + .collect( + Collectors + .groupingBy( + p -> p.getValue(), + Collectors + .mapping( + p -> p, + Collectors.minBy(new AuthorPidTypeComparator())))) + .values() + .stream() + .map(op -> op.get()) .forEach( sp -> { String pidType = getAuthorPidType(sp.getQualifier().getClassid()); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 0f1912cc3..8ae8a55c3 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -14,6 +14,7 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; @@ -25,6 +26,9 @@ public class XmlRecordFactoryTest { private static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; + private static ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + @Test public void testXMLRecordFactory() throws IOException, DocumentException { @@ -33,7 +37,7 @@ public class XmlRecordFactoryTest { XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, otherDsTypeId); - Publication p = new ObjectMapper() + Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); @@ -44,10 +48,14 @@ public class XmlRecordFactoryTest { assertNotNull(doc); - // System.out.println(doc.asXML()); + System.out.println(doc.asXML()); - Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid")); + Assertions.assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid")); Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending")); + + Assertions.assertEquals("0000-0001-9613-9956", doc.valueOf("//creator[@rank = '2']/@orcid")); + Assertions.assertEquals("", doc.valueOf("//creator[@rank = '2']/@orcid_pending")); + // TODO add assertions based of values extracted from the XML record } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json index b6b183b15..ea7a30051 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json @@ -6,41 +6,15 @@ "name": "Jaehyun", "pid": [ { - "dataInfo": { - "deletedbyinference": false, - "inferenceprovenance": "", - "inferred": false, - "invisible": false, - "provenanceaction": { - "classid": "", - "classname": "", - "schemeid": "", - "schemename": "" - }, - "trust": "" - }, "qualifier": { "classid": "orcid", "classname": "Open Researcher and Contributor ID", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types" }, - "value": "0000-0001-9613-6639" + "value": "0000-0001-9613-6638" }, { - "dataInfo": { - "deletedbyinference": false, - "inferenceprovenance": "", - "inferred": false, - "invisible": false, - "provenanceaction": { - "classid": "", - "classname": "", - "schemeid": "", - "schemename": "" - }, - "trust": "" - }, "qualifier": { "classid": "orcid_pending", "classname": "Open Researcher and Contributor ID", @@ -57,7 +31,26 @@ "affiliation": [], "fullname": "Berrada, Salim", "name": "Salim", - "pid": [], + "pid": [ + { + "qualifier": { + "classid": "orcid", + "classname": "Open Researcher and Contributor ID", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-9956" + }, + { + "qualifier": { + "classid": "orcid_pending", + "classname": "Open Researcher and Contributor ID", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-9956" + } + ], "rank": 2, "surname": "Berrada" },