Xml record serialization for author PIDs: 1) only one value per PID type is allowed; 2) orcid prevails over orcid_pending

This commit is contained in:
Claudio Atzori 2020-12-14 11:14:03 +01:00
parent 61cd129ded
commit 1506f49052
4 changed files with 96 additions and 31 deletions

View File

@ -0,0 +1,52 @@
package eu.dnetlib.dhp.oa.provision.utils;
import java.util.Comparator;
import java.util.Optional;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
/**
 * Orders author PIDs by the class id of their qualifier.
 *
 * <p>Resulting order, from smallest to largest:
 * <ol>
 * <li>PIDs whose class id equals {@link ModelConstants#ORCID};</li>
 * <li>PIDs whose class id equals {@link ModelConstants#ORCID_PENDING};</li>
 * <li>PIDs with any other non-null class id (all considered equal to each other);</li>
 * <li>PIDs for which no class id can be read, i.e. the PID itself, its qualifier or the
 * qualifier's class id is {@code null} (all considered equal to each other).</li>
 * </ol>
 *
 * <p>The comparison never looks at the PID value, and the result is always one of
 * {@code -1}, {@code 0} or {@code 1}.
 */
public class AuthorPidTypeComparator implements Comparator<StructuredProperty> {

	/** Rank of a PID typed as {@link ModelConstants#ORCID}: sorts before everything else. */
	private static final int RANK_ORCID = 0;

	/** Rank of a PID typed as {@link ModelConstants#ORCID_PENDING}: sorts right after ORCID. */
	private static final int RANK_ORCID_PENDING = 1;

	/** Rank shared by every other non-null class id. */
	private static final int RANK_OTHER = 2;

	/** Rank used when the class id cannot be read: sorts last. */
	private static final int RANK_MISSING = 3;

	/**
	 * Compares two PIDs by the rank of their class id.
	 *
	 * @param left  the first PID, may be {@code null}
	 * @param right the second PID, may be {@code null}
	 * @return {@code -1} if {@code left} ranks before {@code right}, {@code 1} if it ranks
	 *         after, {@code 0} if both share the same rank
	 */
	@Override
	public int compare(StructuredProperty left, StructuredProperty right) {
		return Integer.compare(rankOf(left), rankOf(right));
	}

	/**
	 * Maps a PID to its position in the ordering described on the class.
	 *
	 * @param pid the PID to rank, may be {@code null}
	 * @return one of the {@code RANK_*} constants
	 */
	private static int rankOf(StructuredProperty pid) {
		final String classId = classIdOf(pid);
		if (classId == null) {
			return RANK_MISSING;
		}
		if (classId.equals(ModelConstants.ORCID)) {
			return RANK_ORCID;
		}
		if (classId.equals(ModelConstants.ORCID_PENDING)) {
			return RANK_ORCID_PENDING;
		}
		return RANK_OTHER;
	}

	/**
	 * Null-safe navigation from a PID to the class id of its qualifier.
	 *
	 * @param pid the PID to inspect, may be {@code null}
	 * @return the class id, or {@code null} when any step of the chain is {@code null}
	 */
	private static String classIdOf(StructuredProperty pid) {
		if (pid == null) {
			return null;
		}
		final Qualifier qualifier = pid.getQualifier();
		return qualifier == null ? null : qualifier.getClassid();
	}
}

View File

@ -254,6 +254,18 @@ public class XmlRecordFactory implements Serializable {
p -> p, p -> p,
(p1, p2) -> p1)) (p1, p2) -> p1))
.values() .values()
.stream()
.collect(
Collectors
.groupingBy(
p -> p.getValue(),
Collectors
.mapping(
p -> p,
Collectors.minBy(new AuthorPidTypeComparator()))))
.values()
.stream()
.map(op -> op.get())
.forEach( .forEach(
sp -> { sp -> {
String pidType = getAuthorPidType(sp.getQualifier().getClassid()); String pidType = getAuthorPidType(sp.getQualifier().getClassid());

View File

@ -14,6 +14,7 @@ import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
@ -25,6 +26,9 @@ public class XmlRecordFactoryTest {
private static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; private static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource";
private static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
@Test @Test
public void testXMLRecordFactory() throws IOException, DocumentException { public void testXMLRecordFactory() throws IOException, DocumentException {
@ -33,7 +37,7 @@ public class XmlRecordFactoryTest {
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation,
otherDsTypeId); otherDsTypeId);
Publication p = new ObjectMapper() Publication p = OBJECT_MAPPER
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
@ -44,10 +48,14 @@ public class XmlRecordFactoryTest {
assertNotNull(doc); assertNotNull(doc);
// System.out.println(doc.asXML()); System.out.println(doc.asXML());
Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid")); Assertions.assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid"));
Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending")); Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending"));
Assertions.assertEquals("0000-0001-9613-9956", doc.valueOf("//creator[@rank = '2']/@orcid"));
Assertions.assertEquals("", doc.valueOf("//creator[@rank = '2']/@orcid_pending"));
// TODO add assertions based on values extracted from the XML record // TODO add assertions based on values extracted from the XML record
} }
} }

View File

@ -6,41 +6,15 @@
"name": "Jaehyun", "name": "Jaehyun",
"pid": [ "pid": [
{ {
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"trust": ""
},
"qualifier": { "qualifier": {
"classid": "orcid", "classid": "orcid",
"classname": "Open Researcher and Contributor ID", "classname": "Open Researcher and Contributor ID",
"schemeid": "dnet:pid_types", "schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types" "schemename": "dnet:pid_types"
}, },
"value": "0000-0001-9613-6639" "value": "0000-0001-9613-6638"
}, },
{ {
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"trust": ""
},
"qualifier": { "qualifier": {
"classid": "orcid_pending", "classid": "orcid_pending",
"classname": "Open Researcher and Contributor ID", "classname": "Open Researcher and Contributor ID",
@ -57,7 +31,26 @@
"affiliation": [], "affiliation": [],
"fullname": "Berrada, Salim", "fullname": "Berrada, Salim",
"name": "Salim", "name": "Salim",
"pid": [], "pid": [
{
"qualifier": {
"classid": "orcid",
"classname": "Open Researcher and Contributor ID",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "0000-0001-9613-9956"
},
{
"qualifier": {
"classid": "orcid_pending",
"classname": "Open Researcher and Contributor ID",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "0000-0001-9613-9956"
}
],
"rank": 2, "rank": 2,
"surname": "Berrada" "surname": "Berrada"
}, },