forked from antonis.lempesis/dnet-hadoop
XML record serialization for author PIDs: 1) only one value per PID type is allowed; 2) orcid prevails over orcid_pending
This commit is contained in:
parent
61cd129ded
commit
1506f49052
|
@ -0,0 +1,52 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.provision.utils;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Optional;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class AuthorPidTypeComparator implements Comparator<StructuredProperty> {
|
||||
|
||||
@Override
|
||||
public int compare(StructuredProperty left, StructuredProperty right) {
|
||||
|
||||
String lClass = Optional
|
||||
.ofNullable(left)
|
||||
.map(StructuredProperty::getQualifier)
|
||||
.map(Qualifier::getClassid)
|
||||
.orElse(null);
|
||||
|
||||
String rClass = Optional
|
||||
.ofNullable(right)
|
||||
.map(StructuredProperty::getQualifier)
|
||||
.map(Qualifier::getClassid)
|
||||
.orElse(null);
|
||||
|
||||
if (lClass == null && rClass == null)
|
||||
return 0;
|
||||
if (lClass == null)
|
||||
return 1;
|
||||
if (rClass == null)
|
||||
return -1;
|
||||
|
||||
if (lClass.equals(rClass))
|
||||
return 0;
|
||||
|
||||
if (lClass.equals(ModelConstants.ORCID))
|
||||
return -1;
|
||||
if (rClass.equals(ModelConstants.ORCID))
|
||||
return 1;
|
||||
|
||||
if (lClass.equals(ModelConstants.ORCID_PENDING))
|
||||
return -1;
|
||||
if (rClass.equals(ModelConstants.ORCID_PENDING))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
|
@ -254,6 +254,18 @@ public class XmlRecordFactory implements Serializable {
|
|||
p -> p,
|
||||
(p1, p2) -> p1))
|
||||
.values()
|
||||
.stream()
|
||||
.collect(
|
||||
Collectors
|
||||
.groupingBy(
|
||||
p -> p.getValue(),
|
||||
Collectors
|
||||
.mapping(
|
||||
p -> p,
|
||||
Collectors.minBy(new AuthorPidTypeComparator()))))
|
||||
.values()
|
||||
.stream()
|
||||
.map(op -> op.get())
|
||||
.forEach(
|
||||
sp -> {
|
||||
String pidType = getAuthorPidType(sp.getQualifier().getClassid());
|
||||
|
|
|
@ -14,6 +14,7 @@ import org.junit.jupiter.api.Assertions;
|
|||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
|
||||
|
@ -25,6 +26,9 @@ public class XmlRecordFactoryTest {
|
|||
|
||||
private static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource";
|
||||
|
||||
private static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||
|
||||
@Test
|
||||
public void testXMLRecordFactory() throws IOException, DocumentException {
|
||||
|
||||
|
@ -33,7 +37,7 @@ public class XmlRecordFactoryTest {
|
|||
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation,
|
||||
otherDsTypeId);
|
||||
|
||||
Publication p = new ObjectMapper()
|
||||
Publication p = OBJECT_MAPPER
|
||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
|
||||
|
||||
String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
|
||||
|
@ -44,10 +48,14 @@ public class XmlRecordFactoryTest {
|
|||
|
||||
assertNotNull(doc);
|
||||
|
||||
// System.out.println(doc.asXML());
|
||||
System.out.println(doc.asXML());
|
||||
|
||||
Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid"));
|
||||
Assertions.assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid"));
|
||||
Assertions.assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending"));
|
||||
|
||||
Assertions.assertEquals("0000-0001-9613-9956", doc.valueOf("//creator[@rank = '2']/@orcid"));
|
||||
Assertions.assertEquals("", doc.valueOf("//creator[@rank = '2']/@orcid_pending"));
|
||||
|
||||
// TODO add assertions based on values extracted from the XML record
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,41 +6,15 @@
|
|||
"name": "Jaehyun",
|
||||
"pid": [
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"trust": ""
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "orcid",
|
||||
"classname": "Open Researcher and Contributor ID",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "0000-0001-9613-6639"
|
||||
"value": "0000-0001-9613-6638"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"trust": ""
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "orcid_pending",
|
||||
"classname": "Open Researcher and Contributor ID",
|
||||
|
@ -57,7 +31,26 @@
|
|||
"affiliation": [],
|
||||
"fullname": "Berrada, Salim",
|
||||
"name": "Salim",
|
||||
"pid": [],
|
||||
"pid": [
|
||||
{
|
||||
"qualifier": {
|
||||
"classid": "orcid",
|
||||
"classname": "Open Researcher and Contributor ID",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "0000-0001-9613-9956"
|
||||
},
|
||||
{
|
||||
"qualifier": {
|
||||
"classid": "orcid_pending",
|
||||
"classname": "Open Researcher and Contributor ID",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "0000-0001-9613-9956"
|
||||
}
|
||||
],
|
||||
"rank": 2,
|
||||
"surname": "Berrada"
|
||||
},
|
||||
|
|
Loading…
Reference in New Issue