Fixed the OdfToGraph mapping (OdfToOafMapper): author pids and author affiliations are now read from the current creator node instead of the whole document. Extended the Oaf2Graph mapping (OafToOafMapper): added support for author pids.

This commit is contained in:
Claudio Atzori 2020-05-15 12:26:16 +02:00
parent a832658296
commit cfc8948717
5 changed files with 71 additions and 17 deletions

View File

@ -1,15 +1,16 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node; import org.dom4j.Node;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
@ -28,15 +29,24 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
final List<Author> res = new ArrayList<>(); final List<Author> res = new ArrayList<>();
int pos = 1; int pos = 1;
for (final Object o : doc.selectNodes("//dc:creator")) { for (final Object o : doc.selectNodes("//dc:creator")) {
final Node n = (Node) o; final Element e = (Element) o;
final Author author = new Author(); final Author author = new Author();
author.setFullname(n.getText()); author.setFullname(e.getText());
author.setRank(pos++); author.setRank(pos++);
final PacePerson p = new PacePerson(n.getText(), false); final PacePerson p = new PacePerson(e.getText(), false);
if (p.isAccurate()) { if (p.isAccurate()) {
author.setName(p.getNormalisedFirstName()); author.setName(p.getNormalisedFirstName());
author.setSurname(p.getNormalisedSurname()); author.setSurname(p.getNormalisedSurname());
} }
final String pid = e.attributeValue("nameIdentifier");
final String pidType = e.attributeValue("nameIdentifierScheme");
if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) {
author.setPid(new ArrayList<>());
author.getPid().add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info));
}
res.add(author); res.add(author);
} }
return res; return res;

View File

@ -63,17 +63,17 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
author.setSurname(surname); author.setSurname(surname);
} }
author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info)); author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info));
author.setPid(preparePids(doc, info)); author.setPid(preparePids(n, info));
author.setRank(pos++); author.setRank(pos++);
res.add(author); res.add(author);
} }
return res; return res;
} }
private List<StructuredProperty> preparePids(final Document doc, final DataInfo info) { private List<StructuredProperty> preparePids(final Node n, final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>(); final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : doc.selectNodes("./datacite:nameIdentifier")) { for (final Object o : n.selectNodes("./datacite:nameIdentifier")) {
res res
.add( .add(
structuredProperty( structuredProperty(

View File

@ -10,7 +10,10 @@ import static org.mockito.Mockito.when;
import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Optional;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
@ -19,12 +22,6 @@ import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock; import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Software;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class MappersTest { public class MappersTest {
@ -54,7 +51,26 @@ public class MappersTest {
assertValidId(p.getId()); assertValidId(p.getId());
assertValidId(p.getCollectedfrom().get(0).getKey()); assertValidId(p.getCollectedfrom().get(0).getKey());
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
assertTrue(p.getAuthor().size() > 0); assertTrue(p.getAuthor().size() > 0);
Optional<Author> author = p.getAuthor()
.stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.findFirst();
assertTrue(author.isPresent());
StructuredProperty pid = author.get().getPid()
.stream()
.findFirst()
.get();
assertEquals("0000-0001-6651-1178", pid.getValue());
assertEquals("ORCID", pid.getQualifier().getClassid());
assertEquals("ORCID", pid.getQualifier().getClassname());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
assertEquals("Votsi,Nefta", author.get().getFullname());
assertEquals("Votsi", author.get().getSurname());
assertEquals("Nefta", author.get().getName());
assertTrue(p.getSubject().size() > 0); assertTrue(p.getSubject().size() > 0);
assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline())); assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline()));
assertTrue(StringUtils.isNotBlank(p.getJournal().getName())); assertTrue(StringUtils.isNotBlank(p.getJournal().getName()));
@ -100,6 +116,33 @@ public class MappersTest {
assertValidId(d.getCollectedfrom().get(0).getKey()); assertValidId(d.getCollectedfrom().get(0).getKey());
assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
assertTrue(d.getAuthor().size() > 0); assertTrue(d.getAuthor().size() > 0);
Optional<Author> author = d.getAuthor()
.stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.findFirst();
assertTrue(author.isPresent());
StructuredProperty pid = author.get().getPid()
.stream()
.findFirst()
.get();
assertEquals("0000-0001-9074-1619", pid.getValue());
assertEquals("ORCID", pid.getQualifier().getClassid());
assertEquals("ORCID", pid.getQualifier().getClassname());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
assertEquals("Baracchini, Theo", author.get().getFullname());
assertEquals("Baracchini", author.get().getSurname());
assertEquals("Theo", author.get().getName());
assertEquals(1, author.get().getAffiliation().size());
Optional<Field<String>> opAff = author.get().getAffiliation()
.stream()
.findFirst();
assertTrue(opAff.isPresent());
Field<String> affiliation = opAff.get();
assertEquals("ISTI-CNR", affiliation.getValue());
assertTrue(d.getSubject().size() > 0); assertTrue(d.getSubject().size() > 0);
assertTrue(d.getInstance().size() > 0); assertTrue(d.getInstance().size() > 0);
assertTrue(d.getContext().size() > 0); assertTrue(d.getContext().size() > 0);

View File

@ -19,7 +19,7 @@
<metadata xmlns="http://namespace.openaire.eu/"> <metadata xmlns="http://namespace.openaire.eu/">
<dc:title>Ecosystem Service capacity is higher in areas of multiple designation types</dc:title> <dc:title>Ecosystem Service capacity is higher in areas of multiple designation types</dc:title>
<dc:creator>Nikolaidou,Charitini</dc:creator> <dc:creator>Nikolaidou,Charitini</dc:creator>
<dc:creator>Votsi,Nefta</dc:creator> <dc:creator nameIdentifier="0000-0001-6651-1178" nameIdentifierScheme="ORCID">Votsi,Nefta</dc:creator>
<dc:creator>Sgardelis,Steanos</dc:creator> <dc:creator>Sgardelis,Steanos</dc:creator>
<dc:creator>Halley,John</dc:creator> <dc:creator>Halley,John</dc:creator>
<dc:creator>Pantis,John</dc:creator> <dc:creator>Pantis,John</dc:creator>

View File

@ -35,9 +35,10 @@
</creator> </creator>
<creator> <creator>
<creatorName>Baracchini, Theo</creatorName> <creatorName>Baracchini, Theo</creatorName>
<nameIdentifier nameIdentifierScheme="ORCID">0000-0001-9074-1619</nameIdentifier>
<givenName>Theo</givenName> <givenName>Theo</givenName>
<familyName>Baracchini</familyName> <familyName>Baracchini</familyName>
<affiliation>Physics of Aquatic Systems Laboratory (APHYS) Margaretha Kamprad Chair, ENAC, EPFL, Lausanne, 1015, Switzerland</affiliation> <affiliation>ISTI-CNR</affiliation>
</creator> </creator>
<creator> <creator>
<creatorName>Wüest, Alfred</creatorName> <creatorName>Wüest, Alfred</creatorName>