Improved the mapping of titles from DataCite records to take title types into account

This commit is contained in:
Claudio Atzori 2022-01-21 10:50:34 +01:00
parent b37bc277c4
commit f0ea2410e5
3 changed files with 45 additions and 5 deletions

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.URLDecoder; import java.net.URLDecoder;
@ -11,8 +12,11 @@ import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node; import org.dom4j.Node;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
@ -34,10 +38,25 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
return prepareListStructProps(
doc, final List<StructuredProperty> title = Lists.newArrayList();
"//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']", final String xpath = "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']";
MAIN_TITLE_QUALIFIER, info);
for (Object o : doc.selectNodes(xpath)) {
Element e = (Element) o;
final String titleValue = e.getTextTrim();
final String titleType = e.attributeValue("titleType");
if (StringUtils.isNotBlank(titleType)) {
title
.add(
structuredProperty(
titleValue, titleType, titleType, DNET_DATACITE_TITLE, DNET_DATACITE_TITLE, info));
} else {
title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER, info));
}
}
return title;
} }
@Override @Override

View File

@ -10,6 +10,7 @@ import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -355,6 +356,25 @@ class MappersTest {
assertTrue(r2.getValidated()); assertTrue(r2.getValidated());
assertEquals("2020-01-01", r1.getValidationDate()); assertEquals("2020-01-01", r1.getValidationDate());
assertEquals("2020-01-01", r2.getValidationDate()); assertEquals("2020-01-01", r2.getValidationDate());
assertNotNull(d.getTitle());
assertEquals(2, d.getTitle().size());
verifyTitle(d, "main title", "Temperature and ADCP data collected on Lake Geneva between 2015 and 2017");
verifyTitle(d, "Subtitle", "survey");
}
/**
 * Asserts that the dataset carries exactly one title whose qualifier classid equals
 * {@code titleType}, and that this title's value equals {@code title}.
 *
 * @param d the dataset whose titles are inspected via {@code getTitle()}
 * @param titleType the qualifier classid to look for (e.g. "main title" or "Subtitle")
 * @param title the expected value of the single matching title
 */
private void verifyTitle(Dataset d, String titleType, String title) {
// NOTE(review): Optional.of(...) wraps the list returned by Collectors.toList(), which is
// never null, so the ifPresent branch below always runs. When no title matches, the list is
// empty and the size assertion fails, so a missing title is still reported.
Optional
.of(
d
.getTitle()
.stream()
// keep only the titles whose qualifier classid matches the requested type
.filter(t -> titleType.equals(t.getQualifier().getClassid()))
.collect(Collectors.toList()))
.ifPresent(t -> {
// exactly one title of the requested type is expected
assertEquals(1, t.size());
assertEquals(title, t.get(0).getValue());
});
} }
@Test @Test

View File

@ -54,7 +54,8 @@
</creator> </creator>
</creators> </creators>
<titles> <titles>
<title>Temperature and ADCP data collected on Lake Geneva between 2015 and 2017</title> <title>Temperature and ADCP data collected on Lake Geneva between 2015 and 2017 </title>
<title titleType="Subtitle">survey</title>
</titles> </titles>
<publisher>Zenodo</publisher> <publisher>Zenodo</publisher>
<publicationYear>2019</publicationYear> <publicationYear>2019</publicationYear>