forked from D-Net/dnet-hadoop
Improved the mapping of titles from DataCite records so that title types are taken into account
This commit is contained in:
parent
b37bc277c4
commit
f0ea2410e5
|
@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
|
||||||
|
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.net.URLDecoder;
|
import java.net.URLDecoder;
|
||||||
|
@ -11,8 +12,11 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.Element;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.PacePerson;
|
import eu.dnetlib.dhp.common.PacePerson;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
@ -34,10 +38,25 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
|
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
|
||||||
return prepareListStructProps(
|
|
||||||
doc,
|
final List<StructuredProperty> title = Lists.newArrayList();
|
||||||
"//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']",
|
final String xpath = "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']";
|
||||||
MAIN_TITLE_QUALIFIER, info);
|
|
||||||
|
for (Object o : doc.selectNodes(xpath)) {
|
||||||
|
Element e = (Element) o;
|
||||||
|
final String titleValue = e.getTextTrim();
|
||||||
|
final String titleType = e.attributeValue("titleType");
|
||||||
|
if (StringUtils.isNotBlank(titleType)) {
|
||||||
|
title
|
||||||
|
.add(
|
||||||
|
structuredProperty(
|
||||||
|
titleValue, titleType, titleType, DNET_DATACITE_TITLE, DNET_DATACITE_TITLE, info));
|
||||||
|
} else {
|
||||||
|
title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER, info));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return title;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -10,6 +10,7 @@ import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
@ -355,6 +356,25 @@ class MappersTest {
|
||||||
assertTrue(r2.getValidated());
|
assertTrue(r2.getValidated());
|
||||||
assertEquals("2020-01-01", r1.getValidationDate());
|
assertEquals("2020-01-01", r1.getValidationDate());
|
||||||
assertEquals("2020-01-01", r2.getValidationDate());
|
assertEquals("2020-01-01", r2.getValidationDate());
|
||||||
|
|
||||||
|
assertNotNull(d.getTitle());
|
||||||
|
assertEquals(2, d.getTitle().size());
|
||||||
|
verifyTitle(d, "main title", "Temperature and ADCP data collected on Lake Geneva between 2015 and 2017");
|
||||||
|
verifyTitle(d, "Subtitle", "survey");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyTitle(Dataset d, String titleType, String title) {
|
||||||
|
Optional
|
||||||
|
.of(
|
||||||
|
d
|
||||||
|
.getTitle()
|
||||||
|
.stream()
|
||||||
|
.filter(t -> titleType.equals(t.getQualifier().getClassid()))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.ifPresent(t -> {
|
||||||
|
assertEquals(1, t.size());
|
||||||
|
assertEquals(title, t.get(0).getValue());
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -54,7 +54,8 @@
|
||||||
</creator>
|
</creator>
|
||||||
</creators>
|
</creators>
|
||||||
<titles>
|
<titles>
|
||||||
<title>Temperature and ADCP data collected on Lake Geneva between 2015 and 2017</title>
|
<title>Temperature and ADCP data collected on Lake Geneva between 2015 and 2017 </title>
|
||||||
|
<title titleType="Subtitle">survey</title>
|
||||||
</titles>
|
</titles>
|
||||||
<publisher>Zenodo</publisher>
|
<publisher>Zenodo</publisher>
|
||||||
<publicationYear>2019</publicationYear>
|
<publicationYear>2019</publicationYear>
|
||||||
|
|
Loading…
Reference in New Issue