Merge branch 'beta' into validation

This commit is contained in:
Claudio Atzori 2022-01-21 13:02:35 +01:00
commit 2b06eebdc6
5 changed files with 78 additions and 1024 deletions

View File

@ -19,11 +19,33 @@ import java.time.chrono.ThaiBuddhistDate
import java.time.format.DateTimeFormatter
import java.util.{Date, Locale}
import scala.collection.JavaConverters._
import scala.io.{Codec, Source}
object DataciteToOAFTransformation {
/** One entry of the hosted-by lookup table, as deserialized from `hostedBy_map.json`.
  *
  * @param openaire_id   identifier handed to `generateDSId` when building the hostedby key
  * @param datacite_name name on the Datacite side (not read by the code visible here)
  * @param official_name name used as the hostedby value
  * @param similarity    optional score (not read by the code visible here; presumably
  *                      a name-match confidence — TODO confirm)
  */
case class HostedByMapType(
  openaire_id: String,
  datacite_name: String,
  official_name: String,
  similarity: Option[Float]
)
// ObjectMapper instance of the transformation object (its call sites are outside this hunk).
val mapper = new ObjectMapper()

/** Fallback entry used when a Datacite client has no match in `hostedByMap`:
  * both names carry the "unknown repository" value.
  */
val unknown_repository: HostedByMapType = HostedByMapType(
  openaire_id = ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID,
  datacite_name = ModelConstants.UNKNOWN_REPOSITORY.getValue,
  official_name = ModelConstants.UNKNOWN_REPOSITORY.getValue,
  similarity = Some(1.0f)
)
/** Hosted-by lookup table, loaded from the `hostedBy_map.json` classpath resource.
  *
  * The JSON document is read fully into memory and extracted with json4s into a
  * map of [[HostedByMapType]] entries. The transformation looks entries up by the
  * upper-cased Datacite client id and falls back to `unknown_repository`.
  */
val hostedByMap: Map[String, HostedByMapType] = {
  val source = Source.fromInputStream(getClass.getResourceAsStream("hostedBy_map.json"))
  // Read eagerly, then always release the underlying resource stream:
  // the original left the Source (and its InputStream) open.
  val s =
    try source.mkString
    finally source.close()
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
  lazy val json: org.json4s.JValue = parse(s)
  json.extract[Map[String, HostedByMapType]]
}
/** This method should skip record if json contains invalid text
* defined in file datacite_filter
*
@ -534,12 +556,9 @@ object DataciteToOAFTransformation {
if (client.isDefined) {
instance.setHostedby(
OafMapperUtils.keyValue(
generateDSId(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID),
ModelConstants.UNKNOWN_REPOSITORY.getValue
)
)
val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository)
instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name))
instance.setCollectedfrom(DATACITE_COLLECTED_FROM)
instance.setUrl(List(s"https://dx.doi.org/$doi").asJava)
instance.setAccessright(access_rights_qualifier)

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
@ -11,8 +12,11 @@ import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.*;
@ -34,10 +38,25 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
/**
 * Collects the titles of the record.
 *
 * Every element matched by the title XPath (a {@code title} under {@code titles}, or directly
 * under {@code resource}) yields one structured property whose value is the trimmed element text.
 * When the element carries a non-blank {@code titleType} attribute, that raw value is passed as
 * both qualifier arguments together with the {@code DNET_DATACITE_TITLE} scheme; otherwise the
 * title is qualified with {@code MAIN_TITLE_QUALIFIER}.
 *
 * @param doc  the parsed metadata record
 * @param info the data info attached to every produced title
 * @return the titles in document order; empty when no title element matches
 */
@Override
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
	// The former "return prepareListStructProps(...)" shortcut is gone: it returned before the
	// loop below, so the titleType attribute was never taken into account.
	final List<StructuredProperty> title = Lists.newArrayList();
	final String xpath = "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']";
	for (Object o : doc.selectNodes(xpath)) {
		final Element e = (Element) o;
		final String titleValue = e.getTextTrim();
		final String titleType = e.attributeValue("titleType");
		if (StringUtils.isNotBlank(titleType)) {
			title
				.add(
					structuredProperty(
						titleValue, titleType, titleType, DNET_DATACITE_TITLE, DNET_DATACITE_TITLE, info));
		} else {
			title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER, info));
		}
	}
	return title;
}
@Override

View File

@ -10,6 +10,7 @@ import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
@ -355,6 +356,25 @@ class MappersTest {
assertTrue(r2.getValidated());
assertEquals("2020-01-01", r1.getValidationDate());
assertEquals("2020-01-01", r2.getValidationDate());
assertNotNull(d.getTitle());
assertEquals(2, d.getTitle().size());
verifyTitle(d, "main title", "Temperature and ADCP data collected on Lake Geneva between 2015 and 2017");
verifyTitle(d, "Subtitle", "survey");
}
/**
 * Asserts that the dataset has exactly one title whose qualifier classid equals
 * {@code titleType}, and that its value equals {@code title}.
 *
 * @param d         the dataset under test
 * @param titleType the expected qualifier classid of the title
 * @param title     the expected title value
 */
private void verifyTitle(Dataset d, String titleType, String title) {
	// No Optional wrapper: the collected list is never null, so the previous
	// Optional.of(...).ifPresent(...) always ran and only obscured the assertions.
	final List<String> values = d
		.getTitle()
		.stream()
		.filter(t -> titleType.equals(t.getQualifier().getClassid()))
		.map(t -> t.getValue())
		.collect(Collectors.toList());
	assertEquals(1, values.size());
	assertEquals(title, values.get(0));
}
@Test

View File

@ -54,7 +54,8 @@
</creator>
</creators>
<titles>
<title>Temperature and ADCP data collected on Lake Geneva between 2015 and 2017</title>
<title>Temperature and ADCP data collected on Lake Geneva between 2015 and 2017 </title>
<title titleType="Subtitle">survey</title>
</titles>
<publisher>Zenodo</publisher>
<publicationYear>2019</publicationYear>