Validation tools of the Oaf objects #186

Open
claudio.atzori wants to merge 5 commits from validation into beta
5 changed files with 78 additions and 1024 deletions
Showing only changes of commit 2b06eebdc6 - Show all commits

View File

@ -19,11 +19,33 @@ import java.time.chrono.ThaiBuddhistDate
import java.time.format.DateTimeFormatter import java.time.format.DateTimeFormatter
import java.util.{Date, Locale} import java.util.{Date, Locale}
import scala.collection.JavaConverters._ import scala.collection.JavaConverters._
import scala.io.{Codec, Source}
object DataciteToOAFTransformation { object DataciteToOAFTransformation {
/** One entry of the hostedBy map, deserialized from `hostedBy_map.json`.
  *
  * Field names are part of the JSON contract (they are matched by name during
  * extraction), so they must not be renamed.
  *
  * @param openaire_id   identifier handed to `generateDSId` when building the hostedby key
  * @param datacite_name name of the repository on the Datacite side (presumably; not read in the visible code)
  * @param official_name name used as the hostedby value
  * @param similarity    optional score attached to the entry (semantics not visible here; TODO confirm)
  */
case class HostedByMapType(
  openaire_id: String,
  datacite_name: String,
  official_name: String,
  similarity: Option[Float]
)
val mapper = new ObjectMapper() val mapper = new ObjectMapper()
/** Fallback entry describing the "unknown repository"; it is what the hostedBy
  * lookup returns when a client has no entry in `hostedByMap`.
  */
val unknown_repository: HostedByMapType = {
  val unknownName = ModelConstants.UNKNOWN_REPOSITORY.getValue
  HostedByMapType(
    openaire_id = ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID,
    datacite_name = unknownName,
    official_name = unknownName,
    similarity = Some(1.0f)
  )
}
/** HostedBy lookup table loaded once from the classpath resource `hostedBy_map.json`.
  *
  * The resource is decoded explicitly as UTF-8 (instead of the platform default
  * charset) and the underlying stream is closed as soon as the content is read.
  * Callers in this object look entries up by the upper-cased client id.
  */
val hostedByMap: Map[String, HostedByMapType] = {
  val source = Source.fromInputStream(getClass.getResourceAsStream("hostedBy_map.json"))(Codec.UTF8)
  // Closing the BufferedSource also closes the wrapped resource stream.
  val s =
    try source.mkString
    finally source.close()
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
  lazy val json: org.json4s.JValue = parse(s)
  json.extract[Map[String, HostedByMapType]]
}
/** This method should skip record if json contains invalid text /** This method should skip record if json contains invalid text
* defined in file datacite_filter * defined in file datacite_filter
* *
@ -534,12 +556,9 @@ object DataciteToOAFTransformation {
if (client.isDefined) { if (client.isDefined) {
instance.setHostedby( val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository)
OafMapperUtils.keyValue( instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name))
generateDSId(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID),
ModelConstants.UNKNOWN_REPOSITORY.getValue
)
)
instance.setCollectedfrom(DATACITE_COLLECTED_FROM) instance.setCollectedfrom(DATACITE_COLLECTED_FROM)
instance.setUrl(List(s"https://dx.doi.org/$doi").asJava) instance.setUrl(List(s"https://dx.doi.org/$doi").asJava)
instance.setAccessright(access_rights_qualifier) instance.setAccessright(access_rights_qualifier)

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.URLDecoder; import java.net.URLDecoder;
@ -11,8 +12,11 @@ import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node; import org.dom4j.Node;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
@ -34,10 +38,25 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
return prepareListStructProps(
doc, final List<StructuredProperty> title = Lists.newArrayList();
"//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']", final String xpath = "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']";
MAIN_TITLE_QUALIFIER, info);
for (Object o : doc.selectNodes(xpath)) {
Element e = (Element) o;
final String titleValue = e.getTextTrim();
final String titleType = e.attributeValue("titleType");
if (StringUtils.isNotBlank(titleType)) {
title
.add(
structuredProperty(
titleValue, titleType, titleType, DNET_DATACITE_TITLE, DNET_DATACITE_TITLE, info));
} else {
title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER, info));
}
}
return title;
} }
@Override @Override

View File

@ -10,6 +10,7 @@ import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -355,6 +356,25 @@ class MappersTest {
assertTrue(r2.getValidated()); assertTrue(r2.getValidated());
assertEquals("2020-01-01", r1.getValidationDate()); assertEquals("2020-01-01", r1.getValidationDate());
assertEquals("2020-01-01", r2.getValidationDate()); assertEquals("2020-01-01", r2.getValidationDate());
assertNotNull(d.getTitle());
assertEquals(2, d.getTitle().size());
verifyTitle(d, "main title", "Temperature and ADCP data collected on Lake Geneva between 2015 and 2017");
verifyTitle(d, "Subtitle", "survey");
}
private void verifyTitle(Dataset d, String titleType, String title) {
Optional
.of(
d
.getTitle()
.stream()
.filter(t -> titleType.equals(t.getQualifier().getClassid()))
.collect(Collectors.toList()))
.ifPresent(t -> {
assertEquals(1, t.size());
assertEquals(title, t.get(0).getValue());
});
} }
@Test @Test

View File

@ -55,6 +55,7 @@
</creators> </creators>
<titles> <titles>
<title>Temperature and ADCP data collected on Lake Geneva between 2015 and 2017 </title> <title>Temperature and ADCP data collected on Lake Geneva between 2015 and 2017 </title>
<title titleType="Subtitle">survey</title>
</titles> </titles>
<publisher>Zenodo</publisher> <publisher>Zenodo</publisher>
<publicationYear>2019</publicationYear> <publicationYear>2019</publicationYear>