forked from D-Net/dnet-hadoop
Merge branch 'beta' into validation
This commit is contained in:
commit
2b06eebdc6
File diff suppressed because it is too large
Load Diff
|
@ -19,11 +19,33 @@ import java.time.chrono.ThaiBuddhistDate
|
|||
import java.time.format.DateTimeFormatter
|
||||
import java.util.{Date, Locale}
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.io.{Codec, Source}
|
||||
|
||||
object DataciteToOAFTransformation {
|
||||
|
||||
/** One entry of the hosted-by lookup table deserialized from `hostedBy_map.json`.
  *
  * @param openaire_id   original datasource identifier; it is passed to `generateDSId`
  *                      when the hosted-by key of an instance is built
  * @param datacite_name datasource name on the DataCite side (presumably as reported
  *                      by DataCite -- confirm against the JSON resource)
  * @param official_name name used as the value of the hosted-by key/value pair
  * @param similarity    optional score attached to the mapping; its exact meaning is
  *                      not visible here -- TODO confirm with the resource producer
  */
case class HostedByMapType(
  openaire_id: String,
  datacite_name: String,
  official_name: String,
  similarity: Option[Float]
)
|
||||
|
||||
val mapper = new ObjectMapper()
|
||||
|
||||
/** Fallback entry returned when a DataCite client has no match in `hostedByMap`:
  * every name field carries the "unknown repository" constant.
  */
val unknown_repository: HostedByMapType = HostedByMapType(
  openaire_id = ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID,
  datacite_name = ModelConstants.UNKNOWN_REPOSITORY.getValue,
  official_name = ModelConstants.UNKNOWN_REPOSITORY.getValue,
  similarity = Some(1.0f)
)
|
||||
|
||||
/** Hosted-by lookup table, loaded once from the `hostedBy_map.json` classpath
  * resource (resolved relative to this object's package).
  *
  * Callers look entries up by the upper-cased DataCite client id and fall back to
  * `unknown_repository` when no entry exists.
  *
  * @throws IllegalStateException if the resource is not on the classpath
  */
val hostedByMap: Map[String, HostedByMapType] = {
  val resourceName = "hostedBy_map.json"
  val stream = getClass.getResourceAsStream(resourceName)
  // getResourceAsStream signals a missing resource with null; fail with a clear
  // message instead of a NullPointerException raised later while reading.
  if (stream == null)
    throw new IllegalStateException(s"Classpath resource $resourceName not found")

  val source = Source.fromInputStream(stream)
  try {
    implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats
    parse(source.mkString).extract[Map[String, HostedByMapType]]
  } finally {
    // Closing the Source also closes the underlying input stream.
    source.close()
  }
}
|
||||
|
||||
/** This method should skip record if json contains invalid text
|
||||
* defined in file datacite_filter
|
||||
*
|
||||
|
@ -534,12 +556,9 @@ object DataciteToOAFTransformation {
|
|||
|
||||
if (client.isDefined) {
|
||||
|
||||
instance.setHostedby(
|
||||
OafMapperUtils.keyValue(
|
||||
generateDSId(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID),
|
||||
ModelConstants.UNKNOWN_REPOSITORY.getValue
|
||||
)
|
||||
)
|
||||
val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository)
|
||||
instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name))
|
||||
|
||||
instance.setCollectedfrom(DATACITE_COLLECTED_FROM)
|
||||
instance.setUrl(List(s"https://dx.doi.org/$doi").asJava)
|
||||
instance.setAccessright(access_rights_qualifier)
|
||||
|
|
|
@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw;
|
|||
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLDecoder;
|
||||
|
@ -11,8 +12,11 @@ import java.util.stream.Collectors;
|
|||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.Element;
|
||||
import org.dom4j.Node;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import eu.dnetlib.dhp.common.PacePerson;
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
@ -34,10 +38,25 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
|
||||
@Override
|
||||
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
|
||||
return prepareListStructProps(
|
||||
doc,
|
||||
"//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']",
|
||||
MAIN_TITLE_QUALIFIER, info);
|
||||
|
||||
final List<StructuredProperty> title = Lists.newArrayList();
|
||||
final String xpath = "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']";
|
||||
|
||||
for (Object o : doc.selectNodes(xpath)) {
|
||||
Element e = (Element) o;
|
||||
final String titleValue = e.getTextTrim();
|
||||
final String titleType = e.attributeValue("titleType");
|
||||
if (StringUtils.isNotBlank(titleType)) {
|
||||
title
|
||||
.add(
|
||||
structuredProperty(
|
||||
titleValue, titleType, titleType, DNET_DATACITE_TITLE, DNET_DATACITE_TITLE, info));
|
||||
} else {
|
||||
title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER, info));
|
||||
}
|
||||
}
|
||||
|
||||
return title;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -10,6 +10,7 @@ import java.io.IOException;
|
|||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
@ -355,6 +356,25 @@ class MappersTest {
|
|||
assertTrue(r2.getValidated());
|
||||
assertEquals("2020-01-01", r1.getValidationDate());
|
||||
assertEquals("2020-01-01", r2.getValidationDate());
|
||||
|
||||
assertNotNull(d.getTitle());
|
||||
assertEquals(2, d.getTitle().size());
|
||||
verifyTitle(d, "main title", "Temperature and ADCP data collected on Lake Geneva between 2015 and 2017");
|
||||
verifyTitle(d, "Subtitle", "survey");
|
||||
}
|
||||
|
||||
private void verifyTitle(Dataset d, String titleType, String title) {
|
||||
Optional
|
||||
.of(
|
||||
d
|
||||
.getTitle()
|
||||
.stream()
|
||||
.filter(t -> titleType.equals(t.getQualifier().getClassid()))
|
||||
.collect(Collectors.toList()))
|
||||
.ifPresent(t -> {
|
||||
assertEquals(1, t.size());
|
||||
assertEquals(title, t.get(0).getValue());
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -54,7 +54,8 @@
|
|||
</creator>
|
||||
</creators>
|
||||
<titles>
|
||||
<title>Temperature and ADCP data collected on Lake Geneva between 2015 and 2017</title>
|
||||
<title>Temperature and ADCP data collected on Lake Geneva between 2015 and 2017 </title>
|
||||
<title titleType="Subtitle">survey</title>
|
||||
</titles>
|
||||
<publisher>Zenodo</publisher>
|
||||
<publicationYear>2019</publicationYear>
|
||||
|
|
Loading…
Reference in New Issue