forked from D-Net/dnet-hadoop
Merge branch 'beta' into delegated_authorities
This commit is contained in:
commit
4983d6536d
File diff suppressed because it is too large
Load Diff
|
@ -19,11 +19,33 @@ import java.time.chrono.ThaiBuddhistDate
|
||||||
import java.time.format.DateTimeFormatter
|
import java.time.format.DateTimeFormatter
|
||||||
import java.util.{Date, Locale}
|
import java.util.{Date, Locale}
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
|
import scala.io.{Codec, Source}
|
||||||
|
|
||||||
object DataciteToOAFTransformation {
|
object DataciteToOAFTransformation {
|
||||||
|
|
||||||
|
case class HostedByMapType(
|
||||||
|
openaire_id: String,
|
||||||
|
datacite_name: String,
|
||||||
|
official_name: String,
|
||||||
|
similarity: Option[Float]
|
||||||
|
) {}
|
||||||
|
|
||||||
val mapper = new ObjectMapper()
|
val mapper = new ObjectMapper()
|
||||||
|
|
||||||
|
val unknown_repository: HostedByMapType = HostedByMapType(
|
||||||
|
ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID,
|
||||||
|
ModelConstants.UNKNOWN_REPOSITORY.getValue,
|
||||||
|
ModelConstants.UNKNOWN_REPOSITORY.getValue,
|
||||||
|
Some(1.0f)
|
||||||
|
)
|
||||||
|
|
||||||
|
val hostedByMap: Map[String, HostedByMapType] = {
|
||||||
|
val s = Source.fromInputStream(getClass.getResourceAsStream("hostedBy_map.json")).mkString
|
||||||
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
|
lazy val json: org.json4s.JValue = parse(s)
|
||||||
|
json.extract[Map[String, HostedByMapType]]
|
||||||
|
}
|
||||||
|
|
||||||
/** This method should skip record if json contains invalid text
|
/** This method should skip record if json contains invalid text
|
||||||
* defined in gile datacite_filter
|
* defined in gile datacite_filter
|
||||||
*
|
*
|
||||||
|
@ -534,12 +556,9 @@ object DataciteToOAFTransformation {
|
||||||
|
|
||||||
if (client.isDefined) {
|
if (client.isDefined) {
|
||||||
|
|
||||||
instance.setHostedby(
|
val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository)
|
||||||
OafMapperUtils.keyValue(
|
instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name))
|
||||||
generateDSId(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID),
|
|
||||||
ModelConstants.UNKNOWN_REPOSITORY.getValue
|
|
||||||
)
|
|
||||||
)
|
|
||||||
instance.setCollectedfrom(DATACITE_COLLECTED_FROM)
|
instance.setCollectedfrom(DATACITE_COLLECTED_FROM)
|
||||||
instance.setUrl(List(s"https://dx.doi.org/$doi").asJava)
|
instance.setUrl(List(s"https://dx.doi.org/$doi").asJava)
|
||||||
instance.setAccessright(access_rights_qualifier)
|
instance.setAccessright(access_rights_qualifier)
|
||||||
|
|
|
@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
|
||||||
|
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.net.URLDecoder;
|
import java.net.URLDecoder;
|
||||||
|
@ -11,8 +12,11 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.Element;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.PacePerson;
|
import eu.dnetlib.dhp.common.PacePerson;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
@ -34,10 +38,25 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
|
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
|
||||||
return prepareListStructProps(
|
|
||||||
doc,
|
final List<StructuredProperty> title = Lists.newArrayList();
|
||||||
"//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']",
|
final String xpath = "//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']";
|
||||||
MAIN_TITLE_QUALIFIER, info);
|
|
||||||
|
for (Object o : doc.selectNodes(xpath)) {
|
||||||
|
Element e = (Element) o;
|
||||||
|
final String titleValue = e.getTextTrim();
|
||||||
|
final String titleType = e.attributeValue("titleType");
|
||||||
|
if (StringUtils.isNotBlank(titleType)) {
|
||||||
|
title
|
||||||
|
.add(
|
||||||
|
structuredProperty(
|
||||||
|
titleValue, titleType, titleType, DNET_DATACITE_TITLE, DNET_DATACITE_TITLE, info));
|
||||||
|
} else {
|
||||||
|
title.add(structuredProperty(titleValue, MAIN_TITLE_QUALIFIER, info));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return title;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -10,6 +10,7 @@ import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
@ -355,6 +356,25 @@ class MappersTest {
|
||||||
assertTrue(r2.getValidated());
|
assertTrue(r2.getValidated());
|
||||||
assertEquals("2020-01-01", r1.getValidationDate());
|
assertEquals("2020-01-01", r1.getValidationDate());
|
||||||
assertEquals("2020-01-01", r2.getValidationDate());
|
assertEquals("2020-01-01", r2.getValidationDate());
|
||||||
|
|
||||||
|
assertNotNull(d.getTitle());
|
||||||
|
assertEquals(2, d.getTitle().size());
|
||||||
|
verifyTitle(d, "main title", "Temperature and ADCP data collected on Lake Geneva between 2015 and 2017");
|
||||||
|
verifyTitle(d, "Subtitle", "survey");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyTitle(Dataset d, String titleType, String title) {
|
||||||
|
Optional
|
||||||
|
.of(
|
||||||
|
d
|
||||||
|
.getTitle()
|
||||||
|
.stream()
|
||||||
|
.filter(t -> titleType.equals(t.getQualifier().getClassid()))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.ifPresent(t -> {
|
||||||
|
assertEquals(1, t.size());
|
||||||
|
assertEquals(title, t.get(0).getValue());
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -54,7 +54,8 @@
|
||||||
</creator>
|
</creator>
|
||||||
</creators>
|
</creators>
|
||||||
<titles>
|
<titles>
|
||||||
<title>Temperature and ADCP data collected on Lake Geneva between 2015 and 2017</title>
|
<title>Temperature and ADCP data collected on Lake Geneva between 2015 and 2017 </title>
|
||||||
|
<title titleType="Subtitle">survey</title>
|
||||||
</titles>
|
</titles>
|
||||||
<publisher>Zenodo</publisher>
|
<publisher>Zenodo</publisher>
|
||||||
<publicationYear>2019</publicationYear>
|
<publicationYear>2019</publicationYear>
|
||||||
|
|
Loading…
Reference in New Issue