forked from antonis.lempesis/dnet-hadoop
Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop
This commit is contained in:
commit
10e673660f
|
@ -1,10 +1,36 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.NOT_AVAILABLE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.dataInfo;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.journal;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.keyValue;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listFields;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.oaiIProvenance;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
|
@ -15,7 +41,24 @@ import org.dom4j.Node;
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.LicenseComparator;
|
import eu.dnetlib.dhp.schema.common.LicenseComparator;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Journal;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OAIProvenance;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public abstract class AbstractMdRecordToOafMapper {
|
public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
|
@ -92,10 +135,10 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String getResultType(final Document doc, final List<Instance> instances) {
|
protected String getResultType(final Document doc, final List<Instance> instances) {
|
||||||
String type = doc.valueOf("//dr:CobjCategory/@type");
|
final String type = doc.valueOf("//dr:CobjCategory/@type");
|
||||||
|
|
||||||
if (StringUtils.isBlank(type) & vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) {
|
if (StringUtils.isBlank(type) & vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) {
|
||||||
String instanceType = instances
|
final String instanceType = instances
|
||||||
.stream()
|
.stream()
|
||||||
.map(i -> i.getInstancetype().getClassid())
|
.map(i -> i.getInstancetype().getClassid())
|
||||||
.findFirst()
|
.findFirst()
|
||||||
|
@ -256,13 +299,11 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
r.setDataInfo(info);
|
r.setDataInfo(info);
|
||||||
r.setLastupdatetimestamp(lastUpdateTimestamp);
|
r.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||||
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false));
|
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false));
|
||||||
|
|
||||||
r.setOriginalId(Arrays.asList(findOriginalId(doc)));
|
r.setOriginalId(Arrays.asList(findOriginalId(doc)));
|
||||||
|
|
||||||
r.setCollectedfrom(Arrays.asList(collectedFrom));
|
r.setCollectedfrom(Arrays.asList(collectedFrom));
|
||||||
r.setPid(prepareResultPids(doc, info));
|
r.setPid(prepareResultPids(doc, info));
|
||||||
r.setDateofcollection(doc.valueOf("//dr:dateOfCollection"));
|
r.setDateofcollection(doc.valueOf("//dr:dateOfCollection|//dri:dateOfCollection"));
|
||||||
r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation"));
|
r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation|//dri:dateOfTransformation"));
|
||||||
r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
||||||
r.setOaiprovenance(prepareOAIprovenance(doc));
|
r.setOaiprovenance(prepareOAIprovenance(doc));
|
||||||
r.setAuthor(prepareAuthors(doc, info));
|
r.setAuthor(prepareAuthors(doc, info));
|
||||||
|
|
|
@ -162,22 +162,21 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
for (final Object o : doc.selectNodes("//datacite:date")) {
|
for (final Object o : doc.selectNodes("//datacite:date")) {
|
||||||
final String dateType = ((Node) o).valueOf("@dateType");
|
final String dateType = ((Node) o).valueOf("@dateType");
|
||||||
if (StringUtils.isBlank(dateType)
|
if (StringUtils.isBlank(dateType)
|
||||||
|| ( !dateType.equalsIgnoreCase("Accepted")
|
|| (!dateType.equalsIgnoreCase("Accepted")
|
||||||
&& !dateType.equalsIgnoreCase("Issued")
|
&& !dateType.equalsIgnoreCase("Issued")
|
||||||
&& !dateType.equalsIgnoreCase("Updated")
|
&& !dateType.equalsIgnoreCase("Updated")
|
||||||
&& !dateType.equalsIgnoreCase("Available"))) {
|
&& !dateType.equalsIgnoreCase("Available"))) {
|
||||||
res
|
res
|
||||||
.add(
|
.add(
|
||||||
structuredProperty(
|
structuredProperty(
|
||||||
((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE,
|
((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE,
|
||||||
info));
|
info));
|
||||||
}
|
} else {
|
||||||
else{
|
|
||||||
res
|
res
|
||||||
.add(
|
.add(
|
||||||
structuredProperty(
|
structuredProperty(
|
||||||
((Node) o).getText(), dateType, dateType, DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE,
|
((Node) o).getText(), dateType, dateType, DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE,
|
||||||
info));
|
info));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
|
|
@ -78,6 +78,8 @@ public class MappersTest {
|
||||||
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
||||||
assertFalse(p.getDataInfo().getInvisible());
|
assertFalse(p.getDataInfo().getInvisible());
|
||||||
assertTrue(p.getSource().size() == 1);
|
assertTrue(p.getSource().size() == 1);
|
||||||
|
assertTrue(StringUtils.isNotBlank(p.getDateofcollection()));
|
||||||
|
assertTrue(StringUtils.isNotBlank(p.getDateoftransformation()));
|
||||||
|
|
||||||
assertTrue(p.getAuthor().size() > 0);
|
assertTrue(p.getAuthor().size() > 0);
|
||||||
final Optional<Author> author = p
|
final Optional<Author> author = p
|
||||||
|
@ -329,7 +331,7 @@ public class MappersTest {
|
||||||
@Test
|
@Test
|
||||||
void testODFRecord() throws IOException {
|
void testODFRecord() throws IOException {
|
||||||
final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml"));
|
final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml"));
|
||||||
List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml);
|
final List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml);
|
||||||
System.out.println("***************");
|
System.out.println("***************");
|
||||||
System.out.println(new ObjectMapper().writeValueAsString(list));
|
System.out.println(new ObjectMapper().writeValueAsString(list));
|
||||||
System.out.println("***************");
|
System.out.println("***************");
|
||||||
|
|
|
@ -7,13 +7,12 @@
|
||||||
<header xmlns="http://namespace.openaire.eu/">
|
<header xmlns="http://namespace.openaire.eu/">
|
||||||
<dri:objIdentifier>pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2</dri:objIdentifier>
|
<dri:objIdentifier>pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2</dri:objIdentifier>
|
||||||
<dri:recordIdentifier>10.3897/oneeco.2.e13718</dri:recordIdentifier>
|
<dri:recordIdentifier>10.3897/oneeco.2.e13718</dri:recordIdentifier>
|
||||||
<dri:dateOfCollection/>
|
|
||||||
<dri:mdFormat/>
|
<dri:mdFormat/>
|
||||||
<dri:mdFormatInterpretation/>
|
<dri:mdFormatInterpretation/>
|
||||||
<dri:repositoryId/>
|
<dri:repositoryId/>
|
||||||
<dr:objectIdentifier/>
|
<dr:objectIdentifier/>
|
||||||
<dr:dateOfCollection>2020-03-23T00:20:51.392Z</dr:dateOfCollection>
|
<dri:dateOfCollection>2020-03-23T00:20:51.392Z</dri:dateOfCollection>
|
||||||
<dr:dateOfTransformation>2020-03-23T00:26:59.078Z</dr:dateOfTransformation>
|
<dri:dateOfTransformation>2020-03-23T00:26:59.078Z</dri:dateOfTransformation>
|
||||||
<oaf:datasourceprefix>pensoft_____</oaf:datasourceprefix>
|
<oaf:datasourceprefix>pensoft_____</oaf:datasourceprefix>
|
||||||
</header>
|
</header>
|
||||||
<metadata xmlns="http://namespace.openaire.eu/">
|
<metadata xmlns="http://namespace.openaire.eu/">
|
||||||
|
|
Loading…
Reference in New Issue