xpaths for dateOfCollection and dateOfTransformation (accept both the dr: and dri: prefixes)

This commit is contained in:
Michele Artini 2020-11-18 09:26:20 +01:00
parent 8f87020a50
commit 33da2e3d6c
4 changed files with 65 additions and 24 deletions

View File

@ -1,10 +1,36 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
import static eu.dnetlib.dhp.schema.common.ModelConstants.NOT_AVAILABLE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.dataInfo;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.journal;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.keyValue;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listFields;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.oaiIProvenance;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
import java.util.*; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
@ -15,7 +41,24 @@ import org.dom4j.Node;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.LicenseComparator; import eu.dnetlib.dhp.schema.common.LicenseComparator;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.OAIProvenance;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public abstract class AbstractMdRecordToOafMapper { public abstract class AbstractMdRecordToOafMapper {
@ -92,10 +135,10 @@ public abstract class AbstractMdRecordToOafMapper {
} }
protected String getResultType(final Document doc, final List<Instance> instances) { protected String getResultType(final Document doc, final List<Instance> instances) {
String type = doc.valueOf("//dr:CobjCategory/@type"); final String type = doc.valueOf("//dr:CobjCategory/@type");
if (StringUtils.isBlank(type) & vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { if (StringUtils.isBlank(type) & vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) {
String instanceType = instances final String instanceType = instances
.stream() .stream()
.map(i -> i.getInstancetype().getClassid()) .map(i -> i.getInstancetype().getClassid())
.findFirst() .findFirst()
@ -256,13 +299,11 @@ public abstract class AbstractMdRecordToOafMapper {
r.setDataInfo(info); r.setDataInfo(info);
r.setLastupdatetimestamp(lastUpdateTimestamp); r.setLastupdatetimestamp(lastUpdateTimestamp);
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false)); r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false));
r.setOriginalId(Arrays.asList(findOriginalId(doc))); r.setOriginalId(Arrays.asList(findOriginalId(doc)));
r.setCollectedfrom(Arrays.asList(collectedFrom)); r.setCollectedfrom(Arrays.asList(collectedFrom));
r.setPid(prepareResultPids(doc, info)); r.setPid(prepareResultPids(doc, info));
r.setDateofcollection(doc.valueOf("//dr:dateOfCollection")); r.setDateofcollection(doc.valueOf("//dr:dateOfCollection|//dri:dateOfCollection"));
r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation")); r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation|//dri:dateOfTransformation"));
r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES
r.setOaiprovenance(prepareOAIprovenance(doc)); r.setOaiprovenance(prepareOAIprovenance(doc));
r.setAuthor(prepareAuthors(doc, info)); r.setAuthor(prepareAuthors(doc, info));

View File

@ -171,8 +171,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
structuredProperty( structuredProperty(
((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, ((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE,
info)); info));
} } else {
else{
res res
.add( .add(
structuredProperty( structuredProperty(

View File

@ -78,6 +78,8 @@ public class MappersTest {
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
assertFalse(p.getDataInfo().getInvisible()); assertFalse(p.getDataInfo().getInvisible());
assertTrue(p.getSource().size() == 1); assertTrue(p.getSource().size() == 1);
assertTrue(StringUtils.isNotBlank(p.getDateofcollection()));
assertTrue(StringUtils.isNotBlank(p.getDateoftransformation()));
assertTrue(p.getAuthor().size() > 0); assertTrue(p.getAuthor().size() > 0);
final Optional<Author> author = p final Optional<Author> author = p
@ -329,7 +331,7 @@ public class MappersTest {
@Test @Test
void testODFRecord() throws IOException { void testODFRecord() throws IOException {
final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml")); final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml"));
List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml); final List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml);
System.out.println("***************"); System.out.println("***************");
System.out.println(new ObjectMapper().writeValueAsString(list)); System.out.println(new ObjectMapper().writeValueAsString(list));
System.out.println("***************"); System.out.println("***************");

View File

@ -7,13 +7,12 @@
<header xmlns="http://namespace.openaire.eu/"> <header xmlns="http://namespace.openaire.eu/">
<dri:objIdentifier>pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2</dri:objIdentifier> <dri:objIdentifier>pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2</dri:objIdentifier>
<dri:recordIdentifier>10.3897/oneeco.2.e13718</dri:recordIdentifier> <dri:recordIdentifier>10.3897/oneeco.2.e13718</dri:recordIdentifier>
<dri:dateOfCollection/>
<dri:mdFormat/> <dri:mdFormat/>
<dri:mdFormatInterpretation/> <dri:mdFormatInterpretation/>
<dri:repositoryId/> <dri:repositoryId/>
<dr:objectIdentifier/> <dr:objectIdentifier/>
<dr:dateOfCollection>2020-03-23T00:20:51.392Z</dr:dateOfCollection> <dri:dateOfCollection>2020-03-23T00:20:51.392Z</dri:dateOfCollection>
<dr:dateOfTransformation>2020-03-23T00:26:59.078Z</dr:dateOfTransformation> <dri:dateOfTransformation>2020-03-23T00:26:59.078Z</dri:dateOfTransformation>
<oaf:datasourceprefix>pensoft_____</oaf:datasourceprefix> <oaf:datasourceprefix>pensoft_____</oaf:datasourceprefix>
</header> </header>
<metadata xmlns="http://namespace.openaire.eu/"> <metadata xmlns="http://namespace.openaire.eu/">