From dccaf173cf9c788c1fad8a70dfe8084ff0eebaeb Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 5 May 2021 16:36:15 +0200 Subject: [PATCH] fixed mapping applied to ODF records. Added unit test to verify the mapping for OpenTrials --- .../raw/AbstractMdRecordToOafMapper.java | 16 ++- .../dhp/oa/graph/raw/OdfToOafMapper.java | 15 ++- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 127 ++++++++++++++++-- .../dhp/oa/graph/raw/odf_opentrial.xml | 75 +++++++++++ 4 files changed, 210 insertions(+), 23 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_opentrial.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index ad256a3c5..912c55634 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; import java.util.*; import java.util.stream.Collectors; +import com.google.common.collect.Sets; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentFactory; @@ -271,7 +272,7 @@ public abstract class AbstractMdRecordToOafMapper { r.setDataInfo(info); r.setLastupdatetimestamp(lastUpdateTimestamp); r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false)); - r.setOriginalId(Lists.newArrayList(findOriginalId(doc))); + r.setOriginalId(findOriginalId(doc)); r.setCollectedfrom(Arrays.asList(collectedFrom)); r.setPid(IdentifierFactory.getPids(prepareResultPids(doc, info), collectedFrom)); r.setDateofcollection(doc.valueOf("//dr:dateOfCollection/text()|//dri:dateOfCollection/text()")); @@ -402,16 +403,23 @@ public abstract class AbstractMdRecordToOafMapper { return null; } - private String findOriginalId(final Document doc) { + private List findOriginalId(final Document doc) { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); if (n != null) { final String id = n.valueOf("./*[local-name()='identifier']"); if (StringUtils.isNotBlank(id)) { - return id; + return Lists.newArrayList(id); } } - return doc.valueOf("//*[local-name()='header']/*[local-name()='identifier']"); + List idList = doc + .selectNodes( + "normalize-space(//*[local-name()='header']/*[local-name()='identifier' or local-name()='recordIdentifier']/text())"); + Set originalIds = Sets.newHashSet(idList); + if (originalIds.isEmpty()) { + throw new IllegalStateException("missing originalID on " + doc.asXML()); + } + return Lists.newArrayList(originalIds); } protected AccessRight prepareAccessRight(final Node node, final String xpath, final String schemeId) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 4624f171b..e5b477209 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -117,7 +117,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { instance.setPid(pid); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); - instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); + final String distributionlocation = doc.valueOf("//oaf:distributionlocation"); + instance.setDistributionlocation(StringUtils.isNotBlank(distributionlocation) ? distributionlocation : null); instance .setAccessright(prepareAccessRight(doc, "//oaf:accessrights", DNET_ACCESS_MODES)); instance.setLicense(field(doc.valueOf("//oaf:license"), info)); @@ -198,12 +199,12 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List> prepareFormats(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//*[local-name()=':format']", info); + return prepareListFields(doc, "//*[local-name()='format']", info); } @Override protected Field preparePublisher(final Document doc, final DataInfo info) { - return prepareField(doc, "//*[local-name()=':publisher']", info); + return prepareField(doc, "//*[local-name()='publisher']", info); } @Override @@ -218,7 +219,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareLanguages(final Document doc) { - return prepareQualifier(doc, "//*[local-name()=':language']", DNET_LANGUAGES); + return prepareQualifier(doc, "//*[local-name()='language']", DNET_LANGUAGES); } @Override @@ -285,9 +286,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { for (final Object o : doc.selectNodes("//*[local-name()='geoLocation']")) { final GeoLocation loc = new GeoLocation(); - loc.setBox(((Node) o).valueOf("./*[local-name()=':geoLocationBox']")); - loc.setPlace(((Node) o).valueOf("./*[local-name()=':geoLocationPlace']")); - loc.setPoint(((Node) o).valueOf("./*[local-name()=':geoLocationPoint']")); + loc.setBox(((Node) o).valueOf("./*[local-name()='geoLocationBox']")); + loc.setPlace(((Node) o).valueOf("./*[local-name()='geoLocationPlace']")); + loc.setPoint(((Node) o).valueOf("./*[local-name()='geoLocationPoint']")); res.add(loc); } return res; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index c86e31280..5435a64eb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,16 +1,14 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.lenient; import java.io.IOException; import java.util.List; import java.util.Optional; +import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -24,14 +22,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -337,6 +327,119 @@ public class MappersTest { assertEquals(r2.getValidationDate(), "2020-01-01"); } + @Test + void testOpentrial() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_opentrial.xml")); + + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + + assertEquals(1, list.size()); + assertTrue(list.get(0) instanceof Dataset); + final Dataset d = (Dataset) list.get(0); + + assertNotNull(d.getDateofcollection()); + assertEquals("2019-03-27T15:15:22.22Z", d.getDateofcollection()); + + assertNotNull(d.getDateoftransformation()); + assertEquals("2019-04-17T16:04:20.586Z", d.getDateoftransformation()); + + assertNotNull(d.getDataInfo()); + assertFalse(d.getDataInfo().getInvisible()); + assertFalse(d.getDataInfo().getDeletedbyinference()); + assertEquals("0.9", d.getDataInfo().getTrust()); + + assertEquals("", d.getDataInfo().getInferenceprovenance()); + + assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassid()); + assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassname()); + assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemeid()); + assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemename()); + + assertValidId(d.getId()); + assertTrue(d.getOriginalId().size() == 1); + assertEquals("feabb67c-1fd1-423b-aec6-606d04ce53c6", d.getOriginalId().get(0)); + assertValidId(d.getCollectedfrom().get(0).getKey()); + + assertNotNull(d.getTitle()); + assertEquals(1, d.getTitle().size()); + assertEquals( + "Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia", + d.getTitle().get(0).getValue()); + + assertNotNull(d.getDescription()); + assertEquals(1, d.getDescription().size()); + assertTrue(StringUtils.isNotBlank(d.getDescription().get(0).getValue())); + + assertTrue(d.getAuthor().size() == 1); + assertEquals("Jensen, Kristian K", d.getAuthor().get(0).getFullname()); + assertEquals("Kristian K.", d.getAuthor().get(0).getName()); + assertEquals("Jensen", d.getAuthor().get(0).getSurname()); + + assertNotNull(d.getAuthor().get(0).getPid()); + assertTrue(d.getAuthor().get(0).getPid().isEmpty()); + + assertNotNull(d.getPid()); + assertEquals(0, d.getPid().size()); + + assertNotNull(d.getPublisher()); + assertEquals("nct", d.getPublisher().getValue()); + + assertTrue(d.getSubject().isEmpty()); + assertTrue(d.getContext().isEmpty()); + + assertNotNull(d.getInstance()); + assertTrue(d.getInstance().size() == 1); + + Instance i = d.getInstance().get(0); + + assertNotNull(i.getAccessright()); + assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemeid()); + assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemename()); + assertEquals("OPEN", i.getAccessright().getClassid()); + assertEquals("Open Access", i.getAccessright().getClassname()); + + assertNotNull(i.getCollectedfrom()); + assertEquals("10|openaire____::b292fc2d7de505f78e3cae1b06ea8548", i.getCollectedfrom().getKey()); + assertEquals("OpenTrials", i.getCollectedfrom().getValue()); + + assertNotNull(i.getHostedby()); + assertEquals("10|openaire____::b292fc2d7de505f78e3cae1b06ea8548", i.getHostedby().getKey()); + assertEquals("OpenTrials", i.getHostedby().getValue()); + + assertNotNull(i.getInstancetype()); + assertEquals("0037", i.getInstancetype().getClassid()); + assertEquals("Clinical Trial", i.getInstancetype().getClassname()); + assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemeid()); + assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemename()); + + assertNull(i.getLicense()); + assertNotNull(i.getDateofacceptance()); + assertEquals("2014-11-11", i.getDateofacceptance().getValue()); + + assertNull(i.getDistributionlocation()); + assertNull(i.getProcessingchargeamount()); + assertNull(i.getProcessingchargecurrency()); + + assertNotNull(i.getPid()); + assertEquals(0, i.getPid().size()); + + assertNotNull(i.getAlternateIdentifier()); + assertEquals(1, i.getAlternateIdentifier().size()); + assertEquals("NCT02321059", i.getAlternateIdentifier().get(0).getValue()); + assertEquals("nct", i.getAlternateIdentifier().get(0).getQualifier().getClassid()); + assertEquals("ClinicalTrials.gov Identifier", i.getAlternateIdentifier().get(0).getQualifier().getClassname()); + assertEquals(ModelConstants.DNET_PID_TYPES, i.getAlternateIdentifier().get(0).getQualifier().getSchemeid()); + assertEquals(ModelConstants.DNET_PID_TYPES, i.getAlternateIdentifier().get(0).getQualifier().getSchemename()); + + + assertNotNull(i.getUrl()); + assertEquals(2, i.getUrl().size()); + assertTrue(i.getUrl().contains("http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059")); + assertTrue(i.getUrl().contains("https://clinicaltrials.gov/ct2/show/NCT02321059")); + + assertEquals("UNKNOWN", i.getRefereed().getClassid()); + } + @Test void testSoftware() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_opentrial.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_opentrial.xml new file mode 100644 index 000000000..97e966385 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_opentrial.xml @@ -0,0 +1,75 @@ + + + + opentrials__::0000bf8e63d3d7e6b88421eabafae3f6 + feabb67c-1fd1-423b-aec6-606d04ce53c6 + 2019-03-27T15:15:22.22Z + opentrials__ + 2019-04-17T16:04:20.586Z + + + + https://clinicaltrials.gov/ct2/show/NCT02321059 + + http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059 + NCT02321059 + + + + Jensen, Kristian K + + + + Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia + + nct + + Denmark + + 0037 + + Patients with an incisional hernia in the midline and controls with an intact abdominal wall are examined twice with one week apart, in order to establish the test-retest reliability and internal and external validity of the Goodstrength trunk dynamometer. + + + OPEN + 0037 + 2014-11-11 + + + + + false + false + 0.9 + + + + + + + + + file:///var/lib/dnet/data/opentrials/opentrials.csv + + + + + + + false + false + 0.9 + + + + + \ No newline at end of file