From 8c96a82a03c4a9b01944dbc34175c6a07c481e69 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 5 May 2021 15:30:06 +0200 Subject: [PATCH] fixed mapping applied to ODF records. Added unit test to verify the mapping for OpenTrials --- .../raw/AbstractMdRecordToOafMapper.java | 26 ++-- .../dhp/oa/graph/raw/OdfToOafMapper.java | 15 ++- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 120 ++++++++++++++++-- .../dhp/oa/graph/raw/odf_opentrial.xml | 75 +++++++++++ 4 files changed, 206 insertions(+), 30 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_opentrial.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index cccf15398..aa9e4a11f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -24,13 +24,7 @@ import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.oaiIProvenance; import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; +import java.util.*; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; @@ -38,6 +32,9 @@ import org.dom4j.DocumentFactory; import org.dom4j.DocumentHelper; import org.dom4j.Node; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.common.LicenseComparator; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -330,7 +327,7 @@ public abstract class AbstractMdRecordToOafMapper { r.setDataInfo(info); r.setLastupdatetimestamp(lastUpdateTimestamp); r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false)); - r.setOriginalId(Arrays.asList(findOriginalId(doc))); + r.setOriginalId(findOriginalId(doc)); r.setCollectedfrom(Arrays.asList(collectedFrom)); r.setPid(prepareResultPids(doc, info)); r.setDateofcollection(doc.valueOf("//dr:dateOfCollection|//dri:dateOfCollection")); @@ -493,16 +490,23 @@ public abstract class AbstractMdRecordToOafMapper { return null; } - private String findOriginalId(final Document doc) { + private List findOriginalId(final Document doc) { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); if (n != null) { final String id = n.valueOf("./*[local-name()='identifier']"); if (StringUtils.isNotBlank(id)) { - return id; + return Lists.newArrayList(id); } } - return doc.valueOf("//*[local-name()='header']/*[local-name()='identifier']"); + List idList = doc + .selectNodes( + "normalize-space(//*[local-name()='header']/*[local-name()='identifier' or local-name()='recordIdentifier']/text())"); + Set originalIds = Sets.newHashSet(idList); + if (originalIds.isEmpty()) { + throw new IllegalStateException("missing originalID on " + doc.asXML()); + } + return Lists.newArrayList(originalIds); } protected Qualifier prepareQualifier(final Node node, final String xpath, final String schemeId) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index c2c2cb645..6e0161be6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -119,7 +119,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); - instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); + final String distributionlocation = doc.valueOf("//oaf:distributionlocation"); + instance.setDistributionlocation(StringUtils.isNotBlank(distributionlocation) ? distributionlocation : null); instance .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES)); instance.setLicense(field(doc.valueOf("//oaf:license"), info)); @@ -200,12 +201,12 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List> prepareFormats(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//*[local-name()=':format']", info); + return prepareListFields(doc, "//*[local-name()='format']", info); } @Override protected Field preparePublisher(final Document doc, final DataInfo info) { - return prepareField(doc, "//*[local-name()=':publisher']", info); + return prepareField(doc, "//*[local-name()='publisher']", info); } @Override @@ -220,7 +221,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareLanguages(final Document doc) { - return prepareQualifier(doc, "//*[local-name()=':language']", DNET_LANGUAGES); + return prepareQualifier(doc, "//*[local-name()='language']", DNET_LANGUAGES); } @Override @@ -287,9 +288,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { for (final Object o : doc.selectNodes("//*[local-name()='geoLocation']")) { final GeoLocation loc = new GeoLocation(); - loc.setBox(((Node) o).valueOf("./*[local-name()=':geoLocationBox']")); - loc.setPlace(((Node) o).valueOf("./*[local-name()=':geoLocationPlace']")); - loc.setPoint(((Node) o).valueOf("./*[local-name()=':geoLocationPoint']")); + loc.setBox(((Node) o).valueOf("./*[local-name()='geoLocationBox']")); + loc.setPlace(((Node) o).valueOf("./*[local-name()='geoLocationPlace']")); + loc.setPoint(((Node) o).valueOf("./*[local-name()='geoLocationPoint']")); res.add(loc); } return res; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index ab956a378..3d90794a9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,10 +1,7 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.lenient; import java.io.IOException; @@ -25,14 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) @@ -256,6 +246,112 @@ public class MappersTest { assertEquals(r2.getValidationDate(), "2020-01-01"); } + @Test + void testOpentrial() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_opentrial.xml")); + + final List list = new OdfToOafMapper(vocs, false).processMdRecord(xml); + + assertEquals(1, list.size()); + assertTrue(list.get(0) instanceof Dataset); + final Dataset d = (Dataset) list.get(0); + + assertNotNull(d.getDateofcollection()); + assertEquals("2019-03-27T15:15:22.22Z", d.getDateofcollection()); + + assertNotNull(d.getDateoftransformation()); + assertEquals("2019-04-17T16:04:20.586Z", d.getDateoftransformation()); + + assertNotNull(d.getDataInfo()); + assertFalse(d.getDataInfo().getInvisible()); + assertFalse(d.getDataInfo().getDeletedbyinference()); + assertEquals("0.9", d.getDataInfo().getTrust()); + + assertEquals("", d.getDataInfo().getInferenceprovenance()); + + assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassid()); + assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassname()); + assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemeid()); + assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemename()); + + assertValidId(d.getId()); + assertTrue(d.getOriginalId().size() == 1); + assertEquals("feabb67c-1fd1-423b-aec6-606d04ce53c6", d.getOriginalId().get(0)); + assertValidId(d.getCollectedfrom().get(0).getKey()); + + assertNotNull(d.getTitle()); + assertEquals(1, d.getTitle().size()); + assertEquals( + "Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia", + d.getTitle().get(0).getValue()); + + assertNotNull(d.getDescription()); + assertEquals(1, d.getDescription().size()); + assertTrue(StringUtils.isNotBlank(d.getDescription().get(0).getValue())); + + assertTrue(d.getAuthor().size() == 1); + assertEquals("Jensen, Kristian K", d.getAuthor().get(0).getFullname()); + assertEquals("Kristian K.", d.getAuthor().get(0).getName()); + assertEquals("Jensen", d.getAuthor().get(0).getSurname()); + + assertNotNull(d.getAuthor().get(0).getPid()); + assertTrue(d.getAuthor().get(0).getPid().isEmpty()); + + assertNotNull(d.getPid()); + assertEquals(1, d.getPid().size()); + assertEquals("NCT02321059", d.getPid().get(0).getValue()); + assertEquals("nct", d.getPid().get(0).getQualifier().getClassid()); + assertEquals("ClinicalTrials.gov Identifier", d.getPid().get(0).getQualifier().getClassname()); + assertEquals(ModelConstants.DNET_PID_TYPES, d.getPid().get(0).getQualifier().getSchemeid()); + assertEquals(ModelConstants.DNET_PID_TYPES, d.getPid().get(0).getQualifier().getSchemename()); + + assertNotNull(d.getPublisher()); + assertEquals("nct", d.getPublisher().getValue()); + + assertTrue(d.getSubject().isEmpty()); + assertTrue(d.getContext().isEmpty()); + + assertNotNull(d.getInstance()); + assertTrue(d.getInstance().size() == 1); + + Instance i = d.getInstance().get(0); + + assertNotNull(i.getAccessright()); + assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemeid()); + assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemename()); + assertEquals("OPEN", i.getAccessright().getClassid()); + assertEquals("Open Access", i.getAccessright().getClassname()); + + assertNotNull(i.getCollectedfrom()); + assertEquals("10|openaire____::b292fc2d7de505f78e3cae1b06ea8548", i.getCollectedfrom().getKey()); + assertEquals("OpenTrials", i.getCollectedfrom().getValue()); + + assertNotNull(i.getHostedby()); + assertEquals("10|openaire____::b292fc2d7de505f78e3cae1b06ea8548", i.getHostedby().getKey()); + assertEquals("OpenTrials", i.getHostedby().getValue()); + + assertNotNull(i.getInstancetype()); + assertEquals("0037", i.getInstancetype().getClassid()); + assertEquals("Clinical Trial", i.getInstancetype().getClassname()); + assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemeid()); + assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemename()); + + assertNull(i.getLicense()); + assertNotNull(i.getDateofacceptance()); + assertEquals("2014-11-11", i.getDateofacceptance().getValue()); + + assertNull(i.getDistributionlocation()); + assertNull(i.getProcessingchargeamount()); + assertNull(i.getProcessingchargecurrency()); + + assertNotNull(i.getUrl()); + assertEquals(2, i.getUrl().size()); + assertTrue(i.getUrl().contains("http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059")); + assertTrue(i.getUrl().contains("https://clinicaltrials.gov/ct2/show/NCT02321059")); + + assertEquals("UNKNOWN", i.getRefereed().getClassid()); + } + @Test void testSoftware() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_opentrial.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_opentrial.xml new file mode 100644 index 000000000..97e966385 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_opentrial.xml @@ -0,0 +1,75 @@ + + + + opentrials__::0000bf8e63d3d7e6b88421eabafae3f6 + feabb67c-1fd1-423b-aec6-606d04ce53c6 + 2019-03-27T15:15:22.22Z + opentrials__ + 2019-04-17T16:04:20.586Z + + + + https://clinicaltrials.gov/ct2/show/NCT02321059 + + http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059 + NCT02321059 + + + + Jensen, Kristian K + + + + Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia + + nct + + Denmark + + 0037 + + Patients with an incisional hernia in the midline and controls with an intact abdominal wall are examined twice with one week apart, in order to establish the test-retest reliability and internal and external validity of the Goodstrength trunk dynamometer. + + + OPEN + 0037 + 2014-11-11 + + + + + false + false + 0.9 + + + + + + + + + file:///var/lib/dnet/data/opentrials/opentrials.csv + + + + + + + false + false + 0.9 + + + + + \ No newline at end of file