forked from antonis.lempesis/dnet-hadoop
fixed mapping applied to ODF records. Added unit test to verify the mapping for OpenTrials
This commit is contained in:
parent
50fc128ff7
commit
8c96a82a03
|
@ -24,13 +24,7 @@ import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.oaiIProvenance;
|
|||
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier;
|
||||
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
|
@ -38,6 +32,9 @@ import org.dom4j.DocumentFactory;
|
|||
import org.dom4j.DocumentHelper;
|
||||
import org.dom4j.Node;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.common.LicenseComparator;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
|
@ -330,7 +327,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
r.setDataInfo(info);
|
||||
r.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false));
|
||||
r.setOriginalId(Arrays.asList(findOriginalId(doc)));
|
||||
r.setOriginalId(findOriginalId(doc));
|
||||
r.setCollectedfrom(Arrays.asList(collectedFrom));
|
||||
r.setPid(prepareResultPids(doc, info));
|
||||
r.setDateofcollection(doc.valueOf("//dr:dateOfCollection|//dri:dateOfCollection"));
|
||||
|
@ -493,16 +490,23 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
return null;
|
||||
}
|
||||
|
||||
private String findOriginalId(final Document doc) {
|
||||
private List<String> findOriginalId(final Document doc) {
|
||||
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
||||
if (n != null) {
|
||||
final String id = n.valueOf("./*[local-name()='identifier']");
|
||||
if (StringUtils.isNotBlank(id)) {
|
||||
return id;
|
||||
return Lists.newArrayList(id);
|
||||
}
|
||||
}
|
||||
return doc.valueOf("//*[local-name()='header']/*[local-name()='identifier']");
|
||||
List<String> idList = doc
|
||||
.selectNodes(
|
||||
"normalize-space(//*[local-name()='header']/*[local-name()='identifier' or local-name()='recordIdentifier']/text())");
|
||||
Set<String> originalIds = Sets.newHashSet(idList);
|
||||
|
||||
if (originalIds.isEmpty()) {
|
||||
throw new IllegalStateException("missing originalID on " + doc.asXML());
|
||||
}
|
||||
return Lists.newArrayList(originalIds);
|
||||
}
|
||||
|
||||
protected Qualifier prepareQualifier(final Node node, final String xpath, final String schemeId) {
|
||||
|
|
|
@ -119,7 +119,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
instance.setCollectedfrom(collectedfrom);
|
||||
instance.setHostedby(hostedby);
|
||||
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
||||
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
|
||||
final String distributionlocation = doc.valueOf("//oaf:distributionlocation");
|
||||
instance.setDistributionlocation(StringUtils.isNotBlank(distributionlocation) ? distributionlocation : null);
|
||||
instance
|
||||
.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
|
||||
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
||||
|
@ -200,12 +201,12 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
|
||||
/**
 * Extracts the format fields of the record by delegating to {@code prepareListFields} with an XPath that
 * matches elements by their local name.
 * <p>
 * NOTE(review): this span of the diff shows two return statements; presumably the first (':format') is the
 * removed line and the second ('format') the added one - confirm against the committed file. XPath
 * local-name() yields the name without prefix or colon, so the ':format' variant cannot match an element.
 *
 * @param doc the parsed metadata record
 * @param info the DataInfo handed over to {@code prepareListFields}
 * @return the list produced by {@code prepareListFields}
 */
@Override
|
||||
protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) {
|
||||
return prepareListFields(doc, "//*[local-name()=':format']", info);
|
||||
return prepareListFields(doc, "//*[local-name()='format']", info);
|
||||
}
|
||||
|
||||
/**
 * Extracts the publisher of the record by delegating to {@code prepareField} with an XPath that matches
 * elements by their local name.
 * <p>
 * NOTE(review): this span of the diff shows two return statements; presumably the first (':publisher') is
 * the removed line and the second ('publisher') the added one - confirm against the committed file. XPath
 * local-name() yields the name without prefix or colon, so the ':publisher' variant cannot match an element.
 *
 * @param doc the parsed metadata record
 * @param info the DataInfo handed over to {@code prepareField}
 * @return the field produced by {@code prepareField}
 */
@Override
|
||||
protected Field<String> preparePublisher(final Document doc, final DataInfo info) {
|
||||
return prepareField(doc, "//*[local-name()=':publisher']", info);
|
||||
return prepareField(doc, "//*[local-name()='publisher']", info);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -220,7 +221,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
|
||||
/**
 * Builds the language qualifier of the record by delegating to {@code prepareQualifier} with an XPath that
 * matches elements by their local name and the {@code DNET_LANGUAGES} scheme id.
 * <p>
 * NOTE(review): this span of the diff shows two return statements; presumably the first (':language') is
 * the removed line and the second ('language') the added one - confirm against the committed file. XPath
 * local-name() yields the name without prefix or colon, so the ':language' variant cannot match an element.
 *
 * @param doc the parsed metadata record
 * @return the qualifier produced by {@code prepareQualifier}
 */
@Override
|
||||
protected Qualifier prepareLanguages(final Document doc) {
|
||||
return prepareQualifier(doc, "//*[local-name()=':language']", DNET_LANGUAGES);
|
||||
return prepareQualifier(doc, "//*[local-name()='language']", DNET_LANGUAGES);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -287,9 +288,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
|
||||
for (final Object o : doc.selectNodes("//*[local-name()='geoLocation']")) {
|
||||
final GeoLocation loc = new GeoLocation();
|
||||
loc.setBox(((Node) o).valueOf("./*[local-name()=':geoLocationBox']"));
|
||||
loc.setPlace(((Node) o).valueOf("./*[local-name()=':geoLocationPlace']"));
|
||||
loc.setPoint(((Node) o).valueOf("./*[local-name()=':geoLocationPoint']"));
|
||||
loc.setBox(((Node) o).valueOf("./*[local-name()='geoLocationBox']"));
|
||||
loc.setPlace(((Node) o).valueOf("./*[local-name()='geoLocationPlace']"));
|
||||
loc.setPoint(((Node) o).valueOf("./*[local-name()='geoLocationPoint']"));
|
||||
res.add(loc);
|
||||
}
|
||||
return res;
|
||||
|
|
|
@ -1,10 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.raw;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import static org.mockito.Mockito.lenient;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -25,14 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest;
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
|
@ -256,6 +246,112 @@ public class MappersTest {
|
|||
assertEquals(r2.getValidationDate(), "2020-01-01");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testOpentrial() throws IOException {
|
||||
final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_opentrial.xml"));
|
||||
|
||||
final List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml);
|
||||
|
||||
assertEquals(1, list.size());
|
||||
assertTrue(list.get(0) instanceof Dataset);
|
||||
final Dataset d = (Dataset) list.get(0);
|
||||
|
||||
assertNotNull(d.getDateofcollection());
|
||||
assertEquals("2019-03-27T15:15:22.22Z", d.getDateofcollection());
|
||||
|
||||
assertNotNull(d.getDateoftransformation());
|
||||
assertEquals("2019-04-17T16:04:20.586Z", d.getDateoftransformation());
|
||||
|
||||
assertNotNull(d.getDataInfo());
|
||||
assertFalse(d.getDataInfo().getInvisible());
|
||||
assertFalse(d.getDataInfo().getDeletedbyinference());
|
||||
assertEquals("0.9", d.getDataInfo().getTrust());
|
||||
|
||||
assertEquals("", d.getDataInfo().getInferenceprovenance());
|
||||
|
||||
assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassid());
|
||||
assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassname());
|
||||
assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemeid());
|
||||
assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemename());
|
||||
|
||||
assertValidId(d.getId());
|
||||
assertTrue(d.getOriginalId().size() == 1);
|
||||
assertEquals("feabb67c-1fd1-423b-aec6-606d04ce53c6", d.getOriginalId().get(0));
|
||||
assertValidId(d.getCollectedfrom().get(0).getKey());
|
||||
|
||||
assertNotNull(d.getTitle());
|
||||
assertEquals(1, d.getTitle().size());
|
||||
assertEquals(
|
||||
"Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia",
|
||||
d.getTitle().get(0).getValue());
|
||||
|
||||
assertNotNull(d.getDescription());
|
||||
assertEquals(1, d.getDescription().size());
|
||||
assertTrue(StringUtils.isNotBlank(d.getDescription().get(0).getValue()));
|
||||
|
||||
assertTrue(d.getAuthor().size() == 1);
|
||||
assertEquals("Jensen, Kristian K", d.getAuthor().get(0).getFullname());
|
||||
assertEquals("Kristian K.", d.getAuthor().get(0).getName());
|
||||
assertEquals("Jensen", d.getAuthor().get(0).getSurname());
|
||||
|
||||
assertNotNull(d.getAuthor().get(0).getPid());
|
||||
assertTrue(d.getAuthor().get(0).getPid().isEmpty());
|
||||
|
||||
assertNotNull(d.getPid());
|
||||
assertEquals(1, d.getPid().size());
|
||||
assertEquals("NCT02321059", d.getPid().get(0).getValue());
|
||||
assertEquals("nct", d.getPid().get(0).getQualifier().getClassid());
|
||||
assertEquals("ClinicalTrials.gov Identifier", d.getPid().get(0).getQualifier().getClassname());
|
||||
assertEquals(ModelConstants.DNET_PID_TYPES, d.getPid().get(0).getQualifier().getSchemeid());
|
||||
assertEquals(ModelConstants.DNET_PID_TYPES, d.getPid().get(0).getQualifier().getSchemename());
|
||||
|
||||
assertNotNull(d.getPublisher());
|
||||
assertEquals("nct", d.getPublisher().getValue());
|
||||
|
||||
assertTrue(d.getSubject().isEmpty());
|
||||
assertTrue(d.getContext().isEmpty());
|
||||
|
||||
assertNotNull(d.getInstance());
|
||||
assertTrue(d.getInstance().size() == 1);
|
||||
|
||||
Instance i = d.getInstance().get(0);
|
||||
|
||||
assertNotNull(i.getAccessright());
|
||||
assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemeid());
|
||||
assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemename());
|
||||
assertEquals("OPEN", i.getAccessright().getClassid());
|
||||
assertEquals("Open Access", i.getAccessright().getClassname());
|
||||
|
||||
assertNotNull(i.getCollectedfrom());
|
||||
assertEquals("10|openaire____::b292fc2d7de505f78e3cae1b06ea8548", i.getCollectedfrom().getKey());
|
||||
assertEquals("OpenTrials", i.getCollectedfrom().getValue());
|
||||
|
||||
assertNotNull(i.getHostedby());
|
||||
assertEquals("10|openaire____::b292fc2d7de505f78e3cae1b06ea8548", i.getHostedby().getKey());
|
||||
assertEquals("OpenTrials", i.getHostedby().getValue());
|
||||
|
||||
assertNotNull(i.getInstancetype());
|
||||
assertEquals("0037", i.getInstancetype().getClassid());
|
||||
assertEquals("Clinical Trial", i.getInstancetype().getClassname());
|
||||
assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemeid());
|
||||
assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemename());
|
||||
|
||||
assertNull(i.getLicense());
|
||||
assertNotNull(i.getDateofacceptance());
|
||||
assertEquals("2014-11-11", i.getDateofacceptance().getValue());
|
||||
|
||||
assertNull(i.getDistributionlocation());
|
||||
assertNull(i.getProcessingchargeamount());
|
||||
assertNull(i.getProcessingchargecurrency());
|
||||
|
||||
assertNotNull(i.getUrl());
|
||||
assertEquals(2, i.getUrl().size());
|
||||
assertTrue(i.getUrl().contains("http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059"));
|
||||
assertTrue(i.getUrl().contains("https://clinicaltrials.gov/ct2/show/NCT02321059"));
|
||||
|
||||
assertEquals("UNKNOWN", i.getRefereed().getClassid());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testSoftware() throws IOException {
|
||||
final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml"));
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<oai:record xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
|
||||
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:oai="http://www.openarchives.org/OAI/2.0/">
|
||||
<oai:header>
|
||||
<dri:objIdentifier>opentrials__::0000bf8e63d3d7e6b88421eabafae3f6</dri:objIdentifier>
|
||||
<dri:recordIdentifier>feabb67c-1fd1-423b-aec6-606d04ce53c6</dri:recordIdentifier>
|
||||
<dri:dateOfCollection>2019-03-27T15:15:22.22Z</dri:dateOfCollection>
|
||||
<oaf:datasourceprefix>opentrials__</oaf:datasourceprefix>
|
||||
<dr:dateOfTransformation>2019-04-17T16:04:20.586Z</dr:dateOfTransformation>
|
||||
</oai:header>
|
||||
<oai:metadata>
|
||||
<resource xmlns="http://datacite.org/schema/kernel-3"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd">
|
||||
<identifier identifierType="URL">https://clinicaltrials.gov/ct2/show/NCT02321059</identifier>
|
||||
<alternateIdentifiers>
|
||||
<alternateIdentifier alternateIdentifierType="URL">http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059</alternateIdentifier>
|
||||
<alternateIdentifier alternateIdentifierType="nct">NCT02321059</alternateIdentifier>
|
||||
</alternateIdentifiers>
|
||||
<creators>
|
||||
<creator>
|
||||
<creatorName>Jensen, Kristian K</creatorName>
|
||||
</creator>
|
||||
</creators>
|
||||
<titles>
|
||||
<title>Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia</title>
|
||||
</titles>
|
||||
<publisher>nct</publisher>
|
||||
<geoLocations>
|
||||
<geoLocationPlace>Denmark</geoLocationPlace>
|
||||
</geoLocations>
|
||||
<resourceType resourceTypeGeneral="clinicalTrial">0037</resourceType>
|
||||
<descriptions>
|
||||
<description descriptionType="Abstract">Patients with an incisional hernia in the midline and controls with an intact abdominal wall are examined twice with one week apart, in order to establish the test-retest reliability and internal and external validity of the Goodstrength trunk dynamometer.</description>
|
||||
</descriptions>
|
||||
</resource>
|
||||
<oaf:accessrights>OPEN</oaf:accessrights>
|
||||
<dr:CobjCategory type="dataset">0037</dr:CobjCategory>
|
||||
<oaf:dateAccepted>2014-11-11</oaf:dateAccepted>
|
||||
<oaf:hostedBy id="openaire____::opentrials" name="OpenTrials"/>
|
||||
<oaf:collectedFrom id="openaire____::opentrials" name="OpenTrials"/>
|
||||
<oaf:about>
|
||||
<oaf:datainfo>
|
||||
<oaf:inferred>false</oaf:inferred>
|
||||
<oaf:deletedbyinference>false</oaf:deletedbyinference>
|
||||
<oaf:trust>0.9</oaf:trust>
|
||||
<oaf:inferenceprovenance/>
|
||||
<oaf:provenanceaction
|
||||
classid="sysimport:crosswalk:datasetarchive"
|
||||
classname="sysimport:crosswalk:datasetarchive"
|
||||
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
|
||||
</oaf:datainfo>
|
||||
</oaf:about>
|
||||
</oai:metadata>
|
||||
<about xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
|
||||
<originDescription altered="true" harvestDate="2019-03-27T15:15:22.22Z">
|
||||
<baseURL>file:///var/lib/dnet/data/opentrials/opentrials.csv</baseURL>
|
||||
<identifier/>
|
||||
<datestamp/>
|
||||
<metadataNamespace/>
|
||||
</originDescription>
|
||||
</provenance>
|
||||
<oaf:datainfo>
|
||||
<oaf:inferred>false</oaf:inferred>
|
||||
<oaf:deletedbyinference>false</oaf:deletedbyinference>
|
||||
<oaf:trust>0.9</oaf:trust>
|
||||
<oaf:inferenceprovenance/>
|
||||
<oaf:provenanceaction classid="sysimport:crosswalk:datasetarchive"
|
||||
classname="sysimport:crosswalk:datasetarchive"
|
||||
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
|
||||
</oaf:datainfo>
|
||||
</about>
|
||||
</oai:record>
|
Loading…
Reference in New Issue