In case the attribute //dr:CobjCategory/@type is missing, the result type is derived by looking up the vocabulary dnet:result_typologies with the first available instance type

This commit is contained in:
Claudio Atzori 2020-07-20 17:33:37 +02:00
parent 050dda223d
commit 124e7ce19c
5 changed files with 60 additions and 20 deletions

View File

@ -32,7 +32,11 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyTerm;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.sql.sources.In;
import org.apache.zookeeper.Op;
import org.dom4j.Document;
import org.dom4j.DocumentFactory;
import org.dom4j.DocumentHelper;
@ -99,7 +103,6 @@ public abstract class AbstractMdRecordToOafMapper {
final Document doc = DocumentHelper
.parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3));
final String type = doc.valueOf("//dr:CobjCategory/@type");
final KeyValue collectedFrom = getProvenanceDatasource(
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
@ -118,12 +121,32 @@ public abstract class AbstractMdRecordToOafMapper {
final DataInfo info = prepareDataInfo(doc, invisible);
final long lastUpdateTimestamp = new Date().getTime();
return createOafs(doc, type, collectedFrom, hostedBy, info, lastUpdateTimestamp);
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
final String type = getResultType(doc, instances);
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
/**
 * Determines the result type of the record.
 *
 * <p>The value of the {@code //dr:CobjCategory/@type} attribute is returned as-is when it is not blank,
 * or when the {@code ModelConstants.DNET_RESULT_TYPOLOGIES} vocabulary is not available. Otherwise the
 * type is derived by looking up, in that vocabulary, the synonym matching the class id of the first
 * instance that declares a non-blank instance type ({@code "0000"} is used when there is none).
 *
 * @param doc the parsed metadata record
 * @param instances the instances already extracted from the record
 * @return the declared type, the class id resolved through the vocabulary, or the (blank) declared
 *         type when the vocabulary lookup yields no qualifier
 */
protected String getResultType(final Document doc, final List<Instance> instances) {
    final String type = doc.valueOf("//dr:CobjCategory/@type");

    // Short-circuit evaluation: the vocabulary is consulted only when the attribute is actually missing.
    if (StringUtils.isNotBlank(type) || !vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) {
        return type;
    }

    // Instances without an instance type (or with a blank class id) are skipped: Stream.findFirst()
    // throws a NullPointerException when the selected element is null.
    final String instanceType = instances
        .stream()
        .map(i -> i.getInstancetype())
        .filter(q -> q != null)
        .map(q -> q.getClassid())
        .filter(StringUtils::isNotBlank)
        .findFirst()
        .orElse("0000"); // Unknown

    // NOTE(review): whether getSynonymAsQualifier can return null for an unmapped synonym is not
    // visible here; the result is guarded defensively and falls back to the declared (blank) type.
    return Optional
        .ofNullable(vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType))
        .map(q -> q.getClassid())
        .orElse(type);
}
private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) {
final String dsId = doc.valueOf(xpathId);
final String dsName = doc.valueOf(xpathName);
@ -138,8 +161,8 @@ public abstract class AbstractMdRecordToOafMapper {
protected List<Oaf> createOafs(
final Document doc,
final String type,
final List<Instance> instances,
final KeyValue collectedFrom,
final KeyValue hostedBy,
final DataInfo info,
final long lastUpdateTimestamp) {
@ -148,14 +171,14 @@ public abstract class AbstractMdRecordToOafMapper {
switch (type.toLowerCase()) {
case "publication":
final Publication p = new Publication();
populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp);
p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
p.setJournal(prepareJournal(doc, info));
oafs.add(p);
break;
case "dataset":
final Dataset d = new Dataset();
populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp);
d.setResulttype(DATASET_DEFAULT_RESULTTYPE);
d.setStoragedate(prepareDatasetStorageDate(doc, info));
d.setDevice(prepareDatasetDevice(doc, info));
@ -168,7 +191,7 @@ public abstract class AbstractMdRecordToOafMapper {
break;
case "software":
final Software s = new Software();
populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp);
s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
s.setLicense(prepareSoftwareLicenses(doc, info));
@ -180,7 +203,7 @@ public abstract class AbstractMdRecordToOafMapper {
case "otherresearchproducts":
default:
final OtherResearchProduct o = new OtherResearchProduct();
populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp);
o.setResulttype(ORP_DEFAULT_RESULTTYPE);
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
@ -259,8 +282,8 @@ public abstract class AbstractMdRecordToOafMapper {
private void populateResultFields(
final Result r,
final Document doc,
final List<Instance> instances,
final KeyValue collectedFrom,
final KeyValue hostedBy,
final DataInfo info,
final long lastUpdateTimestamp) {
r.setDataInfo(info);
@ -293,7 +316,7 @@ public abstract class AbstractMdRecordToOafMapper {
r.setCoverage(prepareCoverages(doc, info));
r.setContext(prepareContexts(doc, info));
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
r.setInstance(instances);
r.setBestaccessright(getBestAccessRights(instances));
}

View File

@ -6,12 +6,15 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.lenient;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.BeforeEach;
@ -35,20 +38,20 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
@ExtendWith(MockitoExtension.class)
public class MappersTest {
@Mock
private ISLookUpService isLookUpService;
@Mock
private VocabularyGroup vocs;
@BeforeEach
public void setUp() throws Exception {
when(vocs.getTermAsQualifier(anyString(), anyString()))
.thenAnswer(
invocation -> OafMapperUtils
.qualifier(
invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0),
invocation.getArgument(0)));
when(vocs.termExists(anyString(), anyString())).thenReturn(true);
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
lenient()
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
.thenReturn(synonyms());
vocs = VocabularyGroup.loadVocsFromIS(isLookUpService);
}
@Test
@ -269,4 +272,15 @@ public class MappersTest {
assertEquals(':', id.charAt(15));
assertEquals(':', id.charAt(16));
}
/**
 * Reads the vocabulary terms fixture from the classpath resource
 * {@code /eu/dnetlib/dhp/oa/graph/clean/terms.txt}.
 *
 * @return the lines of the resource, in file order
 * @throws IOException if the resource cannot be read
 */
private List<String> vocs() throws IOException {
    // try-with-resources: IOUtils.readLines does not close the stream it is given.
    try (java.io.InputStream in = CleaningFunctionTest.class
        .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt")) {
        // Explicit charset so the result does not depend on the platform default encoding.
        return IOUtils.readLines(in, java.nio.charset.StandardCharsets.UTF_8);
    }
}
/**
 * Reads the vocabulary synonyms fixture from the classpath resource
 * {@code /eu/dnetlib/dhp/oa/graph/clean/synonyms.txt}.
 *
 * @return the lines of the resource, in file order
 * @throws IOException if the resource cannot be read
 */
private List<String> synonyms() throws IOException {
    // try-with-resources: IOUtils.readLines does not close the stream it is given.
    try (java.io.InputStream in = CleaningFunctionTest.class
        .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt")) {
        // Explicit charset so the result does not depend on the platform default encoding.
        return IOUtils.readLines(in, java.nio.charset.StandardCharsets.UTF_8);
    }
}
}

View File

@ -49,7 +49,8 @@
<dc:subject>regulating services</dc:subject>
<dc:subject>supporting services</dc:subject>
<dc:type>Research Article</dc:type>
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
<!--<dr:CobjCategory type="publication">0001</dr:CobjCategory>-->
<dr:CobjCategory>0001</dr:CobjCategory>
<oaf:dateAccepted>2017-01-01</oaf:dateAccepted>
<oaf:projectid>corda_______::226852</oaf:projectid>
<oaf:accessrights>OPEN</oaf:accessrights>

View File

@ -82,7 +82,8 @@
<p>All files are in MATLAB .mat format.</p></description>
</descriptions>
</resource>
<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
<!--<dr:CobjCategory type="dataset">0021</dr:CobjCategory>-->
<dr:CobjCategory>0021</dr:CobjCategory>
<oaf:dateAccepted>2019-01-01</oaf:dateAccepted>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:language>und</oaf:language>

View File

@ -52,7 +52,8 @@
subjectScheme="EDAM Ontology" valueURI="http://edamontology.org/topic_3534">Protein binding sites</datacite:subject>
</datacite:subjects>
</datacite:resource>
<dr:CobjCategory type="software">0029</dr:CobjCategory>
<!--<dr:CobjCategory type="software">0029</dr:CobjCategory>-->
<dr:CobjCategory>0029</dr:CobjCategory>
<oaf:hostedBy id="rest________::bioTools" name="bio.tools"/>
<oaf:collectedFrom id="rest________::bioTools" name="bio.tools"/>
<oaf:dateAccepted>2018-06-06</oaf:dateAccepted>