In case the attribute //dr:CobjCategory/@type is missing, the result type is derived by looking up the vocabulary dnet:result_typologies with the first available instance type.

This commit is contained in:
Claudio Atzori 2020-07-20 17:33:37 +02:00
parent 050dda223d
commit 124e7ce19c
5 changed files with 60 additions and 20 deletions

View File

@ -32,7 +32,11 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Optional; import java.util.Optional;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyTerm;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.spark.sql.sources.In;
import org.apache.zookeeper.Op;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.DocumentFactory; import org.dom4j.DocumentFactory;
import org.dom4j.DocumentHelper; import org.dom4j.DocumentHelper;
@ -99,7 +103,6 @@ public abstract class AbstractMdRecordToOafMapper {
final Document doc = DocumentHelper final Document doc = DocumentHelper
.parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3));
final String type = doc.valueOf("//dr:CobjCategory/@type");
final KeyValue collectedFrom = getProvenanceDatasource( final KeyValue collectedFrom = getProvenanceDatasource(
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
@ -118,12 +121,32 @@ public abstract class AbstractMdRecordToOafMapper {
final DataInfo info = prepareDataInfo(doc, invisible); final DataInfo info = prepareDataInfo(doc, invisible);
final long lastUpdateTimestamp = new Date().getTime(); final long lastUpdateTimestamp = new Date().getTime();
return createOafs(doc, type, collectedFrom, hostedBy, info, lastUpdateTimestamp); final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
final String type = getResultType(doc, instances);
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
} catch (final Exception e) { } catch (final Exception e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
/**
 * Determines the result type of the given record.
 * <p>
 * When the {@code //dr:CobjCategory/@type} attribute is not blank its value is returned as is. Otherwise, if the
 * {@code ModelConstants.DNET_RESULT_TYPOLOGIES} vocabulary exists, the class id of the first instance carrying a
 * non-blank instance type (or {@code "0000"} when there is none) is looked up as a synonym in that vocabulary,
 * and the class id of the resulting qualifier is returned.
 *
 * @param doc the parsed metadata record
 * @param instances the instances extracted from the record
 * @return the attribute value when present; otherwise the class id derived from the vocabulary, or the blank
 *         attribute value when nothing can be derived
 */
protected String getResultType(final Document doc, final List<Instance> instances) {
    final String type = doc.valueOf("//dr:CobjCategory/@type");

    // Short-circuit evaluation: the vocabulary is only consulted when the attribute is actually missing.
    if (StringUtils.isNotBlank(type) || !vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) {
        return type;
    }

    // Skip instances without a usable type: Stream.findFirst() throws a NullPointerException
    // when the element it selects is null.
    final String instanceType = instances
        .stream()
        .map(i -> i.getInstancetype())
        .filter(t -> t != null)
        .map(t -> t.getClassid())
        .filter(id -> StringUtils.isNotBlank(id))
        .findFirst()
        .orElse("0000"); // Unknown

    final Qualifier resultType = vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType);

    // Defensive: fall back to the (blank) attribute value if the lookup yields no qualifier
    // NOTE(review): confirm whether getSynonymAsQualifier can return null for an unknown synonym.
    return resultType != null ? resultType.getClassid() : type;
}
private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) { private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) {
final String dsId = doc.valueOf(xpathId); final String dsId = doc.valueOf(xpathId);
final String dsName = doc.valueOf(xpathName); final String dsName = doc.valueOf(xpathName);
@ -138,8 +161,8 @@ public abstract class AbstractMdRecordToOafMapper {
protected List<Oaf> createOafs( protected List<Oaf> createOafs(
final Document doc, final Document doc,
final String type, final String type,
final List<Instance> instances,
final KeyValue collectedFrom, final KeyValue collectedFrom,
final KeyValue hostedBy,
final DataInfo info, final DataInfo info,
final long lastUpdateTimestamp) { final long lastUpdateTimestamp) {
@ -148,14 +171,14 @@ public abstract class AbstractMdRecordToOafMapper {
switch (type.toLowerCase()) { switch (type.toLowerCase()) {
case "publication": case "publication":
final Publication p = new Publication(); final Publication p = new Publication();
populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp);
p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
p.setJournal(prepareJournal(doc, info)); p.setJournal(prepareJournal(doc, info));
oafs.add(p); oafs.add(p);
break; break;
case "dataset": case "dataset":
final Dataset d = new Dataset(); final Dataset d = new Dataset();
populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp);
d.setResulttype(DATASET_DEFAULT_RESULTTYPE); d.setResulttype(DATASET_DEFAULT_RESULTTYPE);
d.setStoragedate(prepareDatasetStorageDate(doc, info)); d.setStoragedate(prepareDatasetStorageDate(doc, info));
d.setDevice(prepareDatasetDevice(doc, info)); d.setDevice(prepareDatasetDevice(doc, info));
@ -168,7 +191,7 @@ public abstract class AbstractMdRecordToOafMapper {
break; break;
case "software": case "software":
final Software s = new Software(); final Software s = new Software();
populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp);
s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
s.setLicense(prepareSoftwareLicenses(doc, info)); s.setLicense(prepareSoftwareLicenses(doc, info));
@ -180,7 +203,7 @@ public abstract class AbstractMdRecordToOafMapper {
case "otherresearchproducts": case "otherresearchproducts":
default: default:
final OtherResearchProduct o = new OtherResearchProduct(); final OtherResearchProduct o = new OtherResearchProduct();
populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp);
o.setResulttype(ORP_DEFAULT_RESULTTYPE); o.setResulttype(ORP_DEFAULT_RESULTTYPE);
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
@ -259,8 +282,8 @@ public abstract class AbstractMdRecordToOafMapper {
private void populateResultFields( private void populateResultFields(
final Result r, final Result r,
final Document doc, final Document doc,
final List<Instance> instances,
final KeyValue collectedFrom, final KeyValue collectedFrom,
final KeyValue hostedBy,
final DataInfo info, final DataInfo info,
final long lastUpdateTimestamp) { final long lastUpdateTimestamp) {
r.setDataInfo(info); r.setDataInfo(info);
@ -293,7 +316,7 @@ public abstract class AbstractMdRecordToOafMapper {
r.setCoverage(prepareCoverages(doc, info)); r.setCoverage(prepareCoverages(doc, info));
r.setContext(prepareContexts(doc, info)); r.setContext(prepareContexts(doc, info));
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
r.setInstance(instances); r.setInstance(instances);
r.setBestaccessright(getBestAccessRights(instances)); r.setBestaccessright(getBestAccessRights(instances));
} }

View File

@ -6,12 +6,15 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.lenient;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
@ -35,20 +38,20 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class MappersTest { public class MappersTest {
@Mock
private ISLookUpService isLookUpService;
@Mock @Mock
private VocabularyGroup vocs; private VocabularyGroup vocs;
@BeforeEach @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
when(vocs.getTermAsQualifier(anyString(), anyString())) lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
.thenAnswer( lenient()
invocation -> OafMapperUtils .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
.qualifier( .thenReturn(synonyms());
invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0),
invocation.getArgument(0)));
when(vocs.termExists(anyString(), anyString())).thenReturn(true);
vocs = VocabularyGroup.loadVocsFromIS(isLookUpService);
} }
@Test @Test
@ -269,4 +272,15 @@ public class MappersTest {
assertEquals(':', id.charAt(15)); assertEquals(':', id.charAt(15));
assertEquals(':', id.charAt(16)); assertEquals(':', id.charAt(16));
} }
/**
 * Reads the vocabulary terms fixture from the test classpath, one entry per line.
 *
 * @return the lines of {@code /eu/dnetlib/dhp/oa/graph/clean/terms.txt}
 * @throws IOException if the resource is missing or cannot be read
 */
private List<String> vocs() throws IOException {
    final String path = "/eu/dnetlib/dhp/oa/graph/clean/terms.txt";
    // try-with-resources closes the classpath stream, which the one-liner form leaked.
    try (java.io.InputStream in = CleaningFunctionTest.class.getResourceAsStream(path)) {
        if (in == null) {
            // getResourceAsStream signals a missing resource with null rather than an exception.
            throw new java.io.FileNotFoundException("classpath resource not found: " + path);
        }
        // Explicit charset: the single-argument overload relies on the platform default.
        return IOUtils.readLines(in, java.nio.charset.StandardCharsets.UTF_8);
    }
}
/**
 * Reads the vocabulary synonyms fixture from the test classpath, one entry per line.
 *
 * @return the lines of {@code /eu/dnetlib/dhp/oa/graph/clean/synonyms.txt}
 * @throws IOException if the resource is missing or cannot be read
 */
private List<String> synonyms() throws IOException {
    final String path = "/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt";
    // try-with-resources closes the classpath stream, which the one-liner form leaked.
    try (java.io.InputStream in = CleaningFunctionTest.class.getResourceAsStream(path)) {
        if (in == null) {
            // getResourceAsStream signals a missing resource with null rather than an exception.
            throw new java.io.FileNotFoundException("classpath resource not found: " + path);
        }
        // Explicit charset: the single-argument overload relies on the platform default.
        return IOUtils.readLines(in, java.nio.charset.StandardCharsets.UTF_8);
    }
}
} }

View File

@ -49,7 +49,8 @@
<dc:subject>regulating services</dc:subject> <dc:subject>regulating services</dc:subject>
<dc:subject>supporting services</dc:subject> <dc:subject>supporting services</dc:subject>
<dc:type>Research Article</dc:type> <dc:type>Research Article</dc:type>
<dr:CobjCategory type="publication">0001</dr:CobjCategory> <!--<dr:CobjCategory type="publication">0001</dr:CobjCategory>-->
<dr:CobjCategory>0001</dr:CobjCategory>
<oaf:dateAccepted>2017-01-01</oaf:dateAccepted> <oaf:dateAccepted>2017-01-01</oaf:dateAccepted>
<oaf:projectid>corda_______::226852</oaf:projectid> <oaf:projectid>corda_______::226852</oaf:projectid>
<oaf:accessrights>OPEN</oaf:accessrights> <oaf:accessrights>OPEN</oaf:accessrights>

View File

@ -82,7 +82,8 @@
<p>All files are in MATLAB .mat format.</p></description> <p>All files are in MATLAB .mat format.</p></description>
</descriptions> </descriptions>
</resource> </resource>
<dr:CobjCategory type="dataset">0021</dr:CobjCategory> <!--<dr:CobjCategory type="dataset">0021</dr:CobjCategory>-->
<dr:CobjCategory>0021</dr:CobjCategory>
<oaf:dateAccepted>2019-01-01</oaf:dateAccepted> <oaf:dateAccepted>2019-01-01</oaf:dateAccepted>
<oaf:accessrights>OPEN</oaf:accessrights> <oaf:accessrights>OPEN</oaf:accessrights>
<oaf:language>und</oaf:language> <oaf:language>und</oaf:language>

View File

@ -52,7 +52,8 @@
subjectScheme="EDAM Ontology" valueURI="http://edamontology.org/topic_3534">Protein binding sites</datacite:subject> subjectScheme="EDAM Ontology" valueURI="http://edamontology.org/topic_3534">Protein binding sites</datacite:subject>
</datacite:subjects> </datacite:subjects>
</datacite:resource> </datacite:resource>
<dr:CobjCategory type="software">0029</dr:CobjCategory> <!--<dr:CobjCategory type="software">0029</dr:CobjCategory>-->
<dr:CobjCategory>0029</dr:CobjCategory>
<oaf:hostedBy id="rest________::bioTools" name="bio.tools"/> <oaf:hostedBy id="rest________::bioTools" name="bio.tools"/>
<oaf:collectedFrom id="rest________::bioTools" name="bio.tools"/> <oaf:collectedFrom id="rest________::bioTools" name="bio.tools"/>
<oaf:dateAccepted>2018-06-06</oaf:dateAccepted> <oaf:dateAccepted>2018-06-06</oaf:dateAccepted>