forked from antonis.lempesis/dnet-hadoop
in case of missing attribute //dr:CobjCategory/@type the resulttype is derived by looking up the vocabulary dnet:result_typologies with the 1st instance type available
This commit is contained in:
parent
050dda223d
commit
124e7ce19c
|
@ -32,7 +32,11 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyTerm;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.spark.sql.sources.In;
|
||||
import org.apache.zookeeper.Op;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentFactory;
|
||||
import org.dom4j.DocumentHelper;
|
||||
|
@ -99,7 +103,6 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
final Document doc = DocumentHelper
|
||||
.parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3));
|
||||
|
||||
final String type = doc.valueOf("//dr:CobjCategory/@type");
|
||||
final KeyValue collectedFrom = getProvenanceDatasource(
|
||||
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
|
||||
|
||||
|
@ -118,12 +121,32 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
final DataInfo info = prepareDataInfo(doc, invisible);
|
||||
final long lastUpdateTimestamp = new Date().getTime();
|
||||
|
||||
return createOafs(doc, type, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
||||
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
|
||||
|
||||
final String type = getResultType(doc, instances);
|
||||
|
||||
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
protected String getResultType(final Document doc, final List<Instance> instances) {
|
||||
String type = doc.valueOf("//dr:CobjCategory/@type");
|
||||
|
||||
if (StringUtils.isBlank(type) & vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) {
|
||||
String instanceType = instances
|
||||
.stream()
|
||||
.map(i -> i.getInstancetype().getClassid())
|
||||
.findFirst()
|
||||
.orElse("0000"); // Unknown
|
||||
Qualifier resultType = vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType);
|
||||
return resultType.getClassid();
|
||||
}
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) {
|
||||
final String dsId = doc.valueOf(xpathId);
|
||||
final String dsName = doc.valueOf(xpathName);
|
||||
|
@ -138,8 +161,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
protected List<Oaf> createOafs(
|
||||
final Document doc,
|
||||
final String type,
|
||||
final List<Instance> instances,
|
||||
final KeyValue collectedFrom,
|
||||
final KeyValue hostedBy,
|
||||
final DataInfo info,
|
||||
final long lastUpdateTimestamp) {
|
||||
|
||||
|
@ -148,14 +171,14 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
switch (type.toLowerCase()) {
|
||||
case "publication":
|
||||
final Publication p = new Publication();
|
||||
populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
||||
populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||
p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
|
||||
p.setJournal(prepareJournal(doc, info));
|
||||
oafs.add(p);
|
||||
break;
|
||||
case "dataset":
|
||||
final Dataset d = new Dataset();
|
||||
populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
||||
populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||
d.setResulttype(DATASET_DEFAULT_RESULTTYPE);
|
||||
d.setStoragedate(prepareDatasetStorageDate(doc, info));
|
||||
d.setDevice(prepareDatasetDevice(doc, info));
|
||||
|
@ -168,7 +191,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
break;
|
||||
case "software":
|
||||
final Software s = new Software();
|
||||
populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
||||
populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||
s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
|
||||
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
|
||||
s.setLicense(prepareSoftwareLicenses(doc, info));
|
||||
|
@ -180,7 +203,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
case "otherresearchproducts":
|
||||
default:
|
||||
final OtherResearchProduct o = new OtherResearchProduct();
|
||||
populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
||||
populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||
o.setResulttype(ORP_DEFAULT_RESULTTYPE);
|
||||
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
|
||||
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
|
||||
|
@ -259,8 +282,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
private void populateResultFields(
|
||||
final Result r,
|
||||
final Document doc,
|
||||
final List<Instance> instances,
|
||||
final KeyValue collectedFrom,
|
||||
final KeyValue hostedBy,
|
||||
final DataInfo info,
|
||||
final long lastUpdateTimestamp) {
|
||||
r.setDataInfo(info);
|
||||
|
@ -293,7 +316,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
r.setCoverage(prepareCoverages(doc, info));
|
||||
r.setContext(prepareContexts(doc, info));
|
||||
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
||||
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
|
||||
|
||||
r.setInstance(instances);
|
||||
r.setBestaccessright(getBestAccessRights(instances));
|
||||
}
|
||||
|
|
|
@ -6,12 +6,15 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
|
|||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.ArgumentMatchers.anyString;
|
||||
import static org.mockito.Mockito.lenient;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
|
@ -35,20 +38,20 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|||
@ExtendWith(MockitoExtension.class)
|
||||
public class MappersTest {
|
||||
|
||||
@Mock
|
||||
private ISLookUpService isLookUpService;
|
||||
|
||||
@Mock
|
||||
private VocabularyGroup vocs;
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
when(vocs.getTermAsQualifier(anyString(), anyString()))
|
||||
.thenAnswer(
|
||||
invocation -> OafMapperUtils
|
||||
.qualifier(
|
||||
invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0),
|
||||
invocation.getArgument(0)));
|
||||
|
||||
when(vocs.termExists(anyString(), anyString())).thenReturn(true);
|
||||
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
|
||||
lenient()
|
||||
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
|
||||
.thenReturn(synonyms());
|
||||
|
||||
vocs = VocabularyGroup.loadVocsFromIS(isLookUpService);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -269,4 +272,15 @@ public class MappersTest {
|
|||
assertEquals(':', id.charAt(15));
|
||||
assertEquals(':', id.charAt(16));
|
||||
}
|
||||
|
||||
private List<String> vocs() throws IOException {
|
||||
return IOUtils
|
||||
.readLines(CleaningFunctionTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"));
|
||||
}
|
||||
|
||||
private List<String> synonyms() throws IOException {
|
||||
return IOUtils
|
||||
.readLines(CleaningFunctionTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -49,7 +49,8 @@
|
|||
<dc:subject>regulating services</dc:subject>
|
||||
<dc:subject>supporting services</dc:subject>
|
||||
<dc:type>Research Article</dc:type>
|
||||
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
||||
<!--<dr:CobjCategory type="publication">0001</dr:CobjCategory>-->
|
||||
<dr:CobjCategory>0001</dr:CobjCategory>
|
||||
<oaf:dateAccepted>2017-01-01</oaf:dateAccepted>
|
||||
<oaf:projectid>corda_______::226852</oaf:projectid>
|
||||
<oaf:accessrights>OPEN</oaf:accessrights>
|
||||
|
|
|
@ -82,7 +82,8 @@
|
|||
<p>All files are in MATLAB .mat format.</p></description>
|
||||
</descriptions>
|
||||
</resource>
|
||||
<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
|
||||
<!--<dr:CobjCategory type="dataset">0021</dr:CobjCategory>-->
|
||||
<dr:CobjCategory>0021</dr:CobjCategory>
|
||||
<oaf:dateAccepted>2019-01-01</oaf:dateAccepted>
|
||||
<oaf:accessrights>OPEN</oaf:accessrights>
|
||||
<oaf:language>und</oaf:language>
|
||||
|
|
|
@ -52,7 +52,8 @@
|
|||
subjectScheme="EDAM Ontology" valueURI="http://edamontology.org/topic_3534">Protein binding sites</datacite:subject>
|
||||
</datacite:subjects>
|
||||
</datacite:resource>
|
||||
<dr:CobjCategory type="software">0029</dr:CobjCategory>
|
||||
<!--<dr:CobjCategory type="software">0029</dr:CobjCategory>-->
|
||||
<dr:CobjCategory>0029</dr:CobjCategory>
|
||||
<oaf:hostedBy id="rest________::bioTools" name="bio.tools"/>
|
||||
<oaf:collectedFrom id="rest________::bioTools" name="bio.tools"/>
|
||||
<oaf:dateAccepted>2018-06-06</oaf:dateAccepted>
|
||||
|
|
Loading…
Reference in New Issue