forked from antonis.lempesis/dnet-hadoop
in case of missing attribute //dr:CobjCategory/@type the resulttype is derived by looking up the vocabulary dnet:result_typologies with the 1st instance type available
This commit is contained in:
parent
050dda223d
commit
124e7ce19c
|
@ -32,7 +32,11 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyTerm;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.spark.sql.sources.In;
|
||||||
|
import org.apache.zookeeper.Op;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.DocumentFactory;
|
import org.dom4j.DocumentFactory;
|
||||||
import org.dom4j.DocumentHelper;
|
import org.dom4j.DocumentHelper;
|
||||||
|
@ -99,7 +103,6 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final Document doc = DocumentHelper
|
final Document doc = DocumentHelper
|
||||||
.parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3));
|
.parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3));
|
||||||
|
|
||||||
final String type = doc.valueOf("//dr:CobjCategory/@type");
|
|
||||||
final KeyValue collectedFrom = getProvenanceDatasource(
|
final KeyValue collectedFrom = getProvenanceDatasource(
|
||||||
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
|
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
|
||||||
|
|
||||||
|
@ -118,12 +121,32 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final DataInfo info = prepareDataInfo(doc, invisible);
|
final DataInfo info = prepareDataInfo(doc, invisible);
|
||||||
final long lastUpdateTimestamp = new Date().getTime();
|
final long lastUpdateTimestamp = new Date().getTime();
|
||||||
|
|
||||||
return createOafs(doc, type, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
|
||||||
|
|
||||||
|
final String type = getResultType(doc, instances);
|
||||||
|
|
||||||
|
return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected String getResultType(final Document doc, final List<Instance> instances) {
|
||||||
|
String type = doc.valueOf("//dr:CobjCategory/@type");
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(type) & vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) {
|
||||||
|
String instanceType = instances
|
||||||
|
.stream()
|
||||||
|
.map(i -> i.getInstancetype().getClassid())
|
||||||
|
.findFirst()
|
||||||
|
.orElse("0000"); // Unknown
|
||||||
|
Qualifier resultType = vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType);
|
||||||
|
return resultType.getClassid();
|
||||||
|
}
|
||||||
|
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
|
||||||
private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) {
|
private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) {
|
||||||
final String dsId = doc.valueOf(xpathId);
|
final String dsId = doc.valueOf(xpathId);
|
||||||
final String dsName = doc.valueOf(xpathName);
|
final String dsName = doc.valueOf(xpathName);
|
||||||
|
@ -138,8 +161,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected List<Oaf> createOafs(
|
protected List<Oaf> createOafs(
|
||||||
final Document doc,
|
final Document doc,
|
||||||
final String type,
|
final String type,
|
||||||
|
final List<Instance> instances,
|
||||||
final KeyValue collectedFrom,
|
final KeyValue collectedFrom,
|
||||||
final KeyValue hostedBy,
|
|
||||||
final DataInfo info,
|
final DataInfo info,
|
||||||
final long lastUpdateTimestamp) {
|
final long lastUpdateTimestamp) {
|
||||||
|
|
||||||
|
@ -148,14 +171,14 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
switch (type.toLowerCase()) {
|
switch (type.toLowerCase()) {
|
||||||
case "publication":
|
case "publication":
|
||||||
final Publication p = new Publication();
|
final Publication p = new Publication();
|
||||||
populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||||
p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
|
p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
|
||||||
p.setJournal(prepareJournal(doc, info));
|
p.setJournal(prepareJournal(doc, info));
|
||||||
oafs.add(p);
|
oafs.add(p);
|
||||||
break;
|
break;
|
||||||
case "dataset":
|
case "dataset":
|
||||||
final Dataset d = new Dataset();
|
final Dataset d = new Dataset();
|
||||||
populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||||
d.setResulttype(DATASET_DEFAULT_RESULTTYPE);
|
d.setResulttype(DATASET_DEFAULT_RESULTTYPE);
|
||||||
d.setStoragedate(prepareDatasetStorageDate(doc, info));
|
d.setStoragedate(prepareDatasetStorageDate(doc, info));
|
||||||
d.setDevice(prepareDatasetDevice(doc, info));
|
d.setDevice(prepareDatasetDevice(doc, info));
|
||||||
|
@ -168,7 +191,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
break;
|
break;
|
||||||
case "software":
|
case "software":
|
||||||
final Software s = new Software();
|
final Software s = new Software();
|
||||||
populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||||
s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
|
s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
|
||||||
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
|
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
|
||||||
s.setLicense(prepareSoftwareLicenses(doc, info));
|
s.setLicense(prepareSoftwareLicenses(doc, info));
|
||||||
|
@ -180,7 +203,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
case "otherresearchproducts":
|
case "otherresearchproducts":
|
||||||
default:
|
default:
|
||||||
final OtherResearchProduct o = new OtherResearchProduct();
|
final OtherResearchProduct o = new OtherResearchProduct();
|
||||||
populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||||
o.setResulttype(ORP_DEFAULT_RESULTTYPE);
|
o.setResulttype(ORP_DEFAULT_RESULTTYPE);
|
||||||
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
|
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
|
||||||
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
|
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
|
||||||
|
@ -259,8 +282,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
private void populateResultFields(
|
private void populateResultFields(
|
||||||
final Result r,
|
final Result r,
|
||||||
final Document doc,
|
final Document doc,
|
||||||
|
final List<Instance> instances,
|
||||||
final KeyValue collectedFrom,
|
final KeyValue collectedFrom,
|
||||||
final KeyValue hostedBy,
|
|
||||||
final DataInfo info,
|
final DataInfo info,
|
||||||
final long lastUpdateTimestamp) {
|
final long lastUpdateTimestamp) {
|
||||||
r.setDataInfo(info);
|
r.setDataInfo(info);
|
||||||
|
@ -293,7 +316,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
r.setCoverage(prepareCoverages(doc, info));
|
r.setCoverage(prepareCoverages(doc, info));
|
||||||
r.setContext(prepareContexts(doc, info));
|
r.setContext(prepareContexts(doc, info));
|
||||||
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
||||||
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
|
|
||||||
r.setInstance(instances);
|
r.setInstance(instances);
|
||||||
r.setBestaccessright(getBestAccessRights(instances));
|
r.setBestaccessright(getBestAccessRights(instances));
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,12 +6,15 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
import static org.mockito.ArgumentMatchers.anyString;
|
import static org.mockito.ArgumentMatchers.anyString;
|
||||||
|
import static org.mockito.Mockito.lenient;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
@ -35,20 +38,20 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
public class MappersTest {
|
public class MappersTest {
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
private ISLookUpService isLookUpService;
|
||||||
|
|
||||||
@Mock
|
@Mock
|
||||||
private VocabularyGroup vocs;
|
private VocabularyGroup vocs;
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
when(vocs.getTermAsQualifier(anyString(), anyString()))
|
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
|
||||||
.thenAnswer(
|
lenient()
|
||||||
invocation -> OafMapperUtils
|
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
|
||||||
.qualifier(
|
.thenReturn(synonyms());
|
||||||
invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0),
|
|
||||||
invocation.getArgument(0)));
|
|
||||||
|
|
||||||
when(vocs.termExists(anyString(), anyString())).thenReturn(true);
|
|
||||||
|
|
||||||
|
vocs = VocabularyGroup.loadVocsFromIS(isLookUpService);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -269,4 +272,15 @@ public class MappersTest {
|
||||||
assertEquals(':', id.charAt(15));
|
assertEquals(':', id.charAt(15));
|
||||||
assertEquals(':', id.charAt(16));
|
assertEquals(':', id.charAt(16));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private List<String> vocs() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(CleaningFunctionTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> synonyms() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(CleaningFunctionTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,7 +49,8 @@
|
||||||
<dc:subject>regulating services</dc:subject>
|
<dc:subject>regulating services</dc:subject>
|
||||||
<dc:subject>supporting services</dc:subject>
|
<dc:subject>supporting services</dc:subject>
|
||||||
<dc:type>Research Article</dc:type>
|
<dc:type>Research Article</dc:type>
|
||||||
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
|
<!--<dr:CobjCategory type="publication">0001</dr:CobjCategory>-->
|
||||||
|
<dr:CobjCategory>0001</dr:CobjCategory>
|
||||||
<oaf:dateAccepted>2017-01-01</oaf:dateAccepted>
|
<oaf:dateAccepted>2017-01-01</oaf:dateAccepted>
|
||||||
<oaf:projectid>corda_______::226852</oaf:projectid>
|
<oaf:projectid>corda_______::226852</oaf:projectid>
|
||||||
<oaf:accessrights>OPEN</oaf:accessrights>
|
<oaf:accessrights>OPEN</oaf:accessrights>
|
||||||
|
|
|
@ -82,7 +82,8 @@
|
||||||
<p>All files are in MATLAB .mat format.</p></description>
|
<p>All files are in MATLAB .mat format.</p></description>
|
||||||
</descriptions>
|
</descriptions>
|
||||||
</resource>
|
</resource>
|
||||||
<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
|
<!--<dr:CobjCategory type="dataset">0021</dr:CobjCategory>-->
|
||||||
|
<dr:CobjCategory>0021</dr:CobjCategory>
|
||||||
<oaf:dateAccepted>2019-01-01</oaf:dateAccepted>
|
<oaf:dateAccepted>2019-01-01</oaf:dateAccepted>
|
||||||
<oaf:accessrights>OPEN</oaf:accessrights>
|
<oaf:accessrights>OPEN</oaf:accessrights>
|
||||||
<oaf:language>und</oaf:language>
|
<oaf:language>und</oaf:language>
|
||||||
|
|
|
@ -52,7 +52,8 @@
|
||||||
subjectScheme="EDAM Ontology" valueURI="http://edamontology.org/topic_3534">Protein binding sites</datacite:subject>
|
subjectScheme="EDAM Ontology" valueURI="http://edamontology.org/topic_3534">Protein binding sites</datacite:subject>
|
||||||
</datacite:subjects>
|
</datacite:subjects>
|
||||||
</datacite:resource>
|
</datacite:resource>
|
||||||
<dr:CobjCategory type="software">0029</dr:CobjCategory>
|
<!--<dr:CobjCategory type="software">0029</dr:CobjCategory>-->
|
||||||
|
<dr:CobjCategory>0029</dr:CobjCategory>
|
||||||
<oaf:hostedBy id="rest________::bioTools" name="bio.tools"/>
|
<oaf:hostedBy id="rest________::bioTools" name="bio.tools"/>
|
||||||
<oaf:collectedFrom id="rest________::bioTools" name="bio.tools"/>
|
<oaf:collectedFrom id="rest________::bioTools" name="bio.tools"/>
|
||||||
<oaf:dateAccepted>2018-06-06</oaf:dateAccepted>
|
<oaf:dateAccepted>2018-06-06</oaf:dateAccepted>
|
||||||
|
|
Loading…
Reference in New Issue