master #59

Closed
claudio.atzori wants to merge 3221 commits from master into stable_ids
9 changed files with 149 additions and 77 deletions
Showing only changes of commit 122e75aa17 - Show all commits

View File

@ -119,8 +119,9 @@ public class SparkEoscTag {
addEIG(orp.getEoscifguidelines(), EOSC_TWITTER_DATA, EOSC_TWITTER_DATA, "", COMPLIES_WITH);
}
if (containsCriteriaNotebook(orp)) {
addEIG(orp.getEoscifguidelines(), EOSC_JUPYTER_NOTEBOOK, EOSC_JUPYTER_NOTEBOOK, "",
COMPLIES_WITH);
addEIG(
orp.getEoscifguidelines(), EOSC_JUPYTER_NOTEBOOK, EOSC_JUPYTER_NOTEBOOK, "",
COMPLIES_WITH);
}
return orp;
}, Encoders.bean(OtherResearchProduct.class))
@ -199,6 +200,13 @@ public class SparkEoscTag {
return false;
if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("jupyter")))
return true;
if (s
.getSubject()
.stream()
.anyMatch(
sbj -> sbj.getValue().toLowerCase().contains("python") &&
sbj.getValue().toLowerCase().contains("notebook")))
return true;
if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("python")) &&
s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("notebook")))
return true;

View File

@ -31,26 +31,7 @@ import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.OAIProvenance;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
@ -91,6 +72,17 @@ public abstract class AbstractMdRecordToOafMapper {
nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3);
}
/**
 * Lookup table of the PID types listed as keys of {@link IdentifierFactory#PID_AUTHORITY}:
 * the key is the lower-cased type name, the value is the same name in its original casing.
 */
protected static final Map<String, String> pidTypeWithAuthority = new HashMap<>();

static {
	// Index each PID type under its lower-cased name.
	IdentifierFactory.PID_AUTHORITY.keySet().forEach(pidType -> {
		final String name = pidType.toString();
		pidTypeWithAuthority.put(name.toLowerCase(), name);
	});
}
protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs, final boolean invisible,
final boolean shouldHashId, final boolean forceOriginalId) {
this.vocs = vocs;
@ -377,10 +369,30 @@ public abstract class AbstractMdRecordToOafMapper {
r.setInstance(instances);
r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances));
r.setEoscifguidelines(prepareEOSCIfGuidelines(doc, info));
}
/**
 * Extension point left to the concrete mappers.
 * NOTE(review): presumably builds the persistent identifiers of the result from the record;
 * this is inferred from the name and return type only — confirm against the implementations.
 */
protected abstract List<StructuredProperty> prepareResultPids(Document doc, DataInfo info);
/**
 * Collects the EOSC interoperability guidelines declared in the record.
 * <p>
 * Every node matched by {@code //oaf:eoscifguidelines} whose {@code @code} attribute is not blank
 * becomes one {@link EoscIfGuidelines} filled from the node's {@code @code}, {@code @label},
 * {@code @url} and {@code @semanticrelation} attributes. Duplicates (as decided by the set, i.e. by
 * {@code EoscIfGuidelines} equality) are dropped, and the surviving entries keep the order in which
 * they were first met in the document.
 *
 * @param doc  the parsed metadata record
 * @param info not used by this method
 * @return a new list with the distinct guidelines; empty when the record declares none
 */
private List<EoscIfGuidelines> prepareEOSCIfGuidelines(Document doc, DataInfo info) {
	// Insertion-ordered set: de-duplicates without letting hash iteration order decide the output order.
	final Set<EoscIfGuidelines> guidelines = Sets.newLinkedHashSet();
	for (final Object o : doc.selectNodes("//oaf:eoscifguidelines")) {
		final Node node = (Node) o;
		final String code = node.valueOf("@code");
		if (StringUtils.isBlank(code)) {
			// a guideline without a code cannot be identified, skip it
			continue;
		}
		final EoscIfGuidelines eig = new EoscIfGuidelines();
		eig.setCode(code);
		eig.setLabel(node.valueOf("@label"));
		eig.setUrl(node.valueOf("@url"));
		eig.setSemanticRelation(node.valueOf("@semanticrelation"));
		guidelines.add(eig);
	}
	return Lists.newArrayList(guidelines);
}
private List<Context> prepareContexts(final Document doc, final DataInfo info) {
final List<Context> list = new ArrayList<>();
for (final Object o : doc.selectNodes("//oaf:concept")) {

View File

@ -19,6 +19,8 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
@ -171,6 +173,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='landingPage']")) {
url.add(trimAndDecodeUrl(((Node) o).getText().trim()));
}
for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='w3id']")) {
url.add(trimAndDecodeUrl(((Node) o).getText().trim()));
}
for (final Object o : doc
.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='DOI']")) {
url.add(HTTP_DOI_PREIFX + ((Node) o).getText().trim());
@ -382,40 +387,53 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final List<Oaf> res = new ArrayList<>();
for (final Object o : doc
.selectNodes("//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='OPENAIRE']")) {
.selectNodes("//*[local-name()='relatedIdentifier']")) {
final String originalId = ((Node) o).getText();
final String originalId = ((Node) o).getText().trim();
if (StringUtils.isNotBlank(originalId)) {
final String otherId = createOpenaireId(50, originalId, false);
final String type = ((Node) o).valueOf("@relationType");
if (type.equalsIgnoreCase(IS_SUPPLEMENT_TO)) {
res
.add(
getRelation(
docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, entity));
res
.add(
getRelation(
otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, entity));
} else if (type.equalsIgnoreCase(IS_PART_OF)) {
res
.add(
getRelation(
docId, otherId, RESULT_RESULT, PART, IS_PART_OF, entity));
res
.add(
getRelation(
otherId, docId, RESULT_RESULT, PART, HAS_PART, entity));
} else {
// TODO catch more semantics
final String idType = ((Node) o).valueOf("@relatedIdentifierType");
final String relType = ((Node) o).valueOf("@relationType");
String otherId = guessRelatedIdentifier(idType, originalId);
if (StringUtils.isNotBlank(otherId)) {
res.addAll(getRelations(relType, docId, otherId, entity));
}
}
}
return res;
}
/**
 * Derives an identifier for a related item from its declared identifier type and raw value.
 * <ul>
 * <li>blank type or blank value: {@code null};</li>
 * <li>type {@code OPENAIRE} (any casing): the result of {@code createOpenaireId(50, value, false)};</li>
 * <li>type found (case-insensitively) in {@code pidTypeWithAuthority}: the result of
 * {@code IdentifierFactory.idFromPid} for prefix {@code "50"} and the canonical type name;</li>
 * <li>anything else: {@code null}.</li>
 * </ul>
 *
 * @param idType the identifier type as found in the record
 * @param value  the identifier value
 * @return the derived identifier, or {@code null} when it cannot be determined
 */
protected String guessRelatedIdentifier(final String idType, final String value) {
	final boolean missingInput = StringUtils.isBlank(idType) || StringUtils.isBlank(value);
	if (missingInput) {
		return null;
	}
	if ("OPENAIRE".equalsIgnoreCase(idType)) {
		return createOpenaireId(50, value, false);
	}
	// the lookup table is keyed by lower-cased type names
	final String pidTypeKey = idType.toLowerCase();
	return pidTypeWithAuthority.containsKey(pidTypeKey)
		? IdentifierFactory.idFromPid("50", pidTypeWithAuthority.get(pidTypeKey), value, true)
		: null;
}
/**
 * Builds the pair of relations (direct and inverse) linking {@code entityId} and {@code otherId}.
 * <p>
 * The relation semantics are looked up with {@code ModelSupport.findRelation(reltype)}; when the
 * lookup yields {@code null} no relation is produced.
 *
 * @param reltype  the relation type name to resolve
 * @param entityId identifier used as source of the direct relation
 * @param otherId  identifier used as target of the direct relation
 * @param entity   entity handed over to {@code getRelation} for both relations
 * @return a new list holding either both relations (direct first, inverse second) or nothing
 */
protected List<Oaf> getRelations(final String reltype, final String entityId, final String otherId,
	final OafEntity entity) {
	final List<Oaf> relations = new ArrayList<>();
	final RelationInverse semantics = ModelSupport.findRelation(reltype);
	if (semantics == null) {
		// unknown relation type: nothing to emit
		return relations;
	}
	// direct relation: entityId -> otherId
	relations
		.add(
			getRelation(
				entityId, otherId, semantics.getRelType(), semantics.getSubReltype(),
				semantics.getRelClass(), entity));
	// inverse relation: otherId -> entityId
	relations
		.add(
			getRelation(
				otherId, entityId, semantics.getRelType(), semantics.getSubReltype(),
				semantics.getInverseRelClass(), entity));
	return relations;
}
@Override
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
return prepareQualifier(

View File

@ -579,7 +579,7 @@ class MappersTest {
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
assertEquals(1, list.size());
assertEquals(3, list.size());
assertTrue(list.get(0) instanceof Software);
final Software s = (Software) list.get(0);
@ -912,27 +912,62 @@ class MappersTest {
}
@Test
void testROHub() throws IOException, DocumentException {
void testROHub() throws IOException {
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("rohub.xml")));
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
System.out.println("***************");
System.out.println(new ObjectMapper().writeValueAsString(list));
System.out.println("***************");
// final Dataset p = (Dataset) list.get(0);
// assertValidId(p.getId());
// assertValidId(p.getCollectedfrom().get(0).getKey());
// System.out.println(p.getTitle().get(0).getValue());
// assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
assertEquals(5, list.size());
final OtherResearchProduct p = (OtherResearchProduct) list.get(0);
assertValidId(p.getId());
assertTrue(p.getId().startsWith("50|w3id"));
assertValidId(p.getCollectedfrom().get(0).getKey());
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
assertEquals(1, p.getInstance().size());
assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", p.getPid().get(0).getValue());
Instance inst = p.getInstance().get(0);
assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", inst.getPid().get(0).getValue());
assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", inst.getUrl().get(0));
assertEquals(1, p.getEoscifguidelines().size());
assertEquals("EOSC::RO-crate", p.getEoscifguidelines().get(0).getCode());
assertEquals("EOSC::RO-crate", p.getEoscifguidelines().get(0).getLabel());
assertEquals("", p.getEoscifguidelines().get(0).getUrl());
assertEquals("compliesWith", p.getEoscifguidelines().get(0).getSemanticRelation());
}
@Test
void testROHub2() throws IOException, DocumentException {
void testROHub2() throws IOException {
final String xml = IOUtils
.toString(Objects.requireNonNull(getClass().getResourceAsStream("rohub-modified.xml")));
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
System.out.println("***************");
System.out.println(new ObjectMapper().writeValueAsString(list));
System.out.println("***************");
assertEquals(7, list.size());
final OtherResearchProduct p = (OtherResearchProduct) list.get(0);
assertValidId(p.getId());
assertValidId(p.getCollectedfrom().get(0).getKey());
assertEquals("50|w3id________::afc7592914ae190a50570db90f55f9c2", p.getId());
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
assertEquals("w3id", (p.getPid().get(0).getQualifier().getClassid()));
assertEquals("https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca", (p.getPid().get(0).getValue()));
assertEquals(1, list.stream().filter(o -> o instanceof OtherResearchProduct).count());
assertEquals(6, list.stream().filter(o -> o instanceof Relation).count());
for (Oaf oaf : list) {
if (oaf instanceof Relation) {
String source = ((Relation) oaf).getSource();
String target = ((Relation) oaf).getTarget();
assertNotEquals(source, target);
assertTrue(source.equals(p.getId()) || target.equals(p.getId()));
assertNotNull(((Relation) oaf).getSubRelType());
assertNotNull(((Relation) oaf).getRelClass());
assertNotNull(((Relation) oaf).getRelType());
}
}
}
@Test

View File

@ -69,7 +69,6 @@
</dates>
<resourceType resourceTypeGeneral="Dataset"/>
<relatedIdentifiers>
<relatedIdentifier relatedIdentifierType="DOI" relationType="IsVersionOf">10.5281/zenodo.3234525</relatedIdentifier>
<relatedIdentifier relatedIdentifierType="URL" relationType="IsPartOf">https://zenodo.org/communities/epfl</relatedIdentifier>
</relatedIdentifiers>
<version>1.0.0</version>

View File

@ -20,7 +20,7 @@
<datacite:publisher>bio.tools</datacite:publisher>
<datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="URL" relationType="IsDocumentedBy">http://maplab.imppc.org/chainy/</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="DOI" relationType="isReferencedBy">10.1093/bioinformatics/btw839</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="DOI" relationType="isreferencedBy">10.1093/bioinformatics/btw839</datacite:relatedIdentifier>
</datacite:relatedIdentifiers>
<datacite:alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="LandingPage">https://bio.tools/</datacite:alternateIdentifier>

View File

@ -30,6 +30,9 @@
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">
https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/6d3427a8-352e-49f4-9796-f618c44dc16d
</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="OPENAIRE" relationType="isSupplementedBy">
fsh_____4119::afc7592914ae190a50570db90f55f9c3
</datacite:relatedIdentifier>
</datacite:relatedIdentifiers>
<datacite:resourceType xs:anyURI="http://purl.org/coar/resource_type/c_1843">RO-crate</datacite:resourceType>
<datacite:rightsList>

View File

@ -21,15 +21,13 @@
</header>
<metadata>
<datacite:resource>
<datacite:identifier identifierType="URL">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</datacite:identifier>
<datacite:alternateIdentifiers/>
<datacite:identifier identifierType="w3id">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</datacite:identifier>
<datacite:alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="w3id">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</datacite:alternateIdentifier>
</datacite:alternateIdentifiers>
<datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="" relationType="">
https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/24fae96f-f986-46e1-bfd0-a21ca20ff0ce
</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="" relationType="">
https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/6d3427a8-352e-49f4-9796-f618c44dc16d
</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/24fae96f-f986-46e1-bfd0-a21ca20ff0ce</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="w3id" relationType="HasPart">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/6d3427a8-352e-49f4-9796-f618c44dc16d</datacite:relatedIdentifier>
</datacite:relatedIdentifiers>
<datacite:resourceType xs:anyURI="http://purl.org/coar/resource_type/c_1843">RO-crate</datacite:resourceType>
<datacite:rightsList>
@ -43,21 +41,17 @@
</datacite:descriptions>
<datacite:publisher>Poznań Supercomputing and Networking Center</datacite:publisher>
<contributors xmlns="http://datacite.org/schema/kernel-4">
<contributor>
<contributor contributorType="Researcher">
<contributorName>Generation Service</contributorName>
</contributor>
<contributor contributorType="Researcher">
<contributorName>Generation Service</contributorName>
</contributor>
</contributors>
<creators xmlns="http://datacite.org/schema/kernel-4">
<creator>
<creator>
<creatorName>CNR-ISMAR</creatorName>
</creator>
<creatorName>CNR-ISMAR</creatorName>
</creator>
</creators>
<dates xmlns="http://datacite.org/schema/kernel-4">
<date dateType="Created">2018-06-20T11:21:46Z</date>
<date dateType="Issued">2018-06-20T11:21:46Z</date>
</dates>
<dc:descriptions>
<dc:description descriptionType="Abstract">The use of biological effects tools offer enormous potential to meet the challenges outlined by the European Union Marine Strategy Framework Directive (MSFD) whereby Member States are required to develop a robust set of tools for defining 11 qualitative descriptors of Good Environmental Status (GES), such as demonstrating that "Concentrations of contaminants are at levels not giving rise to pollution effects" (GES Descriptor 8). This paper discusses the combined approach of monitoring chemical contaminant levels, along side biological effect measurements relating to the effect of pollutants, for undertaking assessments of GES across European marine regions. We outline the minimum standards that biological effects tools should meet if they are to be used for defining GES in relation to Descriptor 8 and describe the current international initiatives underway to develop assessment criteria for these biological effects techniques. Crown Copyright (C) 2010 Published by Elsevier Ltd. All rights reserved.</dc:description>
@ -71,15 +65,18 @@
</sizes>
<subjects xmlns="http://datacite.org/schema/kernel-4">
<subject>Ecology</subject>
<subject>EOSC::RO-crate</subject>
</subjects>
</datacite:resource>
<oaf:identifier identifierType="URL">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</oaf:identifier>
<oaf:identifier identifierType="w3id">https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca</oaf:identifier>
<dr:CobjCategory type="other research product">other research product</dr:CobjCategory>
<oaf:dateAccepted/>
<oaf:dateAccepted>2018-06-20</oaf:dateAccepted>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:language/>
<oaf:hostedBy name="ROHub" id="fairsharing_::4119"/>
<oaf:collectedFrom name="ROHub" id="fairsharing_::4119"/>
<oaf:eoscifguidelines code="EOSC::RO-crate"
label="EOSC::RO-crate"
url=""
semanticrelation="compliesWith"/>
</metadata>
</record>

View File

@ -801,7 +801,7 @@
<mockito-core.version>3.3.3</mockito-core.version>
<mongodb.driver.version>3.4.2</mongodb.driver.version>
<vtd.version>[2.12,3.0)</vtd.version>
<dhp-schemas.version>[2.12.1]</dhp-schemas.version>
<dhp-schemas.version>[2.12.1-patched]</dhp-schemas.version>
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>