From a2fdf85ba1bacef38c43d94c8f62e7e05ecb265d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 9 Jun 2020 19:52:53 +0200 Subject: [PATCH] WIP: graph cleaner implementation --- .../dhp/schema/common/ModelConstants.java | 1 + .../eu/dnetlib/dhp/schema/oaf/Instance.java | 6 +-- .../migration/ProtoConverter.java | 11 ++++- .../DnetCollectorWorkerApplicationTests.java | 2 + .../doiboost/crossref/Crossref2Oaf.scala | 6 ++- .../dhp/oa/graph/clean/CleaningRule.java | 16 +++---- .../dhp/oa/graph/raw/OafToOafMapper.java | 10 +--- .../dhp/oa/graph/raw/OdfToOafMapper.java | 16 +------ .../oa/graph/raw/common/VocabularyGroup.java | 18 ++++++-- .../dhp/oa/graph/clean/CleaningRuleTest.java | 40 ++++++++++++++-- .../eu/dnetlib/dhp/oa/graph/clean/result.json | 46 ++++++++++++++++++- .../oa/provision/utils/XmlRecordFactory.java | 4 +- 12 files changed, 131 insertions(+), 45 deletions(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index e32dd10fa9..fba20dda15 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -14,6 +14,7 @@ public class ModelConstants { public static final String DNET_DATA_CITE_RESOURCE = "dnet:dataCite_resource"; public static final String DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions"; public static final String DNET_COUNTRY_TYPE = "dnet:countries"; + public static final String DNET_REVIEW_LEVELS = "dnet:review_levels"; public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository"; public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry"; diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Instance.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Instance.java index 2b7d3846c0..29d4952619 100644 --- 
a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Instance.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Instance.java @@ -31,7 +31,7 @@ public class Instance implements Serializable { // typed results private Field processingchargecurrency; - private Field refereed; // peer-review status + private Qualifier refereed; // peer-review status public Field getLicense() { return license; @@ -113,11 +113,11 @@ public class Instance implements Serializable { this.processingchargecurrency = processingchargecurrency; } - public Field getRefereed() { + public Qualifier getRefereed() { return refereed; } - public void setRefereed(Field refereed) { + public void setRefereed(Qualifier refereed) { this.refereed = refereed; } diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java index e55c0eb7b6..8ea877aec5 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java @@ -96,12 +96,21 @@ public class ProtoConverter implements Serializable { .stream() .distinct() .collect(Collectors.toCollection(ArrayList::new)) : null); - i.setRefereed(mapStringField(ri.getRefereed())); + i.setRefereed(mapRefereed(ri.getRefereed())); i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount())); i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency())); return i; } + private static Qualifier mapRefereed(FieldTypeProtos.StringField refereed) { + Qualifier q = new Qualifier(); + q.setClassid(refereed.getValue()); + q.setClassname(refereed.getValue()); /* class name mirrors the class id */ + q.setSchemeid("dnet:review_levels"); + q.setSchemename("dnet:review_levels"); + return q; + } + private static List convertExternalRefs(OafProtos.Oaf oaf) { 
ResultProtos.Result r = oaf.getEntity().getResult(); if (r.getExternalReferenceCount() > 0) { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java index 87bd3be3d8..c745219fea 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java @@ -8,6 +8,7 @@ import java.io.File; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import com.fasterxml.jackson.databind.ObjectMapper; @@ -19,6 +20,7 @@ import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory; import eu.dnetlib.message.Message; import eu.dnetlib.message.MessageManager; +@Disabled public class DnetCollectorWorkerApplicationTests { private final ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index cc2c9d5860..ec8aca55ca 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -166,8 +166,10 @@ case object Crossref2Oaf { val has_review = (json \ "relation" \"has-review" \ "id") - if(has_review != JNothing) - instance.setRefereed(asField("peerReviewed")) + if(has_review != JNothing) { + instance.setRefereed( + createQualifier("0001", "peerReviewed", "dnet:review_levels", "dnet:review_levels")) + } instance.setAccessright(getRestrictedQualifier()) diff 
--git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRule.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRule.java index c00c7b4d9f..51b9309625 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRule.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRule.java @@ -1,19 +1,16 @@ package eu.dnetlib.dhp.oa.graph.clean; -import java.beans.BeanInfo; -import java.beans.IntrospectionException; -import java.beans.Introspector; -import java.beans.PropertyDescriptor; import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; -import java.util.*; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; import org.apache.spark.api.java.function.MapFunction; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Qualifier; public class CleaningRule implements MapFunction { @@ -61,7 +58,6 @@ public class CleaningRule implements MapFunction { if (value instanceof Qualifier) { Qualifier q = (Qualifier) value; if (vocabularies.vocabularyExists(q.getSchemeid())) { - field.set(o, vocabularies.lookup(q.getSchemeid(), q.getClassid())); } @@ -86,4 +82,8 @@ public class CleaningRule implements MapFunction { return fields; } + + public VocabularyGroup getVocabularies() { + return vocabularies; + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 6f91ce733b..2ea8bba4a1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ 
-4,13 +4,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE; -import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO; -import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET; -import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import java.util.ArrayList; import java.util.List; @@ -139,7 +133,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { instance .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES)); instance.setLicense(field(doc.valueOf("//oaf:license"), info)); - instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); + instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS)); instance .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); instance diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index bbd9442e14..a2019e9597 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -4,19 +4,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static 
eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; -import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE; -import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS; -import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF; -import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENTED_BY; -import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENT_TO; -import static eu.dnetlib.dhp.schema.common.ModelConstants.PART; -import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; -import static eu.dnetlib.dhp.schema.common.ModelConstants.SUPPLEMENT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import java.util.ArrayList; import java.util.Arrays; @@ -129,7 +117,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { instance .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES)); instance.setLicense(field(doc.valueOf("//oaf:license"), info)); - instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); + instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS)); instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); instance .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java index 6af80683eb..ec95ade003 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java @@ -2,10 +2,9 @@ package eu.dnetlib.dhp.oa.graph.raw.common; import java.io.Serializable; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; +import java.util.*; import java.util.function.Supplier; +import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -86,6 +85,19 @@ public class VocabularyGroup implements Serializable { } } + public Set getTerms(String vocId) { + if (!vocabularyExists(vocId)) { + return new HashSet<>(); + } + return vocs + .get(vocId.toLowerCase()) + .getTerms() + .values() + .stream() + .map(t -> t.getId()) + .collect(Collectors.toCollection(HashSet::new)); + } + public Qualifier lookup(String vocId, String id) { return Optional .ofNullable(getSynonymAsQualifier(vocId, id)) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleTest.java index fab3c0c01b..019285cc3d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleTest.java @@ -1,14 +1,17 @@ package eu.dnetlib.dhp.oa.graph.clean; +import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.lenient; import java.io.IOException; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; import 
org.apache.commons.io.IOUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -18,7 +21,9 @@ import org.mockito.junit.jupiter.MockitoExtension; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyTerm; import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -26,6 +31,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class CleaningRuleTest { public static final ObjectMapper MAPPER = new ObjectMapper(); + @Mock private ISLookUpService isLookUpService; @@ -47,18 +53,46 @@ public class CleaningRuleTest { @Test public void testCleaning() throws Exception { + assertNotNull(cleaningRule.getVocabularies()); + String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result.json")); Publication p_in = MAPPER.readValue(json, Publication.class); Publication p_out = cleaningRule.call(p_in); - Assertions.assertNotNull(p_out); + assertNotNull(p_out); + + assertEquals("eng", p_out.getLanguage().getClassid()); + assertEquals("English", p_out.getLanguage().getClassname()); + + assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid()); + assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname()); + + assertEquals("CLOSED", p_out.getInstance().get(0).getAccessright().getClassid()); + assertEquals("Closed Access", p_out.getInstance().get(0).getAccessright().getClassname()); + + Set pidTerms = vocabularies.getTerms("dnet:pid_types"); + assertTrue( + p_out + .getPid() + .stream() + .map(p -> p.getQualifier()) + .allMatch(q -> 
pidTerms.contains(q.getClassid()))); // TODO add more assertions to verity the cleaned values System.out.println(MAPPER.writeValueAsString(p_out)); } + private Stream getAuthorPidTypes(Publication pub) { + return pub + .getAuthor() + .stream() + .map(a -> a.getPid()) + .flatMap(p -> p.stream()) + .map(s -> s.getQualifier()); + } + private List vocs() throws IOException { return IOUtils .readLines(CleaningRuleTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index c45544b404..435b001b7c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -6,6 +6,28 @@ "fullname": "Brien, Tom", "name": "Tom", "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "ORCID12", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" + } ], "rank": 1, "surname": "Brien" @@ -16,6 +38,28 @@ "fullname": "Ade, Peter", "name": "Peter", "pid": [ + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:datasetarchive", + "classname": "sysimport:crosswalk:datasetarchive", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { 
+ "classid": "xyz", + "classname": "XYZ", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "qwerty" + } ], "rank": 2, "surname": "Ade" @@ -207,7 +251,7 @@ { "accessright": { "classid": "CLOSED", - "classname": "Closed Access", + "classname": "CLOSED", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes" }, diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index d950a816d9..cb45f3f32e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1165,10 +1165,10 @@ public class XmlRecordFactory implements Serializable { .asXmlElement( "distributionlocation", instance.getDistributionlocation())); } - if (instance.getRefereed() != null && isNotBlank(instance.getRefereed().getValue())) { + if (instance.getRefereed() != null && !instance.getRefereed().isBlank()) { fields .add( - XmlSerializationUtils.asXmlElement("refereed", instance.getRefereed().getValue())); + XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); } if (instance.getProcessingchargeamount() != null && isNotBlank(instance.getProcessingchargeamount().getValue())) {