From 211aa04726bb1789006dde33e5755ba3894d7ed7 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 9 Dec 2020 18:08:51 +0100 Subject: [PATCH 01/12] allow orcid_pending to be percolate to the XML graph serialization --- .../dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java index 9cdf1cd2e..f3fbca47d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java @@ -8,6 +8,7 @@ import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -272,11 +273,7 @@ public class CreateRelatedEntitiesJob_phase2 { .filter(Objects::nonNull) .map(Qualifier::getClassid) .filter(StringUtils::isNotBlank) - .anyMatch(c -> "orcid".equals(c.toLowerCase())); - } - - private static FilterFunction filterEmptyEntityFn() { - return (FilterFunction) v -> Objects.nonNull(v.getEntity()); + .anyMatch(c -> c.toLowerCase().contains(ModelConstants.ORCID)); } private static void removeOutputDir(SparkSession spark, String path) { From ff72fcd91a501d3c824f7adee375d48118a6052b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 9 Dec 2020 19:04:50 +0100 Subject: [PATCH 02/12] allow orcid_pending to be percolate to the XML graph serialization --- .../eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java index 0e742365a..74d35bf5e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java @@ -7,13 +7,15 @@ import java.util.Set; import com.google.common.collect.Sets; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; public class GraphMappingUtils { public static final String SEPARATOR = "_"; - public static Set authorPidTypes = Sets.newHashSet("orcid", "magidentifier"); + public static Set authorPidTypes = Sets.newHashSet( + ModelConstants.ORCID, ModelConstants.ORCID_PENDING, "magidentifier"); public static String removePrefix(final String s) { if (s.contains("|")) From 82e6c50f3fe37f69a82b9f273a7ab96b1a9ebe8b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 9 Feb 2021 16:27:04 +0100 Subject: [PATCH 03/12] updated solr fields (authoridtypevalue, resultsubject, resultresourcetypename) --- .../src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml | 4 +++- .../src/test/resources/eu/dnetlib/dhp/oa/provision/record.xml | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml index 1f5cf7b81..70cc28754 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml @@ -61,7 +61,7 @@ - + @@ -72,6 +72,7 @@ + @@ -79,6 +80,7 @@ + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/record.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/record.xml index b617dbea2..a0ca0aa6f 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/record.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/record.xml @@ -39,6 +39,8 @@ Saykally, Jessica N. Keeley, Kristen L. Haris Hatic + Baglioni, Miriam + De Bonis, Michele 2017-06-01 Withania somnifera has been used in traditional medicine for a variety of neural disorders. Recently, chronic neurodegenerative conditions have been @@ -115,7 +117,7 @@ Cell Transplantation - + Cell Transplantation From 09fc7e2f7882277534b9ccf97b6a1d9060ded335 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 10 Feb 2021 11:22:09 +0100 Subject: [PATCH 04/12] serialization of validated flag on relationships --- .../CreateRelatedEntitiesJob_phase2.java | 2 +- .../oa/provision/utils/GraphMappingUtils.java | 3 +- .../oa/provision/utils/TemplateFactory.java | 6 +- .../oa/provision/utils/XmlRecordFactory.java | 3 +- .../dnetlib/dhp/oa/provision/template/rel.st | 3 +- .../oa/provision/XmlRecordFactoryTest.java | 70 +++++++++++ .../eu/dnetlib/dhp/oa/provision/project.json | 109 ++++++++++++++++++ .../eu/dnetlib/dhp/oa/provision/record.xml | 22 ++++ .../dhp/oa/provision/relToProject.json | 31 +++++ .../oa/provision/relToValidatedProject.json | 31 +++++ 10 files changed, 275 insertions(+), 5 deletions(-) create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/project.json create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/relToProject.json create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/relToValidatedProject.json diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java index f3fbca47d..e2cf58d00 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java @@ -8,7 +8,6 @@ import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -28,6 +27,7 @@ import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import scala.Tuple2; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java index 74d35bf5e..d2131ef28 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java @@ -14,7 +14,8 @@ public class GraphMappingUtils { public static final String SEPARATOR = "_"; - public static Set authorPidTypes = Sets.newHashSet( + public static Set authorPidTypes = Sets + .newHashSet( ModelConstants.ORCID, ModelConstants.ORCID_PENDING, "magidentifier"); public static String removePrefix(final String s) { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java index 21b526ab1..173ba326a 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java @@ -73,7 +73,9 @@ public class TemplateFactory { final Collection fields, final String semanticclass, final String semantischeme, - final DataInfo info) { + final DataInfo info, + final boolean validated, + final String validationDate) { return getTemplate(resources.getRel()) .add("type", type) .add("objIdentifier", escapeXml(removePrefix(objIdentifier))) @@ -86,6 +88,8 @@ public class TemplateFactory { .add( "provenanceaction", info.getProvenanceaction() != null ? info.getProvenanceaction().getClassid() : "") + .add("validated", validated) + .add("validationdate", validationDate) .render(); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 9f16e99d8..b0a21c62b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1096,7 +1096,8 @@ public class XmlRecordFactory implements Serializable { final HashSet fields = Sets.newHashSet(mapFields(link, contexts)); return templateFactory .getRel( - targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo()); + targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo(), rel.getValidated(), + rel.getValidationDate()); } private List listChildren( diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/rel.st b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/rel.st index af19ba497..e77a86e1d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/rel.st +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/rel.st @@ -1,4 +1,5 @@ - $objIdentifier$ +$if(validated)$$else$$endif$ +$objIdentifier$ $metadata:{ it | $it$ }$ \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 8ae8a55c3..c391a95c9 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -5,22 +5,29 @@ import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; import java.io.StringReader; +import java.util.List; import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.io.SAXReader; +import org.junit.Assert; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import com.clearspring.analytics.util.Lists; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; public class XmlRecordFactoryTest { @@ -58,4 +65,67 @@ public class XmlRecordFactoryTest { // TODO add assertions based of values extracted from the XML record } + + @Test + public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { + + ContextMapper contextMapper = new ContextMapper(); + + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + otherDsTypeId); + + Publication p = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); + Project pj = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class); + Relation rel = OBJECT_MAPPER + .readValue( + (IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json"))), Relation.class); + RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); + List links = Lists.newArrayList(); + RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); + links.add(rew); + JoinedEntity je = new JoinedEntity<>(p); + je.setLinks(links); + + String xml = xmlRecordFactory.build(je); + + assertNotNull(xml); + + Document doc = new SAXReader().read(new StringReader(xml)); + assertNotNull(doc); + System.out.println(doc.asXML()); + Assertions.assertEquals("2021-01-01", doc.valueOf("//validated/@date")); + } + + @Test + public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException { + + ContextMapper contextMapper = new ContextMapper(); + + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + otherDsTypeId); + + Publication p = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); + Project pj = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class); + Relation rel = OBJECT_MAPPER + .readValue((IOUtils.toString(getClass().getResourceAsStream("relToProject.json"))), Relation.class); + RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); + List links = Lists.newArrayList(); + RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); + links.add(rew); + JoinedEntity je = new JoinedEntity<>(p); + je.setLinks(links); + + String xml = xmlRecordFactory.build(je); + + assertNotNull(xml); + + Document doc = new SAXReader().read(new StringReader(xml)); + assertNotNull(doc); + System.out.println(doc.asXML()); + assertEquals("", doc.valueOf("//rel/validated")); + } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/project.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/project.json new file mode 100644 index 000000000..b61e55d1a --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/project.json @@ -0,0 +1,109 @@ +{ + "id": "40|corda__h2020::79a0e16c122c9a18eb60e4a5e64b620d", + "originalId": [], + "pid": [], + "dateofcollection": "2020-01-01", + "dateoftransformation": "2020-01-01", + "extraInfo": [], + "oaiprovenance": null, + "websiteurl": { + "value": "https://web.site", + "datainfo": null + }, + "code": { + "value": "79a0e", + "datainfo": null + }, + "acronym": { + "value": "79a0e_acronym", + "datainfo": null + }, + "title": { + "value": "79a0e_title", + "datainfo": null + }, + "startdate": { + "value": "2019-02-01", + "datainfo": null + }, + "enddate": { + "value": "2021-01-09", + "datainfo": null + }, + "callidentifier": { + "value": "79a0e_callID", + "datainfo": null + }, + "keywords": { + "value": "", + "datainfo": null + }, + "duration": { + "value": "", + "datainfo": null + }, + "ecsc39": { + "value": "true", + "datainfo": null + }, + "oamandatepublications": { + "value": "true", + "datainfo": null + }, + "ecarticle29_3": { + "value": "false", + "datainfo": null + }, + "optional1": { + "value": "", + "datainfo": null + }, + "optional2": { + "value": "", + "datainfo": null + }, + "jsonextrainfo":{ + "value": "", + "datainfo": null + }, + "contactfullname":{ + "value": "", + "datainfo": null + }, + "contactfax": { + "value": "", + "datainfo": null + }, + "contactphone": { + "value": "", + "datainfo": null + }, + "contactemail": { + "value": "", + "datainfo": null + }, + "summary": { + "value": "79a0e_description", + "datainfo": null + }, + "currency": { + "value": "EUR", + "datainfo": null + }, + "totalcost": 120000, + "fundedamount": 18000, + "h2020topiccode": "", + "h2020topicdescription": "", + "h2020classification": [], + "subjects": [ + { + "value": "", + "qualifier": null, + "datainfo": null + } + ], + "fundingtree": [] + +} + + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/record.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/record.xml index a0ca0aa6f..2d6049416 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/record.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/record.xml @@ -294,6 +294,28 @@ >rcuk________::23feba2a5ca7f6b6016bf3a45180da50 University of Delhi + + true + corda_______::30c6b5ab90f30666de1d112fb93d8c77 + 227878 + + + ec__________::EC::FP7 + ec__________::EC::FP7::SP2 + ec__________::EC::FP7::SP2::ERC + + Complex structure and dynamics of collective motion + COLLMOT + + + + irb_hr______::2330a1d0dac71ffbe15fbcbc807288d4 + 108-1083570-3635 + + + + Pentadecapeptide BPC 157 - further investigations + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/relToProject.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/relToProject.json new file mode 100644 index 000000000..b4d975255 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/relToProject.json @@ -0,0 +1,31 @@ +{ +"collectedfrom": [ +{ +"key": "10|opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf3", +"value": "AMS Acta", +"dataInfo": null +} +], +"dataInfo": { +"invisible": false, +"inferred": false, +"deletedbyinference": false, +"trust": "0.9", +"inferenceprovenance": "", +"provenanceaction": { +"classid": "sysimport:crosswalk:repository", +"classname": "sysimport:crosswalk:repository", +"schemeid": "dnet:provenanceActions", +"schemename": "dnet:provenanceActions" +} +}, +"lastupdatetimestamp": 1606898557407, +"relType": "resultProject", +"subRelType": "outcome", +"relClass": "isProducedBy", +"source": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c", +"target": "40|corda__h2020::79a0e16c122c9a18eb60e4a5e64b620d", +"validated": false, +"validationDate": null, +"properties": [] +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/relToValidatedProject.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/relToValidatedProject.json new file mode 100644 index 000000000..0346d7264 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/relToValidatedProject.json @@ -0,0 +1,31 @@ +{ +"collectedfrom": [ +{ +"key": "10|opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf3", +"value": "AMS Acta", +"dataInfo": null +} +], +"dataInfo": { +"invisible": false, +"inferred": false, +"deletedbyinference": false, +"trust": "0.9", +"inferenceprovenance": "", +"provenanceaction": { +"classid": "sysimport:crosswalk:repository", +"classname": "sysimport:crosswalk:repository", +"schemeid": "dnet:provenanceActions", +"schemename": "dnet:provenanceActions" +} +}, +"lastupdatetimestamp": 1606898557407, +"relType": "resultProject", +"subRelType": "outcome", +"relClass": "isProducedBy", +"source": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c", +"target": "40|corda__h2020::79a0e16c122c9a18eb60e4a5e64b620d", +"validated": true, +"validationDate": "2021-01-01", +"properties": [] +} \ No newline at end of file From c4d1feca7451b8e50f0abe8287fb056c32516f17 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 10 Feb 2021 11:22:54 +0100 Subject: [PATCH 05/12] mapper test with validated link to project --- .../src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java | 1 + .../resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_crossref.xml | 1 + 2 files changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 853106cd8..ab956a378 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -334,6 +334,7 @@ public class MappersTest { assertValidId(p.getCollectedfrom().get(0).getKey()); System.out.println(p.getTitle().get(0).getValue()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); + } @Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_crossref.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_crossref.xml index 8f69a5e2d..93349f3c9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_crossref.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_crossref.xml @@ -46,6 +46,7 @@ 10.1080/23744235.2020.1774644 Infectious Diseases + corda__h2020::814530 From 986dd969d3e32975f7d49cbbdda2eb6f2adcee8a Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 10 Feb 2021 12:03:54 +0100 Subject: [PATCH 06/12] use the proper import for Lists --- .../eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index c391a95c9..27860ca32 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -11,14 +11,12 @@ import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.io.SAXReader; -import org.junit.Assert; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -import com.clearspring.analytics.util.Lists; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; From 7249cceb5327a6fdd69018c9e7065d5cb703cf16 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 11 Feb 2021 09:27:08 +0100 Subject: [PATCH 07/12] switch of 2 nodes --- .../oa/generate_all/oozie_app/workflow.xml | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml index d06eec87e..0aeb51c3c 100644 --- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml @@ -505,6 +505,33 @@ --maxEventsForTopic${maxIndexedEventsForDsAndTopic} --brokerApiBaseUrl${brokerApiBaseUrl} + + + + + + + yarn + cluster + GenerateStatsJob + eu.dnetlib.dhp.broker.oa.GenerateStatsJob + dhp-broker-events-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --outputDir${outputDir} + --dbUrl${brokerDbUrl} + --dbUser${brokerDbUser} + --dbPassword${brokerDbPassword} + --brokerApiBaseUrl${brokerApiBaseUrl} + @@ -535,37 +562,10 @@ --esNodesWanOnly${esNodesWanOnly} --brokerApiBaseUrl${brokerApiBaseUrl} - - - - - - - yarn - cluster - GenerateStatsJob - eu.dnetlib.dhp.broker.oa.GenerateStatsJob - dhp-broker-events-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=3840 - - --outputDir${outputDir} - --dbUrl${brokerDbUrl} - --dbUser${brokerDbUser} - --dbPassword${brokerDbPassword} - --brokerApiBaseUrl${brokerApiBaseUrl} - - + \ No newline at end of file From b34b5a39ca05cc3a2f1363aca7171bacb9f739a6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 11 Feb 2021 09:36:04 +0100 Subject: [PATCH 08/12] index field authoridtypevalue mixes up different author id-type value pairs, dropped in favour of orcidtypevalue --- .../src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml index 70cc28754..f92b63dfd 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml @@ -80,7 +80,7 @@ - + From 8c1600398a1f0c8d51fc84d8ab71ac468da5791b Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 11 Feb 2021 10:54:16 +0100 Subject: [PATCH 09/12] added resumeFrom parameter --- .../broker/oa/generate_all/oozie_app/workflow.xml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml index 0aeb51c3c..ea80c3acf 100644 --- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/generate_all/oozie_app/workflow.xml @@ -136,7 +136,17 @@ - + + + + + ${wf:conf('resumeFrom') eq 'ensure_output_dir'} + ${wf:conf('resumeFrom') eq 'index_event_subset'} + ${wf:conf('resumeFrom') eq 'stats'} + ${wf:conf('resumeFrom') eq 'index_notifications'} + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] From 83d815d0bce32ab98cd385832ea2bcbf7e1a694d Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 11 Feb 2021 10:57:23 +0100 Subject: [PATCH 10/12] only stats --- .../oa/stats/oozie_app/config-default.xml | 18 +++ .../broker/oa/stats/oozie_app/workflow.xml | 114 ++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/stats/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/stats/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/stats/oozie_app/config-default.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/stats/oozie_app/config-default.xml new file mode 100644 index 000000000..2e0ed9aee --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/stats/oozie_app/config-default.xml @@ -0,0 +1,18 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/stats/oozie_app/workflow.xml new file mode 100644 index 000000000..218af4515 --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/stats/oozie_app/workflow.xml @@ -0,0 +1,114 @@ + + + + + outputDir + the path where the the generated data will be stored + + + brokerApiBaseUrl + the url of the broker service api + + + brokerDbUrl + the url of the broker database + + + brokerDbUser + the user of the broker database + + + brokerDbPassword + the password of the broker database + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + GenerateStatsJob + eu.dnetlib.dhp.broker.oa.GenerateStatsJob + dhp-broker-events-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --outputDir${outputDir} + --dbUrl${brokerDbUrl} + --dbUser${brokerDbUser} + --dbPassword${brokerDbPassword} + --brokerApiBaseUrl${brokerApiBaseUrl} + + + + + + + + From 32e81c2d8973687767c85e57007f2e00a9021d29 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 16 Feb 2021 11:01:42 +0100 Subject: [PATCH 11/12] non validated rel has null value in validated field --- .../eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java | 2 ++ .../resources/eu/dnetlib/dhp/oa/provision/relToProject.json | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index b0a21c62b..af6081c5d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1094,6 +1094,8 @@ public class XmlRecordFactory implements Serializable { String.format("missing scheme for: <%s - %s>", type.toString(), targetType)); } final HashSet fields = Sets.newHashSet(mapFields(link, contexts)); + if (rel.getValidated() == null) + rel.setValidated(false); return templateFactory .getRel( targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo(), rel.getValidated(), diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/relToProject.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/relToProject.json index b4d975255..9e2824e52 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/relToProject.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/relToProject.json @@ -25,7 +25,7 @@ "relClass": "isProducedBy", "source": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c", "target": "40|corda__h2020::79a0e16c122c9a18eb60e4a5e64b620d", -"validated": false, +"validated": null, "validationDate": null, "properties": [] } \ No newline at end of file From 6f9864c564757362387f9bc34eb00a463cba0d6f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 16 Feb 2021 11:49:23 +0100 Subject: [PATCH 12/12] manage merging of Relatation validation attributes --- .../dhp/schema/common/ModelSupport.java | 24 ++++++++++ .../eu/dnetlib/dhp/schema/oaf/Relation.java | 12 ++++- .../eu/dnetlib/dhp/schema/oaf/MergeTest.java | 45 +++++++++++++++++++ 3 files changed, 80 insertions(+), 1 deletion(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index b5bca2e93..0c7903137 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.schema.common; import static com.google.common.base.Preconditions.checkArgument; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; import java.util.Map; import java.util.Objects; import java.util.Optional; @@ -473,4 +476,25 @@ public class ModelSupport { private static String idFnForOafEntity(T t) { return ((OafEntity) t).getId(); } + + public static final String ISO8601FORMAT = "yyyy-MM-dd'T'HH:mm:ssZ"; + + public static String oldest(String dateA, String dateB) throws ParseException { + + if (StringUtils.isBlank(dateA)) { + return dateB; + } + if (StringUtils.isBlank(dateB)) { + return dateA; + } + if (StringUtils.isNotBlank(dateA) && StringUtils.isNotBlank(dateB)) { + + final Date a = new SimpleDateFormat(ISO8601FORMAT).parse(dateA); + final Date b = new SimpleDateFormat(ISO8601FORMAT).parse(dateB); + + return a.before(b) ? dateA : dateB; + } else { + return null; + } + } } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java index 0de34dbec..8825d7137 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java @@ -1,8 +1,11 @@ package eu.dnetlib.dhp.schema.oaf; +import eu.dnetlib.dhp.schema.common.ModelSupport; + import static com.google.common.base.Preconditions.checkArgument; +import java.text.ParseException; import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -106,7 +109,7 @@ public class Relation extends Oaf { } public Boolean getValidated() { - return validated; + return Objects.nonNull(validated) && validated; } public void setValidated(Boolean validated) { @@ -130,6 +133,13 @@ public class Relation extends Oaf { Objects.equals(getSubRelType(), r.getSubRelType()), "subRelType(s) must be equal"); checkArgument(Objects.equals(getRelClass(), r.getRelClass()), "relClass(es) must be equal"); + setValidated(getValidated() || r.getValidated()); + try { + setValidationDate(ModelSupport.oldest(getValidationDate(), r.getValidationDate())); + } catch (ParseException e) { + throw new IllegalArgumentException(String.format("invalid validation date format in relation [s:%s, t:%s]: %s", getSource(), getTarget(), getValidationDate())); + } + super.mergeFrom(r); } diff --git a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java index f91646f2c..6ee5b9d85 100644 --- a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java +++ b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java @@ -63,6 +63,51 @@ public class MergeTest { assertEquals(3, a.getSubject().size()); } + @Test + public void mergeRelationTest() { + + Relation a = createRel(null, null); + Relation b = createRel(null, null); + a.mergeFrom(b); + assertEquals(a, b); + + a = createRel(true, null); + b = createRel(null, null); + a.mergeFrom(b); + assertEquals(true, a.getValidated()); + + a = createRel(true, null); + b = createRel(false, null); + a.mergeFrom(b); + assertEquals(true, a.getValidated()); + + a = createRel(true, null); + b = createRel(true, "2016-04-05T12:41:19.202Z"); + a.mergeFrom(b); + assertEquals("2016-04-05T12:41:19.202Z", a.getValidationDate()); + } + + @Test + public void mergeRelationTestParseException() { + assertThrows(IllegalArgumentException.class, () -> { + Relation a = createRel(true, "2016-04-05"); + Relation b = createRel(true, "2016-04-05"); + a.mergeFrom(b); + }); + } + + private Relation createRel(Boolean validated, String validationDate) { + Relation rel = new Relation(); + rel.setSource("1"); + rel.setTarget("2"); + rel.setRelType("reltype"); + rel.setSubRelType("subreltype"); + rel.setRelClass("relclass"); + rel.setValidated(validated); + rel.setValidationDate(validationDate); + return rel; + } + private KeyValue setKV(final String key, final String value) { KeyValue k = new KeyValue();