From 8b2043c7b1924d47cc9d5871649929ca0c437ff8 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 14 Apr 2020 16:43:40 +0200 Subject: [PATCH 01/53] introducing List generic container for Relation specific properties. Ref ticket https://issue.openaire.research-infrastructures.eu/issues/5512 --- .../eu/dnetlib/dhp/schema/oaf/Relation.java | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java index 6871c0197..3b45c4f7d 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java @@ -6,20 +6,49 @@ import java.util.stream.Stream; import static com.google.common.base.Preconditions.checkArgument; +/** + * Relation aims to model any edge between two nodes in the OpenAIRE graph. It has a source id and a target id + * pointing to graph node identifiers and it is further characterised by the semantic of the link throught the fields + * relType, subRelType and relClass. Provenance information is modeled according to the dataInfo element and collectedFrom, + * while individual relationship types can provide extra information via the properties field. + */ public class Relation extends Oaf { + /** + * Main relationship classifier, values include 'resultResult', 'resultProject', 'resultOrganization', etc. + */ private String relType; + /** + * Further classifies a relationship, values include 'affiliation', 'similarity', 'supplement', etc. + */ private String subRelType; + /** + * Indicates the direction of the relationship, values include 'isSupplementTo', 'isSupplementedBy', 'merges, 'isMergedIn'. + */ private String relClass; + /** + * The source entity id. + */ private String source; + /** + * The target entity id. + */ private String target; + /** + * The list of datasource id/name pairs providing this relationship. 
+ */ private List collectedFrom = new ArrayList<>(); + /** + * List of relation specific properties. Values values include 'similarityLevel', indicating the similarity score between a pair of publications. + */ + private List properties = new ArrayList<>(); + public String getRelType() { return relType; } @@ -68,6 +97,14 @@ public class Relation extends Oaf { this.collectedFrom = collectedFrom; } + public List getProperties() { + return properties; + } + + public void setProperties(List properties) { + this.properties = properties; + } + public void mergeFrom(final Relation r) { checkArgument(Objects.equals(getSource(), r.getSource()),"source ids must be equal"); From cc67dbff819594b3b05a3eedefe05a058bcfb15d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 14 Apr 2020 17:11:55 +0200 Subject: [PATCH 02/53] typo in text --- .../src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java index 3b45c4f7d..d8d150831 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java @@ -45,7 +45,7 @@ public class Relation extends Oaf { private List collectedFrom = new ArrayList<>(); /** - * List of relation specific properties. Values values include 'similarityLevel', indicating the similarity score between a pair of publications. + * List of relation specific properties. Values include 'similarityLevel', indicating the similarity score between a pair of publications. 
*/ private List properties = new ArrayList<>(); From 11938dac5e39ba083842e6cd2401e1ddee29a196 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 4 May 2020 16:47:07 +0200 Subject: [PATCH 03/53] this commit adds: validated/validationDate to relationships; measure type and simple unit test to indicate the relative serialization --- .../eu/dnetlib/dhp/schema/oaf/Measure.java | 52 +++++++++++++++++++ .../eu/dnetlib/dhp/schema/oaf/Relation.java | 40 ++++++++++++++ .../eu/dnetlib/dhp/schema/oaf/Result.java | 12 +++++ .../dnetlib/dhp/schema/oaf/MeasureTest.java | 37 +++++++++++++ 4 files changed, 141 insertions(+) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java index 98adffd2a..c37e76061 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java @@ -1,4 +1,56 @@ package eu.dnetlib.dhp.schema.oaf; +import com.google.common.base.Objects; + +import java.util.List; + +/** + * Represent a measure, must be further described by a system available resource providing name and descriptions. + */ public class Measure { + + /** + * Unique measure identifier. + */ + private String id; + + /** + * List of units associated with this measure. KeyValue provides a pair to store the laber (key) and the value, + * plus common provenance information. 
+ */ + private List unit; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public List getUnit() { + return unit; + } + + public void setUnit(List unit) { + this.unit = unit; + } + + public void mergeFrom(Measure m) { + //TODO + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Measure measure = (Measure) o; + return Objects.equal(id, measure.id) && + Objects.equal(unit, measure.unit); + } + + @Override + public int hashCode() { + return Objects.hashCode(id, unit); + } } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java index 76503f885..d77bd7d73 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java @@ -40,6 +40,21 @@ public class Relation extends Oaf { */ private String target; + /** + * Was this relationship authoritatively validated? + */ + private Boolean validated; + + /** + * When was this relationship authoritatively validated. + */ + private String validationDate; + + /** + * List of relation specific properties. Values include 'similarityLevel', indicating the similarity score between a pair of publications. 
+ */ + private List properties = new ArrayList<>(); + public String getRelType() { return relType; } @@ -80,6 +95,30 @@ public class Relation extends Oaf { this.target = target; } + public List getProperties() { + return properties; + } + + public void setProperties(List properties) { + this.properties = properties; + } + + public Boolean getValidated() { + return validated; + } + + public void setValidated(Boolean validated) { + this.validated = validated; + } + + public String getValidationDate() { + return validationDate; + } + + public void setValidationDate(String validationDate) { + this.validationDate = validationDate; + } + public void mergeFrom(final Relation r) { checkArgument(Objects.equals(getSource(), r.getSource()), "source ids must be equal"); @@ -122,4 +161,5 @@ public class Relation extends Oaf { public int hashCode() { return Objects.hash(relType, subRelType, relClass, source, target, collectedfrom); } + } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java index 711b1ca68..11316f36e 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java @@ -7,6 +7,8 @@ import java.util.List; public class Result extends OafEntity implements Serializable { + private List measures; + private List author; // resulttype allows subclassing results into publications | datasets | software @@ -51,6 +53,14 @@ public class Result extends OafEntity implements Serializable { private List instance; + public List getMeasures() { + return measures; + } + + public void setMeasures(List measures) { + this.measures = measures; + } + public List getAuthor() { return author; } @@ -229,6 +239,8 @@ public class Result extends OafEntity implements Serializable { Result r = (Result) e; + //TODO consider merging also Measures + instance = mergeLists(instance, r.getInstance()); if (r.getBestaccessright() != 
null && compareTrust(this, r) < 0) diff --git a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java index 26376349c..25d929db2 100644 --- a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java +++ b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java @@ -1,4 +1,41 @@ package eu.dnetlib.dhp.schema.oaf; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Lists; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; + public class MeasureTest { + + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .setSerializationInclusion(JsonInclude.Include.NON_NULL); + + @Test + public void testMeasureSerialization() throws IOException { + + Measure m = new Measure(); + + m.setId("popularity"); + m.setUnit(Lists.newArrayList( + unit("score", "0.5"))); + + String s = OBJECT_MAPPER.writeValueAsString(m); + System.out.println(s); + + Measure mm = OBJECT_MAPPER.readValue(s, Measure.class); + + Assertions.assertNotNull(mm); + } + + private KeyValue unit(String key, String value) { + KeyValue unit = new KeyValue(); + unit.setKey(key); + unit.setValue(value); + return unit; + } + } From c54d7ca18c1c4a4f8fe64b05d94d3e74fb25b4a0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 4 May 2020 17:02:40 +0200 Subject: [PATCH 04/53] example measures in serialization test --- .../dnetlib/dhp/schema/oaf/MeasureTest.java | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java index 25d929db2..4275e2c56 100644 --- a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java +++ 
b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java @@ -1,13 +1,14 @@ package eu.dnetlib.dhp.schema.oaf; import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.io.IOException; +import java.util.List; public class MeasureTest { @@ -17,16 +18,22 @@ public class MeasureTest { @Test public void testMeasureSerialization() throws IOException { - Measure m = new Measure(); - - m.setId("popularity"); - m.setUnit(Lists.newArrayList( + Measure popularity = new Measure(); + popularity.setId("popularity"); + popularity.setUnit(Lists.newArrayList( unit("score", "0.5"))); + Measure influence = new Measure(); + influence.setId("influence"); + influence.setUnit(Lists.newArrayList( + unit("score", "0.3"))); + + List m = Lists.newArrayList(popularity, influence); + String s = OBJECT_MAPPER.writeValueAsString(m); System.out.println(s); - Measure mm = OBJECT_MAPPER.readValue(s, Measure.class); + List mm = OBJECT_MAPPER.readValue(s, new TypeReference>() { }); Assertions.assertNotNull(mm); } From 22cb9e0da7ce016853c741017f98ca300e4197ab Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 15 May 2020 18:18:01 +0200 Subject: [PATCH 05/53] simple code to get file from URL --- .../eu/dnetlib/dhp/schema/oaf/Programme.java | 4 + .../dhp/actionset/h2020programme/GetFile.java | 53 +++++++++ .../h2020programme/action_set_parameters.json | 0 .../oozie_app/lib/scripts/getprogrammefile.sh | 0 .../oozie_app/lib/scripts/getprojectfile.sh | 0 .../h2020programme/oozie_app/workflow.xml | 112 ++++++++++++++++++ .../actionset/h2020programme/parameters.json | 86 ++++++++++++++ 7 files changed, 255 insertions(+) create mode 100644 
dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionset/h2020programme/GetFile.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/action_set_parameters.json create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/lib/scripts/getprogrammefile.sh create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/lib/scripts/getprojectfile.sh create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/parameters.json diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java new file mode 100644 index 000000000..f91333616 --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java @@ -0,0 +1,4 @@ +package eu.dnetlib.dhp.schema.oaf; + +public class Programme { +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionset/h2020programme/GetFile.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionset/h2020programme/GetFile.java new file mode 100644 index 000000000..2fed1a0e3 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionset/h2020programme/GetFile.java @@ -0,0 +1,53 @@ + +package eu.dnetlib.dhp.actionset.h2020programme; + +import java.io.*; +import java.net.URL; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import 
org.apache.hadoop.fs.Path; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetFile { + + private static final Log log = LogFactory.getLog(GetFile.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GetFile.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionset/h2020programme/parameters.json"))); + + Configuration conf = new Configuration(); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("hdfsPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + + conf.set("fs.defaultFS", hdfsNameNode); + FileSystem fileSystem = FileSystem.get(conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fsDataOutputStream = fileSystem.append(hdfsWritePath); + } else { + fsDataOutputStream = fileSystem.create(hdfsWritePath); + } + + InputStream is = new BufferedInputStream(new URL(fileURL).openStream()); + + org.apache.hadoop.io.IOUtils.copyBytes(is, fsDataOutputStream, 4096, true); + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/action_set_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/action_set_parameters.json new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/lib/scripts/getprogrammefile.sh b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/lib/scripts/getprogrammefile.sh new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/lib/scripts/getprojectfile.sh 
b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/lib/scripts/getprojectfile.sh new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/workflow.xml new file mode 100644 index 000000000..3e7f68401 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/workflow.xml @@ -0,0 +1,112 @@ + + + + sequenceFilePath + the path to store the sequence file of the native metadata collected + + + + mdStorePath + the path of the native mdstore + + + + apiDescription + A json encoding of the API Description class + + + + dataSourceInfo + A json encoding of the Datasource Info + + + identifierPath + An xpath to retrieve the metadata idnentifier for the generation of DNet Identifier + + + + metadataEncoding + The type of the metadata XML/JSON + + + + timestamp + The timestamp of the collection date + + + + workflowId + The identifier of the workflow + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.dhp.collection.worker.DnetCollectorWorker + -p${sequenceFilePath} + -a${apiDescription} + -n${nameNode} + -rh${rmq_host} + -ru${rmq_user} + -rp${rmq_pwd} + -rr${rmq_report} + -ro${rmq_ongoing} + -usandro.labruzzo + -w${workflowId} + + + + + + + ${jobTracker} + ${nameNode} + yarn + cluster + GenerateNativeStoreSparkJob + eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob + dhp-aggregations-1.0.0-SNAPSHOT.jar + --num-executors 50 --conf spark.yarn.jars="hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2" + --encoding ${metadataEncoding} + --dateOfCollection ${timestamp} + --provenance ${dataSourceInfo} + 
--xpath${identifierPath} + --input${sequenceFilePath} + --output${mdStorePath} + -rh${rmq_host} + -ru${rmq_user} + -rp${rmq_pwd} + -rr${rmq_report} + -ro${rmq_ongoing} + -w${workflowId} + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/parameters.json new file mode 100644 index 000000000..4a6aec5ee --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/parameters.json @@ -0,0 +1,86 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "e", + "paramLongName": "encoding", + "paramDescription": "the encoding of the input record should be JSON or XML", + "paramRequired": true + }, + { + "paramName": "d", + "paramLongName": "dateOfCollection", + "paramDescription": "the date when the record has been stored", + "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "provenance", + "paramDescription": "the infos about the provenance of the collected records", + "paramRequired": true + }, + { + "paramName": "x", + "paramLongName": "xpath", + "paramDescription": "the xpath to identify the record identifier", + "paramRequired": true + }, + { + "paramName": "i", + "paramLongName": "input", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "o", + "paramLongName": "output", + "paramDescription": "the path of the result DataFrame on HDFS", + "paramRequired": true + }, + { + "paramName": "ru", + "paramLongName": "rabbitUser", + "paramDescription": "the user to connect with RabbitMq for messaging", + "paramRequired": true + }, + { + "paramName": "rp", + "paramLongName": "rabbitPassword", + 
"paramDescription": "the password to connect with RabbitMq for messaging", + "paramRequired": true + }, + { + "paramName": "rh", + "paramLongName": "rabbitHost", + "paramDescription": "the host of the RabbitMq server", + "paramRequired": true + }, + { + "paramName": "ro", + "paramLongName": "rabbitOngoingQueue", + "paramDescription": "the name of the ongoing queue", + "paramRequired": true + }, + { + "paramName": "rr", + "paramLongName": "rabbitReportQueue", + "paramDescription": "the name of the report queue", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workflowId", + "paramDescription": "the identifier of the dnet Workflow", + "paramRequired": true + }, + { + "paramName": "t", + "paramLongName": "isTest", + "paramDescription": "the name of the report queue", + "paramRequired": false + } +] \ No newline at end of file From 83c262a483a4ecc4d7a12d6ddab2086c2f447387 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 15 May 2020 18:18:31 +0200 Subject: [PATCH 06/53] workflow to download the files --- .../h2020programme/oozie_app/workflow.xml | 145 +++++++----------- 1 file changed, 57 insertions(+), 88 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/workflow.xml index 3e7f68401..9b200c2a9 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/workflow.xml @@ -1,112 +1,81 @@ - + - sequenceFilePath - the path to store the sequence file of the native metadata collected + projectFileURL + the url where to get the projects file - mdStorePath - the path of the native mdstore + programmeFileURL + the url where to get the programme file - apiDescription - A json encoding of the 
API Description class - - - - dataSourceInfo - A json encoding of the Datasource Info - - - identifierPath - An xpath to retrieve the metadata idnentifier for the generation of DNet Identifier - - - - metadataEncoding - The type of the metadata XML/JSON - - - - timestamp - The timestamp of the collection date - - - - workflowId - The identifier of the workflow + outputPath + path where to store the action set - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + - - - - + + - + - - - ${jobTracker} - ${nameNode} - eu.dnetlib.dhp.collection.worker.DnetCollectorWorker - -p${sequenceFilePath} - -a${apiDescription} - -n${nameNode} - -rh${rmq_host} - -ru${rmq_user} - -rp${rmq_pwd} - -rr${rmq_report} - -ro${rmq_ongoing} - -usandro.labruzzo - -w${workflowId} - - - - - - - ${jobTracker} - ${nameNode} - yarn - cluster - GenerateNativeStoreSparkJob - eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob - dhp-aggregations-1.0.0-SNAPSHOT.jar - --num-executors 50 --conf spark.yarn.jars="hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2" - --encoding ${metadataEncoding} - --dateOfCollection ${timestamp} - --provenance ${dataSourceInfo} - --xpath${identifierPath} - --input${sequenceFilePath} - --output${mdStorePath} - -rh${rmq_host} - -ru${rmq_user} - -rp${rmq_pwd} - -rr${rmq_report} - -ro${rmq_ongoing} - -w${workflowId} - - - - - - - - - + + + eu.dnetlib.dhp.actionset.h2020programme.GetFile + --hdfsNameNode${nameNode} + --fileUrl${projectFileURL} + --hdfsPath${workingDir}/projects.csv + + + + + eu.dnetlib.dhp.actionset.h2020programme.GetFile + --hdfsNameNode${nameNode} + --fileUrl${programmeFileURL} + --hdfsPath${workingDir}/programme.csv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file From 5a648016ef7061354f4ae795c0ab8c558c52ccff Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 15 May 2020 18:18:50 +0200 Subject: [PATCH 07/53] parameters from the GetFile class 
--- .../actionset/h2020programme/parameters.json | 91 +++---------------- 1 file changed, 13 insertions(+), 78 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/parameters.json index 4a6aec5ee..40ad5bf36 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/parameters.json @@ -1,86 +1,21 @@ [ + { - "paramName": "issm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "when true will stop SparkSession after job execution", - "paramRequired": false + "paramName": "fu", + "paramLongName" : "fileURL", + "paramDescription" : "the url of the file to download", + "paramRequired" : true }, { - "paramName": "e", - "paramLongName": "encoding", - "paramDescription": "the encoding of the input record should be JSON or XML", - "paramRequired": true + "paramName": "hp", + "paramLongName" : "hdfsPath", + "paramDescription" : "where to save the file", + "paramRequired" : true }, { - "paramName": "d", - "paramLongName": "dateOfCollection", - "paramDescription": "the date when the record has been stored", - "paramRequired": true - }, - { - "paramName": "p", - "paramLongName": "provenance", - "paramDescription": "the infos about the provenance of the collected records", - "paramRequired": true - }, - { - "paramName": "x", - "paramLongName": "xpath", - "paramDescription": "the xpath to identify the record identifier", - "paramRequired": true - }, - { - "paramName": "i", - "paramLongName": "input", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "o", - "paramLongName": "output", - "paramDescription": "the path of the result DataFrame on HDFS", - "paramRequired": true - }, - { - 
"paramName": "ru", - "paramLongName": "rabbitUser", - "paramDescription": "the user to connect with RabbitMq for messaging", - "paramRequired": true - }, - { - "paramName": "rp", - "paramLongName": "rabbitPassword", - "paramDescription": "the password to connect with RabbitMq for messaging", - "paramRequired": true - }, - { - "paramName": "rh", - "paramLongName": "rabbitHost", - "paramDescription": "the host of the RabbitMq server", - "paramRequired": true - }, - { - "paramName": "ro", - "paramLongName": "rabbitOngoingQueue", - "paramDescription": "the name of the ongoing queue", - "paramRequired": true - }, - { - "paramName": "rr", - "paramLongName": "rabbitReportQueue", - "paramDescription": "the name of the report queue", - "paramRequired": true - }, - { - "paramName": "w", - "paramLongName": "workflowId", - "paramDescription": "the identifier of the dnet Workflow", - "paramRequired": true - }, - { - "paramName": "t", - "paramLongName": "isTest", - "paramDescription": "the name of the report queue", - "paramRequired": false + "paramName": "hnn", + "paramLongName" : "hdfsNameNode", + "paramDescription" : "the name node", + "paramRequired" : true } ] \ No newline at end of file From abc45f2708e7497cd16d1cc09041737be6d07f55 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 18 May 2020 13:04:06 +0200 Subject: [PATCH 08/53] added dnet-45 HttpConnector and related Classes, produced the POJO for projects and programme --- .../project/CollectorPluginErrorLogList.java | 20 ++ .../project/CollectorServiceException.java | 20 ++ .../project}/GetFile.java | 4 +- .../actionmanager/project/HttpConnector.java | 240 ++++++++++++++++++ .../project/PrepareProjects.java | 139 ++++++++++ .../dhp/actionmanager/project/Programme.java | 52 ++++ .../dhp/actionmanager/project/Project.java | 196 ++++++++++++++ .../project/SparkAtomicActionJob.java | 74 ++++++ .../project/action_set_parameters.json | 26 ++ .../project}/oozie_app/workflow.xml | 4 +- .../project}/parameters.json | 
0 .../h2020programme/action_set_parameters.json | 0 12 files changed, 771 insertions(+), 4 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorPluginErrorLogList.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorServiceException.java rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/{actionset/h2020programme => actionmanager/project}/GetFile.java (92%) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/HttpConnector.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Programme.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Project.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/action_set_parameters.json rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/{actionset/h2020programme => actionmanager/project}/oozie_app/workflow.xml (95%) rename dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/{actionset/h2020programme => actionmanager/project}/parameters.json (100%) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/action_set_parameters.json diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorPluginErrorLogList.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorPluginErrorLogList.java new file mode 100644 index 000000000..bc00e4604 --- /dev/null +++ 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorPluginErrorLogList.java @@ -0,0 +1,20 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import java.util.LinkedList; + +public class CollectorPluginErrorLogList extends LinkedList { + + private static final long serialVersionUID = -6925786561303289704L; + + @Override + public String toString() { + String log = new String(); + int index = 0; + for (String errorMessage : this) { + log += String.format("Retry #%s: %s / ", index++, errorMessage); + } + return log; + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorServiceException.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorServiceException.java new file mode 100644 index 000000000..a417de50d --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorServiceException.java @@ -0,0 +1,20 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +public class CollectorServiceException extends Exception { + + private static final long serialVersionUID = 7523999812098059764L; + + public CollectorServiceException(String string) { + super(string); + } + + public CollectorServiceException(String string, Throwable exception) { + super(string, exception); + } + + public CollectorServiceException(Throwable exception) { + super(exception); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionset/h2020programme/GetFile.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/GetFile.java similarity index 92% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionset/h2020programme/GetFile.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/GetFile.java index 2fed1a0e3..bbf59a20f 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionset/h2020programme/GetFile.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/GetFile.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionset.h2020programme; +package eu.dnetlib.dhp.actionmanager.project; import java.io.*; import java.net.URL; @@ -24,7 +24,7 @@ public class GetFile { .toString( GetFile.class .getResourceAsStream( - "/eu/dnetlib/dhp/actionset/h2020programme/parameters.json"))); + "/eu/dnetlib/dhp/actionmanager/project/parameters.json"))); Configuration conf = new Configuration(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/HttpConnector.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/HttpConnector.java new file mode 100644 index 000000000..63f67f145 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/HttpConnector.java @@ -0,0 +1,240 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import java.io.IOException; +import java.io.InputStream; +import java.net.*; +import java.security.GeneralSecurityException; +import java.security.cert.X509Certificate; +import java.util.List; +import java.util.Map; + +import javax.net.ssl.HttpsURLConnection; +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * @author jochen, michele, andrea + */ +public class HttpConnector { + + private static final Log log = LogFactory.getLog(HttpConnector.class); + + private int maxNumberOfRetry = 6; + private int defaultDelay = 120; // seconds + private int readTimeOut = 120; // seconds + + private String responseType = null; + + private String userAgent = "Mozilla/5.0 (compatible; OAI; 
+http://www.openaire.eu)"; + + public HttpConnector() { + CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL)); + } + + /** + * Given the URL returns the content via HTTP GET + * + * @param requestUrl the URL + * @return the content of the downloaded resource + * @throws CollectorServiceException when retrying more than maxNumberOfRetry times + */ + public String getInputSource(final String requestUrl) throws CollectorServiceException { + return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList()); + } + + /** + * Given the URL returns the content as a stream via HTTP GET + * + * @param requestUrl the URL + * @return the content of the downloaded resource as InputStream + * @throws CollectorServiceException when retrying more than maxNumberOfRetry times + */ + public InputStream getInputSourceAsStream(final String requestUrl) throws CollectorServiceException { + return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList()); + } + + private String attemptDownlaodAsString(final String requestUrl, final int retryNumber, + final CollectorPluginErrorLogList errorList) + throws CollectorServiceException { + try { + InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList()); + try { + return IOUtils.toString(s); + } catch (IOException e) { + log.error("error while retrieving from http-connection occured: " + requestUrl, e); + Thread.sleep(defaultDelay * 1000); + errorList.add(e.getMessage()); + return attemptDownlaodAsString(requestUrl, retryNumber + 1, errorList); + } finally { + IOUtils.closeQuietly(s); + } + } catch (InterruptedException e) { + throw new CollectorServiceException(e); + } + } + + private InputStream attemptDownload(final String requestUrl, final int retryNumber, + final CollectorPluginErrorLogList errorList) + throws CollectorServiceException { + + if (retryNumber > maxNumberOfRetry) { + throw new CollectorServiceException("Max number of retries exceeded. 
Cause: \n " + errorList); + } + + log.debug("Downloading " + requestUrl + " - try: " + retryNumber); + try { + InputStream input = null; + + try { + final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection(); + urlConn.setInstanceFollowRedirects(false); + urlConn.setReadTimeout(readTimeOut * 1000); + urlConn.addRequestProperty("User-Agent", userAgent); + + if (log.isDebugEnabled()) { + logHeaderFields(urlConn); + } + + int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); + if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) { + log.warn("waiting and repeating request after " + retryAfter + " sec."); + Thread.sleep(retryAfter * 1000); + errorList.add("503 Service Unavailable"); + urlConn.disconnect(); + return attemptDownload(requestUrl, retryNumber + 1, errorList); + } else if ((urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM) + || (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP)) { + final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); + log.debug("The requested url has been moved to " + newUrl); + errorList + .add( + String + .format( + "%s %s. 
Moved to: %s", urlConn.getResponseCode(), urlConn.getResponseMessage(), + newUrl)); + urlConn.disconnect(); + return attemptDownload(newUrl, retryNumber + 1, errorList); + } else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) { + log + .error( + String + .format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage())); + Thread.sleep(defaultDelay * 1000); + errorList.add(String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage())); + urlConn.disconnect(); + return attemptDownload(requestUrl, retryNumber + 1, errorList); + } else { + input = urlConn.getInputStream(); + responseType = urlConn.getContentType(); + return input; + } + } catch (IOException e) { + log.error("error while retrieving from http-connection occured: " + requestUrl, e); + Thread.sleep(defaultDelay * 1000); + errorList.add(e.getMessage()); + return attemptDownload(requestUrl, retryNumber + 1, errorList); + } + } catch (InterruptedException e) { + throw new CollectorServiceException(e); + } + } + + private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { + log.debug("StatusCode: " + urlConn.getResponseMessage()); + + for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { + if (e.getKey() != null) { + for (String v : e.getValue()) { + log.debug(" key: " + e.getKey() + " - value: " + v); + } + } + } + } + + private int obtainRetryAfter(final Map> headerMap) { + for (String key : headerMap.keySet()) { + if ((key != null) && key.toLowerCase().equals("retry-after") && (headerMap.get(key).size() > 0) + && NumberUtils.isCreatable(headerMap.get(key).get(0))) { + return Integer + .parseInt(headerMap.get(key).get(0)) + 10; + } + } + return -1; + } + + private String obtainNewLocation(final Map> headerMap) throws CollectorServiceException { + for (String key : headerMap.keySet()) { + if ((key != null) && key.toLowerCase().equals("location") && (headerMap.get(key).size() > 0)) { + return headerMap.get(key).get(0); + } + } + 
throw new CollectorServiceException("The requested url has been MOVED, but 'location' param is MISSING"); + } + + /** + * register for https scheme; this is a workaround and not intended for the use in trusted environments + */ + public void initTrustManager() { + final X509TrustManager tm = new X509TrustManager() { + + @Override + public void checkClientTrusted(final X509Certificate[] xcs, final String string) { + } + + @Override + public void checkServerTrusted(final X509Certificate[] xcs, final String string) { + } + + @Override + public X509Certificate[] getAcceptedIssuers() { + return null; + } + }; + try { + final SSLContext ctx = SSLContext.getInstance("TLS"); + ctx.init(null, new TrustManager[] { + tm + }, null); + HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory()); + } catch (GeneralSecurityException e) { + log.fatal(e); + throw new IllegalStateException(e); + } + } + + public int getMaxNumberOfRetry() { + return maxNumberOfRetry; + } + + public void setMaxNumberOfRetry(final int maxNumberOfRetry) { + this.maxNumberOfRetry = maxNumberOfRetry; + } + + public int getDefaultDelay() { + return defaultDelay; + } + + public void setDefaultDelay(final int defaultDelay) { + this.defaultDelay = defaultDelay; + } + + public int getReadTimeOut() { + return readTimeOut; + } + + public void setReadTimeOut(final int readTimeOut) { + this.readTimeOut = readTimeOut; + } + + public String getResponseType() { + return responseType; + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java new file mode 100644 index 000000000..8955edeb4 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -0,0 +1,139 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import java.io.BufferedWriter; +import java.io.Closeable; +import 
java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; +import java.sql.ResultSet; +import java.util.Arrays; +import java.util.List; +import java.util.Set; + +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVFormat; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.common.RelationInverse; +import eu.dnetlib.dhp.schema.oaf.Relation; + +public class PrepareProjects implements Closeable { + private static final Log log = LogFactory.getLog(PrepareProjects.class); + private final Configuration conf; + private final BufferedWriter writer; + private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private final HttpConnector httpConnector; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + PrepareProjects.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/project/parameters.json"))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("hdfsPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + + try (final PrepareProjects prepareProjects = new PrepareProjects(hdfsPath, hdfsNameNode)) { + + log.info("Getting projects..."); + prepareProjects.execute(fileURL); + + } + } + + public void execute(final String fileURL) throws Exception { + + String projects = httpConnector.getInputSource(fileURL); + final CSVFormat format = CSVFormat.EXCEL + .withHeader() + 
.withDelimiter(';') + .withQuote('"') + .withTrim(); + final CSVParser parser = CSVParser.parse(projects, format); + final Set headers = parser.getHeaderMap().keySet(); + } + + public List processBlacklistEntry(ResultSet rs) { + try { + Relation direct = new Relation(); + Relation inverse = new Relation(); + + String source_prefix = ModelSupport.entityIdPrefix.get(rs.getString("source_type")); + String target_prefix = ModelSupport.entityIdPrefix.get(rs.getString("target_type")); + + String source_direct = source_prefix + "|" + rs.getString("source"); + direct.setSource(source_direct); + inverse.setTarget(source_direct); + + String target_direct = target_prefix + "|" + rs.getString("target"); + direct.setTarget(target_direct); + inverse.setSource(target_direct); + + String encoding = rs.getString("relationship"); + RelationInverse ri = ModelSupport.relationInverseMap.get(encoding); + direct.setRelClass(ri.getRelation()); + inverse.setRelClass(ri.getInverse()); + direct.setRelType(ri.getRelType()); + inverse.setRelType(ri.getRelType()); + direct.setSubRelType(ri.getSubReltype()); + inverse.setSubRelType(ri.getSubReltype()); + + return Arrays.asList(direct, inverse); + + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void close() throws IOException { + writer.close(); + } + + public PrepareProjects( + final String hdfsPath, String hdfsNameNode) + throws Exception { + + this.conf = new Configuration(); + this.conf.set("fs.defaultFS", hdfsNameNode); + this.httpConnector = new HttpConnector(); + FileSystem fileSystem = FileSystem.get(this.conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fsDataOutputStream = fileSystem.append(hdfsWritePath); + } else { + fsDataOutputStream = fileSystem.create(hdfsWritePath); + } + + this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + + } + + 
protected void writeRelation(final Relation r) { + try { + writer.write(OBJECT_MAPPER.writeValueAsString(r)); + writer.newLine(); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Programme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Programme.java new file mode 100644 index 000000000..20877b1a1 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Programme.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import java.io.Serializable; + +public class Programme implements Serializable { + private String rcn; + private String code; + private String title; + private String shortTitle; + private String language; + + public String getRcn() { + return rcn; + } + + public void setRcn(String rcn) { + this.rcn = rcn; + } + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getShortTitle() { + return shortTitle; + } + + public void setShortTitle(String shortTitle) { + this.shortTitle = shortTitle; + } + + public String getLanguage() { + return language; + } + + public void setLanguage(String language) { + this.language = language; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Project.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Project.java new file mode 100644 index 000000000..abee7f861 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Project.java @@ -0,0 +1,196 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import java.io.Serializable; + +public class Project implements Serializable { + 
private String rcn; + private String id; + private String acronym; + private String status; + private String programme; + private String topics; + private String frameworkProgramme; + private String title; + private String startDate; + private String endDate; + private String projectUrl; + private String objective; + private String totalCost; + private String ecMaxContribution; + private String call; + private String fundingScheme; + private String coordinator; + private String coordinatorCountry; + private String participants; + private String participantCountries; + private String subjects; + + public String getRcn() { + return rcn; + } + + public void setRcn(String rcn) { + this.rcn = rcn; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getAcronym() { + return acronym; + } + + public void setAcronym(String acronym) { + this.acronym = acronym; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getProgramme() { + return programme; + } + + public void setProgramme(String programme) { + this.programme = programme; + } + + public String getTopics() { + return topics; + } + + public void setTopics(String topics) { + this.topics = topics; + } + + public String getFrameworkProgramme() { + return frameworkProgramme; + } + + public void setFrameworkProgramme(String frameworkProgramme) { + this.frameworkProgramme = frameworkProgramme; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getStartDate() { + return startDate; + } + + public void setStartDate(String startDate) { + this.startDate = startDate; + } + + public String getEndDate() { + return endDate; + } + + public void setEndDate(String endDate) { + this.endDate = endDate; + } + + public String getProjectUrl() { + return projectUrl; + } + + public void 
setProjectUrl(String projectUrl) { + this.projectUrl = projectUrl; + } + + public String getObjective() { + return objective; + } + + public void setObjective(String objective) { + this.objective = objective; + } + + public String getTotalCost() { + return totalCost; + } + + public void setTotalCost(String totalCost) { + this.totalCost = totalCost; + } + + public String getEcMaxContribution() { + return ecMaxContribution; + } + + public void setEcMaxContribution(String ecMaxContribution) { + this.ecMaxContribution = ecMaxContribution; + } + + public String getCall() { + return call; + } + + public void setCall(String call) { + this.call = call; + } + + public String getFundingScheme() { + return fundingScheme; + } + + public void setFundingScheme(String fundingScheme) { + this.fundingScheme = fundingScheme; + } + + public String getCoordinator() { + return coordinator; + } + + public void setCoordinator(String coordinator) { + this.coordinator = coordinator; + } + + public String getCoordinatorCountry() { + return coordinatorCountry; + } + + public void setCoordinatorCountry(String coordinatorCountry) { + this.coordinatorCountry = coordinatorCountry; + } + + public String getParticipants() { + return participants; + } + + public void setParticipants(String participants) { + this.participants = participants; + } + + public String getParticipantCountries() { + return participantCountries; + } + + public void setParticipantCountries(String participantCountries) { + this.participantCountries = participantCountries; + } + + public String getSubjects() { + return subjects; + } + + public void setSubjects(String subjects) { + this.subjects = subjects; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java new file mode 100644 index 000000000..b8703378e --- /dev/null +++ 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -0,0 +1,74 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; + +public class SparkAtomicActionJob { + private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + SparkAtomicActionJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/project/action_set_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String projectPath = parser.get("projectPath"); + log.info("projectPath: {}", projectPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); + + final String programmePath = parser.get("programmePath"); + log.info("programmePath {}: ", programmePath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + removeOutputDir(spark, outputPath); + getAtomicActions( + spark, + projectPath, + programmePath, + outputPath); + }); + } + + private static void removeOutputDir(SparkSession 
spark, String path) { + HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); + } + + private static void getAtomicActions(SparkSession spark, String projectPatj, String programmePath, + String outputPath) { + + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/action_set_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/action_set_parameters.json new file mode 100644 index 000000000..ca9ae9e97 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/action_set_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false +}, +{ +"paramName": "pjfu", +"paramLongName": "projectsFileURL", +"paramDescription": "the URL from where to get the projects file", +"paramRequired": true +}, +{ +"paramName": "pfu", +"paramLongName": "programmeFileURL", +"paramDescription": "the URL from where to get the programme file", +"paramRequired": true +}, +{ +"paramName": "o", +"paramLongName": "outputPath", +"paramDescription": "the path of the new ActionSet", +"paramRequired": true +} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml similarity index 95% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/workflow.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index 9b200c2a9..5bfa2e7c4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/workflow.xml +++ 
b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -33,7 +33,7 @@ eu.dnetlib.dhp.actionset.h2020programme.GetFile --hdfsNameNode${nameNode} - --fileUrl${projectFileURL} + --fileURL${projectFileURL} --hdfsPath${workingDir}/projects.csv @@ -44,7 +44,7 @@ eu.dnetlib.dhp.actionset.h2020programme.GetFile --hdfsNameNode${nameNode} - --fileUrl${programmeFileURL} + --fileURL${programmeFileURL} --hdfsPath${workingDir}/programme.csv diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json similarity index 100% rename from dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/parameters.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/action_set_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/action_set_parameters.json deleted file mode 100644 index e69de29bb..000000000 From 4f1ff7ba73690c64f2f35fca03a959ac605d4247 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 18 May 2020 13:04:39 +0200 Subject: [PATCH 09/53] added dependency to org.apache.commons common-csv --- dhp-workflows/dhp-aggregation/pom.xml | 50 ++++----------------------- 1 file changed, 6 insertions(+), 44 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 1c5465c14..361517af9 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -38,48 +38,6 @@ ${project.version} - - eu.dnetlib - dnet-actionmanager-common - - - eu.dnetlib - dnet-openaireplus-mapping-utils - - - saxonica - saxon - - - saxonica - saxon-dom - - - 
jgrapht - jgrapht - - - net.sf.ehcache - ehcache - - - org.springframework - spring-test - - - org.apache.* - * - - - apache - * - - - - - eu.dnetlib - dnet-openaire-data-protos - net.sf.saxon @@ -100,11 +58,15 @@ jaxen + - org.apache.hadoop - hadoop-distcp + org.apache.commons + commons-csv + 1.8 + + From 23bbac7d7cb20f9cadb0d68fcfcaf2dafe094b9b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 18 May 2020 13:05:03 +0200 Subject: [PATCH 10/53] - --- .../dhp/actionmanager/project/action_set_parameters.json | 8 ++++---- .../dhp/actionmanager/project/oozie_app/workflow.xml | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/action_set_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/action_set_parameters.json index ca9ae9e97..a0856e10e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/action_set_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/action_set_parameters.json @@ -6,14 +6,14 @@ "paramRequired": false }, { -"paramName": "pjfu", -"paramLongName": "projectsFileURL", +"paramName": "pjp", +"paramLongName": "projectPath", "paramDescription": "the URL from where to get the projects file", "paramRequired": true }, { -"paramName": "pfu", -"paramLongName": "programmeFileURL", +"paramName": "pp", +"paramLongName": "programmePath", "paramDescription": "the URL from where to get the programme file", "paramRequired": true }, diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index 5bfa2e7c4..992c2ded1 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -31,7 +31,7 @@ - eu.dnetlib.dhp.actionset.h2020programme.GetFile + eu.dnetlib.dhp.actionmanager.project.GetFile --hdfsNameNode${nameNode} --fileURL${projectFileURL} --hdfsPath${workingDir}/projects.csv @@ -42,7 +42,7 @@ - eu.dnetlib.dhp.actionset.h2020programme.GetFile + eu.dnetlib.dhp.actionmanager.project.GetFile --hdfsNameNode${nameNode} --fileURL${programmeFileURL} --hdfsPath${workingDir}/programme.csv @@ -56,7 +56,7 @@ - + From f0f14caf996828237e7ce6bd4a3720c7ae01ae0f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 18 May 2020 13:06:16 +0200 Subject: [PATCH 11/53] removed script files for shell actions not performed --- .../h2020programme/oozie_app/lib/scripts/getprogrammefile.sh | 0 .../h2020programme/oozie_app/lib/scripts/getprojectfile.sh | 0 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/lib/scripts/getprogrammefile.sh delete mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/lib/scripts/getprojectfile.sh diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/lib/scripts/getprogrammefile.sh b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/lib/scripts/getprogrammefile.sh deleted file mode 100644 index e69de29bb..000000000 diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/lib/scripts/getprojectfile.sh b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionset/h2020programme/oozie_app/lib/scripts/getprojectfile.sh deleted file mode 100644 index 
e69de29bb..000000000 From 9447d78ef38abc918e6f0b7bb4f338ebe9f02c86 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 19 May 2020 18:42:50 +0200 Subject: [PATCH 12/53] added preparation classes --- .../dhp/actionmanager/project/GetFile.java | 53 ----- .../project/PrepareProgramme.java | 124 ++++++++++++ .../project/PrepareProjects.java | 189 ++++++++---------- .../project/SparkAtomicActionJob.java | 61 +++++- .../project/csvutils/CSVParser.java | 37 ++++ .../CSVProgramme.java} | 4 +- .../CSVProject.java} | 5 +- .../project/csvutils/ReadCSV.java | 98 +++++++++ .../CollectorPluginErrorLogList.java | 2 +- .../CollectorServiceException.java | 2 +- .../{ => httpconnector}/HttpConnector.java | 2 +- .../project/prepare_programme_parameters.json | 26 +++ .../project/prepare_project_parameters.json | 20 ++ .../actionmanager/project/CSVParserTest.java | 43 ++++ .../project/PrepareProgrammeTest.java | 4 + .../project/SparkUpdateProjectTest.java | 4 + .../httpconnector/HttpConnectorTest.java | 4 + .../preparedProgramme_whole.json | 0 .../dhp/actionmanager/project/programme.csv | 0 .../dhp/actionmanager/projects_subset.json | 0 .../dhp/actionmanager/whole_programme.json | 0 21 files changed, 507 insertions(+), 171 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/GetFile.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVParser.java rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/{Programme.java => csvutils/CSVProgramme.java} (87%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/{Project.java => csvutils/CSVProject.java} (97%) create mode 100644 
dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/ReadCSV.java rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/{ => httpconnector}/CollectorPluginErrorLogList.java (86%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/{ => httpconnector}/CollectorServiceException.java (86%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/{ => httpconnector}/HttpConnector.java (99%) create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_programme_parameters.json create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/programme.csv create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/projects_subset.json create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/whole_programme.json diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/GetFile.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/GetFile.java 
deleted file mode 100644 index bbf59a20f..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/GetFile.java +++ /dev/null @@ -1,53 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.project; - -import java.io.*; -import java.net.URL; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - -public class GetFile { - - private static final Log log = LogFactory.getLog(GetFile.class); - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - GetFile.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/project/parameters.json"))); - - Configuration conf = new Configuration(); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - final String hdfsPath = parser.get("hdfsPath"); - final String hdfsNameNode = parser.get("hdfsNameNode"); - - conf.set("fs.defaultFS", hdfsNameNode); - FileSystem fileSystem = FileSystem.get(conf); - Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; - if (fileSystem.exists(hdfsWritePath)) { - fsDataOutputStream = fileSystem.append(hdfsWritePath); - } else { - fsDataOutputStream = fileSystem.create(hdfsWritePath); - } - - InputStream is = new BufferedInputStream(new URL(fileURL).openStream()); - - org.apache.hadoop.io.IOUtils.copyBytes(is, fsDataOutputStream, 4096, true); - - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java new file mode 100644 
index 000000000..a5abb9ea7 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java @@ -0,0 +1,124 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.util.HashMap; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import scala.Tuple2; + +public class PrepareProgramme { + + private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final HashMap programmeMap = new HashMap<>(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + PrepareProgramme.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/project/prepare_programme_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String programmePath = parser.get("programmePath"); + log.info("programmePath {}: ", programmePath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}: ", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + 
isSparkSessionManaged, + spark -> { + removeOutputDir(spark, outputPath); + exec(spark, programmePath, outputPath); + }); + } + + private static void removeOutputDir(SparkSession spark, String path) { + HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); + } + + private static void exec(SparkSession spark, String programmePath, String outputPath) { + Dataset programme = readPath(spark, programmePath, CSVProgramme.class); + + programme + .toJavaRDD() + .filter(p -> !p.getCode().contains("FP7")) + .mapToPair(csvProgramme -> new Tuple2<>(csvProgramme.getCode(), csvProgramme)) + .reduceByKey((a, b) -> { + if (StringUtils.isEmpty(a.getShortTitle())) { + if (StringUtils.isEmpty(b.getShortTitle())) { + if (StringUtils.isEmpty(a.getTitle())) { + if (StringUtils.isNotEmpty(b.getTitle())) { + a.setShortTitle(b.getTitle()); + a.setLanguage(b.getLanguage()); + } + } else {// notIsEmpty a.getTitle + if (StringUtils.isEmpty(b.getTitle())) { + a.setShortTitle(a.getTitle()); + } else { + if (b.getLanguage().equalsIgnoreCase("en")) { + a.setShortTitle(b.getTitle()); + a.setLanguage(b.getLanguage()); + } else { + a.setShortTitle(a.getTitle()); + } + } + } + } else {// not isEmpty b.getShortTitle + a.setShortTitle(b.getShortTitle()); + // a.setLanguage(b.getLanguage()); + } + } + return a; + + }) + .map(p -> { + CSVProgramme csvProgramme = p._2(); + if (StringUtils.isEmpty(csvProgramme.getShortTitle())) { + csvProgramme.setShortTitle(csvProgramme.getTitle()); + } + return OBJECT_MAPPER.writeValueAsString(csvProgramme); + }) + .saveAsTextFile(outputPath); + + } + + public static Dataset readPath( + SparkSession spark, String inputPath, Class clazz) { + return spark + .read() + .textFile(inputPath) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index 8955edeb4..1c98199f8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -1,139 +1,108 @@ package eu.dnetlib.dhp.actionmanager.project; -import java.io.BufferedWriter; -import java.io.Closeable; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.nio.charset.StandardCharsets; -import java.sql.ResultSet; -import java.util.Arrays; -import java.util.List; -import java.util.Set; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import org.apache.commons.csv.CSVParser; -import org.apache.commons.csv.CSVFormat; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.common.RelationInverse; -import 
eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.common.HdfsSupport; +import scala.Tuple2; -public class PrepareProjects implements Closeable { - private static final Log log = LogFactory.getLog(PrepareProjects.class); - private final Configuration conf; - private final BufferedWriter writer; - private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final HttpConnector httpConnector; +public class PrepareProjects { - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - PrepareProjects.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/project/parameters.json"))); + private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final HashMap programmeMap = new HashMap<>(); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + PrepareProjects.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); - final String fileURL = parser.get("fileURL"); - final String hdfsPath = parser.get("hdfsPath"); - final String hdfsNameNode = parser.get("hdfsNameNode"); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); - try (final PrepareProjects prepareProjects = new PrepareProjects(hdfsPath, hdfsNameNode)) { + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - log.info("Getting projects..."); - prepareProjects.execute(fileURL); + final String projectPath = parser.get("projectPath"); + log.info("projectPath {}: ", projectPath); - } + final String outputPath = parser.get("outputPath"); + 
log.info("outputPath {}: ", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + removeOutputDir(spark, outputPath); + exec(spark, projectPath, outputPath); + }); } - public void execute(final String fileURL) throws Exception { - - String projects = httpConnector.getInputSource(fileURL); - final CSVFormat format = CSVFormat.EXCEL - .withHeader() - .withDelimiter(';') - .withQuote('"') - .withTrim(); - final CSVParser parser = CSVParser.parse(projects, format); - final Set headers = parser.getHeaderMap().keySet(); + private static void removeOutputDir(SparkSession spark, String path) { + HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } - public List processBlacklistEntry(ResultSet rs) { - try { - Relation direct = new Relation(); - Relation inverse = new Relation(); + private static void exec(SparkSession spark, String progjectPath, String outputPath) { + Dataset project = readPath(spark, progjectPath, CSVProject.class); - String source_prefix = ModelSupport.entityIdPrefix.get(rs.getString("source_type")); - String target_prefix = ModelSupport.entityIdPrefix.get(rs.getString("target_type")); + project + .toJavaRDD() + .flatMap(p -> { + List csvProjectList = new ArrayList<>(); + String[] programme = p.getProgramme().split(";"); + if (programme.length > 1) { + for (int i = 0; i < programme.length; i++) { + CSVProject csvProject = new CSVProject(); + csvProject.setProgramme(programme[i]); + csvProjectList.add(csvProject); + } + } else { + csvProjectList.add(p); + } - String source_direct = source_prefix + "|" + rs.getString("source"); - direct.setSource(source_direct); - inverse.setTarget(source_direct); - - String target_direct = target_prefix + "|" + rs.getString("target"); - direct.setTarget(target_direct); - inverse.setSource(target_direct); - - String encoding = rs.getString("relationship"); - RelationInverse ri = ModelSupport.relationInverseMap.get(encoding); - 
direct.setRelClass(ri.getRelation()); - inverse.setRelClass(ri.getInverse()); - direct.setRelType(ri.getRelType()); - inverse.setRelType(ri.getRelType()); - direct.setSubRelType(ri.getSubReltype()); - inverse.setSubRelType(ri.getSubReltype()); - - return Arrays.asList(direct, inverse); - - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public void close() throws IOException { - writer.close(); - } - - public PrepareProjects( - final String hdfsPath, String hdfsNameNode) - throws Exception { - - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); - this.httpConnector = new HttpConnector(); - FileSystem fileSystem = FileSystem.get(this.conf); - Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; - if (fileSystem.exists(hdfsWritePath)) { - fsDataOutputStream = fileSystem.append(hdfsWritePath); - } else { - fsDataOutputStream = fileSystem.create(hdfsWritePath); - } - - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + return csvProjectList.iterator(); + }) + .map(p -> OBJECT_MAPPER.writeValueAsString(p)) + .saveAsTextFile(outputPath); } - protected void writeRelation(final Relation r) { - try { - writer.write(OBJECT_MAPPER.writeValueAsString(r)); - writer.newLine(); - } catch (final Exception e) { - throw new RuntimeException(e); - } + public static Dataset readPath( + SparkSession spark, String inputPath, Class clazz) { + return spark + .read() + .textFile(inputPath) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } - } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java index b8703378e..61bd952db 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -3,22 +3,35 @@ package eu.dnetlib.dhp.actionmanager.project; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.Arrays; +import java.util.HashMap; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Programme; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.utils.DHPUtils; public class SparkAtomicActionJob { private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final HashMap programmeMap = new HashMap<>(); public static void main(String[] args) throws Exception { @@ -67,8 +80,54 @@ public class SparkAtomicActionJob { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } - private static void getAtomicActions(SparkSession spark, String projectPatj, String programmePath, + private static void getAtomicActions(SparkSession spark, String projectPatH, + String programmePath, String outputPath) { + Dataset project = readPath(spark, projectPatH, CSVProject.class); + Dataset 
programme = readPath(spark, programmePath, CSVProgramme.class); + + project + .joinWith(programme, project.col("programme").equalTo(programme.col("code")), "left") + .map(c -> { + CSVProject csvProject = c._1(); + Optional csvProgramme = Optional.ofNullable(c._2()); + if (csvProgramme.isPresent()) { + Project p = new Project(); + p + .setId( + createOpenaireId( + ModelSupport.entityIdPrefix.get("project"), + "corda__h2020", csvProject.getId())); + Programme pm = new Programme(); + pm.setCode(csvProject.getProgramme()); + pm.setDescription(csvProgramme.get().getShortTitle()); + p.setProgramme(Arrays.asList(pm)); + return p; + } + + return null; + }, Encoders.bean(Project.class)) + .filter(p -> !(p == null)) + // .map(p -> new AtomicAction<>(Project.class, p), Encoders.bean(AtomicAction.class)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath); + } + + public static Dataset readPath( + SparkSession spark, String inputPath, Class clazz) { + return spark + .read() + .textFile(inputPath) + .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); + } + + public static String createOpenaireId( + final String prefix, final String nsPrefix, final String id) { + + return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(id)); + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVParser.java new file mode 100644 index 000000000..ef29a6b6a --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVParser.java @@ -0,0 +1,37 @@ + +package eu.dnetlib.dhp.actionmanager.project.csvutils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVRecord; +import 
org.apache.commons.lang.reflect.FieldUtils; + +public class CSVParser { + + public List parse(String csvFile, String classForName) + throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { + final CSVFormat format = CSVFormat.EXCEL + .withHeader() + .withDelimiter(';') + .withQuote('"') + .withTrim(); + List ret = new ArrayList<>(); + final org.apache.commons.csv.CSVParser parser = org.apache.commons.csv.CSVParser.parse(csvFile, format); + final Set headers = parser.getHeaderMap().keySet(); + Class clazz = Class.forName(classForName); + for (CSVRecord csvRecord : parser.getRecords()) { + final Object cc = clazz.newInstance(); + for (String header : headers) { + FieldUtils.writeField(cc, header, csvRecord.get(header), true); + + } + ret.add((R) cc); + } + + return ret; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Programme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVProgramme.java similarity index 87% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Programme.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVProgramme.java index 20877b1a1..a9069e510 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Programme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVProgramme.java @@ -1,9 +1,9 @@ -package eu.dnetlib.dhp.actionmanager.project; +package eu.dnetlib.dhp.actionmanager.project.csvutils; import java.io.Serializable; -public class Programme implements Serializable { +public class CSVProgramme implements Serializable { private String rcn; private String code; private String title; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Project.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVProject.java similarity index 97% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Project.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVProject.java index abee7f861..ff18c6260 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/Project.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/CSVProject.java @@ -1,9 +1,9 @@ -package eu.dnetlib.dhp.actionmanager.project; +package eu.dnetlib.dhp.actionmanager.project.csvutils; import java.io.Serializable; -public class Project implements Serializable { +public class CSVProject implements Serializable { private String rcn; private String id; private String acronym; @@ -193,4 +193,5 @@ public class Project implements Serializable { public void setSubjects(String subjects) { this.subjects = subjects; } + } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/ReadCSV.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/ReadCSV.java new file mode 100644 index 000000000..905194232 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/ReadCSV.java @@ -0,0 +1,98 @@ + +package eu.dnetlib.dhp.actionmanager.project.csvutils; + +import java.io.BufferedWriter; +import java.io.Closeable; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import 
com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.httpconnector.HttpConnector; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class ReadCSV implements Closeable { + private static final Log log = LogFactory.getLog(ReadCSV.class); + private final Configuration conf; + private final BufferedWriter writer; + private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private String csvFile; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + ReadCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/project/parameters.json"))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("hdfsPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + final String classForName = parser.get("classForName"); + + try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL)) { + + log.info("Getting CSV file..."); + readCSV.execute(classForName); + + } + } + + public void execute(final String classForName) throws Exception { + CSVParser csvParser = new CSVParser(); + csvParser + .parse(csvFile, classForName) + .stream() + .forEach(p -> write(p)); + + } + + @Override + public void close() throws IOException { + writer.close(); + } + + public ReadCSV( + final String hdfsPath, + final String hdfsNameNode, + final String fileURL) + throws Exception { + this.conf = new Configuration(); + this.conf.set("fs.defaultFS", hdfsNameNode); + HttpConnector httpConnector = new HttpConnector(); + FileSystem fileSystem = FileSystem.get(this.conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fsDataOutputStream = fileSystem.append(hdfsWritePath); + } else { + fsDataOutputStream = fileSystem.create(hdfsWritePath); + } + + 
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.csvFile = httpConnector.getInputSource(fileURL); + ; + } + + protected void write(final Object p) { + try { + writer.write(OBJECT_MAPPER.writeValueAsString(p)); + writer.newLine(); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorPluginErrorLogList.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorPluginErrorLogList.java similarity index 86% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorPluginErrorLogList.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorPluginErrorLogList.java index bc00e4604..9d3f88265 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorPluginErrorLogList.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorPluginErrorLogList.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.project; +package eu.dnetlib.dhp.actionmanager.project.httpconnector; import java.util.LinkedList; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorServiceException.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorServiceException.java similarity index 86% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorServiceException.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorServiceException.java index a417de50d..9167d97b4 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/CollectorServiceException.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/CollectorServiceException.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.project; +package eu.dnetlib.dhp.actionmanager.project.httpconnector; public class CollectorServiceException extends Exception { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/HttpConnector.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnector.java similarity index 99% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/HttpConnector.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnector.java index 63f67f145..e20518b55 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/HttpConnector.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnector.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.project; +package eu.dnetlib.dhp.actionmanager.project.httpconnector; import java.io.IOException; import java.io.InputStream; diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_programme_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_programme_parameters.json new file mode 100644 index 000000000..a0856e10e --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_programme_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false +}, +{ 
+"paramName": "pjp", +"paramLongName": "projectPath", +"paramDescription": "the URL from where to get the projects file", +"paramRequired": true +}, +{ +"paramName": "pp", +"paramLongName": "programmePath", +"paramDescription": "the URL from where to get the programme file", +"paramRequired": true +}, +{ +"paramName": "o", +"paramLongName": "outputPath", +"paramDescription": "the path of the new ActionSet", +"paramRequired": true +} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json new file mode 100644 index 000000000..54083e108 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false +}, +{ +"paramName": "pp", +"paramLongName": "programmePath", +"paramDescription": "the URL from where to get the programme file", +"paramRequired": true +}, +{ +"paramName": "o", +"paramLongName": "outputPath", +"paramDescription": "the path of the new ActionSet", +"paramRequired": true +} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java new file mode 100644 index 000000000..d344f3118 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java @@ -0,0 +1,43 @@ +package eu.dnetlib.dhp.actionmanager.project; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import 
java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +public class ReadCSVTest { + + private static Path workingDir; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(eu.dnetlib.dhp.actionmanager.project.ReadCSVTest.class.getSimpleName()); + + + } + @Test + public void readProgrammeTest() throws Exception { + + String programmecsv = IOUtils.toString(getClass() + .getClassLoader().getResourceAsStream("eu/dnetlib/dhp/actionmanager/project/programme.csv")); + ReadCSV + .main( + new String[] { + "-fileURL", + "http://cordis.europa.eu/data/reference/cordisref-H2020programmes.csv", + "-outputPath", + workingDir.toString() + "/project", + "-hdfsPath", + getClass().getResource("/eu/dnetlib/dhp/blacklist/blacklist").getPath(), + "-mergesPath", + getClass().getResource("/eu/dnetlib/dhp/blacklist/mergesRelOneMerge").getPath(), + }); + + + + + } +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java new file mode 100644 index 000000000..b22e6bd6d --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java @@ -0,0 +1,4 @@ +package eu.dnetlib.dhp.actionmanager.project; + +public class PrepareProgrammeTest { +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java new file mode 100644 index 000000000..f7d271fe0 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -0,0 +1,4 @@ +package eu.dnetlib.dhp.actionmanager.project; + +public class SparkUpdateProjectSet { +} diff --git 
a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java new file mode 100644 index 000000000..29e9a6cce --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java @@ -0,0 +1,4 @@ +package eu.dnetlib.dhp.actionmanager.project.httpconnector; + +public class HttpConnectorTest { +} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/programme.csv b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/programme.csv new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/projects_subset.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/projects_subset.json new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/whole_programme.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/whole_programme.json new file mode 100644 index 000000000..e69de29bb From 457293ccc0c86f1cb5e51b179772302102b18c81 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 19 May 2020 18:43:42 +0200 Subject: [PATCH 13/53] test for the various steps of project update with programme --- .../actionmanager/project/CSVParserTest.java | 50 +++++----- .../project/PrepareProgrammeTest.java | 90 ++++++++++++++++++ .../project/SparkUpdateProjectTest.java | 92
++++++++++++++++++- .../httpconnector/HttpConnectorTest.java | 35 +++++++ 4 files changed, 240 insertions(+), 27 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java index d344f3118..17fdd4511 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java @@ -1,43 +1,41 @@ + package eu.dnetlib.dhp.actionmanager.project; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVParser; -public class ReadCSVTest { +public class CSVParserTest { - private static Path workingDir; + private static Path workingDir; - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(eu.dnetlib.dhp.actionmanager.project.ReadCSVTest.class.getSimpleName()); + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(CSVParserTest.class.getSimpleName()); + } - } - @Test - public void readProgrammeTest() throws Exception { + @Test + public void readProgrammeTest() throws Exception { - String programmecsv = IOUtils.toString(getClass() - .getClassLoader().getResourceAsStream("eu/dnetlib/dhp/actionmanager/project/programme.csv")); - ReadCSV - .main( - new String[] { - "-fileURL", - "http://cordis.europa.eu/data/reference/cordisref-H2020programmes.csv", - "-outputPath", - workingDir.toString() + "/project", - "-hdfsPath", - 
getClass().getResource("/eu/dnetlib/dhp/blacklist/blacklist").getPath(), - "-mergesPath", - getClass().getResource("/eu/dnetlib/dhp/blacklist/mergesRelOneMerge").getPath(), - }); + String programmecsv = IOUtils + .toString( + getClass() + .getClassLoader() + .getResourceAsStream("eu/dnetlib/dhp/actionmanager/project/programme.csv")); + CSVParser csvParser = new CSVParser(); + List pl = csvParser.parse(programmecsv, "eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme"); + System.out.println(pl.size()); - } + } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java index b22e6bd6d..50804f75e 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java @@ -1,4 +1,94 @@ + package eu.dnetlib.dhp.actionmanager.project; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; + public class PrepareProgrammeTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static final ClassLoader cl = 
eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class + .getClassLoader(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(eu.dnetlib.dhp.actionmanager.project.PrepareProgrammeTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareProgrammeTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void numberDistinctProgrammeTest() throws Exception { + PrepareProgramme + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-programmePath", + getClass().getResource("/eu/dnetlib/dhp/actionmanager/whole_programme.json").getPath(), + "-outputPath", + workingDir.toString() + "/preparedProgramme" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/preparedProgramme") + .map(item -> OBJECT_MAPPER.readValue(item, CSVProgramme.class)); + + Assertions.assertEquals(277, tmp.count()); + + Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class)); + + 
Assertions.assertEquals(0, verificationDataset.filter("shortTitle =''").count()); + } + } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index f7d271fe0..d48884842 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -1,4 +1,94 @@ + package eu.dnetlib.dhp.actionmanager.project; -public class SparkUpdateProjectSet { +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.oaf.Project; + +public class SparkUpdateProjectTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static final ClassLoader cl = eu.dnetlib.dhp.actionmanager.project.SparkUpdateProjectTest.class + .getClassLoader(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(eu.dnetlib.dhp.actionmanager.project.SparkUpdateProjectTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + 
.createTempDirectory(eu.dnetlib.dhp.actionmanager.project.SparkUpdateProjectTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(eu.dnetlib.dhp.actionmanager.project.SparkUpdateProjectTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(SparkUpdateProjectTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void numberDistinctProgrammeTest() throws Exception { + SparkAtomicActionJob + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-programmePath", + getClass().getResource("/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json").getPath(), + "-projectPath", + getClass().getResource("/eu/dnetlib/dhp/actionmanager/projects_subset.json").getPath(), + "-outputPath", + workingDir.toString() + "/actionSet" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/actionSet") + .map(item -> OBJECT_MAPPER.readValue(item, Project.class)); + + Assertions.assertEquals(14, tmp.count()); + +// Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class)); +// +// Assertions.assertEquals(0, verificationDataset.filter("shortTitle =''").count()); + } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java 
b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java index 29e9a6cce..51a7019ca 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/httpconnector/HttpConnectorTest.java @@ -1,4 +1,39 @@ + package eu.dnetlib.dhp.actionmanager.project.httpconnector; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.ssl.SSLContextBuilder; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + public class HttpConnectorTest { + + private static final Log log = LogFactory.getLog(HttpConnectorTest.class); + private static HttpConnector connector; + + private static final String URL = "http://cordis.europa.eu/data/reference/cordisref-H2020programmes.csv"; + private static final String URL_MISCONFIGURED_SERVER = "https://www.alexandria.unisg.ch/cgi/oai2?verb=Identify"; + private static final String URL_GOODSNI_SERVER = "https://air.unimi.it/oai/openaire?verb=Identify"; + + private static final SSLContextBuilder sslContextBuilder = new SSLContextBuilder(); + private static SSLConnectionSocketFactory sslSocketFactory; + + @BeforeAll + public static void setUp() { + connector = new HttpConnector(); + } + + @Test + + public void testGetInputSource() throws CollectorServiceException { + System.out.println(connector.getInputSource(URL)); + } + + @Test + public void testGoodServers() throws CollectorServiceException { + System.out.println(connector.getInputSource(URL_GOODSNI_SERVER)); + } + } From 08218d2f3f9fd64423907ebf31508b8213745249 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 19 May 2020 18:44:25 +0200 Subject: [PATCH 14/53] new workflow with added steps --- .../project/oozie_app/workflow.xml | 114 
+++++++++++++----- 1 file changed, 82 insertions(+), 32 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index 992c2ded1..cd4d79ab7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -24,6 +24,7 @@ + @@ -31,51 +32,100 @@ - eu.dnetlib.dhp.actionmanager.project.GetFile + eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV --hdfsNameNode${nameNode} --fileURL${projectFileURL} - --hdfsPath${workingDir}/projects.csv + --hdfsPath${workingDir}/projects + --classForNameeu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject - + - + - eu.dnetlib.dhp.actionmanager.project.GetFile + eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV --hdfsNameNode${nameNode} --fileURL${programmeFileURL} - --hdfsPath${workingDir}/programme.csv + --hdfsPath${workingDir}/programme + --classForNameeu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme + + + + + + + yarn + cluster + PrepareProgramme + eu.dnetlib.dhp.actionmanager.project.PrepareProgramme + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --programmePath${workingDir}/programme + --outputPath${workingDir}/preparedProgramme + + + + + + + + yarn + cluster + PrepareProgramme + 
eu.dnetlib.dhp.actionmanager.project.PrepareProjects + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --projectPath${workingDir}/projects + --outputPath${workingDir}/preparedProjects + + + + + + + + yarn + cluster + ProjectProgrammeAS + eu.dnetlib.dhp.actionmanager.project.SparkAtomicActionJob + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --projectPath${workingDir}/preparedProjects + --programmePath${workingDir}/preparedProgramme + --outputPath/tmp/h2020programme + - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file From eb0e47ba53d22e130740215d20a1636144c9071a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 May 2020 10:26:44 +0200 Subject: [PATCH 15/53] parameters for h2020 programme --- .../project/oozie_app/config-default.xml | 54 +++++++++++++++++++ .../dhp/actionmanager/project/parameters.json | 10 +++- .../project/prepare_programme_parameters.json | 6 --- .../project/prepare_project_parameters.json | 4 +- 4 files changed, 65 insertions(+), 9 deletions(-) create mode 100644 
dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/config-default.xml new file mode 100644 index 000000000..fe82ae194 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/config-default.xml @@ -0,0 +1,54 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 6G + + + sparkExecutorCores + 1 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json index 40ad5bf36..dd3de70f6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json @@ -17,5 +17,13 @@ "paramLongName" : "hdfsNameNode", "paramDescription" : "the name node", "paramRequired" : true - } + }, + { + "paramName": "cfn", + "paramLongName" : "classForName", + "paramDescription" : "the name of the class to deserialize the csv to", + "paramRequired" 
: true +} + + ] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_programme_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_programme_parameters.json index a0856e10e..54083e108 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_programme_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_programme_parameters.json @@ -6,12 +6,6 @@ "paramRequired": false }, { -"paramName": "pjp", -"paramLongName": "projectPath", -"paramDescription": "the URL from where to get the projects file", -"paramRequired": true -}, -{ "paramName": "pp", "paramLongName": "programmePath", "paramDescription": "the URL from where to get the programme file", diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json index 54083e108..5fc88ce8e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json @@ -6,8 +6,8 @@ "paramRequired": false }, { -"paramName": "pp", -"paramLongName": "programmePath", +"paramName": "pjp", +"paramLongName": "projectPath", "paramDescription": "the URL from where to get the programme file", "paramRequired": true }, From d323100af09a8301694a6a712753547dcaecd127 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 May 2020 10:27:27 +0200 Subject: [PATCH 16/53] added the new Programme POJO. 
It contains the code and the description of the programme --- .../eu/dnetlib/dhp/schema/oaf/Programme.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java index f91333616..69223ab01 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java @@ -1,4 +1,23 @@ + package eu.dnetlib.dhp.schema.oaf; public class Programme { + private String code; + private String description; + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } } From 24daa1deaaa1b78a14556c4fb53570a5db478aa4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 May 2020 10:28:16 +0200 Subject: [PATCH 17/53] added to the Project class a new field that is the list of programmes --- .../main/java/eu/dnetlib/dhp/schema/oaf/Project.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java index 924c08cc9..1eae3e8ee 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java @@ -58,6 +58,8 @@ public class Project extends OafEntity implements Serializable { private Float fundedamount; + private List programme; + public Field getWebsiteurl() { return websiteurl; } @@ -266,6 +268,14 @@ public class Project extends OafEntity implements Serializable { this.fundedamount = fundedamount; } + public List getProgramme() { + return programme; + } + + public void setProgramme(List programme) { + this.programme = programme; + } + 
@Override public void mergeFrom(OafEntity e) { super.mergeFrom(e); From 75491482de353bbc9f5d7a415028061267d2f43e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 May 2020 10:28:56 +0200 Subject: [PATCH 18/53] added a new preparation step to replicate each project for the programme it is associated to --- .../eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index 1c98199f8..df37f9286 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -82,9 +82,11 @@ public class PrepareProjects { List csvProjectList = new ArrayList<>(); String[] programme = p.getProgramme().split(";"); if (programme.length > 1) { + String id = p.getId(); for (int i = 0; i < programme.length; i++) { CSVProject csvProject = new CSVProject(); csvProject.setProgramme(programme[i]); + csvProject.setId(id); csvProjectList.add(csvProject); } } else { From faed7521bf100bca898ada798dc76abe96f02e80 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 May 2020 10:29:29 +0200 Subject: [PATCH 19/53] added resources for testing --- .../preparedProgramme_whole.json.gz | Bin 0 -> 14212 bytes .../dhp/actionmanager/whole_programme.json.gz | Bin 0 -> 34620 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json.gz create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/whole_programme.json.gz diff --git 
a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..1afa730616472d8f4e3f76f078ce753bc9cd6b24 GIT binary patch literal 14212 zcmV-~H+#q*iwFqr0K{GZ18{O>aA9&~WKeQ%XL4a}ZDn6~Xm4y~E^2dcZUF6lOK%%l zmgatbMIbkJ0hG%WCEMv8Em1ZrWl1B-?XE-ugJ8rZd97dsGajPJMguj8Dhy^ltLd&8 zbOE&(V^E7h0==nPwbFm2f61J4?v2NN%1SF;nbo<-Dk~U|8#nGf&+mNaA3n>Y^s}d* zJ$kgi*MIm~@3Sb28T|cX_u=ltozbfYg9m#L_TfMHspMsXzl|>rgCWmndAMY7xL9O) zd13!83DbEM&hZzPe)f<5=^vWM?d?6hsHh$1E)9$0Mm}jE5l;JMo{50WrgWy$rQN*;v)tMY8c!XRd_9AGQ3qRgwP ztaA9&bPQgzxFVNFcna^E!XH^0Ci(=Pvx0?rv=Fzh@67P=JlfDZ z%IizIzP`2PoeS-qcWsNSuWtU)-ye*Qk4Gm5qhN9{9-Ul_kH-J`)hO5r#>#l;O>-74 z;2SIqG10t)c9A;3S*{Flf3P=jto}#p>OU9Xzg)nyUVv{=DtgK)KiNu{YPdck_Z6a0En4ttii@q%&q^nAuj5D$2l zgk9nCg)`yZq7a|CT*2yv#aqIG48D9SKT8R7lV(Xa=gbf~Kf&?4_tO4gU|UzaB(ff> z`{`wslzb=VMZ{s`X_#;D9r7%MHB6ccd((xRG$xa?>uGFYr(f>Htm&}=6MX&k1dyd+$e;S}ah zd3dl0ukg|$TP^4@Vaep)gZoN>md$329)J^>u($B>(n!o8FJb0_X^3AfOMCQo>}`mr z_2|>X7FmK|Ea;wdv4ipZ7c9lk*9&m;a1{Rua|DkRhZ#uD`34VN@dB^GfuyWlXF1AV zLn8k?=bvKEeWuW3mnrl}5xr?afS|FlxAyzCWu|<&<9p-D z#rWdY#c42lb$)s_It>oT!T99l^zVjWj!#cU!6>*G9h{sVpFSTCgZrcL#V?GFD2&k* zHW|EqS%ET#X&$F(b|s`64*#N7=9}gwossiWRp@&UpEx~5WzNUL!?WNB?7QGRD>Gs7 z8y=(m9M6i}-M26MKK<8u`~?UD6x9JJHxLV~D|w{e!hhy$j^9mu(j|*|#8bm!{m6Uy z#LW|zRjaMwVXa0xHlfr(mSuXw$v$ zqhJz%C4v@9^Y9oZJ}*G+oD{DwFwh#-k8gI zjElFxMJiORVZ`fFcHb&xQ4QcIlx6WOLq~^K2~N z?|BcO9(!b)#9bP^vds>!-_dpkrx`u-7+j;2OgfyY>vv#{Ff?w1G)yy}6E~5zOXm(C zc$KTP@2>f~dwUMeX3Y3S4w?;3+I$YWx2PSy6?}ZKkr~?TEWhHlflQ$SG=$4VSfa@U zmMRJcT$|`hRy@IVUGRj1UX+p!7Yp1Hwg6na3dQDtWh@j(MB=f#oY1xt3mtDDFRYR< z4@waNqJ0=fv;l)9m&SHavAprtF>;%tQ0nW>UaadgP^UlEi zUVy-P4dY|v+XSz(MFQ3&TQDPt`U2eMa-ohF#-)z;bGz`^)gyWn*gPTl{pf7NRAI2v zP@ax+T+{XV;luu}%SL>nPFcqiJNf`6U<`vzhsUUp8J)C%z(b z2+Do@OJL6$&>c`dXIEVU6wzt)?G< 
zf0oCvyFn(^2@G`ozpgMMCCo@zlVHWb%%vE80z-jPNIVVG+^Z0U+XVrWWOu1^qo36`8+{`=wwwb-&SGzUs&zJgZ02D z(&7k!-{}Lqa0-`@>{{HTi_=MPI0}x3!Q}K{JUSd6f;~!}D(p~rdco$DrvCTe{0|Hl zfh!`s;Dk?EUcx4(Y&D%#?`aa1A^cEOMG)}{4ug+t3MCB4JHrh|F52ouXz}->>)*dP zADzJaoSz;akKo15gYn7XtI5Ur7&FpG*MC`nB!n@ZX9*a1Q^tdN4%S$LQMwqGctzRr z7_8fzgFgUAE?S4cu*!~Eh$)8!U!jggtTh*YD;zEb;4q0rd{+7n1`iNWZXpiwphh3XvKxfVje-KCD~tAmOL8 zmm~Q8jaSd%4l7i5_s9%dUcUnyhr?=*?^P&AaeI>mU#za*MSRBLzNX1f=do~!YYLQG zooDsOi1v3q5^rGpCa6+%ucD~^J0Yh!(=7o2DFsi;;8lvVK`;T=r@O=Z`#=TzcHgG2 z1IUJ8fG&BIXH(pqXwG5I(3IDn^L)eDb!EqJAc{?NR!84^U;~$nEQWuSStb)mt0XMZ zt%HL(o3WfMPLV?wC!n(6AX`XFh|G17&B`^#ui*DzW;q3!^DNoHMVL}>mo29(j#>Q6 zXan+lXSLWAlsj#5`C%OAB8T0kzTq}08TYlyqD4)cQGCB%C!q*fL#h*Arh} z=3!~TpiyS{G`f)DX5cNr&Hz_qvtk-tMgERMVQ~||(?8qKvwBCf>a#8St~kQLSmH~t z8AR&76EZ?cmZ;`WxNcLu-CP<-KXMac{zMWLrUmW_!Wh36y?WS#H@FfI5B7GIWOeA2 zy)C5+s)93ewo_roE@#muYE(A-g?BOR_n+!Z_|)Medq%GhpRjKaYb-FrDDzisFd7_t zgks9~Ur-n_h{MvbI6me^?x^D6^C$M$@*~|eY+Ydbj_iuZwD%GEBQ=uHR=9x#1f_Az zgbu=!M(0DfZ~gH%_H8+fTCYQQDq;@2qDly>AVfJ>;HsIU_yIWxax4Zx2UahJV_>bM z+0t0>-|rUQ!P~zQ4qgd8A7vOpX|_pF2WF#9;~mF*PB9CIz4*QE;5Bj0p$l`W>pM0_ zbfkqm?8}byT``G$RhD)!LJB-ch7}sw(!6MxHpwFp?KOr^ALSk1*Y}zZ^K78T>`qt( z4rG4G(zHxDtUdC`QatKsz>1~LT#-<8MFgW~;%_JEug!8#gUVE!PaAg8hFv>$!( zKWiFBBw_l7?yRmdMC2kykt@k%bhK9)eg+EGVj>PqmH-tM{9k-eiPb1$J*Y~I?d;H0 z>30QBpKhcCrYQWA``wPgo?W91)D1oiPz8`h;&M+24 z;_B#BjzW#PBYze`9<;nb#rIK4rYY7!i1sjsLR@9@^~=BXn}uV&1w&vHjCvXOKplyjvZ8Fj zH9hIeHi=(Vzoc=en829BzqPmX+5s-$fU?bxN5jL>`Q*j;tchd~k1>|ri9M$bmKpd@ z83q_pQQ@fMpaKyl2!(WSqtYD1;o)=&nlqFd1EB;0Ngxh{S7EvZp>C*Rd-Vo|R~rA+ zD^>Of`*w%GsQ)>D9mQd;3jvjKrH9<7`8J)^77z$VWe{f|^2t4AL6QZUPIIU*ZM7(e!Y20lepcZ)t*I+%C!ItUxGJv5WT_pP-OBLAZl@4dv-(oS=8;l^euk7S}^k(Kn<)8GrX zX>-epy^*toLNEwz_1_N8XzthvG{d`Q2;)l2YUzd2{^&>R6U2_t#3Mb#Gor9NqQBPx zC5Z%%t&W5Ag!@#iz-okz3tM#&qDL$%0pJiM?Bya9SB-Xs==U=rT)AcCOFj44Bh zqKYC$aQ0vpawE_B(;2qcEwc;bJzZ*vw%BE{c0b;%c4=L2nUkr57FVoBY3AXI zRanz~6-HRk=JVfgbratLam@R^k#PjcPy-4Dx-<*I<&+C8Hb?vh{#eJvsCWnLdX!I5 
zmBV2WNo1yAmJaR^!iai^{h0Dh)@Q{6_XMYcQWO!h&}JiH9$6YSR0C24%955CMBqH@J&wH?@bng~F{dgvf9Bq#%Qe4QDUOf{V# zOwQ)uL10Z4$Y>V(-smo$;ag$WA^`3C9G*_TI6WD@|F6T~+3?F@a58-L#p&xI7}bYc zv{S*&34?$C&HsKG!deL?m+!yH;%r@9hC2ta1=zp;=6@M>7o5pTDx5*6)*0FkeB!kVhh^kEU@S-owDh(&%(RiOKY+`v&pzNcq*T8j)Lz~Zx`mOx!x z6`7%}Hwel^O#!x17#J#Ff0=U~8BlgiKq>>SXKX`f#27dH=C%DLj7AH|$p-cj^*! zyQylN<68xe1j3#tO@ptOYHl!ED$Q&xS(s8v7|&dXR18}P)VAEg#Ujj`IF3jhdrZL} z?17$%dHBSPgdC{$B9;UO@L#sxpm2Txv%bw>S>VYQkvuvGi2GsVmA?C~_1@RZlui)Y zA9raYTv2ZCnO^J26_e0}#Dz>7T>lBa0oVm0739pW|3GPif(&Y!bTjPJ73CHv3Hyf>5kSJ4LFwbB11Gg7ChDIk*36= zB&Y*i2Jgr6$Uv<*xb=$KhvB)bq-sDi{$T#VLC`bM3;189Nfuq2m>j)Q6+BpS4;O-r zWG4a}h|D5;lJf3u>$4P}xIARdTpD$PofyES*f0?`HJoA;3hGvlij&;vmOHwq@AVv{ zgMITGb^|AJtD0ZLev%K8W`USD2!AEUc#I+x1!V>*UjTk-b4rNMtfJ&6O&H}j%A&*I zChEVMh^S$#$#4nAuQjRTt^sY@oM&;Wyq8?sQGO>B0^ zG&V!Di0;4|Sw8EY*?SNxMSLsd(u%h%DC;R@{{MXLNK0G0=PxJRA2_Nb`}zt43t+{i zDekh%9Orn*Fm@lhKuAwi34V}gMX?hJ$RL@^+gGy#sZH<)3IA z;5L?xy{EzWBA8@J)k^=c6y*%?(ejWdXyv;~dG6?*-eA59-KNoDd`KW(uuj#n3#?bf zb49uq7X~x@{X4v`*Ee(}gB2li%)t@~xAQDmg1)9$rtkpTUEd+HPC9P7{#wtIYzmhr zEkdcTeP~d-tG=Wls3`+j)6KUEY8_C!?+p6ALL8 zN2$AF2dBGewg|?$X5zsdhE)~LYl8G0OVsWYKG#b>=t4kgv5iSt#T_y|M1z(gCK{G( zN%UL@{wt|wPAAohs-%=&Kto$1oID(N(ok_VJhdf8pjPRVe|9s zsI%kc26V9vXqw?Uh{&`=de|5fN9c=$c@FQBfk9QJVGvTSrLd2PC4o$>j*V8+o=6_R z#*Q;SUoH5%Sz!z1pf9cHpq19VMnNVPSa7wSsvrDSAL4B;^aa&--F-(s*Ua8J4>!6B zlL{>`XJAS_$4jp1f;)E2LOXU!QeO~}1gX&s-gh}g7IOwa7cH+w4MhyeOF*p>XUMa$ z!Mfyxy~STld#K?Z>_8QT7@Z{o8~s6i8erJ%J11FTMVT!4tlT{Zqsqn?t^MdNi+7RF zX;|B?Ni^jdycm|w+SoKbC;Gmwm0Wl4L8w~YtaWM9iXka<#1PLfD52j*ECD}`8&2+p zWNYxE7{RZp8b+*mLJ z5mFpQq%{C5XcUP^0l>3eP>LV&dES7ZYwop45eOG|tj~Iex6IyEHmhNx z#&S*?T0Iq1T3m9Zwgg$M6sQ~G<@?^n3q*D0pcOBBI4!bV?6?&MN-&9+M<5b~c2sD> z+NeSZB9&nEW!M6Kq^B4LqnfyV+@e@p5iRT>BS*M_`T zVt5GJbfu+C5CqxgF-^ei-QVB0&8)051IdBkd2Lx9-*^ENQL856K{%WP!Te|g{onFh zZ&AG|QJV~g5>$$LPJ%zOPO{~9uX3|bl2&rmA#`Y1RT^qO!Gr1Ua&p^?y`%|twX?6) zfnMmJjQVIj4foIr;?UIf z>+xhVIuUf^lhNOQNuBw=6wHTt5QBd?;^~$pFuo-CO0v-Y9kI#^<9iE=Ix2;bd|$eDQL0LM)EY 
zM=wX?lgSYOJ~};zH+*&Sd@{Mf7Yyz{Kbrg^7`}pcKYuYieg;=ef|FNY9FK@mLhg%kb!_(5MSSOZP`%hP-X-y%44B@FH7cxx41`AQ8xIgDiXh!IH` z2VuP;YSXf&bK=w*0~EZa_1~?p_EK)ziEegvjoy%95>lc-7~VEqv31JBH{bm(+BCOp;IShKkfd^1+*O$*}J zAb!WX3`*ddXCl3o&IN0*A#QJZu}?ucvK9lgy^8E;KcZQ$lyLelkILlYGCKnfn!6=>EiZIom zyro^P0JqjqPS8HcW>vmSpzlO4QpFVpa2Kg!4R2k!OUOD%7*6(G_zK(rS(0HfVZqFSy58UYVr zjR5RXDnkzQoWsP5+7Vs|5|Ov!n@*{@puqDlyD0!qc<(>V1bDp?mshms^hso)-v<(< z-0o6e9gww5PL*!wrJoB|seulSUNed_ zkBWH8vmH@o?JTYKcdz+Acl3g?UBy(|7aSHyqAXe-ft`REOEPA!-(lnuJ43|R@6fvr z1qVGG62Ue6dDS6*?QppUF&4-rqQfZ?z1BQc1kAjCXVQHgw9q}w@$LleVrhGl-;XKW1UR7k-roW6``vDjDoC5~~PG8N!cc|fB%1+fqFvf(Q zIJg_!AHN*$8X<{WT<0xhc7-UkIO9-Il1o3X7hF|IT9;qcsYto!yEv&xc6yZY1*&6i zQ&v=})6%m_Gpf-tOF@Zl_NDskj>TJr=R#a{gO!f{Pfa_gFyZ7$gT>J!NV>#uB8HJC zSIwVF(TTdpr6JpE?O0K3g1jn)fM_Co3!!fynMF+c`cH4BsLI12GC}m*|QZ>_`?awpIgmX$!NuKyiEOQ!kF(w^rQltHqHc_^uVz6(AyI%TjWsM5vfTv62G2Mq>~{ zA?d;ulI9%rzrxmwT6G%0VAQCpd$-H;r8U_GE0I5Ni3DSv=aLQ7RE~==WcW!%>U)** zI)#^7+HU{Ny`48#;R;9S-P$@(&^Dn!jaM?k5TP_%)E#KMk@YdR^CGldNQj%&af4(O z%@}(DDCHGLHQJPSn(GDgLw?1#>u6OrFk;-RY!hPfS>2vBs0%Mc%98kP`bnq%O~rWjEB3A96q!$3R8UBE@NtrC(;#(})`6g>*nHxz)~(LKFF+f{3#)f~KJh&7@_ zR1brxup?pR$y#1?>~NPiwY{4LoJ4S#1kE`^R?9nrHiNAxZ&*m#K!r70v2)GyD{p;M zI?LX~VCKR*%`sd>#qv$&UMABTItNy)zzZZ0kSk||iJ~OI;a{>vmSBlcPsZ#y_A`ac zMZhkTNN36aC2e&uwBte%jM!wPTp%|oeAOjeXZfYNm<=D=iMaZkc|&ihhl|+WXvKIa z$eXR9Q>AW*79{(+;&LOlK~U)%b$IA;;2wTik&6M3X3k34nvEmhW3%oh$K$N-eom8C z=Zd&IX@Pw}GQ{U%lhDpE&9DEr_=?`lsVj%0x$)f|AA}_Cyu6KW$5kc%5%=*n3UcxN zspwnud*^8;=yhj;99sfv;kob)=iHgcsOS6sMTE$mCp zK?oGZ92{y}rOoZG_7dRR;S8bbiCnU7Ehrg~&1Q*+aV2M%x9Al4gJ9ca6>-W}7|)h% zEMZ#^LcLo;(5+aVvT}_*>xyWBfRmm*#Mo;}(nU@Mtlp%Nqg}3gJLdnV`z~H2jonn# z48=q6AHT?1%*z?<0nq^~m?8A^s2V>FzWR(B9u{C#By1tOZHo$8?4vABtU;*sV~fi+ zk*J4kS~?(D2EnQ$v}nA*n0zJH>6i-BYp14gpXik;wk^>&>bVftAmVPA7QpCYiOO6u zmLdI+C8O#_ zW%m9r317n}1S!cbNhfTnm&H5-n;(SYB@eKXQ1<>C7GV#IqFiG*8TZ5%uc#rZW*}H? 
z#wyGlDdp>EOc$=;jj}#|xHs5y)7thQYW0Cw=Mx1NV1b$F8uMqL<>(;R8|Vv?^ekp^ zB?@+l-4Aq*l8)lE>gn5E?}diDsigXfY4&%5=w2UAd)P*_klKG57z~|y zp=HM=X?7>FDb+Zr50X}Q^dK+L(N*gBM2`k7vsBpN7{9~l6*xp-FroS+UP2_VZ-aRi z0ZA8|n4F$2cX*c$gI^EL-zja7X%`IgB#eYxukPK2HQcMR+cQUN zqoZ_2RL7rHC8z^2(5o=#80VdFmc#@$mM^jq8r5tS27@GCZS3>4Kq1uaCBJC`(@OD^ zW<9Du7v`JGPLu z-0}`yP=;&G>^jpro!XzaD>g0WILjec|k_4>aq(pZBkrCS^)+@MZj9CCWJ)gxF%QTusI7BIN#); z-qAh1xkH=R+t+QjNx4~8i6qNtNGzH(f`Fa`>uvakEmQUe)5X}l0MA(mu^=wOOV{`> zB%BcHI5ta{6x*oz}{p#l+r}~>Z-``oac=exR4WC!IqDW-)1mL7oaIO zvry55ZG^jHg6Z$|DIe@*`th*xsqk@v$->IY2~Q@2%2LT`L1c+1rXZCls+axurW;ZE zD%kbeTdb;<%1rGLBy;?>uH=8caroqrqyLo(96ADbB{&)&9jA%PvjUg{B?V?IuwNBmoH6aB}2NTgIp~Qjc#R*f`vU@pq|$@31+g0EeMTi zP=d1%`2?Fvs0dUd*azHD0Iq!OUEbJRdggL&^nS}7h`1JBpV`9Zf|kB!LZ|qE%-h!| z!>@0cgWd86ewf$WbCoG;_Qz3`lS;!#o-xL@VRnaJa8{)>0(njgGwaFv^A5_Xkge-Q zK%~oICQhaBHwKM_0Ig+VHw`Z0-fP$^J5y$%PvV^@ z!S1{tis$%#x9|;q&(;1>D_fN9%P4eMgDl0gr=cWZiLwTrZ+!CGUhUJF7TTRgXMS#G zh2T>j32o9TEC_}O->}hTedtBr7Dq1gM+@A%kPU^%xWHx;*nCK0#yKk`?kX|51K;90 zAK}fzJ%_H*xD%;I@-Zc@&(LQ8>6iN(Dtk`$*7>_~GfI-_QoQ6S;!zQWCxPFaa{4HR!bsShHTBywe*VJ^?4lK9k5 z_sK@9M%E$q&i?bTbT-BI{WqX(;BjlvHb||=x?r8){?X{dq#G0_Fuv&s+ZW#8I#=X3B^MkV+B%w($5oz7+_31=7<2Kq*x(I|fouFM zQHBeWWS(Jb$1TVBUUCnU&!{kyx~&3h2mrVVw`+`eIi($zB5Ax64Kt{X5`32%oDy*7 zUEPIsQpek7Kt4K!a?=jBr_*(ddUO$-u1q4A&T-!&aMaFf4%;)HwH4p(rxTR%9q5(# z4eMJ%WP>jR*F3ElY;M(1EVQiw^6x`5EOsp_cjsA zj(JxHu6d4HVdOyu%~B~qWcPlWFKP;pn-C3>J{V#v#p zR?8sE+kel%JJb@)^))$*HW9rnRbABiMDWtCG9K3oPYy808YGYmAl6ty{-&j{<8S1x zycAllj?IcYDA>7M#?Tvyyj`by3#?r`MC0=s9ZW{YhB+w@dV?As%KyhUJ=>*hP8(If z>54yOpL6Ne55L?eIIKl}yE1;%8MM+Zoq8PO@(PJ1rO=u&wj;2$V^n%Dc11njklK`f zI5wznjW_gl=36uakCJd+D__G#?EdKEf^tB5wRVi>*jVzCs`HG>Bn(N!woWJDW0u+b zzZBszd;ibh{WobEz9}vZ>+t->zLmzf9Tb0$3@(-0o`FTB;-$I38&JHm*Psu;&lRl$ zld1tA3gU5DY1>>=cWe}ex?ym|*;+t3vZ^eQ-M)EbSU0`3HjV$Wjn7?vwEKA1MQ6O{ zw5d9%DsLyx54*%GuY8o8YK}LYF}Mv_=OVu+g8wx`pHg^YwxT1BqZMZFtDHC1uu@^& z1itG!3mF$}d4rpY*N8ZqZUK%k#!qVG-dX||v_-SdFHkeiSWX^z%f}?@Q&KV}R+MIu 
zn*qz1WSP9uKfsb%yW3lPGa7b6HbF~Hgt_H^}jD;)a54XC& z7zT+_OEs$Uo=Yk{0%=$I6g?Hhz8OQi_2pg$2|Hn-(lM<=_$wNk)fn*HjJ^^ctzm4g zmpm~tw0LcBri1k&3z9Ipgq_sXUGwxRORfZzlXtd*xYYJMV-NqRhw3F0b@4eX()A>a z77NV7!)7{$XYG(zhS7;~&GS-(9?VJU$f}m!vu43yW&yt*W$0VOLc$vtm0o|y?e0Y)cO#`cYk|t(*{@BvF852Duia;L9*{CqXJ5-Hx`~( zhgeu3?lipe3h6u|TN?Ea@9Raq+4CM+FyQF=U$Ok7qFi)^&FrrKxZv|k#!Jv#afOPF zQ7^Q&Td}3ULB(8uRe)TI@w%(54y3`?(_i-G!DJ{4yb_dbl&klz%7G*Cy05Rwnjmk< zkwcG^OI_yDEtExm7+fNR>z!p$MZK71vo~Shkx78m;2HK6tZ(9wLOYYwYQz2@iN^&32&niSk;Vf$xMwDb#++L%<;3s~z@B2BLI1w|0LSu1&!!j6m zo1q`^_1vPHP&xPMa7hA!oy>_Z2BS{`nL^=8W1T;^-n zhzp=hu5hiv_YU8GLlaG+p;3aL%a&yTIQ26B~d;1t8z3X6L+0)azlTNcMp&&%$t@d~u8xad9ShkYRi-UjSqwnU$zqw?$BDud5 zZ3>CeMIk&w#73OK@YZ0J<=Hxd-vT&`GN7#7z(Nox4>$%sKe!y*joGVrLLF&w{OEDX z>?sxd!Ji#6c5gME0sgQ833kbD*ww;h4K!d4OLiI7i;zarVRDi?cZ18hiZ|&c4>EkS zHMlRA9P~simgxppV+E@_c>m9N7H4ZDe=>YCYYIWUe243)D=elJsBk#zhBxn9lTm$> znXoQQt(67X&Z&Y8ZmehFIM2l&PShKI!NvryD^X3K?$bQfOQqy$kp9GGuiz#lLbrSH zq27{Hk+faUn@U}i>I1~h41O&2N)Z?;(oM^+=`ve>_{Vx3dRM4PvtNdUiR`FB#O#9F z%2JY1qyWVdekz-^38(>L_iHW5mQxnTQeAeof1UqmAuWzu>r?n+8I-s2of`+JWLmJh z-Nez&UHQpd@!&ykD*9VsK|s#NTw2UI*8N)HrQTD$d23g7u*PdtOq1;E3Og6GM0CC+ zcrmsR$`E@CTcN-{rcrhMhZ0;f?CW%unc0`7XC{8%uPY+z5}ke96q`S3F>nAfH0J6F zY)aGu^#B0+{d-UV literal 0 HcmV?d00001 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/whole_programme.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/whole_programme.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..85440ceed5531f242a912f2661aa698ec3835a4e GIT binary patch literal 34620 zcmV(?K-a$?iwFqs+rwS}19xa|Y-L|?a&KpHVQpyU`Sxm$<-`8T*A$-j!>+6j;G_So^@3ODM70fTqMqcBRY<%>?k z*=|3KlO4y$PjnHyh(*J>6Q|RuKZiSfHyEbw5U(FZpZ#C|$A8oMb9h+OpXYjiTD>@o z`h59A;7r5FVZJkAQQ!4KY?|*nJP5*DS6o1EUw%k<-*t8!_SMu66Zw+|H(o~JFmflZ z)28eG>N=euVLl(i5HYx+okro*75_83d*()C7^QTYxMSvo1JzaW{sacA*j*Nvy1W0h zrn`H3cdxjYMiFzk4>J?Q=>+=8#k;ucK#!TjcuD{^0&@Ue>q-$7;xI8aR}@D18nV|{$`9G 
z{KOL1caBD|I#@7-mu_We#;`Mc90d$5QregJ5f}!e8FMLZW)YH z;JL6oubffc|v!e#krMD$%3+_6OR%*%~@-9YgD`SQ5rknBpp8p6i)| zymk?pE4k{p5`v&`V4bn|VdO^m`o22_aSiXs=IDZ*S{FN)U!Jwg4QmG@Hz4_idn_L# z`LJY;BiKD_uWN3lcrMAT)6Q}Gs`c*Gh0{4dzUf|Hby}yeFgt}K2s<)mzfRfm|8<=? z>Jg{h|?J=_a;H5mGNkxGk!p@?Xc#oDJxX)uZzzed=VGbs4z?Xl<)2VQ}Bo3TeZg1O1iy@XQyG}19O(8o$r3}I&e~v>k zN}>5~uH1xjnxHo+bYGpHvV-&N_}q3Eja9f;wRQ2FdiMB0@8nCylgJ%H51^wcMV8<5 zxIcoqI|(EBfbwaPQZJ%_X5NKO>%3(?+y?D)qXgOu7rF!JnAxeA!Qni@B^ise7k5$N zX%3vwpO;&{ zMmcHN1Z=@0QX_dHw+|fW9;nsJ5AXpRkw7Cz5|(5V>;_sC`n@oMV$7fi?&_s(Ej-`N z_{ZMad2EvFSzD5zWgW}9%~$RK^rM9< zrIUCGSRbH5=vo7+5zM(f>ujmL8!v`eaN2u5JOsA!^2Z3;$(-{vStuPqZ7S4djIGpo zDVW=)dRVcUg<(B$Z^MZ7K_K{Wp1Zd&MzBWdJc^w}*J~NRd-cL~J$XM$0BUKX^xOFw z5%#1`)qYl#)1W?rIxNopa56=+4YWeCPU&Rgfw%lHWuPJkeL8*oN5V40pFv%mgz0Sv za$f10Vt=||v%<28!PW)2sJ5t-ksr+YIf7R+dNT;{INouBFxd4NzL(*9QHIwkUK{V) zD;B^G=nLE2D{O-ga6;?-fhoA$n$lx5_(|OsI4o{~f?1RwhA>=gz(FyDgMpS>oC@7x zk?U=+B-03Pfm0Oz>bdV-GGNuu>im`K6ec~5lZcb4<-oQIVi>7O{x~oag6v$+?R~=> z>%wDPj?0 zu$@^v4JYozSQl)FkbW*aMZ!k5zk?TPQYSvdZ{-L#@q=VI3Q=x!!e&Yq340} zaQDr82KsF3fO3BK`zZ9n8IQ-S?Ru6;bFn`1^(0;y)4J-V-1p$BH3#Ow{_{$G_7MKj z`rW3{u%=FcZxaKBR02@DFWTXYBo!ySU+W9xp$@$cNJ@>@Jk<`0VAM zC-A9Holasr-#}i0UO(Rab91waLMp!&C))2QMI&-%jNRoZTDUK z==FK0b>3+^#~0l<7w4_Jf4R8(-BHKsIPY4Q?YHtrxbO=8dv)R59KCCIug^P2*Kb>f z<_070dctJtqgYdF_>9A3;P>crfMa)R$|S_Ys*D7OSkU$}+rmp>W~-R@BfRHEGh3Z!7T=A^XSr>!Bo4Si`t zlb+1QDSyjA+lH0=h=FbpPAF)U2GX^QW;R>^iUJ-#6Si1LcRgIJ+1}+2v_S1%cDd% zw`fnl0=?AtK0j_dR~Mk9JIC#- z%+cHy3i~J`xpo!eg6lC5$}~o7C?r)%vPN1cxOE&&eKk6H%J6_i*PSqVl1tMFeUbo*H*QhNRnhU}L&7uqUH%)<+c&u99J- zd4CZItq{Hg6(ZpMMNQ1mCp#D|dg-0TuHm2VFyj5xr>*To@FF}$vA64VJiD&Fa$!Rz zJVIx(8H|_Ca$QM_!M+5=UVIEtult}(+h3=QG(!BAnjKd7ZMM?c4gKxEWKjgmhEYrm zHCh?r0y)!*+_XQ6X<5Q$J%;9pxGZvxF7|g{a(MqjZplJW0dEQ5SRT240$(YI+jiaW z<&|rx%e{5|vFj{g*@ActK7Q+rNe@e5NrDmt$9o8di;L?z<{!UdFh! 
zvc;!ilHfrAugQGrE`kR@SEfI?d@CHQTWO~u@on^(P7I$?>HWplC8)`uQqyf;*{d<8 z?OEj|!7+MPFi94ic?;f8W}!2t?E*%l;VxABXaqYbao0`H!HcRbbnw{KUwj1mPUhbV zQaIPa5P~_8rVnXBpagLvc6iO)3C;y<`zVjt&>)hos+?SS*rWc5*G6oJKdo_IRr;Lw z$~*sL`M)AhZ0LZc=q*})KjOnN<6yhWO^ru1bYVfK5r)4b5bSB(Pf{w!jLwhpy=Cvy8ap&ab{J3-FoVJ|yKOePE+vi7}mh+|4I=ejWwCMZG z*41_AsB_sm>MZ~5e5)F&p=8^w&Px|g;|U>slq_$$ILrIOa+cr5Q#fZyoCU%klY+hR z{^NIvj5R6KFL2+_=665F3-;c5?~cWfvpD(q-4uOt3$P{Wun*jHEKc3BRyx=M4sRW3YooKnveS zuq8MXSOh;g^-6N%Mw-4~%lN}Fi;H@w)S)NwhFBN0#tFup|OMu8>%(a{LQD`#{3-as2+hoeTGI)DHAKgKQ| zzfSw|<=?;l@6PA&^vC#he(b{6KeIiQ&6Cl_!NCJiT}MfVwmP}mJ!KnpV29qEzjA(gaqi?nEjZYn)=l@V{1FZ4m+k9z=d|+*{@&^g8McGn44qQ6 z8>K$NS2Qt|k58OMvAuBO@<0Z^bKF+o#E-zNOy`Ku0uvc8RrpEf{PdUQZ{?L$07_;8 zM4L{Tu{iCJ>0Qtp0uJIz0J0Pu9pQd128Kbi!N~IJK(XP&Xox~L-1l~Ef9Hm<2qJ;A znK`&sK7P9cc3nIUv6b`pXa|Z70wy1HZk$BusrdNqeOqjpe0D84UO@(JPs=yMGc$0F@`0_UE!7>3=1Kx%neb}2i8Ne>=7*n zO2P&jy3Ti3#Cg6syD2wx9T3#ioh)%SgbT``pbG6mh$|)n_dEC#`jvNQO^9J=0ls7Meq&GG2UBC9v~1x4hKh(@WA_$8KpxYgtZVeef}kaEPH*`?69Tz6 zz1JoAoWkJAR2vRPychuz+Nwfs^cJvEqPL z=!PQkTzaNUg{43U5exG`VP}rU%+$bssYx=BpgZWn3d5#DCu5%gjiJOw) zzfL_F!nv^g5M{v}Jw|7!EL#f8ncLS|8p5&0B(_MymbzboE~e~_33{8CAMPy-H^$I* zljZl(SOt@E?I|9+sGh*D3Hb~vRILrsvxWv{qZhvITpWM=4G4?Ni+5ih{i5Z(buO>o zwa>o1``xM13kfU(QU^i%P1V8;{h`vGjEhyclTFmo8pmjP;;i=4y3u&cL+|6aee{kl z=6$dLHHSsM;4#b=M&V)>zE=Q26zY?CeD_Ud{^*u=)cwXj;fNQcIT~1Vn_iRs14LB6WWH3(95||MWL!7%l%NsH_TUSi(GP6*XbFE$#Yw8OE5| z30hZxm(D(4G&kzyA;$MB{(7m_I4=NopL&ea;w%yx%>->a4pr=)wqAH|Zv>W>56A z=R&_SBw(JRA<{PK)iL$bMSuzRY57;^l#58)K+mLb9|l}vW;$ACUljqVvw7~Tc2Vy_ z@sEcE9Q*WnMn^#`Ty@Skj3*ooj|%X2!@MlRxU@DfHI;AYnqK~$PW z@=t#Rs$U?afl>us`A0Ha{t(q*1V3$9){r5#e(^FA_9Udu2D&0epa~hPrZ!oD_}su7 zzVW=C=i+2155fn~Yz#3-aK7TQiQNd!-&)^7KE#)x_rtTY5e^=g`?#+8_I?iC8KY9K zvc4`c6e1txE9Irv=B_7RiG9#aWgp_AVSP$stFnv3gQs}KrtHYCa90p@B z3xhuFIRWJnAzuu7u?4jM?nn837`cl=j9G*Q-p810Cj`xcWk>ORvQ4RTA75K0L@=H` zaC^~wEbyBPkR{l&?ReU@(L}ZFp^C-kE8cw+c4H^P{jndc3+Hf1yaI;9N`N{$<-txNf5SyGy&8&3q` zNuKkJdz?boxW8eXMGjiF$scZyo4GnR0NVh;myygo{OK=*l|jUI`^rQG)PZSCY>^G* 
z^5o(iG~qVFX^sS(=3UEaE6inEp=sKPrg_)7I5$+Udx13!$|t+!4U(zn3K|f#1PQ?} zvC_D7Fjw=DT;$A9s5V;H_s-JiI^P@LDV$j$9zvY>wg%i+sY?+DWj{uP*QN8Sj${tB z1*zk3&2xtN#LQ>67$?i%aDUWjzy8sBS84hWpB_|4Ko80XBE=&aNlkbMd=_qE7+e&l zgA_#`3}CilVQhP#*u)G^Ea8YE@*Eco9vBDmPwzOh5f{;5SkbX`L!+0D4;V(8@hLEx z%#ZO(P-6Q;d4Pu`hNF@ZN}XGrPU7LwM`yJz1o~ic7-~E{PanT14ns#YRSbjz?jWm% zoUy(z-|$xgW}+`{u8tcJsRI__v}WTKiy^|(X&QoEqYv)pomT@JsUR}WQ?nsC*sC<4 z_YAa0``S4HeO%EV8Qcx}kM+6$EW~OL@?H@%rYt&G>Su7i^eNaYaMJYUzVmtKtg~-` zG1v9=0Vpa;!t3y0Z@*@%mGXK!*T+s9?AV~`Dn5Afto99sW53fnzI3iA{{dD`i%0#) zHT9=;Tc&!#aLbes%M;k^AOT?6q7TvWT@Z#&+(+CDZi@l#{d}jSBnU=xFACAhLm!?b z-a;`qj57Ze>?Agm`C|frCBe-c9e=bz<@Z7+?VzCXQ%}el;g#kI?LY^V8yypr-wFyN z^q-WObVBNg zTL;tU->Bqfma$x3FTYPzUQfA+E0ddzfV&-bLyP9RLB#liY3&QI9br$ z7%4~PTaIH|*~l8WhS|nyu~@=aYROLoO_wQ7Dw%Nz+hVaxJ`Q056VU*C3x_;9Kh8qh z`!KMTZIwT-_Qk-Wdr`aSo*Ikp#09nGDulLdAovdU>eu(P@)qi56bFq&YqbPG(Yjx+ zjR={eX(AvaHFX01%7iL{GyJ)bS~K)2OQ{z6b1?Qug~^4P600ShA_QB}%~blnJLMR} zB28ll?twPB2{jDFY7Y)GPs(AX4P}kd+Ng9V(rhBsy5(^|=R`@#G!-Je%Jn5R#dEOZyrFUd7IdY{O+bo@NqdE6 z(GJ(NBV1FvgD}icBL*yRT{Eift^KmfEeL0D6bb(dqtG9P&AV-z#*4lJ+NdD=oOsO9 zKv^tbl=4sJ}+AKx4mDpD%|7 z3<4Qxzz7*Di%6{IF!ZJJn2n3a`b1p=J9w;=-f0R#t`BoJl!tb+!LiI=v&uQUUn*dE zo7o^pG~#~^+*1IMbTVW}BkYLLoM+W%&R$UyJ!8oz^n^T;sHFyzakyu$R9JCIIN2@H z$VzGM#O{DO`DQ`wJMH^$#@6YsYgR*z2ejnF0L2WJ9}1Zm&XUB*C}(v$2h=mqJTUEV zT-D4yLxgtDJCsNlmq{jBzFZ)nlhR_mu_9Dz1*9U&2 z2!p0ADXjsTR)Bys(rc@Qa2c_0rCK9%H;fdHmMaz;9iR*>+|e8zK!&*4;&WGS$LbZZ zS6Zlj`Cg{PWz77+u4dt&M}pqkaFDIh!CyL$GIVWUN7pt3;2NjP`Z#a*@(N^P=lqqU zaVE6RkDa%jD?Hp6$Bt8U3rWkThq*0 z*O2Ok^StDGo*-5kG?94^owUfBF8qH@UR85sJTGNgN4V963dH4WQCev|heg^)S%Fc$ zlI66c{5J>=kR(%R>(J>ifp<4G&TTV(nxyS)Bj3kps4ZI07G-x|Xr5s^SE`+RbVQYc zQ{>h`akK*}9vIT|%=uC{+B-+r^1NcZ_cEaO_NyKL2W7T;&EtgLh*=o<-frJ_<9JJ^ z{9dJlPqd`uW|na`DaT=80wJ5YI9RhvIKpu(B)V+|=>~Fc$N|A7(~*nu z3iS30pM;*Ii3$*OhA1Ay_!T=m3TM>giWlK1fHV5|Kl1whH2-qaAN zQ@WqlO4PD*yAYS~=eq+0RC^dA<1xA%n)})7hOD&WUrV){^kdY;QAJl;7M4IDHQjHz z4G&%}FizsL=#lx5`bNgcvG?LdR4w$>HM1M0d$OcV2Q3FzYR#vvJ5dgqY3PLL> 
zm^+nja&ngK7zYORgB>UuhXPBo)Mm@)Ey5W59;iXVR(yXJ|3ia91=84_>+|H z8LW|M7{tU4khv;Kni@3@+wPcmsmK;IYoF%mz*OA3~-}4 zCE1`5&-~TNPOg~jXfc8?n(vU?PWOr^=a57oi0!#kS_g@Ub`zxD?Gp;XnQ)21-zh-y z<&sDC-6^c_8s%h-*3qQ0xeYsF1Xg51tC@nH8Yq!exzyAK?pepYtUc5?`}T^?KH&zy zXA|;6RLRLye(4^;UmLo3H^{JEmn*x49NP`y`<*;bOcO}bXVJr%QrPp1)MdHGH9QJ7>N} z^DR3ff#s%47G(&zm*W&49T!9G0%1kV0)|W>DP5{B! z$%p|_l@clsGbkLxChp+_7gba-=0Wbl`6u9o3bpGiE?SZ2Z zvyrqC^O8J{>W3mO+WGvbeRV&(z#JE=X}E5mI0M(Dj9oDUW|1H(i?JPml7u;fQIRur zDk%Aib|To0JVq{2PcSG1E*uaP^KN1_8fo&jjABHH4%#HOjOf3Ty#AJtMYmk6K+MoT zy_!?rmOIR>uVr=eUojMQeKDruywAePz8G4jYLQ9AeM4eL6}l96xp%*5Byu*c)=cRFREt5%Z9gO6VVaSo3n3*Zc`oaP zpoDpBYA+cBZFNTCen`@<+YCY|0^JofU_n<33y-5_sH74GpyNIZ7L6RN zn;z=1@Bn9>1w047Mnhprb^K+J3s+u}{M%$yr3(=uV4gp$dJJ`a`Pp!MLQ zor6^=wxD7fYs6mDM>^9}Ovuj|Nh&q*8jW+q{QE9awY`^x&MFE|szMK>a>!C9SfhzM z6Oeb_VlLA1!x?->!E|J^`}nU7G!R32*@TR07-0)Bqd=>*V<;oSU4b0Oq&;OEOy!1e zDS~7y3#(3cppUJ0uP(GcVi!5I$XyJPFfr>Ak+9`0VzH?IC84?1%-h?Fju~>sZsmbC z3s$~Ma-8*riUP{4F2-F2&{+OaQ+i>VSfpvaiO_nhWqI|YGevm=EuAb0O{tRBnQH2X zVpFjixhPhVtLS)VgVx@|o=2fV?7`4z$?{=m-W&47?wx2WyHg~+w|h-_BCO>5@kRH| z#d+)QUs}$~76|$C*3FxXx2-Iy_&y%H&J~@X@nh#De7O6Kb58Ygk=ho-_RI78iTRD3 z9~ArSWaW~tGq=b4P&S-S-Htjl#n}pHQs_4_Rd|C2 z!I=8XABMz!tRe?yMeFSDEe$1(q6fWSe&`oRarwV#j0=t2(r6lu-K*Y8`{f+7f?T9S z7S|8Ch3^O6 z5rSGpfVK!(k~3cq(j;s04b(DGvj(y7n5;@GBxXa9aw|Krz2w)jbG0(}v$S+-#6}m} zEvo!xod*G}5i}9mvVWWXnGXfwdkdRKY?z&VzX)>yg?=nFRgO~!--7thYW^519Sn-z z1P}yj#&@)|jXW5fJ#3v9+HSqJtU7=H1#2|VZow8xGRz%bAcLPvLObL=3Bp}0?)c+; z)?dK)!X$tH@!LKOKU+*W6@8qfSOnAqQyV{q9$wB}EHWR1CH`p^v{8A%W#-o7U~L9Jra1J7 z>U9J*LP{;aom{@-Rj_k}DT`%1=$`&46>xIrB@gR`+}2jQA^V!HbitK9&z8F63f{^( zH%i=AI`^NJNnv2*G9CtQ7^i~W zfuUR7tE)zo#%7BOA3LDycoYelEPsd=V1F%t@SMBf!@ZvW@mo)r)_(yf^r3T#MReRhz zf?7#Vv0u*T_>_G+6a1KW-^}3|?o5XEOv_0`jlDWP%?*3{;6--fQP4yD*-gJf8q}a) z0gzbNJHwx8*SnMDA6{Rz&p|c5y1;IM_K^1zT6VJhaTF0P@D(EAKzHI`ePYQcMtF5m zYfbM@P^^`-OBJDpOx1*f;d#rVv8cb+w3FqN{%Pe#d|I?|yC)s!-X(NQY5!QIa@T1I z?;4g+;=V}5+R@xQDo2MrZN*9yh0=_MRV;J5O3ATn&P_YtLYoH#S4>=}p%y#cBnl^B zzxQBL0+uYce70R=+ 
zf`cxIgY3&_6c8f?U z1l3^>)Ww`=tlk&1iSXFR#oQ>5Y53Rz(2EHWBN!h;x^l`=oO@cILKveHC=t;~{V~X7 zeu}WHIK=`cSeZbX z!8%PgUNybEnL%zIt_c_IWn44)h{VFcbI{$X_2MnkCC z11%Z+nG$Ke(h$mFAX#z^0e0o+&%9jUcq3h5qXmEW4QSs8oJ&#j#>7fgp9A;3&!+KW zE?3h$NwF>>T9Pv=L+X4E?|lsC7p=aZ3HuRMat{R73_I#`xy}&d8OHqMw?pApgPXCk zX%8zQgM8<@#^0lG@s1-@eRR5IMHzPR753m`qt*s8GX>>CynrfXW_eLyQOJ^++Tz$!n>Q3_|AYyn1HPaM#v=qxeD2{{ zF+iJ3o+WJ_^cY+;Qbn|u|Blc!I)kt**7kPeeJrYe0%J`U)vhv=c&_gief|SbxKfdN zp{^D6W0rrDks5TeeHxSM{?QvuuGQQbbFE=yBn>c!LqtAYUw4prtQf&TFUmuNa10?^ zQ`(}0u0CdQ&kteKxTG8tyCC7!%Sy1@PesAy*trF3*#tM~yoAUVhp;Z6-wO35uh@5A z3_#tv&_zR?Ef6J}8T&FI5{kGtlVR|%7zU>TwhHC01^;)34se@`m0EC7o5^BJVB}tFXSk#LL%kM|@eG3$ss3PPq&=FLyU4;Kd z2!F_8%vaa&0-r{@-Y0Osc5klif>r3=0JPhMR32ch$_N-861Q%!a6MCwu8W-*>#dUa z+TdvI2>T5ci$8)1kjCV~31H$eS_NAKV_G5>pJHXg1Uez^w^|SvvL~D*wgs@SEav zwSV7Oh;|+Nlrh>;2b%TI(CYW=0HGF+Ycx7gR}!Ga9mFS1Dfv}8>P*P3;A&p8TPpJs z1)8B~YD~hrAALUKl(afUgWW?voMtAWcSv}|oFGe}n8j1fz)`L=nF$uSQRv}ujN2$c z)cp*n$peKS{u6JIn1&$Ft!D2Htv`1dW!0OBU|ZJaBz}02YOUV1wnVtCxPE5?XSu($ z-Uvrm?bYgn1VS#d{$BRQg9SpMa?_LutE%(*QT${TP5y^#tmRazEKv zGJC^~_XvP&2*Y}isCJts*xcJD!pijYS@}pV1v62Yxiee}&Vs}zNaQr_kw+5IkP-6n z-EPr2Y1mi|vWi5A*i+0NT0RkljHx_D2Wr8{%aur|3Q_ zYQv7+kk=AJAjzVLFn-B$P_Xh^LC?S{A@3$7;0e1w*m2%sMn^>3CIcD&k~4n*4BbY)+3vXI6(Ur+CCP&^5 zqS6x}8}7hEkmuL)cszhJ1uZWRG=XUg9NAVt)sT=j^I5HWw)(PA zbSfycXd-r;5(FdFD9Z?^=t>?U4UhpEqynQL`~c1X`nb_lmsG^V`f?`EN^WrJm+s_# z>0g9M0DGOYOm;GE4(IciFFU)xxHfaMOL8tKH>a++ONlUPl^d(70A|^Vjz@@2nus3j zJPcHHu{UXdjVlJ7pv6iQM<4?lJ-b=5bInm%thP`r7%5ef+*J>XuA9gcyI9>iuU&JG zgW4?SWTh#dqHoaW9vO9BsyHpv7;#xb_e0+fnq5d}=PUm$UP^l4_7ukry(?la%d!@KbL&TnLd`cuP@4`QT3Q&xgV-qf&L1tD_N+4mlJ{Kb#I%^f@N5B zi_5!!^haKu*@eo*w81rbG+$P48~STbWwv5pe|c^K^K<=`4~%~Y$V zhvl8BDx*#~I#Hz&Aa95cX3lwkauiiRC>^MxshHG!|_k`H*JKw)O#= zrT3zZeYFqZ@+}k6SF$ucr3zYQb(m_hP{meF!I1T(iQzq_(wz~=pDg04&lH%8tfUyz z{PC#!-zvV?={(~i|^2#Y!n z4Y!FE=D1Ry>558)uOd8AT7>)0O2=F`L~xr$@T|f*Th?Dyu_HA!oL#j#T1d znjvP&r$?oDIVcvNX~SUw&4b253;+K0@7h-_=V;{eDE|A`2BQrA*ry-Yz1hOGs<|^& 
z(pqBg*t|OKXwJ=t@spbJ+6q8mi3PYXA(2t}$^wK^v+)u83deNcr~1ced}IY5nF5=v zhoBgvWrK39Y|x2B=gIoFYrM*0i6__Eco23Ah|kMdmuK+K{tx|SOIk#p>Mat*|({Ipi|4|q~Ho3f~nIY+>IBY zA`j9%!f1ff^cSS*a|3x7irN=uKz|!{gINosL2qSYi9($njg&~lb`UAa0xr^MwSLEi zmZo1Bcp+$dQc;d%E8R8=QCKM%pCEpG<))K?J2KU!3gyE<72EaV;dx#%NQ{jNWq?>M z0SONZV%3ldjiUI*ezF85J!ogTmGYxvDh&9o-Bf&<1yw{=7-!OoN!gGw=;0 zNGvld%+35V!bwE0Y4hFkTcY48w`qfDx1j_-t#$wwmX;kj?-k|Q3Cdkrn0r8o*2sAa zQw=@Dee*UP`ItCAGPi}OO1(ziwae>zm-9(vHPQq$GF$UA&{ZSiwE-b@cK5r>9r!g~ z^cQTn{J|H{b+{TPXcG26ehcOWgIy!)vE>O}X}J@YSzH?~otqivmx$yW%1M3?JxdXM zk0l2Mf)lB0B%Zjs65$5RX2a(GW70V{=$|2TD+8ONlKCf6tWMlsFUoW#qW6k5)k><6 zt4Jn;bl_q^z6=bN#oE@XP?kQYR*YdRat086fnvdOL1MY8w2_xqk%OVSpgW*E3r%SQ z57b8Du8vRN^;2(05XFU3!Y%(azzhr|y~nzdiED6_R0%bM^Xgf(r}()|t8%V7krOC1 z&naRKhhlDg-eXZBO0meyK`IM?n@AAhVNHnG>GRaXym&MFR=9laIqRy{&%xtf;Slt( z5IP1$MWI4U7Jv`Pk?FBg!j<8(pYb4A<+R^I123f`i4r+Lt6R+IFstRrx@Q|rytzdi zuV-N-XwdQuN|juhQ=lQFUy-1}M@fPvn`dZf$#$N3U){}4=je)qVaJKxw5Lu8l?Fh> zGN{ZkDh?M7E6$24wa&=Ouyb24{{Sr*MvM>0csOIw9+cQx#emEWovop5d9aP;W5bSV zS!Y4W2#if4=z|VpAbQLEFPmD(1^E$PQsyfvyNgq1s!fk+3;| zW?>*+=tUDxE}!pqj@zy7Zu{z}=+%Z+Pcq#P4INbSeg%pI**)`LeFnOr7rA#oQU){r z7=u{yCUa5A8g7RFz&vu^Vu@Z*9+P=qSdO#ztJnhb!wYkpQMshE1(ir-5OhA@fAZux zi1hs@drytcYSZ@Bo6$ z)OEDZk;Csjtn;up$KYuNKgmiB!dR3xJ*vrWhc_%gfa0mfnVeo3-K)}uGEFtkv*Af8 z728(os4*5l=SetcCT?JLk2ad4MrU40h6<-YrN&Xp=?NS}Xp58d?Pk?*q%@D|>sSc% zmtbPS;56%?aY$ML3TjxYRPi%GD-Am%B_2MeK?|&SEeuP>&W!t%R3h)R8V`;D=gkGc zdW_Ou+ynb6TA;&X3)Y>%No6-CpTdLO)oGFkE}6TU^H>zc&E19TYe(3Qi6PeQG`y<< z9h$>5J~Xt>MQOpUk%?_cb9PlC=n$!r@=eP>V#PohBO;{M3L<}BQnm2PHY4Q=v^d5ew%JRw;b ztTv3^VcATMRAl8+F2QC(F#;H>%iO8JGvkp#! 
z;2{isEal=RSP29@UN}1-`@$GZ7)M2{3`~$^#LARqRw1BMG`Fx>T+-;uV($g!9~G_| z2|`&6*qEmK24S5I<*#1VxH@WuEe263*sC-8hsBJkTTYh$D}sYnov->SDvU0@v{kUW zl<2+W@`eG_V;Id5MQJ^xG>Vo#_QzEHfe%%m-kjZ_=cETikB&ehi31~tAx3k`7JMMf zc6G8~>4qzB!!F2t6e4ieS=uhYJZtYDJsU%Mwh8%(vFr&I|0%F_hT^K_Uk4S$|CWt6 zSc@zpi`%DJJrM?$;{VO9EAfq_UNy_OY+zq)6yT~=2Gn6fnj6+<62YyZL|sdV4)TKe z(uF{T*91pXm7d@)WHkyo`g0Q3<3U3g)kaE0_1PlZG{dt6xig6sb3#R#ox9(|sRIol zUx9R}lh(JRipt2UAHty?3x#lX<&XOusimXqu+Gi1U(UDFQ&f-?kPT4mHBQxi`n0YD zf>Cr=DOhWEa zzG&fdl5G@)>2Tzr2=LIQL?`Dx4#hkGhMhj~P|2q&HG+Gq5k}Ht-Cbv2W)9 zU_hV0bg2|d!fwrwo6`|2NWtVn4V{59wcBGA^I8dTGBSLvz%1)Sv-d&)>5Z~N24p)^ zM)@b~3+Gp#bv)+AzxoU*BYXn1Daq3%zK4->0+%urCC;U~PZ7UmD^e}MIn|TMQ%yBA z09BhQrfg=wfGp$xR;+0P2_RoUB}!mrKei%X{t<Ca_XnhOG+M^iD!P4XJL1)CU zua8mrsXj&&bL)pPr9_MnLKxc`5qn<}uN&yW9lh|B7^US8LxJ1G9YshRQ37Y}Fx7Qs zh5g%T^)S6c2t9uJf#Fb1V9~ggVej1iTY8J8FAmxiIZ!7o-`Tr;SV}A(C>SXbtu>N# z(Mc^u2s)|PmGsRelMKCyOLT!`vC8WDx58a*E2?vklX{t4#{mrA0y<1St*rDZcE8J6 z53v(h?g;v*#xNExyMlub5m0cnkbh2K(zObx`Opx7cR6Hs%q$0KO()iQ#j2B^O2%mJ zjdQ>vCnUn$S%~na@I++aQy6LNPI=-IuFNd!p;fy5!uk8x|B*j@Ubyil;1v~fs~`0D zum5{_T-v`*>7;~RirjIlo$Cjc%uhvvK2U(p-+}RUaqbu%Cu35G?v4qLJINs-4X}0S zlf5c+b}_SVB zkj32KOyKk!wVl>4+ec3O+PS>A$!3TGahp$(5eKs$66JbZ>ug&$D8_=dYeP4Wsi`+K zf452fP_5qCm5yrlMSTikR>N+`zz(V=uwu;Ib8$5)sYH)PQf6H;x?TIKedY8%;q;d; zvo5x-Xoc{*r44vH5NS-_lUY z@&In}VZQdTpZ?Zpu*?!r7DA;K{|GW-`Df^NS{NXZ%OEl;s`}Z8l#Lp=&g6cV4&e=; z_&QfS=HpNFY=;n+R3@%w7JEfQ*9;+O~oAbKbgn({kQ9H^=Wft-F6|pJkE5_TB&8y?)zr zI?nNFtNiWe3eg7IGihfXp*?@;T(+)SdP?HvO)*Zk^-X!(>2}-a0x<8Kb*^a>+?*df zusN=rW7r2eq<-_J8Ed~1`%>}%A76CeT%5x!ww#wOur_dRFJJ~S%C?xt3(z9L?zs$S zaU#-+3d9PZpR`T(gp)L5vTIKeG7l@ZNIfm@T7?s}X?y#hk5Js97Eapub?2n>PdDu{ z>&JlFLm0N@2W$`{NZZ!hZtQNGwyX77DuPV!r=sNT2WY8TQPV<%*9B#2#c^|Gwd~G) zQnjZHDD4SaT~SD9yIGpC^y5mh@~RNt;bX7JT-b*LA0Cj!lCe*sWCszx$!)5_x`bKr zWr#}tp{`+G+LtWT(z}}xU3+{mNMps|E3TKYs@#!|rQ9aWpX=|XtYjHCQf&}80(UA@ zMa6m_Fq4LL^#JYH#OaZ2t5_9U@=OmIQDt*Dyr$$hmt2l9Rl@Tm9$M9-7GS`e|zNAFy;_J*GHHQju+3!zda7Ehzn(v#Ky$S 
zo7dm4b?(8r^|o{U+G(LKaB(G_dtcr|IVlTLOYOI$Ad1do(Sms$VYiLJYe@A5NBVW< zU)y!8PrCb`kL8JM?2C-+F)wR(V=onuzQ27+WW+u%qDl6S4#HaODNZJ2Q; zRctOz1n2YX_Q_TRi4kP6X|ir7cH95_PP88jiY|W)1{uyjV5x(uNg{FY1q8y@M4^#6 zU0KoRX*qZLD4bxKJ5Gs0pnC}BR_Y!ohVAts_W{;gWkvm94fmXqdB3d9BTR_Yr2xM!G-*C_` zU7yd~Y`Et$!d}z+;M3jzj+Q@ourDQPpvmXDX>$$AoQ7)za~ktCg4vFPkVwYxos80J zL^HP6WFyzcuE`jf=<7=xX&2I72I4~Bf}%w99xXqJvCT&Zl)?I<-oK-XwfeM zDSaT)E5!NxCA6JGOcg;{9)(vIRrOZ+!OB{pb zO+cHCv!kwfzO8<@pVzCAQMv7p?_A0jX zIhz%-twBm~GScCZtW1t=GJe;y0Us83a1pO_9!3-V3<@u(x~S;Ju+EXI5<>=J)Hsai zj&lZWxn&V@_e%)lE9Av8H^q@eo$s<>gysSwM(Hsa_SlgH!H8*rC8N-jUI@^4r|T<| z?^Qb4%E7FQs}2UUe)>z!-=V`T&5&9rhkxfF@iim9z9pc8y=`Az-kiU=2AiP_3&r8h zvMEgiz@kaz!Ff_y(YMT%oefS~mKMT-h=kG>5vz)TV^)Jm`Vdx+%#(`ZN>v_Tt{hLQ3_kpja;w4`Yg}>&2w`$l9I;I*&dZw;bOM@ z;XQHNz`}7sM!86cB@VMOlB9n8ZX6SL6){;&p()`^aK{lt2^?^thjejiwD|a~hfJfy zvCZ8moIU8dX*fGxXN%`m3KC$)|`s__7Byb-!N zsZzE9a&8jR8v;)01GLKt3pe4g5;GEWxQy)`K(+M2R`DaY~&*^jupohFJH%0EDN%FIM z8wwDB)k!{ret;4|cF_uwms<@TK!8 z--HroWkP;dZ&Sk$UP%-qiY}HaWg(FxJSOr4r2i?cpo3(TQd9@hb zD}zujFcq1{N~9_?2W6nhSEJeJq8M_ z>M%Gg7K}f0$C?wjU-EPHDQ69IohTNBi2`2<$K*Ujg+ESG1Xsp83Km9$B6^Fd_^`lm z!DJ*ehDi?nQ5gF1PBk6HPEkzs`7M$G2(%ffQ9}e9xf>8s?N^>Q0<%{{Q)RF`ZCny; zv51jLnU7?Va%W1Ynzj%@TS!4yME==;F|T1$?ac)^BQztx&x`=$oUn0s8Q`bZ(5OE*N zj+^NzRw)1L_6e?Tq7`X^^55J6mxA;W65i*Ipeu!=RA=J8*Ez9y!!R1I(Vk=Cdw;b2 z9t=s*+WYwhjrp-R4>DH;h+9f^;ENn;nk+w{!_7s_efc2?y->QF5sEEKIX*J0Q(1Tr?3xu1BQ zm0xEL}k1q?9?dDdiGI+Z)5eX^QrSmok-jM$FvAAvuj$-<8NZ9@OPL>2z*Av=Bhg;!`38 zR4HD9s7&J%)Ra!*yCut0bl4rBRlz49%$s^%t$}50U<8|k4h-jCwbig!M&hYB8oYl=c_B=lp5A(o?R}S<1 zS!)A?mL1C?Z(gA@K9?`&F|qs$s4u-9(*Y-3Qp!!Snh%yK5`Ca(+(Y%TAf}!_d9r_4 zZEPJpeo>+*&9_3%7IyeWDdRYd3?odl-!sQr28LZkqUMi*NB=mvd*x!Bu( zQ9BoVYv*Fm`Mh(v_p>jYBP6UY6PZdvYnTe&)PZ5W4GPE|iO`uKe*`Sj^sAlhyKFxU z2&|>Pj`ezDVq3s@fJuw8Qkh$ok+`jmQ8-XxGM-lWN!4UjCczaZWB-esh8M;hC!?HX zi2)ce9yW6(+s{cJ!U*m&QV%lbMz0R*dR1l+7KuGrLm~=T@`ck42UwMl!71xTp7_)$ zM)7t`w`KYW!7`pi<)$B)SY6h{OxA{5UMv;Oziro({jS$0&% 
zVyL=fGZu|S{!p+(UknBIS^%-nvoNomF{FSru9wEhG&elAgY$R0rIdL&W+kun@5?wDj^wEUuD^@afaP!(>O(< zLW>gGmMuf3UgK6U?QYgULn_!bur{SL%O{SxKE;hPTwWaR=Pf|W+1KsX>GkU>uSto| z_9#S>-AHCIe<6!lmS3HHd*3ZtFEivGa%hX0oAEw7aif-2&)05U`&Kb;HE8OFvs&cB z#yM41lbBPiPH4`l{I+iES--x(F`d6(rv8if8*W?mW!=77|MqJ3VUXwV(im5IAo$h) z{`DWeK>s)#vRF{M+>XL8wKJjDDS*aktysNFO%JclCaDv&Eu2#R5L?9b)mgN&O6Il{ zL*@fCvba9#r=+_PUwwhmMD3T=P3eBmra@zHX--eq6kTOwAB zPTfJ=s-Y=rB50ZoRmNG8D_PkgsPwAWmGoO{YtgKfa-w?6Aj2PIH5DwYw^TrkbW|J3 zIs#YJm2vs}q-vYUc3oW`SG8@8C*eh1r1@!aznm%WIEW>XbLFDuJI=z5!(JHT^8NUo zCu^SvJRXN^!l?}5Fm&>A)aG)LRy2*6n>jd3ZbUzv?c(zY{-)4UYZiRNE}}D#gmxOZ zc@lz_mAKy~>_e#@3l|^n#$X8nLRfomGnXwmNG9BJXwd%<{feLDQZb-F+gomicHW@hGS(nS$}wd# zV^L5n7P8w2yj2RU?QyJ_O1UPYL0d!t~O@l9u6 zH9?EV?KExALrG7gmStVpzUuT59RV-*l@@9D=M-Nry8E%oM;H6JVMXQ)ZDvuN6!U7_ zp(@0=3l`66GuO-~v6n4*izp8{U$I5u0t-(Dy(+2K?5C5!}JB zXJgUu*u{p6jVHd&kRuO2EG8X?^?G1qWT-?2r-J?N(kL;acA}jzMlJvo6Ni>P5w&oq z774fg{+gJ$2J3$zj0@k3J}HtNMe-?vsB>Rz!!fDJR5ZC1kO9^eAZJlbK=LJV!Lcd0Yeio0 zl!a4?1l7Z0|JA?nx zn8aT=SNMP4my3p07A2QAY0ryiC1-v2tX;4`s)HK8`s^&c`_bou_zV5@h_69-5FAEe zwe%Ox+1>9h%ME#1b5Y%Cv8Hv^aRlK{<5+a~i##Nxs9Ad~01--bxA3s~&`|yL_Zvzh z_OH^kM0-2M!T@L=4BXo=VvD{@l`doF@=JPdDM!}!w%T~2RqM;Z3CUmfM;dAw4R#$^ zwuw6&a^(ZOJnIls4TGjCS!VK6kW<|5-ieT8W3s_RfjicH|3VDzO)tq zVK9vo;f_hX<5=RCVMWKK^;7>um0{Z(Tpd z9QOvV!c2#!&uU)vqUdC-1avc<;!2n;}4kX9lh%t?N!jZ!?behwVWBk2g?X7QcdFxnj z-l{FxP;lPk#4Gg)rZk&JQ0_p9k`cztHkIqph_hNuX-Q`|tkt3lwNSbvK1lYjF!%Bo zhNis6HBiHaX_N-RL}IsGJ5tMS`(of1>MP2+n8498-WWOi?1|QxI(<= zx9+aLw;s~IvNLvgb}bZ#HrC(2{!Z%binUkZeEWob?N?_Hm!9#+>IwBod8=Nn%e`<) z(QrrWnkp?MLU)p=uV3lXGM@6t>KS!{+^%!$asbL8A}SC_t$cTeOsBRdWd5o)qALT4jqcn~Ff;-1>cMw4sAR2hXB>)MUa9n7+uyg!5_nea zWqno_rI|M?RFss8BTTwkXJO=fJNPp>UDE_h!l;{cx?@D|XlK(EdN0G`3`aIXU z^A3s;0cl$tOaswsT>jK(E6HaqxJBTP zk|1lWp?lPL0Aq~^kV4Ke5(tWXQlDs<+^mSNDA(Iix31*XIVjc-mQ*MNg~$q07X9}D zv@(pw!gyyHwT|*%6DMNMO%lSNvr!1*pg#t?AahUf;5PJyUmiy;n@w?R)%%*9(g!b! 
zoAI)Jad~Rzdp=Fk0f5xkGcR)QeiXGb;V0zy_9k-$j!@NhX#4s>m?)a!_wkss_s+~& zxMLU;&d7az_nRF@AZ$_{<-L$t=nT}2gd?BtKY8*TRM`C|dryt%)cTgM*x%LzQ#$l7 zVOHt9cTjtF6WGd##*8=0?I~sJ$V8u=$|N5={fpT=I5xYhMy9Umt5k-y!&2w_JtGV> z`hA^68ac-)?0^B*r4qdVu_N+Sf>ORr{ICf#^<%WYA1n36z2Y#vdHK@9k6%(4;VLhL z1ps90p7~hri;i0_VODf;mF{jkgk|+zJl@3toZ^`)>WCw1={@VGiHqOF zD%WqSo-tXOc?jQcCGT+UVAXeKr9H4((r99Aotb^gdLY{-JV}_p5{Yc8>m*{-^IZOY13P|)>hs+ zUH&J@w5q4ndkd`kWM!4!{WzZm6#PO()(=F0j29bY%+cN7EbppEX_UIqZ6Dnr!*^r7 zqYI-+<)e%vQTw4D8nC_@O`LySECCf&Q6Ln4sYd`0)|fg^ySJz8;ZcKQU{0 zPT)s;Uj^5R{!8sLw{OZ?b__Oz*HsW3>p#@PT&&w+wdHzjG38mrH!L^lrSbC$x6a|d z>F>y3iv^_QsXHhAbP&Sv=H_wwfgm*-q?9=|MHDQt&?m$=>g6~@4bW-eIr?GH2X!wc zg|<0q{oeCBcv}gFST>NZ5%`(A`4D51r@F4lO)YEPN#Qvwc4(8v9v5{~ED&34ZZQ{Y zJFPX7tSnor)$zmJ8_iR{D$QoIMjGi2)zs??P0<%$y`D@0I+cl!XEt>Xt=AEIeQ+;F~;Q8-t(MdBv3ealA;}yKnk# zpeK`6WCv_E@Afw{R(%*y&knH-`aLM8jR~U zc0#bpd{}^hMFE2Muyz#R-rSivS2T8Rf8@LggP9n)_lESjYCh2!e}H?fW> z$xbWCsyKReT+5E8z>5?k2OB0Gg+rwpSW`CWx23y(}^?%~x zL?JF|N3-&fraWZ`Wlc)RCDHaukkj@bU#HGfVntncC zyw-!7f&zz>%BbrIRKtsDWo1OnNq|LWZsbvkzzC5nqDot{YRy02dtR3b^ZfCi?&JAt z>W7i2CaHAeCg=6DQaG-CbJe=OINLqxbi194bNx-?7u0&9AfCE@qWnr-Foi}w9V2P7 zD8GIE<@FBU=8Zeyq71z%bvu_NGQ!rRdJZC6k630hOkPou-s~Cm9?|CAIG3*;NRB4T zO10p{X>i{*m&A!AxKl%hfK_~LNN=n>UhHnE4@GR;NP6M&gUdxoDSv0>fZEv$i>INQ~t9eJTMAe?P&C$eLqaS{GNt3(bKv#pBJW0Br|+AW=h!K ztF}7JWk_Ix-bBpf$p92PpZoz5|CDyVfK#7pbjO|>`D`RfHeR_A=J#VdY5^!kKjxW- zW+}j9)`N!&l7clfl9tWKD!+xNin`sBsujQI0Zuu^P_F(9^yrZ+Ivz^6BQyYe!0*(BZuv~DXfcs+;`-TjE6`X3 zwtAE=K(Ib&jI z2v+5>)N@5?T&mZI%8)C^hUX4`aT#$2L-RZTm1nD>PqYM1KU#k9aK+4J$H8=vur261 zK1JPJiXP0xcQ^pjB5EQVirANu=d*p@zIxRZOwgwDqTGh;+j>*b;^o$^BYK?)t;xTx zchjtEMiUPnotiM%~iX*c6U@qeb&(Xthlaz16eoJmM5lZsV1vC>X%#7 z_OA)RG^u#D=GrYR4vScSsb*JE52$T%&BgVbs>yn87_eJYEo;?<^#Zx+5L*P13$LH6 zU~v7aFfLnVAHJxoM_8(TTqHpEbrx{NSR=Ub&;~#aLm)PMk+O_6p|fQCa>I)x*cJRY zx+$shzE`V`jl0$K-P`U!DgAu^`F^!Oalc##T;tM{(hOKYWh(i;Sz`0Xkyf z@*2!afv_K~8?bpIHjQ}9V$$5HInao?*swg7wEdv?hZCv}?4=sh#{+cFWa4kO=v38~ 
zn2nPVnP0_(&1b>gAHWRmfozFSQoF85+j!+tKVEIeBMB4X;rOpffnA5QUuoU5Ekk>*;Ohq-9&0i zsHGdx%cZcS0GRB&XOcv%kTp$SuH$^g%Q#XqMVJVa55pRiaY;ZN9_-Zxpi0=0?!@(d zr^O?IFD7Ey&%3SbpNYSbmYzAS$fU(trOiLnU@JoHIl5?OYjjF{tGl&?t1GYEw1sv` zx^AfzGrr9#11m4B4?mPb%-!w=V7sEH&xVx@~dBJRXv(n<&)9D044Ft+bC{hfW{bwER;7_ZvNGe0RO;ph>6J zdW+=33509=^IWGx#k^0;Mi5E&X}qcSP2K8i{E!XhSfi}75RtJv&f6@7RWMJzVo2W} z7ZGu$TxfQ4J+#3_F0aoSQ*wzU>@HkN0t-hILjx#ec?H11{*(W23QabdxAz-ZUx@RAnNS2$)jQJq#48tDVv;02r7&4+5 zT@klg+;vZ3vL-WFKs}{MP?`^h7#-4g;)GXtj;-&i2lCjo-4ZaKj(Nsr96)o%qB#qq z{~E~#&GGMYKhB%oyQ!qH3q9z1hT4=ASoLeRZ}pY+`a-cl*Y5RI>%4n;akVj5TW;VuE`~}-sosGNR5c(g zFRqtk7VWh$Gqf4W&4pIh2vf()Zm7ThUW433^!}1XV>XIR*?XD}l7Tyl5~KNcx6}t_ z8Z47_YFsvwaY0r7``6#OaG4lTxU+r+*56kj!?W+lT1HLPoZOlr$Of(C<)3QTz12-C zobpefRTJNq++>%2DyjvG$~VgoI}W;|V}$qwFnGO)iIPE6d11W#Tf~TPDG`6x5j331 zS~@#a)Kt49LWTs0?b1~^)xgnlX27utp6_Y zPnMqwLm-(M@&`Cx1YL_(5C6f7s^LF)>=j};Q}m*H3l{Jcw2TPLw}TmorVEGlkM(x2 zh7lZE6*;$5+ei)niGoLde%U=)bH44ps84M!IAAZ3^NKGrw3Y9qakx6aU)&<=DN8E0 zG$vNIEU{(XdZ?>vQp%JlkmOQaZ|b{=ONrC5oGven$SIgp=vNg~5ECoe^>wy=d>}rr zL7i@^c|T)G5>dSuctaoa-wMD$)Ou6G--_TEOl)N#;09~J3}e;Tf_+wMY9vQ_2kTzD zeVoOGqJ;`@wEb+2%taDI=!+`Zeihe?dLjt5{zyGAmb6S*SJTAH8Pz zSa)psL9_$2&P2yB9H|;2I~He5()9M_|K}^-JkhEOk-gRK-Fx1!cW0oGKxg6}19c=~ zyho&$=g>n1>i#FuneUT$ELve2^xN&15jFmKU0Ahr))rXcM704G`xJ?^vgG70{Iy`Q zS)Mc@kn5kcd@K~5^2~dy6 zci*@k(qB*b8UhkkV5)E$Xk{r^A6r(E^#&v<>t{G(F6wL!1686-2-6CGLjzfAKzJ%k zR2b*g6@xiixP&KzW-R}T3aB>U0zI`7=BKFRZLRnkxT7kJXtF^Ut4FTVj4L@0o|bjC z8*Lr3yZ|bme7QE^he&GxIxWl<^bjqX2<-tuy*VjFGb(x*rcqz3C9N&|WaK8|QMjKI zs<4*DuC~%Yc)fT(8hW^hk$5;Ix7#3Mu*rSmn^RWgL$%(Jrcdh%itd*+YD}?4?bdfP zZV$?{)>0!J;*kYmA7a2A9ac>4CSC+f<~&H23o%xd>!8zbBeJd#w|xgQlR*)? zAn%7}4p6285o^e)jI%I>BPwy(!(H#&P2;q$%o;W-15ld)>LdxxR4PH&+*z?F;9)<8;o? 
zFMip22ZPjh+RkKDoH+9=*Off7R_?w@*)@`L9m8KXY0)-ERBpb?fvc+|hN; zZ~p(@uBEwc}FCWWh`0t+{uon=&@{zQjtM_y&8U9jcx@RNafsAN$Vx^iXa7aJW-t z=+;vI1M1d1k9rm}nM%kBQUSZ$vYETW1T;^UMG9VGtYU=00l;=vYd{NA!7brn1C7UZ z^a#R)$R*Lk0+1E|NabB(fIv3Vi)FELrQ@ZLz#~N8%zfYiv3-SEtArYnV7UZ}YBx=D z!dGJHB4Oq*rm&>o;mb%L0!+yxj&K+bIj;hd3I7%I+)Q4-q0G>rP{701!1pb}?#>r&T0Bz?_l@b0ZRBtGfX^mQUx5$Et#PlFm=@JK+EM z1WP9F&^+>|zuwEV4a3mRfqWY+LwDsMAjSd8`O<3}SC`4s?V$Y#~ zvZ*7s3eoe)9v+d z2Dg{ai_UrHUH1ciB$O&V*YCXTefj6-AGOR!`N#9Y)w}oI zSBFs5EoMt7@4`Vqasz;*56G#1+eJ(PANJmgnZksxE^h%00yZ36bbEtt2YV3>j2+v@ zBup&0brNp@33a-3GiWB=8-zx3Dx2ST;3N3Ot(Z&?Cj+xVOahO-g4w|3qA?y~I<4X} zJ*+T!S?HT_O^RfmpyMQpH^0P>iPyR%5u}mb|3q<41Uwn*{lJ`?`%wU@VRiLL6&`ffgO!P{;>HeL$8JO z#YJv%H^)dZY}YjXn`-8W276|pKs)I4!Ht4Z5a8~8{y6d1*#SOs=JDq5;Tm?<^^<$1 z=er*YDj7XpZ1V|*%HuA}Z?7*e9Hg-J7{CQrAdtV7KO0)$p3^BkH=_B*rmCm&A7mS^+pE6S*wN%Qog z_Cs98Ynj?|?>eisx~1qhtP68DAEI%t?Sfmwn4#=bRH&K=Oe1-M2(_|32zk%=EQDvM z^e_#9898#h2DVxTC|?GGf|R;3q)Z=aYL{a8unL?AZHuJ-4V^caw>zus)?j0(>BSK|2*PU)#Rz!pvu&LTh^<&SitaW)&p`Y-*8q;|t}=gJZ8;V~I5QiCTM@L|+M#Gha* zN%GH_N!5qI3??(0ZQhymt_W@xnKLG`8|wJRnkAi?>7@P}%;nj^if!fYK;4nXnd3qOTDauoOrn-qu!zfL z!cpU3jw7~lk__Nu9+h!!wTiJCZd<#VUgQB`p>qgPZ)ah3PA!SoabG-fJ!Run|!!Ub6eKfpb;8E`I0WKLcG4ZX#R`^n6rBYTh6iWGug3Fi2Hx^DK^o#SAg1B~C#5j(e)8L8WG+aD#t?Z4q-n zfWvm^tog#+o;@vX^o|zl^P*!XiUl1Ze*Y*{i)%U<_;cR2*wWswNW=kj&A#J2PR5xa0xTjb}IJbR$^<++Rv$=wUVuW?JeEIWo&M*A(9ALVR*5et+ zE~4Ysn^G=~@Kre!dd}Sui<^Ia^)BQ#;R>!*;D+%m#9YUQFR5@aotUq=R;E2BKg6?O zf@JhTAUW%lO9MU(*&PdsJXi41ODZ*h7aSRm5LX=0m5+kEc;yP-vu{K;1zKRoncs;m zMr<-+lW)fB5etu(vo%O(P%4tu=g4s<6Leq`(+_hO9x|qlF`-ZxEE+%~RHg*@UE+@= zUK6tX$eHo!%yA!F?xj!Z@|^~Z5||N#6a1lY>i7}p4p3|BC8ppk0Tc>%{E_-S4uwq= z4^7kUDaD9td-5D|i5K%@ILTuUqwv9G#_@!KOARk*xfgIqvZafJ%MaTzuXQ?VbQU=? zfVbFfM{c3mK}EC#Krp#;aVJTb2e9K-W>)uZKojo}%eI2M->;UrPo2zg+ z12}o!PK;N8N_^LMxow&=#sL7O$ z!$$dVfQVuow`(Ng@e5J7<7dW-euk(GwzPtm(L|d@pVNC427? 
zVGO@~|Ies(d`_WEk}-(_FbYwSI9O(eSl^8mW+PDSMv3dCKk@~^Ekf|AFQv)yR-DhL zq=&Tey-;v<0V-VWZayi{&wx5DoGb4){vvamNR*hEeMYwEfbDfVH?MlKZnM*ORAEui zxq7KpAUPeq{^zxdt|-`^YmoOVO~`?29lqSMA#+@ zSjGHzOK&Nvp%Sw#HAV7tMkK`pIMHAjF#!8iIW95O9V`mZMEISsRscz1IB&(!y+<%` zv=(QG$Kp7l!$;Z+G6P@!sbp1cObJ%yu0EqgxpG{HGD1lP8hyz4bf!l{aD3Em47<-Z z98iTP=8SgV5FVr;a_^NPt2JJRBpe~Bb;!07{urmd%7;?GMfuZ?*Ls3B?*eIe#p=u$ z2>IEO2GztTMD`wNwliH6BxQa;Y24ICKCJ-CCq*;$TD&$-DmQkU0`tZ>N;~8f9kLr@ zaat`B9c?od_t`|#a8gvEj^{=oLXyD?h(2O5+MP-67zgOXUQt`vM9}MCEPl+?U@_Ji zAUq0WgMTK8M5GbGBbBc65O`Q-J9cL7BWz1!Szr~iej(_&83tpNii^u4s+Da*$ox8F#fzM@u0RrwGr`U#i z7=C-t(gdg`0Iwoa@VuzOE&=l>_l<5-5WA`31=N5XT}9p4KytOr71C6rSZAYKWs~pN zQrfV|ER)JOWtHJRZ#%iRNnVF)zi5b(D*sopxwjJ~BRtKqh$krwE*xLz#?T~7Q*l=1 zAuZf4IH`p4csPbv#z)QroAI$H?TiOBJ`l2cu>2tRgpT6~JDe3*OcH72vQRM9oQ2fs z8GX`1h?6Cv1pCDC0}Mmb8~CV=oBp&iW_Oww*co)2aChnl5l<~B@l(_HRzaVFZ{>z= zd1jU9)Sr_~Fo$~12}b%Fr)f0khS@%?=rCOnSOg9gjH`ZVx$$9fs*#bopBWaj0#DDX0>P)1 zhzst9%)HO6ECKOA|1%#SSt_28qJNz{uXxxZ6Z&eo50u=#|$7D*Nn%nEhkHkIZ0)jzsx0rob| zt)7PuAhAGkT)E#=wkp$|UBrTUJLue8T^_%@9m2wCA<7QtqT>eqQNCEv-@@9vKVMK@ zom$@ZZ5*wL2aJmj-}T|*s#cwBm5LTBMd&e#E@deqWXI$jfuaY0s*jK9lK9TpOE9&f zBoks!-&(1Bu5Q;hN&>nCNwuVgvFKMMi$f)`9YsArNNoNV z;NDRxQ4luB{@ETnY2IdZi+1UVCbY-|Cubj|7@Oq_<<0pQC${UCRqL!Rr`m?!c35_x zhvs>19h|+Nz(J0QJ5Y9g)56$Kj(OJe zLUxaKr(r@U-zRn?p(vTfW!?9e*R_vmky`ZtJC=6mnWv3}jA=C8sRAyuK}JoSGlJHG zGwkFgSJ)VG{iCfhWD~w@Y#R`|i9w96eO^6|<|}n0pS(rFGh<5Dx(6%QXRAxk>YN+RfFtJ(fYzT$$Kxu+D?Xlk z_&dD9nUy9DGxIv^Io;4C3Qb^Qp{^}}=sRR!6N*~$H_NbF} zY5#eb(?aQcppZrhtC!s$wREi8Gc#b%Zx;kYB-zgdrIXnJrC0MdTe-K@|Mtzfkx;FGRlc|C$M+m^^*hnr7Wj~F;9V!ec;Zup>D z52o&nJ^ilq{FlLK8kBSGC2+s}ge*`TOE9Fw1?hrJWa#&TcFg!*6M6P@u` zLF3|;TBH71WJ`1@WlCShd%r$?^5k0weE}yYM$ou0T=~kqEWv`P&L>DBhl{{`pBQ3E zl!;Y9DhG|W8mYVly23H!mq1cNWMp+Y`A9ackc$95oQTDgnIR+IOf121-2!BTrGb>L zL;l{{uLC&tRi_ooF@CC#Ee2^55X;%{Nul$Xa8Pnz$~4 ztCn;P-~2@`%jT2I?G{FjzH~_nMy8#Z(9<+U099O1m0y^pabzhQGFLUtwoDZqHeM2` 
zxAee}k`89hOBg!CjvJZ(#gN?1hfxc)EhjG>t)kUs?wxh}Nhd=1I~>d8@%hd>P+?RV)K&C!u!q7vye|=$oZjC{477?4hSXmws4hJl1WJ#cs9gi ziojvdeC`p0=fd6m*{AP!_yY9h9JR%Wg^a_k$LW#g6dm&^nX6bGP4#a15$-s5Y`}yn z0uEw{2onQ>V~OK2WMYT3rt%$hW!1`BUl0&=Dh(``EEk6^_w!kD71OPdtOc?7Z Date: Wed, 20 May 2020 10:29:57 +0200 Subject: [PATCH 20/53] tests --- .../dhp/actionmanager/project/PrepareProgrammeTest.java | 2 +- .../dhp/actionmanager/project/SparkUpdateProjectTest.java | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java index 50804f75e..e9755c858 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java @@ -73,7 +73,7 @@ public class PrepareProgrammeTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-programmePath", - getClass().getResource("/eu/dnetlib/dhp/actionmanager/whole_programme.json").getPath(), + getClass().getResource("/eu/dnetlib/dhp/actionmanager/whole_programme.json.gz").getPath(), "-outputPath", workingDir.toString() + "/preparedProgramme" }); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index d48884842..c41daf2cc 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -9,8 +9,6 @@ import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import 
org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; @@ -72,7 +70,7 @@ public class SparkUpdateProjectTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-programmePath", - getClass().getResource("/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json").getPath(), + getClass().getResource("/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json.gz").getPath(), "-projectPath", getClass().getResource("/eu/dnetlib/dhp/actionmanager/projects_subset.json").getPath(), "-outputPath", @@ -85,7 +83,7 @@ public class SparkUpdateProjectTest { .textFile(workingDir.toString() + "/actionSet") .map(item -> OBJECT_MAPPER.readValue(item, Project.class)); - Assertions.assertEquals(14, tmp.count()); + Assertions.assertEquals(16, tmp.count()); // Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class)); // From c0d9e0234050929097896b56a2824a50e262aa9d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 May 2020 10:30:25 +0200 Subject: [PATCH 21/53] zipped test resources that are too big --- .../dhp/actionmanager/project/programme.csv | 25 +++++++++++++++++++ .../dhp/actionmanager/projects_subset.json | 16 ++++++++++++ 2 files changed, 41 insertions(+) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/programme.csv b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/programme.csv index e69de29bb..6a9c855a0 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/programme.csv +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/programme.csv @@ -0,0 +1,25 @@ +rcn;code;title;shortTitle;language +664331;H2020-EU.3.3.2.;Un approvisionnement en électricité à faible coût et à 
faibles émissions de carbone;Low-cost, low-carbon energy supply;fr +664355;H2020-EU.3.3.7.;Absorción por el mercado de la innovación energética - explotación del Programa Energía Inteligente - Europa Europe;Market uptake of energy innovation;es +664323;H2020-EU.3.3.1.;Ridurre il consumo di energia e le emissioni di carbonio grazie all'uso intelligente e sostenibile;Reducing energy consumption and carbon footprint;it +664233;H2020-EU.2.3.2.3.;Wsparcie innowacji rynkowych;Supporting market-driven innovation;pl +664199;H2020-EU.2.1.5.1.;Tecnologías para las fábricas del futuro;Technologies for Factories of the Future;es +664235;H2020-EU.3.;PRIORITÉ «Défis de société»;Societal Challenges;fr +664355;H2020-EU.3.3.7.;"Assorbimento di mercato dell'innovazione energetica - iniziative fondate sul programma ""Energia intelligente - Europa""";Market uptake of energy innovation;it +664355;H2020-EU.3.3.7.;"Markteinführung von Energieinnovationen – Aufbau auf ""Intelligente Energie – Europa";Market uptake of energy innovation;de +664235;H2020-EU.3.;"PRIORIDAD ""Retos de la sociedad""";Societal Challenges;es +664231;H2020-EU.2.3.2.2.;Mejorar la capacidad de innovación de las PYME;Enhancing the innovation capacity of SMEs;es +664223;H2020-EU.2.3.;LIDERAZGO INDUSTRIAL - Innovación en la pequeña y mediana empresa;Innovation in SMEs;es +664323;H2020-EU.3.3.1.;Réduire la consommation d'énergie et l'empreinte carbone en utilisant l'énergie de manière intelligente et durable;Reducing energy consumption and carbon footprint;fr +664323;H2020-EU.3.3.1.;Reducir el consumo de energía y la huella de carbono mediante un uso inteligente y sostenible;Reducing energy consumption and carbon footprint;es +664215;H2020-EU.2.1.6.4.;Beitrag der europäischen Forschung zu internationalen Weltraumpartnerschaften;Research in support of international space partnerships;de +664213;H2020-EU.2.1.6.3.;Permettere lo sfruttamento dei dati spaziali;;it +664213;H2020-EU.2.1.6.3.;Permettre l'exploitation des données 
spatiales;Enabling exploitation of space data;fr +664231;H2020-EU.2.3.2.2.;Zwiększenie zdolności MŚP pod względem innowacji;Enhancing the innovation capacity of SMEs;pl +664231;H2020-EU.2.3.2.2.;Rafforzare la capacità di innovazione delle PMI;Enhancing the innovation capacity of SMEs;it +664213;H2020-EU.2.1.6.3.;Grundlagen für die Nutzung von Weltraumdaten;Enabling exploitation of space data;de +664211;H2020-EU.2.1.6.2.;Favorecer los avances en las tecnologías espaciales;Enabling advances in space technology;es +664209;H2020-EU.2.1.6.1.;Assurer la compétitivité et l'indépendance de l'Europe et promouvoir l'innovation dans le secteur spatial européen;Competitiveness, non-dependence and innovation;fr +664231;H2020-EU.2.3.2.2.;Renforcement de la capacité d'innovation des PME;Enhancing the innovation capacity of SMEs;fr +664203;H2020-EU.2.1.5.3.;Tecnologías sostenibles, eficientes en su utilización de recursos y de baja emisión de carbono en las industrias de transformación de gran consumo energético;Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries;es +664103;H2020-EU.1.2.1.;FET Open;FET Open;es \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/projects_subset.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/projects_subset.json index e69de29bb..b8805b2db 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/projects_subset.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/projects_subset.json @@ -0,0 +1,16 @@ +{"rcn":"229267","id":"894593","acronym":"ICARUS","status":"SIGNED","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019","frameworkProgramme":"H2020","title":"INTEGRATED COMMON ALTITUDE REFERENCE SYSTEM FOR U-SPACE","startDate":"2020-05-01","endDate":"2022-07-31","projectUrl":"","objective":"ICARUS project proposes an innovative 
solution to the challenge of the Common Altitude Reference inside VLL airspaces with the definition of a new U-space service and its validation in a real operational environment. In manned aviation, the methods of determining the altitude of an aircraft are based on pressure altitude difference measurements (e.g. QFE, QNH and FL) referred to a common datum. \nThe UA flights superimpose a new challenge, since a small drone may take off and land almost from everywhere, hence reducing the original significance of QFE settings, introduced on behalf of manned pilots to display on the altimeter the 0-height at touchdown on the local runway. In fact, the possibility for n drones to take off at n different places would generate a series of n different QFE corresponding to different heights of ground pressures referred to the take-off “Home points”. Therefore for a large number drones, new methodologies and procedures shall be put in place. The ICARUS defines a new U-space U3 service tightly coupled with the interface of the existing U-space services (e.g. Tracking, and Flight Planning services). The users of ICARUS service shall be remote pilots competent to fly in BVLOS in the specific category of UAS operations and ultralight GA pilots potentially sharing the same VLL airspace. \nThe ICARUS proposed approach foresees the realization of DTM service embedded in an Application Program Interface (API) that can be queried by UAS pilot/operator (or by drone itself) based on the actual positioning of the UA along its trajectory, computed by the (E)GNSS receiver. 
The output of the DTM service would provide information on distance from ground/obstacles in combination with the common altitude reference.\nAccuracy, continuity, integrity and availability requirements for GNSS-based altimetry together with accuracy and resolution requirements of the DTM to be provided by ICARUS service are key topics of the study.","totalCost":"1385286,25","ecMaxContribution":"1144587,5","call":"H2020-SESAR-2019-2","fundingScheme":"SESAR-RIA","coordinator":"E-GEOS SPA","coordinatorCountry":"IT","participants":"TOPVIEW SRL;TELESPAZIO SPA;DRONERADAR SP Z O.O.;EUROCONTROL - EUROPEAN ORGANISATION FOR THE SAFETY OF AIR NAVIGATION;EUROUSC ESPANA SL;POLITECNICO DI MILANO;UNIVERSITA DEGLI STUDI DI ROMA LA SAPIENZA","participantCountries":"IT;PL;BE;ES","subjects":""} +{"rcn":"229284","id":"897004","acronym":"ISLand","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Isolation and Segregation Landscape. Archaeology of quarantine in the Indian Ocean World","startDate":"2020-11-01","endDate":"2023-10-31","projectUrl":"","objective":"The proposed research presents an experimental and completely novel investigation within the historical archaeology,\napplied to isolated contexts. The main objective of ISLand is to provide a new way of thinking about human interactions\nwithin colonial empires and bringing colonial studies into dialogue with medical history and the emerging concept of\nhealthscaping. It seeks to do so by studying quarantine facilities in the Indian Ocean World during the long nineteenth\ncentury, a crucial period for the history of European empires in that region and a flashpoint for the conceptualization of\nmodern public health. 
Quarantine, traditionally viewed as merely a mechanism for the control of disease, will be analyzed as\nthe outward material response to important changes taking place socially, ecologically, and politically at the time.\nThe project is a part of an international, interdisciplinary effort, combining history, archaeology, and anthropology. The\nresearcher will tap numerous archival sources and archaeological data from selected sites, examine them through social and\nspatial analysis, and systematically analyze a test case in Mauritius through the most innovative methods that target\nlandscape and standing archaeology.\nThe broader impacts of ISLand have relevance for current European approaches to the migration crisis, where the threat of\ndisease has been ignited as a potentially debilitating consequence of immigration from extra-European countries. The\ntraining-through-research project at the Stanford University, the top institution where acquiring knowledge and skills in\nhistorical archaeology, will allow the applicant to develop into a position of professional maturity with a specific\ninterdisciplinary set of skills. 
With the support of the host institutions in EU, the researcher will promote historical archaeology\nin European academy, stimulating new approaches in usual archaeological research and an interdisciplinary approach with\ncultural anthropology.","totalCost":"253052,16","ecMaxContribution":"253052,16","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-GF","coordinator":"UNIVERSITEIT VAN AMSTERDAM","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} +{"rcn":"229281","id":"896300","acronym":"STRETCH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Smart Textiles for RETrofitting and Monitoring of Cultural Heritage Buildings","startDate":"2020-09-01","endDate":"2022-08-31","projectUrl":"","objective":"This project aims to develop novel techniques using smart multifunctional materials for the combined seismic-plus-energy retrofitting, and Structural Health Monitoring (SHM) of the European cultural heritage buildings (CHB). The need for upgrading the existing old and CHB is becoming increasingly important for the EU countries, due to: (1) their poor structural performance during recent earthquakes (e.g. Italy, Greece) or other natural hazards (e.g. extreme weather conditions) that have resulted in significant economic losses, and loss of human lives; and (2) their low energy performance which increases significantly their energy consumption (buildings are responsible for 40% of EU energy consumption). Moreover, the SHM of the existing buildings is crucial for assessing continuously their structural integrity and thus to provide information for planning cost effective and sustainable maintenance decisions. Since replacing the old buildings with new is not financially feasible, and even it is not allowed for CHB, their lifetime extension requires considering simultaneously both structural and energy retrofitting. 
It is noted that the annual cost of repair and maintenance of existing European building stock is estimated to be about 50% of the total construction budget, currently standing at more than €300 billion. To achieve cost effectiveness, STRETCH explores a novel approach, which integrates technical textile reinforcement with thermal insulation systems and strain sensors to provide simultaneous structural-plus-energy retrofitting combined with SHM, tailored for masonry cultural heritage building envelopes. The effectiveness of the proposed retrofitting system will be validated experimentally and analytically. Moreover, draft guidelines and recommendations for determining future research on the use of smart composite materials for the concurrent retrofitting (structural-plus-energy) and SHM of the existing cultural heritage buildings envelopes will be proposed.","totalCost":"183473,28","ecMaxContribution":"183473,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"JRC -JOINT RESEARCH CENTRE- EUROPEAN COMMISSION","coordinatorCountry":"BE","participants":"","participantCountries":"","subjects":""} +{"rcn":"229265","id":"892890","acronym":"RhythmicPrediction","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Rhythmic prediction in speech perception: are our brain waves in sync with our native language?","startDate":"2021-01-01","endDate":"2022-12-31","projectUrl":"","objective":"Speech has rhythmic properties that widely differ across languages. When we listen to foreign languages, we may perceive them to be more musical, or rather more rap-like than our own. Even if we are unaware of it, the rhythm and melody of language, i.e. prosody, reflects its linguistic structure. On the one hand, prosody emphasizes content words and new information with stress and accents. On the other hand, it is aligned to phrase edges, marking them with boundary tones. 
Prosody hence helps the listener to focus on important words and to chunk sentences into phrases, and phrases into words. In fact, prosody is even used predictively, for instance to time the onset of the next word, the next piece of new information, or the total remaining length of the utterance, so the listener can seamlessly start their own speaking turn. \nSo, the listener, or rather their brain, is actively predicting when important speech events will happen, using prosody. How prosodic rhythms are exploited to predict speech timing, however, is unclear. No link between prosody and neural predictive processing has yet been empirically made. One hypothesis is that rhythm, such as the alternation of stressed and unstressed syllables, helps listeners time their attention. Similar behavior is best captured by the notion of an internal oscillator which can be set straight by attentional spikes. While neuroscientific evidence for the relation of neural oscillators to speech processing is starting to emerge, no link to the use of prosody nor predictive listening exists, yet. Furthermore, it is still unknown how native language knowledge affects cortical oscillations, and how oscillations are affected by cross-linguistic differences in rhythmic structure. 
The current project combines the standing knowledge of prosodic typology with the recent advances in neuroscience on cortical oscillations, to investigate the role of internal oscillators on native prosody perception, and active speech prediction.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE DE GENEVE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} +{"rcn":"229235","id":"886828","acronym":"ASAP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Advanced Solutions for Asphalt Pavements","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"The Advanced Solutions for Asphalt Pavements (ASAP) project involves the development of a unique road paving technology which will use a bio-bitumen rejuvenator to rejuvenate aged asphalt bitumen. This technology will help to extend the lifespan of asphalt pavements (roads) and will reduce the environmental and economic impact of roads and road maintenance processes. Recycling and self-healing processes will replace fossil fuel dependent technology. Self-healing will involve rejuvenating aged asphalt bitumen using a bio-rejuvenator developed using microalgae oils (rejuvenating bio-oil). Microalgae has been selected because of its fast growth, versatility and ability to survive within hostile environments, such as wastewater. \n\nASAP will utilise microalgae, cultivated within the wastewater treatment process, as a source of the rejuvenating bio-oil. The solvent (Soxhlet) processes will be used to extract the oil from the microalgae. To ensure the efficiency of the oil extraction process, an ultrasonication process will be used to pre-treat the microalgae. 
The suitability of rejuvenating bio-oil as a replacement for the bitumen rejuvenator (fossil fuel based) will be ascertained via a series of standard bituminous and accelerated tests. A rejuvenator-binder diffusion numerical model will be developed, based on the Delft Lattice concrete diffusion model, to determine the conditions required for rejuvenation to occur and to ascertain the healing rate of the asphalt binder. These parameters will facilitate the selection and optimisation of the asphalt self-healing systems (specifically the amount of bio-oil rejuvenator and time required) to achieve full rejuvenation. \n\nThis novel approach will benchmark the effectiveness of this intervention against existing asphalt design and maintenance processes and assess feasibility. The ASAP project presents an opportunity to revolutionise road design and maintenance processes and reduce its environmental and financial costs.","totalCost":"187572,48","ecMaxContribution":"187572,48","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"NEDERLANDSE ORGANISATIE VOOR TOEGEPAST NATUURWETENSCHAPPELIJK ONDERZOEK TNO","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} +{"rcn":null,"id":"886776","acronym":null,"status":null,"programme":"H2020-EU.2.1.4.","topics":null,"frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. 
beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. 
\nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} +{"rcn":null,"id":"886776","acronym":null,"status":null,"programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). 
The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. \nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} +{"rcn":"229276","id":"895426","acronym":"DisMoBoH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Dissecting the molecular building principles of locally formed transcriptional 
hubs","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Numerous DNA variants have already been identified that modulate inter-individual molecular traits – most prominently gene expression. However, since finding mechanistic interpretations relating genotype to phenotype has proven challenging, the focus has shifted to higher-order regulatory features, i.e. chromatin accessibility, transcription factor (TF) binding and 3D chromatin interactions. This revealed at least two enhancer types: “lead” enhancers in which the presence of genetic variants modulates the activity of entire chromatin domains, and “dependent” ones in which variants induce subtle changes, affecting DNA accessibility, but not transcription. Although cell type-specific TFs are likely important, it remains unclear which sequence features are required to establish such enhancer hierarchies, and under which circumstances genetic variation results in altered enhancer-promoter contacts and differential gene expression. Here, we propose to investigate the molecular mechanisms that link DNA variation to TF binding, chromatin topology, and gene expression response. We will leverage data on enhancer hierarchy and sequence-specific TF binding to identify the sequence signatures that define “lead” enhancers. The results will guide the design of a synthetic locus that serves as an in vivo platform to systematically vary the building blocks of local transcriptional units: i) DNA sequence – including variations in TF binding site affinity and syntax, ii) molecular interactions between TFs, and iii) chromatin conformation. To validate our findings, we will perform optical reconstruction of chromatin architecture for a select number of DNA variants. 
By simultaneously perturbing co-dependent features, this proposal will provide novel mechanistic insights into the formation of local transcriptional hubs.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-RI","coordinator":"ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} +{"rcn":"229288","id":"898218","acronym":"devUTRs","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Uncovering the roles of 5′UTRs in translational control during early zebrafish development","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Following fertilisation, metazoan embryos are transcriptionally silent, and embryogenesis is controlled by maternally deposited factors. Developmental progression requires the synthesis of new mRNAs and proteins in a coordinated fashion. Many posttranscriptional mechanisms regulate the fate of maternal mRNAs, but it is less understood how translational control shapes early embryogenesis. In eukaryotes, translation starts at the mRNA 5′ end, consisting of the 5′ cap and 5′ untranslated region (UTR). Protein synthesis is primarily regulated at the translation initiation step by elements within the 5′UTR. However, the role of 5′UTRs in regulating the dynamics of mRNA translation during vertebrate embryogenesis remains unexplored. For example, all vertebrate ribosomal protein (RP) mRNAs harbor a conserved terminal oligopyrimidine tract (TOP) in their 5′UTR. RP levels must be tightly controlled to ensure proper organismal development, but if and how the TOP motif mediates RP mRNA translational regulation during embryogenesis is unclear. Overall, we lack a systematic understanding of the regulatory information contained in 5′UTRs. 
In this work, I aim to uncover the 5′UTR in vivo rules for mRNA translational regulation during zebrafish embryogenesis. I propose to apply imaging and biochemical approaches to characterise the role of the TOP motif in RP mRNA translational regulation during embryogenesis and identify the trans-acting factor(s) that bind(s) to it (Aim 1). To systematically assess the contribution of 5′UTRs to mRNA translational regulation during zebrafish embryogenesis, I will couple a massively parallel reporter assay of 5′UTRs to polysome profiling (Aim 2). By integrating the translational behaviour of 5′UTR reporters throughout embryogenesis with sequence-based regression models, I anticipate to uncover novel cis-regulatory elements in 5′UTRs with developmental roles.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITAT BASEL","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} +{"rcn":"229261","id":"893787","acronym":"HOLYHOST","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Welfare and Hosting buildings in the “Holy Land” between the 4th and the 7th c. AD","startDate":"2020-10-01","endDate":"2022-09-30","projectUrl":"","objective":"Between the 4th and the 7th century AD, many hospices dedicated to the poor, elderly, strangers and travelers were built in the countryside, along roads, around and inside cities. They were commissioned by the Church, rich pious men and women concerned by the redeem of their sins, as well as emperors who saw this as a guarantee of social stability. Welfare is thus an important phenomena of Late Antiquity, abundantly mentioned by ancient literary sources and inscriptions, particularly in the eastern part of the Empire. However, the buildings that provided shelter and care to the needy have not yet received sufficient attention from archaeologists. 
Except for buildings which were identified by their inventors as hostels dedicated to pilgrims, they are still invisible in the field. \nThe aim of the HOLYHOST research project is to bring this social history’s main topic on the field of archaeology. It will address the welfare issue through the archaeological and architectural survey and study of Ancient welfare and hosting establishments’ remains, in the Holy Land (Palestine and Jordan) and around. This work will contribute to a better understanding of the practices linked to hospitality, welfare, accommodation and care in Antiquity. Moreover, such establishments served as models for medieval and modern Islamic, Jewish and Christian waqf institutions (religious endowment), and welfare continues to be highly relevant nowadays, through issues still at the heart of contemporary challenges debated in Europe: poverty, social exclusion, migrant crisis, principle of reception and hospitality. This interdisciplinary and diachronic research project will thus offer many new research perspectives, in terms of history of architecture, evolution of care practices, social and political regulations.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE PARIS I PANTHEON-SORBONNE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229282","id":"896189","acronym":"MICADO","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Microbial contribution to continental wetland carbon budget","startDate":"2021-01-04","endDate":"2023-01-03","projectUrl":"","objective":"Continental wetlands are major carbon dioxide sinks but the second largest source of methane. Monitoring of wetland methane emissions revealed large inter-site variability that is hard to explain in the framework of current biogeochemical theories. 
Methane production in wetlands is an anaerobic microbial driven process involving a complex set of microbial metabolisms depending on the availability of (i) energy (via the presence of specific redox couples), (ii) organic substrates and (iii) specific microbial communities. To understand the complexity of microbial drivers on wetland methane emissions and quantify their contribution, the MICADO project will set up a multidisciplinary approach linking isotope organic geochemistry and environmental microbiology to assess microbial functioning in situ. As an organic geochemist I have developed an innovative approach to trace in situ microbial activity via compound specific carbon isotope analysis of microbe macromolecules and organic metabolites. The host institution is a leader in France in environmental microbiology and biogeochemistry developing high-throughput metagenomics and microbial rate assessments, for which I will be trained during the MICADO project. These techniques are highly complementary and combined they will provide a comprehensive knowledge on microbial metabolisms involved in organic matter degradation encompassing their complexity and interactions. This will revisit the relationships between organic substrate availability and microbial communities and will contribute at estimating the impact of microbial activity on wetland methane emissions. 
This project will give me the opportunity to acquire fundamental knowledge and to develop original lines of research that will consolidate my position as an independent scientist in biogeochemistry.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"CENTRE NATIONAL DE LA RECHERCHE SCIENTIFIQUE CNRS","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229249","id":"891624","acronym":"CuTAN","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Copper-Catalyzed Multicomponent Reactions in Tandem Processes for Target Molecule Synthesis","startDate":"2021-02-01","endDate":"2023-01-31","projectUrl":"","objective":"The invention of processes that can form several bonds, stereocentres and rings in a single process is key to a sustainable future in synthetic chemistry. Multicomponent reactions and tandem procedures are two strategies that enable the rapid build-up of molecular complexity from simple reagents. By combining these two strategies into a single procedure, the diversity, complexity and value of products can be further enhanced along with the efficiency and economy of their construction. In this project, Dr Satpathi will develop novel copper-catalyzed multicomponent couplings of unsaturated hydrocarbons (e.g. allenes, enynes) with imines and boron reagents. These procedures will provide high-value amine products with universally high regio-, diastero- and enantiocontrol. The products will bear a variety of synthetic handles, for example, amino, alkynyl/alkenyl, and boryl groups, thus the products are primed for subsequent transformation. Dr Satpathi will exploit this functionality in tandem intramolecular couplings (e.g. intramolecular Suzuki/Buchwald-Hartwig reactions) to provide core cyclic structures of drug molecules and natural products. 
Thus, through a tandem procedure of; 1) copper-catalyzed borofunctionalization, and; 2) subsequent transition-metal catalyzed cyclization, he will gain efficient access to highly sought-after complex molecules. Overall, the process will provide high-value, chiral, cyclic motifs from abundant, achiral, linear substrates. Finally, Dr Satpathi has identified the phthalide-isoquinoline family of alkaloids as target molecules to display the power of his tandem methodology. Dr Satpathi has devised a novel route, which begins with our tandem multifunctionalization/cyclization reaction, to provide a range of these important alkaloids. The chosen alkaloids are of particular interest as they display a range of bioactivities – for example as natural products, receptor antagonists and on-market drugs.","totalCost":"212933,76","ecMaxContribution":"212933,76","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"THE UNIVERSITY OF MANCHESTER","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} +{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. 
\nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. 
The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} +{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. 
\nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. 
The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} +{"rcn":"229258","id":"892834","acronym":"DENVPOC","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"qPCR Microfluidics point-of-care platform for dengue diagnosis","startDate":"2020-05-18","endDate":"2022-05-17","projectUrl":"","objective":"As a result of Global climate change and fast urbanization, global outbreaks of Dengue (DENV)/ Zika(ZIKV)/Chikungunya(CHIKV) virus have the potential to occur. The most common pathway of these infections in humans is through the female Aedes mosquito vector. DENV is an exanthematous febrile disease with varied clinical manifestations and progressions . Due to similarities in symptoms between DENV and ZIKV and CHIKV, it is difficult to make a differential diagnosis, impeding appropriate, timely medical intervention. Furthermore, cross-reactivity with ZIKV, which was recently related to microcephaly, is a serious issue. 
In 2016, in Brazil alone, there were 4180 microcephaly cases reported instead of 163 cases, more in line with yearly expected projections , , Thus, the sooner an accurate diagnostic which differentiates DENV from the other manifestations is critical; most especially at the early stages of the infection, to have a reliable diagnosis in pregnant women. In 2016, the OMS emergency committee declared that the outbreaks and the potentially resultant neurological disorders in Brazil were an important international state of emergency in public health, as a result of the associated secondary effects; these diseases became a Global concern. This project allows developing a highly and fast Multiplex qPCR POC platform by using FASTGENE technology with a minimal amount of patient serotype. It would reduce the time of analysis (30 to 90’ for a standard) and costs. Additionally, the sample preprocessing and thermalization will shorten real-time PCR amplification time and will be integrated within the microfluidic systems. This platform can result in a commercialized product whereupon a main market target would be pregnant women and people living or traveling through/from outbreak risk areas.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-SE","coordinator":"BFORCURE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229280","id":"895716","acronym":"DoMiCoP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"The Diffusion of Migration Control Practice. 
Actors, Processes and Effects.","startDate":"2021-03-01","endDate":"2023-02-28","projectUrl":"","objective":"DoMiCoP develops new understandings and perspectives to study migration control in practice in the European Union by asking one main question: how and why do communities of practice develop and diffuse the knowledge required to put migration control into action? Unlike the nexus between expert knowledge, epistemic communities and policy formulation, the nexus between everyday knowledge, communities of practice and policy implementation has not yet received systematic scholarly attention. My project bridges that gap by focusing on intermediate arenas in which communities of practice take shape most notably the meetings and trainings that gather state and non-state actors involved in putting asylum, detention and removal into practice. By building on field-based methodologies (interviews and participant observations), DoMiCoP sheds ethnographic light on the role that ‘learning from abroad’ plays in the implementation of migration control in the EU. My project’s aim is threefold: 1) Identifying arenas at intermediate levels in which communities of practice take shape; 2) Analysing the communities of practice by focusing on the configurations of actors and organizations involved, the motivations underlying their involvement, the process of knowledge development in interaction, the conflicts and negotiations; 3) Revealing the role of non-state organizations (private for profit and not-for-profit). From a theoretical point of view, this project goes beyond the classical view of the implementation as a test to assess the effectiveness of policy transfers towards an analysis of policy transfer at that level of policy-making. From an empirical point of view, the project expands knowledge about less-studied venues of policy-making and provides original thick descriptions. 
From a methodological point of view, the project engages with qualitative methods for the study of policy diffusion and aims at responding to their main challenges through participant observation.","totalCost":"163673,28","ecMaxContribution":"163673,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"EUROPEAN UNIVERSITY INSTITUTE","coordinatorCountry":"IT","participants":"","participantCountries":"","subjects":""} \ No newline at end of file From 3c0eb12d3ea6ea7cb0cd074391e0e5a1d5521658 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 May 2020 10:31:05 +0200 Subject: [PATCH 22/53] removed the not zipped files --- .../eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json | 0 .../resources/eu/dnetlib/dhp/actionmanager/whole_programme.json | 0 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/whole_programme.json diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json deleted file mode 100644 index e69de29bb..000000000 diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/whole_programme.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/whole_programme.json deleted file mode 100644 index e69de29bb..000000000 From 67ba4fde57d0bd6763c46cc3aaf09af5c51405e3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 May 2020 13:53:08 +0200 Subject: [PATCH 23/53] added test for prepare projects step --- .../project/PrepareProjectTest.java | 94 ++++++++++++++++++ .../preparedProgramme_whole.json.gz | Bin .../prepared_projects.json} | 0 .../project/projects_subset.json 
| 0 .../{ => project}/whole_programme.json.gz | Bin 5 files changed, 94 insertions(+) create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java rename dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/{ => project}/preparedProgramme_whole.json.gz (100%) rename dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/{projects_subset.json => project/prepared_projects.json} (100%) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json rename dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/{ => project}/whole_programme.json.gz (100%) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java new file mode 100644 index 000000000..fa9e32fd5 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -0,0 +1,94 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject; +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import 
java.nio.file.Path; + +public class PrepareProjectTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static final ClassLoader cl = PrepareProjectTest.class + .getClassLoader(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(PrepareProjectTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(PrepareProjectTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(PrepareProjectTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PrepareProjectTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void numberDistinctProgrammeTest() throws Exception { + PrepareProjects + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-projectPath", + getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/projects_subset.json").getPath(), + "-outputPath", + workingDir.toString() + "/preparedProjects" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/preparedProjects") + .map(item -> OBJECT_MAPPER.readValue(item, CSVProject.class)); + + Assertions.assertEquals(20, tmp.count()); + + Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProject.class)); + + 
Assertions.assertEquals(0, verificationDataset.filter("length(id) = 0").count()); + Assertions.assertEquals(0, verificationDataset.filter("length(programme) = 0").count()); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz similarity index 100% rename from dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json.gz rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/projects_subset.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json similarity index 100% rename from dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/projects_subset.json rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/whole_programme.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz similarity index 100% rename from dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/whole_programme.json.gz rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz From 9079bc1f6125bc687a4d144093c7125651e9176c Mon Sep 17 00:00:00 
2001 From: "miriam.baglioni" Date: Wed, 20 May 2020 13:53:32 +0200 Subject: [PATCH 24/53] - --- .../dhp/actionmanager/project/PrepareProgrammeTest.java | 2 +- .../dhp/actionmanager/project/SparkUpdateProjectTest.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java index e9755c858..7f890a8a3 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java @@ -73,7 +73,7 @@ public class PrepareProgrammeTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-programmePath", - getClass().getResource("/eu/dnetlib/dhp/actionmanager/whole_programme.json.gz").getPath(), + getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz").getPath(), "-outputPath", workingDir.toString() + "/preparedProgramme" }); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index c41daf2cc..3e559228f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -70,9 +70,9 @@ public class SparkUpdateProjectTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-programmePath", - getClass().getResource("/eu/dnetlib/dhp/actionmanager/preparedProgramme_whole.json.gz").getPath(), + getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz").getPath(), "-projectPath", - 
getClass().getResource("/eu/dnetlib/dhp/actionmanager/projects_subset.json").getPath(), + getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json").getPath(), "-outputPath", workingDir.toString() + "/actionSet" }); From 055eec5a77c52d5f4e1b6ec9788fa9645c800220 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 20 May 2020 13:54:10 +0200 Subject: [PATCH 25/53] added resource for prepare project test --- .../actionmanager/project/projects_subset.json | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json index e69de29bb..edf83fbc8 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json @@ -0,0 +1,16 @@ +{"rcn":"229267","id":"894593","acronym":"ICARUS","status":"SIGNED","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019","frameworkProgramme":"H2020","title":"INTEGRATED COMMON ALTITUDE REFERENCE SYSTEM FOR U-SPACE","startDate":"2020-05-01","endDate":"2022-07-31","projectUrl":"","objective":"ICARUS project proposes an innovative solution to the challenge of the Common Altitude Reference inside VLL airspaces with the definition of a new U-space service and its validation in a real operational environment. In manned aviation, the methods of determining the altitude of an aircraft are based on pressure altitude difference measurements (e.g. QFE, QNH and FL) referred to a common datum. 
\nThe UA flights superimpose a new challenge, since a small drone may take off and land almost from everywhere, hence reducing the original significance of QFE settings, introduced on behalf of manned pilots to display on the altimeter the 0-height at touchdown on the local runway. In fact, the possibility for n drones to take off at n different places would generate a series of n different QFE corresponding to different heights of ground pressures referred to the take-off “Home points”. Therefore for a large number drones, new methodologies and procedures shall be put in place. The ICARUS defines a new U-space U3 service tightly coupled with the interface of the existing U-space services (e.g. Tracking, and Flight Planning services). The users of ICARUS service shall be remote pilots competent to fly in BVLOS in the specific category of UAS operations and ultralight GA pilots potentially sharing the same VLL airspace. \nThe ICARUS proposed approach foresees the realization of DTM service embedded in an Application Program Interface (API) that can be queried by UAS pilot/operator (or by drone itself) based on the actual positioning of the UA along its trajectory, computed by the (E)GNSS receiver. 
The output of the DTM service would provide information on distance from ground/obstacles in combination with the common altitude reference.\nAccuracy, continuity, integrity and availability requirements for GNSS-based altimetry together with accuracy and resolution requirements of the DTM to be provided by ICARUS service are key topics of the study.","totalCost":"1385286,25","ecMaxContribution":"1144587,5","call":"H2020-SESAR-2019-2","fundingScheme":"SESAR-RIA","coordinator":"E-GEOS SPA","coordinatorCountry":"IT","participants":"TOPVIEW SRL;TELESPAZIO SPA;DRONERADAR SP Z O.O.;EUROCONTROL - EUROPEAN ORGANISATION FOR THE SAFETY OF AIR NAVIGATION;EUROUSC ESPANA SL;POLITECNICO DI MILANO;UNIVERSITA DEGLI STUDI DI ROMA LA SAPIENZA","participantCountries":"IT;PL;BE;ES","subjects":""} +{"rcn":"229284","id":"897004","acronym":"ISLand","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Isolation and Segregation Landscape. Archaeology of quarantine in the Indian Ocean World","startDate":"2020-11-01","endDate":"2023-10-31","projectUrl":"","objective":"The proposed research presents an experimental and completely novel investigation within the historical archaeology,\napplied to isolated contexts. The main objective of ISLand is to provide a new way of thinking about human interactions\nwithin colonial empires and bringing colonial studies into dialogue with medical history and the emerging concept of\nhealthscaping. It seeks to do so by studying quarantine facilities in the Indian Ocean World during the long nineteenth\ncentury, a crucial period for the history of European empires in that region and a flashpoint for the conceptualization of\nmodern public health. 
Quarantine, traditionally viewed as merely a mechanism for the control of disease, will be analyzed as\nthe outward material response to important changes taking place socially, ecologically, and politically at the time.\nThe project is a part of an international, interdisciplinary effort, combining history, archaeology, and anthropology. The\nresearcher will tap numerous archival sources and archaeological data from selected sites, examine them through social and\nspatial analysis, and systematically analyze a test case in Mauritius through the most innovative methods that target\nlandscape and standing archaeology.\nThe broader impacts of ISLand have relevance for current European approaches to the migration crisis, where the threat of\ndisease has been ignited as a potentially debilitating consequence of immigration from extra-European countries. The\ntraining-through-research project at the Stanford University, the top institution where acquiring knowledge and skills in\nhistorical archaeology, will allow the applicant to develop into a position of professional maturity with a specific\ninterdisciplinary set of skills. 
With the support of the host institutions in EU, the researcher will promote historical archaeology\nin European academy, stimulating new approaches in usual archaeological research and an interdisciplinary approach with\ncultural anthropology.","totalCost":"253052,16","ecMaxContribution":"253052,16","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-GF","coordinator":"UNIVERSITEIT VAN AMSTERDAM","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} +{"rcn":"229281","id":"896300","acronym":"STRETCH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Smart Textiles for RETrofitting and Monitoring of Cultural Heritage Buildings","startDate":"2020-09-01","endDate":"2022-08-31","projectUrl":"","objective":"This project aims to develop novel techniques using smart multifunctional materials for the combined seismic-plus-energy retrofitting, and Structural Health Monitoring (SHM) of the European cultural heritage buildings (CHB). The need for upgrading the existing old and CHB is becoming increasingly important for the EU countries, due to: (1) their poor structural performance during recent earthquakes (e.g. Italy, Greece) or other natural hazards (e.g. extreme weather conditions) that have resulted in significant economic losses, and loss of human lives; and (2) their low energy performance which increases significantly their energy consumption (buildings are responsible for 40% of EU energy consumption). Moreover, the SHM of the existing buildings is crucial for assessing continuously their structural integrity and thus to provide information for planning cost effective and sustainable maintenance decisions. Since replacing the old buildings with new is not financially feasible, and even it is not allowed for CHB, their lifetime extension requires considering simultaneously both structural and energy retrofitting. 
It is noted that the annual cost of repair and maintenance of existing European building stock is estimated to be about 50% of the total construction budget, currently standing at more than €300 billion. To achieve cost effectiveness, STRETCH explores a novel approach, which integrates technical textile reinforcement with thermal insulation systems and strain sensors to provide simultaneous structural-plus-energy retrofitting combined with SHM, tailored for masonry cultural heritage building envelopes. The effectiveness of the proposed retrofitting system will be validated experimentally and analytically. Moreover, draft guidelines and recommendations for determining future research on the use of smart composite materials for the concurrent retrofitting (structural-plus-energy) and SHM of the existing cultural heritage buildings envelopes will be proposed.","totalCost":"183473,28","ecMaxContribution":"183473,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"JRC -JOINT RESEARCH CENTRE- EUROPEAN COMMISSION","coordinatorCountry":"BE","participants":"","participantCountries":"","subjects":""} +{"rcn":"229265","id":"892890","acronym":"RhythmicPrediction","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Rhythmic prediction in speech perception: are our brain waves in sync with our native language?","startDate":"2021-01-01","endDate":"2022-12-31","projectUrl":"","objective":"Speech has rhythmic properties that widely differ across languages. When we listen to foreign languages, we may perceive them to be more musical, or rather more rap-like than our own. Even if we are unaware of it, the rhythm and melody of language, i.e. prosody, reflects its linguistic structure. On the one hand, prosody emphasizes content words and new information with stress and accents. On the other hand, it is aligned to phrase edges, marking them with boundary tones. 
Prosody hence helps the listener to focus on important words and to chunk sentences into phrases, and phrases into words. In fact, prosody is even used predictively, for instance to time the onset of the next word, the next piece of new information, or the total remaining length of the utterance, so the listener can seamlessly start their own speaking turn. \nSo, the listener, or rather their brain, is actively predicting when important speech events will happen, using prosody. How prosodic rhythms are exploited to predict speech timing, however, is unclear. No link between prosody and neural predictive processing has yet been empirically made. One hypothesis is that rhythm, such as the alternation of stressed and unstressed syllables, helps listeners time their attention. Similar behavior is best captured by the notion of an internal oscillator which can be set straight by attentional spikes. While neuroscientific evidence for the relation of neural oscillators to speech processing is starting to emerge, no link to the use of prosody nor predictive listening exists, yet. Furthermore, it is still unknown how native language knowledge affects cortical oscillations, and how oscillations are affected by cross-linguistic differences in rhythmic structure. 
The current project combines the standing knowledge of prosodic typology with the recent advances in neuroscience on cortical oscillations, to investigate the role of internal oscillators on native prosody perception, and active speech prediction.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE DE GENEVE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} +{"rcn":"229235","id":"886828","acronym":"ASAP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Advanced Solutions for Asphalt Pavements","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"The Advanced Solutions for Asphalt Pavements (ASAP) project involves the development of a unique road paving technology which will use a bio-bitumen rejuvenator to rejuvenate aged asphalt bitumen. This technology will help to extend the lifespan of asphalt pavements (roads) and will reduce the environmental and economic impact of roads and road maintenance processes. Recycling and self-healing processes will replace fossil fuel dependent technology. Self-healing will involve rejuvenating aged asphalt bitumen using a bio-rejuvenator developed using microalgae oils (rejuvenating bio-oil). Microalgae has been selected because of its fast growth, versatility and ability to survive within hostile environments, such as wastewater. \n\nASAP will utilise microalgae, cultivated within the wastewater treatment process, as a source of the rejuvenating bio-oil. The solvent (Soxhlet) processes will be used to extract the oil from the microalgae. To ensure the efficiency of the oil extraction process, an ultrasonication process will be used to pre-treat the microalgae. 
The suitability of rejuvenating bio-oil as a replacement for the bitumen rejuvenator (fossil fuel based) will be ascertained via a series of standard bituminous and accelerated tests. A rejuvenator-binder diffusion numerical model will be developed, based on the Delft Lattice concrete diffusion model, to determine the conditions required for rejuvenation to occur and to ascertain the healing rate of the asphalt binder. These parameters will facilitate the selection and optimisation of the asphalt self-healing systems (specifically the amount of bio-oil rejuvenator and time required) to achieve full rejuvenation. \n\nThis novel approach will benchmark the effectiveness of this intervention against existing asphalt design and maintenance processes and assess feasibility. The ASAP project presents an opportunity to revolutionise road design and maintenance processes and reduce its environmental and financial costs.","totalCost":"187572,48","ecMaxContribution":"187572,48","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"NEDERLANDSE ORGANISATIE VOOR TOEGEPAST NATUURWETENSCHAPPELIJK ONDERZOEK TNO","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} +{"rcn":"229236","id":"886776","acronym":"BIOBESTicide","status":"SIGNED","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. 
beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. 
\nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} +{"rcn":"229276","id":"895426","acronym":"DisMoBoH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Dissecting the molecular building principles of locally formed transcriptional hubs","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Numerous DNA variants have already been identified that modulate inter-individual molecular traits – most prominently gene expression. However, since finding mechanistic interpretations relating genotype to phenotype has proven challenging, the focus has shifted to higher-order regulatory features, i.e. chromatin accessibility, transcription factor (TF) binding and 3D chromatin interactions. This revealed at least two enhancer types: “lead” enhancers in which the presence of genetic variants modulates the activity of entire chromatin domains, and “dependent” ones in which variants induce subtle changes, affecting DNA accessibility, but not transcription. 
Although cell type-specific TFs are likely important, it remains unclear which sequence features are required to establish such enhancer hierarchies, and under which circumstances genetic variation results in altered enhancer-promoter contacts and differential gene expression. Here, we propose to investigate the molecular mechanisms that link DNA variation to TF binding, chromatin topology, and gene expression response. We will leverage data on enhancer hierarchy and sequence-specific TF binding to identify the sequence signatures that define “lead” enhancers. The results will guide the design of a synthetic locus that serves as an in vivo platform to systematically vary the building blocks of local transcriptional units: i) DNA sequence – including variations in TF binding site affinity and syntax, ii) molecular interactions between TFs, and iii) chromatin conformation. To validate our findings, we will perform optical reconstruction of chromatin architecture for a select number of DNA variants. By simultaneously perturbing co-dependent features, this proposal will provide novel mechanistic insights into the formation of local transcriptional hubs.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-RI","coordinator":"ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} +{"rcn":"229288","id":"898218","acronym":"devUTRs","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Uncovering the roles of 5′UTRs in translational control during early zebrafish development","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Following fertilisation, metazoan embryos are transcriptionally silent, and embryogenesis is controlled by maternally deposited factors. Developmental progression requires the synthesis of new mRNAs and proteins in a coordinated fashion. 
Many posttranscriptional mechanisms regulate the fate of maternal mRNAs, but it is less understood how translational control shapes early embryogenesis. In eukaryotes, translation starts at the mRNA 5′ end, consisting of the 5′ cap and 5′ untranslated region (UTR). Protein synthesis is primarily regulated at the translation initiation step by elements within the 5′UTR. However, the role of 5′UTRs in regulating the dynamics of mRNA translation during vertebrate embryogenesis remains unexplored. For example, all vertebrate ribosomal protein (RP) mRNAs harbor a conserved terminal oligopyrimidine tract (TOP) in their 5′UTR. RP levels must be tightly controlled to ensure proper organismal development, but if and how the TOP motif mediates RP mRNA translational regulation during embryogenesis is unclear. Overall, we lack a systematic understanding of the regulatory information contained in 5′UTRs. In this work, I aim to uncover the 5′UTR in vivo rules for mRNA translational regulation during zebrafish embryogenesis. I propose to apply imaging and biochemical approaches to characterise the role of the TOP motif in RP mRNA translational regulation during embryogenesis and identify the trans-acting factor(s) that bind(s) to it (Aim 1). To systematically assess the contribution of 5′UTRs to mRNA translational regulation during zebrafish embryogenesis, I will couple a massively parallel reporter assay of 5′UTRs to polysome profiling (Aim 2). 
By integrating the translational behaviour of 5′UTR reporters throughout embryogenesis with sequence-based regression models, I anticipate to uncover novel cis-regulatory elements in 5′UTRs with developmental roles.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITAT BASEL","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} +{"rcn":"229261","id":"893787","acronym":"HOLYHOST","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Welfare and Hosting buildings in the “Holy Land” between the 4th and the 7th c. AD","startDate":"2020-10-01","endDate":"2022-09-30","projectUrl":"","objective":"Between the 4th and the 7th century AD, many hospices dedicated to the poor, elderly, strangers and travelers were built in the countryside, along roads, around and inside cities. They were commissioned by the Church, rich pious men and women concerned by the redeem of their sins, as well as emperors who saw this as a guarantee of social stability. Welfare is thus an important phenomena of Late Antiquity, abundantly mentioned by ancient literary sources and inscriptions, particularly in the eastern part of the Empire. However, the buildings that provided shelter and care to the needy have not yet received sufficient attention from archaeologists. Except for buildings which were identified by their inventors as hostels dedicated to pilgrims, they are still invisible in the field. \nThe aim of the HOLYHOST research project is to bring this social history’s main topic on the field of archaeology. It will address the welfare issue through the archaeological and architectural survey and study of Ancient welfare and hosting establishments’ remains, in the Holy Land (Palestine and Jordan) and around. 
This work will contribute to a better understanding of the practices linked to hospitality, welfare, accommodation and care in Antiquity. Moreover, such establishments served as models for medieval and modern Islamic, Jewish and Christian waqf institutions (religious endowment), and welfare continues to be highly relevant nowadays, through issues still at the heart of contemporary challenges debated in Europe: poverty, social exclusion, migrant crisis, principle of reception and hospitality. This interdisciplinary and diachronic research project will thus offer many new research perspectives, in terms of history of architecture, evolution of care practices, social and political regulations.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE PARIS I PANTHEON-SORBONNE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229282","id":"896189","acronym":"MICADO","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Microbial contribution to continental wetland carbon budget","startDate":"2021-01-04","endDate":"2023-01-03","projectUrl":"","objective":"Continental wetlands are major carbon dioxide sinks but the second largest source of methane. Monitoring of wetland methane emissions revealed large inter-site variability that is hard to explain in the framework of current biogeochemical theories. Methane production in wetlands is an anaerobic microbial driven process involving a complex set of microbial metabolisms depending on the availability of (i) energy (via the presence of specific redox couples), (ii) organic substrates and (iii) specific microbial communities. 
To understand the complexity of microbial drivers on wetland methane emissions and quantify their contribution, the MICADO project will set up a multidisciplinary approach linking isotope organic geochemistry and environmental microbiology to assess microbial functioning in situ. As an organic geochemist I have developed an innovative approach to trace in situ microbial activity via compound specific carbon isotope analysis of microbe macromolecules and organic metabolites. The host institution is a leader in France in environmental microbiology and biogeochemistry developing high-throughput metagenomics and microbial rate assessments, for which I will be trained during the MICADO project. These techniques are highly complementary and combined they will provide a comprehensive knowledge on microbial metabolisms involved in organic matter degradation encompassing their complexity and interactions. This will revisit the relationships between organic substrate availability and microbial communities and will contribute at estimating the impact of microbial activity on wetland methane emissions. 
This project will give me the opportunity to acquire fundamental knowledge and to develop original lines of research that will consolidate my position as an independent scientist in biogeochemistry.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"CENTRE NATIONAL DE LA RECHERCHE SCIENTIFIQUE CNRS","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229249","id":"891624","acronym":"CuTAN","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Copper-Catalyzed Multicomponent Reactions in Tandem Processes for Target Molecule Synthesis","startDate":"2021-02-01","endDate":"2023-01-31","projectUrl":"","objective":"The invention of processes that can form several bonds, stereocentres and rings in a single process is key to a sustainable future in synthetic chemistry. Multicomponent reactions and tandem procedures are two strategies that enable the rapid build-up of molecular complexity from simple reagents. By combining these two strategies into a single procedure, the diversity, complexity and value of products can be further enhanced along with the efficiency and economy of their construction. In this project, Dr Satpathi will develop novel copper-catalyzed multicomponent couplings of unsaturated hydrocarbons (e.g. allenes, enynes) with imines and boron reagents. These procedures will provide high-value amine products with universally high regio-, diastero- and enantiocontrol. The products will bear a variety of synthetic handles, for example, amino, alkynyl/alkenyl, and boryl groups, thus the products are primed for subsequent transformation. Dr Satpathi will exploit this functionality in tandem intramolecular couplings (e.g. intramolecular Suzuki/Buchwald-Hartwig reactions) to provide core cyclic structures of drug molecules and natural products. 
Thus, through a tandem procedure of; 1) copper-catalyzed borofunctionalization, and; 2) subsequent transition-metal catalyzed cyclization, he will gain efficient access to highly sought-after complex molecules. Overall, the process will provide high-value, chiral, cyclic motifs from abundant, achiral, linear substrates. Finally, Dr Satpathi has identified the phthalide-isoquinoline family of alkaloids as target molecules to display the power of his tandem methodology. Dr Satpathi has devised a novel route, which begins with our tandem multifunctionalization/cyclization reaction, to provide a range of these important alkaloids. The chosen alkaloids are of particular interest as they display a range of bioactivities – for example as natural products, receptor antagonists and on-market drugs.","totalCost":"212933,76","ecMaxContribution":"212933,76","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"THE UNIVERSITY OF MANCHESTER","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} +{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. 
In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. 
The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} +{"rcn":"229258","id":"892834","acronym":"DENVPOC","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"qPCR Microfluidics point-of-care platform for dengue diagnosis","startDate":"2020-05-18","endDate":"2022-05-17","projectUrl":"","objective":"As a result of Global climate change and fast urbanization, global outbreaks of Dengue (DENV)/ Zika(ZIKV)/Chikungunya(CHIKV) virus have the potential to occur. The most common pathway of these infections in humans is through the female Aedes mosquito vector. DENV is an exanthematous febrile disease with varied clinical manifestations and progressions . Due to similarities in symptoms between DENV and ZIKV and CHIKV, it is difficult to make a differential diagnosis, impeding appropriate, timely medical intervention. Furthermore, cross-reactivity with ZIKV, which was recently related to microcephaly, is a serious issue. 
In 2016, in Brazil alone, there were 4180 microcephaly cases reported instead of 163 cases, more in line with yearly expected projections , , Thus, the sooner an accurate diagnostic which differentiates DENV from the other manifestations is critical; most especially at the early stages of the infection, to have a reliable diagnosis in pregnant women. In 2016, the OMS emergency committee declared that the outbreaks and the potentially resultant neurological disorders in Brazil were an important international state of emergency in public health, as a result of the associated secondary effects; these diseases became a Global concern. This project allows developing a highly and fast Multiplex qPCR POC platform by using FASTGENE technology with a minimal amount of patient serotype. It would reduce the time of analysis (30 to 90’ for a standard) and costs. Additionally, the sample preprocessing and thermalization will shorten real-time PCR amplification time and will be integrated within the microfluidic systems. This platform can result in a commercialized product whereupon a main market target would be pregnant women and people living or traveling through/from outbreak risk areas.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-SE","coordinator":"BFORCURE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229280","id":"895716","acronym":"DoMiCoP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"The Diffusion of Migration Control Practice. 
Actors, Processes and Effects.","startDate":"2021-03-01","endDate":"2023-02-28","projectUrl":"","objective":"DoMiCoP develops new understandings and perspectives to study migration control in practice in the European Union by asking one main question: how and why do communities of practice develop and diffuse the knowledge required to put migration control into action? Unlike the nexus between expert knowledge, epistemic communities and policy formulation, the nexus between everyday knowledge, communities of practice and policy implementation has not yet received systematic scholarly attention. My project bridges that gap by focusing on intermediate arenas in which communities of practice take shape most notably the meetings and trainings that gather state and non-state actors involved in putting asylum, detention and removal into practice. By building on field-based methodologies (interviews and participant observations), DoMiCoP sheds ethnographic light on the role that ‘learning from abroad’ plays in the implementation of migration control in the EU. My project’s aim is threefold: 1) Identifying arenas at intermediate levels in which communities of practice take shape; 2) Analysing the communities of practice by focusing on the configurations of actors and organizations involved, the motivations underlying their involvement, the process of knowledge development in interaction, the conflicts and negotiations; 3) Revealing the role of non-state organizations (private for profit and not-for-profit). From a theoretical point of view, this project goes beyond the classical view of the implementation as a test to assess the effectiveness of policy transfers towards an analysis of policy transfer at that level of policy-making. From an empirical point of view, the project expands knowledge about less-studied venues of policy-making and provides original thick descriptions. 
From a methodological point of view, the project engages with qualitative methods for the study of policy diffusion and aims at responding to their main challenges through participant observation.","totalCost":"163673,28","ecMaxContribution":"163673,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"EUROPEAN UNIVERSITY INSTITUTE","coordinatorCountry":"IT","participants":"","participantCountries":"","subjects":""} +{"rcn":"229297","id":"954782","acronym":"MiniLLock","status":"SIGNED","programme":"H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.","topics":"EIC-SMEInst-2018-2020","frameworkProgramme":"H2020","title":"Mini Launch Lock devices for small satellites","startDate":"2020-05-01","endDate":"2022-04-30","projectUrl":"","objective":"Space industry is experiencing the most important paradigm shift in its history with the rise of small satellites and megaconstellations.\nSatellite miniaturization requires to reduce significantly production and orbit launching costs. To address the\nnew challenge of this manufacturing process and switch from craftsmanship to industrialization, space industry is turning\ntowards other domains looking for new solutions, disruptive technologies, and manufacturing process.\nMini Launch Lock devices for small satellites (MiniLLock) proposes innovative actuators on the cutting edge of customer\ndemand. They offer plug and play solutions that can directly be integrated into industry for satellites robotized production.\nMiniLLock is smaller, lighter, safer, with a longer lifetime and generates significantly less shocks and vibrations than\nstandard actuators such as electromagnet and pyrotechnics. 
MiniLLock offers performances which have never been reached\nwith any other materials.\nNimesis is the only company that can provide such cost-effective actuators suitable to small satellite with high performances\nand reliability, enabling features previously impossible.\nMiniLLock will accelerate and leverage the commercialization of Nimesis technology and ensure Europe worldwide\nleadership\nand independence in the new space emergent environment.\nNimesis ambitions to become the global leader of this domain with a turnover of € 26 million and a market share of 28% in\n2027.","totalCost":"2413543,75","ecMaxContribution":"1689480,63","call":"H2020-EIC-SMEInst-2018-2020-3","fundingScheme":"SME-2b","coordinator":"NIMESIS TECHNOLOGY SARL","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} +{"rcn":"229299","id":"101003374","acronym":"NOPHOS","status":"SIGNED","programme":"H2020-EU.4.","topics":"WF-02-2019","frameworkProgramme":"H2020","title":"Unravelling protein phosphorylation mechanisms and phosphoproteome changes under nitrosative stress conditions in E.coli","startDate":"2020-07-01","endDate":"2022-06-30","projectUrl":"","objective":"Currently, we face a global antibiotic resistance crisis aggravated by the slow development of more effective and anti-resistance promoting therapeutical solutions. Protein phosphorylation (PP) has recently emerged as one of the major post-translational modification in bacteria, involved in the regulation of multiple physiological processes. In this MSCA individual fellowship application we aim to bridge the current gap in the field for prokaryotes by unravelling the unknown regulatory role of PP on proteins involved in nitrosative stress (NS) detoxification in the model bacterium E.coli. We propose to examine for the first time both global protein modifications (e.g. phosphoproteomics) under nitrogen species stress, as well as characterize PP in individual proteins involved in NS response. 
We will construct a network model that reflect the phosphoproteomic changes upon NS in E.coli, that may pave the way for the design of new bacterial targets. Understanding how bacteria respond to the chemical weapons of the human innate system is fundamental to develop efficient therapies. We will pioneer research on the mechanism and the regulation of nitric oxide detoxification proteins already identified as phosphorylated, by analyzing how this modification influences their stability and activity in vitro and in vivo. This project opens up new research paths on bacterial detoxification systems and signalling in general, addressing for the first time the role of PP in these processes. The proposal brings together transversal and scientific skills that will enable the researcher to lead the development of this emerging field and position herself as an expert in the area, and aims at establishing the importance of PP in NO microbial response, a novelty in this field.","totalCost":"147815,04","ecMaxContribution":"147815,04","call":"H2020-WF-02-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSIDADE NOVA DE LISBOA","coordinatorCountry":"PT","participants":"","participantCountries":"","subjects":""} \ No newline at end of file From 4589c428b142a833ab015990147310828d3c4832 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 21 May 2020 16:30:39 +0200 Subject: [PATCH 26/53] generate action sets and saves them in the hdfs path for the actions sets --- .../project/SparkAtomicActionJob.java | 71 ++++++++++++------- .../project/oozie_app/workflow.xml | 1 - 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java index 61bd952db..5b038b49a 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -3,11 +3,16 @@ package eu.dnetlib.dhp.actionmanager.project; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.IOException; import java.util.Arrays; import java.util.HashMap; import java.util.Optional; +import java.util.function.Consumer; +import eu.dnetlib.dhp.schema.action.AtomicAction; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; @@ -28,6 +33,15 @@ import eu.dnetlib.dhp.schema.oaf.Programme; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.utils.DHPUtils; +import org.apache.hadoop.conf.Configuration; + +import org.apache.hadoop.io.SequenceFile; +import org.apache.spark.rdd.SequenceFileRDDFunctions; +import org.apache.hadoop.io.Text; +import scala.Function1; +import scala.Tuple2; +import scala.runtime.BoxedUnit; + public class SparkAtomicActionJob { private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); @@ -61,6 +75,8 @@ public class SparkAtomicActionJob { final String programmePath = parser.get("programmePath"); log.info("programmePath {}: ", programmePath); + final String nameNode = parser.get("hdfsNameNode"); + SparkConf conf = new SparkConf(); runWithSparkSession( @@ -72,7 +88,8 @@ public class SparkAtomicActionJob { spark, projectPath, programmePath, - outputPath); + outputPath, + nameNode); }); } @@ -82,38 +99,38 @@ public class SparkAtomicActionJob { private static void getAtomicActions(SparkSession spark, String projectPatH, String programmePath, - String outputPath) { + String 
outputPath, + String nameNode) throws Exception{ Dataset project = readPath(spark, projectPatH, CSVProject.class); Dataset programme = readPath(spark, programmePath, CSVProgramme.class); project - .joinWith(programme, project.col("programme").equalTo(programme.col("code")), "left") - .map(c -> { - CSVProject csvProject = c._1(); - Optional csvProgramme = Optional.ofNullable(c._2()); - if (csvProgramme.isPresent()) { - Project p = new Project(); - p - .setId( - createOpenaireId( - ModelSupport.entityIdPrefix.get("project"), - "corda__h2020", csvProject.getId())); - Programme pm = new Programme(); - pm.setCode(csvProject.getProgramme()); - pm.setDescription(csvProgramme.get().getShortTitle()); - p.setProgramme(Arrays.asList(pm)); - return p; - } + .joinWith(programme, project.col("programme").equalTo(programme.col("code")), "left") + .map(c -> { + CSVProject csvProject = c._1(); + Optional csvProgramme = Optional.ofNullable(c._2()); + if (csvProgramme.isPresent()) { + Project p = new Project(); + p + .setId( + createOpenaireId( + ModelSupport.entityIdPrefix.get("project"), + "corda__h2020", csvProject.getId())); + Programme pm = new Programme(); + pm.setCode(csvProject.getProgramme()); + pm.setDescription(csvProgramme.get().getShortTitle()); + p.setProgramme(Arrays.asList(pm)); + return new AtomicAction<>(Project.class, p); + } + + return null; + }, Encoders.bean(AtomicAction.class)) + .filter(aa -> !(aa == null)) + .toJavaRDD() + .mapToPair(aa->new Tuple2<>(aa.getClazz().getCanonicalName(), OBJECT_MAPPER.writeValueAsString(aa))) + .saveAsHadoopFile(outputPath, Text.class, Text.class, null); - return null; - }, Encoders.bean(Project.class)) - .filter(p -> !(p == null)) - // .map(p -> new AtomicAction<>(Project.class, p), Encoders.bean(AtomicAction.class)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath); } public static Dataset readPath( diff --git 
a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index cd4d79ab7..ba99fb314 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -24,7 +24,6 @@ - From 473c6d3a23c83053ffc7c13a81a40814f531b467 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 May 2020 15:26:57 +0200 Subject: [PATCH 27/53] produces AtomicActions instead of Projects --- .../project/SparkAtomicActionJob.java | 71 +++++++++---------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java index 5b038b49a..990e50abd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -9,12 +9,17 @@ import java.util.HashMap; import java.util.Optional; import java.util.function.Consumer; -import eu.dnetlib.dhp.schema.action.AtomicAction; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.hadoop.mapred.TextOutputFormat; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.rdd.SequenceFileRDDFunctions; import org.apache.spark.sql.Dataset; 
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -28,16 +33,11 @@ import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Programme; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.utils.DHPUtils; - -import org.apache.hadoop.conf.Configuration; - -import org.apache.hadoop.io.SequenceFile; -import org.apache.spark.rdd.SequenceFileRDDFunctions; -import org.apache.hadoop.io.Text; import scala.Function1; import scala.Tuple2; import scala.runtime.BoxedUnit; @@ -75,8 +75,6 @@ public class SparkAtomicActionJob { final String programmePath = parser.get("programmePath"); log.info("programmePath {}: ", programmePath); - final String nameNode = parser.get("hdfsNameNode"); - SparkConf conf = new SparkConf(); runWithSparkSession( @@ -88,8 +86,7 @@ public class SparkAtomicActionJob { spark, projectPath, programmePath, - outputPath, - nameNode); + outputPath); }); } @@ -99,37 +96,39 @@ public class SparkAtomicActionJob { private static void getAtomicActions(SparkSession spark, String projectPatH, String programmePath, - String outputPath, - String nameNode) throws Exception{ + String outputPath) { Dataset project = readPath(spark, projectPatH, CSVProject.class); Dataset programme = readPath(spark, programmePath, CSVProgramme.class); project - .joinWith(programme, project.col("programme").equalTo(programme.col("code")), "left") - .map(c -> { - CSVProject csvProject = c._1(); - Optional csvProgramme = Optional.ofNullable(c._2()); - if (csvProgramme.isPresent()) { - Project p = new Project(); - p - .setId( - createOpenaireId( - ModelSupport.entityIdPrefix.get("project"), - "corda__h2020", csvProject.getId())); - Programme pm = new 
Programme(); - pm.setCode(csvProject.getProgramme()); - pm.setDescription(csvProgramme.get().getShortTitle()); - p.setProgramme(Arrays.asList(pm)); - return new AtomicAction<>(Project.class, p); - } + .joinWith(programme, project.col("programme").equalTo(programme.col("code")), "left") + .map(c -> { + CSVProject csvProject = c._1(); + Optional csvProgramme = Optional.ofNullable(c._2()); + if (csvProgramme.isPresent()) { + Project p = new Project(); + p + .setId( + createOpenaireId( + ModelSupport.entityIdPrefix.get("project"), + "corda__h2020", csvProject.getId())); + Programme pm = new Programme(); + pm.setCode(csvProject.getProgramme()); + pm.setDescription(csvProgramme.get().getShortTitle()); + p.setProgramme(Arrays.asList(pm)); + return p; + } - return null; - }, Encoders.bean(AtomicAction.class)) - .filter(aa -> !(aa == null)) - .toJavaRDD() - .mapToPair(aa->new Tuple2<>(aa.getClazz().getCanonicalName(), OBJECT_MAPPER.writeValueAsString(aa))) - .saveAsHadoopFile(outputPath, Text.class, Text.class, null); + return null; + }, Encoders.bean(Project.class)) + .filter(p -> !(p == null)) + .toJavaRDD() + .map(p -> new AtomicAction(Project.class, p)) + .mapToPair( + aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), + new Text(OBJECT_MAPPER.writeValueAsString(aa)))) + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); } From 50ad83b97f7190642f2e7a89e2cfbad53592b4f8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 May 2020 15:27:19 +0200 Subject: [PATCH 28/53] - --- .../actionmanager/project/PrepareProjectTest.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java index fa9e32fd5..73bedb741 100644 --- 
a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -1,9 +1,10 @@ package eu.dnetlib.dhp.actionmanager.project; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; -import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -18,9 +19,10 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject; public class PrepareProjectTest { From ac8025f4696d7cf5b786ef9e0bc76620350748c5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 22 May 2020 15:29:41 +0200 Subject: [PATCH 29/53] - --- .../dhp/actionmanager/project/SparkUpdateProjectTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index 3e559228f..f7b8722c4 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -70,7 +70,9 @@ public class SparkUpdateProjectTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-programmePath", - 
getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz").getPath(), + getClass() + .getResource("/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz") + .getPath(), "-projectPath", getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json").getPath(), "-outputPath", From f3dcca0dd05d3f2fd0796fe47dd98a163da62058 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 27 May 2020 17:23:34 +0200 Subject: [PATCH 30/53] added equals for programme --- .../eu/dnetlib/dhp/schema/oaf/Programme.java | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java index 69223ab01..c5259d07e 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java @@ -1,7 +1,10 @@ package eu.dnetlib.dhp.schema.oaf; -public class Programme { +import java.io.Serializable; +import java.util.Objects; + +public class Programme implements Serializable { private String code; private String description; @@ -20,4 +23,17 @@ public class Programme { public void setDescription(String description) { this.description = description; } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + + Programme programme = (Programme) o; + return Objects.equals(code, programme.code); + } + + } From dd1e0b93b851cde75ef27e53d743a44ea901ebb5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 27 May 2020 17:40:32 +0200 Subject: [PATCH 31/53] added merge for Programme --- .../src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java 
b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java index 1eae3e8ee..1fcfb305e 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java @@ -330,6 +330,9 @@ public class Project extends OafEntity implements Serializable { fundedamount = p.getFundedamount() != null && compareTrust(this, e) < 0 ? p.getFundedamount() : fundedamount; + + programme = mergeLists(programme, p.getProgramme()); + mergeOAFDataInfo(e); } } From 18554534340ce5ca7d53addeaed5c48c8d8ab883 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 27 May 2020 17:59:36 +0200 Subject: [PATCH 32/53] changed the outputdir of the last step --- .../eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index ba99fb314..421df460c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -120,7 +120,7 @@ --projectPath${workingDir}/preparedProjects --programmePath${workingDir}/preparedProgramme - --outputPath/tmp/h2020programme + --outputPath${outputPath} From 669c05c7712154e37fa5d76fc8ea1a0bec9c9d8c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 10:00:45 +0200 Subject: [PATCH 33/53] added groupBy before creating Actions --- .../project/SparkAtomicActionJob.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java index 990e50abd..1023e2d19 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.IOException; import java.util.Arrays; import java.util.HashMap; +import java.util.Objects; import java.util.Optional; import java.util.function.Consumer; @@ -19,6 +20,7 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapred.TextOutputFormat; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.rdd.SequenceFileRDDFunctions; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -122,7 +124,17 @@ public class SparkAtomicActionJob { return null; }, Encoders.bean(Project.class)) - .filter(p -> !(p == null)) + .filter(Objects::nonNull) + .groupByKey( + (MapFunction) p -> p.getId(), + Encoders.STRING()) + .mapGroups((MapGroupsFunction) (s, it) -> { + Project first = it.next(); + it.forEachRemaining(p -> { + first.mergeFrom(p); + }); + return first; + }, Encoders.bean(Project.class)) .toJavaRDD() .map(p -> new AtomicAction(Project.class, p)) .mapToPair( From 96d1a3c4316f972924a7c0a76dc0d7f066a1712c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 10:04:10 +0200 Subject: [PATCH 34/53] deleted the file were to store the csv files --- .../dnetlib/dhp/actionmanager/project/csvutils/ReadCSV.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/ReadCSV.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/ReadCSV.java index 905194232..2b72b229a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/ReadCSV.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/csvutils/ReadCSV.java @@ -76,10 +76,9 @@ public class ReadCSV implements Closeable { Path hdfsWritePath = new Path(hdfsPath); FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { - fsDataOutputStream = fileSystem.append(hdfsWritePath); - } else { - fsDataOutputStream = fileSystem.create(hdfsWritePath); + fileSystem.delete(hdfsWritePath, false); } + fsDataOutputStream = fileSystem.create(hdfsWritePath); this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); this.csvFile = httpConnector.getInputSource(fileURL); From 1060977272fd90449c48612a7976511d9d525154 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 10:04:36 +0200 Subject: [PATCH 35/53] added fs actions to remove and the create the workingDir --- .../eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index 421df460c..edeb0582a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -24,6 +24,8 @@ + + From 87b07f4af88b5fa87d807dc5c15fad097409f7e5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 10:05:43 +0200 Subject: [PATCH 36/53] removed unused variables --- 
.../eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java index a5abb9ea7..c6dab13a0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java @@ -25,7 +25,6 @@ public class PrepareProgramme { private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final HashMap programmeMap = new HashMap<>(); public static void main(String[] args) throws Exception { From df44db686a8b53e2769167e6329d0f4b9d605cbf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 10:07:00 +0200 Subject: [PATCH 37/53] refactoring --- .../project/PrepareProjects.java | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index df37f9286..7ca50b219 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -3,10 +3,7 @@ package eu.dnetlib.dhp.actionmanager.project; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Optional; +import java.util.*; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; @@ 
-81,17 +78,14 @@ public class PrepareProjects { .flatMap(p -> { List csvProjectList = new ArrayList<>(); String[] programme = p.getProgramme().split(";"); - if (programme.length > 1) { - String id = p.getId(); - for (int i = 0; i < programme.length; i++) { + Arrays + .stream(programme) + .forEach(value -> { CSVProject csvProject = new CSVProject(); - csvProject.setProgramme(programme[i]); - csvProject.setId(id); + csvProject.setProgramme(value); + csvProject.setId(p.getId()); csvProjectList.add(csvProject); - } - } else { - csvProjectList.add(p); - } + }); return csvProjectList.iterator(); }) From 35b72791478e42f3e1e1b1f708ddf825acb4dcc5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 10:26:12 +0200 Subject: [PATCH 38/53] changed test because data are saved as SequenceFile now, and because of the group by the umber of produced update decrease --- .../actionmanager/project/SparkUpdateProjectTest.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index f7b8722c4..64c6ac32f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -5,7 +5,9 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import eu.dnetlib.dhp.schema.action.AtomicAction; import org.apache.commons.io.FileUtils; +import org.apache.hadoop.io.Text; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -82,10 +84,12 @@ public class SparkUpdateProjectTest { final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - 
.textFile(workingDir.toString() + "/actionSet") - .map(item -> OBJECT_MAPPER.readValue(item, Project.class)); + .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Project)aa.getPayload())) + ; - Assertions.assertEquals(16, tmp.count()); + Assertions.assertEquals(14, tmp.count()); // Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class)); // From b737ed8236a8b1866a634e4edb25452e28dd9fe7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 17:29:21 +0200 Subject: [PATCH 39/53] added part to read projects from the openaire db to filter out those in the csv file that are not in the db --- .../actionmanager/project/ProjectSubset.java | 16 +++ .../project/ReadProjectsFromDB.java | 113 ++++++++++++++++++ .../project/read_projects_db.json | 32 +++++ 3 files changed, 161 insertions(+) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/read_projects_db.json diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java new file mode 100644 index 000000000..cfbb62f21 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java @@ -0,0 +1,16 @@ +package eu.dnetlib.dhp.actionmanager.project; + +import java.io.Serializable; + +public class ProjectSubset implements Serializable { + + private String code; + + public String getCode() { + return code; + } + + public void setCode(String code) { + 
this.code = code; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java new file mode 100644 index 000000000..0015dc60f --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java @@ -0,0 +1,113 @@ +package eu.dnetlib.dhp.actionmanager.project; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.DbClient; +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.BufferedWriter; +import java.io.Closeable; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; +import java.sql.ResultSet; +import java.util.Arrays; +import java.util.List; +import java.util.function.Consumer; +import java.util.function.Function; + +public class ReadProjectsFromDB implements Closeable { + + private final DbClient dbClient; + private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class); + private final Configuration conf; + private final BufferedWriter writer; + private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private final static String query = "SELECT code " + + "from projects where id like 'corda__h2020%' " ; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + ReadProjectsFromDB.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/project/read_projects_db.json"))); + + parser.parseArgument(args); 
+ + final String dbUrl = parser.get("postgresUrl"); + final String dbUser = parser.get("postgresUser"); + final String dbPassword = parser.get("postgresPassword"); + final String hdfsPath = parser.get("hdfsPath") ; + final String hdfsNameNode = parser.get("hdfsNameNode"); + + try (final ReadProjectsFromDB rbl = new ReadProjectsFromDB(hdfsPath, hdfsNameNode, dbUrl, dbUser, + dbPassword)) { + + log.info("Processing blacklist..."); + rbl.execute(query, rbl::processProjectsEntry); + + } + } + public void execute(final String sql, final Function> producer) throws Exception { + + final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeProject(r)); + + dbClient.processResults(sql, consumer); + } + + public List processProjectsEntry(ResultSet rs) { + try { + ProjectSubset p = new ProjectSubset(); + p.setCode(rs.getString("code")); + + return Arrays.asList(p); + + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + protected void writeProject(final ProjectSubset r) { + try { + writer.write(OBJECT_MAPPER.writeValueAsString(r)); + writer.newLine(); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + public ReadProjectsFromDB( + final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) + throws Exception { + + this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); + this.conf = new Configuration(); + this.conf.set("fs.defaultFS", hdfsNameNode); + FileSystem fileSystem = FileSystem.get(this.conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + + this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + } + + @Override + public void close() throws IOException { + dbClient.close(); + writer.close(); + } +} + diff --git 
a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/read_projects_db.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/read_projects_db.json new file mode 100644 index 000000000..9a2eadaa7 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/read_projects_db.json @@ -0,0 +1,32 @@ +[ + { + "paramName": "p", + "paramLongName": "hdfsPath", + "paramDescription": "the path where storing the sequential file", + "paramRequired": true + }, + { + "paramName": "nn", + "paramLongName": "hdfsNameNode", + "paramDescription": "the name node on hdfs", + "paramRequired": true + }, + { + "paramName": "pgurl", + "paramLongName": "postgresUrl", + "paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb", + "paramRequired": true + }, + { + "paramName": "pguser", + "paramLongName": "postgresUser", + "paramDescription": "postgres user", + "paramRequired": false + }, + { + "paramName": "pgpasswd", + "paramLongName": "postgresPassword", + "paramDescription": "postgres password", + "paramRequired": false + } +] \ No newline at end of file From 5309a99a70bf6e81ca5d55e6d0062be1d75e05f4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 17:29:53 +0200 Subject: [PATCH 40/53] modified the PrepareProjects to consider those in the db --- .../project/PrepareProjects.java | 71 ++++++++++++++----- 1 file changed, 52 insertions(+), 19 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index 7ca50b219..3d8226f4d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -9,9 +9,11 @@ 
import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,6 +57,9 @@ public class PrepareProjects { final String outputPath = parser.get("outputPath"); log.info("outputPath {}: ", outputPath); + final String dbProjectPath = parser.get("dbProjectPath"); + log.info("dbProjectPath {}: ", dbProjectPath); + SparkConf conf = new SparkConf(); runWithSparkSession( @@ -62,7 +67,7 @@ public class PrepareProjects { isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - exec(spark, projectPath, outputPath); + exec(spark, projectPath, dbProjectPath, outputPath); }); } @@ -70,27 +75,55 @@ public class PrepareProjects { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } - private static void exec(SparkSession spark, String progjectPath, String outputPath) { + private static void exec(SparkSession spark, String progjectPath, String dbProjectPath, String outputPath) { Dataset project = readPath(spark, progjectPath, CSVProject.class); + Dataset dbProjects = readPath(spark, dbProjectPath, ProjectSubset.class); - project - .toJavaRDD() - .flatMap(p -> { - List csvProjectList = new ArrayList<>(); - String[] programme = p.getProgramme().split(";"); - Arrays - .stream(programme) - .forEach(value -> { - CSVProject csvProject = new CSVProject(); - csvProject.setProgramme(value); - csvProject.setId(p.getId()); - csvProjectList.add(csvProject); - }); + dbProjects.joinWith(project, dbProjects.col("code").equalTo(project.col("id")), "left") + .flatMap((FlatMapFunction, CSVProject>) value -> { + Optional csvProject = 
Optional.ofNullable(value._2()); + if(! csvProject.isPresent()){ + return null; + } + List csvProjectList = new ArrayList<>(); + String[] programme = csvProject.get().getProgramme().split(";"); + Arrays + .stream(programme) + .forEach(p -> { + CSVProject proj = new CSVProject(); + proj.setProgramme(p); + proj.setId(csvProject.get().getId()); + csvProjectList.add(proj); + }); - return csvProjectList.iterator(); - }) - .map(p -> OBJECT_MAPPER.writeValueAsString(p)) - .saveAsTextFile(outputPath); + return csvProjectList.iterator(); + }, Encoders.bean(CSVProject.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); +// +// .map(value -> { +// Optional csvProject = Optional.ofNullable(value._2()); +// }, Encoders.bean(CSVProject.class)) +// .filter(Objects::nonNull) +// .toJavaRDD() +// .flatMap(p -> { +// List csvProjectList = new ArrayList<>(); +// String[] programme = p.getProgramme().split(";"); +// Arrays +// .stream(programme) +// .forEach(value -> { +// CSVProject csvProject = new CSVProject(); +// csvProject.setProgramme(value); +// csvProject.setId(p.getId()); +// csvProjectList.add(csvProject); +// }); +// +// return csvProjectList.iterator(); +// }) +// .map(p -> OBJECT_MAPPER.writeValueAsString(p)) +// .saveAsTextFile(outputPath); } From 6a15067a644133d2178e50fa2578db49f03a0934 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 17:30:09 +0200 Subject: [PATCH 41/53] added one step in the workflow --- .../actionmanager/project/oozie_app/workflow.xml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index edeb0582a..ca0a73b97 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ 
b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -51,6 +51,19 @@ --hdfsPath${workingDir}/programme --classForNameeu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme + + + + + + + eu.dnetlib.dhp.actionmanager.project.ReadProjectsFromDB + --hdfsPath${workingDir}/dbProjects + --hdfsNameNode${nameNode} + --postgresUrl${postgresURL} + --postgresUser${postgresUser} + --postgresPassword${postgresPassword} + From 773735f87059e43b1b700128b5777ca922149c45 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 17:30:45 +0200 Subject: [PATCH 42/53] added the path to the file containing the projects code from the db --- .../actionmanager/project/prepare_project_parameters.json | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json index 5fc88ce8e..49f9c7306 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/prepare_project_parameters.json @@ -16,5 +16,11 @@ "paramLongName": "outputPath", "paramDescription": "the path of the new ActionSet", "paramRequired": true -} +}, + { + "paramName": "dbp", + "paramLongName": "dbProjectPath", + "paramDescription": "the path of the project code read from db", + "paramRequired": true + } ] \ No newline at end of file From 01f7876595c7170ad5d7c1c7d9d6e13087b1f288 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 23:50:32 +0200 Subject: [PATCH 43/53] fix issue with flatMap - the return type must not be null --- .../project/PrepareProjects.java | 77 ++++++++----------- .../dhp/actionmanager/project/dbProject | 0 2 
files changed, 30 insertions(+), 47 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/dbProject diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index 3d8226f4d..78aed1a69 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -57,7 +57,7 @@ public class PrepareProjects { final String outputPath = parser.get("outputPath"); log.info("outputPath {}: ", outputPath); - final String dbProjectPath = parser.get("dbProjectPath"); + final String dbProjectPath = parser.get("dbProjectPath"); log.info("dbProjectPath {}: ", dbProjectPath); SparkConf conf = new SparkConf(); @@ -75,56 +75,39 @@ public class PrepareProjects { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } - private static void exec(SparkSession spark, String progjectPath, String dbProjectPath, String outputPath) { - Dataset project = readPath(spark, progjectPath, CSVProject.class); + private static void exec(SparkSession spark, String projectPath, String dbProjectPath, String outputPath) { + Dataset project = readPath(spark, projectPath, CSVProject.class); Dataset dbProjects = readPath(spark, dbProjectPath, ProjectSubset.class); - dbProjects.joinWith(project, dbProjects.col("code").equalTo(project.col("id")), "left") - .flatMap((FlatMapFunction, CSVProject>) value -> { - Optional csvProject = Optional.ofNullable(value._2()); - if(! 
csvProject.isPresent()){ - return null; - } - List csvProjectList = new ArrayList<>(); - String[] programme = csvProject.get().getProgramme().split(";"); - Arrays - .stream(programme) - .forEach(p -> { - CSVProject proj = new CSVProject(); - proj.setProgramme(p); - proj.setId(csvProject.get().getId()); - csvProjectList.add(proj); - }); + dbProjects + .joinWith(project, dbProjects.col("code").equalTo(project.col("id")), "left") + .flatMap(getTuple2CSVProjectFlatMapFunction(), Encoders.bean(CSVProject.class)) + .filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); - return csvProjectList.iterator(); - }, Encoders.bean(CSVProject.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); -// -// .map(value -> { -// Optional csvProject = Optional.ofNullable(value._2()); -// }, Encoders.bean(CSVProject.class)) -// .filter(Objects::nonNull) -// .toJavaRDD() -// .flatMap(p -> { -// List csvProjectList = new ArrayList<>(); -// String[] programme = p.getProgramme().split(";"); -// Arrays -// .stream(programme) -// .forEach(value -> { -// CSVProject csvProject = new CSVProject(); -// csvProject.setProgramme(value); -// csvProject.setId(p.getId()); -// csvProjectList.add(csvProject); -// }); -// -// return csvProjectList.iterator(); -// }) -// .map(p -> OBJECT_MAPPER.writeValueAsString(p)) -// .saveAsTextFile(outputPath); + } + private static FlatMapFunction, CSVProject> getTuple2CSVProjectFlatMapFunction() { + return (FlatMapFunction, CSVProject>) value -> { + Optional csvProject = Optional.ofNullable(value._2()); + List csvProjectList = new ArrayList<>(); + if (csvProject.isPresent()) { + + String[] programme = csvProject.get().getProgramme().split(";"); + Arrays + .stream(programme) + .forEach(p -> { + CSVProject proj = new CSVProject(); + proj.setProgramme(p); + proj.setId(csvProject.get().getId()); + csvProjectList.add(proj); + }); + } + return 
csvProjectList.iterator(); + }; } public static Dataset readPath( diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/dbProject b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/dbProject new file mode 100644 index 000000000..e69de29bb From 782984d8e5a03cfc2093a76d9c45d458836db1a6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 23:52:41 +0200 Subject: [PATCH 44/53] added needed parameter --- .../dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index ca0a73b97..1e3445675 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -96,7 +96,7 @@ yarn cluster - PrepareProgramme + PrepareProjects eu.dnetlib.dhp.actionmanager.project.PrepareProjects dhp-aggregation-${projectVersion}.jar @@ -111,6 +111,7 @@ --projectPath${workingDir}/projects --outputPath${workingDir}/preparedProjects + --dbProjectPath${workingDir}/dbProjects From 6989fb9c8ad6c5ee45979e7da82fe1d491927d5e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 23:53:24 +0200 Subject: [PATCH 45/53] changed the project test according to the newly introduced join with the db project codes --- .../dhp/actionmanager/project/PrepareProjectTest.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java 
b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java index 73bedb741..5ff88e46f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -67,7 +67,7 @@ public class PrepareProjectTest { } @Test - public void numberDistinctProgrammeTest() throws Exception { + public void numberDistinctProjectTest() throws Exception { PrepareProjects .main( new String[] { @@ -76,7 +76,10 @@ public class PrepareProjectTest { "-projectPath", getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/projects_subset.json").getPath(), "-outputPath", - workingDir.toString() + "/preparedProjects" + workingDir.toString() + "/preparedProjects", + "-dbProjectPath", + getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/dbProject").getPath(), + }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); @@ -85,7 +88,7 @@ public class PrepareProjectTest { .textFile(workingDir.toString() + "/preparedProjects") .map(item -> OBJECT_MAPPER.readValue(item, CSVProject.class)); - Assertions.assertEquals(20, tmp.count()); + Assertions.assertEquals(8, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProject.class)); From 8b6e886fb6d89681d00fc215c271c084f60ccbb9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 28 May 2020 23:54:31 +0200 Subject: [PATCH 46/53] added new resource for testing --- .../eu/dnetlib/dhp/actionmanager/project/dbProject | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/dbProject b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/dbProject index e69de29bb..f8e3c4589 100644 --- 
a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/dbProject +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/dbProject @@ -0,0 +1,8 @@ +{"code":"894593"} +{"code":"897004"} +{"code":"896300"} +{"code":"892890"} +{"code":"886828"} +{"code":"8867767"} +{"code":"101003374"} +{"code":"886776"} \ No newline at end of file From 6f1eea28b69a10aeb4338305ae43a4b936494961 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 29 May 2020 10:41:39 +0200 Subject: [PATCH 47/53] changed message in log --- .../project/ReadProjectsFromDB.java | 160 +++++++++--------- 1 file changed, 81 insertions(+), 79 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java index 0015dc60f..2d541d2f9 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java @@ -1,15 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.project; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.DbClient; -import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; +package eu.dnetlib.dhp.actionmanager.project; import java.io.BufferedWriter; import java.io.Closeable; @@ -22,92 +12,104 @@ import java.util.List; import java.util.function.Consumer; import java.util.function.Function; +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import 
org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.DbClient; + public class ReadProjectsFromDB implements Closeable { - private final DbClient dbClient; - private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class); - private final Configuration conf; - private final BufferedWriter writer; - private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private final DbClient dbClient; + private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class); + private final Configuration conf; + private final BufferedWriter writer; + private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final static String query = "SELECT code " + - "from projects where id like 'corda__h2020%' " ; + private final static String query = "SELECT code " + + "from projects where id like 'corda__h2020%' "; - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - ReadProjectsFromDB.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/project/read_projects_db.json"))); + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + ReadProjectsFromDB.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/project/read_projects_db.json"))); - parser.parseArgument(args); + parser.parseArgument(args); - final String dbUrl = parser.get("postgresUrl"); - final String dbUser = parser.get("postgresUser"); - final String dbPassword = parser.get("postgresPassword"); - final String hdfsPath = parser.get("hdfsPath") ; - final String 
hdfsNameNode = parser.get("hdfsNameNode"); + final String dbUrl = parser.get("postgresUrl"); + final String dbUser = parser.get("postgresUser"); + final String dbPassword = parser.get("postgresPassword"); + final String hdfsPath = parser.get("hdfsPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); - try (final ReadProjectsFromDB rbl = new ReadProjectsFromDB(hdfsPath, hdfsNameNode, dbUrl, dbUser, - dbPassword)) { + try (final ReadProjectsFromDB rbl = new ReadProjectsFromDB(hdfsPath, hdfsNameNode, dbUrl, dbUser, + dbPassword)) { - log.info("Processing blacklist..."); - rbl.execute(query, rbl::processProjectsEntry); + log.info("Processing projects..."); + rbl.execute(query, rbl::processProjectsEntry); - } - } - public void execute(final String sql, final Function> producer) throws Exception { + } + } - final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeProject(r)); + public void execute(final String sql, final Function> producer) throws Exception { - dbClient.processResults(sql, consumer); - } + final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeProject(r)); - public List processProjectsEntry(ResultSet rs) { - try { - ProjectSubset p = new ProjectSubset(); - p.setCode(rs.getString("code")); + dbClient.processResults(sql, consumer); + } - return Arrays.asList(p); + public List processProjectsEntry(ResultSet rs) { + try { + ProjectSubset p = new ProjectSubset(); + p.setCode(rs.getString("code")); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } + return Arrays.asList(p); - protected void writeProject(final ProjectSubset r) { - try { - writer.write(OBJECT_MAPPER.writeValueAsString(r)); - writer.newLine(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } + } catch (final Exception e) { + throw new RuntimeException(e); + } + } - public ReadProjectsFromDB( - final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) - throws 
Exception { + protected void writeProject(final ProjectSubset r) { + try { + writer.write(OBJECT_MAPPER.writeValueAsString(r)); + writer.newLine(); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } - this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); - FileSystem fileSystem = FileSystem.get(this.conf); - Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; - if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath, false); - } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + public ReadProjectsFromDB( + final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) + throws Exception { + this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); + this.conf = new Configuration(); + this.conf.set("fs.defaultFS", hdfsNameNode); + FileSystem fileSystem = FileSystem.get(this.conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); - } + this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + } - @Override - public void close() throws IOException { - dbClient.close(); - writer.close(); - } + @Override + public void close() throws IOException { + dbClient.close(); + writer.close(); + } } - From dfa4997a4ff96605cc91fc467e2b203ba1e3f3fc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 29 May 2020 10:45:18 +0200 Subject: [PATCH 48/53] removed commented code --- .../actionmanager/project/SparkUpdateProjectTest.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git 
a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index 64c6ac32f..4d3ec140b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -5,7 +5,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import eu.dnetlib.dhp.schema.action.AtomicAction; import org.apache.commons.io.FileUtils; import org.apache.hadoop.io.Text; import org.apache.spark.SparkConf; @@ -21,6 +20,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.oaf.Project; public class SparkUpdateProjectTest { @@ -84,15 +84,12 @@ public class SparkUpdateProjectTest { final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) + .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) - .map(aa -> ((Project)aa.getPayload())) - ; + .map(aa -> ((Project) aa.getPayload())); Assertions.assertEquals(14, tmp.count()); -// Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class)); -// -// Assertions.assertEquals(0, verificationDataset.filter("shortTitle =''").count()); + } } From 1bc1d15eaf1508bdd3bc5a3a5b3aa48d57f63708 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 16 Jun 2020 16:54:28 +0200 Subject: [PATCH 49/53] stubbing for mock datasource.identities must be typed as array --- .../dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json | 3 ++- 1 file changed, 2 
insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json index f4c5f97ed..0f1da7095 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json @@ -6,9 +6,10 @@ }, { "field": "identities", - "type": "not_used", + "type": "array", "value": [ "274269ac6f3b::2579-5449", + "piwik:13", null ] }, From 306669209fa58206635ab094f2f3ef28963b34f1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 16 Jun 2020 16:54:44 +0200 Subject: [PATCH 50/53] code formatting --- .../eu/dnetlib/dhp/schema/oaf/Measure.java | 79 ++++++++++--------- .../eu/dnetlib/dhp/schema/oaf/Programme.java | 1 - .../eu/dnetlib/dhp/schema/oaf/Relation.java | 14 ++-- .../eu/dnetlib/dhp/schema/oaf/Result.java | 2 +- .../dnetlib/dhp/schema/oaf/MeasureTest.java | 67 +++++++++------- .../actionmanager/project/ProjectSubset.java | 15 ++-- .../project/SparkUpdateProjectTest.java | 1 - 7 files changed, 96 insertions(+), 83 deletions(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java index c37e76061..c0c14d10d 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java @@ -1,56 +1,59 @@ + package eu.dnetlib.dhp.schema.oaf; -import com.google.common.base.Objects; - import java.util.List; +import com.google.common.base.Objects; + /** * Represent a measure, must be further described by a system available resource providing name and descriptions. */ public class Measure { - /** - * Unique measure identifier. 
- */ - private String id; + /** + * Unique measure identifier. + */ + private String id; - /** - * List of units associated with this measure. KeyValue provides a pair to store the laber (key) and the value, - * plus common provenance information. - */ - private List unit; + /** + * List of units associated with this measure. KeyValue provides a pair to store the laber (key) and the value, plus + * common provenance information. + */ + private List unit; - public String getId() { - return id; - } + public String getId() { + return id; + } - public void setId(String id) { - this.id = id; - } + public void setId(String id) { + this.id = id; + } - public List getUnit() { - return unit; - } + public List getUnit() { + return unit; + } - public void setUnit(List unit) { - this.unit = unit; - } + public void setUnit(List unit) { + this.unit = unit; + } - public void mergeFrom(Measure m) { - //TODO - } + public void mergeFrom(Measure m) { + // TODO + } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - Measure measure = (Measure) o; - return Objects.equal(id, measure.id) && - Objects.equal(unit, measure.unit); - } + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + Measure measure = (Measure) o; + return Objects.equal(id, measure.id) && + Objects.equal(unit, measure.unit); + } - @Override - public int hashCode() { - return Objects.hashCode(id, unit); - } + @Override + public int hashCode() { + return Objects.hashCode(id, unit); + } } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java index c5259d07e..00dc32fbc 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java @@ -35,5 +35,4 @@ public class 
Programme implements Serializable { return Objects.equals(code, programme.code); } - } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java index d77bd7d73..17a50d7ac 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java @@ -8,10 +8,10 @@ import java.util.stream.Collectors; import java.util.stream.Stream; /** - * Relation models any edge between two nodes in the OpenAIRE graph. It has a source id and a target id - * pointing to graph node identifiers and it is further characterised by the semantic of the link through the fields - * relType, subRelType and relClass. Provenance information is modeled according to the dataInfo element and collectedFrom, - * while individual relationship types can provide extra information via the properties field. + * Relation models any edge between two nodes in the OpenAIRE graph. It has a source id and a target id pointing to + * graph node identifiers and it is further characterised by the semantic of the link through the fields relType, + * subRelType and relClass. Provenance information is modeled according to the dataInfo element and collectedFrom, while + * individual relationship types can provide extra information via the properties field. */ public class Relation extends Oaf { @@ -26,7 +26,8 @@ public class Relation extends Oaf { private String subRelType; /** - * Indicates the direction of the relationship, values include 'isSupplementTo', 'isSupplementedBy', 'merges, 'isMergedIn'. + * Indicates the direction of the relationship, values include 'isSupplementTo', 'isSupplementedBy', 'merges, + * 'isMergedIn'. */ private String relClass; @@ -51,7 +52,8 @@ public class Relation extends Oaf { private String validationDate; /** - * List of relation specific properties. 
Values include 'similarityLevel', indicating the similarity score between a pair of publications. + * List of relation specific properties. Values include 'similarityLevel', indicating the similarity score between a + * pair of publications. */ private List properties = new ArrayList<>(); diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java index 44737d46e..0dc76fd43 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java @@ -241,7 +241,7 @@ public class Result extends OafEntity implements Serializable { Result r = (Result) e; - //TODO consider merging also Measures + // TODO consider merging also Measures instance = mergeLists(instance, r.getInstance()); diff --git a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java index 4275e2c56..26b4407c9 100644 --- a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java +++ b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java @@ -1,48 +1,57 @@ + package eu.dnetlib.dhp.schema.oaf; +import java.io.IOException; +import java.util.List; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.util.List; public class MeasureTest { - public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() - .setSerializationInclusion(JsonInclude.Include.NON_NULL); + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .setSerializationInclusion(JsonInclude.Include.NON_NULL); - @Test 
- public void testMeasureSerialization() throws IOException { + @Test + public void testMeasureSerialization() throws IOException { - Measure popularity = new Measure(); - popularity.setId("popularity"); - popularity.setUnit(Lists.newArrayList( - unit("score", "0.5"))); + Measure popularity = new Measure(); + popularity.setId("popularity"); + popularity + .setUnit( + Lists + .newArrayList( + unit("score", "0.5"))); - Measure influence = new Measure(); - influence.setId("influence"); - influence.setUnit(Lists.newArrayList( - unit("score", "0.3"))); + Measure influence = new Measure(); + influence.setId("influence"); + influence + .setUnit( + Lists + .newArrayList( + unit("score", "0.3"))); - List m = Lists.newArrayList(popularity, influence); + List m = Lists.newArrayList(popularity, influence); - String s = OBJECT_MAPPER.writeValueAsString(m); - System.out.println(s); + String s = OBJECT_MAPPER.writeValueAsString(m); + System.out.println(s); - List mm = OBJECT_MAPPER.readValue(s, new TypeReference>() { }); + List mm = OBJECT_MAPPER.readValue(s, new TypeReference>() { + }); - Assertions.assertNotNull(mm); - } + Assertions.assertNotNull(mm); + } - private KeyValue unit(String key, String value) { - KeyValue unit = new KeyValue(); - unit.setKey(key); - unit.setValue(value); - return unit; - } + private KeyValue unit(String key, String value) { + KeyValue unit = new KeyValue(); + unit.setKey(key); + unit.setValue(value); + return unit; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java index cfbb62f21..2fccbc516 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ProjectSubset.java @@ -1,16 +1,17 @@ + package eu.dnetlib.dhp.actionmanager.project; import 
java.io.Serializable; public class ProjectSubset implements Serializable { - private String code; + private String code; - public String getCode() { - return code; - } + public String getCode() { + return code; + } - public void setCode(String code) { - this.code = code; - } + public void setCode(String code) { + this.code = code; + } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index 4d3ec140b..718cd8ebe 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -90,6 +90,5 @@ public class SparkUpdateProjectTest { Assertions.assertEquals(14, tmp.count()); - } } From 64f02de5d3765f741cdb458e42c8a20a725aaf4a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 16 Jun 2020 17:48:51 +0200 Subject: [PATCH 51/53] updated workflow definition to include the cleaning step --- .../eu/dnetlib/dhp/wf/profiles/provision.xml | 92 +++++++++++++++---- 1 file changed, 73 insertions(+), 19 deletions(-) diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml index 487afee4f..28cbde70d 100644 --- a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/provision.xml @@ -4,7 +4,7 @@ - + Data Provision [OCEAN] @@ -61,11 +61,23 @@ + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/beta_provision/graph/05_graph_cleaned + + + + + + Set the target path to store the ORCID enriched graph orcidGraphPath - 
/tmp/beta_provision/graph/05_graph_orcid + /tmp/beta_provision/graph/06_graph_orcid @@ -75,7 +87,7 @@ Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/beta_provision/graph/06_graph_bulktagging + /tmp/beta_provision/graph/07_graph_bulktagging @@ -85,7 +97,7 @@ Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/beta_provision/graph/07_graph_affiliation + /tmp/beta_provision/graph/08_graph_affiliation @@ -95,7 +107,7 @@ Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/beta_provision/graph/08_graph_comunity_organization + /tmp/beta_provision/graph/09_graph_comunity_organization @@ -105,7 +117,7 @@ Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/beta_provision/graph/09_graph_funding + /tmp/beta_provision/graph/10_graph_funding @@ -115,7 +127,7 @@ Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/beta_provision/graph/10_graph_comunity_sem_rel + /tmp/beta_provision/graph/11_graph_comunity_sem_rel @@ -125,7 +137,7 @@ Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/beta_provision/graph/11_graph_country + /tmp/beta_provision/graph/12_graph_country @@ -135,7 +147,7 @@ Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/beta_provision/graph/12_graph_blacklisted + /tmp/beta_provision/graph/13_graph_blacklisted @@ -217,7 +229,8 @@ IIS { - 'graphOutputPath' : 'aggregatorGraphPath' + 'graphOutputPath' : 'aggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl' } @@ -227,8 +240,8 @@ 'mongoDb' : 'mdstore', 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', 'postgresUser' : 'dnet', - 'postgresPassword' : 'dnetPwd', - 'reuseContent' : 'true', + 'postgresPassword' : '', + 'reuseContent' : 'false', 'contentPath' : '/tmp/beta_provision/aggregator', 
'workingDir' : '/tmp/beta_provision/working_dir/aggregator' } @@ -255,6 +268,8 @@ { 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', 'activePromoteDatasetActionPayload' : 'true', 'activePromoteDatasourceActionPayload' : 'true', 'activePromoteOrganizationActionPayload' : 'true', @@ -315,6 +330,8 @@ { 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', 'activePromoteDatasetActionPayload' : 'true', 'activePromoteDatasourceActionPayload' : 'true', 'activePromoteOrganizationActionPayload' : 'true', @@ -354,7 +371,43 @@ build-report - + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'consistentGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', + 'workingPath' : '/tmp/beta_provision/working_dir/clean' + } + + build-report + + + + + + + + Do we skip the graph enrichment steps? 
(Yes to prepare the graph for the IIS) + + NO + + + + @@ -364,7 +417,7 @@ IIS { - 'sourcePath' : 'consistentGraphPath', + 'sourcePath' : 'cleanedGraphPath', 'outputPath': 'orcidGraphPath' } @@ -552,7 +605,7 @@ 'workingDir' : '/tmp/beta_provision/working_dir/blacklist', 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', 'postgresUser' : 'dnet', - 'postgresPassword' : 'dnetPwd' + 'postgresPassword' : '' } build-report @@ -561,12 +614,13 @@ + - wf_20200509_100941_857 - 2020-05-09T13:26:09+00:00 - FAILURE - eu.dnetlib.data.hadoop.rmi.HadoopServiceException: hadoop job: 0002933-200403132837156-oozie-oozi-W failed with status: KILLED, oozie log: 2020-05-09 13:23:31,194 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[] No results found 2020-05-09 13:23:31,216 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] Start action [0002933-200403132837156-oozie-oozi-W@:start:] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:31,216 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] [***0002933-200403132837156-oozie-oozi-W@:start:***]Action status=DONE 2020-05-09 13:23:31,216 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] 
ACTION[0002933-200403132837156-oozie-oozi-W@:start:] [***0002933-200403132837156-oozie-oozi-W@:start:***]Action updated in DB! 2020-05-09 13:23:31,257 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] No results found 2020-05-09 13:23:31,275 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@:start:] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@:start: 2020-05-09 13:23:31,275 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W 2020-05-09 13:23:31,314 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] Start action [0002933-200403132837156-oozie-oozi-W@reset-outputpath] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:33,897 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] [***0002933-200403132837156-oozie-oozi-W@reset-outputpath***]Action status=DONE 2020-05-09 13:23:33,897 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] [***0002933-200403132837156-oozie-oozi-W@reset-outputpath***]Action updated in DB! 
2020-05-09 13:23:33,947 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] No results found 2020-05-09 13:23:33,966 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] Start action [0002933-200403132837156-oozie-oozi-W@copy_entities] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:33,966 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] [***0002933-200403132837156-oozie-oozi-W@copy_entities***]Action status=DONE 2020-05-09 13:23:33,966 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] [***0002933-200403132837156-oozie-oozi-W@copy_entities***]Action updated in DB! 
2020-05-09 13:23:34,012 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No results found 2020-05-09 13:23:34,018 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No results found 2020-05-09 13:23:34,023 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No results found 2020-05-09 13:23:34,029 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No results found 2020-05-09 13:23:34,124 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] Start action [0002933-200403132837156-oozie-oozi-W@copy_relation] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:34,130 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] Start action 
[0002933-200403132837156-oozie-oozi-W@copy_projects] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:34,130 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] Start action [0002933-200403132837156-oozie-oozi-W@copy_datasources] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:34,140 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] Start action [0002933-200403132837156-oozie-oozi-W@copy_organization] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:23:35,010 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] checking action, hadoop job ID [job_1585920557248_14569] status [RUNNING] 2020-05-09 13:23:35,018 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] [***0002933-200403132837156-oozie-oozi-W@copy_projects***]Action status=RUNNING 2020-05-09 13:23:35,018 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] 
APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] [***0002933-200403132837156-oozie-oozi-W@copy_projects***]Action updated in DB! 2020-05-09 13:23:35,022 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] checking action, hadoop job ID [job_1585920557248_14568] status [RUNNING] 2020-05-09 13:23:35,027 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_projects 2020-05-09 13:23:35,028 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] [***0002933-200403132837156-oozie-oozi-W@copy_relation***]Action status=RUNNING 2020-05-09 13:23:35,028 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] [***0002933-200403132837156-oozie-oozi-W@copy_relation***]Action updated in DB! 
2020-05-09 13:23:35,031 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] checking action, hadoop job ID [job_1585920557248_14570] status [RUNNING] 2020-05-09 13:23:35,035 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] [***0002933-200403132837156-oozie-oozi-W@copy_datasources***]Action status=RUNNING 2020-05-09 13:23:35,035 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] [***0002933-200403132837156-oozie-oozi-W@copy_datasources***]Action updated in DB! 2020-05-09 13:23:35,037 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_relation 2020-05-09 13:23:35,048 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_datasources 2020-05-09 13:23:35,072 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] checking action, hadoop job ID [job_1585920557248_14571] status [RUNNING] 2020-05-09 13:23:35,076 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] [***0002933-200403132837156-oozie-oozi-W@copy_organization***]Action status=RUNNING 2020-05-09 13:23:35,076 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] [***0002933-200403132837156-oozie-oozi-W@copy_organization***]Action updated in DB! 2020-05-09 13:23:35,084 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_organization 2020-05-09 13:23:35,090 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_entities] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_entities 2020-05-09 13:23:35,090 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@reset-outputpath] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@reset-outputpath 2020-05-09 13:23:58,926 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] callback for action [0002933-200403132837156-oozie-oozi-W@copy_datasources] 2020-05-09 13:23:59,085 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] checking action, hadoop job ID [job_1585920557248_14570] status [RUNNING] 2020-05-09 13:23:59,242 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] callback for action [0002933-200403132837156-oozie-oozi-W@copy_projects] 2020-05-09 13:23:59,386 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] checking action, hadoop job ID [job_1585920557248_14569] status [RUNNING] 2020-05-09 13:24:01,343 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] 
GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] callback for action [0002933-200403132837156-oozie-oozi-W@copy_datasources] 2020-05-09 13:24:01,418 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] Hadoop Jobs launched : [job_1585920557248_14573] 2020-05-09 13:24:01,418 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] action completed, external ID [job_1585920557248_14570] 2020-05-09 13:24:01,493 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_datasources] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_datasources 2020-05-09 13:24:01,935 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] callback for action [0002933-200403132837156-oozie-oozi-W@copy_projects] 2020-05-09 13:24:02,012 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] Hadoop Jobs launched : [job_1585920557248_14572] 2020-05-09 13:24:02,012 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] action completed, external ID [job_1585920557248_14569] 2020-05-09 13:24:02,076 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_projects] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_projects 2020-05-09 13:25:03,172 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] callback for action [0002933-200403132837156-oozie-oozi-W@copy_organization] 2020-05-09 13:25:03,336 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] checking action, hadoop job ID [job_1585920557248_14571] status [RUNNING] 2020-05-09 13:25:05,598 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] callback for action [0002933-200403132837156-oozie-oozi-W@copy_organization] 2020-05-09 13:25:05,688 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] Hadoop Jobs launched : [job_1585920557248_14574] 2020-05-09 13:25:05,691 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] action completed, external ID [job_1585920557248_14571] 2020-05-09 13:25:05,748 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] 
APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_organization] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_organization 2020-05-09 13:25:23,274 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] callback for action [0002933-200403132837156-oozie-oozi-W@copy_relation] 2020-05-09 13:25:23,409 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] checking action, hadoop job ID [job_1585920557248_14568] status [RUNNING] 2020-05-09 13:25:25,419 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] callback for action [0002933-200403132837156-oozie-oozi-W@copy_relation] 2020-05-09 13:25:25,510 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] Hadoop Jobs launched : [job_1585920557248_14575] 2020-05-09 13:25:25,511 INFO org.apache.oozie.action.hadoop.DistcpActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] action completed, external ID [job_1585920557248_14568] 2020-05-09 13:25:25,565 INFO 
org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] No results found 2020-05-09 13:25:25,585 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] Start action [0002933-200403132837156-oozie-oozi-W@copy_wait] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,585 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] [***0002933-200403132837156-oozie-oozi-W@copy_wait***]Action status=DONE 2020-05-09 13:25:25,585 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] [***0002933-200403132837156-oozie-oozi-W@copy_wait***]Action updated in DB! 
2020-05-09 13:25:25,627 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] No results found 2020-05-09 13:25:25,648 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] Start action [0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,648 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] [***0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1***]Action status=DONE 2020-05-09 13:25:25,648 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] [***0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1***]Action updated in DB! 
2020-05-09 13:25:25,694 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No results found 2020-05-09 13:25:25,700 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No results found 2020-05-09 13:25:25,706 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No results found 2020-05-09 13:25:25,711 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No results found 2020-05-09 13:25:25,801 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] Start action [0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,825 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] 
ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] Start action [0002933-200403132837156-oozie-oozi-W@join_prepare_software] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,825 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] Start action [0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:25,828 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] Start action [0002933-200403132837156-oozie-oozi-W@join_prepare_publication] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:27,165 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] checking action, hadoop job ID [job_1585920557248_14578] status [RUNNING] 2020-05-09 13:25:27,170 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] 
[***0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct***]Action status=RUNNING 2020-05-09 13:25:27,170 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] [***0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct***]Action updated in DB! 2020-05-09 13:25:27,179 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] checking action, hadoop job ID [job_1585920557248_14577] status [RUNNING] 2020-05-09 13:25:27,181 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct 2020-05-09 13:25:27,183 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] [***0002933-200403132837156-oozie-oozi-W@join_prepare_software***]Action status=RUNNING 2020-05-09 13:25:27,183 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] [***0002933-200403132837156-oozie-oozi-W@join_prepare_software***]Action updated in DB! 2020-05-09 13:25:27,188 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_software 2020-05-09 13:25:27,617 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] checking action, hadoop job ID [job_1585920557248_14576] status [RUNNING] 2020-05-09 13:25:27,622 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] [***0002933-200403132837156-oozie-oozi-W@join_prepare_publication***]Action status=RUNNING 2020-05-09 13:25:27,622 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] [***0002933-200403132837156-oozie-oozi-W@join_prepare_publication***]Action updated in DB! 2020-05-09 13:25:27,625 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] checking action, hadoop job ID [job_1585920557248_14579] status [RUNNING] 2020-05-09 13:25:27,628 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_publication 2020-05-09 13:25:27,629 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] [***0002933-200403132837156-oozie-oozi-W@join_prepare_dataset***]Action status=RUNNING 2020-05-09 13:25:27,629 INFO org.apache.oozie.command.wf.ForkedActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] [***0002933-200403132837156-oozie-oozi-W@join_prepare_dataset***]Action updated in DB! 2020-05-09 13:25:27,634 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_dataset 2020-05-09 13:25:27,639 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@fork_prepare_assoc_step1 2020-05-09 13:25:27,639 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_wait] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_wait 2020-05-09 13:25:27,640 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@copy_relation] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@copy_relation 2020-05-09 13:25:41,416 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] callback for action [0002933-200403132837156-oozie-oozi-W@join_prepare_software] 2020-05-09 13:25:41,490 INFO org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] action completed, external ID [job_1585920557248_14577] 2020-05-09 13:25:41,495 WARN org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] Launcher ERROR, reason: Main class [org.apache.oozie.action.hadoop.SparkMain], main() threw exception, File file:/data/3/yarn/nm/usercache/dnet.beta/appcache/application_1585920557248_14577/container_e68_1585920557248_14577_01_000002/dhp-propagation-1.1.8-SNAPSHOT.jar does not exist 2020-05-09 13:25:41,495 WARN org.apache.oozie.action.hadoop.SparkActionExecutor: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] 
JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] Launcher exception: File file:/data/3/yarn/nm/usercache/dnet.beta/appcache/application_1585920557248_14577/container_e68_1585920557248_14577_01_000002/dhp-propagation-1.1.8-SNAPSHOT.jar does not exist java.io.FileNotFoundException: File file:/data/3/yarn/nm/usercache/dnet.beta/appcache/application_1585920557248_14577/container_e68_1585920557248_14577_01_000002/dhp-propagation-1.1.8-SNAPSHOT.jar does not exist at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:598) at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:811) at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:588) at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:432) at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:340) at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:292) at org.apache.spark.deploy.yarn.Client.copyFileToRemote(Client.scala:404) at org.apache.spark.deploy.yarn.Client.org$apache$spark$deploy$yarn$Client$$distribute$1(Client.scala:496) at org.apache.spark.deploy.yarn.Client$$anonfun$prepareLocalResources$9.apply(Client.scala:595) at org.apache.spark.deploy.yarn.Client$$anonfun$prepareLocalResources$9.apply(Client.scala:594) at scala.Option.foreach(Option.scala:257) at org.apache.spark.deploy.yarn.Client.prepareLocalResources(Client.scala:594) at org.apache.spark.deploy.yarn.Client.createContainerLaunchContext(Client.scala:886) at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:180) at org.apache.spark.deploy.yarn.Client.run(Client.scala:1156) at org.apache.spark.deploy.yarn.YarnClusterApplication.start(Client.scala:1608) at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:849) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167) at 
org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195) at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86) at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) at org.apache.oozie.action.hadoop.SparkMain.runSpark(SparkMain.java:178) at org.apache.oozie.action.hadoop.SparkMain.run(SparkMain.java:90) at org.apache.oozie.action.hadoop.LauncherMain.run(LauncherMain.java:81) at org.apache.oozie.action.hadoop.SparkMain.main(SparkMain.java:57) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.oozie.action.hadoop.LauncherMapper.map(LauncherMapper.java:235) at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:459) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924) at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) 2020-05-09 13:25:41,514 INFO org.apache.oozie.command.wf.ActionEndXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] ERROR is considered as FAILED for SLA 2020-05-09 13:25:41,541 INFO org.apache.oozie.service.JPAService: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] 
APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] No results found 2020-05-09 13:25:41,580 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] Start action [0002933-200403132837156-oozie-oozi-W@Kill] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10] 2020-05-09 13:25:41,580 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] [***0002933-200403132837156-oozie-oozi-W@Kill***]Action status=DONE 2020-05-09 13:25:41,580 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] [***0002933-200403132837156-oozie-oozi-W@Kill***]Action updated in DB! 2020-05-09 13:25:41,692 WARN org.apache.oozie.workflow.lite.LiteWorkflowInstance: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] Workflow completed [KILLED], killing [3] running nodes 2020-05-09 13:25:41,760 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@Kill] No Notification URL is defined. 
Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@Kill 2020-05-09 13:25:41,766 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_software] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_software 2020-05-09 13:25:41,852 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct 2020-05-09 13:25:41,914 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] callback for action [0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] 2020-05-09 13:25:41,920 ERROR org.apache.oozie.command.wf.CompletedActionXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] XException, org.apache.oozie.command.CommandException: E0800: Action it is not running its in [KILLED] state, action [0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] at org.apache.oozie.command.wf.CompletedActionXCommand.eagerVerifyPrecondition(CompletedActionXCommand.java:92) at org.apache.oozie.command.XCommand.call(XCommand.java:257) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at 
org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:179) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 2020-05-09 13:25:41,938 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_publication 2020-05-09 13:25:42,005 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] callback for action [0002933-200403132837156-oozie-oozi-W@join_prepare_publication] 2020-05-09 13:25:42,010 ERROR org.apache.oozie.command.wf.CompletedActionXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_publication] XException, org.apache.oozie.command.CommandException: E0800: Action it is not running its in [KILLED] state, action [0002933-200403132837156-oozie-oozi-W@join_prepare_publication] at org.apache.oozie.command.wf.CompletedActionXCommand.eagerVerifyPrecondition(CompletedActionXCommand.java:92) at org.apache.oozie.command.XCommand.call(XCommand.java:257) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:179) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) 
at java.lang.Thread.run(Thread.java:745) 2020-05-09 13:25:42,028 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[dnet.beta] GROUP[-] TOKEN[] APP[orcid_to_result_from_semrel_propagation] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W 2020-05-09 13:25:42,028 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_dataset] No Notification URL is defined. Therefore nothing to notify for job 0002933-200403132837156-oozie-oozi-W@join_prepare_dataset 2020-05-09 13:25:42,113 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] callback for action [0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] 2020-05-09 13:25:42,116 ERROR org.apache.oozie.command.wf.CompletedActionXCommand: SERVER[iis-cdh5-test-m3.ocean.icm.edu.pl] USER[-] GROUP[-] TOKEN[] APP[-] JOB[0002933-200403132837156-oozie-oozi-W] ACTION[0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] XException, org.apache.oozie.command.CommandException: E0800: Action it is not running its in [KILLED] state, action [0002933-200403132837156-oozie-oozi-W@join_prepare_otherresearchproduct] at org.apache.oozie.command.wf.CompletedActionXCommand.eagerVerifyPrecondition(CompletedActionXCommand.java:92) at org.apache.oozie.command.XCommand.call(XCommand.java:257) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:179) at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) + wf_20200615_163630_609 + 2020-06-15T17:08:00+00:00 + SUCCESS + \ No newline at end of file From 11b77b9f4e29a0d09cab1f1a3ab49419c6735517 Mon Sep 17 00:00:00 2001 From: miconis Date: Tue, 16 Jun 2020 18:31:11 +0200 Subject: [PATCH 52/53] json dumps for entity merge test modified to fit the new model. title merge adjusted to fix the error --- .../src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java | 4 ++-- .../eu/dnetlib/dhp/dedup/json/publication_merge.json | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java index c9d0ac7c7..82e8fd05d 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java @@ -259,9 +259,9 @@ public class Result extends OafEntity implements Serializable { StructuredProperty newMainTitle = null; if (r.getTitle() != null) { newMainTitle = getMainTitle(r.getTitle()); - if (newMainTitle != null && title != null) { + if (newMainTitle != null) { final StructuredProperty p = newMainTitle; - title = title.stream().filter(t -> t != p).collect(Collectors.toList()); + r.setTitle(r.getTitle().stream().filter(t -> t != p).collect(Collectors.toList())); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json index 28548c532..ae688e746 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/publication_merge.json @@ -1,3 
+1,3 @@ -{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.95"}, "resourcetype": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}], "id": "50|a89337edbe55::4930db9e954866d70916cbfba9f81f97", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", 
"schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": [], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": 
"0000-0000-0656-9999"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2019-11-05T14:49:22.351Z", "fulltext": [], "dateoftransformation": "2019-11-05T16:10:58.988Z", "description": [], "format": [], "journal": {"issnPrinted": "1459-6067", "conferencedate": "", "conferenceplace": "", "name": "Agricultural and Food Science", "edition": "", "iss": "3", "sp": "", "vol": "27", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "1795-1895", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "eng", "classname": "English", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [], "extraInfo": [], "originalId": [], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2018-09-30"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", 
"schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Altered brain activation in a reversal learning task unmasks adaptive changes in cognitive control in writer's cramp"}]} -{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "doi", "classname": "doi", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "10.1016/j.nicl.2015.11.006"}], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "BASE (Open Access Aggregator)", "key": "10|openaire____::df45502607927471ecf8a6ae83683ff5"}], "id": "50|base_oa_____::0968af610a356656706657e4f234b340", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": 
"", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "NeuroImage: Clinical", "key": "10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "http://creativecommons.org/licenses/by-nc-nd/4.0/"}, "url": ["http://dx.doi.org/10.1016/j.nicl.2015.11.006"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "BASE (Open Access Aggregator)", "key": "10|openaire____::df45502607927471ecf8a6ae83683ff5"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:publication_resource", "schemeid": "dnet:publication_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, 
"provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}, {"surname": "Klein", "name": "Christine", "pid": [], "rank": 10, "affiliation": [], "fullname": "Klein, Christine"}, {"surname": "Deuschl", "name": "Gu\\u0308nther", "pid": [], "rank": 11, "affiliation": [], "fullname": "Deuschl, G\\u00fcnther"}, {"surname": "Eimeren", "name": "Thilo", "pid": [], "rank": 12, "affiliation": [], "fullname": "van Eimeren, Thilo"}, {"surname": "Witt", "name": "Karsten", "pid": [], "rank": 13, "affiliation": [], "fullname": "Witt, Karsten"}], "source": [], "dateofcollection": "2017-07-27T19:04:09.131Z", "fulltext": [], "dateoftransformation": "2019-01-23T10:15:19.582Z", "description": [], "format": [], "journal": 
{"issnPrinted": "2213-1582", "conferencedate": "", "conferenceplace": "", "name": "NeuroImage: Clinical", "edition": "", "iss": "", "sp": "63", "vol": "10", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "70", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Elsevier BV"}, "language": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "IT", "classname": "Italy", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["10.1016/j.nicl.2015.11.006"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Altered brain activation in a reversal learning task unmasks adaptive changes in cognitive control in writer's cramp"}]} -{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": 
"sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}], "id": "50|CrisUnsNoviS::9f9d014eea45dab432cab636c4c9cf39", "subject": [], "instance": [{"refereed": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": 
"", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": ["https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2019-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "accessright": {"classid": "UNKNOWN", "classname": "UNKNOWN", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": 
{"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}, {"qualifier": {"classid": "pubmed", "classname": "pubmed"}, "value": "pubmed.it"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [{"qualifier": {"classid": "id", "classname": "id"}, "value": "12345678"}], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0000-0656-1023"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2020-03-10T15:05:38.685Z", "fulltext": [], "dateoftransformation": "2020-03-11T20:11:13.15Z", "description": [], "format": [], "journal": {"issnPrinted": "", "conferencedate": "", "conferenceplace": "", "name": "", "edition": "", "iss": "", "sp": "", "vol": "", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "en", 
"classname": "en", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["(BISIS)113444", "https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "test title", "classname": "test title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Antichains of copies of ultrahomogeneous structures"}]} \ No newline at end of file +{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.95"}, "resourcetype": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, 
"inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}], "id": "50|a89337edbe55::4930db9e954866d70916cbfba9f81f97", "subject": [], "instance": [{"refereed": null, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": [], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Journal.fi", "key": "10|openaire____::6eef8049d0feedc089ee009abca55e35"}, "accessright": {"classid": "OPEN", "classname": "Open Access", 
"schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0000-0656-9999"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2019-11-05T14:49:22.351Z", "fulltext": [], "dateoftransformation": "2019-11-05T16:10:58.988Z", "description": [], "format": [], "journal": {"issnPrinted": "1459-6067", "conferencedate": "", "conferenceplace": "", "name": "Agricultural and Food Science", "edition": "", "iss": "3", "sp": "", "vol": "27", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "1795-1895", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, 
"inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "eng", "classname": "English", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [], "extraInfo": [], "originalId": [], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2018-09-30"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Altered brain activation in a reversal learning task unmasks adaptive changes in cognitive control in writer's cramp"}]} +{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "doi", "classname": "doi", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "10.1016/j.nicl.2015.11.006"}], "contributor": [], 
"resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "BASE (Open Access Aggregator)", "key": "10|openaire____::df45502607927471ecf8a6ae83683ff5"}], "id": "50|base_oa_____::0968af610a356656706657e4f234b340", "subject": [], "instance": [{"refereed": null, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "NeuroImage: Clinical", "key": "10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "http://creativecommons.org/licenses/by-nc-nd/4.0/"}, "url": ["http://dx.doi.org/10.1016/j.nicl.2015.11.006"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", 
"invisible": false, "trust": ""}, "value": "2016-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "BASE (Open Access Aggregator)", "key": "10|openaire____::df45502607927471ecf8a6ae83683ff5"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:publication_resource", "schemeid": "dnet:publication_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", 
"pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}, {"surname": "Klein", "name": "Christine", "pid": [], "rank": 10, "affiliation": [], "fullname": "Klein, Christine"}, {"surname": "Deuschl", "name": "Gu\\u0308nther", "pid": [], "rank": 11, "affiliation": [], "fullname": "Deuschl, G\\u00fcnther"}, {"surname": "Eimeren", "name": "Thilo", "pid": [], "rank": 12, "affiliation": [], "fullname": "van Eimeren, Thilo"}, {"surname": "Witt", "name": "Karsten", "pid": [], "rank": 13, "affiliation": [], "fullname": "Witt, Karsten"}], "source": [], "dateofcollection": "2017-07-27T19:04:09.131Z", "fulltext": [], "dateoftransformation": "2019-01-23T10:15:19.582Z", "description": [], "format": [], "journal": {"issnPrinted": "2213-1582", "conferencedate": "", "conferenceplace": "", "name": "NeuroImage: Clinical", "edition": "", "iss": "", "sp": "63", "vol": "10", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "70", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "Elsevier BV"}, "language": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "IT", "classname": "Italy", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["10.1016/j.nicl.2015.11.006"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": 
"", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Altered brain activation in a reversal learning task unmasks adaptive changes in cognitive control in writer's cramp"}]} +{"context": [], "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "sysimport:crosswalk:datasetarchive", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "dedup-similarity-result-levenstein", "invisible": false, "trust": "0.9"}, "resourcetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}, "pid": [], "contributor": [], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "relevantdate": [], "collectedfrom": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}], "id": "50|CrisUnsNoviS::9f9d014eea45dab432cab636c4c9cf39", "subject": [], "instance": [{"refereed": null, "hostedby": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS 
(Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "processingchargeamount": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "url": ["https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "distributionlocation": "", "processingchargecurrency": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2019-01-01"}, "collectedfrom": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "CRIS UNS (Current Research Information System University of Novi Sad)", "key": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556"}, "accessright": {"classid": "UNKNOWN", "classname": "UNKNOWN", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0004", "classname": "Conference object", "schemename": "dnet:dataCite_resource", "schemeid": "dnet:dataCite_resource"}}], "embargoenddate": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", 
"classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "lastupdatetimestamp": 0, "author": [{"surname": "Zeuner", "name": "Kirsten E.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Zeuner, Kirsten E."}, {"surname": "Knutzen", "name": "Arne", "pid": [], "rank": 2, "affiliation": [], "fullname": "Knutzen, Arne"}, {"surname": "Granert", "name": "Oliver", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0002-0656-1023"}, {"qualifier": {"classid": "pubmed", "classname": "pubmed"}, "value": "pubmed.it"}], "rank": 3, "affiliation": [], "fullname": "Granert, Oliver"}, {"surname": "Sablowsky", "name": "Simone", "pid": [{"qualifier": {"classid": "id", "classname": "id"}, "value": "12345678"}], "rank": 4, "affiliation": [], "fullname": "Sablowsky, Simone"}, {"surname": "Go\\u0308tz", "name": "Julia", "pid": [], "rank": 5, "affiliation": [], "fullname": "G\\u00f6tz, Julia"}, {"surname": "Wolff", "name": "Stephan", "pid": [], "rank": 6, "affiliation": [], "fullname": "Wolff, Stephan"}, {"surname": "Jansen", "name": "Olav", "pid": [{"qualifier": {"classid": "ORCID", "classname": "ORCID"}, "value": "0000-0000-0656-1023"},{"qualifier": {"classid": "id", "classname": "id"}, "value": "987654321"}], "rank": 7, "affiliation": [], "fullname": "Jansen, Olav"}, {"surname": "Dressler", "name": "Dirk", "pid": [], "rank": 8, "affiliation": [], "fullname": "Dressler, Dirk"}, {"surname": "Schneider", "name": "Susanne A.", "pid": [], "rank": 9, "affiliation": [], "fullname": "Schneider, Susanne A."}], "source": [], "dateofcollection": "2020-03-10T15:05:38.685Z", "fulltext": [], "dateoftransformation": "2020-03-11T20:11:13.15Z", "description": [], "format": [], "journal": {"issnPrinted": "", "conferencedate": "", "conferenceplace": "", "name": "", "edition": "", "iss": "", "sp": "", "vol": "", "dataInfo": {"deletedbyinference": false, "provenanceaction": 
{"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "issnOnline": "", "ep": "", "issnLinking": ""}, "coverage": [], "publisher": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": ""}, "language": {"classid": "en", "classname": "en", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [{"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["(BISIS)113444", "https://www.cris.uns.ac.rs/record.jsf?recordId=113444&source=OpenAIRE&language=en"], "dateofacceptance": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "value": "2016-01-01"}, "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "", "classname": "", "schemename": "", "schemeid": ""}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": ""}, "qualifier": {"classid": "test title", "classname": "test title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Antichains of copies of ultrahomogeneous structures"}]} \ No newline at end of file From 1d4275acc44957fe412c79a66b9d84e51c044f30 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 17 Jun 2020 09:10:38 +0200 Subject: [PATCH 53/53] implemented first version of exportation of Scholexplorer into ActionSet --- .../dhp/schema/scholexplorer/DLIRelation.java | 14 + .../broker/oa/GenerateEventsApplication.java | 
122 ++++-- .../dhp/broker/oa/matchers/UpdateMatcher.java | 6 +- .../simple/EnrichMissingAbstract.java | 6 +- .../simple/EnrichMissingAuthorOrcid.java | 3 +- .../simple/EnrichMissingOpenAccess.java | 7 +- .../oa/matchers/simple/EnrichMissingPid.java | 10 +- .../simple/EnrichMissingPublicationDate.java | 6 +- .../matchers/simple/EnrichMissingSubject.java | 3 +- .../matchers/simple/EnrichMoreOpenAccess.java | 3 +- .../oa/matchers/simple/EnrichMorePid.java | 6 +- .../oa/matchers/simple/EnrichMoreSubject.java | 3 +- .../dhp/broker/oa/util/EventGroup.java | 1 + .../dhp/broker/oa/util/ResultAggregator.java | 1 + .../dhp/broker/oa/util/ResultGroup.java | 1 + .../dhp/broker/oa/util/TrustUtils.java | 16 +- .../dhp/broker/oa/util/UpdateInfo.java | 9 +- .../dhp/broker/oa/util/TrustUtilsTest.java | 1 + .../orcid/oozie_app/config-default.xml | 30 +- .../parser/DatasetScholexplorerParser.java | 2 +- .../PublicationScholexplorerParser.java | 8 +- .../dhp/sx/graph/step1/oozie_app/workflow.xml | 4 +- .../java/eu/dnetlib/dhp/export/DLIToOAF.scala | 376 ++++++++++++++++++ .../SparkExportContentForOpenAire.scala | 118 ++++++ .../input_export_content_parameters.json | 14 + .../sx/export/oozie_app/config-default.xml | 42 ++ .../dhp/sx/export/oozie_app/workflow.xml | 49 +++ .../dhp/export/ExportDLITOOAFTest.scala | 75 ++++ .../eu/dnetlib/dhp/export/dataset.json | 101 +++++ .../eu/dnetlib/dhp/export/publication.json | 128 ++++++ .../eu/dnetlib/dhp/export/relation.json | 23 ++ 31 files changed, 1110 insertions(+), 78 deletions(-) create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/SparkExportContentForOpenAire.scala create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/export/input_export_content_parameters.json create mode 100644 
dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/test/java/eu/dnetlib/dhp/export/ExportDLITOOAFTest.scala create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/dataset.json create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/publication.json create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/relation.json diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIRelation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIRelation.java index d2d2089c0..ca85fa14f 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIRelation.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIRelation.java @@ -1,11 +1,25 @@ package eu.dnetlib.dhp.schema.scholexplorer; +import java.util.List; + +import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Relation; public class DLIRelation extends Relation { + private String dateOfCollection; + private List collectedFrom; + + public List getCollectedFrom() { + return collectedFrom; + } + + public void setCollectedFrom(List collectedFrom) { + this.collectedFrom = collectedFrom; + } + public String getDateOfCollection() { return dateOfCollection; } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index 44bc5cb6e..ecf4e3eff 100644 --- 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -91,35 +91,29 @@ public class GenerateEventsApplication { private static final UpdateMatcher>, ?> enrichMoreSoftware = new EnrichMoreSoftware(); private static final UpdateMatcher>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo(); - private static final UpdateMatcher>, ?> enrichMissingPublicationIsReferencedBy = - new EnrichMissingPublicationIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsReferencedBy = new EnrichMissingPublicationIsReferencedBy(); private static final UpdateMatcher>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences(); - private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedTo = - new EnrichMissingPublicationIsSupplementedTo(); - private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedBy = - new EnrichMissingPublicationIsSupplementedBy(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedTo = new EnrichMissingPublicationIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedBy = new EnrichMissingPublicationIsSupplementedBy(); - private static final UpdateMatcher>, ?> enrichMisissingDatasetIsRelatedTo = - new EnrichMissingDatasetIsRelatedTo(); - private static final UpdateMatcher>, ?> enrichMissingDatasetIsReferencedBy = - new EnrichMissingDatasetIsReferencedBy(); - private static final UpdateMatcher>, ?> enrichMissingDatasetReferences = - new EnrichMissingDatasetReferences(); - private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedTo = - new EnrichMissingDatasetIsSupplementedTo(); - private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedBy = - new 
EnrichMissingDatasetIsSupplementedBy(); + private static final UpdateMatcher>, ?> enrichMisissingDatasetIsRelatedTo = new EnrichMissingDatasetIsRelatedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsReferencedBy = new EnrichMissingDatasetIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingDatasetReferences = new EnrichMissingDatasetReferences(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedTo = new EnrichMissingDatasetIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedBy = new EnrichMissingDatasetIsSupplementedBy(); // Aggregators - private static final TypedColumn, ResultGroup> resultAggrTypedColumn = new ResultAggregator().toColumn(); + private static final TypedColumn, ResultGroup> resultAggrTypedColumn = new ResultAggregator() + .toColumn(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString(GenerateEventsApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + .toString( + GenerateEventsApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -172,18 +166,23 @@ public class GenerateEventsApplication { final Class resultClazz, final DedupConfig dedupConfig) { - final Dataset results = readPath(spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), Result.class) - .filter(r -> r.getDataInfo().getDeletedbyinference()); + final Dataset results = readPath( + spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), Result.class) + .filter(r -> r.getDataInfo().getDeletedbyinference()); final Dataset mergedRels = readPath(spark, graphPath + "/relation", Relation.class) .filter(r -> 
r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); - return results.joinWith(mergedRels, results.col("id").equalTo(mergedRels.col("source")), "inner") + return results + .joinWith(mergedRels, results.col("id").equalTo(mergedRels.col("source")), "inner") .groupByKey((MapFunction, String>) t -> t._2.getTarget(), Encoders.STRING()) .agg(resultAggrTypedColumn) .map((MapFunction, ResultGroup>) t -> t._2, Encoders.kryo(ResultGroup.class)) .filter(ResultGroup::isValid) - .map((MapFunction) g -> GenerateEventsApplication.generateSimpleEvents(g, dedupConfig), Encoders.kryo(EventGroup.class)) + .map( + (MapFunction) g -> GenerateEventsApplication + .generateSimpleEvents(g, dedupConfig), + Encoders.kryo(EventGroup.class)) .flatMap(group -> group.getData().iterator(), Encoders.kryo(Event.class)); } @@ -207,16 +206,19 @@ public class GenerateEventsApplication { return events; } - private static Dataset generateRelationEvents(final SparkSession spark, + private static Dataset generateRelationEvents( + final SparkSession spark, final String graphPath, final Class sourceClass, final Class targetClass, final DedupConfig dedupConfig) { - final Dataset sources = readPath(spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), Result.class) - .filter(r -> r.getDataInfo().getDeletedbyinference()); + final Dataset sources = readPath( + spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), Result.class) + .filter(r -> r.getDataInfo().getDeletedbyinference()); - final Dataset targets = readPath(spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), targetClass); + final Dataset targets = readPath( + spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), targetClass); final Dataset mergedRels = readPath(spark, graphPath + "/relation", Relation.class) .filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); @@ -224,7 +226,8 @@ public class GenerateEventsApplication { final Dataset rels = readPath(spark, graphPath 
+ "/relation", Relation.class) .filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); - final Dataset duplicates = sources.joinWith(mergedRels, sources.col("id").equalTo(rels.col("source")), "inner") + final Dataset duplicates = sources + .joinWith(mergedRels, sources.col("id").equalTo(rels.col("source")), "inner") .groupByKey((MapFunction, String>) t -> t._2.getTarget(), Encoders.STRING()) .agg(resultAggrTypedColumn) .map((MapFunction, ResultGroup>) t -> t._2, Encoders.kryo(ResultGroup.class)) @@ -243,7 +246,8 @@ public class GenerateEventsApplication { return null; } - private List generateProjectsEvents(final Collection>> childrenWithProjects, final DedupConfig dedupConfig) { + private List generateProjectsEvents(final Collection>> childrenWithProjects, + final DedupConfig dedupConfig) { final List> list = new ArrayList<>(); for (final Pair> target : childrenWithProjects) { @@ -254,7 +258,8 @@ public class GenerateEventsApplication { return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); } - private List generateSoftwareEvents(final Collection>> childrenWithSoftwares, final DedupConfig dedupConfig) { + private List generateSoftwareEvents(final Collection>> childrenWithSoftwares, + final DedupConfig dedupConfig) { final List> list = new ArrayList<>(); for (final Pair> target : childrenWithSoftwares) { @@ -279,15 +284,30 @@ public class GenerateEventsApplication { for (final Pair> target : cleanedChildrens) { if (relType.equals("isRelatedTo")) { - list.addAll(enrichMisissingPublicationIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMisissingPublicationIsRelatedTo + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("references")) { - list.addAll(enrichMissingPublicationReferences.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingPublicationReferences + 
.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("isReferencedBy")) { - list.addAll(enrichMissingPublicationIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingPublicationIsReferencedBy + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("isSupplementedTo")) { - list.addAll(enrichMissingPublicationIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingPublicationIsSupplementedTo + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("isSupplementedBy")) { - list.addAll(enrichMissingPublicationIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingPublicationIsSupplementedBy + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } } @@ -310,15 +330,29 @@ public class GenerateEventsApplication { for (final Pair> target : cleanedChildrens) { if (relType.equals("isRelatedTo")) { - list.addAll(enrichMisissingDatasetIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMisissingDatasetIsRelatedTo + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("references")) { - list.addAll(enrichMissingDatasetReferences.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingDatasetReferences.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("isReferencedBy")) { - list.addAll(enrichMissingDatasetIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingDatasetIsReferencedBy + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("isSupplementedTo")) { - 
list.addAll(enrichMissingDatasetIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingDatasetIsSupplementedTo + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("isSupplementedBy")) { - list.addAll(enrichMissingDatasetIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingDatasetIsSupplementedBy + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } } @@ -339,8 +373,12 @@ public class GenerateEventsApplication { private static DedupConfig loadDedupConfig(final String isLookupUrl, final String profId) throws Exception { final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); - final String conf = isLookUpService.getResourceProfileByQuery(String - .format("for $x in /RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '%s'] return $x//DEDUPLICATION/text()", profId)); + final String conf = isLookUpService + .getResourceProfileByQuery( + String + .format( + "for $x in /RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '%s'] return $x//DEDUPLICATION/text()", + profId)); final DedupConfig dedupConfig = new ObjectMapper().readValue(conf, DedupConfig.class); dedupConfig.getPace().initModel(); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index 286b40ad5..95d43ae68 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -22,7 +22,8 @@ public abstract class UpdateMatcher { this.multipleUpdate = multipleUpdate; } - public Collection> searchUpdatesForRecord(final K res, final Collection others, final DedupConfig dedupConfig) { + public Collection> 
searchUpdatesForRecord(final K res, final Collection others, + final DedupConfig dedupConfig) { final Map> infoMap = new HashMap<>(); @@ -30,7 +31,8 @@ public abstract class UpdateMatcher { if (source != res) { for (final UpdateInfo info : findUpdates(source, res, dedupConfig)) { final String s = DigestUtils.md5Hex(info.getHighlightValueAsString()); - if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {} else { + if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) { + } else { infoMap.put(s, info); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java index c3b6bda66..7dc340b3c 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java @@ -18,9 +18,11 @@ public class EnrichMissingAbstract extends UpdateMatcher { } @Override - protected List> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { if (isMissing(target.getDescription()) && !isMissing(source.getDescription())) { - return Arrays.asList(generateUpdateInfo(source.getDescription().get(0).getValue(), source, target, dedupConfig)); + return Arrays + .asList(generateUpdateInfo(source.getDescription().get(0).getValue(), source, target, dedupConfig)); } return new ArrayList<>(); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java index 89292d3da..7a1677ae2 
100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java @@ -19,7 +19,8 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher>> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List>> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { // TODO // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); return Arrays.asList(); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java index 7f5a595cc..d14490ba8 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java @@ -21,7 +21,8 @@ public class EnrichMissingOpenAccess extends UpdateMatcher { } @Override - protected List> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { final long count = target .getInstance() .stream() @@ -29,7 +30,9 @@ public class EnrichMissingOpenAccess extends UpdateMatcher { .filter(right -> right.equals(BrokerConstants.OPEN_ACCESS)) .count(); - if (count > 0) { return Arrays.asList(); } + if (count > 0) { + return Arrays.asList(); + } return source .getInstance() diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java index 6e106e669..20303ec1b 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java @@ -20,10 +20,13 @@ public class EnrichMissingPid extends UpdateMatcher { } @Override - protected List> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { final long count = target.getPid().size(); - if (count > 0) { return Arrays.asList(); } + if (count > 0) { + return Arrays.asList(); + } return source .getPid() @@ -33,7 +36,8 @@ public class EnrichMissingPid extends UpdateMatcher { .collect(Collectors.toList()); } - public UpdateInfo generateUpdateInfo(final Pid highlightValue, final Result source, final Result target, final DedupConfig dedupConfig) { + public UpdateInfo generateUpdateInfo(final Pid highlightValue, final Result source, final Result target, + final DedupConfig dedupConfig) { return new UpdateInfo<>( Topic.ENRICH_MISSING_PID, highlightValue, source, target, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java index d2b28d65d..e1de8ce4d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java @@ -18,9 +18,11 @@ public class EnrichMissingPublicationDate extends UpdateMatcher } @Override - protected List> findUpdates(final Result source, 
final Result target, final DedupConfig dedupConfig) { + protected List> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { if (isMissing(target.getDateofacceptance()) && !isMissing(source.getDateofacceptance())) { - return Arrays.asList(generateUpdateInfo(source.getDateofacceptance().getValue(), source, target, dedupConfig)); + return Arrays + .asList(generateUpdateInfo(source.getDateofacceptance().getValue(), source, target, dedupConfig)); } return new ArrayList<>(); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java index de888ff87..c51f8991c 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java @@ -23,7 +23,8 @@ public class EnrichMissingSubject extends UpdateMatcher>> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List>> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { final Set existingTypes = target .getSubject() .stream() diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java index 021449797..2ac04fd12 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java @@ -21,7 +21,8 @@ public class EnrichMoreOpenAccess extends UpdateMatcher { } @Override - protected List> findUpdates(final Result 
source, final Result target, final DedupConfig dedupConfig) { + protected List> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { final Set urls = target .getInstance() .stream() diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java index c64ed20ea..e4bf5d2c2 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java @@ -20,7 +20,8 @@ public class EnrichMorePid extends UpdateMatcher { } @Override - protected List> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { final Set existingPids = target .getPid() .stream() @@ -36,7 +37,8 @@ public class EnrichMorePid extends UpdateMatcher { .collect(Collectors.toList()); } - public UpdateInfo generateUpdateInfo(final Pid highlightValue, final Result source, final Result target, final DedupConfig dedupConfig) { + public UpdateInfo generateUpdateInfo(final Pid highlightValue, final Result source, final Result target, + final DedupConfig dedupConfig) { return new UpdateInfo<>( Topic.ENRICH_MORE_PID, highlightValue, source, target, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java index 3f7f5b3d5..d6e607c31 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java +++ 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java @@ -21,7 +21,8 @@ public class EnrichMoreSubject extends UpdateMatcher>> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List>> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { final Set existingSubjects = target .getSubject() .stream() diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventGroup.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventGroup.java index 9c7081c79..25c7698a0 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventGroup.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventGroup.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.io.Serializable; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultAggregator.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultAggregator.java index 94685eeae..475c76814 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultAggregator.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultAggregator.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import org.apache.spark.sql.Encoder; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultGroup.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultGroup.java index 8fe7a5939..2be673db0 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultGroup.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultGroup.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import 
java.io.Serializable; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java index 6bf59c125..5338d4f3d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java @@ -1,14 +1,22 @@ + package eu.dnetlib.dhp.broker.oa.util; public class TrustUtils { public static float rescale(final double score, final double threshold) { - if (score >= BrokerConstants.MAX_TRUST) { return BrokerConstants.MAX_TRUST; } + if (score >= BrokerConstants.MAX_TRUST) { + return BrokerConstants.MAX_TRUST; + } - final double val = (score - threshold) * (BrokerConstants.MAX_TRUST - BrokerConstants.MIN_TRUST) / (BrokerConstants.MAX_TRUST - threshold); + final double val = (score - threshold) * (BrokerConstants.MAX_TRUST - BrokerConstants.MIN_TRUST) + / (BrokerConstants.MAX_TRUST - threshold); - if (val < BrokerConstants.MIN_TRUST) { return BrokerConstants.MIN_TRUST; } - if (val > BrokerConstants.MAX_TRUST) { return BrokerConstants.MAX_TRUST; } + if (val < BrokerConstants.MIN_TRUST) { + return BrokerConstants.MIN_TRUST; + } + if (val > BrokerConstants.MAX_TRUST) { + return BrokerConstants.MAX_TRUST; + } return (float) val; } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java index de6a71397..893aa2827 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java @@ -68,8 +68,10 @@ public final class UpdateInfo { private float calculateTrust(final DedupConfig dedupConfig, final Result r1, final Result r2) { try { final ObjectMapper 
objectMapper = new ObjectMapper(); - final MapDocument doc1 = MapDocumentUtil.asMapDocumentWithJPath(dedupConfig, objectMapper.writeValueAsString(r1)); - final MapDocument doc2 = MapDocumentUtil.asMapDocumentWithJPath(dedupConfig, objectMapper.writeValueAsString(r2)); + final MapDocument doc1 = MapDocumentUtil + .asMapDocumentWithJPath(dedupConfig, objectMapper.writeValueAsString(r1)); + final MapDocument doc2 = MapDocumentUtil + .asMapDocumentWithJPath(dedupConfig, objectMapper.writeValueAsString(r2)); final double score = new TreeProcessor(dedupConfig).computeScore(doc1, doc2); final double threshold = dedupConfig.getWf().getThreshold(); @@ -118,7 +120,8 @@ public final class UpdateInfo { .map(Instance::getUrl) .flatMap(List::stream) .findFirst() - .orElse(null);; + .orElse(null); + ; final Provenance provenance = new Provenance().setId(provId).setRepositoryName(provRepo).setUrl(provUrl); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java index 58f391c24..bb23d6085 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import static org.junit.jupiter.api.Assertions.assertTrue; diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml index 5621415d9..fe14bb8cb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml @@ -1,14 +1,18 @@ jobTracker - yarnRM + 
hadoop-rm3.garr-pa1.d4science.org:8032 nameNode - hdfs://nameservice1 + hdfs://hadoop-rm1.garr-pa1.d4science.org:8020 - oozie.action.sharelib.for.java + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark spark2 @@ -16,7 +20,23 @@ true - oozie.launcher.mapreduce.map.java.opts - -Xmx4g + hive_metastore_uris + thrift://hadoop-edge2.garr-pa1.d4science.org:9083 + + + spark2YarnHistoryServerAddress + http://hadoop-edge1.garr-pa1.d4science.org:18089/ + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/DatasetScholexplorerParser.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/DatasetScholexplorerParser.java index f49163c87..afba57bb8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/DatasetScholexplorerParser.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/DatasetScholexplorerParser.java @@ -159,7 +159,7 @@ public class DatasetScholexplorerParser extends AbstractScholexplorerParser { .setDescription( descs .stream() - .map(it -> it.length() < 10000 ? it : it.substring(0, 10000)) +// .map(it -> it.length() < 10000 ? 
it : it.substring(0, 10000)) .map( it -> { final Field d = new Field<>(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/PublicationScholexplorerParser.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/PublicationScholexplorerParser.java index edbb444db..bf59a6f0e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/PublicationScholexplorerParser.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/PublicationScholexplorerParser.java @@ -213,10 +213,10 @@ public class PublicationScholexplorerParser extends AbstractScholexplorerParser .setValue( VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='description']")); - if (StringUtils.isNotBlank(description.getValue()) - && description.getValue().length() > 10000) { - description.setValue(description.getValue().substring(0, 10000)); - } +// if (StringUtils.isNotBlank(description.getValue()) +// && description.getValue().length() > 10000) { +// description.setValue(description.getValue().substring(0, 10000)); +// } parsedObject.setDescription(Collections.singletonList(description)); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml index ce00eff7b..d74d68663 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml @@ -47,8 +47,8 @@ - ${wf:conf('reuseContent') eq false} - ${wf:conf('reuseContent') eq true} + ${wf:conf('reuseContent') eq false} + ${wf:conf('reuseContent') eq true} diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala 
b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala new file mode 100644 index 000000000..5d7c444b2 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala @@ -0,0 +1,376 @@ +package eu.dnetlib.dhp.export + +import java.time.LocalDateTime +import java.time.format.DateTimeFormatter + +import eu.dnetlib.dhp.common.PacePerson +import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Dataset, Field, Instance, KeyValue, Publication, Qualifier, Relation, StructuredProperty} +import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, DLIRelation} +import org.apache.commons.lang3.StringUtils + +import scala.collection.JavaConverters._ + + +case class DLIExternalReference(id: String, url: String, sitename: String, label: String, pid: String, classId: String) {} + +object DLIToOAF { + + + val collectedFromMap: Map[String, KeyValue] = Map( + "dli_________::r3d100010527" -> generateKeyValue("10|re3data_____::c2a591f440598b63d854556beaf01591", "European Nucleotide Archive"), + "dli_________::r3d100010255" -> generateKeyValue("10|re3data_____::480d275ed6f9666ee76d6a1215eabf26", "Inter-university Consortium for Political and Social Research"), + "dli_________::r3d100011868" -> generateKeyValue("10|re3data_____::db814dc656a911b556dba42a331cebe9", "Mendeley Data"), + "dli_________::elsevier" -> generateKeyValue("10|openaire____::8f87e10869299a5fe80b315695296b88", "Elsevier"), + "dli_________::openaire" -> generateKeyValue("10|infrastruct_::f66f1bd369679b5b077dcdf006089556", "OpenAIRE"), + "dli_________::thomsonreuters" -> generateKeyValue("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "Crossref"), + "dli_________::r3d100010216" -> generateKeyValue("10|re3data_____::0fd79429de04343dbbec705d9b5f429f", "4TU.Centre for Research Data"), + "dli_________::r3d100010134" -> generateKeyValue("10|re3data_____::9633d1e8c4309c833c2c442abeb0cfeb", "PANGAEA"), + 
"dli_________::ieee" -> generateKeyValue("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "Crossref"), + "dli_________::r3d100010197" -> generateKeyValue("10|re3data_____::9fd1d79973f7fda60cbe1d82e3819a68", "The Cambridge Structural Database"), + "dli_________::nature" -> generateKeyValue("10|openaire____::6e380d9cf51138baec8480f5a0ce3a2e", "Springer Nature"), + "dli_________::datacite" -> generateKeyValue("10|openaire____::9e3be59865b2c1c335d32dae2fe7b254", "Datacite"), + "dli_________::r3d100010578" -> generateKeyValue("10|re3data_____::c4d751f29a7568011a4c80136b30b444", "IEDA"), + "dli_________::r3d100010464" -> generateKeyValue("10|re3data_____::23e2a81591099828f6b83a1c83150666", "Research Data Australia"), + "dli_________::r3d100010327" -> generateKeyValue("10|re3data_____::a644620b81135243dc9acc15d2362246", "Worldwide Protein Data Bank"), + "dli_________::pubmed" -> generateKeyValue("10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357", "PubMed Central"), + "dli_________::europe_pmc__" -> generateKeyValue("10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", "Europe PubMed Central"), + "dli_________::crossref" -> generateKeyValue("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "Crossref") + ) + + + val relationTypeMapping: Map[String, (String, String)] = Map( + "IsReferencedBy" -> ("isRelatedTo", "relationship"), + "References" -> ("isRelatedTo", "relationship"), + "IsRelatedTo" -> ("isRelatedTo", "relationship"), + "IsSupplementedBy" -> ("IsSupplementedBy", "supplement"), + "Cites" -> ("cites", "citation"), + "Unknown" -> ("isRelatedTo", "relationship"), + "IsSourceOf" -> ("isRelatedTo", "relationship"), + "IsCitedBy" -> ("IsCitedBy", "citation"), + "Reviews" -> ("reviews", "review"), + "Describes" -> ("isRelatedTo", "relationship"), + "HasAssociationWith" -> ("isRelatedTo", "relationship") + ) + + val expectecdPidType = List("uniprot", "ena", "chembl", "ncbi-n", "ncbi-p", "genbank", "pdb", "url") + + + val filteredURL = List( + "www.ebi.ac.uk", + 
/**
  * Converts a DLI dataset into an external reference built from the first pid accepted by
  * `filterPid`.
  *
  * The landing URL and the provider label are chosen from the pid type (the qualifier classname);
  * the label of the reference is the first non-empty title of the dataset.
  *
  * @param dataset the DLI dataset to convert
  * @return the external reference, or null when the dataset has no pid list, none of its pids is
  *         accepted by `filterPid`, the selected pid is an ENA accession shorter than 8 characters,
  *         or the pid type is not one of the handled types
  */
def convertDLIDatasetToExternalReference(dataset: DLIDataset): DLIExternalReference = {
  // A dataset without pids cannot be turned into a reference: bail out instead of throwing.
  if (dataset.getPid == null)
    return null

  val pids = dataset.getPid.asScala.filter(filterPid)
  if (pids.isEmpty)
    return null

  val pid: StructuredProperty = pids.head
  val value = pid.getValue

  // Computed once and shared by every branch below.
  val currentId = generateId(dataset.getId)
  val title = extractTitle(dataset.getTitle)

  pid.getQualifier.getClassname match {
    case "uniprot" =>
      DLIExternalReference(currentId, s"https://www.uniprot.org/uniprot/$value", "UniProt", title, value, "accessionNumber")
    case "ena" =>
      // Only the first 8 characters of the accession are used to build the ENA link.
      if (value != null && value.length > 7)
        DLIExternalReference(currentId, s"https://www.ebi.ac.uk/ena/data/view/${value.substring(0, 8)}", "European Nucleotide Archive", title, value, "accessionNumber")
      else
        null
    case "chembl" =>
      DLIExternalReference(currentId, s"https://www.ebi.ac.uk/chembl/compound_report_card/$value", "ChEMBL", title, value, "accessionNumber")
    case "ncbi-n" =>
      DLIExternalReference(currentId, s"https://www.ncbi.nlm.nih.gov/nuccore/$value", "Nucleotide Database", title, value, "accessionNumber")
    case "ncbi-p" =>
      // NOTE(review): identical to the "ncbi-n" branch (nuccore URL, "Nucleotide Database" label);
      // presumably protein accessions need a different endpoint and label - confirm.
      DLIExternalReference(currentId, s"https://www.ncbi.nlm.nih.gov/nuccore/$value", "Nucleotide Database", title, value, "accessionNumber")
    case "genbank" =>
      DLIExternalReference(currentId, s"https://www.ncbi.nlm.nih.gov/nuccore/$value", "GenBank", title, value, "accessionNumber")
    case "pdb" =>
      // Same resolver convertDLIDatasetTOOAF uses for PDB identifiers (previously pointed at NCBI nuccore).
      DLIExternalReference(currentId, s"https://www.rcsb.org/structure/$value", "Protein Data Bank", title, value, "accessionNumber")
    case "url" =>
      DLIExternalReference(currentId, value, "", title, value, "url")
    // Defensive default: an unhandled pid type yields no reference instead of a MatchError.
    case _ => null
  }
}
/**
  * Maps a DLI relation onto an OAF `Relation` of type "resultResult".
  *
  * The relation class and sub-type come from `relationTypeMapping`; the providers are translated
  * through `collectedFromMap`, silently dropping the ones that have no translation; source and
  * target identifiers are rewritten with `generateId`.
  *
  * @param r the DLI relation to convert
  * @return the converted relation, or null when the relation name is not mapped, when there is no
  *         usable provider (null list, empty list, or a single null entry), or when source and
  *         target resolve to the same identifier
  */
def convertDLIRelation(r: DLIRelation): Relation = {
  relationTypeMapping.get(r.getRelType) match {
    case None => null
    case Some((relClass, subRelType)) =>
      val providers = r.getCollectedFrom
      val noProvider = providers == null || providers.isEmpty || (providers.size() == 1 && providers.get(0) == null)
      if (noProvider)
        null
      else {
        // Providers are resolved before the identifiers, exactly as in the setter sequence below.
        val mappedProviders = providers.asScala
          .map(c => collectedFromMap.getOrElse(c.getKey, null))
          .filterNot(kv => kv == null)
          .asJava
        val sourceId = generateId(r.getSource)
        val targetId = generateId(r.getTarget)

        // Self-loops are discarded.
        if (sourceId == targetId)
          null
        else {
          val relation = new Relation
          relation.setRelType("resultResult")
          relation.setRelClass(relClass)
          relation.setSubRelType(subRelType)
          relation.setCollectedfrom(mappedProviders)
          relation.setSource(sourceId)
          relation.setTarget(targetId)
          relation.setDataInfo(generateDataInfo())
          relation
        }
      }
  }
}
(d.getCollectedfrom.size() == 1 && d.getCollectedfrom.get(0) == null)) + return null + val result: Dataset = new Dataset + result.setId(generateId(d.getId)) + result.setDataInfo(generateDataInfo()) + result.setCollectedfrom(d.getCollectedfrom.asScala.map(c => collectedFromMap.getOrElse(c.getKey, null)).asJava) + + + result.setPid(d.getPid) + + val fpids = result.getPid.asScala.filter(p => "doi".equalsIgnoreCase(p.getQualifier.getClassname) || + "pdb".equalsIgnoreCase(p.getQualifier.getClassname) + ).map(p => p.getValue) + + if (fpids == null || fpids.isEmpty) + return null + + + result.setDateofcollection(d.getDateofcollection) + result.setOriginalId(d.getPid.asScala.map(d => d.getValue).asJava) + result.setDateoftransformation(LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"))) + if (d.getAuthor == null || d.getAuthor.isEmpty) + return null + result.setAuthor(d.getAuthor.asScala.map(convertAuthor).asJava) + result.setResulttype(createQualifier(d.getResulttype.getClassid, d.getResulttype.getClassname, "dnet:result_typologies", "dnet:result_typologies")) + + if (d.getSubject != null) + result.setSubject(d.getSubject.asScala.map(convertSubject).asJava) + + if (d.getTitle == null || d.getTitle.isEmpty) + return null + + result.setTitle(List(patchTitle(d.getTitle.get(0))).asJava) + + if (d.getRelevantdate == null || d.getRelevantdate.size() == 0) + return null + + result.setRelevantdate(d.getRelevantdate.asScala.map(patchRelevantDate).asJava) + + + result.setDescription(d.getDescription) + + result.setDateofacceptance(asField(d.getRelevantdate.get(0).getValue)) + result.setPublisher(d.getPublisher) + result.setSource(d.getSource) + result.setBestaccessright(createQualifier("UNKNOWN", "not available", "dnet:access_modes", "dnet:access_modes")) + + + val instance_urls = if (fpids.head.length < 5) s"https://www.rcsb.org/structure/${fpids.head}" else s"https://dx.doi.org/${fpids.head}" + + val i: Instance = createInstance(instance_urls, 
/**
  * Creates a new `Instance` pointing at the given URL.
  *
  * @param url              the only URL set on the new instance
  * @param originalInstance instance whose hostedby is copied when both the instance and its
  *                         hostedby are non-null; may be null
  * @param doa              date of acceptance set on the new instance
  * @param dataset          when true the instance type is "0021"/"Dataset", otherwise
  *                         "0000"/"UNKNOWN"
  * @return the newly created instance, with access right "UNKNOWN"/"not available"
  */
def createInstance(url: String, originalInstance: Instance, doa: Field[String], dataset: Boolean = false): Instance = {
  val (typeId, typeName) = if (dataset) ("0021", "Dataset") else ("0000", "UNKNOWN")

  val created = new Instance
  created.setUrl(List(url).asJava)
  created.setInstancetype(createQualifier(typeId, typeName, "dnet:publication_resource", "dnet:publication_resource"))

  // hostedby is inherited only when the original instance actually carries one
  Option(originalInstance)
    .flatMap(o => Option(o.getHostedby))
    .foreach(h => created.setHostedby(h))

  created.setAccessright(createQualifier("UNKNOWN", "not available", "dnet:access_modes", "dnet:access_modes"))
  created.setDateofacceptance(doa)
  created
}
/**
  * Builds the `DataInfo` attached to the exported records.
  *
  * The record is always marked as not deleted by inference and not inferred, with provenance
  * action "sysimport:actionset" in the "dnet:provenanceActions" scheme.
  *
  * @param trust      trust value stored on the data info
  * @param invisibile value of the invisible flag (the parameter name keeps its original spelling
  *                   because callers pass it by name)
  * @return the populated data info
  */
def generateDataInfo(trust: String = "0.9", invisibile: Boolean = false): DataInfo = {
  val di = new DataInfo
  di.setDeletedbyinference(false)
  di.setInferred(false)
  // Honour the caller's flag: it used to be hard-coded to false, so
  // generateDataInfo(invisibile = true) had no effect.
  di.setInvisible(invisibile)
  di.setTrust(trust)
  di.setProvenanceaction(createQualifier("sysimport:actionset", "dnet:provenanceActions"))
  di
}
{ + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkExportContentForOpenAire.getClass.getResourceAsStream("input_export_content_parameters.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(SparkExportContentForOpenAire.getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val sc:SparkContext = spark.sparkContext + + val workingPath = parser.get("workingDirPath") + + implicit val pubEncoder: Encoder[Publication] = Encoders.bean(classOf[Publication]) + implicit val datEncoder: Encoder[OafDataset] = Encoders.bean(classOf[OafDataset]) + implicit val relEncoder: Encoder[Relation] = Encoders.bean(classOf[Relation]) + implicit val dliRelEncoder: Encoder[DLIRelation] = Encoders.bean(classOf[DLIRelation]) + import spark.implicits._ + +// +// val relRDD:RDD[Relation] = sc.textFile(s"$workingPath/relation_j") +// .map(s => new ObjectMapper().readValue(s, classOf[DLIRelation])) +// .filter(p => p.getDataInfo.getDeletedbyinference == false) +// .map(DLIToOAF.convertDLIRelation).filter(p=>p!= null) +// spark.createDataset(relRDD).write.mode(SaveMode.Overwrite).save(s"$workingPath/relationDS") +// +// val datRDD:RDD[OafDataset] = sc.textFile(s"$workingPath/dataset") +// .map(s => new ObjectMapper().readValue(s, classOf[DLIDataset])) +// .filter(p => p.getDataInfo.getDeletedbyinference == false) +// .map(DLIToOAF.convertDLIDatasetTOOAF).filter(p=>p!= null) +// spark.createDataset(datRDD).write.mode(SaveMode.Overwrite).save(s"$workingPath/datasetDS") +// +// +// val pubRDD:RDD[Publication] = sc.textFile(s"$workingPath/publication") +// .map(s => new ObjectMapper().readValue(s, classOf[DLIPublication])) +// .filter(p => p.getDataInfo.getDeletedbyinference == false) +// .map(DLIToOAF.convertDLIPublicationToOAF).filter(p=>p!= null) +// spark.createDataset(pubRDD).write.mode(SaveMode.Overwrite).save(s"$workingPath/publicationDS") +// +// 
+// +// val pubs:Dataset[Publication] = spark.read.load(s"$workingPath/publicationDS").as[Publication] +// val dats :Dataset[OafDataset] = spark.read.load(s"$workingPath/datasetDS").as[OafDataset] + var relDS :Dataset[Relation] = spark.read.load(s"$workingPath/relationDS").as[Relation] +// +// +// pubs.joinWith(relDS, pubs("id").equalTo(relDS("source"))).map(k => k._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/relationDS_f1") +// +// relDS= spark.read.load(s"$workingPath/relationDS_f1").as[Relation] +// +// relDS.joinWith(dats, relDS("target").equalTo(dats("id"))).map(k => k._1).write.mode(SaveMode.Overwrite).save(s"$workingPath/relationDS_filtered") +// +// +// val r_source = relDS.select(relDS("source")).distinct() +// val r_target = relDS.select(relDS("source")).distinct() +// +// +// pubs.joinWith(r_source, pubs("id").equalTo(r_source("source")), "inner").map(k => k._1).write.mode(SaveMode.Overwrite).save(s"$workingPath/publicationDS_filtered") +// +// dats.joinWith(r_target, dats("id").equalTo(r_target("target")), "inner").map(k => k._1).write.mode(SaveMode.Overwrite).save(s"$workingPath/datasetDS_filtered") +// +// spark.createDataset(sc.textFile(s"$workingPath/dataset") +// .map(s => new ObjectMapper().readValue(s, classOf[DLIDataset])) +// .map(DLIToOAF.convertDLIDatasetToExternalReference) +// .filter(p => p != null)).as[DLIExternalReference].write.mode(SaveMode.Overwrite).save(s"$workingPath/externalReference") +// + + val pf = spark.read.load(s"$workingPath/publicationDS_filtered").select("id") + relDS = spark.read.load(s"$workingPath/relationDS").as[Relation] + val relationTo = pf.joinWith(relDS, pf("id").equalTo(relDS("source")),"inner").map(t =>t._2) + + val extRef = spark.read.load(s"$workingPath/externalReference").as[DLIExternalReference] + + spark.createDataset(relationTo.joinWith(extRef, relationTo("target").equalTo(extRef("id")), "inner").map(d => { + val r = d._1 + val ext = d._2 + (r.getSource, ext) + }).rdd.groupByKey.map(f => { + 
var dli_ext = ArrayBuffer[DLIExternalReference]() + f._2.foreach(d => if (dli_ext.size < 100) dli_ext += d ) + (f._1, dli_ext) + })).write.mode(SaveMode.Overwrite).save(s"$workingPath/externalReference_grouped") + + + + + + + + + + + + + + + } + +} diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/export/input_export_content_parameters.json b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/export/input_export_content_parameters.json new file mode 100644 index 000000000..b92f87e08 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/export/input_export_content_parameters.json @@ -0,0 +1,14 @@ +[ + { + "paramName": "mt", + "paramLongName": "master", + "paramDescription": "should be local or yarn", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workingDirPath", + "paramDescription": "the working path where generated files", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/config-default.xml new file mode 100644 index 000000000..59e5c059f --- /dev/null +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/config-default.xml @@ -0,0 +1,42 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + oozie.wf.rerun.failnodes + false + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" 
+ + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/workflow.xml new file mode 100644 index 000000000..181ab80bf --- /dev/null +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/workflow.xml @@ -0,0 +1,49 @@ + + + + workingDirPath + the source path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + memory for individual executor + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn-cluster + cluster + ExtractOAF + eu.dnetlib.dhp.export.SparkExportContentForOpenAire + dhp-graph-provision-scholexplorer-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + ${sparkExtraOPT} + + --workingDirPath${workingDirPath} + --masteryarn-cluster + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/java/eu/dnetlib/dhp/export/ExportDLITOOAFTest.scala b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/java/eu/dnetlib/dhp/export/ExportDLITOOAFTest.scala new file mode 100644 index 000000000..c9d33dbe4 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/java/eu/dnetlib/dhp/export/ExportDLITOOAFTest.scala @@ -0,0 +1,75 @@ +package eu.dnetlib.dhp.export + +import java.time.LocalDateTime +import java.time.format.DateTimeFormatter + +import eu.dnetlib.dhp.schema.oaf.Relation +import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, DLIRelation} +import 
/**
  * Exercises the DLI to OAF conversions of `DLIToOAF` against the JSON fixtures stored next to
  * this class. Each test still prints the converted record for inspection, but also asserts on
  * the outcome so that a broken mapping makes the build fail.
  */
class ExportDLITOOAFTest {

  import org.junit.jupiter.api.Assertions.{assertEquals, assertNotNull, assertTrue}

  val mapper = new ObjectMapper()

  /** Reads a classpath resource located in the package of this class, closing the stream afterwards. */
  private def readResource(name: String): String = {
    val source = Source.fromInputStream(getClass.getResourceAsStream(name))
    try source.mkString finally source.close()
  }

  @Test
  def testDate(): Unit = {
    val formatted = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"))
    println(formatted)
    assertTrue(formatted.matches("""\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z"""))
  }

  @Test
  def testPublicationMapping(): Unit = {
    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
    val json = readResource("publication.json")

    val oaf = DLIToOAF.convertDLIPublicationToOAF(mapper.readValue(json, classOf[DLIPublication]))

    // The fixture has a known provider, authors, a title, a relevant date and a DOI: it must convert.
    assertNotNull(oaf)
    assertEquals("50|scholix_____::9e117414be07bf03cbce8889d22d661a", oaf.getId)
    println(mapper.writeValueAsString(oaf))
  }

  @Test
  def testExternalReferenceMapping(): Unit = {
    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
    val json = readResource("dataset.json")

    val oaf = DLIToOAF.convertDLIDatasetToExternalReference(mapper.readValue(json, classOf[DLIDataset]))

    // The fixture carries an 8-character "ena" pid, which is long enough to yield a reference.
    assertNotNull(oaf)
    println(oaf)
  }

  @Test
  def testRelationMapping(): Unit = {
    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
    val json = readResource("relation.json")

    val oaf = DLIToOAF.convertDLIRelation(mapper.readValue(json, classOf[DLIRelation]))

    // "IsReferencedBy" is a mapped relation name and source/target differ: it must convert.
    assertNotNull(oaf)
    assertEquals("resultResult", oaf.getRelType)
    assertEquals("50|scholix_____::4ee78ab329b49416b45c3774c132f244", oaf.getSource)
    assertEquals("50|scholix_____::00062410e2a15322480277d063c181bb", oaf.getTarget)
    println(mapper.writeValueAsString(oaf))
  }

}
"invisible": false, + "inferred": null, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": null + }, + "lastupdatetimestamp": null, + "id": "60|719f19e5a996de1b87cddf93871bf2d4", + "originalId": [ + "a0a3p2gws9::uniprot" + ], + "collectedfrom": [ + { + "key": "dli_________::europe_pmc__", + "value": "Europe PMC", + "dataInfo": null + } + ], + "pid": [ + { + "value": "acc63471", + "qualifier": { + "classid": "ena", + "classname": "ena", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": null + } + ], + "dateofcollection": "2019-07-05T12:47:11.545+02:00", + "dateoftransformation": null, + "extraInfo": null, + "oaiprovenance": null, + "author": null, + "resulttype": { + "classid": "dataset", + "classname": "dataset", + "schemeid": "dataset", + "schemename": "dataset" + }, + "language": null, + "country": null, + "subject": [], + "title": [ + { + "value": "CMD domain-containing protein", + "qualifier": null, + "dataInfo": null + } + ], + "relevantdate": [ + { + "value": "2019-07-15T16:14:28.636", + "qualifier": { + "classid": "resolvedDate", + "classname": "resolvedDate", + "schemeid": "dnet::date", + "schemename": "dnet::date" + }, + "dataInfo": null + } + ], + "description": null, + "dateofacceptance": null, + "publisher": { + "value": "UniProt", + "dataInfo": null + }, + "embargoenddate": null, + "source": null, + "fulltext": null, + "format": null, + "contributor": null, + "resourcetype": null, + "coverage": null, + "bestaccessright": null, + "context": null, + "externalReference": null, + "instance": [], + "storagedate": null, + "device": null, + "size": null, + "version": null, + "lastmetadataupdate": null, + "metadataversionnumber": null, + "geolocation": null, + "originalObjIdentifier": "europe_pmc__::719f19e5a996de1b87cddf93871bf2d4", + "dlicollectedfrom": [ + { + "id": "dli_________::europe_pmc__", + "name": "Europe PMC", + "completionStatus": "complete", + "collectionMode": 
null + } + ], + "completionStatus": "complete" +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/publication.json b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/publication.json new file mode 100644 index 000000000..4ab3de2da --- /dev/null +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/publication.json @@ -0,0 +1,128 @@ +{ + "dataInfo": { + "invisible": false, + "inferred": null, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": null + }, + "lastupdatetimestamp": null, + "id": "50|9e117414be07bf03cbce8889d22d661a", + "originalId": [ + "9e117414be07bf03cbce8889d22d661a" + ], + "collectedfrom": [ + { + "key": "dli_________::crossref", + "value": "Crossref", + "dataInfo": null + } + ], + "pid": [ + { + "value": "10.1007/978-94-017-3490-5_15", + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": null + } + ], + "dateofcollection": "2020-06-08T07:28:55.731Z", + "dateoftransformation": null, + "extraInfo": null, + "oaiprovenance": null, + "author": [ + { + "fullname": "Calcaterra Domenico", + "name": null, + "surname": null, + "rank": null, + "pid": null, + "affiliation": null + }, + { + "fullname": "Parise Mario", + "name": null, + "surname": null, + "rank": null, + "pid": null, + "affiliation": null + } + ], + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemeid": "publication", + "schemename": "publication" + }, + "language": null, + "country": null, + "subject":[ + { + "value":"Strain-linked information about bacterial and archaeal biodiversity", + "qualifier":{ + "classid":"dnet:subject", + "classname":"dnet:subject", + "schemeid":"", + "schemename":"" + }, + "dataInfo":null + } + ], + "title": [ + { + "value": "The 
Contribution of Historical Information in the Assessment of Landslide Hazard", + "qualifier": null, + "dataInfo": null + } + ], + "relevantdate": [ + { + "value": "2013-01-29T16:50:44Z", + "qualifier": { + "classid": "date", + "classname": "date", + "schemeid": "dnet::date", + "schemename": "dnet::date" + }, + "dataInfo": null + } + ], + "description": [ + { + "value": null, + "dataInfo": null + } + ], + "dateofacceptance": null, + "publisher": { + "value": "Springer Netherlands", + "dataInfo": null + }, + "embargoenddate": null, + "source": null, + "fulltext": null, + "format": null, + "contributor": null, + "resourcetype": null, + "coverage": null, + "bestaccessright": null, + "context": null, + "externalReference": null, + "instance": [], + "journal": null, + "originalObjIdentifier": "dli_resolver::9e117414be07bf03cbce8889d22d661a", + "dlicollectedfrom": [ + { + "id": "dli_________::crossref", + "name": "Crossref", + "completionStatus": "complete", + "collectionMode": "resolved" + } + ], + "completionStatus": "complete" +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/relation.json b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/relation.json new file mode 100644 index 000000000..cdb0cfa1d --- /dev/null +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/relation.json @@ -0,0 +1,23 @@ +{ + "subRelType": null, + "relClass": "datacite", + "dataInfo": { + "deletedbyinference": false, + "provenanceaction": null, + "inferred": null, + "inferenceprovenance": null, + "invisible": false, + "trust": "0.9" + }, + "target": "50|00062410e2a15322480277d063c181bb", + "lastupdatetimestamp": null, + "relType": "IsReferencedBy", + "source": "60|4ee78ab329b49416b45c3774c132f244", + "collectedFrom": [ + { + "dataInfo": null, + "value": "Europe PMC", + "key": "dli_________::europe_pmc__" + } + ] +}