From d410ea8a4176341cdebaa76179c77b5fdd45c631 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 19 Dec 2023 12:15:01 +0100 Subject: [PATCH] added needed parameter --- .../AppendNewRelations.java | 11 +- .../oozie_app/workflow.xml | 112 ++---------------- .../input_newrelation_parameters.json | 20 ++++ .../eu/dnetlib/dhp/wf/main/job.properties | 4 +- .../dhp/wf/main/oozie_app/workflow.xml | 10 +- 5 files changed, 41 insertions(+), 116 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java index a5884873b..636c14b65 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java @@ -2,26 +2,19 @@ package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.Objects; -import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.bulktag.community.ResultTagger; -import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Result; /** * @author miriam.baglioni @@ -54,7 +47,7 @@ public class AppendNewRelations implements Serializable { SparkConf conf = new SparkConf(); - runWithSparkHiveSession( + runWithSparkSession( conf, isSparkSessionManaged, spark -> appendNewRelation(spark, inputPath, outputPath)); diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml index 851aabe8b..d7335d840 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -5,9 +5,10 @@ the source path - outputPath - sets the outputPath + iterations + the number of hops to be done up on the hierarchy + @@ -21,119 +22,26 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - ${wf:conf('resumeFrom') eq 'PrepareInfo'} - - - - - - + + - + - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/publication - ${nameNode}/${outputPath}/publication - - - - - - - - ${nameNode}/${sourcePath}/dataset - ${nameNode}/${outputPath}/dataset - - - - - - - - ${nameNode}/${sourcePath}/otherresearchproduct - ${nameNode}/${outputPath}/otherresearchproduct - - - - - - - - ${nameNode}/${sourcePath}/software - ${nameNode}/${outputPath}/software - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - yarn cluster - PrepareResultOrganizationAssociation + PrepareResultProjectOrganizationAssociation eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo dhp-enrichment-${projectVersion}.jar @@ -161,7 +69,7 @@ yarn cluster - resultToOrganizationFromSemRel + resultProjectToOrganizationFromSemRel eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel dhp-enrichment-${projectVersion}.jar @@ -177,7 +85,7 @@ --conf spark.sql.shuffle.partitions=3840 --relationPath${workingDir}/preparedInfo/relation - --outputPath${outputPath}/relation + --outputPath${sourcePath}/relation --leavesPath${workingDir}/preparedInfo/leavesPath --childParentPath${workingDir}/preparedInfo/childParentPath --resultOrgPath${workingDir}/preparedInfo/resultOrgPath diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json new file mode 100644 index 000000000..5fe92cff1 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_newrelation_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the path where prepared info have been stored", + "paramRequired": false + },{ + "paramName": "o", + "paramLongName": "outputPath", + "paramDescription": "institutional repositories that should not be considered for the propagation", + "paramRequired": false +} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 6085cd2b2..93e9e0ab1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,5 +1,5 @@ sourcePath=/tmp/beta_provision/graph/09_graph_dedup_enriched -resumeFrom=default +resumeFrom=AffiliationSemanticRelation allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo @@ -24,5 +24,5 @@ pathMap ={"author":"$['author'][*]['fullname']", \ blacklist=empty allowedpids=orcid;orcid_pending baseURL = https://services.openaire.eu/openaire/community/ - +iterations=1 diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml index 33f849645..de054b962 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -195,13 +195,13 @@ - + - + - ${wf:appPath()}/affiliation_semantic_relation + ${wf:appPath()}/entity_semantic_relation @@ -209,6 +209,10 @@ sourcePath ${outputPath} + + iterations + ${iterations} +