From 45f2aa0867419093a866fe4686fe3c15400fe7d4 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Mon, 15 May 2023 17:52:20 +0300 Subject: [PATCH 1/5] Move end node ... at the end in workflow.xml --- .../impact_indicators/oozie_app/workflow.xml | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml index f185f2a8a..bc40dfd11 100644 --- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml @@ -3,7 +3,7 @@ - + @@ -714,47 +714,42 @@ - - - - - - + PageRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + AttRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - + + CC failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + Impulse failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - + + RAM failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + Creation of openaire-graph failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + Synonym collection failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + Mapping scores to DOIs failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + Deleting output path for actionsets failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + ActionSet creation for results failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -767,4 +762,8 @@ ActionSet creation for projects failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + From b83135c252e1d90e117269ae5b7609009d370c31 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Mon, 15 May 2023 
19:55:35 +0300 Subject: [PATCH 2/5] Add missing kill nodes in workflow.xml --- .../impact_indicators/oozie_app/workflow.xml | 32 +++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml index bc40dfd11..d2933e36f 100644 --- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml @@ -715,6 +715,22 @@ + + Creation of openaire-graph failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + CC failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + RAM failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + Impulse failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + PageRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -723,20 +739,16 @@ AttRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - CC failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + Error getting key-value pairs for output files, error message[${wf:errorMessage(wf:lastErrorNode())}] - - Impulse failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + Error formatting json files, error message[${wf:errorMessage(wf:lastErrorNode())}] - - RAM failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - Creation of openaire-graph failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + Error formatting BIP files, error message[${wf:errorMessage(wf:lastErrorNode())}] From 4eec3e7052756002f2f3d48561d516a3a5c003b5 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Mon, 15 May 2023 22:28:48 +0300 Subject: [PATCH 3/5] Add 
jobTracker, nameNode && spark2Lib as global params in oozie wf --- .../oozie_app/job.properties | 1 + .../impact_indicators/oozie_app/workflow.xml | 80 ++++++------------- 2 files changed, 24 insertions(+), 57 deletions(-) diff --git a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/job.properties b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/job.properties index 08f9b1eac..fb68a6928 100644 --- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/job.properties +++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/job.properties @@ -99,3 +99,4 @@ actionSetOutputPath=${workingDir}/bip_actionsets/ # The directory to store project impact indicators projectImpactIndicatorsOutput=${workingDir}/project_indicators +resume=create-openaire-ranking-graph diff --git a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml index d2933e36f..570dc46f5 100644 --- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml @@ -1,5 +1,17 @@ + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + @@ -8,14 +20,14 @@ - ${resume eq "rankings-start"} - ${resume eq "impulse"} - ${resume eq "rankings-iterative"} - ${resume eq "format-results"} - ${resume eq "map-ids"} - ${resume eq "map-scores"} - ${resume eq "start"} - ${resume eq "projects-impact"} + ${wf:conf('resume') eq "rankings-start"} + ${wf:conf('resume') eq "impulse"} + 
${wf:conf('resume') eq "rankings-iterative"} + ${wf:conf('resume') eq "format-results"} + ${wf:conf('resume') eq "map-ids"} + ${wf:conf('resume') eq "map-scores"} + ${wf:conf('resume') eq "start"} + ${wf:conf('resume') eq "projects-impact"} @@ -26,10 +38,7 @@ - - ${jobTracker} - - ${nameNode} + - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -135,10 +140,6 @@ - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -187,10 +188,6 @@ - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -238,10 +235,6 @@ - - ${jobTracker} - - ${nameNode} @@ -295,10 +288,6 @@ - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -353,10 +342,6 @@ - - ${jobTracker} - - ${nameNode} /usr/bin/bash @@ -378,7 +363,6 @@ - @@ -391,10 +375,6 @@ - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -443,10 +423,6 @@ - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -498,10 +474,7 @@ - - ${jobTracker} - - ${nameNode} + @@ -548,10 +521,6 @@ - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -636,10 +605,7 @@ - - ${jobTracker} - - ${nameNode} + yarn-cluster cluster From 26328e2a0da67e1469c8781c15750250d915272e Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Tue, 16 May 2023 14:39:38 +0300 Subject: [PATCH 4/5] Move job.properties --- .../dhp/oa/graph/impact_indicators/{oozie_app => }/job.properties | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/{oozie_app => }/job.properties (100%) diff --git a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/job.properties b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/job.properties similarity index 100% rename from dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/job.properties rename to 
dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/job.properties From 8ef718c3635f88358a3e44187be7b1d38b8b2c55 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Tue, 16 May 2023 16:28:48 +0300 Subject: [PATCH 5/5] Fix workflow application path --- dhp-workflows/dhp-impact-indicators/README.md | 10 ++++++++++ .../dhp/oa/graph/impact_indicators/job.properties | 9 ++++++--- .../oa/graph/impact_indicators/oozie_app/workflow.xml | 10 +++++----- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-impact-indicators/README.md b/dhp-workflows/dhp-impact-indicators/README.md index aad94ea19..de0ad157c 100644 --- a/dhp-workflows/dhp-impact-indicators/README.md +++ b/dhp-workflows/dhp-impact-indicators/README.md @@ -24,3 +24,13 @@ mvn package -Poozie-package,deploy,run -Dworkflow.source.dir=eu/dnetlib/dhp/oa/g ``` Note: edit the property `bip.ranker.tag` of the `pom.xml` file to specify the tag of [BIP-Ranker](https://github.com/athenarc/Bip-Ranker) that you want to use. + + +Job info and logs: +``` +export OOZIE_URL=http://iis-cdh5-test-m3:11000/oozie +oozie job -info <jobId> +oozie job -log <jobId> +``` + +where `jobId` is the id of the job returned by the `run_workflow.sh` script. 
\ No newline at end of file diff --git a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/job.properties b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/job.properties index fb68a6928..a2f3d5828 100644 --- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/job.properties +++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/job.properties @@ -76,7 +76,7 @@ bipScorePath=${workingDir}/openaire_universe_scores/ checkpointDir=${nameNode}/${workingDir}/check/ # The directory for the doi-based bip graph -bipGraphFilePath=${nameNode}/${workingDir}/bipdbv8_graph +# bipGraphFilePath=${nameNode}/${workingDir}/bipdbv8_graph # The folder from which synonyms of openaire-ids are read # openaireDataInput=${nameNode}/tmp/beta_provision/graph/21_graph_cleaned/ @@ -89,9 +89,12 @@ synonymFolder=${nameNode}/${workingDir}/openaireid_to_dois/ openaireGraphInputPath=${nameNode}/${workingDir}/openaire_id_graph # The workflow application path -wfAppPath=${nameNode}/${oozieWorkflowPath} +wfAppPath=${oozieTopWfApplicationPath} + # The following is needed as a property of a workflow -oozie.wf.application.path=${wfAppPath} +#oozie.wf.application.path=${wfAppPath} +oozie.wf.application.path=${oozieTopWfApplicationPath} + # Path where the final output should be? 
actionSetOutputPath=${workingDir}/bip_actionsets/ diff --git a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml index 570dc46f5..285a66382 100644 --- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml @@ -126,7 +126,7 @@ ${sparkShufflePartitions} - ${wfAppPath}/CC.py#CC.py + ${wfAppPath}/bip-ranker/CC.py#CC.py @@ -171,7 +171,7 @@ ${sparkShufflePartitions} ${checkpointDir} - ${wfAppPath}/TAR.py#TAR.py + ${wfAppPath}/bip-ranker/TAR.py#TAR.py @@ -216,7 +216,7 @@ ${sparkShufflePartitions} 3 - ${wfAppPath}/CC.py#CC.py + ${wfAppPath}/bip-ranker/CC.py#CC.py @@ -274,7 +274,7 @@ ${sparkShufflePartitions} dfs - ${wfAppPath}/PageRank.py#PageRank.py + ${wfAppPath}/bip-ranker/PageRank.py#PageRank.py @@ -324,7 +324,7 @@ ${sparkShufflePartitions} dfs - ${wfAppPath}/AttRank.py#AttRank.py + ${wfAppPath}/bip-ranker/AttRank.py#AttRank.py