diff --git a/dhp-workflows/dhp-impact-indicators/README.md b/dhp-workflows/dhp-impact-indicators/README.md index aad94ea19..de0ad157c 100644 --- a/dhp-workflows/dhp-impact-indicators/README.md +++ b/dhp-workflows/dhp-impact-indicators/README.md @@ -24,3 +24,13 @@ mvn package -Poozie-package,deploy,run -Dworkflow.source.dir=eu/dnetlib/dhp/oa/g ``` Note: edit the property `bip.ranker.tag` of the `pom.xml` file to specify the tag of [BIP-Ranker](https://github.com/athenarc/Bip-Ranker) that you want to use. + + +Job info and logs: +``` +export OOZIE_URL=http://iis-cdh5-test-m3:11000/oozie +oozie job -info <jobId> +oozie job -log <jobId> +``` + +where `jobId` is the id of the job returned by the `run_workflow.sh` script. \ No newline at end of file diff --git a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/job.properties b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/job.properties similarity index 94% rename from dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/job.properties rename to dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/job.properties index 08f9b1eac..a2f3d5828 100644 --- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/job.properties +++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/job.properties @@ -76,7 +76,7 @@ bipScorePath=${workingDir}/openaire_universe_scores/ checkpointDir=${nameNode}/${workingDir}/check/ # The directory for the doi-based bip graph -bipGraphFilePath=${nameNode}/${workingDir}/bipdbv8_graph +# bipGraphFilePath=${nameNode}/${workingDir}/bipdbv8_graph # The folder from which synonyms of openaire-ids are read # openaireDataInput=${nameNode}/tmp/beta_provision/graph/21_graph_cleaned/ @@ -89,9 +89,12 @@ 
synonymFolder=${nameNode}/${workingDir}/openaireid_to_dois/ openaireGraphInputPath=${nameNode}/${workingDir}/openaire_id_graph # The workflow application path -wfAppPath=${nameNode}/${oozieWorkflowPath} +wfAppPath=${oozieTopWfApplicationPath} + # The following is needed as a property of a workflow -oozie.wf.application.path=${wfAppPath} +#oozie.wf.application.path=${wfAppPath} +oozie.wf.application.path=${oozieTopWfApplicationPath} + # Path where the final output should be? actionSetOutputPath=${workingDir}/bip_actionsets/ @@ -99,3 +102,4 @@ actionSetOutputPath=${workingDir}/bip_actionsets/ # The directory to store project impact indicators projectImpactIndicatorsOutput=${workingDir}/project_indicators +resume=create-openaire-ranking-graph diff --git a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml index f185f2a8a..285a66382 100644 --- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml @@ -1,21 +1,33 @@ + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + - + - ${resume eq "rankings-start"} - ${resume eq "impulse"} - ${resume eq "rankings-iterative"} - ${resume eq "format-results"} - ${resume eq "map-ids"} - ${resume eq "map-scores"} - ${resume eq "start"} - ${resume eq "projects-impact"} + ${wf:conf('resume') eq "rankings-start"} + ${wf:conf('resume') eq "impulse"} + ${wf:conf('resume') eq "rankings-iterative"} + ${wf:conf('resume') eq "format-results"} + ${wf:conf('resume') eq "map-ids"} + ${wf:conf('resume') eq "map-scores"} + ${wf:conf('resume') eq "start"} + ${wf:conf('resume') eq "projects-impact"} @@ -26,10 +38,7 
@@ - - ${jobTracker} - - ${nameNode} + - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -121,7 +126,7 @@ ${sparkShufflePartitions} - ${wfAppPath}/CC.py#CC.py + ${wfAppPath}/bip-ranker/CC.py#CC.py @@ -135,10 +140,6 @@ - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -170,7 +171,7 @@ ${sparkShufflePartitions} ${checkpointDir} - ${wfAppPath}/TAR.py#TAR.py + ${wfAppPath}/bip-ranker/TAR.py#TAR.py @@ -187,10 +188,6 @@ - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -219,7 +216,7 @@ ${sparkShufflePartitions} 3 - ${wfAppPath}/CC.py#CC.py + ${wfAppPath}/bip-ranker/CC.py#CC.py @@ -238,10 +235,6 @@ - - ${jobTracker} - - ${nameNode} @@ -281,7 +274,7 @@ ${sparkShufflePartitions} dfs - ${wfAppPath}/PageRank.py#PageRank.py + ${wfAppPath}/bip-ranker/PageRank.py#PageRank.py @@ -295,10 +288,6 @@ - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -335,7 +324,7 @@ ${sparkShufflePartitions} dfs - ${wfAppPath}/AttRank.py#AttRank.py + ${wfAppPath}/bip-ranker/AttRank.py#AttRank.py @@ -353,10 +342,6 @@ - - ${jobTracker} - - ${nameNode} /usr/bin/bash @@ -378,7 +363,6 @@ - @@ -391,10 +375,6 @@ - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -443,10 +423,6 @@ - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -498,10 +474,7 @@ - - ${jobTracker} - - ${nameNode} + @@ -548,10 +521,6 @@ - - ${jobTracker} - - ${nameNode} yarn-cluster @@ -636,10 +605,7 @@ - - ${jobTracker} - - ${nameNode} + yarn-cluster cluster @@ -714,47 +680,54 @@ - - - - - - - - PageRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + Creation of openaire-graph failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - AttRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - + CC failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - Impulse failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - + RAM failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + - - Creation of openaire-graph failed, error 
message[${wf:errorMessage(wf:lastErrorNode())}] - + + Impulse failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + PageRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + AttRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + Error getting key-value pairs for output files, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + Error formatting json files, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + Error formatting BIP files, error message[${wf:errorMessage(wf:lastErrorNode())}] + Synonym collection failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + Mapping scores to DOIs failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + Deleting output path for actionsets failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + ActionSet creation for results failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -767,4 +740,8 @@ ActionSet creation for projects failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + +