${resume eq "rankings-start"}
${resume eq "impulse"}
${resume eq "rankings-iterative"}
${resume eq "format-results"}
${resume eq "map-ids"}
${resume eq "map-scores"}
${resume eq "start"}
${jobTracker}
${nameNode}
yarn-cluster
cluster
Openaire Ranking Graph Creation
create_openaire_ranking_graph.py
--executor-memory 20G --executor-cores 4 --driver-memory 20G
--master yarn
--deploy-mode cluster
--conf spark.sql.shuffle.partitions=7680
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${openaireDataInput}
${currentYear}
7680
${openaireGraphInputPath}
${wfAppPath}/create_openaire_ranking_graph.py#create_openaire_ranking_graph.py
${jobTracker}
${nameNode}
yarn-cluster
cluster
Spark CC
CC.py
--executor-memory 18G --executor-cores 4 --driver-memory 10G
--master yarn
--deploy-mode cluster
--conf spark.sql.shuffle.partitions=7680
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${openaireGraphInputPath}
7680
${wfAppPath}/CC.py#CC.py
${jobTracker}
${nameNode}
yarn-cluster
cluster
Spark RAM
TAR.py
--executor-memory 18G --executor-cores 4 --driver-memory 10G
--master yarn
--deploy-mode cluster
--conf spark.sql.shuffle.partitions=7680
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${openaireGraphInputPath}
${ramGamma}
${currentYear}
RAM
7680
${checkpointDir}
${wfAppPath}/TAR.py#TAR.py
${jobTracker}
${nameNode}
yarn-cluster
cluster
Spark Impulse
CC.py
--executor-memory 18G --executor-cores 4 --driver-memory 10G
--master yarn
--deploy-mode cluster
--conf spark.sql.shuffle.partitions=7680
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${openaireGraphInputPath}
7680
3
${wfAppPath}/CC.py#CC.py
${jobTracker}
${nameNode}
yarn-cluster
cluster
Spark Pagerank
PageRank.py
--executor-memory 18G --executor-cores 4 --driver-memory 10G
--master yarn
--deploy-mode cluster
--conf spark.sql.shuffle.partitions=7680
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${openaireGraphInputPath}
${pageRankAlpha}
${convergenceError}
${checkpointDir}
7680
dfs
${wfAppPath}/PageRank.py#PageRank.py
${jobTracker}
${nameNode}
yarn-cluster
cluster
Spark AttRank
AttRank.py
--executor-memory 18G --executor-cores 4 --driver-memory 10G
--master yarn
--deploy-mode cluster
--conf spark.sql.shuffle.partitions=7680
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${openaireGraphInputPath}
${attrankAlpha}
${attrankBeta}
${attrankGamma}
${attrankRho}
${currentYear}
${attrankStartYear}
${convergenceError}
${checkpointDir}
7680
dfs
${wfAppPath}/AttRank.py#AttRank.py
${jobTracker}
${nameNode}
/usr/bin/bash
get_ranking_files.sh
/${workflowDataDir}
${wfAppPath}/get_ranking_files.sh#get_ranking_files.sh
${jobTracker}
${nameNode}
yarn-cluster
cluster
Format Ranking Results JSON
format_ranking_results.py
--executor-memory 10G --executor-cores 4 --driver-memory 10G
--master yarn
--deploy-mode cluster
--conf spark.sql.shuffle.partitions=7680
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
json
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['pr_file']}
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['attrank_file']}
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['cc_file']}
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['impulse_file']}
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['ram_file']}
7680
openaire
${wfAppPath}/format_ranking_results.py#format_ranking_results.py
${jobTracker}
${nameNode}
yarn-cluster
cluster
Format Ranking Results BiP! DB
format_ranking_results.py
--executor-memory 10G --executor-cores 4 --driver-memory 10G
--master yarn
--deploy-mode cluster
--conf spark.sql.shuffle.partitions=7680
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
zenodo
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['pr_file']}
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['attrank_file']}
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['cc_file']}
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['impulse_file']}
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['ram_file']}
7680
openaire
${wfAppPath}/format_ranking_results.py#format_ranking_results.py
${jobTracker}
${nameNode}
yarn-cluster
cluster
Openaire-DOI synonym collection
map_openaire_ids_to_dois.py
--executor-memory 18G --executor-cores 4 --driver-memory 15G
--master yarn
--deploy-mode cluster
--conf spark.sql.shuffle.partitions=7680
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${openaireDataInput}
${synonymFolder}
${wfAppPath}/map_openaire_ids_to_dois.py#map_openaire_ids_to_dois.py
${jobTracker}
${nameNode}
yarn-cluster
cluster
Mapping Openaire Scores to DOIs
map_scores_to_dois.py
--executor-memory 18G --executor-cores 4 --driver-memory 15G
--master yarn
--deploy-mode cluster
--conf spark.sql.shuffle.partitions=7680
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${synonymFolder}
7680
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['pr_file']}
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['attrank_file']}
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['cc_file']}
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['impulse_file']}
${nameNode}/${workflowDataDir}/${wf:actionData('get-file-names')['ram_file']}
${wfAppPath}/map_scores_to_dois.py#map_scores_to_dois.py
yarn
cluster
Produces the atomic action with the bip finder scores for publications
eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob
dhp-aggregation-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--inputPath
${bipScorePath}
--outputPath
${actionSetOutputPath}
PageRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
AttRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
CC failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
Impulse failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
RAM failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
Creation of openaire-graph failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
Synonym collection failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
Mapping scores to DOIs failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
Deleting output path for actionsets failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
ActionSet creation failed, error message[${wf:errorMessage(wf:lastErrorNode())}]