${jobTracker}
${nameNode}
oozie.action.sharelib.for.spark
${oozieActionShareLibForSpark2}
projectImpactIndicatorsOutput
${nameNode}${workingDir}/project_indicators
openaireGraphInputPath
${nameNode}/${workingDir}/openaire_id_graph
synonymFolder
${nameNode}/${workingDir}/openaireid_to_dois/
checkpointDir
${nameNode}/${workingDir}/check/
bipScorePath
${nameNode}${workingDir}/openaire_universe_scores/
${wf:conf('resume') eq "rankings-start"}
${wf:conf('resume') eq "impulse"}
${wf:conf('resume') eq "pagerank"}
${wf:conf('resume') eq "attrank"}
${wf:conf('resume') eq "format-results"}
${wf:conf('resume') eq "map-ids"}
${wf:conf('resume') eq "map-scores"}
${wf:conf('resume') eq "start"}
${wf:conf('resume') eq "projects-impact"}
${wf:conf('resume') eq "create-actionset"}
yarn-cluster
cluster
OpenAIRE Ranking Graph Creation
create_openaire_ranking_graph.py
--executor-memory=${sparkHighExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkHighDriverMemory}
--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${openaireDataInput}
${currentYear}
${sparkShufflePartitions}
${nameNode}${workingDir}/openaire_id_graph
${nameNode}${wfAppPath}/create_openaire_ranking_graph.py#create_openaire_ranking_graph.py
yarn-cluster
cluster
Citation Count calculation
CC.py
--executor-memory=${sparkHighExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkNormalDriverMemory}
--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${nameNode}/${workingDir}/openaire_id_graph
${sparkShufflePartitions}
${wfAppPath}/bip-ranker/CC.py#CC.py
yarn-cluster
cluster
RAM calculation
TAR.py
--executor-memory=${sparkHighExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkNormalDriverMemory}
--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${nameNode}/${workingDir}/openaire_id_graph
${ramGamma}
${currentYear}
RAM
${sparkShufflePartitions}
${checkpointDir}
${wfAppPath}/bip-ranker/TAR.py#TAR.py
yarn-cluster
cluster
Impulse calculation
CC.py
--executor-memory=${sparkHighExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkNormalDriverMemory}
--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${nameNode}/${workingDir}/openaire_id_graph
${sparkShufflePartitions}
3
${wfAppPath}/bip-ranker/CC.py#CC.py
yarn-cluster
cluster
Pagerank calculation
PageRank.py
--executor-memory=${sparkHighExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkNormalDriverMemory}
--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${nameNode}/${workingDir}/openaire_id_graph
${pageRankAlpha}
${convergenceError}
${checkpointDir}
${sparkShufflePartitions}
dfs
${wfAppPath}/bip-ranker/PageRank.py#PageRank.py
yarn-cluster
cluster
AttRank calculation
AttRank.py
--executor-memory=${sparkHighExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkNormalDriverMemory}
--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${nameNode}/${workingDir}/openaire_id_graph
${attrankAlpha}
${attrankBeta}
${attrankGamma}
${attrankRho}
${currentYear}
${attrankStartYear}
${convergenceError}
${checkpointDir}
${sparkShufflePartitions}
dfs
${wfAppPath}/bip-ranker/AttRank.py#AttRank.py
/usr/bin/bash
get_ranking_files.sh
${workingDir}
${wfAppPath}/get_ranking_files.sh#get_ranking_files.sh
yarn-cluster
cluster
Format Ranking Results JSON
format_ranking_results.py
--executor-memory=${sparkNormalExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkNormalDriverMemory}
--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
json-5-way
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['pr_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['attrank_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['cc_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['impulse_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['ram_file']}
${sparkShufflePartitions}
openaire
${wfAppPath}/format_ranking_results.py#format_ranking_results.py
yarn-cluster
cluster
Format Ranking Results BiP! DB
format_ranking_results.py
--executor-memory=${sparkNormalExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkNormalDriverMemory}
--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
zenodo
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['pr_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['attrank_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['cc_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['impulse_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['ram_file']}
${sparkShufflePartitions}
openaire
${wfAppPath}/format_ranking_results.py#format_ranking_results.py
yarn-cluster
cluster
OpenAIRE-DOI synonym collection
map_openaire_ids_to_dois.py
--executor-memory=${sparkHighExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkHighDriverMemory}
--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${openaireDataInput}/
${synonymFolder}
${wfAppPath}/map_openaire_ids_to_dois.py#map_openaire_ids_to_dois.py
yarn-cluster
cluster
Mapping OpenAIRE Scores to DOIs
map_scores_to_dois.py
--executor-memory=${sparkHighExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkHighDriverMemory}
--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${synonymFolder}
${sparkShufflePartitions}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['pr_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['attrank_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['cc_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['impulse_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['ram_file']}
${wfAppPath}/map_scores_to_dois.py#map_scores_to_dois.py
yarn-cluster
cluster
Project Impact Indicators calculation
projects_impact.py
--executor-memory=${sparkHighExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkNormalDriverMemory}
--conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
${openaireDataInput}/relation
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['pr_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['attrank_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['cc_file']}
${nameNode}/${workingDir}/${wf:actionData('get-file-names')['impulse_file']}
${sparkShufflePartitions}
${projectImpactIndicatorsOutput}
${wfAppPath}/projects_impact.py#projects_impact.py
yarn-cluster
cluster
Produces the atomic action with the bip finder scores
eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob
dhp-aggregation-${projectVersion}.jar
--executor-memory=${sparkNormalExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkNormalDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--resultsInputPath
${bipScorePath}
--projectsInputPath
${projectImpactIndicatorsOutput}
--outputPath
${actionSetOutputPath}
Creation of openaire-graph failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
CC failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
RAM failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
Impulse failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
PageRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
AttRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
Error getting key-value pairs for output files, error message[${wf:errorMessage(wf:lastErrorNode())}]
Error formatting json files, error message[${wf:errorMessage(wf:lastErrorNode())}]
Error formatting BIP files, error message[${wf:errorMessage(wf:lastErrorNode())}]
Synonym collection failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
Mapping scores to DOIs failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
Deleting output path for actionsets failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
ActionSet creation for results failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
Calculating project impact indicators failed, error message[${wf:errorMessage(wf:lastErrorNode())}]