Oozie workflow for the OpenAIRE ranking / impact-indicators pipeline, reduced here to its configuration values, action parameters, and kill messages.

Global configuration: ${jobTracker}, ${nameNode}, and oozie.action.sharelib.for.spark = ${oozieActionShareLibForSpark2}.

Entry decision ("resume" switch): the workflow can resume from an intermediate step by comparing ${wf:conf('resume')} against "rankings-start", "impulse", "rankings-iterative", "format-results", "map-ids", "map-scores", and "projects-impact", with "start" running the pipeline from the beginning.

Ranking and formatting actions (Spark, master yarn-cluster, mode cluster). All of the PySpark actions pass --executor-cores=${sparkExecutorCores} and the common configuration --conf spark.sql.shuffle.partitions=${sparkShufflePartitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}, and each script is shipped with its action as ${wfAppPath}/SCRIPT.py#SCRIPT.py; only the memory settings and the job-specific arguments are listed per action below.

- Openaire Ranking Graph Creation (create_openaire_ranking_graph.py): --executor-memory=${sparkHighExecutorMemory}, --driver-memory=${sparkHighDriverMemory}; arguments: ${openaireDataInput} ${currentYear} ${sparkShufflePartitions} ${openaireGraphInputPath}.
- Spark CC (CC.py): --executor-memory=${sparkHighExecutorMemory}, --driver-memory=${sparkNormalDriverMemory}; arguments: ${openaireGraphInputPath} ${sparkShufflePartitions}.
- Spark RAM (TAR.py): --executor-memory=${sparkHighExecutorMemory}, --driver-memory=${sparkNormalDriverMemory}; arguments: ${openaireGraphInputPath} ${ramGamma} ${currentYear} RAM ${sparkShufflePartitions} ${checkpointDir}.
- Spark Impulse (CC.py): --executor-memory=${sparkHighExecutorMemory}, --driver-memory=${sparkNormalDriverMemory}; arguments: ${openaireGraphInputPath} ${sparkShufflePartitions} 3.
- Spark Pagerank (PageRank.py): --executor-memory=${sparkHighExecutorMemory}, --driver-memory=${sparkNormalDriverMemory}; arguments: ${openaireGraphInputPath} ${pageRankAlpha} ${convergenceError} ${checkpointDir} ${sparkShufflePartitions} dfs.
- Spark AttRank (AttRank.py): --executor-memory=${sparkHighExecutorMemory}, --driver-memory=${sparkNormalDriverMemory}; arguments: ${openaireGraphInputPath} ${attrankAlpha} ${attrankBeta} ${attrankGamma} ${attrankRho} ${currentYear} ${attrankStartYear} ${convergenceError} ${checkpointDir} ${sparkShufflePartitions} dfs.
- Shell action get-file-names: runs /usr/bin/bash get_ranking_files.sh /${workingDir}, with the script shipped as ${wfAppPath}/get_ranking_files.sh#get_ranking_files.sh; its captured output (pr_file, attrank_file, cc_file, impulse_file, ram_file) is read downstream through wf:actionData('get-file-names').
- Format Ranking Results JSON (format_ranking_results.py): --executor-memory=${sparkNormalExecutorMemory}, --driver-memory=${sparkNormalDriverMemory}; arguments: json-5-way ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['pr_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['attrank_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['cc_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['impulse_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['ram_file']} ${sparkShufflePartitions} openaire.

A sketch of how one of these flattened steps maps back onto Oozie action elements follows this list.
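For reference, a minimal sketch of the Pagerank step and the file-name shell step as Oozie actions, assuming the standard uri:oozie:spark-action:0.2 and uri:oozie:shell-action:0.3 schemas; the action names and the ok/error transition targets are illustrative, not taken from the source.

<!-- Spark Pagerank step: PySpark script as <jar>, options as <spark-opts>, positional <arg>s -->
<action name="spark-pagerank">
    <spark xmlns="uri:oozie:spark-action:0.2">
        <master>yarn-cluster</master>
        <mode>cluster</mode>
        <name>Spark Pagerank</name>
        <jar>PageRank.py</jar>
        <spark-opts>
            --executor-memory=${sparkHighExecutorMemory} --executor-cores=${sparkExecutorCores}
            --driver-memory=${sparkNormalDriverMemory}
            --conf spark.sql.shuffle.partitions=${sparkShufflePartitions}
            --conf spark.extraListeners=${spark2ExtraListeners}
            --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
            --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
            --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
        </spark-opts>
        <arg>${openaireGraphInputPath}</arg>
        <arg>${pageRankAlpha}</arg>
        <arg>${convergenceError}</arg>
        <arg>${checkpointDir}</arg>
        <arg>${sparkShufflePartitions}</arg>
        <arg>dfs</arg>
        <file>${wfAppPath}/PageRank.py#PageRank.py</file>
    </spark>
    <ok to="spark-attrank"/>        <!-- illustrative transition -->
    <error to="pagerank-fail"/>     <!-- illustrative transition -->
</action>

<!-- Shell step whose captured output feeds wf:actionData('get-file-names'); the script must
     print Java-Properties-style key=value pairs (pr_file=..., attrank_file=..., ...) to stdout -->
<action name="get-file-names">
    <shell xmlns="uri:oozie:shell-action:0.3">
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <exec>/usr/bin/bash</exec>
        <argument>get_ranking_files.sh</argument>
        <argument>/${workingDir}</argument>
        <file>${wfAppPath}/get_ranking_files.sh#get_ranking_files.sh</file>
        <capture-output/>
    </shell>
    <ok to="format-json-files"/>            <!-- illustrative transition -->
    <error to="filename-getting-error"/>    <!-- illustrative transition -->
</action>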
The remaining formatting, mapping, and publication actions follow the same pattern:

- Format Ranking Results BiP! DB (format_ranking_results.py): --executor-memory=${sparkNormalExecutorMemory}, --driver-memory=${sparkNormalDriverMemory}; arguments: zenodo ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['pr_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['attrank_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['cc_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['impulse_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['ram_file']} ${sparkShufflePartitions} openaire.
- Openaire-DOI synonym collection (map_openaire_ids_to_dois.py): --executor-memory=${sparkHighExecutorMemory}, --driver-memory=${sparkHighDriverMemory}; arguments: ${openaireDataInput} ${synonymFolder}.
- Mapping Openaire Scores to DOIs (map_scores_to_dois.py): --executor-memory=${sparkHighExecutorMemory}, --driver-memory=${sparkHighDriverMemory}; arguments: ${synonymFolder} ${sparkShufflePartitions}, followed by the five ranking files resolved via wf:actionData('get-file-names') (pr_file, attrank_file, cc_file, impulse_file, ram_file), each prefixed with ${nameNode}/${workingDir}/.
- Produces the atomic action with the bip finder scores for publications (Spark on yarn, cluster mode; class eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob, jar dhp-aggregation-${projectVersion}.jar): --executor-memory=${sparkNormalExecutorMemory}, --driver-memory=${sparkNormalDriverMemory}; this job omits spark.sql.shuffle.partitions and adds --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}; arguments: --inputPath ${bipScorePath} --outputPath ${actionSetOutputPath}/results/ --targetEntity result (sketched below).
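In the flattened text, fused tokens such as --inputPath${bipScorePath} most plausibly come from adjacent argument elements whose text ran together when the markup was stripped. A minimal sketch of the result action-set step under that assumption (standard spark-action schema; action name and transitions are illustrative):

<action name="create-actionset-for-results">
    <spark xmlns="uri:oozie:spark-action:0.2">
        <master>yarn</master>
        <mode>cluster</mode>
        <name>Produces the atomic action with the bip finder scores for publications</name>
        <class>eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob</class>
        <jar>dhp-aggregation-${projectVersion}.jar</jar>
        <spark-opts>
            --executor-memory=${sparkNormalExecutorMemory} --executor-cores=${sparkExecutorCores}
            --driver-memory=${sparkNormalDriverMemory}
            --conf spark.extraListeners=${spark2ExtraListeners}
            --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
            --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
            --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
        </spark-opts>
        <!-- flag/value pairs passed as separate <arg> elements (assumption; they appear fused above) -->
        <arg>--inputPath</arg><arg>${bipScorePath}</arg>
        <arg>--outputPath</arg><arg>${actionSetOutputPath}/results/</arg>
        <arg>--targetEntity</arg><arg>result</arg>
    </spark>
    <ok to="project-impact-indicators"/>     <!-- illustrative transition -->
    <error to="actionset-creation-fail"/>    <!-- illustrative transition -->
</action>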
The project-level indicators are computed and published by the final two actions:

- Project Impact Indicators (projects_impact.py, yarn-cluster, cluster mode): --executor-memory=${sparkHighExecutorMemory}, --driver-memory=${sparkNormalDriverMemory}; arguments: ${openaireDataInput}/relations, then the four ranking files resolved via wf:actionData('get-file-names') (pr_file, attrank_file, cc_file, impulse_file), each prefixed with ${nameNode}/${workingDir}/, then ${sparkShufflePartitions} ${projectImpactIndicatorsOutput}; script shipped as ${wfAppPath}/projects_impact.py#projects_impact.py.
- Produces the atomic action with the bip finder scores for projects (Spark on yarn, cluster mode; class eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob, jar dhp-aggregation-${projectVersion}.jar): --executor-memory=${sparkNormalExecutorMemory}, --driver-memory=${sparkNormalDriverMemory}, --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} (no shuffle-partitions setting); arguments: --inputPath ${projectImpactIndicatorsOutput} --outputPath ${actionSetOutputPath}/projects/ --targetEntity project.

Kill messages, one per failure path (see the sketch after this list):

- Creation of openaire-graph failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- CC failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- RAM failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- Impulse failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- PageRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- AttRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- Error getting key-value pairs for output files, error message[${wf:errorMessage(wf:lastErrorNode())}]
- Error formatting json files, error message[${wf:errorMessage(wf:lastErrorNode())}]
- Error formatting BIP files, error message[${wf:errorMessage(wf:lastErrorNode())}]
- Synonym collection failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- Mapping scores to DOIs failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- Deleting output path for actionsets failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- ActionSet creation for results failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- Calculating project impact indicators failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- ActionSet creation for projects failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
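Each message above belongs to an Oozie kill node that an action's error transition points to. A minimal sketch for one of them (the node name is illustrative; the message text is from the source):

<kill name="pagerank-fail">
    <message>PageRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>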