${jobTracker} ${nameNode} oozie.action.sharelib.for.spark ${oozieActionShareLibForSpark2} projectImpactIndicatorsOutput ${nameNode}${workingDir}/project_indicators openaireGraphInputPath ${nameNode}/${workingDir}/openaire_id_graph synonymFolder ${nameNode}/${workingDir}/openaireid_to_dois/ checkpointDir ${nameNode}/${workingDir}/check/ bipScorePath ${nameNode}${workingDir}/openaire_universe_scores/ ${wf:conf('resume') eq "cc"} ${wf:conf('resume') eq "ram"} ${wf:conf('resume') eq "impulse"} ${wf:conf('resume') eq "pagerank"} ${wf:conf('resume') eq "attrank"} ${wf:conf('resume') eq "format-results"} ${wf:conf('resume') eq "map-ids"} ${wf:conf('resume') eq "map-scores"} ${wf:conf('resume') eq "start"} ${wf:conf('resume') eq "projects-impact"} ${wf:conf('resume') eq "create-actionset"} yarn-cluster cluster OpenAIRE Ranking Graph Creation create_openaire_ranking_graph.py --executor-memory=${sparkHighExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkHighDriverMemory} --conf spark.sql.shuffle.partitions=${sparkShufflePartitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} ${openaireDataInput} ${currentYear} ${sparkShufflePartitions} ${nameNode}${workingDir}/openaire_id_graph ${nameNode}${wfAppPath}/create_openaire_ranking_graph.py#create_openaire_ranking_graph.py yarn-cluster cluster Citation Count calculation CC.py --executor-memory=${sparkHighExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkNormalDriverMemory} --conf spark.sql.shuffle.partitions=${sparkShufflePartitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} ${nameNode}/${workingDir}/openaire_id_graph ${sparkShufflePartitions} ${wfAppPath}/bip-ranker/CC.py#CC.py yarn-cluster cluster RAM calculation TAR.py --executor-memory=${sparkHighExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkNormalDriverMemory} --conf spark.sql.shuffle.partitions=${sparkShufflePartitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} ${nameNode}/${workingDir}/openaire_id_graph ${ramGamma} ${currentYear} RAM ${sparkShufflePartitions} ${checkpointDir} ${wfAppPath}/bip-ranker/TAR.py#TAR.py yarn-cluster cluster Impulse calculation CC.py --executor-memory=${sparkHighExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkNormalDriverMemory} --conf spark.sql.shuffle.partitions=${sparkShufflePartitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} ${nameNode}/${workingDir}/openaire_id_graph ${sparkShufflePartitions} 3 ${wfAppPath}/bip-ranker/CC.py#CC.py yarn-cluster cluster Pagerank calculation PageRank.py --executor-memory=${sparkHighExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkNormalDriverMemory} --conf spark.sql.shuffle.partitions=${sparkShufflePartitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} ${nameNode}/${workingDir}/openaire_id_graph ${pageRankAlpha} ${convergenceError} ${checkpointDir} ${sparkShufflePartitions} dfs ${wfAppPath}/bip-ranker/PageRank.py#PageRank.py yarn-cluster cluster AttRank calculation AttRank.py --executor-memory=${sparkHighExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkNormalDriverMemory} --conf spark.sql.shuffle.partitions=${sparkShufflePartitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} ${nameNode}/${workingDir}/openaire_id_graph ${attrankAlpha} ${attrankBeta} ${attrankGamma} ${attrankRho} ${currentYear} ${attrankStartYear} ${convergenceError} ${checkpointDir} ${sparkShufflePartitions} dfs ${wfAppPath}/bip-ranker/AttRank.py#AttRank.py /usr/bin/bash get_ranking_files.sh ${workingDir} ${wfAppPath}/get_ranking_files.sh#get_ranking_files.sh yarn-cluster cluster Format Ranking Results JSON format_ranking_results.py --executor-memory=${sparkNormalExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkNormalDriverMemory} --conf spark.sql.shuffle.partitions=${sparkShufflePartitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} json-5-way ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['pr_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['attrank_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['cc_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['impulse_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['ram_file']} ${sparkShufflePartitions} openaire ${wfAppPath}/format_ranking_results.py#format_ranking_results.py yarn-cluster cluster Format Ranking Results BiP! DB format_ranking_results.py --executor-memory=${sparkNormalExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkNormalDriverMemory} --conf spark.sql.shuffle.partitions=${sparkShufflePartitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} zenodo ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['pr_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['attrank_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['cc_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['impulse_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['ram_file']} ${sparkShufflePartitions} openaire ${wfAppPath}/format_ranking_results.py#format_ranking_results.py yarn-cluster cluster Openaire-DOI synonym collection map_openaire_ids_to_dois.py --executor-memory=${sparkHighExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkHighDriverMemory} --conf spark.sql.shuffle.partitions=${sparkShufflePartitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} ${openaireDataInput}/ ${synonymFolder} ${wfAppPath}/map_openaire_ids_to_dois.py#map_openaire_ids_to_dois.py yarn-cluster cluster Mapping Openaire Scores to DOIs map_scores_to_dois.py --executor-memory=${sparkHighExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkHighDriverMemory} --conf spark.sql.shuffle.partitions=${sparkShufflePartitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} ${synonymFolder} ${sparkShufflePartitions} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['pr_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['attrank_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['cc_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['impulse_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['ram_file']} ${wfAppPath}/map_scores_to_dois.py#map_scores_to_dois.py yarn-cluster cluster Project Impact Indicators calculation projects_impact.py --executor-memory=${sparkHighExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkNormalDriverMemory} --conf spark.sql.shuffle.partitions=${sparkShufflePartitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} ${openaireDataInput}/relation ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['pr_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['attrank_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['cc_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['impulse_file']} ${sparkShufflePartitions} ${projectImpactIndicatorsOutput} ${wfAppPath}/projects_impact.py#projects_impact.py yarn-cluster cluster Produces the atomic action with the bip finder scores eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkNormalExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkNormalDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --resultsInputPath${bipScorePath} --projectsInputPath${projectImpactIndicatorsOutput} --outputPath${actionSetOutputPath} Creation of openaire-graph failed, error message[${wf:errorMessage(wf:lastErrorNode())}] CC failed, error message[${wf:errorMessage(wf:lastErrorNode())}] RAM failed, error message[${wf:errorMessage(wf:lastErrorNode())}] Impulse failed, error message[${wf:errorMessage(wf:lastErrorNode())}] PageRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}] AttRank failed, error message[${wf:errorMessage(wf:lastErrorNode())}] Error getting key-value pairs for output files, error message[${wf:errorMessage(wf:lastErrorNode())}] Error formatting json files, error message[${wf:errorMessage(wf:lastErrorNode())}] Error formatting BIP files, error message[${wf:errorMessage(wf:lastErrorNode())}] Synonym collection failed, error message[${wf:errorMessage(wf:lastErrorNode())}] Mapping scores to DOIs failed, error message[${wf:errorMessage(wf:lastErrorNode())}] Deleting output path for actionsets failed, error message[${wf:errorMessage(wf:lastErrorNode())}] ActionSet creation for results failed, error message[${wf:errorMessage(wf:lastErrorNode())}] Calculating project impact indicators failed, error message[${wf:errorMessage(wf:lastErrorNode())}] Re-create working dir failed, error message[${wf:errorMessage(wf:lastErrorNode())}]