diff --git a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml index 65067dacef..349e054d8c 100644 --- a/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-impact-indicators/src/main/resources/eu/dnetlib/dhp/oa/graph/impact_indicators/oozie_app/workflow.xml @@ -13,7 +13,6 @@ - @@ -38,27 +37,14 @@ - + - - - - - yarn-cluster cluster - - - Openaire Ranking Graph Creation - + OpenAIRE Ranking Graph Creation create_openaire_ranking_graph.py - --executor-memory=${sparkHighExecutorMemory} @@ -80,39 +66,30 @@ ${sparkShufflePartitions} ${openaireGraphInputPath} - + ${wfAppPath}/create_openaire_ranking_graph.py#create_openaire_ranking_graph.py - - - - + - - yarn-cluster cluster - - - Spark CC - + Citation Count calculation CC.py - --executor-memory=${sparkHighExecutorMemory} @@ -129,31 +106,23 @@ ${openaireGraphInputPath} ${sparkShufflePartitions} - + ${wfAppPath}/bip-ranker/CC.py#CC.py - - - + - - yarn-cluster cluster - - - Spark RAM - + RAM calculation TAR.py - --executor-memory=${sparkHighExecutorMemory} @@ -171,37 +140,27 @@ ${ramGamma} ${currentYear} RAM - ${sparkShufflePartitions} ${checkpointDir} - + ${wfAppPath}/bip-ranker/TAR.py#TAR.py - - - + - - - yarn-cluster cluster - - - Spark Impulse - + Impulse calculation CC.py - --executor-memory=${sparkHighExecutorMemory} @@ -219,47 +178,22 @@ ${sparkShufflePartitions} 3 - + ${wfAppPath}/bip-ranker/CC.py#CC.py - - - - - - - - - - - - - - - - yarn-cluster cluster - - - Spark Pagerank - + Pagerank calculation PageRank.py - --executor-memory=${sparkHighExecutorMemory} @@ -280,31 +214,22 @@ ${sparkShufflePartitions} dfs - + ${wfAppPath}/bip-ranker/PageRank.py#PageRank.py - - - - - yarn-cluster cluster - - - Spark AttRank - + AttRank calculation AttRank.py - --executor-memory=${sparkHighExecutorMemory} @@ -330,27 +255,16 @@ ${sparkShufflePartitions} dfs - + ${wfAppPath}/bip-ranker/AttRank.py#AttRank.py - - - - - - - @@ -360,15 +274,12 @@ ${workingDir} - ${wfAppPath}/get_ranking_files.sh#get_ranking_files.sh - - @@ -383,18 +294,12 @@ - - yarn-cluster cluster - - Format Ranking Results JSON - format_ranking_results.py - --executor-memory=${sparkNormalExecutorMemory} @@ -419,13 +324,11 @@ ${sparkShufflePartitions} openaire - + ${wfAppPath}/format_ranking_results.py#format_ranking_results.py - - @@ -471,18 +374,15 @@ ${wfAppPath}/format_ranking_results.py#format_ranking_results.py - - - + - + - @@ -490,15 +390,10 @@ - yarn-cluster cluster - - Openaire-DOI synonym collection - map_openaire_ids_to_dois.py - --executor-memory=${sparkHighExecutorMemory} @@ -515,19 +410,16 @@ ${openaireDataInput}/ ${synonymFolder} - + ${wfAppPath}/map_openaire_ids_to_dois.py#map_openaire_ids_to_dois.py - - - - + @@ -535,12 +427,8 @@ yarn-cluster cluster - - Mapping Openaire Scores to DOIs - map_scores_to_dois.py - --executor-memory=${sparkHighExecutorMemory} @@ -564,18 +452,15 @@ ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['impulse_file']} ${nameNode}/${workingDir}/${wf:actionData('get-file-names')['ram_file']} - ${wfAppPath}/map_scores_to_dois.py#map_scores_to_dois.py - - - + @@ -590,11 +475,13 @@ + yarn-cluster cluster Produces the atomic action with the bip finder scores for publications eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob dhp-aggregation-${projectVersion}.jar + --executor-memory=${sparkNormalExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -609,23 +496,19 @@ --outputPath${actionSetOutputPath}/results/ --targetEntityresult + + - - yarn-cluster cluster - - - Project Impact Indicators - + Project Impact Indicators calculation projects_impact.py - --executor-memory=${sparkHighExecutorMemory} @@ -639,7 +522,6 @@ - ${openaireDataInput}/relation @@ -653,26 +535,23 @@ ${sparkShufflePartitions} ${projectImpactIndicatorsOutput} - - ${wfAppPath}/projects_impact.py#projects_impact.py - - - + yarn-cluster cluster Produces the atomic action with the bip finder scores for projects eu.dnetlib.dhp.actionmanager.bipfinder.SparkAtomicActionScoreJob dhp-aggregation-${projectVersion}.jar + --executor-memory=${sparkNormalExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -683,12 +562,15 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + --inputPath${projectImpactIndicatorsOutput} --outputPath${actionSetOutputPath}/projects/ --targetEntityproject + +