From b33dd58be4dbaeb841363c61d95f3f9bab890d25 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 22 May 2020 08:50:06 +0200 Subject: [PATCH] replaced parameter 'reuseRecords' with 'resumeFrom', allowing the provision workflow execution to be restarted from any step; useful for manual submissions or debugging --- .../dhp/oa/provision/oozie_app/workflow.xml | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index fd8f5ba89a..6983ecf53c 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -84,7 +84,6 @@ sparkNetworkTimeout configures spark.network.timeout - @@ -98,12 +97,16 @@ - + - + - ${wf:conf('reuseRecords') eq false} - ${wf:conf('reuseRecords') eq true} + ${wf:conf('resumeFrom') eq 'prepare_relations'} + ${wf:conf('resumeFrom') eq 'fork_join_related_entities'} + ${wf:conf('resumeFrom') eq 'join_all_entities'} + ${wf:conf('resumeFrom') eq 'adjancency_lists'} + ${wf:conf('resumeFrom') eq 'convert_to_xml'} + ${wf:conf('resumeFrom') eq 'to_solr_index'} @@ -131,9 +134,7 @@ --inputRelationsPath${inputGraphRootPath}/relation --outputPath${workingDir}/relation - --relPartitions${relPartitions} - --relationFilter${relationFilter} - --maxRelations${maxRelations} + --relPartitions3000 @@ -340,7 +341,6 @@ - yarn @@ -362,7 +362,7 @@ --inputGraphRootPath${inputGraphRootPath} --inputRelatedEntitiesPath${workingDir}/join_partial --outputPath${workingDir}/join_entities - --numPartitions24000 + --numPartitions12000 @@ -386,7 +386,7 @@ --conf spark.sql.shuffle.partitions=7680 --conf spark.network.timeout=${sparkNetworkTimeout} - 
--inputPath${workingDir}/join_entities + --inputPath ${workingDir}/join_entities --outputPath${workingDir}/joined