diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index fd8f5ba89..6983ecf53 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -84,7 +84,6 @@ sparkNetworkTimeout configures spark.network.timeout - @@ -98,12 +97,16 @@ - + - + - ${wf:conf('reuseRecords') eq false} - ${wf:conf('reuseRecords') eq true} + ${wf:conf('resumeFrom') eq 'prepare_relations'} + ${wf:conf('resumeFrom') eq 'fork_join_related_entities'} + ${wf:conf('resumeFrom') eq 'join_all_entities'} + ${wf:conf('resumeFrom') eq 'adjancency_lists'} + ${wf:conf('resumeFrom') eq 'convert_to_xml'} + ${wf:conf('resumeFrom') eq 'to_solr_index'} @@ -131,9 +134,7 @@ --inputRelationsPath${inputGraphRootPath}/relation --outputPath${workingDir}/relation - --relPartitions${relPartitions} - --relationFilter${relationFilter} - --maxRelations${maxRelations} + --relPartitions3000 @@ -340,7 +341,6 @@ - yarn @@ -362,7 +362,7 @@ --inputGraphRootPath${inputGraphRootPath} --inputRelatedEntitiesPath${workingDir}/join_partial --outputPath${workingDir}/join_entities - --numPartitions24000 + --numPartitions12000 @@ -386,7 +386,7 @@ --conf spark.sql.shuffle.partitions=7680 --conf spark.network.timeout=${sparkNetworkTimeout} - --inputPath${workingDir}/join_entities + --inputPath ${workingDir}/join_entities --outputPath${workingDir}/joined