diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index d4c6ade434..506d86a081 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -24,7 +24,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -40,11 +40,37 @@ --workingPath${crossrefDumpPath} --outputPath${workingDir}/files/ - + - + + + yarn-cluster + cluster + SparkGenerateCrossrefDataset + eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn-cluster + --sourcePath${crossrefDumpPath}/files + --targetPath${inputPathCrossref}/crossref_ds + + + + + + + yarn-cluster cluster @@ -62,8 +88,8 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --masteryarn-cluster - --sourcePath${workingDir}/files - --targetPath${inputPathCrossref}/crossref_ds + --sourcePath${inputPathCrossref}/crossref_ds + --targetPath${inputPathCrossref}/crossref_ds_updates