diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml index 5a943c673d..fa47e142d0 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml @@ -142,6 +142,32 @@ --workingPath${crossrefDumpPath} --outputPath${crossrefDumpPath}/files/ + + + + + + + yarn-cluster + cluster + SparkUnpackCrossrefEntries + eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn-cluster + --sourcePath${crossrefDumpPath}/files + --targetPath${crossrefDumpPath}/crossref_unpack/ + + @@ -155,7 +181,7 @@ dhp-doiboost-${projectVersion}.jar --executor-memory=7G - --executor-cores=4 + --executor-cores=2 --driver-memory=7G --conf spark.sql.shuffle.partitions=3840 --conf spark.extraListeners=${spark2ExtraListeners} @@ -164,7 +190,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --masteryarn-cluster - --sourcePath${crossrefDumpPath}/files/ + --sourcePath${crossrefDumpPath}/crossref_unpack/ --targetPath${inputPathCrossref}/crossref_ds @@ -174,7 +200,8 @@ - + +