diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml
deleted file mode 100644
index 508202e30..000000000
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
- jobTracker
- yarnRM
-
-
- nameNode
- hdfs://nameservice1
-
-
- oozie.use.system.libpath
- true
-
-
- oozie.action.sharelib.for.spark
- spark2
-
-
- oozie.launcher.mapreduce.user.classpath.first
- true
-
-
- hive_metastore_uris
- thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
-
-
- spark2YarnHistoryServerAddress
- http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
-
-
- spark2EventLogDir
- /user/spark/spark2ApplicationHistory
-
-
- spark2ExtraListeners
- "com.cloudera.spark.lineage.NavigatorAppListener"
-
-
- spark2SqlQueryExecutionListeners
- "com.cloudera.spark.lineage.NavigatorQueryListener"
-
-
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml
index 6e4f17912..e69de29bb 100644
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml
@@ -1,128 +0,0 @@
-
-
-
- sparkDriverMemory
- memory for driver process
-
-
- sparkExecutorMemory
- memory for individual executor
-
-
- sparkExecutorCores
- number of cores used by single executor
-
-
-
-
- crossrefdumpfilename
- the Crossref input path
-
-
- crossrefDumpPath
- the Crossref dump path
-
-
- crossrefdumptoken
- the token for the API dump path
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
-
-
- oozie.action.sharelib.for.spark
- ${oozieActionShareLibForSpark2}
-
-
-
-
-
-
-
-
- Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
-
-
- mapred.job.queue.name
- ${queueName}
-
-
- download.sh
- ${url}
- ${crossrefDumpPath}
- ${crossrefdumpfilename}
- ${crossrefdumptoken}
- HADOOP_USER_NAME=${wf:user()}
- download.sh
-
-
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
- eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords
- --hdfsServerUri${nameNode}
- --crossrefFileNameTarGz${crossrefdumpfilename}
- --workingPath${crossrefDumpPath}
- --outputPath${crossrefDumpPath}/files/
-
-
-
-
-
-
-
- yarn-cluster
- cluster
- SparkUnpackCrossrefEntries
- eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries
- dhp-doiboost-${projectVersion}.jar
-
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.sql.shuffle.partitions=3840
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --masteryarn-cluster
- --sourcePath${crossrefDumpPath}/files
- --targetPath${crossrefDumpPath}/crossref_unpack/
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml
index de433b038..ab3b9593e 100644
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml
@@ -1,4 +1,4 @@
-
+
sparkDriverMemory
@@ -63,12 +63,10 @@
- ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'}
- ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'}
${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'}
${wf:conf('resumeFrom') eq 'ConvertMagToDataset'}
${wf:conf('resumeFrom') eq 'PreProcessORCID'}
-
+
@@ -77,46 +75,6 @@
-
-
- ${jobTracker}
- ${nameNode}
- eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords
- --hdfsServerUri${nameNode}
- --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz
- --workingPath${crossrefDumpPath}
- --outputPath${crossrefDumpPath}/files/
-
-
-
-
-
-
-
- yarn-cluster
- cluster
- SparkUnpackCrossrefEntries
- eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries
- dhp-doiboost-${projectVersion}.jar
-
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.sql.shuffle.partitions=3840
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --masteryarn-cluster
- --sourcePath${crossrefDumpPath}/files
- --targetPath${crossrefDumpPath}/crossref_unpack/
-
-
-
-
-
-
yarn-cluster
@@ -139,18 +97,11 @@
--targetPath${inputPathCrossref}/crossref_ds
-
+
-
-
-
-
-
-
-
-
+
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml
index d3009a570..f5596b60e 100644
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml
@@ -1,4 +1,4 @@
-
+
sparkDriverMemory
@@ -75,6 +75,7 @@
+ ${wf:conf('resumeFrom') eq 'Skip'}
${wf:conf('resumeFrom') eq 'PreprocessMag'}
${wf:conf('resumeFrom') eq 'PreprocessUW'}
${wf:conf('resumeFrom') eq 'ProcessORCID'}