diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json
deleted file mode 100644
index 63e0803372..0000000000
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json
+++ /dev/null
@@ -1,21 +0,0 @@
-[
- {
- "paramName": "s",
- "paramLongName": "sourcePath",
- "paramDescription": "the source mdstore path",
- "paramRequired": true
- },
-
- {
- "paramName": "t",
- "paramLongName": "targetPath",
- "paramDescription": "the target mdstore path",
- "paramRequired": true
- },
- {
- "paramName": "m",
- "paramLongName": "master",
- "paramDescription": "the master name",
- "paramRequired": true
- }
-]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml
deleted file mode 100644
index 508202e301..0000000000
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
- jobTracker
- yarnRM
-
-
- nameNode
- hdfs://nameservice1
-
-
- oozie.use.system.libpath
- true
-
-
- oozie.action.sharelib.for.spark
- spark2
-
-
- oozie.launcher.mapreduce.user.classpath.first
- true
-
-
- hive_metastore_uris
- thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
-
-
- spark2YarnHistoryServerAddress
- http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
-
-
- spark2EventLogDir
- /user/spark/spark2ApplicationHistory
-
-
- spark2ExtraListeners
- "com.cloudera.spark.lineage.NavigatorAppListener"
-
-
- spark2SqlQueryExecutionListeners
- "com.cloudera.spark.lineage.NavigatorQueryListener"
-
-
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml
deleted file mode 100644
index 506d86a081..0000000000
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml
+++ /dev/null
@@ -1,118 +0,0 @@
-
-
-
- crossrefDumpPath
- the working dir base path
-
-
- inputPathCrossref
- the working dir base path
-
-
- sparkDriverMemory
- memory for driver process
-
-
- sparkExecutorMemory
- memory for individual executor
-
-
- sparkExecutorCores
- 2
- number of cores used by single executor
-
-
-
-
-
-
-
- Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
-
-
-
- ${jobTracker}
- ${nameNode}
- eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords
- --hdfsServerUri${nameNode}
- --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz
- --workingPath${crossrefDumpPath}
- --outputPath${workingDir}/files/
-
-
-
-
-
-
-
- yarn-cluster
- cluster
- SparkGenerateCrossrefDataset
- eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries
- dhp-doiboost-${projectVersion}.jar
-
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.sql.shuffle.partitions=3840
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --masteryarn-cluster
- --sourcePath${crossrefDumpPath}/files
- --targetPath${inputPathCrossref}/crossref_ds
-
-
-
-
-
-
-
-
- yarn-cluster
- cluster
- SparkGenerateCrossrefDataset
- eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset
- dhp-doiboost-${projectVersion}.jar
-
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.sql.shuffle.partitions=3840
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --masteryarn-cluster
- --sourcePath${inputPathCrossref}/crossref_ds
- --targetPath${inputPathCrossref}/crossref_ds_updates
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml
deleted file mode 100644
index 508202e301..0000000000
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
- jobTracker
- yarnRM
-
-
- nameNode
- hdfs://nameservice1
-
-
- oozie.use.system.libpath
- true
-
-
- oozie.action.sharelib.for.spark
- spark2
-
-
- oozie.launcher.mapreduce.user.classpath.first
- true
-
-
- hive_metastore_uris
- thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
-
-
- spark2YarnHistoryServerAddress
- http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
-
-
- spark2EventLogDir
- /user/spark/spark2ApplicationHistory
-
-
- spark2ExtraListeners
- "com.cloudera.spark.lineage.NavigatorAppListener"
-
-
- spark2SqlQueryExecutionListeners
- "com.cloudera.spark.lineage.NavigatorQueryListener"
-
-
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh
deleted file mode 100644
index 1bb7aff1f0..0000000000
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-curl -LSs -H "Crossref-Plus-API-Token: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwOi8vY3Jvc3NyZWYub3JnLyIsImF1ZCI6Im1kcGx1cyIsImp0aSI6Ijk3YTZkNGVkLTg5MjktNGQ2Yi05NWY1LTY2YmMyNDgzNTRjNCJ9.5DPM4gRibUBYBtrUSpRz3RGHYVB-8f61jQBW_q-r-hs" $1 | hdfs dfs -put - $2/$3
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh
deleted file mode 100644
index 30386d6134..0000000000
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-curl -LSs $1 | hdfs dfs -put - $2/$3
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml
deleted file mode 100644
index 91de3bfb37..0000000000
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml
+++ /dev/null
@@ -1,121 +0,0 @@
-
-
-
- sparkDriverMemory
- memory for driver process
-
-
- sparkExecutorMemory
- memory for individual executor
-
-
- sparkExecutorCores
- number of cores used by single executor
-
-
-
-
- crossrefdumpfilename
- the Crossref input path
-
-
- crossrefDumpPath
- the Crossref dump path
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
-
-
- oozie.action.sharelib.for.spark
- ${oozieActionShareLibForSpark2}
-
-
-
-
-
-
-
-
- Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
-
-
- mapred.job.queue.name
- ${queueName}
-
-
- download.sh
- ${url}
- ${crossrefDumpPath}
- ${crossrefdumpfilename}
- HADOOP_USER_NAME=${wf:user()}
- download.sh
-
-
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
- eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords
- --hdfsServerUri${nameNode}
- --crossrefFileNameTarGz${crossrefdumpfilename}
- --workingPath${crossrefDumpPath}
- --outputPath${crossrefDumpPath}/files/
-
-
-
-
-
-
-
- yarn-cluster
- cluster
- SparkUnpackCrossrefEntries
- eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries
- dhp-doiboost-${projectVersion}.jar
-
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.sql.shuffle.partitions=3840
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --masteryarn-cluster
- --sourcePath${crossrefDumpPath}/files
- --targetPath${crossrefDumpPath}/crossref_unpack/
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh
deleted file mode 100644
index 30386d6134..0000000000
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-curl -LSs $1 | hdfs dfs -put - $2/$3
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml
index ecaeda7091..3700ce5d97 100644
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml
@@ -63,14 +63,10 @@
- ${wf:conf('resumeFrom') eq 'Skip'}
- ${wf:conf('resumeFrom') eq 'ImportCrossRef'}
- ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'}
- ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'}
${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'}
${wf:conf('resumeFrom') eq 'ConvertMagToDataset'}
${wf:conf('resumeFrom') eq 'PreProcessORCID'}
-
+
@@ -79,67 +75,6 @@
-
-
- ${jobTracker}
- ${nameNode}
-
-
- mapred.job.queue.name
- ${queueName}
-
-
- download.sh
- ${url}
- ${crossrefDumpPath}
- ${crossrefdumpfilename}
- download.sh
-
-
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
- eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords
- --hdfsServerUri${nameNode}
- --crossrefFileNameTarGz${crossrefdumpfilename}
- --workingPath${crossrefDumpPath}
- --outputPath${crossrefDumpPath}/files/
-
-
-
-
-
-
-
- yarn-cluster
- cluster
- SparkUnpackCrossrefEntries
- eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries
- dhp-doiboost-${projectVersion}.jar
-
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.sql.shuffle.partitions=3840
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --masteryarn-cluster
- --sourcePath${crossrefDumpPath}/files
- --targetPath${crossrefDumpPath}/crossref_unpack/
-
-
-
-
-
-
yarn-cluster
@@ -166,14 +101,7 @@
-
-
-
-
-
-
-
-
+