diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json
index a8be7c32e..dbe2d088f 100644
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json
@@ -1,10 +1,5 @@
[
- {
- "paramName":"is",
- "paramLongName":"isLookUpUrl",
- "paramDescription": "URL of the isLookUp Service",
- "paramRequired": true
- },
+
{
"paramName":"s",
"paramLongName":"sourcePath",
@@ -17,12 +12,7 @@
"paramDescription": "the json path associated to each selection field",
"paramRequired": true
},
- {
- "paramName":"tn",
- "paramLongName":"resultTableName",
- "paramDescription": "the name of the result table we are currently working on",
- "paramRequired": true
- },
+
{
"paramName": "out",
"paramLongName": "outputPath",
@@ -35,17 +25,19 @@
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
- {
- "paramName": "test",
- "paramLongName": "isTest",
- "paramDescription": "Parameter intended for testing purposes only. True if the reun is relatesd to a test and so the taggingConf parameter should be loaded",
- "paramRequired": false
- },
+
{
"paramName": "tg",
"paramLongName": "taggingConf",
"paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed",
"paramRequired": false
+ },
+
+ {
+ "paramName": "p",
+ "paramLongName": "production",
+ "paramDescription": "true if the job has to run against the production environment, false otherwise",
+ "paramRequired": true
}
]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml
index fe82ae194..c92f559f9 100644
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml
@@ -45,10 +45,14 @@
sparkExecutorMemory
- 6G
+ 5G
- sparkExecutorCores
- 1
+ memoryOverhead
+ 3g
+
+
+ partitions
+ 3284
\ No newline at end of file
diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml
index b868e4c72..4b81c58e4 100644
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml
@@ -4,10 +4,6 @@
sourcePath
the source path
-
- isLookUpUrl
- the isLookup service endpoint
-
pathMap
the json path associated to each selection field
@@ -102,16 +98,9 @@
-
+
-
-
-
-
-
-
-
-
+
yarn-cluster
cluster
@@ -122,104 +111,23 @@
--num-executors=${sparkExecutorNumber}
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
+ --conf spark.executor.memoryOverhead=${memoryOverhead}
+ --conf spark.sql.shuffle.partitions=${partitions}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --sourcePath${sourcePath}/publication
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
- --outputPath${outputPath}/publication
+ --sourcePath${sourcePath}/
+ --outputPath${outputPath}/
--pathMap${pathMap}
- --isLookUpUrl${isLookUpUrl}
+ --production${production}
-
+
-
-
- yarn-cluster
- cluster
- bulkTagging-dataset
- eu.dnetlib.dhp.bulktag.SparkBulkTagJob
- dhp-enrichment-${projectVersion}.jar
-
- --num-executors=${sparkExecutorNumber}
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --sourcePath${sourcePath}/dataset
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
- --outputPath${outputPath}/dataset
- --pathMap${pathMap}
- --isLookUpUrl${isLookUpUrl}
-
-
-
-
-
-
-
- yarn-cluster
- cluster
- bulkTagging-orp
- eu.dnetlib.dhp.bulktag.SparkBulkTagJob
- dhp-enrichment-${projectVersion}.jar
-
- --num-executors=${sparkExecutorNumber}
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --sourcePath${sourcePath}/otherresearchproduct
- --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
- --outputPath${outputPath}/otherresearchproduct
- --pathMap${pathMap}
- --isLookUpUrl${isLookUpUrl}
-
-
-
-
-
-
-
- yarn-cluster
- cluster
- bulkTagging-software
- eu.dnetlib.dhp.bulktag.SparkBulkTagJob
- dhp-enrichment-${projectVersion}.jar
-
- --num-executors=${sparkExecutorNumber}
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-
- --sourcePath${sourcePath}/software
- --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
- --outputPath${outputPath}/software
- --pathMap${pathMap}
- --isLookUpUrl${isLookUpUrl}
-
-
-
-
-
-