diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml
index dc0529012..0cf6cdd05 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml
@@ -13,6 +13,23 @@
             <name>isLookupUrl</name>
             <description>the address of the lookUp service</description>
         </property>
+        <property>
+            <name>shouldCleanContext</name>
+            <description>true if the context has to be cleaned</description>
+        </property>
+        <property>
+            <name>contextId</name>
+            <value>sobigdata</value>
+            <description>The context id to be removed from the result when the condition is met.
+                For now it is just sobigdata; a future implementation will take the contextId as a value in a JSON
+                that also specifies the constraints to be verified in order to remove the context from the result.</description>
+        </property>
+        <property>
+            <name>verifyParam</name>
+            <value>gcube</value>
+            <description>The constraint to be verified. For now it is hardcoded as gcube and it is searched for in
+                the title: if the title starts with gcube, the sobigdata context, if present, is removed from the result.</description>
+        </property>
         <property>
             <name>sparkDriverMemory</name>
@@ -275,7 +292,131 @@
-
+
+    <decision name="clean_context">
+        <switch>
+            <case to="fork_clean_context">${wf:conf('shouldCleanContext') eq true}</case>
+            <default to="End"/>
+        </switch>
+    </decision>
+
+    <fork name="fork_clean_context">
+        <path start="clean_publication_context"/>
+        <path start="clean_dataset_context"/>
+        <path start="clean_otherresearchproduct_context"/>
+        <path start="clean_software_context"/>
+    </fork>
+
+    <action name="clean_publication_context">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Clean publications context</name>
+            <class>eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-cores=${sparkExecutorCores}
+                --executor-memory=${sparkExecutorMemory}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.shuffle.partitions=7680
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${graphOutputPath}/publication</arg>
+            <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
+            <arg>--workingPath</arg><arg>${workingDir}/working/publication</arg>
+            <arg>--contextId</arg><arg>${contextId}</arg>
+            <arg>--verifyParam</arg><arg>${verifyParam}</arg>
+        </spark>
+        <ok to="wait_clean_context"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="clean_dataset_context">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Clean datasets context</name>
+            <class>eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-cores=${sparkExecutorCores}
+                --executor-memory=${sparkExecutorMemory}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.shuffle.partitions=7680
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${graphOutputPath}/dataset</arg>
+            <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
+            <arg>--workingPath</arg><arg>${workingDir}/working/dataset</arg>
+            <arg>--contextId</arg><arg>${contextId}</arg>
+            <arg>--verifyParam</arg><arg>${verifyParam}</arg>
+        </spark>
+        <ok to="wait_clean_context"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="clean_otherresearchproduct_context">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Clean otherresearchproducts context</name>
+            <class>eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-cores=${sparkExecutorCores}
+                --executor-memory=${sparkExecutorMemory}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.shuffle.partitions=7680
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${graphOutputPath}/otherresearchproduct</arg>
+            <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
+            <arg>--workingPath</arg><arg>${workingDir}/working/otherresearchproduct</arg>
+            <arg>--contextId</arg><arg>${contextId}</arg>
+            <arg>--verifyParam</arg><arg>${verifyParam}</arg>
+        </spark>
+        <ok to="wait_clean_context"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="clean_software_context">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Clean software context</name>
+            <class>eu.dnetlib.dhp.oa.graph.clean.CleanContextSparkJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-cores=${sparkExecutorCores}
+                --executor-memory=${sparkExecutorMemory}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.shuffle.partitions=7680
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${graphOutputPath}/software</arg>
+            <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
+            <arg>--workingPath</arg><arg>${workingDir}/working/software</arg>
+            <arg>--contextId</arg><arg>${contextId}</arg>
+            <arg>--verifyParam</arg><arg>${verifyParam}</arg>
+        </spark>
+        <ok to="wait_clean_context"/>
+        <error to="Kill"/>
+    </action>
+
+    <join name="wait_clean_context" to="End"/>
\ No newline at end of file
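
Note on the cleaning rule: the four Spark actions above all invoke `CleanContextSparkJob` with the same `contextId`/`verifyParam` pair, so per record the logic described in the property descriptions (if a title starts with `gcube`, drop the `sobigdata` context) can be sketched as below. This is a minimal illustration under stated assumptions, not the actual job: the `Result` and `Context` classes here are simplified stand-ins for the `eu.dnetlib.dhp.schema.oaf` model, and `cleanContext` is a hypothetical helper.

```java
import java.util.ArrayList;
import java.util.List;

// Minimal sketch of the per-record rule applied by the clean_context actions,
// assuming simplified stand-ins for the eu.dnetlib.dhp.schema.oaf classes.
public class CleanContextSketch {

    static class Context {
        String id;
        Context(String id) { this.id = id; }
    }

    static class Result {
        List<String> titles = new ArrayList<>();
        List<Context> context = new ArrayList<>();
    }

    // Removes every context whose id matches contextId (e.g. "sobigdata")
    // when at least one title starts with verifyParam (e.g. "gcube").
    static void cleanContext(Result r, String contextId, String verifyParam) {
        boolean matches = r.titles.stream()
            .anyMatch(t -> t.toLowerCase().startsWith(verifyParam.toLowerCase()));
        if (!matches) {
            return; // condition not met: the context list is left untouched
        }
        r.context.removeIf(c -> c.id.equalsIgnoreCase(contextId));
    }

    public static void main(String[] args) {
        Result r = new Result();
        r.titles.add("gcube: a service-oriented infrastructure");
        r.context.add(new Context("sobigdata"));
        r.context.add(new Context("dh-ch"));

        cleanContext(r, "sobigdata", "gcube");
        // Prints only "dh-ch": the "sobigdata" context has been dropped.
        r.context.forEach(c -> System.out.println(c.id));
    }
}
```

The workflow runs this job once per entity type (publication, dataset, otherresearchproduct, software) in a fork/join, which is why the four actions differ only in `--inputPath`, `--graphTableClassName`, and `--workingPath`.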