diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_entity_parameter.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_entity_parameter.json
new file mode 100644
index 0000000000..87de13d63b
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_entity_parameter.json
@@ -0,0 +1,24 @@
+[
+
+ {
+ "paramName":"is",
+ "paramLongName":"isLookUpUrl",
+ "paramDescription": "URL of the isLookUp Service",
+ "paramRequired": false
+ },
+ {
+ "paramName": "hdfs",
+ "paramLongName": "hdfsPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "nn",
+ "paramLongName": "nameNode",
+ "paramDescription": "the name node",
+ "paramRequired": true
+ }
+]
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_organization_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_organization_parameters.json
new file mode 100644
index 0000000000..283f2d0aaf
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_organization_parameters.json
@@ -0,0 +1,36 @@
+[
+
+ {
+ "paramName":"ocm",
+ "paramLongName":"organizationCommunityMap",
+ "paramDescription": "the organization community map association",
+ "paramRequired": false
+ },
+ {
+ "paramName":"s",
+ "paramLongName":"sourcePath",
+ "paramDescription": "the path of the sequencial file to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ },
+ {
+ "paramName":"tn",
+ "paramLongName":"resultTableName",
+ "paramDescription": "the name of the result table we are currently working on",
+ "paramRequired": true
+ }
+]
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_parameters.json
similarity index 99%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json
rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_parameters.json
index b6dbbff9ea..a1407ae674 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_parameters.json
@@ -1,5 +1,3 @@
-
-
[
{
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_relationdump_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_relationdump_parameters.json
new file mode 100644
index 0000000000..2bfcac3bce
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_relationdump_parameters.json
@@ -0,0 +1,26 @@
+
+
+[
+
+ {
+ "paramName":"s",
+ "paramLongName":"sourcePath",
+ "paramDescription": "the path of the sequencial file to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ }
+]
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/oozie_app/config-default.xml
new file mode 100644
index 0000000000..e5ec3d0aee
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/oozie_app/config-default.xml
@@ -0,0 +1,30 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>hiveMetastoreUris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>hiveJdbcUrl</name>
+        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
+    </property>
+    <property>
+        <name>hiveDbName</name>
+        <value>openaire</value>
+    </property>
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+</configuration>
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/oozie_app/workflow.xml
new file mode 100644
index 0000000000..2de5616ebe
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/oozie_app/workflow.xml
@@ -0,0 +1,524 @@
+
+
+
+
+ sourcePath
+ the source path
+
+
+ isLookUpUrl
+ the isLookup service endpoint
+
+
+ outputPath
+ the output path
+
+
+ organizationCommunityMap
+ the organization community map
+
+
+
+ hiveDbName
+ the target hive database name
+
+
+ hiveJdbcUrl
+ hive server jdbc url
+
+
+ hiveMetastoreUris
+ hive server metastore URIs
+
+
+ sparkDriverMemory
+ memory for driver process
+
+
+ sparkExecutorMemory
+ memory for individual executor
+
+
+ sparkExecutorCores
+ number of cores used by single executor
+
+
+ oozieActionShareLibForSpark2
+ oozie action sharelib for spark 2.*
+
+
+ spark2ExtraListeners
+ com.cloudera.spark.lineage.NavigatorAppListener
+ spark 2.* extra listeners classname
+
+
+ spark2SqlQueryExecutionListeners
+ com.cloudera.spark.lineage.NavigatorQueryListener
+ spark 2.* sql query execution listeners classname
+
+
+ spark2YarnHistoryServerAddress
+ spark 2.* yarn history server address
+
+
+ spark2EventLogDir
+ spark 2.* event log dir location
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+
+
+ mapreduce.job.queuename
+ ${queueName}
+
+
+ oozie.launcher.mapred.job.queue.name
+ ${oozieLauncherQueueName}
+
+
+ oozie.action.sharelib.for.spark
+ ${oozieActionShareLibForSpark2}
+
+
+
+
+
+
+
+
+ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table publication
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/publication
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
+ --outputPath${workingDir}/result/publication
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table dataset
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/dataset
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
+ --outputPath${workingDir}/result/dataset
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table ORP
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/otherresearchproduct
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
+ --outputPath${workingDir}/result/otherresearchproduct
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table software
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/software
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
+ --outputPath${workingDir}/result/software
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table organization
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/organization
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Organization
+ --outputPath${outputPath}/organization
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table project
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/project
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Project
+ --outputPath${outputPath}/project
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table datasource
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/datasource
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Datasource
+ --outputPath${outputPath}/datasource
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table relation
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpRelationJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/relation
+ --outputPath${outputPath}/relation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.oa.graph.dump.graph.CreateContextEntities
+ --hdfsPath${outputPath}/context
+ --hdfsNameNode${nameNode}
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.oa.graph.dump.graph.CreateContextRelation
+ --hdfsPath${workingDir}/relation/context
+ --hdfsNameNode${nameNode}
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table relation
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkOrganizationRelation
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/relation
+ --outputPath${workingDir}/relation/contextOrg
+ --organizationCommunityMap${organizationCommunityMap}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Extract Relations from publication
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkExtractRelationFromEntities
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/publication
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
+ --outputPath${workingDir}/relation/publication
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table dataset
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkExtractRelationFromEntities
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/dataset
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
+ --outputPath${workingDir}/relation/dataset
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table ORP
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkExtractRelationFromEntities
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/otherresearchproduct
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
+ --outputPath${workingDir}/relation/orp
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table software
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkExtractRelationFromEntities
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/software
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
+ --outputPath${workingDir}/relation/software
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Collect Results and Relations and put them in the right path
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkCollectAndSave
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${workingDir}
+ --outputPath${outputPath}
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file