diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/ircdl_extention/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/ircdl_extention/oozie_app/workflow.xml
index 29746e3287..3bae12d1a9 100644
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/ircdl_extention/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/ircdl_extention/oozie_app/workflow.xml
@@ -12,12 +12,48 @@
-
+
+
+
+
+
-
+
+
+
+ yarn
+ cluster
+ PrepareResult
+ eu.dnetlib.dhp.ircdl_extention.PrepareNormalizedOrcid
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=3840
+
+ --inputPath${orcidInputPath}
+ --outputPath${workingDir}/ORCID/entrySetMayNormalized/
+
+
+
+
+
+
+
+
+
+
+
+
+
yarn
cluster
@@ -34,11 +70,121 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
+ --inputPath${inputPath}/publication
+ --resultClasseu.dnetlib.dhp.schema.oaf.Publication
+ --outputPath${workingDir}/GRAPH/publicationsWithOrcid
-
+
+
+
+ yarn
+ cluster
+ PrepareResult
+ eu.dnetlib.dhp.ircdl_extention.PrepareResultSpark
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=3840
+
+ --inputPath${inputPath}/dataset
+ --resultClasseu.dnetlib.dhp.schema.oaf.Dataset
+ --outputPath${workingDir}/GRAPH/datasetWithOrcid
+
+
+
+
+
+
+
+ yarn
+ cluster
+ PrepareResult
+ eu.dnetlib.dhp.ircdl_extention.PrepareResultSpark
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=3840
+
+ --inputPath${inputPath}/software
+ --resultClasseu.dnetlib.dhp.schema.oaf.Software
+ --outputPath${workingDir}/GRAPH/softwareWithOrcid
+
+
+
+
+
+
+
+ yarn
+ cluster
+ PrepareResult
+ eu.dnetlib.dhp.ircdl_extention.PrepareResultSpark
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=3840
+
+ --inputPath${inputPath}/otherresearchproduct
+ --resultClasseu.dnetlib.dhp.schema.oaf.OtherResearchProduct
+ --outputPath${workingDir}/GRAPH/otherWithOrcid
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ PrepareResult
+ eu.dnetlib.dhp.ircdl_extention.PrepareNormalizedResultSpark
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=3840
+
+ --inputPath${workingDir}/GRAPH/
+ --outputPath${workingDir}/GRAPH/Normalized/
+
+
+
+
+
+
+
+
+
+
@@ -58,8 +204,11 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
+ --inputPath${workingDir}/GRAPH/Normalized/ResultWithOrcid/
+ --outputPath${workingDir}/GRAPH/InstRepo/
+ --datasourcePath${datasourcePath}
-
+
@@ -80,10 +229,13 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
+ --inputPath${workingDir}/GRAPH/Normalized/ResultWithOrcid/
+ --outputPath${workingDir}/GRAPH/Datacite/
-
+
+
yarn
@@ -101,10 +253,16 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
+ --inputPath${workingDir}/GRAPH/Normalized/ResultWithOrcid/
+ --outputPath${workingDir}/GRAPH/Crossref/
-
+
+
+
+
+
yarn
@@ -122,10 +280,200 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
+ --inputPath${workingDir}/GRAPH/Normalized/ResultWithOrcid/
+ --outputPath${workingDir}/GRAPH/AllTheRest/
+ --instRepoPath${workingDir}/GRAPH/InstRepo/
+ --datacitePath${workingDir}/GRAPH/Datacite/
+ --crossrefPath${workingDir}/GRAPH/Crossref/
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ GetResultInstRepo
+ eu.dnetlib.dhp.ircdl_extention.WrongSpark
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=3840
+
+ --inputPath${workingDir}/GRAPH/InstRepo/
+ --outputPath${outputPath}/InstRepo/
+ --orcidPath${workingDir}/ORCID/entrySetMayNormalized/
+
+
+
+
+
+
+ yarn
+ cluster
+ GetResultInstRepo
+ eu.dnetlib.dhp.ircdl_extention.WrongSpark
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=3840
+
+ --inputPath${workingDir}/GRAPH/Datacite/allDatacite/
+ --outputPath${outputPath}
+ --orcidPath${workingDir}/ORCID/entrySetMayNormalized/
+
+
+
+
+
+
+
+ yarn
+ cluster
+ GetResultInstRepo
+ eu.dnetlib.dhp.ircdl_extention.WrongSpark
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=3840
+
+ --inputPath${workingDir}/GRAPH/Crossref/
+ --outputPath${outputPath}/Crossref/
+ --orcidPath${workingDir}/ORCID/entrySetMayNormalized/
+
+
+
+
+
+
+
+ yarn
+ cluster
+ GetResultInstRepo
+ eu.dnetlib.dhp.ircdl_extention.WrongSpark
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=3840
+
+ --inputPath${workingDir}/GRAPH/AllTheRest/
+ --outputPath${outputPath}/AllTheRest/
+ --orcidPath${workingDir}/ORCID/entrySetMayNormalized/
+
+
+
+
+
+
+
+ yarn
+ cluster
+ GetResultInstRepo
+ eu.dnetlib.dhp.ircdl_extention.WrongSpark
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=3840
+
+ --inputPath${workingDir}/GRAPH/Datacite/Zenodo/
+ --outputPath${outputPath}/Zenodo/
+ --orcidPath${workingDir}/ORCID/entrySetMayNormalized/
+
+
+
+
+
+
+ yarn
+ cluster
+ GetResultInstRepo
+ eu.dnetlib.dhp.ircdl_extention.WrongSpark
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=3840
+
+ --inputPath${workingDir}/GRAPH/Datacite/Figshare/
+ --outputPath${outputPath}/Figshare/
+ --orcidPath${workingDir}/ORCID/entrySetMayNormalized/
+
+
+
+
+
+
+ yarn
+ cluster
+ GetResultInstRepo
+ eu.dnetlib.dhp.ircdl_extention.WrongSpark
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-cores=${sparkExecutorCores}
+ --executor-memory=${sparkExecutorMemory}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.shuffle.partitions=3840
+
+ --inputPath${workingDir}/GRAPH/Datacite/Dryad/
+ --outputPath${outputPath}/Dryad/
+ --orcidPath${workingDir}/ORCID/entrySetMayNormalized/
+
+
+
+
+
+
\ No newline at end of file