diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/ircdl_extention/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/ircdl_extention/oozie_app/workflow.xml index 29746e3287..3bae12d1a9 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/ircdl_extention/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/ircdl_extention/oozie_app/workflow.xml @@ -12,12 +12,48 @@ - + + + + + - + + + + yarn + cluster + PrepareResult + eu.dnetlib.dhp.ircdl_extention.PrepareNormalizedOrcid + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --inputPath${orcidInputPath} + --outputPath${workingDir}/ORCID/entrySetMayNormalized/ + + + + + + + + + + + + + yarn cluster @@ -34,11 +70,121 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 + --inputPath${inputPath}/publication + --resultClasseu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/GRAPH/publicationsWithOrcid - + + + + yarn + cluster + PrepareResult + eu.dnetlib.dhp.ircdl_extention.PrepareResultSpark + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --inputPath${inputPath}/dataset + --resultClasseu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${workingDir}/GRAPH/datasetWithOrcid + + + + + + + + yarn + cluster + PrepareResult + eu.dnetlib.dhp.ircdl_extention.PrepareResultSpark + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --inputPath${inputPath}/software + --resultClasseu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/GRAPH/softwareWithOrcid + + + + + + + + yarn + cluster + PrepareResult + eu.dnetlib.dhp.ircdl_extention.PrepareResultSpark + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --inputPath${inputPath}/otherresearchproduct + --resultClasseu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${workingDir}/GRAPH/otherWithOrcid + + + + + + + + + + + yarn + cluster + PrepareResult + eu.dnetlib.dhp.ircdl_extention.PrepareNormalizedResultSpark + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --inputPath${workingDir}/GRAPH/ + --outputPath${workingDir}/GRAPH/Normalized/ + + + + + + + + + + @@ -58,8 +204,11 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 + --inputPath${workingDir}/GRAPH/Normalized/ResultWithOrcid/ + --outputPath${workingDir}/GRAPH/InstRepo/ + --datasourcePath${datasourcePath} - + @@ -80,10 +229,13 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 + --inputPath${workingDir}/GRAPH/Normalized/ResultWithOrcid/ + --outputPath${workingDir}/GRAPH/Datacite/ - + + yarn @@ -101,10 +253,16 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 + --inputPath${workingDir}/GRAPH/Normalized/ResultWithOrcid/ + --outputPath${workingDir}/GRAPH/Crossref/ - + + + + + yarn @@ -122,10 +280,200 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 + --inputPath${workingDir}/GRAPH/Normalized/ResultWithOrcid/ + --outputPath${workingDir}/GRAPH/AllTheRest/ + --instRepoPath${workingDir}/GRAPH/InstRepo/ + --datacitePath${workingDir}/GRAPH/Datacite/ + --crossrefPath${workingDir}/GRAPH/Crossref/ - + + + + + + + + + + + + + + + + yarn + cluster + GetResultInstRepo + eu.dnetlib.dhp.ircdl_extention.WrongSpark + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --inputPath${workingDir}/GRAPH/InstRepo/ + --outputPath${outputPath}/InstRepo/ + --orcidPath${workingDir}/ORCID/entrySetMayNormalized/ + + + + + + + yarn + cluster + GetResultInstRepo + eu.dnetlib.dhp.ircdl_extention.WrongSpark + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --inputPath${workingDir}/GRAPH/Datacite/allDatacite/ + --outputPath${outputPath} + --orcidPath${workingDir}/ORCID/entrySetMayNormalized/ + + + + + + + + yarn + cluster + GetResultInstRepo + eu.dnetlib.dhp.ircdl_extention.WrongSpark + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --inputPath${workingDir}/GRAPH/Crossref/ + --outputPath${outputPath}/Crossref/ + --orcidPath${workingDir}/ORCID/entrySetMayNormalized/ + + + + + + + + yarn + cluster + GetResultInstRepo + eu.dnetlib.dhp.ircdl_extention.WrongSpark + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --inputPath${workingDir}/GRAPH/AllTheRest/ + --outputPath${outputPath}/AllTheRest/ + --orcidPath${workingDir}/ORCID/entrySetMayNormalized/ + + + + + + + + yarn + cluster + GetResultInstRepo + eu.dnetlib.dhp.ircdl_extention.WrongSpark + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --inputPath${workingDir}/GRAPH/Datacite/Zenodo/ + --outputPath${outputPath}/Zenodo/ + --orcidPath${workingDir}/ORCID/entrySetMayNormalized/ + + + + + + + yarn + cluster + GetResultInstRepo + eu.dnetlib.dhp.ircdl_extention.WrongSpark + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --inputPath${workingDir}/GRAPH/Datacite/Figshare/ + --outputPath${outputPath}/Figshare/ + --orcidPath${workingDir}/ORCID/entrySetMayNormalized/ + + + + + + + yarn + cluster + GetResultInstRepo + eu.dnetlib.dhp.ircdl_extention.WrongSpark + dhp-aggregation-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --inputPath${workingDir}/GRAPH/Datacite/Dryad/ + --outputPath${outputPath}/Dryad/ + --orcidPath${workingDir}/ORCID/entrySetMayNormalized/ + + + + + + \ No newline at end of file