From c1e24602939e5e2085935963ad79973bdcfe7d20 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 20 Jan 2023 09:20:26 +0100 Subject: [PATCH] [cleaning] the datasource master-duplicate fixup should not be brought to production yet --- .../dhp/oa/graph/clean/oozie_app/workflow.xml | 196 +----------------- 1 file changed, 1 insertion(+), 195 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml index 683c2417b..ee79f4f1a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml @@ -582,201 +582,7 @@ - - - - - ${wf:conf('shouldClean') eq true} - - - - - - - eu.dnetlib.dhp.oa.graph.clean.MasterDuplicateAction - --postgresUrl${postgresURL} - --postgresUser${postgresUser} - --postgresPassword${postgresPassword} - --hdfsPath${workingDir}/masterduplicate - --hdfsNameNode${nameNode} - - - - - - - - - - - - - - - yarn - cluster - patch publication cfhb - eu.dnetlib.dhp.oa.graph.clean.cfhb.CleanCfHbSparkJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=7680 - - --inputPath${graphOutputPath}/publication - --resolvedPath${workingDir}/cfHbResolved/publication - --outputPath${workingDir}/cfHbPatched/publication - --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication - --masterDuplicatePath${workingDir}/masterduplicate - - - - - - - - yarn - cluster - patch dataset cfhb - eu.dnetlib.dhp.oa.graph.clean.cfhb.CleanCfHbSparkJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=7680 - - --inputPath${graphOutputPath}/dataset - --resolvedPath${workingDir}/cfHbResolved/dataset - --outputPath${workingDir}/cfHbPatched/dataset - --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset - --masterDuplicatePath${workingDir}/masterduplicate - - - - - - - - yarn - cluster - patch otherresearchproduct cfhb - eu.dnetlib.dhp.oa.graph.clean.cfhb.CleanCfHbSparkJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=7680 - - --inputPath${graphOutputPath}/otherresearchproduct - --resolvedPath${workingDir}/cfHbResolved/otherresearchproduct - --outputPath${workingDir}/cfHbPatched/otherresearchproduct - --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --masterDuplicatePath${workingDir}/masterduplicate - - - - - - - - yarn - cluster - patch software cfhb - eu.dnetlib.dhp.oa.graph.clean.cfhb.CleanCfHbSparkJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=7680 - - --inputPath${graphOutputPath}/software - --resolvedPath${workingDir}/cfHbResolved/software - --outputPath${workingDir}/cfHbPatched/software - --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software - --masterDuplicatePath${workingDir}/masterduplicate - - - - - - - - - - - - - - - - - - - - ${workingDir}/cfHbPatched/publication - ${graphOutputPath}/publication - - - - - - - - - - - ${workingDir}/cfHbPatched/dataset - ${graphOutputPath}/dataset - - - - - - - - - - - ${workingDir}/cfHbPatched/otherresearchproduct - ${graphOutputPath}/otherresearchproduct - - - - - - - - - - - ${workingDir}/cfHbPatched/software - ${graphOutputPath}/software - - - - - - +