diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/migration/wfs/regular_all_steps/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/migration/wfs/regular_all_steps/oozie_app/workflow.xml index 39807dd365..529e9a536c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/migration/wfs/regular_all_steps/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/migration/wfs/regular_all_steps/oozie_app/workflow.xml @@ -1,16 +1,13 @@ - migrationPathStep1 - the base path to store hdfs file + workingPath + /tmp/dhp_migration + the base path to store temporary intermediate data - migrationPathStep2 - the temporary path to store entities before dispatching - - - migrationPathStep3 - the graph Raw base path + graphBasePath + the target path to store raw graph postgresURL @@ -54,8 +51,10 @@ - - + + + + @@ -66,7 +65,7 @@ ${jobTracker} ${nameNode} eu.dnetlib.dhp.migration.step1.MigrateDbEntitiesApplication - -p${migrationPathStep1}/db_records + -p${workingPath}/db_records -pgurl${postgresURL} -pguser${postgresUser} -pgpasswd${postgresPassword} @@ -80,7 +79,7 @@ ${jobTracker} ${nameNode} eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication - -p${migrationPathStep1}/odf_records + -p${workingPath}/odf_records -mongourl${mongoURL} -mongodb${mongoDb} -fODF @@ -96,7 +95,7 @@ ${jobTracker} ${nameNode} eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication - -p${migrationPathStep1}/oaf_records + -p${workingPath}/oaf_records -mongourl${mongoURL} -mongodb${mongoDb} -fOAF @@ -125,10 +124,17 @@ GenerateEntities eu.dnetlib.dhp.migration.step2.GenerateEntitiesApplication dhp-aggregation-${projectVersion}.jar - --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse" + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" + --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" + --conf spark.sql.warehouse.dir="/user/hive/warehouse" + -mt yarn-cluster - -s${migrationPathStep1}/db_records,${migrationPathStep1}/oaf_records,${migrationPathStep1}/odf_records - -t${migrationPathStep2}/all_entities + -s${workingPath}/db_records,${workingPath}/oaf_records,${workingPath}/odf_records + -t${workingPath}/all_entities -pgurl${postgresURL} -pguser${postgresUser} -pgpasswd${postgresPassword} @@ -155,10 +161,17 @@ GenerateGraph eu.dnetlib.dhp.migration.step3.DispatchEntitiesApplication dhp-aggregation-${projectVersion}.jar - --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse" + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" + --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" + --conf spark.sql.warehouse.dir="/user/hive/warehouse" + -mt yarn-cluster - -s${migrationPathStep2}/all_entities - -g${migrationPathStep3} + -s${workingPath}/all_entities + -g${graphBasePath}/graph_raw