workingPath
/tmp/dhp_migration
the base path to store temporary intermediate data
graphBasePath
the target path to store raw graph
reuseContent
false
should import content from the aggregator or reuse a previous version
postgresURL
the postgres URL to access the database
postgresUser
the postgres user
postgresPassword
the postgres password
mongoURL
mongoDB url, example: mongodb://[username:password@]host[:port]
mongoDb
mongo database
sparkDriverMemory
memory for driver process
sparkExecutorMemory
memory for individual executor
sparkExecutorCores
number of cores used by single executor
${jobTracker}
${nameNode}
mapreduce.job.queuename
${queueName}
oozie.launcher.mapred.job.queue.name
${oozieLauncherQueueName}
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${wf:conf('reuseContent') eq false}
${wf:conf('reuseContent') eq true}
eu.dnetlib.dhp.migration.step1.MigrateDbEntitiesApplication
-p${workingPath}/db_records
-pgurl${postgresURL}
-pguser${postgresUser}
-pgpasswd${postgresPassword}
eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication
-p${workingPath}/odf_records
-mongourl${mongoURL}
-mongodb${mongoDb}
-fODF
-lstore
-icleaned
eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication
-p${workingPath}/oaf_records
-mongourl${mongoURL}
-mongodb${mongoDb}
-fOAF
-lstore
-icleaned
yarn
cluster
GenerateEntities
eu.dnetlib.dhp.migration.step2.GenerateEntitiesApplication
dhp-aggregation-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
--conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
--conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mt yarn-cluster
-s${workingPath}/db_records,${workingPath}/oaf_records,${workingPath}/odf_records
-t${workingPath}/all_entities
-pgurl${postgresURL}
-pguser${postgresUser}
-pgpasswd${postgresPassword}
yarn
cluster
GenerateGraph
eu.dnetlib.dhp.migration.step3.DispatchEntitiesApplication
dhp-aggregation-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
--conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
--conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mt yarn-cluster
-s${workingPath}/all_entities
-g${graphBasePath}/graph_raw