workingPath
/tmp/dhp_migration
the base path to store temporary intermediate data
graphBasePath
the target path to store raw graph
reuseContent
false
should import content from the aggregator or reuse a previous version
postgresURL
the postgres URL to access the database
postgresUser
the postgres user
postgresPassword
the postgres password
mongoURL
mongoDB url, example: mongodb://[username:password@]host[:port]
mongoDb
mongo database
sparkDriverMemory
memory for driver process
sparkExecutorMemory
memory for individual executor
sparkExecutorCores
number of cores used by single executor
${jobTracker}
${nameNode}
mapreduce.job.queuename
${queueName}
oozie.launcher.mapred.job.queue.name
${oozieLauncherQueueName}
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${wf:conf('reuseContent') eq false}
${wf:conf('reuseContent') eq true}
eu.dnetlib.dhp.migration.step1.MigrateDbEntitiesApplication
-p${workingPath}/db_records
-pgurl${postgresURL}
-pguser${postgresUser}
-pgpasswd${postgresPassword}
eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication
-p${workingPath}/odf_records
-mongourl${mongoURL}
-mongodb${mongoDb}
-fODF
-lstore
-icleaned
eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication
-p${workingPath}/oaf_records
-mongourl${mongoURL}
-mongodb${mongoDb}
-fOAF
-lstore
-icleaned
yarn
cluster
GenerateEntities
eu.dnetlib.dhp.migration.step2.GenerateEntitiesApplication
dhp-aggregation-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
--conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
--conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mt yarn-cluster
-s${workingPath}/db_records,${workingPath}/oaf_records,${workingPath}/odf_records
-t${workingPath}/all_entities
-pgurl${postgresURL}
-pguser${postgresUser}
-pgpasswd${postgresPassword}
yarn
cluster
GenerateGraph
eu.dnetlib.dhp.migration.step3.DispatchEntitiesApplication
dhp-aggregation-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
--conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
--conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mt yarn-cluster
-s${workingPath}/all_entities
-g${graphBasePath}/graph_raw