workingPath the base path where the hdfs files are stored graphRawPath the raw graph base path postgresURL the postgres URL used to access the database postgresUser the postgres user postgresPassword the postgres password mongourl mongoDB url, example: mongodb://[username:password@]host[:port] mongoDb mongo database sparkDriverMemory memory for the driver process sparkExecutorMemory memory for each individual executor sparkExecutorCores number of cores used by a single executor Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] ${jobTracker} ${nameNode} eu.dnetlib.dhp.migration.MigrateDbEntitiesApplication -p${workingPath}/db_entities -n${nameNode} -u${hdfsUser} -dburl${postgresURL} -dbuser${postgresUser} -dbpasswd${postgresPassword} ${jobTracker} ${nameNode} eu.dnetlib.dhp.migration.MigrateMongoMdstoresApplication -p${workingPath}/odf_entities -n${nameNode} -u${hdfsUser} -mongourl${mongourl} -db${mongoDb} -fODF -lstore -icleaned -pgurl${postgresURL} -pguser${postgresUser} -pgpasswd${postgresPassword} ${jobTracker} ${nameNode} eu.dnetlib.dhp.migration.MigrateMongoMdstoresApplication -p${workingPath}/oaf_entities -n${nameNode} -u${hdfsUser} -mongourl${mongourl} -db${mongoDb} -fOAF -lstore -icleaned -pgurl${postgresURL} -pguser${postgresUser} -pgpasswd${postgresPassword} ${jobTracker} ${nameNode} yarn-cluster cluster ExtractEntities: publication eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob dhp-aggregation-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse" -mt yarn-cluster --sourcePath${workingPath} -g${graphRawPath}/publication -epublication ${jobTracker} ${nameNode} yarn-cluster cluster ExtractEntities: dataset 
eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob dhp-aggregation-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse" -mt yarn-cluster --sourcePath${workingPath} -g${graphRawPath}/dataset -edataset ${jobTracker} ${nameNode} yarn-cluster cluster ExtractEntities: software eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob dhp-aggregation-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse" -mt yarn-cluster --sourcePath${workingPath} -g${graphRawPath}/software -esoftware ${jobTracker} ${nameNode} yarn-cluster cluster ExtractEntities: otherresearchproduct eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob dhp-aggregation-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse" -mt yarn-cluster --sourcePath${workingPath} -g${graphRawPath}/otherresearchproduct -eotherresearchproduct ${jobTracker} ${nameNode} yarn-cluster cluster ExtractEntities: datasource eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob dhp-aggregation-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} 
--driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse" -mt yarn-cluster --sourcePath${workingPath} -g${graphRawPath}/datasource -edatasource ${jobTracker} ${nameNode} yarn-cluster cluster ExtractEntities: organization eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob dhp-aggregation-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse" -mt yarn-cluster --sourcePath${workingPath} -g${graphRawPath}/organization -eorganization ${jobTracker} ${nameNode} yarn-cluster cluster ExtractEntities: project eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob dhp-aggregation-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse" -mt yarn-cluster --sourcePath${workingPath} -g${graphRawPath}/project -eproject ${jobTracker} ${nameNode} yarn-cluster cluster ExtractEntities: relation eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob dhp-aggregation-${projectVersion}.jar --executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf 
spark.sql.warehouse.dir="/user/hive/warehouse" -mt yarn-cluster --sourcePath${workingPath} -g${graphRawPath}/relation -erelation