workingPath
the base path where the HDFS files are stored
graphRawPath
the base path of the raw graph
postgresURL
the PostgreSQL URL used to access the database
postgresUser
the PostgreSQL user
postgresPassword
the PostgreSQL password
mongourl
MongoDB URL, for example: mongodb://[username:password@]host[:port]
mongoDb
the MongoDB database
sparkDriverMemory
memory for the driver process
sparkExecutorMemory
memory for each individual executor
sparkExecutorCores
number of cores used by a single executor
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${jobTracker}
${nameNode}
eu.dnetlib.dhp.migration.MigrateDbEntitiesApplication
-p${workingPath}/db_entities
-n${nameNode}
-u${hdfsUser}
-dburl${postgresURL}
-dbuser${postgresUser}
-dbpasswd${postgresPassword}
${jobTracker}
${nameNode}
eu.dnetlib.dhp.migration.MigrateMongoMdstoresApplication
-p${workingPath}/odf_entities
-n${nameNode}
-u${hdfsUser}
-mongourl${mongourl}
-db${mongoDb}
-fODF
-lstore
-icleaned
-pgurl${postgresURL}
-pguser${postgresUser}
-pgpasswd${postgresPassword}
${jobTracker}
${nameNode}
eu.dnetlib.dhp.migration.MigrateMongoMdstoresApplication
-p${workingPath}/oaf_entities
-n${nameNode}
-u${hdfsUser}
-mongourl${mongourl}
-db${mongoDb}
-fOAF
-lstore
-icleaned
-pgurl${postgresURL}
-pguser${postgresUser}
-pgpasswd${postgresPassword}
${jobTracker}
${nameNode}
yarn-cluster
cluster
ExtractEntities: publication
eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob
dhp-aggregation-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mt yarn-cluster
--sourcePath${workingPath}
-g${graphRawPath}/publication
-epublication
${jobTracker}
${nameNode}
yarn-cluster
cluster
ExtractEntities: dataset
eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob
dhp-aggregation-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mt yarn-cluster
--sourcePath${workingPath}
-g${graphRawPath}/dataset
-edataset
${jobTracker}
${nameNode}
yarn-cluster
cluster
ExtractEntities: software
eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob
dhp-aggregation-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mt yarn-cluster
--sourcePath${workingPath}
-g${graphRawPath}/software
-esoftware
${jobTracker}
${nameNode}
yarn-cluster
cluster
ExtractEntities: otherresearchproduct
eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob
dhp-aggregation-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mt yarn-cluster
--sourcePath${workingPath}
-g${graphRawPath}/otherresearchproduct
-eotherresearchproduct
${jobTracker}
${nameNode}
yarn-cluster
cluster
ExtractEntities: datasource
eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob
dhp-aggregation-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mt yarn-cluster
--sourcePath${workingPath}
-g${graphRawPath}/datasource
-edatasource
${jobTracker}
${nameNode}
yarn-cluster
cluster
ExtractEntities: organization
eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob
dhp-aggregation-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mt yarn-cluster
--sourcePath${workingPath}
-g${graphRawPath}/organization
-eorganization
${jobTracker}
${nameNode}
yarn-cluster
cluster
ExtractEntities: project
eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob
dhp-aggregation-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mt yarn-cluster
--sourcePath${workingPath}
-g${graphRawPath}/project
-eproject
${jobTracker}
${nameNode}
yarn-cluster
cluster
ExtractEntities: relation
eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob
dhp-aggregation-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"
-mt yarn-cluster
--sourcePath${workingPath}
-g${graphRawPath}/relation
-erelation