inputPath
the source path
hiveDbName
the target hive database name
hiveJdbcUrl
hive server jdbc url
hiveMetastoreUris
hive server metastore URIs
sparkDriverMemory
memory for driver process
sparkExecutorMemory
memory for individual executor
sparkExecutorCores
number of cores used by single executor
oozieActionShareLibForSpark2
oozie action sharelib for spark 2.*
spark2ExtraListeners
com.cloudera.spark.lineage.NavigatorAppListener
spark 2.* extra listeners classname
spark2SqlQueryExecutionListeners
com.cloudera.spark.lineage.NavigatorQueryListener
spark 2.* sql query execution listeners classname
spark2YarnHistoryServerAddress
spark 2.* yarn history server address
spark2EventLogDir
spark 2.* event log dir location
${jobTracker}
${nameNode}
mapreduce.job.queuename
${queueName}
oozie.launcher.mapred.job.queue.name
${oozieLauncherQueueName}
oozie.action.sharelib.for.spark
${oozieActionShareLibForSpark2}
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
hive.metastore.uris
${hiveMetastoreUris}
${hiveJdbcUrl}/${hiveDbName}
hiveDbName=${hiveDbName}
yarn
cluster
Import table publication
eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob
dhp-graph-mapper-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--inputPath${inputPath}/publication
--hiveDbName${hiveDbName}
--classNameeu.dnetlib.dhp.schema.oaf.Publication
--hiveMetastoreUris${hiveMetastoreUris}
yarn
cluster
Import table dataset
eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob
dhp-graph-mapper-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--inputPath${inputPath}/dataset
--hiveDbName${hiveDbName}
--classNameeu.dnetlib.dhp.schema.oaf.Dataset
--hiveMetastoreUris${hiveMetastoreUris}
yarn
cluster
Import table otherresearchproduct
eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob
dhp-graph-mapper-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--inputPath${inputPath}/otherresearchproduct
--hiveDbName${hiveDbName}
--classNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
--hiveMetastoreUris${hiveMetastoreUris}
yarn
cluster
Import table software
eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob
dhp-graph-mapper-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--inputPath${inputPath}/software
--hiveDbName${hiveDbName}
--classNameeu.dnetlib.dhp.schema.oaf.Software
--hiveMetastoreUris${hiveMetastoreUris}
yarn
cluster
Import table datasource
eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob
dhp-graph-mapper-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--inputPath${inputPath}/datasource
--hiveDbName${hiveDbName}
--classNameeu.dnetlib.dhp.schema.oaf.Datasource
--hiveMetastoreUris${hiveMetastoreUris}
yarn
cluster
Import table organization
eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob
dhp-graph-mapper-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--inputPath${inputPath}/organization
--hiveDbName${hiveDbName}
--classNameeu.dnetlib.dhp.schema.oaf.Organization
--hiveMetastoreUris${hiveMetastoreUris}
yarn
cluster
Import table project
eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob
dhp-graph-mapper-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--inputPath${inputPath}/project
--hiveDbName${hiveDbName}
--classNameeu.dnetlib.dhp.schema.oaf.Project
--hiveMetastoreUris${hiveMetastoreUris}
--numPartitions100
yarn
cluster
Import table relation
eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob
dhp-graph-mapper-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--inputPath${inputPath}/relation
--hiveDbName${hiveDbName}
--classNameeu.dnetlib.dhp.schema.oaf.Relation
--hiveMetastoreUris${hiveMetastoreUris}
hive.metastore.uris
${hiveMetastoreUris}
${hiveJdbcUrl}/${hiveDbName}
hiveDbName=${hiveDbName}