inputGraph
the input graph name (or path)
outputGraph
the output graph name (or path)
isLookUpUrl
the address of the lookUp service
inputGraphFormat
HIVE
the input graph data format
outputGraphFormat
HIVE
the output graph data format
hiveMetastoreUris
hive server metastore URIs
sparkDriverMemory
memory for driver process
sparkExecutorMemory
memory for individual executor
sparkExecutorCores
number of cores used by single executor
oozieActionShareLibForSpark2
oozie action sharelib for spark 2.*
spark2ExtraListeners
com.cloudera.spark.lineage.NavigatorAppListener
spark 2.* extra listeners classname
spark2SqlQueryExecutionListeners
com.cloudera.spark.lineage.NavigatorQueryListener
spark 2.* sql query execution listeners classname
spark2YarnHistoryServerAddress
spark 2.* yarn history server address
spark2EventLogDir
spark 2.* event log dir location
sparkSqlWarehouseDir
spark 2.* db directory location
${jobTracker}
${nameNode}
mapreduce.job.queuename
${queueName}
oozie.launcher.mapred.job.queue.name
${oozieLauncherQueueName}
oozie.action.sharelib.for.spark
${oozieActionShareLibForSpark2}
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
${wf:conf('outputGraphFormat') eq 'JSON'}
${wf:conf('outputGraphFormat') eq 'HIVE'}
yarn
cluster
reset_DB
eu.dnetlib.dhp.common.ResetHiveDbApplication
dhp-graph-mapper-${projectVersion}.jar
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=7680
--dbName
${outputGraph}
--hiveMetastoreUris
${hiveMetastoreUris}
yarn
cluster
Clean publications
eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=7680
--inputGraph
${inputGraph}
--outputGraph
${outputGraph}
--inputGraphFormat
${inputGraphFormat}
--outputGraphFormat
${outputGraphFormat}
--graphTableClassName
eu.dnetlib.dhp.schema.oaf.Publication
--isLookUpUrl
${isLookUpUrl}
--hiveMetastoreUris
${hiveMetastoreUris}
yarn
cluster
Clean datasets
eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=7680
--inputGraph
${inputGraph}
--outputGraph
${outputGraph}
--inputGraphFormat
${inputGraphFormat}
--outputGraphFormat
${outputGraphFormat}
--graphTableClassName
eu.dnetlib.dhp.schema.oaf.Dataset
--isLookUpUrl
${isLookUpUrl}
--hiveMetastoreUris
${hiveMetastoreUris}
yarn
cluster
Clean otherresearchproducts
eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=7680
--inputGraph
${inputGraph}
--outputGraph
${outputGraph}
--inputGraphFormat
${inputGraphFormat}
--outputGraphFormat
${outputGraphFormat}
--graphTableClassName
eu.dnetlib.dhp.schema.oaf.OtherResearchProduct
--isLookUpUrl
${isLookUpUrl}
--hiveMetastoreUris
${hiveMetastoreUris}
yarn
cluster
Clean softwares
eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=7680
--inputGraph
${inputGraph}
--outputGraph
${outputGraph}
--inputGraphFormat
${inputGraphFormat}
--outputGraphFormat
${outputGraphFormat}
--graphTableClassName
eu.dnetlib.dhp.schema.oaf.Software
--isLookUpUrl
${isLookUpUrl}
--hiveMetastoreUris
${hiveMetastoreUris}
yarn
cluster
Clean datasources
eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=7680
--inputGraph
${inputGraph}
--outputGraph
${outputGraph}
--inputGraphFormat
${inputGraphFormat}
--outputGraphFormat
${outputGraphFormat}
--graphTableClassName
eu.dnetlib.dhp.schema.oaf.Datasource
--isLookUpUrl
${isLookUpUrl}
--hiveMetastoreUris
${hiveMetastoreUris}
yarn
cluster
Clean organizations
eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=7680
--inputGraph
${inputGraph}
--outputGraph
${outputGraph}
--inputGraphFormat
${inputGraphFormat}
--outputGraphFormat
${outputGraphFormat}
--graphTableClassName
eu.dnetlib.dhp.schema.oaf.Organization
--isLookUpUrl
${isLookUpUrl}
--hiveMetastoreUris
${hiveMetastoreUris}
yarn
cluster
Clean projects
eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=7680
--inputGraph
${inputGraph}
--outputGraph
${outputGraph}
--inputGraphFormat
${inputGraphFormat}
--outputGraphFormat
${outputGraphFormat}
--graphTableClassName
eu.dnetlib.dhp.schema.oaf.Project
--isLookUpUrl
${isLookUpUrl}
--hiveMetastoreUris
${hiveMetastoreUris}
yarn
cluster
Clean relations
eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=7680
--inputGraph
${inputGraph}
--outputGraph
${outputGraph}
--inputGraphFormat
${inputGraphFormat}
--outputGraphFormat
${outputGraphFormat}
--graphTableClassName
eu.dnetlib.dhp.schema.oaf.Relation
--isLookUpUrl
${isLookUpUrl}
--hiveMetastoreUris
${hiveMetastoreUris}