betaInputGgraphPath
the beta graph root path
prodInputGgraphPath
the production graph root path
graphOutputPath
the root path of the merged output graph
priority
decides which infrastructure's content (beta or production) wins in case of an ID clash
sparkDriverMemory
memory for the Spark driver process
sparkExecutorMemory
memory for each individual Spark executor
sparkExecutorCores
number of cores used by a single executor
oozieActionShareLibForSpark2
oozie action sharelib for spark 2.*
spark2ExtraListeners
com.cloudera.spark.lineage.NavigatorAppListener
spark 2.* extra listeners classname
spark2SqlQueryExecutionListeners
com.cloudera.spark.lineage.NavigatorQueryListener
spark 2.* sql query execution listeners classname
spark2YarnHistoryServerAddress
spark 2.* yarn history server address
spark2EventLogDir
spark 2.* event log dir location
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
yarn
cluster
Merge publications
eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--betaInputPath${betaInputGgraphPath}/publication
--prodInputPath${prodInputGgraphPath}/publication
--outputPath${graphOutputPath}/publication
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication
--priority${priority}
yarn
cluster
Merge datasets
eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--betaInputPath${betaInputGgraphPath}/dataset
--prodInputPath${prodInputGgraphPath}/dataset
--outputPath${graphOutputPath}/dataset
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset
--priority${priority}
yarn
cluster
Merge otherresearchproducts
eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--betaInputPath${betaInputGgraphPath}/otherresearchproduct
--prodInputPath${prodInputGgraphPath}/otherresearchproduct
--outputPath${graphOutputPath}/otherresearchproduct
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
--priority${priority}
yarn
cluster
Merge softwares
eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--betaInputPath${betaInputGgraphPath}/software
--prodInputPath${prodInputGgraphPath}/software
--outputPath${graphOutputPath}/software
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software
--priority${priority}
yarn
cluster
Merge datasources
eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--betaInputPath${betaInputGgraphPath}/datasource
--prodInputPath${prodInputGgraphPath}/datasource
--outputPath${graphOutputPath}/datasource
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Datasource
--priority${priority}
yarn
cluster
Merge organizations
eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--betaInputPath${betaInputGgraphPath}/organization
--prodInputPath${prodInputGgraphPath}/organization
--outputPath${graphOutputPath}/organization
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Organization
--priority${priority}
yarn
cluster
Merge projects
eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--betaInputPath${betaInputGgraphPath}/project
--prodInputPath${prodInputGgraphPath}/project
--outputPath${graphOutputPath}/project
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Project
--priority${priority}
yarn
cluster
Merge relations
eu.dnetlib.dhp.oa.graph.merge.MergeGraphTableSparkJob
dhp-graph-mapper-${projectVersion}.jar
--executor-cores=${sparkExecutorCores}
--executor-memory=${sparkExecutorMemory}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
--betaInputPath${betaInputGgraphPath}/relation
--prodInputPath${prodInputGgraphPath}/relation
--outputPath${graphOutputPath}/relation
--graphTableClassNameeu.dnetlib.dhp.schema.oaf.Relation
--priority${priority}